diff --git a/bfps/DNS.py b/bfps/DNS.py index e6ace758a1f6efb240cfa0655cc661a83a1ab6e2..eb51862e414437b50151df870c739ebdcf23dae0 100644 --- a/bfps/DNS.py +++ b/bfps/DNS.py @@ -120,6 +120,7 @@ class DNS(_code): return None def generate_default_parameters(self): # these parameters are relevant for all DNS classes + self.parameters['fftw_plan_rigor'] = 'FFTW_ESTIMATE' self.parameters['dealias_type'] = int(1) self.parameters['dkx'] = float(1.0) self.parameters['dky'] = float(1.0) @@ -929,18 +930,9 @@ class DNS(_code): 3) src_file = h5py.File(src_file_name, 'r') if (src_file[src_dset_name].shape == dst_shape): - if make_link and (src_file[src_dset_name].dtype == self.ctype): - dst_file[dst_dset_name] = h5py.ExternalLink( - src_file_name, - src_dset_name) - else: - dst_file.create_dataset( - dst_dset_name, - shape = dst_shape, - dtype = self.ctype, - fillvalue = 0.0) - for kz in range(src_file[src_dset_name].shape[0]): - dst_file[dst_dset_name][kz] = src_file[src_dset_name][kz] + dst_file[dst_dset_name] = h5py.ExternalLink( + src_file_name, + src_dset_name) else: min_shape = (min(dst_shape[0], src_file[src_dset_name].shape[0]), min(dst_shape[1], src_file[src_dset_name].shape[1]), diff --git a/bfps/FluidConvert.py b/bfps/FluidConvert.py deleted file mode 100644 index 58d19116bfb8ab386ef9783babb2ad8da79760e4..0000000000000000000000000000000000000000 --- a/bfps/FluidConvert.py +++ /dev/null @@ -1,140 +0,0 @@ -####################################################################### -# # -# Copyright 2015 Max Planck Institute # -# for Dynamics and Self-Organization # -# # -# This file is part of bfps. # -# # -# bfps is free software: you can redistribute it and/or modify # -# it under the terms of the GNU General Public License as published # -# by the Free Software Foundation, either version 3 of the License, # -# or (at your option) any later version. # -# # -# bfps is distributed in the hope that it will be useful, # -# but WITHOUT ANY WARRANTY; without even the implied warranty of # -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # -# GNU General Public License for more details. # -# # -# You should have received a copy of the GNU General Public License # -# along with bfps. If not, see <http://www.gnu.org/licenses/> # -# # -# Contact: Cristian.Lalescu@ds.mpg.de # -# # -####################################################################### - - - -import numpy as np -import pickle -import os -from ._fluid_base import _fluid_particle_base -from ._base import _base -import bfps - -class FluidConvert(_fluid_particle_base): - """This class is meant to be used for conversion of native DNS field - representations to real-space representations of velocity/vorticity - fields. - It may be superseeded by streamlined functionality in the future... - """ - def __init__( - self, - name = 'FluidConvert-v' + bfps.__version__, - work_dir = './', - simname = 'test', - fluid_precision = 'single', - use_fftw_wisdom = False): - _fluid_particle_base.__init__( - self, - name = name + '-' + fluid_precision, - work_dir = work_dir, - simname = simname, - dtype = fluid_precision, - use_fftw_wisdom = use_fftw_wisdom) - self.spec_parameters = {} - self.spec_parameters['write_rvelocity'] = 1 - self.spec_parameters['write_rvorticity'] = 1 - self.spec_parameters['write_rTrS2'] = 1 - self.spec_parameters['write_renstrophy'] = 1 - self.spec_parameters['write_rpressure'] = 1 - self.spec_parameters['iter0'] = 0 - self.spec_parameters['iter1'] = -1 - self.fill_up_fluid_code() - self.finalize_code(postprocess_mode = True) - return None - def fill_up_fluid_code(self): - self.definitions += self.cread_pars( - parameters = self.spec_parameters, - function_suffix = '_specific', - file_group = 'conversion_parameters') - self.variables += self.cdef_pars( - parameters = self.spec_parameters) - self.main_start += 'read_parameters_specific();\n' - self.fluid_includes += '#include <cstring>\n' - self.fluid_variables += ('double t;\n' + - 'fluid_solver<{0}> *fs;\n').format(self.C_dtype) - self.fluid_definitions += """ - //begincpp - void do_conversion(fluid_solver<{0}> *bla) - {{ - bla->read('v', 'c'); - if (write_rvelocity) - bla->write('u', 'r'); - if (write_rvorticity) - bla->write('v', 'r'); - if (write_rTrS2) - bla->write_rTrS2(); - if (write_renstrophy) - bla->write_renstrophy(); - if (write_rpressure) - bla->write_rpressure(); - }} - //endcpp - """.format(self.C_dtype) - self.fluid_start += """ - //begincpp - fs = new fluid_solver<{0}>( - simname, - nx, ny, nz, - dkx, dky, dkz, - dealias_type, - DEFAULT_FFTW_FLAG); - //endcpp - """.format(self.C_dtype) - self.fluid_loop += """ - //begincpp - fs->iteration = frame_index; - do_conversion(fs); - //endcpp - """ - self.fluid_end += 'delete fs;\n' - return None - def specific_parser_arguments( - self, - parser): - _fluid_particle_base.specific_parser_arguments(self, parser) - self.parameters_to_parser_arguments( - parser, - parameters = self.spec_parameters) - return None - def launch( - self, - args = [], - **kwargs): - opt = self.prepare_launch(args) - if opt.iter1 == -1: - opt.iter1 = self.get_data_file()['iteration'].value - self.pars_from_namespace( - opt, - parameters = self.spec_parameters) - self.rewrite_par( - group = 'conversion_parameters', - parameters = self.spec_parameters) - self.run(opt.nb_processes, - 1, - hours = opt.minutes // 60, - minutes = opt.minutes % 60, - err_file = 'err_convert', - out_file = 'out_convert') - return None - diff --git a/bfps/NSManyParticles.py b/bfps/NSManyParticles.py deleted file mode 100644 index 03f7345f61b27299bd2da60ea0c4d44924112837..0000000000000000000000000000000000000000 --- a/bfps/NSManyParticles.py +++ /dev/null @@ -1,92 +0,0 @@ -####################################################################### -# # -# Copyright 2015 Max Planck Institute # -# for Dynamics and Self-Organization # -# # -# This file is part of bfps. # -# # -# bfps is free software: you can redistribute it and/or modify # -# it under the terms of the GNU General Public License as published # -# by the Free Software Foundation, either version 3 of the License, # -# or (at your option) any later version. # -# # -# bfps is distributed in the hope that it will be useful, # -# but WITHOUT ANY WARRANTY; without even the implied warranty of # -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # -# GNU General Public License for more details. # -# # -# You should have received a copy of the GNU General Public License # -# along with bfps. If not, see <http://www.gnu.org/licenses/> # -# # -# Contact: Cristian.Lalescu@ds.mpg.de # -# # -####################################################################### - - - -import bfps - -class NSManyParticles(bfps.NavierStokes): - def specific_parser_arguments( - self, - parser): - bfps.NavierStokes.specific_parser_arguments(self, parser) - parser.add_argument( - '--particle-class', - default = 'rFFTW_distributed_particles', - dest = 'particle_class', - type = str) - parser.add_argument( - '--interpolator-class', - default = 'rFFTW_interpolator', - dest = 'interpolator_class', - type = str) - parser.add_argument('--neighbours', - type = int, - dest = 'neighbours', - default = 3) - parser.add_argument('--smoothness', - type = int, - dest = 'smoothness', - default = 2) - return None - def launch( - self, - args = [], - **kwargs): - opt = self.prepare_launch(args = args) - self.fill_up_fluid_code() - if type(opt.nparticles) == int: - if opt.nparticles > 0: - self.add_3D_rFFTW_field( - name = 'rFFTW_acc') - interp_list = [] - for n in range(1, opt.neighbours): - interp_list.append('Lagrange_n{0}'.format(n)) - self.add_interpolator( - interp_type = 'Lagrange', - name = interp_list[-1], - neighbours = n, - class_name = opt.interpolator_class) - for m in range(1, opt.smoothness): - interp_list.append('spline_n{0}m{1}'.format(n, m)) - self.add_interpolator( - interp_type = 'spline', - name = interp_list[-1], - neighbours = n, - smoothness = m, - class_name = opt.interpolator_class) - self.add_particles( - integration_steps = 2, - interpolator = interp_list, - acc_name = 'rFFTW_acc', - class_name = opt.particle_class) - self.add_particles( - integration_steps = 4, - interpolator = interp_list, - acc_name = 'rFFTW_acc', - class_name = opt.particle_class) - self.finalize_code() - self.launch_jobs(opt = opt) - return None - diff --git a/bfps/NSVorticityEquation.py b/bfps/NSVorticityEquation.py deleted file mode 100644 index 5f87097fefbb56f731a75597395d42423fc17ba6..0000000000000000000000000000000000000000 --- a/bfps/NSVorticityEquation.py +++ /dev/null @@ -1,864 +0,0 @@ -####################################################################### -# # -# Copyright 2015 Max Planck Institute # -# for Dynamics and Self-Organization # -# # -# This file is part of bfps. # -# # -# bfps is free software: you can redistribute it and/or modify # -# it under the terms of the GNU General Public License as published # -# by the Free Software Foundation, either version 3 of the License, # -# or (at your option) any later version. # -# # -# bfps is distributed in the hope that it will be useful, # -# but WITHOUT ANY WARRANTY; without even the implied warranty of # -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # -# GNU General Public License for more details. # -# # -# You should have received a copy of the GNU General Public License # -# along with bfps. If not, see <http://www.gnu.org/licenses/> # -# # -# Contact: Cristian.Lalescu@ds.mpg.de # -# # -####################################################################### - - - -import sys -import os -import numpy as np -import h5py -import argparse - -import bfps -import bfps.tools -from bfps._code import _code -from bfps._fluid_base import _fluid_particle_base - -class NSVorticityEquation(_fluid_particle_base): - def __init__( - self, - name = 'NSVorticityEquation-v' + bfps.__version__, - work_dir = './', - simname = 'test', - fluid_precision = 'single', - fftw_plan_rigor = 'FFTW_MEASURE', - use_fftw_wisdom = True): - """ - This code uses checkpoints for DNS restarts, and it can be stopped - by creating the file "stop_<simname>" in the working directory. - For postprocessing of field snapshots, consider creating a separate - HDF5 file (from the python wrapper) which contains links to all the - different snapshots. - """ - self.fftw_plan_rigor = fftw_plan_rigor - _fluid_particle_base.__init__( - self, - name = name + '-' + fluid_precision, - work_dir = work_dir, - simname = simname, - dtype = fluid_precision, - use_fftw_wisdom = use_fftw_wisdom) - self.parameters['nu'] = float(0.1) - self.parameters['fmode'] = 1 - self.parameters['famplitude'] = float(0.5) - self.parameters['fk0'] = float(2.0) - self.parameters['fk1'] = float(4.0) - self.parameters['forcing_type'] = 'linear' - self.parameters['histogram_bins'] = int(256) - self.parameters['max_velocity_estimate'] = float(1) - self.parameters['max_vorticity_estimate'] = float(1) - self.parameters['checkpoints_per_file'] = int(1) - self.file_datasets_grow = """ - //begincpp - hid_t group; - group = H5Gopen(stat_file, "/statistics", H5P_DEFAULT); - H5Ovisit(group, H5_INDEX_NAME, H5_ITER_NATIVE, grow_statistics_dataset, NULL); - H5Gclose(group); - //endcpp - """ - self.style = {} - self.statistics = {} - self.fluid_output = """ - fs->io_checkpoint(false); - """ - # vorticity_equation specific things - self.includes += '#include "vorticity_equation.hpp"\n' - self.store_kspace = """ - //begincpp - if (myrank == 0 && iteration == 0) - { - TIMEZONE("fluid_base::store_kspace"); - hsize_t dims[4]; - hid_t space, dset; - // store kspace information - dset = H5Dopen(stat_file, "/kspace/kshell", H5P_DEFAULT); - space = H5Dget_space(dset); - H5Sget_simple_extent_dims(space, dims, NULL); - H5Sclose(space); - if (fs->kk->nshells != dims[0]) - { - DEBUG_MSG( - "ERROR: computed nshells %d not equal to data file nshells %d\\n", - fs->kk->nshells, dims[0]); - } - H5Dwrite(dset, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, &fs->kk->kshell.front()); - H5Dclose(dset); - dset = H5Dopen(stat_file, "/kspace/nshell", H5P_DEFAULT); - H5Dwrite(dset, H5T_NATIVE_INT64, H5S_ALL, H5S_ALL, H5P_DEFAULT, &fs->kk->nshell.front()); - H5Dclose(dset); - dset = H5Dopen(stat_file, "/kspace/kM", H5P_DEFAULT); - H5Dwrite(dset, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, &fs->kk->kM); - H5Dclose(dset); - dset = H5Dopen(stat_file, "/kspace/dk", H5P_DEFAULT); - H5Dwrite(dset, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, &fs->kk->dk); - H5Dclose(dset); - } - //endcpp - """ - return None - def add_particles( - self, - integration_steps = 2, - neighbours = 1, - smoothness = 1): - assert(integration_steps > 0 and integration_steps < 6) - self.particle_species = 1 - self.parameters['tracers0_integration_steps'] = int(integration_steps) - self.parameters['tracers0_neighbours'] = int(neighbours) - self.parameters['tracers0_smoothness'] = int(smoothness) - self.parameters['tracers0_interpolator'] = 'spline' - self.particle_includes += """ - #include "particles/particles_system_builder.hpp" - #include "particles/particles_output_hdf5.hpp" - """ - ## initialize - self.particle_start += """ - DEBUG_MSG( - "current fname is %s\\n and iteration is %d", - fs->get_current_fname().c_str(), - fs->iteration); - std::unique_ptr<abstract_particles_system<long long int, double>> ps = particles_system_builder( - fs->cvelocity, // (field object) - fs->kk, // (kspace object, contains dkx, dky, dkz) - tracers0_integration_steps, // to check coherency between parameters and hdf input file (nb rhs) - (long long int)nparticles, // to check coherency between parameters and hdf input file - fs->get_current_fname(), // particles input filename - std::string("/tracers0/state/") + std::to_string(fs->iteration), // dataset name for initial input - std::string("/tracers0/rhs/") + std::to_string(fs->iteration), // dataset name for initial input - tracers0_neighbours, // parameter (interpolation no neighbours) - tracers0_smoothness, // parameter - MPI_COMM_WORLD, - fs->iteration+1); - particles_output_hdf5<long long int, double,3,3> particles_output_writer_mpi( - MPI_COMM_WORLD, - "tracers0", - nparticles, - tracers0_integration_steps); - """ - self.particle_loop += """ - fs->compute_velocity(fs->cvorticity); - fs->cvelocity->ift(); - ps->completeLoop(dt); - """ - self.particle_output = """ - { - particles_output_writer_mpi.open_file(fs->get_current_fname()); - particles_output_writer_mpi.save(ps->getParticlesPositions(), - ps->getParticlesRhs(), - ps->getParticlesIndexes(), - ps->getLocalNbParticles(), - fs->iteration); - particles_output_writer_mpi.close_file(); - } - """ - self.particle_end += 'ps.release();\n' - return None - def create_stat_output( - self, - dset_name, - data_buffer, - data_type = 'H5T_NATIVE_DOUBLE', - size_setup = None, - close_spaces = True): - new_stat_output_txt = 'Cdset = H5Dopen(stat_file, "{0}", H5P_DEFAULT);\n'.format(dset_name) - if not type(size_setup) == type(None): - new_stat_output_txt += ( - size_setup + - 'wspace = H5Dget_space(Cdset);\n' + - 'ndims = H5Sget_simple_extent_dims(wspace, dims, NULL);\n' + - 'mspace = H5Screate_simple(ndims, count, NULL);\n' + - 'H5Sselect_hyperslab(wspace, H5S_SELECT_SET, offset, NULL, count, NULL);\n') - new_stat_output_txt += ('H5Dwrite(Cdset, {0}, mspace, wspace, H5P_DEFAULT, {1});\n' + - 'H5Dclose(Cdset);\n').format(data_type, data_buffer) - if close_spaces: - new_stat_output_txt += ('H5Sclose(mspace);\n' + - 'H5Sclose(wspace);\n') - return new_stat_output_txt - def write_fluid_stats(self): - self.fluid_includes += '#include <cmath>\n' - self.fluid_includes += '#include "fftw_tools.hpp"\n' - self.stat_src += """ - //begincpp - hid_t stat_group; - if (myrank == 0) - stat_group = H5Gopen(stat_file, "statistics", H5P_DEFAULT); - fs->compute_velocity(fs->cvorticity); - *tmp_vec_field = fs->cvelocity->get_cdata(); - tmp_vec_field->compute_stats( - fs->kk, - stat_group, - "velocity", - fs->iteration / niter_stat, - max_velocity_estimate/sqrt(3)); - //endcpp - """ - self.stat_src += """ - //begincpp - *tmp_vec_field = fs->cvorticity->get_cdata(); - tmp_vec_field->compute_stats( - fs->kk, - stat_group, - "vorticity", - fs->iteration / niter_stat, - max_vorticity_estimate/sqrt(3)); - //endcpp - """ - self.stat_src += """ - //begincpp - if (myrank == 0) - H5Gclose(stat_group); - if (myrank == 0) - {{ - hid_t Cdset, wspace, mspace; - int ndims; - hsize_t count[4], offset[4], dims[4]; - offset[0] = fs->iteration/niter_stat; - offset[1] = 0; - offset[2] = 0; - offset[3] = 0; - //endcpp - """.format(self.C_dtype) - if self.dtype == np.float32: - field_H5T = 'H5T_NATIVE_FLOAT' - elif self.dtype == np.float64: - field_H5T = 'H5T_NATIVE_DOUBLE' - self.stat_src += self.create_stat_output( - '/statistics/xlines/velocity', - 'fs->rvelocity->get_rdata()', - data_type = field_H5T, - size_setup = """ - count[0] = 1; - count[1] = nx; - count[2] = 3; - """, - close_spaces = False) - self.stat_src += self.create_stat_output( - '/statistics/xlines/vorticity', - 'fs->rvorticity->get_rdata()', - data_type = field_H5T) - self.stat_src += '}\n' - ## checkpoint - self.stat_src += """ - //begincpp - if (myrank == 0) - { - std::string fname = ( - std::string("stop_") + - std::string(simname)); - { - struct stat file_buffer; - stop_code_now = (stat(fname.c_str(), &file_buffer) == 0); - } - } - MPI_Bcast(&stop_code_now, 1, MPI_C_BOOL, 0, MPI_COMM_WORLD); - //endcpp - """ - return None - def fill_up_fluid_code(self): - self.fluid_includes += '#include <cstring>\n' - self.fluid_variables += ( - 'vorticity_equation<{0}, FFTW> *fs;\n'.format(self.C_dtype) + - 'field<{0}, FFTW, THREE> *tmp_vec_field;\n'.format(self.C_dtype) + - 'field<{0}, FFTW, ONE> *tmp_scal_field;\n'.format(self.C_dtype)) - self.fluid_definitions += """ - typedef struct {{ - {0} re; - {0} im; - }} tmp_complex_type; - """.format(self.C_dtype) - self.write_fluid_stats() - if self.dtype == np.float32: - field_H5T = 'H5T_NATIVE_FLOAT' - elif self.dtype == np.float64: - field_H5T = 'H5T_NATIVE_DOUBLE' - self.variables += 'int checkpoint;\n' - self.variables += 'bool stop_code_now;\n' - self.read_checkpoint = """ - //begincpp - if (myrank == 0) - { - hid_t dset = H5Dopen(stat_file, "checkpoint", H5P_DEFAULT); - H5Dread( - dset, - H5T_NATIVE_INT, - H5S_ALL, - H5S_ALL, - H5P_DEFAULT, - &checkpoint); - H5Dclose(dset); - } - MPI_Bcast(&checkpoint, 1, MPI_INT, 0, MPI_COMM_WORLD); - fs->checkpoint = checkpoint; - //endcpp - """ - self.store_checkpoint = """ - //begincpp - checkpoint = fs->checkpoint; - if (myrank == 0) - { - hid_t dset = H5Dopen(stat_file, "checkpoint", H5P_DEFAULT); - H5Dwrite( - dset, - H5T_NATIVE_INT, - H5S_ALL, - H5S_ALL, - H5P_DEFAULT, - &checkpoint); - H5Dclose(dset); - } - //endcpp - """ - self.fluid_start += """ - //begincpp - char fname[512]; - fs = new vorticity_equation<{0}, FFTW>( - simname, - nx, ny, nz, - dkx, dky, dkz, - {1}); - tmp_vec_field = new field<{0}, FFTW, THREE>( - nx, ny, nz, - MPI_COMM_WORLD, - {1}); - tmp_scal_field = new field<{0}, FFTW, ONE>( - nx, ny, nz, - MPI_COMM_WORLD, - {1}); - fs->checkpoints_per_file = checkpoints_per_file; - fs->nu = nu; - fs->fmode = fmode; - fs->famplitude = famplitude; - fs->fk0 = fk0; - fs->fk1 = fk1; - strncpy(fs->forcing_type, forcing_type, 128); - fs->iteration = iteration; - {2} - fs->cvorticity->real_space_representation = false; - fs->io_checkpoint(); - //endcpp - """.format( - self.C_dtype, - self.fftw_plan_rigor, - self.read_checkpoint) - self.fluid_start += self.store_kspace - self.fluid_start += 'stop_code_now = false;\n' - self.fluid_loop = 'fs->step(dt);\n' - self.fluid_loop += ('if (fs->iteration % niter_out == 0)\n{\n' + - self.fluid_output + - self.particle_output + - self.store_checkpoint + - '\n}\n' + - 'if (stop_code_now){\n' + - 'iteration = fs->iteration;\n' + - 'break;\n}\n') - self.fluid_end = ('if (fs->iteration % niter_out != 0)\n{\n' + - self.fluid_output + - self.particle_output + - self.store_checkpoint + - 'DEBUG_MSG("checkpoint value is %d\\n", checkpoint);\n' + - '\n}\n' + - 'delete fs;\n' + - 'delete tmp_vec_field;\n' + - 'delete tmp_scal_field;\n') - return None - def get_postprocess_file_name(self): - return os.path.join(self.work_dir, self.simname + '_postprocess.h5') - def get_postprocess_file(self): - return h5py.File(self.get_postprocess_file_name(), 'r') - def compute_statistics(self, iter0 = 0, iter1 = None): - """Run basic postprocessing on raw data. - The energy spectrum :math:`E(t, k)` and the enstrophy spectrum - :math:`\\frac{1}{2}\omega^2(t, k)` are computed from the - - .. math:: - - \sum_{k \\leq \\|\\mathbf{k}\\| \\leq k+dk}\\hat{u_i} \\hat{u_j}^*, \\hskip .5cm - \sum_{k \\leq \\|\\mathbf{k}\\| \\leq k+dk}\\hat{\omega_i} \\hat{\\omega_j}^* - - tensors, and the enstrophy spectrum is also used to - compute the dissipation :math:`\\varepsilon(t)`. - These basic quantities are stored in a newly created HDF5 file, - ``simname_postprocess.h5``. - """ - if len(list(self.statistics.keys())) > 0: - return None - self.read_parameters() - with self.get_data_file() as data_file: - if 'moments' not in data_file['statistics'].keys(): - return None - iter0 = min((data_file['statistics/moments/velocity'].shape[0] * - self.parameters['niter_stat']-1), - iter0) - if type(iter1) == type(None): - iter1 = data_file['iteration'].value - else: - iter1 = min(data_file['iteration'].value, iter1) - ii0 = iter0 // self.parameters['niter_stat'] - ii1 = iter1 // self.parameters['niter_stat'] - self.statistics['kshell'] = data_file['kspace/kshell'].value - self.statistics['kM'] = data_file['kspace/kM'].value - self.statistics['dk'] = data_file['kspace/dk'].value - computation_needed = True - pp_file = h5py.File(self.get_postprocess_file_name(), 'a') - if 'ii0' in pp_file.keys(): - computation_needed = not (ii0 == pp_file['ii0'].value and - ii1 == pp_file['ii1'].value) - if computation_needed: - for k in pp_file.keys(): - del pp_file[k] - if computation_needed: - pp_file['iter0'] = iter0 - pp_file['iter1'] = iter1 - pp_file['ii0'] = ii0 - pp_file['ii1'] = ii1 - pp_file['t'] = (self.parameters['dt']* - self.parameters['niter_stat']* - (np.arange(ii0, ii1+1).astype(np.float))) - pp_file['energy(t, k)'] = ( - data_file['statistics/spectra/velocity_velocity'][ii0:ii1+1, :, 0, 0] + - data_file['statistics/spectra/velocity_velocity'][ii0:ii1+1, :, 1, 1] + - data_file['statistics/spectra/velocity_velocity'][ii0:ii1+1, :, 2, 2])/2 - pp_file['enstrophy(t, k)'] = ( - data_file['statistics/spectra/vorticity_vorticity'][ii0:ii1+1, :, 0, 0] + - data_file['statistics/spectra/vorticity_vorticity'][ii0:ii1+1, :, 1, 1] + - data_file['statistics/spectra/vorticity_vorticity'][ii0:ii1+1, :, 2, 2])/2 - pp_file['vel_max(t)'] = data_file['statistics/moments/velocity'] [ii0:ii1+1, 9, 3] - pp_file['renergy(t)'] = data_file['statistics/moments/velocity'][ii0:ii1+1, 2, 3]/2 - for k in ['t', - 'energy(t, k)', - 'enstrophy(t, k)', - 'vel_max(t)', - 'renergy(t)']: - if k in pp_file.keys(): - self.statistics[k] = pp_file[k].value - self.compute_time_averages() - return None - def compute_time_averages(self): - """Compute easy stats. - - Further computation of statistics based on the contents of - ``simname_postprocess.h5``. - Standard quantities are as follows - (consistent with [Ishihara]_): - - .. math:: - - U_{\\textrm{int}}(t) = \\sqrt{\\frac{2E(t)}{3}}, \\hskip .5cm - L_{\\textrm{int}}(t) = \\frac{\pi}{2U_{int}^2(t)} \\int \\frac{dk}{k} E(t, k), \\hskip .5cm - T_{\\textrm{int}}(t) = - \\frac{L_{\\textrm{int}}(t)}{U_{\\textrm{int}}(t)} - - \\eta_K = \\left(\\frac{\\nu^3}{\\varepsilon}\\right)^{1/4}, \\hskip .5cm - \\tau_K = \\left(\\frac{\\nu}{\\varepsilon}\\right)^{1/2}, \\hskip .5cm - \\lambda = \\sqrt{\\frac{15 \\nu U_{\\textrm{int}}^2}{\\varepsilon}} - - Re = \\frac{U_{\\textrm{int}} L_{\\textrm{int}}}{\\nu}, \\hskip - .5cm - R_{\\lambda} = \\frac{U_{\\textrm{int}} \\lambda}{\\nu} - - .. [Ishihara] T. Ishihara et al, - *Small-scale statistics in high-resolution direct numerical - simulation of turbulence: Reynolds number dependence of - one-point velocity gradient statistics*. - J. Fluid Mech., - **592**, 335-366, 2007 - """ - for key in ['energy', 'enstrophy']: - self.statistics[key + '(t)'] = (self.statistics['dk'] * - np.sum(self.statistics[key + '(t, k)'], axis = 1)) - self.statistics['Uint(t)'] = np.sqrt(2*self.statistics['energy(t)'] / 3) - self.statistics['Lint(t)'] = ((self.statistics['dk']*np.pi / - (2*self.statistics['Uint(t)']**2)) * - np.nansum(self.statistics['energy(t, k)'] / - self.statistics['kshell'][None, :], axis = 1)) - for key in ['energy', - 'enstrophy', - 'vel_max', - 'Uint', - 'Lint']: - if key + '(t)' in self.statistics.keys(): - self.statistics[key] = np.average(self.statistics[key + '(t)'], axis = 0) - for suffix in ['', '(t)']: - self.statistics['diss' + suffix] = (self.parameters['nu'] * - self.statistics['enstrophy' + suffix]*2) - self.statistics['etaK' + suffix] = (self.parameters['nu']**3 / - self.statistics['diss' + suffix])**.25 - self.statistics['tauK' + suffix] = (self.parameters['nu'] / - self.statistics['diss' + suffix])**.5 - self.statistics['Re' + suffix] = (self.statistics['Uint' + suffix] * - self.statistics['Lint' + suffix] / - self.parameters['nu']) - self.statistics['lambda' + suffix] = (15 * self.parameters['nu'] * - self.statistics['Uint' + suffix]**2 / - self.statistics['diss' + suffix])**.5 - self.statistics['Rlambda' + suffix] = (self.statistics['Uint' + suffix] * - self.statistics['lambda' + suffix] / - self.parameters['nu']) - self.statistics['kMeta' + suffix] = (self.statistics['kM'] * - self.statistics['etaK' + suffix]) - if self.parameters['dealias_type'] == 1: - self.statistics['kMeta' + suffix] *= 0.8 - self.statistics['Tint'] = self.statistics['Lint'] / self.statistics['Uint'] - self.statistics['Taylor_microscale'] = self.statistics['lambda'] - return None - def set_plt_style( - self, - style = {'dashes' : (None, None)}): - self.style.update(style) - return None - def convert_complex_from_binary( - self, - field_name = 'vorticity', - iteration = 0, - file_name = None): - """read the Fourier representation of a vector field. - - Read the binary file containing iteration ``iteration`` of the - field ``field_name``, and write it in a ``.h5`` file. - """ - data = np.memmap( - os.path.join(self.work_dir, - self.simname + '_{0}_i{1:0>5x}'.format('c' + field_name, iteration)), - dtype = self.ctype, - mode = 'r', - shape = (self.parameters['ny'], - self.parameters['nz'], - self.parameters['nx']//2+1, - 3)) - if type(file_name) == type(None): - file_name = self.simname + '_{0}_i{1:0>5x}.h5'.format('c' + field_name, iteration) - file_name = os.path.join(self.work_dir, file_name) - f = h5py.File(file_name, 'a') - f[field_name + '/complex/{0}'.format(iteration)] = data - f.close() - return None - def write_par( - self, - iter0 = 0, - particle_ic = None): - _fluid_particle_base.write_par(self, iter0 = iter0) - with h5py.File(self.get_data_file_name(), 'r+') as ofile: - kspace = self.get_kspace() - nshells = kspace['nshell'].shape[0] - vec_stat_datasets = ['velocity', 'vorticity'] - scal_stat_datasets = [] - for k in vec_stat_datasets: - time_chunk = 2**20//(8*3*self.parameters['nx']) # FIXME: use proper size of self.dtype - time_chunk = max(time_chunk, 1) - ofile.create_dataset('statistics/xlines/' + k, - (1, self.parameters['nx'], 3), - chunks = (time_chunk, self.parameters['nx'], 3), - maxshape = (None, self.parameters['nx'], 3), - dtype = self.dtype) - for k in vec_stat_datasets: - time_chunk = 2**20//(8*3*3*nshells) - time_chunk = max(time_chunk, 1) - ofile.create_dataset('statistics/spectra/' + k + '_' + k, - (1, nshells, 3, 3), - chunks = (time_chunk, nshells, 3, 3), - maxshape = (None, nshells, 3, 3), - dtype = np.float64) - time_chunk = 2**20//(8*4*10) - time_chunk = max(time_chunk, 1) - a = ofile.create_dataset('statistics/moments/' + k, - (1, 10, 4), - chunks = (time_chunk, 10, 4), - maxshape = (None, 10, 4), - dtype = np.float64) - time_chunk = 2**20//(8*4*self.parameters['histogram_bins']) - time_chunk = max(time_chunk, 1) - ofile.create_dataset('statistics/histograms/' + k, - (1, - self.parameters['histogram_bins'], - 4), - chunks = (time_chunk, - self.parameters['histogram_bins'], - 4), - maxshape = (None, - self.parameters['histogram_bins'], - 4), - dtype = np.int64) - ofile['checkpoint'] = int(0) - if self.particle_species == 0: - return None - - if type(particle_ic) == type(None): - pbase_shape = (self.parameters['nparticles'],) - number_of_particles = self.parameters['nparticles'] - else: - pbase_shape = particle_ic.shape[:-1] - assert(particle_ic.shape[-1] == 3) - number_of_particles = 1 - for val in pbase_shape[1:]: - number_of_particles *= val - with h5py.File(self.get_checkpoint_0_fname(), 'a') as ofile: - s = 0 - ofile.create_group('tracers{0}'.format(s)) - ofile.create_group('tracers{0}/rhs'.format(s)) - ofile.create_group('tracers{0}/state'.format(s)) - ofile['tracers{0}/rhs'.format(s)].create_dataset( - '0', - shape = ( - (self.parameters['tracers{0}_integration_steps'.format(s)],) + - pbase_shape + - (3,)), - dtype = np.float) - ofile['tracers{0}/state'.format(s)].create_dataset( - '0', - shape = ( - pbase_shape + - (3,)), - dtype = np.float) - return None - def specific_parser_arguments( - self, - parser): - _fluid_particle_base.specific_parser_arguments(self, parser) - parser.add_argument( - '--src-wd', - type = str, - dest = 'src_work_dir', - default = '') - parser.add_argument( - '--src-simname', - type = str, - dest = 'src_simname', - default = '') - parser.add_argument( - '--src-iteration', - type = int, - dest = 'src_iteration', - default = 0) - parser.add_argument( - '--njobs', - type = int, dest = 'njobs', - default = 1) - parser.add_argument( - '--kMeta', - type = float, - dest = 'kMeta', - default = 2.0) - parser.add_argument( - '--dtfactor', - type = float, - dest = 'dtfactor', - default = 0.5, - help = 'dt is computed as DTFACTOR / N') - parser.add_argument( - '--particle-rand-seed', - type = int, - dest = 'particle_rand_seed', - default = None) - parser.add_argument( - '--pclouds', - type = int, - dest = 'pclouds', - default = 1, - help = ('number of particle clouds. Particle "clouds" ' - 'consist of particles distributed according to ' - 'pcloud-type.')) - parser.add_argument( - '--pcloud-type', - choices = ['random-cube', - 'regular-cube'], - dest = 'pcloud_type', - default = 'random-cube') - parser.add_argument( - '--particle-cloud-size', - type = float, - dest = 'particle_cloud_size', - default = 2*np.pi) - parser.add_argument( - '--neighbours', - type = int, - dest = 'neighbours', - default = 1) - parser.add_argument( - '--smoothness', - type = int, - dest = 'smoothness', - default = 1) - return None - def prepare_launch( - self, - args = []): - """Set up reasonable parameters. - - With the default Lundgren forcing applied in the band [2, 4], - we can estimate the dissipation, therefore we can estimate - :math:`k_M \\eta_K` and constrain the viscosity. - - In brief, the command line parameter :math:`k_M \\eta_K` is - used in the following formula for :math:`\\nu` (:math:`N` is the - number of real space grid points per coordinate): - - .. math:: - - \\nu = \\left(\\frac{2 k_M \\eta_K}{N} \\right)^{4/3} - - With this choice, the average dissipation :math:`\\varepsilon` - will be close to 0.4, and the integral scale velocity will be - close to 0.77, yielding the approximate value for the Taylor - microscale and corresponding Reynolds number: - - .. math:: - - \\lambda \\approx 4.75\\left(\\frac{2 k_M \\eta_K}{N} \\right)^{4/6}, \\hskip .5in - R_\\lambda \\approx 3.7 \\left(\\frac{N}{2 k_M \\eta_K} \\right)^{4/6} - - """ - opt = _code.prepare_launch(self, args = args) - self.parameters['nu'] = (opt.kMeta * 2 / opt.n)**(4./3) - self.parameters['dt'] = (opt.dtfactor / opt.n) - # custom famplitude for 288 and 576 - if opt.n == 288: - self.parameters['famplitude'] = 0.45 - elif opt.n == 576: - self.parameters['famplitude'] = 0.47 - if ((self.parameters['niter_todo'] % self.parameters['niter_out']) != 0): - self.parameters['niter_out'] = self.parameters['niter_todo'] - if len(opt.src_work_dir) == 0: - opt.src_work_dir = os.path.realpath(opt.work_dir) - self.pars_from_namespace(opt) - return opt - def launch( - self, - args = [], - **kwargs): - opt = self.prepare_launch(args = args) - if type(opt.nparticles) != type(None): - if opt.nparticles > 0: - self.name += '-particles' - self.add_particles( - integration_steps = 4, - neighbours = opt.neighbours, - smoothness = opt.smoothness) - self.fill_up_fluid_code() - self.finalize_code() - self.launch_jobs(opt = opt, **kwargs) - return None - def get_checkpoint_0_fname(self): - return os.path.join( - self.work_dir, - self.simname + '_checkpoint_0.h5') - def generate_tracer_state( - self, - rseed = None, - iteration = 0, - species = 0, - write_to_file = False, - ncomponents = 3, - testing = False, - data = None): - if (type(data) == type(None)): - if not type(rseed) == type(None): - np.random.seed(rseed) - #point with problems: 5.37632864e+00, 6.10414710e+00, 6.25256493e+00] - data = np.zeros(self.parameters['nparticles']*ncomponents).reshape(-1, ncomponents) - data[:, :3] = np.random.random((self.parameters['nparticles'], 3))*2*np.pi - if testing: - #data[0] = np.array([3.26434, 4.24418, 3.12157]) - data[:] = np.array([ 0.72086101, 2.59043666, 6.27501953]) - with h5py.File(self.get_checkpoint_0_fname(), 'a') as data_file: - data_file['tracers{0}/state/0'.format(species)][:] = data - if write_to_file: - data.tofile( - os.path.join( - self.work_dir, - "tracers{0}_state_i{1:0>5x}".format(species, iteration))) - return data - def launch_jobs( - self, - opt = None, - particle_initial_condition = None): - if not os.path.exists(os.path.join(self.work_dir, self.simname + '.h5')): - # take care of fields' initial condition - if not os.path.exists(self.get_checkpoint_0_fname()): - f = h5py.File(self.get_checkpoint_0_fname(), 'w') - if len(opt.src_simname) > 0: - source_cp = 0 - src_file = 'not_a_file' - while True: - src_file = os.path.join( - os.path.realpath(opt.src_work_dir), - opt.src_simname + '_checkpoint_{0}.h5'.format(source_cp)) - f0 = h5py.File(src_file, 'r') - if '{0}'.format(opt.src_iteration) in f0['vorticity/complex'].keys(): - f0.close() - break - source_cp += 1 - f['vorticity/complex/{0}'.format(0)] = h5py.ExternalLink( - src_file, - 'vorticity/complex/{0}'.format(opt.src_iteration)) - else: - data = self.generate_vector_field( - write_to_file = False, - spectra_slope = 2.0, - amplitude = 0.05) - f['vorticity/complex/{0}'.format(0)] = data - f.close() - # take care of particles' initial condition - if opt.pclouds > 1: - np.random.seed(opt.particle_rand_seed) - if opt.pcloud_type == 'random-cube': - particle_initial_condition = ( - np.random.random((opt.pclouds, 1, 3))*2*np.pi + - np.random.random((1, self.parameters['nparticles'], 3))*opt.particle_cloud_size) - elif opt.pcloud_type == 'regular-cube': - onedarray = np.linspace( - -opt.particle_cloud_size/2, - opt.particle_cloud_size/2, - self.parameters['nparticles']) - particle_initial_condition = np.zeros( - (opt.pclouds, - self.parameters['nparticles'], - self.parameters['nparticles'], - self.parameters['nparticles'], 3), - dtype = np.float64) - particle_initial_condition[:] = \ - np.random.random((opt.pclouds, 1, 1, 1, 3))*2*np.pi - particle_initial_condition[..., 0] += onedarray[None, None, None, :] - particle_initial_condition[..., 1] += onedarray[None, None, :, None] - particle_initial_condition[..., 2] += onedarray[None, :, None, None] - self.write_par( - particle_ic = particle_initial_condition) - if self.parameters['nparticles'] > 0: - data = self.generate_tracer_state( - species = 0, - rseed = opt.particle_rand_seed, - data = particle_initial_condition) - for s in range(1, self.particle_species): - self.generate_tracer_state(species = s, data = data) - self.run( - nb_processes = opt.nb_processes, - nb_threads_per_process = opt.nb_threads_per_process, - njobs = opt.njobs, - hours = opt.minutes // 60, - minutes = opt.minutes % 60, - no_submit = opt.no_submit) - return None - -if __name__ == '__main__': - pass - diff --git a/bfps/NavierStokes.py b/bfps/NavierStokes.py deleted file mode 100644 index c30adbe2ec41dac86993399a0235a18d20820269..0000000000000000000000000000000000000000 --- a/bfps/NavierStokes.py +++ /dev/null @@ -1,1263 +0,0 @@ -####################################################################### -# # -# Copyright 2015 Max Planck Institute # -# for Dynamics and Self-Organization # -# # -# This file is part of bfps. # -# # -# bfps is free software: you can redistribute it and/or modify # -# it under the terms of the GNU General Public License as published # -# by the Free Software Foundation, either version 3 of the License, # -# or (at your option) any later version. # -# # -# bfps is distributed in the hope that it will be useful, # -# but WITHOUT ANY WARRANTY; without even the implied warranty of # -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # -# GNU General Public License for more details. # -# # -# You should have received a copy of the GNU General Public License # -# along with bfps. If not, see <http://www.gnu.org/licenses/> # -# # -# Contact: Cristian.Lalescu@ds.mpg.de # -# # -####################################################################### - - - -import sys -import os -import numpy as np -import h5py -import argparse - -import bfps -import bfps.tools -from ._code import _code -from ._fluid_base import _fluid_particle_base - -class NavierStokes(_fluid_particle_base): - """Objects of this class can be used to generate production DNS codes. - Any functionality that users require should be available through this class, - in the sense that they can implement whatever they need by simply inheriting - this class. - """ - def __init__( - self, - name = 'NavierStokes-v' + bfps.__version__, - work_dir = './', - simname = 'test', - fluid_precision = 'single', - fftw_plan_rigor = 'FFTW_MEASURE', - frozen_fields = False, - use_fftw_wisdom = True, - QR_stats_on = False, - Lag_acc_stats_on = False): - self.QR_stats_on = QR_stats_on - self.Lag_acc_stats_on = Lag_acc_stats_on - self.frozen_fields = frozen_fields - self.fftw_plan_rigor = fftw_plan_rigor - _fluid_particle_base.__init__( - self, - name = name + '-' + fluid_precision, - work_dir = work_dir, - simname = simname, - dtype = fluid_precision, - use_fftw_wisdom = use_fftw_wisdom) - self.parameters['nu'] = 0.1 - self.parameters['fmode'] = 1 - self.parameters['famplitude'] = 0.5 - self.parameters['fk0'] = 2.0 - self.parameters['fk1'] = 4.0 - self.parameters['forcing_type'] = 'linear' - self.parameters['histogram_bins'] = 256 - self.parameters['max_velocity_estimate'] = 1.0 - self.parameters['max_vorticity_estimate'] = 1.0 - self.parameters['max_Lag_acc_estimate'] = 1.0 - self.parameters['max_pressure_estimate'] = 1.0 - self.parameters['QR2D_histogram_bins'] = 64 - self.parameters['max_trS2_estimate'] = 1.0 - self.parameters['max_Q_estimate'] = 1.0 - self.parameters['max_R_estimate'] = 1.0 - self.file_datasets_grow = """ - //begincpp - hid_t group; - group = H5Gopen(stat_file, "/statistics", H5P_DEFAULT); - H5Ovisit(group, H5_INDEX_NAME, H5_ITER_NATIVE, grow_statistics_dataset, NULL); - H5Gclose(group); - //endcpp - """ - self.style = {} - self.statistics = {} - self.fluid_output = 'fs->write(\'v\', \'c\');\n' - return None - def create_stat_output( - self, - dset_name, - data_buffer, - data_type = 'H5T_NATIVE_DOUBLE', - size_setup = None, - close_spaces = True): - new_stat_output_txt = 'Cdset = H5Dopen(stat_file, "{0}", H5P_DEFAULT);\n'.format(dset_name) - if not type(size_setup) == type(None): - new_stat_output_txt += ( - size_setup + - 'wspace = H5Dget_space(Cdset);\n' + - 'ndims = H5Sget_simple_extent_dims(wspace, dims, NULL);\n' + - 'mspace = H5Screate_simple(ndims, count, NULL);\n' + - 'H5Sselect_hyperslab(wspace, H5S_SELECT_SET, offset, NULL, count, NULL);\n') - new_stat_output_txt += ('H5Dwrite(Cdset, {0}, mspace, wspace, H5P_DEFAULT, {1});\n' + - 'H5Dclose(Cdset);\n').format(data_type, data_buffer) - if close_spaces: - new_stat_output_txt += ('H5Sclose(mspace);\n' + - 'H5Sclose(wspace);\n') - return new_stat_output_txt - def write_fluid_stats(self): - self.fluid_includes += '#include <cmath>\n' - self.fluid_includes += '#include "fftw_tools.hpp"\n' - self.stat_src += """ - //begincpp - hid_t stat_group; - if (myrank == 0) - stat_group = H5Gopen(stat_file, "statistics", H5P_DEFAULT); - fs->compute_velocity(fs->cvorticity); - std::vector<double> max_estimate_vector; - max_estimate_vector.resize(4); - *tmp_vec_field = fs->cvelocity; - switch(fs->dealias_type) - { - case 0: - tmp_vec_field->compute_stats( - kk_two_thirds, - stat_group, - "velocity", - fs->iteration / niter_stat, - max_velocity_estimate/sqrt(3)); - break; - case 1: - tmp_vec_field->compute_stats( - kk_smooth, - stat_group, - "velocity", - fs->iteration / niter_stat, - max_velocity_estimate/sqrt(3)); - break; - } - //endcpp - """ - if self.Lag_acc_stats_on: - self.stat_src += """ - //begincpp - tmp_vec_field->real_space_representation = false; - fs->compute_Lagrangian_acceleration(tmp_vec_field->get_cdata()); - switch(fs->dealias_type) - { - case 0: - tmp_vec_field->compute_stats( - kk_two_thirds, - stat_group, - "Lagrangian_acceleration", - fs->iteration / niter_stat, - max_Lag_acc_estimate); - break; - case 1: - tmp_vec_field->compute_stats( - kk_smooth, - stat_group, - "Lagrangian_acceleration", - fs->iteration / niter_stat, - max_Lag_acc_estimate); - break; - } - tmp_scal_field->real_space_representation = false; - fs->compute_velocity(fs->cvorticity); - fs->ift_velocity(); - fs->compute_pressure(tmp_scal_field->get_cdata()); - switch(fs->dealias_type) - { - case 0: - tmp_scal_field->compute_stats( - kk_two_thirds, - stat_group, - "pressure", - fs->iteration / niter_stat, - max_pressure_estimate); - break; - case 1: - tmp_scal_field->compute_stats( - kk_smooth, - stat_group, - "pressure", - fs->iteration / niter_stat, - max_pressure_estimate); - break; - } - //endcpp - """ - self.stat_src += """ - //begincpp - *tmp_vec_field = fs->cvorticity; - switch(fs->dealias_type) - { - case 0: - tmp_vec_field->compute_stats( - kk_two_thirds, - stat_group, - "vorticity", - fs->iteration / niter_stat, - max_vorticity_estimate/sqrt(3)); - break; - case 1: - tmp_vec_field->compute_stats( - kk_smooth, - stat_group, - "vorticity", - fs->iteration / niter_stat, - max_vorticity_estimate/sqrt(3)); - break; - } - //endcpp - """ - if self.QR_stats_on: - self.stat_src += """ - //begincpp - double *trS2_Q_R_moments = new double[10*3]; - double *gradu_moments = new double[10*9]; - ptrdiff_t *hist_trS2_Q_R = new ptrdiff_t[histogram_bins*3]; - ptrdiff_t *hist_gradu = new ptrdiff_t[histogram_bins*9]; - ptrdiff_t *hist_QR2D = new ptrdiff_t[QR2D_histogram_bins*QR2D_histogram_bins]; - double trS2QR_max_estimates[3]; - double gradu_max_estimates[9]; - trS2QR_max_estimates[0] = max_trS2_estimate; - trS2QR_max_estimates[1] = max_Q_estimate; - trS2QR_max_estimates[2] = max_R_estimate; - std::fill_n(gradu_max_estimates, 9, sqrt(3*max_trS2_estimate)); - fs->compute_gradient_statistics( - fs->cvelocity, - gradu_moments, - trS2_Q_R_moments, - hist_gradu, - hist_trS2_Q_R, - hist_QR2D, - trS2QR_max_estimates, - gradu_max_estimates, - histogram_bins, - QR2D_histogram_bins); - //endcpp - """ - self.stat_src += """ - //begincpp - if (myrank == 0) - H5Gclose(stat_group); - if (fs->cd->myrank == 0) - {{ - hid_t Cdset, wspace, mspace; - int ndims; - hsize_t count[4], offset[4], dims[4]; - offset[0] = fs->iteration/niter_stat; - offset[1] = 0; - offset[2] = 0; - offset[3] = 0; - //endcpp - """.format(self.C_dtype) - if self.dtype == np.float32: - field_H5T = 'H5T_NATIVE_FLOAT' - elif self.dtype == np.float64: - field_H5T = 'H5T_NATIVE_DOUBLE' - if self.QR_stats_on: - self.stat_src += self.create_stat_output( - '/statistics/moments/trS2_Q_R', - 'trS2_Q_R_moments', - size_setup =""" - count[0] = 1; - count[1] = 10; - count[2] = 3; - """) - self.stat_src += self.create_stat_output( - '/statistics/moments/velocity_gradient', - 'gradu_moments', - size_setup =""" - count[0] = 1; - count[1] = 10; - count[2] = 3; - count[3] = 3; - """) - self.stat_src += self.create_stat_output( - '/statistics/histograms/trS2_Q_R', - 'hist_trS2_Q_R', - data_type = 'H5T_NATIVE_INT64', - size_setup = """ - count[0] = 1; - count[1] = histogram_bins; - count[2] = 3; - """) - self.stat_src += self.create_stat_output( - '/statistics/histograms/velocity_gradient', - 'hist_gradu', - data_type = 'H5T_NATIVE_INT64', - size_setup = """ - count[0] = 1; - count[1] = histogram_bins; - count[2] = 3; - count[3] = 3; - """) - self.stat_src += self.create_stat_output( - '/statistics/histograms/QR2D', - 'hist_QR2D', - data_type = 'H5T_NATIVE_INT64', - size_setup = """ - count[0] = 1; - count[1] = QR2D_histogram_bins; - count[2] = QR2D_histogram_bins; - """) - self.stat_src += '}\n' - if self.QR_stats_on: - self.stat_src += """ - //begincpp - delete[] trS2_Q_R_moments; - delete[] gradu_moments; - delete[] hist_trS2_Q_R; - delete[] hist_gradu; - delete[] hist_QR2D; - //endcpp - """ - return None - def fill_up_fluid_code(self): - self.fluid_includes += '#include <cstring>\n' - self.fluid_variables += ( - 'fluid_solver<{0}> *fs;\n'.format(self.C_dtype) + - 'field<{0}, FFTW, THREE> *tmp_vec_field;\n'.format(self.C_dtype) + - 'field<{0}, FFTW, ONE> *tmp_scal_field;\n'.format(self.C_dtype) + - 'kspace<FFTW, SMOOTH> *kk_smooth;\n' + - 'kspace<FFTW, TWO_THIRDS> *kk_two_thirds;\n') - self.fluid_definitions += """ - typedef struct {{ - {0} re; - {0} im; - }} tmp_complex_type; - """.format(self.C_dtype) - self.write_fluid_stats() - if self.dtype == np.float32: - field_H5T = 'H5T_NATIVE_FLOAT' - elif self.dtype == np.float64: - field_H5T = 'H5T_NATIVE_DOUBLE' - self.fluid_start += """ - //begincpp - char fname[512]; - fs = new fluid_solver<{0}>( - simname, - nx, ny, nz, - dkx, dky, dkz, - dealias_type, - {1}); - tmp_vec_field = new field<{0}, FFTW, THREE>( - nx, ny, nz, - MPI_COMM_WORLD, - {1}); - tmp_scal_field = new field<{0}, FFTW, ONE>( - nx, ny, nz, - MPI_COMM_WORLD, - {1}); - kk_smooth = new kspace<FFTW, SMOOTH>( - tmp_vec_field->clayout, - fs->dkx, fs->dky, fs->dkz); - kk_two_thirds = new kspace<FFTW, TWO_THIRDS>( - tmp_vec_field->clayout, - fs->dkx, fs->dky, fs->dkz); - fs->nu = nu; - fs->fmode = fmode; - fs->famplitude = famplitude; - fs->fk0 = fk0; - fs->fk1 = fk1; - strncpy(fs->forcing_type, forcing_type, 128); - fs->iteration = iteration; - fs->read('v', 'c'); - //endcpp - """.format(self.C_dtype, self.fftw_plan_rigor, field_H5T) - self.fluid_start += self.store_kspace - if not self.frozen_fields: - self.fluid_loop = 'fs->step(dt);\n' - else: - self.fluid_loop = '' - self.fluid_loop += ('if (fs->iteration % niter_out == 0)\n{\n' + - self.fluid_output + '\n}\n') - self.fluid_end = ('if (fs->iteration % niter_out != 0)\n{\n' + - self.fluid_output + '\n}\n' + - 'delete fs;\n' + - 'delete tmp_vec_field;\n' + - 'delete tmp_scal_field;\n' + - 'delete kk_smooth;\n' + - 'delete kk_two_thirds;\n') - return None - def add_3D_rFFTW_field( - self, - name = 'rFFTW_acc'): - if self.dtype == np.float32: - FFTW = 'fftwf' - elif self.dtype == np.float64: - FFTW = 'fftw' - self.fluid_variables += '{0} *{1};\n'.format(self.C_dtype, name) - self.fluid_start += '{0} = {1}_alloc_real(2*fs->cd->local_size);\n'.format(name, FFTW) - self.fluid_end += '{0}_free({1});\n'.format(FFTW, name) - return None - def add_interpolator( - self, - interp_type = 'spline', - neighbours = 1, - smoothness = 1, - name = 'field_interpolator', - field_name = 'fs->rvelocity', - class_name = 'rFFTW_interpolator'): - self.fluid_includes += '#include "{0}.hpp"\n'.format(class_name) - self.fluid_variables += '{0} <{1}, {2}> *{3};\n'.format( - class_name, self.C_dtype, neighbours, name) - self.parameters[name + '_type'] = interp_type - self.parameters[name + '_neighbours'] = neighbours - if interp_type == 'spline': - self.parameters[name + '_smoothness'] = smoothness - beta_name = 'beta_n{0}_m{1}'.format(neighbours, smoothness) - elif interp_type == 'Lagrange': - beta_name = 'beta_Lagrange_n{0}'.format(neighbours) - self.fluid_start += '{0} = new {1}<{2}, {3}>(fs, {4}, {5});\n'.format( - name, - class_name, - self.C_dtype, - neighbours, - beta_name, - field_name) - self.fluid_end += 'delete {0};\n'.format(name) - return None - def add_particles( - self, - integration_steps = 2, - kcut = None, - interpolator = 'field_interpolator', - frozen_particles = False, - acc_name = None, - class_name = 'particles'): - """Adds code for tracking a series of particle species, each - consisting of `nparticles` particles. - - :type integration_steps: int, list of int - :type kcut: None (default), str, list of str - :type interpolator: str, list of str - :type frozen_particles: bool - :type acc_name: str - - .. warning:: if not None, kcut must be a list of decreasing - wavenumbers, since filtering is done sequentially - on the same complex FFTW field. - """ - if self.dtype == np.float32: - FFTW = 'fftwf' - elif self.dtype == np.float64: - FFTW = 'fftw' - s0 = self.particle_species - if type(integration_steps) == int: - integration_steps = [integration_steps] - if type(kcut) == str: - kcut = [kcut] - if type(interpolator) == str: - interpolator = [interpolator] - nspecies = max(len(integration_steps), len(interpolator)) - if type(kcut) == list: - nspecies = max(nspecies, len(kcut)) - if len(integration_steps) == 1: - integration_steps = [integration_steps[0] for s in range(nspecies)] - if len(interpolator) == 1: - interpolator = [interpolator[0] for s in range(nspecies)] - if type(kcut) == list: - if len(kcut) == 1: - kcut = [kcut[0] for s in range(nspecies)] - assert(len(integration_steps) == nspecies) - assert(len(interpolator) == nspecies) - if type(kcut) == list: - assert(len(kcut) == nspecies) - for s in range(nspecies): - neighbours = self.parameters[interpolator[s] + '_neighbours'] - if type(kcut) == list: - self.parameters['tracers{0}_kcut'.format(s0 + s)] = kcut[s] - self.parameters['tracers{0}_interpolator'.format(s0 + s)] = interpolator[s] - self.parameters['tracers{0}_acc_on'.format(s0 + s)] = int(not type(acc_name) == type(None)) - self.parameters['tracers{0}_integration_steps'.format(s0 + s)] = integration_steps[s] - self.file_datasets_grow += """ - //begincpp - group = H5Gopen(particle_file, "/tracers{0}", H5P_DEFAULT); - grow_particle_datasets(group, "", NULL, NULL); - H5Gclose(group); - //endcpp - """.format(s0 + s) - - #### code that outputs statistics - output_vel_acc = '{\n' - # array for putting sampled velocity in - # must compute velocity, just in case it was messed up by some - # other particle species before the stats - output_vel_acc += 'fs->compute_velocity(fs->cvorticity);\n' - if not type(kcut) == list: - output_vel_acc += 'fs->ift_velocity();\n' - if not type(acc_name) == type(None): - # array for putting sampled acceleration in - # must compute acceleration - output_vel_acc += 'fs->compute_Lagrangian_acceleration({0});\n'.format(acc_name) - for s in range(nspecies): - if type(kcut) == list: - output_vel_acc += 'fs->low_pass_Fourier(fs->cvelocity, 3, {0});\n'.format(kcut[s]) - output_vel_acc += 'fs->ift_velocity();\n' - output_vel_acc += """ - {0}->read_rFFTW(fs->rvelocity); - ps{1}->sample({0}, "velocity"); - """.format(interpolator[s], s0 + s) - if not type(acc_name) == type(None): - output_vel_acc += """ - {0}->read_rFFTW({1}); - ps{2}->sample({0}, "acceleration"); - """.format(interpolator[s], acc_name, s0 + s) - output_vel_acc += '}\n' - - #### initialize, stepping and finalize code - if not type(kcut) == list: - update_fields = ('fs->compute_velocity(fs->cvorticity);\n' + - 'fs->ift_velocity();\n') - self.particle_start += update_fields - self.particle_loop += update_fields - else: - self.particle_loop += 'fs->compute_velocity(fs->cvorticity);\n' - self.particle_includes += '#include "{0}.hpp"\n'.format(class_name) - self.particle_stat_src += ( - 'if (ps0->iteration % niter_part == 0)\n' + - '{\n') - for s in range(nspecies): - neighbours = self.parameters[interpolator[s] + '_neighbours'] - self.particle_start += 'sprintf(fname, "tracers{0}");\n'.format(s0 + s) - self.particle_end += ('ps{0}->write();\n' + - 'delete ps{0};\n').format(s0 + s) - self.particle_variables += '{0}<VELOCITY_TRACER, {1}, {2}> *ps{3};\n'.format( - class_name, - self.C_dtype, - neighbours, - s0 + s) - self.particle_start += ('ps{0} = new {1}<VELOCITY_TRACER, {2}, {3}>(\n' + - 'fname, particle_file, {4},\n' + - 'niter_part, tracers{0}_integration_steps);\n').format( - s0 + s, - class_name, - self.C_dtype, - neighbours, - interpolator[s]) - self.particle_start += ('ps{0}->dt = dt;\n' + - 'ps{0}->iteration = iteration;\n' + - 'ps{0}->read();\n').format(s0 + s) - if not frozen_particles: - if type(kcut) == list: - update_field = ('fs->low_pass_Fourier(fs->cvelocity, 3, {0});\n'.format(kcut[s]) + - 'fs->ift_velocity();\n') - self.particle_loop += update_field - self.particle_loop += '{0}->read_rFFTW(fs->rvelocity);\n'.format(interpolator[s]) - self.particle_loop += 'ps{0}->step();\n'.format(s0 + s) - self.particle_stat_src += 'ps{0}->write(false);\n'.format(s0 + s) - self.particle_stat_src += output_vel_acc - self.particle_stat_src += '}\n' - self.particle_species += nspecies - return None - def get_cache_file_name(self): - return os.path.join(self.work_dir, self.simname + '_cache.h5') - def get_cache_file(self): - return h5py.File(self.get_postprocess_file_name(), 'r') - def get_postprocess_file_name(self): - return self.get_cache_file_name() - def get_postprocess_file(self): - return h5py.File(self.get_postprocess_file_name(), 'r') - def compute_statistics(self, iter0 = 0, iter1 = None): - """Run basic postprocessing on raw data. - The energy spectrum :math:`E(t, k)` and the enstrophy spectrum - :math:`\\frac{1}{2}\omega^2(t, k)` are computed from the - - .. math:: - - \sum_{k \\leq \\|\\mathbf{k}\\| \\leq k+dk}\\hat{u_i} \\hat{u_j}^*, \\hskip .5cm - \sum_{k \\leq \\|\\mathbf{k}\\| \\leq k+dk}\\hat{\omega_i} \\hat{\\omega_j}^* - - tensors, and the enstrophy spectrum is also used to - compute the dissipation :math:`\\varepsilon(t)`. - These basic quantities are stored in a newly created HDF5 file, - ``simname_cache.h5``. - """ - if len(list(self.statistics.keys())) > 0: - return None - if not os.path.exists(self.get_data_file_name()): - if os.path.exists(self.get_cache_file_name()): - self.read_parameters(fname = self.get_cache_file_name()) - with self.get_cache_file() as pp_file: - for k in ['t', - 'energy(t)', - 'energy(k)', - 'enstrophy(t)', - 'enstrophy(k)', - 'R_ij(t)', - 'vel_max(t)', - 'renergy(t)']: - if k in pp_file.keys(): - self.statistics[k] = pp_file[k].value - self.statistics['kM'] = pp_file['kspace/kM'].value - self.statistics['dk'] = pp_file['kspace/dk'].value - self.statistics['kshell'] = pp_file['kspace/kshell'].value - self.statistics['nshell'] = pp_file['kspace/nshell'].value - else: - self.read_parameters() - with self.get_data_file() as data_file: - if 'moments' not in data_file['statistics'].keys(): - return None - iter0 = min((data_file['statistics/moments/velocity'].shape[0] * - self.parameters['niter_stat']-1), - iter0) - if type(iter1) == type(None): - iter1 = data_file['iteration'].value - else: - iter1 = min(data_file['iteration'].value, iter1) - ii0 = iter0 // self.parameters['niter_stat'] - ii1 = iter1 // self.parameters['niter_stat'] - self.statistics['kshell'] = data_file['kspace/kshell'].value - self.statistics['nshell'] = data_file['kspace/nshell'].value - for kk in [-1, -2]: - if (self.statistics['kshell'][kk] == 0): - self.statistics['kshell'][kk] = np.nan - self.statistics['kM'] = data_file['kspace/kM'].value - self.statistics['dk'] = data_file['kspace/dk'].value - computation_needed = True - pp_file = h5py.File(self.get_postprocess_file_name(), 'a') - if not ('parameters' in pp_file.keys()): - data_file.copy('parameters', pp_file) - data_file.copy('kspace', pp_file) - if 'ii0' in pp_file.keys(): - computation_needed = not (ii0 == pp_file['ii0'].value and - ii1 == pp_file['ii1'].value) - if computation_needed: - for k in ['t', 'vel_max(t)', 'renergy(t)', - 'energy(t)', 'enstrophy(t)', - 'energy(k)', 'enstrophy(k)', - 'energy(t, k)', - 'enstrophy(t, k)', - 'R_ij(t)', - 'ii0', 'ii1', 'iter0', 'iter1']: - if k in pp_file.keys(): - del pp_file[k] - if computation_needed: - pp_file['iter0'] = iter0 - pp_file['iter1'] = iter1 - pp_file['ii0'] = ii0 - pp_file['ii1'] = ii1 - pp_file['t'] = (self.parameters['dt']* - self.parameters['niter_stat']* - (np.arange(ii0, ii1+1).astype(np.float))) - phi_ij = data_file['statistics/spectra/velocity_velocity'][ii0:ii1+1] - pp_file['R_ij(t)'] = np.sum(phi_ij, axis = 1) - energy_tk = ( - phi_ij[:, :, 0, 0] + - phi_ij[:, :, 1, 1] + - phi_ij[:, :, 2, 2])/2 - pp_file['energy(t)'] = np.sum(energy_tk, axis = 1) - pp_file['energy(k)'] = np.mean(energy_tk, axis = 0)*(4*np.pi*self.statistics['kshell']**2) / (self.statistics['dk']*self.statistics['nshell']) - enstrophy_tk = ( - data_file['statistics/spectra/vorticity_vorticity'][ii0:ii1+1, :, 0, 0] + - data_file['statistics/spectra/vorticity_vorticity'][ii0:ii1+1, :, 1, 1] + - data_file['statistics/spectra/vorticity_vorticity'][ii0:ii1+1, :, 2, 2])/2 - pp_file['enstrophy(t)'] = np.sum(enstrophy_tk, axis = 1) - pp_file['enstrophy(k)'] = np.mean(enstrophy_tk, axis = 0)*(4*np.pi*self.statistics['kshell']**2) / (self.statistics['dk']*self.statistics['nshell']) - pp_file['vel_max(t)'] = data_file['statistics/moments/velocity'][ii0:ii1+1, 9, 3] - pp_file['renergy(t)'] = data_file['statistics/moments/velocity'][ii0:ii1+1, 2, 3]/2 - if 'trS2_Q_R' in data_file['statistics/moments'].keys(): - pp_file['mean_trS2(t)'] = data_file['statistics/moments/trS2_Q_R'][:, 1, 0] - for k in ['t', - 'energy(t)', - 'energy(k)', - 'enstrophy(t)', - 'enstrophy(k)', - 'R_ij(t)', - 'vel_max(t)', - 'renergy(t)', - 'mean_trS2(t)']: - if k in pp_file.keys(): - self.statistics[k] = pp_file[k].value - # sanity check --- Parseval theorem check - assert(np.max(np.abs( - self.statistics['renergy(t)'] - - self.statistics['energy(t)']) / self.statistics['energy(t)']) < 1e-5) - self.compute_time_averages() - return None - def compute_Reynolds_stress_invariants( - self): - Rij = self.statistics['R_ij(t)'] - Rij /= (2*self.statistics['energy(t)'][:, None, None]) - Rij[:, 0, 0] -= 1./3 - Rij[:, 1, 1] -= 1./3 - Rij[:, 2, 2] -= 1./3 - self.statistics['I2(t)'] = np.sqrt(np.einsum('...ij,...ij', Rij, Rij, optimize = True) / 6) - self.statistics['I3(t)'] = np.cbrt(np.einsum('...ij,...jk,...ki', Rij, Rij, Rij, optimize = True) / 6) - return None - def compute_time_averages(self): - """Compute easy stats. - - Further computation of statistics based on the contents of - ``simname_cache.h5``. - Standard quantities are as follows - (consistent with [Ishihara]_): - - .. math:: - - U_{\\textrm{int}}(t) = \\sqrt{\\frac{2E(t)}{3}}, \\hskip .5cm - L_{\\textrm{int}} = \\frac{\pi}{2U_{int}^2} \\int \\frac{dk}{k} E(k), \\hskip .5cm - T_{\\textrm{int}} = - \\frac{L_{\\textrm{int}}}{U_{\\textrm{int}}} - - \\eta_K = \\left(\\frac{\\nu^3}{\\varepsilon}\\right)^{1/4}, \\hskip .5cm - \\tau_K = \\left(\\frac{\\nu}{\\varepsilon}\\right)^{1/2}, \\hskip .5cm - \\lambda = \\sqrt{\\frac{15 \\nu U_{\\textrm{int}}^2}{\\varepsilon}} - - Re = \\frac{U_{\\textrm{int}} L_{\\textrm{int}}}{\\nu}, \\hskip - .5cm - R_{\\lambda} = \\frac{U_{\\textrm{int}} \\lambda}{\\nu} - - .. [Ishihara] T. Ishihara et al, - *Small-scale statistics in high-resolution direct numerical - simulation of turbulence: Reynolds number dependence of - one-point velocity gradient statistics*. - J. Fluid Mech., - **592**, 335-366, 2007 - """ - self.statistics['Uint(t)'] = np.sqrt(2*self.statistics['energy(t)'] / 3) - for key in ['energy', - 'enstrophy', - 'mean_trS2', - 'Uint']: - if key + '(t)' in self.statistics.keys(): - self.statistics[key] = np.average(self.statistics[key + '(t)'], axis = 0) - self.statistics['vel_max'] = np.max(self.statistics['vel_max(t)']) - for suffix in ['', '(t)']: - self.statistics['diss' + suffix] = (self.parameters['nu'] * - self.statistics['enstrophy' + suffix]*2) - self.statistics['etaK' + suffix] = (self.parameters['nu']**3 / - self.statistics['diss' + suffix])**.25 - self.statistics['tauK' + suffix] = (self.parameters['nu'] / - self.statistics['diss' + suffix])**.5 - self.statistics['lambda' + suffix] = (15 * self.parameters['nu'] * - self.statistics['Uint' + suffix]**2 / - self.statistics['diss' + suffix])**.5 - self.statistics['Rlambda' + suffix] = (self.statistics['Uint' + suffix] * - self.statistics['lambda' + suffix] / - self.parameters['nu']) - self.statistics['kMeta' + suffix] = (self.statistics['kM'] * - self.statistics['etaK' + suffix]) - if self.parameters['dealias_type'] == 1: - self.statistics['kMeta' + suffix] *= 0.8 - self.statistics['Lint'] = ((np.pi / - (2*self.statistics['Uint']**2)) * - np.nansum(self.statistics['energy(k)'] / - self.statistics['kshell'])) - self.statistics['Re'] = (self.statistics['Uint'] * - self.statistics['Lint'] / - self.parameters['nu']) - self.statistics['Tint'] = self.statistics['Lint'] / self.statistics['Uint'] - self.statistics['Taylor_microscale'] = self.statistics['lambda'] - return None - def set_plt_style( - self, - style = {'dashes' : (None, None)}): - self.style.update(style) - return None - def read_cfield( - self, - field_name = 'vorticity', - iteration = 0): - """read the Fourier representation of a vector field. - - Read the binary file containing iteration ``iteration`` of the - field ``field_name``, and return it as a properly shaped - ``numpy.memmap`` object. - """ - return np.memmap( - os.path.join(self.work_dir, - self.simname + '_{0}_i{1:0>5x}'.format('c' + field_name, iteration)), - dtype = self.ctype, - mode = 'r', - shape = (self.parameters['ny'], - self.parameters['nz'], - self.parameters['nx']//2+1, - 3)) - def write_par( - self, - iter0 = 0, - particle_ic = None): - _fluid_particle_base.write_par(self, iter0 = iter0) - with h5py.File(self.get_data_file_name(), 'r+') as ofile: - kspace = self.get_kspace() - nshells = kspace['nshell'].shape[0] - vec_stat_datasets = ['velocity', 'vorticity'] - scal_stat_datasets = [] - for k in vec_stat_datasets: - time_chunk = 2**20 // ( - self.dtype.itemsize*3* - self.parameters['nx']*self.parameters['ny']) - time_chunk = max(time_chunk, 1) - ofile.create_dataset('statistics/0slices/' + k + '/real', - (1, self.parameters['ny'], self.parameters['nx'], 3), - chunks = (time_chunk, self.parameters['ny'], self.parameters['nx'], 3), - maxshape = (None, self.parameters['ny'], self.parameters['nx'], 3), - dtype = self.dtype) - if self.Lag_acc_stats_on: - vec_stat_datasets += ['Lagrangian_acceleration'] - scal_stat_datasets += ['pressure'] - for k in vec_stat_datasets: - time_chunk = 2**20//(8*3*3*nshells) - time_chunk = max(time_chunk, 1) - ofile.create_dataset('statistics/spectra/' + k + '_' + k, - (1, nshells, 3, 3), - chunks = (time_chunk, nshells, 3, 3), - maxshape = (None, nshells, 3, 3), - dtype = np.float64) - time_chunk = 2**20//(8*4*10) - time_chunk = max(time_chunk, 1) - a = ofile.create_dataset('statistics/moments/' + k, - (1, 10, 4), - chunks = (time_chunk, 10, 4), - maxshape = (None, 10, 4), - dtype = np.float64) - time_chunk = 2**20//(8*4*self.parameters['histogram_bins']) - time_chunk = max(time_chunk, 1) - ofile.create_dataset('statistics/histograms/' + k, - (1, - self.parameters['histogram_bins'], - 4), - chunks = (time_chunk, - self.parameters['histogram_bins'], - 4), - maxshape = (None, - self.parameters['histogram_bins'], - 4), - dtype = np.int64) - for k in scal_stat_datasets: - time_chunk = 2**20//(8*nshells) - time_chunk = max(time_chunk, 1) - ofile.create_dataset('statistics/spectra/' + k + '_' + k, - (1, nshells), - chunks = (time_chunk, nshells), - maxshape = (None, nshells), - dtype = np.float64) - time_chunk = 2**20//(8*10) - time_chunk = max(time_chunk, 1) - a = ofile.create_dataset('statistics/moments/' + k, - (1, 10), - chunks = (time_chunk, 10), - maxshape = (None, 10), - dtype = np.float64) - time_chunk = 2**20//(8*self.parameters['histogram_bins']) - time_chunk = max(time_chunk, 1) - ofile.create_dataset('statistics/histograms/' + k, - (1, - self.parameters['histogram_bins']), - chunks = (time_chunk, - self.parameters['histogram_bins']), - maxshape = (None, - self.parameters['histogram_bins']), - dtype = np.int64) - if self.QR_stats_on: - time_chunk = 2**20//(8*3*self.parameters['histogram_bins']) - time_chunk = max(time_chunk, 1) - ofile.create_dataset('statistics/histograms/trS2_Q_R', - (1, - self.parameters['histogram_bins'], - 3), - chunks = (time_chunk, - self.parameters['histogram_bins'], - 3), - maxshape = (None, - self.parameters['histogram_bins'], - 3), - dtype = np.int64) - time_chunk = 2**20//(8*9*self.parameters['histogram_bins']) - time_chunk = max(time_chunk, 1) - ofile.create_dataset('statistics/histograms/velocity_gradient', - (1, - self.parameters['histogram_bins'], - 3, - 3), - chunks = (time_chunk, - self.parameters['histogram_bins'], - 3, - 3), - maxshape = (None, - self.parameters['histogram_bins'], - 3, - 3), - dtype = np.int64) - time_chunk = 2**20//(8*3*10) - time_chunk = max(time_chunk, 1) - a = ofile.create_dataset('statistics/moments/trS2_Q_R', - (1, 10, 3), - chunks = (time_chunk, 10, 3), - maxshape = (None, 10, 3), - dtype = np.float64) - time_chunk = 2**20//(8*9*10) - time_chunk = max(time_chunk, 1) - a = ofile.create_dataset('statistics/moments/velocity_gradient', - (1, 10, 3, 3), - chunks = (time_chunk, 10, 3, 3), - maxshape = (None, 10, 3, 3), - dtype = np.float64) - time_chunk = 2**20//(8*self.parameters['QR2D_histogram_bins']**2) - time_chunk = max(time_chunk, 1) - ofile.create_dataset('statistics/histograms/QR2D', - (1, - self.parameters['QR2D_histogram_bins'], - self.parameters['QR2D_histogram_bins']), - chunks = (time_chunk, - self.parameters['QR2D_histogram_bins'], - self.parameters['QR2D_histogram_bins']), - maxshape = (None, - self.parameters['QR2D_histogram_bins'], - self.parameters['QR2D_histogram_bins']), - dtype = np.int64) - if self.particle_species == 0: - return None - - if type(particle_ic) == type(None): - pbase_shape = (self.parameters['nparticles'],) - number_of_particles = self.parameters['nparticles'] - else: - pbase_shape = particle_ic.shape[:-1] - assert(particle_ic.shape[-1] == 3) - if len(pbase_shape) == 1: - number_of_particles = pbase_shape[0] - else: - number_of_particles = 1 - for val in pbase_shape[1:]: - number_of_particles *= val - - with h5py.File(self.get_particle_file_name(), 'a') as ofile: - for s in range(self.particle_species): - ofile.create_group('tracers{0}'.format(s)) - time_chunk = 2**20 // (8*3*number_of_particles) - time_chunk = max(time_chunk, 1) - dims = ((1, - self.parameters['tracers{0}_integration_steps'.format(s)]) + - pbase_shape + (3,)) - maxshape = (h5py.h5s.UNLIMITED,) + dims[1:] - if len(pbase_shape) > 1: - chunks = (time_chunk, 1, 1) + dims[3:] - else: - chunks = (time_chunk, 1) + dims[2:] - bfps.tools.create_alloc_early_dataset( - ofile, - '/tracers{0}/rhs'.format(s), - dims, maxshape, chunks) - if len(pbase_shape) > 1: - chunks = (time_chunk, 1) + pbase_shape[1:] + (3,) - else: - chunks = (time_chunk, pbase_shape[0], 3) - bfps.tools.create_alloc_early_dataset( - ofile, - '/tracers{0}/state'.format(s), - (1,) + pbase_shape + (3,), - (h5py.h5s.UNLIMITED,) + pbase_shape + (3,), - chunks) - # "velocity" is sampled, single precision is enough - # for the results we are interested in. - bfps.tools.create_alloc_early_dataset( - ofile, - '/tracers{0}/velocity'.format(s), - (1,) + pbase_shape + (3,), - (h5py.h5s.UNLIMITED,) + pbase_shape + (3,), - chunks, - dset_dtype = h5py.h5t.IEEE_F32LE) - if self.parameters['tracers{0}_acc_on'.format(s)]: - bfps.tools.create_alloc_early_dataset( - ofile, - '/tracers{0}/acceleration'.format(s), - (1,) + pbase_shape + (3,), - (h5py.h5s.UNLIMITED,) + pbase_shape + (3,), - chunks, - dset_dtype = h5py.h5t.IEEE_F32LE) - return None - def add_particle_fields( - self, - interp_type = 'spline', - kcut = None, - neighbours = 1, - smoothness = 1, - name = 'particle_field', - field_class = 'rFFTW_interpolator', - acc_field_name = 'rFFTW_acc'): - self.fluid_includes += '#include "{0}.hpp"\n'.format(field_class) - self.fluid_variables += field_class + '<{0}, {1}> *vel_{2}, *acc_{2};\n'.format( - self.C_dtype, neighbours, name) - self.parameters[name + '_type'] = interp_type - self.parameters[name + '_neighbours'] = neighbours - if interp_type == 'spline': - self.parameters[name + '_smoothness'] = smoothness - beta_name = 'beta_n{0}_m{1}'.format(neighbours, smoothness) - elif interp_type == 'Lagrange': - beta_name = 'beta_Lagrange_n{0}'.format(neighbours) - if field_class == 'rFFTW_interpolator': - self.fluid_start += ('vel_{0} = new {1}<{2}, {3}>(fs, {4}, fs->rvelocity);\n' + - 'acc_{0} = new {1}<{2}, {3}>(fs, {4}, {5});\n').format(name, - field_class, - self.C_dtype, - neighbours, - beta_name, - acc_field_name) - elif field_class == 'interpolator': - self.fluid_start += ('vel_{0} = new {1}<{2}, {3}>(fs, {4});\n' + - 'acc_{0} = new {1}<{2}, {3}>(fs, {4});\n').format(name, - field_class, - self.C_dtype, - neighbours, - beta_name, - acc_field_name) - self.fluid_end += ('delete vel_{0};\n' + - 'delete acc_{0};\n').format(name) - update_fields = 'fs->compute_velocity(fs->cvorticity);\n' - if not type(kcut) == type(None): - update_fields += 'fs->low_pass_Fourier(fs->cvelocity, 3, {0});\n'.format(kcut) - update_fields += ('fs->ift_velocity();\n' + - 'fs->compute_Lagrangian_acceleration(acc_{0}->field);\n').format(name) - self.fluid_start += update_fields - self.fluid_loop += update_fields - return None - def specific_parser_arguments( - self, - parser): - _fluid_particle_base.specific_parser_arguments(self, parser) - parser.add_argument( - '--src-wd', - type = str, - dest = 'src_work_dir', - default = '') - parser.add_argument( - '--src-simname', - type = str, - dest = 'src_simname', - default = '') - parser.add_argument( - '--src-iteration', - type = int, - dest = 'src_iteration', - default = 0) - parser.add_argument( - '--njobs', - type = int, dest = 'njobs', - default = 1) - parser.add_argument( - '--QR-stats', - action = 'store_true', - dest = 'QR_stats', - help = 'add this option if you want to compute velocity gradient and QR stats') - parser.add_argument( - '--Lag-acc-stats', - action = 'store_true', - dest = 'Lag_acc_stats', - help = 'add this option if you want to compute Lagrangian acceleration statistics') - parser.add_argument( - '--kMeta', - type = float, - dest = 'kMeta', - default = 2.0) - parser.add_argument( - '--dtfactor', - type = float, - dest = 'dtfactor', - default = 0.5, - help = 'dt is computed as DTFACTOR / N') - parser.add_argument( - '--particle-rand-seed', - type = int, - dest = 'particle_rand_seed', - default = None) - parser.add_argument( - '--pclouds', - type = int, - dest = 'pclouds', - default = 1, - help = ('number of particle clouds. Particle "clouds" ' - 'consist of particles distributed according to ' - 'pcloud-type.')) - parser.add_argument( - '--pcloud-type', - choices = ['random-cube', - 'regular-cube'], - dest = 'pcloud_type', - default = 'random-cube') - parser.add_argument( - '--particle-cloud-size', - type = float, - dest = 'particle_cloud_size', - default = 2*np.pi) - parser.add_argument( - '--neighbours', - type = int, - dest = 'neighbours', - default = 1) - parser.add_argument( - '--smoothness', - type = int, - dest = 'smoothness', - default = 1) - return None - def prepare_launch( - self, - args = []): - """Set up reasonable parameters. - - With the default Lundgren forcing applied in the band [2, 4], - we can estimate the dissipation, therefore we can estimate - :math:`k_M \\eta_K` and constrain the viscosity. - Also, if velocity gradient statistics are computed, the - dissipation is used for estimating the bins of the QR histogram. - - In brief, the command line parameter :math:`k_M \\eta_K` is - used in the following formula for :math:`\\nu` (:math:`N` is the - number of real space grid points per coordinate): - - .. math:: - - \\nu = \\left(\\frac{2 k_M \\eta_K}{N} \\right)^{4/3} - - With this choice, the average dissipation :math:`\\varepsilon` - will be close to 0.4, and the integral scale velocity will be - close to 0.77, yielding the approximate value for the Taylor - microscale and corresponding Reynolds number: - - .. math:: - - \\lambda \\approx 4.75\\left(\\frac{2 k_M \\eta_K}{N} \\right)^{4/6}, \\hskip .5in - R_\\lambda \\approx 3.7 \\left(\\frac{N}{2 k_M \\eta_K} \\right)^{4/6} - - """ - opt = _code.prepare_launch(self, args = args) - self.QR_stats_on = opt.QR_stats - self.Lag_acc_stats_on = opt.Lag_acc_stats - self.parameters['nu'] = (opt.kMeta * 2 / opt.n)**(4./3) - self.parameters['dt'] = (opt.dtfactor / opt.n) - # custom famplitude for 288 and 576 - if opt.n == 288: - self.parameters['famplitude'] = 0.45 - elif opt.n == 576: - self.parameters['famplitude'] = 0.47 - if ((self.parameters['niter_todo'] % self.parameters['niter_out']) != 0): - self.parameters['niter_out'] = self.parameters['niter_todo'] - if self.QR_stats_on: - # max_Q_estimate and max_R_estimate are just used for the 2D pdf - # therefore I just want them to be small multiples of mean trS2 - # I'm already estimating the dissipation with kMeta... - meantrS2 = (opt.n//2 / opt.kMeta)**4 * self.parameters['nu']**2 - self.parameters['max_Q_estimate'] = meantrS2 - self.parameters['max_R_estimate'] = .4*meantrS2**1.5 - # add QR suffix to code name, since we now expect additional - # datasets in the .h5 file - self.name += '-QR' - if self.Lag_acc_stats_on: - self.name += '-Lag_acc' - if len(opt.src_work_dir) == 0: - opt.src_work_dir = os.path.realpath(opt.work_dir) - self.pars_from_namespace(opt) - return opt - def launch( - self, - args = [], - noparticles = False, - **kwargs): - opt = self.prepare_launch(args = args) - self.fill_up_fluid_code() - if noparticles: - opt.nparticles = 0 - elif type(opt.nparticles) == int: - if opt.nparticles > 0: - self.name += '-particles' - self.add_3D_rFFTW_field( - name = 'rFFTW_acc') - self.add_interpolator( - name = 'cubic_spline', - neighbours = opt.neighbours, - smoothness = opt.smoothness, - class_name = 'rFFTW_interpolator') - self.add_particles( - integration_steps = [4], - interpolator = 'cubic_spline', - acc_name = 'rFFTW_acc', - class_name = 'rFFTW_distributed_particles') - self.variables += 'hid_t particle_file;\n' - self.main_start += """ - if (myrank == 0) - { - // set caching parameters - hid_t fapl = H5Pcreate(H5P_FILE_ACCESS); - herr_t cache_err = H5Pset_cache(fapl, 0, 521, 134217728, 1.0); - DEBUG_MSG("when setting cache for particles I got %d\\n", cache_err); - sprintf(fname, "%s_particles.h5", simname); - particle_file = H5Fopen(fname, H5F_ACC_RDWR, fapl); - } - """ - self.main_end = ('if (myrank == 0)\n' + - '{\n' + - 'H5Fclose(particle_file);\n' + - '}\n') + self.main_end - self.finalize_code() - self.launch_jobs(opt = opt, **kwargs) - return None - def launch_jobs( - self, - opt = None, - particle_initial_condition = None): - if not os.path.exists(os.path.join(self.work_dir, self.simname + '.h5')): - if opt.pclouds > 1: - np.random.seed(opt.particle_rand_seed) - if opt.pcloud_type == 'random-cube': - particle_initial_condition = ( - np.random.random((opt.pclouds, 1, 3))*2*np.pi + - np.random.random((1, self.parameters['nparticles'], 3))*opt.particle_cloud_size) - elif opt.pcloud_type == 'regular-cube': - onedarray = np.linspace( - -opt.particle_cloud_size/2, - opt.particle_cloud_size/2, - self.parameters['nparticles']) - particle_initial_condition = np.zeros( - (opt.pclouds, - self.parameters['nparticles'], - self.parameters['nparticles'], - self.parameters['nparticles'], 3), - dtype = np.float64) - particle_initial_condition[:] = \ - np.random.random((opt.pclouds, 1, 1, 1, 3))*2*np.pi - particle_initial_condition[..., 0] += onedarray[None, None, None, :] - particle_initial_condition[..., 1] += onedarray[None, None, :, None] - particle_initial_condition[..., 2] += onedarray[None, :, None, None] - self.write_par( - particle_ic = particle_initial_condition) - if self.parameters['nparticles'] > 0: - data = self.generate_tracer_state( - species = 0, - rseed = opt.particle_rand_seed, - data = particle_initial_condition) - for s in range(1, self.particle_species): - self.generate_tracer_state(species = s, data = data) - init_condition_file = os.path.join( - self.work_dir, - self.simname + '_cvorticity_i{0:0>5x}'.format(0)) - if not os.path.exists(init_condition_file): - if len(opt.src_simname) > 0: - src_file = os.path.join( - os.path.realpath(opt.src_work_dir), - opt.src_simname + '_cvorticity_i{0:0>5x}'.format(opt.src_iteration)) - os.symlink(src_file, init_condition_file) - else: - self.generate_vector_field( - write_to_file = True, - spectra_slope = 2.0, - amplitude = 0.05) - self.run( - nb_processes = opt.nb_processes, - nb_threads_per_process = opt.nb_threads_per_process, - njobs = opt.njobs, - hours = opt.minutes // 60, - minutes = opt.minutes % 60, - no_submit = opt.no_submit) - return None - diff --git a/bfps/PP.py b/bfps/PP.py index 27a359287dca65f01f2a66eaaef1fe56c13862fc..5716a7fe793c71413b823e4aad10dc6886294ef4 100644 --- a/bfps/PP.py +++ b/bfps/PP.py @@ -118,6 +118,7 @@ class PP(_code): return None def generate_default_parameters(self): # these parameters are relevant for all PP classes + self.parameters['fftw_plan_rigor'] = 'FFTW_ESTIMATE' self.parameters['dealias_type'] = int(1) self.parameters['dkx'] = float(1.0) self.parameters['dky'] = float(1.0) diff --git a/bfps/TEST.py b/bfps/TEST.py index cd4d3e4a82874c53b9dff5134e1a1da0067a61a7..66b2b4aad6b308c78735005f55765bfa3d3eb98c 100644 --- a/bfps/TEST.py +++ b/bfps/TEST.py @@ -119,6 +119,7 @@ class TEST(_code): return None def generate_default_parameters(self): # these parameters are relevant for all TEST classes + self.parameters['fftw_plan_rigor'] = 'FFTW_ESTIMATE' self.parameters['dealias_type'] = int(1) self.parameters['dkx'] = float(1.0) self.parameters['dky'] = float(1.0) diff --git a/bfps/__init__.py b/bfps/__init__.py index 29dc62a16cafafa3361480ce0bc7904fc92bc521..babbc203cc6ea1f788458415147d4a48f3b328f1 100644 --- a/bfps/__init__.py +++ b/bfps/__init__.py @@ -49,8 +49,5 @@ from host_information import host_info from .DNS import DNS from .PP import PP from .TEST import TEST -from .FluidConvert import FluidConvert -from .NavierStokes import NavierStokes -from .NSVorticityEquation import NSVorticityEquation #import test diff --git a/bfps/__main__.py b/bfps/__main__.py index 03f68a971351ea8eb23eb6621c597e660dc7c825..16a7cf7d099c49a39368a8ff09cb05bf890feb6f 100644 --- a/bfps/__main__.py +++ b/bfps/__main__.py @@ -31,10 +31,6 @@ import bfps from .DNS import DNS from .PP import PP from .TEST import TEST -from .NavierStokes import NavierStokes -from .NSVorticityEquation import NSVorticityEquation -from .FluidConvert import FluidConvert -from .NSManyParticles import NSManyParticles def main(): parser = argparse.ArgumentParser(prog = 'bfps') @@ -42,29 +38,9 @@ def main(): '-v', '--version', action = 'version', version = '%(prog)s ' + bfps.__version__) - NSoptions = ['NavierStokes', - 'NavierStokes-single', - 'NavierStokes-double', - 'NS', - 'NS-single', - 'NS-double'] - NSVEoptions = ['NSVorticityEquation', - 'NSVorticityEquation-single', - 'NSVorticityEquation-double', - 'NSVE', - 'NSVE-single', - 'NSVE-double'] - FCoptions = ['FluidConvert'] - NSMPopt = ['NSManyParticles', - 'NSManyParticles-single', - 'NSManyParticles-double'] parser.add_argument( 'base_class', - choices = ['DNS', 'PP', 'TEST'] + - NSoptions + - NSVEoptions + - FCoptions + - NSMPopt, + choices = ['DNS', 'PP', 'TEST'], type = str) # first option is the choice of base class or -h or -v # all other options are passed on to the base_class instance @@ -73,29 +49,10 @@ def main(): # cannot be executed by mistake. if opt.base_class == 'DNS': c = DNS() - c.launch(args = sys.argv[2:]) - return None if opt.base_class == 'PP': c = PP() - c.launch(args = sys.argv[2:]) - return None if opt.base_class == 'TEST': c = TEST() - c.launch(args = sys.argv[2:]) - return None - if 'double' in opt.base_class: - precision = 'double' - else: - precision = 'single' - if opt.base_class in NSoptions: - base_class = NavierStokes - if opt.base_class in NSVEoptions: - base_class = NSVorticityEquation - elif opt.base_class in FCoptions: - base_class = FluidConvert - elif opt.base_class in NSMPopt: - base_class = NSManyParticles - c = base_class(fluid_precision = precision) c.launch(args = sys.argv[2:]) return None diff --git a/bfps/_code.py b/bfps/_code.py index fed603e3da2b6fe7e32a1dd398d59cdd38a5b49b..143ef29a5d4fc9e3da1c9c00b8e4df915b532beb 100644 --- a/bfps/_code.py +++ b/bfps/_code.py @@ -443,7 +443,7 @@ class _code(_base): script_file.write('mpiexec.hydra ' + ' -np {} '.format(nb_mpi_processes) + ' -ppn {} '.format(nb_processes_per_node) - + ' -ordered-output -prepend-rank ' + #+ ' -ordered-output -prepend-rank ' + os.path.join( self.work_dir, command_atoms[0]) + diff --git a/bfps/_fluid_base.py b/bfps/_fluid_base.py deleted file mode 100644 index 757e6cb81e6c605cbcb3c2e9d19bd7487add115f..0000000000000000000000000000000000000000 --- a/bfps/_fluid_base.py +++ /dev/null @@ -1,503 +0,0 @@ -####################################################################### -# # -# Copyright 2015 Max Planck Institute # -# for Dynamics and Self-Organization # -# # -# This file is part of bfps. # -# # -# bfps is free software: you can redistribute it and/or modify # -# it under the terms of the GNU General Public License as published # -# by the Free Software Foundation, either version 3 of the License, # -# or (at your option) any later version. # -# # -# bfps is distributed in the hope that it will be useful, # -# but WITHOUT ANY WARRANTY; without even the implied warranty of # -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # -# GNU General Public License for more details. # -# # -# You should have received a copy of the GNU General Public License # -# along with bfps. If not, see <http://www.gnu.org/licenses/> # -# # -# Contact: Cristian.Lalescu@ds.mpg.de # -# # -####################################################################### - - - -from ._code import _code -from bfps import tools - -import os -import numpy as np -import h5py - -class _fluid_particle_base(_code): - """This class is meant to put together all common code between the - different C++ solvers/postprocessing tools, so that development of - specific functionalities is not overwhelming. - """ - def __init__( - self, - name = 'solver', - work_dir = './', - simname = 'test', - dtype = np.float32, - use_fftw_wisdom = True): - _code.__init__( - self, - work_dir = work_dir, - simname = simname) - self.use_fftw_wisdom = use_fftw_wisdom - self.name = name - self.particle_species = 0 - if dtype in [np.float32, np.float64]: - self.dtype = dtype - elif dtype in ['single', 'double']: - if dtype == 'single': - self.dtype = np.dtype(np.float32) - elif dtype == 'double': - self.dtype = np.dtype(np.float64) - self.rtype = self.dtype - if self.rtype == np.float32: - self.ctype = np.dtype(np.complex64) - self.C_dtype = 'float' - elif self.rtype == np.float64: - self.ctype = np.dtype(np.complex128) - self.C_dtype = 'double' - self.parameters['dealias_type'] = 1 - self.parameters['dkx'] = 1.0 - self.parameters['dky'] = 1.0 - self.parameters['dkz'] = 1.0 - self.parameters['niter_todo'] = 8 - self.parameters['niter_part'] = 1 - self.parameters['niter_stat'] = 1 - self.parameters['niter_out'] = 1024 - self.parameters['nparticles'] = 0 - self.parameters['dt'] = 0.01 - self.fluid_includes = '#include "fluid_solver.hpp"\n' - self.fluid_includes = '#include "field.hpp"\n' - self.fluid_variables = '' - self.fluid_definitions = '' - self.fluid_start = '' - self.fluid_loop = '' - self.fluid_end = '' - self.fluid_output = '' - self.stat_src = '' - self.particle_includes = '' - self.particle_variables = '' - self.particle_definitions = '' - self.particle_start = '' - self.particle_loop = '' - self.particle_output = '' - self.particle_end = '' - self.particle_stat_src = '' - self.file_datasets_grow = '' - self.store_kspace = """ - //begincpp - if (myrank == 0 && iteration == 0) - { - TIMEZONE("fuild_base::store_kspace"); - hsize_t dims[4]; - hid_t space, dset; - // store kspace information - hid_t parameter_file = stat_file; - //char fname[256]; - //sprintf(fname, "%s.h5", simname); - //parameter_file = H5Fopen(fname, H5F_ACC_RDWR, H5P_DEFAULT); - dset = H5Dopen(parameter_file, "/kspace/kshell", H5P_DEFAULT); - space = H5Dget_space(dset); - H5Sget_simple_extent_dims(space, dims, NULL); - H5Sclose(space); - if (fs->nshells != dims[0]) - { - DEBUG_MSG( - "ERROR: computed nshells %d not equal to data file nshells %d\\n", - fs->nshells, dims[0]); - } - H5Dwrite(dset, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, fs->kshell); - H5Dclose(dset); - dset = H5Dopen(parameter_file, "/kspace/nshell", H5P_DEFAULT); - H5Dwrite(dset, H5T_NATIVE_INT64, H5S_ALL, H5S_ALL, H5P_DEFAULT, fs->nshell); - H5Dclose(dset); - dset = H5Dopen(parameter_file, "/kspace/kM", H5P_DEFAULT); - H5Dwrite(dset, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, &fs->kMspec); - H5Dclose(dset); - dset = H5Dopen(parameter_file, "/kspace/dk", H5P_DEFAULT); - H5Dwrite(dset, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, &fs->dk); - H5Dclose(dset); - //H5Fclose(parameter_file); - } - //endcpp - """ - return None - def get_data_file_name(self): - return os.path.join(self.work_dir, self.simname + '.h5') - def get_data_file(self): - return h5py.File(self.get_data_file_name(), 'r') - def get_particle_file_name(self): - return os.path.join(self.work_dir, self.simname + '_particles.h5') - def get_particle_file(self): - return h5py.File(self.get_particle_file_name(), 'r') - def finalize_code( - self, - postprocess_mode = False): - self.includes += self.fluid_includes - self.includes += '#include <ctime>\n' - self.variables += self.fluid_variables - self.definitions += ('int grow_single_dataset(hid_t dset, int tincrement)\n{\n' + - 'int ndims;\n' + - 'hsize_t space;\n' + - 'space = H5Dget_space(dset);\n' + - 'ndims = H5Sget_simple_extent_ndims(space);\n' + - 'hsize_t *dims = new hsize_t[ndims];\n' + - 'H5Sget_simple_extent_dims(space, dims, NULL);\n' + - 'dims[0] += tincrement;\n' + - 'H5Dset_extent(dset, dims);\n' + - 'H5Sclose(space);\n' + - 'delete[] dims;\n' + - 'return EXIT_SUCCESS;\n}\n') - self.definitions+= self.fluid_definitions - if self.particle_species > 0: - self.includes += self.particle_includes - self.variables += self.particle_variables - self.definitions += self.particle_definitions - self.definitions += ('herr_t grow_statistics_dataset(hid_t o_id, const char *name, const H5O_info_t *info, void *op_data)\n{\n' + - 'if (info->type == H5O_TYPE_DATASET)\n{\n' + - 'hsize_t dset = H5Dopen(o_id, name, H5P_DEFAULT);\n' + - 'grow_single_dataset(dset, niter_todo/niter_stat);\n' - 'H5Dclose(dset);\n}\n' + - 'return 0;\n}\n') - self.definitions += ('herr_t grow_particle_datasets(hid_t g_id, const char *name, const H5L_info_t *info, void *op_data)\n{\n' + - 'hsize_t dset;\n') - for key in ['state', 'velocity', 'acceleration']: - self.definitions += ('if (H5Lexists(g_id, "{0}", H5P_DEFAULT))\n'.format(key) + - '{\n' + - 'dset = H5Dopen(g_id, "{0}", H5P_DEFAULT);\n'.format(key) + - 'grow_single_dataset(dset, niter_todo/niter_part);\n' + - 'H5Dclose(dset);\n}\n') - self.definitions += ('if (H5Lexists(g_id, "rhs", H5P_DEFAULT))\n{\n' + - 'dset = H5Dopen(g_id, "rhs", H5P_DEFAULT);\n' + - 'grow_single_dataset(dset, 1);\n' + - 'H5Dclose(dset);\n}\n' + - 'return 0;\n}\n') - self.definitions += ('int grow_file_datasets()\n{\n' + - 'int file_problems = 0;\n' + - self.file_datasets_grow + - 'return file_problems;\n' - '}\n') - self.definitions += 'void do_stats()\n{\n' + self.stat_src + '}\n' - self.definitions += 'void do_particle_stats()\n{\n' + self.particle_stat_src + '}\n' - # take care of wisdom - if self.use_fftw_wisdom: - if self.dtype == np.float32: - fftw_prefix = 'fftwf_' - elif self.dtype == np.float64: - fftw_prefix = 'fftw_' - self.main_start += """ - //begincpp - if (myrank == 0) - {{ - char fname[256]; - sprintf(fname, "%s_fftw_wisdom.txt", simname); - {0}import_wisdom_from_filename(fname); - }} - {0}mpi_broadcast_wisdom(MPI_COMM_WORLD); - //endcpp - """.format(fftw_prefix) - self.main_end = """ - //begincpp - {0}mpi_gather_wisdom(MPI_COMM_WORLD); - MPI_Barrier(MPI_COMM_WORLD); - if (myrank == 0) - {{ - char fname[256]; - sprintf(fname, "%s_fftw_wisdom.txt", simname); - {0}export_wisdom_to_filename(fname); - }} - //endcpp - """.format(fftw_prefix) + self.main_end - self.main = """ - //begincpp - int data_file_problem; - clock_t time0, time1; - double time_difference, local_time_difference; - time0 = clock(); - if (myrank == 0) data_file_problem = grow_file_datasets(); - MPI_Bcast(&data_file_problem, 1, MPI_INT, 0, MPI_COMM_WORLD); - if (data_file_problem > 0) - { - std::cerr << data_file_problem << " problems growing file datasets.\\ntrying to exit now." << std::endl; - MPI_Finalize(); - return EXIT_SUCCESS; - } - //endcpp - """ - self.main += self.fluid_start - if self.particle_species > 0: - self.main += self.particle_start - output_time_difference = ('time1 = clock();\n' + - 'local_time_difference = ((unsigned int)(time1 - time0))/((double)CLOCKS_PER_SEC);\n' + - 'time_difference = 0.0;\n' + - 'MPI_Allreduce(&local_time_difference, &time_difference, ' + - '1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);\n' + - 'if (myrank == 0) std::cout << "iteration " ' + - '<< {0} << " took " ' + - '<< time_difference/nprocs << " seconds" << std::endl;\n' + - 'if (myrank == 0) std::cerr << "iteration " ' + - '<< {0} << " took " ' + - '<< time_difference/nprocs << " seconds" << std::endl;\n' + - 'time0 = time1;\n') - if not postprocess_mode: - self.main += 'for (int max_iter = iteration+niter_todo-iteration%niter_todo; iteration < max_iter; iteration++)\n' - self.main += '{\n' - - self.main += """ - #ifdef USE_TIMINGOUTPUT - const std::string loopLabel = "code::main_start::loop-" + std::to_string(iteration); - TIMEZONE(loopLabel.c_str()); - #endif - """ - self.main += 'if (iteration % niter_stat == 0) do_stats();\n' - if self.particle_species > 0: - self.main += 'if (iteration % niter_part == 0) do_particle_stats();\n' - self.main += self.particle_loop - self.main += self.fluid_loop - self.main += output_time_difference.format('iteration') - self.main += '}\n' - self.main += 'do_stats();\n' - self.main += 'do_particle_stats();\n' - self.main += output_time_difference.format('iteration') - else: - self.main += 'for (int frame_index = iter0; frame_index <= iter1; frame_index += niter_out)\n' - self.main += '{\n' - self.main += """ - #ifdef USE_TIMINGOUTPUT - const std::string loopLabel = "code::main_start::loop-" + std::to_string(frame_index); - TIMEZONE(loopLabel.c_str()); - #endif - """ - if self.particle_species > 0: - self.main += self.particle_loop - self.main += self.fluid_loop - self.main += output_time_difference.format('frame_index') - self.main += '}\n' - self.main += self.fluid_end - if self.particle_species > 0: - self.main += self.particle_end - return None - def read_rfield( - self, - field = 'velocity', - iteration = 0, - filename = None): - """ - :note: assumes field is a vector field - """ - if type(filename) == type(None): - filename = os.path.join( - self.work_dir, - self.simname + '_r' + field + '_i{0:0>5x}'.format(iteration)) - return np.memmap( - filename, - dtype = self.dtype, - mode = 'r', - shape = (self.parameters['nz'], - self.parameters['ny'], - self.parameters['nx'], 3)) - def transpose_frame( - self, - field = 'velocity', - iteration = 0, - filename = None, - ofile = None): - Rdata = self.read_rfield( - field = field, - iteration = iteration, - filename = filename) - new_data = np.zeros( - (3, - self.parameters['nz'], - self.parameters['ny'], - self.parameters['nx']), - dtype = self.dtype) - for i in range(3): - new_data[i] = Rdata[..., i] - if type(ofile) == type(None): - ofile = os.path.join( - self.work_dir, - self.simname + '_r' + field + '_i{0:0>5x}_3xNZxNYxNX'.format(iteration)) - else: - new_data.tofile(ofile) - return new_data - def plot_vel_cut( - self, - axis, - field = 'velocity', - iteration = 0, - yval = 13, - filename = None): - axis.set_axis_off() - Rdata0 = self.read_rfield(field = field, iteration = iteration, filename = filename) - energy = np.sum(Rdata0[:, yval, :, :]**2, axis = 2)*.5 - axis.imshow(energy, interpolation='none') - axis.set_title('{0}'.format(np.average(Rdata0[..., 0]**2 + - Rdata0[..., 1]**2 + - Rdata0[..., 2]**2)*.5)) - return Rdata0 - def generate_vector_field( - self, - rseed = 7547, - spectra_slope = 1., - amplitude = 1., - iteration = 0, - field_name = 'vorticity', - write_to_file = False, - # to switch to constant field, use generate_data_3D_uniform - # for scalar_generator - scalar_generator = tools.generate_data_3D): - """generate vector field. - - The generated field is not divergence free, but it has the proper - shape. - - :param rseed: seed for random number generator - :param spectra_slope: spectrum of field will look like k^(-p) - :param amplitude: all amplitudes are multiplied with this value - :param iteration: the field is written at this iteration - :param field_name: the name of the field being generated - :param write_to_file: should we write the field to file? - :param scalar_generator: which function to use for generating the - individual components. - Possible values: bfps.tools.generate_data_3D, - bfps.tools.generate_data_3D_uniform - :type rseed: int - :type spectra_slope: float - :type amplitude: float - :type iteration: int - :type field_name: str - :type write_to_file: bool - :type scalar_generator: function - - :returns: ``Kdata``, a complex valued 4D ``numpy.array`` that uses the - transposed FFTW layout. - Kdata[ky, kz, kx, i] is the amplitude of mode (kx, ky, kz) for - the i-th component of the field. - (i.e. x is the fastest index and z the slowest index in the - real-space representation). - """ - np.random.seed(rseed) - Kdata00 = scalar_generator( - self.parameters['nz']//2, - self.parameters['ny']//2, - self.parameters['nx']//2, - p = spectra_slope, - amplitude = amplitude).astype(self.ctype) - Kdata01 = scalar_generator( - self.parameters['nz']//2, - self.parameters['ny']//2, - self.parameters['nx']//2, - p = spectra_slope, - amplitude = amplitude).astype(self.ctype) - Kdata02 = scalar_generator( - self.parameters['nz']//2, - self.parameters['ny']//2, - self.parameters['nx']//2, - p = spectra_slope, - amplitude = amplitude).astype(self.ctype) - Kdata0 = np.zeros( - Kdata00.shape + (3,), - Kdata00.dtype) - Kdata0[..., 0] = Kdata00 - Kdata0[..., 1] = Kdata01 - Kdata0[..., 2] = Kdata02 - Kdata1 = tools.padd_with_zeros( - Kdata0, - self.parameters['nz'], - self.parameters['ny'], - self.parameters['nx']) - if write_to_file: - Kdata1.tofile( - os.path.join(self.work_dir, - self.simname + "_c{0}_i{1:0>5x}".format(field_name, iteration))) - return Kdata1 - def generate_tracer_state( - self, - rseed = None, - iteration = 0, - species = 0, - write_to_file = False, - ncomponents = 3, - testing = False, - data = None): - if (type(data) == type(None)): - if not type(rseed) == type(None): - np.random.seed(rseed) - #point with problems: 5.37632864e+00, 6.10414710e+00, 6.25256493e+00] - data = np.zeros(self.parameters['nparticles']*ncomponents).reshape(-1, ncomponents) - data[:, :3] = np.random.random((self.parameters['nparticles'], 3))*2*np.pi - if testing: - #data[0] = np.array([3.26434, 4.24418, 3.12157]) - data[0] = np.array([ 0.72086101, 2.59043666, 6.27501953]) - with h5py.File(self.get_particle_file_name(), 'r+') as data_file: - data_file['tracers{0}/state'.format(species)][0] = data - if write_to_file: - data.tofile( - os.path.join( - self.work_dir, - "tracers{0}_state_i{1:0>5x}".format(species, iteration))) - return data - def generate_initial_condition(self): - self.generate_vector_field(write_to_file = True) - for species in range(self.particle_species): - self.generate_tracer_state( - species = species, - write_to_file = False) - return None - def get_kspace(self): - kspace = {} - if self.parameters['dealias_type'] == 1: - kMx = self.parameters['dkx']*(self.parameters['nx']//2 - 1) - kMy = self.parameters['dky']*(self.parameters['ny']//2 - 1) - kMz = self.parameters['dkz']*(self.parameters['nz']//2 - 1) - else: - kMx = self.parameters['dkx']*(self.parameters['nx']//3 - 1) - kMy = self.parameters['dky']*(self.parameters['ny']//3 - 1) - kMz = self.parameters['dkz']*(self.parameters['nz']//3 - 1) - kspace['kM'] = max(kMx, kMy, kMz) - kspace['dk'] = min(self.parameters['dkx'], - self.parameters['dky'], - self.parameters['dkz']) - nshells = int(kspace['kM'] / kspace['dk']) + 2 - kspace['nshell'] = np.zeros(nshells, dtype = np.int64) - kspace['kshell'] = np.zeros(nshells, dtype = np.float64) - kspace['kx'] = np.arange( 0, - self.parameters['nx']//2 + 1).astype(np.float64)*self.parameters['dkx'] - kspace['ky'] = np.arange(-self.parameters['ny']//2 + 1, - self.parameters['ny']//2 + 1).astype(np.float64)*self.parameters['dky'] - kspace['ky'] = np.roll(kspace['ky'], self.parameters['ny']//2+1) - kspace['kz'] = np.arange(-self.parameters['nz']//2 + 1, - self.parameters['nz']//2 + 1).astype(np.float64)*self.parameters['dkz'] - kspace['kz'] = np.roll(kspace['kz'], self.parameters['nz']//2+1) - return kspace - def write_par(self, iter0 = 0): - assert (self.parameters['niter_todo'] % self.parameters['niter_stat'] == 0) - assert (self.parameters['niter_todo'] % self.parameters['niter_out'] == 0) - assert (self.parameters['niter_todo'] % self.parameters['niter_part'] == 0) - assert (self.parameters['niter_out'] % self.parameters['niter_stat'] == 0) - assert (self.parameters['niter_out'] % self.parameters['niter_part'] == 0) - _code.write_par(self, iter0 = iter0) - with h5py.File(os.path.join(self.work_dir, self.simname + '.h5'), 'r+') as ofile: - ofile['bfps_info/exec_name'] = self.name - ofile['field_dtype'] = np.dtype(self.dtype).str - kspace = self.get_kspace() - for k in kspace.keys(): - ofile['kspace/' + k] = kspace[k] - nshells = kspace['nshell'].shape[0] - ofile.close() - return None - def specific_parser_arguments( - self, - parser): - _code.specific_parser_arguments(self, parser) - return None - diff --git a/bfps/cpp/distributed_particles.cpp b/bfps/cpp/distributed_particles.cpp deleted file mode 100644 index 73fd0275d8138d41bb4ee7fbc28e2d41e8017661..0000000000000000000000000000000000000000 --- a/bfps/cpp/distributed_particles.cpp +++ /dev/null @@ -1,472 +0,0 @@ -/********************************************************************** -* * -* Copyright 2015 Max Planck Institute * -* for Dynamics and Self-Organization * -* * -* This file is part of bfps. * -* * -* bfps is free software: you can redistribute it and/or modify * -* it under the terms of the GNU General Public License as published * -* by the Free Software Foundation, either version 3 of the License, * -* or (at your option) any later version. * -* * -* bfps is distributed in the hope that it will be useful, * -* but WITHOUT ANY WARRANTY; without even the implied warranty of * -* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * -* GNU General Public License for more details. * -* * -* You should have received a copy of the GNU General Public License * -* along with bfps. If not, see <http://www.gnu.org/licenses/> * -* * -* Contact: Cristian.Lalescu@ds.mpg.de * -* * -**********************************************************************/ - - - -//#define NDEBUG - -#include <cmath> -#include <cassert> -#include <cstring> -#include <string> -#include <sstream> -#include <array> - -#include "base.hpp" -#include "distributed_particles.hpp" -#include "fftw_tools.hpp" -#include "scope_timer.hpp" - - -extern int myrank, nprocs; - -template <particle_types particle_type, class rnumber, int interp_neighbours> -distributed_particles<particle_type, rnumber, interp_neighbours>::distributed_particles( - const char *NAME, - const hid_t data_file_id, - interpolator<rnumber, interp_neighbours> *VEL, - const int TRAJ_SKIP, - const int INTEGRATION_STEPS) : particles_io_base<particle_type>( - NAME, - TRAJ_SKIP, - data_file_id, - VEL->descriptor->comm) -{ - assert((INTEGRATION_STEPS <= 6) && - (INTEGRATION_STEPS >= 1)); - this->vel = VEL; - this->rhs.resize(INTEGRATION_STEPS); - this->integration_steps = INTEGRATION_STEPS; - this->state.reserve(2*this->nparticles / this->nprocs); - for (unsigned int i=0; i<this->rhs.size(); i++) - this->rhs[i].reserve(2*this->nparticles / this->nprocs); -} - -template <particle_types particle_type, class rnumber, int interp_neighbours> -distributed_particles<particle_type, rnumber, interp_neighbours>::~distributed_particles() -{ -} - -template <particle_types particle_type, class rnumber, int interp_neighbours> -void distributed_particles<particle_type, rnumber, interp_neighbours>::sample( - interpolator<rnumber, interp_neighbours> *field, - const std::unordered_map<int, single_particle_state<particle_type>> &x, - std::unordered_map<int, single_particle_state<POINT3D>> &y) -{ - std::array<double, 3> yy; - y.clear(); - for (auto &pp: x) - { - (*field)(pp.second.data, &yy.front()); - y[pp.first] = &yy.front(); - } -} - -template <particle_types particle_type, class rnumber, int interp_neighbours> -void distributed_particles<particle_type, rnumber, interp_neighbours>::get_rhs( - const std::unordered_map<int, single_particle_state<particle_type>> &x, - std::unordered_map<int, single_particle_state<particle_type>> &y) -{ - std::unordered_map<int, single_particle_state<POINT3D>> yy; - switch(particle_type) - { - case VELOCITY_TRACER: - this->sample(this->vel, this->state, yy); - y.clear(); - for (auto &pp: x) - y[pp.first] = yy[pp.first].data; - break; - } -} - -template <particle_types particle_type, class rnumber, int interp_neighbours> -void distributed_particles<particle_type, rnumber, interp_neighbours>::sample( - interpolator<rnumber, interp_neighbours> *field, - const char *dset_name) -{ - std::unordered_map<int, single_particle_state<POINT3D>> y; - this->sample(field, this->state, y); - this->write(dset_name, y); -} - -template <particle_types particle_type, class rnumber, int interp_neighbours> -void distributed_particles<particle_type, rnumber, interp_neighbours>::roll_rhs() -{ - for (int i=this->integration_steps-2; i>=0; i--) - rhs[i+1] = rhs[i]; -} - -template <particle_types particle_type, class rnumber, int interp_neighbours> -void distributed_particles<particle_type, rnumber, interp_neighbours>::redistribute( - std::unordered_map<int, single_particle_state<particle_type>> &x, - std::vector<std::unordered_map<int, single_particle_state<particle_type>>> &vals) -{ - TIMEZONE("distributed_particles::redistribute"); - //DEBUG_MSG("entered redistribute\n"); - /* neighbouring rank offsets */ - int ro[2]; - ro[0] = -1; - ro[1] = 1; - /* neighbouring ranks */ - int nr[2]; - nr[0] = MOD(this->myrank+ro[0], this->nprocs); - nr[1] = MOD(this->myrank+ro[1], this->nprocs); - /* particles to send, particles to receive */ - std::vector<int> ps[2], pr[2]; - /* number of particles to send, number of particles to receive */ - int nps[2], npr[2]; - int rsrc, rdst; - /* get list of id-s to send */ - for (auto &pp: x) - for (unsigned int i=0; i<2; i++) - if (this->vel->get_rank(pp.second.data[2]) == nr[i]) - ps[i].push_back(pp.first); - /* prepare data for send recv */ - for (unsigned int i=0; i<2; i++) - nps[i] = ps[i].size(); - for (rsrc = 0; rsrc<this->nprocs; rsrc++) - for (unsigned int i=0; i<2; i++) - { - rdst = MOD(rsrc+ro[i], this->nprocs); - if (this->myrank == rsrc) - MPI_Send( - nps+i, - 1, - MPI_INTEGER, - rdst, - 2*(rsrc*this->nprocs + rdst)+i, - this->comm); - if (this->myrank == rdst) - MPI_Recv( - npr+1-i, - 1, - MPI_INTEGER, - rsrc, - 2*(rsrc*this->nprocs + rdst)+i, - this->comm, - MPI_STATUS_IGNORE); - } - //DEBUG_MSG("I have to send %d %d particles\n", nps[0], nps[1]); - //DEBUG_MSG("I have to recv %d %d particles\n", npr[0], npr[1]); - for (unsigned int i=0; i<2; i++) - pr[i].resize(npr[i]); - - int buffer_size = (nps[0] > nps[1]) ? nps[0] : nps[1]; - buffer_size = (buffer_size > npr[0])? buffer_size : npr[0]; - buffer_size = (buffer_size > npr[1])? buffer_size : npr[1]; - //DEBUG_MSG("buffer size is %d\n", buffer_size); - double *buffer = new double[buffer_size*state_dimension(particle_type)*(1+vals.size())]; - for (rsrc = 0; rsrc<this->nprocs; rsrc++) - for (unsigned int i=0; i<2; i++) - { - rdst = MOD(rsrc+ro[i], this->nprocs); - if (this->myrank == rsrc && nps[i] > 0) - { - MPI_Send( - &ps[i].front(), - nps[i], - MPI_INTEGER, - rdst, - 2*(rsrc*this->nprocs + rdst), - this->comm); - int pcounter = 0; - for (int p: ps[i]) - { - std::copy(x[p].data, - x[p].data + state_dimension(particle_type), - buffer + pcounter*(1+vals.size())*state_dimension(particle_type)); - x.erase(p); - for (unsigned int tindex=0; tindex<vals.size(); tindex++) - { - std::copy(vals[tindex][p].data, - vals[tindex][p].data + state_dimension(particle_type), - buffer + (pcounter*(1+vals.size()) + tindex+1)*state_dimension(particle_type)); - vals[tindex].erase(p); - } - pcounter++; - } - MPI_Send( - buffer, - nps[i]*(1+vals.size())*state_dimension(particle_type), - MPI_DOUBLE, - rdst, - 2*(rsrc*this->nprocs + rdst)+1, - this->comm); - } - if (this->myrank == rdst && npr[1-i] > 0) - { - MPI_Recv( - &pr[1-i].front(), - npr[1-i], - MPI_INTEGER, - rsrc, - 2*(rsrc*this->nprocs + rdst), - this->comm, - MPI_STATUS_IGNORE); - MPI_Recv( - buffer, - npr[1-i]*(1+vals.size())*state_dimension(particle_type), - MPI_DOUBLE, - rsrc, - 2*(rsrc*this->nprocs + rdst)+1, - this->comm, - MPI_STATUS_IGNORE); - unsigned int pcounter = 0; - for (int p: pr[1-i]) - { - x[p] = buffer + (pcounter*(1+vals.size()))*state_dimension(particle_type); - for (unsigned int tindex=0; tindex<vals.size(); tindex++) - { - vals[tindex][p] = buffer + (pcounter*(1+vals.size()) + tindex+1)*state_dimension(particle_type); - } - pcounter++; - } - } - } - delete[] buffer; - - -#ifndef NDEBUG - /* check that all particles at x are local */ - for (auto &pp: x) - if (this->vel->get_rank(pp.second.data[2]) != this->myrank) - { - DEBUG_MSG("found particle %d with rank %d\n", - pp.first, - this->vel->get_rank(pp.second.data[2])); - assert(false); - } -#endif - //DEBUG_MSG("exiting redistribute\n"); -} - - - -template <particle_types particle_type, class rnumber, int interp_neighbours> -void distributed_particles<particle_type, rnumber, interp_neighbours>::AdamsBashforth( - const int nsteps) -{ - this->get_rhs(this->state, this->rhs[0]); - for (auto &pp: this->state) - for (unsigned int i=0; i<state_dimension(particle_type); i++) - switch(nsteps) - { - case 1: - pp.second[i] += this->dt*this->rhs[0][pp.first][i]; - break; - case 2: - pp.second[i] += this->dt*(3*this->rhs[0][pp.first][i] - - this->rhs[1][pp.first][i])/2; - break; - case 3: - pp.second[i] += this->dt*(23*this->rhs[0][pp.first][i] - - 16*this->rhs[1][pp.first][i] - + 5*this->rhs[2][pp.first][i])/12; - break; - case 4: - pp.second[i] += this->dt*(55*this->rhs[0][pp.first][i] - - 59*this->rhs[1][pp.first][i] - + 37*this->rhs[2][pp.first][i] - - 9*this->rhs[3][pp.first][i])/24; - break; - case 5: - pp.second[i] += this->dt*(1901*this->rhs[0][pp.first][i] - - 2774*this->rhs[1][pp.first][i] - + 2616*this->rhs[2][pp.first][i] - - 1274*this->rhs[3][pp.first][i] - + 251*this->rhs[4][pp.first][i])/720; - break; - case 6: - pp.second[i] += this->dt*(4277*this->rhs[0][pp.first][i] - - 7923*this->rhs[1][pp.first][i] - + 9982*this->rhs[2][pp.first][i] - - 7298*this->rhs[3][pp.first][i] - + 2877*this->rhs[4][pp.first][i] - - 475*this->rhs[5][pp.first][i])/1440; - break; - } - this->redistribute(this->state, this->rhs); - this->roll_rhs(); -} - - -template <particle_types particle_type, class rnumber, int interp_neighbours> -void distributed_particles<particle_type, rnumber, interp_neighbours>::step() -{ - TIMEZONE("distributed_particles::step"); - this->AdamsBashforth((this->iteration < this->integration_steps) ? - this->iteration+1 : - this->integration_steps); - this->iteration++; -} - - -template <particle_types particle_type, class rnumber, int interp_neighbours> -void distributed_particles<particle_type, rnumber, interp_neighbours>::read() -{ - double *temp = new double[this->chunk_size*state_dimension(particle_type)]; - for (unsigned int cindex=0; cindex<this->get_number_of_chunks(); cindex++) - { - //read state - if (this->myrank == 0) - this->read_state_chunk(cindex, temp); - MPI_Bcast( - temp, - this->chunk_size*state_dimension(particle_type), - MPI_DOUBLE, - 0, - this->comm); - for (unsigned int p=0; p<this->chunk_size; p++) - { - if (this->vel->get_rank(temp[state_dimension(particle_type)*p+2]) == this->myrank) - this->state[p+cindex*this->chunk_size] = temp + state_dimension(particle_type)*p; - } - //read rhs - if (this->iteration > 0) - for (int i=0; i<this->integration_steps; i++) - { - if (this->myrank == 0) - this->read_rhs_chunk(cindex, i, temp); - MPI_Bcast( - temp, - this->chunk_size*state_dimension(particle_type), - MPI_DOUBLE, - 0, - this->comm); - for (unsigned int p=0; p<this->chunk_size; p++) - { - auto pp = this->state.find(p+cindex*this->chunk_size); - if (pp != this->state.end()) - this->rhs[i][p+cindex*this->chunk_size] = temp + state_dimension(particle_type)*p; - } - } - } - DEBUG_MSG("%s->state.size = %ld\n", this->name.c_str(), this->state.size()); - delete[] temp; -} - -template <particle_types particle_type, class rnumber, int interp_neighbours> -void distributed_particles<particle_type, rnumber, interp_neighbours>::write( - const char *dset_name, - std::unordered_map<int, single_particle_state<POINT3D>> &y) -{ - TIMEZONE("distributed_particles::write"); - double *data = new double[this->nparticles*3]; - double *yy = new double[this->nparticles*3]; - for (unsigned int cindex=0; cindex<this->get_number_of_chunks(); cindex++) - { - std::fill_n(yy, this->chunk_size*3, 0); - for (unsigned int p=0; p<this->chunk_size; p++) - { - auto pp = y.find(p+cindex*this->chunk_size); - if (pp != y.end()) - std::copy(pp->second.data, - pp->second.data + 3, - yy + pp->first*3); - } - MPI_Allreduce( - yy, - data, - 3*this->nparticles, - MPI_DOUBLE, - MPI_SUM, - this->comm); - if (this->myrank == 0) - this->write_point3D_chunk(dset_name, cindex, data); - } - delete[] yy; - delete[] data; -} - -template <particle_types particle_type, class rnumber, int interp_neighbours> -void distributed_particles<particle_type, rnumber, interp_neighbours>::write( - const bool write_rhs) -{ - TIMEZONE("distributed_particles::write2"); - double *temp0 = new double[this->chunk_size*state_dimension(particle_type)]; - double *temp1 = new double[this->chunk_size*state_dimension(particle_type)]; - for (unsigned int cindex=0; cindex<this->get_number_of_chunks(); cindex++) - { - //write state - std::fill_n(temp0, state_dimension(particle_type)*this->chunk_size, 0); - for (unsigned int p=0; p<this->chunk_size; p++) - { - auto pp = this->state.find(p + cindex*this->chunk_size); - if (pp != this->state.end()) - std::copy(pp->second.data, - pp->second.data + state_dimension(particle_type), - temp0 + p*state_dimension(particle_type)); - } - MPI_Allreduce( - temp0, - temp1, - state_dimension(particle_type)*this->chunk_size, - MPI_DOUBLE, - MPI_SUM, - this->comm); - if (this->myrank == 0) - this->write_state_chunk(cindex, temp1); - //write rhs - if (write_rhs) - for (int i=0; i<this->integration_steps; i++) - { - std::fill_n(temp0, state_dimension(particle_type)*this->chunk_size, 0); - for (unsigned int p=0; p<this->chunk_size; p++) - { - auto pp = this->rhs[i].find(p + cindex*this->chunk_size); - if (pp != this->rhs[i].end()) - std::copy(pp->second.data, - pp->second.data + state_dimension(particle_type), - temp0 + p*state_dimension(particle_type)); - } - MPI_Allreduce( - temp0, - temp1, - state_dimension(particle_type)*this->chunk_size, - MPI_DOUBLE, - MPI_SUM, - this->comm); - if (this->myrank == 0) - this->write_rhs_chunk(cindex, i, temp1); - } - } - delete[] temp0; - delete[] temp1; -} - - -/*****************************************************************************/ -template class distributed_particles<VELOCITY_TRACER, float, 1>; -template class distributed_particles<VELOCITY_TRACER, float, 2>; -template class distributed_particles<VELOCITY_TRACER, float, 3>; -template class distributed_particles<VELOCITY_TRACER, float, 4>; -template class distributed_particles<VELOCITY_TRACER, float, 5>; -template class distributed_particles<VELOCITY_TRACER, float, 6>; -template class distributed_particles<VELOCITY_TRACER, double, 1>; -template class distributed_particles<VELOCITY_TRACER, double, 2>; -template class distributed_particles<VELOCITY_TRACER, double, 3>; -template class distributed_particles<VELOCITY_TRACER, double, 4>; -template class distributed_particles<VELOCITY_TRACER, double, 5>; -template class distributed_particles<VELOCITY_TRACER, double, 6>; -/*****************************************************************************/ diff --git a/bfps/cpp/distributed_particles.hpp b/bfps/cpp/distributed_particles.hpp deleted file mode 100644 index cf6e124a7744c049b6fcf0c84c1618a0a214c30e..0000000000000000000000000000000000000000 --- a/bfps/cpp/distributed_particles.hpp +++ /dev/null @@ -1,105 +0,0 @@ -/********************************************************************** -* * -* Copyright 2015 Max Planck Institute * -* for Dynamics and Self-Organization * -* * -* This file is part of bfps. * -* * -* bfps is free software: you can redistribute it and/or modify * -* it under the terms of the GNU General Public License as published * -* by the Free Software Foundation, either version 3 of the License, * -* or (at your option) any later version. * -* * -* bfps is distributed in the hope that it will be useful, * -* but WITHOUT ANY WARRANTY; without even the implied warranty of * -* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * -* GNU General Public License for more details. * -* * -* You should have received a copy of the GNU General Public License * -* along with bfps. If not, see <http://www.gnu.org/licenses/> * -* * -* Contact: Cristian.Lalescu@ds.mpg.de * -* * -**********************************************************************/ - - - -#include <stdio.h> -#include <stdlib.h> -#include <iostream> -#include <unordered_map> -#include <vector> -#include <hdf5.h> -#include "base.hpp" -#include "particles_base.hpp" -#include "fluid_solver_base.hpp" -#include "interpolator.hpp" - -#ifndef DISTRIBUTED_PARTICLES - -#define DISTRIBUTED_PARTICLES - -template <particle_types particle_type, class rnumber, int interp_neighbours> -class distributed_particles: public particles_io_base<particle_type> -{ - private: - std::unordered_map<int, single_particle_state<particle_type> > state; - std::vector<std::unordered_map<int, single_particle_state<particle_type>>> rhs; - - public: - int integration_steps; - // this class only works with buffered interpolator - interpolator<rnumber, interp_neighbours> *vel; - - /* simulation parameters */ - double dt; - - /* methods */ - - /* constructor and destructor. - * allocate and deallocate: - * this->state - * this->rhs - * */ - distributed_particles( - const char *NAME, - const hid_t data_file_id, - interpolator<rnumber, interp_neighbours> *FIELD, - const int TRAJ_SKIP, - const int INTEGRATION_STEPS = 2); - ~distributed_particles(); - - void sample( - interpolator<rnumber, interp_neighbours> *field, - const char *dset_name); - void sample( - interpolator<rnumber, interp_neighbours> *field, - const std::unordered_map<int, single_particle_state<particle_type>> &x, - std::unordered_map<int, single_particle_state<POINT3D>> &y); - void get_rhs( - const std::unordered_map<int, single_particle_state<particle_type>> &x, - std::unordered_map<int, single_particle_state<particle_type>> &y); - - void redistribute( - std::unordered_map<int, single_particle_state<particle_type>> &x, - std::vector<std::unordered_map<int, single_particle_state<particle_type>>> &vals); - - - /* input/output */ - void read(); - void write( - const char *dset_name, - std::unordered_map<int, single_particle_state<POINT3D>> &y); - void write( - const char *dset_name, - std::unordered_map<int, single_particle_state<particle_type>> &y); - void write(const bool write_rhs = true); - - /* solvers */ - void step(); - void roll_rhs(); - void AdamsBashforth(const int nsteps); -}; - -#endif//DISTRIBUTED_PARTICLES - diff --git a/bfps/cpp/fftw_interface.hpp b/bfps/cpp/fftw_interface.hpp index 7f5e014400c86ffe4caa594a67e1756c890c6503..0a840dd5ba3d864b36271515faa7cb81f3042c01 100644 --- a/bfps/cpp/fftw_interface.hpp +++ b/bfps/cpp/fftw_interface.hpp @@ -26,6 +26,8 @@ #define FFTW_INTERFACE_HPP #include <fftw3-mpi.h> +#include <map> +#include <string> #ifdef USE_FFTWESTIMATE #define DEFAULT_FFTW_FLAG FFTW_ESTIMATE diff --git a/bfps/cpp/fftw_tools.cpp b/bfps/cpp/fftw_tools.cpp index 61e03d292f81aed1fa4b2dfcab880fb7105b676e..55794b41ebf8ebfa03977d5a79704aa38b39af52 100644 --- a/bfps/cpp/fftw_tools.cpp +++ b/bfps/cpp/fftw_tools.cpp @@ -31,192 +31,10 @@ #define NDEBUG -template <class rnumber> -int clip_zero_padding( - field_descriptor<rnumber> *f, - rnumber *a, - int howmany) -{ - if (f->ndims < 3) - return EXIT_FAILURE; - rnumber *b = a; - ptrdiff_t copy_size = f->sizes[2] * howmany; - ptrdiff_t skip_size = copy_size + 2*howmany; - for (int i0 = 0; i0 < f->subsizes[0]; i0++) - for (int i1 = 0; i1 < f->sizes[1]; i1++) - { - std::copy(a, a + copy_size, b); - a += skip_size; - b += copy_size; - } - return EXIT_SUCCESS; -} - -template -int clip_zero_padding<float>( - field_descriptor<float> *f, - float *a, - int howmany); - -template -int clip_zero_padding<double>( - field_descriptor<double> *f, - double *a, - int howmany); - - - -template <class rnumber> -int copy_complex_array( - field_descriptor<rnumber> *fi, - rnumber (*ai)[2], -field_descriptor<rnumber> *fo, -rnumber (*ao)[2], -int howmany) -{ - DEBUG_MSG("entered copy_complex_array\n"); - typename fftw_interface<rnumber>::complex *buffer; - buffer = fftw_interface<rnumber>::alloc_complex(fi->slice_size*howmany); - - int min_fast_dim; - min_fast_dim = - (fi->sizes[2] > fo->sizes[2]) ? - fo->sizes[2] : fi->sizes[2]; - - /* clean up destination, in case we're padding with zeros - (even if only for one dimension) */ - std::fill_n((rnumber*)ao, fo->local_size*2, 0.0); - - int64_t ii0, ii1; - int64_t oi0, oi1; - int64_t delta1, delta0; - int irank, orank; - delta0 = (fo->sizes[0] - fi->sizes[0]); - delta1 = (fo->sizes[1] - fi->sizes[1]); - for (ii0=0; ii0 < fi->sizes[0]; ii0++) - { - if (ii0 <= fi->sizes[0]/2) - { - oi0 = ii0; - if (oi0 > fo->sizes[0]/2) - continue; - } - else - { - oi0 = ii0 + delta0; - if ((oi0 < 0) || ((fo->sizes[0] - oi0) >= fo->sizes[0]/2)) - continue; - } - irank = fi->rank[ii0]; - orank = fo->rank[oi0]; - if ((irank == orank) && - (irank == fi->myrank)) - { - std::copy( - (rnumber*)(ai + (ii0 - fi->starts[0] )*fi->slice_size), - (rnumber*)(ai + (ii0 - fi->starts[0] + 1)*fi->slice_size), - (rnumber*)buffer); - } - else - { - if (fi->myrank == irank) - { - MPI_Send( - (void*)(ai + (ii0-fi->starts[0])*fi->slice_size), - fi->slice_size, - mpi_real_type<rnumber>::complex(), - orank, - ii0, - fi->comm); - } - if (fi->myrank == orank) - { - MPI_Recv( - (void*)(buffer), - fi->slice_size, - mpi_real_type<rnumber>::complex(), - irank, - ii0, - fi->comm, - MPI_STATUS_IGNORE); - } - } - if (fi->myrank == orank) - { - for (ii1 = 0; ii1 < fi->sizes[1]; ii1++) - { - if (ii1 <= fi->sizes[1]/2) - { - oi1 = ii1; - if (oi1 > fo->sizes[1]/2) - continue; - } - else - { - oi1 = ii1 + delta1; - if ((oi1 < 0) || ((fo->sizes[1] - oi1) >= fo->sizes[1]/2)) - continue; - } - std::copy( - (rnumber*)(buffer + (ii1*fi->sizes[2]*howmany)), - (rnumber*)(buffer + (ii1*fi->sizes[2] + min_fast_dim)*howmany), - (rnumber*)(ao + - ((oi0 - fo->starts[0])*fo->sizes[1] + - oi1)*fo->sizes[2]*howmany)); - } - } - } - fftw_interface<rnumber>::free(buffer); - MPI_Barrier(fi->comm); - - DEBUG_MSG("exiting copy_complex_array\n"); - return EXIT_SUCCESS; -} - -template -int copy_complex_array<float>( - field_descriptor<float> *fi, - float (*ai)[2], - field_descriptor<float> *fo, - float (*ao)[2], - int howmany); - -template -int copy_complex_array<double>( - field_descriptor<double> *fi, - double (*ai)[2], - field_descriptor<double> *fo, - double (*ao)[2], - int howmany); - - -template <class rnumber> -int get_descriptors_3D( - int n0, int n1, int n2, - field_descriptor<rnumber> **fr, - field_descriptor<rnumber> **fc) -{ - int ntmp[3]; - ntmp[0] = n0; - ntmp[1] = n1; - ntmp[2] = n2; - *fr = new field_descriptor<rnumber>(3, ntmp, mpi_real_type<rnumber>::real(), MPI_COMM_WORLD); - ntmp[0] = n0; - ntmp[1] = n1; - ntmp[2] = n2/2+1; - *fc = new field_descriptor<rnumber>(3, ntmp, mpi_real_type<rnumber>::complex(), MPI_COMM_WORLD); - return EXIT_SUCCESS; -} - -template -int get_descriptors_3D<float>( - int n0, int n1, int n2, - field_descriptor<float> **fr, - field_descriptor<float> **fc); - -template -int get_descriptors_3D<double>( - int n0, int n1, int n2, - field_descriptor<double> **fr, - field_descriptor<double> **fc); +std::map<std::string, unsigned> fftw_planner_string_to_flag = { + {"FFTW_ESTIMATE", FFTW_ESTIMATE}, + {"FFTW_MEASURE", FFTW_MEASURE}, + {"FFTW_PATIENT", FFTW_PATIENT}, + {"parameter does not exist", DEFAULT_FFTW_FLAG}, +}; diff --git a/bfps/cpp/fftw_tools.hpp b/bfps/cpp/fftw_tools.hpp index d0f3dbf30df3ee95f3d7934f0dd7fca633858b44..e32500fd734803a5884877398fc13fff22aa44c4 100644 --- a/bfps/cpp/fftw_tools.hpp +++ b/bfps/cpp/fftw_tools.hpp @@ -34,37 +34,7 @@ extern int myrank, nprocs; -/* given two arrays of the same dimension, we do a simple resize in - * Fourier space: either chop off high modes, or pad with zeros. - * the arrays are assumed to use 3D mpi fftw layout. - * */ -template <class rnumber> -int copy_complex_array( - field_descriptor<rnumber> *fi, - rnumber (*ai)[2], - field_descriptor<rnumber> *fo, - rnumber (*ao)[2], - int howmany=1); - -template <class rnumber> -int clip_zero_padding( - field_descriptor<rnumber> *f, - rnumber *a, - int howmany=1); - -/* function to get pair of descriptors for real and Fourier space - * arrays used with fftw. - * the n0, n1, n2 correspond to the real space data WITHOUT the zero - * padding that FFTW needs. - * IMPORTANT: the real space array must be allocated with - * 2*fc->local_size, and then the zeros cleaned up before trying - * to write data. - * */ -template <class rnumber> -int get_descriptors_3D( - int n0, int n1, int n2, - field_descriptor<rnumber> **fr, - field_descriptor<rnumber> **fc); +extern std::map<std::string, unsigned> fftw_planner_string_to_flag; #endif//FFTW_TOOLS diff --git a/bfps/cpp/field.cpp b/bfps/cpp/field.cpp index b70825cb472316171e37e5150e09eba9d4b48eee..d5bc78a58fb84e28d9ffc2fc5cb6cd2517420fdf 100644 --- a/bfps/cpp/field.cpp +++ b/bfps/cpp/field.cpp @@ -23,6 +23,9 @@ **********************************************************************/ + +#define NDEBUG + #include <sys/stat.h> #include <cmath> #include <cstdlib> diff --git a/bfps/cpp/field_layout.cpp b/bfps/cpp/field_layout.cpp index 908904991d5d95b0c89ba679b402d8d5727b8c85..61dd3f2ac1094e5f93a375fa295cffab669b34f9 100644 --- a/bfps/cpp/field_layout.cpp +++ b/bfps/cpp/field_layout.cpp @@ -23,10 +23,15 @@ **********************************************************************/ + +#define NDEBUG + #include <cassert> #include "field_layout.hpp" #include "scope_timer.hpp" + + template <field_components fc> field_layout<fc>::field_layout( const hsize_t *SIZES, diff --git a/bfps/cpp/fluid_solver.cpp b/bfps/cpp/fluid_solver.cpp deleted file mode 100644 index 7ec0c978102f2d0cad00d57d837fad6c141f91fb..0000000000000000000000000000000000000000 --- a/bfps/cpp/fluid_solver.cpp +++ /dev/null @@ -1,1057 +0,0 @@ -/********************************************************************** -* * -* Copyright 2015 Max Planck Institute * -* for Dynamics and Self-Organization * -* * -* This file is part of bfps. * -* * -* bfps is free software: you can redistribute it and/or modify * -* it under the terms of the GNU General Public License as published * -* by the Free Software Foundation, either version 3 of the License, * -* or (at your option) any later version. * -* * -* bfps is distributed in the hope that it will be useful, * -* but WITHOUT ANY WARRANTY; without even the implied warranty of * -* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * -* GNU General Public License for more details. * -* * -* You should have received a copy of the GNU General Public License * -* along with bfps. If not, see <http://www.gnu.org/licenses/> * -* * -* Contact: Cristian.Lalescu@ds.mpg.de * -* * -**********************************************************************/ - - - -//#define NDEBUG - -#include <cassert> -#include <cmath> -#include <cstring> -#include "fluid_solver.hpp" -#include "fftw_tools.hpp" -#include "scope_timer.hpp" -#include "shared_array.hpp" - - -template <class rnumber> -void fluid_solver<rnumber>::impose_zero_modes() -{ - if (this->cd->myrank == this->cd->rank[0]) - { - std::fill_n((rnumber*)(this->cu), 6, 0.0); - std::fill_n((rnumber*)(this->cv[0]), 6, 0.0); - std::fill_n((rnumber*)(this->cv[1]), 6, 0.0); - std::fill_n((rnumber*)(this->cv[2]), 6, 0.0); - } -} -/*****************************************************************************/ -/* macro for specializations to numeric types compatible with FFTW */ - -template <class rnumber> -fluid_solver<rnumber>::fluid_solver( - const char *NAME, - int nx, - int ny, - int nz, - double DKX, - double DKY, - double DKZ, - int DEALIAS_TYPE, - unsigned FFTW_PLAN_RIGOR) : fluid_solver_base<rnumber>( - NAME, - nx , ny , nz, - DKX, DKY, DKZ, - DEALIAS_TYPE, - FFTW_PLAN_RIGOR) -{ - TIMEZONE("fluid_solver::fluid_solver"); - this->cvorticity = fftw_interface<rnumber>::alloc_complex(this->cd->local_size); - this->cvelocity = fftw_interface<rnumber>::alloc_complex(this->cd->local_size); - this->rvorticity = fftw_interface<rnumber>::alloc_real(this->cd->local_size*2); - /*this->rvelocity = (rnumber*)(this->cvelocity);*/ - this->rvelocity = fftw_interface<rnumber>::alloc_real(this->cd->local_size*2); - - this->ru = this->rvelocity; - this->cu = this->cvelocity; - - this->rv[0] = this->rvorticity; - this->rv[3] = this->rvorticity; - this->cv[0] = this->cvorticity; - this->cv[3] = this->cvorticity; - - this->cv[1] = fftw_interface<rnumber>::alloc_complex(this->cd->local_size); - this->cv[2] = this->cv[1]; - this->rv[1] = fftw_interface<rnumber>::alloc_real(this->cd->local_size*2); - this->rv[2] = this->rv[1]; - - this->c2r_vorticity = new typename fftw_interface<rnumber>::many_plan; - this->r2c_vorticity = new typename fftw_interface<rnumber>::many_plan; - this->c2r_velocity = new typename fftw_interface<rnumber>::many_plan; - this->r2c_velocity = new typename fftw_interface<rnumber>::many_plan; - - ptrdiff_t sizes[] = {nz, - ny, - nx}; - - *this->c2r_vorticity = fftw_interface<rnumber>::mpi_plan_many_dft_c2r( - 3, sizes, 3, FFTW_MPI_DEFAULT_BLOCK, FFTW_MPI_DEFAULT_BLOCK, - this->cvorticity, this->rvorticity, - MPI_COMM_WORLD, this->fftw_plan_rigor | FFTW_MPI_TRANSPOSED_IN); - - *this->r2c_vorticity = fftw_interface<rnumber>::mpi_plan_many_dft_r2c( - 3, sizes, 3, FFTW_MPI_DEFAULT_BLOCK, FFTW_MPI_DEFAULT_BLOCK, - this->rvorticity, this->cvorticity, - MPI_COMM_WORLD, this->fftw_plan_rigor | FFTW_MPI_TRANSPOSED_OUT); - - *this->c2r_velocity = fftw_interface<rnumber>::mpi_plan_many_dft_c2r( - 3, sizes, 3, FFTW_MPI_DEFAULT_BLOCK, FFTW_MPI_DEFAULT_BLOCK, - this->cvelocity, this->rvelocity, - MPI_COMM_WORLD, this->fftw_plan_rigor | FFTW_MPI_TRANSPOSED_IN); - - *this->r2c_velocity = fftw_interface<rnumber>::mpi_plan_many_dft_r2c( - 3, sizes, 3, FFTW_MPI_DEFAULT_BLOCK, FFTW_MPI_DEFAULT_BLOCK, - this->rvelocity, this->cvelocity, - MPI_COMM_WORLD, this->fftw_plan_rigor | FFTW_MPI_TRANSPOSED_OUT); - - this->uc2r = this->c2r_velocity; - this->ur2c = this->r2c_velocity; - this->vc2r[0] = this->c2r_vorticity; - this->vr2c[0] = this->r2c_vorticity; - - this->vc2r[1] = new typename fftw_interface<rnumber>::many_plan; - this->vr2c[1] = new typename fftw_interface<rnumber>::many_plan; - this->vc2r[2] = new typename fftw_interface<rnumber>::many_plan; - this->vr2c[2] = new typename fftw_interface<rnumber>::many_plan; - - *(this->vc2r[1]) = fftw_interface<rnumber>::mpi_plan_many_dft_c2r( - 3, sizes, 3, FFTW_MPI_DEFAULT_BLOCK, FFTW_MPI_DEFAULT_BLOCK, - this->cv[1], this->rv[1], - MPI_COMM_WORLD, this->fftw_plan_rigor | FFTW_MPI_TRANSPOSED_IN); - - *this->vc2r[2] = fftw_interface<rnumber>::mpi_plan_many_dft_c2r( - 3, sizes, 3, FFTW_MPI_DEFAULT_BLOCK, FFTW_MPI_DEFAULT_BLOCK, - this->cv[2], this->rv[2], - MPI_COMM_WORLD, this->fftw_plan_rigor | FFTW_MPI_TRANSPOSED_IN); - - *this->vr2c[1] = fftw_interface<rnumber>::mpi_plan_many_dft_r2c( - 3, sizes, 3, FFTW_MPI_DEFAULT_BLOCK, FFTW_MPI_DEFAULT_BLOCK, - this->rv[1], this->cv[1], - MPI_COMM_WORLD, this->fftw_plan_rigor | FFTW_MPI_TRANSPOSED_OUT); - - *this->vr2c[2] = fftw_interface<rnumber>::mpi_plan_many_dft_r2c( - 3, sizes, 3, FFTW_MPI_DEFAULT_BLOCK, FFTW_MPI_DEFAULT_BLOCK, - this->rv[2], this->cv[2], - MPI_COMM_WORLD, this->fftw_plan_rigor | FFTW_MPI_TRANSPOSED_OUT); - - /* ``physical'' parameters etc, initialized here just in case */ - - this->nu = 0.1; - this->fmode = 1; - this->famplitude = 1.0; - this->fk0 = 0; - this->fk1 = 3.0; - /* initialization of fields must be done AFTER planning */ - std::fill_n((rnumber*)this->cvorticity, this->cd->local_size*2, 0.0); - std::fill_n((rnumber*)this->cvelocity, this->cd->local_size*2, 0.0); - std::fill_n(this->rvelocity, this->cd->local_size*2, 0.0); - std::fill_n(this->rvorticity, this->cd->local_size*2, 0.0); - std::fill_n((rnumber*)this->cv[1], this->cd->local_size*2, 0.0); - std::fill_n(this->rv[1], this->cd->local_size*2, 0.0); - std::fill_n(this->rv[2], this->cd->local_size*2, 0.0); -} - -template <class rnumber> -fluid_solver<rnumber>::~fluid_solver() -{ - fftw_interface<rnumber>::destroy_plan(*this->c2r_vorticity); - fftw_interface<rnumber>::destroy_plan(*this->r2c_vorticity); - fftw_interface<rnumber>::destroy_plan(*this->c2r_velocity ); - fftw_interface<rnumber>::destroy_plan(*this->r2c_velocity ); - fftw_interface<rnumber>::destroy_plan(*this->vc2r[1]); - fftw_interface<rnumber>::destroy_plan(*this->vr2c[1]); - fftw_interface<rnumber>::destroy_plan(*this->vc2r[2]); - fftw_interface<rnumber>::destroy_plan(*this->vr2c[2]); - - delete this->c2r_vorticity; - delete this->r2c_vorticity; - delete this->c2r_velocity ; - delete this->r2c_velocity ; - delete this->vc2r[1]; - delete this->vr2c[1]; - delete this->vc2r[2]; - delete this->vr2c[2]; - - fftw_interface<rnumber>::free(this->cv[1]); - fftw_interface<rnumber>::free(this->rv[1]); - fftw_interface<rnumber>::free(this->cvorticity); - fftw_interface<rnumber>::free(this->rvorticity); - fftw_interface<rnumber>::free(this->cvelocity); - fftw_interface<rnumber>::free(this->rvelocity); -} - -template <class rnumber> -void fluid_solver<rnumber>::compute_vorticity() -{ - TIMEZONE("fluid_solver::compute_vorticity"); - CLOOP_K2( - this, - [&](ptrdiff_t cindex, ptrdiff_t xindex, ptrdiff_t yindex, ptrdiff_t zindex, double k2){ - // cindex indexing is thread safe (and tindex too) + it is a write - ptrdiff_t tindex = 3*cindex; - if (k2 <= this->kM2) - { - this->cvorticity[tindex+0][0] = -(this->ky[yindex]*this->cu[tindex+2][1] - this->kz[zindex]*this->cu[tindex+1][1]); - this->cvorticity[tindex+1][0] = -(this->kz[zindex]*this->cu[tindex+0][1] - this->kx[xindex]*this->cu[tindex+2][1]); - this->cvorticity[tindex+2][0] = -(this->kx[xindex]*this->cu[tindex+1][1] - this->ky[yindex]*this->cu[tindex+0][1]); - this->cvorticity[tindex+0][1] = (this->ky[yindex]*this->cu[tindex+2][0] - this->kz[zindex]*this->cu[tindex+1][0]); - this->cvorticity[tindex+1][1] = (this->kz[zindex]*this->cu[tindex+0][0] - this->kx[xindex]*this->cu[tindex+2][0]); - this->cvorticity[tindex+2][1] = (this->kx[xindex]*this->cu[tindex+1][0] - this->ky[yindex]*this->cu[tindex+0][0]); - } - else{ - std::fill_n((rnumber*)(this->cvorticity+tindex), 6, 0.0); - } - } - ); - this->symmetrize(this->cvorticity, 3); -} - -template <class rnumber> -void fluid_solver<rnumber>::compute_velocity(rnumber (*__restrict__ vorticity)[2]) -{ - TIMEZONE("fluid_solver::compute_velocity"); - CLOOP_K2( - this, - [&](ptrdiff_t cindex, ptrdiff_t xindex, ptrdiff_t yindex, ptrdiff_t zindex, double k2){ - // cindex indexing is thread safe (and tindex too) + it is a write - ptrdiff_t tindex = 3*cindex; - if (k2 <= this->kM2 && k2 > 0) - { - this->cu[tindex+0][0] = -(this->ky[yindex]*vorticity[tindex+2][1] - this->kz[zindex]*vorticity[tindex+1][1]) / k2; - this->cu[tindex+1][0] = -(this->kz[zindex]*vorticity[tindex+0][1] - this->kx[xindex]*vorticity[tindex+2][1]) / k2; - this->cu[tindex+2][0] = -(this->kx[xindex]*vorticity[tindex+1][1] - this->ky[yindex]*vorticity[tindex+0][1]) / k2; - this->cu[tindex+0][1] = (this->ky[yindex]*vorticity[tindex+2][0] - this->kz[zindex]*vorticity[tindex+1][0]) / k2; - this->cu[tindex+1][1] = (this->kz[zindex]*vorticity[tindex+0][0] - this->kx[xindex]*vorticity[tindex+2][0]) / k2; - this->cu[tindex+2][1] = (this->kx[xindex]*vorticity[tindex+1][0] - this->ky[yindex]*vorticity[tindex+0][0]) / k2; - } - else - std::fill_n((rnumber*)(this->cu+tindex), 6, 0.0); - } - ); - /*this->symmetrize(this->cu, 3);*/ -} - -template <class rnumber> -void fluid_solver<rnumber>::ift_velocity() -{ - TIMEZONE("fluid_solver::ift_velocity"); - fftw_interface<rnumber>::execute(*(this->c2r_velocity )); -} - -template <class rnumber> -void fluid_solver<rnumber>::ift_vorticity() -{ - TIMEZONE("fluid_solver::ift_vorticity"); - std::fill_n(this->rvorticity, this->cd->local_size*2, 0.0); - fftw_interface<rnumber>::execute(*(this->c2r_vorticity )); -} - -template <class rnumber> -void fluid_solver<rnumber>::dft_velocity() -{ - TIMEZONE("fluid_solver::dft_velocity"); - fftw_interface<rnumber>::execute(*(this->r2c_velocity )); -} - -template <class rnumber> -void fluid_solver<rnumber>::dft_vorticity() -{ - TIMEZONE("fluid_solver::dft_vorticity"); - std::fill_n((rnumber*)this->cvorticity, this->cd->local_size*2, 0.0); - fftw_interface<rnumber>::execute(*(this->r2c_vorticity )); -} - -template <class rnumber> -void fluid_solver<rnumber>::add_forcing( - rnumber (*__restrict__ acc_field)[2], rnumber (*__restrict__ vort_field)[2], rnumber factor) -{ - TIMEZONE("fluid_solver::add_forcing"); - if (strcmp(this->forcing_type, "none") == 0) - return; - if (strcmp(this->forcing_type, "Kolmogorov") == 0) - { - ptrdiff_t cindex; - if (this->cd->myrank == this->cd->rank[this->fmode]) - { - cindex = ((this->fmode - this->cd->starts[0]) * this->cd->sizes[1])*this->cd->sizes[2]*3; - acc_field[cindex+2][0] -= this->famplitude*factor/2; - } - if (this->cd->myrank == this->cd->rank[this->cd->sizes[0] - this->fmode]) - { - cindex = ((this->cd->sizes[0] - this->fmode - this->cd->starts[0]) * this->cd->sizes[1])*this->cd->sizes[2]*3; - acc_field[cindex+2][0] -= this->famplitude*factor/2; - } - return; - } - if (strcmp(this->forcing_type, "linear") == 0) - { - CLOOP( - this, - [&](ptrdiff_t cindex, ptrdiff_t xindex, ptrdiff_t yindex, ptrdiff_t zindex){ - // cindex indexing is thread safe (and cindex*3+c too) - double knorm = sqrt(this->kx[xindex]*this->kx[xindex] + - this->ky[yindex]*this->ky[yindex] + - this->kz[zindex]*this->kz[zindex]); - if ((this->fk0 <= knorm) && (this->fk1 >= knorm)) - for (int c=0; c<3; c++) - for (int i=0; i<2; i++) - acc_field[cindex*3+c][i] += this->famplitude*vort_field[cindex*3+c][i]*factor; - } - ); - return; - } -} - -template <class rnumber> -void fluid_solver<rnumber>::omega_nonlin( - int src) -{ - TIMEZONE("fluid_solver::omega_nonlin"); - assert(src >= 0 && src < 3); - this->compute_velocity(this->cv[src]); - /* get fields from Fourier space to real space */ - { - TIMEZONE("fluid_solver::omega_nonlin::fftw"); - fftw_interface<rnumber>::execute(*(this->c2r_velocity )); - fftw_interface<rnumber>::execute(*(this->vc2r[src])); - } - /* compute cross product $u \times \omega$, and normalize */ - { - TIMEZONE("fluid_solver::omega_nonlin::RLOOP"); - RLOOP ( - this, - [&](ptrdiff_t rindex, ptrdiff_t /*xindex*/, ptrdiff_t /*yindex*/, ptrdiff_t /*zindex*/){ - ptrdiff_t tindex = 3*rindex; - rnumber tmp[3][2]; - for (int cc=0; cc<3; cc++) - tmp[cc][0] = (this->ru[tindex+(cc+1)%3]*this->rv[src][tindex+(cc+2)%3] - - this->ru[tindex+(cc+2)%3]*this->rv[src][tindex+(cc+1)%3]); - // Access to rindex is thread safe so there is no overlap between threads - for (int cc=0; cc<3; cc++) - this->ru[(3*rindex)+cc] = tmp[cc][0] / this->normalization_factor; - } - ); - } - /* go back to Fourier space */ - this->clean_up_real_space(this->ru, 3); - { - TIMEZONE("fluid_solver::omega_nonlin::fftw-2"); - fftw_interface<rnumber>::execute(*(this->r2c_velocity )); - } - this->dealias(this->cu, 3); - /* $\imath k \times Fourier(u \times \omega)$ */ - { - TIMEZONE("fluid_solver::omega_nonlin::CLOOP"); - CLOOP( - this, - [&](ptrdiff_t cindex, ptrdiff_t xindex, ptrdiff_t yindex, ptrdiff_t zindex){ - rnumber tmp[3][2]; - ptrdiff_t tindex = 3*cindex; - { - tmp[0][0] = -(this->ky[yindex]*this->cu[tindex+2][1] - this->kz[zindex]*this->cu[tindex+1][1]); - tmp[1][0] = -(this->kz[zindex]*this->cu[tindex+0][1] - this->kx[xindex]*this->cu[tindex+2][1]); - tmp[2][0] = -(this->kx[xindex]*this->cu[tindex+1][1] - this->ky[yindex]*this->cu[tindex+0][1]); - tmp[0][1] = (this->ky[yindex]*this->cu[tindex+2][0] - this->kz[zindex]*this->cu[tindex+1][0]); - tmp[1][1] = (this->kz[zindex]*this->cu[tindex+0][0] - this->kx[xindex]*this->cu[tindex+2][0]); - tmp[2][1] = (this->kx[xindex]*this->cu[tindex+1][0] - this->ky[yindex]*this->cu[tindex+0][0]); - } - // cindex indexing is thread safe so it is 3*cindex so there is no overlap between threads - for (int cc=0; cc<3; cc++) - for (int i=0; i<2; i++) - this->cu[tindex+cc][i] = tmp[cc][i]; - } - ); - } - { - TIMEZONE("fluid_solver::omega_nonlin::add_forcing"); - this->add_forcing(this->cu, this->cv[src], 1.0); - } - { - TIMEZONE("fluid_solver::omega_nonlin::force_divfree"); - this->force_divfree(this->cu); - } -} - -template <class rnumber> -void fluid_solver<rnumber>::step(double dt) -{ - TIMEZONE("fluid_solver::step"); - std::fill_n((rnumber*)this->cv[1], this->cd->local_size*2, 0.0); - this->omega_nonlin(0); - CLOOP_K2( - this, - [&](ptrdiff_t cindex, ptrdiff_t /*xindex*/, ptrdiff_t /*yindex*/, ptrdiff_t /*zindex*/, double k2){ - if (k2 <= this->kM2) - { - double factor0 = exp(-this->nu * k2 * dt); - // cindex indexing is thread safe so there is no overlap between threads - for (int cc=0; cc<3; cc++) for (int i=0; i<2; i++) - this->cv[1][3*cindex+cc][i] = (this->cv[0][3*cindex+cc][i] + - dt*this->cu[3*cindex+cc][i])*factor0; - } - } - ); - - this->omega_nonlin(1); - CLOOP_K2( - this, - [&](ptrdiff_t cindex, ptrdiff_t /*xindex*/, ptrdiff_t /*yindex*/, ptrdiff_t /*zindex*/, double k2){ - if (k2 <= this->kM2) - { - double factor0 = exp(-this->nu * k2 * dt/2); - double factor1 = exp( this->nu * k2 * dt/2); - // cindex indexing is thread safe so there is no overlap between threads - for (int cc=0; cc<3; cc++) for (int i=0; i<2; i++) - this->cv[2][3*cindex+cc][i] = (3*this->cv[0][3*cindex+cc][i]*factor0 + - (this->cv[1][3*cindex+cc][i] + - dt*this->cu[3*cindex+cc][i])*factor1)*0.25; - } - } - ); - - this->omega_nonlin(2); - CLOOP_K2( - this, - [&](ptrdiff_t cindex, ptrdiff_t /*xindex*/, ptrdiff_t /*yindex*/, ptrdiff_t /*zindex*/, double k2){ - if (k2 <= this->kM2) - { - double factor0 = exp(-this->nu * k2 * dt * 0.5); - // cindex indexing is thread safe so there is no overlap between threads - for (int cc=0; cc<3; cc++) for (int i=0; i<2; i++) - this->cv[3][3*cindex+cc][i] = (this->cv[0][3*cindex+cc][i]*factor0 + - 2*(this->cv[2][3*cindex+cc][i] + - dt*this->cu[3*cindex+cc][i]))*factor0/3; - } - } - ); - - this->force_divfree(this->cvorticity); - this->symmetrize(this->cvorticity, 3); - this->iteration++; -} - -template <class rnumber> -int fluid_solver<rnumber>::read(char field, char representation) -{ - TIMEZONE("fluid_solver::read"); - char fname[512]; - int read_result; - if (field == 'v') - { - if (representation == 'c') - { - this->fill_up_filename("cvorticity", fname); - read_result = this->cd->read(fname, (void*)this->cvorticity); - if (read_result != EXIT_SUCCESS) - return read_result; - } - if (representation == 'r') - { - read_result = this->read_base("rvorticity", this->rvorticity); - if (read_result != EXIT_SUCCESS) - return read_result; - else - fftw_interface<rnumber>::execute(*(this->r2c_vorticity )); - } - this->low_pass_Fourier(this->cvorticity, 3, this->kM); - this->force_divfree(this->cvorticity); - this->symmetrize(this->cvorticity, 3); - return EXIT_SUCCESS; - } - if ((field == 'u') && (representation == 'c')) - { - read_result = this->read_base("cvelocity", this->cvelocity); - this->low_pass_Fourier(this->cvelocity, 3, this->kM); - this->force_divfree(this->cvorticity); - this->symmetrize(this->cvorticity, 3); - return read_result; - } - if ((field == 'u') && (representation == 'r')) - return this->read_base("rvelocity", this->rvelocity); - return EXIT_FAILURE; -} - -template <class rnumber> -int fluid_solver<rnumber>::write(char field, char representation) -{ - TIMEZONE("fluid_solver::write"); - char fname[512]; - if ((field == 'v') && (representation == 'c')) - { - this->fill_up_filename("cvorticity", fname); - return this->cd->write(fname, (void*)this->cvorticity); - } - if ((field == 'v') && (representation == 'r')) - { - fftw_interface<rnumber>::execute(*(this->c2r_vorticity )); - clip_zero_padding<rnumber>(this->rd, this->rvorticity, 3); - this->fill_up_filename("rvorticity", fname); - return this->rd->write(fname, this->rvorticity); - } - this->compute_velocity(this->cvorticity); - if ((field == 'u') && (representation == 'c')) - { - this->fill_up_filename("cvelocity", fname); - return this->cd->write(fname, this->cvelocity); - } - if ((field == 'u') && (representation == 'r')) - { - this->ift_velocity(); - clip_zero_padding<rnumber>(this->rd, this->rvelocity, 3); - this->fill_up_filename("rvelocity", fname); - return this->rd->write(fname, this->rvelocity); - } - return EXIT_FAILURE; -} - -template <class rnumber> -int fluid_solver<rnumber>::write_rTrS2() -{ - TIMEZONE("fluid_solver::write_rTrS2"); - char fname[512]; - this->fill_up_filename("rTrS2", fname); - typename fftw_interface<rnumber>::complex *ca; - rnumber *ra; - ca = fftw_interface<rnumber>::alloc_complex(this->cd->local_size*3); - ra = (rnumber*)(ca); - this->compute_velocity(this->cvorticity); - this->compute_vector_gradient(ca, this->cvelocity); - for (int cc=0; cc<3; cc++) - { - std::copy( - (rnumber*)(ca + cc*this->cd->local_size), - (rnumber*)(ca + (cc+1)*this->cd->local_size), - (rnumber*)this->cv[1]); - fftw_interface<rnumber>::execute(*(this->vc2r[1])); - std::copy( - this->rv[1], - this->rv[1] + this->cd->local_size*2, - ra + cc*this->cd->local_size*2); - } - /* velocity gradient is now stored, in real space, in ra */ - rnumber *dx_u, *dy_u, *dz_u; - dx_u = ra; - dy_u = ra + 2*this->cd->local_size; - dz_u = ra + 4*this->cd->local_size; - rnumber *trS2 = fftw_interface<rnumber>::alloc_real((this->cd->local_size/3)*2); - shared_array<double> average_local(1, [&](double* data){ - data[0] = 0; - }); - - RLOOP( - this, - [&](ptrdiff_t rindex, ptrdiff_t /*xindex*/, ptrdiff_t /*yindex*/, ptrdiff_t /*zindex*/){ - rnumber AxxAxx; - rnumber AyyAyy; - rnumber AzzAzz; - rnumber Sxy; - rnumber Syz; - rnumber Szx; - ptrdiff_t tindex = 3*rindex; - AxxAxx = dx_u[tindex+0]*dx_u[tindex+0]; - AyyAyy = dy_u[tindex+1]*dy_u[tindex+1]; - AzzAzz = dz_u[tindex+2]*dz_u[tindex+2]; - Sxy = dx_u[tindex+1]+dy_u[tindex+0]; - Syz = dy_u[tindex+2]+dz_u[tindex+1]; - Szx = dz_u[tindex+0]+dx_u[tindex+2]; - // rindex is thread safe + No overlap between thread it is a write - trS2[rindex] = (AxxAxx + AyyAyy + AzzAzz + - (Sxy*Sxy + Syz*Syz + Szx*Szx)/2); - average_local.getMine()[0] += trS2[rindex]; - } - ); - average_local.mergeParallel(); - double average; - MPI_Allreduce( - average_local.getMasterData(), - &average, - 1, - MPI_DOUBLE, MPI_SUM, this->cd->comm); - DEBUG_MSG("average TrS2 is %g\n", average); - fftw_interface<rnumber>::free(ca); - /* output goes here */ - int ntmp[3]; - ntmp[0] = this->rd->sizes[0]; - ntmp[1] = this->rd->sizes[1]; - ntmp[2] = this->rd->sizes[2]; - field_descriptor<rnumber> *scalar_descriptor = new field_descriptor<rnumber>(3, ntmp, mpi_real_type<rnumber>::real(), this->cd->comm); - clip_zero_padding<rnumber>(scalar_descriptor, trS2, 1); - int return_value = scalar_descriptor->write(fname, trS2); - delete scalar_descriptor; - fftw_interface<rnumber>::free(trS2); - return return_value; -} - -template <class rnumber> -int fluid_solver<rnumber>::write_renstrophy() -{ - TIMEZONE("fluid_solver::write_renstrophy"); - char fname[512]; - this->fill_up_filename("renstrophy", fname); - rnumber *enstrophy = fftw_interface<rnumber>::alloc_real((this->cd->local_size/3)*2); - this->ift_vorticity(); - shared_array<double> average_local(1, [&](double* data){ - data[0] = 0; - }); - - RLOOP( - this, - [&](ptrdiff_t rindex, ptrdiff_t /*xindex*/, ptrdiff_t /*yindex*/, ptrdiff_t /*zindex*/){ - ptrdiff_t tindex = 3*rindex; - // rindex indexing is thread safe so there is no overlap between threads - enstrophy[rindex] = ( - this->rvorticity[tindex+0]*this->rvorticity[tindex+0] + - this->rvorticity[tindex+1]*this->rvorticity[tindex+1] + - this->rvorticity[tindex+2]*this->rvorticity[tindex+2] - )/2; - average_local.getMine()[0] += enstrophy[rindex]; - } - ); - average_local.mergeParallel(); - double average; - MPI_Allreduce( - average_local.getMasterData(), - &average, - 1, - MPI_DOUBLE, MPI_SUM, this->cd->comm); - DEBUG_MSG("average enstrophy is %g\n", average); - /* output goes here */ - int ntmp[3]; - ntmp[0] = this->rd->sizes[0]; - ntmp[1] = this->rd->sizes[1]; - ntmp[2] = this->rd->sizes[2]; - field_descriptor<rnumber> *scalar_descriptor = new field_descriptor<rnumber>(3, ntmp, mpi_real_type<rnumber>::real(), this->cd->comm); - clip_zero_padding<rnumber>(scalar_descriptor, enstrophy, 1); - int return_value = scalar_descriptor->write(fname, enstrophy); - delete scalar_descriptor; - fftw_interface<rnumber>::free(enstrophy); - return return_value; -} - -template <class rnumber> -void fluid_solver<rnumber>::compute_pressure(rnumber (*__restrict__ pressure)[2]) -{ - TIMEZONE("fluid_solver::compute_pressure"); - /* assume velocity is already in real space representation */ - /* diagonal terms 11 22 33 */ - RLOOP ( - this, - [&](ptrdiff_t rindex, ptrdiff_t /*xindex*/, ptrdiff_t /*yindex*/, ptrdiff_t /*zindex*/){ - // rindex indexing is thread safe so there is no overlap between threads - ptrdiff_t tindex = 3*rindex; - for (int cc=0; cc<3; cc++) - this->rv[1][tindex+cc] = this->ru[tindex+cc]*this->ru[tindex+cc]; - } - ); - this->clean_up_real_space(this->rv[1], 3); - { - TIMEZONE("fftw_interface<rnumber>::execute"); - fftw_interface<rnumber>::execute(*(this->vr2c[1])); - } - this->dealias(this->cv[1], 3); - CLOOP_K2( - this, - [&](ptrdiff_t cindex, ptrdiff_t xindex, ptrdiff_t yindex, ptrdiff_t zindex, double k2){ - if (k2 <= this->kM2 && k2 > 0) - { - // cindex indexing is thread safe so there is no overlap between threads - ptrdiff_t tindex = 3*cindex; - for (int i=0; i<2; i++) - { - pressure[cindex][i] = -(this->kx[xindex]*this->kx[xindex]*this->cv[1][tindex+0][i] + - this->ky[yindex]*this->ky[yindex]*this->cv[1][tindex+1][i] + - this->kz[zindex]*this->kz[zindex]*this->cv[1][tindex+2][i]); - } - } - else - std::fill_n((rnumber*)(pressure+cindex), 2, 0.0); - } - ); - /* off-diagonal terms 12 23 31 */ - RLOOP ( - this, - [&](ptrdiff_t rindex, ptrdiff_t /*xindex*/, ptrdiff_t /*yindex*/, ptrdiff_t /*zindex*/){ - // rindex indexing is thread safe so there is no overlap between threads - ptrdiff_t tindex = 3*rindex; - for (int cc=0; cc<3; cc++) - this->rv[1][tindex+cc] = this->ru[tindex+cc]*this->ru[tindex+(cc+1)%3]; - } - ); - this->clean_up_real_space(this->rv[1], 3); - { - TIMEZONE("fftw_interface<rnumber>::execute"); - fftw_interface<rnumber>::execute(*(this->vr2c[1])); - } - this->dealias(this->cv[1], 3); - CLOOP_K2( - this, - [&](ptrdiff_t cindex, ptrdiff_t xindex, ptrdiff_t yindex, ptrdiff_t zindex, double k2){ - if (k2 <= this->kM2 && k2 > 0) - { - // cindex indexing is thread safe so there is no overlap between threads - ptrdiff_t tindex = 3*cindex; - for (int i=0; i<2; i++) - { - pressure[cindex][i] -= 2*(this->kx[xindex]*this->ky[yindex]*this->cv[1][tindex+0][i] + - this->ky[yindex]*this->kz[zindex]*this->cv[1][tindex+1][i] + - this->kz[zindex]*this->kx[xindex]*this->cv[1][tindex+2][i]); - pressure[cindex][i] /= this->normalization_factor*k2; - } - } - } - ); -} - -template <class rnumber> -void fluid_solver<rnumber>::compute_gradient_statistics( - rnumber (*__restrict__ vec)[2], -double *gradu_moments, -double *trS2QR_moments, -ptrdiff_t *gradu_hist, -ptrdiff_t *trS2QR_hist, -ptrdiff_t *QR2D_hist, -double trS2QR_max_estimates[], -double gradu_max_estimates[], -int nbins, -int QR2D_nbins) -{ - TIMEZONE("fluid_solver::compute_gradient_statistics"); - typename fftw_interface<rnumber>::complex *ca; - rnumber *ra; - ca = fftw_interface<rnumber>::alloc_complex(this->cd->local_size*3); - ra = (rnumber*)(ca); - this->compute_vector_gradient(ca, vec); - for (int cc=0; cc<3; cc++) - { - std::copy( - (rnumber*)(ca + cc*this->cd->local_size), - (rnumber*)(ca + (cc+1)*this->cd->local_size), - (rnumber*)this->cv[1]); - fftw_interface<rnumber>::execute(*(this->vc2r[1])); - std::copy( - this->rv[1], - this->rv[1] + this->cd->local_size*2, - ra + cc*this->cd->local_size*2); - } - /* velocity gradient is now stored, in real space, in ra */ - std::fill_n(this->rv[1], 2*this->cd->local_size, 0.0); - rnumber *dx_u, *dy_u, *dz_u; - dx_u = ra; - dy_u = ra + 2*this->cd->local_size; - dz_u = ra + 4*this->cd->local_size; - double binsize[2]; - double tmp_max_estimate[3]; - tmp_max_estimate[0] = trS2QR_max_estimates[0]; - tmp_max_estimate[1] = trS2QR_max_estimates[1]; - tmp_max_estimate[2] = trS2QR_max_estimates[2]; - binsize[0] = 2*tmp_max_estimate[2] / QR2D_nbins; - binsize[1] = 2*tmp_max_estimate[1] / QR2D_nbins; - ptrdiff_t *local_hist = new ptrdiff_t[QR2D_nbins*QR2D_nbins]; - std::fill_n(local_hist, QR2D_nbins*QR2D_nbins, 0); - RLOOP( - this, - [&](ptrdiff_t rindex, ptrdiff_t /*xindex*/, ptrdiff_t /*yindex*/, ptrdiff_t /*zindex*/){ - rnumber AxxAxx; - rnumber AyyAyy; - rnumber AzzAzz; - rnumber AxyAyx; - rnumber AyzAzy; - rnumber AzxAxz; - rnumber Sxy; - rnumber Syz; - rnumber Szx; - // rindex indexing is thread safe so there is no overlap between threads - // tindex[0:2] is thread safe too - ptrdiff_t tindex = 3*rindex; - AxxAxx = dx_u[tindex+0]*dx_u[tindex+0]; - AyyAyy = dy_u[tindex+1]*dy_u[tindex+1]; - AzzAzz = dz_u[tindex+2]*dz_u[tindex+2]; - AxyAyx = dx_u[tindex+1]*dy_u[tindex+0]; - AyzAzy = dy_u[tindex+2]*dz_u[tindex+1]; - AzxAxz = dz_u[tindex+0]*dx_u[tindex+2]; - this->rv[1][tindex+1] = - (AxxAxx + AyyAyy + AzzAzz)/2 - AxyAyx - AyzAzy - AzxAxz; - this->rv[1][tindex+2] = - (dx_u[tindex+0]*(AxxAxx/3 + AxyAyx + AzxAxz) + - dy_u[tindex+1]*(AyyAyy/3 + AxyAyx + AyzAzy) + - dz_u[tindex+2]*(AzzAzz/3 + AzxAxz + AyzAzy) + - dx_u[tindex+1]*dy_u[tindex+2]*dz_u[tindex+0] + - dx_u[tindex+2]*dy_u[tindex+0]*dz_u[tindex+1]); - int bin0 = int(floor((this->rv[1][tindex+2] + tmp_max_estimate[2]) / binsize[0])); - int bin1 = int(floor((this->rv[1][tindex+1] + tmp_max_estimate[1]) / binsize[1])); - if ((bin0 >= 0 && bin0 < QR2D_nbins) && - (bin1 >= 0 && bin1 < QR2D_nbins)) - local_hist[bin1*QR2D_nbins + bin0]++; - Sxy = dx_u[tindex+1]+dy_u[tindex+0]; - Syz = dy_u[tindex+2]+dz_u[tindex+1]; - Szx = dz_u[tindex+0]+dx_u[tindex+2]; - this->rv[1][tindex] = (AxxAxx + AyyAyy + AzzAzz + - (Sxy*Sxy + Syz*Syz + Szx*Szx)/2); - } - ); - MPI_Allreduce( - local_hist, - QR2D_hist, - QR2D_nbins * QR2D_nbins, - MPI_INT64_T, MPI_SUM, this->cd->comm); - delete[] local_hist; - this->compute_rspace_stats3( - this->rv[1], - trS2QR_moments, - trS2QR_hist, - tmp_max_estimate, - nbins); - double *tmp_moments = new double[10*3]; - ptrdiff_t *tmp_hist = new ptrdiff_t[nbins*3]; - for (int cc=0; cc<3; cc++) - { - tmp_max_estimate[0] = gradu_max_estimates[cc*3 + 0]; - tmp_max_estimate[1] = gradu_max_estimates[cc*3 + 1]; - tmp_max_estimate[2] = gradu_max_estimates[cc*3 + 2]; - this->compute_rspace_stats3( - dx_u + cc*2*this->cd->local_size, - tmp_moments, - tmp_hist, - tmp_max_estimate, - nbins); - for (int n = 0; n < 10; n++) - for (int i = 0; i < 3 ; i++) - { - gradu_moments[(n*3 + cc)*3 + i] = tmp_moments[n*3 + i]; - } - for (int n = 0; n < nbins; n++) - for (int i = 0; i < 3; i++) - { - gradu_hist[(n*3 + cc)*3 + i] = tmp_hist[n*3 + i]; - } - } - delete[] tmp_moments; - delete[] tmp_hist; - fftw_interface<rnumber>::free(ca); -} - -template <class rnumber> -void fluid_solver<rnumber>::compute_Lagrangian_acceleration(rnumber (*acceleration)[2]) -{ - TIMEZONE("fluid_solver::compute_Lagrangian_acceleration"); - typename fftw_interface<rnumber>::complex *pressure; - pressure = fftw_interface<rnumber>::alloc_complex(this->cd->local_size/3); - this->compute_velocity(this->cvorticity); - this->ift_velocity(); - this->compute_pressure(pressure); - this->compute_velocity(this->cvorticity); - std::fill_n((rnumber*)this->cv[1], 2*this->cd->local_size, 0.0); - CLOOP_K2( - this, - [&](ptrdiff_t cindex, ptrdiff_t xindex, ptrdiff_t yindex, ptrdiff_t zindex, double k2){ - if (k2 <= this->kM2) - { - // cindex indexing is thread safe so there is no overlap between threads - ptrdiff_t tindex = 3*cindex; - for (int cc=0; cc<3; cc++) - for (int i=0; i<2; i++) - this->cv[1][tindex+cc][i] = - this->nu*k2*this->cu[tindex+cc][i]; - if (strcmp(this->forcing_type, "linear") == 0) - { - double knorm = sqrt(k2); - if ((this->fk0 <= knorm) && - (this->fk1 >= knorm)) - for (int c=0; c<3; c++) - for (int i=0; i<2; i++) - this->cv[1][tindex+c][i] += this->famplitude*this->cu[tindex+c][i]; - } - this->cv[1][tindex+0][0] += this->kx[xindex]*pressure[cindex][1]; - this->cv[1][tindex+1][0] += this->ky[yindex]*pressure[cindex][1]; - this->cv[1][tindex+2][0] += this->kz[zindex]*pressure[cindex][1]; - this->cv[1][tindex+0][1] -= this->kx[xindex]*pressure[cindex][0]; - this->cv[1][tindex+1][1] -= this->ky[yindex]*pressure[cindex][0]; - this->cv[1][tindex+2][1] -= this->kz[zindex]*pressure[cindex][0]; - } - } - ); - std::copy( - (rnumber*)this->cv[1], - (rnumber*)(this->cv[1] + this->cd->local_size), - (rnumber*)acceleration); - fftw_interface<rnumber>::free(pressure); -} - -template <class rnumber> -void fluid_solver<rnumber>::compute_Eulerian_acceleration(rnumber (*__restrict__ acceleration)[2]) -{ - TIMEZONE("fluid_solver::compute_Eulerian_acceleration"); - std::fill_n((rnumber*)(acceleration), 2*this->cd->local_size, 0.0); - this->compute_velocity(this->cvorticity); - /* put in linear terms */ - CLOOP_K2( - this, - [&](ptrdiff_t cindex, ptrdiff_t /*xindex*/, ptrdiff_t /*yindex*/, ptrdiff_t /*zindex*/, double k2){ - if (k2 <= this->kM2) - { - // cindex indexing is thread safe so there is no overlap between threads - ptrdiff_t tindex = 3*cindex; - for (int cc=0; cc<3; cc++) - for (int i=0; i<2; i++) - acceleration[tindex+cc][i] = - this->nu*k2*this->cu[tindex+cc][i]; - if (strcmp(this->forcing_type, "linear") == 0) - { - double knorm = sqrt(k2); - if ((this->fk0 <= knorm) && - (this->fk1 >= knorm)) - { - for (int c=0; c<3; c++) - for (int i=0; i<2; i++) - acceleration[tindex+c][i] += this->famplitude*this->cu[tindex+c][i]; - } - } - } - } - ); - this->ift_velocity(); - /* compute uu */ - /* 11 22 33 */ - RLOOP ( - this, - [&](ptrdiff_t rindex, ptrdiff_t /*xindex*/, ptrdiff_t /*yindex*/, ptrdiff_t /*zindex*/){ - // cindex indexing is thread safe so there is no overlap between threads - ptrdiff_t tindex = 3*rindex; - for (int cc=0; cc<3; cc++) - this->rv[1][tindex+cc] = this->ru[tindex+cc]*this->ru[tindex+cc] / this->normalization_factor; - } - ); - this->clean_up_real_space(this->rv[1], 3); - fftw_interface<rnumber>::execute(*(this->vr2c[1])); - this->dealias(this->cv[1], 3); - CLOOP_K2( - this, - [&](ptrdiff_t cindex, ptrdiff_t xindex, ptrdiff_t yindex, ptrdiff_t zindex, double k2){ - if (k2 <= this->kM2) - { - // cindex indexing is thread safe so there is no overlap between threads - ptrdiff_t tindex = 3*cindex; - acceleration[tindex+0][0] += - this->kx[xindex]*this->cv[1][tindex+0][1]; - acceleration[tindex+0][1] += - -this->kx[xindex]*this->cv[1][tindex+0][0]; - acceleration[tindex+1][0] += - this->ky[yindex]*this->cv[1][tindex+1][1]; - acceleration[tindex+1][1] += - -this->ky[yindex]*this->cv[1][tindex+1][0]; - acceleration[tindex+2][0] += - this->kz[zindex]*this->cv[1][tindex+2][1]; - acceleration[tindex+2][1] += - -this->kz[zindex]*this->cv[1][tindex+2][0]; - } - } - ); - /* 12 23 31 */ - RLOOP ( - this, - [&](ptrdiff_t rindex, ptrdiff_t /*xindex*/, ptrdiff_t /*yindex*/, ptrdiff_t /*zindex*/){ - // cindex indexing is thread safe so there is no overlap between threads - ptrdiff_t tindex = 3*rindex; - for (int cc=0; cc<3; cc++) - this->rv[1][tindex+cc] = this->ru[tindex+cc]*this->ru[tindex+(cc+1)%3] / this->normalization_factor; - } - ); - this->clean_up_real_space(this->rv[1], 3); - fftw_interface<rnumber>::execute(*(this->vr2c[1])); - this->dealias(this->cv[1], 3); - CLOOP_K2( - this, - [&](ptrdiff_t cindex, ptrdiff_t xindex, ptrdiff_t yindex, ptrdiff_t zindex, double k2){ - if (k2 <= this->kM2) - { - // cindex indexing is thread safe so there is no overlap between threads - ptrdiff_t tindex = 3*cindex; - acceleration[tindex+0][0] += - (this->ky[yindex]*this->cv[1][tindex+0][1] + - this->kz[zindex]*this->cv[1][tindex+2][1]); - acceleration[tindex+0][1] += - - (this->ky[yindex]*this->cv[1][tindex+0][0] + - this->kz[zindex]*this->cv[1][tindex+2][0]); - acceleration[tindex+1][0] += - (this->kz[zindex]*this->cv[1][tindex+1][1] + - this->kx[xindex]*this->cv[1][tindex+0][1]); - acceleration[tindex+1][1] += - - (this->kz[zindex]*this->cv[1][tindex+1][0] + - this->kx[xindex]*this->cv[1][tindex+0][0]); - acceleration[tindex+2][0] += - (this->kx[xindex]*this->cv[1][tindex+2][1] + - this->ky[yindex]*this->cv[1][tindex+1][1]); - acceleration[tindex+2][1] += - - (this->kx[xindex]*this->cv[1][tindex+2][0] + - this->ky[yindex]*this->cv[1][tindex+1][0]); - } - } - ); - if (this->cd->myrank == this->cd->rank[0]) - std::fill_n((rnumber*)(acceleration), 6, 0.0); - this->force_divfree(acceleration); -} - -template <class rnumber> -void fluid_solver<rnumber>::compute_Lagrangian_acceleration(rnumber *__restrict__ acceleration) -{ - TIMEZONE("fluid_solver::compute_Lagrangian_acceleration"); - this->compute_Lagrangian_acceleration((typename fftw_interface<rnumber>::complex*)acceleration); - fftw_interface<rnumber>::execute(*(this->vc2r[1])); - std::copy( - this->rv[1], - this->rv[1] + 2*this->cd->local_size, - acceleration); -} - -template <class rnumber> -int fluid_solver<rnumber>::write_rpressure() -{ - TIMEZONE("fluid_solver::write_rpressure"); - char fname[512]; - typename fftw_interface<rnumber>::complex *pressure; - pressure = fftw_interface<rnumber>::alloc_complex(this->cd->local_size/3); - this->compute_velocity(this->cvorticity); - this->ift_velocity(); - this->compute_pressure(pressure); - this->fill_up_filename("rpressure", fname); - rnumber *rpressure = fftw_interface<rnumber>::alloc_real((this->cd->local_size/3)*2); - typename fftw_interface<rnumber>::plan c2r; - c2r = fftw_interface<rnumber>::mpi_plan_dft_c2r_3d( - this->rd->sizes[0], this->rd->sizes[1], this->rd->sizes[2], - pressure, rpressure, this->cd->comm, - this->fftw_plan_rigor | FFTW_MPI_TRANSPOSED_IN); - fftw_interface<rnumber>::execute(c2r); - /* output goes here */ - int ntmp[3]; - ntmp[0] = this->rd->sizes[0]; - ntmp[1] = this->rd->sizes[1]; - ntmp[2] = this->rd->sizes[2]; - field_descriptor<rnumber> *scalar_descriptor = new field_descriptor<rnumber>(3, ntmp, mpi_real_type<rnumber>::real(), this->cd->comm); - clip_zero_padding<rnumber>(scalar_descriptor, rpressure, 1); - int return_value = scalar_descriptor->write(fname, rpressure); - delete scalar_descriptor; - fftw_interface<rnumber>::destroy_plan(c2r); - fftw_interface<rnumber>::free(pressure); - fftw_interface<rnumber>::free(rpressure); - return return_value; -} - -/*****************************************************************************/ - - - - -/*****************************************************************************/ -/* finally, force generation of code for single precision */ -template class fluid_solver<float>; -template class fluid_solver<double>; -/*****************************************************************************/ - diff --git a/bfps/cpp/fluid_solver.hpp b/bfps/cpp/fluid_solver.hpp deleted file mode 100644 index aaddbb59b4a29530779e3dba81f90a06c790bdcb..0000000000000000000000000000000000000000 --- a/bfps/cpp/fluid_solver.hpp +++ /dev/null @@ -1,120 +0,0 @@ -/********************************************************************** -* * -* Copyright 2015 Max Planck Institute * -* for Dynamics and Self-Organization * -* * -* This file is part of bfps. * -* * -* bfps is free software: you can redistribute it and/or modify * -* it under the terms of the GNU General Public License as published * -* by the Free Software Foundation, either version 3 of the License, * -* or (at your option) any later version. * -* * -* bfps is distributed in the hope that it will be useful, * -* but WITHOUT ANY WARRANTY; without even the implied warranty of * -* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * -* GNU General Public License for more details. * -* * -* You should have received a copy of the GNU General Public License * -* along with bfps. If not, see <http://www.gnu.org/licenses/> * -* * -* Contact: Cristian.Lalescu@ds.mpg.de * -* * -**********************************************************************/ - -#include <stdio.h> -#include <stdlib.h> -#include <iostream> -#include "field_descriptor.hpp" -#include "fluid_solver_base.hpp" - -#ifndef FLUID_SOLVER - -#define FLUID_SOLVER - -extern int myrank, nprocs; - - -/* container for field descriptor, fields themselves, parameters, etc - * using the same big macro idea that they're using in fftw3.h - * I feel like I should quote: Ugh. - * */ - -template <class rnumber> -class fluid_solver:public fluid_solver_base<rnumber> -{ - public: - /* fields */ - rnumber *rvorticity; - rnumber *rvelocity ; - typename fluid_solver_base<rnumber>::cnumber *cvorticity; - typename fluid_solver_base<rnumber>::cnumber *cvelocity ; - - /* short names for velocity, and 4 vorticity fields */ - rnumber *ru, *rv[4]; - typename fluid_solver_base<rnumber>::cnumber *cu, *cv[4]; - - /* plans */ - typename fftw_interface<rnumber>::many_plan *c2r_vorticity; - typename fftw_interface<rnumber>::many_plan *r2c_vorticity; - typename fftw_interface<rnumber>::many_plan *c2r_velocity; - typename fftw_interface<rnumber>::many_plan *r2c_velocity; - typename fftw_interface<rnumber>::many_plan *uc2r, *ur2c; - typename fftw_interface<rnumber>::many_plan *vr2c[3], *vc2r[3]; - - /* physical parameters */ - double nu; - int fmode; // for Kolmogorov flow - double famplitude; // both for Kflow and band forcing - double fk0, fk1; // for band forcing - char forcing_type[128]; - - /* methods */ - fluid_solver( - const char *NAME, - int nx, - int ny, - int nz, - double DKX = 1.0, - double DKY = 1.0, - double DKZ = 1.0, - int DEALIAS_TYPE = 1, - unsigned FFTW_PLAN_RIGOR = FFTW_MEASURE); - ~fluid_solver(void); - - void compute_gradient_statistics( - rnumber (*__restrict__ vec)[2], - double *__restrict__ gradu_moments, - double *__restrict__ trS2_Q_R_moments, - ptrdiff_t *__restrict__ gradu_histograms, - ptrdiff_t *__restrict__ trS2_Q_R_histograms, - ptrdiff_t *__restrict__ QR2D_histogram, - double trS2_Q_R_max_estimates[3], - double gradu_max_estimates[9], - const int nbins_1D = 256, - const int nbins_2D = 64); - - void compute_vorticity(void); - void compute_velocity(rnumber (*__restrict__ vorticity)[2]); - void compute_pressure(rnumber (*__restrict__ pressure)[2]); - void compute_Eulerian_acceleration(rnumber (*__restrict__ dst)[2]); - void compute_Lagrangian_acceleration(rnumber (*__restrict__ dst)[2]); - void compute_Lagrangian_acceleration(rnumber *__restrict__ dst); - void ift_velocity(); - void dft_velocity(); - void ift_vorticity(); - void dft_vorticity(); - void omega_nonlin(int src); - void step(double dt); - void impose_zero_modes(void); - void add_forcing(rnumber (*__restrict__ acc_field)[2], rnumber (*__restrict__ vort_field)[2], rnumber factor); - - int read(char field, char representation); - int write(char field, char representation); - int write_rTrS2(); - int write_renstrophy(); - int write_rpressure(); -}; - -#endif//FLUID_SOLVER - diff --git a/bfps/cpp/fluid_solver_base.cpp b/bfps/cpp/fluid_solver_base.cpp deleted file mode 100644 index b1d64ef5ce8294efa53cac23b391700b6b8574d3..0000000000000000000000000000000000000000 --- a/bfps/cpp/fluid_solver_base.cpp +++ /dev/null @@ -1,834 +0,0 @@ -/********************************************************************** -* * -* Copyright 2015 Max Planck Institute * -* for Dynamics and Self-Organization * -* * -* This file is part of bfps. * -* * -* bfps is free software: you can redistribute it and/or modify * -* it under the terms of the GNU General Public License as published * -* by the Free Software Foundation, either version 3 of the License, * -* or (at your option) any later version. * -* * -* bfps is distributed in the hope that it will be useful, * -* but WITHOUT ANY WARRANTY; without even the implied warranty of * -* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * -* GNU General Public License for more details. * -* * -* You should have received a copy of the GNU General Public License * -* along with bfps. If not, see <http://www.gnu.org/licenses/> * -* * -* Contact: Cristian.Lalescu@ds.mpg.de * -* * -**********************************************************************/ - - - -#define NDEBUG - -#include <cassert> -#include <cmath> -#include <cstring> -#include "base.hpp" -#include "fluid_solver_base.hpp" -#include "fftw_tools.hpp" -#include "scope_timer.hpp" -#include "shared_array.hpp" - -template <class rnumber> -void fluid_solver_base<rnumber>::fill_up_filename(const char *base_name, char *destination) -{ - sprintf(destination, "%s_%s_i%.5x", this->name, base_name, this->iteration); -} - -template <class rnumber> -void fluid_solver_base<rnumber>::clean_up_real_space(rnumber *a, int howmany) -{ - TIMEZONE("fluid_solver_base::clean_up_real_space"); - for (ptrdiff_t rindex = 0; rindex < this->cd->local_size*2; rindex += howmany*(this->rd->subsizes[2]+2)) - std::fill_n(a+rindex+this->rd->subsizes[2]*howmany, 2*howmany, 0.0); -} - -template <class rnumber> -double fluid_solver_base<rnumber>::autocorrel(cnumber *a) -{ - double *spec = fftw_interface<double>::alloc_real(this->nshells*9); - double sum_local; - this->cospectrum(a, a, spec); - sum_local = 0.0; - for (unsigned int n = 0; n < this->nshells; n++) - { - sum_local += spec[n*9] + spec[n*9 + 4] + spec[n*9 + 8]; - } - fftw_interface<double>::free(spec); - return sum_local; -} - -template <class rnumber> -void fluid_solver_base<rnumber>::cospectrum(cnumber *a, cnumber *b, double *spec) -{ - TIMEZONE("fluid_solver_base::cospectrum"); - shared_array<double> cospec_local_thread(this->nshells*9,[&](double* cospec_local){ - std::fill_n(cospec_local, this->nshells*9, 0); - }); - - CLOOP_K2_NXMODES( - this, - - [&](ptrdiff_t cindex, ptrdiff_t /*xindex*/, ptrdiff_t /*yindex*/, - ptrdiff_t /*zindex*/, double k2, int nxmodes){ - if (k2 <= this->kMspec2) - { - int tmp_int = int(sqrt(k2)/this->dk)*9; - double* cospec_local = cospec_local_thread.getMine(); - for (int i=0; i<3; i++) - for (int j=0; j<3; j++) - { - cospec_local[tmp_int+i*3+j] += nxmodes * ( - (*(a + 3*cindex+i))[0] * (*(b + 3*cindex+j))[0] + - (*(a + 3*cindex+i))[1] * (*(b + 3*cindex+j))[1]); - } - }} - ); - cospec_local_thread.mergeParallel(); - MPI_Allreduce( - cospec_local_thread.getMasterData(), - (void*)spec, - this->nshells*9, - MPI_DOUBLE, MPI_SUM, this->cd->comm); -} - -template <class rnumber> -void fluid_solver_base<rnumber>::cospectrum(cnumber *a, cnumber *b, double *spec, const double k2exponent) -{ - TIMEZONE("fluid_solver_base::cospectrum2"); - shared_array<double> cospec_local_thread(this->nshells*9,[&](double* cospec_local){ - std::fill_n(cospec_local, this->nshells*9, 0); - }); - - CLOOP_K2_NXMODES( - this, - - [&](ptrdiff_t cindex, ptrdiff_t /*xindex*/, ptrdiff_t /*yindex*/, - ptrdiff_t /*zindex*/, double k2, int nxmodes){ - if (k2 <= this->kMspec2) - { - double factor = nxmodes*pow(k2, k2exponent); - int tmp_int = int(sqrt(k2)/this->dk)*9; - double* cospec_local = cospec_local_thread.getMine(); - for (int i=0; i<3; i++) - for (int j=0; j<3; j++) - { - cospec_local[tmp_int+i*3+j] += factor * ( - (*(a + 3*cindex+i))[0] * (*(b + 3*cindex+j))[0] + - (*(a + 3*cindex+i))[1] * (*(b + 3*cindex+j))[1]); - } - }} - ); - cospec_local_thread.mergeParallel(); - MPI_Allreduce( - cospec_local_thread.getMasterData(), - (void*)spec, - this->nshells*9, - MPI_DOUBLE, MPI_SUM, this->cd->comm); - //for (int n=0; n<this->nshells; n++) - //{ - // spec[n] *= 12.5663706144*pow(this->kshell[n], 2) / this->nshell[n]; - // /*is normalization needed? - // * spec[n] /= this->normalization_factor*/ - //} -} - -template <class rnumber> -void fluid_solver_base<rnumber>::compute_rspace_stats( - const rnumber *a, - const hid_t group, - const std::string dset_name, - const hsize_t toffset, - const std::vector<double> max_estimate) -{ - TIMEZONE("fluid_solver_base::compute_rspace_stats"); - const int nmoments = 10; - int nvals, nbins; - if (this->rd->myrank == 0) - { - hid_t dset, wspace; - hsize_t dims[3]; - int ndims; - dset = H5Dopen(group, ("moments/" + dset_name).c_str(), H5P_DEFAULT); - wspace = H5Dget_space(dset); - ndims = H5Sget_simple_extent_dims(wspace, dims, NULL); - assert(ndims == 3); - variable_used_only_in_assert(ndims); - assert(dims[1] == nmoments); - nvals = dims[2]; - H5Sclose(wspace); - H5Dclose(dset); - dset = H5Dopen(group, ("histograms/" + dset_name).c_str(), H5P_DEFAULT); - wspace = H5Dget_space(dset); - ndims = H5Sget_simple_extent_dims(wspace, dims, NULL); - assert(ndims == 3); - nbins = dims[1]; - assert(nvals == dims[2]); - H5Sclose(wspace); - H5Dclose(dset); - } - MPI_Bcast(&nvals, 1, MPI_INT, 0, this->rd->comm); - MPI_Bcast(&nbins, 1, MPI_INT, 0, this->rd->comm); - assert(nvals == max_estimate.size()); - shared_array<double> threaded_local_moments(nmoments*nvals, [&](double* local_moments){ - std::fill_n(local_moments, nmoments*nvals, 0); - if (nvals == 4) local_moments[3] = max_estimate[3]; - }); - - shared_array<double> threaded_val_tmp(nvals); - - shared_array<ptrdiff_t> threaded_local_hist(nbins*nvals, [&](ptrdiff_t* local_hist){ - std::fill_n(local_hist, nbins*nvals, 0); - }); - - // Not written by threads - double *binsize = new double[nvals]; - for (int i=0; i<nvals; i++) - binsize[i] = 2*max_estimate[i] / nbins; - - RLOOP( - this, - [&](ptrdiff_t rindex, ptrdiff_t /*xindex*/, ptrdiff_t /*yindex*/, ptrdiff_t /*zindex*/){ - double *val_tmp = threaded_val_tmp.getMine(); - ptrdiff_t* local_hist = threaded_local_hist.getMine(); - double *local_moments = threaded_local_moments.getMine(); - - if (nvals == 4) val_tmp[3] = 0.0; - for (int i=0; i<3; i++) - { - val_tmp[i] = a[rindex*3+i]; - if (nvals == 4) val_tmp[3] += val_tmp[i]*val_tmp[i]; - } - if (nvals == 4) - { - val_tmp[3] = sqrt(val_tmp[3]); - if (val_tmp[3] < local_moments[0*nvals+3]) - local_moments[0*nvals+3] = val_tmp[3]; - if (val_tmp[3] > local_moments[9*nvals+3]) - local_moments[9*nvals+3] = val_tmp[3]; - int bin = int(floor(val_tmp[3]*2/binsize[3])); - if (bin >= 0 && bin < nbins) - local_hist[bin*nvals+3]++; - } - for (int i=0; i<3; i++) - { - if (val_tmp[i] < local_moments[0*nvals+i]) - local_moments[0*nvals+i] = val_tmp[i]; - if (val_tmp[i] > local_moments[(nmoments-1)*nvals+i]) - local_moments[(nmoments-1)*nvals+i] = val_tmp[i]; - int bin = int(floor((val_tmp[i] + max_estimate[i]) / binsize[i])); - if (bin >= 0 && bin < nbins) - local_hist[bin*nvals+i]++; - } - for (int n=1; n < nmoments-1; n++){ - double pow_tmp = 1.; - for (int i=0; i<nvals; i++){ - local_moments[n*nvals + i] += (pow_tmp = val_tmp[i]*pow_tmp); - } - } - } - ); - - threaded_local_hist.mergeParallel(); - threaded_local_moments.mergeParallel([&](const int idx, const double& v1, const double& v2) -> double { - if(nvals == int(4) && idx == 0*nvals+3){ - return std::min(v1, v2); - } - if(nvals == int(4) && idx == 9*nvals+3){ - return std::max(v1, v2); - } - if(idx < 3){ - return std::min(v1, v2); - } - if((nmoments-1)*nvals <= idx && idx < (nmoments-1)*nvals+3){ - return std::max(v1, v2); - } - return v1 + v2; - }); - - - double *moments = new double[nmoments*nvals]; - MPI_Allreduce( - threaded_local_moments.getMasterData(), - (void*)moments, - nvals, - MPI_DOUBLE, MPI_MIN, this->cd->comm); - MPI_Allreduce( - (threaded_local_moments.getMasterData() + nvals), - (void*)(moments+nvals), - (nmoments-2)*nvals, - MPI_DOUBLE, MPI_SUM, this->cd->comm); - MPI_Allreduce( - (threaded_local_moments.getMasterData() + (nmoments-1)*nvals), - (void*)(moments+(nmoments-1)*nvals), - nvals, - MPI_DOUBLE, MPI_MAX, this->cd->comm); - ptrdiff_t *hist = new ptrdiff_t[nbins*nvals]; - MPI_Allreduce( - threaded_local_hist.getMasterData(), - (void*)hist, - nbins*nvals, - MPI_INT64_T, MPI_SUM, this->cd->comm); - for (int n=1; n < nmoments-1; n++) - for (int i=0; i<nvals; i++) - moments[n*nvals + i] /= this->normalization_factor; - delete[] binsize; - if (this->rd->myrank == 0) - { - hid_t dset, wspace, mspace; - hsize_t count[3], offset[3], dims[3]; - dset = H5Dopen(group, ("moments/" + dset_name).c_str(), H5P_DEFAULT); - wspace = H5Dget_space(dset); - H5Sget_simple_extent_dims(wspace, dims, NULL); - offset[0] = toffset; - offset[1] = 0; - offset[2] = 0; - count[0] = 1; - count[1] = nmoments; - count[2] = nvals; - mspace = H5Screate_simple(3, count, NULL); - H5Sselect_hyperslab(wspace, H5S_SELECT_SET, offset, NULL, count, NULL); - H5Dwrite(dset, H5T_NATIVE_DOUBLE, mspace, wspace, H5P_DEFAULT, moments); - H5Sclose(wspace); - H5Sclose(mspace); - H5Dclose(dset); - dset = H5Dopen(group, ("histograms/" + dset_name).c_str(), H5P_DEFAULT); - wspace = H5Dget_space(dset); - count[1] = nbins; - mspace = H5Screate_simple(3, count, NULL); - H5Sselect_hyperslab(wspace, H5S_SELECT_SET, offset, NULL, count, NULL); - H5Dwrite(dset, H5T_NATIVE_INT64, mspace, wspace, H5P_DEFAULT, hist); - H5Sclose(wspace); - H5Sclose(mspace); - H5Dclose(dset); - } - delete[] moments; - delete[] hist; -} - - - -template <class rnumber> -template<int nvals> -void fluid_solver_base<rnumber>::compute_rspace_stats( - rnumber *a, - double *moments, - ptrdiff_t *hist, - double max_estimate[], - const int nbins) -{ - TIMEZONE("fluid_solver_base::compute_rspace_stats"); - shared_array<double> threaded_local_moments(10*nvals,[&](double* local_moments){ - std::fill_n(local_moments, 10*nvals, 0); - if (nvals == 4) local_moments[3] = max_estimate[3]; - }); - - shared_array<ptrdiff_t> threaded_local_hist(nbins*nvals, [&](ptrdiff_t* local_hist){ - std::fill_n(local_hist, nbins*nvals, 0); - }); - - // Will not be modified by the threads - double binsize[nvals]; - for (int i=0; i<nvals; i++) - binsize[i] = 2*max_estimate[i] / nbins; - - RLOOP( - this, - [&](ptrdiff_t rindex, ptrdiff_t /*xindex*/, ptrdiff_t /*yindex*/, ptrdiff_t /*zindex*/){ - ptrdiff_t *local_hist = threaded_local_hist.getMine(); - double *local_moments = threaded_local_moments.getMine(); - - double val_tmp[nvals]; - if (nvals == 4) val_tmp[3] = 0.0; - for (int i=0; i<3; i++) - { - val_tmp[i] = a[rindex*3+i]; - if (nvals == 4) val_tmp[3] += val_tmp[i]*val_tmp[i]; - } - if (nvals == 4) - { - val_tmp[3] = sqrt(val_tmp[3]); - if (val_tmp[3] < local_moments[0*nvals+3]) - local_moments[0*nvals+3] = val_tmp[3]; - if (val_tmp[3] > local_moments[9*nvals+3]) - local_moments[9*nvals+3] = val_tmp[3]; - int bin = int(floor(val_tmp[3]*2/binsize[3])); - if (bin >= 0 && bin < nbins) - local_hist[bin*nvals+3]++; - } - for (int i=0; i<3; i++) - { - if (val_tmp[i] < local_moments[0*nvals+i]) - local_moments[0*nvals+i] = val_tmp[i]; - if (val_tmp[i] > local_moments[9*nvals+i]) - local_moments[9*nvals+i] = val_tmp[i]; - int bin = int(floor((val_tmp[i] + max_estimate[i]) / binsize[i])); - if (bin >= 0 && bin < nbins) - local_hist[bin*nvals+i]++; - } - for (int n=1; n<9; n++){ - double pow_tmp = 1; - for (int i=0; i<nvals; i++){ - local_moments[n*nvals + i] += (pow_tmp = val_tmp[i]*pow_tmp); - } - } - } - ); - - threaded_local_moments.mergeParallel([&](const int idx, const double& v1, const double& v2) -> double { - if(nvals == int(4) && idx == 0*nvals+3){ - return std::min(v1, v2); - } - if(nvals == int(4) && idx == 9*nvals+3){ - return std::max(v1, v2); - } - if(idx < 3){ - return std::min(v1, v2); - } - if(9*nvals <= idx && idx < 9*nvals+3){ - return std::max(v1, v2); - } - return v1 + v2; - }); - threaded_local_hist.mergeParallel(); - - MPI_Allreduce( - threaded_local_moments.getMasterData(), - (void*)moments, - nvals, - MPI_DOUBLE, MPI_MIN, this->cd->comm); - MPI_Allreduce( - (threaded_local_moments.getMasterData() + nvals), - (void*)(moments+nvals), - 8*nvals, - MPI_DOUBLE, MPI_SUM, this->cd->comm); - MPI_Allreduce( - (threaded_local_moments.getMasterData() + 9*nvals), - (void*)(moments+9*nvals), - nvals, - MPI_DOUBLE, MPI_MAX, this->cd->comm); - MPI_Allreduce( - (void*)threaded_local_hist.getMasterData(), - (void*)hist, - nbins*nvals, - MPI_INT64_T, MPI_SUM, this->cd->comm); - for (int n=1; n<9; n++) - for (int i=0; i<nvals; i++) - moments[n*nvals + i] /= this->normalization_factor; -} - -template <class rnumber> -void fluid_solver_base<rnumber>::write_spectrum(const char *fname, cnumber *a, const double k2exponent) -{ - TIMEZONE("fluid_solver_base::write_spectrum"); - double *spec = fftw_interface<double>::alloc_real(this->nshells); - this->cospectrum(a, a, spec, k2exponent); - if (this->cd->myrank == 0) - { - FILE *spec_file; - char full_name[512]; - sprintf(full_name, "%s_%s_spec", this->name, fname); - spec_file = fopen(full_name, "ab"); - fwrite((void*)&this->iteration, sizeof(int), 1, spec_file); - fwrite((void*)spec, sizeof(double), this->nshells, spec_file); - fclose(spec_file); - } - fftw_interface<double>::free(spec); -} - -/*****************************************************************************/ -/* macro for specializations to numeric types compatible with FFTW */ - -template <class rnumber> -fluid_solver_base<rnumber>::fluid_solver_base( - const char *NAME, - int nx, - int ny, - int nz, - double DKX, - double DKY, - double DKZ, - int DEALIAS_TYPE, - unsigned FFTW_PLAN_RIGOR) -{ - TIMEZONE("fluid_solver_base::fluid_solver_base"); - strncpy(this->name, NAME, 256); - this->name[255] = '\0'; - this->iteration = 0; - this->fftw_plan_rigor = FFTW_PLAN_RIGOR; - - int ntmp[4]; - ntmp[0] = nz; - ntmp[1] = ny; - ntmp[2] = nx; - ntmp[3] = 3; - this->rd = new field_descriptor<rnumber>( - 4, ntmp, mpi_real_type<rnumber>::real(), MPI_COMM_WORLD); - this->normalization_factor = (this->rd->full_size/3); - ntmp[0] = ny; - ntmp[1] = nz; - ntmp[2] = nx/2 + 1; - ntmp[3] = 3; - this->cd = new field_descriptor<rnumber>( - 4, ntmp, mpi_real_type<rnumber>::complex(), this->rd->comm); - - this->dkx = DKX; - this->dky = DKY; - this->dkz = DKZ; - this->kx = new double[this->cd->sizes[2]]; - this->ky = new double[this->cd->subsizes[0]]; - this->kz = new double[this->cd->sizes[1]]; - this->dealias_type = DEALIAS_TYPE; - switch(this->dealias_type) - { - /* HL07 smooth filter */ - case 1: - this->kMx = this->dkx*(int(this->rd->sizes[2] / 2)-1); - this->kMy = this->dky*(int(this->rd->sizes[1] / 2)-1); - this->kMz = this->dkz*(int(this->rd->sizes[0] / 2)-1); - break; - default: - this->kMx = this->dkx*(int(this->rd->sizes[2] / 3)-1); - this->kMy = this->dky*(int(this->rd->sizes[1] / 3)-1); - this->kMz = this->dkz*(int(this->rd->sizes[0] / 3)-1); - } - int i, ii; - for (i = 0; i<this->cd->sizes[2]; i++) - this->kx[i] = i*this->dkx; - for (i = 0; i<this->cd->subsizes[0]; i++) - { - ii = i + this->cd->starts[0]; - if (ii <= this->rd->sizes[1]/2) - this->ky[i] = this->dky*ii; - else - this->ky[i] = this->dky*(ii - this->rd->sizes[1]); - } - for (i = 0; i<this->cd->sizes[1]; i++) - { - if (i <= this->rd->sizes[0]/2) - this->kz[i] = this->dkz*i; - else - this->kz[i] = this->dkz*(i - this->rd->sizes[0]); - } - this->kM = this->kMx; - if (this->kM < this->kMy) this->kM = this->kMy; - if (this->kM < this->kMz) this->kM = this->kMz; - this->kM2 = this->kM * this->kM; - this->kMspec = this->kM; - this->kMspec2 = this->kM2; - this->dk = this->dkx; - if (this->dk > this->dky) this->dk = this->dky; - if (this->dk > this->dkz) this->dk = this->dkz; - this->dk2 = this->dk*this->dk; - DEBUG_MSG( - "kM = %g, kM2 = %g, dk = %g, dk2 = %g\n", - this->kM, this->kM2, this->dk, this->dk2); - /* spectra stuff */ - this->nshells = int(this->kMspec / this->dk) + 2; - DEBUG_MSG( - "kMspec = %g, kMspec2 = %g, nshells = %ld\n", - this->kMspec, this->kMspec2, this->nshells); - this->kshell = new double[this->nshells]; - std::fill_n(this->kshell, this->nshells, 0.0); - this->nshell = new int64_t[this->nshells]; - std::fill_n(this->nshell, this->nshells, 0); - DEBUG_MSG("fluid_solver_base::fluid_solver_base before declaring shared_array\n"); - - shared_array<double> kshell_local_threaded(this->nshells,[&](double* kshell_local){ - std::fill_n(kshell_local, this->nshells, 0.0); - }); - DEBUG_MSG("fluid_solver_base::fluid_solver_base before declaring shared_array\n"); - shared_array<int64_t> nshell_local_threaded(this->nshells,[&](int64_t* nshell_local){ - std::fill_n(nshell_local, this->nshells, 0); - }); - - std::vector<std::unordered_map<int, double>> Fourier_filter_threaded(omp_get_max_threads()); - - DEBUG_MSG("fluid_solver_base::fluid_solver_base before cloop_k2_nxmodes\n"); - CLOOP_K2_NXMODES( - this, - - [&](ptrdiff_t /*cindex*/, ptrdiff_t /*xindex*/, ptrdiff_t /*yindex*/, - ptrdiff_t /*zindex*/, double k2, int nxmodes){ - if (k2 < this->kM2) - { - double knorm = sqrt(k2); - nshell_local_threaded.getMine()[int(knorm/this->dk)] += nxmodes; - kshell_local_threaded.getMine()[int(knorm/this->dk)] += nxmodes*knorm; - } - Fourier_filter_threaded[omp_get_thread_num()][int(round(k2 / this->dk2))] = exp(-36.0 * pow(k2/this->kM2, 18.));} - ); - - // Merge results - nshell_local_threaded.mergeParallel(); - kshell_local_threaded.mergeParallel(); - for(int idxMerge = 0 ; idxMerge < int(Fourier_filter_threaded.size()) ; ++idxMerge){ - for(const auto kv : Fourier_filter_threaded[idxMerge]){ - this->Fourier_filter[kv.first] = kv.second; - } - } - - MPI_Allreduce( - (void*)(nshell_local_threaded.getMasterData()), - (void*)(this->nshell), - this->nshells, - MPI_INT64_T, MPI_SUM, this->cd->comm); - MPI_Allreduce( - (void*)(kshell_local_threaded.getMasterData()), - (void*)(this->kshell), - this->nshells, - MPI_DOUBLE, MPI_SUM, this->cd->comm); - for (unsigned int n=0; n<this->nshells; n++) - { - if (this->nshell[n] != 0) - this->kshell[n] /= this->nshell[n]; - else - this->kshell[n] = -1; - } - DEBUG_MSG("exiting fluid_solver_base::fluid_solver_base\n"); -} - -template <class rnumber> -fluid_solver_base<rnumber>::~fluid_solver_base() -{ - delete[] this->kshell; - delete[] this->nshell; - - delete[] this->kx; - delete[] this->ky; - delete[] this->kz; - - delete this->cd; - delete this->rd; -} - -template <class rnumber> -void fluid_solver_base<rnumber>::low_pass_Fourier(cnumber *a, const int howmany, const double kmax) -{ - TIMEZONE("fluid_solver_base::low_pass_Fourier"); - const double km2 = kmax*kmax; - const int howmany2 = 2*howmany; - /*DEBUG_MSG("entered low_pass_Fourier, kmax=%lg km2=%lg howmany2=%d\n", kmax, km2, howmany2);*/ - CLOOP_K2( - this, - /*DEBUG_MSG("kx=%lg ky=%lg kz=%lg k2=%lg\n", - this->kx[xindex], - this->ky[yindex], - this->kz[zindex], - k2);*/ - - [&](ptrdiff_t cindex, ptrdiff_t xindex, ptrdiff_t yindex, - ptrdiff_t zindex, double k2){ - if (k2 >= km2) - std::fill_n((rnumber*)(a + howmany*cindex), howmany2, 0.0);} - ); -} - -template <class rnumber> -void fluid_solver_base<rnumber>::dealias(cnumber *a, const int howmany) -{ - TIMEZONE("fluid_solver_base::dealias"); - if (this->dealias_type == 0) - { - this->low_pass_Fourier(a, howmany, this->kM); - return; - } - - CLOOP_K2( - this, - [&](ptrdiff_t cindex, ptrdiff_t /*xindex*/, ptrdiff_t /*yindex*/, - ptrdiff_t /*zindex*/, double k2){ - double tval = this->Fourier_filter[int(round(k2/this->dk2))]; - // It is thread safe on the index cindex - for (int tcounter = 0; tcounter < howmany; tcounter++) - for (int i=0; i<2; i++) - a[howmany*cindex+tcounter][i] *= tval; - } - ); -} - -template <class rnumber> -void fluid_solver_base<rnumber>::force_divfree(cnumber *a) -{ - TIMEZONE("fluid_solver_base::force_divfree"); - CLOOP_K2( - this, - - [&](ptrdiff_t cindex, ptrdiff_t xindex, ptrdiff_t yindex, - ptrdiff_t zindex, double k2){ - if (k2 > 0) - { - // It is thread safe on index cindex - cnumber tval; - tval[0] = (this->kx[xindex]*((*(a + cindex*3 ))[0]) + - this->ky[yindex]*((*(a + cindex*3+1))[0]) + - this->kz[zindex]*((*(a + cindex*3+2))[0]) ) / k2; - tval[1] = (this->kx[xindex]*((*(a + cindex*3 ))[1]) + - this->ky[yindex]*((*(a + cindex*3+1))[1]) + - this->kz[zindex]*((*(a + cindex*3+2))[1]) ) / k2; - for (int imag_part=0; imag_part<2; imag_part++) - { - a[cindex*3 ][imag_part] -= tval[imag_part]*this->kx[xindex]; - a[cindex*3+1][imag_part] -= tval[imag_part]*this->ky[yindex]; - a[cindex*3+2][imag_part] -= tval[imag_part]*this->kz[zindex]; - } - }} - ); - if (this->cd->myrank == this->cd->rank[0]) - std::fill_n((rnumber*)(a), 6, 0.0); -} - -template <class rnumber> -void fluid_solver_base<rnumber>::compute_vector_gradient(cnumber *A, cnumber *cvec) -{ - TIMEZONE("fluid_solver_base::compute_vector_gradient"); - std::fill_n((rnumber*)A, 3*2*this->cd->local_size, 0.0); - cnumber *dx_u, *dy_u, *dz_u; - dx_u = A; - dy_u = A + this->cd->local_size; - dz_u = A + 2*this->cd->local_size; - CLOOP_K2( - this, - - [&](ptrdiff_t cindex, ptrdiff_t xindex, ptrdiff_t yindex, - ptrdiff_t zindex, double k2){ - if (k2 <= this->kM2) - { - // It is thread safe on cindex - ptrdiff_t tindex = 3*cindex; - for (int cc=0; cc<3; cc++) - { - dx_u[tindex + cc][0] = -this->kx[xindex]*cvec[tindex+cc][1]; - dx_u[tindex + cc][1] = this->kx[xindex]*cvec[tindex+cc][0]; - dy_u[tindex + cc][0] = -this->ky[yindex]*cvec[tindex+cc][1]; - dy_u[tindex + cc][1] = this->ky[yindex]*cvec[tindex+cc][0]; - dz_u[tindex + cc][0] = -this->kz[zindex]*cvec[tindex+cc][1]; - dz_u[tindex + cc][1] = this->kz[zindex]*cvec[tindex+cc][0]; - } - }} - ); -} - -template <class rnumber> -void fluid_solver_base<rnumber>::symmetrize(cnumber *data, const int howmany) -{ - TIMEZONE("fluid_solver_base::symmetrize"); - ptrdiff_t ii, cc; - MPI_Status *mpistatus = new MPI_Status; - if (this->cd->myrank == this->cd->rank[0]) - { - for (cc = 0; cc < howmany; cc++) - data[cc][1] = 0.0; - for (ii = 1; ii < this->cd->sizes[1]/2; ii++) - for (cc = 0; cc < howmany; cc++) { - ( *(data + cc + howmany*(this->cd->sizes[1] - ii)*this->cd->sizes[2]))[0] = - (*(data + cc + howmany*( ii)*this->cd->sizes[2]))[0]; - ( *(data + cc + howmany*(this->cd->sizes[1] - ii)*this->cd->sizes[2]))[1] = - -(*(data + cc + howmany*( ii)*this->cd->sizes[2]))[1]; - } - } - cnumber *buffer; - buffer = fftw_interface<rnumber>::alloc_complex(howmany*this->cd->sizes[1]); - ptrdiff_t yy; - /*ptrdiff_t tindex;*/ - int ranksrc, rankdst; - for (yy = 1; yy < this->cd->sizes[0]/2; yy++) { - ranksrc = this->cd->rank[yy]; - rankdst = this->cd->rank[this->cd->sizes[0] - yy]; - if (this->cd->myrank == ranksrc) - for (ii = 0; ii < this->cd->sizes[1]; ii++) - for (cc = 0; cc < howmany; cc++) - for (int imag_comp=0; imag_comp<2; imag_comp++) - (*(buffer + howmany*ii+cc))[imag_comp] = - (*(data + howmany*((yy - this->cd->starts[0])*this->cd->sizes[1] + ii)*this->cd->sizes[2] + cc))[imag_comp]; - if (ranksrc != rankdst) - { - if (this->cd->myrank == ranksrc) - MPI_Send((void*)buffer, - howmany*this->cd->sizes[1], mpi_real_type<rnumber>::complex(), rankdst, yy, - this->cd->comm); - if (this->cd->myrank == rankdst) - MPI_Recv((void*)buffer, - howmany*this->cd->sizes[1], mpi_real_type<rnumber>::complex(), ranksrc, yy, - this->cd->comm, mpistatus); - } - if (this->cd->myrank == rankdst) - { - for (ii = 1; ii < this->cd->sizes[1]; ii++) - for (cc = 0; cc < howmany; cc++) - { - (*(data + howmany*((this->cd->sizes[0] - yy - this->cd->starts[0])*this->cd->sizes[1] + ii)*this->cd->sizes[2] + cc))[0] = - (*(buffer + howmany*(this->cd->sizes[1]-ii)+cc))[0]; - (*(data + howmany*((this->cd->sizes[0] - yy - this->cd->starts[0])*this->cd->sizes[1] + ii)*this->cd->sizes[2] + cc))[1] = - -(*(buffer + howmany*(this->cd->sizes[1]-ii)+cc))[1]; - } - for (cc = 0; cc < howmany; cc++) - { - (*((data + cc + howmany*(this->cd->sizes[0] - yy - this->cd->starts[0])*this->cd->sizes[1]*this->cd->sizes[2])))[0] = (*(buffer + cc))[0]; - (*((data + cc + howmany*(this->cd->sizes[0] - yy - this->cd->starts[0])*this->cd->sizes[1]*this->cd->sizes[2])))[1] = -(*(buffer + cc))[1]; - } - } - } - fftw_interface<rnumber>::free(buffer); - delete mpistatus; - /* put asymmetric data to 0 */ - /*if (this->cd->myrank == this->cd->rank[this->cd->sizes[0]/2]) - { - tindex = howmany*(this->cd->sizes[0]/2 - this->cd->starts[0])*this->cd->sizes[1]*this->cd->sizes[2]; - for (ii = 0; ii < this->cd->sizes[1]; ii++) - { - std::fill_n((rnumber*)(data + tindex), howmany*2*this->cd->sizes[2], 0.0); - tindex += howmany*this->cd->sizes[2]; - } - } - tindex = howmany*(); - std::fill_n((rnumber*)(data + tindex), howmany*2, 0.0);*/ -} - -template <class rnumber> -int fluid_solver_base<rnumber>::read_base(const char *fname, rnumber *data) -{ - char full_name[512]; - sprintf(full_name, "%s_%s_i%.5x", this->name, fname, this->iteration); - return this->rd->read(full_name, (void*)data); -} - -template <class rnumber> -int fluid_solver_base<rnumber>::read_base(const char *fname, cnumber *data) -{ - char full_name[512]; - sprintf(full_name, "%s_%s_i%.5x", this->name, fname, this->iteration); - return this->cd->read(full_name, (void*)data); -} - -template <class rnumber> -int fluid_solver_base<rnumber>::write_base(const char *fname, rnumber *data) -{ - char full_name[512]; - sprintf(full_name, "%s_%s_i%.5x", this->name, fname, this->iteration); - return this->rd->write(full_name, (void*)data); -} - -template <class rnumber> -int fluid_solver_base<rnumber>::write_base(const char *fname, cnumber *data) -{ - char full_name[512]; - sprintf(full_name, "%s_%s_i%.5x", this->name, fname, this->iteration); - return this->cd->write(full_name, (void*)data); -} - -/* finally, force generation of code */ -template class fluid_solver_base<float>; -template class fluid_solver_base<double>; - -/*****************************************************************************/ - - - - diff --git a/bfps/cpp/fluid_solver_base.hpp b/bfps/cpp/fluid_solver_base.hpp deleted file mode 100644 index e446956001a08fdbf0d3b11da8552e1cb6c61a45..0000000000000000000000000000000000000000 --- a/bfps/cpp/fluid_solver_base.hpp +++ /dev/null @@ -1,272 +0,0 @@ -/********************************************************************** -* * -* Copyright 2015 Max Planck Institute * -* for Dynamics and Self-Organization * -* * -* This file is part of bfps. * -* * -* bfps is free software: you can redistribute it and/or modify * -* it under the terms of the GNU General Public License as published * -* by the Free Software Foundation, either version 3 of the License, * -* or (at your option) any later version. * -* * -* bfps is distributed in the hope that it will be useful, * -* but WITHOUT ANY WARRANTY; without even the implied warranty of * -* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * -* GNU General Public License for more details. * -* * -* You should have received a copy of the GNU General Public License * -* along with bfps. If not, see <http://www.gnu.org/licenses/> * -* * -* Contact: Cristian.Lalescu@ds.mpg.de * -* * -**********************************************************************/ - -#include <stdio.h> -#include <stdlib.h> -#include <hdf5.h> -#include <iostream> -#include <unordered_map> -#include <vector> -#include "base.hpp" -#include "field_descriptor.hpp" -#include "scope_timer.hpp" -#include "omputils.hpp" - -#ifndef FLUID_SOLVER_BASE - -#define FLUID_SOLVER_BASE - -extern int myrank, nprocs; - - -/* container for field descriptor, fields themselves, parameters, etc - * using the same big macro idea that they're using in fftw3.h - * I feel like I should quote: Ugh. - * */ - -template <class rnumber> -class fluid_solver_base -{ - protected: - typedef rnumber cnumber[2]; - public: - field_descriptor<rnumber> *cd, *rd; - ptrdiff_t normalization_factor; - unsigned fftw_plan_rigor; - - /* simulation parameters */ - char name[256]; - int iteration; - - /* physical parameters */ - double dkx, dky, dkz, dk, dk2; - - /* mode and dealiasing information */ - int dealias_type; - double kMx, kMy, kMz, kM, kM2; - double kMspec, kMspec2; - double *kx, *ky, *kz; - std::unordered_map<int, double> Fourier_filter; - double *kshell; - int64_t *nshell; - unsigned int nshells; - - - /* methods */ - fluid_solver_base( - const char *NAME, - int nx, - int ny, - int nz, - double DKX = 1.0, - double DKY = 1.0, - double DKZ = 1.0, - int DEALIAS_TYPE = 0, - unsigned FFTW_PLAN_RIGOR = DEFAULT_FFTW_FLAG); - ~fluid_solver_base(); - - void low_pass_Fourier(cnumber *__restrict__ a, int howmany, double kmax); - void dealias(cnumber *__restrict__ a, int howmany); - void force_divfree(cnumber *__restrict__ a); - void symmetrize(cnumber *__restrict__ a, int howmany); - void clean_up_real_space(rnumber *__restrict__ a, int howmany); - void cospectrum(cnumber *__restrict__ a, cnumber *__restrict__ b, double *__restrict__ spec); - void cospectrum(cnumber *__restrict__ a, cnumber *__restrict__ b, double *__restrict__ spec, const double k2exponent); - double autocorrel(cnumber *__restrict__ a); - void compute_rspace_stats( - const rnumber *__restrict__ a, - const hid_t group, - const std::string dset_name, - const hsize_t toffset, - const std::vector<double> max_estimate); - template <int nvals> - void compute_rspace_stats(rnumber *__restrict__ a, - double *__restrict__ moments, - ptrdiff_t *__restrict__ hist, - double max_estimate[nvals], - const int nbins = 256); - inline void compute_rspace_stats3(rnumber *__restrict__ a, - double *__restrict__ moments, - ptrdiff_t *__restrict__ hist, - double max_estimate[3], - const int nbins = 256) - { - this->compute_rspace_stats<3>(a, moments, hist, max_estimate, nbins); - } - inline void compute_rspace_stats4(rnumber *__restrict__ a, - double *__restrict__ moments, - ptrdiff_t *__restrict__ hist, - double max_estimate[4], - const int nbins = 256) - { - this->compute_rspace_stats<4>(a, moments, hist, max_estimate, nbins); - } - void compute_vector_gradient(rnumber (*__restrict__ A)[2], rnumber(*__restrict__ source)[2]); - void write_spectrum(const char *fname, cnumber *a, const double k2exponent = 0.0); - void fill_up_filename(const char *base_name, char *full_name); - int read_base(const char *fname, rnumber *data); - int read_base(const char *fname, cnumber *data); - int write_base(const char *fname, rnumber *data); - int write_base(const char *fname, cnumber *data); -}; - - - -/*****************************************************************************/ -/* macros for loops */ - -/* Fourier space loop */ -template <class ObjectType, class FuncType> -void CLOOP(ObjectType* obj, FuncType expression) -{ - TIMEZONE("CLOOP"); - #pragma omp parallel - { - const hsize_t start = OmpUtils::ForIntervalStart(obj->cd->subsizes[0]); - const hsize_t end = OmpUtils::ForIntervalEnd(obj->cd->subsizes[0]); - for (ptrdiff_t yindex = start; yindex < ptrdiff_t(end); yindex++){ - ptrdiff_t cindex = yindex*obj->cd->subsizes[1]*obj->cd->subsizes[2]; - for (ptrdiff_t zindex = 0; zindex < obj->cd->subsizes[1]; zindex++) - for (ptrdiff_t xindex = 0; xindex < obj->cd->subsizes[2]; xindex++) - { - expression(cindex, xindex, yindex, zindex); - cindex++; - } - } - } -} - -template <class ObjectType, class FuncType> -void CLOOP_NXMODES(ObjectType* obj, FuncType expression) -{ - TIMEZONE("CLOOP_NXMODES"); - #pragma omp parallel - { - const hsize_t start = OmpUtils::ForIntervalStart(obj->cd->subsizes[1]); - const hsize_t end = OmpUtils::ForIntervalEnd(obj->cd->subsizes[1]); - for (ptrdiff_t yindex = 0; yindex < obj->cd->subsizes[0]; yindex++){ - for (ptrdiff_t zindex = start; zindex < ptrdiff_t(end); zindex++) - { - ptrdiff_t cindex = yindex*obj->cd->subsizes[1]*obj->cd->subsizes[2] - + zindex*obj->cd->subsizes[2]; - int nxmodes = 1; - ptrdiff_t xindex = 0; - expression(); - cindex++; - nxmodes = 2; - for (xindex = 1; xindex < obj->cd->subsizes[2]; xindex++) - { - expression(); - cindex++; - } - } - } - } -} - - -template <class ObjectType, class FuncType> -void CLOOP_K2(ObjectType* obj, FuncType expression) -{ - TIMEZONE("CLOOP_K2"); - #pragma omp parallel - { - const hsize_t start = OmpUtils::ForIntervalStart(obj->cd->subsizes[1]); - const hsize_t end = OmpUtils::ForIntervalEnd(obj->cd->subsizes[1]); - for (ptrdiff_t yindex = 0; yindex < obj->cd->subsizes[0]; yindex++){ - for (ptrdiff_t zindex = start; zindex < ptrdiff_t(end); zindex++){ - ptrdiff_t cindex = yindex*obj->cd->subsizes[1]*obj->cd->subsizes[2] - + zindex*obj->cd->subsizes[2]; - for (ptrdiff_t xindex = 0; xindex < obj->cd->subsizes[2]; xindex++) - { - double k2 = (obj->kx[xindex]*obj->kx[xindex] + - obj->ky[yindex]*obj->ky[yindex] + - obj->kz[zindex]*obj->kz[zindex]); - expression(cindex, xindex, yindex, zindex, k2); - cindex++; - } - } - } - } -} - - -template <class ObjectType, class FuncType> -void CLOOP_K2_NXMODES(ObjectType* obj, FuncType expression) -{ - #pragma omp parallel - { - const hsize_t start = OmpUtils::ForIntervalStart(obj->cd->subsizes[1]); - const hsize_t end = OmpUtils::ForIntervalEnd(obj->cd->subsizes[1]); - for (ptrdiff_t yindex = 0; yindex < obj->cd->subsizes[0]; yindex++){ - for (ptrdiff_t zindex = start; zindex < ptrdiff_t(end); zindex++) - { - ptrdiff_t cindex = yindex*obj->cd->subsizes[1]*obj->cd->subsizes[2] - + zindex*obj->cd->subsizes[2]; - int nxmodes = 1; - ptrdiff_t xindex = 0; - double k2 = (obj->kx[xindex]*obj->kx[xindex] + - obj->ky[yindex]*obj->ky[yindex] + - obj->kz[zindex]*obj->kz[zindex]); - expression(cindex, xindex, yindex, zindex, k2, nxmodes); - cindex++; - nxmodes = 2; - for (xindex = 1; xindex < obj->cd->subsizes[2]; xindex++) - { - double k2 = (obj->kx[xindex]*obj->kx[xindex] + - obj->ky[yindex]*obj->ky[yindex] + - obj->kz[zindex]*obj->kz[zindex]); - expression(cindex, xindex, yindex, zindex, k2, nxmodes); - cindex++; - } - } - } - } -} - - -template <class ObjectType, class FuncType> -void RLOOP(ObjectType* obj, FuncType expression) -{ - #pragma omp parallel - { - const hsize_t start = OmpUtils::ForIntervalStart(obj->rd->subsizes[1]); - const hsize_t end = OmpUtils::ForIntervalEnd(obj->rd->subsizes[1]); - for (int zindex = 0; zindex < obj->rd->subsizes[0] ; zindex++) - for (int yindex = start; yindex < ptrdiff_t(end); yindex++) - { - ptrdiff_t rindex = (zindex * obj->rd->subsizes[1] + yindex)*(obj->rd->subsizes[2]+2); - for (int xindex = 0; xindex < obj->rd->subsizes[2]; xindex++) - { - expression(rindex, xindex, yindex, zindex); - rindex++; - } - } - } -} - -/*****************************************************************************/ - -#endif//FLUID_SOLVER_BASE - diff --git a/bfps/cpp/full_code/NSVE.cpp b/bfps/cpp/full_code/NSVE.cpp index d9cb72a220aaf6cb124cb37f827373f9a44b03ac..ecec7db31235bc827b17b55f0c733f305e488761 100644 --- a/bfps/cpp/full_code/NSVE.cpp +++ b/bfps/cpp/full_code/NSVE.cpp @@ -1,7 +1,10 @@ +#define NDEBUG + #include <string> #include <cmath> #include "NSVE.hpp" #include "scope_timer.hpp" +#include "fftw_tools.hpp" template <typename rnumber> @@ -37,11 +40,11 @@ int NSVE<rnumber>::initialize(void) simname.c_str(), nx, ny, nz, dkx, dky, dkz, - DEFAULT_FFTW_FLAG); + fftw_planner_string_to_flag[this->fftw_plan_rigor]); this->tmp_vec_field = new field<rnumber, FFTW, THREE>( nx, ny, nz, this->comm, - DEFAULT_FFTW_FLAG); + fftw_planner_string_to_flag[this->fftw_plan_rigor]); this->fs->checkpoints_per_file = checkpoints_per_file; @@ -161,6 +164,7 @@ int NSVE<rnumber>::read_parameters(void) this->max_vorticity_estimate = hdf5_tools::read_value<double>(parameter_file, "parameters/max_vorticity_estimate"); std::string tmp = hdf5_tools::read_string(parameter_file, "parameters/forcing_type"); snprintf(this->forcing_type, 511, "%s", tmp.c_str()); + this->fftw_plan_rigor = hdf5_tools::read_string(parameter_file, "parameters/fftw_plan_rigor"); H5Fclose(parameter_file); return EXIT_SUCCESS; } diff --git a/bfps/cpp/full_code/NSVE.hpp b/bfps/cpp/full_code/NSVE.hpp index 062627fd1cc9513bbd29a14199e05d3a084c0851..83c63d35790d3616cf143da1ac43bec133e91675 100644 --- a/bfps/cpp/full_code/NSVE.hpp +++ b/bfps/cpp/full_code/NSVE.hpp @@ -53,6 +53,7 @@ class NSVE: public direct_numerical_simulation double max_velocity_estimate; double max_vorticity_estimate; double nu; + std::string fftw_plan_rigor; /* other stuff */ vorticity_equation<rnumber, FFTW> *fs; diff --git a/bfps/cpp/full_code/NSVE_field_stats.cpp b/bfps/cpp/full_code/NSVE_field_stats.cpp index 15980a20141a563be08ad0b28a3190b3e9e1c17c..b1c8d567592712f5d9feadd2caac73ca279238dd 100644 --- a/bfps/cpp/full_code/NSVE_field_stats.cpp +++ b/bfps/cpp/full_code/NSVE_field_stats.cpp @@ -1,6 +1,7 @@ #include <string> #include <cmath> #include "NSVE_field_stats.hpp" +#include "fftw_tools.hpp" #include "scope_timer.hpp" @@ -12,7 +13,7 @@ int NSVE_field_stats<rnumber>::initialize(void) this->vorticity = new field<rnumber, FFTW, THREE>( nx, ny, nz, this->comm, - DEFAULT_FFTW_FLAG); + fftw_planner_string_to_flag[this->fftw_plan_rigor]); this->vorticity->real_space_representation = false; hid_t parameter_file = H5Fopen( (this->simname + std::string(".h5")).c_str(), @@ -43,6 +44,7 @@ int NSVE_field_stats<rnumber>::initialize(void) this->vorticity->clayout->starts, this->vorticity->clayout->comm); } + this->fftw_plan_rigor = hdf5_tools::read_string(parameter_file, "parameters/fftw_plan_rigor"); H5Fclose(parameter_file); return EXIT_SUCCESS; } diff --git a/bfps/cpp/full_code/NSVE_field_stats.hpp b/bfps/cpp/full_code/NSVE_field_stats.hpp index d544c0c7d5f4c75559e63ea3e59bf9457d4730c5..28a2376f17ac2ac837cbacac828cd91572bb3a17 100644 --- a/bfps/cpp/full_code/NSVE_field_stats.hpp +++ b/bfps/cpp/full_code/NSVE_field_stats.hpp @@ -42,6 +42,8 @@ class NSVE_field_stats: public postprocess private: field_binary_IO<rnumber, COMPLEX, THREE> *bin_IO; public: + std::string fftw_plan_rigor; + field<rnumber, FFTW, THREE> *vorticity; NSVE_field_stats( diff --git a/bfps/cpp/full_code/NSVEcomplex_particles.cpp b/bfps/cpp/full_code/NSVEcomplex_particles.cpp index 9b910e9bb7a5aaf6b36d884858a095ff9971dffa..02a199317ae6bdf294adae1b8805b89df7f276b9 100644 --- a/bfps/cpp/full_code/NSVEcomplex_particles.cpp +++ b/bfps/cpp/full_code/NSVEcomplex_particles.cpp @@ -24,6 +24,8 @@ +#define NDEBUG + #include <string> #include <cmath> #include "NSVEcomplex_particles.hpp" diff --git a/bfps/cpp/full_code/NSVEparticles.cpp b/bfps/cpp/full_code/NSVEparticles.cpp index b09e32805bbfb61469926be9f9d1b259066f9080..bcb2f435e092ce1288ac28e8e4452bc1a034c8e0 100644 --- a/bfps/cpp/full_code/NSVEparticles.cpp +++ b/bfps/cpp/full_code/NSVEparticles.cpp @@ -1,6 +1,9 @@ + +#define NDEBUG + #include <string> #include <cmath> #include "NSVEparticles.hpp" diff --git a/bfps/cpp/full_code/direct_numerical_simulation.cpp b/bfps/cpp/full_code/direct_numerical_simulation.cpp index c0b0441e5b274cbe088b6fd0903823c6d17b2076..cacda323153f0ed0f628b9fccc38e38fdcdc253c 100644 --- a/bfps/cpp/full_code/direct_numerical_simulation.cpp +++ b/bfps/cpp/full_code/direct_numerical_simulation.cpp @@ -1,3 +1,5 @@ +#define NDEBUG + #include <cstdlib> #include <sys/types.h> #include <sys/stat.h> diff --git a/bfps/cpp/full_code/field_output_test.cpp b/bfps/cpp/full_code/field_output_test.cpp index 30df4e7512bec3c08325fe156b21789f80882f54..724060992ad5bba14adbe871c98067b4e57728ab 100644 --- a/bfps/cpp/full_code/field_output_test.cpp +++ b/bfps/cpp/full_code/field_output_test.cpp @@ -36,7 +36,7 @@ int field_output_test<rnumber>::do_work(void) field<rnumber, FFTW, ONE> *scal_field = new field<rnumber, FFTW, ONE>( this->nx, this->ny, this->nz, this->comm, - DEFAULT_FFTW_FLAG); + FFTW_ESTIMATE); std::default_random_engine rgen; std::normal_distribution<rnumber> rdist; rgen.seed(1); diff --git a/bfps/cpp/full_code/field_test.cpp b/bfps/cpp/full_code/field_test.cpp index 1627bc4088581468ebedab585db7ca9d6519d3a3..a9d531bcaf939b8b46ae539c57c00ae9b121c0a4 100644 --- a/bfps/cpp/full_code/field_test.cpp +++ b/bfps/cpp/full_code/field_test.cpp @@ -44,11 +44,11 @@ int field_test<rnumber>::do_work(void) field<rnumber, FFTW, ONE> *scal_field = new field<rnumber, FFTW, ONE>( this->nx, this->ny, this->nz, this->comm, - DEFAULT_FFTW_FLAG); + FFTW_ESTIMATE); field<rnumber, FFTW, ONE> *scal_field_alt = new field<rnumber, FFTW, ONE>( this->nx, this->ny, this->nz, this->comm, - DEFAULT_FFTW_FLAG); + FFTW_ESTIMATE); std::default_random_engine rgen; std::normal_distribution<rnumber> rdist; rgen.seed(2); diff --git a/bfps/cpp/full_code/filter_test.cpp b/bfps/cpp/full_code/filter_test.cpp index 4db13843fa8f69db77f8a15cbd0563feb087dfcf..6dbd05a940ff88623cd10802376497148bda5549 100644 --- a/bfps/cpp/full_code/filter_test.cpp +++ b/bfps/cpp/full_code/filter_test.cpp @@ -12,7 +12,7 @@ int filter_test<rnumber>::initialize(void) this->scal_field = new field<rnumber, FFTW, ONE>( nx, ny, nz, this->comm, - DEFAULT_FFTW_FLAG); + FFTW_ESTIMATE); this->kk = new kspace<FFTW, SMOOTH>( this->scal_field->clayout, this->dkx, this->dky, this->dkz); diff --git a/bfps/cpp/full_code/joint_acc_vel_stats.cpp b/bfps/cpp/full_code/joint_acc_vel_stats.cpp index 1c28527e5986e12a5d66151a5623194e4ffab3aa..fff2e2f5f4e83c3e89b742a18f2e3feaeb1466d1 100644 --- a/bfps/cpp/full_code/joint_acc_vel_stats.cpp +++ b/bfps/cpp/full_code/joint_acc_vel_stats.cpp @@ -110,7 +110,7 @@ int joint_acc_vel_stats<rnumber>::work_on_current_iteration(void) vel = new field<rnumber, FFTW, THREE>( this->nx, this->ny, this->nz, this->comm, - DEFAULT_FFTW_FLAG); + this->vorticity->fftw_plan_rigor); invert_curl(kk, this->ve->cvorticity, vel); vel->ift(); diff --git a/bfps/cpp/full_code/native_binary_to_hdf5.cpp b/bfps/cpp/full_code/native_binary_to_hdf5.cpp index fb5a39c2af8a88a158df679ad27ce0f08fab37f8..fe8e1c41a937e49db264aaca41c82df2503e4c99 100644 --- a/bfps/cpp/full_code/native_binary_to_hdf5.cpp +++ b/bfps/cpp/full_code/native_binary_to_hdf5.cpp @@ -12,7 +12,7 @@ int native_binary_to_hdf5<rnumber>::initialize(void) this->vec_field = new field<rnumber, FFTW, THREE>( nx, ny, nz, this->comm, - DEFAULT_FFTW_FLAG); + FFTW_ESTIMATE); this->vec_field->real_space_representation = false; this->bin_IO = new field_binary_IO<rnumber, COMPLEX, THREE>( this->vec_field->clayout->sizes, diff --git a/bfps/cpp/full_code/symmetrize_test.cpp b/bfps/cpp/full_code/symmetrize_test.cpp index 821161da846a323721c07ed47a7c66d9efea78f0..7cf96a71efe881876de1bcef2ab4d9f0482aaddf 100644 --- a/bfps/cpp/full_code/symmetrize_test.cpp +++ b/bfps/cpp/full_code/symmetrize_test.cpp @@ -2,6 +2,7 @@ #include <cmath> #include <random> #include "symmetrize_test.hpp" +#include "fftw_tools.hpp" #include "scope_timer.hpp" @@ -31,6 +32,7 @@ int symmetrize_test<rnumber>::read_parameters() H5P_DEFAULT); this->random_seed = hdf5_tools::read_value<int>( parameter_file, "/parameters/random_seed"); + this->fftw_plan_rigor = hdf5_tools::read_string(parameter_file, "parameters/fftw_plan_rigor"); H5Fclose(parameter_file); return EXIT_SUCCESS; } @@ -44,13 +46,13 @@ int symmetrize_test<rnumber>::do_work(void) field<rnumber, FFTW, THREE> *test_field0 = new field<rnumber, FFTW, THREE>( this->nx, this->ny, this->nz, this->comm, - DEFAULT_FFTW_FLAG); + fftw_planner_string_to_flag[this->fftw_plan_rigor]); DEBUG_MSG("finished allocating field0\n"); DEBUG_MSG("about to allocate field1\n"); field<rnumber, FFTW, THREE> *test_field1 = new field<rnumber, FFTW, THREE>( this->nx, this->ny, this->nz, this->comm, - DEFAULT_FFTW_FLAG); + fftw_planner_string_to_flag[this->fftw_plan_rigor]); DEBUG_MSG("finished allocating field1\n"); std::default_random_engine rgen; std::normal_distribution<rnumber> rdist; diff --git a/bfps/cpp/full_code/symmetrize_test.hpp b/bfps/cpp/full_code/symmetrize_test.hpp index d3fbbaeb0728959234ad53859d3940c8ef00ebd9..628aee6f5ba3fac23cfbe551418a6ff1213d7d5c 100644 --- a/bfps/cpp/full_code/symmetrize_test.hpp +++ b/bfps/cpp/full_code/symmetrize_test.hpp @@ -42,6 +42,7 @@ template <typename rnumber> class symmetrize_test: public test { public: + std::string fftw_plan_rigor; int random_seed; symmetrize_test( diff --git a/bfps/cpp/full_code/test_interpolation.cpp b/bfps/cpp/full_code/test_interpolation.cpp index 5ef11de44b6f6a36ab6827facae3c637b702bc58..2acd3c27426a4cdd2af244dfaa6b1779b2871f61 100644 --- a/bfps/cpp/full_code/test_interpolation.cpp +++ b/bfps/cpp/full_code/test_interpolation.cpp @@ -30,18 +30,18 @@ int test_interpolation<rnumber>::initialize(void) this->vorticity = new field<rnumber, FFTW, THREE>( this->nx, this->ny, this->nz, this->comm, - DEFAULT_FFTW_FLAG); + FFTW_ESTIMATE); this->vorticity->real_space_representation = false; this->velocity = new field<rnumber, FFTW, THREE>( this->nx, this->ny, this->nz, this->comm, - DEFAULT_FFTW_FLAG); + FFTW_ESTIMATE); this->nabla_u = new field<rnumber, FFTW, THREExTHREE>( this->nx, this->ny, this->nz, this->comm, - DEFAULT_FFTW_FLAG); + FFTW_ESTIMATE); this->kk = new kspace<FFTW, SMOOTH>( this->vorticity->clayout, this->dkx, this->dky, this->dkz); diff --git a/bfps/cpp/hdf5_tools.cpp b/bfps/cpp/hdf5_tools.cpp index c2ef6aaebf2538de5575627baf6403d39e749d2a..25acaf21b662501948616236ee1d441df2527ad3 100644 --- a/bfps/cpp/hdf5_tools.cpp +++ b/bfps/cpp/hdf5_tools.cpp @@ -208,17 +208,26 @@ std::string hdf5_tools::read_string( const hid_t group, const std::string dset_name) { - hid_t dset = H5Dopen(group, dset_name.c_str(), H5P_DEFAULT); - hid_t space = H5Dget_space(dset); - hid_t memtype = H5Dget_type(dset); - char *string_data = (char*)malloc(256); - H5Dread(dset, memtype, H5S_ALL, H5S_ALL, H5P_DEFAULT, &string_data); - std::string std_string_data = std::string(string_data); - free(string_data); - H5Sclose(space); - H5Tclose(memtype); - H5Dclose(dset); - return std_string_data; + if (H5Lexists(group, dset_name.c_str(), H5P_DEFAULT)) + { + hid_t dset = H5Dopen(group, dset_name.c_str(), H5P_DEFAULT); + hid_t space = H5Dget_space(dset); + hid_t memtype = H5Dget_type(dset); + char *string_data = (char*)malloc(256); + H5Dread(dset, memtype, H5S_ALL, H5S_ALL, H5P_DEFAULT, &string_data); + std::string std_string_data = std::string(string_data); + free(string_data); + H5Sclose(space); + H5Tclose(memtype); + H5Dclose(dset); + return std_string_data; + } + else + { + DEBUG_MSG("attempted to read dataset %s which does not exist.\n", + dset_name.c_str()); + return std::string("parameter does not exist"); + } } template diff --git a/bfps/cpp/interpolator.cpp b/bfps/cpp/interpolator.cpp deleted file mode 100644 index a0b38c4059585cc7fd58ab830b792be4f8bc193d..0000000000000000000000000000000000000000 --- a/bfps/cpp/interpolator.cpp +++ /dev/null @@ -1,214 +0,0 @@ -/********************************************************************** -* * -* Copyright 2015 Max Planck Institute * -* for Dynamics and Self-Organization * -* * -* This file is part of bfps. * -* * -* bfps is free software: you can redistribute it and/or modify * -* it under the terms of the GNU General Public License as published * -* by the Free Software Foundation, either version 3 of the License, * -* or (at your option) any later version. * -* * -* bfps is distributed in the hope that it will be useful, * -* but WITHOUT ANY WARRANTY; without even the implied warranty of * -* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * -* GNU General Public License for more details. * -* * -* You should have received a copy of the GNU General Public License * -* along with bfps. If not, see <http://www.gnu.org/licenses/> * -* * -* Contact: Cristian.Lalescu@ds.mpg.de * -* * -**********************************************************************/ - - - -#define NDEBUG - -#include "interpolator.hpp" - -template <class rnumber, int interp_neighbours> -interpolator<rnumber, interp_neighbours>::interpolator( - fluid_solver_base<rnumber> *fs, - base_polynomial_values BETA_POLYS, - ...) : interpolator_base<rnumber, interp_neighbours>(fs, BETA_POLYS) -{ - int tdims[4]; - this->compute_beta = BETA_POLYS; - tdims[0] = (interp_neighbours+1)*2*this->descriptor->nprocs + this->descriptor->sizes[0]; - tdims[1] = this->descriptor->sizes[1]; - tdims[2] = this->descriptor->sizes[2]+2; - tdims[3] = this->descriptor->sizes[3]; - this->buffered_descriptor = new field_descriptor<rnumber>( - 4, tdims, - this->descriptor->mpi_dtype, - this->descriptor->comm); - this->buffer_size = (interp_neighbours+1)*this->buffered_descriptor->slice_size; - this->field = new rnumber[this->buffered_descriptor->local_size]; -} - -template <class rnumber, int interp_neighbours> -interpolator<rnumber, interp_neighbours>::~interpolator() -{ - delete[] this->field; - delete this->buffered_descriptor; -} - -template <class rnumber, int interp_neighbours> -int interpolator<rnumber, interp_neighbours>::read_rFFTW(const void *void_src) -{ - rnumber *src = (rnumber*)void_src; - rnumber *dst = this->field; - /* do big copy of middle stuff */ - std::copy(src, - src + this->buffered_descriptor->slice_size*this->descriptor->subsizes[0], - dst + this->buffer_size); - MPI_Datatype MPI_RNUM = (sizeof(rnumber) == 4) ? MPI_FLOAT : MPI_DOUBLE; - int rsrc; - /* get upper slices */ - for (int rdst = 0; rdst < this->descriptor->nprocs; rdst++) - { - rsrc = this->descriptor->rank[(this->descriptor->all_start0[rdst] + - this->descriptor->all_size0[rdst]) % - this->descriptor->sizes[0]]; - if (this->descriptor->myrank == rsrc) - MPI_Send( - src, - this->buffer_size, - MPI_RNUM, - rdst, - 2*(rsrc*this->descriptor->nprocs + rdst), - this->buffered_descriptor->comm); - if (this->descriptor->myrank == rdst) - MPI_Recv( - dst + this->buffer_size + this->buffered_descriptor->slice_size*this->descriptor->subsizes[0], - this->buffer_size, - MPI_RNUM, - rsrc, - 2*(rsrc*this->descriptor->nprocs + rdst), - this->buffered_descriptor->comm, - MPI_STATUS_IGNORE); - } - /* get lower slices */ - for (int rdst = 0; rdst < this->descriptor->nprocs; rdst++) - { - rsrc = this->descriptor->rank[MOD(this->descriptor->all_start0[rdst] - 1, - this->descriptor->sizes[0])]; - if (this->descriptor->myrank == rsrc) - MPI_Send( - src + this->buffered_descriptor->slice_size*this->descriptor->subsizes[0] - this->buffer_size, - this->buffer_size, - MPI_RNUM, - rdst, - 2*(rsrc*this->descriptor->nprocs + rdst)+1, - this->descriptor->comm); - if (this->descriptor->myrank == rdst) - MPI_Recv( - dst, - this->buffer_size, - MPI_RNUM, - rsrc, - 2*(rsrc*this->descriptor->nprocs + rdst)+1, - this->descriptor->comm, - MPI_STATUS_IGNORE); - } - return EXIT_SUCCESS; -} - -template <class rnumber, int interp_neighbours> -void interpolator<rnumber, interp_neighbours>::sample( - const int nparticles, - const int pdimension, - const double *__restrict__ x, - double *__restrict__ y, - const int *deriv) -{ - /* get grid coordinates */ - int *xg = new int[3*nparticles]; - double *xx = new double[3*nparticles]; - double *yy = new double[3*nparticles]; - std::fill_n(yy, 3*nparticles, 0.0); - this->get_grid_coordinates(nparticles, pdimension, x, xg, xx); - /* perform interpolation */ - for (int p=0; p<nparticles; p++) - if (this->descriptor->rank[MOD(xg[p*3+2], this->descriptor->sizes[0])] == this->descriptor->myrank) - this->operator()(xg + p*3, xx + p*3, yy + p*3, deriv); - MPI_Allreduce( - yy, - y, - 3*nparticles, - MPI_DOUBLE, - MPI_SUM, - this->descriptor->comm); - delete[] yy; - delete[] xg; - delete[] xx; -} - -template <class rnumber, int interp_neighbours> -void interpolator<rnumber, interp_neighbours>::operator()( - const int *xg, - const double *xx, - double *__restrict__ dest, - const int *deriv) -{ - double bx[interp_neighbours*2+2], by[interp_neighbours*2+2], bz[interp_neighbours*2+2]; - if (deriv == NULL) - { - this->compute_beta(0, xx[0], bx); - this->compute_beta(0, xx[1], by); - this->compute_beta(0, xx[2], bz); - } - else - { - this->compute_beta(deriv[0], xx[0], bx); - this->compute_beta(deriv[1], xx[1], by); - this->compute_beta(deriv[2], xx[2], bz); - } - std::fill_n(dest, 3, 0); - ptrdiff_t bigiz, bigiy, bigix; - for (int iz = -interp_neighbours; iz <= interp_neighbours+1; iz++) - { - bigiz = ptrdiff_t(xg[2]+iz)-this->descriptor->starts[0]; - for (int iy = -interp_neighbours; iy <= interp_neighbours+1; iy++) - { - bigiy = ptrdiff_t(MOD(xg[1]+iy, this->descriptor->sizes[1])); - for (int ix = -interp_neighbours; ix <= interp_neighbours+1; ix++) - { - bigix = ptrdiff_t(MOD(xg[0]+ix, this->descriptor->sizes[2])); - ptrdiff_t tindex = ((bigiz *this->buffered_descriptor->sizes[1] + - bigiy)*this->buffered_descriptor->sizes[2] + - bigix)*3 + this->buffer_size; - for (int c=0; c<3; c++) - { - dest[c] += this->field[tindex+c]*(bz[iz+interp_neighbours]* - by[iy+interp_neighbours]* - bx[ix+interp_neighbours]); - } - } - } - } -} - -template class interpolator<float, 1>; -template class interpolator<float, 2>; -template class interpolator<float, 3>; -template class interpolator<float, 4>; -template class interpolator<float, 5>; -template class interpolator<float, 6>; -template class interpolator<float, 7>; -template class interpolator<float, 8>; -template class interpolator<float, 9>; -template class interpolator<float, 10>; -template class interpolator<double, 1>; -template class interpolator<double, 2>; -template class interpolator<double, 3>; -template class interpolator<double, 4>; -template class interpolator<double, 5>; -template class interpolator<double, 6>; -template class interpolator<double, 7>; -template class interpolator<double, 8>; -template class interpolator<double, 9>; -template class interpolator<double, 10>; - diff --git a/bfps/cpp/interpolator.hpp b/bfps/cpp/interpolator.hpp deleted file mode 100644 index 7e56ebe159fd24ed7cf623f0a869e1d262d4aadb..0000000000000000000000000000000000000000 --- a/bfps/cpp/interpolator.hpp +++ /dev/null @@ -1,79 +0,0 @@ -/********************************************************************** -* * -* Copyright 2015 Max Planck Institute * -* for Dynamics and Self-Organization * -* * -* This file is part of bfps. * -* * -* bfps is free software: you can redistribute it and/or modify * -* it under the terms of the GNU General Public License as published * -* by the Free Software Foundation, either version 3 of the License, * -* or (at your option) any later version. * -* * -* bfps is distributed in the hope that it will be useful, * -* but WITHOUT ANY WARRANTY; without even the implied warranty of * -* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * -* GNU General Public License for more details. * -* * -* You should have received a copy of the GNU General Public License * -* along with bfps. If not, see <http://www.gnu.org/licenses/> * -* * -* Contact: Cristian.Lalescu@ds.mpg.de * -* * -**********************************************************************/ - - - -#include <cmath> -#include "field_descriptor.hpp" -#include "fftw_tools.hpp" -#include "fluid_solver_base.hpp" -#include "interpolator_base.hpp" - -#ifndef INTERPOLATOR - -#define INTERPOLATOR - -template <class rnumber, int interp_neighbours> -class interpolator:public interpolator_base<rnumber, interp_neighbours> -{ - private: - /* pointer to buffered field */ - rnumber *field; - - public: - using interpolator_base<rnumber, interp_neighbours>::operator(); - ptrdiff_t buffer_size; - - /* descriptor for buffered field */ - field_descriptor<rnumber> *buffered_descriptor; - - interpolator( - fluid_solver_base<rnumber> *FSOLVER, - base_polynomial_values BETA_POLYS, - ...); - ~interpolator(); - - int read_rFFTW(const void *src); - - inline int get_rank(double z) - { - return this->descriptor->rank[MOD(int(floor(z/this->dz)), this->descriptor->sizes[0])]; - } - - /* interpolate field at an array of locations */ - void sample( - const int nparticles, - const int pdimension, - const double *__restrict__ x, - double *__restrict__ y, - const int *deriv = NULL); - void operator()( - const int *__restrict__ xg, - const double *__restrict__ xx, - double *__restrict__ dest, - const int *deriv = NULL); -}; - -#endif//INTERPOLATOR - diff --git a/bfps/cpp/interpolator_base.cpp b/bfps/cpp/interpolator_base.cpp deleted file mode 100644 index 668a965c65744ac5aae31afb6bee05711a433657..0000000000000000000000000000000000000000 --- a/bfps/cpp/interpolator_base.cpp +++ /dev/null @@ -1,113 +0,0 @@ -/********************************************************************** -* * -* Copyright 2015 Max Planck Institute * -* for Dynamics and Self-Organization * -* * -* This file is part of bfps. * -* * -* bfps is free software: you can redistribute it and/or modify * -* it under the terms of the GNU General Public License as published * -* by the Free Software Foundation, either version 3 of the License, * -* or (at your option) any later version. * -* * -* bfps is distributed in the hope that it will be useful, * -* but WITHOUT ANY WARRANTY; without even the implied warranty of * -* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * -* GNU General Public License for more details. * -* * -* You should have received a copy of the GNU General Public License * -* along with bfps. If not, see <http://www.gnu.org/licenses/> * -* * -* Contact: Cristian.Lalescu@ds.mpg.de * -* * -**********************************************************************/ - - - -#define NDEBUG - -#include <cmath> -#include "interpolator_base.hpp" - -template <class rnumber, int interp_neighbours> -interpolator_base<rnumber, interp_neighbours>::interpolator_base( - fluid_solver_base<rnumber> *fs, - base_polynomial_values BETA_POLYS) -{ - this->descriptor = fs->rd; - this->compute_beta = BETA_POLYS; - - // compute dx, dy, dz; - this->dx = 4*acos(0) / (fs->dkx*this->descriptor->sizes[2]); - this->dy = 4*acos(0) / (fs->dky*this->descriptor->sizes[1]); - this->dz = 4*acos(0) / (fs->dkz*this->descriptor->sizes[0]); -} - -template <class rnumber, int interp_neighbours> -interpolator_base<rnumber, interp_neighbours>::interpolator_base( - vorticity_equation<rnumber, FFTW> *fs, - base_polynomial_values BETA_POLYS) -{ -// this->descriptor = fs->rd; -// this->compute_beta = BETA_POLYS; -// -// // compute dx, dy, dz; -// this->dx = 4*acos(0) / (fs->kk->dkx*this->descriptor->sizes[2]); -// this->dy = 4*acos(0) / (fs->kk->dky*this->descriptor->sizes[1]); -// this->dz = 4*acos(0) / (fs->kk->dkz*this->descriptor->sizes[0]); -} - -template <class rnumber, int interp_neighbours> -void interpolator_base<rnumber, interp_neighbours>::get_grid_coordinates( - const int nparticles, - const int pdimension, - const double *x, - int *xg, - double *xx) -{ - for (int p=0; p<nparticles; p++) - this->get_grid_coordinates( - x + p*pdimension, - xg + p*3, - xx + p*3); -} - -template <class rnumber, int interp_neighbours> -void interpolator_base<rnumber, interp_neighbours>::get_grid_coordinates( - const double *x, - int *xg, - double *xx) -{ - static double grid_size[] = {this->dx, this->dy, this->dz}; - double tval; - for (int c=0; c<3; c++) - { - tval = floor(x[c]/grid_size[c]); - xg[c] = MOD(int(tval), this->descriptor->sizes[2-c]); - xx[c] = (x[c] - tval*grid_size[c]) / grid_size[c]; - } -} - - - -template class interpolator_base<float, 1>; -template class interpolator_base<float, 2>; -template class interpolator_base<float, 3>; -template class interpolator_base<float, 4>; -template class interpolator_base<float, 5>; -template class interpolator_base<float, 6>; -template class interpolator_base<float, 7>; -template class interpolator_base<float, 8>; -template class interpolator_base<float, 9>; -template class interpolator_base<float, 10>; -template class interpolator_base<double, 1>; -template class interpolator_base<double, 2>; -template class interpolator_base<double, 3>; -template class interpolator_base<double, 4>; -template class interpolator_base<double, 5>; -template class interpolator_base<double, 6>; -template class interpolator_base<double, 7>; -template class interpolator_base<double, 8>; -template class interpolator_base<double, 9>; -template class interpolator_base<double, 10>; - diff --git a/bfps/cpp/interpolator_base.hpp b/bfps/cpp/interpolator_base.hpp deleted file mode 100644 index f4c28db7b9de632e8ec4977dd67f929f06080e19..0000000000000000000000000000000000000000 --- a/bfps/cpp/interpolator_base.hpp +++ /dev/null @@ -1,114 +0,0 @@ -/********************************************************************** -* * -* Copyright 2015 Max Planck Institute * -* for Dynamics and Self-Organization * -* * -* This file is part of bfps. * -* * -* bfps is free software: you can redistribute it and/or modify * -* it under the terms of the GNU General Public License as published * -* by the Free Software Foundation, either version 3 of the License, * -* or (at your option) any later version. * -* * -* bfps is distributed in the hope that it will be useful, * -* but WITHOUT ANY WARRANTY; without even the implied warranty of * -* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * -* GNU General Public License for more details. * -* * -* You should have received a copy of the GNU General Public License * -* along with bfps. If not, see <http://www.gnu.org/licenses/> * -* * -* Contact: Cristian.Lalescu@ds.mpg.de * -* * -**********************************************************************/ - - - -#include "fluid_solver_base.hpp" -#include "vorticity_equation.hpp" -#include "spline_n1.hpp" -#include "spline_n2.hpp" -#include "spline_n3.hpp" -#include "spline_n4.hpp" -#include "spline_n5.hpp" -#include "spline_n6.hpp" -#include "spline_n7.hpp" -#include "spline_n8.hpp" -#include "spline_n9.hpp" -#include "spline_n10.hpp" -#include "Lagrange_polys.hpp" - -#ifndef INTERPOLATOR_BASE - -#define INTERPOLATOR_BASE - -typedef void (*base_polynomial_values)( - const int derivative, - const double fraction, - double *__restrict__ destination); - -template <class rnumber, int interp_neighbours> -class interpolator_base -{ - public: - /* pointer to polynomial function */ - base_polynomial_values compute_beta; - - /* descriptor of field to interpolate */ - field_descriptor<rnumber> *descriptor; - - /* physical parameters of field */ - double dx, dy, dz; - - interpolator_base( - fluid_solver_base<rnumber> *FSOLVER, - base_polynomial_values BETA_POLYS); - - interpolator_base( - vorticity_equation<rnumber, FFTW> *FSOLVER, - base_polynomial_values BETA_POLYS); - virtual ~interpolator_base(){} - - /* may not destroy input */ - virtual int read_rFFTW(const void *src) = 0; - - /* map real locations to grid coordinates */ - void get_grid_coordinates( - const int nparticles, - const int pdimension, - const double *__restrict__ x, - int *__restrict__ xg, - double *__restrict__ xx); - void get_grid_coordinates( - const double *__restrict__ x, - int *__restrict__ xg, - double *__restrict__ xx); - /* interpolate field at an array of locations */ - virtual void sample( - const int nparticles, - const int pdimension, - const double *__restrict__ x, - double *__restrict__ y, - const int *deriv = NULL) = 0; - /* interpolate 1 point */ - virtual void operator()( - const int *__restrict__ xg, - const double *__restrict__ xx, - double *__restrict__ dest, - const int *deriv = NULL) = 0; - - /* interpolate 1 point */ - inline void operator()( - const double *__restrict__ x, - double *__restrict__ dest, - const int *deriv = NULL) - { - int xg[3]; - double xx[3]; - this->get_grid_coordinates(x, xg, xx); - (*this)(xg, xx, dest, deriv); - } -}; - -#endif//INTERPOLATOR_BASE - diff --git a/bfps/cpp/kspace.cpp b/bfps/cpp/kspace.cpp index 3fb250002c33282463684c5f8da051ffe8e35b27..5ceb2a806d9c57556bfa36b98e1e5114c2e64f7a 100644 --- a/bfps/cpp/kspace.cpp +++ b/bfps/cpp/kspace.cpp @@ -23,6 +23,9 @@ **********************************************************************/ + +#define NDEBUG + #include <cmath> #include <cstdlib> #include <algorithm> @@ -31,6 +34,8 @@ #include "scope_timer.hpp" #include "shared_array.hpp" + + template <field_backend be, kspace_dealias_type dt> template <field_components fc> diff --git a/bfps/cpp/rFFTW_distributed_particles.cpp b/bfps/cpp/rFFTW_distributed_particles.cpp deleted file mode 100644 index 265975f8c817a1b40942e076bd016c2921618bbc..0000000000000000000000000000000000000000 --- a/bfps/cpp/rFFTW_distributed_particles.cpp +++ /dev/null @@ -1,804 +0,0 @@ -/********************************************************************** -* * -* Copyright 2015 Max Planck Institute * -* for Dynamics and Self-Organization * -* * -* This file is part of bfps. * -* * -* bfps is free software: you can redistribute it and/or modify * -* it under the terms of the GNU General Public License as published * -* by the Free Software Foundation, either version 3 of the License, * -* or (at your option) any later version. * -* * -* bfps is distributed in the hope that it will be useful, * -* but WITHOUT ANY WARRANTY; without even the implied warranty of * -* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * -* GNU General Public License for more details. * -* * -* You should have received a copy of the GNU General Public License * -* along with bfps. If not, see <http://www.gnu.org/licenses/> * -* * -* Contact: Cristian.Lalescu@ds.mpg.de * -* * -**********************************************************************/ - - - -#define NDEBUG - -#include <cmath> -#include <cassert> -#include <cstring> -#include <string> -#include <sstream> -#include <set> -#include <algorithm> -#include <ctime> - -#include "base.hpp" -#include "rFFTW_distributed_particles.hpp" -#include "fftw_tools.hpp" -#include "scope_timer.hpp" - - -extern int myrank, nprocs; - -template <particle_types particle_type, class rnumber, int interp_neighbours> -rFFTW_distributed_particles<particle_type, rnumber, interp_neighbours>::rFFTW_distributed_particles( - const char *NAME, - const hid_t data_file_id, - rFFTW_interpolator<rnumber, interp_neighbours> *VEL, - const int TRAJ_SKIP, - const int INTEGRATION_STEPS) : particles_io_base<particle_type>( - NAME, - TRAJ_SKIP, - data_file_id, - VEL->descriptor->comm) -{ - TIMEZONE("rFFTW_distributed_particles::rFFTW_distributed_particles"); - /* check that integration_steps has a valid value. - * If NDEBUG is defined, "assert" doesn't do anything. - * With NDEBUG defined, and an invalid INTEGRATION_STEPS, - * the particles will simply sit still. - * */ - assert((INTEGRATION_STEPS <= 6) && - (INTEGRATION_STEPS >= 1)); - /* check that the field layout is compatible with this class. - * if it's not, the code will fail in bad ways, most likely ending up - * with various CPUs locked in some MPI send/receive. - * therefore I prefer to just kill the code at this point, - * no matter whether or not NDEBUG is present. - * */ - if (interp_neighbours*2+2 > VEL->descriptor->subsizes[0]) - { - DEBUG_MSG("parameters incompatible with rFFTW_distributed_particles.\n" - "interp kernel size is %d, local_z_size is %d\n", - interp_neighbours*2+2, VEL->descriptor->subsizes[0]); - if (VEL->descriptor->myrank == 0) - std::cerr << "parameters incompatible with rFFTW_distributed_particles." << std::endl; - exit(0); - } - this->vel = VEL; - this->rhs.resize(INTEGRATION_STEPS); - this->integration_steps = INTEGRATION_STEPS; - /* the particles are expected to be evenly distributed among processes. - * therefore allocating twice that amount of memory seems enough. - * */ - this->state.reserve(2*this->nparticles / this->nprocs); - for (unsigned int i=0; i<this->rhs.size(); i++) - this->rhs[i].reserve(2*this->nparticles / this->nprocs); - - /* build communicators and stuff for interpolation */ - - /* number of processors per domain */ - this->domain_nprocs[-1] = 2; // domain in common with lower z CPU - this->domain_nprocs[ 0] = 1; // local domain - this->domain_nprocs[ 1] = 2; // domain in common with higher z CPU - - /* initialize domain bins */ - this->domain_particles[-1] = std::unordered_set<int>(); - this->domain_particles[ 0] = std::unordered_set<int>(); - this->domain_particles[ 1] = std::unordered_set<int>(); - this->domain_particles[-1].reserve(unsigned( - 1.5*(interp_neighbours*2+2)* - float(this->nparticles) / - this->nprocs)); - this->domain_particles[ 1].reserve(unsigned( - 1.5*(interp_neighbours*2+2)* - float(this->nparticles) / - this->nprocs)); - this->domain_particles[ 0].reserve(unsigned( - 1.5*(this->vel->descriptor->subsizes[0] - interp_neighbours*2-2)* - float(this->nparticles) / - this->nprocs)); - - int color, key; - MPI_Comm tmpcomm; - for (int rank=0; rank<this->nprocs; rank++) - { - color = MPI_UNDEFINED; - key = MPI_UNDEFINED; - if (this->myrank == rank) - { - color = rank; - key = 0; - } - if (this->myrank == MOD(rank + 1, this->nprocs)) - { - color = rank; - key = 1; - } - MPI_Comm_split(this->comm, color, key, &tmpcomm); - if (this->myrank == rank) - this->domain_comm[ 1] = tmpcomm; - if (this->myrank == MOD(rank+1, this->nprocs)) - this->domain_comm[-1] = tmpcomm; - - } - - /* following code may be useful in the future for the general case */ - //this->interp_comm.resize(this->vel->descriptor->sizes[0]); - //this->interp_nprocs.resize(this->vel->descriptor->sizes[0]); - //for (int zg=0; zg<this->vel->descriptor->sizes[0]; zg++) - //{ - // color = (this->vel->get_rank_info( - // (zg+.5)*this->vel->dz, rminz, rmaxz) ? zg : MPI_UNDEFINED); - // key = zg - this->vel->descriptor->starts[0] + interp_neighbours; - // MPI_Comm_split(this->comm, color, key, &this->interp_comm[zg]); - // if (this->interp_comm[zg] != MPI_COMM_NULL) - // MPI_Comm_size(this->interp_comm[zg], &this->interp_nprocs[zg]); - // else - // this->interp_nprocs[zg] = 0; - //} -} - -template <particle_types particle_type, class rnumber, int interp_neighbours> -rFFTW_distributed_particles<particle_type, rnumber, interp_neighbours>::~rFFTW_distributed_particles() -{ -} - -template <particle_types particle_type, class rnumber, int interp_neighbours> -void rFFTW_distributed_particles<particle_type, rnumber, interp_neighbours>::sample( - rFFTW_interpolator<rnumber, interp_neighbours> *field, - const std::unordered_map<int, single_particle_state<particle_type>> &x, - const std::unordered_map<int, std::unordered_set<int>> &dp, - std::unordered_map<int, single_particle_state<POINT3D>> &y) -{ - TIMEZONE("rFFTW_distributed_particles::sample"); - double *yyy; - double *yy; - y.clear(); - /* local z domain */ - yy = new double[3]; - for (auto p: dp.at(0)) - { - (*field)(x.find(p)->second.data, yy); - y[p] = yy; - } - delete[] yy; - /* boundary z domains */ - int domain_index; - for (int rankpair = 0; rankpair < this->nprocs; rankpair++) - { - if (this->myrank == rankpair) - domain_index = 1; - if (this->myrank == MOD(rankpair+1, this->nprocs)) - domain_index = -1; - if (this->myrank == rankpair || - this->myrank == MOD(rankpair+1, this->nprocs)) - { - yy = new double[3*dp.at(domain_index).size()]; - yyy = new double[3*dp.at(domain_index).size()]; - int tindex; - tindex = 0; - // can this sorting be done more efficiently? - std::vector<int> ordered_dp; - { - TIMEZONE("rFFTW_distributed_particles::sample::ordered_dp"); - ordered_dp.reserve(dp.at(domain_index).size()); - for (auto p: dp.at(domain_index)) - ordered_dp.push_back(p); - //std::set<int> ordered_dp(dp.at(domain_index)); - std::sort(ordered_dp.begin(), ordered_dp.end()); - } - - for (auto p: ordered_dp) - //for (auto p: dp.at(domain_index)) - { - (*field)(x.at(p).data, yy + tindex*3); - tindex++; - } - { - TIMEZONE("rFFTW_distributed_particles::sample::MPI_Allreduce"); - MPI_Allreduce( - yy, - yyy, - 3*dp.at(domain_index).size(), - MPI_DOUBLE, - MPI_SUM, - this->domain_comm[domain_index]); - } - tindex = 0; - for (auto p: ordered_dp) - //for (auto p: dp.at(domain_index)) - { - y[p] = yyy + tindex*3; - tindex++; - } - delete[] yy; - delete[] yyy; - } - } -} - -template <particle_types particle_type, class rnumber, int interp_neighbours> -void rFFTW_distributed_particles<particle_type, rnumber, interp_neighbours>::get_rhs( - const std::unordered_map<int, single_particle_state<particle_type>> &x, - const std::unordered_map<int, std::unordered_set<int>> &dp, - std::unordered_map<int, single_particle_state<particle_type>> &y) -{ - std::unordered_map<int, single_particle_state<POINT3D>> yy; - switch(particle_type) - { - case VELOCITY_TRACER: - this->sample(this->vel, x, dp, yy); - y.clear(); - y.reserve(yy.size()); - y.rehash(this->nparticles); - for (auto &pp: yy) - y[pp.first] = pp.second.data; - break; - } -} - -template <particle_types particle_type, class rnumber, int interp_neighbours> -void rFFTW_distributed_particles<particle_type, rnumber, interp_neighbours>::sample( - rFFTW_interpolator<rnumber, interp_neighbours> *field, - const char *dset_name) -{ - std::unordered_map<int, single_particle_state<POINT3D>> y; - this->sample(field, this->state, this->domain_particles, y); - this->write(dset_name, y); -} - -template <particle_types particle_type, class rnumber, int interp_neighbours> -void rFFTW_distributed_particles<particle_type, rnumber, interp_neighbours>::roll_rhs() -{ - for (int i=this->integration_steps-2; i>=0; i--) - rhs[i+1] = rhs[i]; -} - -template <particle_types particle_type, class rnumber, int interp_neighbours> -void rFFTW_distributed_particles<particle_type, rnumber, interp_neighbours>::redistribute( - std::unordered_map<int, single_particle_state<particle_type>> &x, - std::vector<std::unordered_map<int, single_particle_state<particle_type>>> &vals, - std::unordered_map<int, std::unordered_set<int>> &dp) -{ - TIMEZONE("rFFTW_distributed_particles::redistribute"); - //DEBUG_MSG("entered redistribute\n"); - /* get new distribution of particles */ - std::unordered_map<int, std::unordered_set<int>> newdp; - { - TIMEZONE("sort_into_domains"); - this->sort_into_domains(x, newdp); - } - /* take care of particles that are leaving the shared domains */ - int dindex[2] = {-1, 1}; - // for each D of the 2 shared domains - { - TIMEZONE("Loop1"); - for (int di=0; di<2; di++) - // for all particles previously in D - for (auto p: dp[dindex[di]]) - { - // if the particle is no longer in D - if (newdp[dindex[di]].find(p) == newdp[dindex[di]].end()) - { - // and the particle is not in the local domain - if (newdp[0].find(p) == newdp[0].end()) - { - // remove the particle from the local list - x.erase(p); - for (unsigned int i=0; i<vals.size(); i++) - vals[i].erase(p); - } - // if the particle is in the local domain, do nothing - } - } - } - /* take care of particles that are entering the shared domains */ - /* neighbouring rank offsets */ - int ro[2]; - ro[0] = -1; - ro[1] = 1; - /* particles to send, particles to receive */ - std::vector<int> ps[2], pr[2]; - for (int tcounter = 0; tcounter < 2; tcounter++) - { - ps[tcounter].reserve(newdp[dindex[tcounter]].size()); - } - /* number of particles to send, number of particles to receive */ - int nps[2], npr[2]; - int rsrc, rdst; - /* get list of id-s to send */ - { - TIMEZONE("Loop2"); - for (auto &p: dp[0]) - { - for (int di=0; di<2; di++) - { - if (newdp[dindex[di]].find(p) != newdp[dindex[di]].end()) - ps[di].push_back(p); - } - } - } - /* prepare data for send recv */ - for (int i=0; i<2; i++) - nps[i] = ps[i].size(); - for (rsrc = 0; rsrc<this->nprocs; rsrc++) - for (int i=0; i<2; i++) - { - rdst = MOD(rsrc+ro[i], this->nprocs); - if (this->myrank == rsrc){ - TIMEZONE("MPI_Send"); - MPI_Send( - nps+i, - 1, - MPI_INTEGER, - rdst, - 2*(rsrc*this->nprocs + rdst)+i, - this->comm); - } - if (this->myrank == rdst){ - TIMEZONE("MPI_Recv"); - MPI_Recv( - npr+1-i, - 1, - MPI_INTEGER, - rsrc, - 2*(rsrc*this->nprocs + rdst)+i, - this->comm, - MPI_STATUS_IGNORE); - } - } - //DEBUG_MSG("I have to send %d %d particles\n", nps[0], nps[1]); - //DEBUG_MSG("I have to recv %d %d particles\n", npr[0], npr[1]); - for (int i=0; i<2; i++) - pr[i].resize(npr[i]); - - int buffer_size = (nps[0] > nps[1]) ? nps[0] : nps[1]; - buffer_size = (buffer_size > npr[0])? buffer_size : npr[0]; - buffer_size = (buffer_size > npr[1])? buffer_size : npr[1]; - //DEBUG_MSG("buffer size is %d\n", buffer_size); - double *buffer = new double[buffer_size*state_dimension(particle_type)*(1+vals.size())]; - for (rsrc = 0; rsrc<this->nprocs; rsrc++) - for (int i=0; i<2; i++) - { - rdst = MOD(rsrc+ro[i], this->nprocs); - if (this->myrank == rsrc && nps[i] > 0) - { - TIMEZONE("this->myrank == rsrc && nps[i] > 0"); - MPI_Send( - &ps[i].front(), - nps[i], - MPI_INTEGER, - rdst, - 2*(rsrc*this->nprocs + rdst), - this->comm); - int pcounter = 0; - for (int p: ps[i]) - { - std::copy(x[p].data, - x[p].data + state_dimension(particle_type), - buffer + pcounter*(1+vals.size())*state_dimension(particle_type)); - for (unsigned int tindex=0; tindex<vals.size(); tindex++) - { - std::copy(vals[tindex][p].data, - vals[tindex][p].data + state_dimension(particle_type), - buffer + (pcounter*(1+vals.size()) + tindex+1)*state_dimension(particle_type)); - } - pcounter++; - } - MPI_Send( - buffer, - nps[i]*(1+vals.size())*state_dimension(particle_type), - MPI_DOUBLE, - rdst, - 2*(rsrc*this->nprocs + rdst)+1, - this->comm); - } - if (this->myrank == rdst && npr[1-i] > 0) - { - TIMEZONE("this->myrank == rdst && npr[1-i] > 0"); - MPI_Recv( - &pr[1-i].front(), - npr[1-i], - MPI_INTEGER, - rsrc, - 2*(rsrc*this->nprocs + rdst), - this->comm, - MPI_STATUS_IGNORE); - MPI_Recv( - buffer, - npr[1-i]*(1+vals.size())*state_dimension(particle_type), - MPI_DOUBLE, - rsrc, - 2*(rsrc*this->nprocs + rdst)+1, - this->comm, - MPI_STATUS_IGNORE); - int pcounter = 0; - for (int p: pr[1-i]) - { - x[p] = buffer + (pcounter*(1+vals.size()))*state_dimension(particle_type); - newdp[1-i].insert(p); - for (unsigned int tindex=0; tindex<vals.size(); tindex++) - { - vals[tindex][p] = buffer + (pcounter*(1+vals.size()) + tindex+1)*state_dimension(particle_type); - } - pcounter++; - } - } - } - delete[] buffer; - // x has been changed, so newdp is obsolete - // we need to sort into domains again - { - TIMEZONE("sort_into_domains2"); - this->sort_into_domains(x, dp); - } - -#ifndef NDEBUG - /* check that all particles at x are local */ - //for (auto &pp: x) - // if (this->vel->get_rank(pp.second.data[2]) != this->myrank) - // { - // DEBUG_MSG("found particle %d with rank %d\n", - // pp.first, - // this->vel->get_rank(pp.second.data[2])); - // assert(false); - // } -#endif - //DEBUG_MSG("exiting redistribute\n"); -} - - - -template <particle_types particle_type, class rnumber, int interp_neighbours> -void rFFTW_distributed_particles<particle_type, rnumber, interp_neighbours>::AdamsBashforth( - const int nsteps) -{ - this->get_rhs(this->state, this->domain_particles, this->rhs[0]); -#define AdamsBashforth_LOOP_PREAMBLE \ - for (auto &pp: this->state) \ - for (unsigned int i=0; i<state_dimension(particle_type); i++) - switch(nsteps) - { - case 1: - AdamsBashforth_LOOP_PREAMBLE - pp.second[i] += this->dt*this->rhs[0][pp.first][i]; - break; - case 2: - AdamsBashforth_LOOP_PREAMBLE - pp.second[i] += this->dt*(3*this->rhs[0][pp.first][i] - - this->rhs[1][pp.first][i])/2; - break; - case 3: - AdamsBashforth_LOOP_PREAMBLE - pp.second[i] += this->dt*(23*this->rhs[0][pp.first][i] - - 16*this->rhs[1][pp.first][i] - + 5*this->rhs[2][pp.first][i])/12; - break; - case 4: - AdamsBashforth_LOOP_PREAMBLE - pp.second[i] += this->dt*(55*this->rhs[0][pp.first][i] - - 59*this->rhs[1][pp.first][i] - + 37*this->rhs[2][pp.first][i] - - 9*this->rhs[3][pp.first][i])/24; - break; - case 5: - AdamsBashforth_LOOP_PREAMBLE - pp.second[i] += this->dt*(1901*this->rhs[0][pp.first][i] - - 2774*this->rhs[1][pp.first][i] - + 2616*this->rhs[2][pp.first][i] - - 1274*this->rhs[3][pp.first][i] - + 251*this->rhs[4][pp.first][i])/720; - break; - case 6: - AdamsBashforth_LOOP_PREAMBLE - pp.second[i] += this->dt*(4277*this->rhs[0][pp.first][i] - - 7923*this->rhs[1][pp.first][i] - + 9982*this->rhs[2][pp.first][i] - - 7298*this->rhs[3][pp.first][i] - + 2877*this->rhs[4][pp.first][i] - - 475*this->rhs[5][pp.first][i])/1440; - break; - } - this->redistribute(this->state, this->rhs, this->domain_particles); - this->roll_rhs(); -} - - -template <particle_types particle_type, class rnumber, int interp_neighbours> -void rFFTW_distributed_particles<particle_type, rnumber, interp_neighbours>::step() -{ - TIMEZONE("rFFTW_distributed_particles::step"); - this->AdamsBashforth((this->iteration < this->integration_steps) ? - this->iteration+1 : - this->integration_steps); - this->iteration++; -} - - -template <particle_types particle_type, class rnumber, int interp_neighbours> -void rFFTW_distributed_particles<particle_type, rnumber, interp_neighbours>::sort_into_domains( - const std::unordered_map<int, single_particle_state<particle_type>> &x, - std::unordered_map<int, std::unordered_set<int>> &dp) -{ - TIMEZONE("rFFTW_distributed_particles::sort_into_domains"); - int tmpint1, tmpint2; - dp.clear(); - dp[-1] = std::unordered_set<int>(); - dp[ 0] = std::unordered_set<int>(); - dp[ 1] = std::unordered_set<int>(); - dp[-1].reserve(unsigned( - 1.5*(interp_neighbours*2+2)* - float(this->nparticles) / - this->nprocs)); - dp[ 1].reserve(unsigned( - 1.5*(interp_neighbours*2+2)* - float(this->nparticles) / - this->nprocs)); - dp[ 0].reserve(unsigned( - 1.5*(this->vel->descriptor->subsizes[0] - interp_neighbours*2-2)* - float(this->nparticles) / - this->nprocs)); - for (auto &xx: x) - { - if (this->vel->get_rank_info(xx.second.data[2], tmpint1, tmpint2)) - { - if (tmpint1 == tmpint2) - dp[0].insert(xx.first); - else - { - if (this->myrank == tmpint1) - dp[-1].insert(xx.first); - else - dp[ 1].insert(xx.first); - } - } - } -} - - -template <particle_types particle_type, class rnumber, int interp_neighbours> -void rFFTW_distributed_particles<particle_type, rnumber, interp_neighbours>::read() -{ - TIMEZONE("rFFTW_distributed_particles::read"); - double *temp = new double[this->chunk_size*state_dimension(particle_type)]; - int tmpint1, tmpint2; - for (unsigned int cindex=0; cindex<this->get_number_of_chunks(); cindex++) - { - //read state - if (this->myrank == 0){ - TIMEZONE("read_state_chunk"); - this->read_state_chunk(cindex, temp); - } - { - TIMEZONE("MPI_Bcast"); - MPI_Bcast( - temp, - this->chunk_size*state_dimension(particle_type), - MPI_DOUBLE, - 0, - this->comm); - } - for (unsigned int p=0; p<this->chunk_size; p++) - { - if (this->vel->get_rank_info(temp[state_dimension(particle_type)*p+2], tmpint1, tmpint2)) - { - this->state[p+cindex*this->chunk_size] = temp + state_dimension(particle_type)*p; - } - } - //read rhs - if (this->iteration > 0){ - TIMEZONE("this->iteration > 0"); - for (int i=0; i<this->integration_steps; i++) - { - if (this->myrank == 0){ - TIMEZONE("read_rhs_chunk"); - this->read_rhs_chunk(cindex, i, temp); - } - { - TIMEZONE("MPI_Bcast"); - MPI_Bcast( - temp, - this->chunk_size*state_dimension(particle_type), - MPI_DOUBLE, - 0, - this->comm); - } - for (unsigned int p=0; p<this->chunk_size; p++) - { - auto pp = this->state.find(p+cindex*this->chunk_size); - if (pp != this->state.end()) - this->rhs[i][p+cindex*this->chunk_size] = temp + state_dimension(particle_type)*p; - } - } - } - } - this->sort_into_domains(this->state, this->domain_particles); - DEBUG_MSG("%s->state.size = %ld\n", this->name.c_str(), this->state.size()); - for (int domain=-1; domain<=1; domain++) - { - DEBUG_MSG("domain %d nparticles = %ld\n", domain, this->domain_particles[domain].size()); - } - delete[] temp; -} - -template <particle_types particle_type, class rnumber, int interp_neighbours> -void rFFTW_distributed_particles<particle_type, rnumber, interp_neighbours>::write( - const char *dset_name, - std::unordered_map<int, single_particle_state<POINT3D>> &y) -{ - TIMEZONE("rFFTW_distributed_particles::write"); - double *data = new double[this->chunk_size*3]; - double *yy = new double[this->chunk_size*3]; - //int pindex = 0; - for (unsigned int cindex=0; cindex<this->get_number_of_chunks(); cindex++) - { - std::fill_n(yy, this->chunk_size*3, 0); - //for (unsigned int p=0; p<this->chunk_size; p++, pindex++) - //{ - // if (this->domain_particles[-1].find(pindex) != this->domain_particles[-1].end() || - // this->domain_particles[ 0].find(pindex) != this->domain_particles[ 0].end()) - // { - // std::copy(y[pindex].data, - // y[pindex].data + 3, - // yy + p*3); - // } - //} - for (int s = -1; s <= 0; s++) - for (auto &pp: this->domain_particles[s]) - { - if (pp >= int(cindex*this->chunk_size) && - pp < int((cindex+1)*this->chunk_size)) - { - std::copy(y[pp].data, - y[pp].data + 3, - yy + (pp-cindex*this->chunk_size)*3); - } - } - { - TIMEZONE("MPI_Allreduce"); - MPI_Allreduce( - yy, - data, - 3*this->chunk_size, - MPI_DOUBLE, - MPI_SUM, - this->comm); - } - if (this->myrank == 0){ - TIMEZONE("write_point3D_chunk"); - this->write_point3D_chunk(dset_name, cindex, data); - } - } - delete[] yy; - delete[] data; -} - -template <particle_types particle_type, class rnumber, int interp_neighbours> -void rFFTW_distributed_particles<particle_type, rnumber, interp_neighbours>::write( - const bool write_rhs) -{ - TIMEZONE("rFFTW_distributed_particles::write2"); - double *temp0 = new double[this->chunk_size*state_dimension(particle_type)]; - double *temp1 = new double[this->chunk_size*state_dimension(particle_type)]; - //int pindex = 0; - for (unsigned int cindex=0; cindex<this->get_number_of_chunks(); cindex++) - { - //write state - std::fill_n(temp0, state_dimension(particle_type)*this->chunk_size, 0); - //pindex = cindex*this->chunk_size; - //for (unsigned int p=0; p<this->chunk_size; p++, pindex++) - //{ - // if (this->domain_particles[-1].find(pindex) != this->domain_particles[-1].end() || - // this->domain_particles[ 0].find(pindex) != this->domain_particles[ 0].end()) - // { - // TIMEZONE("std::copy"); - // std::copy(this->state[pindex].data, - // this->state[pindex].data + state_dimension(particle_type), - // temp0 + p*state_dimension(particle_type)); - // } - //} - for (int s = -1; s <= 0; s++) - for (auto &pp: this->domain_particles[s]) - { - if (pp >= int(cindex*this->chunk_size) && - pp < int((cindex+1)*this->chunk_size)) - { - std::copy(this->state[pp].data, - this->state[pp].data + state_dimension(particle_type), - temp0 + (pp-cindex*this->chunk_size)*state_dimension(particle_type)); - } - } - { - TIMEZONE("MPI_Allreduce"); - MPI_Allreduce( - temp0, - temp1, - state_dimension(particle_type)*this->chunk_size, - MPI_DOUBLE, - MPI_SUM, - this->comm); - } - if (this->myrank == 0){ - TIMEZONE("write_state_chunk"); - this->write_state_chunk(cindex, temp1); - } - //write rhs - if (write_rhs){ - TIMEZONE("write_rhs"); - for (int i=0; i<this->integration_steps; i++) - { - std::fill_n(temp0, state_dimension(particle_type)*this->chunk_size, 0); - //pindex = cindex*this->chunk_size; - //for (unsigned int p=0; p<this->chunk_size; p++, pindex++) - //{ - // if (this->domain_particles[-1].find(pindex) != this->domain_particles[-1].end() || - // this->domain_particles[ 0].find(pindex) != this->domain_particles[ 0].end()) - // { - // TIMEZONE("std::copy"); - // std::copy(this->rhs[i][pindex].data, - // this->rhs[i][pindex].data + state_dimension(particle_type), - // temp0 + p*state_dimension(particle_type)); - // } - //} - for (int s = -1; s <= 0; s++) - for (auto &pp: this->domain_particles[s]) - { - if (pp >= int(cindex*this->chunk_size) && - pp < int((cindex+1)*this->chunk_size)) - { - std::copy(this->rhs[i][pp].data, - this->rhs[i][pp].data + state_dimension(particle_type), - temp0 + (pp-cindex*this->chunk_size)*state_dimension(particle_type)); - } - } - { - TIMEZONE("MPI_Allreduce"); - MPI_Allreduce( - temp0, - temp1, - state_dimension(particle_type)*this->chunk_size, - MPI_DOUBLE, - MPI_SUM, - this->comm); - } - if (this->myrank == 0){ - TIMEZONE("write_rhs_chunk"); - this->write_rhs_chunk(cindex, i, temp1); - } - } - } - } - delete[] temp0; - delete[] temp1; -} - - -/*****************************************************************************/ -template class rFFTW_distributed_particles<VELOCITY_TRACER, float, 1>; -template class rFFTW_distributed_particles<VELOCITY_TRACER, float, 2>; -template class rFFTW_distributed_particles<VELOCITY_TRACER, float, 3>; -template class rFFTW_distributed_particles<VELOCITY_TRACER, float, 4>; -template class rFFTW_distributed_particles<VELOCITY_TRACER, float, 5>; -template class rFFTW_distributed_particles<VELOCITY_TRACER, float, 6>; -template class rFFTW_distributed_particles<VELOCITY_TRACER, double, 1>; -template class rFFTW_distributed_particles<VELOCITY_TRACER, double, 2>; -template class rFFTW_distributed_particles<VELOCITY_TRACER, double, 3>; -template class rFFTW_distributed_particles<VELOCITY_TRACER, double, 4>; -template class rFFTW_distributed_particles<VELOCITY_TRACER, double, 5>; -template class rFFTW_distributed_particles<VELOCITY_TRACER, double, 6>; -/*****************************************************************************/ - diff --git a/bfps/cpp/rFFTW_distributed_particles.hpp b/bfps/cpp/rFFTW_distributed_particles.hpp deleted file mode 100644 index 400411d5f1fd6e597714be494a72272a76e01206..0000000000000000000000000000000000000000 --- a/bfps/cpp/rFFTW_distributed_particles.hpp +++ /dev/null @@ -1,144 +0,0 @@ -/********************************************************************** -* * -* Copyright 2015 Max Planck Institute * -* for Dynamics and Self-Organization * -* * -* This file is part of bfps. * -* * -* bfps is free software: you can redistribute it and/or modify * -* it under the terms of the GNU General Public License as published * -* by the Free Software Foundation, either version 3 of the License, * -* or (at your option) any later version. * -* * -* bfps is distributed in the hope that it will be useful, * -* but WITHOUT ANY WARRANTY; without even the implied warranty of * -* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * -* GNU General Public License for more details. * -* * -* You should have received a copy of the GNU General Public License * -* along with bfps. If not, see <http://www.gnu.org/licenses/> * -* * -* Contact: Cristian.Lalescu@ds.mpg.de * -* * -**********************************************************************/ - - - -#include <stdio.h> -#include <stdlib.h> -#include <iostream> -#include <unordered_map> -#include <unordered_set> -#include <vector> -#include <hdf5.h> -#include "base.hpp" -#include "particles_base.hpp" -#include "fluid_solver_base.hpp" -#include "rFFTW_interpolator.hpp" - -#ifndef RFFTW_DISTRIBUTED_PARTICLES - -#define RFFTW_DISTRIBUTED_PARTICLES - -template <particle_types particle_type, class rnumber, int interp_neighbours> -class rFFTW_distributed_particles: public particles_io_base<particle_type> -{ - private: - // a "domain" corresponds to a region in 3D real space where a fixed set - // of MPI processes are required to participate in the interpolation - // formula (i.e. they all contain required information). - // we need to know how many processes there are for each of the domains - // to which the local process belongs. - std::unordered_map<int, int> domain_nprocs; - // each domain has an associated communicator, and we keep a list of the - // communicators to which the local process belongs - std::unordered_map<int, MPI_Comm> domain_comm; - // for each domain, we need a list of the IDs of the particles located - // in that domain - std::unordered_map<int, std::unordered_set<int>> domain_particles; - - // for each domain, we need the state of each particle - std::unordered_map<int, single_particle_state<particle_type>> state; - // for each domain, we also need the last few values of the right hand - // side of the ODE, since we use Adams-Bashforth integration - std::vector<std::unordered_map<int, single_particle_state<particle_type>>> rhs; - - public: - int integration_steps; - // this class only works with rFFTW interpolator - rFFTW_interpolator<rnumber, interp_neighbours> *vel; - - /* simulation parameters */ - double dt; - - /* methods */ - - /* constructor and destructor. - * allocate and deallocate: - * this->state - * this->rhs - * */ - rFFTW_distributed_particles( - const char *NAME, - const hid_t data_file_id, - rFFTW_interpolator<rnumber, interp_neighbours> *FIELD, - const int TRAJ_SKIP, - const int INTEGRATION_STEPS = 2); - ~rFFTW_distributed_particles(); - - void sample( - rFFTW_interpolator<rnumber, interp_neighbours> *field, - const char *dset_name); - void sample( - rFFTW_interpolator<rnumber, interp_neighbours> *field, - const std::unordered_map<int, single_particle_state<particle_type>> &x, - const std::unordered_map<int, std::unordered_set<int>> &dp, - std::unordered_map<int, single_particle_state<POINT3D>> &y); - void get_rhs( - const std::unordered_map<int, single_particle_state<particle_type>> &x, - const std::unordered_map<int, std::unordered_set<int>> &dp, - std::unordered_map<int, single_particle_state<particle_type>> &y); - - - /* given a list of particle positions, - * figure out which go into what local domain, and construct the relevant - * map of ID lists "dp" (for domain particles). - * */ - void sort_into_domains( - const std::unordered_map<int, single_particle_state<particle_type>> &x, - std::unordered_map<int, std::unordered_set<int>> &dp); - /* suppose the particles are currently badly distributed, and some - * arbitrary quantities (stored in "vals") are associated to the particles, - * and we need to properly distribute them among processes. - * that's what this function does. - * In practice it's only used to redistribute the rhs values (and it - * automatically redistributes the state x being passed). - * Some more comments are present in the .cpp file, but, in brief: the - * particles are simply moved from one domain to another. - * If it turns out that the new domain contains a process which does not - * know about a particle, that information is sent from the closest process. - * */ - void redistribute( - std::unordered_map<int, single_particle_state<particle_type>> &x, - std::vector<std::unordered_map<int, single_particle_state<particle_type>>> &vals, - std::unordered_map<int, std::unordered_set<int>> &dp); - - - /* input/output */ - void read(); - void write( - const char *dset_name, - std::unordered_map<int, single_particle_state<POINT3D>> &y); - void write( - const char *dset_name, - std::unordered_map<int, single_particle_state<particle_type>> &y); - void write(const bool write_rhs = true); - - /* solvers */ - void step(); - void roll_rhs(); - void AdamsBashforth(const int nsteps); -}; - -#endif//RFFTW_DISTRIBUTED_PARTICLES - diff --git a/bfps/cpp/rFFTW_interpolator.cpp b/bfps/cpp/rFFTW_interpolator.cpp deleted file mode 100644 index b8b21e8811d7f5286dc4edd00833c205539ea89c..0000000000000000000000000000000000000000 --- a/bfps/cpp/rFFTW_interpolator.cpp +++ /dev/null @@ -1,210 +0,0 @@ -/********************************************************************** -* * -* Copyright 2015 Max Planck Institute * -* for Dynamics and Self-Organization * -* * -* This file is part of bfps. * -* * -* bfps is free software: you can redistribute it and/or modify * -* it under the terms of the GNU General Public License as published * -* by the Free Software Foundation, either version 3 of the License, * -* or (at your option) any later version. * -* * -* bfps is distributed in the hope that it will be useful, * -* but WITHOUT ANY WARRANTY; without even the implied warranty of * -* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * -* GNU General Public License for more details. * -* * -* You should have received a copy of the GNU General Public License * -* along with bfps. If not, see <http://www.gnu.org/licenses/> * -* * -* Contact: Cristian.Lalescu@ds.mpg.de * -* * -**********************************************************************/ - - - -#define NDEBUG - -#include <cmath> -#include "rFFTW_interpolator.hpp" -#include "scope_timer.hpp" - -template <class rnumber, int interp_neighbours> -rFFTW_interpolator<rnumber, interp_neighbours>::rFFTW_interpolator( - fluid_solver_base<rnumber> *fs, - base_polynomial_values BETA_POLYS, - rnumber *FIELD_POINTER) : interpolator_base<rnumber, interp_neighbours>(fs, BETA_POLYS) -{ - this->field = FIELD_POINTER; - - - // generate compute array - this->compute = new bool[this->descriptor->sizes[0]]; - std::fill_n(this->compute, this->descriptor->sizes[0], false); - for (int iz = this->descriptor->starts[0]-interp_neighbours-1; - iz <= this->descriptor->starts[0]+this->descriptor->subsizes[0]+interp_neighbours; - iz++) - this->compute[((iz + this->descriptor->sizes[0]) % this->descriptor->sizes[0])] = true; -} - -template <class rnumber, int interp_neighbours> -rFFTW_interpolator<rnumber, interp_neighbours>::rFFTW_interpolator( - vorticity_equation<rnumber, FFTW> *fs, - base_polynomial_values BETA_POLYS, - rnumber *FIELD_POINTER) : interpolator_base<rnumber, interp_neighbours>(fs, BETA_POLYS) -{ -// this->field = FIELD_POINTER; -// -// -// // generate compute array -// this->compute = new bool[this->descriptor->sizes[0]]; -// std::fill_n(this->compute, this->descriptor->sizes[0], false); -// for (int iz = this->descriptor->starts[0]-interp_neighbours-1; -// iz <= this->descriptor->starts[0]+this->descriptor->subsizes[0]+interp_neighbours; -// iz++) -// this->compute[((iz + this->descriptor->sizes[0]) % this->descriptor->sizes[0])] = true; -} - -template <class rnumber, int interp_neighbours> -rFFTW_interpolator<rnumber, interp_neighbours>::~rFFTW_interpolator() -{ - delete[] this->compute; -} - -template <class rnumber, int interp_neighbours> -bool rFFTW_interpolator<rnumber, interp_neighbours>::get_rank_info(double z, int &maxz_rank, int &minz_rank) -{ - int zg = int(floor(z/this->dz)); - minz_rank = this->descriptor->rank[MOD( - zg - interp_neighbours, - this->descriptor->sizes[0])]; - maxz_rank = this->descriptor->rank[MOD( - zg + 1 + interp_neighbours, - this->descriptor->sizes[0])]; - bool is_here = false; - for (int iz = -interp_neighbours; iz <= interp_neighbours+1; iz++) - is_here = (is_here || - (this->descriptor->myrank == - this->descriptor->rank[MOD(zg+iz, this->descriptor->sizes[0])])); - return is_here; -} - -template <class rnumber, int interp_neighbours> -void rFFTW_interpolator<rnumber, interp_neighbours>::sample( - const int nparticles, - const int pdimension, - const double *__restrict__ x, - double *__restrict__ y, - const int *deriv) -{ - TIMEZONE("rFFTW_interpolator::sample"); - /* get grid coordinates */ - int *xg = new int[3*nparticles]; - double *xx = new double[3*nparticles]; - double *yy = new double[3*nparticles]; - std::fill_n(yy, 3*nparticles, 0.0); - this->get_grid_coordinates(nparticles, pdimension, x, xg, xx); - /* perform interpolation */ - for (int p=0; p<nparticles; p++) - if (this->compute[xg[p*3+2]]) - this->operator()(xg + p*3, xx + p*3, yy + p*3, deriv); - MPI_Allreduce( - yy, - y, - 3*nparticles, - MPI_DOUBLE, - MPI_SUM, - this->descriptor->comm); - delete[] yy; - delete[] xg; - delete[] xx; -} - -template <class rnumber, int interp_neighbours> -void rFFTW_interpolator<rnumber, interp_neighbours>::operator()( - const int *xg, - const double *xx, - double *dest, - const int *deriv) -{ - TIMEZONE("rFFTW_interpolator::operator()"); - double bx[interp_neighbours*2+2], by[interp_neighbours*2+2], bz[interp_neighbours*2+2]; - /* please note that the polynomials in z are computed for all the different - * iz values, independently of whether or not "myrank" will perform the - * computation for all the different iz slices. - * I don't know how big a deal this really is, but it is something that we can - * optimize. - * */ - if (deriv == NULL) - { - this->compute_beta(0, xx[0], bx); - this->compute_beta(0, xx[1], by); - this->compute_beta(0, xx[2], bz); - } - else - { - this->compute_beta(deriv[0], xx[0], bx); - this->compute_beta(deriv[1], xx[1], by); - this->compute_beta(deriv[2], xx[2], bz); - } - std::fill_n(dest, 3, 0); - ptrdiff_t bigiz, bigiy, bigix; - // loop over the 2*interp_neighbours + 2 z slices - for (int iz = -interp_neighbours; iz <= interp_neighbours+1; iz++) - { - // bigiz is the z index of the cell containing the particles - // this->descriptor->sizes[0] is added before taking the modulo - // because we want to be sure that "bigiz" is a positive number. - // I'm no longer sure why I don't use the MOD function here. - bigiz = ptrdiff_t(((xg[2]+iz) + this->descriptor->sizes[0]) % this->descriptor->sizes[0]); - // once we know bigiz, we know whether "myrank" has the relevant slice. - // if not, go to next value of bigiz - if (this->descriptor->myrank == this->descriptor->rank[bigiz]) - { - for (int iy = -interp_neighbours; iy <= interp_neighbours+1; iy++) - { - // bigiy is the y index of the cell - // since we have all the y indices in myrank, we can safely use the - // modulo value - bigiy = ptrdiff_t(MOD(xg[1]+iy, this->descriptor->sizes[1])); - for (int ix = -interp_neighbours; ix <= interp_neighbours+1; ix++) - { - // bigix is the x index of the cell - bigix = ptrdiff_t(MOD(xg[0]+ix, this->descriptor->sizes[2])); - // here we create the index to the current grid node - // note the removal of local_z_start from bigiz. - ptrdiff_t tindex = (((bigiz-this->descriptor->starts[0])*this->descriptor->sizes[1] + - bigiy)*(this->descriptor->sizes[2]+2) + - bigix)*3; - for (int c=0; c<3; c++) - dest[c] += this->field[tindex+c]*(bz[iz+interp_neighbours]* - by[iy+interp_neighbours]* - bx[ix+interp_neighbours]); - } - } - } - } -} - -template class rFFTW_interpolator<float, 1>; -template class rFFTW_interpolator<float, 2>; -template class rFFTW_interpolator<float, 3>; -template class rFFTW_interpolator<float, 4>; -template class rFFTW_interpolator<float, 5>; -template class rFFTW_interpolator<float, 6>; -template class rFFTW_interpolator<float, 7>; -template class rFFTW_interpolator<float, 8>; -template class rFFTW_interpolator<float, 9>; -template class rFFTW_interpolator<float, 10>; -template class rFFTW_interpolator<double, 1>; -template class rFFTW_interpolator<double, 2>; -template class rFFTW_interpolator<double, 3>; -template class rFFTW_interpolator<double, 4>; -template class rFFTW_interpolator<double, 5>; -template class rFFTW_interpolator<double, 6>; -template class rFFTW_interpolator<double, 7>; -template class rFFTW_interpolator<double, 8>; -template class rFFTW_interpolator<double, 9>; -template class rFFTW_interpolator<double, 10>; - diff --git a/bfps/cpp/rFFTW_interpolator.hpp b/bfps/cpp/rFFTW_interpolator.hpp deleted file mode 100644 index 5088be8b2f3094fd96332af0c923d7cc905e4f3f..0000000000000000000000000000000000000000 --- a/bfps/cpp/rFFTW_interpolator.hpp +++ /dev/null @@ -1,118 +0,0 @@ -/********************************************************************** -* * -* Copyright 2015 Max Planck Institute * -* for Dynamics and Self-Organization * -* * -* This file is part of bfps. * -* * -* bfps is free software: you can redistribute it and/or modify * -* it under the terms of the GNU General Public License as published * -* by the Free Software Foundation, either version 3 of the License, * -* or (at your option) any later version. * -* * -* bfps is distributed in the hope that it will be useful, * -* but WITHOUT ANY WARRANTY; without even the implied warranty of * -* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * -* GNU General Public License for more details. * -* * -* You should have received a copy of the GNU General Public License * -* along with bfps. If not, see <http://www.gnu.org/licenses/> * -* * -* Contact: Cristian.Lalescu@ds.mpg.de * -* * -**********************************************************************/ - - - -#include "field_descriptor.hpp" -#include "fftw_tools.hpp" -#include "fluid_solver_base.hpp" -#include "vorticity_equation.hpp" -#include "interpolator_base.hpp" - -#ifndef RFFTW_INTERPOLATOR - -#define RFFTW_INTERPOLATOR - -template <class rnumber, int interp_neighbours> -class rFFTW_interpolator:public interpolator_base<rnumber, interp_neighbours> -{ - public: - using interpolator_base<rnumber, interp_neighbours>::operator(); - - /* pointer to field that has to be interpolated - * The reason this is a member variable is because I want this class - * to be consistent with the "interpolator" class, where a member - * variable is absolutely required (since that class uses padding). - * */ - rnumber *field; - - /* compute[iz] is an array that says whether or not the current MPI - * process is involved in the interpolation formula for a particle - * located in cell "iz". - * It is mostly used in the formula itself. - * This translates as the following condition: - * local_zstart - neighbours <= iz <= local_zend + 1 + neighbours - * I think it's cleaner to keep things in an array, especially since - * "local_zend" is shorthand for another arithmetic operation anyway. - * */ - bool *compute; - - - /* Constructors */ - rFFTW_interpolator( - fluid_solver_base<rnumber> *FSOLVER, - base_polynomial_values BETA_POLYS, - rnumber *FIELD_DATA); - - /* this constructor is empty, I just needed for a quick hack of the - * "vorticity_equation" class. - * It should be removed soon. - * */ - rFFTW_interpolator( - vorticity_equation<rnumber, FFTW> *FSOLVER, - base_polynomial_values BETA_POLYS, - rnumber *FIELD_DATA); - ~rFFTW_interpolator(); - - /* This method is provided for consistency with "interpolator", and it - * does not destroy input */ - inline int read_rFFTW(const void *src) - { - this->field = (rnumber*)src; - return EXIT_SUCCESS; - } - - /* This is used when "compute" is not enough. - * For a given z location, it gives the outermost ranks that are relevant - * for the interpolation formula. - * */ - bool get_rank_info(double z, int &maxz_rank, int &minz_rank); - - /* interpolate field at an array of locations. - * After interpolation is performed, call Allreduce for "y", over - * this->descriptor->comm --- generally MPI_COMM_WORLD. - * This is useful for the simple "particles" class, where particle - * information is synchronized across all processes. - * */ - void sample( - const int nparticles, - const int pdimension, - const double *__restrict__ x, - double *__restrict__ y, - const int *deriv = NULL); - /* interpolate 1 point. - * Result is kept local. - * This is used in the "rFFTW_distributed_particles" class, with the - * result being synchronized across the relevant "local particle - * communicator". - * */ - void operator()( - const int *__restrict__ xg, - const double *__restrict__ xx, - double *__restrict__ dest, - const int *deriv = NULL); -}; - -#endif//RFFTW_INTERPOLATOR - diff --git a/bfps/cpp/slab_field_particles.cpp b/bfps/cpp/slab_field_particles.cpp deleted file mode 100644 index e3c84574062a4eabd5bf52d14a2b0d727c67b68e..0000000000000000000000000000000000000000 --- a/bfps/cpp/slab_field_particles.cpp +++ /dev/null @@ -1,799 +0,0 @@ -/********************************************************************** -* * -* Copyright 2015 Max Planck Institute * -* for Dynamics and Self-Organization * -* * -* This file is part of bfps. * -* * -* bfps is free software: you can redistribute it and/or modify * -* it under the terms of the GNU General Public License as published * -* by the Free Software Foundation, either version 3 of the License, * -* or (at your option) any later version. * -* * -* bfps is distributed in the hope that it will be useful, * -* but WITHOUT ANY WARRANTY; without even the implied warranty of * -* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * -* GNU General Public License for more details. * -* * -* You should have received a copy of the GNU General Public License * -* along with bfps. If not, see <http://www.gnu.org/licenses/> * -* * -* Contact: Cristian.Lalescu@ds.mpg.de * -* * -**********************************************************************/ - - - -#define NDEBUG - - -#include <cmath> -#include <cassert> -#include <cstring> -#include <string> -#include <sstream> - -#include "base.hpp" -#include "slab_field_particles.hpp" -#include "fftw_tools.hpp" - - -extern int myrank, nprocs; - -template <class rnumber> -slab_field_particles<rnumber>::slab_field_particles( - const char *NAME, - fluid_solver_base<rnumber> *FSOLVER, - const int NPARTICLES, - const int NCOMPONENTS, - base_polynomial_values BETA_POLYS, - const int INTERP_NEIGHBOURS, - const int TRAJ_SKIP, - const int INTEGRATION_STEPS) -{ - assert((NCOMPONENTS % 3) == 0); - assert((INTERP_NEIGHBOURS >= 1) || - (INTERP_NEIGHBOURS <= 8)); - assert((INTEGRATION_STEPS <= 6) && - (INTEGRATION_STEPS >= 1)); - strncpy(this->name, NAME, 256); - this->fs = FSOLVER; - this->nparticles = NPARTICLES; - this->ncomponents = NCOMPONENTS; - this->integration_steps = INTEGRATION_STEPS; - this->interp_neighbours = INTERP_NEIGHBOURS; - this->traj_skip = TRAJ_SKIP; - this->compute_beta = BETA_POLYS; - // in principle only the buffer width at the top needs the +1, - // but things are simpler if buffer_width is the same - this->buffer_width = this->interp_neighbours+1; - this->buffer_size = this->buffer_width*this->fs->rd->slice_size; - this->array_size = this->nparticles * this->ncomponents; - this->state = fftw_interface<rnumber>::alloc_real(this->array_size); - std::fill_n(this->state, this->array_size, 0.0); - for (int i=0; i < this->integration_steps; i++) - { - this->rhs[i] = fftw_interface<rnumber>::alloc_real(this->array_size); - std::fill_n(this->rhs[i], this->array_size, 0.0); - } - this->watching = new bool[this->fs->rd->nprocs*nparticles]; - std::fill_n(this->watching, this->fs->rd->nprocs*this->nparticles, false); - this->computing = new int[nparticles]; - - int tdims[4]; - tdims[0] = this->buffer_width*2*this->fs->rd->nprocs + this->fs->rd->sizes[0]; - tdims[1] = this->fs->rd->sizes[1]; - tdims[2] = this->fs->rd->sizes[2]; - tdims[3] = this->fs->rd->sizes[3]; - this->buffered_field_descriptor = new field_descriptor<rnumber>( - 4, tdims, - this->fs->rd->mpi_dtype, - this->fs->rd->comm); - - // compute dx, dy, dz; - this->dx = 4*acos(0) / (this->fs->dkx*this->fs->rd->sizes[2]); - this->dy = 4*acos(0) / (this->fs->dky*this->fs->rd->sizes[1]); - this->dz = 4*acos(0) / (this->fs->dkz*this->fs->rd->sizes[0]); - - // compute lower and upper bounds - this->lbound = new double[nprocs]; - this->ubound = new double[nprocs]; - double *tbound = new double[nprocs]; - std::fill_n(tbound, nprocs, 0.0); - tbound[this->fs->rd->myrank] = this->fs->rd->starts[0]*this->dz; - MPI_Allreduce( - tbound, - this->lbound, - nprocs, - MPI_DOUBLE, - MPI_SUM, - this->fs->rd->comm); - std::fill_n(tbound, nprocs, 0.0); - tbound[this->fs->rd->myrank] = (this->fs->rd->starts[0] + this->fs->rd->subsizes[0])*this->dz; - MPI_Allreduce( - tbound, - this->ubound, - nprocs, - MPI_DOUBLE, - MPI_SUM, - this->fs->rd->comm); - delete[] tbound; - //for (int r = 0; r<nprocs; r++) - // DEBUG_MSG( - // "lbound[%d] = %lg, ubound[%d] = %lg\n", - // r, this->lbound[r], - // r, this->ubound[r] - // ); -} - -template <class rnumber> -slab_field_particles<rnumber>::~slab_field_particles() -{ - delete[] this->computing; - delete[] this->watching; - fftw_interface<rnumber>::free(this->state); - for (int i=0; i < this->integration_steps; i++) - { - fftw_interface<rnumber>::free(this->rhs[i]); - } - delete[] this->lbound; - delete[] this->ubound; - delete this->buffered_field_descriptor; -} - -template <class rnumber> -void slab_field_particles<rnumber>::get_rhs(double *x, double *y) -{ - std::fill_n(y, this->array_size, 0.0); -} - -template <class rnumber> -void slab_field_particles<rnumber>::jump_estimate(double *dest) -{ - std::fill_n(dest, this->nparticles, 0.0); -} - -template <class rnumber> -int slab_field_particles<rnumber>::get_rank(double z) -{ - int tmp = this->fs->rd->rank[MOD(int(floor(z/this->dz)), this->fs->rd->sizes[0])]; - assert(tmp >= 0 && tmp < this->fs->rd->nprocs); - return tmp; -} - -template <class rnumber> -void slab_field_particles<rnumber>::synchronize_single_particle_state(int p, double *x, int source) -{ - if (source == -1) source = this->computing[p]; - if (this->watching[this->fs->rd->myrank*this->nparticles+p]) for (int r=0; r<this->fs->rd->nprocs; r++) - if (r != source && - this->watching[r*this->nparticles+p]) - { - //DEBUG_MSG("synchronizing state %d from %d to %d\n", p, this->computing[p], r); - if (this->fs->rd->myrank == source) - MPI_Send( - x+p*this->ncomponents, - this->ncomponents, - MPI_DOUBLE, - r, - p+this->computing[p]*this->nparticles, - this->fs->rd->comm); - if (this->fs->rd->myrank == r) - MPI_Recv( - x+p*this->ncomponents, - this->ncomponents, - MPI_DOUBLE, - source, - p+this->computing[p]*this->nparticles, - this->fs->rd->comm, - MPI_STATUS_IGNORE); - } -} - -template <class rnumber> -void slab_field_particles<rnumber>::synchronize() -{ - double *tstate = fftw_interface<double>::alloc_real(this->array_size); - // first, synchronize state and jump across CPUs - std::fill_n(tstate, this->array_size, 0.0); - for (int p=0; p<this->nparticles; p++) - { - //if (this->watching[this->fs->rd->myrank*this->nparticles + p]) - //DEBUG_MSG( - // "in synchronize, position for particle %d is %g %g %g\n", - // p, - // this->state[p*this->ncomponents], - // this->state[p*this->ncomponents+1], - // this->state[p*this->ncomponents+2]); - if (this->fs->rd->myrank == this->computing[p]) - std::copy(this->state + p*this->ncomponents, - this->state + (p+1)*this->ncomponents, - tstate + p*this->ncomponents); - } - MPI_Allreduce( - tstate, - this->state, - this->array_size, - MPI_DOUBLE, - MPI_SUM, - this->fs->rd->comm); - if (this->integration_steps >= 1) - { - for (int i=0; i<this->integration_steps; i++) - { - std::fill_n(tstate, this->array_size, 0.0); - for (int p=0; p<this->nparticles; p++) if (this->fs->rd->myrank == this->computing[p]) - std::copy(this->rhs[i] + p*this->ncomponents, - this->rhs[i] + (p+1)*this->ncomponents, - tstate + p*this->ncomponents); - std::fill_n(this->rhs[i], this->array_size, 0.0); - MPI_Allreduce( - tstate, - this->rhs[i], - this->array_size, - MPI_DOUBLE, - MPI_SUM, - this->fs->rd->comm); - } - } - fftw_interface<double>::free(tstate); - // assignment of particles - for (int p=0; p<this->nparticles; p++) - { - this->computing[p] = this->get_rank(this->state[p*this->ncomponents + 2]); - //DEBUG_MSG("synchronizing particles, particle %d computing is %d\n", p, this->computing[p]); - } - double *jump = fftw_interface<double>::alloc_real(this->nparticles); - this->jump_estimate(jump); - // now, see who needs to watch - bool *local_watching = new bool[this->fs->rd->nprocs*this->nparticles]; - std::fill_n(local_watching, this->fs->rd->nprocs*this->nparticles, false); - for (int p=0; p<this->nparticles; p++) - if (this->fs->rd->myrank == this->computing[p]) - { - local_watching[this->get_rank(this->state[this->ncomponents*p+2] )*this->nparticles+p] = true; - local_watching[this->get_rank(this->state[this->ncomponents*p+2]-jump[p])*this->nparticles+p] = true; - local_watching[this->get_rank(this->state[this->ncomponents*p+2]+jump[p])*this->nparticles+p] = true; - } - fftw_interface<double>::free(jump); - MPI_Allreduce( - local_watching, - this->watching, - this->nparticles*this->fs->rd->nprocs, - MPI_C_BOOL, - MPI_LOR, - this->fs->rd->comm); - delete[] local_watching; - for (int p=0; p<this->nparticles; p++) - DEBUG_MSG("watching = %d for particle %d\n", this->watching[this->fs->rd->myrank*nparticles+p], p); -} - - - -template <class rnumber> -void slab_field_particles<rnumber>::roll_rhs() -{ - for (int i=this->integration_steps-2; i>=0; i--) - std::copy(this->rhs[i], - this->rhs[i] + this->array_size, - this->rhs[i+1]); -} - - - -template <class rnumber> -void slab_field_particles<rnumber>::AdamsBashforth(int nsteps) -{ - ptrdiff_t ii; - this->get_rhs(this->state, this->rhs[0]); - //if (myrank == 0) - //{ - // DEBUG_MSG( - // "in AdamsBashforth for particles %s, integration_steps = %d, nsteps = %d, iteration = %d\n", - // this->name, - // this->integration_steps, - // nsteps, - // this->iteration); - // std::stringstream tstring; - // for (int p=0; p<this->nparticles; p++) - // tstring << " " << this->computing[p]; - // DEBUG_MSG("%s\n", tstring.str().c_str()); - // for (int i=0; i<this->integration_steps; i++) - // { - // std::stringstream tstring; - // for (int p=0; p<this->nparticles; p++) - // tstring << " " << this->rhs[i][p*3]; - // DEBUG_MSG("%s\n", tstring.str().c_str()); - // } - //} - switch(nsteps) - { - case 1: - for (int p=0; p<this->nparticles; p++) if (this->fs->rd->myrank == this->computing[p]) - for (int i=0; i<this->ncomponents; i++) - { - ii = p*this->ncomponents+i; - this->state[ii] += this->dt*this->rhs[0][ii]; - } - break; - case 2: - for (int p=0; p<this->nparticles; p++) if (this->fs->rd->myrank == this->computing[p]) - for (int i=0; i<this->ncomponents; i++) - { - ii = p*this->ncomponents+i; - this->state[ii] += this->dt*(3*this->rhs[0][ii] - - this->rhs[1][ii])/2; - } - break; - case 3: - for (int p=0; p<this->nparticles; p++) if (this->fs->rd->myrank == this->computing[p]) - for (int i=0; i<this->ncomponents; i++) - { - ii = p*this->ncomponents+i; - this->state[ii] += this->dt*(23*this->rhs[0][ii] - - 16*this->rhs[1][ii] - + 5*this->rhs[2][ii])/12; - } - break; - case 4: - for (int p=0; p<this->nparticles; p++) if (this->fs->rd->myrank == this->computing[p]) - for (int i=0; i<this->ncomponents; i++) - { - ii = p*this->ncomponents+i; - this->state[ii] += this->dt*(55*this->rhs[0][ii] - - 59*this->rhs[1][ii] - + 37*this->rhs[2][ii] - - 9*this->rhs[3][ii])/24; - } - break; - case 5: - for (int p=0; p<this->nparticles; p++) if (this->fs->rd->myrank == this->computing[p]) - for (int i=0; i<this->ncomponents; i++) - { - ii = p*this->ncomponents+i; - this->state[ii] += this->dt*(1901*this->rhs[0][ii] - - 2774*this->rhs[1][ii] - + 2616*this->rhs[2][ii] - - 1274*this->rhs[3][ii] - + 251*this->rhs[4][ii])/720; - } - break; - case 6: - for (int p=0; p<this->nparticles; p++) if (this->fs->rd->myrank == this->computing[p]) - for (int i=0; i<this->ncomponents; i++) - { - ii = p*this->ncomponents+i; - this->state[ii] += this->dt*(4277*this->rhs[0][ii] - - 7923*this->rhs[1][ii] - + 9982*this->rhs[2][ii] - - 7298*this->rhs[3][ii] - + 2877*this->rhs[4][ii] - - 475*this->rhs[5][ii])/1440; - } - break; - } - this->roll_rhs(); -} - - -template <class rnumber> -void slab_field_particles<rnumber>::step() -{ - this->AdamsBashforth((this->iteration < this->integration_steps) ? this->iteration+1 : this->integration_steps); - //this->cRK4(); - this->iteration++; - this->synchronize(); -} - - -template <class rnumber> -void slab_field_particles<rnumber>::Euler() -{ - double *y = fftw_interface<double>::alloc_real(this->array_size); - this->get_rhs(this->state, y); - for (int p=0; p<this->nparticles; p++) if (this->fs->rd->myrank == this->computing[p]) - { - for (int i=0; i<this->ncomponents; i++) - this->state[p*this->ncomponents+i] += this->dt*y[p*this->ncomponents+i]; - //DEBUG_MSG( - // "particle %d state is %lg %lg %lg\n", - // p, this->state[p*this->ncomponents], this->state[p*this->ncomponents+1], this->state[p*this->ncomponents+2]); - } - fftw_interface<double>::free(y); -} - - -template <class rnumber> -void slab_field_particles<rnumber>::Heun() -{ - double *y = new double[this->array_size]; - double dtfactor[] = {0.0, this->dt}; - this->get_rhs(this->state, this->rhs[0]); - for (int p=0; p<this->nparticles; p++) - { - this->synchronize_single_particle_state(p, this->rhs[0]); - //int crank = this->get_rank(this->state[p*3 + 2]); - //DEBUG_MSG( - // "k 0 iteration %d particle is %d, crank is %d, computing rank is %d, position is %g %g %g, rhs is %g %g %g\n", - // this->iteration, p, - // crank, this->computing[p], - // this->state[p*3], this->state[p*3+1], this->state[p*3+2], - // this->rhs[0][p*3], this->rhs[0][p*3+1], this->rhs[0][p*3+2]); - } - for (int kindex = 1; kindex < 2; kindex++) - { - for (int p=0; p<this->nparticles; p++) - { - if (this->watching[this->fs->rd->myrank*this->nparticles+p]) - for (int i=0; i<this->ncomponents; i++) - { - ptrdiff_t tindex = ptrdiff_t(p)*this->ncomponents + i; - y[tindex] = this->state[tindex] + dtfactor[kindex]*this->rhs[kindex-1][tindex]; - } - } - for (int p=0; p<this->nparticles; p++) - this->synchronize_single_particle_state(p, y); - this->get_rhs(y, this->rhs[kindex]); - for (int p=0; p<this->nparticles; p++) - { - this->synchronize_single_particle_state(p, this->rhs[kindex]); - DEBUG_MSG( - "k %d iteration %d particle is %d, position is %g %g %g, rhs is %g %g %g\n", - kindex, this->iteration, p, - y[p*3], y[p*3+1], y[p*3+2], - this->rhs[kindex][p*3], this->rhs[kindex][p*3+1], this->rhs[kindex][p*3+2]); - } - } - for (int p=0; p<this->nparticles; p++) - { - if (this->watching[this->fs->rd->myrank*this->nparticles+p]) - { - for (int i=0; i<this->ncomponents; i++) - { - ptrdiff_t tindex = ptrdiff_t(p)*this->ncomponents + i; - this->state[tindex] += this->dt*(this->rhs[0][tindex] + this->rhs[1][tindex])/2; - } - //int crank = this->get_rank(this->state[p*3 + 2]); - //if (crank != this->computing[p]) - // DEBUG_MSG( - // "k _ iteration %d particle is %d, crank is %d, computing rank is %d, position is %g %g %g\n", - // this->iteration, p, - // crank, this->computing[p], - // this->state[p*3], this->state[p*3+1], this->state[p*3+2]); - } - } - delete[] y; - DEBUG_MSG("exiting Heun\n"); -} - - -template <class rnumber> -void slab_field_particles<rnumber>::cRK4() -{ - double *y = new double[this->array_size]; - double dtfactor[] = {0.0, this->dt/2, this->dt/2, this->dt}; - this->get_rhs(this->state, this->rhs[0]); - for (int p=0; p<this->nparticles; p++) - this->synchronize_single_particle_state(p, this->rhs[0]); - for (int kindex = 1; kindex < 4; kindex++) - { - for (int p=0; p<this->nparticles; p++) - { - if (this->watching[this->fs->rd->myrank*this->nparticles+p]) - for (int i=0; i<this->ncomponents; i++) - { - ptrdiff_t tindex = ptrdiff_t(p)*this->ncomponents + i; - y[tindex] = this->state[tindex] + dtfactor[kindex]*this->rhs[kindex-1][tindex]; - } - } - for (int p=0; p<this->nparticles; p++) - this->synchronize_single_particle_state(p, y); - this->get_rhs(y, this->rhs[kindex]); - for (int p=0; p<this->nparticles; p++) - this->synchronize_single_particle_state(p, this->rhs[kindex]); - } - for (int p=0; p<this->nparticles; p++) - { - if (this->watching[this->fs->rd->myrank*this->nparticles+p]) - for (int i=0; i<this->ncomponents; i++) - { - ptrdiff_t tindex = ptrdiff_t(p)*this->ncomponents + i; - this->state[tindex] += this->dt*(this->rhs[0][tindex] + - 2*(this->rhs[1][tindex] + this->rhs[2][tindex]) + - this->rhs[3][tindex])/6; - } - } - delete[] y; -} - -template <class rnumber> -void slab_field_particles<rnumber>::get_grid_coordinates(double *x, int *xg, double *xx) -{ - static double grid_size[] = {this->dx, this->dy, this->dz}; - double tval; - std::fill_n(xg, this->nparticles*3, 0); - std::fill_n(xx, this->nparticles*3, 0.0); - for (int p=0; p<this->nparticles; p++) if (this->watching[this->fs->rd->myrank*this->nparticles+p]) - { - for (int c=0; c<3; c++) - { - tval = floor(x[p*this->ncomponents+c]/grid_size[c]); - xg[p*3+c] = MOD(int(tval), this->fs->rd->sizes[2-c]); - xx[p*3+c] = (x[p*this->ncomponents+c] - tval*grid_size[c]) / grid_size[c]; - } - xg[p*3+2] -= this->fs->rd->starts[0]; - if (this->fs->rd->myrank == this->fs->rd->rank[0] && - xg[p*3+2] > this->fs->rd->subsizes[0]) - xg[p*3+2] -= this->fs->rd->sizes[0]; - //DEBUG_MSG( - // "particle %d x is %lg %lg %lg xx is %lg %lg %lg xg is %d %d %d\n", - // p, - // x[p*3], x[p*3+1], x[p*3+2], - // xx[p*3], xx[p*3+1], xx[p*3+2], - // xg[p*3], xg[p*3+1], xg[p*3+2]); - } -} - -template <class rnumber> -void slab_field_particles<rnumber>::interpolation_formula(rnumber *field, int *xg, double *xx, double *dest, int *deriv) -{ - double bx[this->interp_neighbours*2+2], by[this->interp_neighbours*2+2], bz[this->interp_neighbours*2+2]; - this->compute_beta(deriv[0], xx[0], bx); - this->compute_beta(deriv[1], xx[1], by); - this->compute_beta(deriv[2], xx[2], bz); - //DEBUG_MSG("computed beta polynomials\n"); - std::fill_n(dest, 3, 0); - for (int iz = -this->interp_neighbours; iz <= this->interp_neighbours+1; iz++) - for (int iy = -this->interp_neighbours; iy <= this->interp_neighbours+1; iy++) - for (int ix = -this->interp_neighbours; ix <= this->interp_neighbours+1; ix++) - for (int c=0; c<3; c++) - { - //DEBUG_MSG( - // "%d %d %d %d %d %d %d %ld %ld\n", - // xg[2], xg[1], xg[0], iz, iy, ix, c, - // ((ptrdiff_t(xg[2]+iz) *this->fs->rd->subsizes[1] + - // ptrdiff_t(xg[1]+iy))*this->fs->rd->subsizes[2] + - // ptrdiff_t(xg[0]+ix))*3+c, - // this->buffered_field_descriptor->local_size - // ); - dest[c] += field[((ptrdiff_t( xg[2]+iz ) *this->fs->rd->subsizes[1] + - ptrdiff_t(MOD(xg[1]+iy, this->fs->rd->sizes[1])))*this->fs->rd->subsizes[2] + - ptrdiff_t(MOD(xg[0]+ix, this->fs->rd->sizes[2])))*3+c]*(bz[iz+this->interp_neighbours]* - by[iy+this->interp_neighbours]* - bx[ix+this->interp_neighbours]); - } -} - -template <class rnumber> -void slab_field_particles<rnumber>::linear_interpolation(rnumber *field, int *xg, double *xx, double *dest, int *deriv) -{ - //ptrdiff_t tindex, tmp; - //tindex = ((ptrdiff_t(xg[2] )*this->fs->rd->subsizes[1]+xg[1] )*this->fs->rd->subsizes[2]+xg[0] )*3; - //tmp = ptrdiff_t(xg[2]); - //DEBUG_MSG( - // "linear interpolation xx is %lg %lg %lg xg is %d %d %d," - // " corner index is ((%ld*%d+%d)*%d+%d)*3 = %ld\n", - // xx[0], xx[1], xx[2], - // xg[0], xg[1], xg[2], - // tmp, this->fs->rd->subsizes[1], xg[1], this->fs->rd->subsizes[2], xg[0], - // tindex); - for (int c=0; c<3; c++) - dest[c] = (field[((ptrdiff_t(xg[2] )*this->fs->rd->subsizes[1]+xg[1] )*this->fs->rd->subsizes[2]+xg[0] )*3+c]*((1-xx[0])*(1-xx[1])*(1-xx[2])) + - field[((ptrdiff_t(xg[2] )*this->fs->rd->subsizes[1]+xg[1] )*this->fs->rd->subsizes[2]+xg[0]+1)*3+c]*(( xx[0])*(1-xx[1])*(1-xx[2])) + - field[((ptrdiff_t(xg[2] )*this->fs->rd->subsizes[1]+xg[1]+1)*this->fs->rd->subsizes[2]+xg[0] )*3+c]*((1-xx[0])*( xx[1])*(1-xx[2])) + - field[((ptrdiff_t(xg[2] )*this->fs->rd->subsizes[1]+xg[1]+1)*this->fs->rd->subsizes[2]+xg[0]+1)*3+c]*(( xx[0])*( xx[1])*(1-xx[2])) + - field[((ptrdiff_t(xg[2]+1)*this->fs->rd->subsizes[1]+xg[1] )*this->fs->rd->subsizes[2]+xg[0] )*3+c]*((1-xx[0])*(1-xx[1])*( xx[2])) + - field[((ptrdiff_t(xg[2]+1)*this->fs->rd->subsizes[1]+xg[1] )*this->fs->rd->subsizes[2]+xg[0]+1)*3+c]*(( xx[0])*(1-xx[1])*( xx[2])) + - field[((ptrdiff_t(xg[2]+1)*this->fs->rd->subsizes[1]+xg[1]+1)*this->fs->rd->subsizes[2]+xg[0] )*3+c]*((1-xx[0])*( xx[1])*( xx[2])) + - field[((ptrdiff_t(xg[2]+1)*this->fs->rd->subsizes[1]+xg[1]+1)*this->fs->rd->subsizes[2]+xg[0]+1)*3+c]*(( xx[0])*( xx[1])*( xx[2]))); -} - -template <class rnumber> -void slab_field_particles<rnumber>::read(hid_t data_file_id) -{ - //DEBUG_MSG("aloha\n"); - if (this->fs->rd->myrank == 0) - { - std::string temp_string = (std::string("/particles/") + - std::string(this->name) + - std::string("/state")); - hid_t Cdset = H5Dopen(data_file_id, temp_string.c_str(), H5P_DEFAULT); - hid_t mspace, rspace; - hsize_t count[4], offset[4]; - rspace = H5Dget_space(Cdset); - H5Sget_simple_extent_dims(rspace, count, NULL); - count[0] = 1; - offset[0] = this->iteration / this->traj_skip; - offset[1] = 0; - offset[2] = 0; - mspace = H5Screate_simple(3, count, NULL); - H5Sselect_hyperslab(rspace, H5S_SELECT_SET, offset, NULL, count, NULL); - H5Dread(Cdset, H5T_NATIVE_DOUBLE, mspace, rspace, H5P_DEFAULT, this->state); - H5Sclose(mspace); - H5Sclose(rspace); - H5Dclose(Cdset); - if (this->iteration > 0) - { - temp_string = (std::string("/particles/") + - std::string(this->name) + - std::string("/rhs")); - Cdset = H5Dopen(data_file_id, temp_string.c_str(), H5P_DEFAULT); - rspace = H5Dget_space(Cdset); - H5Sget_simple_extent_dims(rspace, count, NULL); - //reading from last available position - offset[0] = count[0] - 1; - offset[3] = 0; - count[0] = 1; - count[1] = 1; - mspace = H5Screate_simple(4, count, NULL); - for (int i=0; i<this->integration_steps; i++) - { - offset[1] = i; - H5Sselect_hyperslab(rspace, H5S_SELECT_SET, offset, NULL, count, NULL); - H5Dread(Cdset, H5T_NATIVE_DOUBLE, mspace, rspace, H5P_DEFAULT, this->rhs[i]); - } - H5Sclose(mspace); - H5Sclose(rspace); - H5Dclose(Cdset); - } - } - MPI_Bcast( - this->state, - this->array_size, - MPI_DOUBLE, - 0, - this->fs->rd->comm); - for (int i = 0; i<this->integration_steps; i++) - { - MPI_Bcast( - this->rhs[i], - this->array_size, - MPI_DOUBLE, - 0, - this->fs->rd->comm); - } - // initial assignment of particles - for (int p=0; p<this->nparticles; p++) - { - this->computing[p] = this->get_rank(this->state[p*this->ncomponents + 2]); - //DEBUG_MSG("reading particles, particle %d computing is %d\n", p, this->computing[p]); - } - // now actual synchronization - this->synchronize(); -} - -template <class rnumber> -void slab_field_particles<rnumber>::write(hid_t data_file_id, bool write_rhs) -{ - if (this->fs->rd->myrank == 0) - { - std::string temp_string = (std::string("/particles/") + - std::string(this->name) + - std::string("/state")); - hid_t Cdset = H5Dopen(data_file_id, temp_string.c_str(), H5P_DEFAULT); - hid_t mspace, wspace; - hsize_t count[4], offset[4]; - wspace = H5Dget_space(Cdset); - H5Sget_simple_extent_dims(wspace, count, NULL); - count[0] = 1; - offset[0] = this->iteration / this->traj_skip; - offset[1] = 0; - offset[2] = 0; - mspace = H5Screate_simple(3, count, NULL); - H5Sselect_hyperslab(wspace, H5S_SELECT_SET, offset, NULL, count, NULL); - H5Dwrite(Cdset, H5T_NATIVE_DOUBLE, mspace, wspace, H5P_DEFAULT, this->state); - H5Sclose(mspace); - H5Sclose(wspace); - H5Dclose(Cdset); - if (write_rhs) - { - temp_string = (std::string("/particles/") + - std::string(this->name) + - std::string("/rhs")); - Cdset = H5Dopen(data_file_id, temp_string.c_str(), H5P_DEFAULT); - wspace = H5Dget_space(Cdset); - H5Sget_simple_extent_dims(wspace, count, NULL); - //writing to last available position - offset[0] = count[0] - 1; - count[0] = 1; - count[1] = 1; - offset[3] = 0; - mspace = H5Screate_simple(4, count, NULL); - for (int i=0; i<this->integration_steps; i++) - { - offset[1] = i; - H5Sselect_hyperslab(wspace, H5S_SELECT_SET, offset, NULL, count, NULL); - H5Dwrite(Cdset, H5T_NATIVE_DOUBLE, mspace, wspace, H5P_DEFAULT, this->rhs[i]); - } - H5Sclose(mspace); - H5Sclose(wspace); - H5Dclose(Cdset); - } - } -} - - - -/*****************************************************************************/ -/* macro for specializations to numeric types compatible with FFTW */ -#define SLAB_FIELD_PARTICLES_DEFINITIONS(FFTW, R, MPI_RNUM) \ - \ -template <> \ -void slab_field_particles<R>::rFFTW_to_buffered(R *src, R *dst) \ -{ \ - /* do big copy of middle stuff */ \ - std::copy(src, \ - src + this->fs->rd->local_size, \ - dst + this->buffer_size); \ - int rsrc; \ - /* get upper slices */ \ - for (int rdst = 0; rdst < this->fs->rd->nprocs; rdst++) \ - { \ - rsrc = this->fs->rd->rank[(this->fs->rd->all_start0[rdst] + \ - this->fs->rd->all_size0[rdst]) % \ - this->fs->rd->sizes[0]]; \ - if (this->fs->rd->myrank == rsrc) \ - MPI_Send( \ - (void*)(src), \ - this->buffer_size, \ - MPI_RNUM, \ - rdst, \ - 2*(rsrc*this->fs->rd->nprocs + rdst), \ - this->fs->rd->comm); \ - if (this->fs->rd->myrank == rdst) \ - MPI_Recv( \ - (void*)(dst + this->buffer_size + this->fs->rd->local_size), \ - this->buffer_size, \ - MPI_RNUM, \ - rsrc, \ - 2*(rsrc*this->fs->rd->nprocs + rdst), \ - this->fs->rd->comm, \ - MPI_STATUS_IGNORE); \ - } \ - /* get lower slices */ \ - for (int rdst = 0; rdst < this->fs->rd->nprocs; rdst++) \ - { \ - rsrc = this->fs->rd->rank[MOD(this->fs->rd->all_start0[rdst] - 1, \ - this->fs->rd->sizes[0])]; \ - if (this->fs->rd->myrank == rsrc) \ - MPI_Send( \ - (void*)(src + this->fs->rd->local_size - this->buffer_size), \ - this->buffer_size, \ - MPI_RNUM, \ - rdst, \ - 2*(rsrc*this->fs->rd->nprocs + rdst)+1, \ - this->fs->rd->comm); \ - if (this->fs->rd->myrank == rdst) \ - MPI_Recv( \ - (void*)(dst), \ - this->buffer_size, \ - MPI_RNUM, \ - rsrc, \ - 2*(rsrc*this->fs->rd->nprocs + rdst)+1, \ - this->fs->rd->comm, \ - MPI_STATUS_IGNORE); \ - } \ -} \ -/*****************************************************************************/ - - - -/*****************************************************************************/ -/* now actually use the macro defined above */ -SLAB_FIELD_PARTICLES_DEFINITIONS( - FFTW_MANGLE_FLOAT, - float, - MPI_FLOAT) -SLAB_FIELD_PARTICLES_DEFINITIONS( - FFTW_MANGLE_DOUBLE, - double, - MPI_DOUBLE) -/*****************************************************************************/ - - - -/*****************************************************************************/ -/* finally, force generation of code for single precision */ -template class slab_field_particles<float>; -template class slab_field_particles<double>; -/*****************************************************************************/ diff --git a/bfps/cpp/slab_field_particles.hpp b/bfps/cpp/slab_field_particles.hpp deleted file mode 100644 index 15f9477bbfb680be17390447ce88bc40cd7471e2..0000000000000000000000000000000000000000 --- a/bfps/cpp/slab_field_particles.hpp +++ /dev/null @@ -1,149 +0,0 @@ -/********************************************************************** -* * -* Copyright 2015 Max Planck Institute * -* for Dynamics and Self-Organization * -* * -* This file is part of bfps. * -* * -* bfps is free software: you can redistribute it and/or modify * -* it under the terms of the GNU General Public License as published * -* by the Free Software Foundation, either version 3 of the License, * -* or (at your option) any later version. * -* * -* bfps is distributed in the hope that it will be useful, * -* but WITHOUT ANY WARRANTY; without even the implied warranty of * -* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * -* GNU General Public License for more details. * -* * -* You should have received a copy of the GNU General Public License * -* along with bfps. If not, see <http://www.gnu.org/licenses/> * -* * -* Contact: Cristian.Lalescu@ds.mpg.de * -* * -**********************************************************************/ - - - -#include <stdio.h> -#include <stdlib.h> -#include <iostream> -#include <hdf5.h> -#include "base.hpp" -#include "fluid_solver_base.hpp" -#include "interpolator.hpp" - -#ifndef SLAB_FIELD_PARTICLES - -#define SLAB_FIELD_PARTICLES - -extern int myrank, nprocs; - -template <class rnumber> -class slab_field_particles -{ - protected: - //typedef void (slab_field_particles<rnumber>::*tensor_product_interpolation_formula)( - // rnumber *field, - // int *xg, - // double *xx, - // double *dest, - // int *deriv); - public: - fluid_solver_base<rnumber> *fs; - field_descriptor<rnumber> *buffered_field_descriptor; - - /* watching is an array of shape [nparticles], with - * watching[p] being true if particle p is in the domain of myrank - * or in the buffer regions. - * watching is not really being used right now, since I don't do partial - * synchronizations of particles. - * we may do this at some point in the future, if it seems needed... - * */ - bool *watching; - /* computing is an array of shape [nparticles], with - * computing[p] being the rank that is currently working on particle p - * */ - int *computing; - - /* state will generally hold all the information about the particles. - * in the beginning, we will only need to solve 3D ODEs, but I figured - * a general ncomponents is better, since we may change our minds. - * */ - double *state; - double *rhs[6]; - int nparticles; - int ncomponents; - int array_size; - int interp_neighbours; - int buffer_width; - int integration_steps; - int traj_skip; - ptrdiff_t buffer_size; - double *lbound; - double *ubound; - //tensor_product_interpolation_formula spline_formula; - base_polynomial_values compute_beta; - - /* simulation parameters */ - char name[256]; - int iteration; - double dt; - - /* physical parameters of field */ - rnumber dx, dy, dz; - - /* methods */ - - /* constructor and destructor. - * allocate and deallocate: - * this->state - * this->lbound - * this->ubound - * this->watching - * */ - slab_field_particles( - const char *NAME, - fluid_solver_base<rnumber> *FSOLVER, - const int NPARTICLES, - const int NCOMPONENTS, - base_polynomial_values BETA_POLYS, - const int INTERP_NEIGHBOURS, - const int TRAJ_SKIP, - const int INTEGRATION_STEPS = 2); - ~slab_field_particles(); - - /* an Euler step is needed to compute an estimate of future positions, - * which is needed for synchronization. - * */ - virtual void jump_estimate(double *jump_length); - /* function get_rhs is virtual since we want children to do different things, - * depending on the type of particle. - * */ - virtual void get_rhs(double *x, double *rhs); - - /* generic methods, should work for all children of this class */ - int get_rank(double z); // get rank for given value of z - void synchronize(); - void synchronize_single_particle_state(int p, double *x, int source_id = -1); - void get_grid_coordinates(double *x, int *xg, double *xx); - void linear_interpolation(rnumber *field, int *xg, double *xx, double *dest, int *deriv); - void interpolation_formula(rnumber *field, int *xg, double *xx, double *dest, int *deriv); - - void rFFTW_to_buffered(rnumber *src, rnumber *dst); - - /* generic methods, should work for all children of this class */ - void read(hid_t data_file_id); - void write(hid_t data_file_id, bool write_rhs = true); - - /* solver stuff */ - void step(); - void roll_rhs(); - void AdamsBashforth(int nsteps); - void Euler(); - void Heun(); - void cRK4(); -}; - - -#endif//SLAB_FIELD_PARTICLES - diff --git a/bfps/cpp/tracers.cpp b/bfps/cpp/tracers.cpp deleted file mode 100644 index 3d9fbfb6a1e357d70452466b6cc901659444539d..0000000000000000000000000000000000000000 --- a/bfps/cpp/tracers.cpp +++ /dev/null @@ -1,204 +0,0 @@ -/********************************************************************** -* * -* Copyright 2015 Max Planck Institute * -* for Dynamics and Self-Organization * -* * -* This file is part of bfps. * -* * -* bfps is free software: you can redistribute it and/or modify * -* it under the terms of the GNU General Public License as published * -* by the Free Software Foundation, either version 3 of the License, * -* or (at your option) any later version. * -* * -* bfps is distributed in the hope that it will be useful, * -* but WITHOUT ANY WARRANTY; without even the implied warranty of * -* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * -* GNU General Public License for more details. * -* * -* You should have received a copy of the GNU General Public License * -* along with bfps. If not, see <http://www.gnu.org/licenses/> * -* * -* Contact: Cristian.Lalescu@ds.mpg.de * -* * -**********************************************************************/ - - - -#define NDEBUG - - -#include <cmath> -#include "base.hpp" -#include "fftw_tools.hpp" -#include "tracers.hpp" - -template <class rnumber> -void tracers<rnumber>::jump_estimate(double *jump) -{ - int deriv[] = {0, 0, 0}; - int *xg = new int[this->array_size]; - double *xx = new double[this->array_size]; - rnumber *vel = this->data + this->buffer_size; - double tmp[3]; - /* get grid coordinates */ - this->get_grid_coordinates(this->state, xg, xx); - - /* perform interpolation */ - for (int p=0; p<this->nparticles; p++) if (this->fs->rd->myrank == this->computing[p]) - { - this->interpolation_formula(vel, xg + p*3, xx + p*3, tmp, deriv); - jump[p] = fabs(3*this->dt * tmp[2]); - if (jump[p] < this->dz*1.01) - jump[p] = this->dz*1.01; - } - delete[] xg; - delete[] xx; -} - -template <class rnumber> -void tracers<rnumber>::get_rhs(double *x, double *y) -{ - std::fill_n(y, this->array_size, 0.0); - int deriv[] = {0, 0, 0}; - /* get grid coordinates */ - int *xg = new int[this->array_size]; - double *xx = new double[this->array_size]; - rnumber *vel = this->data + this->buffer_size; - this->get_grid_coordinates(x, xg, xx); - //DEBUG_MSG( - // "position is %g %g %g, grid_coords are %d %d %d %g %g %g\n", - // x[0], x[1], x[2], - // xg[0], xg[1], xg[2], - // xx[0], xx[1], xx[2]); - /* perform interpolation */ - for (int p=0; p<this->nparticles; p++) - { - if (this->watching[this->fs->rd->myrank*this->nparticles+p]) - { - int crank = this->get_rank(x[p*3 + 2]); - if (this->fs->rd->myrank == crank) - { - this->interpolation_formula(vel, xg + p*3, xx + p*3, y + p*3, deriv); - DEBUG_MSG( - "position is %g %g %g %d %d %d %g %g %g, result is %g %g %g\n", - x[p*3], x[p*3+1], x[p*3+2], - xg[p*3], xg[p*3+1], xg[p*3+2], - xx[p*3], xx[p*3+1], xx[p*3+2], - y[p*3], y[p*3+1], y[p*3+2]); - } - if (crank != this->computing[p]) - { - this->synchronize_single_particle_state(p, y, crank); - } - //DEBUG_MSG( - // "after synch crank is %d, computing rank is %d, position is %g %g %g, result is %g %g %g\n", - // this->iteration, p, - // crank, this->computing[p], - // x[p*3], x[p*3+1], x[p*3+2], - // y[p*3], y[p*3+1], y[p*3+2]); - } - } - delete[] xg; - delete[] xx; -} - -template<class rnumber> -void tracers<rnumber>::update_field(bool clip_on) -{ - if (clip_on) - clip_zero_padding<rnumber>(this->fs->rd, this->source_data, 3); - this->rFFTW_to_buffered(this->source_data, this->data); -} - -/*****************************************************************************/ -/* macro for specializations to numeric types compatible with FFTW */ - -#define TRACERS_DEFINITIONS(FFTW, R, MPI_RNUM, MPI_CNUM) \ - \ -template <> \ -tracers<R>::tracers( \ - const char *NAME, \ - fluid_solver_base<R> *FSOLVER, \ - const int NPARTICLES, \ - base_polynomial_values BETA_POLYS, \ - const int NEIGHBOURS, \ - const int TRAJ_SKIP, \ - const int INTEGRATION_STEPS, \ - R *SOURCE_DATA) : slab_field_particles<R>( \ - NAME, \ - FSOLVER, \ - NPARTICLES, \ - 3, \ - BETA_POLYS, \ - NEIGHBOURS, \ - TRAJ_SKIP, \ - INTEGRATION_STEPS) \ -{ \ - this->source_data = SOURCE_DATA; \ - this->data = FFTW(alloc_real)(this->buffered_field_descriptor->local_size); \ -} \ - \ -template<> \ -tracers<R>::~tracers() \ -{ \ - FFTW(free)(this->data); \ -} \ - \ -template <> \ -void tracers<R>::sample_vec_field(R *vec_field, double *vec_values) \ -{ \ - vec_field += this->buffer_size; \ - double *vec_local = new double[this->array_size]; \ - std::fill_n(vec_local, this->array_size, 0.0); \ - int deriv[] = {0, 0, 0}; \ - /* get grid coordinates */ \ - int *xg = new int[this->array_size]; \ - double *xx = new double[this->array_size]; \ - this->get_grid_coordinates(this->state, xg, xx); \ - /* perform interpolation */ \ - for (int p=0; p<this->nparticles; p++) \ - if (this->fs->rd->myrank == this->computing[p]) \ - this->interpolation_formula( \ - vec_field, \ - xg + p*3, \ - xx + p*3, \ - vec_local + p*3, \ - deriv); \ - MPI_Allreduce( \ - vec_local, \ - vec_values, \ - this->array_size, \ - MPI_DOUBLE, \ - MPI_SUM, \ - this->fs->rd->comm); \ - delete[] xg; \ - delete[] xx; \ - delete[] vec_local; \ -} \ - -/*****************************************************************************/ - - - -/*****************************************************************************/ -/* now actually use the macro defined above */ -TRACERS_DEFINITIONS( - FFTW_MANGLE_FLOAT, - float, - MPI_FLOAT, - MPI_COMPLEX) -TRACERS_DEFINITIONS( - FFTW_MANGLE_DOUBLE, - double, - MPI_DOUBLE, - BFPS_MPICXX_DOUBLE_COMPLEX) -/*****************************************************************************/ - - - -/*****************************************************************************/ -/* finally, force generation of code */ -template class tracers<float>; -template class tracers<double>; -/*****************************************************************************/ - diff --git a/bfps/cpp/tracers.hpp b/bfps/cpp/tracers.hpp deleted file mode 100644 index 1a063e026578dd71b9a223ee46b55d2c86d4399f..0000000000000000000000000000000000000000 --- a/bfps/cpp/tracers.hpp +++ /dev/null @@ -1,63 +0,0 @@ -/********************************************************************** -* * -* Copyright 2015 Max Planck Institute * -* for Dynamics and Self-Organization * -* * -* This file is part of bfps. * -* * -* bfps is free software: you can redistribute it and/or modify * -* it under the terms of the GNU General Public License as published * -* by the Free Software Foundation, either version 3 of the License, * -* or (at your option) any later version. * -* * -* bfps is distributed in the hope that it will be useful, * -* but WITHOUT ANY WARRANTY; without even the implied warranty of * -* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * -* GNU General Public License for more details. * -* * -* You should have received a copy of the GNU General Public License * -* along with bfps. If not, see <http://www.gnu.org/licenses/> * -* * -* Contact: Cristian.Lalescu@ds.mpg.de * -* * -**********************************************************************/ - - - -#include "slab_field_particles.hpp" - -#ifndef TRACERS - -#define TRACERS - -extern int myrank, nprocs; - -template <class rnumber> -class tracers final:public slab_field_particles<rnumber> -{ - public: - rnumber *source_data; - rnumber *data; - - /* methods */ - tracers( - const char *NAME, - fluid_solver_base<rnumber> *FSOLVER, - const int NPARTICLES, - base_polynomial_values BETA_POLYS, - const int NEIGHBOURS, - const int TRAJ_SKIP, - const int INTEGRATION_STEPS, - rnumber *SOURCE_DATA); - ~tracers(); - - void update_field(bool clip_on = true); - virtual void get_rhs(double *x, double *rhs); - virtual void jump_estimate(double *jump_length); - - void sample_vec_field(rnumber *vec_field, double *vec_values); -}; - - -#endif//TRACERS - diff --git a/cpp_build.py b/cpp_build.py index a312191aadd3c54d1f5461823e4d39fe54355e79..39371214bab4e2ac3ee9f5b064f5532811448765 100644 --- a/cpp_build.py +++ b/cpp_build.py @@ -65,6 +65,15 @@ src_file_list = ['hdf5_tools', 'Lagrange_polys', 'scope_timer'] +def get_file_dependency_list(src_file): + p = subprocess.Popen( + ['g++', '-Ibfps/cpp', '-MM', 'bfps/cpp/' + src_file + '.cpp'], + stdout = subprocess.PIPE) + out, err = p.communicate() + p.terminate() + deps = str(out, 'ASCII').replace('\\\n', '') + return deps + def get_dependency_list(): ofile = open('dependencies.txt', 'w') for src_file in src_file_list: diff --git a/setup.py b/setup.py index b427ebe77ab86ec8be96b3a751a97600c629df53..3094c692942de2191e3a924af12aad0e48f584f1 100644 --- a/setup.py +++ b/setup.py @@ -123,8 +123,6 @@ src_file_list = [ 'spline_n10', 'Lagrange_polys', 'scope_timer', - 'interpolator', - 'interpolator_base', 'full_code/test_interpolation', 'full_code/NSVEparticles', 'full_code/NSVEcomplex_particles', @@ -215,13 +213,10 @@ class CompileLibCommand(distutils.cmd.Command): if not os.path.isfile('bfps/libbfps.a'): need_to_compile = True else: + need_to_compile = False ofile = 'bfps/libbfps.a' libtime = datetime.datetime.fromtimestamp(os.path.getctime(ofile)) latest = libtime - for fname in header_list: - latest = max(latest, - datetime.datetime.fromtimestamp(os.path.getctime('bfps/' + fname))) - need_to_compile = (latest > libtime) eca = extra_compile_args eca += ['-fPIC'] if self.timing_output: @@ -238,9 +233,14 @@ class CompileLibCommand(distutils.cmd.Command): if not os.path.exists(ofile): need_to_compile_file = True else: - need_to_compile_file = (need_to_compile or - (datetime.datetime.fromtimestamp(os.path.getctime(ofile)) < - datetime.datetime.fromtimestamp(os.path.getctime(ifile)))) + need_to_compile_file = False + if not need_to_compile: + latest = libtime + dependency_list = get_file_dependency_list(fname) + for depname in dependency_list.split()[1:]: + latest = max(latest, + datetime.datetime.fromtimestamp(os.path.getctime(depname))) + need_to_compile_file = (latest > libtime) if need_to_compile_file: command_strings = [compiler, '-c'] command_strings += ['bfps/cpp/' + fname + '.cpp'] @@ -269,6 +269,15 @@ class CompileLibCommand(distutils.cmd.Command): protocol = 2) return None +def get_file_dependency_list(src_file): + p = subprocess.Popen( + ['g++', '-std=c++11', '-Ibfps/cpp', '-MM', 'bfps/cpp/' + src_file + '.cpp'], + stdout = subprocess.PIPE) + out, err = p.communicate() + p.terminate() + deps = str(out, 'ASCII').replace('\\\n', '') + return deps + from setuptools import setup setup( diff --git a/tests/base.py b/tests/base.py index 6f110716cfc01f560549093247f39d038b48a92b..542679733757b5213193f3b7f6ad02cda7e0617b 100644 --- a/tests/base.py +++ b/tests/base.py @@ -33,7 +33,6 @@ import numpy as np import matplotlib.pyplot as plt import bfps -from bfps import FluidResize from bfps.tools import particle_finite_diff_test as acceleration_test import argparse @@ -92,33 +91,13 @@ parser.add_argument( dest = 'kMeta', default = 2.0) -def double(opt): - old_simname = 'N{0:0>3x}'.format(opt.n) - new_simname = 'N{0:0>3x}'.format(opt.n*2) - c = FluidResize(fluid_precision = opt.precision) - c.launch( - args = ['--simname', old_simname + '_double', - '--wd', opt.work_dir, - '--nx', '{0}'.format(opt.n), - '--ny', '{0}'.format(opt.n), - '--nz', '{0}'.format(opt.n), - '--dst_nx', '{0}'.format(2*opt.n), - '--dst_ny', '{0}'.format(2*opt.n), - '--dst_nz', '{0}'.format(2*opt.n), - '--dst_simname', new_simname, - '--src_simname', old_simname, - '--src_iteration', '0', - '--src_wd', './', - '--niter_todo', '0']) - return None - def launch( opt, nu = None, dt = None, tracer_state_file = None, vorticity_field = None, - code_class = bfps.NavierStokes, + code_class = bfps.DNS, particle_class = 'particles', interpolator_class = 'rFFTW_interpolator'): c = code_class( diff --git a/tests/test_plain.py b/tests/test_plain.py deleted file mode 100644 index ad30224f869fc724758cc95d8b9e10da7b4ca2d4..0000000000000000000000000000000000000000 --- a/tests/test_plain.py +++ /dev/null @@ -1,156 +0,0 @@ -#! /usr/bin/env python3 -####################################################################### -# # -# Copyright 2015 Max Planck Institute # -# for Dynamics and Self-Organization # -# # -# This file is part of bfps. # -# # -# bfps is free software: you can redistribute it and/or modify # -# it under the terms of the GNU General Public License as published # -# by the Free Software Foundation, either version 3 of the License, # -# or (at your option) any later version. # -# # -# bfps is distributed in the hope that it will be useful, # -# but WITHOUT ANY WARRANTY; without even the implied warranty of # -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # -# GNU General Public License for more details. # -# # -# You should have received a copy of the GNU General Public License # -# along with bfps. If not, see <http://www.gnu.org/licenses/> # -# # -# Contact: Cristian.Lalescu@ds.mpg.de # -# # -####################################################################### - - - -#from base import * -import bfps -from bfps.tools import particle_finite_diff_test as acceleration_test -import sys - -import numpy as np -import matplotlib.pyplot as plt - -#parser.add_argument('--multiplejob', -# dest = 'multiplejob', action = 'store_true') -# -#parser.add_argument( -# '--particle-class', -# default = 'particles', -# dest = 'particle_class', -# type = str) -# -#parser.add_argument( -# '--interpolator-class', -# default = 'interpolator', -# dest = 'interpolator_class', -# type = str) - -class NSPlain(bfps.NavierStokes): - def specific_parser_arguments( - self, - parser): - bfps.NavierStokes.specific_parser_arguments(self, parser) - parser.add_argument( - '--particle-class', - default = 'rFFTW_distributed_particles', - dest = 'particle_class', - type = str) - parser.add_argument( - '--interpolator-class', - default = 'rFFTW_interpolator', - dest = 'interpolator_class', - type = str) - parser.add_argument('--neighbours', - type = int, - dest = 'neighbours', - default = 3) - parser.add_argument('--smoothness', - type = int, - dest = 'smoothness', - default = 2) - return None - def launch( - self, - args = [], - **kwargs): - opt = self.prepare_launch(args = args) - self.fill_up_fluid_code() - if type(opt.nparticles) == int: - if opt.nparticles > 0: - self.add_3D_rFFTW_field( - name = 'rFFTW_acc') - self.add_interpolator( - name = 'spline', - neighbours = opt.neighbours, - smoothness = opt.smoothness, - class_name = opt.interpolator_class) - self.add_particles( - kcut = ['fs->kM/2', 'fs->kM/3'], - integration_steps = 3, - interpolator = 'spline', - class_name = opt.particle_class) - self.add_particles( - integration_steps = [2, 3, 4, 6], - interpolator = 'spline', - acc_name = 'rFFTW_acc', - class_name = opt.particle_class) - self.finalize_code() - self.launch_jobs(opt = opt) - return None - -def plain(args): - wd = opt.work_dir - opt.work_dir = wd + '/N{0:0>3x}_1'.format(opt.n) - c0 = launch(opt, dt = 0.2/opt.n, - particle_class = opt.particle_class, - interpolator_class = opt.interpolator_class) - c0.compute_statistics() - print ('Re = {0:.0f}'.format(c0.statistics['Re'])) - print ('Rlambda = {0:.0f}'.format(c0.statistics['Rlambda'])) - print ('Lint = {0:.4e}, etaK = {1:.4e}'.format(c0.statistics['Lint'], c0.statistics['etaK'])) - print ('Tint = {0:.4e}, tauK = {1:.4e}'.format(c0.statistics['Tint'], c0.statistics['tauK'])) - print ('kMetaK = {0:.4e}'.format(c0.statistics['kMeta'])) - for s in range(c0.particle_species): - acceleration_test(c0, species = s, m = 1) - if not opt.multiplejob: - return None - assert(opt.niter_todo % 3 == 0) - opt.work_dir = wd + '/N{0:0>3x}_2'.format(opt.n) - opt.njobs *= 2 - opt.niter_todo = opt.niter_todo//2 - c1 = launch(opt, dt = c0.parameters['dt'], - particle_class = opt.particle_class, - interpolator_class = opt.interpolator_class) - c1.compute_statistics() - opt.work_dir = wd + '/N{0:0>3x}_3'.format(opt.n) - opt.njobs = 3*opt.njobs//2 - opt.niter_todo = 2*opt.niter_todo//3 - c2 = launch(opt, dt = c0.parameters['dt'], - particle_class = opt.particle_class, - interpolator_class = opt.interpolator_class) - c2.compute_statistics() - compare_stats(opt, c0, c1) - compare_stats(opt, c0, c2) - return None - -if __name__ == '__main__': - c0 = NSPlain() - c0.launch( - ['-n', '32', - '--ncpu', '4', - '--nparticles', '1000', - '--niter_todo', '48', - '--wd', 'data/single'] + - sys.argv[1:]) - c0.compute_statistics() - print ('Re = {0:.0f}'.format(c0.statistics['Re'])) - print ('Rlambda = {0:.0f}'.format(c0.statistics['Rlambda'])) - print ('Lint = {0:.4e}, etaK = {1:.4e}'.format(c0.statistics['Lint'], c0.statistics['etaK'])) - print ('Tint = {0:.4e}, tauK = {1:.4e}'.format(c0.statistics['Tint'], c0.statistics['tauK'])) - print ('kMetaK = {0:.4e}'.format(c0.statistics['kMeta'])) - for s in range(c0.particle_species): - acceleration_test(c0, species = s, m = 1) - diff --git a/tests/test_vorticity_equation.py b/tests/test_vorticity_equation.py index dfaccb8bf352bdd252e5edf29f6e7d711689f7dc..e492bfa5c75d0f2f2b9989cccef49964b8bc90b4 100644 --- a/tests/test_vorticity_equation.py +++ b/tests/test_vorticity_equation.py @@ -273,12 +273,13 @@ def main(): particle_initial_condition[..., 2] = yvals[None, :, None] particle_initial_condition = particle_initial_condition.reshape(-1, 3) nparticles = nparticles**2 - c = bfps.NavierStokes(simname = 'fluid_solver') + c = bfps.DNS(simname = 'fluid_solver') if run_NS: run_NSVE = True subprocess.call('rm *fluid_solver* NavierStokes*', shell = True) c.launch( - ['-n', '32', + ['NSVE', + '-n', '32', '--simname', 'fluid_solver', '--ncpu', '4', '--niter_todo', '{0}'.format(niterations), @@ -298,9 +299,10 @@ def main(): f = h5py.File('vorticity_equation_checkpoint_0.h5', 'w') f['vorticity/complex/0'] = data f.close() - c = bfps.NSVorticityEquation() + c = bfps.DNS() c.launch( - ['-n', '32', + ['NSVEparticles', + '-n', '32', '--simname', 'vorticity_equation', '--np', '4', '--ntpp', '1',