diff --git a/bfps/FluidConvert.py b/bfps/FluidConvert.py index 14be9b985139fabf3b7e1cda1b5f9ee9618a8307..ca462ed3c3700455f5e3f6591003224f5c1b2452 100644 --- a/bfps/FluidConvert.py +++ b/bfps/FluidConvert.py @@ -43,7 +43,7 @@ class FluidConvert(_fluid_particle_base): work_dir = './', simname = 'test', fluid_precision = 'single', - use_fftw_wisdom = True): + use_fftw_wisdom = False): _fluid_particle_base.__init__( self, name = name + '-' + fluid_precision, @@ -109,11 +109,13 @@ class FluidConvert(_fluid_particle_base): """ self.fluid_end += 'delete fs;\n' return None - def add_parser_arguments( + def specific_parser_arguments( self, parser): - _fluid_particle_base.add_parser_arguments(self, parser) - self.parameters_to_parser_arguments(parser, parameters = self.spec_parameters) + _fluid_particle_base.specific_parser_arguments(self, parser) + self.parameters_to_parser_arguments( + parser, + parameters = self.spec_parameters) return None def launch( self, @@ -125,13 +127,13 @@ class FluidConvert(_fluid_particle_base): self.pars_from_namespace( opt, parameters = self.spec_parameters) - self.set_host_info(bfps.host_info) self.rewrite_par( group = 'conversion_parameters', parameters = self.spec_parameters) - self.run( - ncpu = opt.ncpu, - err_file = 'err_convert', - out_file = 'out_convert') + self.run(ncpu = opt.ncpu, + hours = opt.minutes // 60, + minutes = opt.minutes % 60, + err_file = 'err_convert', + out_file = 'out_convert') return None diff --git a/bfps/NSVorticityEquation.py b/bfps/NSVorticityEquation.py new file mode 100644 index 0000000000000000000000000000000000000000..36ccfbc349a84986b33b84c7fa50cb42d4937b4b --- /dev/null +++ b/bfps/NSVorticityEquation.py @@ -0,0 +1,610 @@ +####################################################################### +# # +# Copyright 2015 Max Planck Institute # +# for Dynamics and Self-Organization # +# # +# This file is part of bfps. # +# # +# bfps is free software: you can redistribute it and/or modify # +# it under the terms of the GNU General Public License as published # +# by the Free Software Foundation, either version 3 of the License, # +# or (at your option) any later version. # +# # +# bfps is distributed in the hope that it will be useful, # +# but WITHOUT ANY WARRANTY; without even the implied warranty of # +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # +# GNU General Public License for more details. # +# # +# You should have received a copy of the GNU General Public License # +# along with bfps. If not, see <http://www.gnu.org/licenses/> # +# # +# Contact: Cristian.Lalescu@ds.mpg.de # +# # +####################################################################### + + + +import sys +import os +import numpy as np +import h5py +import argparse + +import bfps +import bfps.tools +from bfps._code import _code +from bfps._fluid_base import _fluid_particle_base + +class NSVorticityEquation(_fluid_particle_base): + def __init__( + self, + name = 'NSVE-v' + bfps.__version__, + work_dir = './', + simname = 'test', + fluid_precision = 'single', + fftw_plan_rigor = 'FFTW_MEASURE', + use_fftw_wisdom = True): + self.fftw_plan_rigor = fftw_plan_rigor + _fluid_particle_base.__init__( + self, + name = name + '-' + fluid_precision, + work_dir = work_dir, + simname = simname, + dtype = fluid_precision, + use_fftw_wisdom = use_fftw_wisdom) + self.parameters['nu'] = 0.1 + self.parameters['fmode'] = 1 + self.parameters['famplitude'] = 0.5 + self.parameters['fk0'] = 2.0 + self.parameters['fk1'] = 4.0 + self.parameters['forcing_type'] = 'linear' + self.parameters['histogram_bins'] = 256 + self.parameters['max_velocity_estimate'] = 1.0 + self.parameters['max_vorticity_estimate'] = 1.0 + self.file_datasets_grow = """ + //begincpp + hid_t group; + group = H5Gopen(stat_file, "/statistics", H5P_DEFAULT); + H5Ovisit(group, H5_INDEX_NAME, H5_ITER_NATIVE, grow_statistics_dataset, NULL); + H5Gclose(group); + //endcpp + """ + self.style = {} + self.statistics = {} + self.fluid_output = """ + { + char file_name[512]; + fs->fill_up_filename("cvorticity", file_name); + fs->cvorticity->io( + file_name, + "vorticity", + fs->iteration, + false); + } + """ + # vorticity_equation specific things + self.includes += '#include "vorticity_equation.hpp"\n' + self.store_kspace = """ + //begincpp + if (myrank == 0 && iteration == 0) + { + DEBUG_MSG("about to store kspace\\n"); + TIMEZONE("fluid_base::store_kspace"); + hsize_t dims[4]; + hid_t space, dset; + // store kspace information + hid_t parameter_file = stat_file; + //char fname[256]; + //sprintf(fname, "%s.h5", simname); + //parameter_file = H5Fopen(fname, H5F_ACC_RDWR, H5P_DEFAULT); + dset = H5Dopen(parameter_file, "/kspace/kshell", H5P_DEFAULT); + space = H5Dget_space(dset); + H5Sget_simple_extent_dims(space, dims, NULL); + H5Sclose(space); + if (fs->kk->nshells != dims[0]) + { + DEBUG_MSG( + "ERROR: computed nshells %d not equal to data file nshells %d\\n", + fs->kk->nshells, dims[0]); + } + H5Dwrite(dset, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, &fs->kk->kshell.front()); + H5Dclose(dset); + dset = H5Dopen(parameter_file, "/kspace/nshell", H5P_DEFAULT); + H5Dwrite(dset, H5T_NATIVE_INT64, H5S_ALL, H5S_ALL, H5P_DEFAULT, &fs->kk->nshell.front()); + H5Dclose(dset); + dset = H5Dopen(parameter_file, "/kspace/kM", H5P_DEFAULT); + H5Dwrite(dset, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, &fs->kk->kM); + H5Dclose(dset); + dset = H5Dopen(parameter_file, "/kspace/dk", H5P_DEFAULT); + H5Dwrite(dset, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, &fs->kk->dk); + H5Dclose(dset); + //H5Fclose(parameter_file); + DEBUG_MSG("finished storing kspace\\n"); + } + //endcpp + """ + return None + def create_stat_output( + self, + dset_name, + data_buffer, + data_type = 'H5T_NATIVE_DOUBLE', + size_setup = None, + close_spaces = True): + new_stat_output_txt = 'Cdset = H5Dopen(stat_file, "{0}", H5P_DEFAULT);\n'.format(dset_name) + if not type(size_setup) == type(None): + new_stat_output_txt += ( + size_setup + + 'wspace = H5Dget_space(Cdset);\n' + + 'ndims = H5Sget_simple_extent_dims(wspace, dims, NULL);\n' + + 'mspace = H5Screate_simple(ndims, count, NULL);\n' + + 'H5Sselect_hyperslab(wspace, H5S_SELECT_SET, offset, NULL, count, NULL);\n') + new_stat_output_txt += ('H5Dwrite(Cdset, {0}, mspace, wspace, H5P_DEFAULT, {1});\n' + + 'H5Dclose(Cdset);\n').format(data_type, data_buffer) + if close_spaces: + new_stat_output_txt += ('H5Sclose(mspace);\n' + + 'H5Sclose(wspace);\n') + return new_stat_output_txt + def write_fluid_stats(self): + self.fluid_includes += '#include <cmath>\n' + self.fluid_includes += '#include "fftw_tools.hpp"\n' + self.stat_src += """ + //begincpp + DEBUG_MSG("inside stat src\\n"); + hid_t stat_group; + if (myrank == 0) + stat_group = H5Gopen(stat_file, "statistics", H5P_DEFAULT); + fs->compute_velocity(fs->cvorticity); + *tmp_vec_field = fs->cvelocity->get_cdata(); + tmp_vec_field->compute_stats( + fs->kk, + stat_group, + "velocity", + fs->iteration / niter_stat, + max_velocity_estimate/sqrt(3)); + DEBUG_MSG("after stats for %d\\n", fs->iteration); + //endcpp + """ + self.stat_src += """ + //begincpp + *tmp_vec_field = fs->cvorticity->get_cdata(); + DEBUG_MSG("%g %g\\n", + tmp_vec_field->cval(tmp_vec_field->get_cindex(2, 1, 1), 0, 0), + tmp_vec_field->cval(tmp_vec_field->get_cindex(2, 1, 1), 0, 1)); + tmp_vec_field->compute_stats( + fs->kk, + stat_group, + "vorticity", + fs->iteration / niter_stat, + max_vorticity_estimate/sqrt(3)); + DEBUG_MSG("after vort stats for %d\\n", fs->iteration); + //endcpp + """ + self.stat_src += """ + //begincpp + if (myrank == 0) + H5Gclose(stat_group); + if (myrank == 0) + {{ + hid_t Cdset, wspace, mspace; + int ndims; + hsize_t count[4], offset[4], dims[4]; + offset[0] = fs->iteration/niter_stat; + offset[1] = 0; + offset[2] = 0; + offset[3] = 0; + //endcpp + """.format(self.C_dtype) + if self.dtype == np.float32: + field_H5T = 'H5T_NATIVE_FLOAT' + elif self.dtype == np.float64: + field_H5T = 'H5T_NATIVE_DOUBLE' + self.stat_src += self.create_stat_output( + '/statistics/xlines/velocity', + 'fs->rvelocity->get_rdata()', + data_type = field_H5T, + size_setup = """ + count[0] = 1; + count[1] = nx; + count[2] = 3; + """, + close_spaces = False) + self.stat_src += self.create_stat_output( + '/statistics/xlines/vorticity', + 'fs->rvorticity->get_rdata()', + data_type = field_H5T) + self.stat_src += '}\n' + return None + def fill_up_fluid_code(self): + self.fluid_includes += '#include <cstring>\n' + self.fluid_variables += ( + 'vorticity_equation<{0}, FFTW> *fs;\n'.format(self.C_dtype) + + 'field<{0}, FFTW, THREE> *tmp_vec_field;\n'.format(self.C_dtype) + + 'field<{0}, FFTW, ONE> *tmp_scal_field;\n'.format(self.C_dtype)) + self.fluid_definitions += """ + typedef struct {{ + {0} re; + {0} im; + }} tmp_complex_type; + """.format(self.C_dtype) + self.write_fluid_stats() + if self.dtype == np.float32: + field_H5T = 'H5T_NATIVE_FLOAT' + elif self.dtype == np.float64: + field_H5T = 'H5T_NATIVE_DOUBLE' + self.fluid_start += """ + //begincpp + char fname[512]; + fs = new vorticity_equation<{0}, FFTW>( + simname, + nx, ny, nz, + dkx, dky, dkz, + {1}); + tmp_vec_field = new field<{0}, FFTW, THREE>( + nx, ny, nz, + MPI_COMM_WORLD, + {1}); + tmp_scal_field = new field<{0}, FFTW, ONE>( + nx, ny, nz, + MPI_COMM_WORLD, + {1}); + fs->nu = nu; + fs->fmode = fmode; + fs->famplitude = famplitude; + fs->fk0 = fk0; + fs->fk1 = fk1; + strncpy(fs->forcing_type, forcing_type, 128); + fs->iteration = iteration; + {{ + char file_name[512]; + fs->fill_up_filename("cvorticity", file_name); + fs->cvorticity->real_space_representation = false; + fs->cvorticity->io( + file_name, + "vorticity", + fs->iteration, + true); + fs->kk->template low_pass<{0}, THREE>(fs->cvorticity->get_cdata(), fs->kk->kM); + fs->kk->template force_divfree<{0}>(fs->cvorticity->get_cdata()); + }} + //endcpp + """.format(self.C_dtype, self.fftw_plan_rigor, field_H5T) + self.fluid_start += self.store_kspace + self.fluid_loop = ('fs->step(dt);\n' + + 'DEBUG_MSG("this is step %d\\n", fs->iteration);\n') + self.fluid_loop += ('if (fs->iteration % niter_out == 0)\n{\n' + + self.fluid_output + '\n}\n') + self.fluid_end = ('if (fs->iteration % niter_out != 0)\n{\n' + + self.fluid_output + '\n}\n' + + 'delete fs;\n' + + 'delete tmp_vec_field;\n' + + 'delete tmp_scal_field;\n') + return None + def get_postprocess_file_name(self): + return os.path.join(self.work_dir, self.simname + '_postprocess.h5') + def get_postprocess_file(self): + return h5py.File(self.get_postprocess_file_name(), 'r') + def compute_statistics(self, iter0 = 0, iter1 = None): + """Run basic postprocessing on raw data. + The energy spectrum :math:`E(t, k)` and the enstrophy spectrum + :math:`\\frac{1}{2}\omega^2(t, k)` are computed from the + + .. math:: + + \sum_{k \\leq \\|\\mathbf{k}\\| \\leq k+dk}\\hat{u_i} \\hat{u_j}^*, \\hskip .5cm + \sum_{k \\leq \\|\\mathbf{k}\\| \\leq k+dk}\\hat{\omega_i} \\hat{\\omega_j}^* + + tensors, and the enstrophy spectrum is also used to + compute the dissipation :math:`\\varepsilon(t)`. + These basic quantities are stored in a newly created HDF5 file, + ``simname_postprocess.h5``. + """ + if len(list(self.statistics.keys())) > 0: + return None + self.read_parameters() + with self.get_data_file() as data_file: + if 'moments' not in data_file['statistics'].keys(): + return None + iter0 = min((data_file['statistics/moments/velocity'].shape[0] * + self.parameters['niter_stat']-1), + iter0) + if type(iter1) == type(None): + iter1 = data_file['iteration'].value + else: + iter1 = min(data_file['iteration'].value, iter1) + ii0 = iter0 // self.parameters['niter_stat'] + ii1 = iter1 // self.parameters['niter_stat'] + self.statistics['kshell'] = data_file['kspace/kshell'].value + self.statistics['kM'] = data_file['kspace/kM'].value + self.statistics['dk'] = data_file['kspace/dk'].value + computation_needed = True + pp_file = h5py.File(self.get_postprocess_file_name(), 'a') + if 'ii0' in pp_file.keys(): + computation_needed = not (ii0 == pp_file['ii0'].value and + ii1 == pp_file['ii1'].value) + if computation_needed: + for k in pp_file.keys(): + del pp_file[k] + if computation_needed: + pp_file['iter0'] = iter0 + pp_file['iter1'] = iter1 + pp_file['ii0'] = ii0 + pp_file['ii1'] = ii1 + pp_file['t'] = (self.parameters['dt']* + self.parameters['niter_stat']* + (np.arange(ii0, ii1+1).astype(np.float))) + pp_file['energy(t, k)'] = ( + data_file['statistics/spectra/velocity_velocity'][ii0:ii1+1, :, 0, 0] + + data_file['statistics/spectra/velocity_velocity'][ii0:ii1+1, :, 1, 1] + + data_file['statistics/spectra/velocity_velocity'][ii0:ii1+1, :, 2, 2])/2 + pp_file['enstrophy(t, k)'] = ( + data_file['statistics/spectra/vorticity_vorticity'][ii0:ii1+1, :, 0, 0] + + data_file['statistics/spectra/vorticity_vorticity'][ii0:ii1+1, :, 1, 1] + + data_file['statistics/spectra/vorticity_vorticity'][ii0:ii1+1, :, 2, 2])/2 + pp_file['vel_max(t)'] = data_file['statistics/moments/velocity'] [ii0:ii1+1, 9, 3] + pp_file['renergy(t)'] = data_file['statistics/moments/velocity'][ii0:ii1+1, 2, 3]/2 + for k in ['t', + 'energy(t, k)', + 'enstrophy(t, k)', + 'vel_max(t)', + 'renergy(t)']: + if k in pp_file.keys(): + self.statistics[k] = pp_file[k].value + self.compute_time_averages() + return None + def compute_time_averages(self): + """Compute easy stats. + + Further computation of statistics based on the contents of + ``simname_postprocess.h5``. + Standard quantities are as follows + (consistent with [Ishihara]_): + + .. math:: + + U_{\\textrm{int}}(t) = \\sqrt{\\frac{2E(t)}{3}}, \\hskip .5cm + L_{\\textrm{int}}(t) = \\frac{\pi}{2U_{int}^2(t)} \\int \\frac{dk}{k} E(t, k), \\hskip .5cm + T_{\\textrm{int}}(t) = + \\frac{L_{\\textrm{int}}(t)}{U_{\\textrm{int}}(t)} + + \\eta_K = \\left(\\frac{\\nu^3}{\\varepsilon}\\right)^{1/4}, \\hskip .5cm + \\tau_K = \\left(\\frac{\\nu}{\\varepsilon}\\right)^{1/2}, \\hskip .5cm + \\lambda = \\sqrt{\\frac{15 \\nu U_{\\textrm{int}}^2}{\\varepsilon}} + + Re = \\frac{U_{\\textrm{int}} L_{\\textrm{int}}}{\\nu}, \\hskip + .5cm + R_{\\lambda} = \\frac{U_{\\textrm{int}} \\lambda}{\\nu} + + .. [Ishihara] T. Ishihara et al, + *Small-scale statistics in high-resolution direct numerical + simulation of turbulence: Reynolds number dependence of + one-point velocity gradient statistics*. + J. Fluid Mech., + **592**, 335-366, 2007 + """ + for key in ['energy', 'enstrophy']: + self.statistics[key + '(t)'] = (self.statistics['dk'] * + np.sum(self.statistics[key + '(t, k)'], axis = 1)) + self.statistics['Uint(t)'] = np.sqrt(2*self.statistics['energy(t)'] / 3) + self.statistics['Lint(t)'] = ((self.statistics['dk']*np.pi / + (2*self.statistics['Uint(t)']**2)) * + np.nansum(self.statistics['energy(t, k)'] / + self.statistics['kshell'][None, :], axis = 1)) + for key in ['energy', + 'enstrophy', + 'vel_max', + 'Uint', + 'Lint']: + if key + '(t)' in self.statistics.keys(): + self.statistics[key] = np.average(self.statistics[key + '(t)'], axis = 0) + for suffix in ['', '(t)']: + self.statistics['diss' + suffix] = (self.parameters['nu'] * + self.statistics['enstrophy' + suffix]*2) + self.statistics['etaK' + suffix] = (self.parameters['nu']**3 / + self.statistics['diss' + suffix])**.25 + self.statistics['tauK' + suffix] = (self.parameters['nu'] / + self.statistics['diss' + suffix])**.5 + self.statistics['Re' + suffix] = (self.statistics['Uint' + suffix] * + self.statistics['Lint' + suffix] / + self.parameters['nu']) + self.statistics['lambda' + suffix] = (15 * self.parameters['nu'] * + self.statistics['Uint' + suffix]**2 / + self.statistics['diss' + suffix])**.5 + self.statistics['Rlambda' + suffix] = (self.statistics['Uint' + suffix] * + self.statistics['lambda' + suffix] / + self.parameters['nu']) + self.statistics['kMeta' + suffix] = (self.statistics['kM'] * + self.statistics['etaK' + suffix]) + if self.parameters['dealias_type'] == 1: + self.statistics['kMeta' + suffix] *= 0.8 + self.statistics['Tint'] = self.statistics['Lint'] / self.statistics['Uint'] + self.statistics['Taylor_microscale'] = self.statistics['lambda'] + return None + def set_plt_style( + self, + style = {'dashes' : (None, None)}): + self.style.update(style) + return None + def read_cfield( + self, + field_name = 'vorticity', + iteration = 0): + """read the Fourier representation of a vector field. + + Read the binary file containing iteration ``iteration`` of the + field ``field_name``, and return it as a properly shaped + ``numpy.memmap`` object. + """ + return np.memmap( + os.path.join(self.work_dir, + self.simname + '_{0}_i{1:0>5x}'.format('c' + field_name, iteration)), + dtype = self.ctype, + mode = 'r', + shape = (self.parameters['ny'], + self.parameters['nz'], + self.parameters['nx']//2+1, + 3)) + def write_par( + self, + iter0 = 0): + _fluid_particle_base.write_par(self, iter0 = iter0) + with h5py.File(self.get_data_file_name(), 'r+') as ofile: + kspace = self.get_kspace() + nshells = kspace['nshell'].shape[0] + vec_stat_datasets = ['velocity', 'vorticity'] + scal_stat_datasets = [] + for k in vec_stat_datasets: + time_chunk = 2**20//(8*3*self.parameters['nx']) # FIXME: use proper size of self.dtype + time_chunk = max(time_chunk, 1) + ofile.create_dataset('statistics/xlines/' + k, + (1, self.parameters['nx'], 3), + chunks = (time_chunk, self.parameters['nx'], 3), + maxshape = (None, self.parameters['nx'], 3), + dtype = self.dtype) + for k in vec_stat_datasets: + time_chunk = 2**20//(8*3*3*nshells) + time_chunk = max(time_chunk, 1) + ofile.create_dataset('statistics/spectra/' + k + '_' + k, + (1, nshells, 3, 3), + chunks = (time_chunk, nshells, 3, 3), + maxshape = (None, nshells, 3, 3), + dtype = np.float64) + time_chunk = 2**20//(8*4*10) + time_chunk = max(time_chunk, 1) + a = ofile.create_dataset('statistics/moments/' + k, + (1, 10, 4), + chunks = (time_chunk, 10, 4), + maxshape = (None, 10, 4), + dtype = np.float64) + time_chunk = 2**20//(8*4*self.parameters['histogram_bins']) + time_chunk = max(time_chunk, 1) + ofile.create_dataset('statistics/histograms/' + k, + (1, + self.parameters['histogram_bins'], + 4), + chunks = (time_chunk, + self.parameters['histogram_bins'], + 4), + maxshape = (None, + self.parameters['histogram_bins'], + 4), + dtype = np.int64) + return None + def specific_parser_arguments( + self, + parser): + _fluid_particle_base.specific_parser_arguments(self, parser) + parser.add_argument( + '--src-wd', + type = str, + dest = 'src_work_dir', + default = '') + parser.add_argument( + '--src-simname', + type = str, + dest = 'src_simname', + default = '') + parser.add_argument( + '--src-iteration', + type = int, + dest = 'src_iteration', + default = 0) + parser.add_argument( + '--njobs', + type = int, dest = 'njobs', + default = 1) + parser.add_argument( + '--kMeta', + type = float, + dest = 'kMeta', + default = 2.0) + parser.add_argument( + '--dtfactor', + type = float, + dest = 'dtfactor', + default = 0.5, + help = 'dt is computed as DTFACTOR / N') + return None + def prepare_launch( + self, + args = []): + """Set up reasonable parameters. + + With the default Lundgren forcing applied in the band [2, 4], + we can estimate the dissipation, therefore we can estimate + :math:`k_M \\eta_K` and constrain the viscosity. + + In brief, the command line parameter :math:`k_M \\eta_K` is + used in the following formula for :math:`\\nu` (:math:`N` is the + number of real space grid points per coordinate): + + .. math:: + + \\nu = \\left(\\frac{2 k_M \\eta_K}{N} \\right)^{4/3} + + With this choice, the average dissipation :math:`\\varepsilon` + will be close to 0.4, and the integral scale velocity will be + close to 0.77, yielding the approximate value for the Taylor + microscale and corresponding Reynolds number: + + .. math:: + + \\lambda \\approx 4.75\\left(\\frac{2 k_M \\eta_K}{N} \\right)^{4/6}, \\hskip .5in + R_\\lambda \\approx 3.7 \\left(\\frac{N}{2 k_M \\eta_K} \\right)^{4/6} + + """ + opt = _code.prepare_launch(self, args = args) + self.parameters['nu'] = (opt.kMeta * 2 / opt.n)**(4./3) + self.parameters['dt'] = (opt.dtfactor / opt.n) + # custom famplitude for 288 and 576 + if opt.n == 288: + self.parameters['famplitude'] = 0.45 + elif opt.n == 576: + self.parameters['famplitude'] = 0.47 + if ((self.parameters['niter_todo'] % self.parameters['niter_out']) != 0): + self.parameters['niter_out'] = self.parameters['niter_todo'] + if len(opt.src_work_dir) == 0: + opt.src_work_dir = os.path.realpath(opt.work_dir) + self.pars_from_namespace(opt) + return opt + def launch( + self, + args = [], + **kwargs): + opt = self.prepare_launch(args = args) + self.fill_up_fluid_code() + self.finalize_code() + self.launch_jobs(opt = opt) + return None + def launch_jobs( + self, + opt = None): + if not os.path.exists(os.path.join(self.work_dir, self.simname + '.h5')): + self.write_par() + init_condition_file = os.path.join( + self.work_dir, + self.simname + '_cvorticity_i{0:0>5x}.h5'.format(0)) + if not os.path.exists(init_condition_file): + if len(opt.src_simname) > 0: + src_file = os.path.join( + os.path.realpath(opt.src_work_dir), + opt.src_simname + '_cvorticity_i{0:0>5x}.h5'.format(opt.src_iteration)) + os.symlink(src_file, init_condition_file) + else: + data = self.generate_vector_field( + write_to_file = False, + spectra_slope = 2.0, + amplitude = 0.05) + f = h5py.File(init_condition_file, 'w') + f['vorticity/complex/{0}'.format(0)] = data + f.close() + self.run( + ncpu = opt.ncpu, + njobs = opt.njobs, + hours = opt.minutes // 60, + minutes = opt.minutes % 60) + return None + +if __name__ == '__main__': + pass + diff --git a/bfps/NavierStokes.py b/bfps/NavierStokes.py index d3eaea031d53dce4ac8394c8d9a3026c7147c925..0c9ebcb9ab3454a9c35f0a746d88b13aefd63eb6 100644 --- a/bfps/NavierStokes.py +++ b/bfps/NavierStokes.py @@ -898,7 +898,6 @@ class NavierStokes(_fluid_particle_base): chunks = (time_chunk, 1, 1) + dims[3:] else: chunks = (time_chunk, 1) + dims[2:] - chunks = (time_chunk, 1, 1) + dims[3:] bfps.tools.create_alloc_early_dataset( ofile, '/tracers{0}/rhs'.format(s), @@ -913,6 +912,8 @@ class NavierStokes(_fluid_particle_base): (1,) + pbase_shape + (3,), (h5py.h5s.UNLIMITED,) + pbase_shape + (3,), chunks) + # "velocity" is sampled, single precision is enough + # for the results we are interested in. bfps.tools.create_alloc_early_dataset( ofile, '/tracers{0}/velocity'.format(s), diff --git a/bfps/__init__.py b/bfps/__init__.py index 4a90f95268cffe3b0c2e1d68d7f4763a4c142e84..09663e1da56539eb51d257032444b38ba7096bc9 100644 --- a/bfps/__init__.py +++ b/bfps/__init__.py @@ -49,4 +49,5 @@ from host_information import host_info from .FluidConvert import FluidConvert from .FluidResize import FluidResize from .NavierStokes import NavierStokes +from .NSVorticityEquation import NSVorticityEquation diff --git a/bfps/__main__.py b/bfps/__main__.py index a26d84d0e918cebe1a9351ca20b5249418d6a3c6..9db5e350340e67dfe99c5a40e3027b489399a42e 100644 --- a/bfps/__main__.py +++ b/bfps/__main__.py @@ -29,6 +29,7 @@ import argparse import bfps from .NavierStokes import NavierStokes +from .NSVorticityEquation import NSVorticityEquation from .FluidResize import FluidResize from .FluidConvert import FluidConvert from .NSManyParticles import NSManyParticles @@ -45,6 +46,12 @@ def main(): 'NS', 'NS-single', 'NS-double'] + NSVEoptions = ['NSVorticityEquation', + 'NSVorticityEquation-single', + 'NSVorticityEquation-double', + 'NSVE', + 'NSVE-single', + 'NSVE-double'] FRoptions = ['FluidResize', 'FluidResize-single', 'FluidResize-double', @@ -57,7 +64,7 @@ def main(): 'NSManyParticles-double'] parser.add_argument( 'base_class', - choices = NSoptions + FRoptions + FCoptions + NSMPopt, + choices = NSoptions + NSVEoptions + FRoptions + FCoptions + NSMPopt, type = str) # first option is the choice of base class or -h or -v # all other options are passed on to the base_class instance @@ -70,6 +77,8 @@ def main(): precision = 'single' if opt.base_class in NSoptions: base_class = NavierStokes + if opt.base_class in NSVEoptions: + base_class = NSVorticityEquation elif opt.base_class in FRoptions: base_class = FluidResize elif opt.base_class in FCoptions: diff --git a/bfps/_base.py b/bfps/_base.py index 71b1fcac23984da5b7c73d672ef44e6ab3127c4f..939c8766fc32c2c063e83a2e90758d4078de94a0 100644 --- a/bfps/_base.py +++ b/bfps/_base.py @@ -83,8 +83,7 @@ class _base(object): 'hid_t dset, memtype, space;\n' + 'char fname[256];\n' + 'hsize_t dims[1];\n' + - 'char *string_data;\n' + - #'char string_data[512];\n' + + 'char string_data[512];\n' + 'sprintf(fname, "%s.h5", simname);\n' + 'parameter_file = H5Fopen(fname, H5F_ACC_RDONLY, H5P_DEFAULT);\n') for i in range(len(key)): @@ -95,14 +94,8 @@ class _base(object): elif type(parameters[key[i]]) == str: src_txt += ('space = H5Dget_space(dset);\n' + 'memtype = H5Dget_type(dset);\n' + - #'H5Sget_simple_extent_dims(space, dims, NULL);\n' + - #'DEBUG_MSG("dims[0] = %ld\\n", dims[0]);\n' + - 'string_data = (char*)malloc(512*sizeof(char));\n' + 'H5Dread(dset, memtype, H5S_ALL, H5S_ALL, H5P_DEFAULT, &string_data);\n' + 'sprintf({0}, "%s", string_data);\n'.format(key[i]) + - #'DEBUG_MSG("{0} = %s\\n", {0});\n'.format(key[i]) + - #'delete[] string_data;\n' + - 'free(string_data);\n' 'H5Sclose(space);\n' + 'H5Tclose(memtype);\n') elif type(parameters[key[i]]) == np.ndarray: diff --git a/bfps/cpp/distributed_particles.cpp b/bfps/cpp/distributed_particles.cpp index c6ae8a282dc0b695354506ccaec873930f30572d..73fd0275d8138d41bb4ee7fbc28e2d41e8017661 100644 --- a/bfps/cpp/distributed_particles.cpp +++ b/bfps/cpp/distributed_particles.cpp @@ -45,17 +45,17 @@ template <particle_types particle_type, class rnumber, int interp_neighbours> distributed_particles<particle_type, rnumber, interp_neighbours>::distributed_particles( const char *NAME, const hid_t data_file_id, - interpolator<rnumber, interp_neighbours> *FIELD, + interpolator<rnumber, interp_neighbours> *VEL, const int TRAJ_SKIP, const int INTEGRATION_STEPS) : particles_io_base<particle_type>( NAME, TRAJ_SKIP, data_file_id, - FIELD->descriptor->comm) + VEL->descriptor->comm) { assert((INTEGRATION_STEPS <= 6) && (INTEGRATION_STEPS >= 1)); - this->vel = FIELD; + this->vel = VEL; this->rhs.resize(INTEGRATION_STEPS); this->integration_steps = INTEGRATION_STEPS; this->state.reserve(2*this->nparticles / this->nprocs); diff --git a/bfps/cpp/field.cpp b/bfps/cpp/field.cpp index 62941fc4290dacdd49bca0ba4cf088459607bad3..5cc2e7a32a06a239714e22c169463b0eaed07b1c 100644 --- a/bfps/cpp/field.cpp +++ b/bfps/cpp/field.cpp @@ -23,6 +23,7 @@ **********************************************************************/ +#include <sys/stat.h> #include <cmath> #include <cstdlib> #include <algorithm> @@ -31,83 +32,7 @@ #include "scope_timer.hpp" #include "shared_array.hpp" -template <field_components fc> -field_layout<fc>::field_layout( - const hsize_t *SIZES, - const hsize_t *SUBSIZES, - const hsize_t *STARTS, - const MPI_Comm COMM_TO_USE) -{ - TIMEZONE("field_layout::field_layout"); - this->comm = COMM_TO_USE; - MPI_Comm_rank(this->comm, &this->myrank); - MPI_Comm_size(this->comm, &this->nprocs); - std::copy(SIZES, SIZES + 3, this->sizes); - std::copy(SUBSIZES, SUBSIZES + 3, this->subsizes); - std::copy(STARTS, STARTS + 3, this->starts); - if (fc == THREE || fc == THREExTHREE) - { - this->sizes[3] = 3; - this->subsizes[3] = 3; - this->starts[3] = 0; - } - if (fc == THREExTHREE) - { - this->sizes[4] = 3; - this->subsizes[4] = 3; - this->starts[4] = 0; - } - this->local_size = 1; - this->full_size = 1; - for (unsigned int i=0; i<ndim(fc); i++) - { - this->local_size *= this->subsizes[i]; - this->full_size *= this->sizes[i]; - } - - /*field will at most be distributed in 2D*/ - this->rank.resize(2); - this->all_start.resize(2); - this->all_size.resize(2); - for (int i=0; i<2; i++) - { - this->rank[i].resize(this->sizes[i]); - std::vector<int> local_rank; - local_rank.resize(this->sizes[i], 0); - for (unsigned int ii=this->starts[i]; ii<this->starts[i]+this->subsizes[i]; ii++) - local_rank[ii] = this->myrank; - MPI_Allreduce( - &local_rank.front(), - &this->rank[i].front(), - this->sizes[i], - MPI_INT, - MPI_SUM, - this->comm); - this->all_start[i].resize(this->nprocs); - std::vector<int> local_start; - local_start.resize(this->nprocs, 0); - local_start[this->myrank] = this->starts[i]; - MPI_Allreduce( - &local_start.front(), - &this->all_start[i].front(), - this->nprocs, - MPI_INT, - MPI_SUM, - this->comm); - this->all_size[i].resize(this->nprocs); - std::vector<int> local_subsize; - local_subsize.resize(this->nprocs, 0); - local_subsize[this->myrank] = this->subsizes[i]; - MPI_Allreduce( - &local_subsize.front(), - &this->all_size[i].front(), - this->nprocs, - MPI_INT, - MPI_SUM, - this->comm); - } -} template <typename rnumber, field_backend be, @@ -169,47 +94,27 @@ field<rnumber, be, fc>::field( starts[0] = local_0_start; starts[1] = 0; starts[2] = 0; this->rmemlayout = new field_layout<fc>( sizes, subsizes, starts, this->comm); - sizes[0] = nz; sizes[1] = ny; sizes[2] = nx/2+1; - subsizes[0] = local_n1; subsizes[1] = ny; subsizes[2] = nx/2+1; + sizes[0] = ny; sizes[1] = nz; sizes[2] = nx/2+1; + subsizes[0] = local_n1; subsizes[1] = nz; subsizes[2] = nx/2+1; starts[0] = local_1_start; starts[1] = 0; starts[2] = 0; this->clayout = new field_layout<fc>( sizes, subsizes, starts, this->comm); - this->data = (rnumber*)fftw_malloc( - sizeof(rnumber)*this->rmemlayout->local_size); - if(typeid(rnumber) == typeid(float)) - { - this->c2r_plan = new fftwf_plan; - this->r2c_plan = new fftwf_plan; - *((fftwf_plan*)this->c2r_plan) = fftwf_mpi_plan_many_dft_c2r( - 3, nfftw, ncomp(fc), - FFTW_MPI_DEFAULT_BLOCK, FFTW_MPI_DEFAULT_BLOCK, - (fftwf_complex*)this->data, (float*)this->data, - this->comm, - this->fftw_plan_rigor | FFTW_MPI_TRANSPOSED_IN); - *((fftwf_plan*)this->r2c_plan) = fftwf_mpi_plan_many_dft_r2c( - 3, nfftw, ncomp(fc), - FFTW_MPI_DEFAULT_BLOCK, FFTW_MPI_DEFAULT_BLOCK, - (float*)this->data, (fftwf_complex*)this->data, - this->comm, - this->fftw_plan_rigor | FFTW_MPI_TRANSPOSED_OUT); - } - if (typeid(rnumber) == typeid(double)) - { - this->c2r_plan = new fftw_plan; - this->r2c_plan = new fftw_plan; - *((fftw_plan*)this->c2r_plan) = fftw_mpi_plan_many_dft_c2r( - 3, nfftw, ncomp(fc), - FFTW_MPI_DEFAULT_BLOCK, FFTW_MPI_DEFAULT_BLOCK, - (fftw_complex*)this->data, (double*)this->data, - this->comm, - this->fftw_plan_rigor | FFTW_MPI_TRANSPOSED_IN); - *((fftw_plan*)this->r2c_plan) = fftw_mpi_plan_many_dft_r2c( - 3, nfftw, ncomp(fc), - FFTW_MPI_DEFAULT_BLOCK, FFTW_MPI_DEFAULT_BLOCK, - (double*)this->data, (fftw_complex*)this->data, - this->comm, - this->fftw_plan_rigor | FFTW_MPI_TRANSPOSED_OUT); - } + this->data = fftw_interface<rnumber>::alloc_real( + this->rmemlayout->local_size); + this->c2r_plan = fftw_interface<rnumber>::mpi_plan_many_dft_c2r( + 3, nfftw, ncomp(fc), + FFTW_MPI_DEFAULT_BLOCK, FFTW_MPI_DEFAULT_BLOCK, + (typename fftw_interface<rnumber>::complex*)this->data, + this->data, + this->comm, + this->fftw_plan_rigor | FFTW_MPI_TRANSPOSED_IN); + this->r2c_plan = fftw_interface<rnumber>::mpi_plan_many_dft_r2c( + 3, nfftw, ncomp(fc), + FFTW_MPI_DEFAULT_BLOCK, FFTW_MPI_DEFAULT_BLOCK, + this->data, + (typename fftw_interface<rnumber>::complex*)this->data, + this->comm, + this->fftw_plan_rigor | FFTW_MPI_TRANSPOSED_OUT); break; } } @@ -228,21 +133,9 @@ field<rnumber, be, fc>::~field() delete this->rlayout; delete this->rmemlayout; delete this->clayout; - fftw_free(this->data); - if (typeid(rnumber) == typeid(float)) - { - fftwf_destroy_plan(*(fftwf_plan*)this->c2r_plan); - delete (fftwf_plan*)this->c2r_plan; - fftwf_destroy_plan(*(fftwf_plan*)this->r2c_plan); - delete (fftwf_plan*)this->r2c_plan; - } - else if (typeid(rnumber) == typeid(double)) - { - fftw_destroy_plan(*(fftw_plan*)this->c2r_plan); - delete (fftw_plan*)this->c2r_plan; - fftw_destroy_plan(*(fftw_plan*)this->r2c_plan); - delete (fftw_plan*)this->r2c_plan; - } + fftw_interface<rnumber>::free(this->data); + fftw_interface<rnumber>::destroy_plan(this->c2r_plan); + fftw_interface<rnumber>::destroy_plan(this->r2c_plan); break; } } @@ -253,10 +146,7 @@ template <typename rnumber, void field<rnumber, be, fc>::ift() { TIMEZONE("field::ift"); - if (typeid(rnumber) == typeid(float)) - fftwf_execute(*((fftwf_plan*)this->c2r_plan)); - else if (typeid(rnumber) == typeid(double)) - fftw_execute(*((fftw_plan*)this->c2r_plan)); + fftw_interface<rnumber>::execute(this->c2r_plan); this->real_space_representation = true; } @@ -265,10 +155,8 @@ template <typename rnumber, field_components fc> void field<rnumber, be, fc>::dft() { - if (typeid(rnumber) == typeid(float)) - fftwf_execute(*((fftwf_plan*)this->r2c_plan)); - else if (typeid(rnumber) == typeid(double)) - fftw_execute(*((fftw_plan*)this->r2c_plan)); + TIMEZONE("field::dft"); + fftw_interface<rnumber>::execute(this->r2c_plan); this->real_space_representation = false; } @@ -277,60 +165,335 @@ template <typename rnumber, field_components fc> int field<rnumber, be, fc>::io( const std::string fname, - const std::string dset_name, - const int toffset, + const std::string field_name, + const int iteration, const bool read) { + /* file dataset has same dimensions as field */ TIMEZONE("field::io"); hid_t file_id, dset_id, plist_id; - hid_t dset_type; - bool io_for_real = false; + std::string representation = std::string( + this->real_space_representation ? + "real" : "complex"); + std::string dset_name = ( + "/" + field_name + + "/" + representation + + "/" + std::to_string(iteration)); + + /* open/create file */ + plist_id = H5Pcreate(H5P_FILE_ACCESS); + H5Pset_fapl_mpio(plist_id, this->comm, MPI_INFO_NULL); + bool file_exists = false; + { + struct stat file_buffer; + file_exists = (stat(fname.c_str(), &file_buffer) == 0); + } + if (read) + { + assert(file_exists); + file_id = H5Fopen(fname.c_str(), H5F_ACC_RDONLY, plist_id); + } + else + { + if (file_exists) + file_id = H5Fopen(fname.c_str(), H5F_ACC_RDWR, plist_id); + else + file_id = H5Fcreate(fname.c_str(), H5F_ACC_EXCL, H5P_DEFAULT, plist_id); + } + H5Pclose(plist_id); + + /* check what kind of representation is being used */ + if (read) + { + dset_id = H5Dopen( + file_id, + dset_name.c_str(), + H5P_DEFAULT); + hid_t dset_type = H5Dget_type(dset_id); + bool io_for_real = ( + H5Tequal(dset_type, H5T_IEEE_F32BE) || + H5Tequal(dset_type, H5T_IEEE_F32LE) || + H5Tequal(dset_type, H5T_INTEL_F32) || + H5Tequal(dset_type, H5T_NATIVE_FLOAT) || + H5Tequal(dset_type, H5T_IEEE_F64BE) || + H5Tequal(dset_type, H5T_IEEE_F64LE) || + H5Tequal(dset_type, H5T_INTEL_F64) || + H5Tequal(dset_type, H5T_NATIVE_DOUBLE)); + H5Tclose(dset_type); + assert(this->real_space_representation == io_for_real); + } + + /* generic space initialization */ + hid_t fspace, mspace; + hsize_t count[ndim(fc)], offset[ndim(fc)], dims[ndim(fc)]; + hsize_t memoffset[ndim(fc)], memshape[ndim(fc)]; + + if (this->real_space_representation) + { + for (unsigned int i=0; i<ndim(fc); i++) + { + count[i] = this->rlayout->subsizes[i]; + offset[i] = this->rlayout->starts[i]; + dims[i] = this->rlayout->sizes[i]; + memshape[i] = this->rmemlayout->subsizes[i]; + memoffset[i] = 0; + } + } + else + { + for (unsigned int i=0; i<ndim(fc); i++) + { + count [i] = this->clayout->subsizes[i]; + offset[i] = this->clayout->starts[i]; + dims [i] = this->clayout->sizes[i]; + memshape [i] = count[i]; + memoffset[i] = 0; + } + } + mspace = H5Screate_simple(ndim(fc), memshape, NULL); + H5Sselect_hyperslab(mspace, H5S_SELECT_SET, memoffset, NULL, count, NULL); + + /* open/create data set */ + if (read) + fspace = H5Dget_space(dset_id); + else + { + if (!H5Lexists(file_id, field_name.c_str(), H5P_DEFAULT)) + { + hid_t gid_tmp = H5Gcreate( + file_id, field_name.c_str(), + H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + H5Gclose(gid_tmp); + } + + if (!H5Lexists(file_id, (field_name + "/" + representation).c_str(), H5P_DEFAULT)) + { + hid_t gid_tmp = H5Gcreate( + file_id, ("/" + field_name + "/" + representation).c_str(), + H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + H5Gclose(gid_tmp); + } + if (H5Lexists(file_id, dset_name.c_str(), H5P_DEFAULT)) + { + dset_id = H5Dopen(file_id, dset_name.c_str(), H5P_DEFAULT); + fspace = H5Dget_space(dset_id); + } + else + { + fspace = H5Screate_simple( + ndim(fc), + dims, + NULL); + /* chunking needs to go in here */ + dset_id = H5Dcreate( + file_id, + dset_name.c_str(), + (this->real_space_representation ? this->rnumber_H5T : this->cnumber_H5T), + fspace, + H5P_DEFAULT, + H5P_DEFAULT, + H5P_DEFAULT); + } + } + /* both dset_id and fspace should now have sane values */ + + /* check file space */ + int ndims_fspace = H5Sget_simple_extent_dims(fspace, dims, NULL); + assert(ndims_fspace == ndim(fc)); + if (this->real_space_representation) + { + for (unsigned int i=0; i<ndim(fc); i++) + { + offset[i] = this->rlayout->starts[i]; + assert(dims[i] == this->rlayout->sizes[i]); + } + H5Sselect_hyperslab(fspace, H5S_SELECT_SET, offset, NULL, count, NULL); + if (read) + { + std::fill_n(this->data, this->rmemlayout->local_size, 0); + H5Dread(dset_id, this->rnumber_H5T, mspace, fspace, H5P_DEFAULT, this->data); + } + else + { + assert(this->real_space_representation); + H5Dwrite(dset_id, this->rnumber_H5T, mspace, fspace, H5P_DEFAULT, this->data); + } + H5Sclose(mspace); + } + else + { + for (unsigned int i=0; i<ndim(fc); i++) + { + offset[i] = this->clayout->starts[i]; + assert(dims[i] == this->clayout->sizes[i]); + } + H5Sselect_hyperslab(fspace, H5S_SELECT_SET, offset, NULL, count, NULL); + if (read) + { + std::fill_n(this->data, this->clayout->local_size*2, 0); + H5Dread(dset_id, this->cnumber_H5T, mspace, fspace, H5P_DEFAULT, this->data); + this->symmetrize(); + } + else + { + assert(!this->real_space_representation); + H5Dwrite(dset_id, this->cnumber_H5T, mspace, fspace, H5P_DEFAULT, this->data); + } + H5Sclose(mspace); + } - /* open file */ + H5Sclose(fspace); + /* close data set */ + H5Dclose(dset_id); + /* close file */ + H5Fclose(file_id); + return EXIT_SUCCESS; +} + +template <typename rnumber, + field_backend be, + field_components fc> +int field<rnumber, be, fc>::io_database( + const std::string fname, + const std::string field_name, + const int toffset, + const bool read) +{ + /* file dataset is has a time dimension as well */ + TIMEZONE("field::io_database"); + hid_t file_id, dset_id, plist_id; + std::string representation = std::string( + this->real_space_representation ? + "real" : "complex"); + std::string dset_name = ( + "/" + field_name + + "/" + representation); + + /* open/create file */ plist_id = H5Pcreate(H5P_FILE_ACCESS); H5Pset_fapl_mpio(plist_id, this->comm, MPI_INFO_NULL); + bool file_exists = false; + { + struct stat file_buffer; + file_exists = (stat(fname.c_str(), &file_buffer) == 0); + } if (read) + { + assert(file_exists); file_id = H5Fopen(fname.c_str(), H5F_ACC_RDONLY, plist_id); + } else - file_id = H5Fopen(fname.c_str(), H5F_ACC_RDWR, plist_id); + { + if (file_exists) + file_id = H5Fopen(fname.c_str(), H5F_ACC_RDWR, plist_id); + else + file_id = H5Fcreate(fname.c_str(), H5F_ACC_EXCL, H5P_DEFAULT, plist_id); + } H5Pclose(plist_id); - /* open data set */ - dset_id = H5Dopen(file_id, dset_name.c_str(), H5P_DEFAULT); - dset_type = H5Dget_type(dset_id); - io_for_real = ( - H5Tequal(dset_type, H5T_IEEE_F32BE) || - H5Tequal(dset_type, H5T_IEEE_F32LE) || - H5Tequal(dset_type, H5T_INTEL_F32) || - H5Tequal(dset_type, H5T_NATIVE_FLOAT) || - H5Tequal(dset_type, H5T_IEEE_F64BE) || - H5Tequal(dset_type, H5T_IEEE_F64LE) || - H5Tequal(dset_type, H5T_INTEL_F64) || - H5Tequal(dset_type, H5T_NATIVE_DOUBLE)); + /* check what kind of representation is being used */ + if (read) + { + dset_id = H5Dopen( + file_id, + dset_name.c_str(), + H5P_DEFAULT); + hid_t dset_type = H5Dget_type(dset_id); + bool io_for_real = ( + H5Tequal(dset_type, H5T_IEEE_F32BE) || + H5Tequal(dset_type, H5T_IEEE_F32LE) || + H5Tequal(dset_type, H5T_INTEL_F32) || + H5Tequal(dset_type, H5T_NATIVE_FLOAT) || + H5Tequal(dset_type, H5T_IEEE_F64BE) || + H5Tequal(dset_type, H5T_IEEE_F64LE) || + H5Tequal(dset_type, H5T_INTEL_F64) || + H5Tequal(dset_type, H5T_NATIVE_DOUBLE)); + H5Tclose(dset_type); + assert(this->real_space_representation == io_for_real); + } /* generic space initialization */ hid_t fspace, mspace; - fspace = H5Dget_space(dset_id); hsize_t count[ndim(fc)+1], offset[ndim(fc)+1], dims[ndim(fc)+1]; hsize_t memoffset[ndim(fc)+1], memshape[ndim(fc)+1]; - H5Sget_simple_extent_dims(fspace, dims, NULL); + + int dim_counter_offset = 1; + dim_counter_offset = 1; count[0] = 1; - offset[0] = toffset; memshape[0] = 1; memoffset[0] = 0; - if (io_for_real) + if (this->real_space_representation) { for (unsigned int i=0; i<ndim(fc); i++) { - count[i+1] = this->rlayout->subsizes[i]; - offset[i+1] = this->rlayout->starts[i]; - assert(dims[i+1] == this->rlayout->sizes[i]); - memshape[i+1] = this->rmemlayout->subsizes[i]; - memoffset[i+1] = 0; + count[i+dim_counter_offset] = this->rlayout->subsizes[i]; + offset[i+dim_counter_offset] = this->rlayout->starts[i]; + dims[i+dim_counter_offset] = this->rlayout->sizes[i]; + memshape[i+dim_counter_offset] = this->rmemlayout->subsizes[i]; + memoffset[i+dim_counter_offset] = 0; } - mspace = H5Screate_simple(ndim(fc)+1, memshape, NULL); - H5Sselect_hyperslab(fspace, H5S_SELECT_SET, offset, NULL, count, NULL); + mspace = H5Screate_simple(dim_counter_offset + ndim(fc), memshape, NULL); + H5Sselect_hyperslab(mspace, H5S_SELECT_SET, memoffset, NULL, count, NULL); + } + else + { + for (unsigned int i=0; i<ndim(fc); i++) + { + count[i+dim_counter_offset] = this->clayout->subsizes[i]; + offset[i+dim_counter_offset] = this->clayout->starts[i]; + dims[i+dim_counter_offset] = this->clayout->sizes[i]; + memshape[i+dim_counter_offset] = count[i+dim_counter_offset]; + memoffset[i+dim_counter_offset] = 0; + } + mspace = H5Screate_simple(dim_counter_offset + ndim(fc), memshape, NULL); H5Sselect_hyperslab(mspace, H5S_SELECT_SET, memoffset, NULL, count, NULL); + } + + /* open/create data set */ + if (read) + fspace = H5Dget_space(dset_id); + else + { + if (!H5Lexists(file_id, field_name.c_str(), H5P_DEFAULT)) + H5Gcreate( + file_id, field_name.c_str(), + H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + if (H5Lexists(file_id, dset_name.c_str(), H5P_DEFAULT)) + { + dset_id = H5Dopen(file_id, dset_name.c_str(), H5P_DEFAULT); + fspace = H5Dget_space(dset_id); + } + else + { + fspace = H5Screate_simple( + ndim(fc), + dims, + NULL); + /* chunking needs to go in here */ + dset_id = H5Dcreate( + file_id, + dset_name.c_str(), + (this->real_space_representation ? this->rnumber_H5T : this->cnumber_H5T), + fspace, + H5P_DEFAULT, + H5P_DEFAULT, + H5P_DEFAULT); + } + } + /* both dset_id and fspace should now have sane values */ + + /* check file space */ + int ndims_fspace = H5Sget_simple_extent_dims(fspace, dims, NULL); + assert(ndims_fspace == ndim(fc) + 1); + offset[0] = toffset; + if (this->real_space_representation) + { + for (unsigned int i=0; i<ndim(fc); i++) + { + offset[i+dim_counter_offset] = this->rlayout->starts[i]; + assert(dims[i+dim_counter_offset] == this->rlayout->sizes[i]); + } + H5Sselect_hyperslab(fspace, H5S_SELECT_SET, offset, NULL, count, NULL); if (read) { std::fill_n(this->data, this->rmemlayout->local_size, 0); @@ -339,13 +502,8 @@ int field<rnumber, be, fc>::io( } else { + assert(this->real_space_representation); H5Dwrite(dset_id, this->rnumber_H5T, mspace, fspace, H5P_DEFAULT, this->data); - if (!this->real_space_representation) - /* in principle we could do an inverse Fourier transform in here, - * however that would be unsafe since we wouldn't know whether we'd need to - * normalize or not. - * */ - DEBUG_MSG("I just wrote complex field into real space dataset. It's probably nonsense.\n"); } H5Sclose(mspace); } @@ -353,30 +511,24 @@ int field<rnumber, be, fc>::io( { for (unsigned int i=0; i<ndim(fc); i++) { - count[i+1] = this->clayout->subsizes[i]; - offset[i+1] = this->clayout->starts[i]; - assert(dims[i+1] == this->clayout->sizes[i]); - memshape[i+1] = count[i+1]; - memoffset[i+1] = 0; + offset[i+dim_counter_offset] = this->clayout->starts[i]; + assert(dims[i+dim_counter_offset] == this->clayout->sizes[i]); } - mspace = H5Screate_simple(ndim(fc)+1, memshape, NULL); H5Sselect_hyperslab(fspace, H5S_SELECT_SET, offset, NULL, count, NULL); - H5Sselect_hyperslab(mspace, H5S_SELECT_SET, memoffset, NULL, count, NULL); if (read) { H5Dread(dset_id, this->cnumber_H5T, mspace, fspace, H5P_DEFAULT, this->data); this->real_space_representation = false; + this->symmetrize(); } else { + assert(!this->real_space_representation); H5Dwrite(dset_id, this->cnumber_H5T, mspace, fspace, H5P_DEFAULT, this->data); - if (this->real_space_representation) - DEBUG_MSG("I just wrote real space field into complex dataset. It's probably nonsense.\n"); } H5Sclose(mspace); } - H5Tclose(dset_type); H5Sclose(fspace); /* close data set */ H5Dclose(dset_id); @@ -405,8 +557,11 @@ void field<rnumber, be, fc>::compute_rspace_xincrement_stats( this->rlayout->sizes[0], this->rlayout->comm); tmp_field->real_space_representation = true; - FIELD_RLOOP<be>( - this,[&](hsize_t zindex, hsize_t yindex, ptrdiff_t rindex, hsize_t xindex){ + this->RLOOP( + [&](ptrdiff_t rindex, + ptrdiff_t xindex, + ptrdiff_t yindex, + ptrdiff_t zindex){ hsize_t rrindex = (xindex + xcells)%this->rlayout->sizes[2] + ( zindex * this->rlayout->subsizes[1] + yindex)*( this->rmemlayout->subsizes[2]); @@ -414,7 +569,7 @@ void field<rnumber, be, fc>::compute_rspace_xincrement_stats( tmp_field->data[rindex*ncomp(fc) + component] = this->data[rrindex*ncomp(fc) + component] - this->data[rindex*ncomp(fc) + component]; - }); + }); tmp_field->compute_rspace_stats( group, dset_name, @@ -499,9 +654,9 @@ void field<rnumber, be, fc>::compute_rspace_stats( }); { - TIMEZONE("FIELD_RLOOP"); - FIELD_RLOOP<be>( - this,[&](hsize_t /*zindex*/, hsize_t /*yindex*/, ptrdiff_t rindex, hsize_t /*xindex*/){ + TIMEZONE("field::RLOOP"); + this->RLOOP( + [&](hsize_t /*zindex*/, hsize_t /*yindex*/, ptrdiff_t rindex, hsize_t /*xindex*/){ double *local_moments = local_moments_threaded.getMine(); double *val_tmp = val_tmp_threaded.getMine(); ptrdiff_t *local_hist = local_hist_threaded.getMine(); @@ -628,6 +783,86 @@ void field<rnumber, be, fc>::normalize() this->data[tmp_index] /= this->npoints; } +template <typename rnumber, + field_backend be, + field_components fc> +void field<rnumber, be, fc>::symmetrize() +{ + TIMEZONE("field::symmetrize"); + assert(!this->real_space_representation); + ptrdiff_t ii, cc; + typename fftw_interface<rnumber>::complex *data = this->get_cdata(); + MPI_Status *mpistatus = new MPI_Status; + if (this->myrank == this->clayout->rank[0][0]) + { + for (cc = 0; cc < ncomp(fc); cc++) + data[cc][1] = 0.0; + for (ii = 1; ii < this->clayout->sizes[1]/2; ii++) + for (cc = 0; cc < ncomp(fc); cc++) { + ( *(data + cc + ncomp(fc)*(this->clayout->sizes[1] - ii)*this->clayout->sizes[2]))[0] = + (*(data + cc + ncomp(fc)*( ii)*this->clayout->sizes[2]))[0]; + ( *(data + cc + ncomp(fc)*(this->clayout->sizes[1] - ii)*this->clayout->sizes[2]))[1] = + -(*(data + cc + ncomp(fc)*( ii)*this->clayout->sizes[2]))[1]; + } + } + typename fftw_interface<rnumber>::complex *buffer; + buffer = fftw_interface<rnumber>::alloc_complex(ncomp(fc)*this->clayout->sizes[1]); + ptrdiff_t yy; + /*ptrdiff_t tindex;*/ + int ranksrc, rankdst; + for (yy = 1; yy < this->clayout->sizes[0]/2; yy++) { + ranksrc = this->clayout->rank[0][yy]; + rankdst = this->clayout->rank[0][this->clayout->sizes[0] - yy]; + if (this->clayout->myrank == ranksrc) + for (ii = 0; ii < this->clayout->sizes[1]; ii++) + for (cc = 0; cc < ncomp(fc); cc++) + for (int imag_comp=0; imag_comp<2; imag_comp++) + (*(buffer + ncomp(fc)*ii+cc))[imag_comp] = + (*(data + ncomp(fc)*((yy - this->clayout->starts[0])*this->clayout->sizes[1] + ii)*this->clayout->sizes[2] + cc))[imag_comp]; + if (ranksrc != rankdst) + { + if (this->clayout->myrank == ranksrc) + MPI_Send((void*)buffer, + ncomp(fc)*this->clayout->sizes[1], mpi_real_type<rnumber>::complex(), rankdst, yy, + this->clayout->comm); + if (this->clayout->myrank == rankdst) + MPI_Recv((void*)buffer, + ncomp(fc)*this->clayout->sizes[1], mpi_real_type<rnumber>::complex(), ranksrc, yy, + this->clayout->comm, mpistatus); + } + if (this->clayout->myrank == rankdst) + { + for (ii = 1; ii < this->clayout->sizes[1]; ii++) + for (cc = 0; cc < ncomp(fc); cc++) + { + (*(data + ncomp(fc)*((this->clayout->sizes[0] - yy - this->clayout->starts[0])*this->clayout->sizes[1] + ii)*this->clayout->sizes[2] + cc))[0] = + (*(buffer + ncomp(fc)*(this->clayout->sizes[1]-ii)+cc))[0]; + (*(data + ncomp(fc)*((this->clayout->sizes[0] - yy - this->clayout->starts[0])*this->clayout->sizes[1] + ii)*this->clayout->sizes[2] + cc))[1] = + -(*(buffer + ncomp(fc)*(this->clayout->sizes[1]-ii)+cc))[1]; + } + for (cc = 0; cc < ncomp(fc); cc++) + { + (*((data + cc + ncomp(fc)*(this->clayout->sizes[0] - yy - this->clayout->starts[0])*this->clayout->sizes[1]*this->clayout->sizes[2])))[0] = (*(buffer + cc))[0]; + (*((data + cc + ncomp(fc)*(this->clayout->sizes[0] - yy - this->clayout->starts[0])*this->clayout->sizes[1]*this->clayout->sizes[2])))[1] = -(*(buffer + cc))[1]; + } + } + } + fftw_interface<rnumber>::free(buffer); + delete mpistatus; + /* put asymmetric data to 0 */ + /*if (this->clayout->myrank == this->clayout->rank[0][this->clayout->sizes[0]/2]) + { + tindex = ncomp(fc)*(this->clayout->sizes[0]/2 - this->clayout->starts[0])*this->clayout->sizes[1]*this->clayout->sizes[2]; + for (ii = 0; ii < this->clayout->sizes[1]; ii++) + { + std::fill_n((rnumber*)(data + tindex), ncomp(fc)*2*this->clayout->sizes[2], 0.0); + tindex += ncomp(fc)*this->clayout->sizes[2]; + } + } + tindex = ncomp(fc)*(); + std::fill_n((rnumber*)(data + tindex), ncomp(fc)*2, 0.0);*/ +} + template <typename rnumber, field_backend be, field_components fc> @@ -673,8 +908,8 @@ void field<rnumber, be, fc>::compute_stats( // what follows gave me a headache until I found this link: // http://stackoverflow.com/questions/8256636/expected-primary-expression-error-on-template-method-using kk->template cospectrum<rnumber, fc>( - (cnumber*)this->data, - (cnumber*)this->data, + (typename fftw_interface<rnumber>::complex*)this->data, + (typename fftw_interface<rnumber>::complex*)this->data, group, dset_name + "_" + dset_name, toffset); @@ -690,253 +925,6 @@ void field<rnumber, be, fc>::compute_stats( } } -template <field_backend be, - kspace_dealias_type dt> -template <field_components fc> -kspace<be, dt>::kspace( - const field_layout<fc> *source_layout, - const double DKX, - const double DKY, - const double DKZ) -{ - TIMEZONE("field::kspace"); - /* get layout */ - this->layout = new field_layout<ONE>( - source_layout->sizes, - source_layout->subsizes, - source_layout->starts, - source_layout->comm); - - /* store dk values */ - this->dkx = DKX; - this->dky = DKY; - this->dkz = DKZ; - - /* compute kx, ky, kz and compute kM values */ - switch(be) - { - case FFTW: - this->kx.resize(this->layout->sizes[2]); - this->ky.resize(this->layout->subsizes[0]); - this->kz.resize(this->layout->sizes[1]); - int i, ii; - for (i = 0; i<int(this->layout->sizes[2]); i++) - this->kx[i] = i*this->dkx; - for (i = 0; i<int(this->layout->subsizes[0]); i++) - { - ii = i + this->layout->starts[0]; - if (ii <= int(this->layout->sizes[1]/2)) - this->ky[i] = this->dky*ii; - else - this->ky[i] = this->dky*(ii - int(this->layout->sizes[1])); - } - for (i = 0; i<int(this->layout->sizes[1]); i++) - { - if (i <= int(this->layout->sizes[0]/2)) - this->kz[i] = this->dkz*i; - else - this->kz[i] = this->dkz*(i - int(this->layout->sizes[0])); - } - switch(dt) - { - case TWO_THIRDS: - this->kMx = this->dkx*(int(2*(int(this->layout->sizes[2])-1)/3)-1); - this->kMy = this->dky*(int(this->layout->sizes[0] / 3)-1); - this->kMz = this->dkz*(int(this->layout->sizes[1] / 3)-1); - break; - case SMOOTH: - this->kMx = this->dkx*(int(this->layout->sizes[2])-2); - this->kMy = this->dky*(int(this->layout->sizes[0] / 2)-1); - this->kMz = this->dkz*(int(this->layout->sizes[1] / 2)-1); - break; - } - break; - } - - /* get global kM and dk */ - this->kM = this->kMx; - if (this->kM < this->kMy) this->kM = this->kMy; - if (this->kM < this->kMz) this->kM = this->kMz; - this->kM2 = this->kM * this->kM; - this->dk = this->dkx; - if (this->dk > this->dky) this->dk = this->dky; - if (this->dk > this->dkz) this->dk = this->dkz; - this->dk2 = this->dk*this->dk; - - /* spectra stuff */ - this->nshells = int(this->kM / this->dk) + 2; - this->kshell.resize(this->nshells, 0); - this->nshell.resize(this->nshells, 0); - - shared_array<double> kshell_local_threaded(this->nshells, [&](double* kshell_local){ - std::fill_n(kshell_local, this->nshells, 0); - }); - - shared_array<int64_t> nshell_local_threaded(this->nshells, [&](int64_t* nshell_local){ - std::fill_n(nshell_local, this->nshells, 0); - }); - - std::vector<std::unordered_map<int, double>> dealias_filter_threaded(omp_get_max_threads()); - - KSPACE_CLOOP_K2_NXMODES( - this,[&](ptrdiff_t /*cindex*/, hsize_t /*yindex*/, hsize_t /*zindex*/, int nxmodes, hsize_t /*xindex*/, double k2){ - if (k2 < this->kM2) - { - double knorm = sqrt(k2); - nshell_local_threaded.getMine()[int(knorm/this->dk)] += nxmodes; - kshell_local_threaded.getMine()[int(knorm/this->dk)] += nxmodes*knorm; - } - if (dt == TWO_THIRDS){ - // Should not be any race condition here it is a "write" - dealias_filter_threaded[omp_get_thread_num()][int(round(k2 / this->dk2))] = exp(-36.0 * pow(k2/this->kM2, 18.)); - } - }); - - for(int idxMerge = 0 ; idxMerge < int(dealias_filter_threaded.size()) ; ++idxMerge){ - for(const auto kv : dealias_filter_threaded[idxMerge]){ - this->dealias_filter[kv.first] = kv.second; - } - } - - nshell_local_threaded.mergeParallel(); - kshell_local_threaded.mergeParallel(); - - MPI_Allreduce( - nshell_local_threaded.getMasterData(), - &this->nshell.front(), - this->nshells, - MPI_INT64_T, MPI_SUM, this->layout->comm); - MPI_Allreduce( - kshell_local_threaded.getMasterData(), - &this->kshell.front(), - this->nshells, - MPI_DOUBLE, MPI_SUM, this->layout->comm); - for (int n=0; n<this->nshells; n++) - this->kshell[n] /= this->nshell[n]; -} - -template <field_backend be, - kspace_dealias_type dt> -kspace<be, dt>::~kspace() -{ - delete this->layout; -} - -template <field_backend be, - kspace_dealias_type dt> -template <typename rnumber, - field_components fc> -void kspace<be, dt>::low_pass(rnumber *__restrict__ a, const double kmax) -{ - const double km2 = kmax*kmax; - KSPACE_CLOOP_K2( - this,[&](ptrdiff_t cindex, hsize_t /*yindex*/, hsize_t /*zindex*/, hsize_t /*xindex*/, double k2){ - // There is not race condition because it is write - if (k2 >= km2){ - std::fill_n(a + 2*ncomp(fc)*cindex, 2*ncomp(fc), 0); - } - }); -} - -template <field_backend be, - kspace_dealias_type dt> -template <typename rnumber, - field_components fc> -void kspace<be, dt>::dealias(rnumber *__restrict__ a) -{ - switch(be) - { - case TWO_THIRDS: - this->low_pass<rnumber, fc>(a, this->kM); - break; - case SMOOTH: - KSPACE_CLOOP_K2( - this,[&](ptrdiff_t cindex, hsize_t /*yindex*/, hsize_t /*zindex*/, hsize_t /*xindex*/, double k2){ - double tval = this->dealias_filter[int(round(k2 / this->dk2))]; - // There is no overlap between threads here because cindex is thread-safe index - // and push for a jump of 2*ncomp(fc) - for (int tcounter=0; tcounter<2*ncomp(fc); tcounter++){ - a[2*ncomp(fc)*cindex + tcounter] *= tval; - } - }); - break; - } -} - -template <field_backend be, - kspace_dealias_type dt> -template <typename rnumber, - field_components fc> -void kspace<be, dt>::cospectrum( - const rnumber(* __restrict a)[2], - const rnumber(* __restrict b)[2], - const hid_t group, - const std::string dset_name, - const hsize_t toffset) -{ - TIMEZONE("field::cospectrum"); - std::vector<double> spec; - spec.resize(this->nshells*ncomp(fc)*ncomp(fc), 0); - - shared_array<double> spec_local_threaded(this->nshells*ncomp(fc)*ncomp(fc), [&](double* spec_local){ - std::fill_n(spec_local, this->nshells*ncomp(fc)*ncomp(fc), 0); - }); - - KSPACE_CLOOP_K2_NXMODES( - this,[&](ptrdiff_t cindex, hsize_t /*yindex*/, hsize_t /*zindex*/, int nxmodes, hsize_t /*xindex*/, double k2){ - if (k2 <= this->kM2) - { - double* spec_local = spec_local_threaded.getMine(); - int tmp_int = int(sqrt(k2) / this->dk)*ncomp(fc)*ncomp(fc); - for (hsize_t i=0; i<ncomp(fc); i++) - for (hsize_t j=0; j<ncomp(fc); j++) - spec_local[tmp_int + i*ncomp(fc)+j] += nxmodes * ( - (a[ncomp(fc)*cindex + i][0] * b[ncomp(fc)*cindex + j][0]) + - (a[ncomp(fc)*cindex + i][1] * b[ncomp(fc)*cindex + j][1])); - } - }); - spec_local_threaded.mergeParallel(); - - MPI_Allreduce( - spec_local_threaded.getMasterData(), - &spec.front(), - spec.size(), - MPI_DOUBLE, MPI_SUM, this->layout->comm); - if (this->layout->myrank == 0) - { - hid_t dset, wspace, mspace; - hsize_t count[(ndim(fc)-2)*2], offset[(ndim(fc)-2)*2], dims[(ndim(fc)-2)*2]; - dset = H5Dopen(group, ("spectra/" + dset_name).c_str(), H5P_DEFAULT); - wspace = H5Dget_space(dset); - H5Sget_simple_extent_dims(wspace, dims, NULL); - switch (fc) - { - case THREExTHREE: - offset[4] = 0; - offset[5] = 0; - count[4] = ncomp(fc); - count[5] = ncomp(fc); - case THREE: - offset[2] = 0; - offset[3] = 0; - count[2] = ncomp(fc); - count[3] = ncomp(fc); - default: - offset[0] = toffset; - offset[1] = 0; - count[0] = 1; - count[1] = this->nshells; - } - mspace = H5Screate_simple((ndim(fc)-2)*2, count, NULL); - H5Sselect_hyperslab(wspace, H5S_SELECT_SET, offset, NULL, count, NULL); - H5Dwrite(dset, H5T_NATIVE_DOUBLE, mspace, wspace, H5P_DEFAULT, &spec.front()); - H5Sclose(wspace); - H5Sclose(mspace); - H5Dclose(dset); - } -} - - template <typename rnumber, field_backend be, field_components fc1, @@ -951,28 +939,48 @@ void compute_gradient( assert(!src->real_space_representation); assert((fc1 == ONE && fc2 == THREE) || (fc1 == THREE && fc2 == THREExTHREE)); - KSPACE_CLOOP_K2( - kk,[&](ptrdiff_t cindex, hsize_t yindex, hsize_t zindex, hsize_t xindex, double k2){ - if (k2 < kk->kM2) - for (unsigned int field_component = 0; - field_component < ncomp(fc1); - field_component++) - { - // There must not be any race condition because it is write - dst->get_cdata()[(cindex*3+0)*ncomp(fc1)+field_component][0] = - - kk->kx[xindex]*src->get_cdata()[cindex*ncomp(fc1)+field_component][1]; - dst->get_cdata()[(cindex*3+0)*ncomp(fc1)+field_component][1] = - kk->kx[xindex]*src->get_cdata()[cindex*ncomp(fc1)+field_component][0]; - dst->get_cdata()[(cindex*3+1)*ncomp(fc1)+field_component][0] = - - kk->ky[yindex]*src->get_cdata()[cindex*ncomp(fc1)+field_component][1]; - dst->get_cdata()[(cindex*3+1)*ncomp(fc1)+field_component][1] = - kk->ky[yindex]*src->get_cdata()[cindex*ncomp(fc1)+field_component][0]; - dst->get_cdata()[(cindex*3+2)*ncomp(fc1)+field_component][0] = - - kk->kz[zindex]*src->get_cdata()[cindex*ncomp(fc1)+field_component][1]; - dst->get_cdata()[(cindex*3+2)*ncomp(fc1)+field_component][1] = - kk->kz[zindex]*src->get_cdata()[cindex*ncomp(fc1)+field_component][0]; - } - }); + kk->CLOOP_K2( + [&](ptrdiff_t cindex, + ptrdiff_t xindex, + ptrdiff_t yindex, + ptrdiff_t zindex, + double k2){ + if (k2 < kk->kM2) switch(fc1) + { + case ONE: + dst->cval(cindex, 0, 0) = -kk->kx[xindex]*src->cval(cindex, 1); + dst->cval(cindex, 0, 1) = kk->kx[xindex]*src->cval(cindex, 0); + dst->cval(cindex, 1, 0) = -kk->ky[yindex]*src->cval(cindex, 1); + dst->cval(cindex, 1, 1) = kk->ky[yindex]*src->cval(cindex, 0); + dst->cval(cindex, 2, 0) = -kk->kz[zindex]*src->cval(cindex, 1); + dst->cval(cindex, 2, 1) = kk->kz[zindex]*src->cval(cindex, 0); + break; + case THREE: + for (unsigned int field_component = 0; + field_component < ncomp(fc1); + field_component++) + { + dst->cval(cindex, 0, field_component, 0) = -kk->kx[xindex]*src->cval(cindex, field_component, 1); + dst->cval(cindex, 0, field_component, 1) = kk->kx[xindex]*src->cval(cindex, field_component, 0); + dst->cval(cindex, 1, field_component, 0) = -kk->ky[yindex]*src->cval(cindex, field_component, 1); + dst->cval(cindex, 1, field_component, 1) = kk->ky[yindex]*src->cval(cindex, field_component, 0); + dst->cval(cindex, 2, field_component, 0) = -kk->kz[zindex]*src->cval(cindex, field_component, 1); + dst->cval(cindex, 2, field_component, 1) = kk->kz[zindex]*src->cval(cindex, field_component, 0); + } + //dst->get_cdata()[(cindex*3+0)*ncomp(fc1)+field_component][0] = + // - kk->kx[xindex]*src->get_cdata()[cindex*ncomp(fc1)+field_component][1]; + //dst->get_cdata()[(cindex*3+0)*ncomp(fc1)+field_component][1] = + // kk->kx[xindex]*src->get_cdata()[cindex*ncomp(fc1)+field_component][0]; + //dst->get_cdata()[(cindex*3+1)*ncomp(fc1)+field_component][0] = + // - kk->ky[yindex]*src->get_cdata()[cindex*ncomp(fc1)+field_component][1]; + //dst->get_cdata()[(cindex*3+1)*ncomp(fc1)+field_component][1] = + // kk->ky[yindex]*src->get_cdata()[cindex*ncomp(fc1)+field_component][0]; + //dst->get_cdata()[(cindex*3+2)*ncomp(fc1)+field_component][0] = + // - kk->kz[zindex]*src->get_cdata()[cindex*ncomp(fc1)+field_component][1]; + //dst->get_cdata()[(cindex*3+2)*ncomp(fc1)+field_component][1] = + // kk->kz[zindex]*src->get_cdata()[cindex*ncomp(fc1)+field_component][0]; + } + }); } template class field<float, FFTW, ONE>; @@ -982,49 +990,6 @@ template class field<double, FFTW, ONE>; template class field<double, FFTW, THREE>; template class field<double, FFTW, THREExTHREE>; -template class kspace<FFTW, TWO_THIRDS>; -template class kspace<FFTW, SMOOTH>; - -template kspace<FFTW, TWO_THIRDS>::kspace<>( - const field_layout<ONE> *, - const double, const double, const double); -template kspace<FFTW, TWO_THIRDS>::kspace<>( - const field_layout<THREE> *, - const double, const double, const double); -template kspace<FFTW, TWO_THIRDS>::kspace<>( - const field_layout<THREExTHREE> *, - const double, const double, const double); - -template kspace<FFTW, SMOOTH>::kspace<>( - const field_layout<ONE> *, - const double, const double, const double); -template kspace<FFTW, SMOOTH>::kspace<>( - const field_layout<THREE> *, - const double, const double, const double); -template kspace<FFTW, SMOOTH>::kspace<>( - const field_layout<THREExTHREE> *, - const double, const double, const double); - -template void kspace<FFTW, SMOOTH>::low_pass<float, ONE>( - float *__restrict__ a, - const double kmax); -template void kspace<FFTW, SMOOTH>::low_pass<float, THREE>( - float *__restrict__ a, - const double kmax); -template void kspace<FFTW, SMOOTH>::low_pass<float, THREExTHREE>( - float *__restrict__ a, - const double kmax); - -template void kspace<FFTW, SMOOTH>::low_pass<double, ONE>( - double *__restrict__ a, - const double kmax); -template void kspace<FFTW, SMOOTH>::low_pass<double, THREE>( - double *__restrict__ a, - const double kmax); -template void kspace<FFTW, SMOOTH>::low_pass<double, THREExTHREE>( - double *__restrict__ a, - const double kmax); - template void field<float, FFTW, ONE>::compute_stats<TWO_THIRDS>( kspace<FFTW, TWO_THIRDS> *, const hid_t, const std::string, const hsize_t, const double); diff --git a/bfps/cpp/field.hpp b/bfps/cpp/field.hpp index a9790d9a6cdff4a95db134222484ed4981bb8a9b..3cb1424e1e6aea910f36e16bdcf3324ce05e518b 100644 --- a/bfps/cpp/field.hpp +++ b/bfps/cpp/field.hpp @@ -24,110 +24,16 @@ -#include <mpi.h> #include <hdf5.h> -#include <fftw3-mpi.h> #include <unordered_map> #include <vector> #include <string> -#include "base.hpp" +#include "kspace.hpp" -#ifndef FIELD +#ifndef FIELD_HPP -#define FIELD +#define FIELD_HPP -enum field_backend {FFTW}; -enum field_components {ONE, THREE, THREExTHREE}; -enum kspace_dealias_type {TWO_THIRDS, SMOOTH}; - -constexpr unsigned int ncomp( - field_components fc) - /* return actual number of field components for each enum value */ -{ - return ((fc == THREE) ? 3 : ( - (fc == THREExTHREE) ? 9 : 1)); -} - -constexpr unsigned int ndim( - field_components fc) - /* return actual number of field dimensions for each enum value */ -{ - return ((fc == THREE) ? 4 : ( - (fc == THREExTHREE) ? 5 : 3)); -} - -template <field_components fc> -class field_layout -{ - public: - /* description */ - hsize_t sizes[ndim(fc)]; - hsize_t subsizes[ndim(fc)]; - hsize_t starts[ndim(fc)]; - hsize_t local_size, full_size; - - int myrank, nprocs; - MPI_Comm comm; - - std::vector<std::vector<int>> rank; - std::vector<std::vector<int>> all_start; - std::vector<std::vector<int>> all_size; - - /* methods */ - field_layout( - const hsize_t *SIZES, - const hsize_t *SUBSIZES, - const hsize_t *STARTS, - const MPI_Comm COMM_TO_USE); - ~field_layout(){} -}; - -template <field_backend be, - kspace_dealias_type dt> -class kspace -{ - public: - /* relevant field layout */ - field_layout<ONE> *layout; - - /* physical parameters */ - double dkx, dky, dkz, dk, dk2; - - /* mode and dealiasing information */ - double kMx, kMy, kMz, kM, kM2; - double kMspec, kMspec2; - std::vector<double> kx, ky, kz; - std::unordered_map<int, double> dealias_filter; - std::vector<double> kshell; - std::vector<int64_t> nshell; - int nshells; - - /* methods */ - template <field_components fc> - kspace( - const field_layout<fc> *source_layout, - const double DKX = 1.0, - const double DKY = 1.0, - const double DKZ = 1.0); - ~kspace(); - - template <typename rnumber, - field_components fc> - void low_pass(rnumber *__restrict__ a, const double kmax); - - template <typename rnumber, - field_components fc> - void dealias(rnumber *__restrict__ a); - - template <typename rnumber, - field_components fc> - void cospectrum( - const rnumber(* __restrict__ a)[2], - const rnumber(* __restrict__ b)[2], - const hid_t group, - const std::string dset_name, - const hsize_t toffset); -}; template <typename rnumber, field_backend be, @@ -136,10 +42,9 @@ class field { private: /* data arrays */ - rnumber *data; - typedef rnumber cnumber[2]; - hsize_t npoints; + rnumber *__restrict__ data; public: + hsize_t npoints; bool real_space_representation; /* basic MPI information */ int myrank, nprocs; @@ -153,8 +58,8 @@ class field field_layout<fc> *clayout, *rlayout, *rmemlayout; /* FFT plans */ - void *c2r_plan; - void *r2c_plan; + typename fftw_interface<rnumber>::plan c2r_plan; + typename fftw_interface<rnumber>::plan r2c_plan; unsigned fftw_plan_rigor; /* HDF5 data types for arrays */ @@ -171,14 +76,22 @@ class field int io( const std::string fname, - const std::string dset_name, + const std::string field_name, + const int iteration, + const bool read = true); + int io_database( + const std::string fname, + const std::string field_name, const int toffset, const bool read = true); + /* essential FFT stuff */ void dft(); void ift(); void normalize(); + void symmetrize(); + /* stats */ void compute_rspace_xincrement_stats( const int xcells, const hid_t group, @@ -191,16 +104,57 @@ class field const std::string dset_name, const hsize_t toffset, const std::vector<double> max_estimate); - inline rnumber *get_rdata() + + /* acess data */ + inline rnumber *__restrict__ get_rdata() { return this->data; } - inline cnumber *get_cdata() + + inline typename fftw_interface<rnumber>::complex *__restrict__ get_cdata() + { + return (typename fftw_interface<rnumber>::complex*__restrict__)this->data; + } + + inline rnumber &rval(ptrdiff_t rindex, int component = 0) + { + assert(fc == ONE || fc == THREE); + assert(component >= 0 && component <ncomp(fc)); + return *(this->data + rindex*ncomp(fc) + component); + } + + inline rnumber &rval(ptrdiff_t rindex, int comp1, int comp0) + { + assert(fc == THREExTHREE); + assert(comp1 >= 0 && comp1 < 3); + assert(comp0 >= 0 && comp0 < 3); + return *(this->data + ((rindex*3 + comp1)*3 + comp0)); + } + + inline rnumber &cval(ptrdiff_t cindex, int imag) + { + assert(fc == ONE); + assert(imag == 0 || imag == 1); + return *(this->data + cindex*2 + imag); + } + + inline rnumber &cval(ptrdiff_t cindex, int component, int imag) { - return (cnumber*)this->data; + assert(fc == THREE); + assert(imag == 0 || imag == 1); + return *(this->data + (cindex*ncomp(fc) + component)*2 + imag); } - inline field<rnumber, be, fc>& operator=(const cnumber *__restrict__ source) + inline rnumber &cval(ptrdiff_t cindex, int comp1, int comp0, int imag) + { + assert(fc == THREExTHREE); + assert(comp1 >= 0 && comp1 < 3); + assert(comp0 >= 0 && comp0 < 3); + assert(imag == 0 || imag == 1); + return *(this->data + ((cindex*3 + comp1)*3+comp0)*2 + imag); + } + + inline field<rnumber, be, fc>& operator=(const typename fftw_interface<rnumber>::complex *__restrict__ source) { std::copy((rnumber*)source, (rnumber*)(source + this->clayout->local_size), @@ -217,6 +171,15 @@ class field this->real_space_representation = true; return *this; } + + inline field<rnumber, be, fc>& operator=(const rnumber value) + { + std::fill_n(this->data, + this->rmemlayout->local_size, + value); + return *this; + } + template <kspace_dealias_type dt> void compute_stats( kspace<be, dt> *kk, @@ -224,6 +187,45 @@ class field const std::string dset_name, const hsize_t toffset, const double max_estimate); + inline void impose_zero_mode() + { + if (this->clayout->myrank == this->clayout->rank[0][0] && + this->real_space_representation == false) + { + std::fill_n(this->data, 2*ncomp(fc), 0.0); + } + } + template <class func_type> + void RLOOP(func_type expression) + { + switch(be) + { + case FFTW: + #pragma omp parallel for schedule(static) + for (hsize_t zindex = 0; zindex < this->rlayout->subsizes[0]; zindex++) + for (hsize_t yindex = 0; yindex < this->rlayout->subsizes[1]; yindex++) + { + ptrdiff_t rindex = ( + zindex * this->rlayout->subsizes[1] + yindex)*( + this->rmemlayout->subsizes[2]); + for (hsize_t xindex = 0; xindex < this->rlayout->subsizes[2]; xindex++) + { + expression(rindex, xindex, yindex, zindex); + rindex++; + } + } + break; + } + } + ptrdiff_t get_cindex( + ptrdiff_t xindex, + ptrdiff_t yindex, + ptrdiff_t zindex) + { + return ((yindex*this->clayout->subsizes[1] + + zindex)*this->clayout->subsizes[2] + + xindex); + } }; template <typename rnumber, @@ -236,79 +238,5 @@ void compute_gradient( field<rnumber, be, fc1> *source, field<rnumber, be, fc2> *destination); - -/* real space loop */ -template <field_backend be, class ObjectType, class FuncType> -void FIELD_RLOOP(ObjectType* obj, FuncType expression) -{ - switch (be) - { - case FFTW: - #pragma omp parallel for schedule(static) - for (hsize_t zindex = 0; zindex < obj->rlayout->subsizes[0]; zindex++) - for (hsize_t yindex = 0; yindex < obj->rlayout->subsizes[1]; yindex++) - { - ptrdiff_t rindex = ( - zindex * obj->rlayout->subsizes[1] + yindex)*( - obj->rmemlayout->subsizes[2]); - for (hsize_t xindex = 0; xindex < obj->rlayout->subsizes[2]; xindex++) - { - expression(zindex, yindex, rindex, xindex); - rindex++; - } - } - break; - } -} - -template <class ObjectType, class FuncType> -void KSPACE_CLOOP_K2(ObjectType* obj, FuncType expression) - -{ - #pragma omp parallel for schedule(static) - for (hsize_t yindex = 0; yindex < obj->layout->subsizes[0]; yindex++){ - ptrdiff_t cindex = yindex*obj->layout->subsizes[1]*obj->layout->subsizes[2]; - for (hsize_t zindex = 0; zindex < obj->layout->subsizes[1]; zindex++) - for (hsize_t xindex = 0; xindex < obj->layout->subsizes[2]; xindex++) - { - double k2 = (obj->kx[xindex]*obj->kx[xindex] + - obj->ky[yindex]*obj->ky[yindex] + - obj->kz[zindex]*obj->kz[zindex]); - expression(cindex, yindex, zindex, xindex, k2); - cindex++; - } - } -} - -template <class ObjectType, class FuncType> -void KSPACE_CLOOP_K2_NXMODES(ObjectType* obj, FuncType expression) - -{ - #pragma omp parallel for schedule(static) - for (hsize_t yindex = 0; yindex < obj->layout->subsizes[0]; yindex++) - { - ptrdiff_t cindex = yindex*( obj->layout->subsizes[1] * obj->layout->subsizes[2] ); - for (hsize_t zindex = 0; zindex < obj->layout->subsizes[1]; zindex++) - { - int nxmodes = 1; - hsize_t xindex = 0; - double k2 = (obj->kx[xindex]*obj->kx[xindex] + - obj->ky[yindex]*obj->ky[yindex] + - obj->kz[zindex]*obj->kz[zindex]); - expression(cindex, yindex, zindex, nxmodes, xindex, k2); - cindex++; - nxmodes = 2; - for (xindex = 1; xindex < obj->layout->subsizes[2]; xindex++) - { - double k2 = (obj->kx[xindex]*obj->kx[xindex] + - obj->ky[yindex]*obj->ky[yindex] + - obj->kz[zindex]*obj->kz[zindex]); - expression(cindex, yindex, zindex, nxmodes, xindex, k2); - cindex++; - } - } - } -} - -#endif//FIELD +#endif//FIELD_HPP diff --git a/bfps/cpp/field_layout.cpp b/bfps/cpp/field_layout.cpp new file mode 100644 index 0000000000000000000000000000000000000000..908904991d5d95b0c89ba679b402d8d5727b8c85 --- /dev/null +++ b/bfps/cpp/field_layout.cpp @@ -0,0 +1,111 @@ +/********************************************************************** +* * +* Copyright 2015 Max Planck Institute * +* for Dynamics and Self-Organization * +* * +* This file is part of bfps. * +* * +* bfps is free software: you can redistribute it and/or modify * +* it under the terms of the GNU General Public License as published * +* by the Free Software Foundation, either version 3 of the License, * +* or (at your option) any later version. * +* * +* bfps is distributed in the hope that it will be useful, * +* but WITHOUT ANY WARRANTY; without even the implied warranty of * +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * +* GNU General Public License for more details. * +* * +* You should have received a copy of the GNU General Public License * +* along with bfps. If not, see <http://www.gnu.org/licenses/> * +* * +* Contact: Cristian.Lalescu@ds.mpg.de * +* * +**********************************************************************/ + + +#include <cassert> +#include "field_layout.hpp" +#include "scope_timer.hpp" + +template <field_components fc> +field_layout<fc>::field_layout( + const hsize_t *SIZES, + const hsize_t *SUBSIZES, + const hsize_t *STARTS, + const MPI_Comm COMM_TO_USE) +{ + TIMEZONE("field_layout::field_layout"); + this->comm = COMM_TO_USE; + MPI_Comm_rank(this->comm, &this->myrank); + MPI_Comm_size(this->comm, &this->nprocs); + + std::copy(SIZES, SIZES + 3, this->sizes); + std::copy(SUBSIZES, SUBSIZES + 3, this->subsizes); + std::copy(STARTS, STARTS + 3, this->starts); + if (fc == THREE || fc == THREExTHREE) + { + this->sizes[3] = 3; + this->subsizes[3] = 3; + this->starts[3] = 0; + } + if (fc == THREExTHREE) + { + this->sizes[4] = 3; + this->subsizes[4] = 3; + this->starts[4] = 0; + } + this->local_size = 1; + this->full_size = 1; + for (unsigned int i=0; i<ndim(fc); i++) + { + this->local_size *= this->subsizes[i]; + this->full_size *= this->sizes[i]; + } + + /*field will at most be distributed in 2D*/ + this->rank.resize(2); + this->all_start.resize(2); + this->all_size.resize(2); + for (int i=0; i<2; i++) + { + this->rank[i].resize(this->sizes[i]); + std::vector<int> local_rank; + local_rank.resize(this->sizes[i], 0); + for (unsigned int ii=this->starts[i]; ii<this->starts[i]+this->subsizes[i]; ii++) + local_rank[ii] = this->myrank; + MPI_Allreduce( + &local_rank.front(), + &this->rank[i].front(), + this->sizes[i], + MPI_INT, + MPI_SUM, + this->comm); + this->all_start[i].resize(this->nprocs); + std::vector<int> local_start; + local_start.resize(this->nprocs, 0); + local_start[this->myrank] = this->starts[i]; + MPI_Allreduce( + &local_start.front(), + &this->all_start[i].front(), + this->nprocs, + MPI_INT, + MPI_SUM, + this->comm); + this->all_size[i].resize(this->nprocs); + std::vector<int> local_subsize; + local_subsize.resize(this->nprocs, 0); + local_subsize[this->myrank] = this->subsizes[i]; + MPI_Allreduce( + &local_subsize.front(), + &this->all_size[i].front(), + this->nprocs, + MPI_INT, + MPI_SUM, + this->comm); + } +} + +template class field_layout<ONE>; +template class field_layout<THREE>; +template class field_layout<THREExTHREE>; + diff --git a/bfps/cpp/field_layout.hpp b/bfps/cpp/field_layout.hpp new file mode 100644 index 0000000000000000000000000000000000000000..770119c2dcb05017d495b62559f050646872dc84 --- /dev/null +++ b/bfps/cpp/field_layout.hpp @@ -0,0 +1,79 @@ +/********************************************************************** +* * +* Copyright 2015 Max Planck Institute * +* for Dynamics and Self-Organization * +* * +* This file is part of bfps. * +* * +* bfps is free software: you can redistribute it and/or modify * +* it under the terms of the GNU General Public License as published * +* by the Free Software Foundation, either version 3 of the License, * +* or (at your option) any later version. * +* * +* bfps is distributed in the hope that it will be useful, * +* but WITHOUT ANY WARRANTY; without even the implied warranty of * +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * +* GNU General Public License for more details. * +* * +* You should have received a copy of the GNU General Public License * +* along with bfps. If not, see <http://www.gnu.org/licenses/> * +* * +* Contact: Cristian.Lalescu@ds.mpg.de * +* * +**********************************************************************/ + + + +#include <vector> +#include "base.hpp" + +#ifndef FIELD_LAYOUT_HPP + +#define FIELD_LAYOUT_HPP + +enum field_components {ONE, THREE, THREExTHREE}; + +constexpr unsigned int ncomp( + field_components fc) + /* return actual number of field components for each enum value */ +{ + return ((fc == THREE) ? 3 : ( + (fc == THREExTHREE) ? 9 : 1)); +} + +constexpr unsigned int ndim( + field_components fc) + /* return actual number of field dimensions for each enum value */ +{ + return ((fc == THREE) ? 4 : ( + (fc == THREExTHREE) ? 5 : 3)); +} + +template <field_components fc> +class field_layout +{ + public: + /* description */ + hsize_t sizes[ndim(fc)]; + hsize_t subsizes[ndim(fc)]; + hsize_t starts[ndim(fc)]; + hsize_t local_size, full_size; + + int myrank, nprocs; + MPI_Comm comm; + + std::vector<std::vector<int>> rank; + std::vector<std::vector<int>> all_start; + std::vector<std::vector<int>> all_size; + + /* methods */ + field_layout( + const hsize_t *SIZES, + const hsize_t *SUBSIZES, + const hsize_t *STARTS, + const MPI_Comm COMM_TO_USE); + ~field_layout(){} +}; + +#endif//FIELD_LAYOUT_HPP + diff --git a/bfps/cpp/interpolator_base.cpp b/bfps/cpp/interpolator_base.cpp index 58bf57cf13382f0704da4537dae9d21bb4a841da..db81dcb329070e14897e432e82a2fee95810e169 100644 --- a/bfps/cpp/interpolator_base.cpp +++ b/bfps/cpp/interpolator_base.cpp @@ -43,6 +43,20 @@ interpolator_base<rnumber, interp_neighbours>::interpolator_base( this->dz = 4*acos(0) / (fs->dkz*this->descriptor->sizes[0]); } +template <class rnumber, int interp_neighbours> +interpolator_base<rnumber, interp_neighbours>::interpolator_base( + vorticity_equation<rnumber, FFTW> *fs, + base_polynomial_values BETA_POLYS) +{ +// this->descriptor = fs->rd; +// this->compute_beta = BETA_POLYS; +// +// // compute dx, dy, dz; +// this->dx = 4*acos(0) / (fs->kk->dkx*this->descriptor->sizes[2]); +// this->dy = 4*acos(0) / (fs->kk->dky*this->descriptor->sizes[1]); +// this->dz = 4*acos(0) / (fs->kk->dkz*this->descriptor->sizes[0]); +} + template <class rnumber, int interp_neighbours> void interpolator_base<rnumber, interp_neighbours>::get_grid_coordinates( const int nparticles, diff --git a/bfps/cpp/interpolator_base.hpp b/bfps/cpp/interpolator_base.hpp index 7dda7fb08319bf2a044bcc220e204b748d6336d6..f4b793342d9b5b38e39c717ad30ee88e106958aa 100644 --- a/bfps/cpp/interpolator_base.hpp +++ b/bfps/cpp/interpolator_base.hpp @@ -25,6 +25,7 @@ #include "fluid_solver_base.hpp" +#include "vorticity_equation.hpp" #include "spline_n1.hpp" #include "spline_n2.hpp" #include "spline_n3.hpp" @@ -58,6 +59,10 @@ class interpolator_base interpolator_base( fluid_solver_base<rnumber> *FSOLVER, base_polynomial_values BETA_POLYS); + + interpolator_base( + vorticity_equation<rnumber, FFTW> *FSOLVER, + base_polynomial_values BETA_POLYS); virtual ~interpolator_base(){} /* may not destroy input */ diff --git a/bfps/cpp/kspace.cpp b/bfps/cpp/kspace.cpp new file mode 100644 index 0000000000000000000000000000000000000000..f98024ea50dfd709ab63f490727c274c7fe3d600 --- /dev/null +++ b/bfps/cpp/kspace.cpp @@ -0,0 +1,439 @@ +/********************************************************************** +* * +* Copyright 2015 Max Planck Institute * +* for Dynamics and Self-Organization * +* * +* This file is part of bfps. * +* * +* bfps is free software: you can redistribute it and/or modify * +* it under the terms of the GNU General Public License as published * +* by the Free Software Foundation, either version 3 of the License, * +* or (at your option) any later version. * +* * +* bfps is distributed in the hope that it will be useful, * +* but WITHOUT ANY WARRANTY; without even the implied warranty of * +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * +* GNU General Public License for more details. * +* * +* You should have received a copy of the GNU General Public License * +* along with bfps. If not, see <http://www.gnu.org/licenses/> * +* * +* Contact: Cristian.Lalescu@ds.mpg.de * +* * +**********************************************************************/ + + +#include <cmath> +#include <cstdlib> +#include <algorithm> +#include <cassert> +#include "kspace.hpp" +#include "scope_timer.hpp" + + + +template <field_backend be, + kspace_dealias_type dt> +template <field_components fc> +kspace<be, dt>::kspace( + const field_layout<fc> *source_layout, + const double DKX, + const double DKY, + const double DKZ) +{ + TIMEZONE("field::kspace"); + /* get layout */ + this->layout = new field_layout<ONE>( + source_layout->sizes, + source_layout->subsizes, + source_layout->starts, + source_layout->comm); + + /* store dk values */ + this->dkx = DKX; + this->dky = DKY; + this->dkz = DKZ; + + /* compute kx, ky, kz and compute kM values */ + switch(be) + { + case FFTW: + this->kx.resize(this->layout->sizes[2]); + this->ky.resize(this->layout->subsizes[0]); + this->kz.resize(this->layout->sizes[1]); + int i, ii; + for (i = 0; i<int(this->layout->sizes[2]); i++) + this->kx[i] = i*this->dkx; + for (i = 0; i<int(this->layout->subsizes[0]); i++) + { + ii = i + this->layout->starts[0]; + if (ii <= int(this->layout->sizes[1]/2)) + this->ky[i] = this->dky*ii; + else + this->ky[i] = this->dky*(ii - int(this->layout->sizes[1])); + } + for (i = 0; i<int(this->layout->sizes[1]); i++) + { + if (i <= int(this->layout->sizes[0]/2)) + this->kz[i] = this->dkz*i; + else + this->kz[i] = this->dkz*(i - int(this->layout->sizes[0])); + } + switch(dt) + { + case TWO_THIRDS: + this->kMx = this->dkx*(int(2*(int(this->layout->sizes[2])-1)/3)-1); + this->kMy = this->dky*(int(this->layout->sizes[0] / 3)-1); + this->kMz = this->dkz*(int(this->layout->sizes[1] / 3)-1); + break; + case SMOOTH: + this->kMx = this->dkx*(int(this->layout->sizes[2])-2); + this->kMy = this->dky*(int(this->layout->sizes[0] / 2)-1); + this->kMz = this->dkz*(int(this->layout->sizes[1] / 2)-1); + break; + } + break; + } + + /* get global kM and dk */ + this->kM = this->kMx; + if (this->kM < this->kMy) this->kM = this->kMy; + if (this->kM < this->kMz) this->kM = this->kMz; + this->kM2 = this->kM * this->kM; + this->dk = this->dkx; + if (this->dk > this->dky) this->dk = this->dky; + if (this->dk > this->dkz) this->dk = this->dkz; + this->dk2 = this->dk*this->dk; + + /* spectra stuff */ + this->nshells = int(this->kM / this->dk) + 2; + this->kshell.resize(this->nshells, 0); + this->nshell.resize(this->nshells, 0); + std::vector<double> kshell_local; + kshell_local.resize(this->nshells, 0); + std::vector<int64_t> nshell_local; + nshell_local.resize(this->nshells, 0); + this->CLOOP_K2_NXMODES( + [&](ptrdiff_t cindex, + ptrdiff_t xindex, + ptrdiff_t yindex, + ptrdiff_t zindex, + double k2, + int nxmodes){ + if (k2 < this->kM2) + { + double knorm = sqrt(k2); + nshell_local[int(knorm/this->dk)] += nxmodes; + kshell_local[int(knorm/this->dk)] += nxmodes*knorm; + } + if (dt == SMOOTH) + this->dealias_filter[int(round(k2 / this->dk2))] = \ + exp(-36.0 * pow(k2/this->kM2, 18.)); + }); + MPI_Allreduce( + &nshell_local.front(), + &this->nshell.front(), + this->nshells, + MPI_INT64_T, MPI_SUM, this->layout->comm); + MPI_Allreduce( + &kshell_local.front(), + &this->kshell.front(), + this->nshells, + MPI_DOUBLE, MPI_SUM, this->layout->comm); + for (int n=0; n<this->nshells; n++) + this->kshell[n] /= this->nshell[n]; +} + +template <field_backend be, + kspace_dealias_type dt> +kspace<be, dt>::~kspace() +{ + delete this->layout; +} + +template <field_backend be, + kspace_dealias_type dt> +template <typename rnumber, + field_components fc> +void kspace<be, dt>::low_pass(typename fftw_interface<rnumber>::complex *__restrict__ a, const double kmax) +{ + const double km2 = kmax*kmax; + this->CLOOP_K2( + [&](ptrdiff_t cindex, + ptrdiff_t xindex, + ptrdiff_t yindex, + ptrdiff_t zindex, + double k2){ + if (k2 >= km2) + std::fill_n((rnumber*)(a + ncomp(fc)*cindex), 2*ncomp(fc), 0); + }); +} + +template <field_backend be, + kspace_dealias_type dt> +template <typename rnumber, + field_components fc> +void kspace<be, dt>::dealias(typename fftw_interface<rnumber>::complex *__restrict__ a) +{ + switch(dt) + { + case TWO_THIRDS: + this->low_pass<rnumber, fc>(a, this->kM); + break; + case SMOOTH: + this->CLOOP_K2( + [&](ptrdiff_t cindex, + ptrdiff_t xindex, + ptrdiff_t yindex, + ptrdiff_t zindex, + double k2){ + double tval = this->dealias_filter[int(round(k2 / this->dk2))]; + for (int tcounter=0; tcounter<2*ncomp(fc); tcounter++) + ((rnumber*)a)[2*ncomp(fc)*cindex + tcounter] *= tval; + }); + break; + } +} + +template <field_backend be, + kspace_dealias_type dt> +template <typename rnumber> +void kspace<be, dt>::force_divfree(typename fftw_interface<rnumber>::complex *__restrict__ a) +{ + TIMEZONE("kspace::force_divfree"); + typename fftw_interface<rnumber>::complex tval; + this->CLOOP_K2( + [&](ptrdiff_t cindex, + ptrdiff_t xindex, + ptrdiff_t yindex, + ptrdiff_t zindex, + double k2){ + if (k2 > 0) + { + tval[0] = (this->kx[xindex]*((*(a + cindex*3 ))[0]) + + this->ky[yindex]*((*(a + cindex*3+1))[0]) + + this->kz[zindex]*((*(a + cindex*3+2))[0]) ) / k2; + tval[1] = (this->kx[xindex]*((*(a + cindex*3 ))[1]) + + this->ky[yindex]*((*(a + cindex*3+1))[1]) + + this->kz[zindex]*((*(a + cindex*3+2))[1]) ) / k2; + for (int imag_part=0; imag_part<2; imag_part++) + { + a[cindex*3 ][imag_part] -= tval[imag_part]*this->kx[xindex]; + a[cindex*3+1][imag_part] -= tval[imag_part]*this->ky[yindex]; + a[cindex*3+2][imag_part] -= tval[imag_part]*this->kz[zindex]; + } + } + } + ); + if (this->layout->myrank == this->layout->rank[0][0]) + std::fill_n((rnumber*)(a), 6, 0.0); +} + +template <field_backend be, + kspace_dealias_type dt> +template <typename rnumber, + field_components fc> +void kspace<be, dt>::cospectrum( + const rnumber(* __restrict a)[2], + const rnumber(* __restrict b)[2], + const hid_t group, + const std::string dset_name, + const hsize_t toffset) +{ + TIMEZONE("field::cospectrum"); + std::vector<double> spec, spec_local; + spec.resize(this->nshells*ncomp(fc)*ncomp(fc), 0); + spec_local.resize(this->nshells*ncomp(fc)*ncomp(fc), 0); + this->CLOOP_K2_NXMODES( + [&](ptrdiff_t cindex, + ptrdiff_t xindex, + ptrdiff_t yindex, + ptrdiff_t zindex, + double k2, + int nxmodes){ + if (k2 <= this->kM2) + { + int tmp_int = int(sqrt(k2) / this->dk)*ncomp(fc)*ncomp(fc); + for (hsize_t i=0; i<ncomp(fc); i++) + for (hsize_t j=0; j<ncomp(fc); j++) + spec_local[tmp_int + i*ncomp(fc)+j] += nxmodes * ( + (a[ncomp(fc)*cindex + i][0] * b[ncomp(fc)*cindex + j][0]) + + (a[ncomp(fc)*cindex + i][1] * b[ncomp(fc)*cindex + j][1])); + } + }); + MPI_Allreduce( + &spec_local.front(), + &spec.front(), + spec.size(), + MPI_DOUBLE, MPI_SUM, this->layout->comm); + if (this->layout->myrank == 0) + { + hid_t dset, wspace, mspace; + hsize_t count[(ndim(fc)-2)*2], offset[(ndim(fc)-2)*2], dims[(ndim(fc)-2)*2]; + dset = H5Dopen(group, ("spectra/" + dset_name).c_str(), H5P_DEFAULT); + wspace = H5Dget_space(dset); + H5Sget_simple_extent_dims(wspace, dims, NULL); + switch (fc) + { + case THREExTHREE: + offset[4] = 0; + offset[5] = 0; + count[4] = ncomp(fc); + count[5] = ncomp(fc); + case THREE: + offset[2] = 0; + offset[3] = 0; + count[2] = ncomp(fc); + count[3] = ncomp(fc); + default: + offset[0] = toffset; + offset[1] = 0; + count[0] = 1; + count[1] = this->nshells; + } + mspace = H5Screate_simple((ndim(fc)-2)*2, count, NULL); + H5Sselect_hyperslab(wspace, H5S_SELECT_SET, offset, NULL, count, NULL); + H5Dwrite(dset, H5T_NATIVE_DOUBLE, mspace, wspace, H5P_DEFAULT, &spec.front()); + H5Sclose(wspace); + H5Sclose(mspace); + H5Dclose(dset); + } +} + + +template class kspace<FFTW, TWO_THIRDS>; +template class kspace<FFTW, SMOOTH>; + +template kspace<FFTW, TWO_THIRDS>::kspace<>( + const field_layout<ONE> *, + const double, const double, const double); +template kspace<FFTW, TWO_THIRDS>::kspace<>( + const field_layout<THREE> *, + const double, const double, const double); +template kspace<FFTW, TWO_THIRDS>::kspace<>( + const field_layout<THREExTHREE> *, + const double, const double, const double); + +template kspace<FFTW, SMOOTH>::kspace<>( + const field_layout<ONE> *, + const double, const double, const double); +template kspace<FFTW, SMOOTH>::kspace<>( + const field_layout<THREE> *, + const double, const double, const double); +template kspace<FFTW, SMOOTH>::kspace<>( + const field_layout<THREExTHREE> *, + const double, const double, const double); + +template void kspace<FFTW, SMOOTH>::low_pass<float, ONE>( + typename fftw_interface<float>::complex *__restrict__ a, + const double kmax); +template void kspace<FFTW, SMOOTH>::low_pass<float, THREE>( + typename fftw_interface<float>::complex *__restrict__ a, + const double kmax); +template void kspace<FFTW, SMOOTH>::low_pass<float, THREExTHREE>( + typename fftw_interface<float>::complex *__restrict__ a, + const double kmax); + +template void kspace<FFTW, SMOOTH>::low_pass<double, ONE>( + typename fftw_interface<double>::complex *__restrict__ a, + const double kmax); +template void kspace<FFTW, SMOOTH>::low_pass<double, THREE>( + typename fftw_interface<double>::complex *__restrict__ a, + const double kmax); +template void kspace<FFTW, SMOOTH>::low_pass<double, THREExTHREE>( + typename fftw_interface<double>::complex *__restrict__ a, + const double kmax); + +template void kspace<FFTW, SMOOTH>::dealias<float, ONE>( + typename fftw_interface<float>::complex *__restrict__ a); +template void kspace<FFTW, SMOOTH>::dealias<float, THREE>( + typename fftw_interface<float>::complex *__restrict__ a); +template void kspace<FFTW, SMOOTH>::dealias<float, THREExTHREE>( + typename fftw_interface<float>::complex *__restrict__ a); + +template void kspace<FFTW, SMOOTH>::dealias<double, ONE>( + typename fftw_interface<double>::complex *__restrict__ a); +template void kspace<FFTW, SMOOTH>::dealias<double, THREE>( + typename fftw_interface<double>::complex *__restrict__ a); +template void kspace<FFTW, SMOOTH>::dealias<double, THREExTHREE>( + typename fftw_interface<double>::complex *__restrict__ a); + +template void kspace<FFTW, TWO_THIRDS>::cospectrum<float, ONE>( + const typename fftw_interface<float>::complex *__restrict__ a, + const typename fftw_interface<float>::complex *__restrict__ b, + const hid_t group, + const std::string dset_name, + const hsize_t toffset); +template void kspace<FFTW, TWO_THIRDS>::cospectrum<float, THREE>( + const typename fftw_interface<float>::complex *__restrict__ a, + const typename fftw_interface<float>::complex *__restrict__ b, + const hid_t group, + const std::string dset_name, + const hsize_t toffset); +template void kspace<FFTW, TWO_THIRDS>::cospectrum<float, THREExTHREE>( + const typename fftw_interface<float>::complex *__restrict__ a, + const typename fftw_interface<float>::complex *__restrict__ b, + const hid_t group, + const std::string dset_name, + const hsize_t toffset); +template void kspace<FFTW, TWO_THIRDS>::cospectrum<double, ONE>( + const typename fftw_interface<double>::complex *__restrict__ a, + const typename fftw_interface<double>::complex *__restrict__ b, + const hid_t group, + const std::string dset_name, + const hsize_t toffset); +template void kspace<FFTW, TWO_THIRDS>::cospectrum<double, THREE>( + const typename fftw_interface<double>::complex *__restrict__ a, + const typename fftw_interface<double>::complex *__restrict__ b, + const hid_t group, + const std::string dset_name, + const hsize_t toffset); +template void kspace<FFTW, TWO_THIRDS>::cospectrum<double, THREExTHREE>( + const typename fftw_interface<double>::complex *__restrict__ a, + const typename fftw_interface<double>::complex *__restrict__ b, + const hid_t group, + const std::string dset_name, + const hsize_t toffset); + +template void kspace<FFTW, SMOOTH>::cospectrum<float, ONE>( + const typename fftw_interface<float>::complex *__restrict__ a, + const typename fftw_interface<float>::complex *__restrict__ b, + const hid_t group, + const std::string dset_name, + const hsize_t toffset); +template void kspace<FFTW, SMOOTH>::cospectrum<float, THREE>( + const typename fftw_interface<float>::complex *__restrict__ a, + const typename fftw_interface<float>::complex *__restrict__ b, + const hid_t group, + const std::string dset_name, + const hsize_t toffset); +template void kspace<FFTW, SMOOTH>::cospectrum<float, THREExTHREE>( + const typename fftw_interface<float>::complex *__restrict__ a, + const typename fftw_interface<float>::complex *__restrict__ b, + const hid_t group, + const std::string dset_name, + const hsize_t toffset); +template void kspace<FFTW, SMOOTH>::cospectrum<double, ONE>( + const typename fftw_interface<double>::complex *__restrict__ a, + const typename fftw_interface<double>::complex *__restrict__ b, + const hid_t group, + const std::string dset_name, + const hsize_t toffset); +template void kspace<FFTW, SMOOTH>::cospectrum<double, THREE>( + const typename fftw_interface<double>::complex *__restrict__ a, + const typename fftw_interface<double>::complex *__restrict__ b, + const hid_t group, + const std::string dset_name, + const hsize_t toffset); +template void kspace<FFTW, SMOOTH>::cospectrum<double, THREExTHREE>( + const typename fftw_interface<double>::complex *__restrict__ a, + const typename fftw_interface<double>::complex *__restrict__ b, + const hid_t group, + const std::string dset_name, + const hsize_t toffset); + +template void kspace<FFTW, SMOOTH>::force_divfree<float>( + typename fftw_interface<float>::complex *__restrict__ a); +template void kspace<FFTW, SMOOTH>::force_divfree<double>( + typename fftw_interface<double>::complex *__restrict__ a); + diff --git a/bfps/cpp/kspace.hpp b/bfps/cpp/kspace.hpp new file mode 100644 index 0000000000000000000000000000000000000000..ea59d53aed80d3d97fd17d5b5ed72f710c190d4f --- /dev/null +++ b/bfps/cpp/kspace.hpp @@ -0,0 +1,145 @@ +/********************************************************************** +* * +* Copyright 2015 Max Planck Institute * +* for Dynamics and Self-Organization * +* * +* This file is part of bfps. * +* * +* bfps is free software: you can redistribute it and/or modify * +* it under the terms of the GNU General Public License as published * +* by the Free Software Foundation, either version 3 of the License, * +* or (at your option) any later version. * +* * +* bfps is distributed in the hope that it will be useful, * +* but WITHOUT ANY WARRANTY; without even the implied warranty of * +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * +* GNU General Public License for more details. * +* * +* You should have received a copy of the GNU General Public License * +* along with bfps. If not, see <http://www.gnu.org/licenses/> * +* * +* Contact: Cristian.Lalescu@ds.mpg.de * +* * +**********************************************************************/ + + + +#include <hdf5.h> +#include <unordered_map> +#include <vector> +#include <string> +#include "fftw_interface.hpp" +#include "field_layout.hpp" + +#ifndef KSPACE_HPP + +#define KSPACE_HPP + +enum field_backend {FFTW}; +enum kspace_dealias_type {TWO_THIRDS, SMOOTH}; + + +template <field_backend be, + kspace_dealias_type dt> +class kspace +{ + public: + /* relevant field layout */ + field_layout<ONE> *layout; + + /* physical parameters */ + double dkx, dky, dkz, dk, dk2; + + /* mode and dealiasing information */ + double kMx, kMy, kMz, kM, kM2; + std::vector<double> kx, ky, kz; + std::unordered_map<int, double> dealias_filter; + std::vector<double> kshell; + std::vector<int64_t> nshell; + int nshells; + + /* methods */ + template <field_components fc> + kspace( + const field_layout<fc> *source_layout, + const double DKX = 1.0, + const double DKY = 1.0, + const double DKZ = 1.0); + ~kspace(); + + template <typename rnumber, + field_components fc> + void low_pass( + typename fftw_interface<rnumber>::complex *__restrict__ a, + const double kmax); + + template <typename rnumber, + field_components fc> + void dealias(typename fftw_interface<rnumber>::complex *__restrict__ a); + + template <typename rnumber, + field_components fc> + void cospectrum( + const rnumber(* __restrict__ a)[2], + const rnumber(* __restrict__ b)[2], + const hid_t group, + const std::string dset_name, + const hsize_t toffset); + template <class func_type> + void CLOOP(func_type expression) + { + ptrdiff_t cindex = 0; + for (hsize_t yindex = 0; yindex < this->layout->subsizes[0]; yindex++) + for (hsize_t zindex = 0; zindex < this->layout->subsizes[1]; zindex++) + for (hsize_t xindex = 0; xindex < this->layout->subsizes[2]; xindex++) + { + expression(cindex, xindex, yindex, zindex); + cindex++; + } + } + template <class func_type> + void CLOOP_K2(func_type expression) + { + double k2; + ptrdiff_t cindex = 0; + for (hsize_t yindex = 0; yindex < this->layout->subsizes[0]; yindex++) + for (hsize_t zindex = 0; zindex < this->layout->subsizes[1]; zindex++) + for (hsize_t xindex = 0; xindex < this->layout->subsizes[2]; xindex++) + { + k2 = (this->kx[xindex]*this->kx[xindex] + + this->ky[yindex]*this->ky[yindex] + + this->kz[zindex]*this->kz[zindex]); + expression(cindex, xindex, yindex, zindex, k2); + cindex++; + } + } + template <class func_type> + void CLOOP_K2_NXMODES(func_type expression) + { + ptrdiff_t cindex = 0; + for (hsize_t yindex = 0; yindex < this->layout->subsizes[0]; yindex++) + for (hsize_t zindex = 0; zindex < this->layout->subsizes[1]; zindex++) + { + hsize_t xindex = 0; + double k2 = ( + this->kx[xindex]*this->kx[xindex] + + this->ky[yindex]*this->ky[yindex] + + this->kz[zindex]*this->kz[zindex]); + expression(cindex, xindex, yindex, zindex, k2, 1); + cindex++; + for (xindex = 1; xindex < this->layout->subsizes[2]; xindex++) + { + k2 = (this->kx[xindex]*this->kx[xindex] + + this->ky[yindex]*this->ky[yindex] + + this->kz[zindex]*this->kz[zindex]); + expression(cindex, xindex, yindex, zindex, k2, 2); + cindex++; + } + } + } + template <typename rnumber> + void force_divfree(typename fftw_interface<rnumber>::complex *__restrict__ a); +}; + +#endif//KSPACE_HPP + diff --git a/bfps/cpp/particles.cpp b/bfps/cpp/particles.cpp index 847f065d49299b559162060876402101fe48d9d4..cdaf157cb912c3074faf84bfecf1d9b3752c78a7 100644 --- a/bfps/cpp/particles.cpp +++ b/bfps/cpp/particles.cpp @@ -43,17 +43,17 @@ template <particle_types particle_type, class rnumber, int interp_neighbours> particles<particle_type, rnumber, interp_neighbours>::particles( const char *NAME, const hid_t data_file_id, - interpolator_base<rnumber, interp_neighbours> *FIELD, + interpolator_base<rnumber, interp_neighbours> *VEL, const int TRAJ_SKIP, const int INTEGRATION_STEPS) : particles_io_base<particle_type>( NAME, TRAJ_SKIP, data_file_id, - FIELD->descriptor->comm) + VEL->descriptor->comm) { assert((INTEGRATION_STEPS <= 6) && (INTEGRATION_STEPS >= 1)); - this->vel = FIELD; + this->vel = VEL; this->integration_steps = INTEGRATION_STEPS; this->array_size = this->nparticles * state_dimension(particle_type); this->state = new double[this->array_size]; diff --git a/bfps/cpp/rFFTW_distributed_particles.cpp b/bfps/cpp/rFFTW_distributed_particles.cpp index d04e10402ba649a3aa17001481d85585eb17a841..713b4f79cc8888713a9edad7a2c344edc090289b 100644 --- a/bfps/cpp/rFFTW_distributed_particles.cpp +++ b/bfps/cpp/rFFTW_distributed_particles.cpp @@ -32,6 +32,8 @@ #include <string> #include <sstream> #include <set> +#include <algorithm> +#include <ctime> #include "base.hpp" #include "rFFTW_distributed_particles.hpp" @@ -45,13 +47,13 @@ template <particle_types particle_type, class rnumber, int interp_neighbours> rFFTW_distributed_particles<particle_type, rnumber, interp_neighbours>::rFFTW_distributed_particles( const char *NAME, const hid_t data_file_id, - rFFTW_interpolator<rnumber, interp_neighbours> *FIELD, + rFFTW_interpolator<rnumber, interp_neighbours> *VEL, const int TRAJ_SKIP, const int INTEGRATION_STEPS) : particles_io_base<particle_type>( NAME, TRAJ_SKIP, data_file_id, - FIELD->descriptor->comm) + VEL->descriptor->comm) { TIMEZONE("rFFTW_distributed_particles::rFFTW_distributed_particles"); /* check that integration_steps has a valid value. @@ -67,16 +69,16 @@ rFFTW_distributed_particles<particle_type, rnumber, interp_neighbours>::rFFTW_di * therefore I prefer to just kill the code at this point, * no matter whether or not NDEBUG is present. * */ - if (interp_neighbours*2+2 > FIELD->descriptor->subsizes[0]) + if (interp_neighbours*2+2 > VEL->descriptor->subsizes[0]) { DEBUG_MSG("parameters incompatible with rFFTW_distributed_particles.\n" "interp kernel size is %d, local_z_size is %d\n", - interp_neighbours*2+2, FIELD->descriptor->subsizes[0]); - if (FIELD->descriptor->myrank == 0) + interp_neighbours*2+2, VEL->descriptor->subsizes[0]); + if (VEL->descriptor->myrank == 0) std::cerr << "parameters incompatible with rFFTW_distributed_particles." << std::endl; exit(0); } - this->vel = FIELD; + this->vel = VEL; this->rhs.resize(INTEGRATION_STEPS); this->integration_steps = INTEGRATION_STEPS; this->state.reserve(2*this->nparticles / this->nprocs); @@ -187,14 +189,18 @@ void rFFTW_distributed_particles<particle_type, rnumber, interp_neighbours>::sam int tindex; tindex = 0; // can this sorting be done more efficiently? - std::set<int> ordered_dp; + std::vector<int> ordered_dp; { TIMEZONE("rFFTW_distributed_particles::sample::ordered_dp"); - for (auto p: dp.at(domain_index)) - ordered_dp.insert(p); + ordered_dp.reserve(dp.at(domain_index).size()); + for (auto p: dp.at(domain_index)) + ordered_dp.push_back(p); + //std::set<int> ordered_dp(dp.at(domain_index)); + std::sort(ordered_dp.begin(), ordered_dp.end()); } for (auto p: ordered_dp) + //for (auto p: dp.at(domain_index)) { (*field)(x.at(p).data, yy + tindex*3); tindex++; @@ -211,6 +217,7 @@ void rFFTW_distributed_particles<particle_type, rnumber, interp_neighbours>::sam } tindex = 0; for (auto p: ordered_dp) + //for (auto p: dp.at(domain_index)) { y[p] = yyy + tindex*3; tindex++; @@ -233,8 +240,10 @@ void rFFTW_distributed_particles<particle_type, rnumber, interp_neighbours>::get case VELOCITY_TRACER: this->sample(this->vel, x, dp, yy); y.clear(); - for (auto &pp: x) - y[pp.first] = yy[pp.first].data; + y.reserve(yy.size()); + y.rehash(this->nparticles); + for (auto &pp: yy) + y[pp.first] = pp.second.data; break; } } @@ -301,6 +310,10 @@ void rFFTW_distributed_particles<particle_type, rnumber, interp_neighbours>::red ro[1] = 1; /* particles to send, particles to receive */ std::vector<int> ps[2], pr[2]; + for (int tcounter = 0; tcounter < 2; tcounter++) + { + ps[tcounter].reserve(newdp[dindex[tcounter]].size()); + } /* number of particles to send, number of particles to receive */ int nps[2], npr[2]; int rsrc, rdst; @@ -452,44 +465,51 @@ void rFFTW_distributed_particles<particle_type, rnumber, interp_neighbours>::Ada const int nsteps) { this->get_rhs(this->state, this->domain_particles, this->rhs[0]); - for (auto &pp: this->state) +#define AdamsBashforth_LOOP_PREAMBLE \ + for (auto &pp: this->state) \ for (unsigned int i=0; i<state_dimension(particle_type); i++) - switch(nsteps) - { - case 1: - pp.second[i] += this->dt*this->rhs[0][pp.first][i]; - break; - case 2: - pp.second[i] += this->dt*(3*this->rhs[0][pp.first][i] - - this->rhs[1][pp.first][i])/2; - break; - case 3: - pp.second[i] += this->dt*(23*this->rhs[0][pp.first][i] - - 16*this->rhs[1][pp.first][i] - + 5*this->rhs[2][pp.first][i])/12; - break; - case 4: - pp.second[i] += this->dt*(55*this->rhs[0][pp.first][i] - - 59*this->rhs[1][pp.first][i] - + 37*this->rhs[2][pp.first][i] - - 9*this->rhs[3][pp.first][i])/24; - break; - case 5: - pp.second[i] += this->dt*(1901*this->rhs[0][pp.first][i] - - 2774*this->rhs[1][pp.first][i] - + 2616*this->rhs[2][pp.first][i] - - 1274*this->rhs[3][pp.first][i] - + 251*this->rhs[4][pp.first][i])/720; - break; - case 6: - pp.second[i] += this->dt*(4277*this->rhs[0][pp.first][i] - - 7923*this->rhs[1][pp.first][i] - + 9982*this->rhs[2][pp.first][i] - - 7298*this->rhs[3][pp.first][i] - + 2877*this->rhs[4][pp.first][i] - - 475*this->rhs[5][pp.first][i])/1440; - break; - } + switch(nsteps) + { + case 1: + AdamsBashforth_LOOP_PREAMBLE + pp.second[i] += this->dt*this->rhs[0][pp.first][i]; + break; + case 2: + AdamsBashforth_LOOP_PREAMBLE + pp.second[i] += this->dt*(3*this->rhs[0][pp.first][i] + - this->rhs[1][pp.first][i])/2; + break; + case 3: + AdamsBashforth_LOOP_PREAMBLE + pp.second[i] += this->dt*(23*this->rhs[0][pp.first][i] + - 16*this->rhs[1][pp.first][i] + + 5*this->rhs[2][pp.first][i])/12; + break; + case 4: + AdamsBashforth_LOOP_PREAMBLE + pp.second[i] += this->dt*(55*this->rhs[0][pp.first][i] + - 59*this->rhs[1][pp.first][i] + + 37*this->rhs[2][pp.first][i] + - 9*this->rhs[3][pp.first][i])/24; + break; + case 5: + AdamsBashforth_LOOP_PREAMBLE + pp.second[i] += this->dt*(1901*this->rhs[0][pp.first][i] + - 2774*this->rhs[1][pp.first][i] + + 2616*this->rhs[2][pp.first][i] + - 1274*this->rhs[3][pp.first][i] + + 251*this->rhs[4][pp.first][i])/720; + break; + case 6: + AdamsBashforth_LOOP_PREAMBLE + pp.second[i] += this->dt*(4277*this->rhs[0][pp.first][i] + - 7923*this->rhs[1][pp.first][i] + + 9982*this->rhs[2][pp.first][i] + - 7298*this->rhs[3][pp.first][i] + + 2877*this->rhs[4][pp.first][i] + - 475*this->rhs[5][pp.first][i])/1440; + break; + } this->redistribute(this->state, this->rhs, this->domain_particles); this->roll_rhs(); } @@ -620,20 +640,31 @@ void rFFTW_distributed_particles<particle_type, rnumber, interp_neighbours>::wri TIMEZONE("rFFTW_distributed_particles::write"); double *data = new double[this->chunk_size*3]; double *yy = new double[this->chunk_size*3]; - int pindex = 0; - for (unsigned int cindex=0; cindex<this->get_number_of_chunks(); cindex++) + //int pindex = 0; + for (unsigned int cindex=0; cindex<this->get_number_of_chunks(); cindex++) { std::fill_n(yy, this->chunk_size*3, 0); - for (unsigned int p=0; p<this->chunk_size; p++, pindex++) - { - if (this->domain_particles[-1].find(pindex) != this->domain_particles[-1].end() || - this->domain_particles[ 0].find(pindex) != this->domain_particles[ 0].end()) - { - std::copy(y[pindex].data, - y[pindex].data + 3, - yy + p*3); - } - } + //for (unsigned int p=0; p<this->chunk_size; p++, pindex++) + //{ + // if (this->domain_particles[-1].find(pindex) != this->domain_particles[-1].end() || + // this->domain_particles[ 0].find(pindex) != this->domain_particles[ 0].end()) + // { + // std::copy(y[pindex].data, + // y[pindex].data + 3, + // yy + p*3); + // } + //} + for (int s = -1; s <= 0; s++) + for (auto &pp: this->domain_particles[s]) + { + if (pp >= cindex*this->chunk_size && + pp < (cindex+1)*this->chunk_size) + { + std::copy(y[pp].data, + y[pp].data + 3, + yy + (pp-cindex*this->chunk_size)*3); + } + } { TIMEZONE("MPI_Allreduce"); MPI_Allreduce( @@ -660,23 +691,34 @@ void rFFTW_distributed_particles<particle_type, rnumber, interp_neighbours>::wri TIMEZONE("rFFTW_distributed_particles::write2"); double *temp0 = new double[this->chunk_size*state_dimension(particle_type)]; double *temp1 = new double[this->chunk_size*state_dimension(particle_type)]; - int pindex = 0; + //int pindex = 0; for (unsigned int cindex=0; cindex<this->get_number_of_chunks(); cindex++) { //write state std::fill_n(temp0, state_dimension(particle_type)*this->chunk_size, 0); - pindex = cindex*this->chunk_size; - for (unsigned int p=0; p<this->chunk_size; p++, pindex++) - { - if (this->domain_particles[-1].find(pindex) != this->domain_particles[-1].end() || - this->domain_particles[ 0].find(pindex) != this->domain_particles[ 0].end()) - { - TIMEZONE("std::copy"); - std::copy(this->state[pindex].data, - this->state[pindex].data + state_dimension(particle_type), - temp0 + p*state_dimension(particle_type)); - } - } + //pindex = cindex*this->chunk_size; + //for (unsigned int p=0; p<this->chunk_size; p++, pindex++) + //{ + // if (this->domain_particles[-1].find(pindex) != this->domain_particles[-1].end() || + // this->domain_particles[ 0].find(pindex) != this->domain_particles[ 0].end()) + // { + // TIMEZONE("std::copy"); + // std::copy(this->state[pindex].data, + // this->state[pindex].data + state_dimension(particle_type), + // temp0 + p*state_dimension(particle_type)); + // } + //} + for (int s = -1; s <= 0; s++) + for (auto &pp: this->domain_particles[s]) + { + if (pp >= cindex*this->chunk_size && + pp < (cindex+1)*this->chunk_size) + { + std::copy(this->state[pp].data, + this->state[pp].data + state_dimension(particle_type), + temp0 + (pp-cindex*this->chunk_size)*state_dimension(particle_type)); + } + } { TIMEZONE("MPI_Allreduce"); MPI_Allreduce( @@ -697,18 +739,29 @@ void rFFTW_distributed_particles<particle_type, rnumber, interp_neighbours>::wri for (int i=0; i<this->integration_steps; i++) { std::fill_n(temp0, state_dimension(particle_type)*this->chunk_size, 0); - pindex = cindex*this->chunk_size; - for (unsigned int p=0; p<this->chunk_size; p++, pindex++) - { - if (this->domain_particles[-1].find(pindex) != this->domain_particles[-1].end() || - this->domain_particles[ 0].find(pindex) != this->domain_particles[ 0].end()) - { - TIMEZONE("std::copy"); - std::copy(this->rhs[i][pindex].data, - this->rhs[i][pindex].data + state_dimension(particle_type), - temp0 + p*state_dimension(particle_type)); - } - } + //pindex = cindex*this->chunk_size; + //for (unsigned int p=0; p<this->chunk_size; p++, pindex++) + //{ + // if (this->domain_particles[-1].find(pindex) != this->domain_particles[-1].end() || + // this->domain_particles[ 0].find(pindex) != this->domain_particles[ 0].end()) + // { + // TIMEZONE("std::copy"); + // std::copy(this->rhs[i][pindex].data, + // this->rhs[i][pindex].data + state_dimension(particle_type), + // temp0 + p*state_dimension(particle_type)); + // } + //} + for (int s = -1; s <= 0; s++) + for (auto &pp: this->domain_particles[s]) + { + if (pp >= cindex*this->chunk_size && + pp < (cindex+1)*this->chunk_size) + { + std::copy(this->rhs[i][pp].data, + this->rhs[i][pp].data + state_dimension(particle_type), + temp0 + (pp-cindex*this->chunk_size)*state_dimension(particle_type)); + } + } { TIMEZONE("MPI_Allreduce"); MPI_Allreduce( diff --git a/bfps/cpp/rFFTW_interpolator.cpp b/bfps/cpp/rFFTW_interpolator.cpp index f8dc62da0a163425075a0a0f38209ab28c45ca7f..77100d1dc25878f299e76cb4dbbcd089c845dcdc 100644 --- a/bfps/cpp/rFFTW_interpolator.cpp +++ b/bfps/cpp/rFFTW_interpolator.cpp @@ -34,10 +34,10 @@ template <class rnumber, int interp_neighbours> rFFTW_interpolator<rnumber, interp_neighbours>::rFFTW_interpolator( fluid_solver_base<rnumber> *fs, base_polynomial_values BETA_POLYS, - rnumber *FIELD) : interpolator_base<rnumber, interp_neighbours>(fs, BETA_POLYS) + rnumber *FIELD_POINTER) : interpolator_base<rnumber, interp_neighbours>(fs, BETA_POLYS) { this->field_size = 2*fs->cd->local_size; - this->field = FIELD; + this->field = FIELD_POINTER; // generate compute array @@ -49,6 +49,25 @@ rFFTW_interpolator<rnumber, interp_neighbours>::rFFTW_interpolator( this->compute[((iz + this->descriptor->sizes[0]) % this->descriptor->sizes[0])] = true; } +template <class rnumber, int interp_neighbours> +rFFTW_interpolator<rnumber, interp_neighbours>::rFFTW_interpolator( + vorticity_equation<rnumber, FFTW> *fs, + base_polynomial_values BETA_POLYS, + rnumber *FIELD_POINTER) : interpolator_base<rnumber, interp_neighbours>(fs, BETA_POLYS) +{ +// this->field_size = 2*fs->cd->local_size; +// this->field = FIELD_POINTER; +// +// +// // generate compute array +// this->compute = new bool[this->descriptor->sizes[0]]; +// std::fill_n(this->compute, this->descriptor->sizes[0], false); +// for (int iz = this->descriptor->starts[0]-interp_neighbours-1; +// iz <= this->descriptor->starts[0]+this->descriptor->subsizes[0]+interp_neighbours; +// iz++) +// this->compute[((iz + this->descriptor->sizes[0]) % this->descriptor->sizes[0])] = true; +} + template <class rnumber, int interp_neighbours> rFFTW_interpolator<rnumber, interp_neighbours>::~rFFTW_interpolator() { diff --git a/bfps/cpp/rFFTW_interpolator.hpp b/bfps/cpp/rFFTW_interpolator.hpp index 795257d2744e432d9c346b93848cadfbd8cc85dc..a8ca1abc61a883cb27dee5e9f82aa09abdd77cca 100644 --- a/bfps/cpp/rFFTW_interpolator.hpp +++ b/bfps/cpp/rFFTW_interpolator.hpp @@ -27,6 +27,7 @@ #include "field_descriptor.hpp" #include "fftw_tools.hpp" #include "fluid_solver_base.hpp" +#include "vorticity_equation.hpp" #include "interpolator_base.hpp" #ifndef RFFTW_INTERPOLATOR @@ -54,6 +55,11 @@ class rFFTW_interpolator:public interpolator_base<rnumber, interp_neighbours> fluid_solver_base<rnumber> *FSOLVER, base_polynomial_values BETA_POLYS, rnumber *FIELD_DATA); + + rFFTW_interpolator( + vorticity_equation<rnumber, FFTW> *FSOLVER, + base_polynomial_values BETA_POLYS, + rnumber *FIELD_DATA); ~rFFTW_interpolator(); /* does not destroy input */ diff --git a/bfps/cpp/vorticity_equation.cpp b/bfps/cpp/vorticity_equation.cpp new file mode 100644 index 0000000000000000000000000000000000000000..55573d5bcc2f5ebdb3ec2ac975997455a977215c --- /dev/null +++ b/bfps/cpp/vorticity_equation.cpp @@ -0,0 +1,652 @@ +/********************************************************************** +* * +* Copyright 2015 Max Planck Institute * +* for Dynamics and Self-Organization * +* * +* This file is part of bfps. * +* * +* bfps is free software: you can redistribute it and/or modify * +* it under the terms of the GNU General Public License as published * +* by the Free Software Foundation, either version 3 of the License, * +* or (at your option) any later version. * +* * +* bfps is distributed in the hope that it will be useful, * +* but WITHOUT ANY WARRANTY; without even the implied warranty of * +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * +* GNU General Public License for more details. * +* * +* You should have received a copy of the GNU General Public License * +* along with bfps. If not, see <http://www.gnu.org/licenses/> * +* * +* Contact: Cristian.Lalescu@ds.mpg.de * +* * +**********************************************************************/ + + + +//#define NDEBUG + +#include <cassert> +#include <cmath> +#include <cstring> +#include "fftw_tools.hpp" +#include "vorticity_equation.hpp" +#include "scope_timer.hpp" + + + +template <class rnumber, + field_backend be> +void vorticity_equation<rnumber, be>::impose_zero_modes() +{ + TIMEZONE("vorticity_equation::impose_zero_modes"); + this->u->impose_zero_mode(); + this->v[0]->impose_zero_mode(); + this->v[1]->impose_zero_mode(); + this->v[2]->impose_zero_mode(); +} + +template <class rnumber, + field_backend be> +vorticity_equation<rnumber, be>::vorticity_equation( + const char *NAME, + int nx, + int ny, + int nz, + double DKX, + double DKY, + double DKZ, + unsigned FFTW_PLAN_RIGOR) +{ + TIMEZONE("vorticity_equation::vorticity_equation"); + /* initialize name and basic stuff */ + strncpy(this->name, NAME, 256); + this->name[255] = '\0'; + this->iteration = 0; + + /* initialize fields */ + this->cvorticity = new field<rnumber, be, THREE>( + nx, ny, nz, MPI_COMM_WORLD, FFTW_PLAN_RIGOR); + this->rvorticity = new field<rnumber, be, THREE>( + nx, ny, nz, MPI_COMM_WORLD, FFTW_PLAN_RIGOR); + this->v[1] = new field<rnumber, be, THREE>( + nx, ny, nz, MPI_COMM_WORLD, FFTW_PLAN_RIGOR); + this->v[2] = new field<rnumber, be, THREE>( + nx, ny, nz, MPI_COMM_WORLD, FFTW_PLAN_RIGOR); + this->v[0] = this->cvorticity; + this->v[3] = this->cvorticity; + + this->cvelocity = new field<rnumber, be, THREE>( + nx, ny, nz, MPI_COMM_WORLD, FFTW_PLAN_RIGOR); + this->rvelocity = new field<rnumber, be, THREE>( + nx, ny, nz, MPI_COMM_WORLD, FFTW_PLAN_RIGOR); + this->u = this->cvelocity; + + /* initialize kspace */ + this->kk = new kspace<be, SMOOTH>( + this->cvorticity->clayout, DKX, DKY, DKZ); + + /* ``physical'' parameters etc, initialized here just in case */ + + this->nu = 0.1; + this->fmode = 1; + this->famplitude = 1.0; + this->fk0 = 2.0; + this->fk1 = 4.0; +} + +template <class rnumber, + field_backend be> +vorticity_equation<rnumber, be>::~vorticity_equation() +{ + TIMEZONE("vorticity_equation::~vorticity_equation"); + delete this->kk; + delete this->cvorticity; + delete this->rvorticity; + delete this->v[1]; + delete this->v[2]; + delete this->cvelocity; + delete this->rvelocity; +} + +template <class rnumber, + field_backend be> +void vorticity_equation<rnumber, be>::compute_vorticity() +{ + TIMEZONE("vorticity_equation::compute_vorticity"); + this->cvorticity->real_space_representation = false; + this->kk->CLOOP_K2( + [&](ptrdiff_t cindex, + ptrdiff_t xindex, + ptrdiff_t yindex, + ptrdiff_t zindex, + double k2){ + if (k2 <= this->kk->kM2) + { + this->cvorticity->cval(cindex,0,0) = -(this->kk->ky[yindex]*this->u->cval(cindex,2,1) - this->kk->kz[zindex]*this->u->cval(cindex,1,1)); + this->cvorticity->cval(cindex,0,1) = (this->kk->ky[yindex]*this->u->cval(cindex,2,0) - this->kk->kz[zindex]*this->u->cval(cindex,1,0)); + this->cvorticity->cval(cindex,1,0) = -(this->kk->kz[zindex]*this->u->cval(cindex,0,1) - this->kk->kx[xindex]*this->u->cval(cindex,2,1)); + this->cvorticity->cval(cindex,1,1) = (this->kk->kz[zindex]*this->u->cval(cindex,0,0) - this->kk->kx[xindex]*this->u->cval(cindex,2,0)); + this->cvorticity->cval(cindex,2,0) = -(this->kk->kx[xindex]*this->u->cval(cindex,1,1) - this->kk->ky[yindex]*this->u->cval(cindex,0,1)); + this->cvorticity->cval(cindex,2,1) = (this->kk->kx[xindex]*this->u->cval(cindex,1,0) - this->kk->ky[yindex]*this->u->cval(cindex,0,0)); + //ptrdiff_t tindex = 3*cindex; + //this->cvorticity->get_cdata()[tindex+0][0] = -(this->kk->ky[yindex]*this->u->get_cdata()[tindex+2][1] - this->kk->kz[zindex]*this->u->get_cdata()[tindex+1][1]); + //this->cvorticity->get_cdata()[tindex+1][0] = -(this->kk->kz[zindex]*this->u->get_cdata()[tindex+0][1] - this->kk->kx[xindex]*this->u->get_cdata()[tindex+2][1]); + //this->cvorticity->get_cdata()[tindex+2][0] = -(this->kk->kx[xindex]*this->u->get_cdata()[tindex+1][1] - this->kk->ky[yindex]*this->u->get_cdata()[tindex+0][1]); + //this->cvorticity->get_cdata()[tindex+0][1] = (this->kk->ky[yindex]*this->u->get_cdata()[tindex+2][0] - this->kk->kz[zindex]*this->u->get_cdata()[tindex+1][0]); + //this->cvorticity->get_cdata()[tindex+1][1] = (this->kk->kz[zindex]*this->u->get_cdata()[tindex+0][0] - this->kk->kx[xindex]*this->u->get_cdata()[tindex+2][0]); + //this->cvorticity->get_cdata()[tindex+2][1] = (this->kk->kx[xindex]*this->u->get_cdata()[tindex+1][0] - this->kk->ky[yindex]*this->u->get_cdata()[tindex+0][0]); + } + else + std::fill_n((rnumber*)(this->cvorticity->get_cdata()+3*cindex), 6, 0.0); + } + ); + this->cvorticity->symmetrize(); +} + +template <class rnumber, + field_backend be> +void vorticity_equation<rnumber, be>::compute_velocity(field<rnumber, be, THREE> *vorticity) +{ + TIMEZONE("vorticity_equation::compute_velocity"); + this->u->real_space_representation = false; + this->kk->CLOOP_K2( + [&](ptrdiff_t cindex, + ptrdiff_t xindex, + ptrdiff_t yindex, + ptrdiff_t zindex, + double k2){ + if (k2 <= this->kk->kM2 && k2 > 0) + { + this->u->cval(cindex,0,0) = -(this->kk->ky[yindex]*vorticity->cval(cindex,2,1) - this->kk->kz[zindex]*vorticity->cval(cindex,1,1)) / k2; + this->u->cval(cindex,0,1) = (this->kk->ky[yindex]*vorticity->cval(cindex,2,0) - this->kk->kz[zindex]*vorticity->cval(cindex,1,0)) / k2; + this->u->cval(cindex,1,0) = -(this->kk->kz[zindex]*vorticity->cval(cindex,0,1) - this->kk->kx[xindex]*vorticity->cval(cindex,2,1)) / k2; + this->u->cval(cindex,1,1) = (this->kk->kz[zindex]*vorticity->cval(cindex,0,0) - this->kk->kx[xindex]*vorticity->cval(cindex,2,0)) / k2; + this->u->cval(cindex,2,0) = -(this->kk->kx[xindex]*vorticity->cval(cindex,1,1) - this->kk->ky[yindex]*vorticity->cval(cindex,0,1)) / k2; + this->u->cval(cindex,2,1) = (this->kk->kx[xindex]*vorticity->cval(cindex,1,0) - this->kk->ky[yindex]*vorticity->cval(cindex,0,0)) / k2; + //ptrdiff_t tindex = 3*cindex; + //this->u->get_cdata()[tindex+0][0] = -(this->kk->ky[yindex]*vorticity->get_cdata()[tindex+2][1] - this->kk->kz[zindex]*vorticity->get_cdata()[tindex+1][1]) / k2; + //this->u->get_cdata()[tindex+0][1] = (this->kk->ky[yindex]*vorticity->get_cdata()[tindex+2][0] - this->kk->kz[zindex]*vorticity->get_cdata()[tindex+1][0]) / k2; + //this->u->get_cdata()[tindex+1][0] = -(this->kk->kz[zindex]*vorticity->get_cdata()[tindex+0][1] - this->kk->kx[xindex]*vorticity->get_cdata()[tindex+2][1]) / k2; + //this->u->get_cdata()[tindex+1][1] = (this->kk->kz[zindex]*vorticity->get_cdata()[tindex+0][0] - this->kk->kx[xindex]*vorticity->get_cdata()[tindex+2][0]) / k2; + //this->u->get_cdata()[tindex+2][0] = -(this->kk->kx[xindex]*vorticity->get_cdata()[tindex+1][1] - this->kk->ky[yindex]*vorticity->get_cdata()[tindex+0][1]) / k2; + //this->u->get_cdata()[tindex+2][1] = (this->kk->kx[xindex]*vorticity->get_cdata()[tindex+1][0] - this->kk->ky[yindex]*vorticity->get_cdata()[tindex+0][0]) / k2; + } + else + std::fill_n((rnumber*)(this->u->get_cdata()+3*cindex), 6, 0.0); + } + ); + this->u->symmetrize(); +} + +template <class rnumber, + field_backend be> +void vorticity_equation<rnumber, be>::add_forcing( + field<rnumber, be, THREE> *dst, + field<rnumber, be, THREE> *vort_field, + rnumber factor) +{ + TIMEZONE("vorticity_equation::add_forcing"); + if (strcmp(this->forcing_type, "none") == 0) + return; + if (strcmp(this->forcing_type, "Kolmogorov") == 0) + { + ptrdiff_t cindex; + if (this->cvorticity->clayout->myrank == this->cvorticity->clayout->rank[0][this->fmode]) + { + cindex = ((this->fmode - this->cvorticity->clayout->starts[0]) * this->cvorticity->clayout->sizes[1])*this->cvorticity->clayout->sizes[2]; + dst->cval(cindex,2, 0) -= this->famplitude*factor/2; + //dst->get_cdata()[cindex*3+2][0] -= this->famplitude*factor/2; + } + if (this->cvorticity->clayout->myrank == this->cvorticity->clayout->rank[0][this->cvorticity->clayout->sizes[0] - this->fmode]) + { + cindex = ((this->cvorticity->clayout->sizes[0] - this->fmode - this->cvorticity->clayout->starts[0]) * this->cvorticity->clayout->sizes[1])*this->cvorticity->clayout->sizes[2]; + dst->cval(cindex, 2, 0) -= this->famplitude*factor/2; + //dst->get_cdata()[cindex*3+2][0] -= this->famplitude*factor/2; + } + return; + } + if (strcmp(this->forcing_type, "linear") == 0) + { + this->kk->CLOOP( + [&](ptrdiff_t cindex, + ptrdiff_t xindex, + ptrdiff_t yindex, + ptrdiff_t zindex){ + double knorm = sqrt(this->kk->kx[xindex]*this->kk->kx[xindex] + + this->kk->ky[yindex]*this->kk->ky[yindex] + + this->kk->kz[zindex]*this->kk->kz[zindex]); + if ((this->fk0 <= knorm) && + (this->fk1 >= knorm)) + for (int c=0; c<3; c++) + for (int i=0; i<2; i++) + dst->cval(cindex,c,i) += this->famplitude*vort_field->cval(cindex,c,i)*factor; + //dst->get_cdata()[cindex*3+c][i] += this->famplitude*vort_field->get_cdata()[cindex*3+c][i]*factor; + } + ); + return; + } +} + +template <class rnumber, + field_backend be> +void vorticity_equation<rnumber, be>::omega_nonlin( + int src) +{ + DEBUG_MSG("vorticity_equation::omega_nonlin(%d)\n", src); + assert(src >= 0 && src < 3); + this->compute_velocity(this->v[src]); + /* get fields from Fourier space to real space */ + this->u->ift(); + this->rvorticity->real_space_representation = false; + *this->rvorticity = this->v[src]->get_cdata(); + this->rvorticity->ift(); + /* compute cross product $u \times \omega$, and normalize */ + this->u->RLOOP( + [&](ptrdiff_t rindex, + ptrdiff_t xindex, + ptrdiff_t yindex, + ptrdiff_t zindex){ + //ptrdiff_t tindex = 3*rindex; + rnumber tmp[3]; + for (int cc=0; cc<3; cc++) + tmp[cc] = (this->u->rval(rindex,(cc+1)%3)*this->rvorticity->rval(rindex,(cc+2)%3) - + this->u->rval(rindex,(cc+2)%3)*this->rvorticity->rval(rindex,(cc+1)%3)); + //tmp[cc][0] = (this->u->get_rdata()[tindex+(cc+1)%3]*this->rvorticity->get_rdata()[tindex+(cc+2)%3] - + // this->u->get_rdata()[tindex+(cc+2)%3]*this->rvorticity->get_rdata()[tindex+(cc+1)%3]); + for (int cc=0; cc<3; cc++) + this->u->rval(rindex,cc) = tmp[cc] / this->u->npoints; + //this->u->get_rdata()[(3*rindex)+cc] = tmp[cc][0] / this->u->npoints; + } + ); + /* go back to Fourier space */ + //this->clean_up_real_space(this->ru, 3); + this->u->dft(); + this->kk->template dealias<rnumber, THREE>(this->u->get_cdata()); + /* $\imath k \times Fourier(u \times \omega)$ */ + this->kk->CLOOP( + [&](ptrdiff_t cindex, + ptrdiff_t xindex, + ptrdiff_t yindex, + ptrdiff_t zindex){ + rnumber tmp[3][2]; + { + tmp[0][0] = -(this->kk->ky[yindex]*this->u->cval(cindex,2,1) - this->kk->kz[zindex]*this->u->cval(cindex,1,1)); + tmp[1][0] = -(this->kk->kz[zindex]*this->u->cval(cindex,0,1) - this->kk->kx[xindex]*this->u->cval(cindex,2,1)); + tmp[2][0] = -(this->kk->kx[xindex]*this->u->cval(cindex,1,1) - this->kk->ky[yindex]*this->u->cval(cindex,0,1)); + tmp[0][1] = (this->kk->ky[yindex]*this->u->cval(cindex,2,0) - this->kk->kz[zindex]*this->u->cval(cindex,1,0)); + tmp[1][1] = (this->kk->kz[zindex]*this->u->cval(cindex,0,0) - this->kk->kx[xindex]*this->u->cval(cindex,2,0)); + tmp[2][1] = (this->kk->kx[xindex]*this->u->cval(cindex,1,0) - this->kk->ky[yindex]*this->u->cval(cindex,0,0)); + } + //ptrdiff_t tindex = 3*cindex; + //{ + // tmp[0][0] = -(this->kk->ky[yindex]*this->u->get_cdata()[tindex+2][1] - this->kk->kz[zindex]*this->u->get_cdata()[tindex+1][1]); + // tmp[1][0] = -(this->kk->kz[zindex]*this->u->get_cdata()[tindex+0][1] - this->kk->kx[xindex]*this->u->get_cdata()[tindex+2][1]); + // tmp[2][0] = -(this->kk->kx[xindex]*this->u->get_cdata()[tindex+1][1] - this->kk->ky[yindex]*this->u->get_cdata()[tindex+0][1]); + // tmp[0][1] = (this->kk->ky[yindex]*this->u->get_cdata()[tindex+2][0] - this->kk->kz[zindex]*this->u->get_cdata()[tindex+1][0]); + // tmp[1][1] = (this->kk->kz[zindex]*this->u->get_cdata()[tindex+0][0] - this->kk->kx[xindex]*this->u->get_cdata()[tindex+2][0]); + // tmp[2][1] = (this->kk->kx[xindex]*this->u->get_cdata()[tindex+1][0] - this->kk->ky[yindex]*this->u->get_cdata()[tindex+0][0]); + //} + for (int cc=0; cc<3; cc++) for (int i=0; i<2; i++) + this->u->cval(cindex, cc, i) = tmp[cc][i]; + //this->u->get_cdata()[3*cindex+cc][i] = tmp[cc][i]; + } + ); + this->add_forcing(this->u, this->v[src], 1.0); + this->kk->template force_divfree<rnumber>(this->u->get_cdata()); +} + +template <class rnumber, + field_backend be> +void vorticity_equation<rnumber, be>::step(double dt) +{ + DEBUG_MSG("vorticity_equation::step\n"); + TIMEZONE("vorticity_equation::step"); + double factor0, factor1; + *this->v[1] = 0.0; + this->omega_nonlin(0); + this->kk->CLOOP_K2( + [&](ptrdiff_t cindex, + ptrdiff_t xindex, + ptrdiff_t yindex, + ptrdiff_t zindex, + double k2){ + if (k2 <= this->kk->kM2) + { + factor0 = exp(-this->nu * k2 * dt); + for (int cc=0; cc<3; cc++) for (int i=0; i<2; i++) + this->v[1]->cval(cindex,cc,i) = ( + this->v[0]->cval(cindex,cc,i) + + dt*this->u->cval(cindex,cc,i))*factor0; + //this->v[1]->get_cdata()[3*cindex+cc][i] = ( + // this->v[0]->get_cdata()[3*cindex+cc][i] + + // dt*this->u->get_cdata()[3*cindex+cc][i])*factor0; + } + } + ); + + this->omega_nonlin(1); + this->kk->CLOOP_K2( + [&](ptrdiff_t cindex, + ptrdiff_t xindex, + ptrdiff_t yindex, + ptrdiff_t zindex, + double k2){ + if (k2 <= this->kk->kM2) + { + factor0 = exp(-this->nu * k2 * dt/2); + factor1 = exp( this->nu * k2 * dt/2); + for (int cc=0; cc<3; cc++) for (int i=0; i<2; i++) + this->v[2]->cval(cindex, cc, i) = ( + 3*this->v[0]->cval(cindex,cc,i)*factor0 + + ( this->v[1]->cval(cindex,cc,i) + + dt*this->u->cval(cindex,cc,i))*factor1)*0.25; + //this->v[2]->get_cdata()[3*cindex+cc][i] = ( + // 3*this->v[0]->get_cdata()[3*cindex+cc][i]*factor0 + + // (this->v[1]->get_cdata()[3*cindex+cc][i] + + // dt*this->u->get_cdata()[3*cindex+cc][i])*factor1)*0.25; + } + } + ); + + this->omega_nonlin(2); + this->kk->CLOOP_K2( + [&](ptrdiff_t cindex, + ptrdiff_t xindex, + ptrdiff_t yindex, + ptrdiff_t zindex, + double k2){ + if (k2 <= this->kk->kM2) + { + factor0 = exp(-this->nu * k2 * dt * 0.5); + for (int cc=0; cc<3; cc++) for (int i=0; i<2; i++) + this->v[3]->cval(cindex,cc,i) = ( + this->v[0]->cval(cindex,cc,i)*factor0 + + 2*(this->v[2]->cval(cindex,cc,i) + + dt*this->u->cval(cindex,cc,i)))*factor0/3; + //this->v[3]->get_cdata()[3*cindex+cc][i] = ( + // this->v[0]->get_cdata()[3*cindex+cc][i]*factor0 + + // 2*(this->v[2]->get_cdata()[3*cindex+cc][i] + + // dt*this->u->get_cdata()[3*cindex+cc][i]))*factor0/3; + } + } + ); + + this->kk->template force_divfree<rnumber>(this->cvorticity->get_cdata()); + this->cvorticity->symmetrize(); + this->iteration++; +} + +template <class rnumber, + field_backend be> +void vorticity_equation<rnumber, be>::compute_pressure(field<rnumber, be, ONE> *pressure) +{ + TIMEZONE("vorticity_equation::compute_pressure"); + /* assume velocity is already in real space representation */ + + this->v[1]->real_space_representation = true; + /* diagonal terms 11 22 33 */ + this->v[1]->RLOOP ( + [&](ptrdiff_t rindex, + ptrdiff_t xindex, + ptrdiff_t yindex, + ptrdiff_t zindex){ + //ptrdiff_t tindex = 3*rindex; + for (int cc=0; cc<3; cc++) + this->v[1]->rval(rindex,cc) = this->u->rval(rindex,cc)*this->u->rval(rindex,cc); + //this->v[1]->get_rdata()[tindex+cc] = this->u->get_rdata()[tindex+cc]*this->u->get_rdata()[tindex+cc]; + } + ); + //this->clean_up_real_space(this->rv[1], 3); + this->v[1]->dft(); + this->kk->template dealias<rnumber, THREE>(this->v[1]->get_cdata()); + this->kk->CLOOP_K2( + [&](ptrdiff_t cindex, + ptrdiff_t xindex, + ptrdiff_t yindex, + ptrdiff_t zindex, + double k2){ + if (k2 <= this->kk->kM2 && k2 > 0) + { + ptrdiff_t tindex = 3*cindex; + for (int i=0; i<2; i++) + { + pressure->get_cdata()[cindex][i] = \ + -(this->kk->kx[xindex]*this->kk->kx[xindex]*this->v[1]->get_cdata()[tindex+0][i] + + this->kk->ky[yindex]*this->kk->ky[yindex]*this->v[1]->get_cdata()[tindex+1][i] + + this->kk->kz[zindex]*this->kk->kz[zindex]*this->v[1]->get_cdata()[tindex+2][i]); + } + } + else + std::fill_n((rnumber*)(pressure->get_cdata()+cindex), 2, 0.0); + } + ); + /* off-diagonal terms 12 23 31 */ + this->v[1]->real_space_representation = true; + this->v[1]->RLOOP ( + [&](ptrdiff_t rindex, + ptrdiff_t xindex, + ptrdiff_t yindex, + ptrdiff_t zindex){ + //ptrdiff_t tindex = 3*rindex; + for (int cc=0; cc<3; cc++) + this->v[1]->rval(rindex,cc) = this->u->rval(rindex,cc)*this->u->rval(rindex,(cc+1)%3); + //this->v[1]->get_rdata()[tindex+cc] = this->u->get_rdata()[tindex+cc]*this->u->get_rdata()[tindex+(cc+1)%3]; + } + ); + //this->clean_up_real_space(this->rv[1], 3); + this->v[1]->dft(); + this->kk->template dealias<rnumber, THREE>(this->v[1]->get_cdata()); + this->kk->CLOOP_K2( + [&](ptrdiff_t cindex, + ptrdiff_t xindex, + ptrdiff_t yindex, + ptrdiff_t zindex, + double k2){ + if (k2 <= this->kk->kM2 && k2 > 0) + { + ptrdiff_t tindex = 3*cindex; + for (int i=0; i<2; i++) + { + pressure->get_cdata()[cindex][i] -= \ + 2*(this->kk->kx[xindex]*this->kk->ky[yindex]*this->v[1]->get_cdata()[tindex+0][i] + + this->kk->ky[yindex]*this->kk->kz[zindex]*this->v[1]->get_cdata()[tindex+1][i] + + this->kk->kz[zindex]*this->kk->kx[xindex]*this->v[1]->get_cdata()[tindex+2][i]); + pressure->get_cdata()[cindex][i] /= pressure->npoints*k2; + } + } + } + ); +} + +template <class rnumber, + field_backend be> +void vorticity_equation<rnumber, be>::compute_Lagrangian_acceleration( + field<rnumber, be, THREE> *acceleration) +{ + field<rnumber, be, ONE> *pressure = new field<rnumber, be, ONE>( + this->cvelocity->rlayout->sizes[2], + this->cvelocity->rlayout->sizes[1], + this->cvelocity->rlayout->sizes[0], + this->cvelocity->rlayout->comm, + this->cvelocity->fftw_plan_rigor); + this->compute_velocity(this->cvorticity); + this->cvelocity->ift(); + this->compute_pressure(pressure); + this->compute_velocity(this->cvorticity); + acceleration->real_space_representation = false; + this->kk->CLOOP_K2( + [&](ptrdiff_t cindex, + ptrdiff_t xindex, + ptrdiff_t yindex, + ptrdiff_t zindex, + double k2){ + if (k2 <= this->kk->kM2) + { + ptrdiff_t tindex = 3*cindex; + for (int cc=0; cc<3; cc++) + for (int i=0; i<2; i++) + acceleration->get_cdata()[tindex+cc][i] = \ + - this->nu*k2*this->cvelocity->get_cdata()[tindex+cc][i]; + if (strcmp(this->forcing_type, "linear") == 0) + { + double knorm = sqrt(k2); + if ((this->fk0 <= knorm) && + (this->fk1 >= knorm)) + for (int c=0; c<3; c++) + for (int i=0; i<2; i++) + acceleration->get_cdata()[tindex+c][i] += \ + this->famplitude*this->cvelocity->get_cdata()[tindex+c][i]; + } + acceleration->get_cdata()[tindex+0][0] += this->kk->kx[xindex]*pressure->get_cdata()[cindex][1]; + acceleration->get_cdata()[tindex+1][0] += this->kk->ky[yindex]*pressure->get_cdata()[cindex][1]; + acceleration->get_cdata()[tindex+2][0] += this->kk->kz[zindex]*pressure->get_cdata()[cindex][1]; + acceleration->get_cdata()[tindex+0][1] -= this->kk->kx[xindex]*pressure->get_cdata()[cindex][0]; + acceleration->get_cdata()[tindex+1][1] -= this->kk->ky[yindex]*pressure->get_cdata()[cindex][0]; + acceleration->get_cdata()[tindex+2][1] -= this->kk->kz[zindex]*pressure->get_cdata()[cindex][0]; + } + }); + delete pressure; +} + +template <class rnumber, + field_backend be> +void vorticity_equation<rnumber, be>::compute_Eulerian_acceleration( + field<rnumber, be, THREE> *acceleration) +{ + this->compute_velocity(this->cvorticity); + acceleration->real_space_representation = false; + /* put in linear terms */ + this->kk->CLOOP_K2( + [&](ptrdiff_t cindex, + ptrdiff_t xindex, + ptrdiff_t yindex, + ptrdiff_t zindex, + double k2){ + if (k2 <= this->kk->kM2) + { + ptrdiff_t tindex = 3*cindex; + for (int cc=0; cc<3; cc++) + for (int i=0; i<2; i++) + acceleration->get_cdata()[tindex+cc][i] = \ + - this->nu*k2*this->cvelocity->get_cdata()[tindex+cc][i]; + if (strcmp(this->forcing_type, "linear") == 0) + { + double knorm = sqrt(k2); + if ((this->fk0 <= knorm) && + (this->fk1 >= knorm)) + { + for (int c=0; c<3; c++) + for (int i=0; i<2; i++) + acceleration->get_cdata()[tindex+c][i] += \ + this->famplitude*this->cvelocity->get_cdata()[tindex+c][i]; + } + } + } + } + ); + this->cvelocity->ift(); + /* compute uu */ + /* 11 22 33 */ + this->v[1]->real_space_representation = true; + this->cvelocity->RLOOP ( + [&](ptrdiff_t rindex, + ptrdiff_t xindex, + ptrdiff_t yindex, + ptrdiff_t zindex){ + //ptrdiff_t tindex = 3*rindex; + for (int cc=0; cc<3; cc++) + this->v[1]->rval(rindex,cc) = \ + this->cvelocity->rval(rindex,cc)*this->cvelocity->rval(rindex,cc) / this->cvelocity->npoints; + //this->v[1]->get_rdata()[tindex+cc] = this->cvelocity->get_rdata()[tindex+cc]*this->cvelocity->get_rdata()[tindex+cc] / this->cvelocity->npoints; + } + ); + this->v[1]->dft(); + this->kk->template dealias<rnumber, THREE>(this->v[1]->get_cdata()); + this->kk->CLOOP_K2( + [&](ptrdiff_t cindex, + ptrdiff_t xindex, + ptrdiff_t yindex, + ptrdiff_t zindex, + double k2){ + if (k2 <= this->kk->kM2) + { + ptrdiff_t tindex = 3*cindex; + acceleration->get_cdata()[tindex+0][0] += + this->kk->kx[xindex]*this->v[1]->get_cdata()[tindex+0][1]; + acceleration->get_cdata()[tindex+0][1] += + -this->kk->kx[xindex]*this->v[1]->get_cdata()[tindex+0][0]; + acceleration->get_cdata()[tindex+1][0] += + this->kk->ky[yindex]*this->v[1]->get_cdata()[tindex+1][1]; + acceleration->get_cdata()[tindex+1][1] += + -this->kk->ky[yindex]*this->v[1]->get_cdata()[tindex+1][0]; + acceleration->get_cdata()[tindex+2][0] += + this->kk->kz[zindex]*this->v[1]->get_cdata()[tindex+2][1]; + acceleration->get_cdata()[tindex+2][1] += + -this->kk->kz[zindex]*this->v[1]->get_cdata()[tindex+2][0]; + } + } + ); + /* 12 23 31 */ + this->v[1]->real_space_representation = true; + this->cvelocity->RLOOP ( + [&](ptrdiff_t rindex, + ptrdiff_t xindex, + ptrdiff_t yindex, + ptrdiff_t zindex){ + //ptrdiff_t tindex = 3*rindex; + for (int cc=0; cc<3; cc++) + this->v[1]->rval(rindex,cc) = \ + this->cvelocity->rval(rindex,cc)*this->cvelocity->rval(rindex,(cc+1)%3) / this->cvelocity->npoints; + //this->v[1]->get_rdata()[tindex+cc] = this->cvelocity->get_rdata()[tindex+cc]*this->cvelocity->get_rdata()[tindex+(cc+1)%3] / this->cvelocity->npoints; + } + ); + this->v[1]->dft(); + this->kk->template dealias<rnumber, THREE>(this->v[1]->get_cdata()); + this->kk->CLOOP_K2( + [&](ptrdiff_t cindex, + ptrdiff_t xindex, + ptrdiff_t yindex, + ptrdiff_t zindex, + double k2){ + if (k2 <= this->kk->kM2) + { + ptrdiff_t tindex = 3*cindex; + acceleration->get_cdata()[tindex+0][0] += + (this->kk->ky[yindex]*this->v[1]->get_cdata()[tindex+0][1] + + this->kk->kz[zindex]*this->v[1]->get_cdata()[tindex+2][1]); + acceleration->get_cdata()[tindex+0][1] += + - (this->kk->ky[yindex]*this->v[1]->get_cdata()[tindex+0][0] + + this->kk->kz[zindex]*this->v[1]->get_cdata()[tindex+2][0]); + acceleration->get_cdata()[tindex+1][0] += + (this->kk->kz[zindex]*this->v[1]->get_cdata()[tindex+1][1] + + this->kk->kx[xindex]*this->v[1]->get_cdata()[tindex+0][1]); + acceleration->get_cdata()[tindex+1][1] += + - (this->kk->kz[zindex]*this->v[1]->get_cdata()[tindex+1][0] + + this->kk->kx[xindex]*this->v[1]->get_cdata()[tindex+0][0]); + acceleration->get_cdata()[tindex+2][0] += + (this->kk->kx[xindex]*this->v[1]->get_cdata()[tindex+2][1] + + this->kk->ky[yindex]*this->v[1]->get_cdata()[tindex+1][1]); + acceleration->get_cdata()[tindex+2][1] += + - (this->kk->kx[xindex]*this->v[1]->get_cdata()[tindex+2][0] + + this->kk->ky[yindex]*this->v[1]->get_cdata()[tindex+1][0]); + } + } + ); + if (this->kk->layout->myrank == this->kk->layout->rank[0][0]) + std::fill_n((rnumber*)(acceleration->get_cdata()), 6, 0.0); + this->kk->template force_divfree<rnumber>(acceleration->get_cdata()); +} + + +/*****************************************************************************/ + + + + +/*****************************************************************************/ +/* finally, force generation of code for single precision */ +template class vorticity_equation<float, FFTW>; +template class vorticity_equation<double, FFTW>; +/*****************************************************************************/ + diff --git a/bfps/cpp/vorticity_equation.hpp b/bfps/cpp/vorticity_equation.hpp new file mode 100644 index 0000000000000000000000000000000000000000..0831362d96fc842082b6a0fb26b1486fe0bdb4e0 --- /dev/null +++ b/bfps/cpp/vorticity_equation.hpp @@ -0,0 +1,107 @@ +/********************************************************************** +* * +* Copyright 2015 Max Planck Institute * +* for Dynamics and Self-Organization * +* * +* This file is part of bfps. * +* * +* bfps is free software: you can redistribute it and/or modify * +* it under the terms of the GNU General Public License as published * +* by the Free Software Foundation, either version 3 of the License, * +* or (at your option) any later version. * +* * +* bfps is distributed in the hope that it will be useful, * +* but WITHOUT ANY WARRANTY; without even the implied warranty of * +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * +* GNU General Public License for more details. * +* * +* You should have received a copy of the GNU General Public License * +* along with bfps. If not, see <http://www.gnu.org/licenses/> * +* * +* Contact: Cristian.Lalescu@ds.mpg.de * +* * +**********************************************************************/ + +#include <stdio.h> +#include <stdlib.h> +#include <iostream> + +#include "field.hpp" +#include "field_descriptor.hpp" + +#ifndef VORTICITY_EQUATION + +#define VORTICITY_EQUATION + +extern int myrank, nprocs; + + +/* container for field descriptor, fields themselves, parameters, etc + * This particular class is only meant as a stepping stone to a proper solver + * that only uses the field class (and related layout and kspace classes), and + * HDF5 for I/O. + * */ + +template <typename rnumber, + field_backend be> +class vorticity_equation +{ + public: + /* name */ + char name[256]; + + /* iteration */ + int iteration; + + /* fields */ + field<rnumber, be, THREE> *cvorticity, *cvelocity; + field<rnumber, be, THREE> *rvorticity, *rvelocity; + kspace<be, SMOOTH> *kk; + + + /* short names for velocity, and 4 vorticity fields */ + field<rnumber, be, THREE> *u, *v[4]; + + /* physical parameters */ + double nu; + int fmode; // for Kolmogorov flow + double famplitude; // both for Kflow and band forcing + double fk0, fk1; // for band forcing + char forcing_type[128]; + + /* constructor, destructor */ + vorticity_equation( + const char *NAME, + int nx, + int ny, + int nz, + double DKX = 1.0, + double DKY = 1.0, + double DKZ = 1.0, + unsigned FFTW_PLAN_RIGOR = FFTW_MEASURE); + ~vorticity_equation(void); + + /* solver essential methods */ + void omega_nonlin(int src); + void step(double dt); + void impose_zero_modes(void); + void add_forcing(field<rnumber, be, THREE> *dst, + field<rnumber, be, THREE> *src_vorticity, + rnumber factor); + void compute_vorticity(void); + void compute_velocity(field<rnumber, be, THREE> *vorticity); + + /* binary I/O stuff */ + inline void fill_up_filename(const char *base_name, char *full_name) + { + sprintf(full_name, "%s_%s_i%.5x.h5", this->name, base_name, this->iteration); + } + + /* statistics and general postprocessing */ + void compute_pressure(field<rnumber, be, ONE> *pressure); + void compute_Eulerian_acceleration(field<rnumber, be, THREE> *acceleration); + void compute_Lagrangian_acceleration(field<rnumber, be, THREE> *acceleration); +}; + +#endif//VORTICITY_EQUATION + diff --git a/setup.py b/setup.py index 2a745190571df95c5e2d0b89138ebfe3b32c0469..8d4ae0f9dfb554229c6b7293e596646501a58bc7 100644 --- a/setup.py +++ b/setup.py @@ -88,7 +88,10 @@ print('This is bfps version ' + VERSION) ### lists of files and MANIFEST.in -src_file_list = ['field', +src_file_list = ['vorticity_equation', + 'field', + 'kspace', + 'field_layout', 'field_descriptor', 'rFFTW_distributed_particles', 'distributed_particles', diff --git a/tests/test_field_class.py b/tests/test_field_class.py index fc52f419a5ab2dd7a5231676c41b9d586d497080..110d9be685ef42d4ed231a3a3c723ac34e3d916d 100644 --- a/tests/test_field_class.py +++ b/tests/test_field_class.py @@ -32,32 +32,37 @@ class TestField(_fluid_particle_base): self.fluid_includes += '#include "fftw_tools.hpp"\n' self.fluid_includes += '#include "field.hpp"\n' self.fluid_variables += ('field<' + self.C_dtype + ', FFTW, ONE> *f;\n' + + 'field<' + self.C_dtype + ', FFTW, THREE> *v;\n' + 'kspace<FFTW, SMOOTH> *kk;\n') self.fluid_start += """ //begincpp f = new field<{0}, FFTW, ONE>( nx, ny, nz, MPI_COMM_WORLD); + v = new field<{0}, FFTW, THREE>( + nx, ny, nz, MPI_COMM_WORLD); kk = new kspace<FFTW, SMOOTH>( f->clayout, 1., 1., 1.); // read rdata - f->io("field.h5", "rdata", 0, true); + f->real_space_representation = true; + f->io("field.h5", "scal", 0, true); // go to fourier space, write into cdata_tmp f->dft(); - f->io("field.h5", "cdata_tmp", 0, false); + f->io("field.h5", "scal_tmp", 0, false); f->ift(); - f->io("field.h5", "rdata", 0, false); - f->io("field.h5", "cdata", 0, true); + f->io("field.h5", "scal", 0, false); + f->real_space_representation = false; + f->io("field.h5", "scal", 0, true); hid_t gg; if (f->myrank == 0) gg = H5Fopen("field.h5", H5F_ACC_RDWR, H5P_DEFAULT); kk->cospectrum<float, ONE>( - f->get_rdata(), - f->get_rdata(), + f->get_cdata(), + f->get_cdata(), gg, "scal", 0); f->ift(); - f->io("field.h5", "rdata_tmp", 0, false); + f->io("field.h5", "scal_tmp", 0, false); std::vector<double> me; me.resize(1); me[0] = 30; @@ -66,11 +71,15 @@ class TestField(_fluid_particle_base): 0, me); if (f->myrank == 0) H5Fclose(gg); + v->real_space_representation = false; + v->io("field.h5", "vec", 0, true); + v->io("field.h5", "vec_tmp", 0, false); //endcpp """.format(self.C_dtype) self.fluid_end += """ //begincpp delete f; + delete v; //endcpp """ return None @@ -92,7 +101,7 @@ class TestField(_fluid_particle_base): return None def main(): - n = 128 + n = 32 kdata = pyfftw.n_byte_align_empty( (n, n, n//2 + 1), pyfftw.simd_alignment, @@ -116,10 +125,10 @@ def main(): tf.parameters['ny'] = n tf.parameters['nz'] = n f = h5py.File('field.h5', 'w') - f['cdata'] = cdata.reshape((1,) + cdata.shape) - f['cdata_tmp'] = np.zeros(shape=(1,) + cdata.shape).astype(cdata.dtype) - f['rdata'] = rdata.reshape((1,) + rdata.shape) - f['rdata_tmp'] = np.zeros(shape=(1,) + rdata.shape).astype(rdata.dtype) + f['scal/complex/0'] = cdata + f['scal/real/0'] = rdata + f['vec/complex/0'] = np.array([cdata, cdata, cdata]).reshape(cdata.shape + (3,)) + f['vec/real/0'] = np.array([rdata, rdata, rdata]).reshape(rdata.shape + (3,)) f['moments/scal'] = np.zeros(shape = (1, 10)).astype(np.float) f['histograms/scal'] = np.zeros(shape = (1, 64)).astype(np.float) kspace = tf.get_kspace() @@ -133,35 +142,60 @@ def main(): '--ncpu', '2']) f = h5py.File('field.h5', 'r') - err0 = np.max(np.abs(f['rdata_tmp'][0] - rdata)) / np.mean(np.abs(rdata)) - err1 = np.max(np.abs(f['rdata'][0]/(n**3) - rdata)) / np.mean(np.abs(rdata)) - err2 = np.max(np.abs(f['cdata_tmp'][0]/(n**3) - cdata)) / np.mean(np.abs(cdata)) - print(err0, err1, err2) - assert(err0 < 1e-5) - assert(err1 < 1e-5) - assert(err2 < 1e-4) - ### compare - #fig = plt.figure(figsize=(12, 6)) - #a = fig.add_subplot(121) - #a.set_axis_off() - #a.imshow(rdata[0, :, :], interpolation = 'none') - #a = fig.add_subplot(122) - #a.set_axis_off() - #a.imshow(f['rdata_tmp'][0, 0, :, :], interpolation = 'none') + #err0 = np.max(np.abs(f['scal_tmp/real/0'].value - rdata)) / np.mean(np.abs(rdata)) + #err1 = np.max(np.abs(f['scal/real/0'].value/(n**3) - rdata)) / np.mean(np.abs(rdata)) + #err2 = np.max(np.abs(f['scal_tmp/complex/0'].value/(n**3) - cdata)) / np.mean(np.abs(cdata)) + #print(err0, err1, err2) + #assert(err0 < 1e-5) + #assert(err1 < 1e-5) + #assert(err2 < 1e-4) + ## compare + fig = plt.figure(figsize=(18, 6)) + a = fig.add_subplot(131) + a.set_axis_off() + v0 = f['vec/complex/0'][:, :, 0, 0] + v1 = f['vec_tmp/complex/0'][:, :, 0, 0] + a.imshow(np.log(np.abs(v0 - v1)), + interpolation = 'none') + a = fig.add_subplot(132) + a.set_axis_off() + a.imshow(np.log(np.abs(v0)), + interpolation = 'none') + a = fig.add_subplot(133) + a.set_axis_off() + a.imshow(np.log(np.abs(v1)), + interpolation = 'none') + fig.tight_layout() + fig.savefig('tst_fields.pdf') + fig = plt.figure(figsize=(18, 6)) + a = fig.add_subplot(131) + a.set_axis_off() + v0 = f['scal/complex/0'][:, :, 0] + v1 = f['scal_tmp/complex/0'][:, :, 0] + a.imshow(np.log(np.abs(v0 - v1)), + interpolation = 'none') + a = fig.add_subplot(132) + a.set_axis_off() + a.imshow(np.log(np.abs(v0)), + interpolation = 'none') + a = fig.add_subplot(133) + a.set_axis_off() + a.imshow(np.log(np.abs(v1)), + interpolation = 'none') + fig.tight_layout() + fig.savefig('tst_sfields.pdf') + # look at moments and histogram + #print('moments are ', f['moments/scal'][0]) + #fig = plt.figure(figsize=(6,6)) + #a = fig.add_subplot(211) + #a.plot(f['histograms/scal'][0]) + #a.set_yscale('log') + #a = fig.add_subplot(212) + #a.plot(f['spectra/scal'][0]) + #a.set_xscale('log') + #a.set_yscale('log') #fig.tight_layout() #fig.savefig('tst.pdf') - # look at moments and histogram - print('moments are ', f['moments/scal'][0]) - fig = plt.figure(figsize=(6,6)) - a = fig.add_subplot(211) - a.plot(f['histograms/scal'][0]) - a.set_yscale('log') - a = fig.add_subplot(212) - a.plot(f['spectra/scal'][0]) - a.set_xscale('log') - a.set_yscale('log') - fig.tight_layout() - fig.savefig('tst.pdf') return None if __name__ == '__main__': diff --git a/tests/test_vorticity_equation.py b/tests/test_vorticity_equation.py new file mode 100644 index 0000000000000000000000000000000000000000..ec50531df29e82c1ff767ab3d292bef0aac66c4c --- /dev/null +++ b/tests/test_vorticity_equation.py @@ -0,0 +1,101 @@ +####################################################################### +# # +# Copyright 2015 Max Planck Institute # +# for Dynamics and Self-Organization # +# # +# This file is part of bfps. # +# # +# bfps is free software: you can redistribute it and/or modify # +# it under the terms of the GNU General Public License as published # +# by the Free Software Foundation, either version 3 of the License, # +# or (at your option) any later version. # +# # +# bfps is distributed in the hope that it will be useful, # +# but WITHOUT ANY WARRANTY; without even the implied warranty of # +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # +# GNU General Public License for more details. # +# # +# You should have received a copy of the GNU General Public License # +# along with bfps. If not, see <http://www.gnu.org/licenses/> # +# # +# Contact: Cristian.Lalescu@ds.mpg.de # +# # +####################################################################### + + + +import sys +import os +import numpy as np +import h5py +import argparse + +import bfps +import bfps.tools + +from bfps_addons import NSReader +import matplotlib.pyplot as plt + +def main(): + c = bfps.NavierStokes() + c.launch( + ['-n', '72', + '--simname', 'fluid_solver', + '--ncpu', '4', + '--niter_todo', '256', + '--niter_out', '256', + '--niter_stat', '1', + '--wd', './'] + + sys.argv[1:]) + data = c.read_cfield(iteration = 0) + f = h5py.File('vorticity_equation_cvorticity_i00000.h5', 'w') + f['vorticity/complex/0'] = data + f.close() + c = bfps.NSVorticityEquation() + c.launch( + ['-n', '72', + '--simname', 'vorticity_equation', + '--ncpu', '4', + '--niter_todo', '256', + '--niter_out', '256', + '--niter_stat', '1', + '--wd', './'] + + sys.argv[1:]) + c0 = NSReader(simname = 'fluid_solver') + c1 = NSReader(simname = 'vorticity_equation') + df0 = c0.get_data_file() + df1 = c1.get_data_file() + f = plt.figure(figsize=(6,10)) + a = f.add_subplot(211) + a.plot(df0['statistics/moments/vorticity'][:, 2, 3], + color = 'blue', + marker = '.') + a.plot(df1['statistics/moments/vorticity'][:, 2, 3], + color = 'red', + marker = '.') + a = f.add_subplot(212) + a.plot(df0['statistics/moments/velocity'][:, 2, 3], + color = 'blue', + marker = '.') + a.plot(df1['statistics/moments/velocity'][:, 2, 3], + color = 'red', + marker = '.') + f.tight_layout() + f.savefig('figs/moments.pdf') + f = plt.figure(figsize = (6, 10)) + a = f.add_subplot(111) + a.plot(c0.statistics['enstrophy(t, k)'][0]) + a.plot(c1.statistics['enstrophy(t, k)'][0]) + a.set_yscale('log') + f.tight_layout() + f.savefig('figs/spectra.pdf') + f = h5py.File('vorticity_equation_cvorticity_i00000.h5', 'r') + #print(c0.statistics['enstrophy(t, k)'][0]) + #print(c1.statistics['enstrophy(t, k)'][0]) + c0.do_plots() + c1.do_plots() + return None + +if __name__ == '__main__': + main() +