diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml new file mode 100644 index 0000000000000000000000000000000000000000..86587ca1087933e9337366fe39092be2197b6377 --- /dev/null +++ b/.gitlab-ci.yml @@ -0,0 +1,10 @@ + +simple_test: + script: + - source tests/ci-scripts/test.sh + tags: + - fftw3 + - hdf5 + - mpi + - python3 + diff --git a/AUTHORS b/AUTHORS new file mode 100644 index 0000000000000000000000000000000000000000..921ffeda512e71d1a70c2797e5c676f80967aede --- /dev/null +++ b/AUTHORS @@ -0,0 +1,6 @@ +All people who contributed to bfps, in order of the date of their first +contribution. + +Cristian C Lalescu <Cristian.Lalescu@ds.mpg.de> +Dimitar Vlaykov +Berenger Bramas diff --git a/README.rst b/README.rst index 0379bc61d93b1a88baaf8c0d757c0092dbb6361a..ddb9f2447db919248100368a9a08b13297d5e3a4 100644 --- a/README.rst +++ b/README.rst @@ -12,11 +12,14 @@ Parameters and statistics are stored in HDF5 format, together with code information, so simulation data should be "future proof" --- suggestions of possible improvements to the current approach are always welcome. +The primary aim of bfps is to reduce the time spent on setting up and +baby sitting DNS, as well as simplify the analysis of the generated +data. The wish is that this Python package provides an easy and general way of constructing efficient specialized DNS C++ codes for different turbulence problems encountered in research. At the same time, the package should provide a unified way of -postprocessing data, and accessing the postprocessing results. +postprocessing, and accessing the postprocessing results. The code therefore consists of two main parts: the pure C++ code, a set of loosely related "building blocks", and the Python code, which can generate C++ code using the pure classes, but with a significant degree @@ -34,10 +37,10 @@ the user's machine, or submitted to a queue on a cluster. 
Installation ------------ -So far, the code has been run on an ubuntu 14.04 machine, an opensuse -13.2 desktop, and a reasonably standard linux cluster (biggest run so -far was 1344^3 on 16 nodes of 12 cores each, with about 24 seconds per -time step). +So far, the code has been run on laptops, desktops, and a couple of +clusters (biggest run so far was 1536^3 on 16 nodes of 32 cores each, +with about 11 seconds per time step, for a simple incompressible +Navier-Stokes problem). Postprocessing data may not be very computationally intensive, depending on the amount of data involved. @@ -55,21 +58,21 @@ Use a console; navigate to the ``bfps`` folder, and type: **Full installation** If you want to run simulations on the machine where you're installing, -you will need to call `build` before installing. +you will need to call `compile_library` before installing. Your machine needs to have an MPI compiler installed, the HDF5 C library and FFTW >= 3.4. The file `machine_settings_py.py` should be modified -appropriately for your machine (otherwise the `build` command will most +appropriately for your machine (otherwise the `compile_library` command will most likely fail). This file will be copied the first time you run `setup.py` into -`$HOME/.config/bfps/machine_settings.py`, where it will be imported from -afterwards. -You may, obviously, edit it afterwards and rerun the build command as +`$HOME/.config/bfps/machine_settings.py`, **where it will be imported from +afterwards** --- any future edits **must** be made to the new file. +You may, obviously, edit it afterwards and rerun the `compile_library` command as needed. .. code:: bash - python setup.py build + python setup.py compile_library python setup.py install ------------- @@ -99,9 +102,7 @@ Comments * particles: initialization of multistep solvers is done with lower order methods, so direct convergence tests will fail. 
-* Code is used mainly with Python 3.4, but Python 2.7 - compatibility should be kept since mayavi (well, vtk actually) only - works on Python 2. - Until vtk becomes compatible with Python 3.x, any Python 2.7 - incompatibilites can be reported as bugs. +* Code is used mainly with Python 3.4 and 3.5. + In principle it should be easy to maintain compatibility with Python + 2.7.x, but as of `bfps 1.8` this is no longer a main concern. diff --git a/bfps/DNS.py b/bfps/DNS.py new file mode 100644 index 0000000000000000000000000000000000000000..4f26b86c5d4739e1bb3989f2e4a7d9a70ad3f009 --- /dev/null +++ b/bfps/DNS.py @@ -0,0 +1,930 @@ +####################################################################### +# # +# Copyright 2015 Max Planck Institute # +# for Dynamics and Self-Organization # +# # +# This file is part of bfps. # +# # +# bfps is free software: you can redistribute it and/or modify # +# it under the terms of the GNU General Public License as published # +# by the Free Software Foundation, either version 3 of the License, # +# or (at your option) any later version. # +# # +# bfps is distributed in the hope that it will be useful, # +# but WITHOUT ANY WARRANTY; without even the implied warranty of # +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # +# GNU General Public License for more details. # +# # +# You should have received a copy of the GNU General Public License # +# along with bfps. If not, see <http://www.gnu.org/licenses/> # +# # +# Contact: Cristian.Lalescu@ds.mpg.de # +# # +####################################################################### + + + +import os +import sys +import shutil +import subprocess +import argparse +import h5py +import math +import numpy as np +import warnings + +import bfps +from ._code import _code +from bfps import tools + +class DNS(_code): + """This class is meant to stitch together the C++ code into a final source file, + compile it, and handle all job launching. 
def set_precision(
        self,
        fluid_dtype):
    """Select the floating point precision used for the fluid fields.

    :param fluid_dtype: either one of the strings ``'single'`` or
        ``'double'``, or anything ``numpy.dtype`` resolves to
        ``float32`` or ``float64`` (e.g. ``numpy.float32``).
    :raises ValueError: if the request does not resolve to one of the
        two supported precisions.

    The following attributes are set:

    * ``fluid_dtype`` and ``rtype``: the real ``numpy.dtype``,
    * ``ctype``: the complex ``numpy.dtype`` of matching precision,
    * ``C_field_dtype``: ``'float'`` or ``'double'``,
    * ``fluid_precision``: ``'single'`` or ``'double'``.
    """
    aliases = {'single': np.float32,
               'double': np.float64}
    if isinstance(fluid_dtype, str):
        # only the two documented names are accepted as strings
        requested = aliases.get(fluid_dtype)
    else:
        requested = fluid_dtype
    # numpy.dtype(None) silently means float64, so reject None explicitly
    if requested is None:
        raise ValueError(
            'unsupported fluid precision: {0!r}'.format(fluid_dtype))
    try:
        # always store a numpy.dtype instance (never the bare scalar type),
        # so that attributes such as ``itemsize`` are plain integers
        real_dtype = np.dtype(requested)
    except TypeError:
        raise ValueError(
            'unsupported fluid precision: {0!r}'.format(fluid_dtype))
    variants = {
        np.dtype(np.float32): (np.dtype(np.complex64), 'float', 'single'),
        np.dtype(np.float64): (np.dtype(np.complex128), 'double', 'double')}
    if real_dtype not in variants:
        raise ValueError(
            'unsupported fluid precision: {0!r}'.format(fluid_dtype))
    self.fluid_dtype = real_dtype
    self.rtype = self.fluid_dtype
    self.ctype, self.C_field_dtype, self.fluid_precision = variants[real_dtype]
    return None
outfile.write(self.version_message + '\n\n') + outfile.write(self.includes + '\n\n') + outfile.write( + self.cread_pars( + template_class = '{0}<rnumber>::'.format(self.dns_type), + template_prefix = 'template <typename rnumber> ', + simname_variable = 'this->simname.c_str()', + prepend_this = True) + + '\n\n') + for rnumber in ['float', 'double']: + outfile.write(self.cread_pars( + template_class = '{0}<{1}>::'.format(self.dns_type, rnumber), + template_prefix = 'template '.format(rnumber), + just_declaration = True) + '\n\n') + if self.dns_type in ['NSVEparticles', 'NSVE_no_output', 'NSVEparticles_no_output']: + outfile.write('template <typename rnumber> int NSVE<rnumber>::read_parameters(){return EXIT_SUCCESS;}\n') + outfile.write('template int NSVE<float>::read_parameters();\n') + outfile.write('template int NSVE<double>::read_parameters();\n\n') + if self.dns_type in ['NSVEparticles_no_output']: + outfile.write('template <typename rnumber> int NSVEparticles<rnumber>::read_parameters(){return EXIT_SUCCESS;}\n') + outfile.write('template int NSVEparticles<float>::read_parameters();\n') + outfile.write('template int NSVEparticles<double>::read_parameters();\n\n') + outfile.write(self.main + '\n') + return None + def generate_default_parameters(self): + # these parameters are relevant for all DNS classes + self.parameters['dealias_type'] = int(1) + self.parameters['dkx'] = float(1.0) + self.parameters['dky'] = float(1.0) + self.parameters['dkz'] = float(1.0) + self.parameters['niter_todo'] = int(8) + self.parameters['niter_stat'] = int(1) + self.parameters['niter_out'] = int(8) + self.parameters['checkpoints_per_file'] = int(1) + self.parameters['dt'] = float(0.01) + self.parameters['nu'] = float(0.1) + self.parameters['fmode'] = int(1) + self.parameters['famplitude'] = float(0.5) + self.parameters['fk0'] = float(2.0) + self.parameters['fk1'] = float(4.0) + self.parameters['forcing_type'] = 'linear' + self.parameters['histogram_bins'] = int(256) + 
def get_kspace(self):
    """Describe the wavenumber grid implied by the current parameters.

    :returns: a dictionary with the entries

        * ``kM``: largest of the three per-direction maximum wavenumbers,
          each computed as ``dk*(n//2 - 1)`` when ``dealias_type`` is 1
          and as ``dk*(n//3 - 1)`` otherwise,
        * ``dk``: smallest of ``dkx``, ``dky``, ``dkz``,
        * ``nshell``, ``kshell``: zero-filled arrays (``int64`` and
          ``float64``) with ``int(kM / dk) + 2`` entries,
        * ``kx``: ``0 .. nx//2`` multiplied by ``dkx``,
        * ``ky``, ``kz``: the wavenumbers ``-n//2 + 1 .. n//2`` multiplied
          by the corresponding ``dk`` and rolled by ``n//2 + 1``.
    """
    pars = self.parameters
    axes = ['x', 'y', 'z']
    divisor = 2 if pars['dealias_type'] == 1 else 3
    largest_k = [pars['dk' + axis] * (pars['n' + axis] // divisor - 1)
                 for axis in axes]
    result = {}
    result['kM'] = max(largest_k)
    result['dk'] = min(pars['dk' + axis] for axis in axes)
    shell_count = int(result['kM'] / result['dk']) + 2
    result['nshell'] = np.zeros(shell_count, dtype = np.int64)
    result['kshell'] = np.zeros(shell_count, dtype = np.float64)
    # only non-negative wavenumbers are listed along x
    half_nx = pars['nx'] // 2
    result['kx'] = np.arange(0, half_nx + 1).astype(np.float64) * pars['dkx']
    for axis in ['y', 'z']:
        npoints = pars['n' + axis]
        wavenumbers = np.arange(
            -npoints // 2 + 1,
            npoints // 2 + 1).astype(np.float64) * pars['dk' + axis]
        # roll so that the zero wavenumber comes first
        result['k' + axis] = np.roll(wavenumbers, npoints // 2 + 1)
    return result
def compute_statistics(self, iter0 = 0, iter1 = None):
    r"""Run basic postprocessing on raw data.

    The energy spectrum :math:`E(t, k)` and the enstrophy spectrum
    :math:`\frac{1}{2}\omega^2(t, k)` are computed from the

    .. math::

        \sum_{k \leq \|\mathbf{k}\| \leq k+dk}\hat{u_i} \hat{u_j}^*, \hskip .5cm
        \sum_{k \leq \|\mathbf{k}\| \leq k+dk}\hat{\omega_i} \hat{\omega_j}^*

    tensors, and the enstrophy spectrum is also used to
    compute the dissipation :math:`\varepsilon(t)`.
    These basic quantities are stored in the HDF5 file
    ``simname_postprocess.h5``, which is only recomputed when the
    requested range of statistics indices differs from the stored one.

    :param iter0: first iteration to consider; it is clamped to the
        last iteration for which moments are available.
    :param iter1: last iteration to consider; ``None`` (default) means
        the value of the ``iteration`` dataset of the data file.
    :returns: ``None``; results are placed in ``self.statistics``.

    Nothing is done if ``self.statistics`` is already populated, or if
    the data file has no ``statistics/moments`` group.
    """
    # results are cached on the instance
    if len(self.statistics) > 0:
        return None
    self.read_parameters()
    stat_stride = self.parameters['niter_stat']
    with self.get_data_file() as data_file:
        if 'moments' not in data_file['statistics'].keys():
            return None
        iter0 = min(
            data_file['statistics/moments/velocity'].shape[0] * stat_stride - 1,
            iter0)
        # ``dataset[()]`` replaces ``dataset.value`` (removed in h5py 3)
        last_iteration = data_file['iteration'][()]
        if iter1 is None:
            iter1 = last_iteration
        else:
            iter1 = min(last_iteration, iter1)
        ii0 = iter0 // stat_stride
        ii1 = iter1 // stat_stride
        self.statistics['kshell'] = data_file['kspace/kshell'][()]
        self.statistics['kM'] = data_file['kspace/kM'][()]
        self.statistics['dk'] = data_file['kspace/dk'][()]
        # the context manager guarantees that the postprocess file is closed
        with h5py.File(self.get_postprocess_file_name(), 'a') as pp_file:
            computation_needed = True
            if 'ii0' in pp_file.keys():
                computation_needed = not (ii0 == pp_file['ii0'][()] and
                                          ii1 == pp_file['ii1'][()])
                if computation_needed:
                    # stale results: wipe them (iterate over a copy of the
                    # keys, since entries are deleted inside the loop)
                    for k in list(pp_file.keys()):
                        del pp_file[k]
            if computation_needed:
                pp_file['iter0'] = iter0
                pp_file['iter1'] = iter1
                pp_file['ii0'] = ii0
                pp_file['ii1'] = ii1
                pp_file['t'] = (self.parameters['dt'] *
                                stat_stride *
                                (np.arange(ii0, ii1+1).astype(np.float64)))
                velocity_spectra = data_file['statistics/spectra/velocity_velocity']
                vorticity_spectra = data_file['statistics/spectra/vorticity_vorticity']
                # half the trace of the spectral tensors
                pp_file['energy(t, k)'] = (
                    velocity_spectra[ii0:ii1+1, :, 0, 0] +
                    velocity_spectra[ii0:ii1+1, :, 1, 1] +
                    velocity_spectra[ii0:ii1+1, :, 2, 2])/2
                pp_file['enstrophy(t, k)'] = (
                    vorticity_spectra[ii0:ii1+1, :, 0, 0] +
                    vorticity_spectra[ii0:ii1+1, :, 1, 1] +
                    vorticity_spectra[ii0:ii1+1, :, 2, 2])/2
                velocity_moments = data_file['statistics/moments/velocity']
                pp_file['vel_max(t)'] = velocity_moments[ii0:ii1+1, 9, 3]
                pp_file['renergy(t)'] = velocity_moments[ii0:ii1+1, 2, 3]/2
            for k in ['t',
                      'energy(t, k)',
                      'enstrophy(t, k)',
                      'vel_max(t)',
                      'renergy(t)']:
                if k in pp_file.keys():
                    self.statistics[k] = pp_file[k][()]
    self.compute_time_averages()
    return None
Ishihara et al, + *Small-scale statistics in high-resolution direct numerical + simulation of turbulence: Reynolds number dependence of + one-point velocity gradient statistics*. + J. Fluid Mech., + **592**, 335-366, 2007 + """ + for key in ['energy', 'enstrophy']: + self.statistics[key + '(t)'] = (self.statistics['dk'] * + np.sum(self.statistics[key + '(t, k)'], axis = 1)) + self.statistics['Uint(t)'] = np.sqrt(2*self.statistics['energy(t)'] / 3) + self.statistics['Lint(t)'] = ((self.statistics['dk']*np.pi / + (2*self.statistics['Uint(t)']**2)) * + np.nansum(self.statistics['energy(t, k)'] / + self.statistics['kshell'][None, :], axis = 1)) + for key in ['energy', + 'enstrophy', + 'vel_max', + 'Uint', + 'Lint']: + if key + '(t)' in self.statistics.keys(): + self.statistics[key] = np.average(self.statistics[key + '(t)'], axis = 0) + for suffix in ['', '(t)']: + self.statistics['diss' + suffix] = (self.parameters['nu'] * + self.statistics['enstrophy' + suffix]*2) + self.statistics['etaK' + suffix] = (self.parameters['nu']**3 / + self.statistics['diss' + suffix])**.25 + self.statistics['tauK' + suffix] = (self.parameters['nu'] / + self.statistics['diss' + suffix])**.5 + self.statistics['Re' + suffix] = (self.statistics['Uint' + suffix] * + self.statistics['Lint' + suffix] / + self.parameters['nu']) + self.statistics['lambda' + suffix] = (15 * self.parameters['nu'] * + self.statistics['Uint' + suffix]**2 / + self.statistics['diss' + suffix])**.5 + self.statistics['Rlambda' + suffix] = (self.statistics['Uint' + suffix] * + self.statistics['lambda' + suffix] / + self.parameters['nu']) + self.statistics['kMeta' + suffix] = (self.statistics['kM'] * + self.statistics['etaK' + suffix]) + if self.parameters['dealias_type'] == 1: + self.statistics['kMeta' + suffix] *= 0.8 + self.statistics['Tint'] = self.statistics['Lint'] / self.statistics['Uint'] + self.statistics['Taylor_microscale'] = self.statistics['lambda'] + return None + def set_plt_style( + self, + style = {'dashes' 
def write_par(
        self,
        iter0 = 0,
        particle_ic = None):
    """Write the parameter file and create the empty output datasets.

    After delegating to ``_code.write_par``, the data file is extended
    with the executable name, the wavenumber information returned by
    ``get_kspace``, resizable ``statistics/spectra``,
    ``statistics/moments`` and ``statistics/histograms`` datasets for
    velocity and vorticity, and a ``checkpoint`` counter set to 0.
    For the particle variants of the solver, the ``tracers0`` groups
    and their ``rhs``/``state`` datasets are created in the first
    checkpoint file.

    :param iter0: forwarded to ``_code.write_par``.
    :param particle_ic: optional array whose last axis has length 3;
        in this method it is only used to choose the shape of the
        tracer datasets (its values are not written here).
        If ``None``, the shape is ``(nparticles,)``.
    :raises AssertionError: if the ``niter_*`` parameters are not
        multiples of each other as required, or if the last axis of
        ``particle_ic`` does not have length 3.
    """
    pars = self.parameters
    has_particles = self.dns_type in ['NSVEparticles_no_output', 'NSVEparticles']
    assert (pars['niter_todo'] % pars['niter_stat'] == 0)
    assert (pars['niter_todo'] % pars['niter_out'] == 0)
    assert (pars['niter_out'] % pars['niter_stat'] == 0)
    if has_particles:
        assert (pars['niter_todo'] % pars['niter_part'] == 0)
        assert (pars['niter_out'] % pars['niter_part'] == 0)
    _code.write_par(self, iter0 = iter0)
    with h5py.File(self.get_data_file_name(), 'r+') as ofile:
        ofile['bfps_info/exec_name'] = self.name
        kspace = self.get_kspace()
        for k in kspace.keys():
            ofile['kspace/' + k] = kspace[k]
        nshells = kspace['nshell'].shape[0]
        nbins = pars['histogram_bins']
        for k in ['velocity', 'vorticity']:
            # chunks are sized to hold about 2**20 bytes of 8 byte values,
            # with at least one time slice per chunk
            time_chunk = max(2**20//(8*3*3*nshells), 1)
            ofile.create_dataset('statistics/spectra/' + k + '_' + k,
                                 (1, nshells, 3, 3),
                                 chunks = (time_chunk, nshells, 3, 3),
                                 maxshape = (None, nshells, 3, 3),
                                 dtype = np.float64)
            time_chunk = max(2**20//(8*4*10), 1)
            ofile.create_dataset('statistics/moments/' + k,
                                 (1, 10, 4),
                                 chunks = (time_chunk, 10, 4),
                                 maxshape = (None, 10, 4),
                                 dtype = np.float64)
            time_chunk = max(2**20//(8*4*nbins), 1)
            ofile.create_dataset('statistics/histograms/' + k,
                                 (1, nbins, 4),
                                 chunks = (time_chunk, nbins, 4),
                                 maxshape = (None, nbins, 4),
                                 dtype = np.int64)
        ofile['checkpoint'] = int(0)
    if self.dns_type in ['NSVE', 'NSVE_no_output']:
        # no particles: nothing else to prepare
        return None

    if particle_ic is None:
        pbase_shape = (pars['nparticles'],)
    else:
        pbase_shape = particle_ic.shape[:-1]
        assert (particle_ic.shape[-1] == 3)
    with h5py.File(self.get_checkpoint_0_fname(), 'a') as ofile:
        s = 0
        ofile.create_group('tracers{0}'.format(s))
        ofile.create_group('tracers{0}/rhs'.format(s))
        ofile.create_group('tracers{0}/state'.format(s))
        # ``np.float`` was an alias of the builtin ``float`` (removed from
        # NumPy); ``np.float64`` is the same dtype
        ofile['tracers{0}/rhs'.format(s)].create_dataset(
            '0',
            shape = (
                (pars['tracers{0}_integration_steps'.format(s)],) +
                pbase_shape +
                (3,)),
            dtype = np.float64)
        ofile['tracers{0}/state'.format(s)].create_dataset(
            '0',
            shape = (
                pbase_shape +
                (3,)),
            dtype = np.float64)
    return None
def simulation_parser_arguments(
        self,
        parser):
    """Register the options that describe a simulation on ``parser``.

    The options are: simulation name, grid size, box lengths (in units
    of pi) for the three directions, work directory, precision, the
    source simulation (work directory, name and iteration),
    ``--kMeta`` and ``--dtfactor``.

    :param parser: an ``argparse`` parser (or subparser) object.
    :returns: ``None``
    """
    leading_options = [
        (('--simname',),
         dict(type = str,
              dest = 'simname',
              default = 'test')),
        (('-n', '--grid-size'),
         dict(type = int,
              dest = 'n',
              default = 32,
              metavar = 'N',
              help = 'code is run by default in a grid of NxNxN'))]
    box_options = [
        (('--L{0}'.format(axis), '--box-length-{0}'.format(axis)),
         dict(type = float,
              dest = 'L{0}'.format(axis),
              default = 2.0,
              metavar = 'length{0}'.format(axis),
              help = 'length of the box in the {0} direction will be `length{0} x pi`'.format(axis)))
        for axis in ['x', 'y', 'z']]
    trailing_options = [
        (('--wd',),
         dict(type = str,
              dest = 'work_dir',
              default = './')),
        (('--precision',),
         dict(choices = ['single', 'double'],
              type = str,
              default = 'single')),
        (('--src-wd',),
         dict(type = str,
              dest = 'src_work_dir',
              default = '')),
        (('--src-simname',),
         dict(type = str,
              dest = 'src_simname',
              default = '')),
        (('--src-iteration',),
         dict(type = int,
              dest = 'src_iteration',
              default = 0)),
        (('--kMeta',),
         dict(type = float,
              dest = 'kMeta',
              default = 2.0)),
        (('--dtfactor',),
         dict(type = float,
              dest = 'dtfactor',
              default = 0.5,
              help = 'dt is computed as DTFACTOR / N'))]
    # registration order is kept, since it determines the order of the
    # entries in the generated help message
    for flags, settings in leading_options + box_options + trailing_options:
        parser.add_argument(*flags, **settings)
    return None
'--particle-rand-seed', + type = int, + dest = 'particle_rand_seed', + default = None) + parser.add_argument( + '--pclouds', + type = int, + dest = 'pclouds', + default = 1, + help = ('number of particle clouds. Particle "clouds" ' + 'consist of particles distributed according to ' + 'pcloud-type.')) + parser.add_argument( + '--pcloud-type', + choices = ['random-cube', + 'regular-cube'], + dest = 'pcloud_type', + default = 'random-cube') + parser.add_argument( + '--particle-cloud-size', + type = float, + dest = 'particle_cloud_size', + default = 2*np.pi) + return None + def add_parser_arguments( + self, + parser): + subparsers = parser.add_subparsers( + dest = 'DNS_class', + help = 'type of simulation to run') + subparsers.required = True + parser_NSVE = subparsers.add_parser( + 'NSVE', + help = 'plain Navier-Stokes vorticity formulation') + self.simulation_parser_arguments(parser_NSVE) + self.job_parser_arguments(parser_NSVE) + self.parameters_to_parser_arguments(parser_NSVE) + + parser_NSVE_no_output = subparsers.add_parser( + 'NSVE_no_output', + help = 'plain Navier-Stokes vorticity formulation, checkpoints are NOT SAVED') + self.simulation_parser_arguments(parser_NSVE_no_output) + self.job_parser_arguments(parser_NSVE_no_output) + self.parameters_to_parser_arguments(parser_NSVE_no_output) + + parser_NSVEparticles_no_output = subparsers.add_parser( + 'NSVEparticles_no_output', + help = 'plain Navier-Stokes vorticity formulation, with basic fluid tracers, checkpoints are NOT SAVED') + self.simulation_parser_arguments(parser_NSVEparticles_no_output) + self.job_parser_arguments(parser_NSVEparticles_no_output) + self.particle_parser_arguments(parser_NSVEparticles_no_output) + self.parameters_to_parser_arguments(parser_NSVEparticles_no_output) + self.parameters_to_parser_arguments( + parser_NSVEparticles_no_output, + self.NSVEp_extra_parameters) + + parser_NSVEp2 = subparsers.add_parser( + 'NSVEparticles', + help = 'plain Navier-Stokes vorticity formulation, with 
def prepare_launch(
        self,
        args = []):
    r"""Set up reasonable parameters.

    With the default Lundgren forcing applied in the band [2, 4],
    we can estimate the dissipation, therefore we can estimate
    :math:`k_M \eta_K` and constrain the viscosity.

    In brief, the command line parameter :math:`k_M \eta_K` is
    used in the following formula for :math:`\nu` (:math:`N` is the
    number of real space grid points per coordinate):

    .. math::

        \nu = \left(\frac{2 k_M \eta_K}{N} \right)^{4/3}

    With this choice, the average dissipation :math:`\varepsilon`
    will be close to 0.4, and the integral scale velocity will be
    close to 0.77, yielding the approximate value for the Taylor
    microscale and corresponding Reynolds number:

    .. math::

        \lambda \approx 4.75\left(\frac{2 k_M \eta_K}{N} \right)^{4/6}, \hskip .5in
        R_\lambda \approx 3.7 \left(\frac{N}{2 k_M \eta_K} \right)^{4/6}

    :param args: list of command line arguments, forwarded to
        ``_code.prepare_launch``.
    :returns: the namespace of parsed options, after the unset
        ``dkx``/``dky``/``dkz``, ``nx``/``ny``/``nz`` and
        ``checkpoints_per_file`` entries have been given defaults.
    """
    opt = _code.prepare_launch(self, args = args)
    self.set_precision(opt.precision)
    self.dns_type = opt.DNS_class
    self.name = self.dns_type + '-' + self.fluid_precision + '-v' + bfps.__version__
    has_particles = self.dns_type in ['NSVEparticles', 'NSVEparticles_no_output']
    # merge parameters if needed
    if has_particles:
        for k in self.NSVEp_extra_parameters.keys():
            self.parameters[k] = self.NSVEp_extra_parameters[k]
    self.parameters['nu'] = (opt.kMeta * 2 / opt.n)**(4./3)
    self.parameters['dt'] = (opt.dtfactor / opt.n)
    # custom famplitude for 288 and 576
    if opt.n == 288:
        self.parameters['famplitude'] = 0.45
    elif opt.n == 576:
        self.parameters['famplitude'] = 0.47
    # keep niter_out a divisor of niter_todo
    if ((self.parameters['niter_todo'] % self.parameters['niter_out']) != 0):
        self.parameters['niter_out'] = self.parameters['niter_todo']
    if len(opt.src_work_dir) == 0:
        opt.src_work_dir = os.path.realpath(opt.work_dir)
    # per-direction defaults; handling the three directions in one loop
    # ensures that each option is guarded by its own value (the dkz default
    # used to be guarded by a test on dkx, hence it was never applied)
    for axis in ['x', 'y', 'z']:
        if getattr(opt, 'dk' + axis) is None:
            setattr(opt, 'dk' + axis, 2. / getattr(opt, 'L' + axis))
    for axis in ['x', 'y', 'z']:
        if getattr(opt, 'n' + axis) is None:
            setattr(opt, 'n' + axis, opt.n)
    if opt.checkpoints_per_file is None:
        # hardcoded FFTW complex representation size
        field_size = 3*(opt.nx+2)*opt.ny*opt.nz*self.fluid_dtype.itemsize
        checkpoint_size = field_size
        if has_particles:
            rhs_size = self.parameters['tracers0_integration_steps']
            if opt.tracers0_integration_steps is not None:
                rhs_size = opt.tracers0_integration_steps
            nparticles = opt.nparticles
            if nparticles is None:
                nparticles = self.NSVEp_extra_parameters['nparticles']
            # state plus rhs_size right hand sides, 3 values of 8 bytes each
            particle_size = (1+rhs_size)*3*nparticles*8
            checkpoint_size += particle_size
        # group small checkpoints, aiming at files of about 1e9 bytes;
        # for larger checkpoints the option is left unset
        if checkpoint_size < 1e9:
            opt.checkpoints_per_file = int(1e9 / checkpoint_size)
    self.pars_from_namespace(opt)
    return opt
for scalar_generator + scalar_generator = tools.generate_data_3D): + """generate vector field. + + The generated field is not divergence free, but it has the proper + shape. + + :param rseed: seed for random number generator + :param spectra_slope: spectrum of field will look like k^(-p) + :param amplitude: all amplitudes are multiplied with this value + :param iteration: the field is written at this iteration + :param field_name: the name of the field being generated + :param write_to_file: should we write the field to file? + :param scalar_generator: which function to use for generating the + individual components. + Possible values: bfps.tools.generate_data_3D, + bfps.tools.generate_data_3D_uniform + :type rseed: int + :type spectra_slope: float + :type amplitude: float + :type iteration: int + :type field_name: str + :type write_to_file: bool + :type scalar_generator: function + + :returns: ``Kdata``, a complex valued 4D ``numpy.array`` that uses the + transposed FFTW layout. + Kdata[ky, kz, kx, i] is the amplitude of mode (kx, ky, kz) for + the i-th component of the field. + (i.e. x is the fastest index and z the slowest index in the + real-space representation). 
def copy_complex_field(
        self,
        src_file_name,
        src_dset_name,
        dst_file,
        dst_dset_name,
        make_link = True):
    """Make a complex field from another file available in ``dst_file``.

    :param src_file_name: name of the HDF5 file holding the source field.
    :param src_dset_name: name of the source dataset within that file.
    :param dst_file: open, writable HDF5 file (or group) object.
    :param dst_dset_name: name of the dataset to create in ``dst_file``.
    :param make_link: if ``True`` (default) and the source has both the
        shape and the dtype required here, an external link is created
        instead of a copy.
    :returns: ``None``

    The required shape is ``(nz, ny, (nx+2)//2, 3)``.
    If the source has this shape, it is linked or copied (with
    conversion to ``self.ctype``); otherwise a zero-filled dataset is
    created, and the part of the source that fits along each of the
    first three axes is copied into it.
    """
    # I define a min_shape thingie, but for now I only trust this method for
    # the case of increasing/decreasing by the same factor in all directions.
    # in principle we could write something more generic, but i'm not sure
    # how complicated that would be
    dst_shape = (self.parameters['nz'],
                 self.parameters['ny'],
                 (self.parameters['nx']+2) // 2,
                 3)
    # the context manager closes the source file, also in case of errors
    with h5py.File(src_file_name, 'r') as src_file:
        src_dset = src_file[src_dset_name]
        if (src_dset.shape == dst_shape):
            if make_link and (src_dset.dtype == self.ctype):
                dst_file[dst_dset_name] = h5py.ExternalLink(
                    src_file_name,
                    src_dset_name)
            else:
                dst_file.create_dataset(
                    dst_dset_name,
                    shape = dst_shape,
                    dtype = self.ctype,
                    fillvalue = 0.0)
                # copy one slab at a time
                for kz in range(src_dset.shape[0]):
                    dst_file[dst_dset_name][kz] = src_dset[kz]
        else:
            min_shape = (min(dst_shape[0], src_dset.shape[0]),
                         min(dst_shape[1], src_dset.shape[1]),
                         min(dst_shape[2], src_dset.shape[2]),
                         3)
            dst_file.create_dataset(
                dst_dset_name,
                shape = dst_shape,
                dtype = np.dtype(self.ctype),
                fillvalue = complex(0))
            # NOTE(review): only the leading entries along each axis are
            # copied; confirm that this is the intended mode mapping when
            # the grid sizes differ
            for kz in range(min_shape[0]):
                dst_file[dst_dset_name][kz, :min_shape[1], :min_shape[2]] = \
                    src_dset[kz, :min_shape[1], :min_shape[2]]
    return None
+ data = self.generate_vector_field( + write_to_file = False, + spectra_slope = 2.0, + amplitude = 0.05) + f['vorticity/complex/{0}'.format(0)] = data + f.close() + ## take care of particles' initial condition + #if self.dns_type in ['NSVEparticles', 'NSVEparticles_no_output']: + # if opt.pclouds > 1: + # np.random.seed(opt.particle_rand_seed) + # if opt.pcloud_type == 'random-cube': + # particle_initial_condition = ( + # np.random.random((opt.pclouds, 1, 3))*2*np.pi + + # np.random.random((1, self.parameters['nparticles'], 3))*opt.particle_cloud_size) + # elif opt.pcloud_type == 'regular-cube': + # onedarray = np.linspace( + # -opt.particle_cloud_size/2, + # opt.particle_cloud_size/2, + # self.parameters['nparticles']) + # particle_initial_condition = np.zeros( + # (opt.pclouds, + # self.parameters['nparticles'], + # self.parameters['nparticles'], + # self.parameters['nparticles'], 3), + # dtype = np.float64) + # particle_initial_condition[:] = \ + # np.random.random((opt.pclouds, 1, 1, 1, 3))*2*np.pi + # particle_initial_condition[..., 0] += onedarray[None, None, None, :] + # particle_initial_condition[..., 1] += onedarray[None, None, :, None] + # particle_initial_condition[..., 2] += onedarray[None, :, None, None] + self.write_par( + particle_ic = None) + if self.dns_type in ['NSVEparticles', 'NSVEparticles_no_output']: + if self.parameters['nparticles'] > 0: + self.generate_tracer_state( + species = 0, + rseed = opt.particle_rand_seed) + if not os.path.exists(self.get_particle_file_name()): + with h5py.File(self.get_particle_file_name(), 'w') as particle_file: + particle_file.create_group('tracers0/velocity') + particle_file.create_group('tracers0/acceleration') + self.run( + nb_processes = opt.nb_processes, + nb_threads_per_process = opt.nb_threads_per_process, + njobs = opt.njobs, + hours = opt.minutes // 60, + minutes = opt.minutes % 60, + no_submit = opt.no_submit) + return None + diff --git a/bfps/FluidConvert.py b/bfps/FluidConvert.py index 
14be9b985139fabf3b7e1cda1b5f9ee9618a8307..d924f2a1d5ed411855ca13687aa716fa3aa31dc5 100644 --- a/bfps/FluidConvert.py +++ b/bfps/FluidConvert.py @@ -43,7 +43,7 @@ class FluidConvert(_fluid_particle_base): work_dir = './', simname = 'test', fluid_precision = 'single', - use_fftw_wisdom = True): + use_fftw_wisdom = False): _fluid_particle_base.__init__( self, name = name + '-' + fluid_precision, @@ -98,7 +98,7 @@ class FluidConvert(_fluid_particle_base): nx, ny, nz, dkx, dky, dkz, dealias_type, - FFTW_ESTIMATE); + DEFAULT_FFTW_FLAG); //endcpp """.format(self.C_dtype) self.fluid_loop += """ @@ -109,11 +109,13 @@ class FluidConvert(_fluid_particle_base): """ self.fluid_end += 'delete fs;\n' return None - def add_parser_arguments( + def specific_parser_arguments( self, parser): - _fluid_particle_base.add_parser_arguments(self, parser) - self.parameters_to_parser_arguments(parser, parameters = self.spec_parameters) + _fluid_particle_base.specific_parser_arguments(self, parser) + self.parameters_to_parser_arguments( + parser, + parameters = self.spec_parameters) return None def launch( self, @@ -125,13 +127,13 @@ class FluidConvert(_fluid_particle_base): self.pars_from_namespace( opt, parameters = self.spec_parameters) - self.set_host_info(bfps.host_info) self.rewrite_par( group = 'conversion_parameters', parameters = self.spec_parameters) - self.run( - ncpu = opt.ncpu, - err_file = 'err_convert', - out_file = 'out_convert') + self.run(ncpu = opt.ncpu, + hours = opt.minutes // 60, + minutes = opt.minutes % 60, + err_file = 'err_convert', + out_file = 'out_convert') return None diff --git a/bfps/FluidResize.py b/bfps/FluidResize.py index be0af1fe8228ffd31f42c08b5d0fca45dadbf8b2..fb5e26208f6960d447bc927bd9e207354620d188 100644 --- a/bfps/FluidResize.py +++ b/bfps/FluidResize.py @@ -136,6 +136,8 @@ class FluidResize(_fluid_particle_base): for k in ['dst_nx', 'dst_ny', 'dst_nz']: if type(cmd_line_pars[k]) == type(None): cmd_line_pars[k] = opt.m + # the 3 dst_ni have been 
updated in opt itself at this point + # I'm not sure if this code is future-proof... self.parameters['niter_todo'] = 0 self.pars_from_namespace(opt) src_file = os.path.join( @@ -144,10 +146,11 @@ class FluidResize(_fluid_particle_base): read_file = os.path.join( self.work_dir, opt.src_simname + '_cvorticity_i{0:0>5x}'.format(opt.src_iteration)) - self.set_host_info(bfps.host_info) self.write_par(iter0 = opt.src_iteration) if not os.path.exists(read_file): os.symlink(src_file, read_file) - self.run(ncpu = opt.ncpu) + self.run(ncpu = opt.ncpu, + hours = opt.minutes // 60, + minutes = opt.minutes % 60) return None diff --git a/bfps/NSVorticityEquation.py b/bfps/NSVorticityEquation.py new file mode 100644 index 0000000000000000000000000000000000000000..5f87097fefbb56f731a75597395d42423fc17ba6 --- /dev/null +++ b/bfps/NSVorticityEquation.py @@ -0,0 +1,864 @@ +####################################################################### +# # +# Copyright 2015 Max Planck Institute # +# for Dynamics and Self-Organization # +# # +# This file is part of bfps. # +# # +# bfps is free software: you can redistribute it and/or modify # +# it under the terms of the GNU General Public License as published # +# by the Free Software Foundation, either version 3 of the License, # +# or (at your option) any later version. # +# # +# bfps is distributed in the hope that it will be useful, # +# but WITHOUT ANY WARRANTY; without even the implied warranty of # +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # +# GNU General Public License for more details. # +# # +# You should have received a copy of the GNU General Public License # +# along with bfps. 
If not, see <http://www.gnu.org/licenses/> # +# # +# Contact: Cristian.Lalescu@ds.mpg.de # +# # +####################################################################### + + + +import sys +import os +import numpy as np +import h5py +import argparse + +import bfps +import bfps.tools +from bfps._code import _code +from bfps._fluid_base import _fluid_particle_base + +class NSVorticityEquation(_fluid_particle_base): + def __init__( + self, + name = 'NSVorticityEquation-v' + bfps.__version__, + work_dir = './', + simname = 'test', + fluid_precision = 'single', + fftw_plan_rigor = 'FFTW_MEASURE', + use_fftw_wisdom = True): + """ + This code uses checkpoints for DNS restarts, and it can be stopped + by creating the file "stop_<simname>" in the working directory. + For postprocessing of field snapshots, consider creating a separate + HDF5 file (from the python wrapper) which contains links to all the + different snapshots. + """ + self.fftw_plan_rigor = fftw_plan_rigor + _fluid_particle_base.__init__( + self, + name = name + '-' + fluid_precision, + work_dir = work_dir, + simname = simname, + dtype = fluid_precision, + use_fftw_wisdom = use_fftw_wisdom) + self.parameters['nu'] = float(0.1) + self.parameters['fmode'] = 1 + self.parameters['famplitude'] = float(0.5) + self.parameters['fk0'] = float(2.0) + self.parameters['fk1'] = float(4.0) + self.parameters['forcing_type'] = 'linear' + self.parameters['histogram_bins'] = int(256) + self.parameters['max_velocity_estimate'] = float(1) + self.parameters['max_vorticity_estimate'] = float(1) + self.parameters['checkpoints_per_file'] = int(1) + self.file_datasets_grow = """ + //begincpp + hid_t group; + group = H5Gopen(stat_file, "/statistics", H5P_DEFAULT); + H5Ovisit(group, H5_INDEX_NAME, H5_ITER_NATIVE, grow_statistics_dataset, NULL); + H5Gclose(group); + //endcpp + """ + self.style = {} + self.statistics = {} + self.fluid_output = """ + fs->io_checkpoint(false); + """ + # vorticity_equation specific things + self.includes += 
'#include "vorticity_equation.hpp"\n' + self.store_kspace = """ + //begincpp + if (myrank == 0 && iteration == 0) + { + TIMEZONE("fluid_base::store_kspace"); + hsize_t dims[4]; + hid_t space, dset; + // store kspace information + dset = H5Dopen(stat_file, "/kspace/kshell", H5P_DEFAULT); + space = H5Dget_space(dset); + H5Sget_simple_extent_dims(space, dims, NULL); + H5Sclose(space); + if (fs->kk->nshells != dims[0]) + { + DEBUG_MSG( + "ERROR: computed nshells %d not equal to data file nshells %d\\n", + fs->kk->nshells, dims[0]); + } + H5Dwrite(dset, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, &fs->kk->kshell.front()); + H5Dclose(dset); + dset = H5Dopen(stat_file, "/kspace/nshell", H5P_DEFAULT); + H5Dwrite(dset, H5T_NATIVE_INT64, H5S_ALL, H5S_ALL, H5P_DEFAULT, &fs->kk->nshell.front()); + H5Dclose(dset); + dset = H5Dopen(stat_file, "/kspace/kM", H5P_DEFAULT); + H5Dwrite(dset, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, &fs->kk->kM); + H5Dclose(dset); + dset = H5Dopen(stat_file, "/kspace/dk", H5P_DEFAULT); + H5Dwrite(dset, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, &fs->kk->dk); + H5Dclose(dset); + } + //endcpp + """ + return None + def add_particles( + self, + integration_steps = 2, + neighbours = 1, + smoothness = 1): + assert(integration_steps > 0 and integration_steps < 6) + self.particle_species = 1 + self.parameters['tracers0_integration_steps'] = int(integration_steps) + self.parameters['tracers0_neighbours'] = int(neighbours) + self.parameters['tracers0_smoothness'] = int(smoothness) + self.parameters['tracers0_interpolator'] = 'spline' + self.particle_includes += """ + #include "particles/particles_system_builder.hpp" + #include "particles/particles_output_hdf5.hpp" + """ + ## initialize + self.particle_start += """ + DEBUG_MSG( + "current fname is %s\\n and iteration is %d", + fs->get_current_fname().c_str(), + fs->iteration); + std::unique_ptr<abstract_particles_system<long long int, double>> ps = particles_system_builder( + fs->cvelocity, 
// (field object) + fs->kk, // (kspace object, contains dkx, dky, dkz) + tracers0_integration_steps, // to check coherency between parameters and hdf input file (nb rhs) + (long long int)nparticles, // to check coherency between parameters and hdf input file + fs->get_current_fname(), // particles input filename + std::string("/tracers0/state/") + std::to_string(fs->iteration), // dataset name for initial input + std::string("/tracers0/rhs/") + std::to_string(fs->iteration), // dataset name for initial input + tracers0_neighbours, // parameter (interpolation no neighbours) + tracers0_smoothness, // parameter + MPI_COMM_WORLD, + fs->iteration+1); + particles_output_hdf5<long long int, double,3,3> particles_output_writer_mpi( + MPI_COMM_WORLD, + "tracers0", + nparticles, + tracers0_integration_steps); + """ + self.particle_loop += """ + fs->compute_velocity(fs->cvorticity); + fs->cvelocity->ift(); + ps->completeLoop(dt); + """ + self.particle_output = """ + { + particles_output_writer_mpi.open_file(fs->get_current_fname()); + particles_output_writer_mpi.save(ps->getParticlesPositions(), + ps->getParticlesRhs(), + ps->getParticlesIndexes(), + ps->getLocalNbParticles(), + fs->iteration); + particles_output_writer_mpi.close_file(); + } + """ + self.particle_end += 'ps.release();\n' + return None + def create_stat_output( + self, + dset_name, + data_buffer, + data_type = 'H5T_NATIVE_DOUBLE', + size_setup = None, + close_spaces = True): + new_stat_output_txt = 'Cdset = H5Dopen(stat_file, "{0}", H5P_DEFAULT);\n'.format(dset_name) + if not type(size_setup) == type(None): + new_stat_output_txt += ( + size_setup + + 'wspace = H5Dget_space(Cdset);\n' + + 'ndims = H5Sget_simple_extent_dims(wspace, dims, NULL);\n' + + 'mspace = H5Screate_simple(ndims, count, NULL);\n' + + 'H5Sselect_hyperslab(wspace, H5S_SELECT_SET, offset, NULL, count, NULL);\n') + new_stat_output_txt += ('H5Dwrite(Cdset, {0}, mspace, wspace, H5P_DEFAULT, {1});\n' + + 'H5Dclose(Cdset);\n').format(data_type, 
data_buffer) + if close_spaces: + new_stat_output_txt += ('H5Sclose(mspace);\n' + + 'H5Sclose(wspace);\n') + return new_stat_output_txt + def write_fluid_stats(self): + self.fluid_includes += '#include <cmath>\n' + self.fluid_includes += '#include "fftw_tools.hpp"\n' + self.stat_src += """ + //begincpp + hid_t stat_group; + if (myrank == 0) + stat_group = H5Gopen(stat_file, "statistics", H5P_DEFAULT); + fs->compute_velocity(fs->cvorticity); + *tmp_vec_field = fs->cvelocity->get_cdata(); + tmp_vec_field->compute_stats( + fs->kk, + stat_group, + "velocity", + fs->iteration / niter_stat, + max_velocity_estimate/sqrt(3)); + //endcpp + """ + self.stat_src += """ + //begincpp + *tmp_vec_field = fs->cvorticity->get_cdata(); + tmp_vec_field->compute_stats( + fs->kk, + stat_group, + "vorticity", + fs->iteration / niter_stat, + max_vorticity_estimate/sqrt(3)); + //endcpp + """ + self.stat_src += """ + //begincpp + if (myrank == 0) + H5Gclose(stat_group); + if (myrank == 0) + {{ + hid_t Cdset, wspace, mspace; + int ndims; + hsize_t count[4], offset[4], dims[4]; + offset[0] = fs->iteration/niter_stat; + offset[1] = 0; + offset[2] = 0; + offset[3] = 0; + //endcpp + """.format(self.C_dtype) + if self.dtype == np.float32: + field_H5T = 'H5T_NATIVE_FLOAT' + elif self.dtype == np.float64: + field_H5T = 'H5T_NATIVE_DOUBLE' + self.stat_src += self.create_stat_output( + '/statistics/xlines/velocity', + 'fs->rvelocity->get_rdata()', + data_type = field_H5T, + size_setup = """ + count[0] = 1; + count[1] = nx; + count[2] = 3; + """, + close_spaces = False) + self.stat_src += self.create_stat_output( + '/statistics/xlines/vorticity', + 'fs->rvorticity->get_rdata()', + data_type = field_H5T) + self.stat_src += '}\n' + ## checkpoint + self.stat_src += """ + //begincpp + if (myrank == 0) + { + std::string fname = ( + std::string("stop_") + + std::string(simname)); + { + struct stat file_buffer; + stop_code_now = (stat(fname.c_str(), &file_buffer) == 0); + } + } + MPI_Bcast(&stop_code_now, 1, 
MPI_C_BOOL, 0, MPI_COMM_WORLD); + //endcpp + """ + return None + def fill_up_fluid_code(self): + self.fluid_includes += '#include <cstring>\n' + self.fluid_variables += ( + 'vorticity_equation<{0}, FFTW> *fs;\n'.format(self.C_dtype) + + 'field<{0}, FFTW, THREE> *tmp_vec_field;\n'.format(self.C_dtype) + + 'field<{0}, FFTW, ONE> *tmp_scal_field;\n'.format(self.C_dtype)) + self.fluid_definitions += """ + typedef struct {{ + {0} re; + {0} im; + }} tmp_complex_type; + """.format(self.C_dtype) + self.write_fluid_stats() + if self.dtype == np.float32: + field_H5T = 'H5T_NATIVE_FLOAT' + elif self.dtype == np.float64: + field_H5T = 'H5T_NATIVE_DOUBLE' + self.variables += 'int checkpoint;\n' + self.variables += 'bool stop_code_now;\n' + self.read_checkpoint = """ + //begincpp + if (myrank == 0) + { + hid_t dset = H5Dopen(stat_file, "checkpoint", H5P_DEFAULT); + H5Dread( + dset, + H5T_NATIVE_INT, + H5S_ALL, + H5S_ALL, + H5P_DEFAULT, + &checkpoint); + H5Dclose(dset); + } + MPI_Bcast(&checkpoint, 1, MPI_INT, 0, MPI_COMM_WORLD); + fs->checkpoint = checkpoint; + //endcpp + """ + self.store_checkpoint = """ + //begincpp + checkpoint = fs->checkpoint; + if (myrank == 0) + { + hid_t dset = H5Dopen(stat_file, "checkpoint", H5P_DEFAULT); + H5Dwrite( + dset, + H5T_NATIVE_INT, + H5S_ALL, + H5S_ALL, + H5P_DEFAULT, + &checkpoint); + H5Dclose(dset); + } + //endcpp + """ + self.fluid_start += """ + //begincpp + char fname[512]; + fs = new vorticity_equation<{0}, FFTW>( + simname, + nx, ny, nz, + dkx, dky, dkz, + {1}); + tmp_vec_field = new field<{0}, FFTW, THREE>( + nx, ny, nz, + MPI_COMM_WORLD, + {1}); + tmp_scal_field = new field<{0}, FFTW, ONE>( + nx, ny, nz, + MPI_COMM_WORLD, + {1}); + fs->checkpoints_per_file = checkpoints_per_file; + fs->nu = nu; + fs->fmode = fmode; + fs->famplitude = famplitude; + fs->fk0 = fk0; + fs->fk1 = fk1; + strncpy(fs->forcing_type, forcing_type, 128); + fs->iteration = iteration; + {2} + fs->cvorticity->real_space_representation = false; + 
fs->io_checkpoint(); + //endcpp + """.format( + self.C_dtype, + self.fftw_plan_rigor, + self.read_checkpoint) + self.fluid_start += self.store_kspace + self.fluid_start += 'stop_code_now = false;\n' + self.fluid_loop = 'fs->step(dt);\n' + self.fluid_loop += ('if (fs->iteration % niter_out == 0)\n{\n' + + self.fluid_output + + self.particle_output + + self.store_checkpoint + + '\n}\n' + + 'if (stop_code_now){\n' + + 'iteration = fs->iteration;\n' + + 'break;\n}\n') + self.fluid_end = ('if (fs->iteration % niter_out != 0)\n{\n' + + self.fluid_output + + self.particle_output + + self.store_checkpoint + + 'DEBUG_MSG("checkpoint value is %d\\n", checkpoint);\n' + + '\n}\n' + + 'delete fs;\n' + + 'delete tmp_vec_field;\n' + + 'delete tmp_scal_field;\n') + return None + def get_postprocess_file_name(self): + return os.path.join(self.work_dir, self.simname + '_postprocess.h5') + def get_postprocess_file(self): + return h5py.File(self.get_postprocess_file_name(), 'r') + def compute_statistics(self, iter0 = 0, iter1 = None): + """Run basic postprocessing on raw data. + The energy spectrum :math:`E(t, k)` and the enstrophy spectrum + :math:`\\frac{1}{2}\omega^2(t, k)` are computed from the + + .. math:: + + \sum_{k \\leq \\|\\mathbf{k}\\| \\leq k+dk}\\hat{u_i} \\hat{u_j}^*, \\hskip .5cm + \sum_{k \\leq \\|\\mathbf{k}\\| \\leq k+dk}\\hat{\omega_i} \\hat{\\omega_j}^* + + tensors, and the enstrophy spectrum is also used to + compute the dissipation :math:`\\varepsilon(t)`. + These basic quantities are stored in a newly created HDF5 file, + ``simname_postprocess.h5``. 
+ """ + if len(list(self.statistics.keys())) > 0: + return None + self.read_parameters() + with self.get_data_file() as data_file: + if 'moments' not in data_file['statistics'].keys(): + return None + iter0 = min((data_file['statistics/moments/velocity'].shape[0] * + self.parameters['niter_stat']-1), + iter0) + if type(iter1) == type(None): + iter1 = data_file['iteration'].value + else: + iter1 = min(data_file['iteration'].value, iter1) + ii0 = iter0 // self.parameters['niter_stat'] + ii1 = iter1 // self.parameters['niter_stat'] + self.statistics['kshell'] = data_file['kspace/kshell'].value + self.statistics['kM'] = data_file['kspace/kM'].value + self.statistics['dk'] = data_file['kspace/dk'].value + computation_needed = True + pp_file = h5py.File(self.get_postprocess_file_name(), 'a') + if 'ii0' in pp_file.keys(): + computation_needed = not (ii0 == pp_file['ii0'].value and + ii1 == pp_file['ii1'].value) + if computation_needed: + for k in pp_file.keys(): + del pp_file[k] + if computation_needed: + pp_file['iter0'] = iter0 + pp_file['iter1'] = iter1 + pp_file['ii0'] = ii0 + pp_file['ii1'] = ii1 + pp_file['t'] = (self.parameters['dt']* + self.parameters['niter_stat']* + (np.arange(ii0, ii1+1).astype(np.float))) + pp_file['energy(t, k)'] = ( + data_file['statistics/spectra/velocity_velocity'][ii0:ii1+1, :, 0, 0] + + data_file['statistics/spectra/velocity_velocity'][ii0:ii1+1, :, 1, 1] + + data_file['statistics/spectra/velocity_velocity'][ii0:ii1+1, :, 2, 2])/2 + pp_file['enstrophy(t, k)'] = ( + data_file['statistics/spectra/vorticity_vorticity'][ii0:ii1+1, :, 0, 0] + + data_file['statistics/spectra/vorticity_vorticity'][ii0:ii1+1, :, 1, 1] + + data_file['statistics/spectra/vorticity_vorticity'][ii0:ii1+1, :, 2, 2])/2 + pp_file['vel_max(t)'] = data_file['statistics/moments/velocity'] [ii0:ii1+1, 9, 3] + pp_file['renergy(t)'] = data_file['statistics/moments/velocity'][ii0:ii1+1, 2, 3]/2 + for k in ['t', + 'energy(t, k)', + 'enstrophy(t, k)', + 'vel_max(t)', + 
'renergy(t)']: + if k in pp_file.keys(): + self.statistics[k] = pp_file[k].value + self.compute_time_averages() + return None + def compute_time_averages(self): + """Compute easy stats. + + Further computation of statistics based on the contents of + ``simname_postprocess.h5``. + Standard quantities are as follows + (consistent with [Ishihara]_): + + .. math:: + + U_{\\textrm{int}}(t) = \\sqrt{\\frac{2E(t)}{3}}, \\hskip .5cm + L_{\\textrm{int}}(t) = \\frac{\pi}{2U_{int}^2(t)} \\int \\frac{dk}{k} E(t, k), \\hskip .5cm + T_{\\textrm{int}}(t) = + \\frac{L_{\\textrm{int}}(t)}{U_{\\textrm{int}}(t)} + + \\eta_K = \\left(\\frac{\\nu^3}{\\varepsilon}\\right)^{1/4}, \\hskip .5cm + \\tau_K = \\left(\\frac{\\nu}{\\varepsilon}\\right)^{1/2}, \\hskip .5cm + \\lambda = \\sqrt{\\frac{15 \\nu U_{\\textrm{int}}^2}{\\varepsilon}} + + Re = \\frac{U_{\\textrm{int}} L_{\\textrm{int}}}{\\nu}, \\hskip + .5cm + R_{\\lambda} = \\frac{U_{\\textrm{int}} \\lambda}{\\nu} + + .. [Ishihara] T. Ishihara et al, + *Small-scale statistics in high-resolution direct numerical + simulation of turbulence: Reynolds number dependence of + one-point velocity gradient statistics*. + J. 
Fluid Mech., + **592**, 335-366, 2007 + """ + for key in ['energy', 'enstrophy']: + self.statistics[key + '(t)'] = (self.statistics['dk'] * + np.sum(self.statistics[key + '(t, k)'], axis = 1)) + self.statistics['Uint(t)'] = np.sqrt(2*self.statistics['energy(t)'] / 3) + self.statistics['Lint(t)'] = ((self.statistics['dk']*np.pi / + (2*self.statistics['Uint(t)']**2)) * + np.nansum(self.statistics['energy(t, k)'] / + self.statistics['kshell'][None, :], axis = 1)) + for key in ['energy', + 'enstrophy', + 'vel_max', + 'Uint', + 'Lint']: + if key + '(t)' in self.statistics.keys(): + self.statistics[key] = np.average(self.statistics[key + '(t)'], axis = 0) + for suffix in ['', '(t)']: + self.statistics['diss' + suffix] = (self.parameters['nu'] * + self.statistics['enstrophy' + suffix]*2) + self.statistics['etaK' + suffix] = (self.parameters['nu']**3 / + self.statistics['diss' + suffix])**.25 + self.statistics['tauK' + suffix] = (self.parameters['nu'] / + self.statistics['diss' + suffix])**.5 + self.statistics['Re' + suffix] = (self.statistics['Uint' + suffix] * + self.statistics['Lint' + suffix] / + self.parameters['nu']) + self.statistics['lambda' + suffix] = (15 * self.parameters['nu'] * + self.statistics['Uint' + suffix]**2 / + self.statistics['diss' + suffix])**.5 + self.statistics['Rlambda' + suffix] = (self.statistics['Uint' + suffix] * + self.statistics['lambda' + suffix] / + self.parameters['nu']) + self.statistics['kMeta' + suffix] = (self.statistics['kM'] * + self.statistics['etaK' + suffix]) + if self.parameters['dealias_type'] == 1: + self.statistics['kMeta' + suffix] *= 0.8 + self.statistics['Tint'] = self.statistics['Lint'] / self.statistics['Uint'] + self.statistics['Taylor_microscale'] = self.statistics['lambda'] + return None + def set_plt_style( + self, + style = {'dashes' : (None, None)}): + self.style.update(style) + return None + def convert_complex_from_binary( + self, + field_name = 'vorticity', + iteration = 0, + file_name = None): + """read the 
Fourier representation of a vector field. + + Read the binary file containing iteration ``iteration`` of the + field ``field_name``, and write it in a ``.h5`` file. + """ + data = np.memmap( + os.path.join(self.work_dir, + self.simname + '_{0}_i{1:0>5x}'.format('c' + field_name, iteration)), + dtype = self.ctype, + mode = 'r', + shape = (self.parameters['ny'], + self.parameters['nz'], + self.parameters['nx']//2+1, + 3)) + if type(file_name) == type(None): + file_name = self.simname + '_{0}_i{1:0>5x}.h5'.format('c' + field_name, iteration) + file_name = os.path.join(self.work_dir, file_name) + f = h5py.File(file_name, 'a') + f[field_name + '/complex/{0}'.format(iteration)] = data + f.close() + return None + def write_par( + self, + iter0 = 0, + particle_ic = None): + _fluid_particle_base.write_par(self, iter0 = iter0) + with h5py.File(self.get_data_file_name(), 'r+') as ofile: + kspace = self.get_kspace() + nshells = kspace['nshell'].shape[0] + vec_stat_datasets = ['velocity', 'vorticity'] + scal_stat_datasets = [] + for k in vec_stat_datasets: + time_chunk = 2**20//(8*3*self.parameters['nx']) # FIXME: use proper size of self.dtype + time_chunk = max(time_chunk, 1) + ofile.create_dataset('statistics/xlines/' + k, + (1, self.parameters['nx'], 3), + chunks = (time_chunk, self.parameters['nx'], 3), + maxshape = (None, self.parameters['nx'], 3), + dtype = self.dtype) + for k in vec_stat_datasets: + time_chunk = 2**20//(8*3*3*nshells) + time_chunk = max(time_chunk, 1) + ofile.create_dataset('statistics/spectra/' + k + '_' + k, + (1, nshells, 3, 3), + chunks = (time_chunk, nshells, 3, 3), + maxshape = (None, nshells, 3, 3), + dtype = np.float64) + time_chunk = 2**20//(8*4*10) + time_chunk = max(time_chunk, 1) + a = ofile.create_dataset('statistics/moments/' + k, + (1, 10, 4), + chunks = (time_chunk, 10, 4), + maxshape = (None, 10, 4), + dtype = np.float64) + time_chunk = 2**20//(8*4*self.parameters['histogram_bins']) + time_chunk = max(time_chunk, 1) + 
ofile.create_dataset('statistics/histograms/' + k, + (1, + self.parameters['histogram_bins'], + 4), + chunks = (time_chunk, + self.parameters['histogram_bins'], + 4), + maxshape = (None, + self.parameters['histogram_bins'], + 4), + dtype = np.int64) + ofile['checkpoint'] = int(0) + if self.particle_species == 0: + return None + + if type(particle_ic) == type(None): + pbase_shape = (self.parameters['nparticles'],) + number_of_particles = self.parameters['nparticles'] + else: + pbase_shape = particle_ic.shape[:-1] + assert(particle_ic.shape[-1] == 3) + number_of_particles = 1 + for val in pbase_shape[1:]: + number_of_particles *= val + with h5py.File(self.get_checkpoint_0_fname(), 'a') as ofile: + s = 0 + ofile.create_group('tracers{0}'.format(s)) + ofile.create_group('tracers{0}/rhs'.format(s)) + ofile.create_group('tracers{0}/state'.format(s)) + ofile['tracers{0}/rhs'.format(s)].create_dataset( + '0', + shape = ( + (self.parameters['tracers{0}_integration_steps'.format(s)],) + + pbase_shape + + (3,)), + dtype = np.float) + ofile['tracers{0}/state'.format(s)].create_dataset( + '0', + shape = ( + pbase_shape + + (3,)), + dtype = np.float) + return None + def specific_parser_arguments( + self, + parser): + _fluid_particle_base.specific_parser_arguments(self, parser) + parser.add_argument( + '--src-wd', + type = str, + dest = 'src_work_dir', + default = '') + parser.add_argument( + '--src-simname', + type = str, + dest = 'src_simname', + default = '') + parser.add_argument( + '--src-iteration', + type = int, + dest = 'src_iteration', + default = 0) + parser.add_argument( + '--njobs', + type = int, dest = 'njobs', + default = 1) + parser.add_argument( + '--kMeta', + type = float, + dest = 'kMeta', + default = 2.0) + parser.add_argument( + '--dtfactor', + type = float, + dest = 'dtfactor', + default = 0.5, + help = 'dt is computed as DTFACTOR / N') + parser.add_argument( + '--particle-rand-seed', + type = int, + dest = 'particle_rand_seed', + default = None) + 
parser.add_argument( + '--pclouds', + type = int, + dest = 'pclouds', + default = 1, + help = ('number of particle clouds. Particle "clouds" ' + 'consist of particles distributed according to ' + 'pcloud-type.')) + parser.add_argument( + '--pcloud-type', + choices = ['random-cube', + 'regular-cube'], + dest = 'pcloud_type', + default = 'random-cube') + parser.add_argument( + '--particle-cloud-size', + type = float, + dest = 'particle_cloud_size', + default = 2*np.pi) + parser.add_argument( + '--neighbours', + type = int, + dest = 'neighbours', + default = 1) + parser.add_argument( + '--smoothness', + type = int, + dest = 'smoothness', + default = 1) + return None + def prepare_launch( + self, + args = []): + """Set up reasonable parameters. + + With the default Lundgren forcing applied in the band [2, 4], + we can estimate the dissipation, therefore we can estimate + :math:`k_M \\eta_K` and constrain the viscosity. + + In brief, the command line parameter :math:`k_M \\eta_K` is + used in the following formula for :math:`\\nu` (:math:`N` is the + number of real space grid points per coordinate): + + .. math:: + + \\nu = \\left(\\frac{2 k_M \\eta_K}{N} \\right)^{4/3} + + With this choice, the average dissipation :math:`\\varepsilon` + will be close to 0.4, and the integral scale velocity will be + close to 0.77, yielding the approximate value for the Taylor + microscale and corresponding Reynolds number: + + .. 
math:: + + \\lambda \\approx 4.75\\left(\\frac{2 k_M \\eta_K}{N} \\right)^{4/6}, \\hskip .5in + R_\\lambda \\approx 3.7 \\left(\\frac{N}{2 k_M \\eta_K} \\right)^{4/6} + + """ + opt = _code.prepare_launch(self, args = args) + self.parameters['nu'] = (opt.kMeta * 2 / opt.n)**(4./3) + self.parameters['dt'] = (opt.dtfactor / opt.n) + # custom famplitude for 288 and 576 + if opt.n == 288: + self.parameters['famplitude'] = 0.45 + elif opt.n == 576: + self.parameters['famplitude'] = 0.47 + if ((self.parameters['niter_todo'] % self.parameters['niter_out']) != 0): + self.parameters['niter_out'] = self.parameters['niter_todo'] + if len(opt.src_work_dir) == 0: + opt.src_work_dir = os.path.realpath(opt.work_dir) + self.pars_from_namespace(opt) + return opt + def launch( + self, + args = [], + **kwargs): + opt = self.prepare_launch(args = args) + if type(opt.nparticles) != type(None): + if opt.nparticles > 0: + self.name += '-particles' + self.add_particles( + integration_steps = 4, + neighbours = opt.neighbours, + smoothness = opt.smoothness) + self.fill_up_fluid_code() + self.finalize_code() + self.launch_jobs(opt = opt, **kwargs) + return None + def get_checkpoint_0_fname(self): + return os.path.join( + self.work_dir, + self.simname + '_checkpoint_0.h5') + def generate_tracer_state( + self, + rseed = None, + iteration = 0, + species = 0, + write_to_file = False, + ncomponents = 3, + testing = False, + data = None): + if (type(data) == type(None)): + if not type(rseed) == type(None): + np.random.seed(rseed) + #point with problems: 5.37632864e+00, 6.10414710e+00, 6.25256493e+00] + data = np.zeros(self.parameters['nparticles']*ncomponents).reshape(-1, ncomponents) + data[:, :3] = np.random.random((self.parameters['nparticles'], 3))*2*np.pi + if testing: + #data[0] = np.array([3.26434, 4.24418, 3.12157]) + data[:] = np.array([ 0.72086101, 2.59043666, 6.27501953]) + with h5py.File(self.get_checkpoint_0_fname(), 'a') as data_file: + 
data_file['tracers{0}/state/0'.format(species)][:] = data + if write_to_file: + data.tofile( + os.path.join( + self.work_dir, + "tracers{0}_state_i{1:0>5x}".format(species, iteration))) + return data + def launch_jobs( + self, + opt = None, + particle_initial_condition = None): + if not os.path.exists(os.path.join(self.work_dir, self.simname + '.h5')): + # take care of fields' initial condition + if not os.path.exists(self.get_checkpoint_0_fname()): + f = h5py.File(self.get_checkpoint_0_fname(), 'w') + if len(opt.src_simname) > 0: + source_cp = 0 + src_file = 'not_a_file' + while True: + src_file = os.path.join( + os.path.realpath(opt.src_work_dir), + opt.src_simname + '_checkpoint_{0}.h5'.format(source_cp)) + f0 = h5py.File(src_file, 'r') + if '{0}'.format(opt.src_iteration) in f0['vorticity/complex'].keys(): + f0.close() + break + source_cp += 1 + f['vorticity/complex/{0}'.format(0)] = h5py.ExternalLink( + src_file, + 'vorticity/complex/{0}'.format(opt.src_iteration)) + else: + data = self.generate_vector_field( + write_to_file = False, + spectra_slope = 2.0, + amplitude = 0.05) + f['vorticity/complex/{0}'.format(0)] = data + f.close() + # take care of particles' initial condition + if opt.pclouds > 1: + np.random.seed(opt.particle_rand_seed) + if opt.pcloud_type == 'random-cube': + particle_initial_condition = ( + np.random.random((opt.pclouds, 1, 3))*2*np.pi + + np.random.random((1, self.parameters['nparticles'], 3))*opt.particle_cloud_size) + elif opt.pcloud_type == 'regular-cube': + onedarray = np.linspace( + -opt.particle_cloud_size/2, + opt.particle_cloud_size/2, + self.parameters['nparticles']) + particle_initial_condition = np.zeros( + (opt.pclouds, + self.parameters['nparticles'], + self.parameters['nparticles'], + self.parameters['nparticles'], 3), + dtype = np.float64) + particle_initial_condition[:] = \ + np.random.random((opt.pclouds, 1, 1, 1, 3))*2*np.pi + particle_initial_condition[..., 0] += onedarray[None, None, None, :] + 
particle_initial_condition[..., 1] += onedarray[None, None, :, None] + particle_initial_condition[..., 2] += onedarray[None, :, None, None] + self.write_par( + particle_ic = particle_initial_condition) + if self.parameters['nparticles'] > 0: + data = self.generate_tracer_state( + species = 0, + rseed = opt.particle_rand_seed, + data = particle_initial_condition) + for s in range(1, self.particle_species): + self.generate_tracer_state(species = s, data = data) + self.run( + nb_processes = opt.nb_processes, + nb_threads_per_process = opt.nb_threads_per_process, + njobs = opt.njobs, + hours = opt.minutes // 60, + minutes = opt.minutes % 60, + no_submit = opt.no_submit) + return None + +if __name__ == '__main__': + pass + diff --git a/bfps/NavierStokes.py b/bfps/NavierStokes.py index a9af1c49d1c85a3c41a2ea74f6a3bd9913e19f6e..59fb907c4a79f73dec5b6a8cfcb06d99b0b584bb 100644 --- a/bfps/NavierStokes.py +++ b/bfps/NavierStokes.py @@ -31,6 +31,7 @@ import h5py import argparse import bfps +import bfps.tools from ._code import _code from ._fluid_base import _fluid_particle_base @@ -262,20 +263,6 @@ class NavierStokes(_fluid_particle_base): field_H5T = 'H5T_NATIVE_FLOAT' elif self.dtype == np.float64: field_H5T = 'H5T_NATIVE_DOUBLE' - self.stat_src += self.create_stat_output( - '/statistics/xlines/velocity', - 'fs->rvelocity', - data_type = field_H5T, - size_setup = """ - count[0] = 1; - count[1] = nx; - count[2] = 3; - """, - close_spaces = False) - self.stat_src += self.create_stat_output( - '/statistics/xlines/vorticity', - 'fs->rvorticity', - data_type = field_H5T) if self.QR_stats_on: self.stat_src += self.create_stat_output( '/statistics/moments/trS2_Q_R', @@ -572,8 +559,12 @@ class NavierStokes(_fluid_particle_base): self.particle_stat_src += '}\n' self.particle_species += nspecies return None + def get_cache_file_name(self): + return os.path.join(self.work_dir, self.simname + '_cache.h5') + def get_cache_file(self): + return h5py.File(self.get_postprocess_file_name(), 
'r') def get_postprocess_file_name(self): - return os.path.join(self.work_dir, self.simname + '_postprocess.h5') + return self.get_cache_file_name() def get_postprocess_file(self): return h5py.File(self.get_postprocess_file_name(), 'r') def compute_statistics(self, iter0 = 0, iter1 = None): @@ -589,7 +580,7 @@ class NavierStokes(_fluid_particle_base): tensors, and the enstrophy spectrum is also used to compute the dissipation :math:`\\varepsilon(t)`. These basic quantities are stored in a newly created HDF5 file, - ``simname_postprocess.h5``. + ``simname_cache.h5``. """ if len(list(self.statistics.keys())) > 0: return None @@ -615,7 +606,9 @@ class NavierStokes(_fluid_particle_base): computation_needed = not (ii0 == pp_file['ii0'].value and ii1 == pp_file['ii1'].value) if computation_needed: - for k in pp_file.keys(): + for k in ['t', 'vel_max(t)', 'renergy(t)', + 'energy(t, k)', 'enstrophy(t, k)', + 'ii0', 'ii1', 'iter0', 'iter1']: del pp_file[k] if computation_needed: pp_file['iter0'] = iter0 @@ -651,7 +644,7 @@ class NavierStokes(_fluid_particle_base): """Compute easy stats. Further computation of statistics based on the contents of - ``simname_postprocess.h5``. + ``simname_cache.h5``. 
Standard quantities are as follows (consistent with [Ishihara]_): @@ -751,12 +744,14 @@ class NavierStokes(_fluid_particle_base): vec_stat_datasets = ['velocity', 'vorticity'] scal_stat_datasets = [] for k in vec_stat_datasets: - time_chunk = 2**20//(8*3*self.parameters['nx']) # FIXME: use proper size of self.dtype + time_chunk = 2**20 // ( + self.dtype.itemsize*3* + self.parameters['nx']*self.parameters['ny']) time_chunk = max(time_chunk, 1) - ofile.create_dataset('statistics/xlines/' + k, - (1, self.parameters['nx'], 3), - chunks = (time_chunk, self.parameters['nx'], 3), - maxshape = (None, self.parameters['nx'], 3), + ofile.create_dataset('statistics/0slices/' + k + '/real', + (1, self.parameters['ny'], self.parameters['nx'], 3), + chunks = (time_chunk, self.parameters['ny'], self.parameters['nx'], 3), + maxshape = (None, self.parameters['ny'], self.parameters['nx'], 3), dtype = self.dtype) if self.Lag_acc_stats_on: vec_stat_datasets += ['Lagrangian_acceleration'] @@ -873,33 +868,6 @@ class NavierStokes(_fluid_particle_base): dtype = np.int64) if self.particle_species == 0: return None - def create_particle_dataset( - data_file, - dset_name, - dset_shape, - dset_maxshape, - dset_chunks, - # maybe something more general can be used here - dset_dtype = h5py.h5t.IEEE_F64LE): - # create the dataspace. - space_id = h5py.h5s.create_simple( - dset_shape, - dset_maxshape) - # create the dataset creation property list. - dcpl = h5py.h5p.create(h5py.h5p.DATASET_CREATE) - # set the allocation time to "early". 
- dcpl.set_alloc_time(h5py.h5d.ALLOC_TIME_EARLY) - dcpl.set_chunk(dset_chunks) - # and now create dataset - if sys.version_info[0] == 3: - dset_name = dset_name.encode() - return h5py.h5d.create( - data_file.id, - dset_name, - dset_dtype, - space_id, - dcpl, - h5py.h5p.DEFAULT) if type(particle_ic) == type(None): pbase_shape = (self.parameters['nparticles'],) @@ -907,9 +875,12 @@ class NavierStokes(_fluid_particle_base): else: pbase_shape = particle_ic.shape[:-1] assert(particle_ic.shape[-1] == 3) - number_of_particles = 1 - for val in pbase_shape[1:]: - number_of_particles *= val + if len(pbase_shape) == 1: + number_of_particles = pbase_shape[0] + else: + number_of_particles = 1 + for val in pbase_shape[1:]: + number_of_particles *= val with h5py.File(self.get_particle_file_name(), 'a') as ofile: for s in range(self.particle_species): @@ -924,7 +895,7 @@ class NavierStokes(_fluid_particle_base): chunks = (time_chunk, 1, 1) + dims[3:] else: chunks = (time_chunk, 1) + dims[2:] - create_particle_dataset( + bfps.tools.create_alloc_early_dataset( ofile, '/tracers{0}/rhs'.format(s), dims, maxshape, chunks) @@ -932,25 +903,29 @@ class NavierStokes(_fluid_particle_base): chunks = (time_chunk, 1) + pbase_shape[1:] + (3,) else: chunks = (time_chunk, pbase_shape[0], 3) - create_particle_dataset( + bfps.tools.create_alloc_early_dataset( ofile, '/tracers{0}/state'.format(s), (1,) + pbase_shape + (3,), (h5py.h5s.UNLIMITED,) + pbase_shape + (3,), chunks) - create_particle_dataset( + # "velocity" is sampled, single precision is enough + # for the results we are interested in. 
+ bfps.tools.create_alloc_early_dataset( ofile, '/tracers{0}/velocity'.format(s), (1,) + pbase_shape + (3,), (h5py.h5s.UNLIMITED,) + pbase_shape + (3,), - chunks) + chunks, + dset_dtype = h5py.h5t.IEEE_F32LE) if self.parameters['tracers{0}_acc_on'.format(s)]: - create_particle_dataset( + bfps.tools.create_alloc_early_dataset( ofile, '/tracers{0}/acceleration'.format(s), (1,) + pbase_shape + (3,), (h5py.h5s.UNLIMITED,) + pbase_shape + (3,), - chunks) + chunks, + dset_dtype = h5py.h5t.IEEE_F32LE) return None def add_particle_fields( self, @@ -1065,6 +1040,16 @@ class NavierStokes(_fluid_particle_base): type = float, dest = 'particle_cloud_size', default = 2*np.pi) + parser.add_argument( + '--neighbours', + type = int, + dest = 'neighbours', + default = 1) + parser.add_argument( + '--smoothness', + type = int, + dest = 'smoothness', + default = 1) return None def prepare_launch( self, @@ -1135,26 +1120,43 @@ class NavierStokes(_fluid_particle_base): opt.nparticles = 0 elif type(opt.nparticles) == int: if opt.nparticles > 0: + self.name += '-particles' self.add_3D_rFFTW_field( name = 'rFFTW_acc') self.add_interpolator( name = 'cubic_spline', - neighbours = 1, - smoothness = 1, + neighbours = opt.neighbours, + smoothness = opt.smoothness, class_name = 'rFFTW_interpolator') self.add_particles( integration_steps = [4], interpolator = 'cubic_spline', acc_name = 'rFFTW_acc', class_name = 'rFFTW_distributed_particles') + self.variables += 'hid_t particle_file;\n' + self.main_start += """ + if (myrank == 0) + { + // set caching parameters + hid_t fapl = H5Pcreate(H5P_FILE_ACCESS); + herr_t cache_err = H5Pset_cache(fapl, 0, 521, 134217728, 1.0); + DEBUG_MSG("when setting cache for particles I got %d\\n", cache_err); + sprintf(fname, "%s_particles.h5", simname); + particle_file = H5Fopen(fname, H5F_ACC_RDWR, fapl); + } + """ + self.main_end = ('if (myrank == 0)\n' + + '{\n' + + 'H5Fclose(particle_file);\n' + + '}\n') + self.main_end self.finalize_code() - self.launch_jobs(opt = 
opt) + self.launch_jobs(opt = opt, **kwargs) return None def launch_jobs( self, - opt = None): + opt = None, + particle_initial_condition = None): if not os.path.exists(os.path.join(self.work_dir, self.simname + '.h5')): - particle_initial_condition = None if opt.pclouds > 1: np.random.seed(opt.particle_rand_seed) if opt.pcloud_type == 'random-cube': @@ -1201,7 +1203,11 @@ class NavierStokes(_fluid_particle_base): spectra_slope = 2.0, amplitude = 0.05) self.run( - ncpu = opt.ncpu, - njobs = opt.njobs) + nb_processes = opt.nb_processes, + nb_threads_per_process = opt.nb_threads_per_process, + njobs = opt.njobs, + hours = opt.minutes // 60, + minutes = opt.minutes % 60, + no_submit = opt.no_submit) return None diff --git a/bfps/PP.py b/bfps/PP.py new file mode 100644 index 0000000000000000000000000000000000000000..ec9994bcabf43f30f6c4b2e4f901481812ba678f --- /dev/null +++ b/bfps/PP.py @@ -0,0 +1,642 @@ +####################################################################### +# # +# Copyright 2015 Max Planck Institute # +# for Dynamics and Self-Organization # +# # +# This file is part of bfps. # +# # +# bfps is free software: you can redistribute it and/or modify # +# it under the terms of the GNU General Public License as published # +# by the Free Software Foundation, either version 3 of the License, # +# or (at your option) any later version. # +# # +# bfps is distributed in the hope that it will be useful, # +# but WITHOUT ANY WARRANTY; without even the implied warranty of # +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # +# GNU General Public License for more details. # +# # +# You should have received a copy of the GNU General Public License # +# along with bfps. 
If not, see <http://www.gnu.org/licenses/> # +# # +# Contact: Cristian.Lalescu@ds.mpg.de # +# # +####################################################################### + + + +import os +import sys +import shutil +import subprocess +import argparse +import h5py +import math +import numpy as np +import warnings + +import bfps +from ._code import _code +from bfps import tools + +class PP(_code): + """This class is meant to stitch together the C++ code into a final source file, + compile it, and handle all job launching. + """ + def __init__( + self, + work_dir = './', + simname = 'test'): + _code.__init__( + self, + work_dir = work_dir, + simname = simname) + self.host_info = {'type' : 'cluster', + 'environment' : None, + 'deltanprocs' : 1, + 'queue' : '', + 'mail_address': '', + 'mail_events' : None} + self.generate_default_parameters() + return None + def set_precision( + self, + fluid_dtype): + if fluid_dtype in [np.float32, np.float64]: + self.fluid_dtype = fluid_dtype + elif fluid_dtype in ['single', 'double']: + if fluid_dtype == 'single': + self.fluid_dtype = np.dtype(np.float32) + elif fluid_dtype == 'double': + self.fluid_dtype = np.dtype(np.float64) + self.rtype = self.fluid_dtype + if self.rtype == np.float32: + self.ctype = np.dtype(np.complex64) + self.C_field_dtype = 'float' + self.fluid_precision = 'single' + elif self.rtype == np.float64: + self.ctype = np.dtype(np.complex128) + self.C_field_dtype = 'double' + self.fluid_precision = 'double' + return None + def write_src(self): + self.version_message = ( + '/***********************************************************************\n' + + '* this code automatically generated by bfps\n' + + '* version {0}\n'.format(bfps.__version__) + + '***********************************************************************/\n\n\n') + self.include_list = [ + '"base.hpp"', + '"scope_timer.hpp"', + '"fftw_interface.hpp"', + '"full_code/main_code.hpp"', + '<cmath>', + '<iostream>', + '<hdf5.h>', + '<string>', + '<cstring>', 
+ '<fftw3-mpi.h>', + '<omp.h>', + '<cfenv>', + '<cstdlib>', + '"full_code/{0}.hpp"\n'.format(self.dns_type)] + self.main = """ + int main(int argc, char *argv[]) + {{ + bool fpe = ( + (getenv("BFPS_FPE_OFF") == nullptr) || + (getenv("BFPS_FPE_OFF") != std::string("TRUE"))); + return main_code< {0} >(argc, argv, fpe); + }} + """.format(self.dns_type + '<{0}>'.format(self.C_field_dtype)) + self.includes = '\n'.join( + ['#include ' + hh + for hh in self.include_list]) + with open(self.name + '.cpp', 'w') as outfile: + outfile.write(self.version_message + '\n\n') + outfile.write(self.includes + '\n\n') + outfile.write(self.main + '\n') + return None + def generate_default_parameters(self): + # these parameters are relevant for all DNS classes + self.parameters['dealias_type'] = int(1) + self.parameters['dkx'] = float(1.0) + self.parameters['dky'] = float(1.0) + self.parameters['dkz'] = float(1.0) + self.parameters['nu'] = float(0.1) + self.parameters['fmode'] = int(1) + self.parameters['famplitude'] = float(0.5) + self.parameters['fk0'] = float(2.0) + self.parameters['fk1'] = float(4.0) + self.parameters['forcing_type'] = 'linear' + self.pp_parameters = {} + self.pp_parameters['iteration_list'] = np.zeros(1).astype(np.int) + return None + def get_data_file_name(self): + return os.path.join(self.work_dir, self.simname + '.h5') + def get_data_file(self): + return h5py.File(self.get_data_file_name(), 'r') + def get_particle_file_name(self): + return os.path.join(self.work_dir, self.simname + '_particles.h5') + def get_particle_file(self): + return h5py.File(self.get_particle_file_name(), 'r') + def get_postprocess_file_name(self): + return os.path.join(self.work_dir, self.simname + '_postprocess.h5') + def get_postprocess_file(self): + return h5py.File(self.get_postprocess_file_name(), 'r') + def compute_statistics(self, iter0 = 0, iter1 = None): + """Run basic postprocessing on raw data. 
+ The energy spectrum :math:`E(t, k)` and the enstrophy spectrum + :math:`\\frac{1}{2}\omega^2(t, k)` are computed from the + + .. math:: + + \sum_{k \\leq \\|\\mathbf{k}\\| \\leq k+dk}\\hat{u_i} \\hat{u_j}^*, \\hskip .5cm + \sum_{k \\leq \\|\\mathbf{k}\\| \\leq k+dk}\\hat{\omega_i} \\hat{\\omega_j}^* + + tensors, and the enstrophy spectrum is also used to + compute the dissipation :math:`\\varepsilon(t)`. + These basic quantities are stored in a newly created HDF5 file, + ``simname_postprocess.h5``. + """ + if len(list(self.statistics.keys())) > 0: + return None + self.read_parameters() + with self.get_data_file() as data_file: + if 'moments' not in data_file['statistics'].keys(): + return None + iter0 = min((data_file['statistics/moments/velocity'].shape[0] * + self.parameters['niter_stat']-1), + iter0) + if type(iter1) == type(None): + iter1 = data_file['iteration'].value + else: + iter1 = min(data_file['iteration'].value, iter1) + ii0 = iter0 // self.parameters['niter_stat'] + ii1 = iter1 // self.parameters['niter_stat'] + self.statistics['kshell'] = data_file['kspace/kshell'].value + self.statistics['kM'] = data_file['kspace/kM'].value + self.statistics['dk'] = data_file['kspace/dk'].value + computation_needed = True + pp_file = h5py.File(self.get_postprocess_file_name(), 'a') + if 'ii0' in pp_file.keys(): + computation_needed = not (ii0 == pp_file['ii0'].value and + ii1 == pp_file['ii1'].value) + if computation_needed: + for k in pp_file.keys(): + del pp_file[k] + if computation_needed: + pp_file['iter0'] = iter0 + pp_file['iter1'] = iter1 + pp_file['ii0'] = ii0 + pp_file['ii1'] = ii1 + pp_file['t'] = (self.parameters['dt']* + self.parameters['niter_stat']* + (np.arange(ii0, ii1+1).astype(np.float))) + pp_file['energy(t, k)'] = ( + data_file['statistics/spectra/velocity_velocity'][ii0:ii1+1, :, 0, 0] + + data_file['statistics/spectra/velocity_velocity'][ii0:ii1+1, :, 1, 1] + + data_file['statistics/spectra/velocity_velocity'][ii0:ii1+1, :, 2, 2])/2 + 
pp_file['enstrophy(t, k)'] = ( + data_file['statistics/spectra/vorticity_vorticity'][ii0:ii1+1, :, 0, 0] + + data_file['statistics/spectra/vorticity_vorticity'][ii0:ii1+1, :, 1, 1] + + data_file['statistics/spectra/vorticity_vorticity'][ii0:ii1+1, :, 2, 2])/2 + pp_file['vel_max(t)'] = data_file['statistics/moments/velocity'] [ii0:ii1+1, 9, 3] + pp_file['renergy(t)'] = data_file['statistics/moments/velocity'][ii0:ii1+1, 2, 3]/2 + for k in ['t', + 'energy(t, k)', + 'enstrophy(t, k)', + 'vel_max(t)', + 'renergy(t)']: + if k in pp_file.keys(): + self.statistics[k] = pp_file[k].value + self.compute_time_averages() + return None + def compute_time_averages(self): + """Compute easy stats. + + Further computation of statistics based on the contents of + ``simname_postprocess.h5``. + Standard quantities are as follows + (consistent with [Ishihara]_): + + .. math:: + + U_{\\textrm{int}}(t) = \\sqrt{\\frac{2E(t)}{3}}, \\hskip .5cm + L_{\\textrm{int}}(t) = \\frac{\pi}{2U_{int}^2(t)} \\int \\frac{dk}{k} E(t, k), \\hskip .5cm + T_{\\textrm{int}}(t) = + \\frac{L_{\\textrm{int}}(t)}{U_{\\textrm{int}}(t)} + + \\eta_K = \\left(\\frac{\\nu^3}{\\varepsilon}\\right)^{1/4}, \\hskip .5cm + \\tau_K = \\left(\\frac{\\nu}{\\varepsilon}\\right)^{1/2}, \\hskip .5cm + \\lambda = \\sqrt{\\frac{15 \\nu U_{\\textrm{int}}^2}{\\varepsilon}} + + Re = \\frac{U_{\\textrm{int}} L_{\\textrm{int}}}{\\nu}, \\hskip + .5cm + R_{\\lambda} = \\frac{U_{\\textrm{int}} \\lambda}{\\nu} + + .. [Ishihara] T. Ishihara et al, + *Small-scale statistics in high-resolution direct numerical + simulation of turbulence: Reynolds number dependence of + one-point velocity gradient statistics*. + J. 
Fluid Mech., + **592**, 335-366, 2007 + """ + for key in ['energy', 'enstrophy']: + self.statistics[key + '(t)'] = (self.statistics['dk'] * + np.sum(self.statistics[key + '(t, k)'], axis = 1)) + self.statistics['Uint(t)'] = np.sqrt(2*self.statistics['energy(t)'] / 3) + self.statistics['Lint(t)'] = ((self.statistics['dk']*np.pi / + (2*self.statistics['Uint(t)']**2)) * + np.nansum(self.statistics['energy(t, k)'] / + self.statistics['kshell'][None, :], axis = 1)) + for key in ['energy', + 'enstrophy', + 'vel_max', + 'Uint', + 'Lint']: + if key + '(t)' in self.statistics.keys(): + self.statistics[key] = np.average(self.statistics[key + '(t)'], axis = 0) + for suffix in ['', '(t)']: + self.statistics['diss' + suffix] = (self.parameters['nu'] * + self.statistics['enstrophy' + suffix]*2) + self.statistics['etaK' + suffix] = (self.parameters['nu']**3 / + self.statistics['diss' + suffix])**.25 + self.statistics['tauK' + suffix] = (self.parameters['nu'] / + self.statistics['diss' + suffix])**.5 + self.statistics['Re' + suffix] = (self.statistics['Uint' + suffix] * + self.statistics['Lint' + suffix] / + self.parameters['nu']) + self.statistics['lambda' + suffix] = (15 * self.parameters['nu'] * + self.statistics['Uint' + suffix]**2 / + self.statistics['diss' + suffix])**.5 + self.statistics['Rlambda' + suffix] = (self.statistics['Uint' + suffix] * + self.statistics['lambda' + suffix] / + self.parameters['nu']) + self.statistics['kMeta' + suffix] = (self.statistics['kM'] * + self.statistics['etaK' + suffix]) + if self.parameters['dealias_type'] == 1: + self.statistics['kMeta' + suffix] *= 0.8 + self.statistics['Tint'] = self.statistics['Lint'] / self.statistics['Uint'] + self.statistics['Taylor_microscale'] = self.statistics['lambda'] + return None + def set_plt_style( + self, + style = {'dashes' : (None, None)}): + self.style.update(style) + return None + def convert_complex_from_binary( + self, + field_name = 'vorticity', + iteration = 0, + file_name = None): + """read the 
Fourier representation of a vector field. + + Read the binary file containing iteration ``iteration`` of the + field ``field_name``, and write it in a ``.h5`` file. + """ + data = np.memmap( + os.path.join(self.work_dir, + self.simname + '_{0}_i{1:0>5x}'.format('c' + field_name, iteration)), + dtype = self.ctype, + mode = 'r', + shape = (self.parameters['ny'], + self.parameters['nz'], + self.parameters['nx']//2+1, + 3)) + if type(file_name) == type(None): + file_name = self.simname + '_{0}_i{1:0>5x}.h5'.format('c' + field_name, iteration) + file_name = os.path.join(self.work_dir, file_name) + f = h5py.File(file_name, 'a') + f[field_name + '/complex/{0}'.format(iteration)] = data + f.close() + return None + def job_parser_arguments( + self, + parser): + parser.add_argument( + '--ncpu', + type = int, + dest = 'ncpu', + default = -1) + parser.add_argument( + '--np', '--nprocesses', + metavar = 'NPROCESSES', + help = 'number of mpi processes to use', + type = int, + dest = 'nb_processes', + default = 4) + parser.add_argument( + '--ntpp', '--nthreads-per-process', + type = int, + dest = 'nb_threads_per_process', + metavar = 'NTHREADS_PER_PROCESS', + help = 'number of threads to use per MPI process', + default = 1) + parser.add_argument( + '--no-submit', + action = 'store_true', + dest = 'no_submit') + parser.add_argument( + '--environment', + type = str, + dest = 'environment', + default = None) + parser.add_argument( + '--minutes', + type = int, + dest = 'minutes', + default = 5, + help = 'If environment supports it, this is the requested wall-clock-limit.') + parser.add_argument( + '--njobs', + type = int, dest = 'njobs', + default = 1) + return None + def simulation_parser_arguments( + self, + parser): + parser.add_argument( + '--simname', + type = str, dest = 'simname', + default = 'test') + parser.add_argument( + '--wd', + type = str, dest = 'work_dir', + default = './') + parser.add_argument( + '--precision', + choices = ['single', 'double'], + type = str, + 
default = 'single') + parser.add_argument( + '--iter0', + type = int, + dest = 'iter0', + default = 0) + parser.add_argument( + '--iter1', + type = int, + dest = 'iter1', + default = 0) + return None + def particle_parser_arguments( + self, + parser): + parser.add_argument( + '--particle-rand-seed', + type = int, + dest = 'particle_rand_seed', + default = None) + parser.add_argument( + '--pclouds', + type = int, + dest = 'pclouds', + default = 1, + help = ('number of particle clouds. Particle "clouds" ' + 'consist of particles distributed according to ' + 'pcloud-type.')) + parser.add_argument( + '--pcloud-type', + choices = ['random-cube', + 'regular-cube'], + dest = 'pcloud_type', + default = 'random-cube') + parser.add_argument( + '--particle-cloud-size', + type = float, + dest = 'particle_cloud_size', + default = 2*np.pi) + return None + def add_parser_arguments( + self, + parser): + subparsers = parser.add_subparsers( + dest = 'DNS_class', + help = 'type of simulation to run') + subparsers.required = True + parser_native_binary_to_hdf5 = subparsers.add_parser( + 'native_binary_to_hdf5', + help = 'convert native binary to hdf5') + self.simulation_parser_arguments(parser_native_binary_to_hdf5) + self.job_parser_arguments(parser_native_binary_to_hdf5) + self.parameters_to_parser_arguments(parser_native_binary_to_hdf5) + parser_get_rfields = subparsers.add_parser( + 'get_rfields', + help = 'get real space velocity field') + self.simulation_parser_arguments(parser_get_rfields) + self.job_parser_arguments(parser_get_rfields) + self.parameters_to_parser_arguments(parser_get_rfields) + return None + def prepare_launch( + self, + args = []): + """Set up reasonable parameters. + + With the default Lundgren forcing applied in the band [2, 4], + we can estimate the dissipation, therefore we can estimate + :math:`k_M \\eta_K` and constrain the viscosity. 
+ + In brief, the command line parameter :math:`k_M \\eta_K` is + used in the following formula for :math:`\\nu` (:math:`N` is the + number of real space grid points per coordinate): + + .. math:: + + \\nu = \\left(\\frac{2 k_M \\eta_K}{N} \\right)^{4/3} + + With this choice, the average dissipation :math:`\\varepsilon` + will be close to 0.4, and the integral scale velocity will be + close to 0.77, yielding the approximate value for the Taylor + microscale and corresponding Reynolds number: + + .. math:: + + \\lambda \\approx 4.75\\left(\\frac{2 k_M \\eta_K}{N} \\right)^{4/6}, \\hskip .5in + R_\\lambda \\approx 3.7 \\left(\\frac{N}{2 k_M \\eta_K} \\right)^{4/6} + + """ + opt = _code.prepare_launch(self, args = args) + self.set_precision(opt.precision) + self.dns_type = opt.DNS_class + self.name = self.dns_type + '-' + self.fluid_precision + '-v' + bfps.__version__ + # merge parameters if needed + for k in self.pp_parameters.keys(): + self.parameters[k] = self.pp_parameters[k] + self.pars_from_namespace(opt) + niter_out = self.get_data_file()['parameters/niter_todo'].value + self.pp_parameters['iteration_list'] = np.arange( + opt.iter0, opt.iter1+niter_out, niter_out, dtype = np.int) + return opt + def launch( + self, + args = [], + **kwargs): + opt = self.prepare_launch(args = args) + self.launch_jobs(opt = opt, **kwargs) + return None + def get_checkpoint_0_fname(self): + return os.path.join( + self.work_dir, + self.simname + '_checkpoint_0.h5') + def generate_tracer_state( + self, + rseed = None, + species = 0): + with h5py.File(self.get_checkpoint_0_fname(), 'a') as data_file: + dset = data_file[ + 'tracers{0}/state/0'.format(species)] + if not type(rseed) == type(None): + np.random.seed(rseed) + nn = self.parameters['nparticles'] + cc = int(0) + batch_size = int(1e6) + while nn > 0: + if nn > batch_size: + dset[cc*batch_size:(cc+1)*batch_size] = np.random.random( + (batch_size, 3))*2*np.pi + nn -= batch_size + else: + dset[cc*batch_size:cc*batch_size+nn] = 
np.random.random( + (nn, 3))*2*np.pi + nn = 0 + cc += 1 + return None + def generate_vector_field( + self, + rseed = 7547, + spectra_slope = 1., + amplitude = 1., + iteration = 0, + field_name = 'vorticity', + write_to_file = False, + # to switch to constant field, use generate_data_3D_uniform + # for scalar_generator + scalar_generator = tools.generate_data_3D): + """generate vector field. + + The generated field is not divergence free, but it has the proper + shape. + + :param rseed: seed for random number generator + :param spectra_slope: spectrum of field will look like k^(-p) + :param amplitude: all amplitudes are multiplied with this value + :param iteration: the field is written at this iteration + :param field_name: the name of the field being generated + :param write_to_file: should we write the field to file? + :param scalar_generator: which function to use for generating the + individual components. + Possible values: bfps.tools.generate_data_3D, + bfps.tools.generate_data_3D_uniform + :type rseed: int + :type spectra_slope: float + :type amplitude: float + :type iteration: int + :type field_name: str + :type write_to_file: bool + :type scalar_generator: function + + :returns: ``Kdata``, a complex valued 4D ``numpy.array`` that uses the + transposed FFTW layout. + Kdata[ky, kz, kx, i] is the amplitude of mode (kx, ky, kz) for + the i-th component of the field. + (i.e. x is the fastest index and z the slowest index in the + real-space representation). 
+ """ + np.random.seed(rseed) + Kdata00 = scalar_generator( + self.parameters['nz']//2, + self.parameters['ny']//2, + self.parameters['nx']//2, + p = spectra_slope, + amplitude = amplitude).astype(self.ctype) + Kdata01 = scalar_generator( + self.parameters['nz']//2, + self.parameters['ny']//2, + self.parameters['nx']//2, + p = spectra_slope, + amplitude = amplitude).astype(self.ctype) + Kdata02 = scalar_generator( + self.parameters['nz']//2, + self.parameters['ny']//2, + self.parameters['nx']//2, + p = spectra_slope, + amplitude = amplitude).astype(self.ctype) + Kdata0 = np.zeros( + Kdata00.shape + (3,), + Kdata00.dtype) + Kdata0[..., 0] = Kdata00 + Kdata0[..., 1] = Kdata01 + Kdata0[..., 2] = Kdata02 + Kdata1 = tools.padd_with_zeros( + Kdata0, + self.parameters['nz'], + self.parameters['ny'], + self.parameters['nx']) + if write_to_file: + Kdata1.tofile( + os.path.join(self.work_dir, + self.simname + "_c{0}_i{1:0>5x}".format(field_name, iteration))) + return Kdata1 + def copy_complex_field( + self, + src_file_name, + src_dset_name, + dst_file, + dst_dset_name, + make_link = True): + # I define a min_shape thingie, but for now I only trust this method for + # the case of increasing/decreasing by the same factor in all directions. 
+ # in principle we could write something more generic, but i'm not sure + # how complicated that would be + dst_shape = (self.parameters['nz'], + self.parameters['ny'], + (self.parameters['nx']+2) // 2, + 3) + src_file = h5py.File(src_file_name, 'r') + if (src_file[src_dset_name].shape == dst_shape): + if make_link and (src_file[src_dset_name].dtype == self.ctype): + dst_file[dst_dset_name] = h5py.ExternalLink( + src_file_name, + src_dset_name) + else: + dst_file.create_dataset( + dst_dset_name, + shape = dst_shape, + dtype = self.ctype, + fillvalue = 0.0) + for kz in range(src_file[src_dset_name].shape[0]): + dst_file[dst_dset_name][kz] = src_file[src_dset_name][kz] + else: + print('aloha') + min_shape = (min(dst_shape[0], src_file[src_dset_name].shape[0]), + min(dst_shape[1], src_file[src_dset_name].shape[1]), + min(dst_shape[2], src_file[src_dset_name].shape[2]), + 3) + print(self.ctype) + dst_file.create_dataset( + dst_dset_name, + shape = dst_shape, + dtype = np.dtype(self.ctype), + fillvalue = complex(0)) + for kz in range(min_shape[0]): + dst_file[dst_dset_name][kz,:min_shape[1], :min_shape[2]] = \ + src_file[src_dset_name][kz, :min_shape[1], :min_shape[2]] + return None + def launch_jobs( + self, + opt = None, + particle_initial_condition = None): + self.rewrite_par( + group = self.dns_type, + parameters = self.pp_parameters) + self.run( + nb_processes = opt.nb_processes, + nb_threads_per_process = opt.nb_threads_per_process, + njobs = opt.njobs, + hours = opt.minutes // 60, + minutes = opt.minutes % 60, + no_submit = opt.no_submit, + err_file = 'err_' + self.dns_type, + out_file = 'out_' + self.dns_type) + return None + diff --git a/bfps/__init__.py b/bfps/__init__.py index 4a90f95268cffe3b0c2e1d68d7f4763a4c142e84..6c220e69d877670206e411c5a0f1f1ae78c04d33 100644 --- a/bfps/__init__.py +++ b/bfps/__init__.py @@ -46,7 +46,10 @@ bfpsfolder = os.path.join(homefolder, '.config/', 'bfps') sys.path.append(bfpsfolder) from host_information import host_info +from 
.DNS import DNS from .FluidConvert import FluidConvert from .FluidResize import FluidResize from .NavierStokes import NavierStokes +from .NSVorticityEquation import NSVorticityEquation +#import test diff --git a/bfps/__main__.py b/bfps/__main__.py index a26d84d0e918cebe1a9351ca20b5249418d6a3c6..fa30289b430c7db9508c2e9fae796f6fe47b8eb6 100644 --- a/bfps/__main__.py +++ b/bfps/__main__.py @@ -28,7 +28,10 @@ import sys import argparse import bfps +from .DNS import DNS +from .PP import PP from .NavierStokes import NavierStokes +from .NSVorticityEquation import NSVorticityEquation from .FluidResize import FluidResize from .FluidConvert import FluidConvert from .NSManyParticles import NSManyParticles @@ -45,6 +48,12 @@ def main(): 'NS', 'NS-single', 'NS-double'] + NSVEoptions = ['NSVorticityEquation', + 'NSVorticityEquation-single', + 'NSVorticityEquation-double', + 'NSVE', + 'NSVE-single', + 'NSVE-double'] FRoptions = ['FluidResize', 'FluidResize-single', 'FluidResize-double', @@ -57,19 +66,34 @@ def main(): 'NSManyParticles-double'] parser.add_argument( 'base_class', - choices = NSoptions + FRoptions + FCoptions + NSMPopt, + choices = ['DNS', 'PP'] + + NSoptions + + NSVEoptions + + FRoptions + + FCoptions + + NSMPopt, type = str) # first option is the choice of base class or -h or -v # all other options are passed on to the base_class instance opt = parser.parse_args(sys.argv[1:2]) # error is thrown if first option is not a base class, so launch # cannot be executed by mistake. 
+ if opt.base_class == 'DNS': + c = DNS() + c.launch(args = sys.argv[2:]) + return None + if opt.base_class == 'PP': + c = PP() + c.launch(args = sys.argv[2:]) + return None if 'double' in opt.base_class: precision = 'double' else: precision = 'single' if opt.base_class in NSoptions: base_class = NavierStokes + if opt.base_class in NSVEoptions: + base_class = NSVorticityEquation elif opt.base_class in FRoptions: base_class = FluidResize elif opt.base_class in FCoptions: diff --git a/bfps/_base.py b/bfps/_base.py index 2204fe666402eeccc4d815b6381d6b5060a0e7ac..037261d3f1c6ea7af7fc58b79484ed461f84a28b 100644 --- a/bfps/_base.py +++ b/bfps/_base.py @@ -56,7 +56,9 @@ class _base(object): key = sorted(list(parameters.keys())) src_txt = '' for i in range(len(key)): - if type(parameters[key[i]]) == int: + if (type(parameters[key[i]]) == int and parameters[key[i]] >= 1<<30): + src_txt += 'long long int ' + key[i] + ';\n' + elif type(parameters[key[i]]) == int: src_txt += 'int ' + key[i] + ';\n' elif type(parameters[key[i]]) == str: src_txt += 'char ' + key[i] + '[{0}];\n'.format(self.string_length) @@ -74,31 +76,46 @@ class _base(object): self, parameters = None, function_suffix = '', - file_group = 'parameters'): + template_class = '', + template_prefix = '', + file_group = 'parameters', + just_declaration = False, + simname_variable = 'simname', + prepend_this = False): if type(parameters) == type(None): parameters = self.parameters key = sorted(list(parameters.keys())) - src_txt = ('int read_parameters' + function_suffix + '()\n{\n' + - 'hid_t parameter_file;\n' + - 'hid_t dset, memtype, space;\n' + - 'char fname[256];\n' + - 'hsize_t dims[1];\n' + - 'char *string_data;\n' + - 'sprintf(fname, "%s.h5", simname);\n' + - 'parameter_file = H5Fopen(fname, H5F_ACC_RDONLY, H5P_DEFAULT);\n') + src_txt = (template_prefix + + 'int ' + + template_class + + 'read_parameters' + function_suffix + '()') + if just_declaration: + return src_txt + ';\n' + src_txt += ('\n{\n' + + 'hid_t 
parameter_file;\n' + + 'hid_t dset, memtype, space;\n' + + 'char fname[256];\n' + + 'hsize_t dims[1];\n' + + 'char *string_data;\n' + + 'sprintf(fname, "%s.h5", {0});\n'.format(simname_variable) + + 'parameter_file = H5Fopen(fname, H5F_ACC_RDONLY, H5P_DEFAULT);\n') + key_prefix = '' + if prepend_this: + key_prefix = 'this->' for i in range(len(key)): src_txt += 'dset = H5Dopen(parameter_file, "/{0}/{1}", H5P_DEFAULT);\n'.format( file_group, key[i]) - if type(parameters[key[i]]) == int: - src_txt += 'H5Dread(dset, H5T_NATIVE_INT, H5S_ALL, H5S_ALL, H5P_DEFAULT, &{0});\n'.format(key[i]) + if (type(parameters[key[i]]) == int and parameters[key[i]] >= 1<<30): + src_txt += 'H5Dread(dset, H5T_NATIVE_LLONG, H5S_ALL, H5S_ALL, H5P_DEFAULT, &{0});\n'.format(key_prefix + key[i]) + elif type(parameters[key[i]]) == int: + src_txt += 'H5Dread(dset, H5T_NATIVE_INT, H5S_ALL, H5S_ALL, H5P_DEFAULT, &{0});\n'.format(key_prefix + key[i]) elif type(parameters[key[i]]) == str: src_txt += ('space = H5Dget_space(dset);\n' + 'memtype = H5Dget_type(dset);\n' + - 'H5Sget_simple_extent_dims(space, dims, NULL);\n' + - 'string_data = (char*)malloc(dims[0]*sizeof(char));\n' + + 'string_data = (char*)malloc(256);\n' + 'H5Dread(dset, memtype, H5S_ALL, H5S_ALL, H5P_DEFAULT, &string_data);\n' + - 'sprintf({0}, "%s", string_data);\n'.format(key[i]) + - 'free(string_data);\n' + + 'sprintf({0}, "%s", string_data);\n'.format(key_prefix + key[i]) + + 'free(string_data);\n' 'H5Sclose(space);\n' + 'H5Tclose(memtype);\n') elif type(parameters[key[i]]) == np.ndarray: @@ -106,10 +123,10 @@ class _base(object): template_par = 'int' elif parameters[key[i]].dtype == np.float64: template_par = 'double' - src_txt += '{0} = read_vector<{1}>(parameter_file, "/{2}/{0}");\n'.format( - key[i], template_par, file_group) + src_txt += '{0} = hdf5_tools::read_vector<{1}>(parameter_file, "/{2}/{0}");\n'.format( + key_prefix + key[i], template_par, file_group) else: - src_txt += 'H5Dread(dset, H5T_NATIVE_DOUBLE, H5S_ALL, 
H5S_ALL, H5P_DEFAULT, &{0});\n'.format(key[i]) + src_txt += 'H5Dread(dset, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, &{0});\n'.format(key_prefix + key[i]) src_txt += 'H5Dclose(dset);\n' src_txt += 'H5Fclose(parameter_file);\n' src_txt += 'return 0;\n}\n' # finishing read_parameters @@ -123,7 +140,7 @@ class _base(object): elif type(self.parameters[key[i]]) == str: src_txt += 'DEBUG_MSG("'+ key[i] + ' = %s\\n", ' + key[i] + ');\n' elif type(self.parameters[key[i]]) == np.ndarray: - src_txt += ('for (int array_counter=0; array_counter<' + + src_txt += ('for (unsigned int array_counter=0; array_counter<' + key[i] + '.size(); array_counter++)\n' + '{\n' + @@ -158,9 +175,12 @@ class _base(object): def rewrite_par( self, group = None, - parameters = None): + parameters = None, + file_name = None): assert(group != 'parameters') - ofile = h5py.File(os.path.join(self.work_dir, self.simname + '.h5'), 'r+') + if type(file_name) == type(None): + file_name = os.path.join(self.work_dir, self.simname + '.h5') + ofile = h5py.File(file_name, 'a') for k in parameters.keys(): if group not in ofile.keys(): ofile.create_group(group) @@ -214,9 +234,10 @@ class _base(object): if type(parameters) == type(None): parameters = self.parameters cmd_line_pars = vars(opt) - for k in ['nx', 'ny', 'nz']: - if type(cmd_line_pars[k]) == type(None): - cmd_line_pars[k] = opt.n + if 'n' in cmd_line_pars.keys(): + for k in ['nx', 'ny', 'nz']: + if type(cmd_line_pars[k]) == type(None): + cmd_line_pars[k] = opt.n for k in parameters.keys(): if k in cmd_line_pars.keys(): if not type(cmd_line_pars[k]) == type(None): @@ -250,8 +271,27 @@ class _base(object): help = 'code is run by default in a grid of NxNxN') parser.add_argument( '--ncpu', - type = int, dest = 'ncpu', - default = 2) + type = int, + dest = 'ncpu', + default = -1) + parser.add_argument( + '--np', '--nprocesses', + metavar = 'NPROCESSES', + help = 'number of mpi processes to use', + type = int, + dest = 'nb_processes', + default = 4) + 
parser.add_argument( + '--ntpp', '--nthreads-per-process', + type = int, + dest = 'nb_threads_per_process', + metavar = 'NTHREADS_PER_PROCESS', + help = 'number of threads to use per MPI process', + default = 1) + parser.add_argument( + '--no-submit', + action = 'store_true', + dest = 'no_submit') parser.add_argument( '--simname', type = str, dest = 'simname', @@ -265,6 +305,13 @@ class _base(object): '--wd', type = str, dest = 'work_dir', default = './') + parser.add_argument( + '--minutes', + type = int, + dest = 'minutes', + default = 5, + help = 'If environment supports it, this is the requested wall-clock-limit.') + return None def parameters_to_parser_arguments( self, diff --git a/bfps/_code.py b/bfps/_code.py index 314681ada3bb81e5700fdb7f1307c9af96fa5011..22bcd9101ff6591e00f0455c1de1af2698c5f842 100644 --- a/bfps/_code.py +++ b/bfps/_code.py @@ -32,6 +32,7 @@ import argparse import h5py from datetime import datetime import math +import warnings import bfps from ._base import _base @@ -45,19 +46,25 @@ class _code(_base): work_dir = './', simname = 'test'): _base.__init__(self, work_dir = work_dir, simname = simname) - self.version_message = ('/***********************************************************************\n' + - '* this code automatically generated by bfps\n' + - '* version {0}\n'.format(bfps.__version__) + - '***********************************************************************/\n\n\n') + self.version_message = ( + '/***********************************************************************\n' + + '* this code automatically generated by bfps\n' + + '* version {0}\n'.format(bfps.__version__) + + '***********************************************************************/\n\n\n') self.includes = """ //begincpp #include "base.hpp" #include "fluid_solver.hpp" + #include "scope_timer.hpp" + #include "fftw_interface.hpp" #include <iostream> #include <hdf5.h> #include <string> #include <cstring> #include <fftw3-mpi.h> + #include <omp.h> + #include <fenv.h> + 
#include <cstdlib> //endcpp """ self.variables = 'int myrank, nprocs;\n' @@ -69,23 +76,58 @@ class _code(_base): //begincpp int main(int argc, char *argv[]) { - MPI_Init(&argc, &argv); - MPI_Comm_rank(MPI_COMM_WORLD, &myrank); - MPI_Comm_size(MPI_COMM_WORLD, &nprocs); - fftw_mpi_init(); - fftwf_mpi_init(); + if(getenv("BFPS_FPE_OFF") == nullptr || getenv("BFPS_FPE_OFF") != std::string("TRUE")){ + feenableexcept(FE_INVALID | FE_OVERFLOW); + } + else{ + std::cout << "FPE have been turned OFF" << std::endl; + } if (argc != 2) { std::cerr << "Wrong number of command line arguments. Stopping." << std::endl; MPI_Finalize(); return EXIT_SUCCESS; } + #ifdef NO_FFTWOMP + MPI_Init(&argc, &argv); + MPI_Comm_rank(MPI_COMM_WORLD, &myrank); + MPI_Comm_size(MPI_COMM_WORLD, &nprocs); + fftw_mpi_init(); + fftwf_mpi_init(); + DEBUG_MSG("There are %d processes\\n", nprocs); + #else + int mpiprovided; + MPI_Init_thread(&argc, &argv, MPI_THREAD_FUNNELED, &mpiprovided); + assert(mpiprovided >= MPI_THREAD_FUNNELED); + MPI_Comm_rank(MPI_COMM_WORLD, &myrank); + MPI_Comm_size(MPI_COMM_WORLD, &nprocs); + const int nbThreads = omp_get_max_threads(); + DEBUG_MSG("Number of threads for the FFTW = %d\\n", nbThreads); + if (nbThreads > 1){ + fftw_init_threads(); + fftwf_init_threads(); + } + fftw_mpi_init(); + fftwf_mpi_init(); + DEBUG_MSG("There are %d processes and %d threads\\n", nprocs, nbThreads); + if (nbThreads > 1){ + fftw_plan_with_nthreads(nbThreads); + fftwf_plan_with_nthreads(nbThreads); + } + #endif strcpy(simname, argv[1]); sprintf(fname, "%s.h5", simname); parameter_file = H5Fopen(fname, H5F_ACC_RDONLY, H5P_DEFAULT); Cdset = H5Dopen(parameter_file, "iteration", H5P_DEFAULT); - H5Dread(Cdset, H5T_NATIVE_INT, H5S_ALL, H5S_ALL, H5P_DEFAULT, &iteration); - DEBUG_MSG("simname is %s and iteration is %d\\n", simname, iteration); + H5Dread( + Cdset, + H5T_NATIVE_INT, + H5S_ALL, + H5S_ALL, + H5P_DEFAULT, + &iteration); + DEBUG_MSG("simname is %s and iteration is %d\\n", + simname, 
iteration); H5Dclose(Cdset); H5Fclose(parameter_file); read_parameters(); @@ -97,12 +139,16 @@ class _code(_base): DEBUG_MSG("when setting stat_file cache I got %d\\n", cache_err); stat_file = H5Fopen(fname, H5F_ACC_RDWR, fapl); } + { + TIMEZONE("code::main_start"); //endcpp """ for ostream in ['cout', 'cerr']: - self.main_start += 'if (myrank == 0) std::{1} << "{0}" << std::endl;'.format(self.version_message, ostream).replace('\n', '\\n') + '\n' + self.main_start += 'if (myrank == 0) std::{1} << "{0}" << std::endl;'.format( + self.version_message, ostream).replace('\n', '\\n') + '\n' self.main_end = """ //begincpp + } // clean up if (myrank == 0) { @@ -113,6 +159,16 @@ class _code(_base): } fftwf_mpi_cleanup(); fftw_mpi_cleanup(); + #ifndef NO_FFTWOMP + if (nbThreads > 1){ + fftw_cleanup_threads(); + fftwf_cleanup_threads(); + } + #endif + #ifdef USE_TIMINGOUTPUT + global_timer_manager.show(MPI_COMM_WORLD); + global_timer_manager.showHtml(MPI_COMM_WORLD); + #endif MPI_Finalize(); return EXIT_SUCCESS; } @@ -147,15 +203,21 @@ class _code(_base): libraries = ['bfps'] libraries += bfps.install_info['libraries'] - command_strings = ['g++'] + command_strings = [bfps.install_info['compiler']] command_strings += [self.name + '.cpp', '-o', self.name] command_strings += bfps.install_info['extra_compile_args'] command_strings += ['-I' + idir for idir in bfps.install_info['include_dirs']] command_strings.append('-I' + bfps.header_dir) command_strings += ['-L' + ldir for ldir in bfps.install_info['library_dirs']] + command_strings += ['-Wl,-rpath=' + ldir for ldir in bfps.install_info['library_dirs']] command_strings.append('-L' + bfps.lib_dir) + command_strings.append('-Wl,-rpath=' + bfps.lib_dir) + for libname in libraries: command_strings += ['-l' + libname] + + command_strings += ['-fopenmp'] + self.write_src() print('compiling code with command\n' + ' '.join(command_strings)) return subprocess.call(command_strings) @@ -165,12 +227,14 @@ class _code(_base): 
self.host_info.update(host_info) return None def run(self, - ncpu = 2, + nb_processes, + nb_threads_per_process, out_file = 'out_file', err_file = 'err_file', - hours = 1, - minutes = 0, - njobs = 1): + hours = 0, + minutes = 10, + njobs = 1, + no_submit = False): self.read_parameters() with h5py.File(os.path.join(self.work_dir, self.simname + '.h5'), 'r') as data_file: iter0 = data_file['iteration'].value @@ -185,12 +249,16 @@ class _code(_base): assert(self.compile_code() == 0) if self.work_dir != os.path.realpath(os.getcwd()): shutil.copy(self.name, self.work_dir) + if 'niter_todo' not in self.parameters.keys(): + self.parameters['niter_todo'] = 1 current_dir = os.getcwd() os.chdir(self.work_dir) os.chdir(current_dir) command_atoms = ['mpirun', '-np', - '{0}'.format(ncpu), + '{0}'.format(nb_processes), + '-x', + 'OMP_NUM_THREADS={0}'.format(nb_threads_per_process), './' + self.name, self.simname] if self.host_info['type'] == 'cluster': @@ -200,9 +268,9 @@ class _code(_base): qsub_script_name = 'run_' + suffix + '.sh' self.write_sge_file( file_name = os.path.join(self.work_dir, qsub_script_name), - nprocesses = ncpu, + nprocesses = nb_processes*nb_threads_per_process, name_of_run = suffix, - command_atoms = command_atoms[3:], + command_atoms = command_atoms[5:], hours = hours, minutes = minutes, out_file = out_file + '_' + suffix, @@ -214,10 +282,70 @@ class _code(_base): subprocess.call(qsub_atoms + [qsub_script_name]) os.chdir(current_dir) job_name_list.append(suffix) + if self.host_info['type'] == 'SLURM': + job_id_list = [] + for j in range(njobs): + suffix = self.simname + '_{0}'.format(iter0 + j*self.parameters['niter_todo']) + qsub_script_name = 'run_' + suffix + '.sh' + self.write_slurm_file( + file_name = os.path.join(self.work_dir, qsub_script_name), + name_of_run = suffix, + command_atoms = command_atoms[5:], + hours = hours, + minutes = minutes, + out_file = out_file + '_' + suffix, + err_file = err_file + '_' + suffix, + nb_mpi_processes = 
nb_processes, + nb_threads_per_process = nb_threads_per_process) + os.chdir(self.work_dir) + qsub_atoms = ['sbatch'] + + if not no_submit: + if len(job_id_list) >= 1: + qsub_atoms += ['--dependency=afterok:{0}'.format(job_id_list[-1])] + p = subprocess.Popen( + qsub_atoms + [qsub_script_name], + stdout = subprocess.PIPE) + out, err = p.communicate() + p.terminate() + job_id_list.append(int(out.split()[-1])) + os.chdir(current_dir) + elif self.host_info['type'] == 'IBMLoadLeveler': + suffix = self.simname + '_{0}'.format(iter0) + job_script_name = 'run_' + suffix + '.sh' + if (njobs == 1): + self.write_IBMLoadLeveler_file_single_job( + file_name = os.path.join(self.work_dir, job_script_name), + name_of_run = suffix, + command_atoms = command_atoms[5:], + hours = hours, + minutes = minutes, + out_file = out_file + '_' + suffix, + err_file = err_file + '_' + suffix, + nb_mpi_processes = nb_processes, + nb_threads_per_process = nb_threads_per_process) + else: + self.write_IBMLoadLeveler_file_many_job( + file_name = os.path.join(self.work_dir, job_script_name), + name_of_run = suffix, + command_atoms = command_atoms[5:], + hours = hours, + minutes = minutes, + out_file = out_file + '_' + suffix, + err_file = err_file + '_' + suffix, + njobs = njobs, + nb_mpi_processes = nb_processes, + nb_threads_per_process = nb_threads_per_process) + submit_atoms = ['llsubmit'] + + if not no_submit: + subprocess.call(submit_atoms + [os.path.join(self.work_dir, job_script_name)]) + elif self.host_info['type'] == 'pc': os.chdir(self.work_dir) - os.environ['LD_LIBRARY_PATH'] += ':{0}'.format(bfps.lib_dir) - print('added to LD_LIBRARY_PATH the location {0}'.format(bfps.lib_dir)) + if os.getenv('LD_LIBRARY_PATH') != None: + os.environ['LD_LIBRARY_PATH'] += ':{0}'.format(bfps.lib_dir) + print('added to LD_LIBRARY_PATH the location {0}'.format(bfps.lib_dir)) for j in range(njobs): suffix = self.simname + '_{0}'.format(iter0 + j*self.parameters['niter_todo']) print('running code with 
command\n' + ' '.join(command_atoms)) @@ -226,6 +354,195 @@ class _code(_base): stderr = open(err_file + '_' + suffix, 'w')) os.chdir(current_dir) return None + def write_IBMLoadLeveler_file_single_job( + self, + file_name = None, + nprocesses = None, + name_of_run = None, + command_atoms = [], + hours = None, + minutes = None, + out_file = None, + err_file = None, + nb_mpi_processes = None, + nb_threads_per_process = None): + + script_file = open(file_name, 'w') + script_file.write('# @ shell=/bin/bash\n') + # error file + if type(err_file) == type(None): + err_file = 'err.job.$(jobid)' + script_file.write('# @ error = ' + os.path.join(self.work_dir, err_file) + '\n') + # output file + if type(out_file) == type(None): + out_file = 'out.job.$(jobid)' + script_file.write('# @ output = ' + os.path.join(self.work_dir, out_file) + '\n') + + # If Ibm is used should be : script_file.write('# @ job_type = parallel\n') + script_file.write('# @ job_type = MPICH\n') + + script_file.write('# @ node_usage = not_shared\n') + script_file.write('# @ notification = complete\n') + script_file.write('# @ notify_user = $(user)@rzg.mpg.de\n') + + nb_cpus_per_node = self.host_info['deltanprocs'] + assert(isinstance(nb_cpus_per_node, int) and nb_cpus_per_node >= 1, + 'nb_cpus_per_node is {}'.format(nb_cpus_per_node)) + + # No more threads than the number of cores + assert(nb_threads_per_process <= nb_cpus_per_node, + "Cannot use more threads ({} asked) than the number of cores ({})".format( + nb_threads_per_process, nb_cpus_per_node)) + # Warn if some core will not be ued + if nb_cpus_per_node%nb_threads_per_process != 0: + warnings.warn("The number of threads is smaller than the number of cores (machine will be underused)", + UserWarning) + + nb_cpus = nb_mpi_processes*nb_threads_per_process + if (nb_cpus < nb_cpus_per_node): + # in case we use only a few process on a single node + nb_nodes = 1 + nb_processes_per_node = nb_mpi_processes + first_node_tasks = nb_mpi_processes + else: + 
nb_nodes = int((nb_cpus+nb_cpus_per_node-1) // nb_cpus_per_node) + # if more than one node we requiere to have a multiple of deltanprocs + nb_processes_per_node = int(nb_cpus_per_node // nb_threads_per_process) + first_node_tasks = int(nb_mpi_processes - (nb_nodes-1)*nb_processes_per_node) + + script_file.write('# @ resources = ConsumableCpus({})\n'.format(nb_threads_per_process)) + script_file.write('# @ network.MPI = sn_all,not_shared,us\n') + script_file.write('# @ wall_clock_limit = {0}:{1:0>2d}:00\n'.format(hours, minutes)) + assert(type(self.host_info['environment']) != type(None)) + script_file.write('# @ node = {0}\n'.format(nb_nodes)) + script_file.write('# @ tasks_per_node = {0}\n'.format(nb_processes_per_node)) + if (first_node_tasks > 0): + script_file.write('# @ first_node_tasks = {0}\n'.format(first_node_tasks)) + script_file.write('# @ queue\n') + + + script_file.write('source ~/.config/bfps/bashrc\n') + script_file.write('module li\n') + script_file.write('export OMP_NUM_THREADS={}\n'.format(nb_threads_per_process)) + + script_file.write('LD_LIBRARY_PATH=' + + ':'.join([bfps.lib_dir] + bfps.install_info['library_dirs']) + + ':${LD_LIBRARY_PATH}\n') + script_file.write('echo "Start time is `date`"\n') + script_file.write('export HTMLOUTPUT={}.html\n'.format(command_atoms[-1])) + script_file.write('cd ' + self.work_dir + '\n') + + script_file.write('export KMP_AFFINITY=compact,verbose\n') + script_file.write('export I_MPI_PIN_DOMAIN=omp\n') + script_file.write('mpiexec.hydra ' + + ' -np {} '.format(nb_mpi_processes) + + ' -ppn {} '.format(nb_processes_per_node) + + ' -ordered-output -prepend-rank ' + + os.path.join( + self.work_dir, + command_atoms[0]) + + ' ' + + ' '.join(command_atoms[1:]) + + '\n') + + script_file.write('echo "End time is `date`"\n') + script_file.write('exit 0\n') + script_file.close() + return None + def write_IBMLoadLeveler_file_many_job( + self, + file_name = None, + nprocesses = None, + name_of_run = None, + command_atoms = 
[], + hours = None, + minutes = None, + out_file = None, + err_file = None, + njobs = 2, + nb_mpi_processes = None, + nb_threads_per_process = None): + assert(type(self.host_info['environment']) != type(None)) + script_file = open(file_name, 'w') + script_file.write('# @ shell=/bin/bash\n') + # error file + if type(err_file) == type(None): + err_file = 'err.job.$(jobid).$(stepid)' + script_file.write('# @ error = ' + os.path.join(self.work_dir, err_file) + '\n') + # output file + if type(out_file) == type(None): + out_file = 'out.job.$(jobid).$(stepid)' + script_file.write('# @ output = ' + os.path.join(self.work_dir, out_file) + '\n') + # If Ibm is used should be : script_file.write('# @ job_type = parallel\n') + script_file.write('# @ job_type = MPICH\n') + script_file.write('# @ node_usage = not_shared\n') + script_file.write('#\n') + + nb_cpus_per_node = self.host_info['deltanprocs'] + assert(isinstance(nb_cpus_per_node, int) and nb_cpus_per_node >= 1, 'nb_cpus_per_node is {}'.format(nb_cpus_per_node)) + + # No more threads than the number of cores + assert(nb_threads_per_process <= nb_cpus_per_node, + "Cannot use more threads ({} asked) than the number of cores ({})".format( + nb_threads_per_process, nb_cpus_per_node)) + # Warn if some core will not be ued + if nb_cpus_per_node%nb_threads_per_process != 0: + warnings.warn("The number of threads is smaller than the number of cores (machine will be underused)", + UserWarning) + + nb_cpus = nb_mpi_processes*nb_threads_per_process + if (nb_cpus < nb_cpus_per_node): + # in case we use only a few process on a single node + nb_nodes = 1 + nb_processes_per_node = nb_mpi_processes + first_node_tasks = nb_mpi_processes + else: + nb_nodes = int((nb_cpus+nb_cpus_per_node-1) // nb_cpus_per_node) + # if more than one node we requiere to have a multiple of deltanprocs + nb_processes_per_node = int(nb_cpus_per_node // nb_threads_per_process) + first_node_tasks = int(nb_mpi_processes - (nb_nodes-1)*nb_processes_per_node) + + 
for job in range(njobs): + script_file.write('# @ step_name = {0}.$(stepid)\n'.format(self.simname)) + script_file.write('# @ resources = ConsumableCpus({})\n'.format(nb_threads_per_process)) + script_file.write('# @ network.MPI = sn_all,not_shared,us\n') + script_file.write('# @ wall_clock_limit = {0}:{1:0>2d}:00\n'.format(hours, minutes)) + assert(type(self.host_info['environment']) != type(None)) + script_file.write('# @ node = {0}\n'.format(nb_nodes)) + script_file.write('# @ tasks_per_node = {0}\n'.format(nb_processes_per_node)) + if (first_node_tasks > 0): + script_file.write('# @ first_node_tasks = {0}\n'.format(first_node_tasks)) + script_file.write('# @ queue\n') + + script_file.write('source ~/.config/bfps/bashrc\n') + script_file.write('module li\n') + script_file.write('export OMP_NUM_THREADS={}\n'.format(nb_threads_per_process)) + + script_file.write('LD_LIBRARY_PATH=' + + ':'.join([bfps.lib_dir] + bfps.install_info['library_dirs']) + + ':${LD_LIBRARY_PATH}\n') + script_file.write('echo "Start time is `date`"\n') + script_file.write('export HTMLOUTPUT={}.html\n'.format(command_atoms[-1])) + script_file.write('cd ' + self.work_dir + '\n') + + script_file.write('export KMP_AFFINITY=compact,verbose\n') + script_file.write('export I_MPI_PIN_DOMAIN=omp\n') + + script_file.write('mpiexec.hydra ' + + ' -np {} '.format(nb_mpi_processes) + + ' -ppn {} '.format(nb_processes_per_node) + + ' -ordered-output -prepend-rank ' + + os.path.join( + self.work_dir, + command_atoms[0]) + + ' ' + + ' '.join(command_atoms[1:]) + + '\n') + + script_file.write('echo "End time is `date`"\n') + script_file.write('exit 0\n') + script_file.close() + return None def write_sge_file( self, file_name = None, @@ -267,6 +584,79 @@ class _code(_base): script_file.write('exit 0\n') script_file.close() return None + def write_slurm_file( + self, + file_name = None, + name_of_run = None, + command_atoms = [], + hours = None, + minutes = None, + out_file = None, + err_file = None, + 
nb_mpi_processes = None, + nb_threads_per_process = None): + script_file = open(file_name, 'w') + script_file.write('#!/bin/bash -l\n') + # job name + script_file.write('#SBATCH -J {0}\n'.format(name_of_run)) + # use current working directory + script_file.write('#SBATCH -D ./\n') + # error file + if not type(err_file) == type(None): + script_file.write('#SBATCH -e ' + err_file + '\n') + # output file + if not type(out_file) == type(None): + script_file.write('#SBATCH -o ' + out_file + '\n') + script_file.write('#SBATCH --partition={0}\n'.format( + self.host_info['environment'])) + + nb_cpus_per_node = self.host_info['deltanprocs'] + assert(isinstance(nb_cpus_per_node, int) and nb_cpus_per_node >= 1, + 'nb_cpus_per_node is {}'.format(nb_cpus_per_node)) + + # No more threads than the number of cores + assert(nb_threads_per_process <= nb_cpus_per_node, + "Cannot use more threads ({} asked) than the number of cores ({})".format( + nb_threads_per_process, nb_cpus_per_node)) + # Warn if some core will not be ued + if nb_cpus_per_node%nb_threads_per_process != 0: + warnings.warn( + "The number of threads is smaller than the number of cores (machine will be underused)", + UserWarning) + + nb_cpus = nb_mpi_processes*nb_threads_per_process + if (nb_cpus < nb_cpus_per_node): + # in case we use only a few process on a single node + nb_nodes = 1 + nb_processes_per_node = nb_mpi_processes + else: + nb_nodes = int((nb_cpus+nb_cpus_per_node-1) // nb_cpus_per_node) + # if more than one node we requiere to have a multiple of deltanprocs + nb_processes_per_node = int(nb_cpus_per_node // nb_threads_per_process) + + + script_file.write('#SBATCH --nodes={0}\n'.format(nb_nodes)) + script_file.write('#SBATCH --ntasks-per-node={0}\n'.format(nb_processes_per_node)) + script_file.write('#SBATCH --cpus-per-task={0}\n'.format(nb_threads_per_process)) + + script_file.write('#SBATCH --mail-type=none\n') + script_file.write('#SBATCH --time={0}:{1:0>2d}:00\n'.format(hours, minutes)) + 
script_file.write('source ~/.config/bfps/bashrc\n') + if nb_threads_per_process > 1: + script_file.write('export OMP_NUM_THREADS={0}\n'.format(nb_threads_per_process)) + script_file.write('export OMP_PLACES=cores\n') + + script_file.write('LD_LIBRARY_PATH=' + + ':'.join([bfps.lib_dir] + bfps.install_info['library_dirs']) + + ':${LD_LIBRARY_PATH}\n') + script_file.write('echo "Start time is `date`"\n') + script_file.write('cd ' + self.work_dir + '\n') + script_file.write('export HTMLOUTPUT={}.html\n'.format(command_atoms[-1])) + script_file.write('srun {0}\n'.format(' '.join(command_atoms))) + script_file.write('echo "End time is `date`"\n') + script_file.write('exit 0\n') + script_file.close() + return None def prepare_launch( self, args = [], @@ -274,6 +664,14 @@ class _code(_base): parser = argparse.ArgumentParser('bfps ' + type(self).__name__) self.add_parser_arguments(parser) opt = parser.parse_args(args) + + if opt.ncpu != -1: + warnings.warn( + 'ncpu should be replaced by np/ntpp', + DeprecationWarning) + opt.nb_processes = opt.ncpu + opt.nb_threads_per_process = 1 + self.set_host_info(bfps.host_info) if type(opt.environment) != type(None): self.host_info['environment'] = opt.environment diff --git a/bfps/_fluid_base.py b/bfps/_fluid_base.py index 7eef1e1569cf3f5f66b5adcf52494be8de2fbe49..757e6cb81e6c605cbcb3c2e9d19bd7487add115f 100644 --- a/bfps/_fluid_base.py +++ b/bfps/_fluid_base.py @@ -88,6 +88,7 @@ class _fluid_particle_base(_code): self.particle_definitions = '' self.particle_start = '' self.particle_loop = '' + self.particle_output = '' self.particle_end = '' self.particle_stat_src = '' self.file_datasets_grow = '' @@ -95,6 +96,7 @@ class _fluid_particle_base(_code): //begincpp if (myrank == 0 && iteration == 0) { + TIMEZONE("fuild_base::store_kspace"); hsize_t dims[4]; hid_t space, dset; // store kspace information @@ -141,8 +143,7 @@ class _fluid_particle_base(_code): postprocess_mode = False): self.includes += self.fluid_includes self.includes += 
'#include <ctime>\n' - self.variables += (self.fluid_variables + - 'hid_t particle_file;\n') + self.variables += self.fluid_variables self.definitions += ('int grow_single_dataset(hid_t dset, int tincrement)\n{\n' + 'int ndims;\n' + 'hsize_t space;\n' + @@ -215,22 +216,6 @@ class _fluid_particle_base(_code): }} //endcpp """.format(fftw_prefix) + self.main_end - if self.particle_species > 0: - self.main_start += """ - if (myrank == 0) - { - // set caching parameters - hid_t fapl = H5Pcreate(H5P_FILE_ACCESS); - herr_t cache_err = H5Pset_cache(fapl, 0, 521, 134217728, 1.0); - DEBUG_MSG("when setting cache for particles I got %d\\n", cache_err); - sprintf(fname, "%s_particles.h5", simname); - particle_file = H5Fopen(fname, H5F_ACC_RDWR, fapl); - } - """ - self.main_end = ('if (myrank == 0)\n' + - '{\n' + - 'H5Fclose(particle_file);\n' + - '}\n') + self.main_end self.main = """ //begincpp int data_file_problem; @@ -263,8 +248,15 @@ class _fluid_particle_base(_code): '<< time_difference/nprocs << " seconds" << std::endl;\n' + 'time0 = time1;\n') if not postprocess_mode: - self.main += 'for (int max_iter = iteration+niter_todo; iteration < max_iter; iteration++)\n' + self.main += 'for (int max_iter = iteration+niter_todo-iteration%niter_todo; iteration < max_iter; iteration++)\n' self.main += '{\n' + + self.main += """ + #ifdef USE_TIMINGOUTPUT + const std::string loopLabel = "code::main_start::loop-" + std::to_string(iteration); + TIMEZONE(loopLabel.c_str()); + #endif + """ self.main += 'if (iteration % niter_stat == 0) do_stats();\n' if self.particle_species > 0: self.main += 'if (iteration % niter_part == 0) do_particle_stats();\n' @@ -278,20 +270,29 @@ class _fluid_particle_base(_code): else: self.main += 'for (int frame_index = iter0; frame_index <= iter1; frame_index += niter_out)\n' self.main += '{\n' + self.main += """ + #ifdef USE_TIMINGOUTPUT + const std::string loopLabel = "code::main_start::loop-" + std::to_string(frame_index); + TIMEZONE(loopLabel.c_str()); + 
#endif + """ if self.particle_species > 0: self.main += self.particle_loop self.main += self.fluid_loop self.main += output_time_difference.format('frame_index') self.main += '}\n' + self.main += self.fluid_end if self.particle_species > 0: self.main += self.particle_end - self.main += self.fluid_end return None def read_rfield( self, field = 'velocity', iteration = 0, filename = None): + """ + :note: assumes field is a vector field + """ if type(filename) == type(None): filename = os.path.join( self.work_dir, @@ -299,6 +300,7 @@ class _fluid_particle_base(_code): return np.memmap( filename, dtype = self.dtype, + mode = 'r', shape = (self.parameters['nz'], self.parameters['ny'], self.parameters['nx'], 3)) diff --git a/bfps/cpp/Lagrange_polys.cpp b/bfps/cpp/Lagrange_polys.cpp index f5660b053686b1148c0c50b81d7936d675148092..bbc8997f896aebba486e4fa601c18eaf2aacfcc1 100644 --- a/bfps/cpp/Lagrange_polys.cpp +++ b/bfps/cpp/Lagrange_polys.cpp @@ -1,6 +1,6 @@ /********************************************************************** * * -* Copyright 2015 Max Planck Institute * +* Copyright 2017 Max Planck Institute * * for Dynamics and Self-Organization * * * * This file is part of bfps. 
* @@ -27,356 +27,505 @@ #include "Lagrange_polys.hpp" #include <cmath> -void beta_Lagrange_n1(int deriv, double x, double *poly_val){ -switch(deriv) -{ -case 0: -poly_val[0] = x*(x*(-1.0L/6.0L*x + 1.0L/2.0L) - 1.0L/3.0L); -poly_val[1] = x*(x*((1.0L/2.0L)*x - 1) - 1.0L/2.0L) + 1; -poly_val[2] = x*(x*(-1.0L/2.0L*x + 1.0L/2.0L) + 1); -poly_val[3] = x*((1.0L/6.0L)*pow(x, 2) - 1.0L/6.0L); -break; -case 1: -poly_val[0] = x*(-1.0L/2.0L*x + 1) - 1.0L/3.0L; -poly_val[1] = x*((3.0L/2.0L)*x - 2) - 1.0L/2.0L; -poly_val[2] = x*(-3.0L/2.0L*x + 1) + 1; -poly_val[3] = (1.0L/2.0L)*pow(x, 2) - 1.0L/6.0L; -break; -case 2: -poly_val[0] = -x + 1; -poly_val[1] = 3*x - 2; -poly_val[2] = -3*x + 1; -poly_val[3] = x; -break; +void beta_Lagrange_n1(int deriv, double x, double *poly_val) { + switch(deriv) + { + case 0: + poly_val[0] = x*(x*(-1.0L/6.0L*x + 1.0L/2.0L) - 1.0L/3.0L); + poly_val[1] = x*(x*((1.0L/2.0L)*x - 1) - 1.0L/2.0L) + 1; + poly_val[2] = x*(x*(-1.0L/2.0L*x + 1.0L/2.0L) + 1); + poly_val[3] = x*((1.0L/6.0L)*pow(x, 2) - 1.0L/6.0L); + break; + case 1: + poly_val[0] = x*(-1.0L/2.0L*x + 1) - 1.0L/3.0L; + poly_val[1] = x*((3.0L/2.0L)*x - 2) - 1.0L/2.0L; + poly_val[2] = x*(-3.0L/2.0L*x + 1) + 1; + poly_val[3] = (1.0L/2.0L)*pow(x, 2) - 1.0L/6.0L; + break; + case 2: + poly_val[0] = -x + 1; + poly_val[1] = 3*x - 2; + poly_val[2] = -3*x + 1; + poly_val[3] = x; + break; + } } +void beta_Lagrange_n2(int deriv, double x, double *poly_val) { + switch(deriv) + { + case 0: + poly_val[0] = x*(x*(x*(x*(-1.0L/120.0L*x + 1.0L/24.0L) - 1.0L/24.0L) - 1.0L/24.0L) + 1.0L/20.0L); + poly_val[1] = x*(x*(x*(x*((1.0L/24.0L)*x - 1.0L/6.0L) - 1.0L/24.0L) + 2.0L/3.0L) - 1.0L/2.0L); + poly_val[2] = x*(x*(x*(x*(-1.0L/12.0L*x + 1.0L/4.0L) + 5.0L/12.0L) - 5.0L/4.0L) - 1.0L/3.0L) + 1; + poly_val[3] = x*(x*(x*(x*((1.0L/12.0L)*x - 1.0L/6.0L) - 7.0L/12.0L) + 2.0L/3.0L) + 1); + poly_val[4] = x*(x*(x*(x*(-1.0L/24.0L*x + 1.0L/24.0L) + 7.0L/24.0L) - 1.0L/24.0L) - 1.0L/4.0L); + poly_val[5] = x*(pow(x, 
2)*((1.0L/120.0L)*pow(x, 2) - 1.0L/24.0L) + 1.0L/30.0L); + break; + case 1: + poly_val[0] = x*(x*(x*(-1.0L/24.0L*x + 1.0L/6.0L) - 1.0L/8.0L) - 1.0L/12.0L) + 1.0L/20.0L; + poly_val[1] = x*(x*(x*((5.0L/24.0L)*x - 2.0L/3.0L) - 1.0L/8.0L) + 4.0L/3.0L) - 1.0L/2.0L; + poly_val[2] = x*(x*(x*(-5.0L/12.0L*x + 1) + 5.0L/4.0L) - 5.0L/2.0L) - 1.0L/3.0L; + poly_val[3] = x*(x*(x*((5.0L/12.0L)*x - 2.0L/3.0L) - 7.0L/4.0L) + 4.0L/3.0L) + 1; + poly_val[4] = x*(x*(x*(-5.0L/24.0L*x + 1.0L/6.0L) + 7.0L/8.0L) - 1.0L/12.0L) - 1.0L/4.0L; + poly_val[5] = pow(x, 2)*((1.0L/24.0L)*pow(x, 2) - 1.0L/8.0L) + 1.0L/30.0L; + break; + case 2: + poly_val[0] = x*(x*(-1.0L/6.0L*x + 1.0L/2.0L) - 1.0L/4.0L) - 1.0L/12.0L; + poly_val[1] = x*(x*((5.0L/6.0L)*x - 2) - 1.0L/4.0L) + 4.0L/3.0L; + poly_val[2] = x*(x*(-5.0L/3.0L*x + 3) + 5.0L/2.0L) - 5.0L/2.0L; + poly_val[3] = x*(x*((5.0L/3.0L)*x - 2) - 7.0L/2.0L) + 4.0L/3.0L; + poly_val[4] = x*(x*(-5.0L/6.0L*x + 1.0L/2.0L) + 7.0L/4.0L) - 1.0L/12.0L; + poly_val[5] = x*((1.0L/6.0L)*pow(x, 2) - 1.0L/4.0L); + break; + } } -void beta_Lagrange_n2(int deriv, double x, double *poly_val){ -switch(deriv) -{ -case 0: -poly_val[0] = x*(x*(x*(x*(-1.0L/120.0L*x + 1.0L/24.0L) - 1.0L/24.0L) - 1.0L/24.0L) + 1.0L/20.0L); -poly_val[1] = x*(x*(x*(x*((1.0L/24.0L)*x - 1.0L/6.0L) - 1.0L/24.0L) + 2.0L/3.0L) - 1.0L/2.0L); -poly_val[2] = x*(x*(x*(x*(-1.0L/12.0L*x + 1.0L/4.0L) + 5.0L/12.0L) - 5.0L/4.0L) - 1.0L/3.0L) + 1; -poly_val[3] = x*(x*(x*(x*((1.0L/12.0L)*x - 1.0L/6.0L) - 7.0L/12.0L) + 2.0L/3.0L) + 1); -poly_val[4] = x*(x*(x*(x*(-1.0L/24.0L*x + 1.0L/24.0L) + 7.0L/24.0L) - 1.0L/24.0L) - 1.0L/4.0L); -poly_val[5] = x*(pow(x, 2)*((1.0L/120.0L)*pow(x, 2) - 1.0L/24.0L) + 1.0L/30.0L); -break; -case 1: -poly_val[0] = x*(x*(x*(-1.0L/24.0L*x + 1.0L/6.0L) - 1.0L/8.0L) - 1.0L/12.0L) + 1.0L/20.0L; -poly_val[1] = x*(x*(x*((5.0L/24.0L)*x - 2.0L/3.0L) - 1.0L/8.0L) + 4.0L/3.0L) - 1.0L/2.0L; -poly_val[2] = x*(x*(x*(-5.0L/12.0L*x + 1) + 5.0L/4.0L) - 5.0L/2.0L) - 1.0L/3.0L; -poly_val[3] = 
x*(x*(x*((5.0L/12.0L)*x - 2.0L/3.0L) - 7.0L/4.0L) + 4.0L/3.0L) + 1; -poly_val[4] = x*(x*(x*(-5.0L/24.0L*x + 1.0L/6.0L) + 7.0L/8.0L) - 1.0L/12.0L) - 1.0L/4.0L; -poly_val[5] = pow(x, 2)*((1.0L/24.0L)*pow(x, 2) - 1.0L/8.0L) + 1.0L/30.0L; -break; -case 2: -poly_val[0] = x*(x*(-1.0L/6.0L*x + 1.0L/2.0L) - 1.0L/4.0L) - 1.0L/12.0L; -poly_val[1] = x*(x*((5.0L/6.0L)*x - 2) - 1.0L/4.0L) + 4.0L/3.0L; -poly_val[2] = x*(x*(-5.0L/3.0L*x + 3) + 5.0L/2.0L) - 5.0L/2.0L; -poly_val[3] = x*(x*((5.0L/3.0L)*x - 2) - 7.0L/2.0L) + 4.0L/3.0L; -poly_val[4] = x*(x*(-5.0L/6.0L*x + 1.0L/2.0L) + 7.0L/4.0L) - 1.0L/12.0L; -poly_val[5] = x*((1.0L/6.0L)*pow(x, 2) - 1.0L/4.0L); -break; +void beta_Lagrange_n3(int deriv, double x, double *poly_val) { + switch(deriv) + { + case 0: + poly_val[0] = x*(x*(x*(x*(x*(x*(-1.0L/5040.0L*x + 1.0L/720.0L) - 1.0L/720.0L) - 1.0L/144.0L) + 1.0L/90.0L) + 1.0L/180.0L) - 1.0L/105.0L); + poly_val[1] = x*(x*(x*(x*(x*(x*((1.0L/720.0L)*x - 1.0L/120.0L) - 1.0L/360.0L) + 1.0L/12.0L) - 71.0L/720.0L) - 3.0L/40.0L) + 1.0L/10.0L); + poly_val[2] = x*(x*(x*(x*(x*(x*(-1.0L/240.0L*x + 1.0L/48.0L) + 3.0L/80.0L) - 13.0L/48.0L) + 1.0L/15.0L) + 3.0L/4.0L) - 3.0L/5.0L); + poly_val[3] = x*(x*(x*(x*(x*(x*((1.0L/144.0L)*x - 1.0L/36.0L) - 7.0L/72.0L) + 7.0L/18.0L) + 49.0L/144.0L) - 49.0L/36.0L) - 1.0L/4.0L) + 1; + poly_val[4] = x*(x*(x*(x*(x*(x*(-1.0L/144.0L*x + 1.0L/48.0L) + 17.0L/144.0L) - 13.0L/48.0L) - 11.0L/18.0L) + 3.0L/4.0L) + 1); + poly_val[5] = x*(x*(x*(x*(x*(x*((1.0L/240.0L)*x - 1.0L/120.0L) - 3.0L/40.0L) + 1.0L/12.0L) + 89.0L/240.0L) - 3.0L/40.0L) - 3.0L/10.0L); + poly_val[6] = x*(x*(x*(x*(x*(x*(-1.0L/720.0L*x + 1.0L/720.0L) + 17.0L/720.0L) - 1.0L/144.0L) - 4.0L/45.0L) + 1.0L/180.0L) + 1.0L/15.0L); + poly_val[7] = x*(pow(x, 2)*(pow(x, 2)*((1.0L/5040.0L)*pow(x, 2) - 1.0L/360.0L) + 7.0L/720.0L) - 1.0L/140.0L); + break; + case 1: + poly_val[0] = x*(x*(x*(x*(x*(-1.0L/720.0L*x + 1.0L/120.0L) - 1.0L/144.0L) - 1.0L/36.0L) + 1.0L/30.0L) + 1.0L/90.0L) - 1.0L/105.0L; + poly_val[1] = 
x*(x*(x*(x*(x*((7.0L/720.0L)*x - 1.0L/20.0L) - 1.0L/72.0L) + 1.0L/3.0L) - 71.0L/240.0L) - 3.0L/20.0L) + 1.0L/10.0L; + poly_val[2] = x*(x*(x*(x*(x*(-7.0L/240.0L*x + 1.0L/8.0L) + 3.0L/16.0L) - 13.0L/12.0L) + 1.0L/5.0L) + 3.0L/2.0L) - 3.0L/5.0L; + poly_val[3] = x*(x*(x*(x*(x*((7.0L/144.0L)*x - 1.0L/6.0L) - 35.0L/72.0L) + 14.0L/9.0L) + 49.0L/48.0L) - 49.0L/18.0L) - 1.0L/4.0L; + poly_val[4] = x*(x*(x*(x*(x*(-7.0L/144.0L*x + 1.0L/8.0L) + 85.0L/144.0L) - 13.0L/12.0L) - 11.0L/6.0L) + 3.0L/2.0L) + 1; + poly_val[5] = x*(x*(x*(x*(x*((7.0L/240.0L)*x - 1.0L/20.0L) - 3.0L/8.0L) + 1.0L/3.0L) + 89.0L/80.0L) - 3.0L/20.0L) - 3.0L/10.0L; + poly_val[6] = x*(x*(x*(x*(x*(-7.0L/720.0L*x + 1.0L/120.0L) + 17.0L/144.0L) - 1.0L/36.0L) - 4.0L/15.0L) + 1.0L/90.0L) + 1.0L/15.0L; + poly_val[7] = pow(x, 2)*(pow(x, 2)*((1.0L/720.0L)*pow(x, 2) - 1.0L/72.0L) + 7.0L/240.0L) - 1.0L/140.0L; + break; + case 2: + poly_val[0] = x*(x*(x*(x*(-1.0L/120.0L*x + 1.0L/24.0L) - 1.0L/36.0L) - 1.0L/12.0L) + 1.0L/15.0L) + 1.0L/90.0L; + poly_val[1] = x*(x*(x*(x*((7.0L/120.0L)*x - 1.0L/4.0L) - 1.0L/18.0L) + 1) - 71.0L/120.0L) - 3.0L/20.0L; + poly_val[2] = x*(x*(x*(x*(-7.0L/40.0L*x + 5.0L/8.0L) + 3.0L/4.0L) - 13.0L/4.0L) + 2.0L/5.0L) + 3.0L/2.0L; + poly_val[3] = x*(x*(x*(x*((7.0L/24.0L)*x - 5.0L/6.0L) - 35.0L/18.0L) + 14.0L/3.0L) + 49.0L/24.0L) - 49.0L/18.0L; + poly_val[4] = x*(x*(x*(x*(-7.0L/24.0L*x + 5.0L/8.0L) + 85.0L/36.0L) - 13.0L/4.0L) - 11.0L/3.0L) + 3.0L/2.0L; + poly_val[5] = x*(x*(x*(x*((7.0L/40.0L)*x - 1.0L/4.0L) - 3.0L/2.0L) + 1) + 89.0L/40.0L) - 3.0L/20.0L; + poly_val[6] = x*(x*(x*(x*(-7.0L/120.0L*x + 1.0L/24.0L) + 17.0L/36.0L) - 1.0L/12.0L) - 8.0L/15.0L) + 1.0L/90.0L; + poly_val[7] = x*(pow(x, 2)*((1.0L/120.0L)*pow(x, 2) - 1.0L/18.0L) + 7.0L/120.0L); + break; + } } +void beta_Lagrange_n4(int deriv, double x, double *poly_val) { + switch(deriv) + { + case 0: + poly_val[0] = x*(x*(x*(x*(x*(x*(x*(x*(-1.0L/362880.0L*x + 1.0L/40320.0L) - 1.0L/60480.0L) - 1.0L/2880.0L) + 11.0L/17280.0L) + 7.0L/5760.0L) - 
59.0L/22680.0L) - 1.0L/1120.0L) + 1.0L/504.0L); + poly_val[1] = x*(x*(x*(x*(x*(x*(x*(x*((1.0L/40320.0L)*x - 1.0L/5040.0L) - 1.0L/6720.0L) + 1.0L/240.0L) - 11.0L/1920.0L) - 1.0L/60.0L) + 299.0L/10080.0L) + 4.0L/315.0L) - 1.0L/42.0L); + poly_val[2] = x*(x*(x*(x*(x*(x*(x*(x*(-1.0L/10080.0L*x + 1.0L/1440.0L) + 1.0L/630.0L) - 13.0L/720.0L) + 13.0L/1440.0L) + 169.0L/1440.0L) - 773.0L/5040.0L) - 1.0L/10.0L) + 1.0L/7.0L); + poly_val[3] = x*(x*(x*(x*(x*(x*(x*(x*((1.0L/4320.0L)*x - 1.0L/720.0L) - 1.0L/180.0L) + 29.0L/720.0L) + 11.0L/480.0L) - 61.0L/180.0L) + 161.0L/1080.0L) + 4.0L/5.0L) - 2.0L/3.0L); + poly_val[4] = x*(x*(x*(x*(x*(x*(x*(x*(-1.0L/2880.0L*x + 1.0L/576.0L) + 1.0L/96.0L) - 5.0L/96.0L) - 91.0L/960.0L) + 91.0L/192.0L) + 41.0L/144.0L) - 205.0L/144.0L) - 1.0L/5.0L) + 1; + poly_val[5] = x*(x*(x*(x*(x*(x*(x*(x*((1.0L/2880.0L)*x - 1.0L/720.0L) - 17.0L/1440.0L) + 29.0L/720.0L) + 389.0L/2880.0L) - 61.0L/180.0L) - 449.0L/720.0L) + 4.0L/5.0L) + 1); + poly_val[6] = x*(x*(x*(x*(x*(x*(x*(x*(-1.0L/4320.0L*x + 1.0L/1440.0L) + 1.0L/120.0L) - 13.0L/720.0L) - 143.0L/1440.0L) + 169.0L/1440.0L) + 917.0L/2160.0L) - 1.0L/10.0L) - 1.0L/3.0L); + poly_val[7] = x*(x*(x*(x*(x*(x*(x*(x*((1.0L/10080.0L)*x - 1.0L/5040.0L) - 1.0L/280.0L) + 1.0L/240.0L) + 19.0L/480.0L) - 1.0L/60.0L) - 331.0L/2520.0L) + 4.0L/315.0L) + 2.0L/21.0L); + poly_val[8] = x*(x*(x*(x*(x*(x*(x*(x*(-1.0L/40320.0L*x + 1.0L/40320.0L) + 17.0L/20160.0L) - 1.0L/2880.0L) - 47.0L/5760.0L) + 7.0L/5760.0L) + 127.0L/5040.0L) - 1.0L/1120.0L) - 1.0L/56.0L); + poly_val[9] = x*(pow(x, 2)*(pow(x, 2)*(pow(x, 2)*((1.0L/362880.0L)*pow(x, 2) - 1.0L/12096.0L) + 13.0L/17280.0L) - 41.0L/18144.0L) + 1.0L/630.0L); + break; + case 1: + poly_val[0] = x*(x*(x*(x*(x*(x*(x*(-1.0L/40320.0L*x + 1.0L/5040.0L) - 1.0L/8640.0L) - 1.0L/480.0L) + 11.0L/3456.0L) + 7.0L/1440.0L) - 59.0L/7560.0L) - 1.0L/560.0L) + 1.0L/504.0L; + poly_val[1] = x*(x*(x*(x*(x*(x*(x*((1.0L/4480.0L)*x - 1.0L/630.0L) - 1.0L/960.0L) + 1.0L/40.0L) - 11.0L/384.0L) - 1.0L/15.0L) + 
299.0L/3360.0L) + 8.0L/315.0L) - 1.0L/42.0L; + poly_val[2] = x*(x*(x*(x*(x*(x*(x*(-1.0L/1120.0L*x + 1.0L/180.0L) + 1.0L/90.0L) - 13.0L/120.0L) + 13.0L/288.0L) + 169.0L/360.0L) - 773.0L/1680.0L) - 1.0L/5.0L) + 1.0L/7.0L; + poly_val[3] = x*(x*(x*(x*(x*(x*(x*((1.0L/480.0L)*x - 1.0L/90.0L) - 7.0L/180.0L) + 29.0L/120.0L) + 11.0L/96.0L) - 61.0L/45.0L) + 161.0L/360.0L) + 8.0L/5.0L) - 2.0L/3.0L; + poly_val[4] = x*(x*(x*(x*(x*(x*(x*(-1.0L/320.0L*x + 1.0L/72.0L) + 7.0L/96.0L) - 5.0L/16.0L) - 91.0L/192.0L) + 91.0L/48.0L) + 41.0L/48.0L) - 205.0L/72.0L) - 1.0L/5.0L; + poly_val[5] = x*(x*(x*(x*(x*(x*(x*((1.0L/320.0L)*x - 1.0L/90.0L) - 119.0L/1440.0L) + 29.0L/120.0L) + 389.0L/576.0L) - 61.0L/45.0L) - 449.0L/240.0L) + 8.0L/5.0L) + 1; + poly_val[6] = x*(x*(x*(x*(x*(x*(x*(-1.0L/480.0L*x + 1.0L/180.0L) + 7.0L/120.0L) - 13.0L/120.0L) - 143.0L/288.0L) + 169.0L/360.0L) + 917.0L/720.0L) - 1.0L/5.0L) - 1.0L/3.0L; + poly_val[7] = x*(x*(x*(x*(x*(x*(x*((1.0L/1120.0L)*x - 1.0L/630.0L) - 1.0L/40.0L) + 1.0L/40.0L) + 19.0L/96.0L) - 1.0L/15.0L) - 331.0L/840.0L) + 8.0L/315.0L) + 2.0L/21.0L; + poly_val[8] = x*(x*(x*(x*(x*(x*(x*(-1.0L/4480.0L*x + 1.0L/5040.0L) + 17.0L/2880.0L) - 1.0L/480.0L) - 47.0L/1152.0L) + 7.0L/1440.0L) + 127.0L/1680.0L) - 1.0L/560.0L) - 1.0L/56.0L; + poly_val[9] = pow(x, 2)*(pow(x, 2)*(pow(x, 2)*((1.0L/40320.0L)*pow(x, 2) - 1.0L/1728.0L) + 13.0L/3456.0L) - 41.0L/6048.0L) + 1.0L/630.0L; + break; + case 2: + poly_val[0] = x*(x*(x*(x*(x*(x*(-1.0L/5040.0L*x + 1.0L/720.0L) - 1.0L/1440.0L) - 1.0L/96.0L) + 11.0L/864.0L) + 7.0L/480.0L) - 59.0L/3780.0L) - 1.0L/560.0L; + poly_val[1] = x*(x*(x*(x*(x*(x*((1.0L/560.0L)*x - 1.0L/90.0L) - 1.0L/160.0L) + 1.0L/8.0L) - 11.0L/96.0L) - 1.0L/5.0L) + 299.0L/1680.0L) + 8.0L/315.0L; + poly_val[2] = x*(x*(x*(x*(x*(x*(-1.0L/140.0L*x + 7.0L/180.0L) + 1.0L/15.0L) - 13.0L/24.0L) + 13.0L/72.0L) + 169.0L/120.0L) - 773.0L/840.0L) - 1.0L/5.0L; + poly_val[3] = x*(x*(x*(x*(x*(x*((1.0L/60.0L)*x - 7.0L/90.0L) - 7.0L/30.0L) + 29.0L/24.0L) + 11.0L/24.0L) - 
61.0L/15.0L) + 161.0L/180.0L) + 8.0L/5.0L; + poly_val[4] = x*(x*(x*(x*(x*(x*(-1.0L/40.0L*x + 7.0L/72.0L) + 7.0L/16.0L) - 25.0L/16.0L) - 91.0L/48.0L) + 91.0L/16.0L) + 41.0L/24.0L) - 205.0L/72.0L; + poly_val[5] = x*(x*(x*(x*(x*(x*((1.0L/40.0L)*x - 7.0L/90.0L) - 119.0L/240.0L) + 29.0L/24.0L) + 389.0L/144.0L) - 61.0L/15.0L) - 449.0L/120.0L) + 8.0L/5.0L; + poly_val[6] = x*(x*(x*(x*(x*(x*(-1.0L/60.0L*x + 7.0L/180.0L) + 7.0L/20.0L) - 13.0L/24.0L) - 143.0L/72.0L) + 169.0L/120.0L) + 917.0L/360.0L) - 1.0L/5.0L; + poly_val[7] = x*(x*(x*(x*(x*(x*((1.0L/140.0L)*x - 1.0L/90.0L) - 3.0L/20.0L) + 1.0L/8.0L) + 19.0L/24.0L) - 1.0L/5.0L) - 331.0L/420.0L) + 8.0L/315.0L; + poly_val[8] = x*(x*(x*(x*(x*(x*(-1.0L/560.0L*x + 1.0L/720.0L) + 17.0L/480.0L) - 1.0L/96.0L) - 47.0L/288.0L) + 7.0L/480.0L) + 127.0L/840.0L) - 1.0L/560.0L; + poly_val[9] = x*(pow(x, 2)*(pow(x, 2)*((1.0L/5040.0L)*pow(x, 2) - 1.0L/288.0L) + 13.0L/864.0L) - 41.0L/3024.0L); + break; + } } -void beta_Lagrange_n3(int deriv, double x, double *poly_val){ -switch(deriv) -{ -case 0: -poly_val[0] = x*(x*(x*(x*(x*(x*(-1.0L/5040.0L*x + 1.0L/720.0L) - 1.0L/720.0L) - 1.0L/144.0L) + 1.0L/90.0L) + 1.0L/180.0L) - 1.0L/105.0L); -poly_val[1] = x*(x*(x*(x*(x*(x*((1.0L/720.0L)*x - 1.0L/120.0L) - 1.0L/360.0L) + 1.0L/12.0L) - 71.0L/720.0L) - 3.0L/40.0L) + 1.0L/10.0L); -poly_val[2] = x*(x*(x*(x*(x*(x*(-1.0L/240.0L*x + 1.0L/48.0L) + 3.0L/80.0L) - 13.0L/48.0L) + 1.0L/15.0L) + 3.0L/4.0L) - 3.0L/5.0L); -poly_val[3] = x*(x*(x*(x*(x*(x*((1.0L/144.0L)*x - 1.0L/36.0L) - 7.0L/72.0L) + 7.0L/18.0L) + 49.0L/144.0L) - 49.0L/36.0L) - 1.0L/4.0L) + 1; -poly_val[4] = x*(x*(x*(x*(x*(x*(-1.0L/144.0L*x + 1.0L/48.0L) + 17.0L/144.0L) - 13.0L/48.0L) - 11.0L/18.0L) + 3.0L/4.0L) + 1); -poly_val[5] = x*(x*(x*(x*(x*(x*((1.0L/240.0L)*x - 1.0L/120.0L) - 3.0L/40.0L) + 1.0L/12.0L) + 89.0L/240.0L) - 3.0L/40.0L) - 3.0L/10.0L); -poly_val[6] = x*(x*(x*(x*(x*(x*(-1.0L/720.0L*x + 1.0L/720.0L) + 17.0L/720.0L) - 1.0L/144.0L) - 4.0L/45.0L) + 1.0L/180.0L) + 1.0L/15.0L); -poly_val[7] 
= x*(pow(x, 2)*(pow(x, 2)*((1.0L/5040.0L)*pow(x, 2) - 1.0L/360.0L) + 7.0L/720.0L) - 1.0L/140.0L); -break; -case 1: -poly_val[0] = x*(x*(x*(x*(x*(-1.0L/720.0L*x + 1.0L/120.0L) - 1.0L/144.0L) - 1.0L/36.0L) + 1.0L/30.0L) + 1.0L/90.0L) - 1.0L/105.0L; -poly_val[1] = x*(x*(x*(x*(x*((7.0L/720.0L)*x - 1.0L/20.0L) - 1.0L/72.0L) + 1.0L/3.0L) - 71.0L/240.0L) - 3.0L/20.0L) + 1.0L/10.0L; -poly_val[2] = x*(x*(x*(x*(x*(-7.0L/240.0L*x + 1.0L/8.0L) + 3.0L/16.0L) - 13.0L/12.0L) + 1.0L/5.0L) + 3.0L/2.0L) - 3.0L/5.0L; -poly_val[3] = x*(x*(x*(x*(x*((7.0L/144.0L)*x - 1.0L/6.0L) - 35.0L/72.0L) + 14.0L/9.0L) + 49.0L/48.0L) - 49.0L/18.0L) - 1.0L/4.0L; -poly_val[4] = x*(x*(x*(x*(x*(-7.0L/144.0L*x + 1.0L/8.0L) + 85.0L/144.0L) - 13.0L/12.0L) - 11.0L/6.0L) + 3.0L/2.0L) + 1; -poly_val[5] = x*(x*(x*(x*(x*((7.0L/240.0L)*x - 1.0L/20.0L) - 3.0L/8.0L) + 1.0L/3.0L) + 89.0L/80.0L) - 3.0L/20.0L) - 3.0L/10.0L; -poly_val[6] = x*(x*(x*(x*(x*(-7.0L/720.0L*x + 1.0L/120.0L) + 17.0L/144.0L) - 1.0L/36.0L) - 4.0L/15.0L) + 1.0L/90.0L) + 1.0L/15.0L; -poly_val[7] = pow(x, 2)*(pow(x, 2)*((1.0L/720.0L)*pow(x, 2) - 1.0L/72.0L) + 7.0L/240.0L) - 1.0L/140.0L; -break; -case 2: -poly_val[0] = x*(x*(x*(x*(-1.0L/120.0L*x + 1.0L/24.0L) - 1.0L/36.0L) - 1.0L/12.0L) + 1.0L/15.0L) + 1.0L/90.0L; -poly_val[1] = x*(x*(x*(x*((7.0L/120.0L)*x - 1.0L/4.0L) - 1.0L/18.0L) + 1) - 71.0L/120.0L) - 3.0L/20.0L; -poly_val[2] = x*(x*(x*(x*(-7.0L/40.0L*x + 5.0L/8.0L) + 3.0L/4.0L) - 13.0L/4.0L) + 2.0L/5.0L) + 3.0L/2.0L; -poly_val[3] = x*(x*(x*(x*((7.0L/24.0L)*x - 5.0L/6.0L) - 35.0L/18.0L) + 14.0L/3.0L) + 49.0L/24.0L) - 49.0L/18.0L; -poly_val[4] = x*(x*(x*(x*(-7.0L/24.0L*x + 5.0L/8.0L) + 85.0L/36.0L) - 13.0L/4.0L) - 11.0L/3.0L) + 3.0L/2.0L; -poly_val[5] = x*(x*(x*(x*((7.0L/40.0L)*x - 1.0L/4.0L) - 3.0L/2.0L) + 1) + 89.0L/40.0L) - 3.0L/20.0L; -poly_val[6] = x*(x*(x*(x*(-7.0L/120.0L*x + 1.0L/24.0L) + 17.0L/36.0L) - 1.0L/12.0L) - 8.0L/15.0L) + 1.0L/90.0L; -poly_val[7] = x*(pow(x, 2)*((1.0L/120.0L)*pow(x, 2) - 1.0L/18.0L) + 7.0L/120.0L); -break; +void 
beta_Lagrange_n5(int deriv, double x, double *poly_val) { + switch(deriv) + { + case 0: + poly_val[0] = x*(x*(x*(x*(x*(x*(x*(x*(pow(x, 2)*(-1.0L/39916800.0L*x + 1.0L/3628800.0L) - 1.0L/120960.0L) + 19.0L/1209600.0L) + 13.0L/172800.0L) - 67.0L/362880.0L) - 41.0L/181440.0L) + 13.0L/21600.0L) + 1.0L/6300.0L) - 1.0L/2310.0L); + poly_val[1] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((1.0L/3628800.0L)*x - 1.0L/362880.0L) - 1.0L/241920.0L) + 13.0L/120960.0L) - 179.0L/1209600.0L) - 19.0L/17280.0L) + 1663.0L/725760.0L) + 1261.0L/362880.0L) - 2447.0L/302400.0L) - 5.0L/2016.0L) + 1.0L/168.0L); + poly_val[2] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-1.0L/725760.0L*x + 1.0L/80640.0L) + 1.0L/25920.0L) - 23.0L/40320.0L) + 73.0L/241920.0L) + 29.0L/3840.0L) - 4399.0L/362880.0L) - 541.0L/20160.0L) + 667.0L/12960.0L) + 5.0L/252.0L) - 5.0L/126.0L); + poly_val[3] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((1.0L/241920.0L)*x - 1.0L/30240.0L) - 13.0L/80640.0L) + 17.0L/10080.0L) + 23.0L/26880.0L) - 13.0L/480.0L) + 5459.0L/241920.0L) + 4369.0L/30240.0L) - 4069.0L/20160.0L) - 5.0L/42.0L) + 5.0L/28.0L); + poly_val[4] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-1.0L/120960.0L*x + 1.0L/17280.0L) + 1.0L/2520.0L) - 1.0L/320.0L) - 43.0L/8064.0L) + 323.0L/5760.0L) + 431.0L/60480.0L) - 1669.0L/4320.0L) + 1069.0L/5040.0L) + 5.0L/6.0L) - 5.0L/7.0L); + poly_val[5] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((1.0L/86400.0L)*x - 1.0L/14400.0L) - 11.0L/17280.0L) + 11.0L/2880.0L) + 341.0L/28800.0L) - 341.0L/4800.0L) - 1529.0L/17280.0L) + 1529.0L/2880.0L) + 5269.0L/21600.0L) - 5269.0L/3600.0L) - 1.0L/6.0L) + 1; + poly_val[6] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-1.0L/86400.0L*x + 1.0L/17280.0L) + 1.0L/1440.0L) - 1.0L/320.0L) - 431.0L/28800.0L) + 323.0L/5760.0L) + 1249.0L/8640.0L) - 1669.0L/4320.0L) - 2269.0L/3600.0L) + 5.0L/6.0L) + 1); + poly_val[7] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((1.0L/120960.0L)*x - 1.0L/30240.0L) - 1.0L/1920.0L) + 17.0L/10080.0L) + 53.0L/4480.0L) - 13.0L/480.0L) - 14197.0L/120960.0L) + 4369.0L/30240.0L) + 667.0L/1440.0L) - 5.0L/42.0L) - 
5.0L/14.0L); + poly_val[8] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-1.0L/241920.0L*x + 1.0L/80640.0L) + 1.0L/3780.0L) - 23.0L/40320.0L) - 479.0L/80640.0L) + 29.0L/3840.0L) + 6563.0L/120960.0L) - 541.0L/20160.0L) - 5069.0L/30240.0L) + 5.0L/252.0L) + 5.0L/42.0L); + poly_val[9] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((1.0L/725760.0L)*x - 1.0L/362880.0L) - 1.0L/11520.0L) + 13.0L/120960.0L) + 89.0L/48384.0L) - 19.0L/17280.0L) - 10837.0L/725760.0L) + 1261.0L/362880.0L) + 371.0L/8640.0L) - 5.0L/2016.0L) - 5.0L/168.0L); + poly_val[10] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-1.0L/3628800.0L*x + 1.0L/3628800.0L) + 1.0L/60480.0L) - 1.0L/120960.0L) - 391.0L/1209600.0L) + 13.0L/172800.0L) + 901.0L/362880.0L) - 41.0L/181440.0L) - 1049.0L/151200.0L) + 1.0L/6300.0L) + 1.0L/210.0L); + poly_val[11] = x*(pow(x, 2)*(pow(x, 2)*(pow(x, 2)*(pow(x, 2)*((1.0L/39916800.0L)*pow(x, 2) - 1.0L/725760.0L) + 31.0L/1209600.0L) - 139.0L/725760.0L) + 479.0L/907200.0L) - 1.0L/2772.0L); + break; + case 1: + poly_val[0] = x*(x*(x*(x*(x*(x*(x*(pow(x, 2)*(-1.0L/3628800.0L*x + 1.0L/362880.0L) - 1.0L/15120.0L) + 19.0L/172800.0L) + 13.0L/28800.0L) - 67.0L/72576.0L) - 41.0L/45360.0L) + 13.0L/7200.0L) + 1.0L/3150.0L) - 1.0L/2310.0L; + poly_val[1] = x*(x*(x*(x*(x*(x*(x*(x*(x*((11.0L/3628800.0L)*x - 1.0L/36288.0L) - 1.0L/26880.0L) + 13.0L/15120.0L) - 179.0L/172800.0L) - 19.0L/2880.0L) + 1663.0L/145152.0L) + 1261.0L/90720.0L) - 2447.0L/100800.0L) - 5.0L/1008.0L) + 1.0L/168.0L; + poly_val[2] = x*(x*(x*(x*(x*(x*(x*(x*(x*(-11.0L/725760.0L*x + 1.0L/8064.0L) + 1.0L/2880.0L) - 23.0L/5040.0L) + 73.0L/34560.0L) + 29.0L/640.0L) - 4399.0L/72576.0L) - 541.0L/5040.0L) + 667.0L/4320.0L) + 5.0L/126.0L) - 5.0L/126.0L; + poly_val[3] = x*(x*(x*(x*(x*(x*(x*(x*(x*((11.0L/241920.0L)*x - 1.0L/3024.0L) - 13.0L/8960.0L) + 17.0L/1260.0L) + 23.0L/3840.0L) - 13.0L/80.0L) + 5459.0L/48384.0L) + 4369.0L/7560.0L) - 4069.0L/6720.0L) - 5.0L/21.0L) + 5.0L/28.0L; + poly_val[4] = x*(x*(x*(x*(x*(x*(x*(x*(x*(-11.0L/120960.0L*x + 1.0L/1728.0L) + 1.0L/280.0L) - 1.0L/40.0L) 
- 43.0L/1152.0L) + 323.0L/960.0L) + 431.0L/12096.0L) - 1669.0L/1080.0L) + 1069.0L/1680.0L) + 5.0L/3.0L) - 5.0L/7.0L; + poly_val[5] = x*(x*(x*(x*(x*(x*(x*(x*(x*((11.0L/86400.0L)*x - 1.0L/1440.0L) - 11.0L/1920.0L) + 11.0L/360.0L) + 2387.0L/28800.0L) - 341.0L/800.0L) - 1529.0L/3456.0L) + 1529.0L/720.0L) + 5269.0L/7200.0L) - 5269.0L/1800.0L) - 1.0L/6.0L; + poly_val[6] = x*(x*(x*(x*(x*(x*(x*(x*(x*(-11.0L/86400.0L*x + 1.0L/1728.0L) + 1.0L/160.0L) - 1.0L/40.0L) - 3017.0L/28800.0L) + 323.0L/960.0L) + 1249.0L/1728.0L) - 1669.0L/1080.0L) - 2269.0L/1200.0L) + 5.0L/3.0L) + 1; + poly_val[7] = x*(x*(x*(x*(x*(x*(x*(x*(x*((11.0L/120960.0L)*x - 1.0L/3024.0L) - 3.0L/640.0L) + 17.0L/1260.0L) + 53.0L/640.0L) - 13.0L/80.0L) - 14197.0L/24192.0L) + 4369.0L/7560.0L) + 667.0L/480.0L) - 5.0L/21.0L) - 5.0L/14.0L; + poly_val[8] = x*(x*(x*(x*(x*(x*(x*(x*(x*(-11.0L/241920.0L*x + 1.0L/8064.0L) + 1.0L/420.0L) - 23.0L/5040.0L) - 479.0L/11520.0L) + 29.0L/640.0L) + 6563.0L/24192.0L) - 541.0L/5040.0L) - 5069.0L/10080.0L) + 5.0L/126.0L) + 5.0L/42.0L; + poly_val[9] = x*(x*(x*(x*(x*(x*(x*(x*(x*((11.0L/725760.0L)*x - 1.0L/36288.0L) - 1.0L/1280.0L) + 13.0L/15120.0L) + 89.0L/6912.0L) - 19.0L/2880.0L) - 10837.0L/145152.0L) + 1261.0L/90720.0L) + 371.0L/2880.0L) - 5.0L/1008.0L) - 5.0L/168.0L; + poly_val[10] = x*(x*(x*(x*(x*(x*(x*(x*(x*(-11.0L/3628800.0L*x + 1.0L/362880.0L) + 1.0L/6720.0L) - 1.0L/15120.0L) - 391.0L/172800.0L) + 13.0L/28800.0L) + 901.0L/72576.0L) - 41.0L/45360.0L) - 1049.0L/50400.0L) + 1.0L/3150.0L) + 1.0L/210.0L; + poly_val[11] = pow(x, 2)*(pow(x, 2)*(pow(x, 2)*(pow(x, 2)*((1.0L/3628800.0L)*pow(x, 2) - 1.0L/80640.0L) + 31.0L/172800.0L) - 139.0L/145152.0L) + 479.0L/302400.0L) - 1.0L/2772.0L; + break; + case 2: + poly_val[0] = x*(x*(x*(x*(x*(x*(pow(x, 2)*(-1.0L/362880.0L*x + 1.0L/40320.0L) - 1.0L/2160.0L) + 19.0L/28800.0L) + 13.0L/5760.0L) - 67.0L/18144.0L) - 41.0L/15120.0L) + 13.0L/3600.0L) + 1.0L/3150.0L; + poly_val[1] = x*(x*(x*(x*(x*(x*(x*(x*((11.0L/362880.0L)*x - 1.0L/4032.0L) - 
1.0L/3360.0L) + 13.0L/2160.0L) - 179.0L/28800.0L) - 19.0L/576.0L) + 1663.0L/36288.0L) + 1261.0L/30240.0L) - 2447.0L/50400.0L) - 5.0L/1008.0L; + poly_val[2] = x*(x*(x*(x*(x*(x*(x*(x*(-11.0L/72576.0L*x + 1.0L/896.0L) + 1.0L/360.0L) - 23.0L/720.0L) + 73.0L/5760.0L) + 29.0L/128.0L) - 4399.0L/18144.0L) - 541.0L/1680.0L) + 667.0L/2160.0L) + 5.0L/126.0L; + poly_val[3] = x*(x*(x*(x*(x*(x*(x*(x*((11.0L/24192.0L)*x - 1.0L/336.0L) - 13.0L/1120.0L) + 17.0L/180.0L) + 23.0L/640.0L) - 13.0L/16.0L) + 5459.0L/12096.0L) + 4369.0L/2520.0L) - 4069.0L/3360.0L) - 5.0L/21.0L; + poly_val[4] = x*(x*(x*(x*(x*(x*(x*(x*(-11.0L/12096.0L*x + 1.0L/192.0L) + 1.0L/35.0L) - 7.0L/40.0L) - 43.0L/192.0L) + 323.0L/192.0L) + 431.0L/3024.0L) - 1669.0L/360.0L) + 1069.0L/840.0L) + 5.0L/3.0L; + poly_val[5] = x*(x*(x*(x*(x*(x*(x*(x*((11.0L/8640.0L)*x - 1.0L/160.0L) - 11.0L/240.0L) + 77.0L/360.0L) + 2387.0L/4800.0L) - 341.0L/160.0L) - 1529.0L/864.0L) + 1529.0L/240.0L) + 5269.0L/3600.0L) - 5269.0L/1800.0L; + poly_val[6] = x*(x*(x*(x*(x*(x*(x*(x*(-11.0L/8640.0L*x + 1.0L/192.0L) + 1.0L/20.0L) - 7.0L/40.0L) - 3017.0L/4800.0L) + 323.0L/192.0L) + 1249.0L/432.0L) - 1669.0L/360.0L) - 2269.0L/600.0L) + 5.0L/3.0L; + poly_val[7] = x*(x*(x*(x*(x*(x*(x*(x*((11.0L/12096.0L)*x - 1.0L/336.0L) - 3.0L/80.0L) + 17.0L/180.0L) + 159.0L/320.0L) - 13.0L/16.0L) - 14197.0L/6048.0L) + 4369.0L/2520.0L) + 667.0L/240.0L) - 5.0L/21.0L; + poly_val[8] = x*(x*(x*(x*(x*(x*(x*(x*(-11.0L/24192.0L*x + 1.0L/896.0L) + 2.0L/105.0L) - 23.0L/720.0L) - 479.0L/1920.0L) + 29.0L/128.0L) + 6563.0L/6048.0L) - 541.0L/1680.0L) - 5069.0L/5040.0L) + 5.0L/126.0L; + poly_val[9] = x*(x*(x*(x*(x*(x*(x*(x*((11.0L/72576.0L)*x - 1.0L/4032.0L) - 1.0L/160.0L) + 13.0L/2160.0L) + 89.0L/1152.0L) - 19.0L/576.0L) - 10837.0L/36288.0L) + 1261.0L/30240.0L) + 371.0L/1440.0L) - 5.0L/1008.0L; + poly_val[10] = x*(x*(x*(x*(x*(x*(x*(x*(-11.0L/362880.0L*x + 1.0L/40320.0L) + 1.0L/840.0L) - 1.0L/2160.0L) - 391.0L/28800.0L) + 13.0L/5760.0L) + 901.0L/18144.0L) - 41.0L/15120.0L) - 
1049.0L/25200.0L) + 1.0L/3150.0L; + poly_val[11] = x*(pow(x, 2)*(pow(x, 2)*(pow(x, 2)*((1.0L/362880.0L)*pow(x, 2) - 1.0L/10080.0L) + 31.0L/28800.0L) - 139.0L/36288.0L) + 479.0L/151200.0L); + break; + } } +void beta_Lagrange_n6(int deriv, double x, double *poly_val) { + switch(deriv) + { + case 0: + poly_val[0] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-1.0L/6227020800.0L*x + 1.0L/479001600.0L) + 1.0L/479001600.0L) - 1.0L/8709120.0L) + 1.0L/4838400.0L) + 31.0L/14515200.0L) - 247.0L/43545600.0L) - 139.0L/8709120.0L) + 1049.0L/21772800.0L) + 479.0L/10886400.0L) - 2791.0L/19958400.0L) - 1.0L/33264.0L) + 1.0L/10296.0L); + poly_val[1] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((1.0L/479001600.0L)*x - 1.0L/39916800.0L) - 31.0L/479001600.0L) + 1.0L/604800.0L) - 29.0L/14515200.0L) - 41.0L/1209600.0L) + 3337.0L/43545600.0L) + 121.0L/453600.0L) - 3893.0L/5443200.0L) - 19.0L/25200.0L) + 7171.0L/3326400.0L) + 1.0L/1925.0L) - 1.0L/660.0L); + poly_val[2] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-1.0L/79833600.0L*x + 1.0L/7257600.0L) + 47.0L/79833600.0L) - 1.0L/96768.0L) + 1.0L/268800.0L) + 601.0L/2419200.0L) - 3169.0L/7257600.0L) - 625.0L/290304.0L) + 8891.0L/1814400.0L) + 643.0L/100800.0L) - 8777.0L/554400.0L) - 1.0L/224.0L) + 1.0L/88.0L); + poly_val[3] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((1.0L/21772800.0L)*x - 1.0L/2177280.0L) - 61.0L/21772800.0L) + 41.0L/1088640.0L) + 181.0L/7257600.0L) - 151.0L/145152.0L) + 23477.0L/21772800.0L) + 3011.0L/272160.0L) - 53293.0L/2721600.0L) - 4969.0L/136080.0L) + 33583.0L/453600.0L) + 5.0L/189.0L) - 1.0L/18.0L); + poly_val[4] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-1.0L/8709120.0L*x + 1.0L/967680.0L) + 73.0L/8709120.0L) - 29.0L/322560.0L) - 479.0L/2903040.0L) + 59.0L/21504.0L) - 3317.0L/8709120.0L) - 33853.0L/967680.0L) + 156529.0L/4354560.0L) + 4469.0L/26880.0L) - 29483.0L/120960.0L) - 15.0L/112.0L) + 5.0L/24.0L); + poly_val[5] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((1.0L/4838400.0L)*x - 1.0L/604800.0L) - 83.0L/4838400.0L) + 1.0L/6720.0L) + 
761.0L/1612800.0L) - 971.0L/201600.0L) - 21169.0L/4838400.0L) + 1039.0L/15120.0L) - 4523.0L/604800.0L) - 1769.0L/4200.0L) + 8783.0L/33600.0L) + 6.0L/7.0L) - 3.0L/4.0L); + poly_val[6] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-1.0L/3628800.0L*x + 1.0L/518400.0L) + 13.0L/518400.0L) - 91.0L/518400.0L) - 143.0L/172800.0L) + 1001.0L/172800.0L) + 44473.0L/3628800.0L) - 44473.0L/518400.0L) - 5291.0L/64800.0L) + 37037.0L/64800.0L) + 767.0L/3600.0L) - 5369.0L/3600.0L) - 1.0L/7.0L) + 1; + poly_val[7] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((1.0L/3628800.0L)*x - 1.0L/604800.0L) - 97.0L/3628800.0L) + 1.0L/6720.0L) + 1181.0L/1209600.0L) - 971.0L/201600.0L) - 61951.0L/3628800.0L) + 1039.0L/15120.0L) + 68207.0L/453600.0L) - 1769.0L/4200.0L) - 15983.0L/25200.0L) + 6.0L/7.0L) + 1); + poly_val[8] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-1.0L/4838400.0L*x + 1.0L/967680.0L) + 101.0L/4838400.0L) - 29.0L/322560.0L) - 1291.0L/1612800.0L) + 59.0L/21504.0L) + 71023.0L/4838400.0L) - 33853.0L/967680.0L) - 317413.0L/2419200.0L) + 4469.0L/26880.0L) + 33083.0L/67200.0L) - 15.0L/112.0L) - 3.0L/8.0L); + poly_val[9] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((1.0L/8709120.0L)*x - 1.0L/2177280.0L) - 103.0L/8709120.0L) + 41.0L/1088640.0L) + 443.0L/967680.0L) - 151.0L/145152.0L) - 71653.0L/8709120.0L) + 3011.0L/272160.0L) + 73169.0L/1088640.0L) - 4969.0L/136080.0L) - 35983.0L/181440.0L) + 5.0L/189.0L) + 5.0L/36.0L); + poly_val[10] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-1.0L/21772800.0L*x + 1.0L/7257600.0L) + 103.0L/21772800.0L) - 1.0L/96768.0L) - 1301.0L/7257600.0L) + 601.0L/2419200.0L) + 66109.0L/21772800.0L) - 625.0L/290304.0L) - 120949.0L/5443200.0L) + 643.0L/100800.0L) + 9227.0L/151200.0L) - 1.0L/224.0L) - 1.0L/24.0L); + poly_val[11] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((1.0L/79833600.0L)*x - 1.0L/39916800.0L) - 101.0L/79833600.0L) + 1.0L/604800.0L) + 37.0L/806400.0L) - 41.0L/1209600.0L) - 5273.0L/7257600.0L) + 121.0L/453600.0L) + 4577.0L/907200.0L) - 19.0L/25200.0L) - 7459.0L/554400.0L) + 1.0L/1925.0L) + 
1.0L/110.0L); + poly_val[12] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-1.0L/479001600.0L*x + 1.0L/479001600.0L) + 97.0L/479001600.0L) - 1.0L/8709120.0L) - 101.0L/14515200.0L) + 31.0L/14515200.0L) + 4601.0L/43545600.0L) - 139.0L/8709120.0L) - 15553.0L/21772800.0L) + 479.0L/10886400.0L) + 37483.0L/19958400.0L) - 1.0L/33264.0L) - 1.0L/792.0L); + poly_val[13] = x*(pow(x, 2)*(pow(x, 2)*(pow(x, 2)*(pow(x, 2)*(pow(x, 2)*((1.0L/6227020800.0L)*pow(x, 2) - 1.0L/68428800.0L) + 1.0L/2073600.0L) - 311.0L/43545600.0L) + 37.0L/777600.0L) - 59.0L/475200.0L) + 1.0L/12012.0L); + break; + case 1: + poly_val[0] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-1.0L/479001600.0L*x + 1.0L/39916800.0L) + 1.0L/43545600.0L) - 1.0L/870912.0L) + 1.0L/537600.0L) + 31.0L/1814400.0L) - 247.0L/6220800.0L) - 139.0L/1451520.0L) + 1049.0L/4354560.0L) + 479.0L/2721600.0L) - 2791.0L/6652800.0L) - 1.0L/16632.0L) + 1.0L/10296.0L; + poly_val[1] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((13.0L/479001600.0L)*x - 1.0L/3326400.0L) - 31.0L/43545600.0L) + 1.0L/60480.0L) - 29.0L/1612800.0L) - 41.0L/151200.0L) + 3337.0L/6220800.0L) + 121.0L/75600.0L) - 3893.0L/1088640.0L) - 19.0L/6300.0L) + 7171.0L/1108800.0L) + 2.0L/1925.0L) - 1.0L/660.0L; + poly_val[2] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-13.0L/79833600.0L*x + 1.0L/604800.0L) + 47.0L/7257600.0L) - 5.0L/48384.0L) + 3.0L/89600.0L) + 601.0L/302400.0L) - 3169.0L/1036800.0L) - 625.0L/48384.0L) + 8891.0L/362880.0L) + 643.0L/25200.0L) - 8777.0L/184800.0L) - 1.0L/112.0L) + 1.0L/88.0L; + poly_val[3] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((13.0L/21772800.0L)*x - 1.0L/181440.0L) - 671.0L/21772800.0L) + 41.0L/108864.0L) + 181.0L/806400.0L) - 151.0L/18144.0L) + 23477.0L/3110400.0L) + 3011.0L/45360.0L) - 53293.0L/544320.0L) - 4969.0L/34020.0L) + 33583.0L/151200.0L) + 10.0L/189.0L) - 1.0L/18.0L; + poly_val[4] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-13.0L/8709120.0L*x + 1.0L/80640.0L) + 803.0L/8709120.0L) - 29.0L/32256.0L) - 479.0L/322560.0L) + 59.0L/2688.0L) - 3317.0L/1244160.0L) - 33853.0L/161280.0L) + 
156529.0L/870912.0L) + 4469.0L/6720.0L) - 29483.0L/40320.0L) - 15.0L/56.0L) + 5.0L/24.0L; + poly_val[5] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((13.0L/4838400.0L)*x - 1.0L/50400.0L) - 913.0L/4838400.0L) + 1.0L/672.0L) + 761.0L/179200.0L) - 971.0L/25200.0L) - 21169.0L/691200.0L) + 1039.0L/2520.0L) - 4523.0L/120960.0L) - 1769.0L/1050.0L) + 8783.0L/11200.0L) + 12.0L/7.0L) - 3.0L/4.0L; + poly_val[6] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-13.0L/3628800.0L*x + 1.0L/43200.0L) + 143.0L/518400.0L) - 91.0L/51840.0L) - 143.0L/19200.0L) + 1001.0L/21600.0L) + 44473.0L/518400.0L) - 44473.0L/86400.0L) - 5291.0L/12960.0L) + 37037.0L/16200.0L) + 767.0L/1200.0L) - 5369.0L/1800.0L) - 1.0L/7.0L; + poly_val[7] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((13.0L/3628800.0L)*x - 1.0L/50400.0L) - 1067.0L/3628800.0L) + 1.0L/672.0L) + 1181.0L/134400.0L) - 971.0L/25200.0L) - 61951.0L/518400.0L) + 1039.0L/2520.0L) + 68207.0L/90720.0L) - 1769.0L/1050.0L) - 15983.0L/8400.0L) + 12.0L/7.0L) + 1; + poly_val[8] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-13.0L/4838400.0L*x + 1.0L/80640.0L) + 1111.0L/4838400.0L) - 29.0L/32256.0L) - 1291.0L/179200.0L) + 59.0L/2688.0L) + 71023.0L/691200.0L) - 33853.0L/161280.0L) - 317413.0L/483840.0L) + 4469.0L/6720.0L) + 33083.0L/22400.0L) - 15.0L/56.0L) - 3.0L/8.0L; + poly_val[9] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((13.0L/8709120.0L)*x - 1.0L/181440.0L) - 1133.0L/8709120.0L) + 41.0L/108864.0L) + 443.0L/107520.0L) - 151.0L/18144.0L) - 71653.0L/1244160.0L) + 3011.0L/45360.0L) + 73169.0L/217728.0L) - 4969.0L/34020.0L) - 35983.0L/60480.0L) + 10.0L/189.0L) + 5.0L/36.0L; + poly_val[10] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-13.0L/21772800.0L*x + 1.0L/604800.0L) + 1133.0L/21772800.0L) - 5.0L/48384.0L) - 1301.0L/806400.0L) + 601.0L/302400.0L) + 66109.0L/3110400.0L) - 625.0L/48384.0L) - 120949.0L/1088640.0L) + 643.0L/25200.0L) + 9227.0L/50400.0L) - 1.0L/112.0L) - 1.0L/24.0L; + poly_val[11] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((13.0L/79833600.0L)*x - 1.0L/3326400.0L) - 101.0L/7257600.0L) + 1.0L/60480.0L) + 
37.0L/89600.0L) - 41.0L/151200.0L) - 5273.0L/1036800.0L) + 121.0L/75600.0L) + 4577.0L/181440.0L) - 19.0L/6300.0L) - 7459.0L/184800.0L) + 2.0L/1925.0L) + 1.0L/110.0L; + poly_val[12] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-13.0L/479001600.0L*x + 1.0L/39916800.0L) + 97.0L/43545600.0L) - 1.0L/870912.0L) - 101.0L/1612800.0L) + 31.0L/1814400.0L) + 4601.0L/6220800.0L) - 139.0L/1451520.0L) - 15553.0L/4354560.0L) + 479.0L/2721600.0L) + 37483.0L/6652800.0L) - 1.0L/16632.0L) - 1.0L/792.0L; + poly_val[13] = pow(x, 2)*(pow(x, 2)*(pow(x, 2)*(pow(x, 2)*(pow(x, 2)*((1.0L/479001600.0L)*pow(x, 2) - 1.0L/6220800.0L) + 1.0L/230400.0L) - 311.0L/6220800.0L) + 37.0L/155520.0L) - 59.0L/158400.0L) + 1.0L/12012.0L; + break; + case 2: + poly_val[0] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-1.0L/39916800.0L*x + 1.0L/3628800.0L) + 1.0L/4354560.0L) - 1.0L/96768.0L) + 1.0L/67200.0L) + 31.0L/259200.0L) - 247.0L/1036800.0L) - 139.0L/290304.0L) + 1049.0L/1088640.0L) + 479.0L/907200.0L) - 2791.0L/3326400.0L) - 1.0L/16632.0L; + poly_val[1] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((13.0L/39916800.0L)*x - 1.0L/302400.0L) - 31.0L/4354560.0L) + 1.0L/6720.0L) - 29.0L/201600.0L) - 41.0L/21600.0L) + 3337.0L/1036800.0L) + 121.0L/15120.0L) - 3893.0L/272160.0L) - 19.0L/2100.0L) + 7171.0L/554400.0L) + 2.0L/1925.0L; + poly_val[2] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-13.0L/6652800.0L*x + 11.0L/604800.0L) + 47.0L/725760.0L) - 5.0L/5376.0L) + 3.0L/11200.0L) + 601.0L/43200.0L) - 3169.0L/172800.0L) - 3125.0L/48384.0L) + 8891.0L/90720.0L) + 643.0L/8400.0L) - 8777.0L/92400.0L) - 1.0L/112.0L; + poly_val[3] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((13.0L/1814400.0L)*x - 11.0L/181440.0L) - 671.0L/2177280.0L) + 41.0L/12096.0L) + 181.0L/100800.0L) - 151.0L/2592.0L) + 23477.0L/518400.0L) + 3011.0L/9072.0L) - 53293.0L/136080.0L) - 4969.0L/11340.0L) + 33583.0L/75600.0L) + 10.0L/189.0L; + poly_val[4] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-13.0L/725760.0L*x + 11.0L/80640.0L) + 803.0L/870912.0L) - 29.0L/3584.0L) - 479.0L/40320.0L) + 59.0L/384.0L) - 3317.0L/207360.0L) - 
33853.0L/32256.0L) + 156529.0L/217728.0L) + 4469.0L/2240.0L) - 29483.0L/20160.0L) - 15.0L/56.0L; + poly_val[5] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((13.0L/403200.0L)*x - 11.0L/50400.0L) - 913.0L/483840.0L) + 3.0L/224.0L) + 761.0L/22400.0L) - 971.0L/3600.0L) - 21169.0L/115200.0L) + 1039.0L/504.0L) - 4523.0L/30240.0L) - 1769.0L/350.0L) + 8783.0L/5600.0L) + 12.0L/7.0L; + poly_val[6] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-13.0L/302400.0L*x + 11.0L/43200.0L) + 143.0L/51840.0L) - 91.0L/5760.0L) - 143.0L/2400.0L) + 7007.0L/21600.0L) + 44473.0L/86400.0L) - 44473.0L/17280.0L) - 5291.0L/3240.0L) + 37037.0L/5400.0L) + 767.0L/600.0L) - 5369.0L/1800.0L; + poly_val[7] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((13.0L/302400.0L)*x - 11.0L/50400.0L) - 1067.0L/362880.0L) + 3.0L/224.0L) + 1181.0L/16800.0L) - 971.0L/3600.0L) - 61951.0L/86400.0L) + 1039.0L/504.0L) + 68207.0L/22680.0L) - 1769.0L/350.0L) - 15983.0L/4200.0L) + 12.0L/7.0L; + poly_val[8] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-13.0L/403200.0L*x + 11.0L/80640.0L) + 1111.0L/483840.0L) - 29.0L/3584.0L) - 1291.0L/22400.0L) + 59.0L/384.0L) + 71023.0L/115200.0L) - 33853.0L/32256.0L) - 317413.0L/120960.0L) + 4469.0L/2240.0L) + 33083.0L/11200.0L) - 15.0L/56.0L; + poly_val[9] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((13.0L/725760.0L)*x - 11.0L/181440.0L) - 1133.0L/870912.0L) + 41.0L/12096.0L) + 443.0L/13440.0L) - 151.0L/2592.0L) - 71653.0L/207360.0L) + 3011.0L/9072.0L) + 73169.0L/54432.0L) - 4969.0L/11340.0L) - 35983.0L/30240.0L) + 10.0L/189.0L; + poly_val[10] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-13.0L/1814400.0L*x + 11.0L/604800.0L) + 1133.0L/2177280.0L) - 5.0L/5376.0L) - 1301.0L/100800.0L) + 601.0L/43200.0L) + 66109.0L/518400.0L) - 3125.0L/48384.0L) - 120949.0L/272160.0L) + 643.0L/8400.0L) + 9227.0L/25200.0L) - 1.0L/112.0L; + poly_val[11] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((13.0L/6652800.0L)*x - 1.0L/302400.0L) - 101.0L/725760.0L) + 1.0L/6720.0L) + 37.0L/11200.0L) - 41.0L/21600.0L) - 5273.0L/172800.0L) + 121.0L/15120.0L) + 4577.0L/45360.0L) - 19.0L/2100.0L) - 
7459.0L/92400.0L) + 2.0L/1925.0L; + poly_val[12] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-13.0L/39916800.0L*x + 1.0L/3628800.0L) + 97.0L/4354560.0L) - 1.0L/96768.0L) - 101.0L/201600.0L) + 31.0L/259200.0L) + 4601.0L/1036800.0L) - 139.0L/290304.0L) - 15553.0L/1088640.0L) + 479.0L/907200.0L) + 37483.0L/3326400.0L) - 1.0L/16632.0L; + poly_val[13] = x*(pow(x, 2)*(pow(x, 2)*(pow(x, 2)*(pow(x, 2)*((1.0L/39916800.0L)*pow(x, 2) - 1.0L/622080.0L) + 1.0L/28800.0L) - 311.0L/1036800.0L) + 37.0L/38880.0L) - 59.0L/79200.0L); + break; + } } -void beta_Lagrange_n4(int deriv, double x, double *poly_val){ -switch(deriv) -{ -case 0: -poly_val[0] = x*(x*(x*(x*(x*(x*(x*(x*(-1.0L/362880.0L*x + 1.0L/40320.0L) - 1.0L/60480.0L) - 1.0L/2880.0L) + 11.0L/17280.0L) + 7.0L/5760.0L) - 59.0L/22680.0L) - 1.0L/1120.0L) + 1.0L/504.0L); -poly_val[1] = x*(x*(x*(x*(x*(x*(x*(x*((1.0L/40320.0L)*x - 1.0L/5040.0L) - 1.0L/6720.0L) + 1.0L/240.0L) - 11.0L/1920.0L) - 1.0L/60.0L) + 299.0L/10080.0L) + 4.0L/315.0L) - 1.0L/42.0L); -poly_val[2] = x*(x*(x*(x*(x*(x*(x*(x*(-1.0L/10080.0L*x + 1.0L/1440.0L) + 1.0L/630.0L) - 13.0L/720.0L) + 13.0L/1440.0L) + 169.0L/1440.0L) - 773.0L/5040.0L) - 1.0L/10.0L) + 1.0L/7.0L); -poly_val[3] = x*(x*(x*(x*(x*(x*(x*(x*((1.0L/4320.0L)*x - 1.0L/720.0L) - 1.0L/180.0L) + 29.0L/720.0L) + 11.0L/480.0L) - 61.0L/180.0L) + 161.0L/1080.0L) + 4.0L/5.0L) - 2.0L/3.0L); -poly_val[4] = x*(x*(x*(x*(x*(x*(x*(x*(-1.0L/2880.0L*x + 1.0L/576.0L) + 1.0L/96.0L) - 5.0L/96.0L) - 91.0L/960.0L) + 91.0L/192.0L) + 41.0L/144.0L) - 205.0L/144.0L) - 1.0L/5.0L) + 1; -poly_val[5] = x*(x*(x*(x*(x*(x*(x*(x*((1.0L/2880.0L)*x - 1.0L/720.0L) - 17.0L/1440.0L) + 29.0L/720.0L) + 389.0L/2880.0L) - 61.0L/180.0L) - 449.0L/720.0L) + 4.0L/5.0L) + 1); -poly_val[6] = x*(x*(x*(x*(x*(x*(x*(x*(-1.0L/4320.0L*x + 1.0L/1440.0L) + 1.0L/120.0L) - 13.0L/720.0L) - 143.0L/1440.0L) + 169.0L/1440.0L) + 917.0L/2160.0L) - 1.0L/10.0L) - 1.0L/3.0L); -poly_val[7] = x*(x*(x*(x*(x*(x*(x*(x*((1.0L/10080.0L)*x - 1.0L/5040.0L) - 1.0L/280.0L) + 1.0L/240.0L) + 
19.0L/480.0L) - 1.0L/60.0L) - 331.0L/2520.0L) + 4.0L/315.0L) + 2.0L/21.0L); -poly_val[8] = x*(x*(x*(x*(x*(x*(x*(x*(-1.0L/40320.0L*x + 1.0L/40320.0L) + 17.0L/20160.0L) - 1.0L/2880.0L) - 47.0L/5760.0L) + 7.0L/5760.0L) + 127.0L/5040.0L) - 1.0L/1120.0L) - 1.0L/56.0L); -poly_val[9] = x*(pow(x, 2)*(pow(x, 2)*(pow(x, 2)*((1.0L/362880.0L)*pow(x, 2) - 1.0L/12096.0L) + 13.0L/17280.0L) - 41.0L/18144.0L) + 1.0L/630.0L); -break; -case 1: -poly_val[0] = x*(x*(x*(x*(x*(x*(x*(-1.0L/40320.0L*x + 1.0L/5040.0L) - 1.0L/8640.0L) - 1.0L/480.0L) + 11.0L/3456.0L) + 7.0L/1440.0L) - 59.0L/7560.0L) - 1.0L/560.0L) + 1.0L/504.0L; -poly_val[1] = x*(x*(x*(x*(x*(x*(x*((1.0L/4480.0L)*x - 1.0L/630.0L) - 1.0L/960.0L) + 1.0L/40.0L) - 11.0L/384.0L) - 1.0L/15.0L) + 299.0L/3360.0L) + 8.0L/315.0L) - 1.0L/42.0L; -poly_val[2] = x*(x*(x*(x*(x*(x*(x*(-1.0L/1120.0L*x + 1.0L/180.0L) + 1.0L/90.0L) - 13.0L/120.0L) + 13.0L/288.0L) + 169.0L/360.0L) - 773.0L/1680.0L) - 1.0L/5.0L) + 1.0L/7.0L; -poly_val[3] = x*(x*(x*(x*(x*(x*(x*((1.0L/480.0L)*x - 1.0L/90.0L) - 7.0L/180.0L) + 29.0L/120.0L) + 11.0L/96.0L) - 61.0L/45.0L) + 161.0L/360.0L) + 8.0L/5.0L) - 2.0L/3.0L; -poly_val[4] = x*(x*(x*(x*(x*(x*(x*(-1.0L/320.0L*x + 1.0L/72.0L) + 7.0L/96.0L) - 5.0L/16.0L) - 91.0L/192.0L) + 91.0L/48.0L) + 41.0L/48.0L) - 205.0L/72.0L) - 1.0L/5.0L; -poly_val[5] = x*(x*(x*(x*(x*(x*(x*((1.0L/320.0L)*x - 1.0L/90.0L) - 119.0L/1440.0L) + 29.0L/120.0L) + 389.0L/576.0L) - 61.0L/45.0L) - 449.0L/240.0L) + 8.0L/5.0L) + 1; -poly_val[6] = x*(x*(x*(x*(x*(x*(x*(-1.0L/480.0L*x + 1.0L/180.0L) + 7.0L/120.0L) - 13.0L/120.0L) - 143.0L/288.0L) + 169.0L/360.0L) + 917.0L/720.0L) - 1.0L/5.0L) - 1.0L/3.0L; -poly_val[7] = x*(x*(x*(x*(x*(x*(x*((1.0L/1120.0L)*x - 1.0L/630.0L) - 1.0L/40.0L) + 1.0L/40.0L) + 19.0L/96.0L) - 1.0L/15.0L) - 331.0L/840.0L) + 8.0L/315.0L) + 2.0L/21.0L; -poly_val[8] = x*(x*(x*(x*(x*(x*(x*(-1.0L/4480.0L*x + 1.0L/5040.0L) + 17.0L/2880.0L) - 1.0L/480.0L) - 47.0L/1152.0L) + 7.0L/1440.0L) + 127.0L/1680.0L) - 1.0L/560.0L) - 1.0L/56.0L; -poly_val[9] 
= pow(x, 2)*(pow(x, 2)*(pow(x, 2)*((1.0L/40320.0L)*pow(x, 2) - 1.0L/1728.0L) + 13.0L/3456.0L) - 41.0L/6048.0L) + 1.0L/630.0L; -break; -case 2: -poly_val[0] = x*(x*(x*(x*(x*(x*(-1.0L/5040.0L*x + 1.0L/720.0L) - 1.0L/1440.0L) - 1.0L/96.0L) + 11.0L/864.0L) + 7.0L/480.0L) - 59.0L/3780.0L) - 1.0L/560.0L; -poly_val[1] = x*(x*(x*(x*(x*(x*((1.0L/560.0L)*x - 1.0L/90.0L) - 1.0L/160.0L) + 1.0L/8.0L) - 11.0L/96.0L) - 1.0L/5.0L) + 299.0L/1680.0L) + 8.0L/315.0L; -poly_val[2] = x*(x*(x*(x*(x*(x*(-1.0L/140.0L*x + 7.0L/180.0L) + 1.0L/15.0L) - 13.0L/24.0L) + 13.0L/72.0L) + 169.0L/120.0L) - 773.0L/840.0L) - 1.0L/5.0L; -poly_val[3] = x*(x*(x*(x*(x*(x*((1.0L/60.0L)*x - 7.0L/90.0L) - 7.0L/30.0L) + 29.0L/24.0L) + 11.0L/24.0L) - 61.0L/15.0L) + 161.0L/180.0L) + 8.0L/5.0L; -poly_val[4] = x*(x*(x*(x*(x*(x*(-1.0L/40.0L*x + 7.0L/72.0L) + 7.0L/16.0L) - 25.0L/16.0L) - 91.0L/48.0L) + 91.0L/16.0L) + 41.0L/24.0L) - 205.0L/72.0L; -poly_val[5] = x*(x*(x*(x*(x*(x*((1.0L/40.0L)*x - 7.0L/90.0L) - 119.0L/240.0L) + 29.0L/24.0L) + 389.0L/144.0L) - 61.0L/15.0L) - 449.0L/120.0L) + 8.0L/5.0L; -poly_val[6] = x*(x*(x*(x*(x*(x*(-1.0L/60.0L*x + 7.0L/180.0L) + 7.0L/20.0L) - 13.0L/24.0L) - 143.0L/72.0L) + 169.0L/120.0L) + 917.0L/360.0L) - 1.0L/5.0L; -poly_val[7] = x*(x*(x*(x*(x*(x*((1.0L/140.0L)*x - 1.0L/90.0L) - 3.0L/20.0L) + 1.0L/8.0L) + 19.0L/24.0L) - 1.0L/5.0L) - 331.0L/420.0L) + 8.0L/315.0L; -poly_val[8] = x*(x*(x*(x*(x*(x*(-1.0L/560.0L*x + 1.0L/720.0L) + 17.0L/480.0L) - 1.0L/96.0L) - 47.0L/288.0L) + 7.0L/480.0L) + 127.0L/840.0L) - 1.0L/560.0L; -poly_val[9] = x*(pow(x, 2)*(pow(x, 2)*((1.0L/5040.0L)*pow(x, 2) - 1.0L/288.0L) + 13.0L/864.0L) - 41.0L/3024.0L); -break; +void beta_Lagrange_n7(int deriv, double x, double *poly_val) { + switch(deriv) + { + case 0: + poly_val[0] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-1.0L/1307674368000.0L*x + 1.0L/87178291200.0L) + 1.0L/37362124800.0L) - 1.0L/958003200.0L) + 23.0L/14370048000.0L) + 1.0L/29030400.0L) - 173.0L/1828915200.0L) - 311.0L/609638400.0L) + 
137.0L/81648000.0L) + 37.0L/10886400.0L) - 2173.0L/179625600.0L) - 59.0L/6652800.0L) + 37133.0L/1135134000.0L) + 1.0L/168168.0L) - 1.0L/45045.0L); + poly_val[1] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((1.0L/87178291200.0L)*x - 1.0L/6227020800.0L) - 1.0L/1556755200.0L) + 1.0L/59875200.0L) - 1.0L/68428800.0L) - 13.0L/21772800.0L) + 211.0L/152409600.0L) + 101.0L/10886400.0L) - 95.0L/3483648.0L) - 2767.0L/43545600.0L) + 7043.0L/34214400.0L) + 20137.0L/119750400.0L) - 129053.0L/227026800.0L) - 7.0L/61776.0L) + 1.0L/2574.0L); + poly_val[2] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-1.0L/12454041600.0L*x + 1.0L/958003200.0L) + 1.0L/166053888.0L) - 23.0L/191600640.0L) + 1.0L/958003200.0L) + 139.0L/29030400.0L) - 149.0L/17418240.0L) - 1399.0L/17418240.0L) + 23.0L/113400.0L) + 6271.0L/10886400.0L) - 19787.0L/11975040.0L) - 2077.0L/1330560.0L) + 51043.0L/10810800.0L) + 7.0L/6600.0L) - 7.0L/2145.0L); + poly_val[3] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((1.0L/2874009600.0L)*x - 1.0L/239500800.0L) - 23.0L/718502400.0L) + 31.0L/59875200.0L) + 151.0L/287400960.0L) - 83.0L/3628800.0L) + 1621.0L/65318400.0L) + 2363.0L/5443200.0L) - 228653.0L/261273600.0L) - 73811.0L/21772800.0L) + 2960071.0L/359251200.0L) + 31957.0L/3326400.0L) - 500287.0L/19958400.0L) - 7.0L/1056.0L) + 7.0L/396.0L); + poly_val[4] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-1.0L/958003200.0L*x + 1.0L/87091200.0L) + 107.0L/958003200.0L) - 131.0L/87091200.0L) - 3139.0L/958003200.0L) + 6283.0L/87091200.0L) - 1429.0L/87091200.0L) - 135073.0L/87091200.0L) + 11261.0L/5443200.0L) + 157477.0L/10886400.0L) - 1642643.0L/59875200.0L) - 247081.0L/5443200.0L) + 239731.0L/2494800.0L) + 7.0L/216.0L) - 7.0L/99.0L); + poly_val[5] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((1.0L/435456000.0L)*x - 1.0L/43545600.0L) - 1.0L/3628800.0L) + 17.0L/5443200.0L) + 2371.0L/217728000.0L) - 1153.0L/7257600.0L) - 2663.0L/21772800.0L) + 40987.0L/10886400.0L) - 267829.0L/145152000.0L) - 1819681.0L/43545600.0L) + 1055099.0L/21772800.0L) 
+ 222581.0L/1209600.0L) - 52889.0L/189000.0L) - 7.0L/48.0L) + 7.0L/30.0L); + poly_val[6] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-1.0L/261273600.0L*x + 1.0L/29030400.0L) + 131.0L/261273600.0L) - 139.0L/29030400.0L) - 6211.0L/261273600.0L) + 2441.0L/9676800.0L) + 17959.0L/37324800.0L) - 184297.0L/29030400.0L) - 1021.0L/326592.0L) + 286397.0L/3628800.0L) - 333059.0L/16329600.0L) - 90281.0L/201600.0L) + 68231.0L/226800.0L) + 7.0L/8.0L) - 7.0L/9.0L); + poly_val[7] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((1.0L/203212800.0L)*x - 1.0L/25401600.0L) - 1.0L/1451520.0L) + 1.0L/181440.0L) + 533.0L/14515200.0L) - 533.0L/1814400.0L) - 9581.0L/10160640.0L) + 9581.0L/1270080.0L) + 353639.0L/29030400.0L) - 353639.0L/3628800.0L) - 54613.0L/725760.0L) + 54613.0L/90720.0L) + 266681.0L/1411200.0L) - 266681.0L/176400.0L) - 1.0L/8.0L) + 1; + poly_val[8] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-1.0L/203212800.0L*x + 1.0L/29030400.0L) + 1.0L/1382400.0L) - 139.0L/29030400.0L) - 241.0L/5806080.0L) + 2441.0L/9676800.0L) + 242881.0L/203212800.0L) - 184297.0L/29030400.0L) - 1601.0L/86400.0L) + 286397.0L/3628800.0L) + 279731.0L/1814400.0L) - 90281.0L/201600.0L) - 112331.0L/176400.0L) + 7.0L/8.0L) + 1); + poly_val[9] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((1.0L/261273600.0L)*x - 1.0L/43545600.0L) - 19.0L/32659200.0L) + 17.0L/5443200.0L) + 4547.0L/130636800.0L) - 1153.0L/7257600.0L) - 68659.0L/65318400.0L) + 40987.0L/10886400.0L) + 4442849.0L/261273600.0L) - 1819681.0L/43545600.0L) - 9281953.0L/65318400.0L) + 222581.0L/1209600.0L) + 116803.0L/226800.0L) - 7.0L/48.0L) - 7.0L/18.0L); + poly_val[10] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-1.0L/435456000.0L*x + 1.0L/87091200.0L) + 31.0L/87091200.0L) - 131.0L/87091200.0L) - 9427.0L/435456000.0L) + 6283.0L/87091200.0L) + 57173.0L/87091200.0L) - 135073.0L/87091200.0L) - 70337.0L/6804000.0L) + 157477.0L/10886400.0L) + 427361.0L/5443200.0L) - 247081.0L/5443200.0L) - 254431.0L/1134000.0L) + 7.0L/216.0L) + 7.0L/45.0L); + poly_val[11] = 
x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((1.0L/958003200.0L)*x - 1.0L/239500800.0L) - 13.0L/79833600.0L) + 31.0L/59875200.0L) + 4723.0L/479001600.0L) - 83.0L/3628800.0L) - 6347.0L/21772800.0L) + 2363.0L/5443200.0L) + 5017.0L/1161216.0L) - 73811.0L/21772800.0L) - 3535297.0L/119750400.0L) + 31957.0L/3326400.0L) + 522337.0L/6652800.0L) - 7.0L/1056.0L) - 7.0L/132.0L); + poly_val[12] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-1.0L/2874009600.0L*x + 1.0L/958003200.0L) + 31.0L/574801920.0L) - 23.0L/191600640.0L) - 9187.0L/2874009600.0L) + 139.0L/29030400.0L) + 947.0L/10450944.0L) - 1399.0L/17418240.0L) - 20623.0L/16329600.0L) + 6271.0L/10886400.0L) + 294617.0L/35925120.0L) - 2077.0L/1330560.0L) - 52807.0L/2494800.0L) + 7.0L/6600.0L) + 7.0L/495.0L); + poly_val[13] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((1.0L/12454041600.0L)*x - 1.0L/6227020800.0L) - 19.0L/1556755200.0L) + 1.0L/59875200.0L) + 67.0L/95800320.0L) - 13.0L/21772800.0L) - 59.0L/3110400.0L) + 101.0L/10886400.0L) + 22159.0L/87091200.0L) - 2767.0L/43545600.0L) - 385381.0L/239500800.0L) + 20137.0L/119750400.0L) + 16591.0L/4054050.0L) - 7.0L/61776.0L) - 7.0L/2574.0L); + poly_val[14] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-1.0L/87178291200.0L*x + 1.0L/87178291200.0L) + 1.0L/593049600.0L) - 1.0L/958003200.0L) - 89.0L/958003200.0L) + 1.0L/29030400.0L) + 1487.0L/609638400.0L) - 311.0L/609638400.0L) - 29.0L/907200.0L) + 37.0L/10886400.0L) + 11927.0L/59875200.0L) - 59.0L/6652800.0L) - 38033.0L/75675600.0L) + 1.0L/168168.0L) + 1.0L/3003.0L); + poly_val[15] = x*(pow(x, 2)*(pow(x, 2)*(pow(x, 2)*(pow(x, 2)*(pow(x, 2)*(pow(x, 2)*((1.0L/1307674368000.0L)*pow(x, 2) - 1.0L/9340531200.0L) + 41.0L/7185024000.0L) - 67.0L/457228800.0L) + 2473.0L/1306368000.0L) - 4201.0L/359251200.0L) + 266681.0L/9081072000.0L) - 1.0L/51480.0L); + break; + case 1: + poly_val[0] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-1.0L/87178291200.0L*x + 1.0L/6227020800.0L) + 1.0L/2874009600.0L) - 1.0L/79833600.0L) + 23.0L/1306368000.0L) + 
1.0L/2903040.0L) - 173.0L/203212800.0L) - 311.0L/76204800.0L) + 137.0L/11664000.0L) + 37.0L/1814400.0L) - 2173.0L/35925120.0L) - 59.0L/1663200.0L) + 37133.0L/378378000.0L) + 1.0L/84084.0L) - 1.0L/45045.0L; + poly_val[1] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((1.0L/5811886080.0L)*x - 1.0L/444787200.0L) - 1.0L/119750400.0L) + 1.0L/4989600.0L) - 1.0L/6220800.0L) - 13.0L/2177280.0L) + 211.0L/16934400.0L) + 101.0L/1360800.0L) - 95.0L/497664.0L) - 2767.0L/7257600.0L) + 7043.0L/6842880.0L) + 20137.0L/29937600.0L) - 129053.0L/75675600.0L) - 7.0L/30888.0L) + 1.0L/2574.0L; + poly_val[2] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-1.0L/830269440.0L*x + 1.0L/68428800.0L) + 1.0L/12773376.0L) - 23.0L/15966720.0L) + 1.0L/87091200.0L) + 139.0L/2903040.0L) - 149.0L/1935360.0L) - 1399.0L/2177280.0L) + 23.0L/16200.0L) + 6271.0L/1814400.0L) - 19787.0L/2395008.0L) - 2077.0L/332640.0L) + 51043.0L/3603600.0L) + 7.0L/3300.0L) - 7.0L/2145.0L; + poly_val[3] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((1.0L/191600640.0L)*x - 1.0L/17107200.0L) - 299.0L/718502400.0L) + 31.0L/4989600.0L) + 151.0L/26127360.0L) - 83.0L/362880.0L) + 1621.0L/7257600.0L) + 2363.0L/680400.0L) - 228653.0L/37324800.0L) - 73811.0L/3628800.0L) + 2960071.0L/71850240.0L) + 31957.0L/831600.0L) - 500287.0L/6652800.0L) - 7.0L/528.0L) + 7.0L/396.0L; + poly_val[4] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-1.0L/63866880.0L*x + 1.0L/6220800.0L) + 1391.0L/958003200.0L) - 131.0L/7257600.0L) - 3139.0L/87091200.0L) + 6283.0L/8709120.0L) - 1429.0L/9676800.0L) - 135073.0L/10886400.0L) + 11261.0L/777600.0L) + 157477.0L/1814400.0L) - 1642643.0L/11975040.0L) - 247081.0L/1360800.0L) + 239731.0L/831600.0L) + 7.0L/108.0L) - 7.0L/99.0L; + poly_val[5] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((1.0L/29030400.0L)*x - 1.0L/3110400.0L) - 13.0L/3628800.0L) + 17.0L/453600.0L) + 26081.0L/217728000.0L) - 1153.0L/725760.0L) - 2663.0L/2419200.0L) + 40987.0L/1360800.0L) - 267829.0L/20736000.0L) - 1819681.0L/7257600.0L) + 1055099.0L/4354560.0L) + 
222581.0L/302400.0L) - 52889.0L/63000.0L) - 7.0L/24.0L) + 7.0L/30.0L; + poly_val[6] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-1.0L/17418240.0L*x + 1.0L/2073600.0L) + 1703.0L/261273600.0L) - 139.0L/2419200.0L) - 68321.0L/261273600.0L) + 2441.0L/967680.0L) + 17959.0L/4147200.0L) - 184297.0L/3628800.0L) - 1021.0L/46656.0L) + 286397.0L/604800.0L) - 333059.0L/3265920.0L) - 90281.0L/50400.0L) + 68231.0L/75600.0L) + 7.0L/4.0L) - 7.0L/9.0L; + poly_val[7] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((1.0L/13547520.0L)*x - 1.0L/1814400.0L) - 13.0L/1451520.0L) + 1.0L/15120.0L) + 5863.0L/14515200.0L) - 533.0L/181440.0L) - 9581.0L/1128960.0L) + 9581.0L/158760.0L) + 353639.0L/4147200.0L) - 353639.0L/604800.0L) - 54613.0L/145152.0L) + 54613.0L/22680.0L) + 266681.0L/470400.0L) - 266681.0L/88200.0L) - 1.0L/8.0L; + poly_val[8] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-1.0L/13547520.0L*x + 1.0L/2073600.0L) + 13.0L/1382400.0L) - 139.0L/2419200.0L) - 2651.0L/5806080.0L) + 2441.0L/967680.0L) + 242881.0L/22579200.0L) - 184297.0L/3628800.0L) - 11207.0L/86400.0L) + 286397.0L/604800.0L) + 279731.0L/362880.0L) - 90281.0L/50400.0L) - 112331.0L/58800.0L) + 7.0L/4.0L) + 1; + poly_val[9] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((1.0L/17418240.0L)*x - 1.0L/3110400.0L) - 247.0L/32659200.0L) + 17.0L/453600.0L) + 50017.0L/130636800.0L) - 1153.0L/725760.0L) - 68659.0L/7257600.0L) + 40987.0L/1360800.0L) + 4442849.0L/37324800.0L) - 1819681.0L/7257600.0L) - 9281953.0L/13063680.0L) + 222581.0L/302400.0L) + 116803.0L/75600.0L) - 7.0L/24.0L) - 7.0L/18.0L; + poly_val[10] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-1.0L/29030400.0L*x + 1.0L/6220800.0L) + 403.0L/87091200.0L) - 131.0L/7257600.0L) - 103697.0L/435456000.0L) + 6283.0L/8709120.0L) + 57173.0L/9676800.0L) - 135073.0L/10886400.0L) - 70337.0L/972000.0L) + 157477.0L/1814400.0L) + 427361.0L/1088640.0L) - 247081.0L/1360800.0L) - 254431.0L/378000.0L) + 7.0L/108.0L) + 7.0L/45.0L; + poly_val[11] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((1.0L/63866880.0L)*x - 
1.0L/17107200.0L) - 169.0L/79833600.0L) + 31.0L/4989600.0L) + 4723.0L/43545600.0L) - 83.0L/362880.0L) - 6347.0L/2419200.0L) + 2363.0L/680400.0L) + 5017.0L/165888.0L) - 73811.0L/3628800.0L) - 3535297.0L/23950080.0L) + 31957.0L/831600.0L) + 522337.0L/2217600.0L) - 7.0L/528.0L) - 7.0L/132.0L; + poly_val[12] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-1.0L/191600640.0L*x + 1.0L/68428800.0L) + 403.0L/574801920.0L) - 23.0L/15966720.0L) - 9187.0L/261273600.0L) + 139.0L/2903040.0L) + 947.0L/1161216.0L) - 1399.0L/2177280.0L) - 20623.0L/2332800.0L) + 6271.0L/1814400.0L) + 294617.0L/7185024.0L) - 2077.0L/332640.0L) - 52807.0L/831600.0L) + 7.0L/3300.0L) + 7.0L/495.0L; + poly_val[13] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((1.0L/830269440.0L)*x - 1.0L/444787200.0L) - 19.0L/119750400.0L) + 1.0L/4989600.0L) + 67.0L/8709120.0L) - 13.0L/2177280.0L) - 59.0L/345600.0L) + 101.0L/1360800.0L) + 22159.0L/12441600.0L) - 2767.0L/7257600.0L) - 385381.0L/47900160.0L) + 20137.0L/29937600.0L) + 16591.0L/1351350.0L) - 7.0L/30888.0L) - 7.0L/2574.0L; + poly_val[14] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-1.0L/5811886080.0L*x + 1.0L/6227020800.0L) + 1.0L/45619200.0L) - 1.0L/79833600.0L) - 89.0L/87091200.0L) + 1.0L/2903040.0L) + 1487.0L/67737600.0L) - 311.0L/76204800.0L) - 29.0L/129600.0L) + 37.0L/1814400.0L) + 11927.0L/11975040.0L) - 59.0L/1663200.0L) - 38033.0L/25225200.0L) + 1.0L/84084.0L) + 1.0L/3003.0L; + poly_val[15] = pow(x, 2)*(pow(x, 2)*(pow(x, 2)*(pow(x, 2)*(pow(x, 2)*(pow(x, 2)*((1.0L/87178291200.0L)*pow(x, 2) - 1.0L/718502400.0L) + 41.0L/653184000.0L) - 67.0L/50803200.0L) + 2473.0L/186624000.0L) - 4201.0L/71850240.0L) + 266681.0L/3027024000.0L) - 1.0L/51480.0L; + break; + case 2: + poly_val[0] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-1.0L/6227020800.0L*x + 1.0L/479001600.0L) + 1.0L/239500800.0L) - 1.0L/7257600.0L) + 23.0L/130636800.0L) + 1.0L/322560.0L) - 173.0L/25401600.0L) - 311.0L/10886400.0L) + 137.0L/1944000.0L) + 37.0L/362880.0L) - 2173.0L/8981280.0L) - 59.0L/554400.0L) + 
37133.0L/189189000.0L) + 1.0L/84084.0L; + poly_val[1] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((1.0L/415134720.0L)*x - 1.0L/34214400.0L) - 1.0L/9979200.0L) + 1.0L/453600.0L) - 1.0L/622080.0L) - 13.0L/241920.0L) + 211.0L/2116800.0L) + 101.0L/194400.0L) - 95.0L/82944.0L) - 2767.0L/1451520.0L) + 7043.0L/1710720.0L) + 20137.0L/9979200.0L) - 129053.0L/37837800.0L) - 7.0L/30888.0L; + poly_val[2] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-1.0L/59304960.0L*x + 13.0L/68428800.0L) + 1.0L/1064448.0L) - 23.0L/1451520.0L) + 1.0L/8709120.0L) + 139.0L/322560.0L) - 149.0L/241920.0L) - 1399.0L/311040.0L) + 23.0L/2700.0L) + 6271.0L/362880.0L) - 19787.0L/598752.0L) - 2077.0L/110880.0L) + 51043.0L/1801800.0L) + 7.0L/3300.0L; + poly_val[3] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((1.0L/13685760.0L)*x - 13.0L/17107200.0L) - 299.0L/59875200.0L) + 31.0L/453600.0L) + 151.0L/2612736.0L) - 83.0L/40320.0L) + 1621.0L/907200.0L) + 2363.0L/97200.0L) - 228653.0L/6220800.0L) - 73811.0L/725760.0L) + 2960071.0L/17962560.0L) + 31957.0L/277200.0L) - 500287.0L/3326400.0L) - 7.0L/528.0L; + poly_val[4] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-1.0L/4561920.0L*x + 13.0L/6220800.0L) + 1391.0L/79833600.0L) - 1441.0L/7257600.0L) - 3139.0L/8709120.0L) + 6283.0L/967680.0L) - 1429.0L/1209600.0L) - 135073.0L/1555200.0L) + 11261.0L/129600.0L) + 157477.0L/362880.0L) - 1642643.0L/2993760.0L) - 247081.0L/453600.0L) + 239731.0L/415800.0L) + 7.0L/108.0L; + poly_val[5] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((1.0L/2073600.0L)*x - 13.0L/3110400.0L) - 13.0L/302400.0L) + 187.0L/453600.0L) + 26081.0L/21772800.0L) - 1153.0L/80640.0L) - 2663.0L/302400.0L) + 40987.0L/194400.0L) - 267829.0L/3456000.0L) - 1819681.0L/1451520.0L) + 1055099.0L/1088640.0L) + 222581.0L/100800.0L) - 52889.0L/31500.0L) - 7.0L/24.0L; + poly_val[6] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-1.0L/1244160.0L*x + 13.0L/2073600.0L) + 1703.0L/21772800.0L) - 1529.0L/2419200.0L) - 68321.0L/26127360.0L) + 2441.0L/107520.0L) + 17959.0L/518400.0L) - 184297.0L/518400.0L) - 
1021.0L/7776.0L) + 286397.0L/120960.0L) - 333059.0L/816480.0L) - 90281.0L/16800.0L) + 68231.0L/37800.0L) + 7.0L/4.0L; + poly_val[7] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((1.0L/967680.0L)*x - 13.0L/1814400.0L) - 13.0L/120960.0L) + 11.0L/15120.0L) + 5863.0L/1451520.0L) - 533.0L/20160.0L) - 9581.0L/141120.0L) + 9581.0L/22680.0L) + 353639.0L/691200.0L) - 353639.0L/120960.0L) - 54613.0L/36288.0L) + 54613.0L/7560.0L) + 266681.0L/235200.0L) - 266681.0L/88200.0L; + poly_val[8] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-1.0L/967680.0L*x + 13.0L/2073600.0L) + 13.0L/115200.0L) - 1529.0L/2419200.0L) - 2651.0L/580608.0L) + 2441.0L/107520.0L) + 242881.0L/2822400.0L) - 184297.0L/518400.0L) - 11207.0L/14400.0L) + 286397.0L/120960.0L) + 279731.0L/90720.0L) - 90281.0L/16800.0L) - 112331.0L/29400.0L) + 7.0L/4.0L; + poly_val[9] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((1.0L/1244160.0L)*x - 13.0L/3110400.0L) - 247.0L/2721600.0L) + 187.0L/453600.0L) + 50017.0L/13063680.0L) - 1153.0L/80640.0L) - 68659.0L/907200.0L) + 40987.0L/194400.0L) + 4442849.0L/6220800.0L) - 1819681.0L/1451520.0L) - 9281953.0L/3265920.0L) + 222581.0L/100800.0L) + 116803.0L/37800.0L) - 7.0L/24.0L; + poly_val[10] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-1.0L/2073600.0L*x + 13.0L/6220800.0L) + 403.0L/7257600.0L) - 1441.0L/7257600.0L) - 103697.0L/43545600.0L) + 6283.0L/967680.0L) + 57173.0L/1209600.0L) - 135073.0L/1555200.0L) - 70337.0L/162000.0L) + 157477.0L/362880.0L) + 427361.0L/272160.0L) - 247081.0L/453600.0L) - 254431.0L/189000.0L) + 7.0L/108.0L; + poly_val[11] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((1.0L/4561920.0L)*x - 13.0L/17107200.0L) - 169.0L/6652800.0L) + 31.0L/453600.0L) + 4723.0L/4354560.0L) - 83.0L/40320.0L) - 6347.0L/302400.0L) + 2363.0L/97200.0L) + 5017.0L/27648.0L) - 73811.0L/725760.0L) - 3535297.0L/5987520.0L) + 31957.0L/277200.0L) + 522337.0L/1108800.0L) - 7.0L/528.0L; + poly_val[12] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-1.0L/13685760.0L*x + 13.0L/68428800.0L) + 403.0L/47900160.0L) - 23.0L/1451520.0L) - 
9187.0L/26127360.0L) + 139.0L/322560.0L) + 947.0L/145152.0L) - 1399.0L/311040.0L) - 20623.0L/388800.0L) + 6271.0L/362880.0L) + 294617.0L/1796256.0L) - 2077.0L/110880.0L) - 52807.0L/415800.0L) + 7.0L/3300.0L; + poly_val[13] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((1.0L/59304960.0L)*x - 1.0L/34214400.0L) - 19.0L/9979200.0L) + 1.0L/453600.0L) + 67.0L/870912.0L) - 13.0L/241920.0L) - 59.0L/43200.0L) + 101.0L/194400.0L) + 22159.0L/2073600.0L) - 2767.0L/1451520.0L) - 385381.0L/11975040.0L) + 20137.0L/9979200.0L) + 16591.0L/675675.0L) - 7.0L/30888.0L; + poly_val[14] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-1.0L/415134720.0L*x + 1.0L/479001600.0L) + 1.0L/3801600.0L) - 1.0L/7257600.0L) - 89.0L/8709120.0L) + 1.0L/322560.0L) + 1487.0L/8467200.0L) - 311.0L/10886400.0L) - 29.0L/21600.0L) + 37.0L/362880.0L) + 11927.0L/2993760.0L) - 59.0L/554400.0L) - 38033.0L/12612600.0L) + 1.0L/84084.0L; + poly_val[15] = x*(pow(x, 2)*(pow(x, 2)*(pow(x, 2)*(pow(x, 2)*(pow(x, 2)*((1.0L/6227020800.0L)*pow(x, 2) - 1.0L/59875200.0L) + 41.0L/65318400.0L) - 67.0L/6350400.0L) + 2473.0L/31104000.0L) - 4201.0L/17962560.0L) + 266681.0L/1513512000.0L); + break; + } } +void beta_Lagrange_n8(int deriv, double x, double *poly_val) { + switch(deriv) + { + case 0: + poly_val[0] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-1.0L/355687428096000.0L*x + 1.0L/20922789888000.0L) + 1.0L/5230697472000.0L) - 1.0L/149448499200.0L) + 1.0L/135862272000.0L) + 41.0L/114960384000.0L) - 391.0L/402361344000.0L) - 67.0L/7315660800.0L) + 4657.0L/146313216000.0L) + 2473.0L/20901888000.0L) - 6583.0L/14370048000.0L) - 4201.0L/5748019200.0L) + 144689.0L/48432384000.0L) + 266681.0L/145297152000.0L) - 157.0L/20384000.0L) - 1.0L/823680.0L) + 1.0L/194480.0L); + poly_val[1] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((1.0L/20922789888000.0L)*x - 1.0L/1307674368000.0L) - 23.0L/5230697472000.0L) + 31.0L/261534873600.0L) - 67.0L/1494484992000.0L) - 97.0L/14370048000.0L) + 6143.0L/402361344000.0L) + 331.0L/1828915200.0L) - 
82289.0L/146313216000.0L) - 2747.0L/1143072000.0L) + 244843.0L/28740096000.0L) + 2711.0L/179625600.0L) - 8287319.0L/145297152000.0L) - 21701.0L/567567000.0L) + 150307.0L/1009008000.0L) + 8.0L/315315.0L) - 1.0L/10010.0L); + poly_val[2] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-1.0L/2615348736000.0L*x + 1.0L/174356582400.0L) + 19.0L/435891456000.0L) - 1.0L/1037836800.0L) - 31.0L/62270208000.0L) + 19.0L/319334400.0L) - 2543.0L/25147584000.0L) - 517.0L/304819200.0L) + 27683.0L/6096384000.0L) + 1363.0L/58060800.0L) - 356513.0L/4790016000.0L) - 24149.0L/159667200.0L) + 169537087.0L/326918592000.0L) + 1058149.0L/2724321600.0L) - 3135247.0L/2270268000.0L) - 1.0L/3861.0L) + 2.0L/2145.0L); + poly_val[3] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((1.0L/523069747200.0L)*x - 1.0L/37362124800.0L) - 67.0L/261534873600.0L) + 179.0L/37362124800.0L) + 139.0L/18681062400.0L) - 919.0L/2874009600.0L) + 6427.0L/20118067200.0L) + 2591.0L/261273600.0L) - 78299.0L/3657830400.0L) - 4787.0L/32659200.0L) + 287941.0L/718502400.0L) + 176989.0L/179625600.0L) - 401207.0L/134534400.0L) - 41981.0L/16216200.0L) + 68923.0L/8408400.0L) + 56.0L/32175.0L) - 4.0L/715.0L); + poly_val[4] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-1.0L/149448499200.0L*x + 1.0L/11496038400.0L) + 19.0L/18681062400.0L) - 47.0L/2874009600.0L) - 3323.0L/74724249600.0L) + 6707.0L/5748019200.0L) - 109.0L/574801920.0L) - 10331.0L/261273600.0L) + 2489.0L/41803776.0L) + 678739.0L/1045094400.0L) - 4125323.0L/2874009600.0L) - 6782981.0L/1437004800.0L) + 12532313.0L/1037836800.0L) + 1033649.0L/79833600.0L) - 1014049.0L/28828800.0L) - 7.0L/792.0L) + 7.0L/286.0L); + poly_val[5] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((1.0L/57480192000.0L)*x - 1.0L/4790016000.0L) - 1.0L/342144000.0L) + 13.0L/319334400.0L) + 1567.0L/9580032000.0L) - 4889.0L/1596672000.0L) - 2521.0L/1026432000.0L) + 9767.0L/87091200.0L) - 139379.0L/1741824000.0L) - 37517.0L/18144000.0L) + 7618319.0L/2395008000.0L) + 352423.0L/19958400.0L) - 
126741731.0L/3592512000.0L) - 999349.0L/18711000.0L) + 2919647.0L/24948000.0L) + 56.0L/1485.0L) - 14.0L/165.0L); + poly_val[6] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-1.0L/28740096000.0L*x + 1.0L/2612736000.0L) + 13.0L/2052864000.0L) - 1.0L/13063680.0L) - 6011.0L/14370048000.0L) + 7811.0L/1306368000.0L) + 2543.0L/224532000.0L) - 6067.0L/26127360.0L) - 126523.0L/2612736000.0L) + 12312353.0L/2612736000.0L) - 48574927.0L/14370048000.0L) - 248945.0L/5225472.0L) + 7969111.0L/133056000.0L) + 901349.0L/4536000.0L) - 287383.0L/924000.0L) - 7.0L/45.0L) + 14.0L/55.0L); + poly_val[7] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((1.0L/18289152000.0L)*x - 1.0L/1828915200.0L) - 97.0L/9144576000.0L) + 29.0L/261273600.0L) + 257.0L/326592000.0L) - 331.0L/37324800.0L) - 253499.0L/9144576000.0L) + 652969.0L/1828915200.0L) + 8209463.0L/18289152000.0L) - 251539.0L/32659200.0L) - 586787.0L/326592000.0L) + 203617.0L/2332800.0L) - 4022849.0L/127008000.0L) - 372149.0L/793800.0L) + 293749.0L/882000.0L) + 8.0L/9.0L) - 4.0L/5.0L); + poly_val[8] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-1.0L/14631321600.0L*x + 1.0L/1625702400.0L) + 17.0L/1219276800.0L) - 17.0L/135475200.0L) - 391.0L/348364800.0L) + 391.0L/38707200.0L) + 167297.0L/3657830400.0L) - 167297.0L/406425600.0L) - 4913051.0L/4877107200.0L) + 4913051.0L/541900800.0L) + 1034059.0L/87091200.0L) - 1034059.0L/9676800.0L) - 63566689.0L/914457600.0L) + 63566689.0L/101606400.0L) + 1077749.0L/6350400.0L) - 1077749.0L/705600.0L) - 1.0L/9.0L) + 1; + poly_val[9] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((1.0L/14631321600.0L)*x - 1.0L/1828915200.0L) - 53.0L/3657830400.0L) + 29.0L/261273600.0L) + 1289.0L/1045094400.0L) - 331.0L/37324800.0L) - 39947.0L/731566080.0L) + 652969.0L/1828915200.0L) + 3992581.0L/2926264320.0L) - 251539.0L/32659200.0L) - 5114489.0L/261273600.0L) + 203617.0L/2332800.0L) + 5310539.0L/33868800.0L) - 372149.0L/793800.0L) - 50061.0L/78400.0L) + 8.0L/9.0L) + 1); + poly_val[10] = 
x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-1.0L/18289152000.0L*x + 1.0L/2612736000.0L) + 109.0L/9144576000.0L) - 1.0L/13063680.0L) - 1373.0L/1306368000.0L) + 7811.0L/1306368000.0L) + 110987.0L/2286144000.0L) - 6067.0L/26127360.0L) - 23232953.0L/18289152000.0L) + 12312353.0L/2612736000.0L) + 24721061.0L/1306368000.0L) - 248945.0L/5225472.0L) - 38328917.0L/254016000.0L) + 901349.0L/4536000.0L) + 940549.0L/1764000.0L) - 7.0L/45.0L) - 2.0L/5.0L); + poly_val[11] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((1.0L/28740096000.0L)*x - 1.0L/4790016000.0L) - 37.0L/4790016000.0L) + 13.0L/319334400.0L) + 151.0L/217728000.0L) - 4889.0L/1596672000.0L) - 466597.0L/14370048000.0L) + 9767.0L/87091200.0L) + 739651.0L/870912000.0L) - 37517.0L/18144000.0L) - 14666779.0L/1197504000.0L) + 352423.0L/19958400.0L) + 158720899.0L/1796256000.0L) - 999349.0L/18711000.0L) - 279677.0L/1134000.0L) + 56.0L/1485.0L) + 28.0L/165.0L); + poly_val[12] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-1.0L/57480192000.0L*x + 1.0L/11496038400.0L) + 1.0L/256608000.0L) - 47.0L/2874009600.0L) - 10091.0L/28740096000.0L) + 6707.0L/5748019200.0L) + 33481.0L/2052864000.0L) - 10331.0L/261273600.0L) - 2166403.0L/5225472000.0L) + 678739.0L/1045094400.0L) + 80761123.0L/14370048000.0L) - 6782981.0L/1437004800.0L) - 1622179.0L/44352000.0L) + 1033649.0L/79833600.0L) + 351083.0L/3696000.0L) - 7.0L/792.0L) - 7.0L/110.0L); + poly_val[13] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((1.0L/149448499200.0L)*x - 1.0L/37362124800.0L) - 1.0L/667180800.0L) + 179.0L/37362124800.0L) + 10001.0L/74724249600.0L) - 919.0L/2874009600.0L) - 2183.0L/359251200.0L) + 2591.0L/261273600.0L) + 154891.0L/1045094400.0L) - 4787.0L/32659200.0L) - 2723543.0L/1437004800.0L) + 176989.0L/179625600.0L) + 12175981.0L/1037836800.0L) - 41981.0L/16216200.0L) - 213041.0L/7207200.0L) + 56.0L/32175.0L) + 14.0L/715.0L); + poly_val[14] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-1.0L/523069747200.0L*x + 1.0L/174356582400.0L) + 
37.0L/87178291200.0L) - 1.0L/1037836800.0L) - 463.0L/12454041600.0L) + 19.0L/319334400.0L) + 823.0L/502951680.0L) - 517.0L/304819200.0L) - 9353.0L/243855360.0L) + 1363.0L/58060800.0L) + 453109.0L/958003200.0L) - 24149.0L/159667200.0L) - 186467471.0L/65383718400.0L) + 1058149.0L/2724321600.0L) + 3213647.0L/454053600.0L) - 1.0L/3861.0L) - 2.0L/429.0L); + poly_val[15] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((1.0L/2615348736000.0L)*x - 1.0L/1307674368000.0L) - 109.0L/1307674368000.0L) + 31.0L/261534873600.0L) + 83.0L/11675664000.0L) - 97.0L/14370048000.0L) - 30491.0L/100590336000.0L) + 331.0L/1828915200.0L) + 126241.0L/18289152000.0L) - 2747.0L/1143072000.0L) - 299063.0L/3592512000.0L) + 2711.0L/179625600.0L) + 2993917.0L/6054048000.0L) - 21701.0L/567567000.0L) - 51169.0L/42042000.0L) + 8.0L/315315.0L) + 4.0L/5005.0L); + poly_val[16] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-1.0L/20922789888000.0L*x + 1.0L/20922789888000.0L) + 53.0L/5230697472000.0L) - 1.0L/149448499200.0L) - 179.0L/213497856000.0L) + 41.0L/114960384000.0L) + 14017.0L/402361344000.0L) - 67.0L/7315660800.0L) - 113791.0L/146313216000.0L) + 2473.0L/20901888000.0L) + 4747.0L/513216000.0L) - 4201.0L/5748019200.0L) - 7912501.0L/145297152000.0L) + 266681.0L/145297152000.0L) + 269131.0L/2018016000.0L) - 1.0L/823680.0L) - 1.0L/11440.0L); + poly_val[17] = x*(pow(x, 2)*(pow(x, 2)*(pow(x, 2)*(pow(x, 2)*(pow(x, 2)*(pow(x, 2)*(pow(x, 2)*((1.0L/355687428096000.0L)*pow(x, 2) - 1.0L/1743565824000.0L) + 23.0L/498161664000.0L) - 757.0L/402361344000.0L) + 2021.0L/48771072000.0L) - 4679.0L/9580032000.0L) + 3739217.0L/1307674368000.0L) - 63397.0L/9081072000.0L) + 1.0L/218790.0L); + break; + case 1: + poly_val[0] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-1.0L/20922789888000.0L*x + 1.0L/1307674368000.0L) + 1.0L/348713164800.0L) - 1.0L/10674892800.0L) + 1.0L/10450944000.0L) + 41.0L/9580032000.0L) - 391.0L/36578304000.0L) - 67.0L/731566080.0L) + 4657.0L/16257024000.0L) + 2473.0L/2612736000.0L) - 
6583.0L/2052864000.0L) - 4201.0L/958003200.0L) + 144689.0L/9686476800.0L) + 266681.0L/36324288000.0L) - 471.0L/20384000.0L) - 1.0L/411840.0L) + 1.0L/194480.0L; + poly_val[1] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((17.0L/20922789888000.0L)*x - 1.0L/81729648000.0L) - 23.0L/348713164800.0L) + 31.0L/18681062400.0L) - 67.0L/114960384000.0L) - 97.0L/1197504000.0L) + 6143.0L/36578304000.0L) + 331.0L/182891520.0L) - 82289.0L/16257024000.0L) - 2747.0L/142884000.0L) + 244843.0L/4105728000.0L) + 2711.0L/29937600.0L) - 8287319.0L/29059430400.0L) - 21701.0L/141891750.0L) + 150307.0L/336336000.0L) + 16.0L/315315.0L) - 1.0L/10010.0L; + poly_val[2] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-17.0L/2615348736000.0L*x + 1.0L/10897286400.0L) + 19.0L/29059430400.0L) - 1.0L/74131200.0L) - 31.0L/4790016000.0L) + 19.0L/26611200.0L) - 2543.0L/2286144000.0L) - 517.0L/30481920.0L) + 27683.0L/677376000.0L) + 1363.0L/7257600.0L) - 356513.0L/684288000.0L) - 24149.0L/26611200.0L) + 169537087.0L/65383718400.0L) + 1058149.0L/681080400.0L) - 3135247.0L/756756000.0L) - 2.0L/3861.0L) + 2.0L/2145.0L; + poly_val[3] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((17.0L/523069747200.0L)*x - 1.0L/2335132800.0L) - 67.0L/17435658240.0L) + 179.0L/2668723200.0L) + 139.0L/1437004800.0L) - 919.0L/239500800.0L) + 6427.0L/1828915200.0L) + 2591.0L/26127360.0L) - 78299.0L/406425600.0L) - 4787.0L/4082400.0L) + 287941.0L/102643200.0L) + 176989.0L/29937600.0L) - 401207.0L/26906880.0L) - 41981.0L/4054050.0L) + 68923.0L/2802800.0L) + 112.0L/32175.0L) - 4.0L/715.0L; + poly_val[4] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-17.0L/149448499200.0L*x + 1.0L/718502400.0L) + 19.0L/1245404160.0L) - 47.0L/205286400.0L) - 3323.0L/5748019200.0L) + 6707.0L/479001600.0L) - 109.0L/52254720.0L) - 10331.0L/26127360.0L) + 2489.0L/4644864.0L) + 678739.0L/130636800.0L) - 4125323.0L/410572800.0L) - 6782981.0L/239500800.0L) + 12532313.0L/207567360.0L) + 1033649.0L/19958400.0L) - 1014049.0L/9609600.0L) - 7.0L/396.0L) + 
7.0L/286.0L; + poly_val[5] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((17.0L/57480192000.0L)*x - 1.0L/299376000.0L) - 1.0L/22809600.0L) + 13.0L/22809600.0L) + 20371.0L/9580032000.0L) - 4889.0L/133056000.0L) - 2521.0L/93312000.0L) + 9767.0L/8709120.0L) - 139379.0L/193536000.0L) - 37517.0L/2268000.0L) + 7618319.0L/342144000.0L) + 352423.0L/3326400.0L) - 126741731.0L/718502400.0L) - 999349.0L/4677750.0L) + 2919647.0L/8316000.0L) + 112.0L/1485.0L) - 14.0L/165.0L; + poly_val[6] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-17.0L/28740096000.0L*x + 1.0L/163296000.0L) + 13.0L/136857600.0L) - 1.0L/933120.0L) - 78143.0L/14370048000.0L) + 7811.0L/108864000.0L) + 2543.0L/20412000.0L) - 6067.0L/2612736.0L) - 126523.0L/290304000.0L) + 12312353.0L/326592000.0L) - 48574927.0L/2052864000.0L) - 248945.0L/870912.0L) + 7969111.0L/26611200.0L) + 901349.0L/1134000.0L) - 287383.0L/308000.0L) - 14.0L/45.0L) + 14.0L/55.0L; + poly_val[7] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((17.0L/18289152000.0L)*x - 1.0L/114307200.0L) - 97.0L/609638400.0L) + 29.0L/18662400.0L) + 3341.0L/326592000.0L) - 331.0L/3110400.0L) - 2788489.0L/9144576000.0L) + 652969.0L/182891520.0L) + 8209463.0L/2032128000.0L) - 251539.0L/4082400.0L) - 586787.0L/46656000.0L) + 203617.0L/388800.0L) - 4022849.0L/25401600.0L) - 372149.0L/198450.0L) + 293749.0L/294000.0L) + 16.0L/9.0L) - 4.0L/5.0L; + poly_val[8] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-17.0L/14631321600.0L*x + 1.0L/101606400.0L) + 17.0L/81285120.0L) - 17.0L/9676800.0L) - 5083.0L/348364800.0L) + 391.0L/3225600.0L) + 1840267.0L/3657830400.0L) - 167297.0L/40642560.0L) - 4913051.0L/541900800.0L) + 4913051.0L/67737600.0L) + 1034059.0L/12441600.0L) - 1034059.0L/1612800.0L) - 63566689.0L/182891520.0L) + 63566689.0L/25401600.0L) + 1077749.0L/2116800.0L) - 1077749.0L/352800.0L) - 1.0L/9.0L; + poly_val[9] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((17.0L/14631321600.0L)*x - 1.0L/114307200.0L) - 53.0L/243855360.0L) + 29.0L/18662400.0L) + 
16757.0L/1045094400.0L) - 331.0L/3110400.0L) - 439417.0L/731566080.0L) + 652969.0L/182891520.0L) + 3992581.0L/325140480.0L) - 251539.0L/4082400.0L) - 5114489.0L/37324800.0L) + 203617.0L/388800.0L) + 5310539.0L/6773760.0L) - 372149.0L/198450.0L) - 150183.0L/78400.0L) + 16.0L/9.0L) + 1; + poly_val[10] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-17.0L/18289152000.0L*x + 1.0L/163296000.0L) + 109.0L/609638400.0L) - 1.0L/933120.0L) - 17849.0L/1306368000.0L) + 7811.0L/108864000.0L) + 1220857.0L/2286144000.0L) - 6067.0L/2612736.0L) - 23232953.0L/2032128000.0L) + 12312353.0L/326592000.0L) + 24721061.0L/186624000.0L) - 248945.0L/870912.0L) - 38328917.0L/50803200.0L) + 901349.0L/1134000.0L) + 940549.0L/588000.0L) - 14.0L/45.0L) - 2.0L/5.0L; + poly_val[11] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((17.0L/28740096000.0L)*x - 1.0L/299376000.0L) - 37.0L/319334400.0L) + 13.0L/22809600.0L) + 1963.0L/217728000.0L) - 4889.0L/133056000.0L) - 466597.0L/1306368000.0L) + 9767.0L/8709120.0L) + 739651.0L/96768000.0L) - 37517.0L/2268000.0L) - 14666779.0L/171072000.0L) + 352423.0L/3326400.0L) + 158720899.0L/359251200.0L) - 999349.0L/4677750.0L) - 279677.0L/378000.0L) + 112.0L/1485.0L) + 28.0L/165.0L; + poly_val[12] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-17.0L/57480192000.0L*x + 1.0L/718502400.0L) + 1.0L/17107200.0L) - 47.0L/205286400.0L) - 131183.0L/28740096000.0L) + 6707.0L/479001600.0L) + 33481.0L/186624000.0L) - 10331.0L/26127360.0L) - 2166403.0L/580608000.0L) + 678739.0L/130636800.0L) + 80761123.0L/2052864000.0L) - 6782981.0L/239500800.0L) - 1622179.0L/8870400.0L) + 1033649.0L/19958400.0L) + 351083.0L/1232000.0L) - 7.0L/396.0L) - 7.0L/110.0L; + poly_val[13] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((17.0L/149448499200.0L)*x - 1.0L/2335132800.0L) - 1.0L/44478720.0L) + 179.0L/2668723200.0L) + 10001.0L/5748019200.0L) - 919.0L/239500800.0L) - 2183.0L/32659200.0L) + 2591.0L/26127360.0L) + 154891.0L/116121600.0L) - 4787.0L/4082400.0L) - 2723543.0L/205286400.0L) + 
176989.0L/29937600.0L) + 12175981.0L/207567360.0L) - 41981.0L/4054050.0L) - 213041.0L/2402400.0L) + 112.0L/32175.0L) + 14.0L/715.0L; + poly_val[14] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-17.0L/523069747200.0L*x + 1.0L/10897286400.0L) + 37.0L/5811886080.0L) - 1.0L/74131200.0L) - 463.0L/958003200.0L) + 19.0L/26611200.0L) + 823.0L/45722880.0L) - 517.0L/30481920.0L) - 9353.0L/27095040.0L) + 1363.0L/7257600.0L) + 453109.0L/136857600.0L) - 24149.0L/26611200.0L) - 186467471.0L/13076743680.0L) + 1058149.0L/681080400.0L) + 3213647.0L/151351200.0L) - 2.0L/3861.0L) - 2.0L/429.0L; + poly_val[15] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((17.0L/2615348736000.0L)*x - 1.0L/81729648000.0L) - 109.0L/87178291200.0L) + 31.0L/18681062400.0L) + 83.0L/898128000.0L) - 97.0L/1197504000.0L) - 30491.0L/9144576000.0L) + 331.0L/182891520.0L) + 126241.0L/2032128000.0L) - 2747.0L/142884000.0L) - 299063.0L/513216000.0L) + 2711.0L/29937600.0L) + 2993917.0L/1210809600.0L) - 21701.0L/141891750.0L) - 51169.0L/14014000.0L) + 16.0L/315315.0L) + 4.0L/5005.0L; + poly_val[16] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-17.0L/20922789888000.0L*x + 1.0L/1307674368000.0L) + 53.0L/348713164800.0L) - 1.0L/10674892800.0L) - 179.0L/16422912000.0L) + 41.0L/9580032000.0L) + 14017.0L/36578304000.0L) - 67.0L/731566080.0L) - 113791.0L/16257024000.0L) + 2473.0L/2612736000.0L) + 33229.0L/513216000.0L) - 4201.0L/958003200.0L) - 7912501.0L/29059430400.0L) + 266681.0L/36324288000.0L) + 269131.0L/672672000.0L) - 1.0L/411840.0L) - 1.0L/11440.0L; + poly_val[17] = pow(x, 2)*(pow(x, 2)*(pow(x, 2)*(pow(x, 2)*(pow(x, 2)*(pow(x, 2)*(pow(x, 2)*((1.0L/20922789888000.0L)*pow(x, 2) - 1.0L/116237721600.0L) + 23.0L/38320128000.0L) - 757.0L/36578304000.0L) + 2021.0L/5419008000.0L) - 4679.0L/1368576000.0L) + 3739217.0L/261534873600.0L) - 63397.0L/3027024000.0L) + 1.0L/218790.0L; + break; + case 2: + poly_val[0] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-1.0L/1307674368000.0L*x + 1.0L/87178291200.0L) + 
1.0L/24908083200.0L) - 1.0L/821145600.0L) + 1.0L/870912000.0L) + 41.0L/870912000.0L) - 391.0L/3657830400.0L) - 67.0L/81285120.0L) + 4657.0L/2032128000.0L) + 2473.0L/373248000.0L) - 6583.0L/342144000.0L) - 4201.0L/191600640.0L) + 144689.0L/2421619200.0L) + 266681.0L/12108096000.0L) - 471.0L/10192000.0L) - 1.0L/411840.0L; + poly_val[1] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((17.0L/1307674368000.0L)*x - 1.0L/5448643200.0L) - 23.0L/24908083200.0L) + 31.0L/1437004800.0L) - 67.0L/9580032000.0L) - 97.0L/108864000.0L) + 6143.0L/3657830400.0L) + 331.0L/20321280.0L) - 82289.0L/2032128000.0L) - 2747.0L/20412000.0L) + 244843.0L/684288000.0L) + 2711.0L/5987520.0L) - 8287319.0L/7264857600.0L) - 21701.0L/47297250.0L) + 150307.0L/168168000.0L) + 16.0L/315315.0L; + poly_val[2] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-17.0L/163459296000.0L*x + 1.0L/726485760.0L) + 19.0L/2075673600.0L) - 1.0L/5702400.0L) - 31.0L/399168000.0L) + 19.0L/2419200.0L) - 2543.0L/228614400.0L) - 517.0L/3386880.0L) + 27683.0L/84672000.0L) + 1363.0L/1036800.0L) - 356513.0L/114048000.0L) - 24149.0L/5322240.0L) + 169537087.0L/16345929600.0L) + 1058149.0L/227026800.0L) - 3135247.0L/378378000.0L) - 2.0L/3861.0L; + poly_val[3] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((17.0L/32691859200.0L)*x - 1.0L/155675520.0L) - 67.0L/1245404160.0L) + 179.0L/205286400.0L) + 139.0L/119750400.0L) - 919.0L/21772800.0L) + 6427.0L/182891520.0L) + 2591.0L/2903040.0L) - 78299.0L/50803200.0L) - 4787.0L/583200.0L) + 287941.0L/17107200.0L) + 176989.0L/5987520.0L) - 401207.0L/6726720.0L) - 41981.0L/1351350.0L) + 68923.0L/1401400.0L) + 112.0L/32175.0L; + poly_val[4] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-17.0L/9340531200.0L*x + 1.0L/47900160.0L) + 19.0L/88957440.0L) - 611.0L/205286400.0L) - 3323.0L/479001600.0L) + 6707.0L/43545600.0L) - 109.0L/5225472.0L) - 10331.0L/2903040.0L) + 2489.0L/580608.0L) + 678739.0L/18662400.0L) - 4125323.0L/68428800.0L) - 6782981.0L/47900160.0L) + 12532313.0L/51891840.0L) + 
1033649.0L/6652800.0L) - 1014049.0L/4804800.0L) - 7.0L/396.0L; + poly_val[5] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((17.0L/3592512000.0L)*x - 1.0L/19958400.0L) - 7.0L/11404800.0L) + 169.0L/22809600.0L) + 20371.0L/798336000.0L) - 4889.0L/12096000.0L) - 2521.0L/9331200.0L) + 9767.0L/967680.0L) - 139379.0L/24192000.0L) - 37517.0L/324000.0L) + 7618319.0L/57024000.0L) + 352423.0L/665280.0L) - 126741731.0L/179625600.0L) - 999349.0L/1559250.0L) + 2919647.0L/4158000.0L) + 112.0L/1485.0L; + poly_val[6] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-17.0L/1796256000.0L*x + 1.0L/10886400.0L) + 91.0L/68428800.0L) - 13.0L/933120.0L) - 78143.0L/1197504000.0L) + 85921.0L/108864000.0L) + 2543.0L/2041200.0L) - 6067.0L/290304.0L) - 126523.0L/36288000.0L) + 12312353.0L/46656000.0L) - 48574927.0L/342144000.0L) - 1244725.0L/870912.0L) + 7969111.0L/6652800.0L) + 901349.0L/378000.0L) - 287383.0L/154000.0L) - 14.0L/45.0L; + poly_val[7] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((17.0L/1143072000.0L)*x - 1.0L/7620480.0L) - 97.0L/43545600.0L) + 377.0L/18662400.0L) + 3341.0L/27216000.0L) - 3641.0L/3110400.0L) - 2788489.0L/914457600.0L) + 652969.0L/20321280.0L) + 8209463.0L/254016000.0L) - 251539.0L/583200.0L) - 586787.0L/7776000.0L) + 203617.0L/77760.0L) - 4022849.0L/6350400.0L) - 372149.0L/66150.0L) + 293749.0L/147000.0L) + 16.0L/9.0L; + poly_val[8] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-17.0L/914457600.0L*x + 1.0L/6773760.0L) + 17.0L/5806080.0L) - 221.0L/9676800.0L) - 5083.0L/29030400.0L) + 4301.0L/3225600.0L) + 1840267.0L/365783040.0L) - 167297.0L/4515840.0L) - 4913051.0L/67737600.0L) + 4913051.0L/9676800.0L) + 1034059.0L/2073600.0L) - 1034059.0L/322560.0L) - 63566689.0L/45722880.0L) + 63566689.0L/8467200.0L) + 1077749.0L/1058400.0L) - 1077749.0L/352800.0L; + poly_val[9] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((17.0L/914457600.0L)*x - 1.0L/7620480.0L) - 53.0L/17418240.0L) + 377.0L/18662400.0L) + 16757.0L/87091200.0L) - 3641.0L/3110400.0L) - 439417.0L/73156608.0L) + 
652969.0L/20321280.0L) + 3992581.0L/40642560.0L) - 251539.0L/583200.0L) - 5114489.0L/6220800.0L) + 203617.0L/77760.0L) + 5310539.0L/1693440.0L) - 372149.0L/66150.0L) - 150183.0L/39200.0L) + 16.0L/9.0L; + poly_val[10] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-17.0L/1143072000.0L*x + 1.0L/10886400.0L) + 109.0L/43545600.0L) - 13.0L/933120.0L) - 17849.0L/108864000.0L) + 85921.0L/108864000.0L) + 1220857.0L/228614400.0L) - 6067.0L/290304.0L) - 23232953.0L/254016000.0L) + 12312353.0L/46656000.0L) + 24721061.0L/31104000.0L) - 1244725.0L/870912.0L) - 38328917.0L/12700800.0L) + 901349.0L/378000.0L) + 940549.0L/294000.0L) - 14.0L/45.0L; + poly_val[11] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((17.0L/1796256000.0L)*x - 1.0L/19958400.0L) - 37.0L/22809600.0L) + 169.0L/22809600.0L) + 1963.0L/18144000.0L) - 4889.0L/12096000.0L) - 466597.0L/130636800.0L) + 9767.0L/967680.0L) + 739651.0L/12096000.0L) - 37517.0L/324000.0L) - 14666779.0L/28512000.0L) + 352423.0L/665280.0L) + 158720899.0L/89812800.0L) - 999349.0L/1559250.0L) - 279677.0L/189000.0L) + 112.0L/1485.0L; + poly_val[12] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-17.0L/3592512000.0L*x + 1.0L/47900160.0L) + 7.0L/8553600.0L) - 611.0L/205286400.0L) - 131183.0L/2395008000.0L) + 6707.0L/43545600.0L) + 33481.0L/18662400.0L) - 10331.0L/2903040.0L) - 2166403.0L/72576000.0L) + 678739.0L/18662400.0L) + 80761123.0L/342144000.0L) - 6782981.0L/47900160.0L) - 1622179.0L/2217600.0L) + 1033649.0L/6652800.0L) + 351083.0L/616000.0L) - 7.0L/396.0L; + poly_val[13] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((17.0L/9340531200.0L)*x - 1.0L/155675520.0L) - 7.0L/22239360.0L) + 179.0L/205286400.0L) + 10001.0L/479001600.0L) - 919.0L/21772800.0L) - 2183.0L/3265920.0L) + 2591.0L/2903040.0L) + 154891.0L/14515200.0L) - 4787.0L/583200.0L) - 2723543.0L/34214400.0L) + 176989.0L/5987520.0L) + 12175981.0L/51891840.0L) - 41981.0L/1351350.0L) - 213041.0L/1201200.0L) + 112.0L/32175.0L; + poly_val[14] = 
x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-17.0L/32691859200.0L*x + 1.0L/726485760.0L) + 37.0L/415134720.0L) - 1.0L/5702400.0L) - 463.0L/79833600.0L) + 19.0L/2419200.0L) + 823.0L/4572288.0L) - 517.0L/3386880.0L) - 9353.0L/3386880.0L) + 1363.0L/1036800.0L) + 453109.0L/22809600.0L) - 24149.0L/5322240.0L) - 186467471.0L/3269185920.0L) + 1058149.0L/227026800.0L) + 3213647.0L/75675600.0L) - 2.0L/3861.0L; + poly_val[15] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((17.0L/163459296000.0L)*x - 1.0L/5448643200.0L) - 109.0L/6227020800.0L) + 31.0L/1437004800.0L) + 83.0L/74844000.0L) - 97.0L/108864000.0L) - 30491.0L/914457600.0L) + 331.0L/20321280.0L) + 126241.0L/254016000.0L) - 2747.0L/20412000.0L) - 299063.0L/85536000.0L) + 2711.0L/5987520.0L) + 2993917.0L/302702400.0L) - 21701.0L/47297250.0L) - 51169.0L/7007000.0L) + 16.0L/315315.0L; + poly_val[16] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-17.0L/1307674368000.0L*x + 1.0L/87178291200.0L) + 53.0L/24908083200.0L) - 1.0L/821145600.0L) - 179.0L/1368576000.0L) + 41.0L/870912000.0L) + 14017.0L/3657830400.0L) - 67.0L/81285120.0L) - 113791.0L/2032128000.0L) + 2473.0L/373248000.0L) + 33229.0L/85536000.0L) - 4201.0L/191600640.0L) - 7912501.0L/7264857600.0L) + 266681.0L/12108096000.0L) + 269131.0L/336336000.0L) - 1.0L/411840.0L; + poly_val[17] = x*(pow(x, 2)*(pow(x, 2)*(pow(x, 2)*(pow(x, 2)*(pow(x, 2)*(pow(x, 2)*((1.0L/1307674368000.0L)*pow(x, 2) - 1.0L/8302694400.0L) + 23.0L/3193344000.0L) - 757.0L/3657830400.0L) + 2021.0L/677376000.0L) - 4679.0L/228096000.0L) + 3739217.0L/65383718400.0L) - 63397.0L/1513512000.0L); + break; + } } -void beta_Lagrange_n5(int deriv, double x, double *poly_val){ -switch(deriv) -{ -case 0: -poly_val[0] = x*(x*(x*(x*(x*(x*(x*(x*(pow(x, 2)*(-1.0L/39916800.0L*x + 1.0L/3628800.0L) - 1.0L/120960.0L) + 19.0L/1209600.0L) + 13.0L/172800.0L) - 67.0L/362880.0L) - 41.0L/181440.0L) + 13.0L/21600.0L) + 1.0L/6300.0L) - 1.0L/2310.0L); -poly_val[1] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((1.0L/3628800.0L)*x - 1.0L/362880.0L) - 
1.0L/241920.0L) + 13.0L/120960.0L) - 179.0L/1209600.0L) - 19.0L/17280.0L) + 1663.0L/725760.0L) + 1261.0L/362880.0L) - 2447.0L/302400.0L) - 5.0L/2016.0L) + 1.0L/168.0L); -poly_val[2] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-1.0L/725760.0L*x + 1.0L/80640.0L) + 1.0L/25920.0L) - 23.0L/40320.0L) + 73.0L/241920.0L) + 29.0L/3840.0L) - 4399.0L/362880.0L) - 541.0L/20160.0L) + 667.0L/12960.0L) + 5.0L/252.0L) - 5.0L/126.0L); -poly_val[3] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((1.0L/241920.0L)*x - 1.0L/30240.0L) - 13.0L/80640.0L) + 17.0L/10080.0L) + 23.0L/26880.0L) - 13.0L/480.0L) + 5459.0L/241920.0L) + 4369.0L/30240.0L) - 4069.0L/20160.0L) - 5.0L/42.0L) + 5.0L/28.0L); -poly_val[4] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-1.0L/120960.0L*x + 1.0L/17280.0L) + 1.0L/2520.0L) - 1.0L/320.0L) - 43.0L/8064.0L) + 323.0L/5760.0L) + 431.0L/60480.0L) - 1669.0L/4320.0L) + 1069.0L/5040.0L) + 5.0L/6.0L) - 5.0L/7.0L); -poly_val[5] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((1.0L/86400.0L)*x - 1.0L/14400.0L) - 11.0L/17280.0L) + 11.0L/2880.0L) + 341.0L/28800.0L) - 341.0L/4800.0L) - 1529.0L/17280.0L) + 1529.0L/2880.0L) + 5269.0L/21600.0L) - 5269.0L/3600.0L) - 1.0L/6.0L) + 1; -poly_val[6] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-1.0L/86400.0L*x + 1.0L/17280.0L) + 1.0L/1440.0L) - 1.0L/320.0L) - 431.0L/28800.0L) + 323.0L/5760.0L) + 1249.0L/8640.0L) - 1669.0L/4320.0L) - 2269.0L/3600.0L) + 5.0L/6.0L) + 1); -poly_val[7] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((1.0L/120960.0L)*x - 1.0L/30240.0L) - 1.0L/1920.0L) + 17.0L/10080.0L) + 53.0L/4480.0L) - 13.0L/480.0L) - 14197.0L/120960.0L) + 4369.0L/30240.0L) + 667.0L/1440.0L) - 5.0L/42.0L) - 5.0L/14.0L); -poly_val[8] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-1.0L/241920.0L*x + 1.0L/80640.0L) + 1.0L/3780.0L) - 23.0L/40320.0L) - 479.0L/80640.0L) + 29.0L/3840.0L) + 6563.0L/120960.0L) - 541.0L/20160.0L) - 5069.0L/30240.0L) + 5.0L/252.0L) + 5.0L/42.0L); -poly_val[9] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((1.0L/725760.0L)*x - 1.0L/362880.0L) - 1.0L/11520.0L) + 13.0L/120960.0L) + 89.0L/48384.0L) - 19.0L/17280.0L) - 
10837.0L/725760.0L) + 1261.0L/362880.0L) + 371.0L/8640.0L) - 5.0L/2016.0L) - 5.0L/168.0L); -poly_val[10] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-1.0L/3628800.0L*x + 1.0L/3628800.0L) + 1.0L/60480.0L) - 1.0L/120960.0L) - 391.0L/1209600.0L) + 13.0L/172800.0L) + 901.0L/362880.0L) - 41.0L/181440.0L) - 1049.0L/151200.0L) + 1.0L/6300.0L) + 1.0L/210.0L); -poly_val[11] = x*(pow(x, 2)*(pow(x, 2)*(pow(x, 2)*(pow(x, 2)*((1.0L/39916800.0L)*pow(x, 2) - 1.0L/725760.0L) + 31.0L/1209600.0L) - 139.0L/725760.0L) + 479.0L/907200.0L) - 1.0L/2772.0L); -break; -case 1: -poly_val[0] = x*(x*(x*(x*(x*(x*(x*(pow(x, 2)*(-1.0L/3628800.0L*x + 1.0L/362880.0L) - 1.0L/15120.0L) + 19.0L/172800.0L) + 13.0L/28800.0L) - 67.0L/72576.0L) - 41.0L/45360.0L) + 13.0L/7200.0L) + 1.0L/3150.0L) - 1.0L/2310.0L; -poly_val[1] = x*(x*(x*(x*(x*(x*(x*(x*(x*((11.0L/3628800.0L)*x - 1.0L/36288.0L) - 1.0L/26880.0L) + 13.0L/15120.0L) - 179.0L/172800.0L) - 19.0L/2880.0L) + 1663.0L/145152.0L) + 1261.0L/90720.0L) - 2447.0L/100800.0L) - 5.0L/1008.0L) + 1.0L/168.0L; -poly_val[2] = x*(x*(x*(x*(x*(x*(x*(x*(x*(-11.0L/725760.0L*x + 1.0L/8064.0L) + 1.0L/2880.0L) - 23.0L/5040.0L) + 73.0L/34560.0L) + 29.0L/640.0L) - 4399.0L/72576.0L) - 541.0L/5040.0L) + 667.0L/4320.0L) + 5.0L/126.0L) - 5.0L/126.0L; -poly_val[3] = x*(x*(x*(x*(x*(x*(x*(x*(x*((11.0L/241920.0L)*x - 1.0L/3024.0L) - 13.0L/8960.0L) + 17.0L/1260.0L) + 23.0L/3840.0L) - 13.0L/80.0L) + 5459.0L/48384.0L) + 4369.0L/7560.0L) - 4069.0L/6720.0L) - 5.0L/21.0L) + 5.0L/28.0L; -poly_val[4] = x*(x*(x*(x*(x*(x*(x*(x*(x*(-11.0L/120960.0L*x + 1.0L/1728.0L) + 1.0L/280.0L) - 1.0L/40.0L) - 43.0L/1152.0L) + 323.0L/960.0L) + 431.0L/12096.0L) - 1669.0L/1080.0L) + 1069.0L/1680.0L) + 5.0L/3.0L) - 5.0L/7.0L; -poly_val[5] = x*(x*(x*(x*(x*(x*(x*(x*(x*((11.0L/86400.0L)*x - 1.0L/1440.0L) - 11.0L/1920.0L) + 11.0L/360.0L) + 2387.0L/28800.0L) - 341.0L/800.0L) - 1529.0L/3456.0L) + 1529.0L/720.0L) + 5269.0L/7200.0L) - 5269.0L/1800.0L) - 1.0L/6.0L; -poly_val[6] = x*(x*(x*(x*(x*(x*(x*(x*(x*(-11.0L/86400.0L*x + 
1.0L/1728.0L) + 1.0L/160.0L) - 1.0L/40.0L) - 3017.0L/28800.0L) + 323.0L/960.0L) + 1249.0L/1728.0L) - 1669.0L/1080.0L) - 2269.0L/1200.0L) + 5.0L/3.0L) + 1; -poly_val[7] = x*(x*(x*(x*(x*(x*(x*(x*(x*((11.0L/120960.0L)*x - 1.0L/3024.0L) - 3.0L/640.0L) + 17.0L/1260.0L) + 53.0L/640.0L) - 13.0L/80.0L) - 14197.0L/24192.0L) + 4369.0L/7560.0L) + 667.0L/480.0L) - 5.0L/21.0L) - 5.0L/14.0L; -poly_val[8] = x*(x*(x*(x*(x*(x*(x*(x*(x*(-11.0L/241920.0L*x + 1.0L/8064.0L) + 1.0L/420.0L) - 23.0L/5040.0L) - 479.0L/11520.0L) + 29.0L/640.0L) + 6563.0L/24192.0L) - 541.0L/5040.0L) - 5069.0L/10080.0L) + 5.0L/126.0L) + 5.0L/42.0L; -poly_val[9] = x*(x*(x*(x*(x*(x*(x*(x*(x*((11.0L/725760.0L)*x - 1.0L/36288.0L) - 1.0L/1280.0L) + 13.0L/15120.0L) + 89.0L/6912.0L) - 19.0L/2880.0L) - 10837.0L/145152.0L) + 1261.0L/90720.0L) + 371.0L/2880.0L) - 5.0L/1008.0L) - 5.0L/168.0L; -poly_val[10] = x*(x*(x*(x*(x*(x*(x*(x*(x*(-11.0L/3628800.0L*x + 1.0L/362880.0L) + 1.0L/6720.0L) - 1.0L/15120.0L) - 391.0L/172800.0L) + 13.0L/28800.0L) + 901.0L/72576.0L) - 41.0L/45360.0L) - 1049.0L/50400.0L) + 1.0L/3150.0L) + 1.0L/210.0L; -poly_val[11] = pow(x, 2)*(pow(x, 2)*(pow(x, 2)*(pow(x, 2)*((1.0L/3628800.0L)*pow(x, 2) - 1.0L/80640.0L) + 31.0L/172800.0L) - 139.0L/145152.0L) + 479.0L/302400.0L) - 1.0L/2772.0L; -break; -case 2: -poly_val[0] = x*(x*(x*(x*(x*(x*(pow(x, 2)*(-1.0L/362880.0L*x + 1.0L/40320.0L) - 1.0L/2160.0L) + 19.0L/28800.0L) + 13.0L/5760.0L) - 67.0L/18144.0L) - 41.0L/15120.0L) + 13.0L/3600.0L) + 1.0L/3150.0L; -poly_val[1] = x*(x*(x*(x*(x*(x*(x*(x*((11.0L/362880.0L)*x - 1.0L/4032.0L) - 1.0L/3360.0L) + 13.0L/2160.0L) - 179.0L/28800.0L) - 19.0L/576.0L) + 1663.0L/36288.0L) + 1261.0L/30240.0L) - 2447.0L/50400.0L) - 5.0L/1008.0L; -poly_val[2] = x*(x*(x*(x*(x*(x*(x*(x*(-11.0L/72576.0L*x + 1.0L/896.0L) + 1.0L/360.0L) - 23.0L/720.0L) + 73.0L/5760.0L) + 29.0L/128.0L) - 4399.0L/18144.0L) - 541.0L/1680.0L) + 667.0L/2160.0L) + 5.0L/126.0L; -poly_val[3] = x*(x*(x*(x*(x*(x*(x*(x*((11.0L/24192.0L)*x - 1.0L/336.0L) - 
13.0L/1120.0L) + 17.0L/180.0L) + 23.0L/640.0L) - 13.0L/16.0L) + 5459.0L/12096.0L) + 4369.0L/2520.0L) - 4069.0L/3360.0L) - 5.0L/21.0L; -poly_val[4] = x*(x*(x*(x*(x*(x*(x*(x*(-11.0L/12096.0L*x + 1.0L/192.0L) + 1.0L/35.0L) - 7.0L/40.0L) - 43.0L/192.0L) + 323.0L/192.0L) + 431.0L/3024.0L) - 1669.0L/360.0L) + 1069.0L/840.0L) + 5.0L/3.0L; -poly_val[5] = x*(x*(x*(x*(x*(x*(x*(x*((11.0L/8640.0L)*x - 1.0L/160.0L) - 11.0L/240.0L) + 77.0L/360.0L) + 2387.0L/4800.0L) - 341.0L/160.0L) - 1529.0L/864.0L) + 1529.0L/240.0L) + 5269.0L/3600.0L) - 5269.0L/1800.0L; -poly_val[6] = x*(x*(x*(x*(x*(x*(x*(x*(-11.0L/8640.0L*x + 1.0L/192.0L) + 1.0L/20.0L) - 7.0L/40.0L) - 3017.0L/4800.0L) + 323.0L/192.0L) + 1249.0L/432.0L) - 1669.0L/360.0L) - 2269.0L/600.0L) + 5.0L/3.0L; -poly_val[7] = x*(x*(x*(x*(x*(x*(x*(x*((11.0L/12096.0L)*x - 1.0L/336.0L) - 3.0L/80.0L) + 17.0L/180.0L) + 159.0L/320.0L) - 13.0L/16.0L) - 14197.0L/6048.0L) + 4369.0L/2520.0L) + 667.0L/240.0L) - 5.0L/21.0L; -poly_val[8] = x*(x*(x*(x*(x*(x*(x*(x*(-11.0L/24192.0L*x + 1.0L/896.0L) + 2.0L/105.0L) - 23.0L/720.0L) - 479.0L/1920.0L) + 29.0L/128.0L) + 6563.0L/6048.0L) - 541.0L/1680.0L) - 5069.0L/5040.0L) + 5.0L/126.0L; -poly_val[9] = x*(x*(x*(x*(x*(x*(x*(x*((11.0L/72576.0L)*x - 1.0L/4032.0L) - 1.0L/160.0L) + 13.0L/2160.0L) + 89.0L/1152.0L) - 19.0L/576.0L) - 10837.0L/36288.0L) + 1261.0L/30240.0L) + 371.0L/1440.0L) - 5.0L/1008.0L; -poly_val[10] = x*(x*(x*(x*(x*(x*(x*(x*(-11.0L/362880.0L*x + 1.0L/40320.0L) + 1.0L/840.0L) - 1.0L/2160.0L) - 391.0L/28800.0L) + 13.0L/5760.0L) + 901.0L/18144.0L) - 41.0L/15120.0L) - 1049.0L/25200.0L) + 1.0L/3150.0L; -poly_val[11] = x*(pow(x, 2)*(pow(x, 2)*(pow(x, 2)*((1.0L/362880.0L)*pow(x, 2) - 1.0L/10080.0L) + 31.0L/28800.0L) - 139.0L/36288.0L) + 479.0L/151200.0L); -break; -} -} -void beta_Lagrange_n6(int deriv, double x, double *poly_val){ -switch(deriv) -{ -case 0: -poly_val[0] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-1.0L/6227020800.0L*x + 1.0L/479001600.0L) + 1.0L/479001600.0L) - 1.0L/8709120.0L) + 
1.0L/4838400.0L) + 31.0L/14515200.0L) - 247.0L/43545600.0L) - 139.0L/8709120.0L) + 1049.0L/21772800.0L) + 479.0L/10886400.0L) - 2791.0L/19958400.0L) - 1.0L/33264.0L) + 1.0L/10296.0L); -poly_val[1] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((1.0L/479001600.0L)*x - 1.0L/39916800.0L) - 31.0L/479001600.0L) + 1.0L/604800.0L) - 29.0L/14515200.0L) - 41.0L/1209600.0L) + 3337.0L/43545600.0L) + 121.0L/453600.0L) - 3893.0L/5443200.0L) - 19.0L/25200.0L) + 7171.0L/3326400.0L) + 1.0L/1925.0L) - 1.0L/660.0L); -poly_val[2] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-1.0L/79833600.0L*x + 1.0L/7257600.0L) + 47.0L/79833600.0L) - 1.0L/96768.0L) + 1.0L/268800.0L) + 601.0L/2419200.0L) - 3169.0L/7257600.0L) - 625.0L/290304.0L) + 8891.0L/1814400.0L) + 643.0L/100800.0L) - 8777.0L/554400.0L) - 1.0L/224.0L) + 1.0L/88.0L); -poly_val[3] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((1.0L/21772800.0L)*x - 1.0L/2177280.0L) - 61.0L/21772800.0L) + 41.0L/1088640.0L) + 181.0L/7257600.0L) - 151.0L/145152.0L) + 23477.0L/21772800.0L) + 3011.0L/272160.0L) - 53293.0L/2721600.0L) - 4969.0L/136080.0L) + 33583.0L/453600.0L) + 5.0L/189.0L) - 1.0L/18.0L); -poly_val[4] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-1.0L/8709120.0L*x + 1.0L/967680.0L) + 73.0L/8709120.0L) - 29.0L/322560.0L) - 479.0L/2903040.0L) + 59.0L/21504.0L) - 3317.0L/8709120.0L) - 33853.0L/967680.0L) + 156529.0L/4354560.0L) + 4469.0L/26880.0L) - 29483.0L/120960.0L) - 15.0L/112.0L) + 5.0L/24.0L); -poly_val[5] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((1.0L/4838400.0L)*x - 1.0L/604800.0L) - 83.0L/4838400.0L) + 1.0L/6720.0L) + 761.0L/1612800.0L) - 971.0L/201600.0L) - 21169.0L/4838400.0L) + 1039.0L/15120.0L) - 4523.0L/604800.0L) - 1769.0L/4200.0L) + 8783.0L/33600.0L) + 6.0L/7.0L) - 3.0L/4.0L); -poly_val[6] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-1.0L/3628800.0L*x + 1.0L/518400.0L) + 13.0L/518400.0L) - 91.0L/518400.0L) - 143.0L/172800.0L) + 1001.0L/172800.0L) + 44473.0L/3628800.0L) - 44473.0L/518400.0L) - 5291.0L/64800.0L) + 37037.0L/64800.0L) + 767.0L/3600.0L) - 
5369.0L/3600.0L) - 1.0L/7.0L) + 1; -poly_val[7] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((1.0L/3628800.0L)*x - 1.0L/604800.0L) - 97.0L/3628800.0L) + 1.0L/6720.0L) + 1181.0L/1209600.0L) - 971.0L/201600.0L) - 61951.0L/3628800.0L) + 1039.0L/15120.0L) + 68207.0L/453600.0L) - 1769.0L/4200.0L) - 15983.0L/25200.0L) + 6.0L/7.0L) + 1); -poly_val[8] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-1.0L/4838400.0L*x + 1.0L/967680.0L) + 101.0L/4838400.0L) - 29.0L/322560.0L) - 1291.0L/1612800.0L) + 59.0L/21504.0L) + 71023.0L/4838400.0L) - 33853.0L/967680.0L) - 317413.0L/2419200.0L) + 4469.0L/26880.0L) + 33083.0L/67200.0L) - 15.0L/112.0L) - 3.0L/8.0L); -poly_val[9] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((1.0L/8709120.0L)*x - 1.0L/2177280.0L) - 103.0L/8709120.0L) + 41.0L/1088640.0L) + 443.0L/967680.0L) - 151.0L/145152.0L) - 71653.0L/8709120.0L) + 3011.0L/272160.0L) + 73169.0L/1088640.0L) - 4969.0L/136080.0L) - 35983.0L/181440.0L) + 5.0L/189.0L) + 5.0L/36.0L); -poly_val[10] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-1.0L/21772800.0L*x + 1.0L/7257600.0L) + 103.0L/21772800.0L) - 1.0L/96768.0L) - 1301.0L/7257600.0L) + 601.0L/2419200.0L) + 66109.0L/21772800.0L) - 625.0L/290304.0L) - 120949.0L/5443200.0L) + 643.0L/100800.0L) + 9227.0L/151200.0L) - 1.0L/224.0L) - 1.0L/24.0L); -poly_val[11] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((1.0L/79833600.0L)*x - 1.0L/39916800.0L) - 101.0L/79833600.0L) + 1.0L/604800.0L) + 37.0L/806400.0L) - 41.0L/1209600.0L) - 5273.0L/7257600.0L) + 121.0L/453600.0L) + 4577.0L/907200.0L) - 19.0L/25200.0L) - 7459.0L/554400.0L) + 1.0L/1925.0L) + 1.0L/110.0L); -poly_val[12] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-1.0L/479001600.0L*x + 1.0L/479001600.0L) + 97.0L/479001600.0L) - 1.0L/8709120.0L) - 101.0L/14515200.0L) + 31.0L/14515200.0L) + 4601.0L/43545600.0L) - 139.0L/8709120.0L) - 15553.0L/21772800.0L) + 479.0L/10886400.0L) + 37483.0L/19958400.0L) - 1.0L/33264.0L) - 1.0L/792.0L); -poly_val[13] = x*(pow(x, 2)*(pow(x, 2)*(pow(x, 2)*(pow(x, 2)*(pow(x, 2)*((1.0L/6227020800.0L)*pow(x, 2) - 
1.0L/68428800.0L) + 1.0L/2073600.0L) - 311.0L/43545600.0L) + 37.0L/777600.0L) - 59.0L/475200.0L) + 1.0L/12012.0L); -break; -case 1: -poly_val[0] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-1.0L/479001600.0L*x + 1.0L/39916800.0L) + 1.0L/43545600.0L) - 1.0L/870912.0L) + 1.0L/537600.0L) + 31.0L/1814400.0L) - 247.0L/6220800.0L) - 139.0L/1451520.0L) + 1049.0L/4354560.0L) + 479.0L/2721600.0L) - 2791.0L/6652800.0L) - 1.0L/16632.0L) + 1.0L/10296.0L; -poly_val[1] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((13.0L/479001600.0L)*x - 1.0L/3326400.0L) - 31.0L/43545600.0L) + 1.0L/60480.0L) - 29.0L/1612800.0L) - 41.0L/151200.0L) + 3337.0L/6220800.0L) + 121.0L/75600.0L) - 3893.0L/1088640.0L) - 19.0L/6300.0L) + 7171.0L/1108800.0L) + 2.0L/1925.0L) - 1.0L/660.0L; -poly_val[2] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-13.0L/79833600.0L*x + 1.0L/604800.0L) + 47.0L/7257600.0L) - 5.0L/48384.0L) + 3.0L/89600.0L) + 601.0L/302400.0L) - 3169.0L/1036800.0L) - 625.0L/48384.0L) + 8891.0L/362880.0L) + 643.0L/25200.0L) - 8777.0L/184800.0L) - 1.0L/112.0L) + 1.0L/88.0L; -poly_val[3] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((13.0L/21772800.0L)*x - 1.0L/181440.0L) - 671.0L/21772800.0L) + 41.0L/108864.0L) + 181.0L/806400.0L) - 151.0L/18144.0L) + 23477.0L/3110400.0L) + 3011.0L/45360.0L) - 53293.0L/544320.0L) - 4969.0L/34020.0L) + 33583.0L/151200.0L) + 10.0L/189.0L) - 1.0L/18.0L; -poly_val[4] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-13.0L/8709120.0L*x + 1.0L/80640.0L) + 803.0L/8709120.0L) - 29.0L/32256.0L) - 479.0L/322560.0L) + 59.0L/2688.0L) - 3317.0L/1244160.0L) - 33853.0L/161280.0L) + 156529.0L/870912.0L) + 4469.0L/6720.0L) - 29483.0L/40320.0L) - 15.0L/56.0L) + 5.0L/24.0L; -poly_val[5] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((13.0L/4838400.0L)*x - 1.0L/50400.0L) - 913.0L/4838400.0L) + 1.0L/672.0L) + 761.0L/179200.0L) - 971.0L/25200.0L) - 21169.0L/691200.0L) + 1039.0L/2520.0L) - 4523.0L/120960.0L) - 1769.0L/1050.0L) + 8783.0L/11200.0L) + 12.0L/7.0L) - 3.0L/4.0L; -poly_val[6] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-13.0L/3628800.0L*x + 
1.0L/43200.0L) + 143.0L/518400.0L) - 91.0L/51840.0L) - 143.0L/19200.0L) + 1001.0L/21600.0L) + 44473.0L/518400.0L) - 44473.0L/86400.0L) - 5291.0L/12960.0L) + 37037.0L/16200.0L) + 767.0L/1200.0L) - 5369.0L/1800.0L) - 1.0L/7.0L; -poly_val[7] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((13.0L/3628800.0L)*x - 1.0L/50400.0L) - 1067.0L/3628800.0L) + 1.0L/672.0L) + 1181.0L/134400.0L) - 971.0L/25200.0L) - 61951.0L/518400.0L) + 1039.0L/2520.0L) + 68207.0L/90720.0L) - 1769.0L/1050.0L) - 15983.0L/8400.0L) + 12.0L/7.0L) + 1; -poly_val[8] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-13.0L/4838400.0L*x + 1.0L/80640.0L) + 1111.0L/4838400.0L) - 29.0L/32256.0L) - 1291.0L/179200.0L) + 59.0L/2688.0L) + 71023.0L/691200.0L) - 33853.0L/161280.0L) - 317413.0L/483840.0L) + 4469.0L/6720.0L) + 33083.0L/22400.0L) - 15.0L/56.0L) - 3.0L/8.0L; -poly_val[9] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((13.0L/8709120.0L)*x - 1.0L/181440.0L) - 1133.0L/8709120.0L) + 41.0L/108864.0L) + 443.0L/107520.0L) - 151.0L/18144.0L) - 71653.0L/1244160.0L) + 3011.0L/45360.0L) + 73169.0L/217728.0L) - 4969.0L/34020.0L) - 35983.0L/60480.0L) + 10.0L/189.0L) + 5.0L/36.0L; -poly_val[10] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-13.0L/21772800.0L*x + 1.0L/604800.0L) + 1133.0L/21772800.0L) - 5.0L/48384.0L) - 1301.0L/806400.0L) + 601.0L/302400.0L) + 66109.0L/3110400.0L) - 625.0L/48384.0L) - 120949.0L/1088640.0L) + 643.0L/25200.0L) + 9227.0L/50400.0L) - 1.0L/112.0L) - 1.0L/24.0L; -poly_val[11] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((13.0L/79833600.0L)*x - 1.0L/3326400.0L) - 101.0L/7257600.0L) + 1.0L/60480.0L) + 37.0L/89600.0L) - 41.0L/151200.0L) - 5273.0L/1036800.0L) + 121.0L/75600.0L) + 4577.0L/181440.0L) - 19.0L/6300.0L) - 7459.0L/184800.0L) + 2.0L/1925.0L) + 1.0L/110.0L; -poly_val[12] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-13.0L/479001600.0L*x + 1.0L/39916800.0L) + 97.0L/43545600.0L) - 1.0L/870912.0L) - 101.0L/1612800.0L) + 31.0L/1814400.0L) + 4601.0L/6220800.0L) - 139.0L/1451520.0L) - 15553.0L/4354560.0L) + 479.0L/2721600.0L) + 37483.0L/6652800.0L) - 
1.0L/16632.0L) - 1.0L/792.0L; -poly_val[13] = pow(x, 2)*(pow(x, 2)*(pow(x, 2)*(pow(x, 2)*(pow(x, 2)*((1.0L/479001600.0L)*pow(x, 2) - 1.0L/6220800.0L) + 1.0L/230400.0L) - 311.0L/6220800.0L) + 37.0L/155520.0L) - 59.0L/158400.0L) + 1.0L/12012.0L; -break; -case 2: -poly_val[0] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-1.0L/39916800.0L*x + 1.0L/3628800.0L) + 1.0L/4354560.0L) - 1.0L/96768.0L) + 1.0L/67200.0L) + 31.0L/259200.0L) - 247.0L/1036800.0L) - 139.0L/290304.0L) + 1049.0L/1088640.0L) + 479.0L/907200.0L) - 2791.0L/3326400.0L) - 1.0L/16632.0L; -poly_val[1] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((13.0L/39916800.0L)*x - 1.0L/302400.0L) - 31.0L/4354560.0L) + 1.0L/6720.0L) - 29.0L/201600.0L) - 41.0L/21600.0L) + 3337.0L/1036800.0L) + 121.0L/15120.0L) - 3893.0L/272160.0L) - 19.0L/2100.0L) + 7171.0L/554400.0L) + 2.0L/1925.0L; -poly_val[2] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-13.0L/6652800.0L*x + 11.0L/604800.0L) + 47.0L/725760.0L) - 5.0L/5376.0L) + 3.0L/11200.0L) + 601.0L/43200.0L) - 3169.0L/172800.0L) - 3125.0L/48384.0L) + 8891.0L/90720.0L) + 643.0L/8400.0L) - 8777.0L/92400.0L) - 1.0L/112.0L; -poly_val[3] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((13.0L/1814400.0L)*x - 11.0L/181440.0L) - 671.0L/2177280.0L) + 41.0L/12096.0L) + 181.0L/100800.0L) - 151.0L/2592.0L) + 23477.0L/518400.0L) + 3011.0L/9072.0L) - 53293.0L/136080.0L) - 4969.0L/11340.0L) + 33583.0L/75600.0L) + 10.0L/189.0L; -poly_val[4] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-13.0L/725760.0L*x + 11.0L/80640.0L) + 803.0L/870912.0L) - 29.0L/3584.0L) - 479.0L/40320.0L) + 59.0L/384.0L) - 3317.0L/207360.0L) - 33853.0L/32256.0L) + 156529.0L/217728.0L) + 4469.0L/2240.0L) - 29483.0L/20160.0L) - 15.0L/56.0L; -poly_val[5] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((13.0L/403200.0L)*x - 11.0L/50400.0L) - 913.0L/483840.0L) + 3.0L/224.0L) + 761.0L/22400.0L) - 971.0L/3600.0L) - 21169.0L/115200.0L) + 1039.0L/504.0L) - 4523.0L/30240.0L) - 1769.0L/350.0L) + 8783.0L/5600.0L) + 12.0L/7.0L; -poly_val[6] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-13.0L/302400.0L*x + 11.0L/43200.0L) + 
143.0L/51840.0L) - 91.0L/5760.0L) - 143.0L/2400.0L) + 7007.0L/21600.0L) + 44473.0L/86400.0L) - 44473.0L/17280.0L) - 5291.0L/3240.0L) + 37037.0L/5400.0L) + 767.0L/600.0L) - 5369.0L/1800.0L; -poly_val[7] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((13.0L/302400.0L)*x - 11.0L/50400.0L) - 1067.0L/362880.0L) + 3.0L/224.0L) + 1181.0L/16800.0L) - 971.0L/3600.0L) - 61951.0L/86400.0L) + 1039.0L/504.0L) + 68207.0L/22680.0L) - 1769.0L/350.0L) - 15983.0L/4200.0L) + 12.0L/7.0L; -poly_val[8] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-13.0L/403200.0L*x + 11.0L/80640.0L) + 1111.0L/483840.0L) - 29.0L/3584.0L) - 1291.0L/22400.0L) + 59.0L/384.0L) + 71023.0L/115200.0L) - 33853.0L/32256.0L) - 317413.0L/120960.0L) + 4469.0L/2240.0L) + 33083.0L/11200.0L) - 15.0L/56.0L; -poly_val[9] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((13.0L/725760.0L)*x - 11.0L/181440.0L) - 1133.0L/870912.0L) + 41.0L/12096.0L) + 443.0L/13440.0L) - 151.0L/2592.0L) - 71653.0L/207360.0L) + 3011.0L/9072.0L) + 73169.0L/54432.0L) - 4969.0L/11340.0L) - 35983.0L/30240.0L) + 10.0L/189.0L; -poly_val[10] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-13.0L/1814400.0L*x + 11.0L/604800.0L) + 1133.0L/2177280.0L) - 5.0L/5376.0L) - 1301.0L/100800.0L) + 601.0L/43200.0L) + 66109.0L/518400.0L) - 3125.0L/48384.0L) - 120949.0L/272160.0L) + 643.0L/8400.0L) + 9227.0L/25200.0L) - 1.0L/112.0L; -poly_val[11] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((13.0L/6652800.0L)*x - 1.0L/302400.0L) - 101.0L/725760.0L) + 1.0L/6720.0L) + 37.0L/11200.0L) - 41.0L/21600.0L) - 5273.0L/172800.0L) + 121.0L/15120.0L) + 4577.0L/45360.0L) - 19.0L/2100.0L) - 7459.0L/92400.0L) + 2.0L/1925.0L; -poly_val[12] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-13.0L/39916800.0L*x + 1.0L/3628800.0L) + 97.0L/4354560.0L) - 1.0L/96768.0L) - 101.0L/201600.0L) + 31.0L/259200.0L) + 4601.0L/1036800.0L) - 139.0L/290304.0L) - 15553.0L/1088640.0L) + 479.0L/907200.0L) + 37483.0L/3326400.0L) - 1.0L/16632.0L; -poly_val[13] = x*(pow(x, 2)*(pow(x, 2)*(pow(x, 2)*(pow(x, 2)*((1.0L/39916800.0L)*pow(x, 2) - 1.0L/622080.0L) + 1.0L/28800.0L) - 311.0L/1036800.0L) 
+ 37.0L/38880.0L) - 59.0L/79200.0L); -break; -} -} -void beta_Lagrange_n7(int deriv, double x, double *poly_val){ -switch(deriv) -{ -case 0: -poly_val[0] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-1.0L/1307674368000.0L*x + 1.0L/87178291200.0L) + 1.0L/37362124800.0L) - 1.0L/958003200.0L) + 23.0L/14370048000.0L) + 1.0L/29030400.0L) - 173.0L/1828915200.0L) - 311.0L/609638400.0L) + 137.0L/81648000.0L) + 37.0L/10886400.0L) - 2173.0L/179625600.0L) - 59.0L/6652800.0L) + 37133.0L/1135134000.0L) + 1.0L/168168.0L) - 1.0L/45045.0L); -poly_val[1] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((1.0L/87178291200.0L)*x - 1.0L/6227020800.0L) - 1.0L/1556755200.0L) + 1.0L/59875200.0L) - 1.0L/68428800.0L) - 13.0L/21772800.0L) + 211.0L/152409600.0L) + 101.0L/10886400.0L) - 95.0L/3483648.0L) - 2767.0L/43545600.0L) + 7043.0L/34214400.0L) + 20137.0L/119750400.0L) - 129053.0L/227026800.0L) - 7.0L/61776.0L) + 1.0L/2574.0L); -poly_val[2] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-1.0L/12454041600.0L*x + 1.0L/958003200.0L) + 1.0L/166053888.0L) - 23.0L/191600640.0L) + 1.0L/958003200.0L) + 139.0L/29030400.0L) - 149.0L/17418240.0L) - 1399.0L/17418240.0L) + 23.0L/113400.0L) + 6271.0L/10886400.0L) - 19787.0L/11975040.0L) - 2077.0L/1330560.0L) + 51043.0L/10810800.0L) + 7.0L/6600.0L) - 7.0L/2145.0L); -poly_val[3] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((1.0L/2874009600.0L)*x - 1.0L/239500800.0L) - 23.0L/718502400.0L) + 31.0L/59875200.0L) + 151.0L/287400960.0L) - 83.0L/3628800.0L) + 1621.0L/65318400.0L) + 2363.0L/5443200.0L) - 228653.0L/261273600.0L) - 73811.0L/21772800.0L) + 2960071.0L/359251200.0L) + 31957.0L/3326400.0L) - 500287.0L/19958400.0L) - 7.0L/1056.0L) + 7.0L/396.0L); -poly_val[4] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-1.0L/958003200.0L*x + 1.0L/87091200.0L) + 107.0L/958003200.0L) - 131.0L/87091200.0L) - 3139.0L/958003200.0L) + 6283.0L/87091200.0L) - 1429.0L/87091200.0L) - 135073.0L/87091200.0L) + 11261.0L/5443200.0L) + 157477.0L/10886400.0L) - 1642643.0L/59875200.0L) - 
247081.0L/5443200.0L) + 239731.0L/2494800.0L) + 7.0L/216.0L) - 7.0L/99.0L); -poly_val[5] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((1.0L/435456000.0L)*x - 1.0L/43545600.0L) - 1.0L/3628800.0L) + 17.0L/5443200.0L) + 2371.0L/217728000.0L) - 1153.0L/7257600.0L) - 2663.0L/21772800.0L) + 40987.0L/10886400.0L) - 267829.0L/145152000.0L) - 1819681.0L/43545600.0L) + 1055099.0L/21772800.0L) + 222581.0L/1209600.0L) - 52889.0L/189000.0L) - 7.0L/48.0L) + 7.0L/30.0L); -poly_val[6] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-1.0L/261273600.0L*x + 1.0L/29030400.0L) + 131.0L/261273600.0L) - 139.0L/29030400.0L) - 6211.0L/261273600.0L) + 2441.0L/9676800.0L) + 17959.0L/37324800.0L) - 184297.0L/29030400.0L) - 1021.0L/326592.0L) + 286397.0L/3628800.0L) - 333059.0L/16329600.0L) - 90281.0L/201600.0L) + 68231.0L/226800.0L) + 7.0L/8.0L) - 7.0L/9.0L); -poly_val[7] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((1.0L/203212800.0L)*x - 1.0L/25401600.0L) - 1.0L/1451520.0L) + 1.0L/181440.0L) + 533.0L/14515200.0L) - 533.0L/1814400.0L) - 9581.0L/10160640.0L) + 9581.0L/1270080.0L) + 353639.0L/29030400.0L) - 353639.0L/3628800.0L) - 54613.0L/725760.0L) + 54613.0L/90720.0L) + 266681.0L/1411200.0L) - 266681.0L/176400.0L) - 1.0L/8.0L) + 1; -poly_val[8] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-1.0L/203212800.0L*x + 1.0L/29030400.0L) + 1.0L/1382400.0L) - 139.0L/29030400.0L) - 241.0L/5806080.0L) + 2441.0L/9676800.0L) + 242881.0L/203212800.0L) - 184297.0L/29030400.0L) - 1601.0L/86400.0L) + 286397.0L/3628800.0L) + 279731.0L/1814400.0L) - 90281.0L/201600.0L) - 112331.0L/176400.0L) + 7.0L/8.0L) + 1); -poly_val[9] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((1.0L/261273600.0L)*x - 1.0L/43545600.0L) - 19.0L/32659200.0L) + 17.0L/5443200.0L) + 4547.0L/130636800.0L) - 1153.0L/7257600.0L) - 68659.0L/65318400.0L) + 40987.0L/10886400.0L) + 4442849.0L/261273600.0L) - 1819681.0L/43545600.0L) - 9281953.0L/65318400.0L) + 222581.0L/1209600.0L) + 116803.0L/226800.0L) - 7.0L/48.0L) - 7.0L/18.0L); -poly_val[10] = 
x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-1.0L/435456000.0L*x + 1.0L/87091200.0L) + 31.0L/87091200.0L) - 131.0L/87091200.0L) - 9427.0L/435456000.0L) + 6283.0L/87091200.0L) + 57173.0L/87091200.0L) - 135073.0L/87091200.0L) - 70337.0L/6804000.0L) + 157477.0L/10886400.0L) + 427361.0L/5443200.0L) - 247081.0L/5443200.0L) - 254431.0L/1134000.0L) + 7.0L/216.0L) + 7.0L/45.0L); -poly_val[11] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((1.0L/958003200.0L)*x - 1.0L/239500800.0L) - 13.0L/79833600.0L) + 31.0L/59875200.0L) + 4723.0L/479001600.0L) - 83.0L/3628800.0L) - 6347.0L/21772800.0L) + 2363.0L/5443200.0L) + 5017.0L/1161216.0L) - 73811.0L/21772800.0L) - 3535297.0L/119750400.0L) + 31957.0L/3326400.0L) + 522337.0L/6652800.0L) - 7.0L/1056.0L) - 7.0L/132.0L); -poly_val[12] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-1.0L/2874009600.0L*x + 1.0L/958003200.0L) + 31.0L/574801920.0L) - 23.0L/191600640.0L) - 9187.0L/2874009600.0L) + 139.0L/29030400.0L) + 947.0L/10450944.0L) - 1399.0L/17418240.0L) - 20623.0L/16329600.0L) + 6271.0L/10886400.0L) + 294617.0L/35925120.0L) - 2077.0L/1330560.0L) - 52807.0L/2494800.0L) + 7.0L/6600.0L) + 7.0L/495.0L); -poly_val[13] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((1.0L/12454041600.0L)*x - 1.0L/6227020800.0L) - 19.0L/1556755200.0L) + 1.0L/59875200.0L) + 67.0L/95800320.0L) - 13.0L/21772800.0L) - 59.0L/3110400.0L) + 101.0L/10886400.0L) + 22159.0L/87091200.0L) - 2767.0L/43545600.0L) - 385381.0L/239500800.0L) + 20137.0L/119750400.0L) + 16591.0L/4054050.0L) - 7.0L/61776.0L) - 7.0L/2574.0L); -poly_val[14] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-1.0L/87178291200.0L*x + 1.0L/87178291200.0L) + 1.0L/593049600.0L) - 1.0L/958003200.0L) - 89.0L/958003200.0L) + 1.0L/29030400.0L) + 1487.0L/609638400.0L) - 311.0L/609638400.0L) - 29.0L/907200.0L) + 37.0L/10886400.0L) + 11927.0L/59875200.0L) - 59.0L/6652800.0L) - 38033.0L/75675600.0L) + 1.0L/168168.0L) + 1.0L/3003.0L); -poly_val[15] = x*(pow(x, 2)*(pow(x, 2)*(pow(x, 2)*(pow(x, 2)*(pow(x, 2)*(pow(x, 
2)*((1.0L/1307674368000.0L)*pow(x, 2) - 1.0L/9340531200.0L) + 41.0L/7185024000.0L) - 67.0L/457228800.0L) + 2473.0L/1306368000.0L) - 4201.0L/359251200.0L) + 266681.0L/9081072000.0L) - 1.0L/51480.0L); -break; -case 1: -poly_val[0] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-1.0L/87178291200.0L*x + 1.0L/6227020800.0L) + 1.0L/2874009600.0L) - 1.0L/79833600.0L) + 23.0L/1306368000.0L) + 1.0L/2903040.0L) - 173.0L/203212800.0L) - 311.0L/76204800.0L) + 137.0L/11664000.0L) + 37.0L/1814400.0L) - 2173.0L/35925120.0L) - 59.0L/1663200.0L) + 37133.0L/378378000.0L) + 1.0L/84084.0L) - 1.0L/45045.0L; -poly_val[1] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((1.0L/5811886080.0L)*x - 1.0L/444787200.0L) - 1.0L/119750400.0L) + 1.0L/4989600.0L) - 1.0L/6220800.0L) - 13.0L/2177280.0L) + 211.0L/16934400.0L) + 101.0L/1360800.0L) - 95.0L/497664.0L) - 2767.0L/7257600.0L) + 7043.0L/6842880.0L) + 20137.0L/29937600.0L) - 129053.0L/75675600.0L) - 7.0L/30888.0L) + 1.0L/2574.0L; -poly_val[2] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-1.0L/830269440.0L*x + 1.0L/68428800.0L) + 1.0L/12773376.0L) - 23.0L/15966720.0L) + 1.0L/87091200.0L) + 139.0L/2903040.0L) - 149.0L/1935360.0L) - 1399.0L/2177280.0L) + 23.0L/16200.0L) + 6271.0L/1814400.0L) - 19787.0L/2395008.0L) - 2077.0L/332640.0L) + 51043.0L/3603600.0L) + 7.0L/3300.0L) - 7.0L/2145.0L; -poly_val[3] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((1.0L/191600640.0L)*x - 1.0L/17107200.0L) - 299.0L/718502400.0L) + 31.0L/4989600.0L) + 151.0L/26127360.0L) - 83.0L/362880.0L) + 1621.0L/7257600.0L) + 2363.0L/680400.0L) - 228653.0L/37324800.0L) - 73811.0L/3628800.0L) + 2960071.0L/71850240.0L) + 31957.0L/831600.0L) - 500287.0L/6652800.0L) - 7.0L/528.0L) + 7.0L/396.0L; -poly_val[4] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-1.0L/63866880.0L*x + 1.0L/6220800.0L) + 1391.0L/958003200.0L) - 131.0L/7257600.0L) - 3139.0L/87091200.0L) + 6283.0L/8709120.0L) - 1429.0L/9676800.0L) - 135073.0L/10886400.0L) + 11261.0L/777600.0L) + 157477.0L/1814400.0L) - 1642643.0L/11975040.0L) - 
247081.0L/1360800.0L) + 239731.0L/831600.0L) + 7.0L/108.0L) - 7.0L/99.0L; -poly_val[5] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((1.0L/29030400.0L)*x - 1.0L/3110400.0L) - 13.0L/3628800.0L) + 17.0L/453600.0L) + 26081.0L/217728000.0L) - 1153.0L/725760.0L) - 2663.0L/2419200.0L) + 40987.0L/1360800.0L) - 267829.0L/20736000.0L) - 1819681.0L/7257600.0L) + 1055099.0L/4354560.0L) + 222581.0L/302400.0L) - 52889.0L/63000.0L) - 7.0L/24.0L) + 7.0L/30.0L; -poly_val[6] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-1.0L/17418240.0L*x + 1.0L/2073600.0L) + 1703.0L/261273600.0L) - 139.0L/2419200.0L) - 68321.0L/261273600.0L) + 2441.0L/967680.0L) + 17959.0L/4147200.0L) - 184297.0L/3628800.0L) - 1021.0L/46656.0L) + 286397.0L/604800.0L) - 333059.0L/3265920.0L) - 90281.0L/50400.0L) + 68231.0L/75600.0L) + 7.0L/4.0L) - 7.0L/9.0L; -poly_val[7] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((1.0L/13547520.0L)*x - 1.0L/1814400.0L) - 13.0L/1451520.0L) + 1.0L/15120.0L) + 5863.0L/14515200.0L) - 533.0L/181440.0L) - 9581.0L/1128960.0L) + 9581.0L/158760.0L) + 353639.0L/4147200.0L) - 353639.0L/604800.0L) - 54613.0L/145152.0L) + 54613.0L/22680.0L) + 266681.0L/470400.0L) - 266681.0L/88200.0L) - 1.0L/8.0L; -poly_val[8] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-1.0L/13547520.0L*x + 1.0L/2073600.0L) + 13.0L/1382400.0L) - 139.0L/2419200.0L) - 2651.0L/5806080.0L) + 2441.0L/967680.0L) + 242881.0L/22579200.0L) - 184297.0L/3628800.0L) - 11207.0L/86400.0L) + 286397.0L/604800.0L) + 279731.0L/362880.0L) - 90281.0L/50400.0L) - 112331.0L/58800.0L) + 7.0L/4.0L) + 1; -poly_val[9] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((1.0L/17418240.0L)*x - 1.0L/3110400.0L) - 247.0L/32659200.0L) + 17.0L/453600.0L) + 50017.0L/130636800.0L) - 1153.0L/725760.0L) - 68659.0L/7257600.0L) + 40987.0L/1360800.0L) + 4442849.0L/37324800.0L) - 1819681.0L/7257600.0L) - 9281953.0L/13063680.0L) + 222581.0L/302400.0L) + 116803.0L/75600.0L) - 7.0L/24.0L) - 7.0L/18.0L; -poly_val[10] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-1.0L/29030400.0L*x + 
1.0L/6220800.0L) + 403.0L/87091200.0L) - 131.0L/7257600.0L) - 103697.0L/435456000.0L) + 6283.0L/8709120.0L) + 57173.0L/9676800.0L) - 135073.0L/10886400.0L) - 70337.0L/972000.0L) + 157477.0L/1814400.0L) + 427361.0L/1088640.0L) - 247081.0L/1360800.0L) - 254431.0L/378000.0L) + 7.0L/108.0L) + 7.0L/45.0L; -poly_val[11] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((1.0L/63866880.0L)*x - 1.0L/17107200.0L) - 169.0L/79833600.0L) + 31.0L/4989600.0L) + 4723.0L/43545600.0L) - 83.0L/362880.0L) - 6347.0L/2419200.0L) + 2363.0L/680400.0L) + 5017.0L/165888.0L) - 73811.0L/3628800.0L) - 3535297.0L/23950080.0L) + 31957.0L/831600.0L) + 522337.0L/2217600.0L) - 7.0L/528.0L) - 7.0L/132.0L; -poly_val[12] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-1.0L/191600640.0L*x + 1.0L/68428800.0L) + 403.0L/574801920.0L) - 23.0L/15966720.0L) - 9187.0L/261273600.0L) + 139.0L/2903040.0L) + 947.0L/1161216.0L) - 1399.0L/2177280.0L) - 20623.0L/2332800.0L) + 6271.0L/1814400.0L) + 294617.0L/7185024.0L) - 2077.0L/332640.0L) - 52807.0L/831600.0L) + 7.0L/3300.0L) + 7.0L/495.0L; -poly_val[13] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((1.0L/830269440.0L)*x - 1.0L/444787200.0L) - 19.0L/119750400.0L) + 1.0L/4989600.0L) + 67.0L/8709120.0L) - 13.0L/2177280.0L) - 59.0L/345600.0L) + 101.0L/1360800.0L) + 22159.0L/12441600.0L) - 2767.0L/7257600.0L) - 385381.0L/47900160.0L) + 20137.0L/29937600.0L) + 16591.0L/1351350.0L) - 7.0L/30888.0L) - 7.0L/2574.0L; -poly_val[14] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-1.0L/5811886080.0L*x + 1.0L/6227020800.0L) + 1.0L/45619200.0L) - 1.0L/79833600.0L) - 89.0L/87091200.0L) + 1.0L/2903040.0L) + 1487.0L/67737600.0L) - 311.0L/76204800.0L) - 29.0L/129600.0L) + 37.0L/1814400.0L) + 11927.0L/11975040.0L) - 59.0L/1663200.0L) - 38033.0L/25225200.0L) + 1.0L/84084.0L) + 1.0L/3003.0L; -poly_val[15] = pow(x, 2)*(pow(x, 2)*(pow(x, 2)*(pow(x, 2)*(pow(x, 2)*(pow(x, 2)*((1.0L/87178291200.0L)*pow(x, 2) - 1.0L/718502400.0L) + 41.0L/653184000.0L) - 67.0L/50803200.0L) + 2473.0L/186624000.0L) - 
4201.0L/71850240.0L) + 266681.0L/3027024000.0L) - 1.0L/51480.0L; -break; -case 2: -poly_val[0] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-1.0L/6227020800.0L*x + 1.0L/479001600.0L) + 1.0L/239500800.0L) - 1.0L/7257600.0L) + 23.0L/130636800.0L) + 1.0L/322560.0L) - 173.0L/25401600.0L) - 311.0L/10886400.0L) + 137.0L/1944000.0L) + 37.0L/362880.0L) - 2173.0L/8981280.0L) - 59.0L/554400.0L) + 37133.0L/189189000.0L) + 1.0L/84084.0L; -poly_val[1] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((1.0L/415134720.0L)*x - 1.0L/34214400.0L) - 1.0L/9979200.0L) + 1.0L/453600.0L) - 1.0L/622080.0L) - 13.0L/241920.0L) + 211.0L/2116800.0L) + 101.0L/194400.0L) - 95.0L/82944.0L) - 2767.0L/1451520.0L) + 7043.0L/1710720.0L) + 20137.0L/9979200.0L) - 129053.0L/37837800.0L) - 7.0L/30888.0L; -poly_val[2] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-1.0L/59304960.0L*x + 13.0L/68428800.0L) + 1.0L/1064448.0L) - 23.0L/1451520.0L) + 1.0L/8709120.0L) + 139.0L/322560.0L) - 149.0L/241920.0L) - 1399.0L/311040.0L) + 23.0L/2700.0L) + 6271.0L/362880.0L) - 19787.0L/598752.0L) - 2077.0L/110880.0L) + 51043.0L/1801800.0L) + 7.0L/3300.0L; -poly_val[3] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((1.0L/13685760.0L)*x - 13.0L/17107200.0L) - 299.0L/59875200.0L) + 31.0L/453600.0L) + 151.0L/2612736.0L) - 83.0L/40320.0L) + 1621.0L/907200.0L) + 2363.0L/97200.0L) - 228653.0L/6220800.0L) - 73811.0L/725760.0L) + 2960071.0L/17962560.0L) + 31957.0L/277200.0L) - 500287.0L/3326400.0L) - 7.0L/528.0L; -poly_val[4] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-1.0L/4561920.0L*x + 13.0L/6220800.0L) + 1391.0L/79833600.0L) - 1441.0L/7257600.0L) - 3139.0L/8709120.0L) + 6283.0L/967680.0L) - 1429.0L/1209600.0L) - 135073.0L/1555200.0L) + 11261.0L/129600.0L) + 157477.0L/362880.0L) - 1642643.0L/2993760.0L) - 247081.0L/453600.0L) + 239731.0L/415800.0L) + 7.0L/108.0L; -poly_val[5] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((1.0L/2073600.0L)*x - 13.0L/3110400.0L) - 13.0L/302400.0L) + 187.0L/453600.0L) + 26081.0L/21772800.0L) - 1153.0L/80640.0L) - 2663.0L/302400.0L) + 
40987.0L/194400.0L) - 267829.0L/3456000.0L) - 1819681.0L/1451520.0L) + 1055099.0L/1088640.0L) + 222581.0L/100800.0L) - 52889.0L/31500.0L) - 7.0L/24.0L; -poly_val[6] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-1.0L/1244160.0L*x + 13.0L/2073600.0L) + 1703.0L/21772800.0L) - 1529.0L/2419200.0L) - 68321.0L/26127360.0L) + 2441.0L/107520.0L) + 17959.0L/518400.0L) - 184297.0L/518400.0L) - 1021.0L/7776.0L) + 286397.0L/120960.0L) - 333059.0L/816480.0L) - 90281.0L/16800.0L) + 68231.0L/37800.0L) + 7.0L/4.0L; -poly_val[7] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((1.0L/967680.0L)*x - 13.0L/1814400.0L) - 13.0L/120960.0L) + 11.0L/15120.0L) + 5863.0L/1451520.0L) - 533.0L/20160.0L) - 9581.0L/141120.0L) + 9581.0L/22680.0L) + 353639.0L/691200.0L) - 353639.0L/120960.0L) - 54613.0L/36288.0L) + 54613.0L/7560.0L) + 266681.0L/235200.0L) - 266681.0L/88200.0L; -poly_val[8] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-1.0L/967680.0L*x + 13.0L/2073600.0L) + 13.0L/115200.0L) - 1529.0L/2419200.0L) - 2651.0L/580608.0L) + 2441.0L/107520.0L) + 242881.0L/2822400.0L) - 184297.0L/518400.0L) - 11207.0L/14400.0L) + 286397.0L/120960.0L) + 279731.0L/90720.0L) - 90281.0L/16800.0L) - 112331.0L/29400.0L) + 7.0L/4.0L; -poly_val[9] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((1.0L/1244160.0L)*x - 13.0L/3110400.0L) - 247.0L/2721600.0L) + 187.0L/453600.0L) + 50017.0L/13063680.0L) - 1153.0L/80640.0L) - 68659.0L/907200.0L) + 40987.0L/194400.0L) + 4442849.0L/6220800.0L) - 1819681.0L/1451520.0L) - 9281953.0L/3265920.0L) + 222581.0L/100800.0L) + 116803.0L/37800.0L) - 7.0L/24.0L; -poly_val[10] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-1.0L/2073600.0L*x + 13.0L/6220800.0L) + 403.0L/7257600.0L) - 1441.0L/7257600.0L) - 103697.0L/43545600.0L) + 6283.0L/967680.0L) + 57173.0L/1209600.0L) - 135073.0L/1555200.0L) - 70337.0L/162000.0L) + 157477.0L/362880.0L) + 427361.0L/272160.0L) - 247081.0L/453600.0L) - 254431.0L/189000.0L) + 7.0L/108.0L; -poly_val[11] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((1.0L/4561920.0L)*x - 13.0L/17107200.0L) - 
169.0L/6652800.0L) + 31.0L/453600.0L) + 4723.0L/4354560.0L) - 83.0L/40320.0L) - 6347.0L/302400.0L) + 2363.0L/97200.0L) + 5017.0L/27648.0L) - 73811.0L/725760.0L) - 3535297.0L/5987520.0L) + 31957.0L/277200.0L) + 522337.0L/1108800.0L) - 7.0L/528.0L; -poly_val[12] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-1.0L/13685760.0L*x + 13.0L/68428800.0L) + 403.0L/47900160.0L) - 23.0L/1451520.0L) - 9187.0L/26127360.0L) + 139.0L/322560.0L) + 947.0L/145152.0L) - 1399.0L/311040.0L) - 20623.0L/388800.0L) + 6271.0L/362880.0L) + 294617.0L/1796256.0L) - 2077.0L/110880.0L) - 52807.0L/415800.0L) + 7.0L/3300.0L; -poly_val[13] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((1.0L/59304960.0L)*x - 1.0L/34214400.0L) - 19.0L/9979200.0L) + 1.0L/453600.0L) + 67.0L/870912.0L) - 13.0L/241920.0L) - 59.0L/43200.0L) + 101.0L/194400.0L) + 22159.0L/2073600.0L) - 2767.0L/1451520.0L) - 385381.0L/11975040.0L) + 20137.0L/9979200.0L) + 16591.0L/675675.0L) - 7.0L/30888.0L; -poly_val[14] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-1.0L/415134720.0L*x + 1.0L/479001600.0L) + 1.0L/3801600.0L) - 1.0L/7257600.0L) - 89.0L/8709120.0L) + 1.0L/322560.0L) + 1487.0L/8467200.0L) - 311.0L/10886400.0L) - 29.0L/21600.0L) + 37.0L/362880.0L) + 11927.0L/2993760.0L) - 59.0L/554400.0L) - 38033.0L/12612600.0L) + 1.0L/84084.0L; -poly_val[15] = x*(pow(x, 2)*(pow(x, 2)*(pow(x, 2)*(pow(x, 2)*(pow(x, 2)*((1.0L/6227020800.0L)*pow(x, 2) - 1.0L/59875200.0L) + 41.0L/65318400.0L) - 67.0L/6350400.0L) + 2473.0L/31104000.0L) - 4201.0L/17962560.0L) + 266681.0L/1513512000.0L); -break; -} -} -void beta_Lagrange_n8(int deriv, double x, double *poly_val){ -switch(deriv) -{ -case 0: -poly_val[0] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-1.0L/355687428096000.0L*x + 1.0L/20922789888000.0L) + 1.0L/5230697472000.0L) - 1.0L/149448499200.0L) + 1.0L/135862272000.0L) + 41.0L/114960384000.0L) - 391.0L/402361344000.0L) - 67.0L/7315660800.0L) + 4657.0L/146313216000.0L) + 2473.0L/20901888000.0L) - 6583.0L/14370048000.0L) - 4201.0L/5748019200.0L) + 
144689.0L/48432384000.0L) + 266681.0L/145297152000.0L) - 157.0L/20384000.0L) - 1.0L/823680.0L) + 1.0L/194480.0L); -poly_val[1] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((1.0L/20922789888000.0L)*x - 1.0L/1307674368000.0L) - 23.0L/5230697472000.0L) + 31.0L/261534873600.0L) - 67.0L/1494484992000.0L) - 97.0L/14370048000.0L) + 6143.0L/402361344000.0L) + 331.0L/1828915200.0L) - 82289.0L/146313216000.0L) - 2747.0L/1143072000.0L) + 244843.0L/28740096000.0L) + 2711.0L/179625600.0L) - 8287319.0L/145297152000.0L) - 21701.0L/567567000.0L) + 150307.0L/1009008000.0L) + 8.0L/315315.0L) - 1.0L/10010.0L); -poly_val[2] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-1.0L/2615348736000.0L*x + 1.0L/174356582400.0L) + 19.0L/435891456000.0L) - 1.0L/1037836800.0L) - 31.0L/62270208000.0L) + 19.0L/319334400.0L) - 2543.0L/25147584000.0L) - 517.0L/304819200.0L) + 27683.0L/6096384000.0L) + 1363.0L/58060800.0L) - 356513.0L/4790016000.0L) - 24149.0L/159667200.0L) + 169537087.0L/326918592000.0L) + 1058149.0L/2724321600.0L) - 3135247.0L/2270268000.0L) - 1.0L/3861.0L) + 2.0L/2145.0L); -poly_val[3] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((1.0L/523069747200.0L)*x - 1.0L/37362124800.0L) - 67.0L/261534873600.0L) + 179.0L/37362124800.0L) + 139.0L/18681062400.0L) - 919.0L/2874009600.0L) + 6427.0L/20118067200.0L) + 2591.0L/261273600.0L) - 78299.0L/3657830400.0L) - 4787.0L/32659200.0L) + 287941.0L/718502400.0L) + 176989.0L/179625600.0L) - 401207.0L/134534400.0L) - 41981.0L/16216200.0L) + 68923.0L/8408400.0L) + 56.0L/32175.0L) - 4.0L/715.0L); -poly_val[4] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-1.0L/149448499200.0L*x + 1.0L/11496038400.0L) + 19.0L/18681062400.0L) - 47.0L/2874009600.0L) - 3323.0L/74724249600.0L) + 6707.0L/5748019200.0L) - 109.0L/574801920.0L) - 10331.0L/261273600.0L) + 2489.0L/41803776.0L) + 678739.0L/1045094400.0L) - 4125323.0L/2874009600.0L) - 6782981.0L/1437004800.0L) + 12532313.0L/1037836800.0L) + 1033649.0L/79833600.0L) - 1014049.0L/28828800.0L) - 
7.0L/792.0L) + 7.0L/286.0L); -poly_val[5] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((1.0L/57480192000.0L)*x - 1.0L/4790016000.0L) - 1.0L/342144000.0L) + 13.0L/319334400.0L) + 1567.0L/9580032000.0L) - 4889.0L/1596672000.0L) - 2521.0L/1026432000.0L) + 9767.0L/87091200.0L) - 139379.0L/1741824000.0L) - 37517.0L/18144000.0L) + 7618319.0L/2395008000.0L) + 352423.0L/19958400.0L) - 126741731.0L/3592512000.0L) - 999349.0L/18711000.0L) + 2919647.0L/24948000.0L) + 56.0L/1485.0L) - 14.0L/165.0L); -poly_val[6] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-1.0L/28740096000.0L*x + 1.0L/2612736000.0L) + 13.0L/2052864000.0L) - 1.0L/13063680.0L) - 6011.0L/14370048000.0L) + 7811.0L/1306368000.0L) + 2543.0L/224532000.0L) - 6067.0L/26127360.0L) - 126523.0L/2612736000.0L) + 12312353.0L/2612736000.0L) - 48574927.0L/14370048000.0L) - 248945.0L/5225472.0L) + 7969111.0L/133056000.0L) + 901349.0L/4536000.0L) - 287383.0L/924000.0L) - 7.0L/45.0L) + 14.0L/55.0L); -poly_val[7] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((1.0L/18289152000.0L)*x - 1.0L/1828915200.0L) - 97.0L/9144576000.0L) + 29.0L/261273600.0L) + 257.0L/326592000.0L) - 331.0L/37324800.0L) - 253499.0L/9144576000.0L) + 652969.0L/1828915200.0L) + 8209463.0L/18289152000.0L) - 251539.0L/32659200.0L) - 586787.0L/326592000.0L) + 203617.0L/2332800.0L) - 4022849.0L/127008000.0L) - 372149.0L/793800.0L) + 293749.0L/882000.0L) + 8.0L/9.0L) - 4.0L/5.0L); -poly_val[8] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-1.0L/14631321600.0L*x + 1.0L/1625702400.0L) + 17.0L/1219276800.0L) - 17.0L/135475200.0L) - 391.0L/348364800.0L) + 391.0L/38707200.0L) + 167297.0L/3657830400.0L) - 167297.0L/406425600.0L) - 4913051.0L/4877107200.0L) + 4913051.0L/541900800.0L) + 1034059.0L/87091200.0L) - 1034059.0L/9676800.0L) - 63566689.0L/914457600.0L) + 63566689.0L/101606400.0L) + 1077749.0L/6350400.0L) - 1077749.0L/705600.0L) - 1.0L/9.0L) + 1; -poly_val[9] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((1.0L/14631321600.0L)*x - 
1.0L/1828915200.0L) - 53.0L/3657830400.0L) + 29.0L/261273600.0L) + 1289.0L/1045094400.0L) - 331.0L/37324800.0L) - 39947.0L/731566080.0L) + 652969.0L/1828915200.0L) + 3992581.0L/2926264320.0L) - 251539.0L/32659200.0L) - 5114489.0L/261273600.0L) + 203617.0L/2332800.0L) + 5310539.0L/33868800.0L) - 372149.0L/793800.0L) - 50061.0L/78400.0L) + 8.0L/9.0L) + 1); -poly_val[10] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-1.0L/18289152000.0L*x + 1.0L/2612736000.0L) + 109.0L/9144576000.0L) - 1.0L/13063680.0L) - 1373.0L/1306368000.0L) + 7811.0L/1306368000.0L) + 110987.0L/2286144000.0L) - 6067.0L/26127360.0L) - 23232953.0L/18289152000.0L) + 12312353.0L/2612736000.0L) + 24721061.0L/1306368000.0L) - 248945.0L/5225472.0L) - 38328917.0L/254016000.0L) + 901349.0L/4536000.0L) + 940549.0L/1764000.0L) - 7.0L/45.0L) - 2.0L/5.0L); -poly_val[11] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((1.0L/28740096000.0L)*x - 1.0L/4790016000.0L) - 37.0L/4790016000.0L) + 13.0L/319334400.0L) + 151.0L/217728000.0L) - 4889.0L/1596672000.0L) - 466597.0L/14370048000.0L) + 9767.0L/87091200.0L) + 739651.0L/870912000.0L) - 37517.0L/18144000.0L) - 14666779.0L/1197504000.0L) + 352423.0L/19958400.0L) + 158720899.0L/1796256000.0L) - 999349.0L/18711000.0L) - 279677.0L/1134000.0L) + 56.0L/1485.0L) + 28.0L/165.0L); -poly_val[12] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-1.0L/57480192000.0L*x + 1.0L/11496038400.0L) + 1.0L/256608000.0L) - 47.0L/2874009600.0L) - 10091.0L/28740096000.0L) + 6707.0L/5748019200.0L) + 33481.0L/2052864000.0L) - 10331.0L/261273600.0L) - 2166403.0L/5225472000.0L) + 678739.0L/1045094400.0L) + 80761123.0L/14370048000.0L) - 6782981.0L/1437004800.0L) - 1622179.0L/44352000.0L) + 1033649.0L/79833600.0L) + 351083.0L/3696000.0L) - 7.0L/792.0L) - 7.0L/110.0L); -poly_val[13] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((1.0L/149448499200.0L)*x - 1.0L/37362124800.0L) - 1.0L/667180800.0L) + 179.0L/37362124800.0L) + 10001.0L/74724249600.0L) - 919.0L/2874009600.0L) - 
2183.0L/359251200.0L) + 2591.0L/261273600.0L) + 154891.0L/1045094400.0L) - 4787.0L/32659200.0L) - 2723543.0L/1437004800.0L) + 176989.0L/179625600.0L) + 12175981.0L/1037836800.0L) - 41981.0L/16216200.0L) - 213041.0L/7207200.0L) + 56.0L/32175.0L) + 14.0L/715.0L); -poly_val[14] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-1.0L/523069747200.0L*x + 1.0L/174356582400.0L) + 37.0L/87178291200.0L) - 1.0L/1037836800.0L) - 463.0L/12454041600.0L) + 19.0L/319334400.0L) + 823.0L/502951680.0L) - 517.0L/304819200.0L) - 9353.0L/243855360.0L) + 1363.0L/58060800.0L) + 453109.0L/958003200.0L) - 24149.0L/159667200.0L) - 186467471.0L/65383718400.0L) + 1058149.0L/2724321600.0L) + 3213647.0L/454053600.0L) - 1.0L/3861.0L) - 2.0L/429.0L); -poly_val[15] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((1.0L/2615348736000.0L)*x - 1.0L/1307674368000.0L) - 109.0L/1307674368000.0L) + 31.0L/261534873600.0L) + 83.0L/11675664000.0L) - 97.0L/14370048000.0L) - 30491.0L/100590336000.0L) + 331.0L/1828915200.0L) + 126241.0L/18289152000.0L) - 2747.0L/1143072000.0L) - 299063.0L/3592512000.0L) + 2711.0L/179625600.0L) + 2993917.0L/6054048000.0L) - 21701.0L/567567000.0L) - 51169.0L/42042000.0L) + 8.0L/315315.0L) + 4.0L/5005.0L); -poly_val[16] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-1.0L/20922789888000.0L*x + 1.0L/20922789888000.0L) + 53.0L/5230697472000.0L) - 1.0L/149448499200.0L) - 179.0L/213497856000.0L) + 41.0L/114960384000.0L) + 14017.0L/402361344000.0L) - 67.0L/7315660800.0L) - 113791.0L/146313216000.0L) + 2473.0L/20901888000.0L) + 4747.0L/513216000.0L) - 4201.0L/5748019200.0L) - 7912501.0L/145297152000.0L) + 266681.0L/145297152000.0L) + 269131.0L/2018016000.0L) - 1.0L/823680.0L) - 1.0L/11440.0L); -poly_val[17] = x*(pow(x, 2)*(pow(x, 2)*(pow(x, 2)*(pow(x, 2)*(pow(x, 2)*(pow(x, 2)*(pow(x, 2)*((1.0L/355687428096000.0L)*pow(x, 2) - 1.0L/1743565824000.0L) + 23.0L/498161664000.0L) - 757.0L/402361344000.0L) + 2021.0L/48771072000.0L) - 4679.0L/9580032000.0L) + 3739217.0L/1307674368000.0L) - 
63397.0L/9081072000.0L) + 1.0L/218790.0L); -break; -case 1: -poly_val[0] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-1.0L/20922789888000.0L*x + 1.0L/1307674368000.0L) + 1.0L/348713164800.0L) - 1.0L/10674892800.0L) + 1.0L/10450944000.0L) + 41.0L/9580032000.0L) - 391.0L/36578304000.0L) - 67.0L/731566080.0L) + 4657.0L/16257024000.0L) + 2473.0L/2612736000.0L) - 6583.0L/2052864000.0L) - 4201.0L/958003200.0L) + 144689.0L/9686476800.0L) + 266681.0L/36324288000.0L) - 471.0L/20384000.0L) - 1.0L/411840.0L) + 1.0L/194480.0L; -poly_val[1] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((17.0L/20922789888000.0L)*x - 1.0L/81729648000.0L) - 23.0L/348713164800.0L) + 31.0L/18681062400.0L) - 67.0L/114960384000.0L) - 97.0L/1197504000.0L) + 6143.0L/36578304000.0L) + 331.0L/182891520.0L) - 82289.0L/16257024000.0L) - 2747.0L/142884000.0L) + 244843.0L/4105728000.0L) + 2711.0L/29937600.0L) - 8287319.0L/29059430400.0L) - 21701.0L/141891750.0L) + 150307.0L/336336000.0L) + 16.0L/315315.0L) - 1.0L/10010.0L; -poly_val[2] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-17.0L/2615348736000.0L*x + 1.0L/10897286400.0L) + 19.0L/29059430400.0L) - 1.0L/74131200.0L) - 31.0L/4790016000.0L) + 19.0L/26611200.0L) - 2543.0L/2286144000.0L) - 517.0L/30481920.0L) + 27683.0L/677376000.0L) + 1363.0L/7257600.0L) - 356513.0L/684288000.0L) - 24149.0L/26611200.0L) + 169537087.0L/65383718400.0L) + 1058149.0L/681080400.0L) - 3135247.0L/756756000.0L) - 2.0L/3861.0L) + 2.0L/2145.0L; -poly_val[3] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((17.0L/523069747200.0L)*x - 1.0L/2335132800.0L) - 67.0L/17435658240.0L) + 179.0L/2668723200.0L) + 139.0L/1437004800.0L) - 919.0L/239500800.0L) + 6427.0L/1828915200.0L) + 2591.0L/26127360.0L) - 78299.0L/406425600.0L) - 4787.0L/4082400.0L) + 287941.0L/102643200.0L) + 176989.0L/29937600.0L) - 401207.0L/26906880.0L) - 41981.0L/4054050.0L) + 68923.0L/2802800.0L) + 112.0L/32175.0L) - 4.0L/715.0L; -poly_val[4] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-17.0L/149448499200.0L*x + 
1.0L/718502400.0L) + 19.0L/1245404160.0L) - 47.0L/205286400.0L) - 3323.0L/5748019200.0L) + 6707.0L/479001600.0L) - 109.0L/52254720.0L) - 10331.0L/26127360.0L) + 2489.0L/4644864.0L) + 678739.0L/130636800.0L) - 4125323.0L/410572800.0L) - 6782981.0L/239500800.0L) + 12532313.0L/207567360.0L) + 1033649.0L/19958400.0L) - 1014049.0L/9609600.0L) - 7.0L/396.0L) + 7.0L/286.0L; -poly_val[5] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((17.0L/57480192000.0L)*x - 1.0L/299376000.0L) - 1.0L/22809600.0L) + 13.0L/22809600.0L) + 20371.0L/9580032000.0L) - 4889.0L/133056000.0L) - 2521.0L/93312000.0L) + 9767.0L/8709120.0L) - 139379.0L/193536000.0L) - 37517.0L/2268000.0L) + 7618319.0L/342144000.0L) + 352423.0L/3326400.0L) - 126741731.0L/718502400.0L) - 999349.0L/4677750.0L) + 2919647.0L/8316000.0L) + 112.0L/1485.0L) - 14.0L/165.0L; -poly_val[6] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-17.0L/28740096000.0L*x + 1.0L/163296000.0L) + 13.0L/136857600.0L) - 1.0L/933120.0L) - 78143.0L/14370048000.0L) + 7811.0L/108864000.0L) + 2543.0L/20412000.0L) - 6067.0L/2612736.0L) - 126523.0L/290304000.0L) + 12312353.0L/326592000.0L) - 48574927.0L/2052864000.0L) - 248945.0L/870912.0L) + 7969111.0L/26611200.0L) + 901349.0L/1134000.0L) - 287383.0L/308000.0L) - 14.0L/45.0L) + 14.0L/55.0L; -poly_val[7] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((17.0L/18289152000.0L)*x - 1.0L/114307200.0L) - 97.0L/609638400.0L) + 29.0L/18662400.0L) + 3341.0L/326592000.0L) - 331.0L/3110400.0L) - 2788489.0L/9144576000.0L) + 652969.0L/182891520.0L) + 8209463.0L/2032128000.0L) - 251539.0L/4082400.0L) - 586787.0L/46656000.0L) + 203617.0L/388800.0L) - 4022849.0L/25401600.0L) - 372149.0L/198450.0L) + 293749.0L/294000.0L) + 16.0L/9.0L) - 4.0L/5.0L; -poly_val[8] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-17.0L/14631321600.0L*x + 1.0L/101606400.0L) + 17.0L/81285120.0L) - 17.0L/9676800.0L) - 5083.0L/348364800.0L) + 391.0L/3225600.0L) + 1840267.0L/3657830400.0L) - 167297.0L/40642560.0L) - 4913051.0L/541900800.0L) + 
4913051.0L/67737600.0L) + 1034059.0L/12441600.0L) - 1034059.0L/1612800.0L) - 63566689.0L/182891520.0L) + 63566689.0L/25401600.0L) + 1077749.0L/2116800.0L) - 1077749.0L/352800.0L) - 1.0L/9.0L; -poly_val[9] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((17.0L/14631321600.0L)*x - 1.0L/114307200.0L) - 53.0L/243855360.0L) + 29.0L/18662400.0L) + 16757.0L/1045094400.0L) - 331.0L/3110400.0L) - 439417.0L/731566080.0L) + 652969.0L/182891520.0L) + 3992581.0L/325140480.0L) - 251539.0L/4082400.0L) - 5114489.0L/37324800.0L) + 203617.0L/388800.0L) + 5310539.0L/6773760.0L) - 372149.0L/198450.0L) - 150183.0L/78400.0L) + 16.0L/9.0L) + 1; -poly_val[10] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-17.0L/18289152000.0L*x + 1.0L/163296000.0L) + 109.0L/609638400.0L) - 1.0L/933120.0L) - 17849.0L/1306368000.0L) + 7811.0L/108864000.0L) + 1220857.0L/2286144000.0L) - 6067.0L/2612736.0L) - 23232953.0L/2032128000.0L) + 12312353.0L/326592000.0L) + 24721061.0L/186624000.0L) - 248945.0L/870912.0L) - 38328917.0L/50803200.0L) + 901349.0L/1134000.0L) + 940549.0L/588000.0L) - 14.0L/45.0L) - 2.0L/5.0L; -poly_val[11] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((17.0L/28740096000.0L)*x - 1.0L/299376000.0L) - 37.0L/319334400.0L) + 13.0L/22809600.0L) + 1963.0L/217728000.0L) - 4889.0L/133056000.0L) - 466597.0L/1306368000.0L) + 9767.0L/8709120.0L) + 739651.0L/96768000.0L) - 37517.0L/2268000.0L) - 14666779.0L/171072000.0L) + 352423.0L/3326400.0L) + 158720899.0L/359251200.0L) - 999349.0L/4677750.0L) - 279677.0L/378000.0L) + 112.0L/1485.0L) + 28.0L/165.0L; -poly_val[12] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-17.0L/57480192000.0L*x + 1.0L/718502400.0L) + 1.0L/17107200.0L) - 47.0L/205286400.0L) - 131183.0L/28740096000.0L) + 6707.0L/479001600.0L) + 33481.0L/186624000.0L) - 10331.0L/26127360.0L) - 2166403.0L/580608000.0L) + 678739.0L/130636800.0L) + 80761123.0L/2052864000.0L) - 6782981.0L/239500800.0L) - 1622179.0L/8870400.0L) + 1033649.0L/19958400.0L) + 351083.0L/1232000.0L) - 7.0L/396.0L) - 
7.0L/110.0L; -poly_val[13] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((17.0L/149448499200.0L)*x - 1.0L/2335132800.0L) - 1.0L/44478720.0L) + 179.0L/2668723200.0L) + 10001.0L/5748019200.0L) - 919.0L/239500800.0L) - 2183.0L/32659200.0L) + 2591.0L/26127360.0L) + 154891.0L/116121600.0L) - 4787.0L/4082400.0L) - 2723543.0L/205286400.0L) + 176989.0L/29937600.0L) + 12175981.0L/207567360.0L) - 41981.0L/4054050.0L) - 213041.0L/2402400.0L) + 112.0L/32175.0L) + 14.0L/715.0L; -poly_val[14] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-17.0L/523069747200.0L*x + 1.0L/10897286400.0L) + 37.0L/5811886080.0L) - 1.0L/74131200.0L) - 463.0L/958003200.0L) + 19.0L/26611200.0L) + 823.0L/45722880.0L) - 517.0L/30481920.0L) - 9353.0L/27095040.0L) + 1363.0L/7257600.0L) + 453109.0L/136857600.0L) - 24149.0L/26611200.0L) - 186467471.0L/13076743680.0L) + 1058149.0L/681080400.0L) + 3213647.0L/151351200.0L) - 2.0L/3861.0L) - 2.0L/429.0L; -poly_val[15] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((17.0L/2615348736000.0L)*x - 1.0L/81729648000.0L) - 109.0L/87178291200.0L) + 31.0L/18681062400.0L) + 83.0L/898128000.0L) - 97.0L/1197504000.0L) - 30491.0L/9144576000.0L) + 331.0L/182891520.0L) + 126241.0L/2032128000.0L) - 2747.0L/142884000.0L) - 299063.0L/513216000.0L) + 2711.0L/29937600.0L) + 2993917.0L/1210809600.0L) - 21701.0L/141891750.0L) - 51169.0L/14014000.0L) + 16.0L/315315.0L) + 4.0L/5005.0L; -poly_val[16] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-17.0L/20922789888000.0L*x + 1.0L/1307674368000.0L) + 53.0L/348713164800.0L) - 1.0L/10674892800.0L) - 179.0L/16422912000.0L) + 41.0L/9580032000.0L) + 14017.0L/36578304000.0L) - 67.0L/731566080.0L) - 113791.0L/16257024000.0L) + 2473.0L/2612736000.0L) + 33229.0L/513216000.0L) - 4201.0L/958003200.0L) - 7912501.0L/29059430400.0L) + 266681.0L/36324288000.0L) + 269131.0L/672672000.0L) - 1.0L/411840.0L) - 1.0L/11440.0L; -poly_val[17] = pow(x, 2)*(pow(x, 2)*(pow(x, 2)*(pow(x, 2)*(pow(x, 2)*(pow(x, 2)*(pow(x, 2)*((1.0L/20922789888000.0L)*pow(x, 2) - 
1.0L/116237721600.0L) + 23.0L/38320128000.0L) - 757.0L/36578304000.0L) + 2021.0L/5419008000.0L) - 4679.0L/1368576000.0L) + 3739217.0L/261534873600.0L) - 63397.0L/3027024000.0L) + 1.0L/218790.0L; -break; -case 2: -poly_val[0] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-1.0L/1307674368000.0L*x + 1.0L/87178291200.0L) + 1.0L/24908083200.0L) - 1.0L/821145600.0L) + 1.0L/870912000.0L) + 41.0L/870912000.0L) - 391.0L/3657830400.0L) - 67.0L/81285120.0L) + 4657.0L/2032128000.0L) + 2473.0L/373248000.0L) - 6583.0L/342144000.0L) - 4201.0L/191600640.0L) + 144689.0L/2421619200.0L) + 266681.0L/12108096000.0L) - 471.0L/10192000.0L) - 1.0L/411840.0L; -poly_val[1] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((17.0L/1307674368000.0L)*x - 1.0L/5448643200.0L) - 23.0L/24908083200.0L) + 31.0L/1437004800.0L) - 67.0L/9580032000.0L) - 97.0L/108864000.0L) + 6143.0L/3657830400.0L) + 331.0L/20321280.0L) - 82289.0L/2032128000.0L) - 2747.0L/20412000.0L) + 244843.0L/684288000.0L) + 2711.0L/5987520.0L) - 8287319.0L/7264857600.0L) - 21701.0L/47297250.0L) + 150307.0L/168168000.0L) + 16.0L/315315.0L; -poly_val[2] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-17.0L/163459296000.0L*x + 1.0L/726485760.0L) + 19.0L/2075673600.0L) - 1.0L/5702400.0L) - 31.0L/399168000.0L) + 19.0L/2419200.0L) - 2543.0L/228614400.0L) - 517.0L/3386880.0L) + 27683.0L/84672000.0L) + 1363.0L/1036800.0L) - 356513.0L/114048000.0L) - 24149.0L/5322240.0L) + 169537087.0L/16345929600.0L) + 1058149.0L/227026800.0L) - 3135247.0L/378378000.0L) - 2.0L/3861.0L; -poly_val[3] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((17.0L/32691859200.0L)*x - 1.0L/155675520.0L) - 67.0L/1245404160.0L) + 179.0L/205286400.0L) + 139.0L/119750400.0L) - 919.0L/21772800.0L) + 6427.0L/182891520.0L) + 2591.0L/2903040.0L) - 78299.0L/50803200.0L) - 4787.0L/583200.0L) + 287941.0L/17107200.0L) + 176989.0L/5987520.0L) - 401207.0L/6726720.0L) - 41981.0L/1351350.0L) + 68923.0L/1401400.0L) + 112.0L/32175.0L; -poly_val[4] = 
x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-17.0L/9340531200.0L*x + 1.0L/47900160.0L) + 19.0L/88957440.0L) - 611.0L/205286400.0L) - 3323.0L/479001600.0L) + 6707.0L/43545600.0L) - 109.0L/5225472.0L) - 10331.0L/2903040.0L) + 2489.0L/580608.0L) + 678739.0L/18662400.0L) - 4125323.0L/68428800.0L) - 6782981.0L/47900160.0L) + 12532313.0L/51891840.0L) + 1033649.0L/6652800.0L) - 1014049.0L/4804800.0L) - 7.0L/396.0L; -poly_val[5] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((17.0L/3592512000.0L)*x - 1.0L/19958400.0L) - 7.0L/11404800.0L) + 169.0L/22809600.0L) + 20371.0L/798336000.0L) - 4889.0L/12096000.0L) - 2521.0L/9331200.0L) + 9767.0L/967680.0L) - 139379.0L/24192000.0L) - 37517.0L/324000.0L) + 7618319.0L/57024000.0L) + 352423.0L/665280.0L) - 126741731.0L/179625600.0L) - 999349.0L/1559250.0L) + 2919647.0L/4158000.0L) + 112.0L/1485.0L; -poly_val[6] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-17.0L/1796256000.0L*x + 1.0L/10886400.0L) + 91.0L/68428800.0L) - 13.0L/933120.0L) - 78143.0L/1197504000.0L) + 85921.0L/108864000.0L) + 2543.0L/2041200.0L) - 6067.0L/290304.0L) - 126523.0L/36288000.0L) + 12312353.0L/46656000.0L) - 48574927.0L/342144000.0L) - 1244725.0L/870912.0L) + 7969111.0L/6652800.0L) + 901349.0L/378000.0L) - 287383.0L/154000.0L) - 14.0L/45.0L; -poly_val[7] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((17.0L/1143072000.0L)*x - 1.0L/7620480.0L) - 97.0L/43545600.0L) + 377.0L/18662400.0L) + 3341.0L/27216000.0L) - 3641.0L/3110400.0L) - 2788489.0L/914457600.0L) + 652969.0L/20321280.0L) + 8209463.0L/254016000.0L) - 251539.0L/583200.0L) - 586787.0L/7776000.0L) + 203617.0L/77760.0L) - 4022849.0L/6350400.0L) - 372149.0L/66150.0L) + 293749.0L/147000.0L) + 16.0L/9.0L; -poly_val[8] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-17.0L/914457600.0L*x + 1.0L/6773760.0L) + 17.0L/5806080.0L) - 221.0L/9676800.0L) - 5083.0L/29030400.0L) + 4301.0L/3225600.0L) + 1840267.0L/365783040.0L) - 167297.0L/4515840.0L) - 4913051.0L/67737600.0L) + 4913051.0L/9676800.0L) + 1034059.0L/2073600.0L) - 
1034059.0L/322560.0L) - 63566689.0L/45722880.0L) + 63566689.0L/8467200.0L) + 1077749.0L/1058400.0L) - 1077749.0L/352800.0L; -poly_val[9] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((17.0L/914457600.0L)*x - 1.0L/7620480.0L) - 53.0L/17418240.0L) + 377.0L/18662400.0L) + 16757.0L/87091200.0L) - 3641.0L/3110400.0L) - 439417.0L/73156608.0L) + 652969.0L/20321280.0L) + 3992581.0L/40642560.0L) - 251539.0L/583200.0L) - 5114489.0L/6220800.0L) + 203617.0L/77760.0L) + 5310539.0L/1693440.0L) - 372149.0L/66150.0L) - 150183.0L/39200.0L) + 16.0L/9.0L; -poly_val[10] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-17.0L/1143072000.0L*x + 1.0L/10886400.0L) + 109.0L/43545600.0L) - 13.0L/933120.0L) - 17849.0L/108864000.0L) + 85921.0L/108864000.0L) + 1220857.0L/228614400.0L) - 6067.0L/290304.0L) - 23232953.0L/254016000.0L) + 12312353.0L/46656000.0L) + 24721061.0L/31104000.0L) - 1244725.0L/870912.0L) - 38328917.0L/12700800.0L) + 901349.0L/378000.0L) + 940549.0L/294000.0L) - 14.0L/45.0L; -poly_val[11] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((17.0L/1796256000.0L)*x - 1.0L/19958400.0L) - 37.0L/22809600.0L) + 169.0L/22809600.0L) + 1963.0L/18144000.0L) - 4889.0L/12096000.0L) - 466597.0L/130636800.0L) + 9767.0L/967680.0L) + 739651.0L/12096000.0L) - 37517.0L/324000.0L) - 14666779.0L/28512000.0L) + 352423.0L/665280.0L) + 158720899.0L/89812800.0L) - 999349.0L/1559250.0L) - 279677.0L/189000.0L) + 112.0L/1485.0L; -poly_val[12] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-17.0L/3592512000.0L*x + 1.0L/47900160.0L) + 7.0L/8553600.0L) - 611.0L/205286400.0L) - 131183.0L/2395008000.0L) + 6707.0L/43545600.0L) + 33481.0L/18662400.0L) - 10331.0L/2903040.0L) - 2166403.0L/72576000.0L) + 678739.0L/18662400.0L) + 80761123.0L/342144000.0L) - 6782981.0L/47900160.0L) - 1622179.0L/2217600.0L) + 1033649.0L/6652800.0L) + 351083.0L/616000.0L) - 7.0L/396.0L; -poly_val[13] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((17.0L/9340531200.0L)*x - 1.0L/155675520.0L) - 7.0L/22239360.0L) + 179.0L/205286400.0L) + 
10001.0L/479001600.0L) - 919.0L/21772800.0L) - 2183.0L/3265920.0L) + 2591.0L/2903040.0L) + 154891.0L/14515200.0L) - 4787.0L/583200.0L) - 2723543.0L/34214400.0L) + 176989.0L/5987520.0L) + 12175981.0L/51891840.0L) - 41981.0L/1351350.0L) - 213041.0L/1201200.0L) + 112.0L/32175.0L; -poly_val[14] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-17.0L/32691859200.0L*x + 1.0L/726485760.0L) + 37.0L/415134720.0L) - 1.0L/5702400.0L) - 463.0L/79833600.0L) + 19.0L/2419200.0L) + 823.0L/4572288.0L) - 517.0L/3386880.0L) - 9353.0L/3386880.0L) + 1363.0L/1036800.0L) + 453109.0L/22809600.0L) - 24149.0L/5322240.0L) - 186467471.0L/3269185920.0L) + 1058149.0L/227026800.0L) + 3213647.0L/75675600.0L) - 2.0L/3861.0L; -poly_val[15] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((17.0L/163459296000.0L)*x - 1.0L/5448643200.0L) - 109.0L/6227020800.0L) + 31.0L/1437004800.0L) + 83.0L/74844000.0L) - 97.0L/108864000.0L) - 30491.0L/914457600.0L) + 331.0L/20321280.0L) + 126241.0L/254016000.0L) - 2747.0L/20412000.0L) - 299063.0L/85536000.0L) + 2711.0L/5987520.0L) + 2993917.0L/302702400.0L) - 21701.0L/47297250.0L) - 51169.0L/7007000.0L) + 16.0L/315315.0L; -poly_val[16] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-17.0L/1307674368000.0L*x + 1.0L/87178291200.0L) + 53.0L/24908083200.0L) - 1.0L/821145600.0L) - 179.0L/1368576000.0L) + 41.0L/870912000.0L) + 14017.0L/3657830400.0L) - 67.0L/81285120.0L) - 113791.0L/2032128000.0L) + 2473.0L/373248000.0L) + 33229.0L/85536000.0L) - 4201.0L/191600640.0L) - 7912501.0L/7264857600.0L) + 266681.0L/12108096000.0L) + 269131.0L/336336000.0L) - 1.0L/411840.0L; -poly_val[17] = x*(pow(x, 2)*(pow(x, 2)*(pow(x, 2)*(pow(x, 2)*(pow(x, 2)*(pow(x, 2)*((1.0L/1307674368000.0L)*pow(x, 2) - 1.0L/8302694400.0L) + 23.0L/3193344000.0L) - 757.0L/3657830400.0L) + 2021.0L/677376000.0L) - 4679.0L/228096000.0L) + 3739217.0L/65383718400.0L) - 63397.0L/1513512000.0L); -break; +void beta_Lagrange_n9(int deriv, double x, double *poly_val) { + switch(deriv) + { + case 0: + poly_val[0] = 
x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-1.0L/121645100408832000.0L*x + 1.0L/6402373705728000.0L) + 1.0L/1067062284288000.0L) - 1.0L/31384184832000.0L) + 1.0L/62768369664000.0L) + 23.0L/8966909952000.0L) - 313.0L/47076277248000.0L) - 757.0L/7242504192000.0L) + 3611.0L/9656672256000.0L) + 2021.0L/877879296000.0L) - 45757.0L/4828336128000.0L) - 4679.0L/172440576000.0L) + 1414271.0L/11769069312000.0L) + 3739217.0L/23538138624000.0L) - 957311.0L/1307674368000.0L) - 63397.0L/163459296000.0L) + 56311.0L/30875644800.0L) + 1.0L/3938220.0L) - 1.0L/831402.0L); + poly_val[1] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((1.0L/6402373705728000.0L)*x - 1.0L/355687428096000.0L) - 47.0L/2134124568576000.0L) + 1.0L/1609445376000.0L) + 1.0L/5706215424000.0L) - 79.0L/1494484992000.0L) + 20827.0L/188305108992000.0L) + 1801.0L/804722688000.0L) - 68909.0L/9656672256000.0L) - 7403.0L/146313216000.0L) + 1846049.0L/9656672256000.0L) + 139499.0L/229920768000.0L) - 117554201.0L/47076277248000.0L) - 1043321.0L/290594304000.0L) + 3677437.0L/237758976000.0L) + 10949.0L/1241856000.0L) - 599971.0L/15437822400.0L) - 9.0L/1555840.0L) + 1.0L/38896.0L); + poly_val[2] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-1.0L/711374856192000.0L*x + 1.0L/41845579776000.0L) + 83.0L/355687428096000.0L) - 59.0L/10461394944000.0L) - 1.0L/146313216000.0L) + 10691.0L/20922789888000.0L) - 167.0L/217945728000.0L) - 18301.0L/804722688000.0L) + 200467.0L/3218890752000.0L) + 156031.0L/292626432000.0L) - 2913091.0L/1609445376000.0L) - 2637347.0L/402361344000.0L) + 16131677.0L/653837184000.0L) + 11448761.0L/290594304000.0L) - 2067001.0L/13208832000.0L) - 196909.0L/2018016000.0L) + 105031.0L/263894400.0L) + 9.0L/140140.0L) - 9.0L/34034.0L); + poly_val[3] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((1.0L/125536739328000.0L)*x - 1.0L/7846046208000.0L) - 1.0L/664215552000.0L) + 83.0L/2615348736000.0L) + 137.0L/1902071808000.0L) - 571.0L/186810624000.0L) + 
151451.0L/62768369664000.0L) + 43693.0L/301771008000.0L) - 146939.0L/459841536000.0L) - 65377.0L/18289152000.0L) + 33971617.0L/3218890752000.0L) + 1311029.0L/28740096000.0L) - 2408863267.0L/15692092416000.0L) - 550937221.0L/1961511552000.0L) + 4896967.0L/4852224000.0L) + 9601741.0L/13621608000.0L) - 9495901.0L/3632428800.0L) - 1.0L/2145.0L) + 1.0L/572.0L); + poly_val[4] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-1.0L/31384184832000.0L*x + 1.0L/2092278988800.0L) + 1.0L/149448499200.0L) - 1.0L/8047226880.0L) - 83.0L/193729536000.0L) + 1889.0L/149448499200.0L) + 199.0L/392302310400.0L) - 5147.0L/8047226880.0L) + 115103.0L/114960384000.0L) + 248167.0L/14631321600.0L) - 1105981.0L/26824089600.0L) - 131977.0L/574801920.0L) + 654411847.0L/980755776000.0L) + 21379409.0L/14529715200.0L) - 14487307.0L/3113510400.0L) - 5861.0L/1552320.0L) + 9397117.0L/756756000.0L) + 9.0L/3575.0L) - 6.0L/715.0L); + poly_val[5] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((1.0L/10461394944000.0L)*x - 1.0L/747242496000.0L) - 229.0L/10461394944000.0L) + 269.0L/747242496000.0L) + 8941.0L/5230697472000.0L) - 14321.0L/373621248000.0L) - 65903.0L/1743565824000.0L) + 59273.0L/28740096000.0L) - 1335239.0L/804722688000.0L) - 309691.0L/5225472000.0L) + 85570171.0L/804722688000.0L) + 50693869.0L/57480192000.0L) - 1363537171.0L/653837184000.0L) - 63114127.0L/10378368000.0L) + 1177851371.0L/72648576000.0L) + 9381241.0L/576576000.0L) - 9222481.0L/201801600.0L) - 63.0L/5720.0L) + 9.0L/286.0L); + poly_val[6] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-1.0L/4483454976000.0L*x + 1.0L/344881152000.0L) + 41.0L/747242496000.0L) - 23.0L/28740096000.0L) - 3697.0L/747242496000.0L) + 5077.0L/57480192000.0L) + 101419.0L/560431872000.0L) - 431303.0L/86220288000.0L) - 2027.0L/5474304000.0L) + 1618681.0L/10450944000.0L) - 9174989.0L/57480192000.0L) - 36951877.0L/14370048000.0L) + 152331367.0L/35026992000.0L) + 444183529.0L/21555072000.0L) - 2003176661.0L/46702656000.0L) - 
9072541.0L/149688000.0L) + 8860861.0L/64864800.0L) + 7.0L/165.0L) - 14.0L/143.0L); + poly_val[7] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((1.0L/2414168064000.0L)*x - 1.0L/201180672000.0L) - 29.0L/268240896000.0L) + 281.0L/201180672000.0L) + 397.0L/36578304000.0L) - 2273.0L/14370048000.0L) - 617821.0L/1207084032000.0L) + 936041.0L/100590336000.0L) + 2668157.0L/268240896000.0L) - 5586823.0L/18289152000.0L) + 35775989.0L/804722688000.0L) + 160253299.0L/28740096000.0L) - 739625771.0L/150885504000.0L) - 294484709.0L/5588352000.0L) + 107051279.0L/1524096000.0L) + 8190541.0L/38808000.0L) - 7873021.0L/23284800.0L) - 9.0L/55.0L) + 3.0L/11.0L); + poly_val[8] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-1.0L/1609445376000.0L*x + 1.0L/146313216000.0L) + 137.0L/804722688000.0L) - 71.0L/36578304000.0L) - 14911.0L/804722688000.0L) + 2333.0L/10450944000.0L) + 2531.0L/2483712000.0L) - 491677.0L/36578304000.0L) - 47309593.0L/1609445376000.0L) + 66976673.0L/146313216000.0L) + 315424951.0L/804722688000.0L) - 23225297.0L/2612736000.0L) - 24429967.0L/50295168000.0L) + 95749481.0L/1016064000.0L) - 231950053.0L/5588352000.0L) - 3427741.0L/7056000.0L) + 2792701.0L/7761600.0L) + 9.0L/10.0L) - 9.0L/11.0L); + poly_val[9] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((1.0L/1316818944000.0L)*x - 1.0L/131681894400.0L) - 19.0L/87787929600.0L) + 19.0L/8778792960.0L) + 5491.0L/219469824000.0L) - 5491.0L/21946982400.0L) - 199937.0L/131681894400.0L) + 199937.0L/13168189440.0L) + 22981127.0L/438939648000.0L) - 22981127.0L/43893964800.0L) - 91172887.0L/87787929600.0L) + 91172887.0L/8778792960.0L) + 1886067737.0L/164602368000.0L) - 1886067737.0L/16460236800.0L) - 117868837.0L/1828915200.0L) + 117868837.0L/182891520.0L) + 9778141.0L/63504000.0L) - 9778141.0L/6350400.0L) - 1.0L/10.0L) + 1; + poly_val[10] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-1.0L/1316818944000.0L*x + 1.0L/146313216000.0L) + 1.0L/4478976000.0L) - 71.0L/36578304000.0L) - 
5917.0L/219469824000.0L) + 2333.0L/10450944000.0L) + 143333.0L/82301184000.0L) - 491677.0L/36578304000.0L) - 4125893.0L/62705664000.0L) + 66976673.0L/146313216000.0L) + 328397227.0L/219469824000.0L) - 23225297.0L/2612736000.0L) - 418657681.0L/20575296000.0L) + 95749481.0L/1016064000.0L) + 103649251.0L/653184000.0L) - 3427741.0L/7056000.0L) - 4062781.0L/6350400.0L) + 9.0L/10.0L) + 1); + poly_val[11] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((1.0L/1609445376000.0L)*x - 1.0L/201180672000.0L) - 43.0L/229920768000.0L) + 281.0L/201180672000.0L) + 18721.0L/804722688000.0L) - 2273.0L/14370048000.0L) - 418087.0L/268240896000.0L) + 936041.0L/100590336000.0L) + 14128099.0L/229920768000.0L) - 5586823.0L/18289152000.0L) - 2350874153.0L/1609445376000.0L) + 160253299.0L/28740096000.0L) + 4129613923.0L/201180672000.0L) - 294484709.0L/5588352000.0L) - 252469003.0L/1596672000.0L) + 8190541.0L/38808000.0L) + 8508061.0L/15523200.0L) - 9.0L/55.0L) - 9.0L/22.0L); + poly_val[12] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-1.0L/2414168064000.0L*x + 1.0L/344881152000.0L) + 17.0L/134120448000.0L) - 23.0L/28740096000.0L) - 587.0L/36578304000.0L) + 5077.0L/57480192000.0L) + 82471.0L/75442752000.0L) - 431303.0L/86220288000.0L) - 35057611.0L/804722688000.0L) + 1618681.0L/10450944000.0L) + 414889873.0L/402361344000.0L) - 36951877.0L/14370048000.0L) - 1053508997.0L/75442752000.0L) + 444183529.0L/21555072000.0L) + 221696239.0L/2286144000.0L) - 9072541.0L/149688000.0L) - 9284221.0L/34927200.0L) + 7.0L/165.0L) + 2.0L/11.0L); + poly_val[13] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((1.0L/4483454976000.0L)*x - 1.0L/747242496000.0L) - 103.0L/1494484992000.0L) + 269.0L/747242496000.0L) + 199.0L/22643712000.0L) - 14321.0L/373621248000.0L) - 1343389.0L/2241727488000.0L) + 59273.0L/28740096000.0L) + 388021.0L/16422912000.0L) - 309691.0L/5225472000.0L) - 6924367.0L/12773376000.0L) + 50693869.0L/57480192000.0L) + 965782157.0L/140107968000.0L) - 
63114127.0L/10378368000.0L) - 122428519.0L/2830464000.0L) + 9381241.0L/576576000.0L) + 9540001.0L/86486400.0L) - 63.0L/5720.0L) - 21.0L/286.0L); + poly_val[14] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-1.0L/10461394944000.0L*x + 1.0L/2092278988800.0L) + 31.0L/1046139494400.0L) - 1.0L/8047226880.0L) - 1793.0L/475517952000.0L) + 1889.0L/149448499200.0L) + 22171.0L/87178291200.0L) - 5147.0L/8047226880.0L) - 7876837.0L/804722688000.0L) + 248167.0L/14631321600.0L) + 17344541.0L/80472268800.0L) - 131977.0L/574801920.0L) - 26463329.0L/10216206000.0L) + 21379409.0L/14529715200.0L) + 10216631.0L/660441600.0L) - 5861.0L/1552320.0L) - 9651133.0L/252252000.0L) + 9.0L/3575.0L) + 18.0L/715.0L); + poly_val[15] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((1.0L/31384184832000.0L)*x - 1.0L/7846046208000.0L) - 103.0L/10461394944000.0L) + 83.0L/2615348736000.0L) + 499.0L/402361344000.0L) - 571.0L/186810624000.0L) - 1287469.0L/15692092416000.0L) + 43693.0L/301771008000.0L) + 822289.0L/268240896000.0L) - 65377.0L/18289152000.0L) - 52326023.0L/804722688000.0L) + 1311029.0L/28740096000.0L) + 369975061.0L/490377888000.0L) - 550937221.0L/1961511552000.0L) - 2869906997.0L/653837184000.0L) + 9601741.0L/13621608000.0L) + 746737.0L/69854400.0L) - 1.0L/2145.0L) - 1.0L/143.0L); + poly_val[16] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-1.0L/125536739328000.0L*x + 1.0L/41845579776000.0L) + 17.0L/6974263296000.0L) - 59.0L/10461394944000.0L) - 6317.0L/20922789888000.0L) + 10691.0L/20922789888000.0L) + 306049.0L/15692092416000.0L) - 18301.0L/804722688000.0L) - 760069.0L/1072963584000.0L) + 156031.0L/292626432000.0L) + 23540441.0L/1609445376000.0L) - 2637347.0L/402361344000.0L) - 651515867.0L/3923023104000.0L) + 11448761.0L/290594304000.0L) + 414884083.0L/435891456000.0L) - 196909.0L/2018016000.0L) - 1391323.0L/605404800.0L) + 9.0L/140140.0L) + 3.0L/2002.0L); + poly_val[17] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((1.0L/711374856192000.0L)*x - 
1.0L/355687428096000.0L) - 43.0L/101624979456000.0L) + 1.0L/1609445376000.0L) + 1073.0L/20922789888000.0L) - 79.0L/1494484992000.0L) - 7517.0L/2324754432000.0L) + 1801.0L/804722688000.0L) + 52799.0L/459841536000.0L) - 7403.0L/146313216000.0L) - 7491133.0L/3218890752000.0L) + 139499.0L/229920768000.0L) + 136333979.0L/5230697472000.0L) - 1043321.0L/290594304000.0L) - 6144839.0L/41513472000.0L) + 10949.0L/1241856000.0L) + 1219787.0L/3430627200.0L) - 9.0L/1555840.0L) - 9.0L/38896.0L); + poly_val[18] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-1.0L/6402373705728000.0L*x + 1.0L/6402373705728000.0L) + 1.0L/21776781312000.0L) - 1.0L/31384184832000.0L) - 31.0L/5706215424000.0L) + 23.0L/8966909952000.0L) + 3947.0L/11769069312000.0L) - 757.0L/7242504192000.0L) - 16153.0L/1379524608000.0L) + 2021.0L/877879296000.0L) + 1131407.0L/4828336128000.0L) - 4679.0L/172440576000.0L) - 15305183.0L/5884534656000.0L) + 3739217.0L/23538138624000.0L) + 83131.0L/5660928000.0L) - 63397.0L/163459296000.0L) - 40207.0L/1143542400.0L) + 1.0L/3938220.0L) + 1.0L/43758.0L); + poly_val[19] = x*(pow(x, 2)*(pow(x, 2)*(pow(x, 2)*(pow(x, 2)*(pow(x, 2)*(pow(x, 2)*(pow(x, 2)*(pow(x, 2)*((1.0L/121645100408832000.0L)*pow(x, 2) - 1.0L/426824913715200.0L) + 17.0L/62768369664000.0L) - 619.0L/37661021798400.0L) + 5473.0L/9656672256000.0L) - 21713.0L/1931334451200.0L) + 5839219.0L/47076277248000.0L) - 364919.0L/523069747200.0L) + 514639.0L/308756448000.0L) - 1.0L/923780.0L); + break; + case 1: + poly_val[0] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-1.0L/6402373705728000.0L*x + 1.0L/355687428096000.0L) + 1.0L/62768369664000.0L) - 1.0L/1961511552000.0L) + 1.0L/4184557977600.0L) + 23.0L/640493568000.0L) - 313.0L/3621252096000.0L) - 757.0L/603542016000.0L) + 3611.0L/877879296000.0L) + 2021.0L/87787929600.0L) - 45757.0L/536481792000.0L) - 4679.0L/21555072000.0L) + 1414271.0L/1681295616000.0L) + 3739217.0L/3923023104000.0L) - 957311.0L/261534873600.0L) - 63397.0L/40864824000.0L) + 
56311.0L/10291881600.0L) + 1.0L/1969110.0L) - 1.0L/831402.0L; + poly_val[1] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((19.0L/6402373705728000.0L)*x - 1.0L/19760412672000.0L) - 47.0L/125536739328000.0L) + 1.0L/100590336000.0L) + 1.0L/380414361600.0L) - 79.0L/106748928000.0L) + 20827.0L/14485008384000.0L) + 1801.0L/67060224000.0L) - 68909.0L/877879296000.0L) - 7403.0L/14631321600.0L) + 1846049.0L/1072963584000.0L) + 139499.0L/28740096000.0L) - 117554201.0L/6725182464000.0L) - 1043321.0L/48432384000.0L) + 3677437.0L/47551795200.0L) + 10949.0L/310464000.0L) - 599971.0L/5145940800.0L) - 9.0L/777920.0L) + 1.0L/38896.0L; + poly_val[2] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-19.0L/711374856192000.0L*x + 1.0L/2324754432000.0L) + 83.0L/20922789888000.0L) - 59.0L/653837184000.0L) - 1.0L/9754214400.0L) + 10691.0L/1494484992000.0L) - 167.0L/16765056000.0L) - 18301.0L/67060224000.0L) + 200467.0L/292626432000.0L) + 156031.0L/29262643200.0L) - 2913091.0L/178827264000.0L) - 2637347.0L/50295168000.0L) + 16131677.0L/93405312000.0L) + 11448761.0L/48432384000.0L) - 2067001.0L/2641766400.0L) - 196909.0L/504504000.0L) + 105031.0L/87964800.0L) + 9.0L/70070.0L) - 9.0L/34034.0L; + poly_val[3] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((19.0L/125536739328000.0L)*x - 1.0L/435891456000.0L) - 17.0L/664215552000.0L) + 83.0L/163459296000.0L) + 137.0L/126804787200.0L) - 571.0L/13343616000.0L) + 151451.0L/4828336128000.0L) + 43693.0L/25147584000.0L) - 146939.0L/41803776000.0L) - 65377.0L/1828915200.0L) + 33971617.0L/357654528000.0L) + 1311029.0L/3592512000.0L) - 2408863267.0L/2241727488000.0L) - 550937221.0L/326918592000.0L) + 4896967.0L/970444800.0L) + 9601741.0L/3405402000.0L) - 9495901.0L/1210809600.0L) - 2.0L/2145.0L) + 1.0L/572.0L; + poly_val[4] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-19.0L/31384184832000.0L*x + 1.0L/116237721600.0L) + 17.0L/149448499200.0L) - 1.0L/502951680.0L) - 83.0L/12915302400.0L) + 1889.0L/10674892800.0L) + 
199.0L/30177100800.0L) - 5147.0L/670602240.0L) + 115103.0L/10450944000.0L) + 248167.0L/1463132160.0L) - 1105981.0L/2980454400.0L) - 131977.0L/71850240.0L) + 654411847.0L/140107968000.0L) + 21379409.0L/2421619200.0L) - 14487307.0L/622702080.0L) - 5861.0L/388080.0L) + 9397117.0L/252252000.0L) + 18.0L/3575.0L) - 6.0L/715.0L; + poly_val[5] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((19.0L/10461394944000.0L)*x - 1.0L/41513472000.0L) - 3893.0L/10461394944000.0L) + 269.0L/46702656000.0L) + 8941.0L/348713164800.0L) - 14321.0L/26687232000.0L) - 65903.0L/134120448000.0L) + 59273.0L/2395008000.0L) - 1335239.0L/73156608000.0L) - 309691.0L/522547200.0L) + 85570171.0L/89413632000.0L) + 50693869.0L/7185024000.0L) - 1363537171.0L/93405312000.0L) - 63114127.0L/1729728000.0L) + 1177851371.0L/14529715200.0L) + 9381241.0L/144144000.0L) - 9222481.0L/67267200.0L) - 63.0L/2860.0L) + 9.0L/286.0L; + poly_val[6] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-19.0L/4483454976000.0L*x + 1.0L/19160064000.0L) + 697.0L/747242496000.0L) - 23.0L/1796256000.0L) - 3697.0L/49816166400.0L) + 5077.0L/4105728000.0L) + 101419.0L/43110144000.0L) - 431303.0L/7185024000.0L) - 2027.0L/497664000.0L) + 1618681.0L/1045094400.0L) - 9174989.0L/6386688000.0L) - 36951877.0L/1796256000.0L) + 152331367.0L/5003856000.0L) + 444183529.0L/3592512000.0L) - 2003176661.0L/9340531200.0L) - 9072541.0L/37422000.0L) + 8860861.0L/21621600.0L) + 14.0L/165.0L) - 14.0L/143.0L; + poly_val[7] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((19.0L/2414168064000.0L)*x - 1.0L/11176704000.0L) - 493.0L/268240896000.0L) + 281.0L/12573792000.0L) + 397.0L/2438553600.0L) - 2273.0L/1026432000.0L) - 8031673.0L/1207084032000.0L) + 936041.0L/8382528000.0L) + 2668157.0L/24385536000.0L) - 5586823.0L/1828915200.0L) + 35775989.0L/89413632000.0L) + 160253299.0L/3592512000.0L) - 739625771.0L/21555072000.0L) - 294484709.0L/931392000.0L) + 107051279.0L/304819200.0L) + 8190541.0L/9702000.0L) - 7873021.0L/7761600.0L) - 18.0L/55.0L) + 
3.0L/11.0L; + poly_val[8] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-19.0L/1609445376000.0L*x + 1.0L/8128512000.0L) + 2329.0L/804722688000.0L) - 71.0L/2286144000.0L) - 14911.0L/53648179200.0L) + 2333.0L/746496000.0L) + 32903.0L/2483712000.0L) - 491677.0L/3048192000.0L) - 47309593.0L/146313216000.0L) + 66976673.0L/14631321600.0L) + 315424951.0L/89413632000.0L) - 23225297.0L/326592000.0L) - 24429967.0L/7185024000.0L) + 95749481.0L/169344000.0L) - 231950053.0L/1117670400.0L) - 3427741.0L/1764000.0L) + 2792701.0L/2587200.0L) + 9.0L/5.0L) - 9.0L/11.0L; + poly_val[9] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((19.0L/1316818944000.0L)*x - 1.0L/7315660800.0L) - 323.0L/87787929600.0L) + 19.0L/548674560.0L) + 5491.0L/14631321600.0L) - 5491.0L/1567641600.0L) - 2599181.0L/131681894400.0L) + 199937.0L/1097349120.0L) + 252792397.0L/438939648000.0L) - 22981127.0L/4389396480.0L) - 91172887.0L/9754214400.0L) + 91172887.0L/1097349120.0L) + 1886067737.0L/23514624000.0L) - 1886067737.0L/2743372800.0L) - 117868837.0L/365783040.0L) + 117868837.0L/45722880.0L) + 9778141.0L/21168000.0L) - 9778141.0L/3175200.0L) - 1.0L/10.0L; + poly_val[10] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-19.0L/1316818944000.0L*x + 1.0L/8128512000.0L) + 17.0L/4478976000.0L) - 71.0L/2286144000.0L) - 5917.0L/14631321600.0L) + 2333.0L/746496000.0L) + 1863329.0L/82301184000.0L) - 491677.0L/3048192000.0L) - 45384823.0L/62705664000.0L) + 66976673.0L/14631321600.0L) + 328397227.0L/24385536000.0L) - 23225297.0L/326592000.0L) - 418657681.0L/2939328000.0L) + 95749481.0L/169344000.0L) + 103649251.0L/130636800.0L) - 3427741.0L/1764000.0L) - 4062781.0L/2116800.0L) + 9.0L/5.0L) + 1; + poly_val[11] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((19.0L/1609445376000.0L)*x - 1.0L/11176704000.0L) - 731.0L/229920768000.0L) + 281.0L/12573792000.0L) + 18721.0L/53648179200.0L) - 2273.0L/1026432000.0L) - 5435131.0L/268240896000.0L) + 936041.0L/8382528000.0L) + 14128099.0L/20901888000.0L) - 
5586823.0L/1828915200.0L) - 2350874153.0L/178827264000.0L) + 160253299.0L/3592512000.0L) + 4129613923.0L/28740096000.0L) - 294484709.0L/931392000.0L) - 252469003.0L/319334400.0L) + 8190541.0L/9702000.0L) + 8508061.0L/5174400.0L) - 18.0L/55.0L) - 9.0L/22.0L; + poly_val[12] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-19.0L/2414168064000.0L*x + 1.0L/19160064000.0L) + 289.0L/134120448000.0L) - 23.0L/1796256000.0L) - 587.0L/2438553600.0L) + 5077.0L/4105728000.0L) + 1072123.0L/75442752000.0L) - 431303.0L/7185024000.0L) - 35057611.0L/73156608000.0L) + 1618681.0L/1045094400.0L) + 414889873.0L/44706816000.0L) - 36951877.0L/1796256000.0L) - 1053508997.0L/10777536000.0L) + 444183529.0L/3592512000.0L) + 221696239.0L/457228800.0L) - 9072541.0L/37422000.0L) - 9284221.0L/11642400.0L) + 14.0L/165.0L) + 2.0L/11.0L; + poly_val[13] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((19.0L/4483454976000.0L)*x - 1.0L/41513472000.0L) - 1751.0L/1494484992000.0L) + 269.0L/46702656000.0L) + 199.0L/1509580800.0L) - 14321.0L/26687232000.0L) - 1343389.0L/172440576000.0L) + 59273.0L/2395008000.0L) + 388021.0L/1492992000.0L) - 309691.0L/522547200.0L) - 6924367.0L/1419264000.0L) + 50693869.0L/7185024000.0L) + 965782157.0L/20015424000.0L) - 63114127.0L/1729728000.0L) - 122428519.0L/566092800.0L) + 9381241.0L/144144000.0L) + 9540001.0L/28828800.0L) - 63.0L/2860.0L) - 21.0L/286.0L; + poly_val[14] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-19.0L/10461394944000.0L*x + 1.0L/116237721600.0L) + 527.0L/1046139494400.0L) - 1.0L/502951680.0L) - 1793.0L/31701196800.0L) + 1889.0L/10674892800.0L) + 22171.0L/6706022400.0L) - 5147.0L/670602240.0L) - 7876837.0L/73156608000.0L) + 248167.0L/1463132160.0L) + 17344541.0L/8941363200.0L) - 131977.0L/71850240.0L) - 26463329.0L/1459458000.0L) + 21379409.0L/2421619200.0L) + 10216631.0L/132088320.0L) - 5861.0L/388080.0L) - 9651133.0L/84084000.0L) + 18.0L/3575.0L) + 18.0L/715.0L; + poly_val[15] = 
x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((19.0L/31384184832000.0L)*x - 1.0L/435891456000.0L) - 1751.0L/10461394944000.0L) + 83.0L/163459296000.0L) + 499.0L/26824089600.0L) - 571.0L/13343616000.0L) - 1287469.0L/1207084032000.0L) + 43693.0L/25147584000.0L) + 822289.0L/24385536000.0L) - 65377.0L/1828915200.0L) - 52326023.0L/89413632000.0L) + 1311029.0L/3592512000.0L) + 369975061.0L/70053984000.0L) - 550937221.0L/326918592000.0L) - 2869906997.0L/130767436800.0L) + 9601741.0L/3405402000.0L) + 746737.0L/23284800.0L) - 2.0L/2145.0L) - 1.0L/143.0L; + poly_val[16] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-19.0L/125536739328000.0L*x + 1.0L/2324754432000.0L) + 289.0L/6974263296000.0L) - 59.0L/653837184000.0L) - 6317.0L/1394852659200.0L) + 10691.0L/1494484992000.0L) + 306049.0L/1207084032000.0L) - 18301.0L/67060224000.0L) - 760069.0L/97542144000.0L) + 156031.0L/29262643200.0L) + 23540441.0L/178827264000.0L) - 2637347.0L/50295168000.0L) - 651515867.0L/560431872000.0L) + 11448761.0L/48432384000.0L) + 414884083.0L/87178291200.0L) - 196909.0L/504504000.0L) - 1391323.0L/201801600.0L) + 9.0L/70070.0L) + 3.0L/2002.0L; + poly_val[17] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((19.0L/711374856192000.0L)*x - 1.0L/19760412672000.0L) - 43.0L/5977939968000.0L) + 1.0L/100590336000.0L) + 1073.0L/1394852659200.0L) - 79.0L/106748928000.0L) - 7517.0L/178827264000.0L) + 1801.0L/67060224000.0L) + 52799.0L/41803776000.0L) - 7403.0L/14631321600.0L) - 7491133.0L/357654528000.0L) + 139499.0L/28740096000.0L) + 136333979.0L/747242496000.0L) - 1043321.0L/48432384000.0L) - 6144839.0L/8302694400.0L) + 10949.0L/310464000.0L) + 1219787.0L/1143542400.0L) - 9.0L/777920.0L) - 9.0L/38896.0L; + poly_val[18] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-19.0L/6402373705728000.0L*x + 1.0L/355687428096000.0L) + 1.0L/1280987136000.0L) - 1.0L/1961511552000.0L) - 31.0L/380414361600.0L) + 23.0L/640493568000.0L) + 3947.0L/905313024000.0L) - 757.0L/603542016000.0L) - 
16153.0L/125411328000.0L) + 2021.0L/87787929600.0L) + 1131407.0L/536481792000.0L) - 4679.0L/21555072000.0L) - 15305183.0L/840647808000.0L) + 3739217.0L/3923023104000.0L) + 83131.0L/1132185600.0L) - 63397.0L/40864824000.0L) - 40207.0L/381180800.0L) + 1.0L/1969110.0L) + 1.0L/43758.0L; + poly_val[19] = pow(x, 2)*(pow(x, 2)*(pow(x, 2)*(pow(x, 2)*(pow(x, 2)*(pow(x, 2)*(pow(x, 2)*(pow(x, 2)*((1.0L/6402373705728000.0L)*pow(x, 2) - 1.0L/25107347865600.0L) + 17.0L/4184557977600.0L) - 619.0L/2897001676800.0L) + 5473.0L/877879296000.0L) - 21713.0L/214592716800.0L) + 5839219.0L/6725182464000.0L) - 364919.0L/104613949440.0L) + 514639.0L/102918816000.0L) - 1.0L/923780.0L; + break; + case 2: + poly_val[0] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-1.0L/355687428096000.0L*x + 1.0L/20922789888000.0L) + 1.0L/3923023104000.0L) - 1.0L/130767436800.0L) + 1.0L/298896998400.0L) + 23.0L/49268736000.0L) - 313.0L/301771008000.0L) - 757.0L/54867456000.0L) + 3611.0L/87787929600.0L) + 2021.0L/9754214400.0L) - 45757.0L/67060224000.0L) - 4679.0L/3079296000.0L) + 1414271.0L/280215936000.0L) + 3739217.0L/784604620800.0L) - 957311.0L/65383718400.0L) - 63397.0L/13621608000.0L) + 56311.0L/5145940800.0L) + 1.0L/1969110.0L; + poly_val[1] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((19.0L/355687428096000.0L)*x - 1.0L/1162377216000.0L) - 47.0L/7846046208000.0L) + 1.0L/6706022400.0L) + 1.0L/27172454400.0L) - 79.0L/8211456000.0L) + 20827.0L/1207084032000.0L) + 1801.0L/6096384000.0L) - 68909.0L/87787929600.0L) - 7403.0L/1625702400.0L) + 1846049.0L/134120448000.0L) + 139499.0L/4105728000.0L) - 117554201.0L/1120863744000.0L) - 1043321.0L/9686476800.0L) + 3677437.0L/11887948800.0L) + 10949.0L/103488000.0L) - 599971.0L/2572970400.0L) - 9.0L/777920.0L; + poly_val[2] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-19.0L/39520825344000.0L*x + 17.0L/2324754432000.0L) + 83.0L/1307674368000.0L) - 59.0L/43589145600.0L) - 1.0L/696729600.0L) + 10691.0L/114960384000.0L) - 167.0L/1397088000.0L) - 
18301.0L/6096384000.0L) + 200467.0L/29262643200.0L) + 156031.0L/3251404800.0L) - 2913091.0L/22353408000.0L) - 2637347.0L/7185024000.0L) + 16131677.0L/15567552000.0L) + 11448761.0L/9686476800.0L) - 2067001.0L/660441600.0L) - 196909.0L/168168000.0L) + 105031.0L/43982400.0L) + 9.0L/70070.0L; + poly_val[3] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((19.0L/6974263296000.0L)*x - 17.0L/435891456000.0L) - 17.0L/41513472000.0L) + 83.0L/10897286400.0L) + 137.0L/9057484800.0L) - 571.0L/1026432000.0L) + 151451.0L/402361344000.0L) + 43693.0L/2286144000.0L) - 146939.0L/4180377600.0L) - 65377.0L/203212800.0L) + 33971617.0L/44706816000.0L) + 1311029.0L/513216000.0L) - 2408863267.0L/373621248000.0L) - 550937221.0L/65383718400.0L) + 4896967.0L/242611200.0L) + 9601741.0L/1135134000.0L) - 9495901.0L/605404800.0L) - 2.0L/2145.0L; + poly_val[4] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-19.0L/1743565824000.0L*x + 17.0L/116237721600.0L) + 17.0L/9340531200.0L) - 1.0L/33530112.0L) - 83.0L/922521600.0L) + 1889.0L/821145600.0L) + 199.0L/2514758400.0L) - 5147.0L/60963840.0L) + 115103.0L/1045094400.0L) + 248167.0L/162570240.0L) - 1105981.0L/372556800.0L) - 131977.0L/10264320.0L) + 654411847.0L/23351328000.0L) + 21379409.0L/484323840.0L) - 14487307.0L/155675520.0L) - 5861.0L/129360.0L) + 9397117.0L/126126000.0L) + 18.0L/3575.0L; + poly_val[5] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((19.0L/581188608000.0L)*x - 17.0L/41513472000.0L) - 3893.0L/653837184000.0L) + 269.0L/3113510400.0L) + 8941.0L/24908083200.0L) - 14321.0L/2052864000.0L) - 65903.0L/11176704000.0L) + 59273.0L/217728000.0L) - 1335239.0L/7315660800.0L) - 309691.0L/58060800.0L) + 85570171.0L/11176704000.0L) + 50693869.0L/1026432000.0L) - 1363537171.0L/15567552000.0L) - 63114127.0L/345945600.0L) + 1177851371.0L/3632428800.0L) + 9381241.0L/48048000.0L) - 9222481.0L/33633600.0L) - 63.0L/2860.0L; + poly_val[6] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-19.0L/249080832000.0L*x + 17.0L/19160064000.0L) + 
697.0L/46702656000.0L) - 23.0L/119750400.0L) - 3697.0L/3558297600.0L) + 66001.0L/4105728000.0L) + 101419.0L/3592512000.0L) - 431303.0L/653184000.0L) - 2027.0L/49766400.0L) + 1618681.0L/116121600.0L) - 9174989.0L/798336000.0L) - 36951877.0L/256608000.0L) + 152331367.0L/833976000.0L) + 444183529.0L/718502400.0L) - 2003176661.0L/2335132800.0L) - 9072541.0L/12474000.0L) + 8860861.0L/10810800.0L) + 14.0L/165.0L; + poly_val[7] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((19.0L/134120448000.0L)*x - 17.0L/11176704000.0L) - 493.0L/16765056000.0L) + 281.0L/838252800.0L) + 397.0L/174182400.0L) - 29549.0L/1026432000.0L) - 8031673.0L/100590336000.0L) + 936041.0L/762048000.0L) + 2668157.0L/2438553600.0L) - 5586823.0L/203212800.0L) + 35775989.0L/11176704000.0L) + 160253299.0L/513216000.0L) - 739625771.0L/3592512000.0L) - 294484709.0L/186278400.0L) + 107051279.0L/76204800.0L) + 8190541.0L/3234000.0L) - 7873021.0L/3880800.0L) - 18.0L/55.0L; + poly_val[8] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-19.0L/89413632000.0L*x + 17.0L/8128512000.0L) + 2329.0L/50295168000.0L) - 71.0L/152409600.0L) - 14911.0L/3832012800.0L) + 30329.0L/746496000.0L) + 32903.0L/206976000.0L) - 5408447.0L/3048192000.0L) - 47309593.0L/14631321600.0L) + 66976673.0L/1625702400.0L) + 315424951.0L/11176704000.0L) - 23225297.0L/46656000.0L) - 24429967.0L/1197504000.0L) + 95749481.0L/33868800.0L) - 231950053.0L/279417600.0L) - 3427741.0L/588000.0L) + 2792701.0L/1293600.0L) + 9.0L/5.0L; + poly_val[9] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((19.0L/73156608000.0L)*x - 17.0L/7315660800.0L) - 323.0L/5486745600.0L) + 19.0L/36578304.0L) + 5491.0L/1045094400.0L) - 71383.0L/1567641600.0L) - 2599181.0L/10973491200.0L) + 2199307.0L/1097349120.0L) + 252792397.0L/43893964800.0L) - 22981127.0L/487710720.0L) - 91172887.0L/1219276800.0L) + 91172887.0L/156764160.0L) + 1886067737.0L/3919104000.0L) - 1886067737.0L/548674560.0L) - 117868837.0L/91445760.0L) + 117868837.0L/15240960.0L) + 9778141.0L/10584000.0L) - 
9778141.0L/3175200.0L; + poly_val[10] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-19.0L/73156608000.0L*x + 17.0L/8128512000.0L) + 17.0L/279936000.0L) - 71.0L/152409600.0L) - 5917.0L/1045094400.0L) + 30329.0L/746496000.0L) + 1863329.0L/6858432000.0L) - 5408447.0L/3048192000.0L) - 45384823.0L/6270566400.0L) + 66976673.0L/1625702400.0L) + 328397227.0L/3048192000.0L) - 23225297.0L/46656000.0L) - 418657681.0L/489888000.0L) + 95749481.0L/33868800.0L) + 103649251.0L/32659200.0L) - 3427741.0L/588000.0L) - 4062781.0L/1058400.0L) + 9.0L/5.0L; + poly_val[11] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((19.0L/89413632000.0L)*x - 17.0L/11176704000.0L) - 731.0L/14370048000.0L) + 281.0L/838252800.0L) + 18721.0L/3832012800.0L) - 29549.0L/1026432000.0L) - 5435131.0L/22353408000.0L) + 936041.0L/762048000.0L) + 14128099.0L/2090188800.0L) - 5586823.0L/203212800.0L) - 2350874153.0L/22353408000.0L) + 160253299.0L/513216000.0L) + 4129613923.0L/4790016000.0L) - 294484709.0L/186278400.0L) - 252469003.0L/79833600.0L) + 8190541.0L/3234000.0L) + 8508061.0L/2587200.0L) - 18.0L/55.0L; + poly_val[12] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-19.0L/134120448000.0L*x + 17.0L/19160064000.0L) + 289.0L/8382528000.0L) - 23.0L/119750400.0L) - 587.0L/174182400.0L) + 66001.0L/4105728000.0L) + 1072123.0L/6286896000.0L) - 431303.0L/653184000.0L) - 35057611.0L/7315660800.0L) + 1618681.0L/116121600.0L) + 414889873.0L/5588352000.0L) - 36951877.0L/256608000.0L) - 1053508997.0L/1796256000.0L) + 444183529.0L/718502400.0L) + 221696239.0L/114307200.0L) - 9072541.0L/12474000.0L) - 9284221.0L/5821200.0L) + 14.0L/165.0L; + poly_val[13] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((19.0L/249080832000.0L)*x - 17.0L/41513472000.0L) - 1751.0L/93405312000.0L) + 269.0L/3113510400.0L) + 199.0L/107827200.0L) - 14321.0L/2052864000.0L) - 1343389.0L/14370048000.0L) + 59273.0L/217728000.0L) + 388021.0L/149299200.0L) - 309691.0L/58060800.0L) - 6924367.0L/177408000.0L) + 50693869.0L/1026432000.0L) + 
965782157.0L/3335904000.0L) - 63114127.0L/345945600.0L) - 122428519.0L/141523200.0L) + 9381241.0L/48048000.0L) + 9540001.0L/14414400.0L) - 63.0L/2860.0L; + poly_val[14] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-19.0L/581188608000.0L*x + 17.0L/116237721600.0L) + 527.0L/65383718400.0L) - 1.0L/33530112.0L) - 1793.0L/2264371200.0L) + 1889.0L/821145600.0L) + 22171.0L/558835200.0L) - 5147.0L/60963840.0L) - 7876837.0L/7315660800.0L) + 248167.0L/162570240.0L) + 17344541.0L/1117670400.0L) - 131977.0L/10264320.0L) - 26463329.0L/243243000.0L) + 21379409.0L/484323840.0L) + 10216631.0L/33022080.0L) - 5861.0L/129360.0L) - 9651133.0L/42042000.0L) + 18.0L/3575.0L; + poly_val[15] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((19.0L/1743565824000.0L)*x - 17.0L/435891456000.0L) - 1751.0L/653837184000.0L) + 83.0L/10897286400.0L) + 499.0L/1916006400.0L) - 571.0L/1026432000.0L) - 1287469.0L/100590336000.0L) + 43693.0L/2286144000.0L) + 822289.0L/2438553600.0L) - 65377.0L/203212800.0L) - 52326023.0L/11176704000.0L) + 1311029.0L/513216000.0L) + 369975061.0L/11675664000.0L) - 550937221.0L/65383718400.0L) - 2869906997.0L/32691859200.0L) + 9601741.0L/1135134000.0L) + 746737.0L/11642400.0L) - 2.0L/2145.0L; + poly_val[16] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-19.0L/6974263296000.0L*x + 17.0L/2324754432000.0L) + 289.0L/435891456000.0L) - 59.0L/43589145600.0L) - 6317.0L/99632332800.0L) + 10691.0L/114960384000.0L) + 306049.0L/100590336000.0L) - 18301.0L/6096384000.0L) - 760069.0L/9754214400.0L) + 156031.0L/3251404800.0L) + 23540441.0L/22353408000.0L) - 2637347.0L/7185024000.0L) - 651515867.0L/93405312000.0L) + 11448761.0L/9686476800.0L) + 414884083.0L/21794572800.0L) - 196909.0L/168168000.0L) - 1391323.0L/100900800.0L) + 9.0L/70070.0L; + poly_val[17] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((19.0L/39520825344000.0L)*x - 1.0L/1162377216000.0L) - 43.0L/373621248000.0L) + 1.0L/6706022400.0L) + 1073.0L/99632332800.0L) - 79.0L/8211456000.0L) - 7517.0L/14902272000.0L) 
+ 1801.0L/6096384000.0L) + 52799.0L/4180377600.0L) - 7403.0L/1625702400.0L) - 7491133.0L/44706816000.0L) + 139499.0L/4105728000.0L) + 136333979.0L/124540416000.0L) - 1043321.0L/9686476800.0L) - 6144839.0L/2075673600.0L) + 10949.0L/103488000.0L) + 1219787.0L/571771200.0L) - 9.0L/777920.0L; + poly_val[18] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-19.0L/355687428096000.0L*x + 1.0L/20922789888000.0L) + 1.0L/80061696000.0L) - 1.0L/130767436800.0L) - 31.0L/27172454400.0L) + 23.0L/49268736000.0L) + 3947.0L/75442752000.0L) - 757.0L/54867456000.0L) - 16153.0L/12541132800.0L) + 2021.0L/9754214400.0L) + 1131407.0L/67060224000.0L) - 4679.0L/3079296000.0L) - 15305183.0L/140107968000.0L) + 3739217.0L/784604620800.0L) + 83131.0L/283046400.0L) - 63397.0L/13621608000.0L) - 40207.0L/190590400.0L) + 1.0L/1969110.0L; + poly_val[19] = x*(pow(x, 2)*(pow(x, 2)*(pow(x, 2)*(pow(x, 2)*(pow(x, 2)*(pow(x, 2)*(pow(x, 2)*((1.0L/355687428096000.0L)*pow(x, 2) - 1.0L/1569209241600.0L) + 17.0L/298896998400.0L) - 619.0L/241416806400.0L) + 5473.0L/87787929600.0L) - 21713.0L/26824089600.0L) + 5839219.0L/1120863744000.0L) - 364919.0L/26153487360.0L) + 514639.0L/51459408000.0L); + break; + } } + +void beta_Lagrange_n10(int deriv, double x, double *poly_val) { + switch(deriv) + { + case 0: + poly_val[0] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-1.0L/51090942171709440000.0L*x + 1.0L/2432902008176640000.0L) + 1.0L/291948240981196800.0L) - 1.0L/8536498274304000.0L) - 1.0L/32011868528640000.0L) + 17.0L/1255367393280000.0L) - 503.0L/15817629155328000.0L) - 619.0L/753220435968000.0L) + 66779.0L/22596613079040000.0L) + 5473.0L/193133445120000.0L) - 14099.0L/115880067072000.0L) - 21713.0L/38626689024000.0L) + 209515049.0L/79088145776640000.0L) + 5839219.0L/941525544960000.0L) - 4353367.0L/141228831744000.0L) - 364919.0L/10461394944000.0L) + 238420331.0L/1333827855360000.0L) + 514639.0L/6175128960000.0L) - 106924511.0L/246387645504000.0L) - 1.0L/18475600.0L) + 1.0L/3527160.0L); + 
poly_val[1] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((1.0L/2432902008176640000.0L)*x - 1.0L/121645100408832000.0L) - 41.0L/486580401635328000.0L) + 1.0L/400148356608000.0L) + 59.0L/21341245685760000.0L) - 19.0L/62768369664000.0L) + 59.0L/107602919424000.0L) + 1789.0L/94152554496000.0L) - 455633.0L/7532204359680000.0L) - 19447.0L/28970016768000.0L) + 102191.0L/38626689024000.0L) + 32699.0L/2414168064000.0L) - 112017667.0L/1883051089920000.0L) - 3558293.0L/23538138624000.0L) + 66422087.0L/94152554496000.0L) + 5040143.0L/5884534656000.0L) - 306556247.0L/74101547520000.0L) - 11419.0L/5557616064.0L) + 790297.0L/78218300160.0L) + 5.0L/3741309.0L) - 1.0L/151164.0L); + poly_val[2] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-1.0L/243290200817664000.0L*x + 1.0L/12804747411456000.0L) + 233.0L/243290200817664000.0L) - 107.0L/4268249137152000.0L) - 37.0L/711374856192000.0L) + 401.0L/125536739328000.0L) - 1429.0L/376610217984000.0L) - 78713.0L/376610217984000.0L) + 426961.0L/753220435968000.0L) + 147211.0L/19313344512000.0L) - 173941.0L/6437781504000.0L) - 3039931.0L/19313344512000.0L) + 3416723.0L/5380145971200.0L) + 336140003.0L/188305108992000.0L) - 363897701.0L/47076277248000.0L) - 53447083.0L/5230697472000.0L) + 1023220943.0L/22230464256000.0L) + 487121.0L/19760412672.0L) - 5318641.0L/46930980096.0L) - 5.0L/311168.0L) + 5.0L/67184.0L); + poly_val[3] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((1.0L/38414242234368000.0L)*x - 1.0L/2134124568576000.0L) - 37.0L/5487748890624000.0L) + 1.0L/6351561216000.0L) + 1.0L/2010164428800.0L) - 1.0L/47443968000.0L) + 10993.0L/1129830653952000.0L) + 45439.0L/31384184832000.0L) - 144587.0L/46115536896000.0L) - 25313.0L/459841536000.0L) + 201319.0L/1182449664000.0L) + 6427.0L/5474304000.0L) - 2409537919.0L/564915326976000.0L) - 106900847.0L/7846046208000.0L) + 1087177561.0L/20175547392000.0L) + 2464771.0L/31135104000.0L) - 34334629.0L/104696064000.0L) - 809.0L/4200768.0L) + 
3026437.0L/3705077376.0L) + 15.0L/119119.0L) - 5.0L/9282.0L); + poly_val[4] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-1.0L/8536498274304000.0L*x + 1.0L/502146957312000.0L) + 283.0L/8536498274304000.0L) - 349.0L/502146957312000.0L) - 359.0L/118562476032000.0L) + 8147.0L/83691159552000.0L) + 9059.0L/251073478656000.0L) - 1767109.0L/251073478656000.0L) + 1077763.0L/100429391462400.0L) + 10896041.0L/38626689024000.0L) - 9330647.0L/12875563008000.0L) - 81091903.0L/12875563008000.0L) + 5021638757.0L/251073478656000.0L) + 4749355073.0L/62768369664000.0L) - 524300089.0L/1961511552000.0L) - 14119093201.0L/31384184832000.0L) + 149510878211.0L/88921857024000.0L) + 1933049.0L/1743565824.0L) - 21051859.0L/4940103168.0L) - 5.0L/6864.0L) + 5.0L/1768.0L); + poly_val[5] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((1.0L/2510734786560000.0L)*x - 1.0L/156920924160000.0L) - 61.0L/502146957312000.0L) + 1.0L/435891456000.0L) + 5441.0L/418455797760000.0L) - 8741.0L/26153487360000.0L) - 109829.0L/251073478656000.0L) + 199141.0L/7846046208000.0L) - 49745219.0L/2510734786560000.0L) - 1447093.0L/1341204480000.0L) + 5418317.0L/2575112601600.0L) + 5165621.0L/201180672000.0L) - 42621719501.0L/627683696640000.0L) - 3194891431.0L/9807557760000.0L) + 31196765993.0L/31384184832000.0L) + 109639993.0L/54486432000.0L) - 28770379499.0L/4358914560000.0L) - 9587629.0L/1891890000.0L) + 104193839.0L/6054048000.0L) + 12.0L/3575.0L) - 3.0L/260.0L); + poly_val[6] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-1.0L/941525544960000.0L*x + 1.0L/62768369664000.0L) + 1.0L/2897001676800.0L) - 41.0L/6974263296000.0L) - 6551.0L/156920924160000.0L) + 9257.0L/10461394944000.0L) + 39229.0L/18830510899200.0L) - 2202649.0L/31384184832000.0L) - 4562501.0L/941525544960000.0L) + 1695689.0L/536481792000.0L) - 747037.0L/193133445120.0L) - 130451323.0L/1609445376000.0L) + 2397003491.0L/14711336640000.0L) + 17611561711.0L/15692092416000.0L) - 253178371.0L/90531302400.0L) - 
3240137519.0L/435891456000.0L) + 8379111271.0L/408648240000.0L) + 1888949.0L/96864768.0L) - 20460919.0L/363242880.0L) - 15.0L/1144.0L) + 1.0L/26.0L); + poly_val[7] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((1.0L/439378587648000.0L)*x - 1.0L/31384184832000.0L) - 1.0L/1280987136000.0L) + 47.0L/3923023104000.0L) + 1087.0L/10461394944000.0L) - 9677.0L/5230697472000.0L) - 1427683.0L/219689293824000.0L) + 1192853.0L/7846046208000.0L) + 1397573.0L/8966909952000.0L) - 17379821.0L/2414168064000.0L) + 499573.0L/178827264000.0L) + 39926233.0L/201180672000.0L) - 5487440311.0L/21968929382400.0L) - 5986965079.0L/1961511552000.0L) + 3098878547.0L/560431872000.0L) + 11431199701.0L/490377888000.0L) - 16397134673.0L/326918592000.0L) - 1827209.0L/27243216.0L) + 19675939.0L/127135008.0L) + 20.0L/429.0L) - 10.0L/91.0L); + poly_val[8] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-1.0L/251073478656000.0L*x + 1.0L/19313344512000.0L) + 359.0L/251073478656000.0L) - 127.0L/6437781504000.0L) - 859.0L/4184557977600.0L) + 3329.0L/1072963584000.0L) + 266857.0L/17933819904000.0L) - 2527141.0L/9656672256000.0L) - 137469049.0L/251073478656000.0L) + 82887751.0L/6437781504000.0L) + 46068893.0L/6437781504000.0L) - 269158459.0L/715309056000.0L) + 18550682911.0L/125536739328000.0L) + 15347344853.0L/2414168064000.0L) - 49953999287.0L/7846046208000.0L) - 7656977201.0L/134120448000.0L) + 69369468211.0L/871782912000.0L) + 1650809.0L/7451136.0L) - 17523859.0L/48432384.0L) - 15.0L/88.0L) + 15.0L/52.0L); + poly_val[9] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((1.0L/173820100608000.0L)*x - 1.0L/14485008384000.0L) - 373.0L/173820100608000.0L) + 1.0L/37721376000.0L) + 9473.0L/28970016768000.0L) - 10177.0L/2414168064000.0L) - 2280613.0L/86910050304000.0L) + 1308227.0L/3621252096000.0L) + 41217097.0L/34764020121600.0L) - 87882491.0L/4828336128000.0L) - 1699387243.0L/57940033536000.0L) + 666523661.0L/1207084032000.0L) + 1995302329.0L/6207860736000.0L) - 
8990599279.0L/905313024000.0L) + 8279706833.0L/10863756288000.0L) + 2517135701.0L/25147584000.0L) - 15120010711.0L/301771008000.0L) - 698249.0L/1397088.0L) + 6410659.0L/16765056.0L) + 10.0L/11.0L) - 5.0L/6.0L); + poly_val[10] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-1.0L/144850083840000.0L*x + 1.0L/13168189440000.0L) + 1.0L/376233984000.0L) - 11.0L/376233984000.0L) - 19.0L/44789760000.0L) + 209.0L/44789760000.0L) + 48127.0L/1316818944000.0L) - 529397.0L/1316818944000.0L) - 3491953.0L/1881169920000.0L) + 38411483.0L/1881169920000.0L) + 11240723.0L/197074944000.0L) - 11240723.0L/17915904000.0L) - 3451088009.0L/3292047360000.0L) + 37961968099.0L/3292047360000.0L) + 258720739.0L/23514624000.0L) - 2845928129.0L/23514624000.0L) - 27988159.0L/466560000.0L) + 307869749.0L/466560000.0L) + 178939.0L/1270080.0L) - 1968329.0L/1270080.0L) - 1.0L/11.0L) + 1; + poly_val[11] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((1.0L/144850083840000.0L)*x - 1.0L/14485008384000.0L) - 79.0L/28970016768000.0L) + 1.0L/37721376000.0L) + 403.0L/894136320000.0L) - 10177.0L/2414168064000.0L) - 590459.0L/14485008384000.0L) + 1308227.0L/3621252096000.0L) + 321209461.0L/144850083840000.0L) - 87882491.0L/4828336128000.0L) - 242186803.0L/3218890752000.0L) + 666523661.0L/1207084032000.0L) + 57957677929.0L/36212520960000.0L) - 8990599279.0L/905313024000.0L) - 37902695461.0L/1810626048000.0L) + 2517135701.0L/25147584000.0L) + 40256974711.0L/251475840000.0L) - 698249.0L/1397088.0L) - 8950819.0L/13970880.0L) + 10.0L/11.0L) + 1); + poly_val[12] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-1.0L/173820100608000.0L*x + 1.0L/19313344512000.0L) + 403.0L/173820100608000.0L) - 127.0L/6437781504000.0L) - 1423.0L/3621252096000.0L) + 3329.0L/1072963584000.0L) + 3186283.0L/86910050304000.0L) - 2527141.0L/9656672256000.0L) - 359857457.0L/173820100608000.0L) + 82887751.0L/6437781504000.0L) + 4245956653.0L/57940033536000.0L) - 269158459.0L/715309056000.0L) - 
28265888347.0L/17382010060800.0L) + 15347344853.0L/2414168064000.0L) + 7429174631.0L/339492384000.0L) - 7656977201.0L/134120448000.0L) - 99084030211.0L/603542016000.0L) + 1650809.0L/7451136.0L) + 18793939.0L/33530112.0L) - 15.0L/88.0L) - 5.0L/12.0L); + poly_val[13] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((1.0L/251073478656000.0L)*x - 1.0L/31384184832000.0L) - 409.0L/251073478656000.0L) + 47.0L/3923023104000.0L) + 29.0L/103322419200.0L) - 9677.0L/5230697472000.0L) - 3343729.0L/125536739328000.0L) + 1192853.0L/7846046208000.0L) + 383394269.0L/251073478656000.0L) - 17379821.0L/2414168064000.0L) - 350882963.0L/6437781504000.0L) + 39926233.0L/201180672000.0L) + 75332922187.0L/62768369664000.0L) - 5986965079.0L/1961511552000.0L) - 243294646411.0L/15692092416000.0L) + 11431199701.0L/490377888000.0L) + 136706450711.0L/1307674368000.0L) - 1827209.0L/27243216.0L) - 20522659.0L/72648576.0L) + 20.0L/429.0L) + 5.0L/26.0L); + poly_val[14] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-1.0L/439378587648000.0L*x + 1.0L/62768369664000.0L) + 59.0L/62768369664000.0L) - 41.0L/6974263296000.0L) - 1709.0L/10461394944000.0L) + 9257.0L/10461394944000.0L) + 3424573.0L/219689293824000.0L) - 2202649.0L/31384184832000.0L) - 2241307.0L/2510734786560.0L) + 1695689.0L/536481792000.0L) + 50611753.0L/1609445376000.0L) - 130451323.0L/1609445376000.0L) - 36787589639.0L/54922323456000.0L) + 17611561711.0L/15692092416000.0L) + 7960300589.0L/980755776000.0L) - 3240137519.0L/435891456000.0L) - 2697612881.0L/54486432000.0L) + 1888949.0L/96864768.0L) + 21095959.0L/169513344.0L) - 15.0L/1144.0L) - 15.0L/182.0L); + poly_val[15] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((1.0L/941525544960000.0L)*x - 1.0L/156920924160000.0L) - 83.0L/188305108992000.0L) + 1.0L/435891456000.0L) + 12041.0L/156920924160000.0L) - 8741.0L/26153487360000.0L) - 2803.0L/384296140800.0L) + 199141.0L/7846046208000.0L) + 388364981.0L/941525544960000.0L) - 
1447093.0L/1341204480000.0L) - 68416553.0L/4828336128000.0L) + 5165621.0L/201180672000.0L) + 68180850949.0L/235381386240000.0L) - 3194891431.0L/9807557760000.0L) - 39090845489.0L/11769069312000.0L) + 109639993.0L/54486432000.0L) + 31531616651.0L/1634592960000.0L) - 9587629.0L/1891890000.0L) - 106733999.0L/2270268000.0L) + 12.0L/3575.0L) + 2.0L/65.0L); + poly_val[16] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-1.0L/2510734786560000.0L*x + 1.0L/502146957312000.0L) + 83.0L/502146957312000.0L) - 349.0L/502146957312000.0L) - 461.0L/16094453760000.0L) + 8147.0L/83691159552000.0L) + 676043.0L/251073478656000.0L) - 1767109.0L/251073478656000.0L) - 374906921.0L/2510734786560000.0L) + 10896041.0L/38626689024000.0L) + 21386987.0L/4291854336000.0L) - 81091903.0L/12875563008000.0L) - 17623242779.0L/179338199040000.0L) + 4749355073.0L/62768369664000.0L) + 17084781061.0L/15692092416000.0L) - 14119093201.0L/31384184832000.0L) - 12393013247.0L/2011806720000.0L) + 1933049.0L/1743565824.0L) + 21475219.0L/1452971520.0L) - 5.0L/6864.0L) - 1.0L/104.0L); + poly_val[17] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((1.0L/8536498274304000.0L)*x - 1.0L/2134124568576000.0L) - 59.0L/1219499753472000.0L) + 1.0L/6351561216000.0L) + 241.0L/29035708416000.0L) - 1.0L/47443968000.0L) - 192749.0L/251073478656000.0L) + 45439.0L/31384184832000.0L) + 2986523.0L/71735279616000.0L) - 25313.0L/459841536000.0L) - 355567.0L/262766592000.0L) + 6427.0L/5474304000.0L) + 652948939.0L/25107347865600.0L) - 106900847.0L/7846046208000.0L) - 1264641073.0L/4483454976000.0L) + 2464771.0L/31135104000.0L) + 475474177.0L/302455296000.0L) - 809.0L/4200768.0L) - 3078277.0L/823350528.0L) + 15.0L/119119.0L) + 15.0L/6188.0L); + poly_val[18] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-1.0L/38414242234368000.0L*x + 1.0L/12804747411456000.0L) + 409.0L/38414242234368000.0L) - 107.0L/4268249137152000.0L) - 461.0L/256094948229120.0L) + 401.0L/125536739328000.0L) + 
184577.0L/1129830653952000.0L) - 78713.0L/376610217984000.0L) - 19594717.0L/2259661307904000.0L) + 147211.0L/19313344512000.0L) + 15994499.0L/57940033536000.0L) - 3039931.0L/19313344512000.0L) - 2944400801.0L/564915326976000.0L) + 336140003.0L/188305108992000.0L) + 7876103813.0L/141228831744000.0L) - 53447083.0L/5230697472000.0L) - 20537220167.0L/66691392768000.0L) + 487121.0L/19760412672.0L) + 5398021.0L/7410154752.0L) - 5.0L/311168.0L) - 5.0L/10608.0L); + poly_val[19] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((1.0L/243290200817664000.0L)*x - 1.0L/121645100408832000.0L) - 31.0L/18714630832128000.0L) + 1.0L/400148356608000.0L) + 587.0L/2134124568576000.0L) - 19.0L/62768369664000.0L) - 9221.0L/376610217984000.0L) + 1789.0L/94152554496000.0L) + 192251.0L/150644087193600.0L) - 19447.0L/28970016768000.0L) - 772547.0L/19313344512000.0L) + 32699.0L/2414168064000.0L) + 140484011.0L/188305108992000.0L) - 3558293.0L/23538138624000.0L) - 28648583.0L/3621252096000.0L) + 5040143.0L/5884534656000.0L) + 965344741.0L/22230464256000.0L) - 11419.0L/5557616064.0L) - 2402251.0L/23465490048.0L) + 5.0L/3741309.0L) + 5.0L/75582.0L); + poly_val[20] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-1.0L/2432902008176640000.0L*x + 1.0L/2432902008176640000.0L) + 79.0L/486580401635328000.0L) - 1.0L/8536498274304000.0L) - 47.0L/1778437140480000.0L) + 17.0L/1255367393280000.0L) + 1741.0L/753220435968000.0L) - 619.0L/753220435968000.0L) - 894347.0L/7532204359680000.0L) + 5473.0L/193133445120000.0L) + 15791.0L/4291854336000.0L) - 21713.0L/38626689024000.0L) - 256228801.0L/3766102179840000.0L) + 5839219.0L/941525544960000.0L) + 421973.0L/588453465600.0L) - 364919.0L/10461394944000.0L) - 1743050333.0L/444609285120000.0L) + 514639.0L/6175128960000.0L) + 108194591.0L/11732745024000.0L) - 1.0L/18475600.0L) - 1.0L/167960.0L); + poly_val[21] = x*(pow(x, 2)*(pow(x, 2)*(pow(x, 2)*(pow(x, 2)*(pow(x, 2)*(pow(x, 2)*(pow(x, 2)*(pow(x, 2)*(pow(x, 
2)*((1.0L/51090942171709440000.0L)*pow(x, 2) - 1.0L/132703745900544000.0L) + 1.0L/831477104640000.0L) - 149.0L/1437966286848000.0L) + 10811.0L/2054237552640000.0L) - 2677.0L/16554295296000.0L) + 10684483.0L/3594915717120000.0L) - 800993.0L/25677969408000.0L) + 1473061.0L/8661219840000.0L) - 178939.0L/447977537280.0L) + 1.0L/3879876.0L); + break; + case 1: + poly_val[0] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-1.0L/2432902008176640000.0L*x + 1.0L/121645100408832000.0L) + 1.0L/15365696893747200.0L) - 1.0L/474249904128000.0L) - 1.0L/1883051089920000.0L) + 17.0L/78460462080000.0L) - 503.0L/1054508610355200.0L) - 619.0L/53801459712000.0L) + 66779.0L/1738201006080000.0L) + 5473.0L/16094453760000.0L) - 14099.0L/10534551552000.0L) - 21713.0L/3862668902400.0L) + 209515049.0L/8787571752960000.0L) + 5839219.0L/117690693120000.0L) - 4353367.0L/20175547392000.0L) - 364919.0L/1743565824000.0L) + 238420331.0L/266765571072000.0L) + 514639.0L/1543782240000.0L) - 106924511.0L/82129215168000.0L) - 1.0L/9237800.0L) + 1.0L/3527160.0L; + poly_val[1] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((1.0L/115852476579840000.0L)*x - 1.0L/6082255020441600.0L) - 41.0L/25609494822912000.0L) + 1.0L/22230464256000.0L) + 59.0L/1255367393280000.0L) - 19.0L/3923023104000.0L) + 59.0L/7173527961600.0L) + 1789.0L/6725182464000.0L) - 455633.0L/579400335360000.0L) - 19447.0L/2414168064000.0L) + 102191.0L/3511517184000.0L) + 32699.0L/241416806400.0L) - 112017667.0L/209227898880000.0L) - 3558293.0L/2942267328000.0L) + 66422087.0L/13450364928000.0L) + 5040143.0L/980755776000.0L) - 306556247.0L/14820309504000.0L) - 11419.0L/1389404016.0L) + 790297.0L/26072766720.0L) + 10.0L/3741309.0L) - 1.0L/151164.0L; + poly_val[2] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-1.0L/11585247657984000.0L*x + 1.0L/640237370572800.0L) + 233.0L/12804747411456000.0L) - 107.0L/237124952064000.0L) - 37.0L/41845579776000.0L) + 401.0L/7846046208000.0L) - 1429.0L/25107347865600.0L) - 
78713.0L/26900729856000.0L) + 426961.0L/57940033536000.0L) + 147211.0L/1609445376000.0L) - 173941.0L/585252864000.0L) - 3039931.0L/1931334451200.0L) + 3416723.0L/597793996800.0L) + 336140003.0L/23538138624000.0L) - 363897701.0L/6725182464000.0L) - 53447083.0L/871782912000.0L) + 1023220943.0L/4446092851200.0L) + 487121.0L/4940103168.0L) - 5318641.0L/15643660032.0L) - 5.0L/155584.0L) + 5.0L/67184.0L; + poly_val[3] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((1.0L/1829249630208000.0L)*x - 1.0L/106706228428800.0L) - 703.0L/5487748890624000.0L) + 1.0L/352864512000.0L) + 1.0L/118244966400.0L) - 1.0L/2965248000.0L) + 10993.0L/75322043596800.0L) + 45439.0L/2241727488000.0L) - 144587.0L/3547348992000.0L) - 25313.0L/38320128000.0L) + 201319.0L/107495424000.0L) + 6427.0L/547430400.0L) - 2409537919.0L/62768369664000.0L) - 106900847.0L/980755776000.0L) + 1087177561.0L/2882221056000.0L) + 2464771.0L/5189184000.0L) - 34334629.0L/20939212800.0L) - 809.0L/1050192.0L) + 3026437.0L/1235025792.0L) + 30.0L/119119.0L) - 5.0L/9282.0L; + poly_val[4] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-1.0L/406499917824000.0L*x + 1.0L/25107347865600.0L) + 5377.0L/8536498274304000.0L) - 349.0L/27897053184000.0L) - 359.0L/6974263296000.0L) + 8147.0L/5230697472000.0L) + 9059.0L/16738231910400.0L) - 1767109.0L/17933819904000.0L) + 1077763.0L/7725337804800.0L) + 10896041.0L/3218890752000.0L) - 9330647.0L/1170505728000.0L) - 81091903.0L/1287556300800.0L) + 5021638757.0L/27897053184000.0L) + 4749355073.0L/7846046208000.0L) - 524300089.0L/280215936000.0L) - 14119093201.0L/5230697472000.0L) + 149510878211.0L/17784371404800.0L) + 1933049.0L/435891456.0L) - 21051859.0L/1646701056.0L) - 5.0L/3432.0L) + 5.0L/1768.0L; + poly_val[5] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((1.0L/119558799360000.0L)*x - 1.0L/7846046208000.0L) - 1159.0L/502146957312000.0L) + 1.0L/24216192000.0L) + 92497.0L/418455797760000.0L) - 8741.0L/1634592960000.0L) - 109829.0L/16738231910400.0L) 
+ 199141.0L/560431872000.0L) - 49745219.0L/193133445120000.0L) - 1447093.0L/111767040000.0L) + 5418317.0L/234101145600.0L) + 5165621.0L/20118067200.0L) - 42621719501.0L/69742632960000.0L) - 3194891431.0L/1225944720000.0L) + 31196765993.0L/4483454976000.0L) + 109639993.0L/9081072000.0L) - 28770379499.0L/871782912000.0L) - 9587629.0L/472972500.0L) + 104193839.0L/2018016000.0L) + 24.0L/3575.0L) - 3.0L/260.0L; + poly_val[6] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-1.0L/44834549760000.0L*x + 1.0L/3138418483200.0L) + 19.0L/2897001676800.0L) - 41.0L/387459072000.0L) - 111367.0L/156920924160000.0L) + 9257.0L/653837184000.0L) + 39229.0L/1255367393280.0L) - 2202649.0L/2241727488000.0L) - 4562501.0L/72425041920000.0L) + 1695689.0L/44706816000.0L) - 747037.0L/17557585920.0L) - 130451323.0L/160944537600.0L) + 2397003491.0L/1634592960000.0L) + 17611561711.0L/1961511552000.0L) - 253178371.0L/12933043200.0L) - 3240137519.0L/72648576000.0L) + 8379111271.0L/81729648000.0L) + 1888949.0L/24216192.0L) - 20460919.0L/121080960.0L) - 15.0L/572.0L) + 1.0L/26.0L; + poly_val[7] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((1.0L/20922789888000.0L)*x - 1.0L/1569209241600.0L) - 19.0L/1280987136000.0L) + 47.0L/217945728000.0L) + 18479.0L/10461394944000.0L) - 9677.0L/326918592000.0L) - 1427683.0L/14645952921600.0L) + 1192853.0L/560431872000.0L) + 1397573.0L/689762304000.0L) - 17379821.0L/201180672000.0L) + 499573.0L/16257024000.0L) + 39926233.0L/20118067200.0L) - 5487440311.0L/2440992153600.0L) - 5986965079.0L/245188944000.0L) + 3098878547.0L/80061696000.0L) + 11431199701.0L/81729648000.0L) - 16397134673.0L/65383718400.0L) - 1827209.0L/6810804.0L) + 19675939.0L/42378336.0L) + 40.0L/429.0L) - 10.0L/91.0L; + poly_val[8] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-1.0L/11955879936000.0L*x + 1.0L/965667225600.0L) + 6821.0L/251073478656000.0L) - 127.0L/357654528000.0L) - 14603.0L/4184557977600.0L) + 3329.0L/67060224000.0L) + 266857.0L/1195587993600.0L) 
- 2527141.0L/689762304000.0L) - 137469049.0L/19313344512000.0L) + 82887751.0L/536481792000.0L) + 46068893.0L/585252864000.0L) - 269158459.0L/71530905600.0L) + 18550682911.0L/13948526592000.0L) + 15347344853.0L/301771008000.0L) - 49953999287.0L/1120863744000.0L) - 7656977201.0L/22353408000.0L) + 69369468211.0L/174356582400.0L) + 1650809.0L/1862784.0L) - 17523859.0L/16144128.0L) - 15.0L/44.0L) + 15.0L/52.0L; + poly_val[9] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((1.0L/8277147648000.0L)*x - 1.0L/724250419200.0L) - 7087.0L/173820100608000.0L) + 1.0L/2095632000.0L) + 161041.0L/28970016768000.0L) - 10177.0L/150885504000.0L) - 2280613.0L/5794003353600.0L) + 1308227.0L/258660864000.0L) + 535822261.0L/34764020121600.0L) - 87882491.0L/402361344000.0L) - 1699387243.0L/5267275776000.0L) + 666523661.0L/120708403200.0L) + 1995302329.0L/689762304000.0L) - 8990599279.0L/113164128000.0L) + 8279706833.0L/1551965184000.0L) + 2517135701.0L/4191264000.0L) - 15120010711.0L/60354201600.0L) - 698249.0L/349272.0L) + 6410659.0L/5588352.0L) + 20.0L/11.0L) - 5.0L/6.0L; + poly_val[10] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-1.0L/6897623040000.0L*x + 1.0L/658409472000.0L) + 19.0L/376233984000.0L) - 11.0L/20901888000.0L) - 323.0L/44789760000.0L) + 209.0L/2799360000.0L) + 48127.0L/87787929600.0L) - 529397.0L/94058496000.0L) - 45395389.0L/1881169920000.0L) + 38411483.0L/156764160000.0L) + 11240723.0L/17915904000.0L) - 11240723.0L/1791590400.0L) - 3451088009.0L/365783040000.0L) + 37961968099.0L/411505920000.0L) + 258720739.0L/3359232000.0L) - 2845928129.0L/3919104000.0L) - 27988159.0L/93312000.0L) + 307869749.0L/116640000.0L) + 178939.0L/423360.0L) - 1968329.0L/635040.0L) - 1.0L/11.0L; + poly_val[11] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((1.0L/6897623040000.0L)*x - 1.0L/724250419200.0L) - 1501.0L/28970016768000.0L) + 1.0L/2095632000.0L) + 6851.0L/894136320000.0L) - 10177.0L/150885504000.0L) - 590459.0L/965667225600.0L) + 
1308227.0L/258660864000.0L) + 4175722993.0L/144850083840000.0L) - 87882491.0L/402361344000.0L) - 242186803.0L/292626432000.0L) + 666523661.0L/120708403200.0L) + 57957677929.0L/4023613440000.0L) - 8990599279.0L/113164128000.0L) - 37902695461.0L/258660864000.0L) + 2517135701.0L/4191264000.0L) + 40256974711.0L/50295168000.0L) - 698249.0L/349272.0L) - 8950819.0L/4656960.0L) + 20.0L/11.0L) + 1; + poly_val[12] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-1.0L/8277147648000.0L*x + 1.0L/965667225600.0L) + 7657.0L/173820100608000.0L) - 127.0L/357654528000.0L) - 24191.0L/3621252096000.0L) + 3329.0L/67060224000.0L) + 3186283.0L/5794003353600.0L) - 2527141.0L/689762304000.0L) - 4678146941.0L/173820100608000.0L) + 82887751.0L/536481792000.0L) + 4245956653.0L/5267275776000.0L) - 269158459.0L/71530905600.0L) - 28265888347.0L/1931334451200.0L) + 15347344853.0L/301771008000.0L) + 7429174631.0L/48498912000.0L) - 7656977201.0L/22353408000.0L) - 99084030211.0L/120708403200.0L) + 1650809.0L/1862784.0L) + 18793939.0L/11176704.0L) - 15.0L/44.0L) - 5.0L/12.0L; + poly_val[13] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((1.0L/11955879936000.0L)*x - 1.0L/1569209241600.0L) - 7771.0L/251073478656000.0L) + 47.0L/217945728000.0L) + 493.0L/103322419200.0L) - 9677.0L/326918592000.0L) - 3343729.0L/8369115955200.0L) + 1192853.0L/560431872000.0L) + 383394269.0L/19313344512000.0L) - 17379821.0L/201180672000.0L) - 350882963.0L/585252864000.0L) + 39926233.0L/20118067200.0L) + 75332922187.0L/6974263296000.0L) - 5986965079.0L/245188944000.0L) - 243294646411.0L/2241727488000.0L) + 11431199701.0L/81729648000.0L) + 136706450711.0L/261534873600.0L) - 1827209.0L/6810804.0L) - 20522659.0L/24216192.0L) + 40.0L/429.0L) + 5.0L/26.0L; + poly_val[14] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-1.0L/20922789888000.0L*x + 1.0L/3138418483200.0L) + 1121.0L/62768369664000.0L) - 41.0L/387459072000.0L) - 29053.0L/10461394944000.0L) + 9257.0L/653837184000.0L) + 
3424573.0L/14645952921600.0L) - 2202649.0L/2241727488000.0L) - 2241307.0L/193133445120.0L) + 1695689.0L/44706816000.0L) + 50611753.0L/146313216000.0L) - 130451323.0L/160944537600.0L) - 36787589639.0L/6102480384000.0L) + 17611561711.0L/1961511552000.0L) + 7960300589.0L/140107968000.0L) - 3240137519.0L/72648576000.0L) - 2697612881.0L/10897286400.0L) + 1888949.0L/24216192.0L) + 21095959.0L/56504448.0L) - 15.0L/572.0L) - 15.0L/182.0L; + poly_val[15] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((1.0L/44834549760000.0L)*x - 1.0L/7846046208000.0L) - 1577.0L/188305108992000.0L) + 1.0L/24216192000.0L) + 204697.0L/156920924160000.0L) - 8741.0L/1634592960000.0L) - 2803.0L/25619742720.0L) + 199141.0L/560431872000.0L) + 388364981.0L/72425041920000.0L) - 1447093.0L/111767040000.0L) - 68416553.0L/438939648000.0L) + 5165621.0L/20118067200.0L) + 68180850949.0L/26153487360000.0L) - 3194891431.0L/1225944720000.0L) - 39090845489.0L/1681295616000.0L) + 109639993.0L/9081072000.0L) + 31531616651.0L/326918592000.0L) - 9587629.0L/472972500.0L) - 106733999.0L/756756000.0L) + 24.0L/3575.0L) + 2.0L/65.0L; + poly_val[16] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-1.0L/119558799360000.0L*x + 1.0L/25107347865600.0L) + 1577.0L/502146957312000.0L) - 349.0L/27897053184000.0L) - 7837.0L/16094453760000.0L) + 8147.0L/5230697472000.0L) + 676043.0L/16738231910400.0L) - 1767109.0L/17933819904000.0L) - 374906921.0L/193133445120000.0L) + 10896041.0L/3218890752000.0L) + 21386987.0L/390168576000.0L) - 81091903.0L/1287556300800.0L) - 17623242779.0L/19926466560000.0L) + 4749355073.0L/7846046208000.0L) + 17084781061.0L/2241727488000.0L) - 14119093201.0L/5230697472000.0L) - 12393013247.0L/402361344000.0L) + 1933049.0L/435891456.0L) + 21475219.0L/484323840.0L) - 5.0L/3432.0L) - 1.0L/104.0L; + poly_val[17] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((1.0L/406499917824000.0L)*x - 1.0L/106706228428800.0L) - 1121.0L/1219499753472000.0L) + 1.0L/352864512000.0L) + 
241.0L/1707982848000.0L) - 1.0L/2965248000.0L) - 192749.0L/16738231910400.0L) + 45439.0L/2241727488000.0L) + 2986523.0L/5518098432000.0L) - 25313.0L/38320128000.0L) - 355567.0L/23887872000.0L) + 6427.0L/547430400.0L) + 652948939.0L/2789705318400.0L) - 106900847.0L/980755776000.0L) - 1264641073.0L/640493568000.0L) + 2464771.0L/5189184000.0L) + 475474177.0L/60491059200.0L) - 809.0L/1050192.0L) - 3078277.0L/274450176.0L) + 30.0L/119119.0L) + 15.0L/6188.0L; + poly_val[18] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-1.0L/1829249630208000.0L*x + 1.0L/640237370572800.0L) + 7771.0L/38414242234368000.0L) - 107.0L/237124952064000.0L) - 461.0L/15064408719360.0L) + 401.0L/7846046208000.0L) + 184577.0L/75322043596800.0L) - 78713.0L/26900729856000.0L) - 19594717.0L/173820100608000.0L) + 147211.0L/1609445376000.0L) + 15994499.0L/5267275776000.0L) - 3039931.0L/1931334451200.0L) - 2944400801.0L/62768369664000.0L) + 336140003.0L/23538138624000.0L) + 7876103813.0L/20175547392000.0L) - 53447083.0L/871782912000.0L) - 20537220167.0L/13338278553600.0L) + 487121.0L/4940103168.0L) + 5398021.0L/2470051584.0L) - 5.0L/155584.0L) - 5.0L/10608.0L; + poly_val[19] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((1.0L/11585247657984000.0L)*x - 1.0L/6082255020441600.0L) - 31.0L/984980570112000.0L) + 1.0L/22230464256000.0L) + 587.0L/125536739328000.0L) - 19.0L/3923023104000.0L) - 9221.0L/25107347865600.0L) + 1789.0L/6725182464000.0L) + 192251.0L/11588006707200.0L) - 19447.0L/2414168064000.0L) - 772547.0L/1755758592000.0L) + 32699.0L/241416806400.0L) + 140484011.0L/20922789888000.0L) - 3558293.0L/2942267328000.0L) - 28648583.0L/517321728000.0L) + 5040143.0L/980755776000.0L) + 965344741.0L/4446092851200.0L) - 11419.0L/1389404016.0L) - 2402251.0L/7821830016.0L) + 10.0L/3741309.0L) + 5.0L/75582.0L; + poly_val[20] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-1.0L/115852476579840000.0L*x + 1.0L/121645100408832000.0L) + 79.0L/25609494822912000.0L) - 
1.0L/474249904128000.0L) - 47.0L/104613949440000.0L) + 17.0L/78460462080000.0L) + 1741.0L/50214695731200.0L) - 619.0L/53801459712000.0L) - 894347.0L/579400335360000.0L) + 5473.0L/16094453760000.0L) + 15791.0L/390168576000.0L) - 21713.0L/3862668902400.0L) - 256228801.0L/418455797760000.0L) + 5839219.0L/117690693120000.0L) + 421973.0L/84064780800.0L) - 364919.0L/1743565824000.0L) - 1743050333.0L/88921857024000.0L) + 514639.0L/1543782240000.0L) + 108194591.0L/3910915008000.0L) - 1.0L/9237800.0L) - 1.0L/167960.0L; + poly_val[21] = pow(x, 2)*(pow(x, 2)*(pow(x, 2)*(pow(x, 2)*(pow(x, 2)*(pow(x, 2)*(pow(x, 2)*(pow(x, 2)*(pow(x, 2)*((1.0L/2432902008176640000.0L)*pow(x, 2) - 1.0L/6984407678976000.0L) + 1.0L/48910417920000.0L) - 149.0L/95864419123200.0L) + 10811.0L/158018273280000.0L) - 2677.0L/1504935936000.0L) + 10684483.0L/399435079680000.0L) - 800993.0L/3668281344000.0L) + 1473061.0L/1732243968000.0L) - 178939.0L/149325845760.0L) + 1.0L/3879876.0L; + break; + case 2: + poly_val[0] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-1.0L/121645100408832000.0L*x + 1.0L/6402373705728000.0L) + 1.0L/853649827430400.0L) - 1.0L/27897053184000.0L) - 1.0L/117690693120000.0L) + 17.0L/5230697472000.0L) - 503.0L/75322043596800.0L) - 619.0L/4138573824000.0L) + 66779.0L/144850083840000.0L) + 5473.0L/1463132160000.0L) - 14099.0L/1053455155200.0L) - 21713.0L/429185433600.0L) + 209515049.0L/1098446469120000.0L) + 5839219.0L/16812956160000.0L) - 4353367.0L/3362591232000.0L) - 364919.0L/348713164800.0L) + 238420331.0L/66691392768000.0L) + 514639.0L/514594080000.0L) - 106924511.0L/41064607584000.0L) - 1.0L/9237800.0L; + poly_val[1] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((1.0L/5792623828992000.0L)*x - 1.0L/320118685286400.0L) - 41.0L/1422749712384000.0L) + 1.0L/1307674368000.0L) + 59.0L/78460462080000.0L) - 19.0L/261534873600.0L) + 59.0L/512394854400.0L) + 1789.0L/517321728000.0L) - 455633.0L/48283361280000.0L) - 19447.0L/219469824000.0L) + 102191.0L/351151718400.0L) + 
32699.0L/26824089600.0L) - 112017667.0L/26153487360000.0L) - 3558293.0L/420323904000.0L) + 66422087.0L/2241727488000.0L) + 5040143.0L/196151155200.0L) - 306556247.0L/3705077376000.0L) - 11419.0L/463134672.0L) + 790297.0L/13036383360.0L) + 10.0L/3741309.0L; + poly_val[2] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-1.0L/579262382899200.0L*x + 19.0L/640237370572800.0L) + 233.0L/711374856192000.0L) - 107.0L/13948526592000.0L) - 37.0L/2615348736000.0L) + 401.0L/523069747200.0L) - 1429.0L/1793381990400.0L) - 78713.0L/2069286912000.0L) + 426961.0L/4828336128000.0L) + 147211.0L/146313216000.0L) - 173941.0L/58525286400.0L) - 3039931.0L/214592716800.0L) + 3416723.0L/74724249600.0L) + 336140003.0L/3362591232000.0L) - 363897701.0L/1120863744000.0L) - 53447083.0L/174356582400.0L) + 1023220943.0L/1111523212800.0L) + 487121.0L/1646701056.0L) - 5318641.0L/7821830016.0L) - 5.0L/155584.0L; + poly_val[3] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((1.0L/91462481510400.0L)*x - 19.0L/106706228428800.0L) - 703.0L/304874938368000.0L) + 1.0L/20756736000.0L) + 1.0L/7390310400.0L) - 1.0L/197683200.0L) + 10993.0L/5380145971200.0L) + 45439.0L/172440576000.0L) - 144587.0L/295612416000.0L) - 25313.0L/3483648000.0L) + 201319.0L/10749542400.0L) + 6427.0L/60825600.0L) - 2409537919.0L/7846046208000.0L) - 106900847.0L/140107968000.0L) + 1087177561.0L/480370176000.0L) + 2464771.0L/1037836800.0L) - 34334629.0L/5234803200.0L) - 809.0L/350064.0L) + 3026437.0L/617512896.0L) + 30.0L/119119.0L; + poly_val[4] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-1.0L/20324995891200.0L*x + 19.0L/25107347865600.0L) + 5377.0L/474249904128000.0L) - 5933.0L/27897053184000.0L) - 359.0L/435891456000.0L) + 8147.0L/348713164800.0L) + 9059.0L/1195587993600.0L) - 1767109.0L/1379524608000.0L) + 1077763.0L/643778150400.0L) + 10896041.0L/292626432000.0L) - 9330647.0L/117050572800.0L) - 81091903.0L/143061811200.0L) + 5021638757.0L/3487131648000.0L) + 4749355073.0L/1120863744000.0L) - 
524300089.0L/46702656000.0L) - 14119093201.0L/1046139494400.0L) + 149510878211.0L/4446092851200.0L) + 1933049.0L/145297152.0L) - 21051859.0L/823350528.0L) - 5.0L/3432.0L; + poly_val[5] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((1.0L/5977939968000.0L)*x - 19.0L/7846046208000.0L) - 1159.0L/27897053184000.0L) + 17.0L/24216192000.0L) + 92497.0L/26153487360000.0L) - 8741.0L/108972864000.0L) - 109829.0L/1195587993600.0L) + 199141.0L/43110144000.0L) - 49745219.0L/16094453760000.0L) - 1447093.0L/10160640000.0L) + 5418317.0L/23410114560.0L) + 5165621.0L/2235340800.0L) - 42621719501.0L/8717829120000.0L) - 3194891431.0L/175134960000.0L) + 31196765993.0L/747242496000.0L) + 109639993.0L/1816214400.0L) - 28770379499.0L/217945728000.0L) - 9587629.0L/157657500.0L) + 104193839.0L/1009008000.0L) + 24.0L/3575.0L; + poly_val[6] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-1.0L/2241727488000.0L*x + 19.0L/3138418483200.0L) + 19.0L/160944537600.0L) - 697.0L/387459072000.0L) - 111367.0L/9807557760000.0L) + 9257.0L/43589145600.0L) + 39229.0L/89669099520.0L) - 2202649.0L/172440576000.0L) - 4562501.0L/6035420160000.0L) + 1695689.0L/4064256000.0L) - 747037.0L/1755758592.0L) - 130451323.0L/17882726400.0L) + 2397003491.0L/204324120000.0L) + 17611561711.0L/280215936000.0L) - 253178371.0L/2155507200.0L) - 3240137519.0L/14529715200.0L) + 8379111271.0L/20432412000.0L) + 1888949.0L/8072064.0L) - 20460919.0L/60540480.0L) - 15.0L/572.0L; + poly_val[7] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((1.0L/1046139494400.0L)*x - 19.0L/1569209241600.0L) - 19.0L/71165952000.0L) + 799.0L/217945728000.0L) + 18479.0L/653837184000.0L) - 9677.0L/21794572800.0L) - 1427683.0L/1046139494400.0L) + 1192853.0L/43110144000.0L) + 1397573.0L/57480192000.0L) - 17379821.0L/18289152000.0L) + 499573.0L/1625702400.0L) + 39926233.0L/2235340800.0L) - 5487440311.0L/305124019200.0L) - 5986965079.0L/35026992000.0L) + 3098878547.0L/13343616000.0L) + 11431199701.0L/16345929600.0L) - 
16397134673.0L/16345929600.0L) - 1827209.0L/2270268.0L) + 19675939.0L/21189168.0L) + 40.0L/429.0L; + poly_val[8] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-1.0L/597793996800.0L*x + 19.0L/965667225600.0L) + 6821.0L/13948526592000.0L) - 2159.0L/357654528000.0L) - 14603.0L/261534873600.0L) + 3329.0L/4470681600.0L) + 266857.0L/85399142400.0L) - 32852833.0L/689762304000.0L) - 137469049.0L/1609445376000.0L) + 82887751.0L/48771072000.0L) + 46068893.0L/58525286400.0L) - 269158459.0L/7947878400.0L) + 18550682911.0L/1743565824000.0L) + 15347344853.0L/43110144000.0L) - 49953999287.0L/186810624000.0L) - 7656977201.0L/4470681600.0L) + 69369468211.0L/43589145600.0L) + 1650809.0L/620928.0L) - 17523859.0L/8072064.0L) - 15.0L/44.0L; + poly_val[9] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((1.0L/413857382400.0L)*x - 19.0L/724250419200.0L) - 7087.0L/9656672256000.0L) + 17.0L/2095632000.0L) + 161041.0L/1810626048000.0L) - 10177.0L/10059033600.0L) - 2280613.0L/413857382400.0L) + 17006951.0L/258660864000.0L) + 535822261.0L/2897001676800.0L) - 87882491.0L/36578304000.0L) - 1699387243.0L/526727577600.0L) + 666523661.0L/13412044800.0L) + 1995302329.0L/86220288000.0L) - 8990599279.0L/16166304000.0L) + 8279706833.0L/258660864000.0L) + 2517135701.0L/838252800.0L) - 15120010711.0L/15088550400.0L) - 698249.0L/116424.0L) + 6410659.0L/2794176.0L) + 20.0L/11.0L; + poly_val[10] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-1.0L/344881152000.0L*x + 19.0L/658409472000.0L) + 19.0L/20901888000.0L) - 187.0L/20901888000.0L) - 323.0L/2799360000.0L) + 209.0L/186624000.0L) + 48127.0L/6270566400.0L) - 6882161.0L/94058496000.0L) - 45395389.0L/156764160000.0L) + 422526313.0L/156764160000.0L) + 11240723.0L/1791590400.0L) - 11240723.0L/199065600.0L) - 3451088009.0L/45722880000.0L) + 37961968099.0L/58786560000.0L) + 258720739.0L/559872000.0L) - 2845928129.0L/783820800.0L) - 27988159.0L/23328000.0L) + 307869749.0L/38880000.0L) + 178939.0L/211680.0L) - 1968329.0L/635040.0L; + 
poly_val[11] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((1.0L/344881152000.0L)*x - 19.0L/724250419200.0L) - 1501.0L/1609445376000.0L) + 17.0L/2095632000.0L) + 6851.0L/55883520000.0L) - 10177.0L/10059033600.0L) - 590459.0L/68976230400.0L) + 17006951.0L/258660864000.0L) + 4175722993.0L/12070840320000.0L) - 87882491.0L/36578304000.0L) - 242186803.0L/29262643200.0L) + 666523661.0L/13412044800.0L) + 57957677929.0L/502951680000.0L) - 8990599279.0L/16166304000.0L) - 37902695461.0L/43110144000.0L) + 2517135701.0L/838252800.0L) + 40256974711.0L/12573792000.0L) - 698249.0L/116424.0L) - 8950819.0L/2328480.0L) + 20.0L/11.0L; + poly_val[12] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-1.0L/413857382400.0L*x + 19.0L/965667225600.0L) + 7657.0L/9656672256000.0L) - 2159.0L/357654528000.0L) - 24191.0L/226328256000.0L) + 3329.0L/4470681600.0L) + 3186283.0L/413857382400.0L) - 32852833.0L/689762304000.0L) - 4678146941.0L/14485008384000.0L) + 82887751.0L/48771072000.0L) + 4245956653.0L/526727577600.0L) - 269158459.0L/7947878400.0L) - 28265888347.0L/241416806400.0L) + 15347344853.0L/43110144000.0L) + 7429174631.0L/8083152000.0L) - 7656977201.0L/4470681600.0L) - 99084030211.0L/30177100800.0L) + 1650809.0L/620928.0L) + 18793939.0L/5588352.0L) - 15.0L/44.0L; + poly_val[13] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((1.0L/597793996800.0L)*x - 19.0L/1569209241600.0L) - 7771.0L/13948526592000.0L) + 799.0L/217945728000.0L) + 493.0L/6457651200.0L) - 9677.0L/21794572800.0L) - 3343729.0L/597793996800.0L) + 1192853.0L/43110144000.0L) + 383394269.0L/1609445376000.0L) - 17379821.0L/18289152000.0L) - 350882963.0L/58525286400.0L) + 39926233.0L/2235340800.0L) + 75332922187.0L/871782912000.0L) - 5986965079.0L/35026992000.0L) - 243294646411.0L/373621248000.0L) + 11431199701.0L/16345929600.0L) + 136706450711.0L/65383718400.0L) - 1827209.0L/2270268.0L) - 20522659.0L/12108096.0L) + 40.0L/429.0L; + poly_val[14] = 
x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-1.0L/1046139494400.0L*x + 19.0L/3138418483200.0L) + 1121.0L/3487131648000.0L) - 697.0L/387459072000.0L) - 29053.0L/653837184000.0L) + 9257.0L/43589145600.0L) + 3424573.0L/1046139494400.0L) - 2202649.0L/172440576000.0L) - 2241307.0L/16094453760.0L) + 1695689.0L/4064256000.0L) + 50611753.0L/14631321600.0L) - 130451323.0L/17882726400.0L) - 36787589639.0L/762810048000.0L) + 17611561711.0L/280215936000.0L) + 7960300589.0L/23351328000.0L) - 3240137519.0L/14529715200.0L) - 2697612881.0L/2724321600.0L) + 1888949.0L/8072064.0L) + 21095959.0L/28252224.0L) - 15.0L/572.0L; + poly_val[15] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((1.0L/2241727488000.0L)*x - 19.0L/7846046208000.0L) - 1577.0L/10461394944000.0L) + 17.0L/24216192000.0L) + 204697.0L/9807557760000.0L) - 8741.0L/108972864000.0L) - 19621.0L/12809871360.0L) + 199141.0L/43110144000.0L) + 388364981.0L/6035420160000.0L) - 1447093.0L/10160640000.0L) - 68416553.0L/43893964800.0L) + 5165621.0L/2235340800.0L) + 68180850949.0L/3269185920000.0L) - 3194891431.0L/175134960000.0L) - 39090845489.0L/280215936000.0L) + 109639993.0L/1816214400.0L) + 31531616651.0L/81729648000.0L) - 9587629.0L/157657500.0L) - 106733999.0L/378378000.0L) + 24.0L/3575.0L; + poly_val[16] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-1.0L/5977939968000.0L*x + 19.0L/25107347865600.0L) + 1577.0L/27897053184000.0L) - 5933.0L/27897053184000.0L) - 7837.0L/1005903360000.0L) + 8147.0L/348713164800.0L) + 676043.0L/1195587993600.0L) - 1767109.0L/1379524608000.0L) - 374906921.0L/16094453760000.0L) + 10896041.0L/292626432000.0L) + 21386987.0L/39016857600.0L) - 81091903.0L/143061811200.0L) - 17623242779.0L/2490808320000.0L) + 4749355073.0L/1120863744000.0L) + 17084781061.0L/373621248000.0L) - 14119093201.0L/1046139494400.0L) - 12393013247.0L/100590336000.0L) + 1933049.0L/145297152.0L) + 21475219.0L/242161920.0L) - 5.0L/3432.0L; + poly_val[17] = 
x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((1.0L/20324995891200.0L)*x - 19.0L/106706228428800.0L) - 1121.0L/67749986304000.0L) + 1.0L/20756736000.0L) + 241.0L/106748928000.0L) - 1.0L/197683200.0L) - 192749.0L/1195587993600.0L) + 45439.0L/172440576000.0L) + 2986523.0L/459841536000.0L) - 25313.0L/3483648000.0L) - 355567.0L/2388787200.0L) + 6427.0L/60825600.0L) + 652948939.0L/348713164800.0L) - 106900847.0L/140107968000.0L) - 1264641073.0L/106748928000.0L) + 2464771.0L/1037836800.0L) + 475474177.0L/15122764800.0L) - 809.0L/350064.0L) - 3078277.0L/137225088.0L) + 30.0L/119119.0L; + poly_val[18] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-1.0L/91462481510400.0L*x + 19.0L/640237370572800.0L) + 7771.0L/2134124568576000.0L) - 107.0L/13948526592000.0L) - 461.0L/941525544960.0L) + 401.0L/523069747200.0L) + 184577.0L/5380145971200.0L) - 78713.0L/2069286912000.0L) - 19594717.0L/14485008384000.0L) + 147211.0L/146313216000.0L) + 15994499.0L/526727577600.0L) - 3039931.0L/214592716800.0L) - 2944400801.0L/7846046208000.0L) + 336140003.0L/3362591232000.0L) + 7876103813.0L/3362591232000.0L) - 53447083.0L/174356582400.0L) - 20537220167.0L/3334569638400.0L) + 487121.0L/1646701056.0L) + 5398021.0L/1235025792.0L) - 5.0L/155584.0L; + poly_val[19] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*((1.0L/579262382899200.0L)*x - 1.0L/320118685286400.0L) - 31.0L/54721142784000.0L) + 1.0L/1307674368000.0L) + 587.0L/7846046208000.0L) - 19.0L/261534873600.0L) - 9221.0L/1793381990400.0L) + 1789.0L/517321728000.0L) + 192251.0L/965667225600.0L) - 19447.0L/219469824000.0L) - 772547.0L/175575859200.0L) + 32699.0L/26824089600.0L) + 140484011.0L/2615348736000.0L) - 3558293.0L/420323904000.0L) - 28648583.0L/86220288000.0L) + 5040143.0L/196151155200.0L) + 965344741.0L/1111523212800.0L) - 11419.0L/463134672.0L) - 2402251.0L/3910915008.0L) + 10.0L/3741309.0L; + poly_val[20] = x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(x*(-1.0L/5792623828992000.0L*x + 
1.0L/6402373705728000.0L) + 79.0L/1422749712384000.0L) - 1.0L/27897053184000.0L) - 47.0L/6538371840000.0L) + 17.0L/5230697472000.0L) + 1741.0L/3586763980800.0L) - 619.0L/4138573824000.0L) - 894347.0L/48283361280000.0L) + 5473.0L/1463132160000.0L) + 15791.0L/39016857600.0L) - 21713.0L/429185433600.0L) - 256228801.0L/52306974720000.0L) + 5839219.0L/16812956160000.0L) + 421973.0L/14010796800.0L) - 364919.0L/348713164800.0L) - 1743050333.0L/22230464256000.0L) + 514639.0L/514594080000.0L) + 108194591.0L/1955457504000.0L) - 1.0L/9237800.0L; + poly_val[21] = x*(pow(x, 2)*(pow(x, 2)*(pow(x, 2)*(pow(x, 2)*(pow(x, 2)*(pow(x, 2)*(pow(x, 2)*(pow(x, 2)*((1.0L/121645100408832000.0L)*pow(x, 2) - 1.0L/388022648832000.0L) + 1.0L/3056901120000.0L) - 149.0L/6847458508800.0L) + 10811.0L/13168189440000.0L) - 2677.0L/150493593600.0L) + 10684483.0L/49929384960000.0L) - 800993.0L/611380224000.0L) + 1473061.0L/433060992000.0L) - 178939.0L/74662922880.0L); + break; + } } diff --git a/bfps/cpp/Lagrange_polys.hpp b/bfps/cpp/Lagrange_polys.hpp index 401b9b7f076eeaa4ac44c8190ebaff947cd62247..9f4742a3303d9586cc7832e568162465767a4922 100644 --- a/bfps/cpp/Lagrange_polys.hpp +++ b/bfps/cpp/Lagrange_polys.hpp @@ -36,6 +36,8 @@ void beta_Lagrange_n5(const int deriv, const double x, double *__restrict__ poly void beta_Lagrange_n6(const int deriv, const double x, double *__restrict__ poly_val); void beta_Lagrange_n7(const int deriv, const double x, double *__restrict__ poly_val); void beta_Lagrange_n8(const int deriv, const double x, double *__restrict__ poly_val); +void beta_Lagrange_n9(const int deriv, const double x, double *__restrict__ poly_val); +void beta_Lagrange_n10(const int deriv, const double x, double *__restrict__ poly_val); #endif//LAGRANGE_POLYS diff --git a/bfps/cpp/base.hpp b/bfps/cpp/base.hpp index ee2d74d5b751451e9bb34600a0e2b09891a73d1f..61029c2641b37aab5e47ebc4bb511f38b893eeb1 100644 --- a/bfps/cpp/base.hpp +++ b/bfps/cpp/base.hpp @@ -24,11 +24,12 @@ +#include <cassert> #include 
<mpi.h> #include <stdarg.h> #include <iostream> #include <typeinfo> -#include "io_tools.hpp" +#include "hdf5_tools.hpp" #ifndef BASE @@ -42,6 +43,9 @@ inline int MOD(int a, int n) return ((a%n) + n) % n; } +///////////////////////////////////////////////////////////// +///////////////////////////////////////////////////////////// + #ifdef OMPI_MPI_H #define BFPS_MPICXX_DOUBLE_COMPLEX MPI_DOUBLE_COMPLEX @@ -52,6 +56,37 @@ inline int MOD(int a, int n) #endif//OMPI_MPI_H +/** Compile-time map from a real number type to the MPI datatypes of its real and complex values; the specializations in this hunk cover float and double. */ +template <class realtype> +class mpi_real_type; + +/** Single precision: real() returns MPI_FLOAT, complex() returns MPI_COMPLEX. */ +template <> +class mpi_real_type<float> +{ +public: + static constexpr MPI_Datatype real(){ + return MPI_FLOAT; + } + + static constexpr MPI_Datatype complex(){ + return MPI_COMPLEX; + } +}; + +/** Double precision: real() returns MPI_DOUBLE, complex() returns BFPS_MPICXX_DOUBLE_COMPLEX. */ +template <> +class mpi_real_type<double> +{ +public: + static constexpr MPI_Datatype real(){ + return MPI_DOUBLE; + } + + static constexpr MPI_Datatype complex(){ + return BFPS_MPICXX_DOUBLE_COMPLEX; + } +}; + +///////////////////////////////////////////////////////////// +///////////////////////////////////////////////////////////// #ifndef NDEBUG @@ -99,5 +134,7 @@ inline void DEBUG_MSG_WAIT(MPI_Comm communicator, const char * format, ...) #endif//NDEBUG +/** Evaluates x and discards the result; presumably meant to keep variables that are only read inside assert() from triggering unused warnings when NDEBUG is set (confirm at call sites). */ +#define variable_used_only_in_assert(x) ((void)(x)) + #endif//BASE diff --git a/bfps/cpp/bfps_timer.hpp b/bfps/cpp/bfps_timer.hpp new file mode 100644 index 0000000000000000000000000000000000000000..b299dc115555bd3952e22ce5984f27eb9bfb1914 --- /dev/null +++ b/bfps/cpp/bfps_timer.hpp @@ -0,0 +1,104 @@ +/********************************************************************** +* * +* Copyright 2015 Max Planck Institute * +* for Dynamics and Self-Organization * +* * +* This file is part of bfps. * +* * +* bfps is free software: you can redistribute it and/or modify * +* it under the terms of the GNU General Public License as published * +* by the Free Software Foundation, either version 3 of the License, * +* or (at your option) any later version.
* +* * +* bfps is distributed in the hope that it will be useful, * +* but WITHOUT ANY WARRANTY; without even the implied warranty of * +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * +* GNU General Public License for more details. * +* * +* You should have received a copy of the GNU General Public License * +* along with bfps. If not, see <http://www.gnu.org/licenses/> * +* * +* Contact: Cristian.Lalescu@ds.mpg.de * +* * +**********************************************************************/ +#ifndef BFPS_TIMER_HPP +#define BFPS_TIMER_HPP + +#include <chrono> + +/** + * @file + * + * Each section to measure should be embraced by start/stop. + * The measured time is given by "getElapsed". + * The total time measured by a timer is given by "getCumulated". + * Example : + * @code bfps_timer tm; // Implicit start + * @code ... + * @code tm.stop(); // stop the timer + * @code tm.getElapsed(); // return the duration in s [A] + * @code tm.start(); // restart the timer + * @code ... 
+ * @code tm.stopAndGetElapsed(); // stop the timer and return the duration in s + * [B] + * @code tm.getCumulated(); // Equals [A] + [B] + */ +class bfps_timer { + using double_second_time = std::chrono::duration<double, std::ratio<1, 1>>; + + std::chrono::high_resolution_clock::time_point + m_start; ///< time point recorded by the last start() + std::chrono::high_resolution_clock::time_point m_end; ///< time point recorded by the last stop() + std::chrono::nanoseconds m_cumulate; ///< sum of all start()/stop() intervals since construction or reset() + +public: + /// Constructor: zeroes the cumulated time and starts the timer + bfps_timer() : m_cumulate(std::chrono::nanoseconds::zero()) { start(); } + + /// Copy constructor (deleted) + bfps_timer(const bfps_timer& other) = delete; + /// Copy assignment (deleted) + bfps_timer& operator=(const bfps_timer& other) = delete; + /// Move constructor (deleted) + bfps_timer(bfps_timer&& other) = delete; + /// Move assignment (deleted) + bfps_timer& operator=(bfps_timer&& other) = delete; + + /** Reset all the values, then call start() */ + void reset() { + m_start = std::chrono::high_resolution_clock::time_point(); + m_end = std::chrono::high_resolution_clock::time_point(); + m_cumulate = std::chrono::nanoseconds::zero(); + start(); + } + + /** Start the timer: record the current time as the start point */ + void start() { + m_start = std::chrono::high_resolution_clock::now(); + } + + /** Stop the timer: record the current time and add the interval since the last start() to the cumulated time */ + void stop() { + m_end = std::chrono::high_resolution_clock::now(); + m_cumulate += std::chrono::duration_cast<std::chrono::nanoseconds>(m_end - m_start); + } + + /** Return the time between the last start() and the last stop(), in seconds; it reads the stored time points, so it is only meaningful once stop() has been called after the latest start() */ + double getElapsed() const { + return std::chrono::duration_cast<double_second_time>( + std::chrono::duration_cast<std::chrono::nanoseconds>(m_end - m_start)).count(); + } + + /** Return the total cumulated time, in seconds */ + double getCumulated() const { + return std::chrono::duration_cast<double_second_time>(m_cumulate).count(); + } + + /** Call stop() and return the elapsed time of the interval just closed, in seconds */ + double stopAndGetElapsed() { + stop(); + return getElapsed(); + } +}; + +#endif + diff --git
a/bfps/cpp/distributed_particles.cpp b/bfps/cpp/distributed_particles.cpp index 7d0808419cc0c7c001e37f38e25395fe3fd559b1..73fd0275d8138d41bb4ee7fbc28e2d41e8017661 100644 --- a/bfps/cpp/distributed_particles.cpp +++ b/bfps/cpp/distributed_particles.cpp @@ -24,17 +24,19 @@ -#define NDEBUG +//#define NDEBUG #include <cmath> #include <cassert> #include <cstring> #include <string> #include <sstream> +#include <array> #include "base.hpp" #include "distributed_particles.hpp" #include "fftw_tools.hpp" +#include "scope_timer.hpp" extern int myrank, nprocs; @@ -43,17 +45,17 @@ template <particle_types particle_type, class rnumber, int interp_neighbours> distributed_particles<particle_type, rnumber, interp_neighbours>::distributed_particles( const char *NAME, const hid_t data_file_id, - interpolator<rnumber, interp_neighbours> *FIELD, + interpolator<rnumber, interp_neighbours> *VEL, const int TRAJ_SKIP, const int INTEGRATION_STEPS) : particles_io_base<particle_type>( NAME, TRAJ_SKIP, data_file_id, - FIELD->descriptor->comm) + VEL->descriptor->comm) { assert((INTEGRATION_STEPS <= 6) && (INTEGRATION_STEPS >= 1)); - this->vel = FIELD; + this->vel = VEL; this->rhs.resize(INTEGRATION_STEPS); this->integration_steps = INTEGRATION_STEPS; this->state.reserve(2*this->nparticles / this->nprocs); @@ -72,14 +74,13 @@ void distributed_particles<particle_type, rnumber, interp_neighbours>::sample( const std::unordered_map<int, single_particle_state<particle_type>> &x, std::unordered_map<int, single_particle_state<POINT3D>> &y) { - double *yy = new double[3]; + std::array<double, 3> yy; y.clear(); for (auto &pp: x) { - (*field)(pp.second.data, yy); - y[pp.first] = yy; + (*field)(pp.second.data, &yy.front()); + y[pp.first] = &yy.front(); } - delete[] yy; } template <particle_types particle_type, class rnumber, int interp_neighbours> @@ -121,6 +122,7 @@ void distributed_particles<particle_type, rnumber, interp_neighbours>::redistrib std::unordered_map<int, single_particle_state<particle_type>> 
&x, std::vector<std::unordered_map<int, single_particle_state<particle_type>>> &vals) { + TIMEZONE("distributed_particles::redistribute"); //DEBUG_MSG("entered redistribute\n"); /* neighbouring rank offsets */ int ro[2]; @@ -312,6 +314,7 @@ void distributed_particles<particle_type, rnumber, interp_neighbours>::AdamsBash template <particle_types particle_type, class rnumber, int interp_neighbours> void distributed_particles<particle_type, rnumber, interp_neighbours>::step() { + TIMEZONE("distributed_particles::step"); this->AdamsBashforth((this->iteration < this->integration_steps) ? this->iteration+1 : this->integration_steps); @@ -368,6 +371,7 @@ void distributed_particles<particle_type, rnumber, interp_neighbours>::write( const char *dset_name, std::unordered_map<int, single_particle_state<POINT3D>> &y) { + TIMEZONE("distributed_particles::write"); double *data = new double[this->nparticles*3]; double *yy = new double[this->nparticles*3]; for (unsigned int cindex=0; cindex<this->get_number_of_chunks(); cindex++) @@ -399,6 +403,7 @@ template <particle_types particle_type, class rnumber, int interp_neighbours> void distributed_particles<particle_type, rnumber, interp_neighbours>::write( const bool write_rhs) { + TIMEZONE("distributed_particles::write2"); double *temp0 = new double[this->chunk_size*state_dimension(particle_type)]; double *temp1 = new double[this->chunk_size*state_dimension(particle_type)]; for (unsigned int cindex=0; cindex<this->get_number_of_chunks(); cindex++) @@ -411,7 +416,7 @@ void distributed_particles<particle_type, rnumber, interp_neighbours>::write( if (pp != this->state.end()) std::copy(pp->second.data, pp->second.data + state_dimension(particle_type), - temp0 + pp->first*state_dimension(particle_type)); + temp0 + p*state_dimension(particle_type)); } MPI_Allreduce( temp0, @@ -433,7 +438,7 @@ void distributed_particles<particle_type, rnumber, interp_neighbours>::write( if (pp != this->rhs[i].end()) std::copy(pp->second.data, 
pp->second.data + state_dimension(particle_type), - temp0 + pp->first*state_dimension(particle_type)); + temp0 + p*state_dimension(particle_type)); } MPI_Allreduce( temp0, diff --git a/bfps/cpp/fftw_interface.hpp b/bfps/cpp/fftw_interface.hpp new file mode 100644 index 0000000000000000000000000000000000000000..495ec9fa3712153df4d31faf7dfb3046637b5483 --- /dev/null +++ b/bfps/cpp/fftw_interface.hpp @@ -0,0 +1,173 @@ +/********************************************************************** +* * +* Copyright 2015 Max Planck Institute * +* for Dynamics and Self-Organization * +* * +* This file is part of bfps. * +* * +* bfps is free software: you can redistribute it and/or modify * +* it under the terms of the GNU General Public License as published * +* by the Free Software Foundation, either version 3 of the License, * +* or (at your option) any later version. * +* * +* bfps is distributed in the hope that it will be useful, * +* but WITHOUT ANY WARRANTY; without even the implied warranty of * +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * +* GNU General Public License for more details. * +* * +* You should have received a copy of the GNU General Public License * +* along with bfps. 
If not, see <http://www.gnu.org/licenses/> * +* * +* Contact: Cristian.Lalescu@ds.mpg.de * +* * +**********************************************************************/ + +#ifndef FFTW_INTERFACE_HPP +#define FFTW_INTERFACE_HPP + +#include <fftw3-mpi.h> + +/** Default planner flag: FFTW_ESTIMATE when USE_FFTWESTIMATE is defined at compile time (a warning is emitted), FFTW_PATIENT otherwise. */ +#ifdef USE_FFTWESTIMATE +#define DEFAULT_FFTW_FLAG FFTW_ESTIMATE +#warning You are using FFTW estimate +#else +#define DEFAULT_FFTW_FLAG FFTW_PATIENT +#endif + +/** Uniform static interface to the precision-specific FFTW API, selected by the real number type; specialized in this file for float and double. */ +template <class realtype> +class fftw_interface; + +/** Single-precision specialization: every type alias and static member forwards to the fftwf_-prefixed counterpart. */ +template <> +class fftw_interface<float> +{ +public: + using real = float; + using complex = fftwf_complex; + using plan = fftwf_plan; + using iodim = fftwf_iodim; + + static complex* alloc_complex(const size_t in_size){ + return fftwf_alloc_complex(in_size); + } + + static real* alloc_real(const size_t in_size){ + return fftwf_alloc_real(in_size); + } + + static void free(void* ptr){ + fftwf_free(ptr); + } + + static void execute(plan in_plan){ + fftwf_execute(in_plan); + } + + static void destroy_plan(plan in_plan){ + fftwf_destroy_plan(in_plan); + } + + template <class ... Params> + static plan mpi_plan_transpose(Params ... params){ + return fftwf_mpi_plan_transpose(params...); + } + + template <class ... Params> + static plan mpi_plan_many_transpose(Params ... params){ + return fftwf_mpi_plan_many_transpose(params...); + } + + template <class ... Params> + static plan plan_guru_r2r(Params ... params){ + return fftwf_plan_guru_r2r(params...); + } + + template <class ... Params> + static plan plan_guru_dft(Params ... params){ + return fftwf_plan_guru_dft(params...); + } + + template <class ... Params> + static plan mpi_plan_many_dft_c2r(Params ... params){ + return fftwf_mpi_plan_many_dft_c2r(params...); + } + + template <class ... Params> + static plan mpi_plan_many_dft_r2c(Params ... params){ + return fftwf_mpi_plan_many_dft_r2c(params...); + } + + template <class ... Params> + static plan mpi_plan_dft_c2r_3d(Params ... 
params){ + return fftwf_mpi_plan_dft_c2r_3d(params...); + } +}; + +/** Double-precision specialization: every type alias and static member forwards to the fftw_-prefixed counterpart. */ +template <> +class fftw_interface<double> +{ +public: + using real = double; + using complex = fftw_complex; + using plan = fftw_plan; + using iodim = fftw_iodim; + + static complex* alloc_complex(const size_t in_size){ + return fftw_alloc_complex(in_size); + } + + static real* alloc_real(const size_t in_size){ + return fftw_alloc_real(in_size); + } + + static void free(void* ptr){ + fftw_free(ptr); + } + + static void execute(plan in_plan){ + fftw_execute(in_plan); + } + + static void destroy_plan(plan in_plan){ + fftw_destroy_plan(in_plan); + } + + template <class ... Params> + static plan mpi_plan_transpose(Params ... params){ + return fftw_mpi_plan_transpose(params...); + } + + template <class ... Params> + static plan mpi_plan_many_transpose(Params ... params){ + return fftw_mpi_plan_many_transpose(params...); + } + + template <class ... Params> + static plan plan_guru_r2r(Params ... params){ + return fftw_plan_guru_r2r(params...); + } + + template <class ... Params> + static plan plan_guru_dft(Params ... params){ + return fftw_plan_guru_dft(params...); + } + + template <class ... Params> + static plan mpi_plan_many_dft_c2r(Params ... params){ + return fftw_mpi_plan_many_dft_c2r(params...); + } + + template <class ... Params> + static plan mpi_plan_many_dft_r2c(Params ... params){ + return fftw_mpi_plan_many_dft_r2c(params...); + } + + template <class ... Params> + static plan mpi_plan_dft_c2r_3d(Params ... 
params){ + return fftw_mpi_plan_dft_c2r_3d(params...); + } +}; + + + +#endif // FFTW_INTERFACE_HPP + diff --git a/bfps/cpp/fftw_tools.cpp b/bfps/cpp/fftw_tools.cpp index f6eacbf1dfe2dfe31e603e9239c42d4639327d3d..61e03d292f81aed1fa4b2dfcab880fb7105b676e 100644 --- a/bfps/cpp/fftw_tools.cpp +++ b/bfps/cpp/fftw_tools.cpp @@ -27,6 +27,7 @@ #include <iostream> #include "base.hpp" #include "fftw_tools.hpp" +#include "fftw_interface.hpp" #define NDEBUG @@ -51,150 +52,171 @@ int clip_zero_padding( return EXIT_SUCCESS; } +template +int clip_zero_padding<float>( + field_descriptor<float> *f, + float *a, + int howmany); +template +int clip_zero_padding<double>( + field_descriptor<double> *f, + double *a, + int howmany); + + + +template <class rnumber> +int copy_complex_array( + field_descriptor<rnumber> *fi, + rnumber (*ai)[2], +field_descriptor<rnumber> *fo, +rnumber (*ao)[2], +int howmany) +{ + DEBUG_MSG("entered copy_complex_array\n"); + typename fftw_interface<rnumber>::complex *buffer; + buffer = fftw_interface<rnumber>::alloc_complex(fi->slice_size*howmany); + + int min_fast_dim; + min_fast_dim = + (fi->sizes[2] > fo->sizes[2]) ? + fo->sizes[2] : fi->sizes[2]; -#define TOOLS_IMPLEMENTATION(FFTW, R, MPI_RNUM, MPI_CNUM) \ -template <> \ -int copy_complex_array<R>( \ - field_descriptor<R> *fi, \ - R (*ai)[2], \ - field_descriptor<R> *fo, \ - R (*ao)[2], \ - int howmany) \ -{ \ - DEBUG_MSG("entered copy_complex_array\n"); \ - FFTW(complex) *buffer; \ - buffer = FFTW(alloc_complex)(fi->slice_size*howmany); \ - \ - int min_fast_dim; \ - min_fast_dim = \ - (fi->sizes[2] > fo->sizes[2]) ? 
\ - fo->sizes[2] : fi->sizes[2]; \ - \ /* clean up destination, in case we're padding with zeros - (even if only for one dimension) */ \ - std::fill_n((R*)ao, fo->local_size*2, 0.0); \ - \ - int64_t ii0, ii1; \ - int64_t oi0, oi1; \ - int64_t delta1, delta0; \ - int irank, orank; \ - delta0 = (fo->sizes[0] - fi->sizes[0]); \ - delta1 = (fo->sizes[1] - fi->sizes[1]); \ - for (ii0=0; ii0 < fi->sizes[0]; ii0++) \ - { \ - if (ii0 <= fi->sizes[0]/2) \ - { \ - oi0 = ii0; \ - if (oi0 > fo->sizes[0]/2) \ - continue; \ - } \ - else \ - { \ - oi0 = ii0 + delta0; \ - if ((oi0 < 0) || ((fo->sizes[0] - oi0) >= fo->sizes[0]/2)) \ - continue; \ - } \ - irank = fi->rank[ii0]; \ - orank = fo->rank[oi0]; \ - if ((irank == orank) && \ - (irank == fi->myrank)) \ - { \ - std::copy( \ - (R*)(ai + (ii0 - fi->starts[0] )*fi->slice_size), \ - (R*)(ai + (ii0 - fi->starts[0] + 1)*fi->slice_size), \ - (R*)buffer); \ - } \ - else \ - { \ - if (fi->myrank == irank) \ - { \ - MPI_Send( \ - (void*)(ai + (ii0-fi->starts[0])*fi->slice_size), \ - fi->slice_size, \ - MPI_CNUM, \ - orank, \ - ii0, \ - fi->comm); \ - } \ - if (fi->myrank == orank) \ - { \ - MPI_Recv( \ - (void*)(buffer), \ - fi->slice_size, \ - MPI_CNUM, \ - irank, \ - ii0, \ - fi->comm, \ - MPI_STATUS_IGNORE); \ - } \ - } \ - if (fi->myrank == orank) \ - { \ - for (ii1 = 0; ii1 < fi->sizes[1]; ii1++) \ - { \ - if (ii1 <= fi->sizes[1]/2) \ - { \ - oi1 = ii1; \ - if (oi1 > fo->sizes[1]/2) \ - continue; \ - } \ - else \ - { \ - oi1 = ii1 + delta1; \ - if ((oi1 < 0) || ((fo->sizes[1] - oi1) >= fo->sizes[1]/2)) \ - continue; \ - } \ - std::copy( \ - (R*)(buffer + (ii1*fi->sizes[2]*howmany)), \ - (R*)(buffer + (ii1*fi->sizes[2] + min_fast_dim)*howmany), \ - (R*)(ao + \ - ((oi0 - fo->starts[0])*fo->sizes[1] + \ - oi1)*fo->sizes[2]*howmany)); \ - } \ - } \ - } \ - fftw_free(buffer); \ - MPI_Barrier(fi->comm); \ - \ - DEBUG_MSG("exiting copy_complex_array\n"); \ - return EXIT_SUCCESS; \ -} \ - \ -template <> \ -int get_descriptors_3D<R>( \ - 
int n0, int n1, int n2, \ - field_descriptor<R> **fr, \ - field_descriptor<R> **fc) \ -{ \ - int ntmp[3]; \ - ntmp[0] = n0; \ - ntmp[1] = n1; \ - ntmp[2] = n2; \ - *fr = new field_descriptor<R>(3, ntmp, MPI_RNUM, MPI_COMM_WORLD); \ - ntmp[0] = n0; \ - ntmp[1] = n1; \ - ntmp[2] = n2/2+1; \ - *fc = new field_descriptor<R>(3, ntmp, MPI_CNUM, MPI_COMM_WORLD); \ - return EXIT_SUCCESS; \ -} \ - \ -template \ -int clip_zero_padding<R>( \ - field_descriptor<R> *f, \ - R *a, \ - int howmany); \ - - - -TOOLS_IMPLEMENTATION( - FFTW_MANGLE_FLOAT, - float, - MPI_FLOAT, - MPI_COMPLEX) -TOOLS_IMPLEMENTATION( - FFTW_MANGLE_DOUBLE, - double, - MPI_DOUBLE, - BFPS_MPICXX_DOUBLE_COMPLEX) + (even if only for one dimension) */ + std::fill_n((rnumber*)ao, fo->local_size*2, 0.0); + + int64_t ii0, ii1; + int64_t oi0, oi1; + int64_t delta1, delta0; + int irank, orank; + delta0 = (fo->sizes[0] - fi->sizes[0]); + delta1 = (fo->sizes[1] - fi->sizes[1]); + for (ii0=0; ii0 < fi->sizes[0]; ii0++) + { + if (ii0 <= fi->sizes[0]/2) + { + oi0 = ii0; + if (oi0 > fo->sizes[0]/2) + continue; + } + else + { + oi0 = ii0 + delta0; + if ((oi0 < 0) || ((fo->sizes[0] - oi0) >= fo->sizes[0]/2)) + continue; + } + irank = fi->rank[ii0]; + orank = fo->rank[oi0]; + if ((irank == orank) && + (irank == fi->myrank)) + { + std::copy( + (rnumber*)(ai + (ii0 - fi->starts[0] )*fi->slice_size), + (rnumber*)(ai + (ii0 - fi->starts[0] + 1)*fi->slice_size), + (rnumber*)buffer); + } + else + { + if (fi->myrank == irank) + { + MPI_Send( + (void*)(ai + (ii0-fi->starts[0])*fi->slice_size), + fi->slice_size, + mpi_real_type<rnumber>::complex(), + orank, + ii0, + fi->comm); + } + if (fi->myrank == orank) + { + MPI_Recv( + (void*)(buffer), + fi->slice_size, + mpi_real_type<rnumber>::complex(), + irank, + ii0, + fi->comm, + MPI_STATUS_IGNORE); + } + } + if (fi->myrank == orank) + { + for (ii1 = 0; ii1 < fi->sizes[1]; ii1++) + { + if (ii1 <= fi->sizes[1]/2) + { + oi1 = ii1; + if (oi1 > fo->sizes[1]/2) + continue; + } + else + { + oi1 
= ii1 + delta1; + if ((oi1 < 0) || ((fo->sizes[1] - oi1) >= fo->sizes[1]/2)) + continue; + } + std::copy( + (rnumber*)(buffer + (ii1*fi->sizes[2]*howmany)), + (rnumber*)(buffer + (ii1*fi->sizes[2] + min_fast_dim)*howmany), + (rnumber*)(ao + + ((oi0 - fo->starts[0])*fo->sizes[1] + + oi1)*fo->sizes[2]*howmany)); + } + } + } + fftw_interface<rnumber>::free(buffer); + MPI_Barrier(fi->comm); + + DEBUG_MSG("exiting copy_complex_array\n"); + return EXIT_SUCCESS; +} + +template +int copy_complex_array<float>( + field_descriptor<float> *fi, + float (*ai)[2], + field_descriptor<float> *fo, + float (*ao)[2], + int howmany); + +template +int copy_complex_array<double>( + field_descriptor<double> *fi, + double (*ai)[2], + field_descriptor<double> *fo, + double (*ao)[2], + int howmany); + + +template <class rnumber> +int get_descriptors_3D( + int n0, int n1, int n2, + field_descriptor<rnumber> **fr, + field_descriptor<rnumber> **fc) +{ + int ntmp[3]; + ntmp[0] = n0; + ntmp[1] = n1; + ntmp[2] = n2; + *fr = new field_descriptor<rnumber>(3, ntmp, mpi_real_type<rnumber>::real(), MPI_COMM_WORLD); + ntmp[0] = n0; + ntmp[1] = n1; + ntmp[2] = n2/2+1; + *fc = new field_descriptor<rnumber>(3, ntmp, mpi_real_type<rnumber>::complex(), MPI_COMM_WORLD); + return EXIT_SUCCESS; +} + +template +int get_descriptors_3D<float>( + int n0, int n1, int n2, + field_descriptor<float> **fr, + field_descriptor<float> **fc); + +template +int get_descriptors_3D<double>( + int n0, int n1, int n2, + field_descriptor<double> **fr, + field_descriptor<double> **fc); diff --git a/bfps/cpp/field.cpp b/bfps/cpp/field.cpp index ad1e77f107113952bd84a5a3f72f2a5d64064f9a..cc28ced9c829d82132cf061affe5f4e5262f1d2e 100644 --- a/bfps/cpp/field.cpp +++ b/bfps/cpp/field.cpp @@ -23,87 +23,16 @@ **********************************************************************/ +#include <sys/stat.h> +#include <cmath> #include <cstdlib> #include <algorithm> #include <cassert> #include "field.hpp" +#include "scope_timer.hpp" +#include 
"shared_array.hpp" -template <field_components fc> -field_layout<fc>::field_layout( - const hsize_t *SIZES, - const hsize_t *SUBSIZES, - const hsize_t *STARTS, - const MPI_Comm COMM_TO_USE) -{ - this->comm = COMM_TO_USE; - MPI_Comm_rank(this->comm, &this->myrank); - MPI_Comm_size(this->comm, &this->nprocs); - std::copy(SIZES, SIZES + 3, this->sizes); - std::copy(SUBSIZES, SUBSIZES + 3, this->subsizes); - std::copy(STARTS, STARTS + 3, this->starts); - if (fc == THREE || fc == THREExTHREE) - { - this->sizes[3] = 3; - this->subsizes[3] = 3; - this->starts[3] = 0; - } - if (fc == THREExTHREE) - { - this->sizes[4] = 3; - this->subsizes[4] = 3; - this->starts[4] = 0; - } - this->local_size = 1; - this->full_size = 1; - for (unsigned int i=0; i<ndim(fc); i++) - { - this->local_size *= this->subsizes[i]; - this->full_size *= this->sizes[i]; - } - - /*field will at most be distributed in 2D*/ - this->rank.resize(2); - this->all_start.resize(2); - this->all_size.resize(2); - for (int i=0; i<2; i++) - { - this->rank[i].resize(this->sizes[i]); - std::vector<int> local_rank; - local_rank.resize(this->sizes[i], 0); - for (unsigned int ii=this->starts[i]; ii<this->starts[i]+this->subsizes[i]; ii++) - local_rank[ii] = this->myrank; - MPI_Allreduce( - &local_rank.front(), - &this->rank[i].front(), - this->sizes[i], - MPI_INT, - MPI_SUM, - this->comm); - this->all_start[i].resize(this->nprocs); - std::vector<int> local_start; - local_start.resize(this->nprocs, 0); - local_start[this->myrank] = this->starts[i]; - MPI_Allreduce( - &local_start.front(), - &this->all_start[i].front(), - this->nprocs, - MPI_INT, - MPI_SUM, - this->comm); - this->all_size[i].resize(this->nprocs); - std::vector<int> local_subsize; - local_subsize.resize(this->nprocs, 0); - local_subsize[this->myrank] = this->subsizes[i]; - MPI_Allreduce( - &local_subsize.front(), - &this->all_size[i].front(), - this->nprocs, - MPI_INT, - MPI_SUM, - this->comm); - } -} template <typename rnumber, field_backend be, @@ -115,6 
+44,7 @@ field<rnumber, be, fc>::field( const MPI_Comm COMM_TO_USE, const unsigned FFTW_PLAN_RIGOR) { + TIMEZONE("field::field"); this->comm = COMM_TO_USE; MPI_Comm_rank(this->comm, &this->myrank); MPI_Comm_size(this->comm, &this->nprocs); @@ -164,47 +94,28 @@ field<rnumber, be, fc>::field( starts[0] = local_0_start; starts[1] = 0; starts[2] = 0; this->rmemlayout = new field_layout<fc>( sizes, subsizes, starts, this->comm); - sizes[0] = nz; sizes[1] = ny; sizes[2] = nx/2+1; - subsizes[0] = local_n1; subsizes[1] = ny; subsizes[2] = nx/2+1; + sizes[0] = ny; sizes[1] = nz; sizes[2] = nx/2+1; + subsizes[0] = local_n1; subsizes[1] = nz; subsizes[2] = nx/2+1; starts[0] = local_1_start; starts[1] = 0; starts[2] = 0; this->clayout = new field_layout<fc>( sizes, subsizes, starts, this->comm); - this->data = (rnumber*)fftw_malloc( - sizeof(rnumber)*this->rmemlayout->local_size); - if(typeid(rnumber) == typeid(float)) - { - this->c2r_plan = new fftwf_plan; - this->r2c_plan = new fftwf_plan; - *((fftwf_plan*)this->c2r_plan) = fftwf_mpi_plan_many_dft_c2r( - 3, nfftw, ncomp(fc), - FFTW_MPI_DEFAULT_BLOCK, FFTW_MPI_DEFAULT_BLOCK, - (fftwf_complex*)this->data, (float*)this->data, - this->comm, - this->fftw_plan_rigor | FFTW_MPI_TRANSPOSED_IN); - *((fftwf_plan*)this->r2c_plan) = fftwf_mpi_plan_many_dft_r2c( - 3, nfftw, ncomp(fc), - FFTW_MPI_DEFAULT_BLOCK, FFTW_MPI_DEFAULT_BLOCK, - (float*)this->data, (fftwf_complex*)this->data, - this->comm, - this->fftw_plan_rigor | FFTW_MPI_TRANSPOSED_OUT); - } - if (typeid(rnumber) == typeid(double)) - { - this->c2r_plan = new fftw_plan; - this->r2c_plan = new fftw_plan; - *((fftw_plan*)this->c2r_plan) = fftw_mpi_plan_many_dft_c2r( - 3, nfftw, ncomp(fc), - FFTW_MPI_DEFAULT_BLOCK, FFTW_MPI_DEFAULT_BLOCK, - (fftw_complex*)this->data, (double*)this->data, - this->comm, - this->fftw_plan_rigor | FFTW_MPI_TRANSPOSED_IN); - *((fftw_plan*)this->r2c_plan) = fftw_mpi_plan_many_dft_r2c( - 3, nfftw, ncomp(fc), - FFTW_MPI_DEFAULT_BLOCK, 
FFTW_MPI_DEFAULT_BLOCK, - (double*)this->data, (fftw_complex*)this->data, - this->comm, - this->fftw_plan_rigor | FFTW_MPI_TRANSPOSED_OUT); - } + this->data = fftw_interface<rnumber>::alloc_real( + this->rmemlayout->local_size); + memset(this->data, 0, sizeof(rnumber)*this->rmemlayout->local_size); + this->c2r_plan = fftw_interface<rnumber>::mpi_plan_many_dft_c2r( + 3, nfftw, ncomp(fc), + FFTW_MPI_DEFAULT_BLOCK, FFTW_MPI_DEFAULT_BLOCK, + (typename fftw_interface<rnumber>::complex*)this->data, + this->data, + this->comm, + this->fftw_plan_rigor | FFTW_MPI_TRANSPOSED_IN); + this->r2c_plan = fftw_interface<rnumber>::mpi_plan_many_dft_r2c( + 3, nfftw, ncomp(fc), + FFTW_MPI_DEFAULT_BLOCK, FFTW_MPI_DEFAULT_BLOCK, + this->data, + (typename fftw_interface<rnumber>::complex*)this->data, + this->comm, + this->fftw_plan_rigor | FFTW_MPI_TRANSPOSED_OUT); break; } } @@ -223,21 +134,9 @@ field<rnumber, be, fc>::~field() delete this->rlayout; delete this->rmemlayout; delete this->clayout; - fftw_free(this->data); - if (typeid(rnumber) == typeid(float)) - { - fftwf_destroy_plan(*(fftwf_plan*)this->c2r_plan); - delete (fftwf_plan*)this->c2r_plan; - fftwf_destroy_plan(*(fftwf_plan*)this->r2c_plan); - delete (fftwf_plan*)this->r2c_plan; - } - else if (typeid(rnumber) == typeid(double)) - { - fftw_destroy_plan(*(fftw_plan*)this->c2r_plan); - delete (fftw_plan*)this->c2r_plan; - fftw_destroy_plan(*(fftw_plan*)this->r2c_plan); - delete (fftw_plan*)this->r2c_plan; - } + fftw_interface<rnumber>::free(this->data); + fftw_interface<rnumber>::destroy_plan(this->c2r_plan); + fftw_interface<rnumber>::destroy_plan(this->r2c_plan); break; } } @@ -247,10 +146,8 @@ template <typename rnumber, field_components fc> void field<rnumber, be, fc>::ift() { - if (typeid(rnumber) == typeid(float)) - fftwf_execute(*((fftwf_plan*)this->c2r_plan)); - else if (typeid(rnumber) == typeid(double)) - fftw_execute(*((fftw_plan*)this->c2r_plan)); + TIMEZONE("field::ift"); + 
fftw_interface<rnumber>::execute(this->c2r_plan); this->real_space_representation = true; } @@ -259,10 +156,8 @@ template <typename rnumber, field_components fc> void field<rnumber, be, fc>::dft() { - if (typeid(rnumber) == typeid(float)) - fftwf_execute(*((fftwf_plan*)this->r2c_plan)); - else if (typeid(rnumber) == typeid(double)) - fftw_execute(*((fftw_plan*)this->r2c_plan)); + TIMEZONE("field::dft"); + fftw_interface<rnumber>::execute(this->r2c_plan); this->real_space_representation = false; } @@ -271,59 +166,340 @@ template <typename rnumber, field_components fc> int field<rnumber, be, fc>::io( const std::string fname, - const std::string dset_name, + const std::string field_name, + const int iteration, + const bool read) +{ + /* file dataset has same dimensions as field */ + TIMEZONE("field::io"); + hid_t file_id, dset_id, plist_id; + dset_id = H5I_BADID; + std::string representation = std::string( + this->real_space_representation ? + "real" : "complex"); + std::string dset_name = ( + "/" + field_name + + "/" + representation + + "/" + std::to_string(iteration)); + + /* open/create file */ + plist_id = H5Pcreate(H5P_FILE_ACCESS); + H5Pset_fapl_mpio(plist_id, this->comm, MPI_INFO_NULL); + bool file_exists = false; + { + struct stat file_buffer; + file_exists = (stat(fname.c_str(), &file_buffer) == 0); + } + if (read) + { + assert(file_exists); + file_id = H5Fopen(fname.c_str(), H5F_ACC_RDONLY, plist_id); + } + else + { + if (file_exists) + file_id = H5Fopen(fname.c_str(), H5F_ACC_RDWR, plist_id); + else + file_id = H5Fcreate(fname.c_str(), H5F_ACC_EXCL, H5P_DEFAULT, plist_id); + } + assert(file_id >= 0); + H5Pclose(plist_id); + + /* check what kind of representation is being used */ + if (read) + { + dset_id = H5Dopen( + file_id, + dset_name.c_str(), + H5P_DEFAULT); + assert(dset_id >= 0); + hid_t dset_type = H5Dget_type(dset_id); + assert(dset_type >= 0); + bool io_for_real = ( + H5Tequal(dset_type, H5T_IEEE_F32BE) || + H5Tequal(dset_type, H5T_IEEE_F32LE) || 
+ H5Tequal(dset_type, H5T_INTEL_F32) || + H5Tequal(dset_type, H5T_NATIVE_FLOAT) || + H5Tequal(dset_type, H5T_IEEE_F64BE) || + H5Tequal(dset_type, H5T_IEEE_F64LE) || + H5Tequal(dset_type, H5T_INTEL_F64) || + H5Tequal(dset_type, H5T_NATIVE_DOUBLE)); + H5Tclose(dset_type); + assert(this->real_space_representation == io_for_real); + } + + /* generic space initialization */ + hid_t fspace, mspace; + hsize_t count[ndim(fc)], offset[ndim(fc)], dims[ndim(fc)]; + hsize_t memoffset[ndim(fc)], memshape[ndim(fc)]; + + if (this->real_space_representation) + { + for (unsigned int i=0; i<ndim(fc); i++) + { + count[i] = this->rlayout->subsizes[i]; + offset[i] = this->rlayout->starts[i]; + dims[i] = this->rlayout->sizes[i]; + memshape[i] = this->rmemlayout->subsizes[i]; + memoffset[i] = 0; + } + } + else + { + for (unsigned int i=0; i<ndim(fc); i++) + { + count [i] = this->clayout->subsizes[i]; + offset[i] = this->clayout->starts[i]; + dims [i] = this->clayout->sizes[i]; + memshape [i] = count[i]; + memoffset[i] = 0; + } + } + mspace = H5Screate_simple(ndim(fc), memshape, NULL); + H5Sselect_hyperslab(mspace, H5S_SELECT_SET, memoffset, NULL, count, NULL); + + /* open/create data set */ + if (read) + fspace = H5Dget_space(dset_id); + else + { + if (!H5Lexists(file_id, field_name.c_str(), H5P_DEFAULT)) + { + hid_t gid_tmp = H5Gcreate( + file_id, field_name.c_str(), + H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + H5Gclose(gid_tmp); + } + + if (!H5Lexists(file_id, (field_name + "/" + representation).c_str(), H5P_DEFAULT)) + { + hid_t gid_tmp = H5Gcreate( + file_id, ("/" + field_name + "/" + representation).c_str(), + H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + H5Gclose(gid_tmp); + } + if (H5Lexists(file_id, dset_name.c_str(), H5P_DEFAULT)) + { + dset_id = H5Dopen(file_id, dset_name.c_str(), H5P_DEFAULT); + fspace = H5Dget_space(dset_id); + } + else + { + fspace = H5Screate_simple( + ndim(fc), + dims, + NULL); + /* chunking needs to go in here */ + dset_id = H5Dcreate( + file_id, + 
dset_name.c_str(), + (this->real_space_representation ? this->rnumber_H5T : this->cnumber_H5T), + fspace, + H5P_DEFAULT, + H5P_DEFAULT, + H5P_DEFAULT); + } + } + /* both dset_id and fspace should now have sane values */ + + /* check file space */ + int ndims_fspace = H5Sget_simple_extent_dims(fspace, dims, NULL); + assert(((unsigned int)(ndims_fspace)) == ndim(fc)); + if (this->real_space_representation) + { + for (unsigned int i=0; i<ndim(fc); i++) + { + offset[i] = this->rlayout->starts[i]; + assert(dims[i] == this->rlayout->sizes[i]); + } + H5Sselect_hyperslab(fspace, H5S_SELECT_SET, offset, NULL, count, NULL); + if (read) + { + std::fill_n(this->data, this->rmemlayout->local_size, 0); + H5Dread(dset_id, this->rnumber_H5T, mspace, fspace, H5P_DEFAULT, this->data); + } + else + { + assert(this->real_space_representation); + H5Dwrite(dset_id, this->rnumber_H5T, mspace, fspace, H5P_DEFAULT, this->data); + } + H5Sclose(mspace); + } + else + { + for (unsigned int i=0; i<ndim(fc); i++) + { + offset[i] = this->clayout->starts[i]; + assert(dims[i] == this->clayout->sizes[i]); + } + H5Sselect_hyperslab(fspace, H5S_SELECT_SET, offset, NULL, count, NULL); + if (read) + { + std::fill_n(this->data, this->clayout->local_size*2, 0); + H5Dread(dset_id, this->cnumber_H5T, mspace, fspace, H5P_DEFAULT, this->data); + this->symmetrize(); + } + else + { + assert(!this->real_space_representation); + H5Dwrite(dset_id, this->cnumber_H5T, mspace, fspace, H5P_DEFAULT, this->data); + } + H5Sclose(mspace); + } + + H5Sclose(fspace); + /* close data set */ + H5Dclose(dset_id); + /* close file */ + H5Fclose(file_id); + return EXIT_SUCCESS; +} + +template <typename rnumber, + field_backend be, + field_components fc> +int field<rnumber, be, fc>::io_database( + const std::string fname, + const std::string field_name, const int toffset, const bool read) { + /* file dataset is has a time dimension as well */ + TIMEZONE("field::io_database"); hid_t file_id, dset_id, plist_id; - hid_t dset_type; - 
bool io_for_real = false; + dset_id = H5I_BADID; + std::string representation = std::string( + this->real_space_representation ? + "real" : "complex"); + std::string dset_name = ( + "/" + field_name + + "/" + representation); - /* open file */ + /* open/create file */ plist_id = H5Pcreate(H5P_FILE_ACCESS); H5Pset_fapl_mpio(plist_id, this->comm, MPI_INFO_NULL); + bool file_exists = false; + { + struct stat file_buffer; + file_exists = (stat(fname.c_str(), &file_buffer) == 0); + } if (read) + { + assert(file_exists); file_id = H5Fopen(fname.c_str(), H5F_ACC_RDONLY, plist_id); + } else - file_id = H5Fopen(fname.c_str(), H5F_ACC_RDWR, plist_id); + { + if (file_exists) + file_id = H5Fopen(fname.c_str(), H5F_ACC_RDWR, plist_id); + else + file_id = H5Fcreate(fname.c_str(), H5F_ACC_EXCL, H5P_DEFAULT, plist_id); + } H5Pclose(plist_id); - /* open data set */ - dset_id = H5Dopen(file_id, dset_name.c_str(), H5P_DEFAULT); - dset_type = H5Dget_type(dset_id); - io_for_real = ( - H5Tequal(dset_type, H5T_IEEE_F32BE) || - H5Tequal(dset_type, H5T_IEEE_F32LE) || - H5Tequal(dset_type, H5T_INTEL_F32) || - H5Tequal(dset_type, H5T_NATIVE_FLOAT) || - H5Tequal(dset_type, H5T_IEEE_F64BE) || - H5Tequal(dset_type, H5T_IEEE_F64LE) || - H5Tequal(dset_type, H5T_INTEL_F64) || - H5Tequal(dset_type, H5T_NATIVE_DOUBLE)); + /* check what kind of representation is being used */ + if (read) + { + dset_id = H5Dopen( + file_id, + dset_name.c_str(), + H5P_DEFAULT); + hid_t dset_type = H5Dget_type(dset_id); + bool io_for_real = ( + H5Tequal(dset_type, H5T_IEEE_F32BE) || + H5Tequal(dset_type, H5T_IEEE_F32LE) || + H5Tequal(dset_type, H5T_INTEL_F32) || + H5Tequal(dset_type, H5T_NATIVE_FLOAT) || + H5Tequal(dset_type, H5T_IEEE_F64BE) || + H5Tequal(dset_type, H5T_IEEE_F64LE) || + H5Tequal(dset_type, H5T_INTEL_F64) || + H5Tequal(dset_type, H5T_NATIVE_DOUBLE)); + H5Tclose(dset_type); + assert(this->real_space_representation == io_for_real); + } /* generic space initialization */ hid_t fspace, mspace; - fspace = 
H5Dget_space(dset_id); hsize_t count[ndim(fc)+1], offset[ndim(fc)+1], dims[ndim(fc)+1]; hsize_t memoffset[ndim(fc)+1], memshape[ndim(fc)+1]; - H5Sget_simple_extent_dims(fspace, dims, NULL); + + int dim_counter_offset = 1; + dim_counter_offset = 1; count[0] = 1; - offset[0] = toffset; memshape[0] = 1; memoffset[0] = 0; - if (io_for_real) + if (this->real_space_representation) { for (unsigned int i=0; i<ndim(fc); i++) { - count[i+1] = this->rlayout->subsizes[i]; - offset[i+1] = this->rlayout->starts[i]; - assert(dims[i+1] == this->rlayout->sizes[i]); - memshape[i+1] = this->rmemlayout->subsizes[i]; - memoffset[i+1] = 0; + count[i+dim_counter_offset] = this->rlayout->subsizes[i]; + offset[i+dim_counter_offset] = this->rlayout->starts[i]; + dims[i+dim_counter_offset] = this->rlayout->sizes[i]; + memshape[i+dim_counter_offset] = this->rmemlayout->subsizes[i]; + memoffset[i+dim_counter_offset] = 0; } - mspace = H5Screate_simple(ndim(fc)+1, memshape, NULL); - H5Sselect_hyperslab(fspace, H5S_SELECT_SET, offset, NULL, count, NULL); + mspace = H5Screate_simple(dim_counter_offset + ndim(fc), memshape, NULL); + H5Sselect_hyperslab(mspace, H5S_SELECT_SET, memoffset, NULL, count, NULL); + } + else + { + for (unsigned int i=0; i<ndim(fc); i++) + { + count[i+dim_counter_offset] = this->clayout->subsizes[i]; + offset[i+dim_counter_offset] = this->clayout->starts[i]; + dims[i+dim_counter_offset] = this->clayout->sizes[i]; + memshape[i+dim_counter_offset] = count[i+dim_counter_offset]; + memoffset[i+dim_counter_offset] = 0; + } + mspace = H5Screate_simple(dim_counter_offset + ndim(fc), memshape, NULL); H5Sselect_hyperslab(mspace, H5S_SELECT_SET, memoffset, NULL, count, NULL); + } + + /* open/create data set */ + if (read) + fspace = H5Dget_space(dset_id); + else + { + if (!H5Lexists(file_id, field_name.c_str(), H5P_DEFAULT)) + H5Gcreate( + file_id, field_name.c_str(), + H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + if (H5Lexists(file_id, dset_name.c_str(), H5P_DEFAULT)) + { + dset_id = 
H5Dopen(file_id, dset_name.c_str(), H5P_DEFAULT); + fspace = H5Dget_space(dset_id); + } + else + { + fspace = H5Screate_simple( + ndim(fc), + dims, + NULL); + /* chunking needs to go in here */ + dset_id = H5Dcreate( + file_id, + dset_name.c_str(), + (this->real_space_representation ? this->rnumber_H5T : this->cnumber_H5T), + fspace, + H5P_DEFAULT, + H5P_DEFAULT, + H5P_DEFAULT); + } + } + /* both dset_id and fspace should now have sane values */ + + /* check file space */ + int ndims_fspace = H5Sget_simple_extent_dims(fspace, dims, NULL); + assert(ndims_fspace == int(ndim(fc) + 1)); + offset[0] = toffset; + if (this->real_space_representation) + { + for (unsigned int i=0; i<ndim(fc); i++) + { + offset[i+dim_counter_offset] = this->rlayout->starts[i]; + assert(dims[i+dim_counter_offset] == this->rlayout->sizes[i]); + } + H5Sselect_hyperslab(fspace, H5S_SELECT_SET, offset, NULL, count, NULL); if (read) { std::fill_n(this->data, this->rmemlayout->local_size, 0); @@ -332,13 +508,8 @@ int field<rnumber, be, fc>::io( } else { + assert(this->real_space_representation); H5Dwrite(dset_id, this->rnumber_H5T, mspace, fspace, H5P_DEFAULT, this->data); - if (!this->real_space_representation) - /* in principle we could do an inverse Fourier transform in here, - * however that would be unsafe since we wouldn't know whether we'd need to - * normalize or not. - * */ - DEBUG_MSG("I just wrote complex field into real space dataset. 
It's probably nonsense.\n"); } H5Sclose(mspace); } @@ -346,30 +517,24 @@ int field<rnumber, be, fc>::io( { for (unsigned int i=0; i<ndim(fc); i++) { - count[i+1] = this->clayout->subsizes[i]; - offset[i+1] = this->clayout->starts[i]; - assert(dims[i+1] == this->clayout->sizes[i]); - memshape[i+1] = count[i+1]; - memoffset[i+1] = 0; + offset[i+dim_counter_offset] = this->clayout->starts[i]; + assert(dims[i+dim_counter_offset] == this->clayout->sizes[i]); } - mspace = H5Screate_simple(ndim(fc)+1, memshape, NULL); H5Sselect_hyperslab(fspace, H5S_SELECT_SET, offset, NULL, count, NULL); - H5Sselect_hyperslab(mspace, H5S_SELECT_SET, memoffset, NULL, count, NULL); if (read) { H5Dread(dset_id, this->cnumber_H5T, mspace, fspace, H5P_DEFAULT, this->data); this->real_space_representation = false; + this->symmetrize(); } else { + assert(!this->real_space_representation); H5Dwrite(dset_id, this->cnumber_H5T, mspace, fspace, H5P_DEFAULT, this->data); - if (this->real_space_representation) - DEBUG_MSG("I just wrote real space field into complex dataset. 
It's probably nonsense.\n"); } H5Sclose(mspace); } - H5Tclose(dset_type); H5Sclose(fspace); /* close data set */ H5Dclose(dset_id); @@ -378,17 +543,129 @@ int field<rnumber, be, fc>::io( return EXIT_SUCCESS; } + template <typename rnumber, field_backend be, field_components fc> -void field<rnumber, be, fc>::compute_rspace_stats( +int field<rnumber, be, fc>::write_0slice( + const hid_t group, + const std::string field_name, + const int iteration) +{ + // this should in principle work for any fc + TIMEZONE("field::write_0slice"); + assert(this->real_space_representation); + if (this->myrank == 0) + { + hid_t dset, wspace, mspace; + int ndims; + hsize_t count[5], offset[5], dims[5]; + offset[0] = iteration; + offset[1] = 0; + offset[2] = 0; + offset[3] = 0; + offset[4] = 0; + dset = H5Dopen( + group, + ("0slices/" + field_name + "/real").c_str(), + H5P_DEFAULT); + wspace = H5Dget_space(dset); + ndims = H5Sget_simple_extent_dims(wspace, dims, NULL); + // array in memory has 2 extra x points, because FFTW + count[0] = 1; + count[1] = this->rmemlayout->sizes[1]; + count[2] = this->rmemlayout->sizes[2]; + count[3] = 3; + count[3] = 3; + mspace = H5Screate_simple(ndims, count, NULL); + // array in file should not have the extra 2 points + count[1] = this->rlayout->sizes[1]; + count[2] = this->rlayout->sizes[2]; + // select right slice in file + H5Sselect_hyperslab( + wspace, + H5S_SELECT_SET, + offset, + NULL, + count, + NULL); + offset[0] = 0; + // select proper regions of memory + H5Sselect_hyperslab( + mspace, + H5S_SELECT_SET, + offset, + NULL, + count, + NULL); + H5Dwrite( + dset, + this->rnumber_H5T, + mspace, + wspace, + H5P_DEFAULT, + this->data); + H5Dclose(dset); + H5Sclose(mspace); + H5Sclose(wspace); + } + return EXIT_SUCCESS; +} + + +template <typename rnumber, + field_backend be, + field_components fc> +void field<rnumber, be, fc>::compute_rspace_xincrement_stats( + const int xcells, const hid_t group, const std::string dset_name, const hsize_t toffset, const 
std::vector<double> max_estimate) { + TIMEZONE("field::compute_rspace_xincrement_stats"); assert(this->real_space_representation); assert(fc == ONE || fc == THREE); + field<rnumber, be, fc> *tmp_field = new field<rnumber, be, fc>( + this->rlayout->sizes[2], + this->rlayout->sizes[1], + this->rlayout->sizes[0], + this->rlayout->comm); + tmp_field->real_space_representation = true; + this->RLOOP( + [&](ptrdiff_t rindex, + ptrdiff_t xindex, + ptrdiff_t yindex, + ptrdiff_t zindex){ + hsize_t rrindex = (xindex + xcells)%this->rlayout->sizes[2] + ( + zindex * this->rlayout->subsizes[1] + yindex)*( + this->rmemlayout->subsizes[2]); + for (unsigned int component=0; component < ncomp(fc); component++) + tmp_field->data[rindex*ncomp(fc) + component] = + this->data[rrindex*ncomp(fc) + component] - + this->data[rindex*ncomp(fc) + component]; + }); + tmp_field->compute_rspace_stats( + group, + dset_name, + toffset, + max_estimate); + delete tmp_field; +} + + + +template <typename rnumber, + field_backend be, + field_components fc> +void field<rnumber, be, fc>::compute_rspace_stats( + const hid_t group, + const std::string dset_name, + const hsize_t toffset, + const std::vector<double> max_estimate) +{ + TIMEZONE("field::compute_rspace_stats"); + assert(this->real_space_representation); const unsigned int nmoments = 10; int nvals, nbins; if (this->myrank == 0) @@ -427,25 +704,46 @@ void field<rnumber, be, fc>::compute_rspace_stats( H5Sclose(wspace); H5Dclose(dset); } - MPI_Bcast(&nvals, 1, MPI_INT, 0, this->comm); - MPI_Bcast(&nbins, 1, MPI_INT, 0, this->comm); + { + TIMEZONE("MPI_Bcast"); + MPI_Bcast(&nvals, 1, MPI_INT, 0, this->comm); + MPI_Bcast(&nbins, 1, MPI_INT, 0, this->comm); + } assert(nvals == int(max_estimate.size())); - double *moments = new double[nmoments*nvals]; - double *local_moments = new double[nmoments*nvals]; - double *val_tmp = new double[nvals]; + + shared_array<double> local_moments_threaded(nmoments*nvals, [&](double* local_moments){ + 
std::fill_n(local_moments, nmoments*nvals, 0); + if (nvals == 4) local_moments[3] = max_estimate[3]; + }); + + shared_array<double> val_tmp_threaded(nvals,[&](double *val_tmp){ + std::fill_n(val_tmp, nvals, 0); + }); + + shared_array<ptrdiff_t> local_hist_threaded(nbins*nvals,[&](ptrdiff_t* local_hist){ + std::fill_n(local_hist, nbins*nvals, 0); + }); + + shared_array<double> local_pow_tmp(nvals); + double *binsize = new double[nvals]; - double *pow_tmp = new double[nvals]; - ptrdiff_t *hist = new ptrdiff_t[nbins*nvals]; - ptrdiff_t *local_hist = new ptrdiff_t[nbins*nvals]; - int bin; for (int i=0; i<nvals; i++) binsize[i] = 2*max_estimate[i] / nbins; - std::fill_n(local_hist, nbins*nvals, 0); - std::fill_n(local_moments, nmoments*nvals, 0); - if (nvals == 4) local_moments[3] = max_estimate[3]; - FIELD_RLOOP( - this, - std::fill_n(pow_tmp, nvals, 1.0); + + { + TIMEZONE("field::RLOOP"); + this->RLOOP( + [&](ptrdiff_t rindex, + ptrdiff_t xindex, + ptrdiff_t yindex, + ptrdiff_t zindex){ + double* pow_tmp = local_pow_tmp.getMine(); + std::fill_n(pow_tmp, nvals, 1); + + double *local_moments = local_moments_threaded.getMine(); + double *val_tmp = val_tmp_threaded.getMine(); + ptrdiff_t *local_hist = local_hist_threaded.getMine(); + if (nvals == int(4)) val_tmp[3] = 0.0; for (unsigned int i=0; i<ncomp(fc); i++) { @@ -459,9 +757,10 @@ void field<rnumber, be, fc>::compute_rspace_stats( local_moments[0*nvals+3] = val_tmp[3]; if (val_tmp[3] > local_moments[9*nvals+3]) local_moments[9*nvals+3] = val_tmp[3]; - bin = int(floor(val_tmp[3]*2/binsize[3])); - if (bin >= 0 && bin < nbins) + int bin = int(floor(val_tmp[3]*2/binsize[3])); + if (bin >= 0 && bin < nbins){ local_hist[bin*nvals+3]++; + } } for (unsigned int i=0; i<ncomp(fc); i++) { @@ -469,47 +768,73 @@ void field<rnumber, be, fc>::compute_rspace_stats( local_moments[0*nvals+i] = val_tmp[i]; if (val_tmp[i] > local_moments[(nmoments-1)*nvals+i]) local_moments[(nmoments-1)*nvals+i] = val_tmp[i]; - bin = 
int(floor((val_tmp[i] + max_estimate[i]) / binsize[i])); + int bin = int(floor((val_tmp[i] + max_estimate[i]) / binsize[i])); if (bin >= 0 && bin < nbins) local_hist[bin*nvals+i]++; } - for (int n=1; n < int(nmoments)-1; n++) - for (int i=0; i<nvals; i++) + for (int n=1; n < int(nmoments)-1; n++){ + for (int i=0; i<nvals; i++){ local_moments[n*nvals + i] += (pow_tmp[i] = val_tmp[i]*pow_tmp[i]); - ); - MPI_Allreduce( - (void*)local_moments, - (void*)moments, - nvals, - MPI_DOUBLE, MPI_MIN, this->comm); - MPI_Allreduce( - (void*)(local_moments + nvals), - (void*)(moments+nvals), - (nmoments-2)*nvals, - MPI_DOUBLE, MPI_SUM, this->comm); - MPI_Allreduce( - (void*)(local_moments + (nmoments-1)*nvals), - (void*)(moments+(nmoments-1)*nvals), - nvals, - MPI_DOUBLE, MPI_MAX, this->comm); - MPI_Allreduce( - (void*)local_hist, - (void*)hist, - nbins*nvals, - MPI_INT64_T, MPI_SUM, this->comm); + } + } + }); + + TIMEZONE("FIELD_RLOOP::Merge"); + local_moments_threaded.mergeParallel([&](const int idx, const double& v1, const double& v2) -> double { + if(nvals == int(4) && idx == 0*nvals+3){ + return std::min(v1, v2); + } + if(nvals == int(4) && idx == 9*nvals+3){ + return std::max(v1, v2); + } + if(idx < int(ncomp(fc))){ + return std::min(v1, v2); + } + if(int(nmoments-1)*nvals <= idx && idx < int(int(nmoments-1)*nvals+ncomp(fc))){ + return std::max(v1, v2); + } + return v1 + v2; + }); + + local_hist_threaded.mergeParallel(); + } + ptrdiff_t *hist = new ptrdiff_t[nbins*nvals]; + double *moments = new double[nmoments*nvals]; + { + TIMEZONE("MPI_Allreduce"); + MPI_Allreduce( + (void*)local_moments_threaded.getMasterData(), + (void*)moments, + nvals, + MPI_DOUBLE, MPI_MIN, this->comm); + MPI_Allreduce( + (void*)(local_moments_threaded.getMasterData() + nvals), + (void*)(moments+nvals), + (nmoments-2)*nvals, + MPI_DOUBLE, MPI_SUM, this->comm); + MPI_Allreduce( + (void*)(local_moments_threaded.getMasterData() + (nmoments-1)*nvals), + (void*)(moments+(nmoments-1)*nvals), + nvals, + 
MPI_DOUBLE, MPI_MAX, this->comm); + MPI_Allreduce( + (void*)local_hist_threaded.getMasterData(), + (void*)hist, + nbins*nvals, + MPI_INT64_T, MPI_SUM, this->comm); + } for (int n=1; n < int(nmoments)-1; n++) for (int i=0; i<nvals; i++) moments[n*nvals + i] /= this->npoints; - delete[] local_moments; - delete[] local_hist; - delete[] val_tmp; + delete[] binsize; - delete[] pow_tmp; if (this->myrank == 0) { + TIMEZONE("root-work"); hid_t dset, wspace, mspace; hsize_t count[ndim(fc)-1], offset[ndim(fc)-1], dims[ndim(fc)-1]; dset = H5Dopen(group, ("moments/" + dset_name).c_str(), H5P_DEFAULT); + assert(dset>0); wspace = H5Dget_space(dset); H5Sget_simple_extent_dims(wspace, dims, NULL); offset[0] = toffset; @@ -535,6 +860,7 @@ void field<rnumber, be, fc>::compute_rspace_stats( H5Sclose(mspace); H5Dclose(dset); dset = H5Dopen(group, ("histograms/" + dset_name).c_str(), H5P_DEFAULT); + assert(dset > 0); wspace = H5Dget_space(dset); count[1] = nbins; mspace = H5Screate_simple(ndim(fc)-1, count, NULL); @@ -543,6 +869,20 @@ void field<rnumber, be, fc>::compute_rspace_stats( H5Sclose(wspace); H5Sclose(mspace); H5Dclose(dset); + if (H5Lexists( + group, + "0slices", + H5P_DEFAULT)) + { + if (H5Lexists( + group, + (std::string("0slices/") + dset_name).c_str(), + H5P_DEFAULT)) + this->write_0slice( + group, + dset_name, + toffset); + } } delete[] moments; delete[] hist; @@ -557,6 +897,86 @@ void field<rnumber, be, fc>::normalize() this->data[tmp_index] /= this->npoints; } +template <typename rnumber, + field_backend be, + field_components fc> +void field<rnumber, be, fc>::symmetrize() +{ + TIMEZONE("field::symmetrize"); + assert(!this->real_space_representation); + ptrdiff_t ii, cc; + typename fftw_interface<rnumber>::complex *data = this->get_cdata(); + MPI_Status *mpistatus = new MPI_Status; + if (this->myrank == this->clayout->rank[0][0]) + { + for (cc = 0; cc < ncomp(fc); cc++) + data[cc][1] = 0.0; + for (ii = 1; ii < ptrdiff_t(this->clayout->sizes[1]/2); ii++) + for (cc = 0; 
cc < ncomp(fc); cc++) { + ( *(data + cc + ncomp(fc)*(this->clayout->sizes[1] - ii)*this->clayout->sizes[2]))[0] = + (*(data + cc + ncomp(fc)*( ii)*this->clayout->sizes[2]))[0]; + ( *(data + cc + ncomp(fc)*(this->clayout->sizes[1] - ii)*this->clayout->sizes[2]))[1] = + -(*(data + cc + ncomp(fc)*( ii)*this->clayout->sizes[2]))[1]; + } + } + typename fftw_interface<rnumber>::complex *buffer; + buffer = fftw_interface<rnumber>::alloc_complex(ncomp(fc)*this->clayout->sizes[1]); + ptrdiff_t yy; + /*ptrdiff_t tindex;*/ + int ranksrc, rankdst; + for (yy = 1; yy < ptrdiff_t(this->clayout->sizes[0]/2); yy++) { + ranksrc = this->clayout->rank[0][yy]; + rankdst = this->clayout->rank[0][this->clayout->sizes[0] - yy]; + if (this->clayout->myrank == ranksrc) + for (ii = 0; ii < ptrdiff_t(this->clayout->sizes[1]); ii++) + for (cc = 0; cc < ncomp(fc); cc++) + for (int imag_comp=0; imag_comp<2; imag_comp++) + (*(buffer + ncomp(fc)*ii+cc))[imag_comp] = + (*(data + ncomp(fc)*((yy - this->clayout->starts[0])*this->clayout->sizes[1] + ii)*this->clayout->sizes[2] + cc))[imag_comp]; + if (ranksrc != rankdst) + { + if (this->clayout->myrank == ranksrc) + MPI_Send((void*)buffer, + ncomp(fc)*this->clayout->sizes[1], mpi_real_type<rnumber>::complex(), rankdst, yy, + this->clayout->comm); + if (this->clayout->myrank == rankdst) + MPI_Recv((void*)buffer, + ncomp(fc)*this->clayout->sizes[1], mpi_real_type<rnumber>::complex(), ranksrc, yy, + this->clayout->comm, mpistatus); + } + if (this->clayout->myrank == rankdst) + { + for (ii = 1; ii < ptrdiff_t(this->clayout->sizes[1]); ii++) + for (cc = 0; cc < ncomp(fc); cc++) + { + (*(data + ncomp(fc)*((this->clayout->sizes[0] - yy - this->clayout->starts[0])*this->clayout->sizes[1] + ii)*this->clayout->sizes[2] + cc))[0] = + (*(buffer + ncomp(fc)*(this->clayout->sizes[1]-ii)+cc))[0]; + (*(data + ncomp(fc)*((this->clayout->sizes[0] - yy - this->clayout->starts[0])*this->clayout->sizes[1] + ii)*this->clayout->sizes[2] + cc))[1] = + -(*(buffer + 
ncomp(fc)*(this->clayout->sizes[1]-ii)+cc))[1]; + } + for (cc = 0; cc < ncomp(fc); cc++) + { + (*((data + cc + ncomp(fc)*(this->clayout->sizes[0] - yy - this->clayout->starts[0])*this->clayout->sizes[1]*this->clayout->sizes[2])))[0] = (*(buffer + cc))[0]; + (*((data + cc + ncomp(fc)*(this->clayout->sizes[0] - yy - this->clayout->starts[0])*this->clayout->sizes[1]*this->clayout->sizes[2])))[1] = -(*(buffer + cc))[1]; + } + } + } + fftw_interface<rnumber>::free(buffer); + delete mpistatus; + /* put asymmetric data to 0 */ + /*if (this->clayout->myrank == this->clayout->rank[0][this->clayout->sizes[0]/2]) + { + tindex = ncomp(fc)*(this->clayout->sizes[0]/2 - this->clayout->starts[0])*this->clayout->sizes[1]*this->clayout->sizes[2]; + for (ii = 0; ii < this->clayout->sizes[1]; ii++) + { + std::fill_n((rnumber*)(data + tindex), ncomp(fc)*2*this->clayout->sizes[2], 0.0); + tindex += ncomp(fc)*this->clayout->sizes[2]; + } + } + tindex = ncomp(fc)*(); + std::fill_n((rnumber*)(data + tindex), ncomp(fc)*2, 0.0);*/ +} + template <typename rnumber, field_backend be, field_components fc> @@ -568,6 +988,7 @@ void field<rnumber, be, fc>::compute_stats( const hsize_t toffset, const double max_estimate) { + TIMEZONE("field::compute_stats"); std::vector<double> max_estimate_vector; bool did_rspace = false; switch(fc) @@ -585,6 +1006,7 @@ void field<rnumber, be, fc>::compute_stats( } if (this->real_space_representation) { + TIMEZONE("field::compute_stats::compute_rspace_stats"); this->compute_rspace_stats( group, dset_name, @@ -593,14 +1015,15 @@ void field<rnumber, be, fc>::compute_stats( did_rspace = true; this->dft(); // normalize + TIMEZONE("field::normalize"); for (hsize_t tmp_index=0; tmp_index<this->rmemlayout->local_size; tmp_index++) this->data[tmp_index] /= this->npoints; } // what follows gave me a headache until I found this link: // http://stackoverflow.com/questions/8256636/expected-primary-expression-error-on-template-method-using kk->template cospectrum<rnumber, fc>( 
- (cnumber*)this->data, - (cnumber*)this->data, + (typename fftw_interface<rnumber>::complex*)this->data, + (typename fftw_interface<rnumber>::complex*)this->data, group, dset_name + "_" + dset_name, toffset); @@ -616,218 +1039,100 @@ void field<rnumber, be, fc>::compute_stats( } } -template <field_backend be, +template <typename rnumber, + field_backend be, + field_components fc1, + field_components fc2, kspace_dealias_type dt> -template <field_components fc> -kspace<be, dt>::kspace( - const field_layout<fc> *source_layout, - const double DKX, - const double DKY, - const double DKZ) +int compute_gradient( + kspace<be, dt> *kk, + field<rnumber, be, fc1> *src, + field<rnumber, be, fc2> *dst) { - /* get layout */ - this->layout = new field_layout<ONE>( - source_layout->sizes, - source_layout->subsizes, - source_layout->starts, - source_layout->comm); - - /* store dk values */ - this->dkx = DKX; - this->dky = DKY; - this->dkz = DKZ; - - /* compute kx, ky, kz and compute kM values */ - switch(be) - { - case FFTW: - this->kx.resize(this->layout->sizes[2]); - this->ky.resize(this->layout->subsizes[0]); - this->kz.resize(this->layout->sizes[1]); - int i, ii; - for (i = 0; i<int(this->layout->sizes[2]); i++) - this->kx[i] = i*this->dkx; - for (i = 0; i<int(this->layout->subsizes[0]); i++) + TIMEZONE("compute_gradient"); + assert(!src->real_space_representation); + assert((fc1 == ONE && fc2 == THREE) || + (fc1 == THREE && fc2 == THREExTHREE)); + std::fill_n(dst->get_rdata(), dst->rmemlayout->local_size, 0); + dst->real_space_representation = false; + switch(fc1) { - ii = i + this->layout->starts[0]; - if (ii <= int(this->layout->sizes[1]/2)) - this->ky[i] = this->dky*ii; - else - this->ky[i] = this->dky*(ii - int(this->layout->sizes[1])); - } - for (i = 0; i<int(this->layout->sizes[1]); i++) - { - if (i <= int(this->layout->sizes[0]/2)) - this->kz[i] = this->dkz*i; - else - this->kz[i] = this->dkz*(i - int(this->layout->sizes[0])); - } - switch(dt) + case ONE: + 
kk->CLOOP_K2( + [&](ptrdiff_t cindex, + ptrdiff_t xindex, + ptrdiff_t yindex, + ptrdiff_t zindex, + double k2){ + if (k2 < kk->kM2) { - case TWO_THIRDS: - this->kMx = this->dkx*(int(2*(int(this->layout->sizes[2])-1)/3)-1); - this->kMy = this->dky*(int(this->layout->sizes[0] / 3)-1); - this->kMz = this->dkz*(int(this->layout->sizes[1] / 3)-1); + dst->cval(cindex, 0, 0) = -kk->kx[xindex]*src->cval(cindex, 1); + dst->cval(cindex, 0, 1) = kk->kx[xindex]*src->cval(cindex, 0); + dst->cval(cindex, 1, 0) = -kk->ky[yindex]*src->cval(cindex, 1); + dst->cval(cindex, 1, 1) = kk->ky[yindex]*src->cval(cindex, 0); + dst->cval(cindex, 2, 0) = -kk->kz[zindex]*src->cval(cindex, 1); + dst->cval(cindex, 2, 1) = kk->kz[zindex]*src->cval(cindex, 0); + }}); break; - case SMOOTH: - this->kMx = this->dkx*(int(this->layout->sizes[2])-2); - this->kMy = this->dky*(int(this->layout->sizes[0] / 2)-1); - this->kMz = this->dkz*(int(this->layout->sizes[1] / 2)-1); - break; - } - break; - } - - /* get global kM and dk */ - this->kM = this->kMx; - if (this->kM < this->kMy) this->kM = this->kMy; - if (this->kM < this->kMz) this->kM = this->kMz; - this->kM2 = this->kM * this->kM; - this->dk = this->dkx; - if (this->dk > this->dky) this->dk = this->dky; - if (this->dk > this->dkz) this->dk = this->dkz; - this->dk2 = this->dk*this->dk; - - /* spectra stuff */ - this->nshells = int(this->kM / this->dk) + 2; - this->kshell.resize(this->nshells, 0); - this->nshell.resize(this->nshells, 0); - std::vector<double> kshell_local; - kshell_local.resize(this->nshells, 0); - std::vector<int64_t> nshell_local; - nshell_local.resize(this->nshells, 0); - double knorm; - KSPACE_CLOOP_K2_NXMODES( - this, - if (k2 < this->kM2) + case THREE: + kk->CLOOP_K2( + [&](ptrdiff_t cindex, + ptrdiff_t xindex, + ptrdiff_t yindex, + ptrdiff_t zindex, + double k2){ + if (k2 < kk->kM2) { - knorm = sqrt(k2); - nshell_local[int(knorm/this->dk)] += nxmodes; - kshell_local[int(knorm/this->dk)] += nxmodes*knorm; + for (unsigned int 
field_component = 0; + field_component < ncomp(fc1); + field_component++) + { + dst->cval(cindex, 0, field_component, 0) = -kk->kx[xindex]*src->cval(cindex, field_component, 1); + dst->cval(cindex, 0, field_component, 1) = kk->kx[xindex]*src->cval(cindex, field_component, 0); + dst->cval(cindex, 1, field_component, 0) = -kk->ky[yindex]*src->cval(cindex, field_component, 1); + dst->cval(cindex, 1, field_component, 1) = kk->ky[yindex]*src->cval(cindex, field_component, 0); + dst->cval(cindex, 2, field_component, 0) = -kk->kz[zindex]*src->cval(cindex, field_component, 1); + dst->cval(cindex, 2, field_component, 1) = kk->kz[zindex]*src->cval(cindex, field_component, 0); + } + }}); + break; } - if (dt == TWO_THIRDS) - this->dealias_filter[int(round(k2 / this->dk2))] = exp(-36.0 * pow(k2/this->kM2, 18.)); - ); - MPI_Allreduce( - &nshell_local.front(), - &this->nshell.front(), - this->nshells, - MPI_INT64_T, MPI_SUM, this->layout->comm); - MPI_Allreduce( - &kshell_local.front(), - &this->kshell.front(), - this->nshells, - MPI_DOUBLE, MPI_SUM, this->layout->comm); - for (int n=0; n<this->nshells; n++) - this->kshell[n] /= this->nshell[n]; -} - -template <field_backend be, - kspace_dealias_type dt> -kspace<be, dt>::~kspace() -{ - delete this->layout; -} - -template <field_backend be, - kspace_dealias_type dt> -template <typename rnumber, - field_components fc> -void kspace<be, dt>::low_pass(rnumber *__restrict__ a, const double kmax) -{ - const double km2 = kmax*kmax; - KSPACE_CLOOP_K2( - this, - if (k2 >= km2) - std::fill_n(a + 2*ncomp(fc)*cindex, 2*ncomp(fc), 0); - ); + return EXIT_SUCCESS; } -template <field_backend be, - kspace_dealias_type dt> template <typename rnumber, - field_components fc> -void kspace<be, dt>::dealias(rnumber *__restrict__ a) -{ - switch(be) - { - case TWO_THIRDS: - this->low_pass<rnumber, fc>(a, this->kM); - break; - case SMOOTH: - KSPACE_CLOOP_K2( - this, - double tval = this->dealias_filter[int(round(k2 / this->dk2))]; - for (int tcounter=0; 
tcounter<2*ncomp(fc); tcounter++) - a[2*ncomp(fc)*cindex + tcounter] *= tval; - ); - break; - } -} - -template <field_backend be, + field_backend be, kspace_dealias_type dt> -template <typename rnumber, - field_components fc> -void kspace<be, dt>::cospectrum( - const rnumber(* __restrict a)[2], - const rnumber(* __restrict b)[2], - const hid_t group, - const std::string dset_name, - const hsize_t toffset) +int invert_curl( + kspace<be, dt> *kk, + field<rnumber, be, THREE> *src, + field<rnumber, be, THREE> *dst) { - std::vector<double> spec, spec_local; - spec.resize(this->nshells*ncomp(fc)*ncomp(fc), 0); - spec_local.resize(this->nshells*ncomp(fc)*ncomp(fc), 0); - KSPACE_CLOOP_K2_NXMODES( - this, - if (k2 <= this->kM2) - { - int tmp_int = int(sqrt(k2) / this->dk)*ncomp(fc)*ncomp(fc); - for (hsize_t i=0; i<ncomp(fc); i++) - for (hsize_t j=0; j<ncomp(fc); j++) - spec_local[tmp_int + i*ncomp(fc)+j] += nxmodes * ( - (a[ncomp(fc)*cindex + i][0] * b[ncomp(fc)*cindex + j][0]) + - (a[ncomp(fc)*cindex + i][1] * b[ncomp(fc)*cindex + j][1])); - } - ); - MPI_Allreduce( - &spec_local.front(), - &spec.front(), - spec.size(), - MPI_DOUBLE, MPI_SUM, this->layout->comm); - if (this->layout->myrank == 0) - { - hid_t dset, wspace, mspace; - hsize_t count[(ndim(fc)-2)*2], offset[(ndim(fc)-2)*2], dims[(ndim(fc)-2)*2]; - dset = H5Dopen(group, ("spectra/" + dset_name).c_str(), H5P_DEFAULT); - wspace = H5Dget_space(dset); - H5Sget_simple_extent_dims(wspace, dims, NULL); - switch (fc) + TIMEZONE("invert_curl"); + assert(!src->real_space_representation); + std::fill_n(dst->get_rdata(), dst->rmemlayout->local_size, 0); + dst->real_space_representation = false; + kk->CLOOP_K2( + [&](ptrdiff_t cindex, + ptrdiff_t xindex, + ptrdiff_t yindex, + ptrdiff_t zindex, + double k2){ + if (k2 <= kk->kM2 && k2 > 0) { - case THREExTHREE: - offset[4] = 0; - offset[5] = 0; - count[4] = ncomp(fc); - count[5] = ncomp(fc); - case THREE: - offset[2] = 0; - offset[3] = 0; - count[2] = ncomp(fc); - count[3] = 
ncomp(fc); - default: - offset[0] = toffset; - offset[1] = 0; - count[0] = 1; - count[1] = this->nshells; + dst->cval(cindex,0,0) = -(kk->ky[yindex]*src->cval(cindex,2,1) - kk->kz[zindex]*src->cval(cindex,1,1)) / k2; + dst->cval(cindex,0,1) = (kk->ky[yindex]*src->cval(cindex,2,0) - kk->kz[zindex]*src->cval(cindex,1,0)) / k2; + dst->cval(cindex,1,0) = -(kk->kz[zindex]*src->cval(cindex,0,1) - kk->kx[xindex]*src->cval(cindex,2,1)) / k2; + dst->cval(cindex,1,1) = (kk->kz[zindex]*src->cval(cindex,0,0) - kk->kx[xindex]*src->cval(cindex,2,0)) / k2; + dst->cval(cindex,2,0) = -(kk->kx[xindex]*src->cval(cindex,1,1) - kk->ky[yindex]*src->cval(cindex,0,1)) / k2; + dst->cval(cindex,2,1) = (kk->kx[xindex]*src->cval(cindex,1,0) - kk->ky[yindex]*src->cval(cindex,0,0)) / k2; } - mspace = H5Screate_simple((ndim(fc)-2)*2, count, NULL); - H5Sselect_hyperslab(wspace, H5S_SELECT_SET, offset, NULL, count, NULL); - H5Dwrite(dset, H5T_NATIVE_DOUBLE, mspace, wspace, H5P_DEFAULT, &spec.front()); - H5Sclose(wspace); - H5Sclose(mspace); - H5Dclose(dset); + else + std::fill_n((rnumber*)(dst->get_cdata()+3*cindex), 6, 0.0); } + ); + dst->symmetrize(); + return EXIT_SUCCESS; } template class field<float, FFTW, ONE>; @@ -837,49 +1142,6 @@ template class field<double, FFTW, ONE>; template class field<double, FFTW, THREE>; template class field<double, FFTW, THREExTHREE>; -template class kspace<FFTW, TWO_THIRDS>; -template class kspace<FFTW, SMOOTH>; - -template kspace<FFTW, TWO_THIRDS>::kspace<>( - const field_layout<ONE> *, - const double, const double, const double); -template kspace<FFTW, TWO_THIRDS>::kspace<>( - const field_layout<THREE> *, - const double, const double, const double); -template kspace<FFTW, TWO_THIRDS>::kspace<>( - const field_layout<THREExTHREE> *, - const double, const double, const double); - -template kspace<FFTW, SMOOTH>::kspace<>( - const field_layout<ONE> *, - const double, const double, const double); -template kspace<FFTW, SMOOTH>::kspace<>( - const field_layout<THREE> 
*, - const double, const double, const double); -template kspace<FFTW, SMOOTH>::kspace<>( - const field_layout<THREExTHREE> *, - const double, const double, const double); - -template void kspace<FFTW, SMOOTH>::low_pass<float, ONE>( - float *__restrict__ a, - const double kmax); -template void kspace<FFTW, SMOOTH>::low_pass<float, THREE>( - float *__restrict__ a, - const double kmax); -template void kspace<FFTW, SMOOTH>::low_pass<float, THREExTHREE>( - float *__restrict__ a, - const double kmax); - -template void kspace<FFTW, SMOOTH>::low_pass<double, ONE>( - double *__restrict__ a, - const double kmax); -template void kspace<FFTW, SMOOTH>::low_pass<double, THREE>( - double *__restrict__ a, - const double kmax); -template void kspace<FFTW, SMOOTH>::low_pass<double, THREExTHREE>( - double *__restrict__ a, - const double kmax); - template void field<float, FFTW, ONE>::compute_stats<TWO_THIRDS>( kspace<FFTW, TWO_THIRDS> *, const hid_t, const std::string, const hsize_t, const double); @@ -920,3 +1182,30 @@ template void field<double, FFTW, THREExTHREE>::compute_stats<SMOOTH>( kspace<FFTW, SMOOTH> *, const hid_t, const std::string, const hsize_t, const double); +template int compute_gradient<float, FFTW, THREE, THREExTHREE, SMOOTH>( + kspace<FFTW, SMOOTH> *, + field<float, FFTW, THREE> *, + field<float, FFTW, THREExTHREE> *); +template int compute_gradient<double, FFTW, THREE, THREExTHREE, SMOOTH>( + kspace<FFTW, SMOOTH> *, + field<double, FFTW, THREE> *, + field<double, FFTW, THREExTHREE> *); + +template int compute_gradient<float, FFTW, ONE, THREE, SMOOTH>( + kspace<FFTW, SMOOTH> *, + field<float, FFTW, ONE> *, + field<float, FFTW, THREE> *); +template int compute_gradient<double, FFTW, ONE, THREE, SMOOTH>( + kspace<FFTW, SMOOTH> *, + field<double, FFTW, ONE> *, + field<double, FFTW, THREE> *); + +template int invert_curl<float, FFTW, SMOOTH>( + kspace<FFTW, SMOOTH> *, + field<float, FFTW, THREE> *, + field<float, FFTW, THREE> *); +template int invert_curl<double, 
FFTW, SMOOTH>( + kspace<FFTW, SMOOTH> *, + field<double, FFTW, THREE> *, + field<double, FFTW, THREE> *); + diff --git a/bfps/cpp/field.hpp b/bfps/cpp/field.hpp index 6ebd4090e38795b2209fffcb3b6d7aab2642a8f2..82ce7afaf2fb7ba0c3d3f13ed54f4759b45425e9 100644 --- a/bfps/cpp/field.hpp +++ b/bfps/cpp/field.hpp @@ -24,110 +24,31 @@ -#include <mpi.h> #include <hdf5.h> -#include <fftw3-mpi.h> #include <unordered_map> #include <vector> #include <string> -#include "base.hpp" - -#ifndef FIELD - -#define FIELD - -enum field_backend {FFTW}; -enum field_components {ONE, THREE, THREExTHREE}; -enum kspace_dealias_type {TWO_THIRDS, SMOOTH}; - -constexpr unsigned int ncomp( - field_components fc) - /* return actual number of field components for each enum value */ -{ - return ((fc == THREE) ? 3 : ( - (fc == THREExTHREE) ? 9 : 1)); -} - -constexpr unsigned int ndim( - field_components fc) - /* return actual number of field dimensions for each enum value */ -{ - return ((fc == THREE) ? 4 : ( - (fc == THREExTHREE) ? 
5 : 3)); -} - -template <field_components fc> -class field_layout -{ - public: - /* description */ - hsize_t sizes[ndim(fc)]; - hsize_t subsizes[ndim(fc)]; - hsize_t starts[ndim(fc)]; - hsize_t local_size, full_size; - - int myrank, nprocs; - MPI_Comm comm; - - std::vector<std::vector<int>> rank; - std::vector<std::vector<int>> all_start; - std::vector<std::vector<int>> all_size; - - /* methods */ - field_layout( - const hsize_t *SIZES, - const hsize_t *SUBSIZES, - const hsize_t *STARTS, - const MPI_Comm COMM_TO_USE); - ~field_layout(){} -}; - -template <field_backend be, - kspace_dealias_type dt> -class kspace -{ - public: - /* relevant field layout */ - field_layout<ONE> *layout; - - /* physical parameters */ - double dkx, dky, dkz, dk, dk2; - - /* mode and dealiasing information */ - double kMx, kMy, kMz, kM, kM2; - double kMspec, kMspec2; - std::vector<double> kx, ky, kz; - std::unordered_map<int, double> dealias_filter; - std::vector<double> kshell; - std::vector<int64_t> nshell; - int nshells; - - /* methods */ - template <field_components fc> - kspace( - const field_layout<fc> *source_layout, - const double DKX = 1.0, - const double DKY = 1.0, - const double DKZ = 1.0); - ~kspace(); - - template <typename rnumber, - field_components fc> - void low_pass(rnumber *__restrict__ a, const double kmax); - - template <typename rnumber, - field_components fc> - void dealias(rnumber *__restrict__ a); - - template <typename rnumber, - field_components fc> - void cospectrum( - const rnumber(* __restrict__ a)[2], - const rnumber(* __restrict__ b)[2], - const hid_t group, - const std::string dset_name, - const hsize_t toffset); -}; +#include "kspace.hpp" +#include "omputils.hpp" + +#ifndef FIELD_HPP + +#define FIELD_HPP + +/** \class field + * \brief Holds field data, performs FFTs and HDF5 I/O operations. + * + * The purpose of this class is to manage memory for field data, create/destroy + * FFT plans for them, and compute HDF5 input/output operations. 
+ * + * FFTW recommendations are to create different plans for different arrays, + * hence the plans are member variables. + * All plans are for in-place transforms, since even with out-of-place transforms + * there are no guarantees that input data is not messed up by an inverse FFT, so + * there's no point in wasting the memory. + * + * + */ template <typename rnumber, field_backend be, @@ -135,26 +56,24 @@ template <typename rnumber, class field { private: - /* data arrays */ - rnumber *data; - typedef rnumber cnumber[2]; - hsize_t npoints; + rnumber *__restrict__ data; /**< data array */ public: - bool real_space_representation; - /* basic MPI information */ - int myrank, nprocs; - MPI_Comm comm; + hsize_t npoints; /**< total number of grid points. Useful for normalization. */ + bool real_space_representation; /**< `true` if field is in real space representation. */ + + int myrank, nprocs; /**< basic MPI information. */ + MPI_Comm comm; /**< MPI communicator this fields lives in. */ /* descriptions of field layout and distribution */ /* for the FFTW backend, at least, the real space field requires more * space to be allocated than strictly needed for the data, hence the - * two layout descriptors. + * two real space layout descriptors. 
* */ field_layout<fc> *clayout, *rlayout, *rmemlayout; /* FFT plans */ - void *c2r_plan; - void *r2c_plan; + typename fftw_interface<rnumber>::plan c2r_plan; + typename fftw_interface<rnumber>::plan r2c_plan; unsigned fftw_plan_rigor; /* HDF5 data types for arrays */ @@ -166,34 +85,112 @@ class field const int ny, const int nz, const MPI_Comm COMM_TO_USE, - const unsigned FFTW_PLAN_RIGOR = FFTW_ESTIMATE); + const unsigned FFTW_PLAN_RIGOR = DEFAULT_FFTW_FLAG); ~field(); int io( const std::string fname, - const std::string dset_name, + const std::string field_name, + const int iteration, + const bool read = true); + int io_database( + const std::string fname, + const std::string field_name, const int toffset, const bool read = true); + int write_0slice( + const hid_t group, + const std::string field_name, + const int iteration); + + int io_binary( + const std::string fname, + const int iteration, + const bool read = true); + + /* essential FFT stuff */ void dft(); void ift(); void normalize(); + void symmetrize(); + + /* stats */ + void compute_rspace_xincrement_stats( + const int xcells, + const hid_t group, + const std::string dset_name, + const hsize_t toffset, + const std::vector<double> max_estimate); void compute_rspace_stats( const hid_t group, const std::string dset_name, const hsize_t toffset, const std::vector<double> max_estimate); - inline rnumber *get_rdata() + + /* acess data */ + inline rnumber *__restrict__ get_rdata() + { + return this->data; + } + + inline const rnumber *__restrict__ get_rdata() const { return this->data; } - inline cnumber *get_cdata() + + inline typename fftw_interface<rnumber>::complex *__restrict__ get_cdata() + { + return (typename fftw_interface<rnumber>::complex*__restrict__)this->data; + } + + inline rnumber &rval(ptrdiff_t rindex, unsigned int component = 0) + { + assert(fc == ONE || fc == THREE); + assert(component >= 0 && component < ncomp(fc)); + return *(this->data + rindex*ncomp(fc) + component); + } + + inline const 
rnumber& rval(ptrdiff_t rindex, unsigned int component = 0) const + { + assert(fc == ONE || fc == THREE); + assert(component >= 0 && component < ncomp(fc)); + return *(this->data + rindex*ncomp(fc) + component); + } + + inline rnumber &rval(ptrdiff_t rindex, int comp1, int comp0) + { + assert(fc == THREExTHREE); + assert(comp1 >= 0 && comp1 < 3); + assert(comp0 >= 0 && comp0 < 3); + return *(this->data + ((rindex*3 + comp1)*3 + comp0)); + } + + inline rnumber &cval(ptrdiff_t cindex, int imag) + { + assert(fc == ONE); + assert(imag == 0 || imag == 1); + return *(this->data + cindex*2 + imag); + } + + inline rnumber &cval(ptrdiff_t cindex, int component, int imag) { - return (cnumber*)this->data; + assert(fc == THREE); + assert(imag == 0 || imag == 1); + return *(this->data + (cindex*ncomp(fc) + component)*2 + imag); } - inline field<rnumber, be, fc>& operator=(const cnumber *__restrict__ source) + inline rnumber &cval(ptrdiff_t cindex, int comp1, int comp0, int imag) + { + assert(fc == THREExTHREE); + assert(comp1 >= 0 && comp1 < 3); + assert(comp0 >= 0 && comp0 < 3); + assert(imag == 0 || imag == 1); + return *(this->data + ((cindex*3 + comp1)*3+comp0)*2 + imag); + } + + inline field<rnumber, be, fc>& operator=(const typename fftw_interface<rnumber>::complex *__restrict__ source) { std::copy((rnumber*)source, (rnumber*)(source + this->clayout->local_size), @@ -210,6 +207,15 @@ class field this->real_space_representation = true; return *this; } + + inline field<rnumber, be, fc>& operator=(const rnumber value) + { + std::fill_n(this->data, + this->rmemlayout->local_size, + value); + return *this; + } + template <kspace_dealias_type dt> void compute_stats( kspace<be, dt> *kk, @@ -217,74 +223,88 @@ class field const std::string dset_name, const hsize_t toffset, const double max_estimate); + inline void impose_zero_mode() + { + if (this->clayout->myrank == this->clayout->rank[0][0] && + this->real_space_representation == false) + { + std::fill_n(this->data, 2*ncomp(fc), 
0.0); + } + } + template <class func_type> + void RLOOP(func_type expression) + { + switch(be) + { + case FFTW: + #pragma omp parallel + { + const hsize_t start = OmpUtils::ForIntervalStart(this->rlayout->subsizes[1]); + const hsize_t end = OmpUtils::ForIntervalEnd(this->rlayout->subsizes[1]); + + for (hsize_t zindex = 0; zindex < this->rlayout->subsizes[0]; zindex++) + for (hsize_t yindex = start; yindex < end; yindex++) + { + ptrdiff_t rindex = ( + zindex * this->rlayout->subsizes[1] + yindex)*( + this->rmemlayout->subsizes[2]); + for (hsize_t xindex = 0; xindex < this->rlayout->subsizes[2]; xindex++) + { + expression(rindex, xindex, yindex, zindex); + rindex++; + } + } + } + break; + } + } + ptrdiff_t get_cindex( + ptrdiff_t xindex, + ptrdiff_t yindex, + ptrdiff_t zindex) + { + return ((yindex*this->clayout->subsizes[1] + + zindex)*this->clayout->subsizes[2] + + xindex); + } + + ptrdiff_t get_rindex( + ptrdiff_t xindex, + ptrdiff_t yindex, + ptrdiff_t zindex) const + { + return ((zindex*this->rmemlayout->subsizes[1] + + yindex)*this->rmemlayout->subsizes[2] + + xindex); + } + + ptrdiff_t get_rindex_from_global(const ptrdiff_t in_global_x, const ptrdiff_t in_global_y, const ptrdiff_t in_global_z) const { + assert(in_global_x >= 0 && in_global_x < ptrdiff_t(this->rlayout->sizes[2])); + assert(in_global_y >= 0 && in_global_y < ptrdiff_t(this->rlayout->sizes[1])); + assert(in_global_z >= 0 && in_global_z < ptrdiff_t(this->rlayout->sizes[0])); + return get_rindex(in_global_x - this->rlayout->starts[2], + in_global_y - this->rlayout->starts[1], + in_global_z - this->rlayout->starts[0]); + } }; -/* real space loop */ -#define FIELD_RLOOP(obj, expression) \ - \ -{ \ - switch (be) \ - { \ - case FFTW: \ - for (hsize_t zindex = 0; zindex < obj->rlayout->subsizes[0]; zindex++) \ - for (hsize_t yindex = 0; yindex < obj->rlayout->subsizes[1]; yindex++) \ - { \ - ptrdiff_t rindex = ( \ - zindex * obj->rlayout->subsizes[1] + yindex)*( \ - obj->rmemlayout->subsizes[2]); \ - for 
(hsize_t xindex = 0; xindex < obj->rlayout->subsizes[2]; xindex++) \ - { \ - expression; \ - rindex++; \ - } \ - } \ - break; \ - } \ -} - -#define KSPACE_CLOOP_K2(obj, expression) \ - \ -{ \ - double k2; \ - ptrdiff_t cindex = 0; \ - for (hsize_t yindex = 0; yindex < obj->layout->subsizes[0]; yindex++) \ - for (hsize_t zindex = 0; zindex < obj->layout->subsizes[1]; zindex++) \ - for (hsize_t xindex = 0; xindex < obj->layout->subsizes[2]; xindex++) \ - { \ - k2 = (obj->kx[xindex]*obj->kx[xindex] + \ - obj->ky[yindex]*obj->ky[yindex] + \ - obj->kz[zindex]*obj->kz[zindex]); \ - expression; \ - cindex++; \ - } \ -} - -#define KSPACE_CLOOP_K2_NXMODES(obj, expression) \ - \ -{ \ - double k2; \ - ptrdiff_t cindex = 0; \ - for (hsize_t yindex = 0; yindex < obj->layout->subsizes[0]; yindex++) \ - for (hsize_t zindex = 0; zindex < obj->layout->subsizes[1]; zindex++) \ - { \ - int nxmodes = 1; \ - hsize_t xindex = 0; \ - k2 = (obj->kx[xindex]*obj->kx[xindex] + \ - obj->ky[yindex]*obj->ky[yindex] + \ - obj->kz[zindex]*obj->kz[zindex]); \ - expression; \ - cindex++; \ - nxmodes = 2; \ - for (xindex = 1; xindex < obj->layout->subsizes[2]; xindex++) \ - { \ - k2 = (obj->kx[xindex]*obj->kx[xindex] + \ - obj->ky[yindex]*obj->ky[yindex] + \ - obj->kz[zindex]*obj->kz[zindex]); \ - expression; \ - cindex++; \ - } \ - } \ -} - -#endif//FIELD +template <typename rnumber, + field_backend be, + field_components fc1, + field_components fc2, + kspace_dealias_type dt> +int compute_gradient( + kspace<be, dt> *kk, + field<rnumber, be, fc1> *source, + field<rnumber, be, fc2> *destination); + +template <typename rnumber, + field_backend be, + kspace_dealias_type dt> +int invert_curl( + kspace<be, dt> *kk, + field<rnumber, be, THREE> *source, + field<rnumber, be, THREE> *destination); + +#endif//FIELD_HPP diff --git a/bfps/cpp/field_binary_IO.cpp b/bfps/cpp/field_binary_IO.cpp new file mode 100644 index 0000000000000000000000000000000000000000..52ca21f5947689408265423e0c4f075a670fa578 --- 
/dev/null +++ b/bfps/cpp/field_binary_IO.cpp @@ -0,0 +1,227 @@ +/********************************************************************** +* * +* Copyright 2015 Max Planck Institute * +* for Dynamics and Self-Organization * +* * +* This file is part of bfps. * +* * +* bfps is free software: you can redistribute it and/or modify * +* it under the terms of the GNU General Public License as published * +* by the Free Software Foundation, either version 3 of the License, * +* or (at your option) any later version. * +* * +* bfps is distributed in the hope that it will be useful, * +* but WITHOUT ANY WARRANTY; without even the implied warranty of * +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * +* GNU General Public License for more details. * +* * +* You should have received a copy of the GNU General Public License * +* along with bfps. If not, see <http://www.gnu.org/licenses/> * +* * +* Contact: Cristian.Lalescu@ds.mpg.de * +* * +**********************************************************************/ + + + +#include <vector> +#include <array> +#include "base.hpp" +#include "scope_timer.hpp" +#include "field_binary_IO.hpp" + +template <typename rnumber, field_representation fr, field_components fc> +field_binary_IO<rnumber, fr, fc>::field_binary_IO( + const hsize_t *SIZES, + const hsize_t *SUBSIZES, + const hsize_t *STARTS, + const MPI_Comm COMM_TO_USE): + field_layout<fc>( + SIZES, + SUBSIZES, + STARTS, + COMM_TO_USE) +{ + TIMEZONE("field_binary_IO::field_binary_IO"); + std::vector<int> tsizes ; + std::vector<int> tsubsizes; + std::vector<int> tstarts ; + tsizes.resize(ndim(fc)); + tsubsizes.resize(ndim(fc)); + tstarts.resize(ndim(fc)); + for (int i=0; i<int(ndim(fc)); i++) + { + tsizes[i] = int(this->sizes[i]); + tsubsizes[i] = int(this->subsizes[i]); + tstarts[i] = int(this->starts[i]); + } + // these are required if using unsigned char in the subarray creation + //tsizes[ndim(fc)-1] *= sizeof(element_type); + //tsubsizes[ndim(fc)-1] *= 
sizeof(element_type); + //tstarts[ndim(fc)-1] *= sizeof(element_type); + MPI_Type_create_subarray( + ndim(fc), + &tsizes.front(), + &tsubsizes.front(), + &tstarts.front(), + MPI_ORDER_C, + //MPI_UNSIGNED_CHAR, // in case element type fails + mpi_type<rnumber>(fr), + &this->mpi_array_dtype); + MPI_Type_commit(&this->mpi_array_dtype); + + // check if there are processes without any data + int local_zero_array[this->nprocs], zero_array[this->nprocs]; + for (int i=0; i<this->nprocs; i++) + local_zero_array[i] = 0; + local_zero_array[this->myrank] = (this->subsizes[0] == 0) ? 1 : 0; + MPI_Allreduce( + local_zero_array, + zero_array, + this->nprocs, + MPI_INT, + MPI_SUM, + this->comm); + int no_of_excluded_ranks = 0; + for (int i = 0; i<this->nprocs; i++) + no_of_excluded_ranks += zero_array[i]; + if (no_of_excluded_ranks == 0) + { + this->io_comm = this->comm; + this->io_comm_nprocs = this->nprocs; + this->io_comm_myrank = this->myrank; + } + else + { + int excluded_rank[no_of_excluded_ranks]; + for (int i=0, j=0; i<this->nprocs; i++) + if (zero_array[i]) + { + excluded_rank[j] = i; + j++; + } + MPI_Group tgroup0, tgroup; + MPI_Comm_group(this->comm, &tgroup0); + MPI_Group_excl(tgroup0, no_of_excluded_ranks, excluded_rank, &tgroup); + MPI_Comm_create(this->comm, tgroup, &this->io_comm); + MPI_Group_free(&tgroup0); + MPI_Group_free(&tgroup); + if (this->subsizes[0] > 0) + { + MPI_Comm_rank(this->io_comm, &this->io_comm_myrank); + MPI_Comm_size(this->io_comm, &this->io_comm_nprocs); + } + else + { + this->io_comm_myrank = MPI_PROC_NULL; + this->io_comm_nprocs = -1; + } + } +} + +template <typename rnumber, field_representation fr, field_components fc> +field_binary_IO<rnumber, fr, fc>::~field_binary_IO() +{ + TIMEZONE("field_binary_IO::~field_binary_IO"); + MPI_Type_free(&this->mpi_array_dtype); + if (this->nprocs != this->io_comm_nprocs && + this->io_comm_myrank != MPI_PROC_NULL) + { + MPI_Comm_free(&this->io_comm); + } +} + +template <typename rnumber, 
field_representation fr, field_components fc> +int field_binary_IO<rnumber, fr, fc>::read( + const std::string fname, + void *buffer) +{ + TIMEZONE("field_binary_IO::read"); + char representation[] = "native"; + if (this->subsizes[0] > 0) + { + MPI_Info info; + MPI_Info_create(&info); + MPI_File f; + char ffname[512]; + sprintf(ffname, "%s", fname.c_str()); + + MPI_File_open( + this->io_comm, + ffname, + MPI_MODE_RDONLY, + info, + &f); + MPI_File_set_view( + f, + 0, + mpi_type<rnumber>(fr), + this->mpi_array_dtype, + representation, + info); + MPI_File_read_all( + f, + buffer, + this->local_size, + mpi_type<rnumber>(fr), + MPI_STATUS_IGNORE); + MPI_File_close(&f); + } + return EXIT_SUCCESS; +} + +template <typename rnumber, field_representation fr, field_components fc> +int field_binary_IO<rnumber, fr, fc>::write( + const std::string fname, + void *buffer) +{ + TIMEZONE("field_binary_IO::write"); + char representation[] = "native"; + if (this->subsizes[0] > 0) + { + MPI_Info info; + MPI_Info_create(&info); + MPI_File f; + char ffname[512]; + sprintf(ffname, "%s", fname.c_str()); + + MPI_File_open( + this->io_comm, + ffname, + MPI_MODE_CREATE | MPI_MODE_WRONLY, + info, + &f); + MPI_File_set_view( + f, + 0, + mpi_type<rnumber>(fr), + this->mpi_array_dtype, + representation, + info); + MPI_File_write_all( + f, + buffer, + this->local_size, + mpi_type<rnumber>(fr), + MPI_STATUS_IGNORE); + MPI_File_close(&f); + } + + return EXIT_SUCCESS; +} + +template class field_binary_IO<float , REAL , ONE>; +template class field_binary_IO<float , COMPLEX, ONE>; +template class field_binary_IO<double, REAL , ONE>; +template class field_binary_IO<double, COMPLEX, ONE>; + +template class field_binary_IO<float , REAL , THREE>; +template class field_binary_IO<float , COMPLEX, THREE>; +template class field_binary_IO<double, REAL , THREE>; +template class field_binary_IO<double, COMPLEX, THREE>; + +template class field_binary_IO<float , REAL , THREExTHREE>; +template class 
field_binary_IO<float , COMPLEX, THREExTHREE>; +template class field_binary_IO<double, REAL , THREExTHREE>; +template class field_binary_IO<double, COMPLEX, THREExTHREE>; + diff --git a/bfps/cpp/field_binary_IO.hpp b/bfps/cpp/field_binary_IO.hpp new file mode 100644 index 0000000000000000000000000000000000000000..b06e901c27acd99855a314dc7b5afb8b43f190b7 --- /dev/null +++ b/bfps/cpp/field_binary_IO.hpp @@ -0,0 +1,78 @@ +/********************************************************************** +* * +* Copyright 2015 Max Planck Institute * +* for Dynamics and Self-Organization * +* * +* This file is part of bfps. * +* * +* bfps is free software: you can redistribute it and/or modify * +* it under the terms of the GNU General Public License as published * +* by the Free Software Foundation, either version 3 of the License, * +* or (at your option) any later version. * +* * +* bfps is distributed in the hope that it will be useful, * +* but WITHOUT ANY WARRANTY; without even the implied warranty of * +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * +* GNU General Public License for more details. * +* * +* You should have received a copy of the GNU General Public License * +* along with bfps. If not, see <http://www.gnu.org/licenses/> * +* * +* Contact: Cristian.Lalescu@ds.mpg.de * +* * +**********************************************************************/ + + + +#include <vector> +#include <string> +#include "base.hpp" +#include "fftw_interface.hpp" +#include "field_layout.hpp" +#include "field.hpp" + +#ifndef FIELD_BINARY_IO_HPP + +#define FIELD_BINARY_IO_HPP + +/* could this be a boolean somehow?*/ +enum field_representation: bool { + REAL = true, + COMPLEX = false}; + +template <typename rnumber> +constexpr MPI_Datatype mpi_type( + field_representation fr) +{ + return ((fr == REAL) ? 
+ mpi_real_type<rnumber>::real() : + mpi_real_type<rnumber>::complex()); +} + +template <typename rnumber, field_representation fr, field_components fc> +class field_binary_IO:public field_layout<fc> +{ + private: + MPI_Comm io_comm; + int io_comm_myrank, io_comm_nprocs; + MPI_Datatype mpi_array_dtype; + public: + + /* methods */ + field_binary_IO( + const hsize_t *SIZES, + const hsize_t *SUBSIZES, + const hsize_t *STARTS, + const MPI_Comm COMM_TO_USE); + ~field_binary_IO(); + + int read( + const std::string fname, + void *buffer); + int write( + const std::string fname, + void *buffer); +}; + +#endif//FIELD_BINARY_IO_HPP + diff --git a/bfps/cpp/field_descriptor.cpp b/bfps/cpp/field_descriptor.cpp index b5025835903a37ea5384cb4102c716f527aabfe5..20c634262dbb45ad4c2bb5a1b5640b6df23d4d2c 100644 --- a/bfps/cpp/field_descriptor.cpp +++ b/bfps/cpp/field_descriptor.cpp @@ -31,476 +31,470 @@ #include <iostream> #include "base.hpp" #include "field_descriptor.hpp" - +#include "fftw_interface.hpp" +#include "scope_timer.hpp" /*****************************************************************************/ /* macro for specializations to numeric types compatible with FFTW */ -#define CLASS_IMPLEMENTATION(FFTW, R, MPI_RNUM, MPI_CNUM) \ - \ -template<> \ -field_descriptor<R>::field_descriptor( \ - int ndims, \ - int *n, \ - MPI_Datatype element_type, \ - MPI_Comm COMM_TO_USE) \ -{ \ - DEBUG_MSG("entered field_descriptor::field_descriptor\n"); \ - this->comm = COMM_TO_USE; \ - MPI_Comm_rank(this->comm, &this->myrank); \ - MPI_Comm_size(this->comm, &this->nprocs); \ - this->ndims = ndims; \ - this->sizes = new int[ndims]; \ - this->subsizes = new int[ndims]; \ - this->starts = new int[ndims]; \ - int tsizes [ndims]; \ - int tsubsizes[ndims]; \ - int tstarts [ndims]; \ - ptrdiff_t *nfftw = new ptrdiff_t[ndims]; \ - ptrdiff_t local_n0, local_0_start; \ - for (int i = 0; i < this->ndims; i++) \ - nfftw[i] = n[i]; \ - this->local_size = fftw_mpi_local_size_many( \ - this->ndims, \ - 
nfftw, \ - 1, \ - FFTW_MPI_DEFAULT_BLOCK, \ - this->comm, \ - &local_n0, \ - &local_0_start); \ - this->sizes[0] = n[0]; \ - this->subsizes[0] = (int)local_n0; \ - this->starts[0] = (int)local_0_start; \ - DEBUG_MSG_WAIT( \ - this->comm, \ - "first subsizes[0] = %d %d %d\n", \ - this->subsizes[0], \ - tsubsizes[0], \ - (int)local_n0); \ - tsizes[0] = n[0]; \ - tsubsizes[0] = (int)local_n0; \ - tstarts[0] = (int)local_0_start; \ - DEBUG_MSG_WAIT( \ - this->comm, \ - "second subsizes[0] = %d %d %d\n", \ - this->subsizes[0], \ - tsubsizes[0], \ - (int)local_n0); \ - this->mpi_dtype = element_type; \ - this->slice_size = 1; \ - this->full_size = this->sizes[0]; \ - for (int i = 1; i < this->ndims; i++) \ - { \ - this->sizes[i] = n[i]; \ - this->subsizes[i] = n[i]; \ - this->starts[i] = 0; \ - this->slice_size *= this->subsizes[i]; \ - this->full_size *= this->sizes[i]; \ - tsizes[i] = this->sizes[i]; \ - tsubsizes[i] = this->subsizes[i]; \ - tstarts[i] = this->starts[i]; \ - } \ - tsizes[ndims-1] *= sizeof(R); \ - tsubsizes[ndims-1] *= sizeof(R); \ - tstarts[ndims-1] *= sizeof(R); \ - if (this->mpi_dtype == MPI_CNUM) \ - { \ - tsizes[ndims-1] *= 2; \ - tsubsizes[ndims-1] *= 2; \ - tstarts[ndims-1] *= 2; \ - } \ - int local_zero_array[this->nprocs], zero_array[this->nprocs]; \ - for (int i=0; i<this->nprocs; i++) \ - local_zero_array[i] = 0; \ - local_zero_array[this->myrank] = (this->subsizes[0] == 0) ? 
1 : 0; \ - MPI_Allreduce( \ - local_zero_array, \ - zero_array, \ - this->nprocs, \ - MPI_INT, \ - MPI_SUM, \ - this->comm); \ - int no_of_excluded_ranks = 0; \ - for (int i = 0; i<this->nprocs; i++) \ - no_of_excluded_ranks += zero_array[i]; \ - DEBUG_MSG_WAIT( \ - this->comm, \ - "subsizes[0] = %d %d\n", \ - this->subsizes[0], \ - tsubsizes[0]); \ - if (no_of_excluded_ranks == 0) \ - { \ - this->io_comm = this->comm; \ - this->io_nprocs = this->nprocs; \ - this->io_myrank = this->myrank; \ - } \ - else \ - { \ - int excluded_rank[no_of_excluded_ranks]; \ - for (int i=0, j=0; i<this->nprocs; i++) \ - if (zero_array[i]) \ - { \ - excluded_rank[j] = i; \ - j++; \ - } \ - MPI_Group tgroup0, tgroup; \ - MPI_Comm_group(this->comm, &tgroup0); \ - MPI_Group_excl(tgroup0, no_of_excluded_ranks, excluded_rank, &tgroup); \ - MPI_Comm_create(this->comm, tgroup, &this->io_comm); \ - MPI_Group_free(&tgroup0); \ - MPI_Group_free(&tgroup); \ - if (this->subsizes[0] > 0) \ - { \ - MPI_Comm_rank(this->io_comm, &this->io_myrank); \ - MPI_Comm_size(this->io_comm, &this->io_nprocs); \ - } \ - else \ - { \ - this->io_myrank = MPI_PROC_NULL; \ - this->io_nprocs = -1; \ - } \ - } \ - DEBUG_MSG_WAIT( \ - this->comm, \ - "inside field_descriptor constructor, about to call " \ - "MPI_Type_create_subarray " \ - "%d %d %d\n", \ - this->sizes[0], \ - this->subsizes[0], \ - this->starts[0]); \ - for (int i=0; i<this->ndims; i++) \ - DEBUG_MSG_WAIT( \ - this->comm, \ - "tsizes " \ - "%d %d %d\n", \ - tsizes[i], \ - tsubsizes[i], \ - tstarts[i]); \ - if (this->subsizes[0] > 0) \ - { \ - DEBUG_MSG("creating subarray\n"); \ - MPI_Type_create_subarray( \ - ndims, \ - tsizes, \ - tsubsizes, \ - tstarts, \ - MPI_ORDER_C, \ - MPI_UNSIGNED_CHAR, \ - &this->mpi_array_dtype); \ - MPI_Type_commit(&this->mpi_array_dtype); \ - } \ - this->rank = new int[this->sizes[0]]; \ - int *local_rank = new int[this->sizes[0]]; \ - std::fill_n(local_rank, this->sizes[0], 0); \ - for (int i = 0; i < this->sizes[0]; i++) 
\ - if (i >= this->starts[0] && i < this->starts[0] + this->subsizes[0]) \ - local_rank[i] = this->myrank; \ - MPI_Allreduce( \ - local_rank, \ - this->rank, \ - this->sizes[0], \ - MPI_INT, \ - MPI_SUM, \ - this->comm); \ - delete[] local_rank; \ - this->all_start0 = new int[this->nprocs]; \ - int *local_start0 = new int[this->nprocs]; \ - std::fill_n(local_start0, this->nprocs, 0); \ - for (int i = 0; i < this->nprocs; i++) \ - if (this->myrank == i) \ - local_start0[i] = this->starts[0]; \ - MPI_Allreduce( \ - local_start0, \ - this->all_start0, \ - this->nprocs, \ - MPI_INT, \ - MPI_SUM, \ - this->comm); \ - delete[] local_start0; \ - this->all_size0 = new int[this->nprocs]; \ - int *local_size0 = new int[this->nprocs]; \ - std::fill_n(local_size0, this->nprocs, 0); \ - for (int i = 0; i < this->nprocs; i++) \ - if (this->myrank == i) \ - local_size0[i] = this->subsizes[0]; \ - MPI_Allreduce( \ - local_size0, \ - this->all_size0, \ - this->nprocs, \ - MPI_INT, \ - MPI_SUM, \ - this->comm); \ - delete[] local_size0; \ - DEBUG_MSG("exiting field_descriptor constructor\n"); \ -} \ - \ -template <> \ -int field_descriptor<R>::read( \ - const char *fname, \ - void *buffer) \ -{ \ - DEBUG_MSG("entered field_descriptor::read\n"); \ - char representation[] = "native"; \ - if (this->subsizes[0] > 0) \ - { \ - MPI_Info info; \ - MPI_Info_create(&info); \ - MPI_File f; \ - ptrdiff_t read_size = this->local_size*sizeof(R); \ - DEBUG_MSG("read size is %ld\n", read_size); \ - char ffname[200]; \ - if (this->mpi_dtype == MPI_CNUM) \ - read_size *= 2; \ - DEBUG_MSG("read size is %ld\n", read_size); \ - sprintf(ffname, "%s", fname); \ - \ - MPI_File_open( \ - this->io_comm, \ - ffname, \ - MPI_MODE_RDONLY, \ - info, \ - &f); \ - DEBUG_MSG("opened file\n"); \ - MPI_File_set_view( \ - f, \ - 0, \ - MPI_UNSIGNED_CHAR, \ - this->mpi_array_dtype, \ - representation, \ - info); \ - DEBUG_MSG("view is set\n"); \ - MPI_File_read_all( \ - f, \ - buffer, \ - read_size, \ - 
MPI_UNSIGNED_CHAR, \ - MPI_STATUS_IGNORE); \ - DEBUG_MSG("info is read\n"); \ - MPI_File_close(&f); \ - } \ - DEBUG_MSG("finished with field_descriptor::read\n"); \ - return EXIT_SUCCESS; \ -} \ - \ -template <> \ -int field_descriptor<R>::write( \ - const char *fname, \ - void *buffer) \ -{ \ - char representation[] = "native"; \ - if (this->subsizes[0] > 0) \ - { \ - MPI_Info info; \ - MPI_Info_create(&info); \ - MPI_File f; \ - ptrdiff_t read_size = this->local_size*sizeof(R); \ - char ffname[200]; \ - if (this->mpi_dtype == MPI_CNUM) \ - read_size *= 2; \ - sprintf(ffname, "%s", fname); \ - \ - MPI_File_open( \ - this->io_comm, \ - ffname, \ - MPI_MODE_CREATE | MPI_MODE_WRONLY, \ - info, \ - &f); \ - MPI_File_set_view( \ - f, \ - 0, \ - MPI_UNSIGNED_CHAR, \ - this->mpi_array_dtype, \ - representation, \ - info); \ - MPI_File_write_all( \ - f, \ - buffer, \ - read_size, \ - MPI_UNSIGNED_CHAR, \ - MPI_STATUS_IGNORE); \ - MPI_File_close(&f); \ - } \ - \ - return EXIT_SUCCESS; \ -} \ - \ -template <> \ -int field_descriptor<R>::transpose( \ - R *input, \ - R *output) \ -{ \ - /* IMPORTANT NOTE: \ - for 3D transposition, the input data is messed up */ \ - FFTW(plan) tplan; \ - if (this->ndims == 3) \ - { \ - /* transpose the two local dimensions 1 and 2 */ \ - R *atmp; \ - atmp = FFTW(alloc_real)(this->slice_size); \ - for (int k = 0; k < this->subsizes[0]; k++) \ - { \ - /* put transposed slice in atmp */ \ - for (int j = 0; j < this->sizes[1]; j++) \ - for (int i = 0; i < this->sizes[2]; i++) \ - atmp[i*this->sizes[1] + j] = \ - input[(k*this->sizes[1] + j)*this->sizes[2] + i]; \ - /* copy back transposed slice */ \ - std::copy( \ - atmp, \ - atmp + this->slice_size, \ - input + k*this->slice_size); \ - } \ - FFTW(free)(atmp); \ - } \ - tplan = FFTW(mpi_plan_transpose)( \ - this->sizes[0], this->slice_size, \ - input, output, \ - this->comm, \ - FFTW_ESTIMATE); \ - FFTW(execute)(tplan); \ - FFTW(destroy_plan)(tplan); \ - return EXIT_SUCCESS; \ -} \ - \ -template<> 
\ -int field_descriptor<R>::transpose( \ - FFTW(complex) *input, \ - FFTW(complex) *output) \ -{ \ - switch (this->ndims) \ - { \ - case 2: \ - /* do a global transpose over the 2 dimensions */ \ - if (output == NULL) \ - { \ - std::cerr << "bad arguments for transpose.\n" << std::endl; \ - return EXIT_FAILURE; \ - } \ - FFTW(plan) tplan; \ - tplan = FFTW(mpi_plan_many_transpose)( \ - this->sizes[0], this->sizes[1], 2, \ - FFTW_MPI_DEFAULT_BLOCK, \ - FFTW_MPI_DEFAULT_BLOCK, \ - (R*)input, (R*)output, \ - this->comm, \ - FFTW_ESTIMATE); \ - FFTW(execute)(tplan); \ - FFTW(destroy_plan)(tplan); \ - break; \ - case 3: \ - /* transpose the two local dimensions 1 and 2 */ \ - FFTW(complex) *atmp; \ - atmp = FFTW(alloc_complex)(this->slice_size); \ - for (int k = 0; k < this->subsizes[0]; k++) \ - { \ - /* put transposed slice in atmp */ \ - for (int j = 0; j < this->sizes[1]; j++) \ - for (int i = 0; i < this->sizes[2]; i++) \ - { \ - atmp[i*this->sizes[1] + j][0] = \ - input[(k*this->sizes[1] + j)*this->sizes[2] + i][0]; \ - atmp[i*this->sizes[1] + j][1] = \ - input[(k*this->sizes[1] + j)*this->sizes[2] + i][1]; \ - } \ - /* copy back transposed slice */ \ - std::copy( \ - (R*)(atmp), \ - (R*)(atmp + this->slice_size), \ - (R*)(input + k*this->slice_size)); \ - } \ - FFTW(free)(atmp); \ - break; \ - default: \ - return EXIT_FAILURE; \ - break; \ - } \ - return EXIT_SUCCESS; \ -} \ - \ -template<> \ -int field_descriptor<R>::interleave( \ - R *a, \ - int dim) \ -{ \ -/* the following is copied from \ - * http://agentzlerich.blogspot.com/2010/01/using-fftw-for-in-place-matrix.html \ - * */ \ - FFTW(iodim) howmany_dims[2]; \ - howmany_dims[0].n = dim; \ - howmany_dims[0].is = this->local_size; \ - howmany_dims[0].os = 1; \ - howmany_dims[1].n = this->local_size; \ - howmany_dims[1].is = 1; \ - howmany_dims[1].os = dim; \ - const int howmany_rank = sizeof(howmany_dims)/sizeof(howmany_dims[0]); \ - \ - FFTW(plan) tmp = FFTW(plan_guru_r2r)( \ - /*rank*/0, \ - /*dims*/NULL, \ 
- howmany_rank, \ - howmany_dims, \ - a, \ - a, \ - /*kind*/NULL, \ - FFTW_ESTIMATE); \ - FFTW(execute)(tmp); \ - FFTW(destroy_plan)(tmp); \ - return EXIT_SUCCESS; \ -} \ - \ -template<> \ -int field_descriptor<R>::interleave( \ - FFTW(complex) *a, \ - int dim) \ -{ \ - FFTW(iodim) howmany_dims[2]; \ - howmany_dims[0].n = dim; \ - howmany_dims[0].is = this->local_size; \ - howmany_dims[0].os = 1; \ - howmany_dims[1].n = this->local_size; \ - howmany_dims[1].is = 1; \ - howmany_dims[1].os = dim; \ - const int howmany_rank = sizeof(howmany_dims)/sizeof(howmany_dims[0]); \ - \ - FFTW(plan) tmp = FFTW(plan_guru_dft)( \ - /*rank*/0, \ - /*dims*/NULL, \ - howmany_rank, \ - howmany_dims, \ - a, \ - a, \ - +1, \ - FFTW_ESTIMATE); \ - FFTW(execute)(tmp); \ - FFTW(destroy_plan)(tmp); \ - return EXIT_SUCCESS; \ -} \ - \ -template<> \ -field_descriptor<R>* field_descriptor<R>::get_transpose() \ -{ \ - int n[this->ndims]; \ - for (int i=0; i<this->ndims; i++) \ - n[i] = this->sizes[this->ndims - i - 1]; \ - return new field_descriptor<R>(this->ndims, n, this->mpi_dtype, this->comm); \ -} \ -/*****************************************************************************/ +template <class rnumber> +field_descriptor<rnumber>::field_descriptor( + int ndims, + int *n, + MPI_Datatype element_type, + MPI_Comm COMM_TO_USE) +{ + TIMEZONE("field_descriptor"); + DEBUG_MSG("entered field_descriptor::field_descriptor\n"); + this->comm = COMM_TO_USE; + MPI_Comm_rank(this->comm, &this->myrank); + MPI_Comm_size(this->comm, &this->nprocs); + this->ndims = ndims; + this->sizes = new int[ndims]; + this->subsizes = new int[ndims]; + this->starts = new int[ndims]; + int tsizes [ndims]; + int tsubsizes[ndims]; + int tstarts [ndims]; + std::vector<ptrdiff_t> nfftw; + nfftw.resize(ndims); + ptrdiff_t local_n0, local_0_start; + for (int i = 0; i < this->ndims; i++) + nfftw[i] = n[i]; + this->local_size = fftw_mpi_local_size_many( + this->ndims, + &nfftw.front(), + 1, + FFTW_MPI_DEFAULT_BLOCK, + 
this->comm, + &local_n0, + &local_0_start); + this->sizes[0] = n[0]; + this->subsizes[0] = (int)local_n0; + this->starts[0] = (int)local_0_start; + DEBUG_MSG_WAIT( + this->comm, + "first subsizes[0] = %d %d %d\n", + this->subsizes[0], + tsubsizes[0], + (int)local_n0); + tsizes[0] = n[0]; + tsubsizes[0] = (int)local_n0; + tstarts[0] = (int)local_0_start; + DEBUG_MSG_WAIT( + this->comm, + "second subsizes[0] = %d %d %d\n", + this->subsizes[0], + tsubsizes[0], + (int)local_n0); + this->mpi_dtype = element_type; + this->slice_size = 1; + this->full_size = this->sizes[0]; + for (int i = 1; i < this->ndims; i++) + { + this->sizes[i] = n[i]; + this->subsizes[i] = n[i]; + this->starts[i] = 0; + this->slice_size *= this->subsizes[i]; + this->full_size *= this->sizes[i]; + tsizes[i] = this->sizes[i]; + tsubsizes[i] = this->subsizes[i]; + tstarts[i] = this->starts[i]; + } + tsizes[ndims-1] *= sizeof(rnumber); + tsubsizes[ndims-1] *= sizeof(rnumber); + tstarts[ndims-1] *= sizeof(rnumber); + if (this->mpi_dtype == mpi_real_type<rnumber>::complex()) + { + tsizes[ndims-1] *= 2; + tsubsizes[ndims-1] *= 2; + tstarts[ndims-1] *= 2; + } + int local_zero_array[this->nprocs], zero_array[this->nprocs]; + for (int i=0; i<this->nprocs; i++) + local_zero_array[i] = 0; + local_zero_array[this->myrank] = (this->subsizes[0] == 0) ? 
1 : 0; + MPI_Allreduce( + local_zero_array, + zero_array, + this->nprocs, + MPI_INT, + MPI_SUM, + this->comm); + int no_of_excluded_ranks = 0; + for (int i = 0; i<this->nprocs; i++) + no_of_excluded_ranks += zero_array[i]; + DEBUG_MSG_WAIT( + this->comm, + "subsizes[0] = %d %d\n", + this->subsizes[0], + tsubsizes[0]); + if (no_of_excluded_ranks == 0) + { + this->io_comm = this->comm; + this->io_nprocs = this->nprocs; + this->io_myrank = this->myrank; + } + else + { + int excluded_rank[no_of_excluded_ranks]; + for (int i=0, j=0; i<this->nprocs; i++) + if (zero_array[i]) + { + excluded_rank[j] = i; + j++; + } + MPI_Group tgroup0, tgroup; + MPI_Comm_group(this->comm, &tgroup0); + MPI_Group_excl(tgroup0, no_of_excluded_ranks, excluded_rank, &tgroup); + MPI_Comm_create(this->comm, tgroup, &this->io_comm); + MPI_Group_free(&tgroup0); + MPI_Group_free(&tgroup); + if (this->subsizes[0] > 0) + { + MPI_Comm_rank(this->io_comm, &this->io_myrank); + MPI_Comm_size(this->io_comm, &this->io_nprocs); + } + else + { + this->io_myrank = MPI_PROC_NULL; + this->io_nprocs = -1; + } + } + DEBUG_MSG_WAIT( + this->comm, + "inside field_descriptor constructor, about to call " + "MPI_Type_create_subarray " + "%d %d %d\n", + this->sizes[0], + this->subsizes[0], + this->starts[0]); + for (int i=0; i<this->ndims; i++) + DEBUG_MSG_WAIT( + this->comm, + "tsizes " + "%d %d %d\n", + tsizes[i], + tsubsizes[i], + tstarts[i]); + if (this->subsizes[0] > 0) + { + DEBUG_MSG("creating subarray\n"); + MPI_Type_create_subarray( + ndims, + tsizes, + tsubsizes, + tstarts, + MPI_ORDER_C, + MPI_UNSIGNED_CHAR, + &this->mpi_array_dtype); + MPI_Type_commit(&this->mpi_array_dtype); + } + this->rank = new int[this->sizes[0]]; + int *local_rank = new int[this->sizes[0]]; + std::fill_n(local_rank, this->sizes[0], 0); + for (int i = 0; i < this->sizes[0]; i++) + if (i >= this->starts[0] && i < this->starts[0] + this->subsizes[0]) + local_rank[i] = this->myrank; + MPI_Allreduce( + local_rank, + this->rank, + 
this->sizes[0], + MPI_INT, + MPI_SUM, + this->comm); + delete[] local_rank; + this->all_start0 = new int[this->nprocs]; + int *local_start0 = new int[this->nprocs]; + std::fill_n(local_start0, this->nprocs, 0); + for (int i = 0; i < this->nprocs; i++) + if (this->myrank == i) + local_start0[i] = this->starts[0]; + MPI_Allreduce( + local_start0, + this->all_start0, + this->nprocs, + MPI_INT, + MPI_SUM, + this->comm); + delete[] local_start0; + this->all_size0 = new int[this->nprocs]; + int *local_size0 = new int[this->nprocs]; + std::fill_n(local_size0, this->nprocs, 0); + for (int i = 0; i < this->nprocs; i++) + if (this->myrank == i) + local_size0[i] = this->subsizes[0]; + MPI_Allreduce( + local_size0, + this->all_size0, + this->nprocs, + MPI_INT, + MPI_SUM, + this->comm); + delete[] local_size0; + DEBUG_MSG("exiting field_descriptor constructor\n"); +} + +template <class rnumber> +int field_descriptor<rnumber>::read( + const char *fname, + void *buffer) +{ + TIMEZONE("field_descriptor::read"); + DEBUG_MSG("entered field_descriptor::read\n"); + char representation[] = "native"; + if (this->subsizes[0] > 0) + { + MPI_Info info; + MPI_Info_create(&info); + MPI_File f; + ptrdiff_t read_size = this->local_size*sizeof(rnumber); + DEBUG_MSG("read size is %ld\n", read_size); + char ffname[200]; + if (this->mpi_dtype == mpi_real_type<rnumber>::complex()) + read_size *= 2; + DEBUG_MSG("read size is %ld\n", read_size); + sprintf(ffname, "%s", fname); + + MPI_File_open( + this->io_comm, + ffname, + MPI_MODE_RDONLY, + info, + &f); + DEBUG_MSG("opened file\n"); + MPI_File_set_view( + f, + 0, + MPI_UNSIGNED_CHAR, + this->mpi_array_dtype, + representation, + info); + DEBUG_MSG("view is set\n"); + MPI_File_read_all( + f, + buffer, + read_size, + MPI_UNSIGNED_CHAR, + MPI_STATUS_IGNORE); + DEBUG_MSG("info is read\n"); + MPI_File_close(&f); + } + DEBUG_MSG("finished with field_descriptor::read\n"); + return EXIT_SUCCESS; +} + +template <class rnumber> +int 
field_descriptor<rnumber>::write( + const char *fname, + void *buffer) +{ + TIMEZONE("field_descriptor::write"); + char representation[] = "native"; + if (this->subsizes[0] > 0) + { + MPI_Info info; + MPI_Info_create(&info); + MPI_File f; + ptrdiff_t read_size = this->local_size*sizeof(rnumber); + char ffname[200]; + if (this->mpi_dtype == mpi_real_type<rnumber>::complex()) + read_size *= 2; + sprintf(ffname, "%s", fname); + + MPI_File_open( + this->io_comm, + ffname, + MPI_MODE_CREATE | MPI_MODE_WRONLY, + info, + &f); + MPI_File_set_view( + f, + 0, + MPI_UNSIGNED_CHAR, + this->mpi_array_dtype, + representation, + info); + MPI_File_write_all( + f, + buffer, + read_size, + MPI_UNSIGNED_CHAR, + MPI_STATUS_IGNORE); + MPI_File_close(&f); + } + + return EXIT_SUCCESS; +} +template <class rnumber> +int field_descriptor<rnumber>::transpose( + rnumber *input, + rnumber *output) +{ + TIMEZONE("field_descriptor::transpose"); + /* IMPORTANT NOTE: + for 3D transposition, the input data is messed up */ + typename fftw_interface<rnumber>::plan tplan; + if (this->ndims == 3) + { + /* transpose the two local dimensions 1 and 2 */ + rnumber *atmp; + atmp = fftw_interface<rnumber>::alloc_real(this->slice_size); + for (int k = 0; k < this->subsizes[0]; k++) + { + /* put transposed slice in atmp */ + for (int j = 0; j < this->sizes[1]; j++) + for (int i = 0; i < this->sizes[2]; i++) + atmp[i*this->sizes[1] + j] = + input[(k*this->sizes[1] + j)*this->sizes[2] + i]; + /* copy back transposed slice */ + std::copy( + atmp, + atmp + this->slice_size, + input + k*this->slice_size); + } + fftw_interface<rnumber>::free(atmp); + } + tplan = fftw_interface<rnumber>::mpi_plan_transpose( + this->sizes[0], this->slice_size, + input, output, + this->comm, + DEFAULT_FFTW_FLAG); + fftw_interface<rnumber>::execute(tplan); + fftw_interface<rnumber>::destroy_plan(tplan); + return EXIT_SUCCESS; +} +template <class rnumber> +int field_descriptor<rnumber>::transpose( + typename 
fftw_interface<rnumber>::complex *input, + typename fftw_interface<rnumber>::complex *output) +{ + TIMEZONE("field_descriptor::transpose2"); + switch (this->ndims) + { + case 2: + /* do a global transpose over the 2 dimensions */ + if (output == NULL) + { + std::cerr << "bad arguments for transpose.\n" << std::endl; + return EXIT_FAILURE; + } + typename fftw_interface<rnumber>::plan tplan; + tplan = fftw_interface<rnumber>::mpi_plan_many_transpose( + this->sizes[0], this->sizes[1], 2, + FFTW_MPI_DEFAULT_BLOCK, + FFTW_MPI_DEFAULT_BLOCK, + (rnumber*)input, (rnumber*)output, + this->comm, + DEFAULT_FFTW_FLAG); + fftw_interface<rnumber>::execute(tplan); + fftw_interface<rnumber>::destroy_plan(tplan); + break; + case 3: + /* transpose the two local dimensions 1 and 2 */ + typename fftw_interface<rnumber>::complex *atmp; + atmp = fftw_interface<rnumber>::alloc_complex(this->slice_size); + for (int k = 0; k < this->subsizes[0]; k++) + { + /* put transposed slice in atmp */ + for (int j = 0; j < this->sizes[1]; j++) + for (int i = 0; i < this->sizes[2]; i++) + { + atmp[i*this->sizes[1] + j][0] = + input[(k*this->sizes[1] + j)*this->sizes[2] + i][0]; + atmp[i*this->sizes[1] + j][1] = + input[(k*this->sizes[1] + j)*this->sizes[2] + i][1]; + } + /* copy back transposed slice */ + std::copy( + (rnumber*)(atmp), + (rnumber*)(atmp + this->slice_size), + (rnumber*)(input + k*this->slice_size)); + } + fftw_interface<rnumber>::free(atmp); + break; + default: + return EXIT_FAILURE; + break; + } + return EXIT_SUCCESS; +} + +template <class rnumber> +int field_descriptor<rnumber>::interleave( + rnumber *a, + int dim) +{ + TIMEZONE("field_descriptor::interleav"); + /* the following is copied from + * http://agentzlerich.blogspot.com/2010/01/using-fftw-for-in-place-matrix.html + * */ + typename fftw_interface<rnumber>::iodim howmany_dims[2]; + howmany_dims[0].n = dim; + howmany_dims[0].is = this->local_size; + howmany_dims[0].os = 1; + howmany_dims[1].n = this->local_size; + 
howmany_dims[1].is = 1; + howmany_dims[1].os = dim; + const int howmany_rank = sizeof(howmany_dims)/sizeof(howmany_dims[0]); + + typename fftw_interface<rnumber>::plan tmp = fftw_interface<rnumber>::plan_guru_r2r( + /*rank*/0, + /*dims*/nullptr, + howmany_rank, + howmany_dims, + a, + a, + /*kind*/nullptr, + DEFAULT_FFTW_FLAG); + fftw_interface<rnumber>::execute(tmp); + fftw_interface<rnumber>::destroy_plan(tmp); + return EXIT_SUCCESS; +} + +template <class rnumber> +int field_descriptor<rnumber>::interleave( + typename fftw_interface<rnumber>::complex *a, + int dim) +{ + TIMEZONE("field_descriptor::interleave2"); + typename fftw_interface<rnumber>::iodim howmany_dims[2]; + howmany_dims[0].n = dim; + howmany_dims[0].is = this->local_size; + howmany_dims[0].os = 1; + howmany_dims[1].n = this->local_size; + howmany_dims[1].is = 1; + howmany_dims[1].os = dim; + const int howmany_rank = sizeof(howmany_dims)/sizeof(howmany_dims[0]); + + typename fftw_interface<rnumber>::plan tmp = fftw_interface<rnumber>::plan_guru_dft( + /*rank*/0, + /*dims*/nullptr, + howmany_rank, + howmany_dims, + a, + a, + +1, + DEFAULT_FFTW_FLAG); + fftw_interface<rnumber>::execute(tmp); + fftw_interface<rnumber>::destroy_plan(tmp); + return EXIT_SUCCESS; +} + +template <class rnumber> +field_descriptor<rnumber>* field_descriptor<rnumber>::get_transpose() +{ + TIMEZONE("field_descriptor::get_transpose"); + int n[this->ndims]; + for (int i=0; i<this->ndims; i++) + n[i] = this->sizes[this->ndims - i - 1]; + return new field_descriptor<rnumber>(this->ndims, n, this->mpi_dtype, this->comm); +} /*****************************************************************************/ -/* now actually use the macro defined above */ -CLASS_IMPLEMENTATION( - FFTW_MANGLE_FLOAT, - float, - MPI_FLOAT, - MPI_COMPLEX) -CLASS_IMPLEMENTATION( - FFTW_MANGLE_DOUBLE, - double, - MPI_DOUBLE, - BFPS_MPICXX_DOUBLE_COMPLEX) /*****************************************************************************/ @@ -511,23 +505,23 @@ 
template <class rnumber> field_descriptor<rnumber>::~field_descriptor() { DEBUG_MSG_WAIT( - MPI_COMM_WORLD, - this->io_comm == MPI_COMM_NULL ? "null\n" : "not null\n"); + MPI_COMM_WORLD, + this->io_comm == MPI_COMM_NULL ? "null\n" : "not null\n"); DEBUG_MSG_WAIT( - MPI_COMM_WORLD, - "subsizes[0] = %d \n", this->subsizes[0]); + MPI_COMM_WORLD, + "subsizes[0] = %d \n", this->subsizes[0]); if (this->subsizes[0] > 0) { DEBUG_MSG_WAIT( - this->io_comm, - "deallocating mpi_array_dtype\n"); + this->io_comm, + "deallocating mpi_array_dtype\n"); MPI_Type_free(&this->mpi_array_dtype); } if (this->nprocs != this->io_nprocs && this->io_myrank != MPI_PROC_NULL) { DEBUG_MSG_WAIT( - this->io_comm, - "freeing io_comm\n"); + this->io_comm, + "freeing io_comm\n"); MPI_Comm_free(&this->io_comm); } delete[] this->sizes; diff --git a/bfps/cpp/field_descriptor.hpp b/bfps/cpp/field_descriptor.hpp index bfcf52ed415ddb90bd77a6c6793974aea6a94734..2fb491bca7c130704fc5de5d22c3393cb196eec7 100644 --- a/bfps/cpp/field_descriptor.hpp +++ b/bfps/cpp/field_descriptor.hpp @@ -26,6 +26,7 @@ #include <mpi.h> #include <fftw3-mpi.h> +#include "fftw_interface.hpp" #ifndef FIELD_DESCRIPTOR @@ -85,14 +86,14 @@ class field_descriptor rnumber *input, rnumber *output); int transpose( - cnumber *input, - cnumber *output = NULL); + typename fftw_interface<rnumber>::complex *input, + typename fftw_interface<rnumber>::complex *output = NULL); int interleave( rnumber *input, int dim); int interleave( - cnumber *input, + typename fftw_interface<rnumber>::complex *input, int dim); }; diff --git a/bfps/cpp/field_layout.cpp b/bfps/cpp/field_layout.cpp new file mode 100644 index 0000000000000000000000000000000000000000..908904991d5d95b0c89ba679b402d8d5727b8c85 --- /dev/null +++ b/bfps/cpp/field_layout.cpp @@ -0,0 +1,111 @@ +/********************************************************************** +* * +* Copyright 2015 Max Planck Institute * +* for Dynamics and Self-Organization * +* * +* This file is part of bfps. 
* +* bfps is free software: you can redistribute it and/or modify * +* it under the terms of the GNU General Public License as published * +* by the Free Software Foundation, either version 3 of the License, * +* or (at your option) any later version. * +* * +* bfps is distributed in the hope that it will be useful, * +* but WITHOUT ANY WARRANTY; without even the implied warranty of * +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * +* GNU General Public License for more details. * +* * +* You should have received a copy of the GNU General Public License * +* along with bfps. If not, see <http://www.gnu.org/licenses/> * +* * +* Contact: Cristian.Lalescu@ds.mpg.de * +* * +**********************************************************************/ + + +#include <cassert> +#include "field_layout.hpp" +#include "scope_timer.hpp" + /* field_layout constructor: records the communicator, this process's rank and the process count, stores the global sizes, local subsizes and local starts of the field, computes local_size and full_size as the products of subsizes and sizes over all ndim(fc) dimensions, and then, for dimensions 0 and 1, fills the lookup tables rank, all_start and all_size with MPI_Allreduce calls on comm. Parameters: SIZES, SUBSIZES and STARTS must each point to at least 3 readable entries (exactly 3 are copied); COMM_TO_USE is the communicator used for every collective call here, so the constructor must be entered by all of its processes. */ +template <field_components fc> +field_layout<fc>::field_layout( + const hsize_t *SIZES, + const hsize_t *SUBSIZES, + const hsize_t *STARTS, + const MPI_Comm COMM_TO_USE) +{ + TIMEZONE("field_layout::field_layout"); + this->comm = COMM_TO_USE; + MPI_Comm_rank(this->comm, &this->myrank); + MPI_Comm_size(this->comm, &this->nprocs); + + std::copy(SIZES, SIZES + 3, this->sizes); + std::copy(SUBSIZES, SUBSIZES + 3, this->subsizes); + std::copy(STARTS, STARTS + 3, this->starts); /* only the first 3 entries come from the caller; the component dimensions are filled in just below */ + if (fc == THREE || fc == THREExTHREE) + { + this->sizes[3] = 3; + this->subsizes[3] = 3; + this->starts[3] = 0; + } + if (fc == THREExTHREE) + { + this->sizes[4] = 3; + this->subsizes[4] = 3; + this->starts[4] = 0; + } + this->local_size = 1; + this->full_size = 1; + for (unsigned int i=0; i<ndim(fc); i++) + { + this->local_size *= this->subsizes[i]; + this->full_size *= this->sizes[i]; + } + + /*field will at most be distributed in 2D*/ + this->rank.resize(2); + this->all_start.resize(2); + this->all_size.resize(2); + for (int i=0; i<2; i++) + { + this->rank[i].resize(this->sizes[i]); + std::vector<int> local_rank; + local_rank.resize(this->sizes[i],
0); + for (unsigned int ii=this->starts[i]; ii<this->starts[i]+this->subsizes[i]; ii++) + local_rank[ii] = this->myrank; /* after the MPI_SUM below, rank[i][j] is the sum of myrank over every process whose range [starts[i], starts[i]+subsizes[i]) contains j; it is the owning rank only when exactly one process covers j */ + MPI_Allreduce( + &local_rank.front(), + &this->rank[i].front(), + this->sizes[i], + MPI_INT, + MPI_SUM, + this->comm); + this->all_start[i].resize(this->nprocs); + std::vector<int> local_start; + local_start.resize(this->nprocs, 0); + local_start[this->myrank] = this->starts[i]; /* only this process's slot is written, so the MPI_SUM below leaves starts[i] of process p in all_start[i][p]; the same pattern is used for subsizes[i] and all_size[i] further down */ + MPI_Allreduce( + &local_start.front(), + &this->all_start[i].front(), + this->nprocs, + MPI_INT, + MPI_SUM, + this->comm); + this->all_size[i].resize(this->nprocs); + std::vector<int> local_subsize; + local_subsize.resize(this->nprocs, 0); + local_subsize[this->myrank] = this->subsizes[i]; /* NOTE(review): sizes, subsizes and starts are hsize_t, but they are stored in int vectors and sizes[i] is passed as the int count of MPI_Allreduce; presumably all values fit in int -- confirm */ + MPI_Allreduce( + &local_subsize.front(), + &this->all_size[i].front(), + this->nprocs, + MPI_INT, + MPI_SUM, + this->comm); + } +} + /* explicit instantiations of the class template for ONE, THREE and THREExTHREE */ +template class field_layout<ONE>; +template class field_layout<THREE>; +template class field_layout<THREExTHREE>; + diff --git a/bfps/cpp/field_layout.hpp b/bfps/cpp/field_layout.hpp new file mode 100644 index 0000000000000000000000000000000000000000..770119c2dcb05017d495b62559f050646872dc84 --- /dev/null +++ b/bfps/cpp/field_layout.hpp @@ -0,0 +1,79 @@ +/********************************************************************** +* * +* Copyright 2015 Max Planck Institute * +* for Dynamics and Self-Organization * +* * +* This file is part of bfps. * +* * +* bfps is free software: you can redistribute it and/or modify * +* it under the terms of the GNU General Public License as published * +* by the Free Software Foundation, either version 3 of the License, * +* or (at your option) any later version. * +* * +* bfps is distributed in the hope that it will be useful, * +* but WITHOUT ANY WARRANTY; without even the implied warranty of * +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * +* GNU General Public License for more details. * +* * +* You should have received a copy of the GNU General Public License * +* along with bfps.
If not, see <http://www.gnu.org/licenses/> * +* * +* Contact: Cristian.Lalescu@ds.mpg.de * +* * +**********************************************************************/ + + + +#include <vector> +#include "base.hpp" + +#ifndef FIELD_LAYOUT_HPP + +#define FIELD_LAYOUT_HPP + +enum field_components {ONE, THREE, THREExTHREE}; /* ONE, THREE and THREExTHREE correspond to 1, 3 and 9 components (see ncomp) and to 3, 4 and 5 array dimensions (see ndim) */ + +constexpr unsigned int ncomp( + field_components fc) + /* return actual number of field components for each enum value */ +{ + return ((fc == THREE) ? 3 : ( + (fc == THREExTHREE) ? 9 : 1)); /* any value other than THREE or THREExTHREE yields 1 */ +} + +constexpr unsigned int ndim( + field_components fc) + /* return actual number of field dimensions for each enum value */ +{ + return ((fc == THREE) ? 4 : ( + (fc == THREExTHREE) ? 5 : 3)); /* any value other than THREE or THREExTHREE yields 3 */ +} + /* field_layout<fc>: public record describing how a field with ndim(fc) dimensions is split over the processes of an MPI communicator. It holds the global extents (sizes), this process's extents (subsizes) and offsets (starts), the element counts local_size and full_size, the communicator with this process's rank and the process count, and three per-dimension lookup tables (rank, all_start, all_size). All members are public and the destructor is empty. */ +template <field_components fc> +class field_layout +{ + public: + /* description */ + hsize_t sizes[ndim(fc)]; + hsize_t subsizes[ndim(fc)]; + hsize_t starts[ndim(fc)]; + hsize_t local_size, full_size; + + int myrank, nprocs; + MPI_Comm comm; + + std::vector<std::vector<int>> rank; /* outer index of rank, all_start and all_size is the dimension; presumably rank is indexed by position along that dimension and the other two by process rank -- verify against the constructor in field_layout.cpp */ + std::vector<std::vector<int>> all_start; + std::vector<std::vector<int>> all_size; + + /* methods */ + field_layout( + const hsize_t *SIZES, + const hsize_t *SUBSIZES, + const hsize_t *STARTS, + const MPI_Comm COMM_TO_USE); + ~field_layout(){} +}; + +#endif//FIELD_LAYOUT_HPP + diff --git a/bfps/cpp/fluid_solver.cpp b/bfps/cpp/fluid_solver.cpp index a634117bc43075db475be47256f1579b39bc1193..319186103797f8135d4d3e2244ed5e3a8f271b00 100644 --- a/bfps/cpp/fluid_solver.cpp +++ b/bfps/cpp/fluid_solver.cpp @@ -31,7 +31,8 @@ #include <cstring> #include "fluid_solver.hpp" #include "fftw_tools.hpp" - +#include "scope_timer.hpp" +#include "shared_array.hpp" template <class rnumber> @@ -48,911 +49,1003 @@ void fluid_solver<rnumber>::impose_zero_modes() /*****************************************************************************/ /* macro for specializations to numeric types compatible with FFTW */ -#define FLUID_SOLVER_DEFINITIONS(FFTW, R, MPI_RNUM, MPI_CNUM) \ - \ -template<> \
-fluid_solver<R>::fluid_solver( \ - const char *NAME, \ - int nx, \ - int ny, \ - int nz, \ - double DKX, \ - double DKY, \ - double DKZ, \ - int DEALIAS_TYPE, \ - unsigned FFTW_PLAN_RIGOR) : fluid_solver_base<R>( \ - NAME, \ - nx , ny , nz, \ - DKX, DKY, DKZ, \ - DEALIAS_TYPE, \ - FFTW_PLAN_RIGOR) \ -{ \ - this->cvorticity = FFTW(alloc_complex)(this->cd->local_size);\ - this->cvelocity = FFTW(alloc_complex)(this->cd->local_size);\ - this->rvorticity = FFTW(alloc_real)(this->cd->local_size*2);\ - /*this->rvelocity = (R*)(this->cvelocity);*/\ - this->rvelocity = FFTW(alloc_real)(this->cd->local_size*2);\ - \ - this->ru = this->rvelocity;\ - this->cu = this->cvelocity;\ - \ - this->rv[0] = this->rvorticity;\ - this->rv[3] = this->rvorticity;\ - this->cv[0] = this->cvorticity;\ - this->cv[3] = this->cvorticity;\ - \ - this->cv[1] = FFTW(alloc_complex)(this->cd->local_size);\ - this->cv[2] = this->cv[1];\ - this->rv[1] = FFTW(alloc_real)(this->cd->local_size*2);\ - this->rv[2] = this->rv[1];\ - \ - this->c2r_vorticity = new FFTW(plan);\ - this->r2c_vorticity = new FFTW(plan);\ - this->c2r_velocity = new FFTW(plan);\ - this->r2c_velocity = new FFTW(plan);\ - \ - ptrdiff_t sizes[] = {nz, \ - ny, \ - nx};\ - \ - *(FFTW(plan)*)this->c2r_vorticity = FFTW(mpi_plan_many_dft_c2r)( \ - 3, sizes, 3, FFTW_MPI_DEFAULT_BLOCK, FFTW_MPI_DEFAULT_BLOCK, \ - this->cvorticity, this->rvorticity, \ - MPI_COMM_WORLD, this->fftw_plan_rigor | FFTW_MPI_TRANSPOSED_IN); \ - \ - *(FFTW(plan)*)this->r2c_vorticity = FFTW(mpi_plan_many_dft_r2c)( \ - 3, sizes, 3, FFTW_MPI_DEFAULT_BLOCK, FFTW_MPI_DEFAULT_BLOCK, \ - this->rvorticity, this->cvorticity, \ - MPI_COMM_WORLD, this->fftw_plan_rigor | FFTW_MPI_TRANSPOSED_OUT); \ - \ - *(FFTW(plan)*)this->c2r_velocity = FFTW(mpi_plan_many_dft_c2r)( \ - 3, sizes, 3, FFTW_MPI_DEFAULT_BLOCK, FFTW_MPI_DEFAULT_BLOCK, \ - this->cvelocity, this->rvelocity, \ - MPI_COMM_WORLD, this->fftw_plan_rigor | FFTW_MPI_TRANSPOSED_IN); \ - \ - *(FFTW(plan)*)this->r2c_velocity = 
FFTW(mpi_plan_many_dft_r2c)( \ - 3, sizes, 3, FFTW_MPI_DEFAULT_BLOCK, FFTW_MPI_DEFAULT_BLOCK, \ - this->rvelocity, this->cvelocity, \ - MPI_COMM_WORLD, this->fftw_plan_rigor | FFTW_MPI_TRANSPOSED_OUT); \ - \ - this->uc2r = this->c2r_velocity;\ - this->ur2c = this->r2c_velocity;\ - this->vc2r[0] = this->c2r_vorticity;\ - this->vr2c[0] = this->r2c_vorticity;\ - \ - this->vc2r[1] = new FFTW(plan);\ - this->vr2c[1] = new FFTW(plan);\ - this->vc2r[2] = new FFTW(plan);\ - this->vr2c[2] = new FFTW(plan);\ - \ - *(FFTW(plan)*)(this->vc2r[1]) = FFTW(mpi_plan_many_dft_c2r)( \ - 3, sizes, 3, FFTW_MPI_DEFAULT_BLOCK, FFTW_MPI_DEFAULT_BLOCK, \ - this->cv[1], this->rv[1], \ - MPI_COMM_WORLD, this->fftw_plan_rigor | FFTW_MPI_TRANSPOSED_IN); \ - \ - *(FFTW(plan)*)this->vc2r[2] = FFTW(mpi_plan_many_dft_c2r)( \ - 3, sizes, 3, FFTW_MPI_DEFAULT_BLOCK, FFTW_MPI_DEFAULT_BLOCK, \ - this->cv[2], this->rv[2], \ - MPI_COMM_WORLD, this->fftw_plan_rigor | FFTW_MPI_TRANSPOSED_IN); \ - \ - *(FFTW(plan)*)this->vr2c[1] = FFTW(mpi_plan_many_dft_r2c)( \ - 3, sizes, 3, FFTW_MPI_DEFAULT_BLOCK, FFTW_MPI_DEFAULT_BLOCK, \ - this->rv[1], this->cv[1], \ - MPI_COMM_WORLD, this->fftw_plan_rigor | FFTW_MPI_TRANSPOSED_OUT); \ - \ - *(FFTW(plan)*)this->vr2c[2] = FFTW(mpi_plan_many_dft_r2c)( \ - 3, sizes, 3, FFTW_MPI_DEFAULT_BLOCK, FFTW_MPI_DEFAULT_BLOCK, \ - this->rv[2], this->cv[2], \ - MPI_COMM_WORLD, this->fftw_plan_rigor | FFTW_MPI_TRANSPOSED_OUT); \ - \ - /* ``physical'' parameters etc, initialized here just in case */ \ - \ - this->nu = 0.1; \ - this->fmode = 1; \ - this->famplitude = 1.0; \ - this->fk0 = 0; \ - this->fk1 = 3.0; \ - /* initialization of fields must be done AFTER planning */ \ - std::fill_n((R*)this->cvorticity, this->cd->local_size*2, 0.0); \ - std::fill_n((R*)this->cvelocity, this->cd->local_size*2, 0.0); \ - std::fill_n(this->rvelocity, this->cd->local_size*2, 0.0); \ - std::fill_n(this->rvorticity, this->cd->local_size*2, 0.0); \ - std::fill_n((R*)this->cv[1], this->cd->local_size*2, 
0.0); \ - std::fill_n(this->rv[1], this->cd->local_size*2, 0.0); \ - std::fill_n(this->rv[2], this->cd->local_size*2, 0.0); \ -} \ - \ -template<> \ -fluid_solver<R>::~fluid_solver() \ -{ \ - FFTW(destroy_plan)(*(FFTW(plan)*)this->c2r_vorticity);\ - FFTW(destroy_plan)(*(FFTW(plan)*)this->r2c_vorticity);\ - FFTW(destroy_plan)(*(FFTW(plan)*)this->c2r_velocity );\ - FFTW(destroy_plan)(*(FFTW(plan)*)this->r2c_velocity );\ - FFTW(destroy_plan)(*(FFTW(plan)*)this->vc2r[1]);\ - FFTW(destroy_plan)(*(FFTW(plan)*)this->vr2c[1]);\ - FFTW(destroy_plan)(*(FFTW(plan)*)this->vc2r[2]);\ - FFTW(destroy_plan)(*(FFTW(plan)*)this->vr2c[2]);\ - \ - delete (FFTW(plan)*)this->c2r_vorticity;\ - delete (FFTW(plan)*)this->r2c_vorticity;\ - delete (FFTW(plan)*)this->c2r_velocity ;\ - delete (FFTW(plan)*)this->r2c_velocity ;\ - delete (FFTW(plan)*)this->vc2r[1];\ - delete (FFTW(plan)*)this->vr2c[1];\ - delete (FFTW(plan)*)this->vc2r[2];\ - delete (FFTW(plan)*)this->vr2c[2];\ - \ - FFTW(free)(this->cv[1]);\ - FFTW(free)(this->rv[1]);\ - FFTW(free)(this->cvorticity);\ - FFTW(free)(this->rvorticity);\ - FFTW(free)(this->cvelocity);\ - FFTW(free)(this->rvelocity);\ -} \ - \ -template<> \ -void fluid_solver<R>::compute_vorticity() \ -{ \ - ptrdiff_t tindex; \ - CLOOP_K2( \ - this, \ - tindex = 3*cindex; \ - if (k2 <= this->kM2) \ - { \ - this->cvorticity[tindex+0][0] = -(this->ky[yindex]*this->cu[tindex+2][1] - this->kz[zindex]*this->cu[tindex+1][1]); \ - this->cvorticity[tindex+1][0] = -(this->kz[zindex]*this->cu[tindex+0][1] - this->kx[xindex]*this->cu[tindex+2][1]); \ - this->cvorticity[tindex+2][0] = -(this->kx[xindex]*this->cu[tindex+1][1] - this->ky[yindex]*this->cu[tindex+0][1]); \ - this->cvorticity[tindex+0][1] = (this->ky[yindex]*this->cu[tindex+2][0] - this->kz[zindex]*this->cu[tindex+1][0]); \ - this->cvorticity[tindex+1][1] = (this->kz[zindex]*this->cu[tindex+0][0] - this->kx[xindex]*this->cu[tindex+2][0]); \ - this->cvorticity[tindex+2][1] = (this->kx[xindex]*this->cu[tindex+1][0] - 
this->ky[yindex]*this->cu[tindex+0][0]); \ - } \ - else \ - std::fill_n((R*)(this->cvorticity+tindex), 6, 0.0); \ - ); \ - this->symmetrize(this->cvorticity, 3); \ -} \ - \ -template<> \ -void fluid_solver<R>::compute_velocity(FFTW(complex) *vorticity) \ -{ \ - ptrdiff_t tindex; \ - CLOOP_K2( \ - this, \ - tindex = 3*cindex; \ - if (k2 <= this->kM2 && k2 > 0) \ - { \ - this->cu[tindex+0][0] = -(this->ky[yindex]*vorticity[tindex+2][1] - this->kz[zindex]*vorticity[tindex+1][1]) / k2; \ - this->cu[tindex+1][0] = -(this->kz[zindex]*vorticity[tindex+0][1] - this->kx[xindex]*vorticity[tindex+2][1]) / k2; \ - this->cu[tindex+2][0] = -(this->kx[xindex]*vorticity[tindex+1][1] - this->ky[yindex]*vorticity[tindex+0][1]) / k2; \ - this->cu[tindex+0][1] = (this->ky[yindex]*vorticity[tindex+2][0] - this->kz[zindex]*vorticity[tindex+1][0]) / k2; \ - this->cu[tindex+1][1] = (this->kz[zindex]*vorticity[tindex+0][0] - this->kx[xindex]*vorticity[tindex+2][0]) / k2; \ - this->cu[tindex+2][1] = (this->kx[xindex]*vorticity[tindex+1][0] - this->ky[yindex]*vorticity[tindex+0][0]) / k2; \ - } \ - else \ - std::fill_n((R*)(this->cu+tindex), 6, 0.0); \ - ); \ - /*this->symmetrize(this->cu, 3);*/ \ -} \ - \ -template<> \ -void fluid_solver<R>::ift_velocity() \ -{ \ - FFTW(execute)(*((FFTW(plan)*)this->c2r_velocity )); \ -} \ - \ -template<> \ -void fluid_solver<R>::ift_vorticity() \ -{ \ - std::fill_n(this->rvorticity, this->cd->local_size*2, 0.0); \ - FFTW(execute)(*((FFTW(plan)*)this->c2r_vorticity )); \ -} \ - \ -template<> \ -void fluid_solver<R>::dft_velocity() \ -{ \ - FFTW(execute)(*((FFTW(plan)*)this->r2c_velocity )); \ -} \ - \ -template<> \ -void fluid_solver<R>::dft_vorticity() \ -{ \ - std::fill_n((R*)this->cvorticity, this->cd->local_size*2, 0.0); \ - FFTW(execute)(*((FFTW(plan)*)this->r2c_vorticity )); \ -} \ - \ -template<> \ -void fluid_solver<R>::add_forcing(\ - FFTW(complex) *acc_field, FFTW(complex) *vort_field, R factor) \ -{ \ - if (strcmp(this->forcing_type, "none") == 
0) \ - return; \ - if (strcmp(this->forcing_type, "Kolmogorov") == 0) \ - { \ - ptrdiff_t cindex; \ - if (this->cd->myrank == this->cd->rank[this->fmode]) \ - { \ - cindex = ((this->fmode - this->cd->starts[0]) * this->cd->sizes[1])*this->cd->sizes[2]*3; \ - acc_field[cindex+2][0] -= this->famplitude*factor/2; \ - } \ - if (this->cd->myrank == this->cd->rank[this->cd->sizes[0] - this->fmode]) \ - { \ - cindex = ((this->cd->sizes[0] - this->fmode - this->cd->starts[0]) * this->cd->sizes[1])*this->cd->sizes[2]*3; \ - acc_field[cindex+2][0] -= this->famplitude*factor/2; \ - } \ - return; \ - } \ - if (strcmp(this->forcing_type, "linear") == 0) \ - { \ - double knorm; \ - CLOOP( \ - this, \ - knorm = sqrt(this->kx[xindex]*this->kx[xindex] + \ - this->ky[yindex]*this->ky[yindex] + \ - this->kz[zindex]*this->kz[zindex]); \ - if ((this->fk0 <= knorm) && \ - (this->fk1 >= knorm)) \ - for (int c=0; c<3; c++) \ - for (int i=0; i<2; i++) \ - acc_field[cindex*3+c][i] += this->famplitude*vort_field[cindex*3+c][i]*factor; \ - ); \ - return; \ - } \ -} \ - \ -template<> \ -void fluid_solver<R>::omega_nonlin( \ - int src) \ -{ \ - assert(src >= 0 && src < 3); \ - this->compute_velocity(this->cv[src]); \ - /* get fields from Fourier space to real space */ \ - FFTW(execute)(*((FFTW(plan)*)this->c2r_velocity )); \ - FFTW(execute)(*((FFTW(plan)*)this->vc2r[src])); \ - /* compute cross product $u \times \omega$, and normalize */ \ - R tmp[3][2]; \ - ptrdiff_t tindex; \ - RLOOP ( \ - this, \ - tindex = 3*rindex; \ - for (int cc=0; cc<3; cc++) \ - tmp[cc][0] = (this->ru[tindex+(cc+1)%3]*this->rv[src][tindex+(cc+2)%3] - \ - this->ru[tindex+(cc+2)%3]*this->rv[src][tindex+(cc+1)%3]); \ - for (int cc=0; cc<3; cc++) \ - this->ru[(3*rindex)+cc] = tmp[cc][0] / this->normalization_factor; \ - ); \ - /* go back to Fourier space */ \ - this->clean_up_real_space(this->ru, 3); \ - FFTW(execute)(*((FFTW(plan)*)this->r2c_velocity )); \ - this->dealias(this->cu, 3); \ - /* $\imath k \times Fourier(u 
\times \omega)$ */ \ - CLOOP( \ - this, \ - tindex = 3*cindex; \ - { \ - tmp[0][0] = -(this->ky[yindex]*this->cu[tindex+2][1] - this->kz[zindex]*this->cu[tindex+1][1]); \ - tmp[1][0] = -(this->kz[zindex]*this->cu[tindex+0][1] - this->kx[xindex]*this->cu[tindex+2][1]); \ - tmp[2][0] = -(this->kx[xindex]*this->cu[tindex+1][1] - this->ky[yindex]*this->cu[tindex+0][1]); \ - tmp[0][1] = (this->ky[yindex]*this->cu[tindex+2][0] - this->kz[zindex]*this->cu[tindex+1][0]); \ - tmp[1][1] = (this->kz[zindex]*this->cu[tindex+0][0] - this->kx[xindex]*this->cu[tindex+2][0]); \ - tmp[2][1] = (this->kx[xindex]*this->cu[tindex+1][0] - this->ky[yindex]*this->cu[tindex+0][0]); \ - } \ - for (int cc=0; cc<3; cc++) for (int i=0; i<2; i++) \ - this->cu[tindex+cc][i] = tmp[cc][i]; \ - ); \ - this->add_forcing(this->cu, this->cv[src], 1.0); \ - this->force_divfree(this->cu); \ -} \ - \ -template<> \ -void fluid_solver<R>::step(double dt) \ -{ \ - double factor0, factor1; \ - std::fill_n((R*)this->cv[1], this->cd->local_size*2, 0.0); \ - this->omega_nonlin(0); \ - CLOOP_K2( \ - this, \ - if (k2 <= this->kM2) \ - { \ - factor0 = exp(-this->nu * k2 * dt); \ - for (int cc=0; cc<3; cc++) for (int i=0; i<2; i++) \ - this->cv[1][3*cindex+cc][i] = (this->cv[0][3*cindex+cc][i] + \ - dt*this->cu[3*cindex+cc][i])*factor0; \ - } \ - ); \ - \ - this->omega_nonlin(1); \ - CLOOP_K2( \ - this, \ - if (k2 <= this->kM2) \ - { \ - factor0 = exp(-this->nu * k2 * dt/2); \ - factor1 = exp( this->nu * k2 * dt/2); \ - for (int cc=0; cc<3; cc++) for (int i=0; i<2; i++) \ - this->cv[2][3*cindex+cc][i] = (3*this->cv[0][3*cindex+cc][i]*factor0 + \ - (this->cv[1][3*cindex+cc][i] + \ - dt*this->cu[3*cindex+cc][i])*factor1)*0.25; \ - } \ - ); \ - \ - this->omega_nonlin(2); \ - CLOOP_K2( \ - this, \ - if (k2 <= this->kM2) \ - { \ - factor0 = exp(-this->nu * k2 * dt * 0.5); \ - for (int cc=0; cc<3; cc++) for (int i=0; i<2; i++) \ - this->cv[3][3*cindex+cc][i] = (this->cv[0][3*cindex+cc][i]*factor0 + \ - 
2*(this->cv[2][3*cindex+cc][i] + \ - dt*this->cu[3*cindex+cc][i]))*factor0/3; \ - } \ - ); \ - \ - this->force_divfree(this->cvorticity); \ - this->symmetrize(this->cvorticity, 3); \ - this->iteration++; \ -} \ - \ -template<> \ -int fluid_solver<R>::read(char field, char representation) \ -{ \ - char fname[512]; \ - int read_result; \ - if (field == 'v') \ - { \ - if (representation == 'c') \ - { \ - this->fill_up_filename("cvorticity", fname); \ - read_result = this->cd->read(fname, (void*)this->cvorticity); \ - if (read_result != EXIT_SUCCESS) \ - return read_result; \ - } \ - if (representation == 'r') \ - { \ - read_result = this->read_base("rvorticity", this->rvorticity); \ - if (read_result != EXIT_SUCCESS) \ - return read_result; \ - else \ - FFTW(execute)(*((FFTW(plan)*)this->r2c_vorticity )); \ - } \ - this->low_pass_Fourier(this->cvorticity, 3, this->kM); \ - this->force_divfree(this->cvorticity); \ - this->symmetrize(this->cvorticity, 3); \ - return EXIT_SUCCESS; \ - } \ - if ((field == 'u') && (representation == 'c')) \ - { \ - read_result = this->read_base("cvelocity", this->cvelocity); \ - this->low_pass_Fourier(this->cvelocity, 3, this->kM); \ - this->force_divfree(this->cvorticity); \ - this->symmetrize(this->cvorticity, 3); \ - return read_result; \ - } \ - if ((field == 'u') && (representation == 'r')) \ - return this->read_base("rvelocity", this->rvelocity); \ - return EXIT_FAILURE; \ -} \ - \ -template<> \ -int fluid_solver<R>::write(char field, char representation) \ -{ \ - char fname[512]; \ - if ((field == 'v') && (representation == 'c')) \ - { \ - this->fill_up_filename("cvorticity", fname); \ - return this->cd->write(fname, (void*)this->cvorticity); \ - } \ - if ((field == 'v') && (representation == 'r')) \ - { \ - FFTW(execute)(*((FFTW(plan)*)this->c2r_vorticity )); \ - clip_zero_padding<R>(this->rd, this->rvorticity, 3); \ - this->fill_up_filename("rvorticity", fname); \ - return this->rd->write(fname, this->rvorticity); \ - } \ - 
this->compute_velocity(this->cvorticity); \ - if ((field == 'u') && (representation == 'c')) \ - { \ - this->fill_up_filename("cvelocity", fname); \ - return this->cd->write(fname, this->cvelocity); \ - } \ - if ((field == 'u') && (representation == 'r')) \ - { \ - this->ift_velocity(); \ - clip_zero_padding<R>(this->rd, this->rvelocity, 3); \ - this->fill_up_filename("rvelocity", fname); \ - return this->rd->write(fname, this->rvelocity); \ - } \ - return EXIT_FAILURE; \ -} \ - \ -template<> \ -int fluid_solver<R>::write_rTrS2() \ -{ \ - char fname[512]; \ - this->fill_up_filename("rTrS2", fname); \ - FFTW(complex) *ca; \ - R *ra; \ - ca = FFTW(alloc_complex)(this->cd->local_size*3); \ - ra = (R*)(ca); \ - this->compute_velocity(this->cvorticity); \ - this->compute_vector_gradient(ca, this->cvelocity); \ - for (int cc=0; cc<3; cc++) \ - { \ - std::copy( \ - (R*)(ca + cc*this->cd->local_size), \ - (R*)(ca + (cc+1)*this->cd->local_size), \ - (R*)this->cv[1]); \ - FFTW(execute)(*((FFTW(plan)*)this->vc2r[1])); \ - std::copy( \ - this->rv[1], \ - this->rv[1] + this->cd->local_size*2, \ - ra + cc*this->cd->local_size*2); \ - } \ - /* velocity gradient is now stored, in real space, in ra */ \ - R *dx_u, *dy_u, *dz_u; \ - dx_u = ra; \ - dy_u = ra + 2*this->cd->local_size; \ - dz_u = ra + 4*this->cd->local_size; \ - R *trS2 = FFTW(alloc_real)((this->cd->local_size/3)*2); \ - double average_local = 0; \ - RLOOP( \ - this, \ - R AxxAxx; \ - R AyyAyy; \ - R AzzAzz; \ - R Sxy; \ - R Syz; \ - R Szx; \ - ptrdiff_t tindex = 3*rindex; \ - AxxAxx = dx_u[tindex+0]*dx_u[tindex+0]; \ - AyyAyy = dy_u[tindex+1]*dy_u[tindex+1]; \ - AzzAzz = dz_u[tindex+2]*dz_u[tindex+2]; \ - Sxy = dx_u[tindex+1]+dy_u[tindex+0]; \ - Syz = dy_u[tindex+2]+dz_u[tindex+1]; \ - Szx = dz_u[tindex+0]+dx_u[tindex+2]; \ - trS2[rindex] = (AxxAxx + AyyAyy + AzzAzz + \ - (Sxy*Sxy + Syz*Syz + Szx*Szx)/2); \ - average_local += trS2[rindex]; \ - ); \ - double average; \ - MPI_Allreduce( \ - &average_local, \ - &average, 
\ - 1, \ - MPI_DOUBLE, MPI_SUM, this->cd->comm); \ - DEBUG_MSG("average TrS2 is %g\n", average); \ - FFTW(free)(ca); \ - /* output goes here */ \ - int ntmp[3]; \ - ntmp[0] = this->rd->sizes[0]; \ - ntmp[1] = this->rd->sizes[1]; \ - ntmp[2] = this->rd->sizes[2]; \ - field_descriptor<R> *scalar_descriptor = new field_descriptor<R>(3, ntmp, MPI_RNUM, this->cd->comm); \ - clip_zero_padding<R>(scalar_descriptor, trS2, 1); \ - int return_value = scalar_descriptor->write(fname, trS2); \ - delete scalar_descriptor; \ - FFTW(free)(trS2); \ - return return_value; \ -} \ - \ -template<> \ -int fluid_solver<R>::write_renstrophy() \ -{ \ - char fname[512]; \ - this->fill_up_filename("renstrophy", fname); \ - R *enstrophy = FFTW(alloc_real)((this->cd->local_size/3)*2); \ - this->ift_vorticity(); \ - double average_local = 0; \ - RLOOP( \ - this, \ - ptrdiff_t tindex = 3*rindex; \ - enstrophy[rindex] = ( \ - this->rvorticity[tindex+0]*this->rvorticity[tindex+0] + \ - this->rvorticity[tindex+1]*this->rvorticity[tindex+1] + \ - this->rvorticity[tindex+2]*this->rvorticity[tindex+2] \ - )/2; \ - average_local += enstrophy[rindex]; \ - ); \ - double average; \ - MPI_Allreduce( \ - &average_local, \ - &average, \ - 1, \ - MPI_DOUBLE, MPI_SUM, this->cd->comm); \ - DEBUG_MSG("average enstrophy is %g\n", average); \ - /* output goes here */ \ - int ntmp[3]; \ - ntmp[0] = this->rd->sizes[0]; \ - ntmp[1] = this->rd->sizes[1]; \ - ntmp[2] = this->rd->sizes[2]; \ - field_descriptor<R> *scalar_descriptor = new field_descriptor<R>(3, ntmp, MPI_RNUM, this->cd->comm); \ - clip_zero_padding<R>(scalar_descriptor, enstrophy, 1); \ - int return_value = scalar_descriptor->write(fname, enstrophy); \ - delete scalar_descriptor; \ - FFTW(free)(enstrophy); \ - return return_value; \ -} \ - \ -template<> \ -void fluid_solver<R>::compute_pressure(FFTW(complex) *pressure) \ -{ \ - /* assume velocity is already in real space representation */ \ - ptrdiff_t tindex; \ - \ - /* diagonal terms 11 22 33 */\ - 
RLOOP ( \ - this, \ - tindex = 3*rindex; \ - for (int cc=0; cc<3; cc++) \ - this->rv[1][tindex+cc] = this->ru[tindex+cc]*this->ru[tindex+cc]; \ - ); \ - this->clean_up_real_space(this->rv[1], 3); \ - FFTW(execute)(*((FFTW(plan)*)this->vr2c[1])); \ - this->dealias(this->cv[1], 3); \ - CLOOP_K2( \ - this, \ - if (k2 <= this->kM2 && k2 > 0) \ - { \ - tindex = 3*cindex; \ - for (int i=0; i<2; i++) \ - { \ - pressure[cindex][i] = -(this->kx[xindex]*this->kx[xindex]*this->cv[1][tindex+0][i] + \ - this->ky[yindex]*this->ky[yindex]*this->cv[1][tindex+1][i] + \ - this->kz[zindex]*this->kz[zindex]*this->cv[1][tindex+2][i]); \ - } \ - } \ - else \ - std::fill_n((R*)(pressure+cindex), 2, 0.0); \ - ); \ - /* off-diagonal terms 12 23 31 */\ - RLOOP ( \ - this, \ - tindex = 3*rindex; \ - for (int cc=0; cc<3; cc++) \ - this->rv[1][tindex+cc] = this->ru[tindex+cc]*this->ru[tindex+(cc+1)%3]; \ - ); \ - this->clean_up_real_space(this->rv[1], 3); \ - FFTW(execute)(*((FFTW(plan)*)this->vr2c[1])); \ - this->dealias(this->cv[1], 3); \ - CLOOP_K2( \ - this, \ - if (k2 <= this->kM2 && k2 > 0) \ - { \ - tindex = 3*cindex; \ - for (int i=0; i<2; i++) \ - { \ - pressure[cindex][i] -= 2*(this->kx[xindex]*this->ky[yindex]*this->cv[1][tindex+0][i] + \ - this->ky[yindex]*this->kz[zindex]*this->cv[1][tindex+1][i] + \ - this->kz[zindex]*this->kx[xindex]*this->cv[1][tindex+2][i]); \ - pressure[cindex][i] /= this->normalization_factor*k2; \ - } \ - } \ - ); \ -} \ - \ -template<> \ -void fluid_solver<R>::compute_gradient_statistics( \ - FFTW(complex) *vec, \ - double *gradu_moments, \ - double *trS2QR_moments, \ - ptrdiff_t *gradu_hist, \ - ptrdiff_t *trS2QR_hist, \ - ptrdiff_t *QR2D_hist, \ - double trS2QR_max_estimates[], \ - double gradu_max_estimates[], \ - int nbins, \ - int QR2D_nbins) \ -{ \ - FFTW(complex) *ca; \ - R *ra; \ - ca = FFTW(alloc_complex)(this->cd->local_size*3); \ - ra = (R*)(ca); \ - this->compute_vector_gradient(ca, vec); \ - for (int cc=0; cc<3; cc++) \ - { \ - std::copy( \ - 
(R*)(ca + cc*this->cd->local_size), \ - (R*)(ca + (cc+1)*this->cd->local_size), \ - (R*)this->cv[1]); \ - FFTW(execute)(*((FFTW(plan)*)this->vc2r[1])); \ - std::copy( \ - this->rv[1], \ - this->rv[1] + this->cd->local_size*2, \ - ra + cc*this->cd->local_size*2); \ - } \ - /* velocity gradient is now stored, in real space, in ra */ \ - std::fill_n(this->rv[1], 2*this->cd->local_size, 0.0); \ - R *dx_u, *dy_u, *dz_u; \ - dx_u = ra; \ - dy_u = ra + 2*this->cd->local_size; \ - dz_u = ra + 4*this->cd->local_size; \ - double binsize[2]; \ - double tmp_max_estimate[3]; \ - tmp_max_estimate[0] = trS2QR_max_estimates[0]; \ - tmp_max_estimate[1] = trS2QR_max_estimates[1]; \ - tmp_max_estimate[2] = trS2QR_max_estimates[2]; \ - binsize[0] = 2*tmp_max_estimate[2] / QR2D_nbins; \ - binsize[1] = 2*tmp_max_estimate[1] / QR2D_nbins; \ - ptrdiff_t *local_hist = new ptrdiff_t[QR2D_nbins*QR2D_nbins]; \ - std::fill_n(local_hist, QR2D_nbins*QR2D_nbins, 0); \ - RLOOP( \ - this, \ - R AxxAxx; \ - R AyyAyy; \ - R AzzAzz; \ - R AxyAyx; \ - R AyzAzy; \ - R AzxAxz; \ - R Sxy; \ - R Syz; \ - R Szx; \ - ptrdiff_t tindex = 3*rindex; \ - AxxAxx = dx_u[tindex+0]*dx_u[tindex+0]; \ - AyyAyy = dy_u[tindex+1]*dy_u[tindex+1]; \ - AzzAzz = dz_u[tindex+2]*dz_u[tindex+2]; \ - AxyAyx = dx_u[tindex+1]*dy_u[tindex+0]; \ - AyzAzy = dy_u[tindex+2]*dz_u[tindex+1]; \ - AzxAxz = dz_u[tindex+0]*dx_u[tindex+2]; \ - this->rv[1][tindex+1] = - (AxxAxx + AyyAyy + AzzAzz)/2 - AxyAyx - AyzAzy - AzxAxz; \ - this->rv[1][tindex+2] = - (dx_u[tindex+0]*(AxxAxx/3 + AxyAyx + AzxAxz) + \ - dy_u[tindex+1]*(AyyAyy/3 + AxyAyx + AyzAzy) + \ - dz_u[tindex+2]*(AzzAzz/3 + AzxAxz + AyzAzy) + \ - dx_u[tindex+1]*dy_u[tindex+2]*dz_u[tindex+0] + \ - dx_u[tindex+2]*dy_u[tindex+0]*dz_u[tindex+1]); \ - int bin0 = int(floor((this->rv[1][tindex+2] + tmp_max_estimate[2]) / binsize[0])); \ - int bin1 = int(floor((this->rv[1][tindex+1] + tmp_max_estimate[1]) / binsize[1])); \ - if ((bin0 >= 0 && bin0 < QR2D_nbins) && \ - (bin1 >= 0 && bin1 < 
QR2D_nbins)) \ - local_hist[bin1*QR2D_nbins + bin0]++; \ - Sxy = dx_u[tindex+1]+dy_u[tindex+0]; \ - Syz = dy_u[tindex+2]+dz_u[tindex+1]; \ - Szx = dz_u[tindex+0]+dx_u[tindex+2]; \ - this->rv[1][tindex] = (AxxAxx + AyyAyy + AzzAzz + \ - (Sxy*Sxy + Syz*Syz + Szx*Szx)/2); \ - ); \ - MPI_Allreduce( \ - local_hist, \ - QR2D_hist, \ - QR2D_nbins * QR2D_nbins, \ - MPI_INT64_T, MPI_SUM, this->cd->comm); \ - delete[] local_hist; \ - this->compute_rspace_stats3( \ - this->rv[1], \ - trS2QR_moments, \ - trS2QR_hist, \ - tmp_max_estimate, \ - nbins); \ - double *tmp_moments = new double[10*3]; \ - ptrdiff_t *tmp_hist = new ptrdiff_t[nbins*3]; \ - for (int cc=0; cc<3; cc++) \ - { \ - tmp_max_estimate[0] = gradu_max_estimates[cc*3 + 0]; \ - tmp_max_estimate[1] = gradu_max_estimates[cc*3 + 1]; \ - tmp_max_estimate[2] = gradu_max_estimates[cc*3 + 2]; \ - this->compute_rspace_stats3( \ - dx_u, \ - tmp_moments, \ - tmp_hist, \ - tmp_max_estimate, \ - nbins); \ - for (int n = 0; n < 10; n++) \ - for (int i = 0; i < 3 ; i++) \ - { \ - gradu_moments[(n*3 + cc)*3 + i] = tmp_moments[n*3 + i]; \ - } \ - for (int n = 0; n < nbins; n++) \ - for (int i = 0; i < 3; i++) \ - { \ - gradu_hist[(n*3 + cc)*3 + i] = tmp_hist[n*3 + i]; \ - } \ - } \ - delete[] tmp_moments; \ - delete[] tmp_hist; \ - FFTW(free)(ca); \ -} \ - \ -template<> \ -void fluid_solver<R>::compute_Lagrangian_acceleration(R (*acceleration)[2]) \ -{ \ - ptrdiff_t tindex; \ - FFTW(complex) *pressure; \ - pressure = FFTW(alloc_complex)(this->cd->local_size/3); \ - this->compute_velocity(this->cvorticity); \ - this->ift_velocity(); \ - this->compute_pressure(pressure); \ - this->compute_velocity(this->cvorticity); \ - std::fill_n((R*)this->cv[1], 2*this->cd->local_size, 0.0); \ - CLOOP_K2( \ - this, \ - if (k2 <= this->kM2) \ - { \ - tindex = 3*cindex; \ - for (int cc=0; cc<3; cc++) \ - for (int i=0; i<2; i++) \ - this->cv[1][tindex+cc][i] = - this->nu*k2*this->cu[tindex+cc][i]; \ - if (strcmp(this->forcing_type, "linear") == 0) \ 
- { \ - double knorm = sqrt(k2); \ - if ((this->fk0 <= knorm) && \ - (this->fk1 >= knorm)) \ - for (int c=0; c<3; c++) \ - for (int i=0; i<2; i++) \ - this->cv[1][tindex+c][i] += this->famplitude*this->cu[tindex+c][i]; \ - } \ - this->cv[1][tindex+0][0] += this->kx[xindex]*pressure[cindex][1]; \ - this->cv[1][tindex+1][0] += this->ky[yindex]*pressure[cindex][1]; \ - this->cv[1][tindex+2][0] += this->kz[zindex]*pressure[cindex][1]; \ - this->cv[1][tindex+0][1] -= this->kx[xindex]*pressure[cindex][0]; \ - this->cv[1][tindex+1][1] -= this->ky[yindex]*pressure[cindex][0]; \ - this->cv[1][tindex+2][1] -= this->kz[zindex]*pressure[cindex][0]; \ - } \ - ); \ - std::copy( \ - (R*)this->cv[1], \ - (R*)(this->cv[1] + this->cd->local_size), \ - (R*)acceleration); \ - FFTW(free)(pressure); \ -} \ - \ -template<> \ -void fluid_solver<R>::compute_Eulerian_acceleration(FFTW(complex) *acceleration) \ -{ \ - std::fill_n((R*)(acceleration), 2*this->cd->local_size, 0.0); \ - ptrdiff_t tindex; \ - this->compute_velocity(this->cvorticity); \ - /* put in linear terms */ \ - CLOOP_K2( \ - this, \ - if (k2 <= this->kM2) \ - { \ - tindex = 3*cindex; \ - for (int cc=0; cc<3; cc++) \ - for (int i=0; i<2; i++) \ - acceleration[tindex+cc][i] = - this->nu*k2*this->cu[tindex+cc][i]; \ - if (strcmp(this->forcing_type, "linear") == 0) \ - { \ - double knorm = sqrt(k2); \ - if ((this->fk0 <= knorm) && \ - (this->fk1 >= knorm)) \ - { \ - for (int c=0; c<3; c++) \ - for (int i=0; i<2; i++) \ - acceleration[tindex+c][i] += this->famplitude*this->cu[tindex+c][i]; \ - } \ - } \ - } \ - ); \ - this->ift_velocity(); \ - /* compute uu */ \ - /* 11 22 33 */ \ - RLOOP ( \ - this, \ - tindex = 3*rindex; \ - for (int cc=0; cc<3; cc++) \ - this->rv[1][tindex+cc] = this->ru[tindex+cc]*this->ru[tindex+cc] / this->normalization_factor; \ - ); \ - this->clean_up_real_space(this->rv[1], 3); \ - FFTW(execute)(*((FFTW(plan)*)this->vr2c[1])); \ - this->dealias(this->cv[1], 3); \ - CLOOP_K2( \ - this, \ - if (k2 <= 
this->kM2) \ - { \ - tindex = 3*cindex; \ - acceleration[tindex+0][0] += \ - this->kx[xindex]*this->cv[1][tindex+0][1]; \ - acceleration[tindex+0][1] += \ - -this->kx[xindex]*this->cv[1][tindex+0][0]; \ - acceleration[tindex+1][0] += \ - this->ky[yindex]*this->cv[1][tindex+1][1]; \ - acceleration[tindex+1][1] += \ - -this->ky[yindex]*this->cv[1][tindex+1][0]; \ - acceleration[tindex+2][0] += \ - this->kz[zindex]*this->cv[1][tindex+2][1]; \ - acceleration[tindex+2][1] += \ - -this->kz[zindex]*this->cv[1][tindex+2][0]; \ - } \ - ); \ - /* 12 23 31 */ \ - RLOOP ( \ - this, \ - tindex = 3*rindex; \ - for (int cc=0; cc<3; cc++) \ - this->rv[1][tindex+cc] = this->ru[tindex+cc]*this->ru[tindex+(cc+1)%3] / this->normalization_factor; \ - ); \ - this->clean_up_real_space(this->rv[1], 3); \ - FFTW(execute)(*((FFTW(plan)*)this->vr2c[1])); \ - this->dealias(this->cv[1], 3); \ - CLOOP_K2( \ - this, \ - if (k2 <= this->kM2) \ - { \ - tindex = 3*cindex; \ - acceleration[tindex+0][0] += \ - (this->ky[yindex]*this->cv[1][tindex+0][1] + \ - this->kz[zindex]*this->cv[1][tindex+2][1]); \ - acceleration[tindex+0][1] += \ - - (this->ky[yindex]*this->cv[1][tindex+0][0] + \ - this->kz[zindex]*this->cv[1][tindex+2][0]); \ - acceleration[tindex+1][0] += \ - (this->kz[zindex]*this->cv[1][tindex+1][1] + \ - this->kx[xindex]*this->cv[1][tindex+0][1]); \ - acceleration[tindex+1][1] += \ - - (this->kz[zindex]*this->cv[1][tindex+1][0] + \ - this->kx[xindex]*this->cv[1][tindex+0][0]); \ - acceleration[tindex+2][0] += \ - (this->kx[xindex]*this->cv[1][tindex+2][1] + \ - this->ky[yindex]*this->cv[1][tindex+1][1]); \ - acceleration[tindex+2][1] += \ - - (this->kx[xindex]*this->cv[1][tindex+2][0] + \ - this->ky[yindex]*this->cv[1][tindex+1][0]); \ - } \ - ); \ - if (this->cd->myrank == this->cd->rank[0]) \ - std::fill_n((R*)(acceleration), 6, 0.0); \ - this->force_divfree(acceleration); \ -} \ - \ -template<> \ -void fluid_solver<R>::compute_Lagrangian_acceleration(R *acceleration) \ -{ \ - 
this->compute_Lagrangian_acceleration((FFTW(complex)*)acceleration); \ - FFTW(execute)(*((FFTW(plan)*)this->vc2r[1])); \ - std::copy( \ - this->rv[1], \ - this->rv[1] + 2*this->cd->local_size, \ - acceleration); \ -} \ - \ -template<> \ -int fluid_solver<R>::write_rpressure() \ -{ \ - char fname[512]; \ - FFTW(complex) *pressure; \ - pressure = FFTW(alloc_complex)(this->cd->local_size/3); \ - this->compute_velocity(this->cvorticity); \ - this->ift_velocity(); \ - this->compute_pressure(pressure); \ - this->fill_up_filename("rpressure", fname); \ - R *rpressure = FFTW(alloc_real)((this->cd->local_size/3)*2); \ - FFTW(plan) c2r; \ - c2r = FFTW(mpi_plan_dft_c2r_3d)( \ - this->rd->sizes[0], this->rd->sizes[1], this->rd->sizes[2], \ - pressure, rpressure, this->cd->comm, \ - this->fftw_plan_rigor | FFTW_MPI_TRANSPOSED_IN); \ - FFTW(execute)(c2r); \ - /* output goes here */ \ - int ntmp[3]; \ - ntmp[0] = this->rd->sizes[0]; \ - ntmp[1] = this->rd->sizes[1]; \ - ntmp[2] = this->rd->sizes[2]; \ - field_descriptor<R> *scalar_descriptor = new field_descriptor<R>(3, ntmp, MPI_RNUM, this->cd->comm); \ - clip_zero_padding<R>(scalar_descriptor, rpressure, 1); \ - int return_value = scalar_descriptor->write(fname, rpressure); \ - delete scalar_descriptor; \ - FFTW(destroy_plan)(c2r); \ - FFTW(free)(pressure); \ - FFTW(free)(rpressure); \ - return return_value; \ -} \ +template <class rnumber> +fluid_solver<rnumber>::fluid_solver( + const char *NAME, + int nx, + int ny, + int nz, + double DKX, + double DKY, + double DKZ, + int DEALIAS_TYPE, + unsigned FFTW_PLAN_RIGOR) : fluid_solver_base<rnumber>( + NAME, + nx , ny , nz, + DKX, DKY, DKZ, + DEALIAS_TYPE, + FFTW_PLAN_RIGOR) +{ + TIMEZONE("fluid_solver::fluid_solver"); + this->cvorticity = fftw_interface<rnumber>::alloc_complex(this->cd->local_size); + this->cvelocity = fftw_interface<rnumber>::alloc_complex(this->cd->local_size); + this->rvorticity = fftw_interface<rnumber>::alloc_real(this->cd->local_size*2); + /*this->rvelocity = 
(rnumber*)(this->cvelocity);*/ + this->rvelocity = fftw_interface<rnumber>::alloc_real(this->cd->local_size*2); + + this->ru = this->rvelocity; + this->cu = this->cvelocity; -/*****************************************************************************/ + this->rv[0] = this->rvorticity; + this->rv[3] = this->rvorticity; + this->cv[0] = this->cvorticity; + this->cv[3] = this->cvorticity; + this->cv[1] = fftw_interface<rnumber>::alloc_complex(this->cd->local_size); + this->cv[2] = this->cv[1]; + this->rv[1] = fftw_interface<rnumber>::alloc_real(this->cd->local_size*2); + this->rv[2] = this->rv[1]; + this->c2r_vorticity = new typename fftw_interface<rnumber>::plan; + this->r2c_vorticity = new typename fftw_interface<rnumber>::plan; + this->c2r_velocity = new typename fftw_interface<rnumber>::plan; + this->r2c_velocity = new typename fftw_interface<rnumber>::plan; + + ptrdiff_t sizes[] = {nz, + ny, + nx}; + + *this->c2r_vorticity = fftw_interface<rnumber>::mpi_plan_many_dft_c2r( + 3, sizes, 3, FFTW_MPI_DEFAULT_BLOCK, FFTW_MPI_DEFAULT_BLOCK, + this->cvorticity, this->rvorticity, + MPI_COMM_WORLD, this->fftw_plan_rigor | FFTW_MPI_TRANSPOSED_IN); + + *this->r2c_vorticity = fftw_interface<rnumber>::mpi_plan_many_dft_r2c( + 3, sizes, 3, FFTW_MPI_DEFAULT_BLOCK, FFTW_MPI_DEFAULT_BLOCK, + this->rvorticity, this->cvorticity, + MPI_COMM_WORLD, this->fftw_plan_rigor | FFTW_MPI_TRANSPOSED_OUT); + + *this->c2r_velocity = fftw_interface<rnumber>::mpi_plan_many_dft_c2r( + 3, sizes, 3, FFTW_MPI_DEFAULT_BLOCK, FFTW_MPI_DEFAULT_BLOCK, + this->cvelocity, this->rvelocity, + MPI_COMM_WORLD, this->fftw_plan_rigor | FFTW_MPI_TRANSPOSED_IN); + + *this->r2c_velocity = fftw_interface<rnumber>::mpi_plan_many_dft_r2c( + 3, sizes, 3, FFTW_MPI_DEFAULT_BLOCK, FFTW_MPI_DEFAULT_BLOCK, + this->rvelocity, this->cvelocity, + MPI_COMM_WORLD, this->fftw_plan_rigor | FFTW_MPI_TRANSPOSED_OUT); + + this->uc2r = this->c2r_velocity; + this->ur2c = this->r2c_velocity; + this->vc2r[0] = this->c2r_vorticity; + 
this->vr2c[0] = this->r2c_vorticity; + + this->vc2r[1] = new typename fftw_interface<rnumber>::plan; + this->vr2c[1] = new typename fftw_interface<rnumber>::plan; + this->vc2r[2] = new typename fftw_interface<rnumber>::plan; + this->vr2c[2] = new typename fftw_interface<rnumber>::plan; + + *(this->vc2r[1]) = fftw_interface<rnumber>::mpi_plan_many_dft_c2r( + 3, sizes, 3, FFTW_MPI_DEFAULT_BLOCK, FFTW_MPI_DEFAULT_BLOCK, + this->cv[1], this->rv[1], + MPI_COMM_WORLD, this->fftw_plan_rigor | FFTW_MPI_TRANSPOSED_IN); + + *this->vc2r[2] = fftw_interface<rnumber>::mpi_plan_many_dft_c2r( + 3, sizes, 3, FFTW_MPI_DEFAULT_BLOCK, FFTW_MPI_DEFAULT_BLOCK, + this->cv[2], this->rv[2], + MPI_COMM_WORLD, this->fftw_plan_rigor | FFTW_MPI_TRANSPOSED_IN); + + *this->vr2c[1] = fftw_interface<rnumber>::mpi_plan_many_dft_r2c( + 3, sizes, 3, FFTW_MPI_DEFAULT_BLOCK, FFTW_MPI_DEFAULT_BLOCK, + this->rv[1], this->cv[1], + MPI_COMM_WORLD, this->fftw_plan_rigor | FFTW_MPI_TRANSPOSED_OUT); + + *this->vr2c[2] = fftw_interface<rnumber>::mpi_plan_many_dft_r2c( + 3, sizes, 3, FFTW_MPI_DEFAULT_BLOCK, FFTW_MPI_DEFAULT_BLOCK, + this->rv[2], this->cv[2], + MPI_COMM_WORLD, this->fftw_plan_rigor | FFTW_MPI_TRANSPOSED_OUT); + + /* ``physical'' parameters etc, initialized here just in case */ + + this->nu = 0.1; + this->fmode = 1; + this->famplitude = 1.0; + this->fk0 = 0; + this->fk1 = 3.0; + /* initialization of fields must be done AFTER planning */ + std::fill_n((rnumber*)this->cvorticity, this->cd->local_size*2, 0.0); + std::fill_n((rnumber*)this->cvelocity, this->cd->local_size*2, 0.0); + std::fill_n(this->rvelocity, this->cd->local_size*2, 0.0); + std::fill_n(this->rvorticity, this->cd->local_size*2, 0.0); + std::fill_n((rnumber*)this->cv[1], this->cd->local_size*2, 0.0); + std::fill_n(this->rv[1], this->cd->local_size*2, 0.0); + std::fill_n(this->rv[2], this->cd->local_size*2, 0.0); +} + +template <class rnumber> +fluid_solver<rnumber>::~fluid_solver() +{ + 
fftw_interface<rnumber>::destroy_plan(*this->c2r_vorticity); + fftw_interface<rnumber>::destroy_plan(*this->r2c_vorticity); + fftw_interface<rnumber>::destroy_plan(*this->c2r_velocity ); + fftw_interface<rnumber>::destroy_plan(*this->r2c_velocity ); + fftw_interface<rnumber>::destroy_plan(*this->vc2r[1]); + fftw_interface<rnumber>::destroy_plan(*this->vr2c[1]); + fftw_interface<rnumber>::destroy_plan(*this->vc2r[2]); + fftw_interface<rnumber>::destroy_plan(*this->vr2c[2]); + + delete this->c2r_vorticity; + delete this->r2c_vorticity; + delete this->c2r_velocity ; + delete this->r2c_velocity ; + delete this->vc2r[1]; + delete this->vr2c[1]; + delete this->vc2r[2]; + delete this->vr2c[2]; + + fftw_interface<rnumber>::free(this->cv[1]); + fftw_interface<rnumber>::free(this->rv[1]); + fftw_interface<rnumber>::free(this->cvorticity); + fftw_interface<rnumber>::free(this->rvorticity); + fftw_interface<rnumber>::free(this->cvelocity); + fftw_interface<rnumber>::free(this->rvelocity); +} + +template <class rnumber> +void fluid_solver<rnumber>::compute_vorticity() +{ + TIMEZONE("fluid_solver::compute_vorticity"); + CLOOP_K2( + this, + [&](ptrdiff_t cindex, ptrdiff_t xindex, ptrdiff_t yindex, ptrdiff_t zindex, double k2){ + // cindex indexing is thread safe (and tindex too) + it is a write + ptrdiff_t tindex = 3*cindex; + if (k2 <= this->kM2) + { + this->cvorticity[tindex+0][0] = -(this->ky[yindex]*this->cu[tindex+2][1] - this->kz[zindex]*this->cu[tindex+1][1]); + this->cvorticity[tindex+1][0] = -(this->kz[zindex]*this->cu[tindex+0][1] - this->kx[xindex]*this->cu[tindex+2][1]); + this->cvorticity[tindex+2][0] = -(this->kx[xindex]*this->cu[tindex+1][1] - this->ky[yindex]*this->cu[tindex+0][1]); + this->cvorticity[tindex+0][1] = (this->ky[yindex]*this->cu[tindex+2][0] - this->kz[zindex]*this->cu[tindex+1][0]); + this->cvorticity[tindex+1][1] = (this->kz[zindex]*this->cu[tindex+0][0] - this->kx[xindex]*this->cu[tindex+2][0]); + this->cvorticity[tindex+2][1] = 
(this->kx[xindex]*this->cu[tindex+1][0] - this->ky[yindex]*this->cu[tindex+0][0]); + } + else{ + std::fill_n((rnumber*)(this->cvorticity+tindex), 6, 0.0); + } + } + ); + this->symmetrize(this->cvorticity, 3); +} + +template <class rnumber> +void fluid_solver<rnumber>::compute_velocity(rnumber (*__restrict__ vorticity)[2]) +{ + TIMEZONE("fluid_solver::compute_velocity"); + CLOOP_K2( + this, + [&](ptrdiff_t cindex, ptrdiff_t xindex, ptrdiff_t yindex, ptrdiff_t zindex, double k2){ + // cindex indexing is thread safe (and tindex too) + it is a write + ptrdiff_t tindex = 3*cindex; + if (k2 <= this->kM2 && k2 > 0) + { + this->cu[tindex+0][0] = -(this->ky[yindex]*vorticity[tindex+2][1] - this->kz[zindex]*vorticity[tindex+1][1]) / k2; + this->cu[tindex+1][0] = -(this->kz[zindex]*vorticity[tindex+0][1] - this->kx[xindex]*vorticity[tindex+2][1]) / k2; + this->cu[tindex+2][0] = -(this->kx[xindex]*vorticity[tindex+1][1] - this->ky[yindex]*vorticity[tindex+0][1]) / k2; + this->cu[tindex+0][1] = (this->ky[yindex]*vorticity[tindex+2][0] - this->kz[zindex]*vorticity[tindex+1][0]) / k2; + this->cu[tindex+1][1] = (this->kz[zindex]*vorticity[tindex+0][0] - this->kx[xindex]*vorticity[tindex+2][0]) / k2; + this->cu[tindex+2][1] = (this->kx[xindex]*vorticity[tindex+1][0] - this->ky[yindex]*vorticity[tindex+0][0]) / k2; + } + else + std::fill_n((rnumber*)(this->cu+tindex), 6, 0.0); + } + ); + /*this->symmetrize(this->cu, 3);*/ +} + +template <class rnumber> +void fluid_solver<rnumber>::ift_velocity() +{ + TIMEZONE("fluid_solver::ift_velocity"); + fftw_interface<rnumber>::execute(*(this->c2r_velocity )); +} + +template <class rnumber> +void fluid_solver<rnumber>::ift_vorticity() +{ + TIMEZONE("fluid_solver::ift_vorticity"); + std::fill_n(this->rvorticity, this->cd->local_size*2, 0.0); + fftw_interface<rnumber>::execute(*(this->c2r_vorticity )); +} + +template <class rnumber> +void fluid_solver<rnumber>::dft_velocity() +{ + TIMEZONE("fluid_solver::dft_velocity"); + 
fftw_interface<rnumber>::execute(*(this->r2c_velocity )); +} + +template <class rnumber> +void fluid_solver<rnumber>::dft_vorticity() +{ + TIMEZONE("fluid_solver::dft_vorticity"); + std::fill_n((rnumber*)this->cvorticity, this->cd->local_size*2, 0.0); + fftw_interface<rnumber>::execute(*(this->r2c_vorticity )); +} + +template <class rnumber> +void fluid_solver<rnumber>::add_forcing( + rnumber (*__restrict__ acc_field)[2], rnumber (*__restrict__ vort_field)[2], rnumber factor) +{ + TIMEZONE("fluid_solver::add_forcing"); + if (strcmp(this->forcing_type, "none") == 0) + return; + if (strcmp(this->forcing_type, "Kolmogorov") == 0) + { + ptrdiff_t cindex; + if (this->cd->myrank == this->cd->rank[this->fmode]) + { + cindex = ((this->fmode - this->cd->starts[0]) * this->cd->sizes[1])*this->cd->sizes[2]*3; + acc_field[cindex+2][0] -= this->famplitude*factor/2; + } + if (this->cd->myrank == this->cd->rank[this->cd->sizes[0] - this->fmode]) + { + cindex = ((this->cd->sizes[0] - this->fmode - this->cd->starts[0]) * this->cd->sizes[1])*this->cd->sizes[2]*3; + acc_field[cindex+2][0] -= this->famplitude*factor/2; + } + return; + } + if (strcmp(this->forcing_type, "linear") == 0) + { + CLOOP( + this, + [&](ptrdiff_t cindex, ptrdiff_t xindex, ptrdiff_t yindex, ptrdiff_t zindex){ + // cindex indexing is thread safe (and cindex*3+c too) + double knorm = sqrt(this->kx[xindex]*this->kx[xindex] + + this->ky[yindex]*this->ky[yindex] + + this->kz[zindex]*this->kz[zindex]); + if ((this->fk0 <= knorm) && (this->fk1 >= knorm)) + for (int c=0; c<3; c++) + for (int i=0; i<2; i++) + acc_field[cindex*3+c][i] += this->famplitude*vort_field[cindex*3+c][i]*factor; + } + ); + return; + } +} + +template <class rnumber> +void fluid_solver<rnumber>::omega_nonlin( + int src) +{ + TIMEZONE("fluid_solver::omega_nonlin"); + assert(src >= 0 && src < 3); + this->compute_velocity(this->cv[src]); + /* get fields from Fourier space to real space */ + { + TIMEZONE("fluid_solver::omega_nonlin::fftw"); + 
fftw_interface<rnumber>::execute(*(this->c2r_velocity )); + fftw_interface<rnumber>::execute(*(this->vc2r[src])); + } + /* compute cross product $u \times \omega$, and normalize */ + { + TIMEZONE("fluid_solver::omega_nonlin::RLOOP"); + RLOOP ( + this, + [&](ptrdiff_t rindex, ptrdiff_t /*xindex*/, ptrdiff_t /*yindex*/, ptrdiff_t /*zindex*/){ + ptrdiff_t tindex = 3*rindex; + rnumber tmp[3][2]; + for (int cc=0; cc<3; cc++) + tmp[cc][0] = (this->ru[tindex+(cc+1)%3]*this->rv[src][tindex+(cc+2)%3] - + this->ru[tindex+(cc+2)%3]*this->rv[src][tindex+(cc+1)%3]); + // Access to rindex is thread safe so there is no overlap between threads + for (int cc=0; cc<3; cc++) + this->ru[(3*rindex)+cc] = tmp[cc][0] / this->normalization_factor; + } + ); + } + /* go back to Fourier space */ + this->clean_up_real_space(this->ru, 3); + { + TIMEZONE("fluid_solver::omega_nonlin::fftw-2"); + fftw_interface<rnumber>::execute(*(this->r2c_velocity )); + } + this->dealias(this->cu, 3); + /* $\imath k \times Fourier(u \times \omega)$ */ + { + TIMEZONE("fluid_solver::omega_nonlin::CLOOP"); + CLOOP( + this, + [&](ptrdiff_t cindex, ptrdiff_t xindex, ptrdiff_t yindex, ptrdiff_t zindex){ + rnumber tmp[3][2]; + ptrdiff_t tindex = 3*cindex; + { + tmp[0][0] = -(this->ky[yindex]*this->cu[tindex+2][1] - this->kz[zindex]*this->cu[tindex+1][1]); + tmp[1][0] = -(this->kz[zindex]*this->cu[tindex+0][1] - this->kx[xindex]*this->cu[tindex+2][1]); + tmp[2][0] = -(this->kx[xindex]*this->cu[tindex+1][1] - this->ky[yindex]*this->cu[tindex+0][1]); + tmp[0][1] = (this->ky[yindex]*this->cu[tindex+2][0] - this->kz[zindex]*this->cu[tindex+1][0]); + tmp[1][1] = (this->kz[zindex]*this->cu[tindex+0][0] - this->kx[xindex]*this->cu[tindex+2][0]); + tmp[2][1] = (this->kx[xindex]*this->cu[tindex+1][0] - this->ky[yindex]*this->cu[tindex+0][0]); + } + // cindex indexing is thread safe so it is 3*cindex so there is no overlap between threads + for (int cc=0; cc<3; cc++) + for (int i=0; i<2; i++) + this->cu[tindex+cc][i] = 
tmp[cc][i]; + } + ); + } + { + TIMEZONE("fluid_solver::omega_nonlin::add_forcing"); + this->add_forcing(this->cu, this->cv[src], 1.0); + } + { + TIMEZONE("fluid_solver::omega_nonlin::force_divfree"); + this->force_divfree(this->cu); + } +} + +template <class rnumber> +void fluid_solver<rnumber>::step(double dt) +{ + TIMEZONE("fluid_solver::step"); + std::fill_n((rnumber*)this->cv[1], this->cd->local_size*2, 0.0); + this->omega_nonlin(0); + CLOOP_K2( + this, + [&](ptrdiff_t cindex, ptrdiff_t /*xindex*/, ptrdiff_t /*yindex*/, ptrdiff_t /*zindex*/, double k2){ + if (k2 <= this->kM2) + { + double factor0 = exp(-this->nu * k2 * dt); + // cindex indexing is thread safe so there is no overlap between threads + for (int cc=0; cc<3; cc++) for (int i=0; i<2; i++) + this->cv[1][3*cindex+cc][i] = (this->cv[0][3*cindex+cc][i] + + dt*this->cu[3*cindex+cc][i])*factor0; + } + } + ); + + this->omega_nonlin(1); + CLOOP_K2( + this, + [&](ptrdiff_t cindex, ptrdiff_t /*xindex*/, ptrdiff_t /*yindex*/, ptrdiff_t /*zindex*/, double k2){ + if (k2 <= this->kM2) + { + double factor0 = exp(-this->nu * k2 * dt/2); + double factor1 = exp( this->nu * k2 * dt/2); + // cindex indexing is thread safe so there is no overlap between threads + for (int cc=0; cc<3; cc++) for (int i=0; i<2; i++) + this->cv[2][3*cindex+cc][i] = (3*this->cv[0][3*cindex+cc][i]*factor0 + + (this->cv[1][3*cindex+cc][i] + + dt*this->cu[3*cindex+cc][i])*factor1)*0.25; + } + } + ); + + this->omega_nonlin(2); + CLOOP_K2( + this, + [&](ptrdiff_t cindex, ptrdiff_t /*xindex*/, ptrdiff_t /*yindex*/, ptrdiff_t /*zindex*/, double k2){ + if (k2 <= this->kM2) + { + double factor0 = exp(-this->nu * k2 * dt * 0.5); + // cindex indexing is thread safe so there is no overlap between threads + for (int cc=0; cc<3; cc++) for (int i=0; i<2; i++) + this->cv[3][3*cindex+cc][i] = (this->cv[0][3*cindex+cc][i]*factor0 + + 2*(this->cv[2][3*cindex+cc][i] + + dt*this->cu[3*cindex+cc][i]))*factor0/3; + } + } + ); + + 
this->force_divfree(this->cvorticity); + this->symmetrize(this->cvorticity, 3); + this->iteration++; +} + +template <class rnumber> +int fluid_solver<rnumber>::read(char field, char representation) +{ + TIMEZONE("fluid_solver::read"); + char fname[512]; + int read_result; + if (field == 'v') + { + if (representation == 'c') + { + this->fill_up_filename("cvorticity", fname); + read_result = this->cd->read(fname, (void*)this->cvorticity); + if (read_result != EXIT_SUCCESS) + return read_result; + } + if (representation == 'r') + { + read_result = this->read_base("rvorticity", this->rvorticity); + if (read_result != EXIT_SUCCESS) + return read_result; + else + fftw_interface<rnumber>::execute(*(this->r2c_vorticity )); + } + this->low_pass_Fourier(this->cvorticity, 3, this->kM); + this->force_divfree(this->cvorticity); + this->symmetrize(this->cvorticity, 3); + return EXIT_SUCCESS; + } + if ((field == 'u') && (representation == 'c')) + { + read_result = this->read_base("cvelocity", this->cvelocity); + this->low_pass_Fourier(this->cvelocity, 3, this->kM); + this->force_divfree(this->cvorticity); + this->symmetrize(this->cvorticity, 3); + return read_result; + } + if ((field == 'u') && (representation == 'r')) + return this->read_base("rvelocity", this->rvelocity); + return EXIT_FAILURE; +} + +template <class rnumber> +int fluid_solver<rnumber>::write(char field, char representation) +{ + TIMEZONE("fluid_solver::write"); + char fname[512]; + if ((field == 'v') && (representation == 'c')) + { + this->fill_up_filename("cvorticity", fname); + return this->cd->write(fname, (void*)this->cvorticity); + } + if ((field == 'v') && (representation == 'r')) + { + fftw_interface<rnumber>::execute(*(this->c2r_vorticity )); + clip_zero_padding<rnumber>(this->rd, this->rvorticity, 3); + this->fill_up_filename("rvorticity", fname); + return this->rd->write(fname, this->rvorticity); + } + this->compute_velocity(this->cvorticity); + if ((field == 'u') && (representation == 'c')) + { + 
this->fill_up_filename("cvelocity", fname); + return this->cd->write(fname, this->cvelocity); + } + if ((field == 'u') && (representation == 'r')) + { + this->ift_velocity(); + clip_zero_padding<rnumber>(this->rd, this->rvelocity, 3); + this->fill_up_filename("rvelocity", fname); + return this->rd->write(fname, this->rvelocity); + } + return EXIT_FAILURE; +} + +template <class rnumber> +int fluid_solver<rnumber>::write_rTrS2() +{ + TIMEZONE("fluid_solver::write_rTrS2"); + char fname[512]; + this->fill_up_filename("rTrS2", fname); + typename fftw_interface<rnumber>::complex *ca; + rnumber *ra; + ca = fftw_interface<rnumber>::alloc_complex(this->cd->local_size*3); + ra = (rnumber*)(ca); + this->compute_velocity(this->cvorticity); + this->compute_vector_gradient(ca, this->cvelocity); + for (int cc=0; cc<3; cc++) + { + std::copy( + (rnumber*)(ca + cc*this->cd->local_size), + (rnumber*)(ca + (cc+1)*this->cd->local_size), + (rnumber*)this->cv[1]); + fftw_interface<rnumber>::execute(*(this->vc2r[1])); + std::copy( + this->rv[1], + this->rv[1] + this->cd->local_size*2, + ra + cc*this->cd->local_size*2); + } + /* velocity gradient is now stored, in real space, in ra */ + rnumber *dx_u, *dy_u, *dz_u; + dx_u = ra; + dy_u = ra + 2*this->cd->local_size; + dz_u = ra + 4*this->cd->local_size; + rnumber *trS2 = fftw_interface<rnumber>::alloc_real((this->cd->local_size/3)*2); + shared_array<double> average_local(1, [&](double* data){ + data[0] = 0; + }); + + RLOOP( + this, + [&](ptrdiff_t rindex, ptrdiff_t /*xindex*/, ptrdiff_t /*yindex*/, ptrdiff_t /*zindex*/){ + rnumber AxxAxx; + rnumber AyyAyy; + rnumber AzzAzz; + rnumber Sxy; + rnumber Syz; + rnumber Szx; + ptrdiff_t tindex = 3*rindex; + AxxAxx = dx_u[tindex+0]*dx_u[tindex+0]; + AyyAyy = dy_u[tindex+1]*dy_u[tindex+1]; + AzzAzz = dz_u[tindex+2]*dz_u[tindex+2]; + Sxy = dx_u[tindex+1]+dy_u[tindex+0]; + Syz = dy_u[tindex+2]+dz_u[tindex+1]; + Szx = dz_u[tindex+0]+dx_u[tindex+2]; + // rindex is thread safe + No overlap between thread 
it is a write + trS2[rindex] = (AxxAxx + AyyAyy + AzzAzz + + (Sxy*Sxy + Syz*Syz + Szx*Szx)/2); + average_local.getMine()[0] += trS2[rindex]; + } + ); + average_local.mergeParallel(); + double average; + MPI_Allreduce( + average_local.getMasterData(), + &average, + 1, + MPI_DOUBLE, MPI_SUM, this->cd->comm); + DEBUG_MSG("average TrS2 is %g\n", average); + fftw_interface<rnumber>::free(ca); + /* output goes here */ + int ntmp[3]; + ntmp[0] = this->rd->sizes[0]; + ntmp[1] = this->rd->sizes[1]; + ntmp[2] = this->rd->sizes[2]; + field_descriptor<rnumber> *scalar_descriptor = new field_descriptor<rnumber>(3, ntmp, mpi_real_type<rnumber>::real(), this->cd->comm); + clip_zero_padding<rnumber>(scalar_descriptor, trS2, 1); + int return_value = scalar_descriptor->write(fname, trS2); + delete scalar_descriptor; + fftw_interface<rnumber>::free(trS2); + return return_value; +} + +template <class rnumber> +int fluid_solver<rnumber>::write_renstrophy() +{ + TIMEZONE("fluid_solver::write_renstrophy"); + char fname[512]; + this->fill_up_filename("renstrophy", fname); + rnumber *enstrophy = fftw_interface<rnumber>::alloc_real((this->cd->local_size/3)*2); + this->ift_vorticity(); + shared_array<double> average_local(1, [&](double* data){ + data[0] = 0; + }); + + RLOOP( + this, + [&](ptrdiff_t rindex, ptrdiff_t /*xindex*/, ptrdiff_t /*yindex*/, ptrdiff_t /*zindex*/){ + ptrdiff_t tindex = 3*rindex; + // rindex indexing is thread safe so there is no overlap between threads + enstrophy[rindex] = ( + this->rvorticity[tindex+0]*this->rvorticity[tindex+0] + + this->rvorticity[tindex+1]*this->rvorticity[tindex+1] + + this->rvorticity[tindex+2]*this->rvorticity[tindex+2] + )/2; + average_local.getMine()[0] += enstrophy[rindex]; + } + ); + average_local.mergeParallel(); + double average; + MPI_Allreduce( + average_local.getMasterData(), + &average, + 1, + MPI_DOUBLE, MPI_SUM, this->cd->comm); + DEBUG_MSG("average enstrophy is %g\n", average); + /* output goes here */ + int ntmp[3]; + ntmp[0] = 
this->rd->sizes[0]; + ntmp[1] = this->rd->sizes[1]; + ntmp[2] = this->rd->sizes[2]; + field_descriptor<rnumber> *scalar_descriptor = new field_descriptor<rnumber>(3, ntmp, mpi_real_type<rnumber>::real(), this->cd->comm); + clip_zero_padding<rnumber>(scalar_descriptor, enstrophy, 1); + int return_value = scalar_descriptor->write(fname, enstrophy); + delete scalar_descriptor; + fftw_interface<rnumber>::free(enstrophy); + return return_value; +} + +template <class rnumber> +void fluid_solver<rnumber>::compute_pressure(rnumber (*__restrict__ pressure)[2]) +{ + TIMEZONE("fluid_solver::compute_pressure"); + /* assume velocity is already in real space representation */ + /* diagonal terms 11 22 33 */ + RLOOP ( + this, + [&](ptrdiff_t rindex, ptrdiff_t /*xindex*/, ptrdiff_t /*yindex*/, ptrdiff_t /*zindex*/){ + // rindex indexing is thread safe so there is no overlap between threads + ptrdiff_t tindex = 3*rindex; + for (int cc=0; cc<3; cc++) + this->rv[1][tindex+cc] = this->ru[tindex+cc]*this->ru[tindex+cc]; + } + ); + this->clean_up_real_space(this->rv[1], 3); + { + TIMEZONE("fftw_interface<rnumber>::execute"); + fftw_interface<rnumber>::execute(*(this->vr2c[1])); + } + this->dealias(this->cv[1], 3); + CLOOP_K2( + this, + [&](ptrdiff_t cindex, ptrdiff_t xindex, ptrdiff_t yindex, ptrdiff_t zindex, double k2){ + if (k2 <= this->kM2 && k2 > 0) + { + // cindex indexing is thread safe so there is no overlap between threads + ptrdiff_t tindex = 3*cindex; + for (int i=0; i<2; i++) + { + pressure[cindex][i] = -(this->kx[xindex]*this->kx[xindex]*this->cv[1][tindex+0][i] + + this->ky[yindex]*this->ky[yindex]*this->cv[1][tindex+1][i] + + this->kz[zindex]*this->kz[zindex]*this->cv[1][tindex+2][i]); + } + } + else + std::fill_n((rnumber*)(pressure+cindex), 2, 0.0); + } + ); + /* off-diagonal terms 12 23 31 */ + RLOOP ( + this, + [&](ptrdiff_t rindex, ptrdiff_t /*xindex*/, ptrdiff_t /*yindex*/, ptrdiff_t /*zindex*/){ + // rindex indexing is thread safe so there is no overlap between 
threads + ptrdiff_t tindex = 3*rindex; + for (int cc=0; cc<3; cc++) + this->rv[1][tindex+cc] = this->ru[tindex+cc]*this->ru[tindex+(cc+1)%3]; + } + ); + this->clean_up_real_space(this->rv[1], 3); + { + TIMEZONE("fftw_interface<rnumber>::execute"); + fftw_interface<rnumber>::execute(*(this->vr2c[1])); + } + this->dealias(this->cv[1], 3); + CLOOP_K2( + this, + [&](ptrdiff_t cindex, ptrdiff_t xindex, ptrdiff_t yindex, ptrdiff_t zindex, double k2){ + if (k2 <= this->kM2 && k2 > 0) + { + // cindex indexing is thread safe so there is no overlap between threads + ptrdiff_t tindex = 3*cindex; + for (int i=0; i<2; i++) + { + pressure[cindex][i] -= 2*(this->kx[xindex]*this->ky[yindex]*this->cv[1][tindex+0][i] + + this->ky[yindex]*this->kz[zindex]*this->cv[1][tindex+1][i] + + this->kz[zindex]*this->kx[xindex]*this->cv[1][tindex+2][i]); + pressure[cindex][i] /= this->normalization_factor*k2; + } + } + } + ); +} + +template <class rnumber> +void fluid_solver<rnumber>::compute_gradient_statistics( + rnumber (*__restrict__ vec)[2], +double *gradu_moments, +double *trS2QR_moments, +ptrdiff_t *gradu_hist, +ptrdiff_t *trS2QR_hist, +ptrdiff_t *QR2D_hist, +double trS2QR_max_estimates[], +double gradu_max_estimates[], +int nbins, +int QR2D_nbins) +{ + TIMEZONE("fluid_solver::compute_gradient_statistics"); + typename fftw_interface<rnumber>::complex *ca; + rnumber *ra; + ca = fftw_interface<rnumber>::alloc_complex(this->cd->local_size*3); + ra = (rnumber*)(ca); + this->compute_vector_gradient(ca, vec); + for (int cc=0; cc<3; cc++) + { + std::copy( + (rnumber*)(ca + cc*this->cd->local_size), + (rnumber*)(ca + (cc+1)*this->cd->local_size), + (rnumber*)this->cv[1]); + fftw_interface<rnumber>::execute(*(this->vc2r[1])); + std::copy( + this->rv[1], + this->rv[1] + this->cd->local_size*2, + ra + cc*this->cd->local_size*2); + } + /* velocity gradient is now stored, in real space, in ra */ + std::fill_n(this->rv[1], 2*this->cd->local_size, 0.0); + rnumber *dx_u, *dy_u, *dz_u; + dx_u = ra; + 
dy_u = ra + 2*this->cd->local_size; + dz_u = ra + 4*this->cd->local_size; + double binsize[2]; + double tmp_max_estimate[3]; + tmp_max_estimate[0] = trS2QR_max_estimates[0]; + tmp_max_estimate[1] = trS2QR_max_estimates[1]; + tmp_max_estimate[2] = trS2QR_max_estimates[2]; + binsize[0] = 2*tmp_max_estimate[2] / QR2D_nbins; + binsize[1] = 2*tmp_max_estimate[1] / QR2D_nbins; + ptrdiff_t *local_hist = new ptrdiff_t[QR2D_nbins*QR2D_nbins]; + std::fill_n(local_hist, QR2D_nbins*QR2D_nbins, 0); + RLOOP( + this, + [&](ptrdiff_t rindex, ptrdiff_t /*xindex*/, ptrdiff_t /*yindex*/, ptrdiff_t /*zindex*/){ + rnumber AxxAxx; + rnumber AyyAyy; + rnumber AzzAzz; + rnumber AxyAyx; + rnumber AyzAzy; + rnumber AzxAxz; + rnumber Sxy; + rnumber Syz; + rnumber Szx; + // rindex indexing is thread safe so there is no overlap between threads + // tindex[0:2] is thread safe too + ptrdiff_t tindex = 3*rindex; + AxxAxx = dx_u[tindex+0]*dx_u[tindex+0]; + AyyAyy = dy_u[tindex+1]*dy_u[tindex+1]; + AzzAzz = dz_u[tindex+2]*dz_u[tindex+2]; + AxyAyx = dx_u[tindex+1]*dy_u[tindex+0]; + AyzAzy = dy_u[tindex+2]*dz_u[tindex+1]; + AzxAxz = dz_u[tindex+0]*dx_u[tindex+2]; + this->rv[1][tindex+1] = - (AxxAxx + AyyAyy + AzzAzz)/2 - AxyAyx - AyzAzy - AzxAxz; + this->rv[1][tindex+2] = - (dx_u[tindex+0]*(AxxAxx/3 + AxyAyx + AzxAxz) + + dy_u[tindex+1]*(AyyAyy/3 + AxyAyx + AyzAzy) + + dz_u[tindex+2]*(AzzAzz/3 + AzxAxz + AyzAzy) + + dx_u[tindex+1]*dy_u[tindex+2]*dz_u[tindex+0] + + dx_u[tindex+2]*dy_u[tindex+0]*dz_u[tindex+1]); + int bin0 = int(floor((this->rv[1][tindex+2] + tmp_max_estimate[2]) / binsize[0])); + int bin1 = int(floor((this->rv[1][tindex+1] + tmp_max_estimate[1]) / binsize[1])); + if ((bin0 >= 0 && bin0 < QR2D_nbins) && + (bin1 >= 0 && bin1 < QR2D_nbins)) + local_hist[bin1*QR2D_nbins + bin0]++; + Sxy = dx_u[tindex+1]+dy_u[tindex+0]; + Syz = dy_u[tindex+2]+dz_u[tindex+1]; + Szx = dz_u[tindex+0]+dx_u[tindex+2]; + this->rv[1][tindex] = (AxxAxx + AyyAyy + AzzAzz + + (Sxy*Sxy + Syz*Syz + Szx*Szx)/2); + } + 
); + MPI_Allreduce( + local_hist, + QR2D_hist, + QR2D_nbins * QR2D_nbins, + MPI_INT64_T, MPI_SUM, this->cd->comm); + delete[] local_hist; + this->compute_rspace_stats3( + this->rv[1], + trS2QR_moments, + trS2QR_hist, + tmp_max_estimate, + nbins); + double *tmp_moments = new double[10*3]; + ptrdiff_t *tmp_hist = new ptrdiff_t[nbins*3]; + for (int cc=0; cc<3; cc++) + { + tmp_max_estimate[0] = gradu_max_estimates[cc*3 + 0]; + tmp_max_estimate[1] = gradu_max_estimates[cc*3 + 1]; + tmp_max_estimate[2] = gradu_max_estimates[cc*3 + 2]; + this->compute_rspace_stats3( + dx_u + cc*2*this->cd->local_size, + tmp_moments, + tmp_hist, + tmp_max_estimate, + nbins); + for (int n = 0; n < 10; n++) + for (int i = 0; i < 3 ; i++) + { + gradu_moments[(n*3 + cc)*3 + i] = tmp_moments[n*3 + i]; + } + for (int n = 0; n < nbins; n++) + for (int i = 0; i < 3; i++) + { + gradu_hist[(n*3 + cc)*3 + i] = tmp_hist[n*3 + i]; + } + } + delete[] tmp_moments; + delete[] tmp_hist; + fftw_interface<rnumber>::free(ca); +} + +template <class rnumber> +void fluid_solver<rnumber>::compute_Lagrangian_acceleration(rnumber (*acceleration)[2]) +{ + TIMEZONE("fluid_solver::compute_Lagrangian_acceleration"); + typename fftw_interface<rnumber>::complex *pressure; + pressure = fftw_interface<rnumber>::alloc_complex(this->cd->local_size/3); + this->compute_velocity(this->cvorticity); + this->ift_velocity(); + this->compute_pressure(pressure); + this->compute_velocity(this->cvorticity); + std::fill_n((rnumber*)this->cv[1], 2*this->cd->local_size, 0.0); + CLOOP_K2( + this, + [&](ptrdiff_t cindex, ptrdiff_t xindex, ptrdiff_t yindex, ptrdiff_t zindex, double k2){ + if (k2 <= this->kM2) + { + // cindex indexing is thread safe so there is no overlap between threads + ptrdiff_t tindex = 3*cindex; + for (int cc=0; cc<3; cc++) + for (int i=0; i<2; i++) + this->cv[1][tindex+cc][i] = - this->nu*k2*this->cu[tindex+cc][i]; + if (strcmp(this->forcing_type, "linear") == 0) + { + double knorm = sqrt(k2); + if ((this->fk0 <= 
knorm) && + (this->fk1 >= knorm)) + for (int c=0; c<3; c++) + for (int i=0; i<2; i++) + this->cv[1][tindex+c][i] += this->famplitude*this->cu[tindex+c][i]; + } + this->cv[1][tindex+0][0] += this->kx[xindex]*pressure[cindex][1]; + this->cv[1][tindex+1][0] += this->ky[yindex]*pressure[cindex][1]; + this->cv[1][tindex+2][0] += this->kz[zindex]*pressure[cindex][1]; + this->cv[1][tindex+0][1] -= this->kx[xindex]*pressure[cindex][0]; + this->cv[1][tindex+1][1] -= this->ky[yindex]*pressure[cindex][0]; + this->cv[1][tindex+2][1] -= this->kz[zindex]*pressure[cindex][0]; + } + } + ); + std::copy( + (rnumber*)this->cv[1], + (rnumber*)(this->cv[1] + this->cd->local_size), + (rnumber*)acceleration); + fftw_interface<rnumber>::free(pressure); +} + +template <class rnumber> +void fluid_solver<rnumber>::compute_Eulerian_acceleration(rnumber (*__restrict__ acceleration)[2]) +{ + TIMEZONE("fluid_solver::compute_Eulerian_acceleration"); + std::fill_n((rnumber*)(acceleration), 2*this->cd->local_size, 0.0); + this->compute_velocity(this->cvorticity); + /* put in linear terms */ + CLOOP_K2( + this, + [&](ptrdiff_t cindex, ptrdiff_t /*xindex*/, ptrdiff_t /*yindex*/, ptrdiff_t /*zindex*/, double k2){ + if (k2 <= this->kM2) + { + // cindex indexing is thread safe so there is no overlap between threads + ptrdiff_t tindex = 3*cindex; + for (int cc=0; cc<3; cc++) + for (int i=0; i<2; i++) + acceleration[tindex+cc][i] = - this->nu*k2*this->cu[tindex+cc][i]; + if (strcmp(this->forcing_type, "linear") == 0) + { + double knorm = sqrt(k2); + if ((this->fk0 <= knorm) && + (this->fk1 >= knorm)) + { + for (int c=0; c<3; c++) + for (int i=0; i<2; i++) + acceleration[tindex+c][i] += this->famplitude*this->cu[tindex+c][i]; + } + } + } + } + ); + this->ift_velocity(); + /* compute uu */ + /* 11 22 33 */ + RLOOP ( + this, + [&](ptrdiff_t rindex, ptrdiff_t /*xindex*/, ptrdiff_t /*yindex*/, ptrdiff_t /*zindex*/){ + // cindex indexing is thread safe so there is no overlap between threads + ptrdiff_t tindex = 
3*rindex; + for (int cc=0; cc<3; cc++) + this->rv[1][tindex+cc] = this->ru[tindex+cc]*this->ru[tindex+cc] / this->normalization_factor; + } + ); + this->clean_up_real_space(this->rv[1], 3); + fftw_interface<rnumber>::execute(*(this->vr2c[1])); + this->dealias(this->cv[1], 3); + CLOOP_K2( + this, + [&](ptrdiff_t cindex, ptrdiff_t xindex, ptrdiff_t yindex, ptrdiff_t zindex, double k2){ + if (k2 <= this->kM2) + { + // cindex indexing is thread safe so there is no overlap between threads + ptrdiff_t tindex = 3*cindex; + acceleration[tindex+0][0] += + this->kx[xindex]*this->cv[1][tindex+0][1]; + acceleration[tindex+0][1] += + -this->kx[xindex]*this->cv[1][tindex+0][0]; + acceleration[tindex+1][0] += + this->ky[yindex]*this->cv[1][tindex+1][1]; + acceleration[tindex+1][1] += + -this->ky[yindex]*this->cv[1][tindex+1][0]; + acceleration[tindex+2][0] += + this->kz[zindex]*this->cv[1][tindex+2][1]; + acceleration[tindex+2][1] += + -this->kz[zindex]*this->cv[1][tindex+2][0]; + } + } + ); + /* 12 23 31 */ + RLOOP ( + this, + [&](ptrdiff_t rindex, ptrdiff_t /*xindex*/, ptrdiff_t /*yindex*/, ptrdiff_t /*zindex*/){ + // cindex indexing is thread safe so there is no overlap between threads + ptrdiff_t tindex = 3*rindex; + for (int cc=0; cc<3; cc++) + this->rv[1][tindex+cc] = this->ru[tindex+cc]*this->ru[tindex+(cc+1)%3] / this->normalization_factor; + } + ); + this->clean_up_real_space(this->rv[1], 3); + fftw_interface<rnumber>::execute(*(this->vr2c[1])); + this->dealias(this->cv[1], 3); + CLOOP_K2( + this, + [&](ptrdiff_t cindex, ptrdiff_t xindex, ptrdiff_t yindex, ptrdiff_t zindex, double k2){ + if (k2 <= this->kM2) + { + // cindex indexing is thread safe so there is no overlap between threads + ptrdiff_t tindex = 3*cindex; + acceleration[tindex+0][0] += + (this->ky[yindex]*this->cv[1][tindex+0][1] + + this->kz[zindex]*this->cv[1][tindex+2][1]); + acceleration[tindex+0][1] += + - (this->ky[yindex]*this->cv[1][tindex+0][0] + + this->kz[zindex]*this->cv[1][tindex+2][0]); + 
acceleration[tindex+1][0] += + (this->kz[zindex]*this->cv[1][tindex+1][1] + + this->kx[xindex]*this->cv[1][tindex+0][1]); + acceleration[tindex+1][1] += + - (this->kz[zindex]*this->cv[1][tindex+1][0] + + this->kx[xindex]*this->cv[1][tindex+0][0]); + acceleration[tindex+2][0] += + (this->kx[xindex]*this->cv[1][tindex+2][1] + + this->ky[yindex]*this->cv[1][tindex+1][1]); + acceleration[tindex+2][1] += + - (this->kx[xindex]*this->cv[1][tindex+2][0] + + this->ky[yindex]*this->cv[1][tindex+1][0]); + } + } + ); + if (this->cd->myrank == this->cd->rank[0]) + std::fill_n((rnumber*)(acceleration), 6, 0.0); + this->force_divfree(acceleration); +} + +template <class rnumber> +void fluid_solver<rnumber>::compute_Lagrangian_acceleration(rnumber *__restrict__ acceleration) +{ + TIMEZONE("fluid_solver::compute_Lagrangian_acceleration"); + this->compute_Lagrangian_acceleration((typename fftw_interface<rnumber>::complex*)acceleration); + fftw_interface<rnumber>::execute(*(this->vc2r[1])); + std::copy( + this->rv[1], + this->rv[1] + 2*this->cd->local_size, + acceleration); +} + +template <class rnumber> +int fluid_solver<rnumber>::write_rpressure() +{ + TIMEZONE("fluid_solver::write_rpressure"); + char fname[512]; + typename fftw_interface<rnumber>::complex *pressure; + pressure = fftw_interface<rnumber>::alloc_complex(this->cd->local_size/3); + this->compute_velocity(this->cvorticity); + this->ift_velocity(); + this->compute_pressure(pressure); + this->fill_up_filename("rpressure", fname); + rnumber *rpressure = fftw_interface<rnumber>::alloc_real((this->cd->local_size/3)*2); + typename fftw_interface<rnumber>::plan c2r; + c2r = fftw_interface<rnumber>::mpi_plan_dft_c2r_3d( + this->rd->sizes[0], this->rd->sizes[1], this->rd->sizes[2], + pressure, rpressure, this->cd->comm, + this->fftw_plan_rigor | FFTW_MPI_TRANSPOSED_IN); + fftw_interface<rnumber>::execute(c2r); + /* output goes here */ + int ntmp[3]; + ntmp[0] = this->rd->sizes[0]; + ntmp[1] = this->rd->sizes[1]; + ntmp[2] = 
this->rd->sizes[2]; + field_descriptor<rnumber> *scalar_descriptor = new field_descriptor<rnumber>(3, ntmp, mpi_real_type<rnumber>::real(), this->cd->comm); + clip_zero_padding<rnumber>(scalar_descriptor, rpressure, 1); + int return_value = scalar_descriptor->write(fname, rpressure); + delete scalar_descriptor; + fftw_interface<rnumber>::destroy_plan(c2r); + fftw_interface<rnumber>::free(pressure); + fftw_interface<rnumber>::free(rpressure); + return return_value; +} /*****************************************************************************/ -/* now actually use the macro defined above */ -FLUID_SOLVER_DEFINITIONS( - FFTW_MANGLE_FLOAT, - float, - MPI_FLOAT, - MPI_COMPLEX) -FLUID_SOLVER_DEFINITIONS( - FFTW_MANGLE_DOUBLE, - double, - MPI_DOUBLE, - BFPS_MPICXX_DOUBLE_COMPLEX) -/*****************************************************************************/ + diff --git a/bfps/cpp/fluid_solver.hpp b/bfps/cpp/fluid_solver.hpp index 2b6ec64de12cc133687074c83c71696ffc507509..4cc75cee4385353f64dc9bc9e7d34c6efba9ad48 100644 --- a/bfps/cpp/fluid_solver.hpp +++ b/bfps/cpp/fluid_solver.hpp @@ -55,12 +55,12 @@ class fluid_solver:public fluid_solver_base<rnumber> typename fluid_solver_base<rnumber>::cnumber *cu, *cv[4]; /* plans */ - void *c2r_vorticity; - void *r2c_vorticity; - void *c2r_velocity; - void *r2c_velocity; - void *uc2r, *ur2c; - void *vr2c[3], *vc2r[3]; + typename fftw_interface<rnumber>::plan *c2r_vorticity; + typename fftw_interface<rnumber>::plan *r2c_vorticity; + typename fftw_interface<rnumber>::plan *c2r_velocity; + typename fftw_interface<rnumber>::plan *r2c_velocity; + typename fftw_interface<rnumber>::plan *uc2r, *ur2c; + typename fftw_interface<rnumber>::plan *vr2c[3], *vc2r[3]; /* physical parameters */ double nu; diff --git a/bfps/cpp/fluid_solver_base.cpp b/bfps/cpp/fluid_solver_base.cpp index 2f2aeee9a8ae699b7863c90dcffb550bc905390a..6e4fd3335238218bad0b78462d3506ca9b48c721 100644 --- a/bfps/cpp/fluid_solver_base.cpp +++ 
b/bfps/cpp/fluid_solver_base.cpp @@ -32,7 +32,8 @@ #include "base.hpp" #include "fluid_solver_base.hpp" #include "fftw_tools.hpp" - +#include "scope_timer.hpp" +#include "shared_array.hpp" template <class rnumber> void fluid_solver_base<rnumber>::fill_up_filename(const char *base_name, char *destination) @@ -43,6 +44,7 @@ void fluid_solver_base<rnumber>::fill_up_filename(const char *base_name, char *d template <class rnumber> void fluid_solver_base<rnumber>::clean_up_real_space(rnumber *a, int howmany) { + TIMEZONE("fluid_solver_base::clean_up_real_space"); for (ptrdiff_t rindex = 0; rindex < this->cd->local_size*2; rindex += howmany*(this->rd->subsizes[2]+2)) std::fill_n(a+rindex+this->rd->subsizes[2]*howmany, 2*howmany, 0.0); } @@ -65,65 +67,76 @@ double fluid_solver_base<rnumber>::autocorrel(cnumber *a) template <class rnumber> void fluid_solver_base<rnumber>::cospectrum(cnumber *a, cnumber *b, double *spec) { - double *cospec_local = fftw_alloc_real(this->nshells*9); - std::fill_n(cospec_local, this->nshells*9, 0); - int tmp_int; + TIMEZONE("fluid_solver_base::cospectrum"); + shared_array<double> cospec_local_thread(this->nshells*9,[&](double* cospec_local){ + std::fill_n(cospec_local, this->nshells*9, 0); + }); + CLOOP_K2_NXMODES( - this, - if (k2 <= this->kMspec2) - { - tmp_int = int(sqrt(k2)/this->dk)*9; - for (int i=0; i<3; i++) - for (int j=0; j<3; j++) - { - cospec_local[tmp_int+i*3+j] += nxmodes * ( - (*(a + 3*cindex+i))[0] * (*(b + 3*cindex+j))[0] + - (*(a + 3*cindex+i))[1] * (*(b + 3*cindex+j))[1]); - } - } - ); + this, + + [&](ptrdiff_t cindex, ptrdiff_t /*xindex*/, ptrdiff_t /*yindex*/, + ptrdiff_t /*zindex*/, double k2, int nxmodes){ + if (k2 <= this->kMspec2) + { + int tmp_int = int(sqrt(k2)/this->dk)*9; + double* cospec_local = cospec_local_thread.getMine(); + for (int i=0; i<3; i++) + for (int j=0; j<3; j++) + { + cospec_local[tmp_int+i*3+j] += nxmodes * ( + (*(a + 3*cindex+i))[0] * (*(b + 3*cindex+j))[0] + + (*(a + 3*cindex+i))[1] * (*(b + 
3*cindex+j))[1]); + } + }} + ); + cospec_local_thread.mergeParallel(); MPI_Allreduce( - (void*)cospec_local, - (void*)spec, - this->nshells*9, - MPI_DOUBLE, MPI_SUM, this->cd->comm); - fftw_free(cospec_local); + cospec_local_thread.getMasterData(), + (void*)spec, + this->nshells*9, + MPI_DOUBLE, MPI_SUM, this->cd->comm); } template <class rnumber> void fluid_solver_base<rnumber>::cospectrum(cnumber *a, cnumber *b, double *spec, const double k2exponent) { - double *cospec_local = fftw_alloc_real(this->nshells*9); - std::fill_n(cospec_local, this->nshells*9, 0); - double factor = 1; - int tmp_int; + TIMEZONE("fluid_solver_base::cospectrum2"); + shared_array<double> cospec_local_thread(this->nshells*9,[&](double* cospec_local){ + std::fill_n(cospec_local, this->nshells*9, 0); + }); + CLOOP_K2_NXMODES( - this, - if (k2 <= this->kMspec2) - { - factor = nxmodes*pow(k2, k2exponent); - tmp_int = int(sqrt(k2)/this->dk)*9; - for (int i=0; i<3; i++) - for (int j=0; j<3; j++) - { - cospec_local[tmp_int+i*3+j] += factor * ( - (*(a + 3*cindex+i))[0] * (*(b + 3*cindex+j))[0] + - (*(a + 3*cindex+i))[1] * (*(b + 3*cindex+j))[1]); - } - } - ); + this, + + [&](ptrdiff_t cindex, ptrdiff_t /*xindex*/, ptrdiff_t /*yindex*/, + ptrdiff_t /*zindex*/, double k2, int nxmodes){ + if (k2 <= this->kMspec2) + { + double factor = nxmodes*pow(k2, k2exponent); + int tmp_int = int(sqrt(k2)/this->dk)*9; + double* cospec_local = cospec_local_thread.getMine(); + for (int i=0; i<3; i++) + for (int j=0; j<3; j++) + { + cospec_local[tmp_int+i*3+j] += factor * ( + (*(a + 3*cindex+i))[0] * (*(b + 3*cindex+j))[0] + + (*(a + 3*cindex+i))[1] * (*(b + 3*cindex+j))[1]); + } + }} + ); + cospec_local_thread.mergeParallel(); MPI_Allreduce( - (void*)cospec_local, - (void*)spec, - this->nshells*9, - MPI_DOUBLE, MPI_SUM, this->cd->comm); + cospec_local_thread.getMasterData(), + (void*)spec, + this->nshells*9, + MPI_DOUBLE, MPI_SUM, this->cd->comm); //for (int n=0; n<this->nshells; n++) //{ // spec[n] *= 
12.5663706144*pow(this->kshell[n], 2) / this->nshell[n]; // /*is normalization needed? // * spec[n] /= this->normalization_factor*/ //} - fftw_free(cospec_local); } template <class rnumber> @@ -134,6 +147,7 @@ void fluid_solver_base<rnumber>::compute_rspace_stats( const hsize_t toffset, const std::vector<double> max_estimate) { + TIMEZONE("fluid_solver_base::compute_rspace_stats"); const int nmoments = 10; int nvals, nbins; if (this->rd->myrank == 0) @@ -145,6 +159,7 @@ void fluid_solver_base<rnumber>::compute_rspace_stats( wspace = H5Dget_space(dset); ndims = H5Sget_simple_extent_dims(wspace, dims, NULL); assert(ndims == 3); + variable_used_only_in_assert(ndims); assert(dims[1] == nmoments); nvals = dims[2]; H5Sclose(wspace); @@ -161,22 +176,29 @@ void fluid_solver_base<rnumber>::compute_rspace_stats( MPI_Bcast(&nvals, 1, MPI_INT, 0, this->rd->comm); MPI_Bcast(&nbins, 1, MPI_INT, 0, this->rd->comm); assert(nvals == max_estimate.size()); - double *moments = new double[nmoments*nvals]; - double *local_moments = new double[nmoments*nvals]; - double *val_tmp = new double[nvals]; + shared_array<double> threaded_local_moments(nmoments*nvals, [&](double* local_moments){ + std::fill_n(local_moments, nmoments*nvals, 0); + if (nvals == 4) local_moments[3] = max_estimate[3]; + }); + + shared_array<double> threaded_val_tmp(nvals); + + shared_array<ptrdiff_t> threaded_local_hist(nbins*nvals, [&](ptrdiff_t* local_hist){ + std::fill_n(local_hist, nbins*nvals, 0); + }); + + // Not written by threads double *binsize = new double[nvals]; - double *pow_tmp = new double[nvals]; - ptrdiff_t *hist = new ptrdiff_t[nbins*nvals]; - ptrdiff_t *local_hist = new ptrdiff_t[nbins*nvals]; - int bin; for (int i=0; i<nvals; i++) binsize[i] = 2*max_estimate[i] / nbins; - std::fill_n(local_hist, nbins*nvals, 0); - std::fill_n(local_moments, nmoments*nvals, 0); - if (nvals == 4) local_moments[3] = max_estimate[3]; + RLOOP( - this, - std::fill_n(pow_tmp, nvals, 1.0); + this, + [&](ptrdiff_t rindex, 
ptrdiff_t /*xindex*/, ptrdiff_t /*yindex*/, ptrdiff_t /*zindex*/){ + double *val_tmp = threaded_val_tmp.getMine(); + ptrdiff_t* local_hist = threaded_local_hist.getMine(); + double *local_moments = threaded_local_moments.getMine(); + if (nvals == 4) val_tmp[3] = 0.0; for (int i=0; i<3; i++) { @@ -190,7 +212,7 @@ void fluid_solver_base<rnumber>::compute_rspace_stats( local_moments[0*nvals+3] = val_tmp[3]; if (val_tmp[3] > local_moments[9*nvals+3]) local_moments[9*nvals+3] = val_tmp[3]; - bin = int(floor(val_tmp[3]*2/binsize[3])); + int bin = int(floor(val_tmp[3]*2/binsize[3])); if (bin >= 0 && bin < nbins) local_hist[bin*nvals+3]++; } @@ -200,42 +222,63 @@ void fluid_solver_base<rnumber>::compute_rspace_stats( local_moments[0*nvals+i] = val_tmp[i]; if (val_tmp[i] > local_moments[(nmoments-1)*nvals+i]) local_moments[(nmoments-1)*nvals+i] = val_tmp[i]; - bin = int(floor((val_tmp[i] + max_estimate[i]) / binsize[i])); + int bin = int(floor((val_tmp[i] + max_estimate[i]) / binsize[i])); if (bin >= 0 && bin < nbins) local_hist[bin*nvals+i]++; } - for (int n=1; n < nmoments-1; n++) - for (int i=0; i<nvals; i++) - local_moments[n*nvals + i] += (pow_tmp[i] = val_tmp[i]*pow_tmp[i]); - ); + for (int n=1; n < nmoments-1; n++){ + double pow_tmp = 1.; + for (int i=0; i<nvals; i++){ + local_moments[n*nvals + i] += (pow_tmp = val_tmp[i]*pow_tmp); + } + } + } + ); + + threaded_local_hist.mergeParallel(); + threaded_local_moments.mergeParallel([&](const int idx, const double& v1, const double& v2) -> double { + if(nvals == int(4) && idx == 0*nvals+3){ + return std::min(v1, v2); + } + if(nvals == int(4) && idx == 9*nvals+3){ + return std::max(v1, v2); + } + if(idx < 3){ + return std::min(v1, v2); + } + if((nmoments-1)*nvals <= idx && idx < (nmoments-1)*nvals+3){ + return std::max(v1, v2); + } + return v1 + v2; + }); + + + double *moments = new double[nmoments*nvals]; MPI_Allreduce( - (void*)local_moments, - (void*)moments, - nvals, - MPI_DOUBLE, MPI_MIN, this->cd->comm); + 
threaded_local_moments.getMasterData(), + (void*)moments, + nvals, + MPI_DOUBLE, MPI_MIN, this->cd->comm); MPI_Allreduce( - (void*)(local_moments + nvals), - (void*)(moments+nvals), - (nmoments-2)*nvals, - MPI_DOUBLE, MPI_SUM, this->cd->comm); + (threaded_local_moments.getMasterData() + nvals), + (void*)(moments+nvals), + (nmoments-2)*nvals, + MPI_DOUBLE, MPI_SUM, this->cd->comm); MPI_Allreduce( - (void*)(local_moments + (nmoments-1)*nvals), - (void*)(moments+(nmoments-1)*nvals), - nvals, - MPI_DOUBLE, MPI_MAX, this->cd->comm); + (threaded_local_moments.getMasterData() + (nmoments-1)*nvals), + (void*)(moments+(nmoments-1)*nvals), + nvals, + MPI_DOUBLE, MPI_MAX, this->cd->comm); + ptrdiff_t *hist = new ptrdiff_t[nbins*nvals]; MPI_Allreduce( - (void*)local_hist, - (void*)hist, - nbins*nvals, - MPI_INT64_T, MPI_SUM, this->cd->comm); + threaded_local_hist.getMasterData(), + (void*)hist, + nbins*nvals, + MPI_INT64_T, MPI_SUM, this->cd->comm); for (int n=1; n < nmoments-1; n++) for (int i=0; i<nvals; i++) moments[n*nvals + i] /= this->normalization_factor; - delete[] local_moments; - delete[] local_hist; - delete[] val_tmp; delete[] binsize; - delete[] pow_tmp; if (this->rd->myrank == 0) { hid_t dset, wspace, mspace; @@ -280,18 +323,28 @@ void fluid_solver_base<rnumber>::compute_rspace_stats( double max_estimate[], const int nbins) { - double *local_moments = fftw_alloc_real(10*nvals); - double val_tmp[nvals], binsize[nvals], pow_tmp[nvals]; - ptrdiff_t *local_hist = new ptrdiff_t[nbins*nvals]; - int bin; + TIMEZONE("fluid_solver_base::compute_rspace_stats"); + shared_array<double> threaded_local_moments(10*nvals,[&](double* local_moments){ + std::fill_n(local_moments, 10*nvals, 0); + if (nvals == 4) local_moments[3] = max_estimate[3]; + }); + + shared_array<ptrdiff_t> threaded_local_hist(nbins*nvals, [&](ptrdiff_t* local_hist){ + std::fill_n(local_hist, nbins*nvals, 0); + }); + + // Will not be modified by the threads + double binsize[nvals]; for (int i=0; i<nvals; i++) 
binsize[i] = 2*max_estimate[i] / nbins; - std::fill_n(local_hist, nbins*nvals, 0); - std::fill_n(local_moments, 10*nvals, 0); - if (nvals == 4) local_moments[3] = max_estimate[3]; + RLOOP( - this, - std::fill_n(pow_tmp, nvals, 1.0); + this, + [&](ptrdiff_t rindex, ptrdiff_t /*xindex*/, ptrdiff_t /*yindex*/, ptrdiff_t /*zindex*/){ + ptrdiff_t *local_hist = threaded_local_hist.getMine(); + double *local_moments = threaded_local_moments.getMine(); + + double val_tmp[nvals]; if (nvals == 4) val_tmp[3] = 0.0; for (int i=0; i<3; i++) { @@ -305,7 +358,7 @@ void fluid_solver_base<rnumber>::compute_rspace_stats( local_moments[0*nvals+3] = val_tmp[3]; if (val_tmp[3] > local_moments[9*nvals+3]) local_moments[9*nvals+3] = val_tmp[3]; - bin = int(floor(val_tmp[3]*2/binsize[3])); + int bin = int(floor(val_tmp[3]*2/binsize[3])); if (bin >= 0 && bin < nbins) local_hist[bin*nvals+3]++; } @@ -315,44 +368,65 @@ void fluid_solver_base<rnumber>::compute_rspace_stats( local_moments[0*nvals+i] = val_tmp[i]; if (val_tmp[i] > local_moments[9*nvals+i]) local_moments[9*nvals+i] = val_tmp[i]; - bin = int(floor((val_tmp[i] + max_estimate[i]) / binsize[i])); + int bin = int(floor((val_tmp[i] + max_estimate[i]) / binsize[i])); if (bin >= 0 && bin < nbins) local_hist[bin*nvals+i]++; } - for (int n=1; n<9; n++) - for (int i=0; i<nvals; i++) - local_moments[n*nvals + i] += (pow_tmp[i] = val_tmp[i]*pow_tmp[i]); - ); + for (int n=1; n<9; n++){ + double pow_tmp = 1; + for (int i=0; i<nvals; i++){ + local_moments[n*nvals + i] += (pow_tmp = val_tmp[i]*pow_tmp); + } + } + } + ); + + threaded_local_moments.mergeParallel([&](const int idx, const double& v1, const double& v2) -> double { + if(nvals == int(4) && idx == 0*nvals+3){ + return std::min(v1, v2); + } + if(nvals == int(4) && idx == 9*nvals+3){ + return std::max(v1, v2); + } + if(idx < 3){ + return std::min(v1, v2); + } + if(9*nvals <= idx && idx < 9*nvals+3){ + return std::max(v1, v2); + } + return v1 + v2; + }); + 
threaded_local_hist.mergeParallel(); + MPI_Allreduce( - (void*)local_moments, - (void*)moments, - nvals, - MPI_DOUBLE, MPI_MIN, this->cd->comm); + threaded_local_moments.getMasterData(), + (void*)moments, + nvals, + MPI_DOUBLE, MPI_MIN, this->cd->comm); MPI_Allreduce( - (void*)(local_moments + nvals), - (void*)(moments+nvals), - 8*nvals, - MPI_DOUBLE, MPI_SUM, this->cd->comm); + (threaded_local_moments.getMasterData() + nvals), + (void*)(moments+nvals), + 8*nvals, + MPI_DOUBLE, MPI_SUM, this->cd->comm); MPI_Allreduce( - (void*)(local_moments + 9*nvals), - (void*)(moments+9*nvals), - nvals, - MPI_DOUBLE, MPI_MAX, this->cd->comm); + (threaded_local_moments.getMasterData() + 9*nvals), + (void*)(moments+9*nvals), + nvals, + MPI_DOUBLE, MPI_MAX, this->cd->comm); MPI_Allreduce( - (void*)local_hist, - (void*)hist, - nbins*nvals, - MPI_INT64_T, MPI_SUM, this->cd->comm); + (void*)threaded_local_hist.getMasterData(), + (void*)hist, + nbins*nvals, + MPI_INT64_T, MPI_SUM, this->cd->comm); for (int n=1; n<9; n++) for (int i=0; i<nvals; i++) moments[n*nvals + i] /= this->normalization_factor; - fftw_free(local_moments); - delete[] local_hist; } template <class rnumber> void fluid_solver_base<rnumber>::write_spectrum(const char *fname, cnumber *a, const double k2exponent) { + TIMEZONE("fluid_solver_base::write_spectrum"); double *spec = fftw_alloc_real(this->nshells); this->cospectrum(a, a, spec, k2exponent); if (this->cd->myrank == 0) @@ -371,362 +445,390 @@ void fluid_solver_base<rnumber>::write_spectrum(const char *fname, cnumber *a, c /*****************************************************************************/ /* macro for specializations to numeric types compatible with FFTW */ -#define FLUID_SOLVER_BASE_DEFINITIONS(FFTW, R, MPI_RNUM, MPI_CNUM) \ - \ -template<> \ -fluid_solver_base<R>::fluid_solver_base( \ - const char *NAME, \ - int nx, \ - int ny, \ - int nz, \ - double DKX, \ - double DKY, \ - double DKZ, \ - int DEALIAS_TYPE, \ - unsigned FFTW_PLAN_RIGOR) \ -{ \ - 
strncpy(this->name, NAME, 256); \ - this->name[255] = '\0'; \ - this->iteration = 0; \ - this->fftw_plan_rigor = FFTW_PLAN_RIGOR; \ - \ - int ntmp[4]; \ - ntmp[0] = nz; \ - ntmp[1] = ny; \ - ntmp[2] = nx; \ - ntmp[3] = 3; \ - this->rd = new field_descriptor<R>( \ - 4, ntmp, MPI_RNUM, MPI_COMM_WORLD);\ - this->normalization_factor = (this->rd->full_size/3); \ - ntmp[0] = ny; \ - ntmp[1] = nz; \ - ntmp[2] = nx/2 + 1; \ - ntmp[3] = 3; \ - this->cd = new field_descriptor<R>( \ - 4, ntmp, MPI_CNUM, this->rd->comm);\ - \ - this->dkx = DKX; \ - this->dky = DKY; \ - this->dkz = DKZ; \ - this->kx = new double[this->cd->sizes[2]]; \ - this->ky = new double[this->cd->subsizes[0]]; \ - this->kz = new double[this->cd->sizes[1]]; \ - this->dealias_type = DEALIAS_TYPE; \ - switch(this->dealias_type) \ - { \ - /* HL07 smooth filter */ \ - case 1: \ - this->kMx = this->dkx*(int(this->rd->sizes[2] / 2)-1); \ - this->kMy = this->dky*(int(this->rd->sizes[1] / 2)-1); \ - this->kMz = this->dkz*(int(this->rd->sizes[0] / 2)-1); \ - break; \ - default: \ - this->kMx = this->dkx*(int(this->rd->sizes[2] / 3)-1); \ - this->kMy = this->dky*(int(this->rd->sizes[1] / 3)-1); \ - this->kMz = this->dkz*(int(this->rd->sizes[0] / 3)-1); \ - } \ - int i, ii; \ - for (i = 0; i<this->cd->sizes[2]; i++) \ - this->kx[i] = i*this->dkx; \ - for (i = 0; i<this->cd->subsizes[0]; i++) \ - { \ - ii = i + this->cd->starts[0]; \ - if (ii <= this->rd->sizes[1]/2) \ - this->ky[i] = this->dky*ii; \ - else \ - this->ky[i] = this->dky*(ii - this->rd->sizes[1]); \ - } \ - for (i = 0; i<this->cd->sizes[1]; i++) \ - { \ - if (i <= this->rd->sizes[0]/2) \ - this->kz[i] = this->dkz*i; \ - else \ - this->kz[i] = this->dkz*(i - this->rd->sizes[0]); \ - } \ - this->kM = this->kMx; \ - if (this->kM < this->kMy) this->kM = this->kMy; \ - if (this->kM < this->kMz) this->kM = this->kMz; \ - this->kM2 = this->kM * this->kM; \ - this->kMspec = this->kM; \ - this->kMspec2 = this->kM2; \ - this->dk = this->dkx; \ - if (this->dk > 
this->dky) this->dk = this->dky; \ - if (this->dk > this->dkz) this->dk = this->dkz; \ - this->dk2 = this->dk*this->dk; \ - DEBUG_MSG( \ - "kM = %g, kM2 = %g, dk = %g, dk2 = %g\n", \ - this->kM, this->kM2, this->dk, this->dk2); \ - /* spectra stuff */ \ - this->nshells = int(this->kMspec / this->dk) + 2; \ - DEBUG_MSG( \ - "kMspec = %g, kMspec2 = %g, nshells = %ld\n", \ - this->kMspec, this->kMspec2, this->nshells); \ - this->kshell = new double[this->nshells]; \ - std::fill_n(this->kshell, this->nshells, 0.0); \ - this->nshell = new int64_t[this->nshells]; \ - std::fill_n(this->nshell, this->nshells, 0); \ - double *kshell_local = new double[this->nshells]; \ - std::fill_n(kshell_local, this->nshells, 0.0); \ - int64_t *nshell_local = new int64_t[this->nshells]; \ - std::fill_n(nshell_local, this->nshells, 0.0); \ - double knorm; \ - CLOOP_K2_NXMODES( \ - this, \ - if (k2 < this->kM2) \ - { \ - knorm = sqrt(k2); \ - nshell_local[int(knorm/this->dk)] += nxmodes; \ - kshell_local[int(knorm/this->dk)] += nxmodes*knorm; \ - } \ - this->Fourier_filter[int(round(k2 / this->dk2))] = exp(-36.0 * pow(k2/this->kM2, 18.)); \ - ); \ - \ - MPI_Allreduce( \ - (void*)(nshell_local), \ - (void*)(this->nshell), \ - this->nshells, \ - MPI_INT64_T, MPI_SUM, this->cd->comm); \ - MPI_Allreduce( \ - (void*)(kshell_local), \ - (void*)(this->kshell), \ - this->nshells, \ - MPI_DOUBLE, MPI_SUM, this->cd->comm); \ - for (unsigned int n=0; n<this->nshells; n++) \ - { \ - this->kshell[n] /= this->nshell[n]; \ - } \ - delete[] nshell_local; \ - delete[] kshell_local; \ -} \ - \ -template<> \ -fluid_solver_base<R>::~fluid_solver_base() \ -{ \ - delete[] this->kshell; \ - delete[] this->nshell; \ - \ - delete[] this->kx; \ - delete[] this->ky; \ - delete[] this->kz; \ - \ - delete this->cd; \ - delete this->rd; \ -} \ - \ -template<> \ -void fluid_solver_base<R>::low_pass_Fourier(FFTW(complex) *a, const int howmany, const double kmax) \ -{ \ - const double km2 = kmax*kmax; \ - const int 
howmany2 = 2*howmany; \ - /*DEBUG_MSG("entered low_pass_Fourier, kmax=%lg km2=%lg howmany2=%d\n", kmax, km2, howmany2);*/ \ - CLOOP_K2( \ - this, \ - /*DEBUG_MSG("kx=%lg ky=%lg kz=%lg k2=%lg\n", \ - this->kx[xindex], \ - this->ky[yindex], \ - this->kz[zindex], \ - k2);*/ \ - if (k2 >= km2) \ - std::fill_n((R*)(a + howmany*cindex), howmany2, 0.0); \ - );\ -} \ - \ -template<> \ -void fluid_solver_base<R>::dealias(FFTW(complex) *a, const int howmany) \ -{ \ - if (this->dealias_type == 0) \ - { \ - this->low_pass_Fourier(a, howmany, this->kM); \ - return; \ - } \ - double tval; \ - CLOOP_K2( \ - this, \ - tval = this->Fourier_filter[int(round(k2/this->dk2))]; \ - for (int tcounter = 0; tcounter < howmany; tcounter++) \ - for (int i=0; i<2; i++) \ - a[howmany*cindex+tcounter][i] *= tval; \ - ); \ -} \ - \ -template<> \ -void fluid_solver_base<R>::force_divfree(FFTW(complex) *a) \ -{ \ - FFTW(complex) tval; \ - CLOOP_K2( \ - this, \ - if (k2 > 0) \ - { \ - tval[0] = (this->kx[xindex]*((*(a + cindex*3 ))[0]) + \ - this->ky[yindex]*((*(a + cindex*3+1))[0]) + \ - this->kz[zindex]*((*(a + cindex*3+2))[0]) ) / k2; \ - tval[1] = (this->kx[xindex]*((*(a + cindex*3 ))[1]) + \ - this->ky[yindex]*((*(a + cindex*3+1))[1]) + \ - this->kz[zindex]*((*(a + cindex*3+2))[1]) ) / k2; \ - for (int imag_part=0; imag_part<2; imag_part++) \ - { \ - a[cindex*3 ][imag_part] -= tval[imag_part]*this->kx[xindex]; \ - a[cindex*3+1][imag_part] -= tval[imag_part]*this->ky[yindex]; \ - a[cindex*3+2][imag_part] -= tval[imag_part]*this->kz[zindex]; \ - } \ - } \ - );\ - if (this->cd->myrank == this->cd->rank[0]) \ - std::fill_n((R*)(a), 6, 0.0); \ -} \ - \ -template<> \ -void fluid_solver_base<R>::compute_vector_gradient(FFTW(complex) *A, FFTW(complex) *cvec) \ -{ \ - ptrdiff_t tindex; \ - std::fill_n((R*)A, 3*2*this->cd->local_size, 0.0); \ - FFTW(complex) *dx_u, *dy_u, *dz_u; \ - dx_u = A; \ - dy_u = A + this->cd->local_size; \ - dz_u = A + 2*this->cd->local_size; \ - CLOOP_K2( \ - this, \ - if (k2 
<= this->kM2) \ - { \ - tindex = 3*cindex; \ - for (int cc=0; cc<3; cc++) \ - { \ - dx_u[tindex + cc][0] = -this->kx[xindex]*cvec[tindex+cc][1]; \ - dx_u[tindex + cc][1] = this->kx[xindex]*cvec[tindex+cc][0]; \ - dy_u[tindex + cc][0] = -this->ky[yindex]*cvec[tindex+cc][1]; \ - dy_u[tindex + cc][1] = this->ky[yindex]*cvec[tindex+cc][0]; \ - dz_u[tindex + cc][0] = -this->kz[zindex]*cvec[tindex+cc][1]; \ - dz_u[tindex + cc][1] = this->kz[zindex]*cvec[tindex+cc][0]; \ - } \ - } \ - ); \ -} \ - \ -template<> \ -void fluid_solver_base<R>::symmetrize(FFTW(complex) *data, const int howmany) \ -{ \ - ptrdiff_t ii, cc; \ - MPI_Status *mpistatus = new MPI_Status; \ - if (this->cd->myrank == this->cd->rank[0]) \ - { \ - for (cc = 0; cc < howmany; cc++) \ - data[cc][1] = 0.0; \ - for (ii = 1; ii < this->cd->sizes[1]/2; ii++) \ - for (cc = 0; cc < howmany; cc++) { \ - ( *(data + cc + howmany*(this->cd->sizes[1] - ii)*this->cd->sizes[2]))[0] = \ - (*(data + cc + howmany*( ii)*this->cd->sizes[2]))[0]; \ - ( *(data + cc + howmany*(this->cd->sizes[1] - ii)*this->cd->sizes[2]))[1] = \ - -(*(data + cc + howmany*( ii)*this->cd->sizes[2]))[1]; \ - } \ - } \ - FFTW(complex) *buffer; \ - buffer = FFTW(alloc_complex)(howmany*this->cd->sizes[1]); \ - ptrdiff_t yy; \ - /*ptrdiff_t tindex;*/ \ - int ranksrc, rankdst; \ - for (yy = 1; yy < this->cd->sizes[0]/2; yy++) { \ - ranksrc = this->cd->rank[yy]; \ - rankdst = this->cd->rank[this->cd->sizes[0] - yy]; \ - if (this->cd->myrank == ranksrc) \ - for (ii = 0; ii < this->cd->sizes[1]; ii++) \ - for (cc = 0; cc < howmany; cc++) \ - for (int imag_comp=0; imag_comp<2; imag_comp++) \ - (*(buffer + howmany*ii+cc))[imag_comp] = \ - (*(data + howmany*((yy - this->cd->starts[0])*this->cd->sizes[1] + ii)*this->cd->sizes[2] + cc))[imag_comp]; \ - if (ranksrc != rankdst) \ - { \ - if (this->cd->myrank == ranksrc) \ - MPI_Send((void*)buffer, \ - howmany*this->cd->sizes[1], MPI_CNUM, rankdst, yy, \ - this->cd->comm); \ - if (this->cd->myrank == rankdst) \ - 
MPI_Recv((void*)buffer, \ - howmany*this->cd->sizes[1], MPI_CNUM, ranksrc, yy, \ - this->cd->comm, mpistatus); \ - } \ - if (this->cd->myrank == rankdst) \ - { \ - for (ii = 1; ii < this->cd->sizes[1]; ii++) \ - for (cc = 0; cc < howmany; cc++) \ - { \ - (*(data + howmany*((this->cd->sizes[0] - yy - this->cd->starts[0])*this->cd->sizes[1] + ii)*this->cd->sizes[2] + cc))[0] = \ - (*(buffer + howmany*(this->cd->sizes[1]-ii)+cc))[0]; \ - (*(data + howmany*((this->cd->sizes[0] - yy - this->cd->starts[0])*this->cd->sizes[1] + ii)*this->cd->sizes[2] + cc))[1] = \ - -(*(buffer + howmany*(this->cd->sizes[1]-ii)+cc))[1]; \ - } \ - for (cc = 0; cc < howmany; cc++) \ - { \ - (*((data + cc + howmany*(this->cd->sizes[0] - yy - this->cd->starts[0])*this->cd->sizes[1]*this->cd->sizes[2])))[0] = (*(buffer + cc))[0]; \ - (*((data + cc + howmany*(this->cd->sizes[0] - yy - this->cd->starts[0])*this->cd->sizes[1]*this->cd->sizes[2])))[1] = -(*(buffer + cc))[1]; \ - } \ - } \ - } \ - FFTW(free)(buffer); \ - delete mpistatus; \ - /* put asymmetric data to 0 */\ - /*if (this->cd->myrank == this->cd->rank[this->cd->sizes[0]/2]) \ - { \ - tindex = howmany*(this->cd->sizes[0]/2 - this->cd->starts[0])*this->cd->sizes[1]*this->cd->sizes[2]; \ - for (ii = 0; ii < this->cd->sizes[1]; ii++) \ - { \ - std::fill_n((R*)(data + tindex), howmany*2*this->cd->sizes[2], 0.0); \ - tindex += howmany*this->cd->sizes[2]; \ - } \ - } \ - tindex = howmany*(); \ - std::fill_n((R*)(data + tindex), howmany*2, 0.0);*/ \ -} \ - \ -template<> \ -int fluid_solver_base<R>::read_base(const char *fname, R *data) \ -{ \ - char full_name[512]; \ - sprintf(full_name, "%s_%s_i%.5x", this->name, fname, this->iteration); \ - return this->rd->read(full_name, (void*)data); \ -} \ - \ -template<> \ -int fluid_solver_base<R>::read_base(const char *fname, FFTW(complex) *data) \ -{ \ - char full_name[512]; \ - sprintf(full_name, "%s_%s_i%.5x", this->name, fname, this->iteration); \ - return this->cd->read(full_name, (void*)data); 
\ -} \ - \ -template<> \ -int fluid_solver_base<R>::write_base(const char *fname, R *data) \ -{ \ - char full_name[512]; \ - sprintf(full_name, "%s_%s_i%.5x", this->name, fname, this->iteration); \ - return this->rd->write(full_name, (void*)data); \ -} \ - \ -template<> \ -int fluid_solver_base<R>::write_base(const char *fname, FFTW(complex) *data) \ -{ \ - char full_name[512]; \ - sprintf(full_name, "%s_%s_i%.5x", this->name, fname, this->iteration); \ - return this->cd->write(full_name, (void*)data); \ -} \ - \ -/* finally, force generation of code */ \ -template class fluid_solver_base<R>; \ +template <class rnumber> +fluid_solver_base<rnumber>::fluid_solver_base( + const char *NAME, + int nx, + int ny, + int nz, + double DKX, + double DKY, + double DKZ, + int DEALIAS_TYPE, + unsigned FFTW_PLAN_RIGOR) +{ + TIMEZONE("fluid_solver_base::fluid_solver_base"); + strncpy(this->name, NAME, 256); + this->name[255] = '\0'; + this->iteration = 0; + this->fftw_plan_rigor = FFTW_PLAN_RIGOR; -/*****************************************************************************/ + int ntmp[4]; + ntmp[0] = nz; + ntmp[1] = ny; + ntmp[2] = nx; + ntmp[3] = 3; + this->rd = new field_descriptor<rnumber>( + 4, ntmp, mpi_real_type<rnumber>::real(), MPI_COMM_WORLD); + this->normalization_factor = (this->rd->full_size/3); + ntmp[0] = ny; + ntmp[1] = nz; + ntmp[2] = nx/2 + 1; + ntmp[3] = 3; + this->cd = new field_descriptor<rnumber>( + 4, ntmp, mpi_real_type<rnumber>::complex(), this->rd->comm); + this->dkx = DKX; + this->dky = DKY; + this->dkz = DKZ; + this->kx = new double[this->cd->sizes[2]]; + this->ky = new double[this->cd->subsizes[0]]; + this->kz = new double[this->cd->sizes[1]]; + this->dealias_type = DEALIAS_TYPE; + switch(this->dealias_type) + { + /* HL07 smooth filter */ + case 1: + this->kMx = this->dkx*(int(this->rd->sizes[2] / 2)-1); + this->kMy = this->dky*(int(this->rd->sizes[1] / 2)-1); + this->kMz = this->dkz*(int(this->rd->sizes[0] / 2)-1); + break; + default: + this->kMx = 
this->dkx*(int(this->rd->sizes[2] / 3)-1); + this->kMy = this->dky*(int(this->rd->sizes[1] / 3)-1); + this->kMz = this->dkz*(int(this->rd->sizes[0] / 3)-1); + } + int i, ii; + for (i = 0; i<this->cd->sizes[2]; i++) + this->kx[i] = i*this->dkx; + for (i = 0; i<this->cd->subsizes[0]; i++) + { + ii = i + this->cd->starts[0]; + if (ii <= this->rd->sizes[1]/2) + this->ky[i] = this->dky*ii; + else + this->ky[i] = this->dky*(ii - this->rd->sizes[1]); + } + for (i = 0; i<this->cd->sizes[1]; i++) + { + if (i <= this->rd->sizes[0]/2) + this->kz[i] = this->dkz*i; + else + this->kz[i] = this->dkz*(i - this->rd->sizes[0]); + } + this->kM = this->kMx; + if (this->kM < this->kMy) this->kM = this->kMy; + if (this->kM < this->kMz) this->kM = this->kMz; + this->kM2 = this->kM * this->kM; + this->kMspec = this->kM; + this->kMspec2 = this->kM2; + this->dk = this->dkx; + if (this->dk > this->dky) this->dk = this->dky; + if (this->dk > this->dkz) this->dk = this->dkz; + this->dk2 = this->dk*this->dk; + DEBUG_MSG( + "kM = %g, kM2 = %g, dk = %g, dk2 = %g\n", + this->kM, this->kM2, this->dk, this->dk2); + /* spectra stuff */ + this->nshells = int(this->kMspec / this->dk) + 2; + DEBUG_MSG( + "kMspec = %g, kMspec2 = %g, nshells = %ld\n", + this->kMspec, this->kMspec2, this->nshells); + this->kshell = new double[this->nshells]; + std::fill_n(this->kshell, this->nshells, 0.0); + this->nshell = new int64_t[this->nshells]; + std::fill_n(this->nshell, this->nshells, 0); + DEBUG_MSG("fluid_solver_base::fluid_solver_base before declaring shared_array\n"); + shared_array<double> kshell_local_threaded(this->nshells,[&](double* kshell_local){ + std::fill_n(kshell_local, this->nshells, 0.0); + }); + DEBUG_MSG("fluid_solver_base::fluid_solver_base before declaring shared_array\n"); + shared_array<int64_t> nshell_local_threaded(this->nshells,[&](int64_t* nshell_local){ + std::fill_n(nshell_local, this->nshells, 0); + }); + + std::vector<std::unordered_map<int, double>> 
Fourier_filter_threaded(omp_get_max_threads()); + + DEBUG_MSG("fluid_solver_base::fluid_solver_base before cloop_k2_nxmodes\n"); + CLOOP_K2_NXMODES( + this, + + [&](ptrdiff_t /*cindex*/, ptrdiff_t /*xindex*/, ptrdiff_t /*yindex*/, + ptrdiff_t /*zindex*/, double k2, int nxmodes){ + if (k2 < this->kM2) + { + double knorm = sqrt(k2); + nshell_local_threaded.getMine()[int(knorm/this->dk)] += nxmodes; + kshell_local_threaded.getMine()[int(knorm/this->dk)] += nxmodes*knorm; + } + Fourier_filter_threaded[omp_get_thread_num()][int(round(k2 / this->dk2))] = exp(-36.0 * pow(k2/this->kM2, 18.));} + ); + + // Merge results + nshell_local_threaded.mergeParallel(); + kshell_local_threaded.mergeParallel(); + for(int idxMerge = 0 ; idxMerge < int(Fourier_filter_threaded.size()) ; ++idxMerge){ + for(const auto kv : Fourier_filter_threaded[idxMerge]){ + this->Fourier_filter[kv.first] = kv.second; + } + } + + MPI_Allreduce( + (void*)(nshell_local_threaded.getMasterData()), + (void*)(this->nshell), + this->nshells, + MPI_INT64_T, MPI_SUM, this->cd->comm); + MPI_Allreduce( + (void*)(kshell_local_threaded.getMasterData()), + (void*)(this->kshell), + this->nshells, + MPI_DOUBLE, MPI_SUM, this->cd->comm); + for (unsigned int n=0; n<this->nshells; n++) + { + if (this->nshell[n] != 0) + this->kshell[n] /= this->nshell[n]; + else + this->kshell[n] = -1; + } + DEBUG_MSG("exiting fluid_solver_base::fluid_solver_base\n"); +} + +template <class rnumber> +fluid_solver_base<rnumber>::~fluid_solver_base() +{ + delete[] this->kshell; + delete[] this->nshell; + + delete[] this->kx; + delete[] this->ky; + delete[] this->kz; + + delete this->cd; + delete this->rd; +} + +template <class rnumber> +void fluid_solver_base<rnumber>::low_pass_Fourier(cnumber *a, const int howmany, const double kmax) +{ + TIMEZONE("fluid_solver_base::low_pass_Fourier"); + const double km2 = kmax*kmax; + const int howmany2 = 2*howmany; + /*DEBUG_MSG("entered low_pass_Fourier, kmax=%lg km2=%lg howmany2=%d\n", kmax, km2, 
howmany2);*/ + CLOOP_K2( + this, + /*DEBUG_MSG("kx=%lg ky=%lg kz=%lg k2=%lg\n", + this->kx[xindex], + this->ky[yindex], + this->kz[zindex], + k2);*/ + + [&](ptrdiff_t cindex, ptrdiff_t xindex, ptrdiff_t yindex, + ptrdiff_t zindex, double k2){ + if (k2 >= km2) + std::fill_n((rnumber*)(a + howmany*cindex), howmany2, 0.0);} + ); +} + +template <class rnumber> +void fluid_solver_base<rnumber>::dealias(cnumber *a, const int howmany) +{ + TIMEZONE("fluid_solver_base::dealias"); + if (this->dealias_type == 0) + { + this->low_pass_Fourier(a, howmany, this->kM); + return; + } + + CLOOP_K2( + this, + [&](ptrdiff_t cindex, ptrdiff_t /*xindex*/, ptrdiff_t /*yindex*/, + ptrdiff_t /*zindex*/, double k2){ + double tval = this->Fourier_filter[int(round(k2/this->dk2))]; + // It is thread safe on the index cindex + for (int tcounter = 0; tcounter < howmany; tcounter++) + for (int i=0; i<2; i++) + a[howmany*cindex+tcounter][i] *= tval; + } + ); +} + +template <class rnumber> +void fluid_solver_base<rnumber>::force_divfree(cnumber *a) +{ + TIMEZONE("fluid_solver_base::force_divfree"); + CLOOP_K2( + this, + + [&](ptrdiff_t cindex, ptrdiff_t xindex, ptrdiff_t yindex, + ptrdiff_t zindex, double k2){ + if (k2 > 0) + { + // It is thread safe on index cindex + cnumber tval; + tval[0] = (this->kx[xindex]*((*(a + cindex*3 ))[0]) + + this->ky[yindex]*((*(a + cindex*3+1))[0]) + + this->kz[zindex]*((*(a + cindex*3+2))[0]) ) / k2; + tval[1] = (this->kx[xindex]*((*(a + cindex*3 ))[1]) + + this->ky[yindex]*((*(a + cindex*3+1))[1]) + + this->kz[zindex]*((*(a + cindex*3+2))[1]) ) / k2; + for (int imag_part=0; imag_part<2; imag_part++) + { + a[cindex*3 ][imag_part] -= tval[imag_part]*this->kx[xindex]; + a[cindex*3+1][imag_part] -= tval[imag_part]*this->ky[yindex]; + a[cindex*3+2][imag_part] -= tval[imag_part]*this->kz[zindex]; + } + }} + ); + if (this->cd->myrank == this->cd->rank[0]) + std::fill_n((rnumber*)(a), 6, 0.0); +} + +template <class rnumber> +void 
fluid_solver_base<rnumber>::compute_vector_gradient(cnumber *A, cnumber *cvec) +{ + TIMEZONE("fluid_solver_base::compute_vector_gradient"); + std::fill_n((rnumber*)A, 3*2*this->cd->local_size, 0.0); + cnumber *dx_u, *dy_u, *dz_u; + dx_u = A; + dy_u = A + this->cd->local_size; + dz_u = A + 2*this->cd->local_size; + CLOOP_K2( + this, + + [&](ptrdiff_t cindex, ptrdiff_t xindex, ptrdiff_t yindex, + ptrdiff_t zindex, double k2){ + if (k2 <= this->kM2) + { + // It is thread safe on cindex + ptrdiff_t tindex = 3*cindex; + for (int cc=0; cc<3; cc++) + { + dx_u[tindex + cc][0] = -this->kx[xindex]*cvec[tindex+cc][1]; + dx_u[tindex + cc][1] = this->kx[xindex]*cvec[tindex+cc][0]; + dy_u[tindex + cc][0] = -this->ky[yindex]*cvec[tindex+cc][1]; + dy_u[tindex + cc][1] = this->ky[yindex]*cvec[tindex+cc][0]; + dz_u[tindex + cc][0] = -this->kz[zindex]*cvec[tindex+cc][1]; + dz_u[tindex + cc][1] = this->kz[zindex]*cvec[tindex+cc][0]; + } + }} + ); +} + +template <class rnumber> +void fluid_solver_base<rnumber>::symmetrize(cnumber *data, const int howmany) +{ + TIMEZONE("fluid_solver_base::symmetrize"); + ptrdiff_t ii, cc; + MPI_Status *mpistatus = new MPI_Status; + if (this->cd->myrank == this->cd->rank[0]) + { + for (cc = 0; cc < howmany; cc++) + data[cc][1] = 0.0; + for (ii = 1; ii < this->cd->sizes[1]/2; ii++) + for (cc = 0; cc < howmany; cc++) { + ( *(data + cc + howmany*(this->cd->sizes[1] - ii)*this->cd->sizes[2]))[0] = + (*(data + cc + howmany*( ii)*this->cd->sizes[2]))[0]; + ( *(data + cc + howmany*(this->cd->sizes[1] - ii)*this->cd->sizes[2]))[1] = + -(*(data + cc + howmany*( ii)*this->cd->sizes[2]))[1]; + } + } + cnumber *buffer; + buffer = fftw_interface<rnumber>::alloc_complex(howmany*this->cd->sizes[1]); + ptrdiff_t yy; + /*ptrdiff_t tindex;*/ + int ranksrc, rankdst; + for (yy = 1; yy < this->cd->sizes[0]/2; yy++) { + ranksrc = this->cd->rank[yy]; + rankdst = this->cd->rank[this->cd->sizes[0] - yy]; + if (this->cd->myrank == ranksrc) + for (ii = 0; ii < this->cd->sizes[1]; 
ii++) + for (cc = 0; cc < howmany; cc++) + for (int imag_comp=0; imag_comp<2; imag_comp++) + (*(buffer + howmany*ii+cc))[imag_comp] = + (*(data + howmany*((yy - this->cd->starts[0])*this->cd->sizes[1] + ii)*this->cd->sizes[2] + cc))[imag_comp]; + if (ranksrc != rankdst) + { + if (this->cd->myrank == ranksrc) + MPI_Send((void*)buffer, + howmany*this->cd->sizes[1], mpi_real_type<rnumber>::complex(), rankdst, yy, + this->cd->comm); + if (this->cd->myrank == rankdst) + MPI_Recv((void*)buffer, + howmany*this->cd->sizes[1], mpi_real_type<rnumber>::complex(), ranksrc, yy, + this->cd->comm, mpistatus); + } + if (this->cd->myrank == rankdst) + { + for (ii = 1; ii < this->cd->sizes[1]; ii++) + for (cc = 0; cc < howmany; cc++) + { + (*(data + howmany*((this->cd->sizes[0] - yy - this->cd->starts[0])*this->cd->sizes[1] + ii)*this->cd->sizes[2] + cc))[0] = + (*(buffer + howmany*(this->cd->sizes[1]-ii)+cc))[0]; + (*(data + howmany*((this->cd->sizes[0] - yy - this->cd->starts[0])*this->cd->sizes[1] + ii)*this->cd->sizes[2] + cc))[1] = + -(*(buffer + howmany*(this->cd->sizes[1]-ii)+cc))[1]; + } + for (cc = 0; cc < howmany; cc++) + { + (*((data + cc + howmany*(this->cd->sizes[0] - yy - this->cd->starts[0])*this->cd->sizes[1]*this->cd->sizes[2])))[0] = (*(buffer + cc))[0]; + (*((data + cc + howmany*(this->cd->sizes[0] - yy - this->cd->starts[0])*this->cd->sizes[1]*this->cd->sizes[2])))[1] = -(*(buffer + cc))[1]; + } + } + } + fftw_interface<rnumber>::free(buffer); + delete mpistatus; + /* put asymmetric data to 0 */ + /*if (this->cd->myrank == this->cd->rank[this->cd->sizes[0]/2]) + { + tindex = howmany*(this->cd->sizes[0]/2 - this->cd->starts[0])*this->cd->sizes[1]*this->cd->sizes[2]; + for (ii = 0; ii < this->cd->sizes[1]; ii++) + { + std::fill_n((rnumber*)(data + tindex), howmany*2*this->cd->sizes[2], 0.0); + tindex += howmany*this->cd->sizes[2]; + } + } + tindex = howmany*(); + std::fill_n((rnumber*)(data + tindex), howmany*2, 0.0);*/ +} + +template <class rnumber> +int 
fluid_solver_base<rnumber>::read_base(const char *fname, rnumber *data) +{ + char full_name[512]; /* "<name>_<fname>_i<iteration as 5-digit hex>" */ + snprintf(full_name, sizeof(full_name), "%s_%s_i%.5x", this->name, fname, this->iteration); /* truncates rather than overflowing full_name */ + return this->rd->read(full_name, (void*)data); +} + +template <class rnumber> +int fluid_solver_base<rnumber>::read_base(const char *fname, cnumber *data) +{ + char full_name[512]; /* "<name>_<fname>_i<iteration as 5-digit hex>" */ + snprintf(full_name, sizeof(full_name), "%s_%s_i%.5x", this->name, fname, this->iteration); /* truncates rather than overflowing full_name */ + return this->cd->read(full_name, (void*)data); +} + +template <class rnumber> +int fluid_solver_base<rnumber>::write_base(const char *fname, rnumber *data) +{ + char full_name[512]; /* "<name>_<fname>_i<iteration as 5-digit hex>" */ + snprintf(full_name, sizeof(full_name), "%s_%s_i%.5x", this->name, fname, this->iteration); /* truncates rather than overflowing full_name */ + return this->rd->write(full_name, (void*)data); +} + +template <class rnumber> +int fluid_solver_base<rnumber>::write_base(const char *fname, cnumber *data) +{ + char full_name[512]; /* "<name>_<fname>_i<iteration as 5-digit hex>" */ + snprintf(full_name, sizeof(full_name), "%s_%s_i%.5x", this->name, fname, this->iteration); /* truncates rather than overflowing full_name */ + return this->cd->write(full_name, (void*)data); +} + +/* finally, force generation of code */ +template class fluid_solver_base<float>; +template class fluid_solver_base<double>; /*****************************************************************************/ -/* now actually use the macro defined above */ -FLUID_SOLVER_BASE_DEFINITIONS( - FFTW_MANGLE_FLOAT, - float, - MPI_FLOAT, - MPI_COMPLEX) -FLUID_SOLVER_BASE_DEFINITIONS( - FFTW_MANGLE_DOUBLE, - double, - MPI_DOUBLE, - BFPS_MPICXX_DOUBLE_COMPLEX) -/*****************************************************************************/ + + + diff --git a/bfps/cpp/fluid_solver_base.hpp b/bfps/cpp/fluid_solver_base.hpp index 62deb597b4a6a3f4fc87198099d15778e7a2a255..e446956001a08fdbf0d3b11da8552e1cb6c61a45 100644 --- a/bfps/cpp/fluid_solver_base.hpp +++ b/bfps/cpp/fluid_solver_base.hpp @@ -30,6 +30,8 @@ #include <vector> #include "base.hpp" #include "field_descriptor.hpp" +#include "scope_timer.hpp" +#include "omputils.hpp" #ifndef FLUID_SOLVER_BASE @@ -81,7 +83,7 @@ class fluid_solver_base double
DKY = 1.0, double DKZ = 1.0, int DEALIAS_TYPE = 0, - unsigned FFTW_PLAN_RIGOR = FFTW_ESTIMATE); + unsigned FFTW_PLAN_RIGOR = DEFAULT_FFTW_FLAG); ~fluid_solver_base(); void low_pass_Fourier(cnumber *__restrict__ a, int howmany, double kmax); @@ -135,97 +137,133 @@ class fluid_solver_base /* macros for loops */ /* Fourier space loop */ -#define CLOOP(obj, expression) \ - \ -{ \ - ptrdiff_t cindex = 0; \ - for (ptrdiff_t yindex = 0; yindex < obj->cd->subsizes[0]; yindex++) \ - for (ptrdiff_t zindex = 0; zindex < obj->cd->subsizes[1]; zindex++) \ - for (ptrdiff_t xindex = 0; xindex < obj->cd->subsizes[2]; xindex++) \ - { \ - expression; \ - cindex++; \ - } \ +template <class ObjectType, class FuncType> +void CLOOP(ObjectType* obj, FuncType expression) +{ + TIMEZONE("CLOOP"); + #pragma omp parallel + { + const hsize_t start = OmpUtils::ForIntervalStart(obj->cd->subsizes[0]); + const hsize_t end = OmpUtils::ForIntervalEnd(obj->cd->subsizes[0]); + for (ptrdiff_t yindex = start; yindex < ptrdiff_t(end); yindex++){ + ptrdiff_t cindex = yindex*obj->cd->subsizes[1]*obj->cd->subsizes[2]; + for (ptrdiff_t zindex = 0; zindex < obj->cd->subsizes[1]; zindex++) + for (ptrdiff_t xindex = 0; xindex < obj->cd->subsizes[2]; xindex++) + { + expression(cindex, xindex, yindex, zindex); + cindex++; + } + } + } } -#define CLOOP_NXMODES(obj, expression) \ - \ -{ \ - ptrdiff_t cindex = 0; \ - for (ptrdiff_t yindex = 0; yindex < obj->cd->subsizes[0]; yindex++) \ - for (ptrdiff_t zindex = 0; zindex < obj->cd->subsizes[1]; zindex++) \ - { \ - int nxmodes = 1; \ - ptrdiff_t xindex = 0; \ - expression; \ - cindex++; \ - nxmodes = 2; \ - for (xindex = 1; xindex < obj->cd->subsizes[2]; xindex++) \ - { \ - expression; \ - cindex++; \ - } \ - } \ +template <class ObjectType, class FuncType> +void CLOOP_NXMODES(ObjectType* obj, FuncType expression) +{ + TIMEZONE("CLOOP_NXMODES"); + #pragma omp parallel + { + const hsize_t start = OmpUtils::ForIntervalStart(obj->cd->subsizes[1]); + const hsize_t end = 
OmpUtils::ForIntervalEnd(obj->cd->subsizes[1]); + for (ptrdiff_t yindex = 0; yindex < obj->cd->subsizes[0]; yindex++){ + for (ptrdiff_t zindex = start; zindex < ptrdiff_t(end); zindex++) + { + ptrdiff_t cindex = yindex*obj->cd->subsizes[1]*obj->cd->subsizes[2] + + zindex*obj->cd->subsizes[2]; + int nxmodes = 1; + ptrdiff_t xindex = 0; + expression(); + cindex++; + nxmodes = 2; + for (xindex = 1; xindex < obj->cd->subsizes[2]; xindex++) + { + expression(); + cindex++; + } + } + } + } } -#define CLOOP_K2(obj, expression) \ - \ -{ \ - double k2; \ - ptrdiff_t cindex = 0; \ - for (ptrdiff_t yindex = 0; yindex < obj->cd->subsizes[0]; yindex++) \ - for (ptrdiff_t zindex = 0; zindex < obj->cd->subsizes[1]; zindex++) \ - for (ptrdiff_t xindex = 0; xindex < obj->cd->subsizes[2]; xindex++) \ - { \ - k2 = (obj->kx[xindex]*obj->kx[xindex] + \ - obj->ky[yindex]*obj->ky[yindex] + \ - obj->kz[zindex]*obj->kz[zindex]); \ - expression; \ - cindex++; \ - } \ + +template <class ObjectType, class FuncType> +void CLOOP_K2(ObjectType* obj, FuncType expression) +{ + TIMEZONE("CLOOP_K2"); + #pragma omp parallel + { + const hsize_t start = OmpUtils::ForIntervalStart(obj->cd->subsizes[1]); + const hsize_t end = OmpUtils::ForIntervalEnd(obj->cd->subsizes[1]); + for (ptrdiff_t yindex = 0; yindex < obj->cd->subsizes[0]; yindex++){ + for (ptrdiff_t zindex = start; zindex < ptrdiff_t(end); zindex++){ + ptrdiff_t cindex = yindex*obj->cd->subsizes[1]*obj->cd->subsizes[2] + + zindex*obj->cd->subsizes[2]; + for (ptrdiff_t xindex = 0; xindex < obj->cd->subsizes[2]; xindex++) + { + double k2 = (obj->kx[xindex]*obj->kx[xindex] + + obj->ky[yindex]*obj->ky[yindex] + + obj->kz[zindex]*obj->kz[zindex]); + expression(cindex, xindex, yindex, zindex, k2); + cindex++; + } + } + } + } } -#define CLOOP_K2_NXMODES(obj, expression) \ - \ -{ \ - double k2; \ - ptrdiff_t cindex = 0; \ - for (ptrdiff_t yindex = 0; yindex < obj->cd->subsizes[0]; yindex++) \ - for (ptrdiff_t zindex = 0; zindex < obj->cd->subsizes[1]; 
zindex++) \ - { \ - int nxmodes = 1; \ - ptrdiff_t xindex = 0; \ - k2 = (obj->kx[xindex]*obj->kx[xindex] + \ - obj->ky[yindex]*obj->ky[yindex] + \ - obj->kz[zindex]*obj->kz[zindex]); \ - expression; \ - cindex++; \ - nxmodes = 2; \ - for (xindex = 1; xindex < obj->cd->subsizes[2]; xindex++) \ - { \ - k2 = (obj->kx[xindex]*obj->kx[xindex] + \ - obj->ky[yindex]*obj->ky[yindex] + \ - obj->kz[zindex]*obj->kz[zindex]); \ - expression; \ - cindex++; \ - } \ - } \ + +template <class ObjectType, class FuncType> +void CLOOP_K2_NXMODES(ObjectType* obj, FuncType expression) +{ + #pragma omp parallel + { + const hsize_t start = OmpUtils::ForIntervalStart(obj->cd->subsizes[1]); + const hsize_t end = OmpUtils::ForIntervalEnd(obj->cd->subsizes[1]); + for (ptrdiff_t yindex = 0; yindex < obj->cd->subsizes[0]; yindex++){ + for (ptrdiff_t zindex = start; zindex < ptrdiff_t(end); zindex++) + { + ptrdiff_t cindex = yindex*obj->cd->subsizes[1]*obj->cd->subsizes[2] + + zindex*obj->cd->subsizes[2]; + int nxmodes = 1; + ptrdiff_t xindex = 0; + double k2 = (obj->kx[xindex]*obj->kx[xindex] + + obj->ky[yindex]*obj->ky[yindex] + + obj->kz[zindex]*obj->kz[zindex]); + expression(cindex, xindex, yindex, zindex, k2, nxmodes); + cindex++; + nxmodes = 2; + for (xindex = 1; xindex < obj->cd->subsizes[2]; xindex++) + { + double k2 = (obj->kx[xindex]*obj->kx[xindex] + + obj->ky[yindex]*obj->ky[yindex] + + obj->kz[zindex]*obj->kz[zindex]); + expression(cindex, xindex, yindex, zindex, k2, nxmodes); + cindex++; + } + } + } + } } -/* real space loop */ -#define RLOOP(obj, expression) \ - \ -{ \ - for (int zindex = 0; zindex < obj->rd->subsizes[0]; zindex++) \ - for (int yindex = 0; yindex < obj->rd->subsizes[1]; yindex++) \ - { \ - ptrdiff_t rindex = (zindex * obj->rd->subsizes[1] + yindex)*(obj->rd->subsizes[2]+2); \ - for (int xindex = 0; xindex < obj->rd->subsizes[2]; xindex++) \ - { \ - expression; \ - rindex++; \ - } \ - } \ + +template <class ObjectType, class FuncType> +void RLOOP(ObjectType* obj, 
FuncType expression) +{ + #pragma omp parallel + { + const hsize_t start = OmpUtils::ForIntervalStart(obj->rd->subsizes[1]); + const hsize_t end = OmpUtils::ForIntervalEnd(obj->rd->subsizes[1]); + for (int zindex = 0; zindex < obj->rd->subsizes[0] ; zindex++) + for (int yindex = start; yindex < ptrdiff_t(end); yindex++) + { + ptrdiff_t rindex = (zindex * obj->rd->subsizes[1] + yindex)*(obj->rd->subsizes[2]+2); + for (int xindex = 0; xindex < obj->rd->subsizes[2]; xindex++) + { + expression(rindex, xindex, yindex, zindex); + rindex++; + } + } + } } /*****************************************************************************/ diff --git a/bfps/cpp/full_code/NSVE.cpp b/bfps/cpp/full_code/NSVE.cpp new file mode 100644 index 0000000000000000000000000000000000000000..1e24c7af531e7184f75b1f14257d42b822db7a9c --- /dev/null +++ b/bfps/cpp/full_code/NSVE.cpp @@ -0,0 +1,139 @@ +#include <string> +#include <cmath> +#include "NSVE.hpp" +#include "scope_timer.hpp" + + +template <typename rnumber> +int NSVE<rnumber>::initialize(void) +{ + this->read_iteration(); + this->read_parameters(); + if (this->myrank == 0) + { + // set caching parameters + hid_t fapl = H5Pcreate(H5P_FILE_ACCESS); + herr_t cache_err = H5Pset_cache(fapl, 0, 521, 134217728, 1.0); + DEBUG_MSG("when setting stat_file cache I got %d\n", cache_err); + this->stat_file = H5Fopen( + (this->simname + ".h5").c_str(), + H5F_ACC_RDWR, + fapl); + } + int data_file_problem; + if (this->myrank == 0) + data_file_problem = this->grow_file_datasets(); + MPI_Bcast(&data_file_problem, 1, MPI_INT, 0, this->comm); + if (data_file_problem > 0) + { + std::cerr << + data_file_problem << + " problems growing file datasets.\ntrying to exit now." 
<< + std::endl; + return EXIT_FAILURE; + } + this->fs = new vorticity_equation<rnumber, FFTW>( + simname.c_str(), + nx, ny, nz, + dkx, dky, dkz, + DEFAULT_FFTW_FLAG); + this->tmp_vec_field = new field<rnumber, FFTW, THREE>( + nx, ny, nz, + this->comm, + DEFAULT_FFTW_FLAG); + + + this->fs->checkpoints_per_file = checkpoints_per_file; + this->fs->nu = nu; + this->fs->fmode = fmode; + this->fs->famplitude = famplitude; + this->fs->fk0 = fk0; + this->fs->fk1 = fk1; + strncpy(this->fs->forcing_type, forcing_type, 128); + this->fs->iteration = this->iteration; + this->fs->checkpoint = this->checkpoint; + + this->fs->cvorticity->real_space_representation = false; + this->fs->io_checkpoint(); + + if (this->myrank == 0 && this->iteration == 0) + this->fs->kk->store(stat_file); + return EXIT_SUCCESS; +} + +template <typename rnumber> +int NSVE<rnumber>::step(void) +{ + this->fs->step(this->dt); + this->iteration = this->fs->iteration; + return EXIT_SUCCESS; +} + +template <typename rnumber> +int NSVE<rnumber>::write_checkpoint(void) +{ + this->fs->io_checkpoint(false); + this->checkpoint = this->fs->checkpoint; + this->write_iteration(); + return EXIT_SUCCESS; +} + +template <typename rnumber> +int NSVE<rnumber>::finalize(void) +{ + if (this->myrank == 0) + H5Fclose(this->stat_file); + delete this->fs; + delete this->tmp_vec_field; + return EXIT_SUCCESS; +} + +/** \brief Compute standard statistics for velocity and vorticity fields. + * + * IMPORTANT: at the end of this subroutine, `this->fs->cvelocity` contains + * the Fourier space representation of the velocity field, and + * `this->tmp_vec_field` contains the real space representation of the + * velocity field. + * This behavior is relied upon in the `NSVEparticles` class, so please + * don't break it. 
+ */ + +template <typename rnumber> +int NSVE<rnumber>::do_stats() +{ + if (!(this->iteration % this->niter_stat == 0)) + return EXIT_SUCCESS; + hid_t stat_group; + if (this->myrank == 0) + stat_group = H5Gopen( + this->stat_file, + "statistics", + H5P_DEFAULT); + else + stat_group = 0; + + *tmp_vec_field = fs->cvorticity->get_cdata(); + tmp_vec_field->compute_stats( + fs->kk, + stat_group, + "vorticity", + fs->iteration / niter_stat, + max_vorticity_estimate/sqrt(3)); + + fs->compute_velocity(fs->cvorticity); + *tmp_vec_field = fs->cvelocity->get_cdata(); + tmp_vec_field->compute_stats( + fs->kk, + stat_group, + "velocity", + fs->iteration / niter_stat, + max_velocity_estimate/sqrt(3)); + + if (this->myrank == 0) + H5Gclose(stat_group); + return EXIT_SUCCESS; +} + +template class NSVE<float>; +template class NSVE<double>; + diff --git a/bfps/cpp/full_code/NSVE.hpp b/bfps/cpp/full_code/NSVE.hpp new file mode 100644 index 0000000000000000000000000000000000000000..d444b71ceb48ea19dc292a57cc91ac81157e15ed --- /dev/null +++ b/bfps/cpp/full_code/NSVE.hpp @@ -0,0 +1,78 @@ +/********************************************************************** +* * +* Copyright 2017 Max Planck Institute * +* for Dynamics and Self-Organization * +* * +* This file is part of bfps. * +* * +* bfps is free software: you can redistribute it and/or modify * +* it under the terms of the GNU General Public License as published * +* by the Free Software Foundation, either version 3 of the License, * +* or (at your option) any later version. * +* * +* bfps is distributed in the hope that it will be useful, * +* but WITHOUT ANY WARRANTY; without even the implied warranty of * +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * +* GNU General Public License for more details. * +* * +* You should have received a copy of the GNU General Public License * +* along with bfps. 
If not, see <http://www.gnu.org/licenses/> * +* * +* Contact: Cristian.Lalescu@ds.mpg.de * +* * +**********************************************************************/ + + + +#ifndef NSVE_HPP +#define NSVE_HPP + + + +#include <cstdlib> +#include "base.hpp" +#include "vorticity_equation.hpp" +#include "full_code/direct_numerical_simulation.hpp" + +template <typename rnumber> +class NSVE: public direct_numerical_simulation +{ + public: + + /* parameters that are read in read_parameters */ + double dt; + double famplitude; + double fk0; + double fk1; + int fmode; + char forcing_type[512]; + int histogram_bins; + double max_velocity_estimate; + double max_vorticity_estimate; + double nu; + + /* other stuff */ + vorticity_equation<rnumber, FFTW> *fs; + field<rnumber, FFTW, THREE> *tmp_vec_field; + field<rnumber, FFTW, ONE> *tmp_scal_field; + + + NSVE( + const MPI_Comm COMMUNICATOR, + const std::string &simulation_name): + direct_numerical_simulation( + COMMUNICATOR, + simulation_name){} + ~NSVE(){} + + int initialize(void); + int step(void); + int finalize(void); + + virtual int read_parameters(void); + int write_checkpoint(void); + int do_stats(void); +}; + +#endif//NSVE_HPP + diff --git a/bfps/cpp/full_code/NSVE_field_stats.cpp b/bfps/cpp/full_code/NSVE_field_stats.cpp new file mode 100644 index 0000000000000000000000000000000000000000..7e33acf93644208d292c5d8df66653f4bb7b806f --- /dev/null +++ b/bfps/cpp/full_code/NSVE_field_stats.cpp @@ -0,0 +1,93 @@ +#include <string> +#include <cmath> +#include "NSVE_field_stats.hpp" +#include "scope_timer.hpp" + + +template <typename rnumber> +int NSVE_field_stats<rnumber>::initialize(void) +{ + this->postprocess::read_parameters(); + this->vorticity = new field<rnumber, FFTW, THREE>( + nx, ny, nz, + this->comm, + DEFAULT_FFTW_FLAG); + this->vorticity->real_space_representation = false; + hid_t parameter_file = H5Fopen( + (this->simname + std::string(".h5")).c_str(), + H5F_ACC_RDONLY, + H5P_DEFAULT); + if 
(!H5Lexists(parameter_file, "field_dtype", H5P_DEFAULT)) + this->bin_IO = NULL; + else + { + hid_t dset = H5Dopen(parameter_file, "field_dtype", H5P_DEFAULT); + hid_t space = H5Dget_space(dset); + hid_t memtype = H5Dget_type(dset); + char *string_data = (char*)malloc(256); + H5Dread(dset, memtype, H5S_ALL, H5S_ALL, H5P_DEFAULT, &string_data); + // check that we're using the correct data type + // field_dtype SHOULD be something like "<f4", "<f8", ">f4", ">f8" + // first character is ordering, which is machine specific + // for the other two I am checking that they have the correct values + assert(string_data[1] == 'f'); + assert(string_data[2] == '0' + sizeof(rnumber)); + free(string_data); + H5Sclose(space); + H5Tclose(memtype); + H5Dclose(dset); + this->bin_IO = new field_binary_IO<rnumber, COMPLEX, THREE>( + this->vorticity->clayout->sizes, + this->vorticity->clayout->subsizes, + this->vorticity->clayout->starts, + this->vorticity->clayout->comm); + } + H5Fclose(parameter_file); + return EXIT_SUCCESS; +} + +template <typename rnumber> +int NSVE_field_stats<rnumber>::read_current_cvorticity(void) +{ + this->vorticity->real_space_representation = false; + if (this->bin_IO != NULL) + { + char itername[16]; + sprintf(itername, "i%.5x", this->iteration); + std::string native_binary_fname = ( + this->simname + + std::string("_cvorticity_") + + std::string(itername)); + this->bin_IO->read( + native_binary_fname, + this->vorticity->get_cdata()); + } + else + { + this->vorticity->io( + this->simname + std::string("_fields.h5"), + "vorticity", + this->iteration, + true); + } + return EXIT_SUCCESS; +} + +template <typename rnumber> +int NSVE_field_stats<rnumber>::finalize(void) +{ + if (this->bin_IO != NULL) + delete this->bin_IO; + delete this->vorticity; + return EXIT_SUCCESS; +} + +template <typename rnumber> +int NSVE_field_stats<rnumber>::work_on_current_iteration(void) +{ + return EXIT_SUCCESS; +} + +template class NSVE_field_stats<float>; +template class 
NSVE_field_stats<double>; + diff --git a/bfps/cpp/io_tools.cpp b/bfps/cpp/full_code/NSVE_field_stats.hpp similarity index 59% rename from bfps/cpp/io_tools.cpp rename to bfps/cpp/full_code/NSVE_field_stats.hpp index 224803dc4879ae7ed273bb443b9b465ed0ec0248..d544c0c7d5f4c75559e63ea3e59bf9457d4730c5 100644 --- a/bfps/cpp/io_tools.cpp +++ b/bfps/cpp/full_code/NSVE_field_stats.hpp @@ -1,6 +1,6 @@ /********************************************************************** * * -* Copyright 2015 Max Planck Institute * +* Copyright 2017 Max Planck Institute * * for Dynamics and Self-Organization * * * * This file is part of bfps. * @@ -24,39 +24,40 @@ -#include <typeinfo> -#include <cassert> -#include "io_tools.hpp" +#ifndef NSVE_FIELD_STATS_HPP +#define NSVE_FIELD_STATS_HPP +#include <cstdlib> +#include <sys/types.h> +#include <sys/stat.h> +#include <vector> +#include "base.hpp" +#include "field.hpp" +#include "field_binary_IO.hpp" +#include "full_code/postprocess.hpp" -template <typename number> -std::vector<number> read_vector( - hid_t group, - std::string dset_name) +template <typename rnumber> +class NSVE_field_stats: public postprocess { - std::vector<number> result; - hsize_t vector_length; - // first, read size of array - hid_t dset, dspace; - hid_t mem_dtype; - if (typeid(number) == typeid(int)) - mem_dtype = H5Tcopy(H5T_NATIVE_INT); - else if (typeid(number) == typeid(double)) - mem_dtype = H5Tcopy(H5T_NATIVE_DOUBLE); - dset = H5Dopen(group, dset_name.c_str(), H5P_DEFAULT); - dspace = H5Dget_space(dset); - assert(H5Sget_simple_extent_ndims(dspace) == 1); - H5Sget_simple_extent_dims(dspace, &vector_length, NULL); - result.resize(vector_length); - H5Dread(dset, mem_dtype, H5S_ALL, H5S_ALL, H5P_DEFAULT, &result.front()); - H5Sclose(dspace); - H5Dclose(dset); - H5Tclose(mem_dtype); - return result; -} - -template std::vector<int> read_vector( - hid_t, std::string); -template std::vector<double> read_vector( - hid_t, std::string); + private: + field_binary_IO<rnumber, 
COMPLEX, THREE> *bin_IO; + public: + field<rnumber, FFTW, THREE> *vorticity; + + NSVE_field_stats( + const MPI_Comm COMMUNICATOR, + const std::string &simulation_name): + postprocess( + COMMUNICATOR, + simulation_name){} + virtual ~NSVE_field_stats(){} + + virtual int initialize(void); + virtual int work_on_current_iteration(void); + virtual int finalize(void); + + int read_current_cvorticity(void); +}; + +#endif//NSVE_FIELD_STATS_HPP diff --git a/bfps/cpp/full_code/NSVE_no_output.hpp b/bfps/cpp/full_code/NSVE_no_output.hpp new file mode 100644 index 0000000000000000000000000000000000000000..0047a45a02dd58ae8934f78fdd8d804424ae817c --- /dev/null +++ b/bfps/cpp/full_code/NSVE_no_output.hpp @@ -0,0 +1,25 @@ +#ifndef NSVE_NO_OUTPUT_HPP +#define NSVE_NO_OUTPUT_HPP + +#include "full_code/NSVE.hpp" + +template <typename rnumber> +class NSVE_no_output: public NSVE<rnumber> +{ + public: + NSVE_no_output( + const MPI_Comm COMMUNICATOR, + const std::string &simulation_name): + NSVE<rnumber>( + COMMUNICATOR, + simulation_name){} + ~NSVE_no_output(){} + int write_checkpoint(void) + { + return 0; + } + int read_parameters(void); +}; + +#endif//NSVE_NO_OUTPUT_HPP + diff --git a/bfps/cpp/full_code/NSVEparticles.cpp b/bfps/cpp/full_code/NSVEparticles.cpp new file mode 100644 index 0000000000000000000000000000000000000000..ba84b3943d579965836f05af2447722e273f2dc3 --- /dev/null +++ b/bfps/cpp/full_code/NSVEparticles.cpp @@ -0,0 +1,102 @@ +#include <string> +#include <cmath> +#include "NSVEparticles.hpp" +#include "scope_timer.hpp" +#include "particles/particles_sampling.hpp" + +template <typename rnumber> +int NSVEparticles<rnumber>::initialize(void) +{ + this->NSVE<rnumber>::initialize(); + + this->ps = particles_system_builder( + this->fs->cvelocity, // (field object) + this->fs->kk, // (kspace object, contains dkx, dky, dkz) + tracers0_integration_steps, // to check coherency between parameters and hdf input file (nb rhs) + (long long int)nparticles, // to check coherency between 
parameters and hdf input file + this->fs->get_current_fname(), // particles input filename + std::string("/tracers0/state/") + std::to_string(this->fs->iteration), // dataset name for initial input + std::string("/tracers0/rhs/") + std::to_string(this->fs->iteration), // dataset name for initial input + tracers0_neighbours, // parameter (interpolation no neighbours) + tracers0_smoothness, // parameter + this->comm, + this->fs->iteration+1); + this->particles_output_writer_mpi = new particles_output_hdf5< + long long int, double, 3, 3>( + MPI_COMM_WORLD, + "tracers0", + nparticles, + tracers0_integration_steps); + return EXIT_SUCCESS; +} + +template <typename rnumber> +int NSVEparticles<rnumber>::step(void) +{ + this->fs->compute_velocity(this->fs->cvorticity); + this->fs->cvelocity->ift(); + this->ps->completeLoop(this->dt); + this->NSVE<rnumber>::step(); + return EXIT_SUCCESS; +} + +template <typename rnumber> +int NSVEparticles<rnumber>::write_checkpoint(void) +{ + this->NSVE<rnumber>::write_checkpoint(); + this->particles_output_writer_mpi->open_file(this->fs->get_current_fname()); + this->particles_output_writer_mpi->save( + this->ps->getParticlesPositions(), + this->ps->getParticlesRhs(), + this->ps->getParticlesIndexes(), + this->ps->getLocalNbParticles(), + this->fs->iteration); + this->particles_output_writer_mpi->close_file(); + return EXIT_SUCCESS; +} + +template <typename rnumber> +int NSVEparticles<rnumber>::finalize(void) +{ + this->ps.reset(); /* destroy the particle system (release() only dropped ownership and leaked it); done first because it was built from fs->cvelocity and fs->kk, which NSVE::finalize() deletes */ + this->NSVE<rnumber>::finalize(); /* closes the stat file on rank 0, deletes fs and tmp_vec_field */ + delete this->particles_output_writer_mpi; + return EXIT_SUCCESS; +} + +/** \brief Compute fluid stats and sample fields at particle locations.
+ */ + +template <typename rnumber> +int NSVEparticles<rnumber>::do_stats() +{ + /// fluid stats go here + this->NSVE<rnumber>::do_stats(); + + + if (!(this->iteration % this->niter_part == 0)) + return EXIT_SUCCESS; + + /// sample velocity + sample_from_particles_system(*this->tmp_vec_field, // field to save + this->ps, + (this->simname + "_particles.h5"), // filename + "tracers0", // hdf5 parent group + "velocity" // dataset basename TODO + ); + + /// compute acceleration and sample it + this->fs->compute_Lagrangian_acceleration(this->tmp_vec_field); + this->tmp_vec_field->ift(); + sample_from_particles_system(*this->tmp_vec_field, + this->ps, + (this->simname + "_particles.h5"), + "tracers0", + "acceleration"); + + return EXIT_SUCCESS; +} + +template class NSVEparticles<float>; +template class NSVEparticles<double>; + diff --git a/bfps/cpp/full_code/NSVEparticles.hpp b/bfps/cpp/full_code/NSVEparticles.hpp new file mode 100644 index 0000000000000000000000000000000000000000..ccafe6eeb09d27a6b211cfd75ecfba4fc5abe92b --- /dev/null +++ b/bfps/cpp/full_code/NSVEparticles.hpp @@ -0,0 +1,81 @@ +/********************************************************************** +* * +* Copyright 2017 Max Planck Institute * +* for Dynamics and Self-Organization * +* * +* This file is part of bfps. * +* * +* bfps is free software: you can redistribute it and/or modify * +* it under the terms of the GNU General Public License as published * +* by the Free Software Foundation, either version 3 of the License, * +* or (at your option) any later version. * +* * +* bfps is distributed in the hope that it will be useful, * +* but WITHOUT ANY WARRANTY; without even the implied warranty of * +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * +* GNU General Public License for more details. * +* * +* You should have received a copy of the GNU General Public License * +* along with bfps. 
If not, see <http://www.gnu.org/licenses/> * +* * +* Contact: Cristian.Lalescu@ds.mpg.de * +* * +**********************************************************************/ + + + +#ifndef NSVEPARTICLES_HPP +#define NSVEPARTICLES_HPP + + + +#include <cstdlib> +#include "base.hpp" +#include "vorticity_equation.hpp" +#include "full_code/NSVE.hpp" +#include "particles/particles_system_builder.hpp" +#include "particles/particles_output_hdf5.hpp" + +/** \brief Navier-Stokes solver that includes simple Lagrangian tracers. + * + * Child of Navier Stokes vorticity equation solver, this class calls all the + * methods from `NSVE`, and in addition integrates simple Lagrangian tracers + * in the resulting velocity field. + */ + +template <typename rnumber> +class NSVEparticles: public NSVE<rnumber> +{ + public: + + /* parameters that are read in read_parameters */ + int niter_part; + int nparticles; + int tracers0_integration_steps; + int tracers0_neighbours; + int tracers0_smoothness; + + /* other stuff */ + std::unique_ptr<abstract_particles_system<long long int, double>> ps; + particles_output_hdf5<long long int, double,3,3> *particles_output_writer_mpi; + + + NSVEparticles( + const MPI_Comm COMMUNICATOR, + const std::string &simulation_name): + NSVE<rnumber>( + COMMUNICATOR, + simulation_name){} + ~NSVEparticles(){} + + int initialize(void); + int step(void); + int finalize(void); + + int read_parameters(void); + int write_checkpoint(void); + int do_stats(void); +}; + +#endif//NSVEPARTICLES_HPP + diff --git a/bfps/cpp/full_code/NSVEparticles_no_output.hpp b/bfps/cpp/full_code/NSVEparticles_no_output.hpp new file mode 100644 index 0000000000000000000000000000000000000000..264fd75ac9b0628aff167d018d888030b7029a35 --- /dev/null +++ b/bfps/cpp/full_code/NSVEparticles_no_output.hpp @@ -0,0 +1,25 @@ +#ifndef NSVEPARTICLES_NO_OUTPUT_HPP +#define NSVEPARTICLES_NO_OUTPUT_HPP + +#include "full_code/NSVEparticles.hpp" + +template <typename rnumber> +class NSVEparticles_no_output: public 
NSVEparticles<rnumber> +{ + public: + NSVEparticles_no_output( + const MPI_Comm COMMUNICATOR, + const std::string &simulation_name): + NSVEparticles<rnumber>( + COMMUNICATOR, + simulation_name){} + ~NSVEparticles_no_output(){} + int write_checkpoint(void) + { + return 0; + } + int read_parameters(void); +}; + +#endif//NSVEPARTICLES_NO_OUTPUT_HPP + diff --git a/bfps/cpp/full_code/code_base.cpp b/bfps/cpp/full_code/code_base.cpp new file mode 100644 index 0000000000000000000000000000000000000000..1b06fe8e66a4180034b9f6a494a1a432ae5ea3f9 --- /dev/null +++ b/bfps/cpp/full_code/code_base.cpp @@ -0,0 +1,36 @@ +#include "code_base.hpp" +#include "scope_timer.hpp" + +code_base::code_base( + const MPI_Comm COMMUNICATOR, + const std::string &simulation_name): + comm(COMMUNICATOR), + simname(simulation_name) +{ + MPI_Comm_rank(this->comm, &this->myrank); + MPI_Comm_size(this->comm, &this->nprocs); + this->stop_code_now = false; +} + +int code_base::check_stopping_condition(void) +{ + if (myrank == 0) + { + std::string fname = ( + std::string("stop_") + + std::string(this->simname)); + { + struct stat file_buffer; + this->stop_code_now = ( + stat(fname.c_str(), &file_buffer) == 0); + } + } + MPI_Bcast( + &this->stop_code_now, + 1, + MPI_C_BOOL, + 0, + MPI_COMM_WORLD); + return EXIT_SUCCESS; +} + diff --git a/bfps/cpp/full_code/code_base.hpp b/bfps/cpp/full_code/code_base.hpp new file mode 100644 index 0000000000000000000000000000000000000000..cf0521e2b7383edf925e1129d4fa4a931a55efe4 --- /dev/null +++ b/bfps/cpp/full_code/code_base.hpp @@ -0,0 +1,117 @@ +/********************************************************************** +* * +* Copyright 2017 Max Planck Institute * +* for Dynamics and Self-Organization * +* * +* This file is part of bfps. 
* +* * +* bfps is free software: you can redistribute it and/or modify * +* it under the terms of the GNU General Public License as published * +* by the Free Software Foundation, either version 3 of the License, * +* or (at your option) any later version. * +* * +* bfps is distributed in the hope that it will be useful, * +* but WITHOUT ANY WARRANTY; without even the implied warranty of * +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * +* GNU General Public License for more details. * +* * +* You should have received a copy of the GNU General Public License * +* along with bfps. If not, see <http://www.gnu.org/licenses/> * +* * +* Contact: Cristian.Lalescu@ds.mpg.de * +* * +**********************************************************************/ + + + +#ifndef CODE_BASE_HPP +#define CODE_BASE_HPP + +#include <cstdlib> +#include <sys/types.h> +#include <sys/stat.h> +#include "base.hpp" + +/** \class code_base + * \brief Defines basic timer and method to check stopping condition. + * + * Any computational task will consist of a loop over temporal snapshots, be it + * a simulation or a postprocessing job. + * This class declares the three required methods (initialize, step and finalize + * functionalities). + * Implementation should be done in children classes, since it will be different + * for simulations or postprocessing jobs. + * + * What the class actually implements is a basic timer (calls to system clock), + * and a method to check for a stopping condition. + * These are meant to be used by children classes as needed. 
+ */ + +class code_base +{ + private: + clock_t time0, time1; + public: + int myrank, nprocs; + MPI_Comm comm; + + std::string simname; + int iteration; + + bool stop_code_now; + + int nx; + int ny; + int nz; + int dealias_type; + double dkx; + double dky; + double dkz; + + code_base( + const MPI_Comm COMMUNICATOR, + const std::string &simulation_name); + virtual ~code_base(){} + + int check_stopping_condition(void); + + int start_simple_timer(void) + { + this->time0 = clock(); + return EXIT_SUCCESS; + } + + int print_simple_timer( + const std::string operation_name) + { + this->time1 = clock(); + double local_time_difference = (( + (unsigned int)(this->time1 - this->time0)) / + ((double)CLOCKS_PER_SEC)); + double time_difference = 0.0; + MPI_Allreduce( + &local_time_difference, + &time_difference, + 1, + MPI_DOUBLE, + MPI_SUM, + MPI_COMM_WORLD); + if (this->myrank == 0) + std::cout << operation_name << + " took " << time_difference/this->nprocs << + " seconds" << std::endl; + if (this->myrank == 0) + std::cerr << operation_name << + " took " << time_difference/this->nprocs << + " seconds" << std::endl; + this->time0 = this->time1; + return EXIT_SUCCESS; + } + + virtual int initialize(void) = 0; + virtual int main_loop(void) = 0; + virtual int finalize(void) = 0; +}; + +#endif//CODE_BASE_HPP + diff --git a/bfps/cpp/full_code/codes_with_no_output.hpp b/bfps/cpp/full_code/codes_with_no_output.hpp new file mode 100644 index 0000000000000000000000000000000000000000..f4cd3b5495ecb432653a7027bcaa330954865d21 --- /dev/null +++ b/bfps/cpp/full_code/codes_with_no_output.hpp @@ -0,0 +1,9 @@ +#ifndef CODES_WITH_NO_OUTPUT_HPP +#define CODES_WITH_NO_OUTPUT_HPP + +#include "full_code/NSVE_no_output.hpp" +#include "full_code/NSVEparticles_no_output.hpp" + + +#endif//CODES_WITH_NO_OUTPUT_HPP + diff --git a/bfps/cpp/full_code/direct_numerical_simulation.cpp b/bfps/cpp/full_code/direct_numerical_simulation.cpp new file mode 100644 index 
0000000000000000000000000000000000000000..edc2f99497a21368c63348167190dc6c64b44712
--- /dev/null
+++ b/bfps/cpp/full_code/direct_numerical_simulation.cpp
@@ -0,0 +1,145 @@
+#include <cstdlib>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include "direct_numerical_simulation.hpp"
+#include "scope_timer.hpp"
+#include "hdf5_tools.hpp"
+
+
+/** \brief Grow the datasets in the "statistics" group of `stat_file`.
+ *
+ *  The increment passed on is `niter_todo / niter_stat` (integer division).
+ */
+int direct_numerical_simulation::grow_file_datasets()
+{
+    return hdf5_tools::grow_file_datasets(
+            this->stat_file,
+            "statistics",
+            this->niter_todo / this->niter_stat);
+}
+
+/** \brief Read `iteration` and `checkpoint` from the file `<simname>.h5`.
+ *
+ *  \return EXIT_FAILURE if the file cannot be opened, EXIT_SUCCESS otherwise.
+ */
+int direct_numerical_simulation::read_iteration(void)
+{
+    /* read iteration */
+    hid_t dset;
+    hid_t iteration_file = H5Fopen(
+            (this->simname + std::string(".h5")).c_str(),
+            H5F_ACC_RDONLY,
+            H5P_DEFAULT);
+    /* without a valid file every read below fails and the two members
+     * silently keep whatever values they had */
+    if (iteration_file < 0)
+        return EXIT_FAILURE;
+    dset = H5Dopen(
+            iteration_file,
+            "iteration",
+            H5P_DEFAULT);
+    H5Dread(
+            dset,
+            H5T_NATIVE_INT,
+            H5S_ALL,
+            H5S_ALL,
+            H5P_DEFAULT,
+            &this->iteration);
+    H5Dclose(dset);
+    dset = H5Dopen(
+            iteration_file,
+            "checkpoint",
+            H5P_DEFAULT);
+    H5Dread(
+            dset,
+            H5T_NATIVE_INT,
+            H5S_ALL,
+            H5S_ALL,
+            H5P_DEFAULT,
+            &this->checkpoint);
+    H5Dclose(dset);
+    H5Fclose(iteration_file);
+    DEBUG_MSG("simname is %s, iteration is %d and checkpoint is %d\n",
+            this->simname.c_str(),
+            this->iteration,
+            this->checkpoint);
+    return EXIT_SUCCESS;
+}
+
+/** \brief Store `iteration` and `checkpoint` in `stat_file`.
+ *
+ *  Only the process with `myrank == 0` touches the file.
+ */
+int direct_numerical_simulation::write_iteration(void)
+{
+    if (this->myrank == 0)
+    {
+        hid_t dset = H5Dopen(
+                this->stat_file,
+                "iteration",
+                H5P_DEFAULT);
+        H5Dwrite(
+                dset,
+                H5T_NATIVE_INT,
+                H5S_ALL,
+                H5S_ALL,
+                H5P_DEFAULT,
+                &this->iteration);
+        H5Dclose(dset);
+        dset = H5Dopen(
+                this->stat_file,
+                "checkpoint",
+                H5P_DEFAULT);
+        H5Dwrite(
+                dset,
+                H5T_NATIVE_INT,
+                H5S_ALL,
+                H5S_ALL,
+                H5P_DEFAULT,
+                &this->checkpoint);
+        H5Dclose(dset);
+    }
+    return EXIT_SUCCESS;
+}
+
+/** \brief Run the simulation up to the next multiple of `niter_todo`.
+ *
+ *  Every pass calls `do_stats()` and then `step()`; a checkpoint is written
+ *  whenever `iteration` is a multiple of `niter_out`.
+ *  The loop ends early once `check_stopping_condition()` has set
+ *  `stop_code_now`.
+ *  Afterwards `do_stats()` is called one more time, and a final checkpoint is
+ *  written if `iteration` is not a multiple of `niter_out`.
+ */
+int direct_numerical_simulation::main_loop(void)
+{
+    this->start_simple_timer();
+    int max_iter = (this->iteration + this->niter_todo -
+                    (this->iteration % this->niter_todo));
+    // `iteration` is not advanced here --- presumably `step()` does it (TODO confirm)
+    for (; this->iteration < max_iter;)
+    {
+    #ifdef USE_TIMINGOUTPUT
+        const std::string loopLabel = ("code::main_start::loop-" +
+                                       std::to_string(this->iteration));
+        TIMEZONE(loopLabel.c_str());
+    #endif
+        this->do_stats();
+
+        this->step();
+        if (this->iteration % this->niter_out == 0)
+            this->write_checkpoint();
+        this->print_simple_timer(
+                "iteration " + std::to_string(this->iteration));
+        this->check_stopping_condition();
+        if (this->stop_code_now)
+            break;
+    }
+    this->do_stats();
+    this->print_simple_timer(
+            "final call to do_stats ");
+    if (this->iteration % this->niter_out != 0)
+        this->write_checkpoint();
+    return EXIT_SUCCESS;
+}
+
diff --git a/bfps/cpp/full_code/direct_numerical_simulation.hpp b/bfps/cpp/full_code/direct_numerical_simulation.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..8050bb045b29acf29d655273f7dff310dd10d0fa
--- /dev/null
+++ b/bfps/cpp/full_code/direct_numerical_simulation.hpp
@@ -0,0 +1,75 @@
+/**********************************************************************
+* *
+* Copyright 2017 Max Planck Institute *
+* for Dynamics and Self-Organization *
+* *
+* This file is part of bfps. *
+* *
+* bfps is free software: you can redistribute it and/or modify *
+* it under the terms of the GNU General Public License as published *
+* by the Free Software Foundation, either version 3 of the License, *
+* or (at your option) any later version. *
+* *
+* bfps is distributed in the hope that it will be useful, *
+* but WITHOUT ANY WARRANTY; without even the implied warranty of *
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
+* GNU General Public License for more details. *
+* *
+* You should have received a copy of the GNU General Public License *
+* along with bfps. If not, see <http://www.gnu.org/licenses/> *
+* *
+* Contact: Cristian.Lalescu@ds.mpg.de *
+* *
+**********************************************************************/
+
+
+
+#ifndef DIRECT_NUMERICAL_SIMULATION_HPP
+#define DIRECT_NUMERICAL_SIMULATION_HPP
+
+#include <cstdlib>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include "base.hpp"
+#include "full_code/code_base.hpp"
+
+/** \brief Base class for direct numerical simulations.
+ *
+ *  Implements the time stepping loop (`main_loop`) and the bookkeeping of the
+ *  `iteration` and `checkpoint` counters; the actual solver work is left to
+ *  the pure virtual methods.
+ */
+class direct_numerical_simulation: public code_base
+{
+    public:
+        int checkpoint;
+        int checkpoints_per_file;
+        int niter_out;   // a checkpoint is written when iteration % niter_out == 0
+        int niter_stat;  // grow_file_datasets grows by niter_todo / niter_stat
+        int niter_todo;  // main_loop runs up to the next multiple of this
+        hid_t stat_file; // HDF5 handle used by write_iteration and grow_file_datasets
+
+        direct_numerical_simulation(
+                const MPI_Comm COMMUNICATOR,
+                const std::string &simulation_name):
+            code_base(
+                    COMMUNICATOR,
+                    simulation_name){}
+        virtual ~direct_numerical_simulation(){}
+
+        /* to be implemented by the actual solvers */
+        virtual int write_checkpoint(void) = 0;
+        virtual int initialize(void) = 0;
+        virtual int step(void) = 0;
+        virtual int do_stats(void) = 0;
+        virtual int finalize(void) = 0;
+
+        /* implemented in direct_numerical_simulation.cpp */
+        int main_loop(void);
+        int read_iteration(void);
+        int write_iteration(void);
+        int grow_file_datasets(void);
+};
+
+#endif//DIRECT_NUMERICAL_SIMULATION_HPP
+
diff --git a/bfps/cpp/full_code/get_rfields.cpp b/bfps/cpp/full_code/get_rfields.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..0df8b564a61fba11118ef3f551b0a2db6cbfec1d
--- /dev/null
+++ b/bfps/cpp/full_code/get_rfields.cpp
@@ -0,0 +1,109 @@
+#include <string>
+#include <cmath>
+#include "get_rfields.hpp"
+#include "scope_timer.hpp"
+
+
+/** \brief Set up the wavenumber object and read the job parameters.
+ *
+ *  `niter_out`, `checkpoints_per_file` (1 if the dataset does not exist) and
+ *  the list of iterations to process are read from `<simname>.h5`.
+ */
+template <typename rnumber>
+int get_rfields<rnumber>::initialize(void)
+{
+    this->NSVE_field_stats<rnumber>::initialize();
+    this->kk = new kspace<FFTW, SMOOTH>(
+            this->vorticity->clayout, this->dkx, this->dky, this->dkz);
+    hid_t parameter_file = H5Fopen(
+            (this->simname + std::string(".h5")).c_str(),
+            H5F_ACC_RDONLY,
+            H5P_DEFAULT);
+    hid_t dset = H5Dopen(parameter_file, "/parameters/niter_out", H5P_DEFAULT);
+    H5Dread(dset, H5T_NATIVE_INT, H5S_ALL, H5S_ALL, H5P_DEFAULT, &this->niter_out);
+    H5Dclose(dset);
+    // H5Lexists returns a negative value on failure, which must not be
+    // mistaken for "the link exists"
+    if (H5Lexists(parameter_file, "/parameters/checkpoints_per_file", H5P_DEFAULT) > 0)
+    {
+        dset = H5Dopen(parameter_file, "/parameters/checkpoints_per_file", H5P_DEFAULT);
+        H5Dread(dset, H5T_NATIVE_INT, H5S_ALL, H5S_ALL, H5P_DEFAULT, &this->checkpoints_per_file);
+        H5Dclose(dset);
+    }
+    else
+        this->checkpoints_per_file = 1;
+    this->iteration_list = hdf5_tools::read_vector<int>(
+            parameter_file,
+            "/get_rfields/iteration_list");
+    H5Fclose(parameter_file);
+    return EXIT_SUCCESS;
+}
+
+/** \brief Compute the velocity for the current iteration and hand it to `io`.
+ *
+ *  The velocity is obtained from the vorticity in Fourier space, transformed
+ *  with `ift()`, and passed to `field::io` with dataset name "velocity" and
+ *  file `<simname>_checkpoint_<iteration / (niter_out*checkpoints_per_file)>.h5`.
+ */
+template <typename rnumber>
+int get_rfields<rnumber>::work_on_current_iteration(void)
+{
+    DEBUG_MSG("entered get_rfields::work_on_current_iteration\n");
+    this->read_current_cvorticity();
+    field<rnumber, FFTW, THREE> *vel = new field<rnumber, FFTW, THREE>(
+            this->nx, this->ny, this->nz,
+            this->comm,
+            this->vorticity->fftw_plan_rigor);
+
+    vel->real_space_representation = false;
+    this->kk->CLOOP_K2(
+                [&](ptrdiff_t cindex,
+                    ptrdiff_t xindex,
+                    ptrdiff_t yindex,
+                    ptrdiff_t zindex,
+                    double k2){
+        if (k2 <= this->kk->kM2 && k2 > 0)
+        {
+            // u = i (k x omega) / k^2, assuming the last index of cval
+            // selects the real (0) or imaginary (1) part
+            vel->cval(cindex,0,0) = -(this->kk->ky[yindex]*this->vorticity->cval(cindex,2,1) - this->kk->kz[zindex]*this->vorticity->cval(cindex,1,1)) / k2;
+            vel->cval(cindex,0,1) = (this->kk->ky[yindex]*this->vorticity->cval(cindex,2,0) - this->kk->kz[zindex]*this->vorticity->cval(cindex,1,0)) / k2;
+            vel->cval(cindex,1,0) = -(this->kk->kz[zindex]*this->vorticity->cval(cindex,0,1) - this->kk->kx[xindex]*this->vorticity->cval(cindex,2,1)) / k2;
+            vel->cval(cindex,1,1) = (this->kk->kz[zindex]*this->vorticity->cval(cindex,0,0) - this->kk->kx[xindex]*this->vorticity->cval(cindex,2,0)) / k2;
+            vel->cval(cindex,2,0) = -(this->kk->kx[xindex]*this->vorticity->cval(cindex,1,1) - this->kk->ky[yindex]*this->vorticity->cval(cindex,0,1)) / k2;
+            vel->cval(cindex,2,1) = (this->kk->kx[xindex]*this->vorticity->cval(cindex,1,0) - this->kk->ky[yindex]*this->vorticity->cval(cindex,0,0)) / k2;
+        }
+        else
+            std::fill_n((rnumber*)(vel->get_cdata()+3*cindex), 6, 0.0);
+    }
+    );
+    vel->symmetrize();
+    vel->ift();
+
+    std::string fname = (
+            this->simname +
+            std::string("_checkpoint_") +
+            std::to_string(this->iteration / (this->niter_out*this->checkpoints_per_file)) +
+            std::string(".h5"));
+    vel->io(
+            fname,
+            "velocity",
+            this->iteration,
+            false);
+
+    delete vel;
+    return EXIT_SUCCESS;
+}
+
+/** \brief Delete the wavenumber object and finalize the parent class. */
+template <typename rnumber>
+int get_rfields<rnumber>::finalize(void)
+{
+    delete this->kk;
+    this->NSVE_field_stats<rnumber>::finalize();
+    return EXIT_SUCCESS;
+}
+
+template class get_rfields<float>;
+template class get_rfields<double>;
+
diff --git a/bfps/cpp/full_code/get_rfields.hpp b/bfps/cpp/full_code/get_rfields.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..ae669aa3012ce492835a9737ca443cdc846e00ba
--- /dev/null
+++ b/bfps/cpp/full_code/get_rfields.hpp
@@ -0,0 +1,65 @@
+/**********************************************************************
+* *
+* Copyright 2017 Max Planck Institute *
+* for Dynamics and Self-Organization *
+* *
+* This file is part of bfps. *
+* *
+* bfps is free software: you can redistribute it and/or modify *
+* it under the terms of the GNU General Public License as published *
+* by the Free Software Foundation, either version 3 of the License, *
+* or (at your option) any later version. *
+* *
+* bfps is distributed in the hope that it will be useful, *
+* but WITHOUT ANY WARRANTY; without even the implied warranty of *
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
+* GNU General Public License for more details. *
+* *
+* You should have received a copy of the GNU General Public License *
+* along with bfps. If not, see <http://www.gnu.org/licenses/> *
+* *
+* Contact: Cristian.Lalescu@ds.mpg.de *
+* *
+**********************************************************************/
+
+
+
+#ifndef GET_RFIELDS_HPP
+#define GET_RFIELDS_HPP
+
+#include <cstdlib>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <vector>
+#include "base.hpp"
+#include "field.hpp"
+#include "field_binary_IO.hpp"
+#include "full_code/NSVE_field_stats.hpp"
+
+/** \brief Postprocessing job that computes the velocity field from the
+ *  vorticity field, for every iteration it is asked to work on.
+ */
+template <typename rnumber>
+class get_rfields: public NSVE_field_stats<rnumber>
+{
+    public:
+        int checkpoints_per_file;
+        int niter_out;
+        kspace<FFTW, SMOOTH> *kk; // allocated in initialize, deleted in finalize
+
+        get_rfields(
+                const MPI_Comm COMMUNICATOR,
+                const std::string &simulation_name):
+            NSVE_field_stats<rnumber>(
+                    COMMUNICATOR,
+                    simulation_name),
+            kk(nullptr){}
+        virtual ~get_rfields(){}
+
+        int initialize(void);
+        int work_on_current_iteration(void);
+        int finalize(void);
+};
+
+#endif//GET_RFIELDS_HPP
+
diff --git a/bfps/cpp/full_code/main_code.hpp b/bfps/cpp/full_code/main_code.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..cae34b69b18b4f2550f5b6e5f28b48a659ea75f1
--- /dev/null
+++ b/bfps/cpp/full_code/main_code.hpp
@@ -0,0 +1,190 @@
+/**********************************************************************
+* *
+* Copyright 2017 Max Planck Institute *
+* for Dynamics and Self-Organization *
+* *
+* This file is part of bfps. *
+* *
+* bfps is free software: you can redistribute it and/or modify *
+* it under the terms of the GNU General Public License as published *
+* by the Free Software Foundation, either version 3 of the License, *
+* or (at your option) any later version. *
+* *
+* bfps is distributed in the hope that it will be useful, *
+* but WITHOUT ANY WARRANTY; without even the implied warranty of *
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
+* GNU General Public License for more details. *
+* *
+* You should have received a copy of the GNU General Public License *
+* along with bfps. If not, see <http://www.gnu.org/licenses/> *
+* *
+* Contact: Cristian.Lalescu@ds.mpg.de *
+* *
+**********************************************************************/
+
+
+
+#ifndef MAIN_CODE_HPP
+#define MAIN_CODE_HPP
+
+
+
+#include <cfenv>
+#include <string>
+#include <iostream>
+#include "base.hpp"
+#include "field.hpp"
+#include "scope_timer.hpp"
+
+// rank and number of processes in MPI_COMM_WORLD, set in main_code
+int myrank, nprocs;
+
+/** \brief Generic driver: set up MPI and FFTW, run one `DNS` object, clean up.
+ *
+ *  \tparam DNS class providing `initialize`, `main_loop` and `finalize`, and
+ *          a constructor taking a communicator and the simulation name.
+ *  \param argc,argv command line; exactly one argument (the simulation name)
+ *         is expected.
+ *  \param floating_point_exceptions if true, trap FE_INVALID and FE_OVERFLOW.
+ *  \return EXIT_SUCCESS, or the first non-success value returned by the DNS
+ *          object; EXIT_FAILURE for a wrong number of arguments.
+ */
+template <class DNS>
+int main_code(
+        int argc,
+        char *argv[],
+        const bool floating_point_exceptions)
+{
+    /* floating point exception switch */
+    if (floating_point_exceptions)
+        feenableexcept(FE_INVALID | FE_OVERFLOW);
+    else
+        // using std::cerr because DEBUG_MSG requires myrank to be defined
+        std::cerr << "FPE have been turned OFF" << std::endl;
+
+    if (argc != 2)
+    {
+        std::cerr <<
+            "Wrong number of command line arguments. Stopping." <<
+            std::endl;
+        MPI_Init(&argc, &argv);
+        MPI_Finalize();
+        return EXIT_FAILURE;
+    }
+    std::string simname = std::string(argv[1]);
+
+
+    /* initialize MPI environment */
+#ifdef NO_FFTWOMP
+    MPI_Init(&argc, &argv);
+    MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
+    MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
+    fftw_mpi_init();
+    fftwf_mpi_init();
+    DEBUG_MSG("There are %d processes\n", nprocs);
+#else
+    int mpiprovided;
+    MPI_Init_thread(&argc, &argv, MPI_THREAD_FUNNELED, &mpiprovided);
+    assert(mpiprovided >= MPI_THREAD_FUNNELED);
+    MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
+    MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
+    const int nThreads = omp_get_max_threads();
+    DEBUG_MSG("Number of threads for the FFTW = %d\n",
+              nThreads);
+    if (nThreads > 1){
+        fftw_init_threads();
+        fftwf_init_threads();
+    }
+    fftw_mpi_init();
+    fftwf_mpi_init();
+    DEBUG_MSG("There are %d processes and %d threads\n",
+              nprocs,
+              nThreads);
+    if (nThreads > 1){
+        fftw_plan_with_nthreads(nThreads);
+        fftwf_plan_with_nthreads(nThreads);
+    }
+#endif
+
+
+
+    /* import fftw wisdom */
+    if (myrank == 0)
+        fftwf_import_wisdom_from_filename(
+                (simname + std::string("_fftw_wisdom.txt")).c_str());
+    fftwf_mpi_broadcast_wisdom(MPI_COMM_WORLD);
+
+
+
+    /* actually run DNS */
+    /*
+     * MPI environment:
+     * I could in principle pass myrank and nprocs instead of the global
+     * communicator, but it is possible that we'd like to do something more
+     * complex in the future (since I've done it in the past), and it's not
+     * expensive to keep several copies of myrank and nprocs.
+     *
+     * usage of assert:
+     * we could use assert here, but I assume that any problems we can still
+     * recover from should not be important enough to not clean up fftw and MPI
+     * things.
+     */
+    DNS *dns = new DNS(
+            MPI_COMM_WORLD,
+            simname);
+    /* run the three stages in order and stop at the first failure, so that
+     * only the stage that actually failed is reported */
+    int return_value = dns->initialize();
+    const char *current_stage = "initialize";
+    if (return_value == EXIT_SUCCESS)
+    {
+        current_stage = "main_loop";
+        return_value = dns->main_loop();
+    }
+    if (return_value == EXIT_SUCCESS)
+    {
+        current_stage = "finalize";
+        return_value = dns->finalize();
+    }
+    if (return_value != EXIT_SUCCESS)
+    {
+        DEBUG_MSG("problem calling dns->%s(), return value is %d",
+                  current_stage,
+                  return_value);
+    }
+
+    delete dns;
+
+
+
+    /* export fftw wisdom */
+    fftwf_mpi_gather_wisdom(MPI_COMM_WORLD);
+    MPI_Barrier(MPI_COMM_WORLD);
+    if (myrank == 0)
+        fftwf_export_wisdom_to_filename(
+                (simname + std::string("_fftw_wisdom.txt")).c_str());
+
+
+
+    /* clean up */
+    fftwf_mpi_cleanup();
+    fftw_mpi_cleanup();
+#ifndef NO_FFTWOMP
+    if (nThreads > 1){
+        fftw_cleanup_threads();
+        fftwf_cleanup_threads();
+    }
+#endif
+#ifdef USE_TIMINGOUTPUT
+    global_timer_manager.show(MPI_COMM_WORLD);
+    global_timer_manager.showHtml(MPI_COMM_WORLD);
+#endif
+
+    MPI_Finalize();
+    // hand the outcome of the run to the caller instead of masking failures
+    return return_value;
+}
+
+
+#endif//MAIN_CODE_HPP
+
diff --git a/bfps/cpp/full_code/native_binary_to_hdf5.cpp b/bfps/cpp/full_code/native_binary_to_hdf5.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..7774e2dea9012394c389858038e8ca82674256d7
--- /dev/null
+++
b/bfps/cpp/full_code/native_binary_to_hdf5.cpp
@@ -0,0 +1,95 @@
+#include <cstdio>
+#include <string>
+#include <cmath>
+#include "native_binary_to_hdf5.hpp"
+#include "scope_timer.hpp"
+
+
+/** \brief Read the parameters and allocate the field and its binary reader.
+ *
+ *  \return EXIT_FAILURE if the parameters could not be read (nothing is
+ *          allocated in that case), EXIT_SUCCESS otherwise.
+ */
+template <typename rnumber>
+int native_binary_to_hdf5<rnumber>::initialize(void)
+{
+    // the grid sizes used below come from the parameter file
+    if (this->read_parameters() != EXIT_SUCCESS)
+        return EXIT_FAILURE;
+    this->vec_field = new field<rnumber, FFTW, THREE>(
+            nx, ny, nz,
+            this->comm,
+            DEFAULT_FFTW_FLAG);
+    this->vec_field->real_space_representation = false;
+    this->bin_IO = new field_binary_IO<rnumber, COMPLEX, THREE>(
+            this->vec_field->clayout->sizes,
+            this->vec_field->clayout->subsizes,
+            this->vec_field->clayout->starts,
+            this->vec_field->clayout->comm);
+    return EXIT_SUCCESS;
+}
+
+/** \brief Convert the native binary vorticity file of the current iteration.
+ *
+ *  The complex field is read from `<simname>_cvorticity_i<iteration>`, with
+ *  the iteration written as (at least) 5 hexadecimal digits, and then passed
+ *  to `field::io` with the same file name plus ".h5" and dataset name
+ *  "vorticity".
+ */
+template <typename rnumber>
+int native_binary_to_hdf5<rnumber>::work_on_current_iteration(void)
+{
+    char itername[16];
+    // bounded write; %x expects an unsigned argument
+    snprintf(itername, sizeof(itername), "i%.5x", (unsigned int)(this->iteration));
+    std::string native_binary_fname = (
+            this->simname +
+            std::string("_cvorticity_") +
+            std::string(itername));
+    this->bin_IO->read(
+            native_binary_fname,
+            this->vec_field->get_cdata());
+    this->vec_field->io(
+            (native_binary_fname +
+             std::string(".h5")),
+            "vorticity",
+            this->iteration,
+            false);
+    return EXIT_SUCCESS;
+}
+
+/** \brief Free the binary reader and the field. */
+template <typename rnumber>
+int native_binary_to_hdf5<rnumber>::finalize(void)
+{
+    delete this->bin_IO;
+    delete this->vec_field;
+    return EXIT_SUCCESS;
+}
+
+/** \brief Read the common parameters and the iteration list of this job.
+ *
+ *  \return EXIT_FAILURE if `<simname>.h5` cannot be opened or the parent
+ *          class reports a failure, EXIT_SUCCESS otherwise.
+ */
+template <typename rnumber>
+int native_binary_to_hdf5<rnumber>::read_parameters(void)
+{
+    if (this->postprocess::read_parameters() != EXIT_SUCCESS)
+        return EXIT_FAILURE;
+    hid_t parameter_file = H5Fopen(
+            (this->simname + std::string(".h5")).c_str(),
+            H5F_ACC_RDONLY,
+            H5P_DEFAULT);
+    if (parameter_file < 0)
+        return EXIT_FAILURE;
+    this->iteration_list = hdf5_tools::read_vector<int>(
+            parameter_file,
+            "/native_binary_to_hdf5/iteration_list");
+    H5Fclose(parameter_file);
+    return EXIT_SUCCESS;
+}
+
+template class native_binary_to_hdf5<float>;
+template class native_binary_to_hdf5<double>;
+
diff --git a/bfps/cpp/full_code/native_binary_to_hdf5.hpp b/bfps/cpp/full_code/native_binary_to_hdf5.hpp
new file mode 100644
index
0000000000000000000000000000000000000000..35619952a754ec9680b7681fd51d7bff862ac36a
--- /dev/null
+++ b/bfps/cpp/full_code/native_binary_to_hdf5.hpp
@@ -0,0 +1,71 @@
+/**********************************************************************
+* *
+* Copyright 2017 Max Planck Institute *
+* for Dynamics and Self-Organization *
+* *
+* This file is part of bfps. *
+* *
+* bfps is free software: you can redistribute it and/or modify *
+* it under the terms of the GNU General Public License as published *
+* by the Free Software Foundation, either version 3 of the License, *
+* or (at your option) any later version. *
+* *
+* bfps is distributed in the hope that it will be useful, *
+* but WITHOUT ANY WARRANTY; without even the implied warranty of *
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
+* GNU General Public License for more details. *
+* *
+* You should have received a copy of the GNU General Public License *
+* along with bfps. If not, see <http://www.gnu.org/licenses/> *
+* *
+* Contact: Cristian.Lalescu@ds.mpg.de *
+* *
+**********************************************************************/
+
+
+
+#ifndef NATIVE_BINARY_TO_HDF5_HPP
+#define NATIVE_BINARY_TO_HDF5_HPP
+
+#include <cstdlib>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <vector>
+#include "base.hpp"
+#include "field.hpp"
+#include "field_binary_IO.hpp"
+#include "full_code/postprocess.hpp"
+
+/** \brief Postprocessing job that converts native binary vorticity files
+ *  to HDF5.
+ *
+ *  The iterations to convert are read from the dataset
+ *  "/native_binary_to_hdf5/iteration_list" of `<simname>.h5`.
+ */
+template <typename rnumber>
+class native_binary_to_hdf5: public postprocess
+{
+    public:
+
+        /* both start out as null pointers, so deleting them is always safe */
+        field<rnumber, FFTW, THREE> *vec_field;
+        field_binary_IO<rnumber, COMPLEX, THREE> *bin_IO;
+
+        native_binary_to_hdf5(
+                const MPI_Comm COMMUNICATOR,
+                const std::string &simulation_name):
+            postprocess(
+                    COMMUNICATOR,
+                    simulation_name),
+            vec_field(nullptr),
+            bin_IO(nullptr){}
+        virtual ~native_binary_to_hdf5(){}
+
+        int initialize(void);
+        int work_on_current_iteration(void);
+        int finalize(void);
+        virtual int read_parameters(void);
+};
+
+#endif//NATIVE_BINARY_TO_HDF5_HPP
+
diff --git a/bfps/cpp/full_code/postprocess.cpp
b/bfps/cpp/full_code/postprocess.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..edb5929f72c5197c123f8f4e20d426ca1ad9eb6f
--- /dev/null
+++ b/bfps/cpp/full_code/postprocess.cpp
@@ -0,0 +1,129 @@
+#include <cstdlib>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include "scope_timer.hpp"
+#include "hdf5_tools.hpp"
+#include "full_code/postprocess.hpp"
+
+
+/** \brief Call `work_on_current_iteration` for every entry of `iteration_list`.
+ *
+ *  `iteration` is set to the current entry before each call.
+ *  The loop ends early once `check_stopping_condition()` has set
+ *  `stop_code_now`.
+ */
+int postprocess::main_loop(void)
+{
+    this->start_simple_timer();
+    for (unsigned int iteration_counter = 0;
+         iteration_counter < iteration_list.size();
+         iteration_counter++)
+    {
+        this->iteration = iteration_list[iteration_counter];
+    #ifdef USE_TIMINGOUTPUT
+        const std::string loopLabel = ("postprocess::main_loop-" +
+                                       std::to_string(this->iteration));
+        TIMEZONE(loopLabel.c_str());
+    #endif
+        this->work_on_current_iteration();
+        this->print_simple_timer(
+                "iteration " + std::to_string(this->iteration));
+
+        this->check_stopping_condition();
+        if (this->stop_code_now)
+            break;
+    }
+    return EXIT_SUCCESS;
+}
+
+
+/** \brief Read one scalar dataset of `parameter_file` into `destination`.
+ *
+ *  A dataset that cannot be opened is skipped, and `destination` is left
+ *  untouched.
+ */
+static void read_scalar_parameter(
+        const hid_t parameter_file,
+        const char *dset_name,
+        const hid_t mem_type,
+        void *destination)
+{
+    const hid_t dset = H5Dopen(parameter_file, dset_name, H5P_DEFAULT);
+    if (dset < 0)
+        return;
+    H5Dread(dset, mem_type, H5S_ALL, H5S_ALL, H5P_DEFAULT, destination);
+    H5Dclose(dset);
+}
+
+/** \brief Read the string dataset "/parameters/forcing_type".
+ *
+ *  Handles variable-length as well as fixed-length strings; the result is
+ *  truncated to `destination_size - 1` characters and always terminated.
+ *  `destination` is left untouched if the dataset cannot be read.
+ */
+static void read_forcing_type(
+        const hid_t parameter_file,
+        char *destination,
+        const size_t destination_size)
+{
+    const hid_t dset = H5Dopen(parameter_file, "/parameters/forcing_type", H5P_DEFAULT);
+    if (dset < 0)
+        return;
+    const hid_t space = H5Dget_space(dset);
+    const hid_t memtype = H5Dget_type(dset);
+    // the buffers below hold exactly one string
+    if (H5Tget_class(memtype) == H5T_STRING &&
+        H5Sget_simple_extent_npoints(space) == 1)
+    {
+        if (H5Tis_variable_str(memtype) > 0)
+        {
+            // HDF5 allocates the characters and stores their address in
+            // string_data, so no buffer must be allocated here
+            char *string_data = NULL;
+            if (H5Dread(dset, memtype, H5S_ALL, H5S_ALL, H5P_DEFAULT, &string_data) >= 0 &&
+                string_data != NULL)
+                snprintf(destination, destination_size, "%s", string_data);
+            free(string_data);
+        }
+        else
+        {
+            // the characters themselves are stored in our buffer; the extra
+            // element guarantees termination
+            std::vector<char> string_data(H5Tget_size(memtype) + 1, '\0');
+            if (H5Dread(dset, memtype, H5S_ALL, H5S_ALL, H5P_DEFAULT, &string_data.front()) >= 0)
+                snprintf(destination, destination_size, "%s", &string_data.front());
+        }
+    }
+    H5Sclose(space);
+    H5Tclose(memtype);
+    H5Dclose(dset);
+}
+
+/** \brief Read the simulation parameters from `<simname>.h5`.
+ *
+ *  \return EXIT_FAILURE if the file cannot be opened, EXIT_SUCCESS otherwise.
+ */
+int postprocess::read_parameters()
+{
+    const std::string fname = this->simname + std::string(".h5");
+    const hid_t parameter_file = H5Fopen(fname.c_str(), H5F_ACC_RDONLY, H5P_DEFAULT);
+    if (parameter_file < 0)
+        return EXIT_FAILURE;
+    read_scalar_parameter(parameter_file, "/parameters/dealias_type", H5T_NATIVE_INT, &this->dealias_type);
+    read_scalar_parameter(parameter_file, "/parameters/dkx", H5T_NATIVE_DOUBLE, &this->dkx);
+    read_scalar_parameter(parameter_file, "/parameters/dky", H5T_NATIVE_DOUBLE, &this->dky);
+    read_scalar_parameter(parameter_file, "/parameters/dkz", H5T_NATIVE_DOUBLE, &this->dkz);
+    read_scalar_parameter(parameter_file, "/parameters/dt", H5T_NATIVE_DOUBLE, &this->dt);
+    read_scalar_parameter(parameter_file, "/parameters/famplitude", H5T_NATIVE_DOUBLE, &this->famplitude);
+    read_scalar_parameter(parameter_file, "/parameters/fk0", H5T_NATIVE_DOUBLE, &this->fk0);
+    read_scalar_parameter(parameter_file, "/parameters/fk1", H5T_NATIVE_DOUBLE, &this->fk1);
+    read_scalar_parameter(parameter_file, "/parameters/fmode", H5T_NATIVE_INT, &this->fmode);
+    read_forcing_type(parameter_file, this->forcing_type, sizeof(this->forcing_type));
+    read_scalar_parameter(parameter_file, "/parameters/nu", H5T_NATIVE_DOUBLE, &this->nu);
+    read_scalar_parameter(parameter_file, "/parameters/nx", H5T_NATIVE_INT, &this->nx);
+    read_scalar_parameter(parameter_file, "/parameters/ny", H5T_NATIVE_INT, &this->ny);
+    read_scalar_parameter(parameter_file, "/parameters/nz", H5T_NATIVE_INT, &this->nz);
+    H5Fclose(parameter_file);
+    return EXIT_SUCCESS;
+}
+
diff --git a/bfps/cpp/full_code/postprocess.hpp b/bfps/cpp/full_code/postprocess.hpp
new file mode 100644
index
0000000000000000000000000000000000000000..c80fc3f2dfdc35691d9e69442fa3ad7b6e592891
--- /dev/null
+++ b/bfps/cpp/full_code/postprocess.hpp
@@ -0,0 +1,78 @@
+/**********************************************************************
+* *
+* Copyright 2017 Max Planck Institute *
+* for Dynamics and Self-Organization *
+* *
+* This file is part of bfps. *
+* *
+* bfps is free software: you can redistribute it and/or modify *
+* it under the terms of the GNU General Public License as published *
+* by the Free Software Foundation, either version 3 of the License, *
+* or (at your option) any later version. *
+* *
+* bfps is distributed in the hope that it will be useful, *
+* but WITHOUT ANY WARRANTY; without even the implied warranty of *
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
+* GNU General Public License for more details. *
+* *
+* You should have received a copy of the GNU General Public License *
+* along with bfps. If not, see <http://www.gnu.org/licenses/> *
+* *
+* Contact: Cristian.Lalescu@ds.mpg.de *
+* *
+**********************************************************************/
+
+
+
+#ifndef POSTPROCESS_HPP
+#define POSTPROCESS_HPP
+
+#include <cstdlib>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <vector>
+#include "base.hpp"
+#include "full_code/code_base.hpp"
+
+/** \brief Base class for postprocessing jobs.
+ *
+ *  `main_loop` calls `work_on_current_iteration` once for each entry of
+ *  `iteration_list`; children classes implement the actual work.
+ */
+class postprocess: public code_base
+{
+    public:
+        std::vector<int> iteration_list; // iterations visited by main_loop, in order
+        hid_t stat_file;
+
+        /* parameters that are read in read_parameters */
+        double dt;
+        double famplitude;
+        double fk0;
+        double fk1;
+        int fmode;
+        char forcing_type[512]; // NUL-terminated string
+        double nu;
+
+        postprocess(
+                const MPI_Comm COMMUNICATOR,
+                const std::string &simulation_name):
+            code_base(
+                    COMMUNICATOR,
+                    simulation_name)
+        {
+            // keep the buffer a valid (empty) string until read_parameters fills it
+            this->forcing_type[0] = '\0';
+        }
+        virtual ~postprocess(){}
+
+        virtual int initialize(void) = 0;
+        virtual int work_on_current_iteration(void) = 0;
+        virtual int finalize(void) = 0;
+
+        int main_loop(void);
+        virtual int read_parameters(void);
+};
+
+#endif//POSTPROCESS_HPP
+
diff --git
a/bfps/cpp/hdf5_tools.cpp b/bfps/cpp/hdf5_tools.cpp new file mode 100644 index 0000000000000000000000000000000000000000..4328b28703ac60de7e82e4e3e729134ee3ff1520 --- /dev/null +++ b/bfps/cpp/hdf5_tools.cpp @@ -0,0 +1,216 @@ +#include "hdf5_tools.hpp" + +int hdf5_tools::require_size_single_dataset(hid_t dset, int tsize) +{ + int ndims; + hid_t space; /* H5Dget_space returns an hid_t handle, not an hsize_t */ + space = H5Dget_space(dset); + ndims = H5Sget_simple_extent_ndims(space); + hsize_t *dims = new hsize_t[ndims]; + hsize_t *maxdims = new hsize_t[ndims]; + H5Sget_simple_extent_dims(space, dims, maxdims); + if (dims[0] < hsize_t(tsize) && maxdims[0] == H5S_UNLIMITED) + { + dims[0] = tsize; + H5Dset_extent(dset, dims); + } + H5Sclose(space); + delete[] maxdims; + delete[] dims; + return EXIT_SUCCESS; +} + +int hdf5_tools::grow_single_dataset(hid_t dset, int tincrement) +{ + int ndims; + hid_t space; /* H5Dget_space returns an hid_t handle, not an hsize_t */ + space = H5Dget_space(dset); + ndims = H5Sget_simple_extent_ndims(space); + hsize_t *dims = new hsize_t[ndims]; + hsize_t *maxdims = new hsize_t[ndims]; + H5Sget_simple_extent_dims(space, dims, maxdims); + if (maxdims[0] == H5S_UNLIMITED) + { + dims[0] += tincrement; + H5Dset_extent(dset, dims); + } + H5Sclose(space); + delete[] maxdims; + delete[] dims; + return EXIT_SUCCESS; +} + +herr_t hdf5_tools::require_size_dataset_visitor( + hid_t o_id, + const char *name, + const H5O_info_t *info, + void *op_data) +{ + if (info->type == H5O_TYPE_DATASET) + { + hid_t dset = H5Dopen(o_id, name, H5P_DEFAULT); + require_size_single_dataset(dset, *((int*)(op_data))); + H5Dclose(dset); + } + return EXIT_SUCCESS; +} + +herr_t hdf5_tools::grow_dataset_visitor( + hid_t o_id, + const char *name, + const H5O_info_t *info, + void *op_data) +{ + if (info->type == H5O_TYPE_DATASET) + { + hid_t dset = H5Dopen(o_id, name, H5P_DEFAULT); + grow_single_dataset(dset, *((int*)(op_data))); + H5Dclose(dset); + } + return EXIT_SUCCESS; +} + + +int hdf5_tools::grow_file_datasets( + const hid_t stat_file, + const std::string group_name, + int
tincrement) +{ + int file_problems = 0; + + hid_t group; + group = H5Gopen(stat_file, group_name.c_str(), H5P_DEFAULT); + H5Ovisit( + group, + H5_INDEX_NAME, + H5_ITER_NATIVE, + grow_dataset_visitor, + &tincrement); + H5Gclose(group); + return file_problems; +} + + +int hdf5_tools::require_size_file_datasets( + const hid_t stat_file, + const std::string group_name, + int tsize) +{ + int file_problems = 0; + + hid_t group; + group = H5Gopen(stat_file, group_name.c_str(), H5P_DEFAULT); + H5Ovisit( + group, + H5_INDEX_NAME, + H5_ITER_NATIVE, + require_size_dataset_visitor, + &tsize); + H5Gclose(group); + return file_problems; +} + +template <typename number> +std::vector<number> hdf5_tools::read_vector( + const hid_t group, + const std::string dset_name) +{ + std::vector<number> result; + hsize_t vector_length; + // first, read size of array + hid_t dset, dspace; + hid_t mem_dtype; + if (typeid(number) == typeid(int)) + mem_dtype = H5Tcopy(H5T_NATIVE_INT); + else if (typeid(number) == typeid(double)) + mem_dtype = H5Tcopy(H5T_NATIVE_DOUBLE); + dset = H5Dopen(group, dset_name.c_str(), H5P_DEFAULT); + dspace = H5Dget_space(dset); + assert(H5Sget_simple_extent_ndims(dspace) == 1); + H5Sget_simple_extent_dims(dspace, &vector_length, NULL); + result.resize(vector_length); + H5Dread(dset, mem_dtype, H5S_ALL, H5S_ALL, H5P_DEFAULT, result.data()); /* data() stays valid for an empty vector, front() does not */ + H5Sclose(dspace); + H5Dclose(dset); + H5Tclose(mem_dtype); + return result; +} + +template <typename dtype> +std::vector<dtype> hdf5_tools::read_vector_with_single_rank( + const int myrank, + const int rank_to_use, + const MPI_Comm COMM, + const hid_t file_id, + const std::string dset_name) +{ + std::vector<dtype> data; + int vector_size; + if (myrank == rank_to_use) + { + data = hdf5_tools::read_vector<dtype>( + file_id, + dset_name); + vector_size = data.size(); + } + MPI_Bcast( + &vector_size, + 1, + MPI_INT, + rank_to_use, + COMM); + + if (myrank != rank_to_use) + data.resize(vector_size); + MPI_Bcast( + data.data(), +
vector_size, + (typeid(dtype) == typeid(int)) ? MPI_INT : MPI_DOUBLE, + rank_to_use, + COMM); + return data; +} + +std::string hdf5_tools::read_string( + const hid_t group, + const std::string dset_name) +{ + hid_t dset = H5Dopen(group, dset_name.c_str(), H5P_DEFAULT); + hid_t space = H5Dget_space(dset); + hid_t memtype = H5Dget_type(dset); + char *string_data = NULL; /* H5Dread overwrites this pointer (variable-length string read), so a prior malloc would leak */ + H5Dread(dset, memtype, H5S_ALL, H5S_ALL, H5P_DEFAULT, &string_data); + std::string std_string_data = std::string(string_data ? string_data : ""); /* pointer stays NULL if the read failed */ + free(string_data); + H5Sclose(space); + H5Tclose(memtype); + H5Dclose(dset); + return std_string_data; +} + +template +std::vector<int> hdf5_tools::read_vector<int>( + const hid_t, + const std::string); + +template +std::vector<double> hdf5_tools::read_vector<double>( + const hid_t, + const std::string); + +template +std::vector<int> hdf5_tools::read_vector_with_single_rank<int>( + const int myrank, + const int rank_to_use, + const MPI_Comm COMM, + const hid_t file_id, + const std::string dset_name); + +template +std::vector<double> hdf5_tools::read_vector_with_single_rank<double>( + const int myrank, + const int rank_to_use, + const MPI_Comm COMM, + const hid_t file_id, + const std::string dset_name); + diff --git a/bfps/cpp/hdf5_tools.hpp b/bfps/cpp/hdf5_tools.hpp new file mode 100644 index 0000000000000000000000000000000000000000..456beefe362c5d0871f8014c7a1cc468614e6374 --- /dev/null +++ b/bfps/cpp/hdf5_tools.hpp @@ -0,0 +1,85 @@ +/********************************************************************** +* * +* Copyright 2017 Max Planck Institute * +* for Dynamics and Self-Organization * +* * +* This file is part of bfps. * +* * +* bfps is free software: you can redistribute it and/or modify * +* it under the terms of the GNU General Public License as published * +* by the Free Software Foundation, either version 3 of the License, * +* or (at your option) any later version.
* +* * +* bfps is distributed in the hope that it will be useful, * +* but WITHOUT ANY WARRANTY; without even the implied warranty of * +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * +* GNU General Public License for more details. * +* * +* You should have received a copy of the GNU General Public License * +* along with bfps. If not, see <http://www.gnu.org/licenses/> * +* * +* Contact: Cristian.Lalescu@ds.mpg.de * +* * +**********************************************************************/ + + + +#ifndef HDF5_TOOLS_HPP +#define HDF5_TOOLS_HPP + +#include <vector> +#include <hdf5.h> +#include "base.hpp" + +namespace hdf5_tools +{ + int grow_single_dataset( + hid_t dset, + int tincrement); + + herr_t grow_dataset_visitor( + hid_t o_id, + const char *name, + const H5O_info_t *info, + void *op_data); + + int grow_file_datasets( + const hid_t stat_file, + const std::string group_name, + int tincrement); + + int require_size_single_dataset( + hid_t dset, + int tincrement); + + herr_t require_size_dataset_visitor( + hid_t o_id, + const char *name, + const H5O_info_t *info, + void *op_data); + + int require_size_file_datasets( + const hid_t stat_file, + const std::string group_name, + int tincrement); + + template <typename number> + std::vector<number> read_vector( + const hid_t group, + const std::string dset_name); + + template <typename number> + std::vector<number> read_vector_with_single_rank( + const int myrank, + const int rank_to_use, + const MPI_Comm COMM, + const hid_t group, + const std::string dset_name); + + std::string read_string( + const hid_t group, + const std::string dset_name); +} + +#endif//HDF5_TOOLS_HPP + diff --git a/bfps/cpp/interpolator.cpp b/bfps/cpp/interpolator.cpp index ef53742a4fdeb2545f02954f10c47d2bcb3f6538..a0b38c4059585cc7fd58ab830b792be4f8bc193d 100644 --- a/bfps/cpp/interpolator.cpp +++ b/bfps/cpp/interpolator.cpp @@ -150,7 +150,7 @@ template <class rnumber, int interp_neighbours> void interpolator<rnumber, 
interp_neighbours>::operator()( const int *xg, const double *xx, - double *dest, + double *__restrict__ dest, const int *deriv) { double bx[interp_neighbours*2+2], by[interp_neighbours*2+2], bz[interp_neighbours*2+2]; @@ -197,10 +197,18 @@ template class interpolator<float, 3>; template class interpolator<float, 4>; template class interpolator<float, 5>; template class interpolator<float, 6>; +template class interpolator<float, 7>; +template class interpolator<float, 8>; +template class interpolator<float, 9>; +template class interpolator<float, 10>; template class interpolator<double, 1>; template class interpolator<double, 2>; template class interpolator<double, 3>; template class interpolator<double, 4>; template class interpolator<double, 5>; template class interpolator<double, 6>; +template class interpolator<double, 7>; +template class interpolator<double, 8>; +template class interpolator<double, 9>; +template class interpolator<double, 10>; diff --git a/bfps/cpp/interpolator_base.cpp b/bfps/cpp/interpolator_base.cpp index 58bf57cf13382f0704da4537dae9d21bb4a841da..668a965c65744ac5aae31afb6bee05711a433657 100644 --- a/bfps/cpp/interpolator_base.cpp +++ b/bfps/cpp/interpolator_base.cpp @@ -43,6 +43,20 @@ interpolator_base<rnumber, interp_neighbours>::interpolator_base( this->dz = 4*acos(0) / (fs->dkz*this->descriptor->sizes[0]); } +template <class rnumber, int interp_neighbours> +interpolator_base<rnumber, interp_neighbours>::interpolator_base( + vorticity_equation<rnumber, FFTW> *fs, + base_polynomial_values BETA_POLYS) +{ +// this->descriptor = fs->rd; +// this->compute_beta = BETA_POLYS; +// +// // compute dx, dy, dz; +// this->dx = 4*acos(0) / (fs->kk->dkx*this->descriptor->sizes[2]); +// this->dy = 4*acos(0) / (fs->kk->dky*this->descriptor->sizes[1]); +// this->dz = 4*acos(0) / (fs->kk->dkz*this->descriptor->sizes[0]); +} + template <class rnumber, int interp_neighbours> void interpolator_base<rnumber, interp_neighbours>::get_grid_coordinates( const int 
nparticles, @@ -82,10 +96,18 @@ template class interpolator_base<float, 3>; template class interpolator_base<float, 4>; template class interpolator_base<float, 5>; template class interpolator_base<float, 6>; +template class interpolator_base<float, 7>; +template class interpolator_base<float, 8>; +template class interpolator_base<float, 9>; +template class interpolator_base<float, 10>; template class interpolator_base<double, 1>; template class interpolator_base<double, 2>; template class interpolator_base<double, 3>; template class interpolator_base<double, 4>; template class interpolator_base<double, 5>; template class interpolator_base<double, 6>; +template class interpolator_base<double, 7>; +template class interpolator_base<double, 8>; +template class interpolator_base<double, 9>; +template class interpolator_base<double, 10>; diff --git a/bfps/cpp/interpolator_base.hpp b/bfps/cpp/interpolator_base.hpp index 7dda7fb08319bf2a044bcc220e204b748d6336d6..f4c28db7b9de632e8ec4977dd67f929f06080e19 100644 --- a/bfps/cpp/interpolator_base.hpp +++ b/bfps/cpp/interpolator_base.hpp @@ -25,12 +25,17 @@ #include "fluid_solver_base.hpp" +#include "vorticity_equation.hpp" #include "spline_n1.hpp" #include "spline_n2.hpp" #include "spline_n3.hpp" #include "spline_n4.hpp" #include "spline_n5.hpp" #include "spline_n6.hpp" +#include "spline_n7.hpp" +#include "spline_n8.hpp" +#include "spline_n9.hpp" +#include "spline_n10.hpp" #include "Lagrange_polys.hpp" #ifndef INTERPOLATOR_BASE @@ -58,6 +63,10 @@ class interpolator_base interpolator_base( fluid_solver_base<rnumber> *FSOLVER, base_polynomial_values BETA_POLYS); + + interpolator_base( + vorticity_equation<rnumber, FFTW> *FSOLVER, + base_polynomial_values BETA_POLYS); virtual ~interpolator_base(){} /* may not destroy input */ diff --git a/bfps/cpp/kspace.cpp b/bfps/cpp/kspace.cpp new file mode 100644 index 0000000000000000000000000000000000000000..96425fd218b616221bc81d19eca6c692f6662111 --- /dev/null +++ b/bfps/cpp/kspace.cpp @@ 
-0,0 +1,645 @@ +/********************************************************************** +* * +* Copyright 2015 Max Planck Institute * +* for Dynamics and Self-Organization * +* * +* This file is part of bfps. * +* * +* bfps is free software: you can redistribute it and/or modify * +* it under the terms of the GNU General Public License as published * +* by the Free Software Foundation, either version 3 of the License, * +* or (at your option) any later version. * +* * +* bfps is distributed in the hope that it will be useful, * +* but WITHOUT ANY WARRANTY; without even the implied warranty of * +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * +* GNU General Public License for more details. * +* * +* You should have received a copy of the GNU General Public License * +* along with bfps. If not, see <http://www.gnu.org/licenses/> * +* * +* Contact: Cristian.Lalescu@ds.mpg.de * +* * +**********************************************************************/ + + +#include <cmath> +#include <cstdlib> +#include <algorithm> +#include <cassert> +#include "kspace.hpp" +#include "scope_timer.hpp" +#include "shared_array.hpp" + +template <field_backend be, + kspace_dealias_type dt> +template <field_components fc> +kspace<be, dt>::kspace( + const field_layout<fc> *source_layout, + const double DKX, + const double DKY, + const double DKZ) +{ + TIMEZONE("kspace::kspace"); + /* get layout */ + this->layout = new field_layout<ONE>( + source_layout->sizes, + source_layout->subsizes, + source_layout->starts, + source_layout->comm); + + /* store dk values */ + this->dkx = DKX; + this->dky = DKY; + this->dkz = DKZ; + + /* compute kx, ky, kz and compute kM values */ + switch(be) + { + case FFTW: + this->kx.resize(this->layout->sizes[2]); + this->ky.resize(this->layout->subsizes[0]); + this->kz.resize(this->layout->sizes[1]); + int i, ii; + for (i = 0; i<int(this->layout->sizes[2]); i++) + this->kx[i] = i*this->dkx; + for (i = 0; i<int(this->layout->subsizes[0]); i++) + {
+ ii = i + this->layout->starts[0]; /* ky runs along axis 0 (cf. ky.resize and kMy), so it must wrap at sizes[0] */ + if (ii <= int(this->layout->sizes[0]/2)) + this->ky[i] = this->dky*ii; + else + this->ky[i] = this->dky*(ii - int(this->layout->sizes[0])); + } + for (i = 0; i<int(this->layout->sizes[1]); i++) + { + if (i <= int(this->layout->sizes[1]/2)) /* kz runs along axis 1 (cf. kz.resize and kMz), so it must wrap at sizes[1] */ + this->kz[i] = this->dkz*i; + else + this->kz[i] = this->dkz*(i - int(this->layout->sizes[1])); + } + switch(dt) + { + case TWO_THIRDS: + this->kMx = this->dkx*(int(2*(int(this->layout->sizes[2])-1)/3)-1); + this->kMy = this->dky*(int(this->layout->sizes[0] / 3)-1); + this->kMz = this->dkz*(int(this->layout->sizes[1] / 3)-1); + break; + case SMOOTH: + this->kMx = this->dkx*(int(this->layout->sizes[2])-2); + this->kMy = this->dky*(int(this->layout->sizes[0] / 2)-1); + this->kMz = this->dkz*(int(this->layout->sizes[1] / 2)-1); + break; + } + break; + } + + /* get global kM and dk */ + this->kM = this->kMx; + if (this->kM < this->kMy) this->kM = this->kMy; + if (this->kM < this->kMz) this->kM = this->kMz; + this->kM2 = this->kM * this->kM; + this->dk = this->dkx; + if (this->dk > this->dky) this->dk = this->dky; + if (this->dk > this->dkz) this->dk = this->dkz; + this->dk2 = this->dk*this->dk; + + /* spectra stuff */ + this->nshells = int(this->kM / this->dk) + 2; + this->kshell.resize(this->nshells, 0); + this->nshell.resize(this->nshells, 0); + + shared_array<double> kshell_local_thread(this->nshells,[&](double* kshell_local){ + std::fill_n(kshell_local, this->nshells, 0); + }); + shared_array<int64_t> nshell_local_thread(this->nshells,[&](int64_t* nshell_local){ + std::fill_n(nshell_local, this->nshells, 0); + }); + + std::vector<std::unordered_map<int, double>> dealias_filter_threaded(omp_get_max_threads()); + + this->CLOOP_K2_NXMODES( + [&](ptrdiff_t cindex, + ptrdiff_t xindex, + ptrdiff_t yindex, + ptrdiff_t zindex, + double k2, + int nxmodes){ + if (k2 < this->kM2) + { + double knorm = sqrt(k2); + kshell_local_thread.getMine()[int(knorm/this->dk)] += nxmodes*knorm; +
nshell_local_thread.getMine()[int(knorm/this->dk)] += nxmodes; + } + if (dt == SMOOTH){ + dealias_filter_threaded[omp_get_thread_num()][int(round(k2 / this->dk2))] = exp(-36.0 * pow(k2/this->kM2, 18.)); + } + }); + + // Merge results + + kshell_local_thread.mergeParallel(); + nshell_local_thread.mergeParallel(); + + if (dt == SMOOTH){ + for(int idxMerge = 0 ; idxMerge < int(dealias_filter_threaded.size()) ; ++idxMerge){ + for(const auto kv : dealias_filter_threaded[idxMerge]){ + this->dealias_filter[kv.first] = kv.second; + } + } + } + + MPI_Allreduce( + nshell_local_thread.getMasterData(), + &this->nshell.front(), + this->nshells, + MPI_INT64_T, MPI_SUM, this->layout->comm); + MPI_Allreduce( + kshell_local_thread.getMasterData(), + &this->kshell.front(), + this->nshells, + MPI_DOUBLE, MPI_SUM, this->layout->comm); + for (int n=0; n<this->nshells; n++){ + if(this->nshell[n] != 0){ + this->kshell[n] /= this->nshell[n]; + } + } +} + +template <field_backend be, + kspace_dealias_type dt> +kspace<be, dt>::~kspace() +{ + delete this->layout; +} + +template <field_backend be, + kspace_dealias_type dt> +int kspace<be, dt>::store(hid_t stat_file) +{ + TIMEZONE("kspace::store"); + assert(this->layout->myrank == 0); + hsize_t dims[4]; + hid_t space, dset; + // store kspace information + dset = H5Dopen(stat_file, "/kspace/kshell", H5P_DEFAULT); + space = H5Dget_space(dset); + H5Sget_simple_extent_dims(space, dims, NULL); + H5Sclose(space); + if (this->nshells != int(dims[0])) + { + DEBUG_MSG( + "ERROR: computed nshells %d not equal to data file nshells %d\n", + this->nshells, int(dims[0])); + } + H5Dwrite( + dset, + H5T_NATIVE_DOUBLE, + H5S_ALL, + H5S_ALL, + H5P_DEFAULT, + &this->kshell.front()); + H5Dclose(dset); + dset = H5Dopen( + stat_file, + "/kspace/nshell", + H5P_DEFAULT); + H5Dwrite( + dset, + H5T_NATIVE_INT64, + H5S_ALL, + H5S_ALL, + H5P_DEFAULT, + &this->nshell.front()); + H5Dclose(dset); + dset = H5Dopen(stat_file, "/kspace/kM", H5P_DEFAULT); + H5Dwrite( + dset, +
H5T_NATIVE_DOUBLE, + H5S_ALL, + H5S_ALL, + H5P_DEFAULT, + &this->kM); + H5Dclose(dset); + dset = H5Dopen(stat_file, "/kspace/dk", H5P_DEFAULT); + H5Dwrite(dset, + H5T_NATIVE_DOUBLE, + H5S_ALL, + H5S_ALL, + H5P_DEFAULT, + &this->dk); + H5Dclose(dset); + return EXIT_SUCCESS; +} + +template <field_backend be, + kspace_dealias_type dt> +template <typename rnumber, + field_components fc> +void kspace<be, dt>::low_pass( + typename fftw_interface<rnumber>::complex *__restrict__ a, + const double kmax) +{ + const double km2 = kmax*kmax; + this->CLOOP_K2( + [&](ptrdiff_t cindex, + ptrdiff_t xindex, + ptrdiff_t yindex, + ptrdiff_t zindex, + double k2){ + if (k2 >= km2) + std::fill_n((rnumber*)(a + ncomp(fc)*cindex), 2*ncomp(fc), 0); + }); +} + +/** \brief Filter a field using a Gaussian kernel. + * + * Filter's mathematical expression in Fourier space is as follows: + * \f[ + * \hat{g}_\ell(\mathbf{k}) = \exp(-k^2 \sigma^2 / 2) + * \f] + */ +template <field_backend be, + kspace_dealias_type dt> +template <typename rnumber, + field_components fc> +void kspace<be, dt>::Gauss_filter( + typename fftw_interface<rnumber>::complex *__restrict__ a, + const double sigma) +{ + const double prefactor = - sigma*sigma/2; + this->CLOOP_K2( + [&](ptrdiff_t cindex, + ptrdiff_t xindex, + ptrdiff_t yindex, + ptrdiff_t zindex, + double k2){ + if (k2 <= this->kM2) + { + for (unsigned int tcounter=0; tcounter<2*ncomp(fc); tcounter++) + ((rnumber*)a)[2*ncomp(fc)*cindex + tcounter] *= exp(prefactor*k2); + } + }); +} + +/** \brief Filter a field. + * + * This is a wrapper that can choose between a sharp Fourier spherical filter + * and a Gaussian filter. + * The cutoff wavenumber \f$k_c\f$ is a parameter, so the low pass Fourier + * operation is straightforward. + * + * For the Gaussian filter, it's mathematical expression in Fourier space is + * as follows: + * \f[ + * \hat{g}_\ell(\mathbf{k}) = \exp(-k^2 \ell^2 / 2) + * \f] + * And we choose the convention \f$\ell = \frac{\pi}{k_c}\f$. 
+ * This is the same convention used in \cite Buzzicotti2017 . + */ +template <field_backend be, + kspace_dealias_type dt> +template <typename rnumber, + field_components fc> +int kspace<be, dt>::filter( + typename fftw_interface<rnumber>::complex *__restrict__ a, + const double wavenumber, + std::string filter_type) +{ + if (filter_type == std::string("sharp_Fourier_sphere")) + { + this->template low_pass<rnumber, fc>( + a, + wavenumber); + } + else if (filter_type == std::string("Gauss")) + { + this->template Gauss_filter<rnumber, fc>( + a, + 2*acos(0.)/wavenumber); + } + return EXIT_SUCCESS; +} + +template <field_backend be, + kspace_dealias_type dt> +template <typename rnumber, + field_components fc> +void kspace<be, dt>::dealias(typename fftw_interface<rnumber>::complex *__restrict__ a) +{ + switch(dt) + { + case TWO_THIRDS: + this->low_pass<rnumber, fc>(a, this->kM); + break; + case SMOOTH: + this->CLOOP_K2( + [&](ptrdiff_t cindex, + ptrdiff_t xindex, + ptrdiff_t yindex, + ptrdiff_t zindex, + double k2){ + double tval = this->dealias_filter[int(round(k2 / this->dk2))]; + for (unsigned int tcounter=0; tcounter<2*ncomp(fc); tcounter++) + ((rnumber*)a)[2*ncomp(fc)*cindex + tcounter] *= tval; + }); + break; + } +} + +template <field_backend be, + kspace_dealias_type dt> +template <typename rnumber> +void kspace<be, dt>::force_divfree(typename fftw_interface<rnumber>::complex *__restrict__ a) +{ + TIMEZONE("kspace::force_divfree"); + this->CLOOP_K2( + [&](ptrdiff_t cindex, + ptrdiff_t xindex, + ptrdiff_t yindex, + ptrdiff_t zindex, + double k2){ + if (k2 > 0) + { + typename fftw_interface<rnumber>::complex tval; + tval[0] = (this->kx[xindex]*((*(a + cindex*3 ))[0]) + + this->ky[yindex]*((*(a + cindex*3+1))[0]) + + this->kz[zindex]*((*(a + cindex*3+2))[0]) ) / k2; + tval[1] = (this->kx[xindex]*((*(a + cindex*3 ))[1]) + + this->ky[yindex]*((*(a + cindex*3+1))[1]) + + this->kz[zindex]*((*(a + cindex*3+2))[1]) ) / k2; + for (int imag_part=0; imag_part<2; imag_part++) 
+ { + a[cindex*3 ][imag_part] -= tval[imag_part]*this->kx[xindex]; + a[cindex*3+1][imag_part] -= tval[imag_part]*this->ky[yindex]; + a[cindex*3+2][imag_part] -= tval[imag_part]*this->kz[zindex]; + } + } + } + ); + if (this->layout->myrank == this->layout->rank[0][0]) + std::fill_n((rnumber*)(a), 6, 0.0); +} + +template <field_backend be, + kspace_dealias_type dt> +template <typename rnumber, + field_components fc> +void kspace<be, dt>::cospectrum( + const rnumber(* __restrict a)[2], + const rnumber(* __restrict b)[2], + const hid_t group, + const std::string dset_name, + const hsize_t toffset) +{ + TIMEZONE("field::cospectrum"); + shared_array<double> spec_local_thread(this->nshells*ncomp(fc)*ncomp(fc),[&](double* spec_local){ + std::fill_n(spec_local, this->nshells*ncomp(fc)*ncomp(fc), 0); + }); + + this->CLOOP_K2_NXMODES( + [&](ptrdiff_t cindex, + ptrdiff_t xindex, + ptrdiff_t yindex, + ptrdiff_t zindex, + double k2, + int nxmodes){ + if (k2 <= this->kM2) + { + double* spec_local = spec_local_thread.getMine(); + int tmp_int = int(sqrt(k2) / this->dk)*ncomp(fc)*ncomp(fc); + for (hsize_t i=0; i<ncomp(fc); i++) + for (hsize_t j=0; j<ncomp(fc); j++){ + spec_local[tmp_int + i*ncomp(fc)+j] += nxmodes * ( + (a[ncomp(fc)*cindex + i][0] * b[ncomp(fc)*cindex + j][0]) + + (a[ncomp(fc)*cindex + i][1] * b[ncomp(fc)*cindex + j][1])); + } + } + }); + + spec_local_thread.mergeParallel(); + + std::vector<double> spec; + spec.resize(this->nshells*ncomp(fc)*ncomp(fc), 0); + MPI_Allreduce( + spec_local_thread.getMasterData(), + &spec.front(), + spec.size(), + MPI_DOUBLE, MPI_SUM, this->layout->comm); + if (this->layout->myrank == 0) + { + hid_t dset, wspace, mspace; + hsize_t count[(ndim(fc)-2)*2], offset[(ndim(fc)-2)*2], dims[(ndim(fc)-2)*2]; + dset = H5Dopen(group, ("spectra/" + dset_name).c_str(), H5P_DEFAULT); + wspace = H5Dget_space(dset); + H5Sget_simple_extent_dims(wspace, dims, NULL); + switch (fc) + { + case THREExTHREE: + offset[4] = 0; + offset[5] = 0; + count[4] = 
ncomp(fc); + count[5] = ncomp(fc); + case THREE: + offset[2] = 0; + offset[3] = 0; + count[2] = ncomp(fc); + count[3] = ncomp(fc); + default: + offset[0] = toffset; + offset[1] = 0; + count[0] = 1; + count[1] = this->nshells; + } + mspace = H5Screate_simple((ndim(fc)-2)*2, count, NULL); + H5Sselect_hyperslab(wspace, H5S_SELECT_SET, offset, NULL, count, NULL); + H5Dwrite(dset, H5T_NATIVE_DOUBLE, mspace, wspace, H5P_DEFAULT, &spec.front()); + H5Sclose(wspace); + H5Sclose(mspace); + H5Dclose(dset); + } +} + + +template class kspace<FFTW, TWO_THIRDS>; +template class kspace<FFTW, SMOOTH>; + +template kspace<FFTW, TWO_THIRDS>::kspace<>( + const field_layout<ONE> *, + const double, const double, const double); +template kspace<FFTW, TWO_THIRDS>::kspace<>( + const field_layout<THREE> *, + const double, const double, const double); +template kspace<FFTW, TWO_THIRDS>::kspace<>( + const field_layout<THREExTHREE> *, + const double, const double, const double); + +template kspace<FFTW, SMOOTH>::kspace<>( + const field_layout<ONE> *, + const double, const double, const double); +template kspace<FFTW, SMOOTH>::kspace<>( + const field_layout<THREE> *, + const double, const double, const double); +template kspace<FFTW, SMOOTH>::kspace<>( + const field_layout<THREExTHREE> *, + const double, const double, const double); + +template void kspace<FFTW, SMOOTH>::low_pass<float, ONE>( + typename fftw_interface<float>::complex *__restrict__ a, + const double kmax); +template void kspace<FFTW, SMOOTH>::low_pass<float, THREE>( + typename fftw_interface<float>::complex *__restrict__ a, + const double kmax); +template void kspace<FFTW, SMOOTH>::low_pass<float, THREExTHREE>( + typename fftw_interface<float>::complex *__restrict__ a, + const double kmax); + +template void kspace<FFTW, SMOOTH>::low_pass<double, ONE>( + typename fftw_interface<double>::complex *__restrict__ a, + const double kmax); +template void kspace<FFTW, SMOOTH>::low_pass<double, THREE>( + typename 
fftw_interface<double>::complex *__restrict__ a, + const double kmax); +template void kspace<FFTW, SMOOTH>::low_pass<double, THREExTHREE>( + typename fftw_interface<double>::complex *__restrict__ a, + const double kmax); + +template void kspace<FFTW, SMOOTH>::Gauss_filter<float, ONE>( + typename fftw_interface<float>::complex *__restrict__ a, + const double kmax); +template void kspace<FFTW, SMOOTH>::Gauss_filter<float, THREE>( + typename fftw_interface<float>::complex *__restrict__ a, + const double kmax); +template void kspace<FFTW, SMOOTH>::Gauss_filter<float, THREExTHREE>( + typename fftw_interface<float>::complex *__restrict__ a, + const double kmax); + +template void kspace<FFTW, SMOOTH>::Gauss_filter<double, ONE>( + typename fftw_interface<double>::complex *__restrict__ a, + const double kmax); +template void kspace<FFTW, SMOOTH>::Gauss_filter<double, THREE>( + typename fftw_interface<double>::complex *__restrict__ a, + const double kmax); +template void kspace<FFTW, SMOOTH>::Gauss_filter<double, THREExTHREE>( + typename fftw_interface<double>::complex *__restrict__ a, + const double kmax); + +template int kspace<FFTW, SMOOTH>::filter<float, ONE>( + typename fftw_interface<float>::complex *__restrict__ a, + const double kmax, + std::string filter_type); +template int kspace<FFTW, SMOOTH>::filter<float, THREE>( + typename fftw_interface<float>::complex *__restrict__ a, + const double kmax, + std::string filter_type); +template int kspace<FFTW, SMOOTH>::filter<float, THREExTHREE>( + typename fftw_interface<float>::complex *__restrict__ a, + const double kmax, + std::string filter_type); + +template int kspace<FFTW, SMOOTH>::filter<double, ONE>( + typename fftw_interface<double>::complex *__restrict__ a, + const double kmax, + std::string filter_type); +template int kspace<FFTW, SMOOTH>::filter<double, THREE>( + typename fftw_interface<double>::complex *__restrict__ a, + const double kmax, + std::string filter_type); +template int kspace<FFTW, 
SMOOTH>::filter<double, THREExTHREE>( + typename fftw_interface<double>::complex *__restrict__ a, + const double kmax, + std::string filter_type); + +template void kspace<FFTW, SMOOTH>::dealias<float, ONE>( + typename fftw_interface<float>::complex *__restrict__ a); +template void kspace<FFTW, SMOOTH>::dealias<float, THREE>( + typename fftw_interface<float>::complex *__restrict__ a); +template void kspace<FFTW, SMOOTH>::dealias<float, THREExTHREE>( + typename fftw_interface<float>::complex *__restrict__ a); + +template void kspace<FFTW, SMOOTH>::dealias<double, ONE>( + typename fftw_interface<double>::complex *__restrict__ a); +template void kspace<FFTW, SMOOTH>::dealias<double, THREE>( + typename fftw_interface<double>::complex *__restrict__ a); +template void kspace<FFTW, SMOOTH>::dealias<double, THREExTHREE>( + typename fftw_interface<double>::complex *__restrict__ a); + +template void kspace<FFTW, TWO_THIRDS>::cospectrum<float, ONE>( + const typename fftw_interface<float>::complex *__restrict__ a, + const typename fftw_interface<float>::complex *__restrict__ b, + const hid_t group, + const std::string dset_name, + const hsize_t toffset); +template void kspace<FFTW, TWO_THIRDS>::cospectrum<float, THREE>( + const typename fftw_interface<float>::complex *__restrict__ a, + const typename fftw_interface<float>::complex *__restrict__ b, + const hid_t group, + const std::string dset_name, + const hsize_t toffset); +template void kspace<FFTW, TWO_THIRDS>::cospectrum<float, THREExTHREE>( + const typename fftw_interface<float>::complex *__restrict__ a, + const typename fftw_interface<float>::complex *__restrict__ b, + const hid_t group, + const std::string dset_name, + const hsize_t toffset); +template void kspace<FFTW, TWO_THIRDS>::cospectrum<double, ONE>( + const typename fftw_interface<double>::complex *__restrict__ a, + const typename fftw_interface<double>::complex *__restrict__ b, + const hid_t group, + const std::string dset_name, + const hsize_t toffset); 
+template void kspace<FFTW, TWO_THIRDS>::cospectrum<double, THREE>( + const typename fftw_interface<double>::complex *__restrict__ a, + const typename fftw_interface<double>::complex *__restrict__ b, + const hid_t group, + const std::string dset_name, + const hsize_t toffset); +template void kspace<FFTW, TWO_THIRDS>::cospectrum<double, THREExTHREE>( + const typename fftw_interface<double>::complex *__restrict__ a, + const typename fftw_interface<double>::complex *__restrict__ b, + const hid_t group, + const std::string dset_name, + const hsize_t toffset); + +template void kspace<FFTW, SMOOTH>::cospectrum<float, ONE>( + const typename fftw_interface<float>::complex *__restrict__ a, + const typename fftw_interface<float>::complex *__restrict__ b, + const hid_t group, + const std::string dset_name, + const hsize_t toffset); +template void kspace<FFTW, SMOOTH>::cospectrum<float, THREE>( + const typename fftw_interface<float>::complex *__restrict__ a, + const typename fftw_interface<float>::complex *__restrict__ b, + const hid_t group, + const std::string dset_name, + const hsize_t toffset); +template void kspace<FFTW, SMOOTH>::cospectrum<float, THREExTHREE>( + const typename fftw_interface<float>::complex *__restrict__ a, + const typename fftw_interface<float>::complex *__restrict__ b, + const hid_t group, + const std::string dset_name, + const hsize_t toffset); +template void kspace<FFTW, SMOOTH>::cospectrum<double, ONE>( + const typename fftw_interface<double>::complex *__restrict__ a, + const typename fftw_interface<double>::complex *__restrict__ b, + const hid_t group, + const std::string dset_name, + const hsize_t toffset); +template void kspace<FFTW, SMOOTH>::cospectrum<double, THREE>( + const typename fftw_interface<double>::complex *__restrict__ a, + const typename fftw_interface<double>::complex *__restrict__ b, + const hid_t group, + const std::string dset_name, + const hsize_t toffset); +template void kspace<FFTW, SMOOTH>::cospectrum<double, THREExTHREE>( + 
const typename fftw_interface<double>::complex *__restrict__ a,
+        const typename fftw_interface<double>::complex *__restrict__ b,
+        const hid_t group,
+        const std::string dset_name,
+        const hsize_t toffset);
+
+template void kspace<FFTW, SMOOTH>::force_divfree<float>(
+       typename fftw_interface<float>::complex *__restrict__ a);
+template void kspace<FFTW, SMOOTH>::force_divfree<double>(
+       typename fftw_interface<double>::complex *__restrict__ a);
+
diff --git a/bfps/cpp/kspace.hpp b/bfps/cpp/kspace.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..7fa77e8b75fc3b86e27681550dd8c34b5568d7c9
--- /dev/null
+++ b/bfps/cpp/kspace.hpp
@@ -0,0 +1,195 @@
+/**********************************************************************
+*                                                                     *
+*  Copyright 2015 Max Planck Institute                                *
+*                 for Dynamics and Self-Organization                  *
+*                                                                     *
+*  This file is part of bfps.                                         *
+*                                                                     *
+*  bfps is free software: you can redistribute it and/or modify      *
+*  it under the terms of the GNU General Public License as published *
+*  by the Free Software Foundation, either version 3 of the License,  *
+*  or (at your option) any later version.                             *
+*                                                                     *
+*  bfps is distributed in the hope that it will be useful,            *
+*  but WITHOUT ANY WARRANTY; without even the implied warranty of     *
+*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the      *
+*  GNU General Public License for more details.                       *
+*                                                                     *
+*  You should have received a copy of the GNU General Public License *
+*  along with bfps.  If not, see <http://www.gnu.org/licenses/>       *
+*                                                                     *
+* Contact: Cristian.Lalescu@ds.mpg.de                                 *
+*                                                                     *
+**********************************************************************/
+
+
+
+#ifndef KSPACE_HPP
+
+#define KSPACE_HPP
+
+#include <hdf5.h>
+#include <cstddef>
+#include <cstdint>
+#include <unordered_map>
+#include <vector>
+#include <string>
+#include "omputils.hpp"
+#include "fftw_interface.hpp"
+#include "field_layout.hpp"
+
+enum field_backend {FFTW};
+enum kspace_dealias_type {TWO_THIRDS, SMOOTH};
+
+
+/** \brief Wavenumber-space companion of a field layout.
+ *
+ *  Stores the wavenumber increments, the wavenumber components along each
+ *  direction (`kx`, `ky`, `kz`), dealiasing and shell information, and
+ *  provides the `CLOOP*` helpers that call a functor for every element of
+ *  the local array described by `layout`.
+ */
+template <field_backend be,
+          kspace_dealias_type dt>
+class kspace
+{
+    public:
+        /* relevant field layout */
+        field_layout<ONE> *layout;
+
+        /* physical parameters */
+        double dkx, dky, dkz, dk, dk2;
+
+        /* mode and dealiasing information */
+        double kMx, kMy, kMz, kM, kM2;
+        std::vector<double> kx, ky, kz;
+        std::unordered_map<int, double> dealias_filter;
+        std::vector<double> kshell;
+        std::vector<int64_t> nshell;
+        int nshells;
+
+        /* methods */
+        template <field_components fc>
+        kspace(
+                const field_layout<fc> *source_layout,
+                const double DKX = 1.0,
+                const double DKY = 1.0,
+                const double DKZ = 1.0);
+        ~kspace();
+
+        int store(hid_t stat_file);
+
+        template <typename rnumber,
+                  field_components fc>
+        void low_pass(
+                typename fftw_interface<rnumber>::complex *__restrict__ a,
+                const double kmax);
+
+        template <typename rnumber,
+                  field_components fc>
+        void Gauss_filter(
+                typename fftw_interface<rnumber>::complex *__restrict__ a,
+                const double sigma);
+
+        template <typename rnumber,
+                  field_components fc>
+        int filter(
+                typename fftw_interface<rnumber>::complex *__restrict__ a,
+                const double wavenumber,
+                std::string filter_type = std::string("Gauss"));
+
+        template <typename rnumber,
+                  field_components fc>
+        void dealias(typename fftw_interface<rnumber>::complex *__restrict__ a);
+
+        template <typename rnumber,
+                  field_components fc>
+        void cospectrum(
+                const rnumber(* __restrict__ a)[2],
+                const rnumber(* __restrict__ b)[2],
+                const hid_t group,
+                const std::string dset_name,
+                const hsize_t toffset);
+
+        /* Call `expression(cindex, xindex, yindex, zindex)` for every
+         * element of the local array; `cindex` is the linear offset of
+         * the element. The loop over `subsizes[1]` is split between the
+         * OpenMP threads. */
+        template <class func_type>
+        void CLOOP(func_type expression)
+        {
+            #pragma omp parallel
+            {
+                const hsize_t start = OmpUtils::ForIntervalStart(this->layout->subsizes[1]);
+                const hsize_t end = OmpUtils::ForIntervalEnd(this->layout->subsizes[1]);
+
+                for (hsize_t yindex = 0; yindex < this->layout->subsizes[0]; yindex++){
+                    for (hsize_t zindex = start; zindex < end; zindex++){
+                        ptrdiff_t cindex = yindex*this->layout->subsizes[1]*this->layout->subsizes[2] + zindex*this->layout->subsizes[2];
+                        for (hsize_t xindex = 0; xindex < this->layout->subsizes[2]; xindex++)
+                        {
+                            expression(cindex, xindex, yindex, zindex);
+                            cindex++;
+                        }
+                    }
+                }
+            }
+        }
+
+        /* Same traversal as `CLOOP`; `expression` additionally receives
+         * `k2 = kx[xindex]^2 + ky[yindex]^2 + kz[zindex]^2` as fifth
+         * argument. Implemented on top of `CLOOP`. */
+        template <class func_type>
+        void CLOOP_K2(func_type expression)
+        {
+            this->CLOOP(
+                    [&](ptrdiff_t cindex,
+                        hsize_t xindex,
+                        hsize_t yindex,
+                        hsize_t zindex){
+                double k2 = (this->kx[xindex]*this->kx[xindex] +
+                             this->ky[yindex]*this->ky[yindex] +
+                             this->kz[zindex]*this->kz[zindex]);
+                expression(cindex, xindex, yindex, zindex, k2);
+            });
+        }
+
+        /* Same traversal as `CLOOP_K2`; `expression` receives a sixth
+         * argument equal to 1 for `xindex == 0` and 2 for all other
+         * values of `xindex`. */
+        template <class func_type>
+        void CLOOP_K2_NXMODES(func_type expression)
+        {
+            #pragma omp parallel
+            {
+                const hsize_t start = OmpUtils::ForIntervalStart(this->layout->subsizes[1]);
+                const hsize_t end = OmpUtils::ForIntervalEnd(this->layout->subsizes[1]);
+
+                for (hsize_t yindex = 0; yindex < this->layout->subsizes[0]; yindex++){
+                    for (hsize_t zindex = start; zindex < end; zindex++){
+                        ptrdiff_t cindex = yindex*this->layout->subsizes[1]*this->layout->subsizes[2] + zindex*this->layout->subsizes[2];
+                        hsize_t xindex = 0;
+                        double k2 = (
+                                this->kx[xindex]*this->kx[xindex] +
+                                this->ky[yindex]*this->ky[yindex] +
+                                this->kz[zindex]*this->kz[zindex]);
+                        expression(cindex, xindex, yindex, zindex, k2, 1);
+                        cindex++;
+                        for (xindex = 1; xindex < this->layout->subsizes[2]; xindex++)
+                        {
+                            k2 = (this->kx[xindex]*this->kx[xindex] +
+                                  this->ky[yindex]*this->ky[yindex] +
+                                  this->kz[zindex]*this->kz[zindex]);
+                            expression(cindex, xindex, yindex, zindex, k2, 2);
+                            cindex++;
+                        }
+                    }
+                }
+            }
+        }
+        template <typename rnumber>
+        void force_divfree(typename fftw_interface<rnumber>::complex *__restrict__ a);
+};
+
+#endif//KSPACE_HPP
+
diff --git a/bfps/cpp/omputils.hpp b/bfps/cpp/omputils.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..cdd6c6c173c7cf002b72e0c1a7aebcf727f2d33e
--- /dev/null
+++ b/bfps/cpp/omputils.hpp
@@ -0,0 +1,27 @@
+#ifndef OMPUTILS_HPP
+#define OMPUTILS_HPP
+
+#include <omp.h>
+
+namespace OmpUtils{
+
+template <class IndexType>
+inline IndexType ForIntervalStart(const IndexType size){
+    const double chunk = double(size)/double(omp_get_num_threads());
+    const IndexType start = IndexType(chunk*double(omp_get_thread_num()));
+    return start;
+}
+
+template <class IndexType>
+inline IndexType ForIntervalEnd(const IndexType size){
+    const double chunk = double(size)/double(omp_get_num_threads());
+    const IndexType end = (omp_get_thread_num() == omp_get_num_threads()-1) ? 
+                            size:
+                            IndexType(chunk*double(omp_get_thread_num()+1));
+    return end;
+}
+
+}
+
+
+#endif
diff --git a/bfps/cpp/particles.cpp b/bfps/cpp/particles.cpp
index 847f065d49299b559162060876402101fe48d9d4..cdaf157cb912c3074faf84bfecf1d9b3752c78a7 100644
--- a/bfps/cpp/particles.cpp
+++ b/bfps/cpp/particles.cpp
@@ -43,17 +43,17 @@ template <particle_types particle_type, class rnumber, int interp_neighbours>
 particles<particle_type, rnumber, interp_neighbours>::particles(
         const char *NAME,
         const hid_t data_file_id,
-        interpolator_base<rnumber, interp_neighbours> *FIELD,
+        interpolator_base<rnumber, interp_neighbours> *VEL,
         const int TRAJ_SKIP,
         const int INTEGRATION_STEPS) : particles_io_base<particle_type>(
             NAME,
             TRAJ_SKIP,
             data_file_id,
-            FIELD->descriptor->comm)
+            VEL->descriptor->comm)
 {
     assert((INTEGRATION_STEPS <= 6) &&
            (INTEGRATION_STEPS >= 1));
-    this->vel = FIELD;
+    this->vel = VEL;
     this->integration_steps = INTEGRATION_STEPS;
     this->array_size = this->nparticles * state_dimension(particle_type);
     this->state = new double[this->array_size];
diff --git a/bfps/cpp/particles/abstract_particles_input.hpp b/bfps/cpp/particles/abstract_particles_input.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..77dcbc638903a668ce6e2a0084815832b0580495
--- /dev/null
+++ b/bfps/cpp/particles/abstract_particles_input.hpp
@@ -0,0 +1,29 @@
+#ifndef ABSTRACT_PARTICLES_INPUT_HPP
+#define ABSTRACT_PARTICLES_INPUT_HPP
+
+#include <memory>
+#include <tuple>
+#include <vector>
+
+/** \brief Interface of a source of particle data.
+ *
+ *  \tparam partsize_t integer type of particle counts and indexes.
+ *  \tparam real_number type of the values stored in the returned arrays.
+ */
+template <class partsize_t, class real_number>
+class abstract_particles_input {
+public:
+    virtual ~abstract_particles_input(){}
+
+    virtual partsize_t getTotalNbParticles()  = 0;
+    virtual partsize_t getLocalNbParticles()  = 0;
+    virtual int getNbRhs()  = 0;
+
+    /* the arrays are returned through owning smart pointers */
+    virtual std::unique_ptr<real_number[]> getMyParticles()  = 0;
+    virtual std::unique_ptr<partsize_t[]> getMyParticlesIndexes()  = 0;
+    virtual std::vector<std::unique_ptr<real_number[]>> getMyRhs()  = 0;
+};
+
+
+#endif
diff --git 
a/bfps/cpp/particles/abstract_particles_output.hpp b/bfps/cpp/particles/abstract_particles_output.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..a6eccaea003618b8acbf1a9252c1e6c5bedb3378
--- /dev/null
+++ b/bfps/cpp/particles/abstract_particles_output.hpp
@@ -0,0 +1,305 @@
+#ifndef ABSTRACT_PARTICLES_OUTPUT
+#define ABSTRACT_PARTICLES_OUTPUT
+
+#include <memory>
+#include <vector>
+#include <utility>
+#include <cassert>
+#include <algorithm>
+#include <cstddef>
+
+#include "base.hpp"
+#include "particles_utils.hpp"
+#include "alltoall_exchanger.hpp"
+#include "scope_timer.hpp"
+#include "env_utils.hpp"
+
+/** \brief Redistribute particle data to "writer" processes and write it.
+ *
+ *  `save` sorts the local particles by index, sends each particle to the
+ *  process whose chunk contains its index, puts the received particles in
+ *  index order and calls the pure virtual `write` with the result.
+ *
+ *  The constructor chooses the number of writers and the number of
+ *  particles per writer from the size of the positions and from the
+ *  environment variables BFPS_PO_MIN_BYTES, BFPS_PO_CHUNK_BYTES and
+ *  BFPS_PO_MAX_PROCESSES.
+ */
+template <class partsize_t, class real_number, int size_particle_positions, int size_particle_rhs>
+class abstract_particles_output {
+    MPI_Comm mpi_com;
+    MPI_Comm mpi_com_writer;
+
+    int my_rank;
+    int nb_processes;
+
+    const partsize_t total_nb_particles;
+    const int nb_rhs;
+
+    std::unique_ptr<std::pair<partsize_t,partsize_t>[]> buffer_indexes_send;
+    std::unique_ptr<real_number[]> buffer_particles_positions_send;
+    std::vector<std::unique_ptr<real_number[]>> buffer_particles_rhs_send;
+    partsize_t size_buffers_send;
+
+    std::unique_ptr<real_number[]> buffer_particles_positions_recv;
+    std::vector<std::unique_ptr<real_number[]>> buffer_particles_rhs_recv;
+    std::unique_ptr<partsize_t[]> buffer_indexes_recv;
+    partsize_t size_buffers_recv;
+
+    int nb_processes_involved;
+    bool current_is_involved;
+    partsize_t particles_chunk_per_process;
+    partsize_t particles_chunk_current_size;
+    partsize_t particles_chunk_current_offset;
+
+protected:
+    MPI_Comm& getComWriter(){
+        return mpi_com_writer;
+    }
+
+    int getNbRhs() const {
+        return nb_rhs;
+    }
+
+    int getMyRank(){
+        return this->my_rank;
+    }
+
+    bool isInvolved() const{
+        return current_is_involved;
+    }
+
+public:
+    /** Query the communicator, choose the writers and the chunk owned by
+     *  this process, and create the communicator of the writers. */
+    abstract_particles_output(MPI_Comm in_mpi_com, const partsize_t inTotalNbParticles, const int in_nb_rhs)
+            : mpi_com(in_mpi_com), my_rank(-1), nb_processes(-1),
+                total_nb_particles(inTotalNbParticles), nb_rhs(in_nb_rhs),
+                buffer_particles_rhs_send(in_nb_rhs), size_buffers_send(-1),
+                buffer_particles_rhs_recv(in_nb_rhs), size_buffers_recv(-1),
+                nb_processes_involved(0), current_is_involved(true), particles_chunk_per_process(0),
+                particles_chunk_current_size(0), particles_chunk_current_offset(0) {
+
+        AssertMpi(MPI_Comm_rank(mpi_com, &my_rank));
+        AssertMpi(MPI_Comm_size(mpi_com, &nb_processes));
+
+        const size_t MinBytesPerProcess = env_utils::GetValue<size_t>("BFPS_PO_MIN_BYTES", 32 * 1024 * 1024); // Default 32MB
+        const size_t ChunkBytes = env_utils::GetValue<size_t>("BFPS_PO_CHUNK_BYTES", 8 * 1024 * 1024); // Default 8MB
+        const int MaxProcessesInvolved = std::min(nb_processes, env_utils::GetValue<int>("BFPS_PO_MAX_PROCESSES", 128));
+
+        // We split the processes using positions size only
+        const size_t totalBytesForPositions = total_nb_particles*size_particle_positions*sizeof(real_number);
+
+
+        if(MinBytesPerProcess*MaxProcessesInvolved < totalBytesForPositions){
+            size_t extraChunkBytes = 1;
+            while((MinBytesPerProcess+extraChunkBytes*ChunkBytes)*MaxProcessesInvolved < totalBytesForPositions){
+                extraChunkBytes += 1;
+            }
+            const size_t bytesPerProcess = (MinBytesPerProcess+extraChunkBytes*ChunkBytes);
+            particles_chunk_per_process = partsize_t((bytesPerProcess+sizeof(real_number)*size_particle_positions-1)/(sizeof(real_number)*size_particle_positions));
+            nb_processes_involved = int((total_nb_particles+particles_chunk_per_process-1)/particles_chunk_per_process);
+        }
+        // else limit based on minBytesPerProcess
+        else{
+            particles_chunk_per_process = partsize_t((MinBytesPerProcess+sizeof(real_number)*size_particle_positions-1)/(sizeof(real_number)*size_particle_positions));
+            // Count the writers in particles, not in bytes: the chunk is rounded up to
+            // a whole particle, so a count based on bytes can produce a last writer
+            // whose offset is not smaller than total_nb_particles.
+            nb_processes_involved = std::max(1,std::min(MaxProcessesInvolved,int((total_nb_particles+particles_chunk_per_process-1)/particles_chunk_per_process)));
+        }
+
+        // Print out
+        if(my_rank == 0){
+            DEBUG_MSG("[INFO] Limit of processes involved in the particles ouput = %d (BFPS_PO_MAX_PROCESSES)\n", MaxProcessesInvolved);
+            DEBUG_MSG("[INFO] Minimum bytes per process to write = %llu (BFPS_PO_MIN_BYTES) for a complete output of = %llu for positions\n",
+                      static_cast<unsigned long long>(MinBytesPerProcess),
+                      static_cast<unsigned long long>(totalBytesForPositions));
+            DEBUG_MSG("[INFO] Consequently, there are %d processes that actually write data (%lld particles per process)\n",
+                      nb_processes_involved,
+                      static_cast<long long>(particles_chunk_per_process));
+        }
+
+        if(my_rank < nb_processes_involved){
+            current_is_involved = true;
+            particles_chunk_current_offset = my_rank*particles_chunk_per_process;
+            assert(particles_chunk_current_offset < total_nb_particles);
+            particles_chunk_current_size = std::min(particles_chunk_per_process, total_nb_particles-particles_chunk_current_offset);
+            assert(particles_chunk_current_offset + particles_chunk_current_size <= total_nb_particles);
+            assert(my_rank != nb_processes_involved-1 || particles_chunk_current_offset + particles_chunk_current_size == total_nb_particles);
+        }
+        else{
+            current_is_involved = false;
+            particles_chunk_current_size = 0;
+            particles_chunk_current_offset = total_nb_particles;
+        }
+
+        AssertMpi( MPI_Comm_split(mpi_com,
+                       (current_is_involved ? 1 : MPI_UNDEFINED),
+                       my_rank, &mpi_com_writer) );
+    }
+
+    virtual ~abstract_particles_output(){
+        if(current_is_involved){
+            AssertMpi( MPI_Comm_free(&mpi_com_writer) );
+        }
+    }
+
+    partsize_t getTotalNbParticles() const {
+        return total_nb_particles;
+    }
+
+    /** Free the communication buffers; `save` allocates them again when needed. */
+    void releaseMemory(){
+        // reset() frees the arrays; release() would only drop ownership and leak them
+        buffer_indexes_send.reset();
+        buffer_particles_positions_send.reset();
+        size_buffers_send = -1;
+        buffer_indexes_recv.reset();
+        buffer_particles_positions_recv.reset();
+        size_buffers_recv = -1;
+        for(int idx_rhs = 0 ; idx_rhs < nb_rhs ; ++idx_rhs){
+            buffer_particles_rhs_send[idx_rhs].reset();
+            buffer_particles_rhs_recv[idx_rhs].reset();
+        }
+    }
+
+    /** Redistribute the given particles and call `write` on the writers.
+     *
+     *  \param input_particles_positions positions of the local particles
+     *         (`size_particle_positions` values per particle).
+     *  \param input_particles_rhs `nb_rhs` arrays with `size_particle_rhs`
+     *         values per particle.
+     *  \param index_particles index of each local particle.
+     *  \param nb_particles number of local particles.
+     *  \param idx_time_step forwarded to `write`.
+     */
+    void save(
+            const real_number input_particles_positions[],
+            const std::unique_ptr<real_number[]> input_particles_rhs[],
+            const partsize_t index_particles[],
+            const partsize_t nb_particles,
+            const int idx_time_step){
+        TIMEZONE("abstract_particles_output::save");
+        assert(total_nb_particles != -1);
+
+        {
+            TIMEZONE("sort-to-distribute");
+
+            if(size_buffers_send < nb_particles && nb_particles){
+                buffer_indexes_send.reset(new std::pair<partsize_t,partsize_t>[nb_particles]);
+                buffer_particles_positions_send.reset(new real_number[nb_particles*size_particle_positions]);
+                for(int idx_rhs = 0 ; idx_rhs < nb_rhs ; ++idx_rhs){
+                    buffer_particles_rhs_send[idx_rhs].reset(new real_number[nb_particles*size_particle_rhs]);
+                }
+                size_buffers_send = nb_particles;
+            }
+
+            for(partsize_t idx_part = 0 ; idx_part < nb_particles ; ++idx_part){
+                buffer_indexes_send[idx_part].first = idx_part;
+                buffer_indexes_send[idx_part].second = index_particles[idx_part];
+            }
+
+            std::sort(&buffer_indexes_send[0], &buffer_indexes_send[nb_particles], [](const std::pair<partsize_t,partsize_t>& p1,
+                                                                                      const std::pair<partsize_t,partsize_t>& p2){
+                return p1.second < p2.second;
+            });
+
+            for(partsize_t idx_part = 0 ; idx_part < nb_particles ; ++idx_part){
+                const partsize_t src_idx = buffer_indexes_send[idx_part].first;
+                const partsize_t dst_idx = idx_part;
+
+                for(int idx_val = 0 ; idx_val < size_particle_positions ; ++idx_val){
+                    buffer_particles_positions_send[dst_idx*size_particle_positions + idx_val]
+                            = input_particles_positions[src_idx*size_particle_positions + idx_val];
+                }
+                for(int idx_rhs = 0 ; idx_rhs < nb_rhs ; ++idx_rhs){
+                    for(int idx_val = 0 ; idx_val < int(size_particle_rhs) ; ++idx_val){
+                        buffer_particles_rhs_send[idx_rhs][dst_idx*size_particle_rhs + idx_val]
+                                = input_particles_rhs[idx_rhs][src_idx*size_particle_rhs + idx_val];
+                    }
+                }
+            }
+        }
+
+        partsize_t* buffer_indexes_send_tmp = reinterpret_cast<partsize_t*>(buffer_indexes_send.get());// trick re-use buffer_indexes_send memory
+        std::vector<partsize_t> nb_particles_to_send(nb_processes, 0);
+        for(partsize_t idx_part = 0 ; idx_part < nb_particles ; ++idx_part){
+            const int dest_proc = int(buffer_indexes_send[idx_part].second/particles_chunk_per_process);
+            assert(dest_proc < nb_processes_involved);
+            nb_particles_to_send[dest_proc] += 1;
+            buffer_indexes_send_tmp[idx_part] = buffer_indexes_send[idx_part].second;
+        }
+
+        alltoall_exchanger exchanger(mpi_com, std::move(nb_particles_to_send));
+        // nb_particles_to_send is invalid after here
+
+        const int nb_to_receive = exchanger.getTotalToRecv();
+        assert(nb_to_receive == particles_chunk_current_size);
+
+        if(size_buffers_recv < nb_to_receive && nb_to_receive){
+            buffer_indexes_recv.reset(new partsize_t[nb_to_receive]);
+            buffer_particles_positions_recv.reset(new real_number[nb_to_receive*size_particle_positions]);
+            for(int idx_rhs = 0 ; idx_rhs < nb_rhs ; ++idx_rhs){
+                buffer_particles_rhs_recv[idx_rhs].reset(new real_number[nb_to_receive*size_particle_rhs]);
+            }
+            size_buffers_recv = nb_to_receive;
+        }
+
+        {
+            TIMEZONE("exchange");
+            // Could be done with multiple asynchronous coms
+            exchanger.alltoallv<partsize_t>(buffer_indexes_send_tmp, buffer_indexes_recv.get());
+            exchanger.alltoallv<real_number>(buffer_particles_positions_send.get(), buffer_particles_positions_recv.get(), size_particle_positions);
+            for(int idx_rhs = 0 ; idx_rhs < nb_rhs ; ++idx_rhs){
+                exchanger.alltoallv<real_number>(buffer_particles_rhs_send[idx_rhs].get(), buffer_particles_rhs_recv[idx_rhs].get(), size_particle_rhs);
+            }
+        }
+
+        // Stop here if not involved
+        if(current_is_involved == false){
+            assert(nb_to_receive == 0);
+            return;
+        }
+
+        if(size_buffers_send < nb_to_receive && nb_to_receive){
+            buffer_indexes_send.reset(new std::pair<partsize_t,partsize_t>[nb_to_receive]);
+            buffer_particles_positions_send.reset(new real_number[nb_to_receive*size_particle_positions]);
+            for(int idx_rhs = 0 ; idx_rhs < nb_rhs ; ++idx_rhs){
+                buffer_particles_rhs_send[idx_rhs].reset(new real_number[nb_to_receive*size_particle_rhs]);
+            }
+            size_buffers_send = nb_to_receive;
+        }
+
+        {
+            TIMEZONE("copy-local-order");
+            for(partsize_t idx_part = 0 ; idx_part < nb_to_receive ; ++idx_part){
+                const partsize_t src_idx = idx_part;
+                const partsize_t dst_idx = buffer_indexes_recv[idx_part]-particles_chunk_current_offset;
+                assert(0 <= dst_idx);
+                assert(dst_idx < particles_chunk_current_size);
+
+                for(int idx_val = 0 ; idx_val < size_particle_positions ; ++idx_val){
+                    buffer_particles_positions_send[dst_idx*size_particle_positions + idx_val]
+                            = buffer_particles_positions_recv[src_idx*size_particle_positions + idx_val];
+                }
+                for(int idx_rhs = 0 ; idx_rhs < nb_rhs ; ++idx_rhs){
+                    for(int idx_val = 0 ; idx_val < int(size_particle_rhs) ; ++idx_val){
+                        buffer_particles_rhs_send[idx_rhs][dst_idx*size_particle_rhs + idx_val]
+                                = buffer_particles_rhs_recv[idx_rhs][src_idx*size_particle_rhs + idx_val];
+                    }
+                }
+            }
+        }
+
+        write(idx_time_step, buffer_particles_positions_send.get(), buffer_particles_rhs_send.data(),
+              nb_to_receive, particles_chunk_current_offset);
+    }
+
+    /** Write `nb_particles` particles whose indexes start at
+     *  `particles_idx_offset`; only called on processes that are writers. */
+    virtual void write(const int idx_time_step, const real_number* positions, const std::unique_ptr<real_number[]>* rhs,
+                       const partsize_t nb_particles, const partsize_t particles_idx_offset) = 0;
+};
+
+#endif
diff --git 
a/bfps/cpp/particles/abstract_particles_system.hpp b/bfps/cpp/particles/abstract_particles_system.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..1c8592f37536e5c6c6b4df8f45cc855b3f21eb3f
--- /dev/null
+++ b/bfps/cpp/particles/abstract_particles_system.hpp
@@ -0,0 +1,64 @@
+#ifndef ABSTRACT_PARTICLES_SYSTEM_HPP
+#define ABSTRACT_PARTICLES_SYSTEM_HPP
+
+#include <memory>
+
+//- Not generic to enable sampling begin
+#include "field.hpp"
+#include "kspace.hpp"
+//- Not generic to enable sampling end
+
+
+/** \brief Interface of a particle system (every operation is pure virtual).
+ *
+ *  The virtual destructor makes it safe to destroy an implementation
+ *  through a pointer to this base class.
+ */
+template <class partsize_t, class real_number>
+class abstract_particles_system {
+public:
+    virtual ~abstract_particles_system(){}
+
+    virtual void compute() = 0;
+
+    virtual void move(const real_number dt) = 0;
+
+    virtual void redistribute() = 0;
+
+    virtual void inc_step_idx() = 0;
+
+    virtual void shift_rhs_vectors() = 0;
+
+    virtual void completeLoop(const real_number dt) = 0;
+
+    virtual const real_number* getParticlesPositions() const = 0;
+
+    virtual const std::unique_ptr<real_number[]>* getParticlesRhs() const = 0;
+
+    virtual const partsize_t* getParticlesIndexes() const = 0;
+
+    virtual partsize_t getLocalNbParticles() const = 0;
+
+    virtual partsize_t getGlobalNbParticles() const = 0;
+
+    virtual int getNbRhs() const = 0;
+
+    virtual int get_step_idx() const = 0;
+
+    //- Not generic to enable sampling begin
+    virtual void sample_compute_field(const field<float, FFTW, ONE>& sample_field,
+                                real_number sample_rhs[]) = 0;
+    virtual void sample_compute_field(const field<float, FFTW, THREE>& sample_field,
+                                real_number sample_rhs[]) = 0;
+    virtual void sample_compute_field(const field<float, FFTW, THREExTHREE>& sample_field,
+                                real_number sample_rhs[]) = 0;
+    virtual void sample_compute_field(const field<double, FFTW, ONE>& sample_field,
+                                real_number sample_rhs[]) = 0;
+    virtual void sample_compute_field(const field<double, FFTW, THREE>& sample_field,
+                                real_number sample_rhs[]) = 0;
+    virtual void sample_compute_field(const field<double, FFTW, THREExTHREE>& sample_field,
+                                real_number sample_rhs[]) = 0;
+    //- Not generic to enable sampling end
+};
+
+#endif
diff --git a/bfps/cpp/particles/alltoall_exchanger.hpp b/bfps/cpp/particles/alltoall_exchanger.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..2beaf092e8e6c7a801efd492270d29c2d4dba398
--- /dev/null
+++ b/bfps/cpp/particles/alltoall_exchanger.hpp
@@ -0,0 +1,131 @@
+#ifndef ALLTOALL_EXCHANGER_HPP
+#define ALLTOALL_EXCHANGER_HPP
+
+#include <mpi.h>
+#include <cassert>
+
+#include "base.hpp"
+#include "particles_utils.hpp"
+#include "scope_timer.hpp"
+
+class alltoall_exchanger {
+    const MPI_Comm mpi_com;
+
+    int my_rank;
+    int nb_processes;
+
+    const std::vector<int> nb_items_to_send;
+
+    std::vector<int> offset_items_to_send;
+
+    std::vector<int> nb_items_to_sendrecv_all;
+    std::vector<int> nb_items_to_recv;
+    std::vector<int> offset_items_to_recv;
+
+    int total_to_recv;
+
+    template <class index_type>
+    static std::vector<int> ConvertVector(const std::vector<index_type>& vector){
+        std::vector<int> resVector(vector.size());
+        for(size_t idx = 0 ; idx < vector.size() ; ++idx){
+            assert(vector[idx] <= std::numeric_limits<int>::max());
+            resVector[idx] = int(vector[idx]);
+        }
+        return resVector;
+    }
+
+public:
+    template <class index_type>
+    alltoall_exchanger(const MPI_Comm& in_mpi_com, const std::vector<index_type>& in_nb_items_to_send)
+        : alltoall_exchanger(in_mpi_com, ConvertVector(in_nb_items_to_send)){
+
+    }
+
+    alltoall_exchanger(const MPI_Comm& in_mpi_com, std::vector<int>/*no ref to move here*/ in_nb_items_to_send)
+        :mpi_com(in_mpi_com), nb_items_to_send(std::move(in_nb_items_to_send)), total_to_recv(0){
+        TIMEZONE("alltoall_exchanger::constructor");
+
+        AssertMpi(MPI_Comm_rank(mpi_com, &my_rank));
+        AssertMpi(MPI_Comm_size(mpi_com, &nb_processes));
+
+        assert(int(nb_items_to_send.size()) == nb_processes);
+
+        offset_items_to_send.resize(nb_processes+1, 0);
+        for(int idx_proc = 0 ; idx_proc < nb_processes ; ++idx_proc){
+            
offset_items_to_send[idx_proc+1] = offset_items_to_send[idx_proc]
+                                             + nb_items_to_send[idx_proc];
+        }
+
+        nb_items_to_sendrecv_all.resize(nb_processes*nb_processes);
+        AssertMpi(MPI_Allgather(const_cast<int*>(nb_items_to_send.data()), nb_processes, MPI_INT,
+                          nb_items_to_sendrecv_all.data(), nb_processes, MPI_INT,
+                          mpi_com));
+
+        nb_items_to_recv.resize(nb_processes, 0);
+        offset_items_to_recv.resize(nb_processes+1, 0);
+        for(int idx_proc = 0 ; idx_proc < nb_processes ; ++idx_proc){
+            const int nbrecv = nb_items_to_sendrecv_all[idx_proc*nb_processes + my_rank];
+            assert(static_cast<long long int>(total_to_recv) + static_cast<long long int>(nbrecv) <= std::numeric_limits<int>::max());
+            total_to_recv += nbrecv;
+            nb_items_to_recv[idx_proc] = nbrecv;
+            assert(static_cast<long long int>(nb_items_to_recv[idx_proc]) + static_cast<long long int>(offset_items_to_recv[idx_proc]) <= std::numeric_limits<int>::max());
+            offset_items_to_recv[idx_proc+1] = nb_items_to_recv[idx_proc]
+                                                    + offset_items_to_recv[idx_proc];
+        }
+    }
+
+    int getTotalToRecv() const{
+        return total_to_recv;
+    }
+
+    template <class ItemType>
+    void alltoallv_dt(const ItemType in_to_send[],
+                   ItemType out_to_recv[], const MPI_Datatype& in_type) const {
+        TIMEZONE("alltoallv");
+        AssertMpi(MPI_Alltoallv(const_cast<ItemType*>(in_to_send), const_cast<int*>(nb_items_to_send.data()),
+                          const_cast<int*>(offset_items_to_send.data()), in_type, out_to_recv,
+                          const_cast<int*>(nb_items_to_recv.data()), const_cast<int*>(offset_items_to_recv.data()), in_type,
+                          mpi_com));
+    }
+
+    template <class ItemType>
+    void alltoallv(const ItemType in_to_send[],
+                   ItemType out_to_recv[]) const {
+        alltoallv_dt<ItemType>(in_to_send, out_to_recv, particles_utils::GetMpiType(ItemType()));
+    }
+
+    template <class ItemType>
+    void alltoallv_dt(const ItemType in_to_send[],
+                   ItemType out_to_recv[], const MPI_Datatype& in_type, const int in_nb_values_per_item) const {
+        TIMEZONE("alltoallv");
+        std::vector<int> nb_items_to_send_tmp = nb_items_to_send;
+        particles_utils::transform(nb_items_to_send_tmp.begin(), nb_items_to_send_tmp.end(), nb_items_to_send_tmp.begin(),
+                                   [&](const int val) -> int { assert(static_cast<long long int>(val) * static_cast<long long int>(in_nb_values_per_item) <= std::numeric_limits<int>::max());
+                                                               return val * in_nb_values_per_item ;});
+        std::vector<int> offset_items_to_send_tmp = offset_items_to_send;
+        particles_utils::transform(offset_items_to_send_tmp.begin(), offset_items_to_send_tmp.end(), offset_items_to_send_tmp.begin(),
+                                   [&](const int val) -> int { assert(static_cast<long long int>(val) * static_cast<long long int>(in_nb_values_per_item) <= std::numeric_limits<int>::max());
+                                                               return val * in_nb_values_per_item ;});
+        std::vector<int> nb_items_to_recv_tmp = nb_items_to_recv;
+        particles_utils::transform(nb_items_to_recv_tmp.begin(), nb_items_to_recv_tmp.end(), nb_items_to_recv_tmp.begin(),
+                                   [&](const int val) -> int { assert(static_cast<long long int>(val) * static_cast<long long int>(in_nb_values_per_item) <= std::numeric_limits<int>::max());
+                                                               return val * in_nb_values_per_item ;});
+        std::vector<int> offset_items_to_recv_tmp = offset_items_to_recv;
+        particles_utils::transform(offset_items_to_recv_tmp.begin(), offset_items_to_recv_tmp.end(), offset_items_to_recv_tmp.begin(),
+                                   [&](const int val) -> int { assert(static_cast<long long int>(val) * static_cast<long long int>(in_nb_values_per_item) <= std::numeric_limits<int>::max());
+                                                               return val * in_nb_values_per_item ;});
+
+        AssertMpi(MPI_Alltoallv(const_cast<ItemType*>(in_to_send), const_cast<int*>(nb_items_to_send_tmp.data()),
+                          const_cast<int*>(offset_items_to_send_tmp.data()), in_type, out_to_recv,
+                          const_cast<int*>(nb_items_to_recv_tmp.data()), const_cast<int*>(offset_items_to_recv_tmp.data()), in_type,
+                          mpi_com));
+    }
+
+    template <class ItemType>
+    void alltoallv(const ItemType in_to_send[],
+                   ItemType out_to_recv[], const int in_nb_values_per_item) const {
+        alltoallv_dt<ItemType>(in_to_send, out_to_recv,particles_utils::GetMpiType(ItemType()), in_nb_values_per_item);
+    }
+};
+
+#endif
diff --git a/bfps/cpp/particles/env_utils.hpp b/bfps/cpp/particles/env_utils.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..cd6fb3026ac19397fb525235f3d4f87e2cc2bb94
--- /dev/null
+++ b/bfps/cpp/particles/env_utils.hpp
@@ -0,0 +1,87 @@
+#ifndef ENV_UTILS_HPP
+#define ENV_UTILS_HPP
+
+
+#include <cstdlib>
+#include <sstream>
+#include <iostream>
+#include <cstring>
+#include <cstring>
+#include <array>
+
+
+/** \brief Helpers to read values from environment variables. */
+class env_utils {
+    /** Convert `str` to `VariableType`; `defaultValue` is returned unless
+     *  the extraction succeeds and consumes the whole string. */
+    template <class VariableType>
+    static const VariableType StrToOther(const char* const str, const VariableType& defaultValue = VariableType()){
+        std::istringstream iss(str,std::istringstream::in);
+        VariableType value;
+        iss >> value;
+        // eof() alone is not enough: a failed extraction on an empty string
+        // also sets it, and `value` would then be returned uninitialized
+        if( !iss.fail() && iss.eof() ) return value;
+        return defaultValue;
+    }
+
+public:
+    static bool VariableIsDefine(const char inVarName[]){
+        return getenv(inVarName) != 0;
+    }
+
+    template <class VariableType>
+    static const VariableType GetValue(const char inVarName[], const VariableType defaultValue = VariableType()){
+        const char*const value = getenv(inVarName);
+        if(!value){
+            return defaultValue;
+        }
+        return StrToOther(value,defaultValue);
+    }
+
+    static bool GetBool(const char inVarName[], const bool defaultValue = false){
+        const char*const value = getenv(inVarName);
+        if(!value){
+            return defaultValue;
+        }
+        return (strcmp(value,"TRUE") == 0) || (strcmp(value,"true") == 0) || (strcmp(value,"1") == 0);
+    }
+
+    static const char* GetStr(const char inVarName[], const char* const defaultValue = 0){
+        const char*const value = getenv(inVarName);
+        if(!value){
+            return defaultValue;
+        }
+        return value;
+    }
+
+    template <class VariableType, class ArrayType>
+    static int GetValueInArray(const char inVarName[], const ArrayType& possibleValues, const int nbPossibleValues, const int defaultIndex = -1){
+        const char*const value = getenv(inVarName);
+        if(value){
+            for(int idxPossible = 0 ; 
idxPossible < nbPossibleValues ; ++idxPossible){
+                if( StrToOther(value,VariableType()) == possibleValues[idxPossible] ){
+                    return idxPossible;
+                }
+            }
+        }
+        return defaultIndex;
+    }
+
+
+    template <class ArrayType>
+    static int GetStrInArray(const char inVarName[], const ArrayType& possibleValues, const int nbPossibleValues, const int defaultIndex = -1){
+        const char*const value = getenv(inVarName);
+        if(value){
+            for(int idxPossible = 0 ; idxPossible < nbPossibleValues ; ++idxPossible){
+                if( strcmp(value,possibleValues[idxPossible]) == 0 ){
+                    return idxPossible;
+                }
+            }
+        }
+        return defaultIndex;
+    }
+};
+
+#endif
+
diff --git a/bfps/cpp/particles/particles_adams_bashforth.hpp b/bfps/cpp/particles/particles_adams_bashforth.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..2fb61462f7970d823acd6dc3405799e362fa15af
--- /dev/null
+++ b/bfps/cpp/particles/particles_adams_bashforth.hpp
@@ -0,0 +1,155 @@
+#ifndef PARTICLES_ADAMS_BASHFORTH_HPP
+#define PARTICLES_ADAMS_BASHFORTH_HPP
+
+#include <memory>
+#include <stdexcept>
+#include <omp.h>
+
+#include "scope_timer.hpp"
+#include "particles_utils.hpp"
+
+/** \brief Adams-Bashforth update of the particle positions.
+ *
+ *  `move_particles` adds to every position value `dt` times a weighted
+ *  sum of the `nb_rhs` right-hand sides; `particles_rhs[0]` receives the
+ *  leading coefficient (1, 3/2, 23/12, ...).
+ */
+template <class partsize_t, class real_number, int size_particle_positions = 3, int size_particle_rhs = 3>
+class particles_adams_bashforth {
+    static_assert(size_particle_positions == size_particle_rhs,
+                  "Not having the same dimension for positions and rhs looks like a bug,"
+                  "otherwise comment this assertion.");
+public:
+    static const int Max_steps = 6;
+
+    /** Advance the positions in place.
+     *
+     *  \param particles_positions `nb_particles*size_particle_positions` values.
+     *  \param nb_particles number of particles.
+     *  \param particles_rhs `nb_rhs` arrays, indexed like the positions.
+     *  \param nb_rhs number of right-hand sides to combine (1 to `Max_steps`);
+     *         nothing is done for values smaller than 1.
+     *  \param dt time step.
+     *  \throws std::runtime_error if `nb_rhs` exceeds `Max_steps`.
+     */
+    void move_particles(real_number*__restrict__ particles_positions,
+                        const partsize_t nb_particles,
+                        const std::unique_ptr<real_number[]> particles_rhs[],
+                        const int nb_rhs, const real_number dt) const{
+        TIMEZONE("particles_adams_bashforth::move_particles");
+
+        if(Max_steps < nb_rhs){
+            throw std::runtime_error("Error, in bfps particles_adams_bashforth.\n"
+                                     "Step in particles_adams_bashforth is too large,"
+                                     "you must add formulation up this number or limit the number of steps.");
+        }
+
+        // Not needed: TIMEZONE_OMP_INIT_PREPARALLEL(omp_get_max_threads())
+#pragma omp parallel default(shared)
+        {
+            particles_utils::IntervalSplitter<partsize_t> interval(nb_particles,
+                                                       omp_get_num_threads(),
+                                                       omp_get_thread_num());
+
+            const partsize_t value_start = interval.getMyOffset()*size_particle_positions;
+            const partsize_t value_end = (interval.getMyOffset()+interval.getMySize())*size_particle_positions;
+
+            // TODO full unroll + blocking
+            switch (nb_rhs){
+            case 1:
+            {
+                const real_number* __restrict__ rhs_0 = particles_rhs[0].get();
+                for(partsize_t idx_value = value_start ; idx_value < value_end ; ++idx_value){
+                    // dt × [0]
+                    particles_positions[idx_value] += dt * rhs_0[idx_value];
+                }
+            }
+                break;
+            case 2:
+            {
+                const real_number* __restrict__ rhs_0 = particles_rhs[0].get();
+                const real_number* __restrict__ rhs_1 = particles_rhs[1].get();
+                for(partsize_t idx_value = value_start ; idx_value < value_end ; ++idx_value){
+                    // dt × (3[0] - [1])/2
+                    particles_positions[idx_value]
+                            += dt * (3.*rhs_0[idx_value]
+                                     - rhs_1[idx_value])/2.;
+                }
+            }
+                break;
+            case 3:
+            {
+                const real_number* __restrict__ rhs_0 = particles_rhs[0].get();
+                const real_number* __restrict__ rhs_1 = particles_rhs[1].get();
+                const real_number* __restrict__ rhs_2 = particles_rhs[2].get();
+                for(partsize_t idx_value = value_start ; idx_value < value_end ; ++idx_value){
+                    // dt × (23[0] - 16[1] + 5[2])/12
+                    particles_positions[idx_value]
+                            += dt * (23.*rhs_0[idx_value]
+                                     - 16.*rhs_1[idx_value]
+                                     +  5.*rhs_2[idx_value])/12.;
+                }
+            }
+                break;
+            case 4:
+            {
+                const real_number* __restrict__ rhs_0 = particles_rhs[0].get();
+                const real_number* __restrict__ rhs_1 = particles_rhs[1].get();
+                const real_number* __restrict__ rhs_2 = particles_rhs[2].get();
+                const real_number* __restrict__ rhs_3 = particles_rhs[3].get();
+                for(partsize_t idx_value = value_start ; idx_value < value_end ; ++idx_value){
+                    // dt × (55[0] - 59[1] + 37[2] - 9[3])/24
+                    particles_positions[idx_value]
+                            += dt * (55.*rhs_0[idx_value]
+                                     - 59.*rhs_1[idx_value]
+                                     + 37.*rhs_2[idx_value]
+                                     -  9.*rhs_3[idx_value])/24.;
+                }
+            }
+                break;
+            case 5:
+            {
+                const real_number* __restrict__ rhs_0 = particles_rhs[0].get();
+                const real_number* __restrict__ rhs_1 = particles_rhs[1].get();
+                const real_number* __restrict__ rhs_2 = particles_rhs[2].get();
+                const real_number* __restrict__ rhs_3 = particles_rhs[3].get();
+                const real_number* __restrict__ rhs_4 = particles_rhs[4].get();
+                for(partsize_t idx_value = value_start ; idx_value < value_end ; ++idx_value){
+                    // dt × (1901[0] - 2774[1] + 2616[2] - 1274[3] + 251[4])/720
+                    particles_positions[idx_value]
+                            += dt * (1901.*rhs_0[idx_value]
+                                     - 2774.*rhs_1[idx_value]
+                                     + 2616.*rhs_2[idx_value]
+                                     - 1274.*rhs_3[idx_value]
+                                     +  251.*rhs_4[idx_value])/720.;
+                }
+            }
+                break;
+            case 6:
+            {
+                const real_number* __restrict__ rhs_0 = particles_rhs[0].get();
+                const real_number* __restrict__ rhs_1 = particles_rhs[1].get();
+                const real_number* __restrict__ rhs_2 = particles_rhs[2].get();
+                const real_number* __restrict__ rhs_3 = particles_rhs[3].get();
+                const real_number* __restrict__ rhs_4 = particles_rhs[4].get();
+                const real_number* __restrict__ rhs_5 = particles_rhs[5].get();
+                for(partsize_t idx_value = value_start ; idx_value < value_end ; ++idx_value){
+                    // dt × (4277[0] - 7923[1] + 9982[2] - 7298[3] + 2877[4] - 475[5])/1440
+                    particles_positions[idx_value]
+                            += dt * (4277.*rhs_0[idx_value]
+                                     - 7923.*rhs_1[idx_value]
+                                     + 9982.*rhs_2[idx_value]
+                                     - 7298.*rhs_3[idx_value]
+                                     + 2877.*rhs_4[idx_value]
+                                     -  475.*rhs_5[idx_value])/1440.;
+                }
+            }
+                break;
+            }
+        }
+    }
+};
+
+
+
+#endif
diff --git a/bfps/cpp/particles/particles_distr_mpi.hpp b/bfps/cpp/particles/particles_distr_mpi.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..485595181f69b9fe1cf204b06df550a9ca74215d
--- /dev/null
+++ b/bfps/cpp/particles/particles_distr_mpi.hpp
@@ -0,0 +1,838 @@
+#ifndef PARTICLES_DISTR_MPI_HPP
+#define PARTICLES_DISTR_MPI_HPP
+
+#include <mpi.h>
+
+#include 
<vector> +#include <memory> +#include <cassert> + +#include <type_traits> +#include <omp.h> + +#include "scope_timer.hpp" +#include "particles_utils.hpp" + + +template <class partsize_t, class real_number> +class particles_distr_mpi { +protected: + static const int MaxNbRhs = 100; + + enum MpiTag{ + TAG_LOW_UP_NB_PARTICLES, + TAG_UP_LOW_NB_PARTICLES, + TAG_LOW_UP_PARTICLES, + TAG_UP_LOW_PARTICLES, + TAG_LOW_UP_RESULTS, + TAG_UP_LOW_RESULTS, + + TAG_LOW_UP_MOVED_NB_PARTICLES, + TAG_UP_LOW_MOVED_NB_PARTICLES, + TAG_LOW_UP_MOVED_PARTICLES, + TAG_UP_LOW_MOVED_PARTICLES, + + TAG_LOW_UP_MOVED_PARTICLES_INDEXES, + TAG_UP_LOW_MOVED_PARTICLES_INDEXES, + + TAG_LOW_UP_MOVED_PARTICLES_RHS, + TAG_LOW_UP_MOVED_PARTICLES_RHS_MAX = TAG_LOW_UP_MOVED_PARTICLES_RHS+MaxNbRhs, + + TAG_UP_LOW_MOVED_PARTICLES_RHS = TAG_LOW_UP_MOVED_PARTICLES_RHS_MAX, + TAG_UP_LOW_MOVED_PARTICLES_RHS_MAX = TAG_UP_LOW_MOVED_PARTICLES_RHS+MaxNbRhs, + }; + + struct NeighborDescriptor{ + int nbPartitionsToSend; + int nbPartitionsToRecv; + partsize_t nbParticlesToSend; + partsize_t nbParticlesToRecv; + int destProc; + int rankDiff; + bool isLower; + int idxLowerUpper; + + std::unique_ptr<real_number[]> toRecvAndMerge; + std::unique_ptr<real_number[]> toCompute; + std::unique_ptr<real_number[]> results; + }; + + enum Action{ + NOTHING_TODO, + RECV_PARTICLES, + COMPUTE_PARTICLES, + RELEASE_BUFFER_PARTICLES, + MERGE_PARTICLES, + + RECV_MOVE_NB_LOW, + RECV_MOVE_NB_UP, + RECV_MOVE_LOW, + RECV_MOVE_UP + }; + + MPI_Comm current_com; + + int my_rank; + int nb_processes; + int nb_processes_involved; + + const std::pair<int,int> current_partition_interval; + const int current_partition_size; + const std::array<size_t,3> field_grid_dim; + + std::unique_ptr<int[]> partition_interval_size_per_proc; + std::unique_ptr<int[]> partition_interval_offset_per_proc; + + std::unique_ptr<partsize_t[]> current_offset_particles_for_partition; + + std::vector<std::pair<Action,int>> whatNext; + std::vector<MPI_Request> mpiRequests; + 
std::vector<NeighborDescriptor> neigDescriptors; + +public: + //////////////////////////////////////////////////////////////////////////// + + particles_distr_mpi(MPI_Comm in_current_com, + const std::pair<int,int>& in_current_partitions, + const std::array<size_t,3>& in_field_grid_dim) + : current_com(in_current_com), + my_rank(-1), nb_processes(-1),nb_processes_involved(-1), + current_partition_interval(in_current_partitions), + current_partition_size(current_partition_interval.second-current_partition_interval.first), + field_grid_dim(in_field_grid_dim){ + + AssertMpi(MPI_Comm_rank(current_com, &my_rank)); + AssertMpi(MPI_Comm_size(current_com, &nb_processes)); + + partition_interval_size_per_proc.reset(new int[nb_processes]); + AssertMpi( MPI_Allgather( const_cast<int*>(¤t_partition_size), 1, MPI_INT, + partition_interval_size_per_proc.get(), 1, MPI_INT, + current_com) ); + assert(partition_interval_size_per_proc[my_rank] == current_partition_size); + + partition_interval_offset_per_proc.reset(new int[nb_processes+1]); + partition_interval_offset_per_proc[0] = 0; + for(int idxProc = 0 ; idxProc < nb_processes ; ++idxProc){ + partition_interval_offset_per_proc[idxProc+1] = partition_interval_offset_per_proc[idxProc] + partition_interval_size_per_proc[idxProc]; + } + + current_offset_particles_for_partition.reset(new partsize_t[current_partition_size+1]); + + nb_processes_involved = nb_processes; + while(nb_processes_involved != 0 && partition_interval_size_per_proc[nb_processes_involved-1] == 0){ + nb_processes_involved -= 1; + } + assert(nb_processes_involved != 0); + for(int idx_proc_involved = 0 ; idx_proc_involved < nb_processes_involved ; ++idx_proc_involved){ + assert(partition_interval_size_per_proc[idx_proc_involved] != 0); + } + + assert(int(field_grid_dim[IDX_Z]) == partition_interval_offset_per_proc[nb_processes_involved]); + } + + virtual ~particles_distr_mpi(){} + + //////////////////////////////////////////////////////////////////////////// + + 
template <class computer_class, class field_class, int size_particle_positions, int size_particle_rhs> + void compute_distr(computer_class& in_computer, + field_class& in_field, + const partsize_t current_my_nb_particles_per_partition[], + const real_number particles_positions[], + real_number particles_current_rhs[], + const int interpolation_size){ + TIMEZONE("compute_distr"); + + // Some processes might not be involved + if(nb_processes_involved <= my_rank){ + return; + } + + current_offset_particles_for_partition[0] = 0; + partsize_t myTotalNbParticles = 0; + for(int idxPartition = 0 ; idxPartition < current_partition_size ; ++idxPartition){ + myTotalNbParticles += current_my_nb_particles_per_partition[idxPartition]; + current_offset_particles_for_partition[idxPartition+1] = current_offset_particles_for_partition[idxPartition] + current_my_nb_particles_per_partition[idxPartition]; + } + + ////////////////////////////////////////////////////////////////////// + /// Exchange the number of particles in each partition + /// Could involve only here but I do not think it will be a problem + ////////////////////////////////////////////////////////////////////// + + + assert(whatNext.size() == 0); + assert(mpiRequests.size() == 0); + + neigDescriptors.clear(); + + int nbProcToRecvLower; + { + int nextDestProc = my_rank; + for(int idxLower = 1 ; idxLower <= interpolation_size+1 ; idxLower += partition_interval_size_per_proc[nextDestProc]){ + nextDestProc = (nextDestProc-1+nb_processes_involved)%nb_processes_involved; + if(nextDestProc == my_rank){ + // We are back on our process + break; + } + + const int destProc = nextDestProc; + const int lowerRankDiff = (nextDestProc < my_rank ? 
my_rank - nextDestProc : nb_processes_involved-nextDestProc+my_rank); + + const int nbPartitionsToSend = std::min(current_partition_size, interpolation_size-(idxLower-1)); + assert(nbPartitionsToSend >= 0); + const partsize_t nbParticlesToSend = current_offset_particles_for_partition[nbPartitionsToSend] - current_offset_particles_for_partition[0]; + + const int nbPartitionsToRecv = std::min(partition_interval_size_per_proc[destProc], (interpolation_size+1)-(idxLower-1)); + assert(nbPartitionsToRecv > 0); + const partsize_t nbParticlesToRecv = -1; + + NeighborDescriptor descriptor; + descriptor.destProc = destProc; + descriptor.rankDiff = lowerRankDiff; + descriptor.nbPartitionsToSend = nbPartitionsToSend; + descriptor.nbParticlesToSend = nbParticlesToSend; + descriptor.nbPartitionsToRecv = nbPartitionsToRecv; + descriptor.nbParticlesToRecv = nbParticlesToRecv; + descriptor.isLower = true; + descriptor.idxLowerUpper = idxLower; + + neigDescriptors.emplace_back(std::move(descriptor)); + } + nbProcToRecvLower = int(neigDescriptors.size()); + + nextDestProc = my_rank; + for(int idxUpper = 1 ; idxUpper <= interpolation_size+1 ; idxUpper += partition_interval_size_per_proc[nextDestProc]){ + nextDestProc = (nextDestProc+1+nb_processes_involved)%nb_processes_involved; + if(nextDestProc == my_rank){ + // We are back on our process + break; + } + + const int destProc = nextDestProc; + const int upperRankDiff = (nextDestProc > my_rank ? 
nextDestProc - my_rank: nb_processes_involved-my_rank+nextDestProc); + + const int nbPartitionsToSend = std::min(current_partition_size, (interpolation_size+1)-(idxUpper-1)); + assert(nbPartitionsToSend > 0); + const partsize_t nbParticlesToSend = current_offset_particles_for_partition[current_partition_size] - current_offset_particles_for_partition[current_partition_size-nbPartitionsToSend]; + + const int nbPartitionsToRecv = std::min(partition_interval_size_per_proc[destProc], interpolation_size-(idxUpper-1)); + assert(nbPartitionsToSend >= 0); + const partsize_t nbParticlesToRecv = -1; + + NeighborDescriptor descriptor; + descriptor.destProc = destProc; + descriptor.rankDiff = upperRankDiff; + descriptor.nbPartitionsToSend = nbPartitionsToSend; + descriptor.nbParticlesToSend = nbParticlesToSend; + descriptor.nbPartitionsToRecv = nbPartitionsToRecv; + descriptor.nbParticlesToRecv = nbParticlesToRecv; + descriptor.isLower = false; + descriptor.idxLowerUpper = idxUpper; + + neigDescriptors.emplace_back(std::move(descriptor)); + } + } + const int nbProcToRecvUpper = int(neigDescriptors.size())-nbProcToRecvLower; + const int nbProcToRecv = nbProcToRecvUpper + nbProcToRecvLower; + assert(int(neigDescriptors.size()) == nbProcToRecv); + + for(int idxDescr = 0 ; idxDescr < int(neigDescriptors.size()) ; ++idxDescr){ + NeighborDescriptor& descriptor = neigDescriptors[idxDescr]; + + if(descriptor.isLower){ + if(descriptor.nbPartitionsToSend > 0){ + whatNext.emplace_back(std::pair<Action,int>{NOTHING_TODO, -1}); + mpiRequests.emplace_back(); + AssertMpi(MPI_Isend(const_cast<partsize_t*>(&descriptor.nbParticlesToSend), 1, particles_utils::GetMpiType(partsize_t()), + descriptor.destProc, TAG_LOW_UP_NB_PARTICLES, + current_com, &mpiRequests.back())); + + if(descriptor.nbParticlesToSend){ + whatNext.emplace_back(std::pair<Action,int>{NOTHING_TODO, -1}); + mpiRequests.emplace_back(); + assert(descriptor.nbParticlesToSend*size_particle_positions < std::numeric_limits<int>::max()); 
+ AssertMpi(MPI_Isend(const_cast<real_number*>(&particles_positions[0]), int(descriptor.nbParticlesToSend*size_particle_positions), particles_utils::GetMpiType(real_number()), descriptor.destProc, TAG_LOW_UP_PARTICLES, + current_com, &mpiRequests.back())); + + assert(descriptor.toRecvAndMerge == nullptr); + descriptor.toRecvAndMerge.reset(new real_number[descriptor.nbParticlesToSend*size_particle_rhs]); + whatNext.emplace_back(std::pair<Action,int>{MERGE_PARTICLES, idxDescr}); + mpiRequests.emplace_back(); + assert(descriptor.nbParticlesToSend*size_particle_rhs < std::numeric_limits<int>::max()); + AssertMpi(MPI_Irecv(descriptor.toRecvAndMerge.get(), int(descriptor.nbParticlesToSend*size_particle_rhs), particles_utils::GetMpiType(real_number()), descriptor.destProc, TAG_UP_LOW_RESULTS, + current_com, &mpiRequests.back())); + } + } + + assert(descriptor.nbPartitionsToRecv); + whatNext.emplace_back(std::pair<Action,int>{RECV_PARTICLES, idxDescr}); + mpiRequests.emplace_back(); + AssertMpi(MPI_Irecv(&descriptor.nbParticlesToRecv, + 1, particles_utils::GetMpiType(partsize_t()), descriptor.destProc, TAG_UP_LOW_NB_PARTICLES, + current_com, &mpiRequests.back())); + } + else{ + assert(descriptor.nbPartitionsToSend); + whatNext.emplace_back(std::pair<Action,int>{NOTHING_TODO, -1}); + mpiRequests.emplace_back(); + AssertMpi(MPI_Isend(const_cast<partsize_t*>(&descriptor.nbParticlesToSend), 1, particles_utils::GetMpiType(partsize_t()), + descriptor.destProc, TAG_UP_LOW_NB_PARTICLES, + current_com, &mpiRequests.back())); + + if(descriptor.nbParticlesToSend){ + whatNext.emplace_back(std::pair<Action,int>{NOTHING_TODO, -1}); + mpiRequests.emplace_back(); + assert(descriptor.nbParticlesToSend*size_particle_positions < std::numeric_limits<int>::max()); + AssertMpi(MPI_Isend(const_cast<real_number*>(&particles_positions[(current_offset_particles_for_partition[current_partition_size-descriptor.nbPartitionsToSend])*size_particle_positions]), + 
int(descriptor.nbParticlesToSend*size_particle_positions), particles_utils::GetMpiType(real_number()), + descriptor.destProc, TAG_UP_LOW_PARTICLES, + current_com, &mpiRequests.back())); + + assert(descriptor.toRecvAndMerge == nullptr); + descriptor.toRecvAndMerge.reset(new real_number[descriptor.nbParticlesToSend*size_particle_rhs]); + whatNext.emplace_back(std::pair<Action,int>{MERGE_PARTICLES, idxDescr}); + mpiRequests.emplace_back(); + assert(descriptor.nbParticlesToSend*size_particle_rhs < std::numeric_limits<int>::max()); + AssertMpi(MPI_Irecv(descriptor.toRecvAndMerge.get(), int(descriptor.nbParticlesToSend*size_particle_rhs), particles_utils::GetMpiType(real_number()), descriptor.destProc, TAG_LOW_UP_RESULTS, + current_com, &mpiRequests.back())); + } + + if(descriptor.nbPartitionsToRecv){ + whatNext.emplace_back(std::pair<Action,int>{RECV_PARTICLES, idxDescr}); + mpiRequests.emplace_back(); + AssertMpi(MPI_Irecv(&descriptor.nbParticlesToRecv, + 1, particles_utils::GetMpiType(partsize_t()), descriptor.destProc, TAG_LOW_UP_NB_PARTICLES, + current_com, &mpiRequests.back())); + } + } + } + + const bool more_than_one_thread = (omp_get_max_threads() > 1); + + TIMEZONE_OMP_INIT_PREPARALLEL(omp_get_max_threads()) + #pragma omp parallel default(shared) + { + #pragma omp master + { + while(mpiRequests.size()){ + assert(mpiRequests.size() == whatNext.size()); + + int idxDone = int(mpiRequests.size()); + { + TIMEZONE("wait"); + AssertMpi(MPI_Waitany(int(mpiRequests.size()), mpiRequests.data(), &idxDone, MPI_STATUSES_IGNORE)); + } + const std::pair<Action, int> releasedAction = whatNext[idxDone]; + std::swap(mpiRequests[idxDone], mpiRequests[mpiRequests.size()-1]); + std::swap(whatNext[idxDone], whatNext[mpiRequests.size()-1]); + mpiRequests.pop_back(); + whatNext.pop_back(); + + ////////////////////////////////////////////////////////////////////// + /// Data to exchange particles + ////////////////////////////////////////////////////////////////////// + 
if(releasedAction.first == RECV_PARTICLES){ + NeighborDescriptor& descriptor = neigDescriptors[releasedAction.second]; + + if(descriptor.isLower){ + //const int idxLower = descriptor.idxLowerUpper; + const int destProc = descriptor.destProc; + //const int nbPartitionsToRecv = descriptor.nbPartitionsToRecv; + const partsize_t NbParticlesToReceive = descriptor.nbParticlesToRecv; + assert(NbParticlesToReceive != -1); + assert(descriptor.toCompute == nullptr); + if(NbParticlesToReceive){ + descriptor.toCompute.reset(new real_number[NbParticlesToReceive*size_particle_positions]); + whatNext.emplace_back(std::pair<Action,int>{COMPUTE_PARTICLES, releasedAction.second}); + mpiRequests.emplace_back(); + assert(NbParticlesToReceive*size_particle_positions < std::numeric_limits<int>::max()); + AssertMpi(MPI_Irecv(descriptor.toCompute.get(), int(NbParticlesToReceive*size_particle_positions), + particles_utils::GetMpiType(real_number()), destProc, TAG_UP_LOW_PARTICLES, + current_com, &mpiRequests.back())); + } + } + else{ + //const int idxUpper = descriptor.idxLowerUpper; + const int destProc = descriptor.destProc; + //const int nbPartitionsToRecv = descriptor.nbPartitionsToRecv; + const partsize_t NbParticlesToReceive = descriptor.nbParticlesToRecv; + assert(NbParticlesToReceive != -1); + assert(descriptor.toCompute == nullptr); + if(NbParticlesToReceive){ + descriptor.toCompute.reset(new real_number[NbParticlesToReceive*size_particle_positions]); + whatNext.emplace_back(std::pair<Action,int>{COMPUTE_PARTICLES, releasedAction.second}); + mpiRequests.emplace_back(); + assert(NbParticlesToReceive*size_particle_positions < std::numeric_limits<int>::max()); + AssertMpi(MPI_Irecv(descriptor.toCompute.get(), int(NbParticlesToReceive*size_particle_positions), + particles_utils::GetMpiType(real_number()), destProc, TAG_LOW_UP_PARTICLES, + current_com, &mpiRequests.back())); + } + } + } + + ////////////////////////////////////////////////////////////////////// + /// Computation + 
////////////////////////////////////////////////////////////////////// + if(releasedAction.first == COMPUTE_PARTICLES){ + NeighborDescriptor& descriptor = neigDescriptors[releasedAction.second]; + const partsize_t NbParticlesToReceive = descriptor.nbParticlesToRecv; + + assert(descriptor.toCompute != nullptr); + descriptor.results.reset(new real_number[NbParticlesToReceive*size_particle_rhs]); + in_computer.template init_result_array<size_particle_rhs>(descriptor.results.get(), NbParticlesToReceive); + + if(more_than_one_thread == false){ + in_computer.template apply_computation<field_class, size_particle_rhs>(in_field, descriptor.toCompute.get(), descriptor.results.get(), NbParticlesToReceive); + } + else{ + TIMEZONE_OMP_INIT_PRETASK(timeZoneTaskKey) + NeighborDescriptor* ptr_descriptor = &descriptor; + #pragma omp taskgroup + { + for(partsize_t idxPart = 0 ; idxPart < NbParticlesToReceive ; idxPart += 300){ + const partsize_t sizeToDo = std::min(partsize_t(300), NbParticlesToReceive-idxPart); + #pragma omp task default(shared) firstprivate(ptr_descriptor, idxPart, sizeToDo) priority(10) \ + TIMEZONE_OMP_PRAGMA_TASK_KEY(timeZoneTaskKey) + { + TIMEZONE_OMP_TASK("in_computer.apply_computation", timeZoneTaskKey); + in_computer.template apply_computation<field_class, size_particle_rhs>(in_field, &ptr_descriptor->toCompute[idxPart*size_particle_positions], + &ptr_descriptor->results[idxPart*size_particle_rhs], sizeToDo); + } + } + } + } + + const int destProc = descriptor.destProc; + whatNext.emplace_back(std::pair<Action,int>{RELEASE_BUFFER_PARTICLES, releasedAction.second}); + mpiRequests.emplace_back(); + const int tag = descriptor.isLower? 
TAG_LOW_UP_RESULTS : TAG_UP_LOW_RESULTS; + assert(NbParticlesToReceive*size_particle_rhs < std::numeric_limits<int>::max()); + AssertMpi(MPI_Isend(descriptor.results.get(), int(NbParticlesToReceive*size_particle_rhs), particles_utils::GetMpiType(real_number()), destProc, tag, + current_com, &mpiRequests.back())); + } + ////////////////////////////////////////////////////////////////////// + /// Computation + ////////////////////////////////////////////////////////////////////// + if(releasedAction.first == RELEASE_BUFFER_PARTICLES){ + NeighborDescriptor& descriptor = neigDescriptors[releasedAction.second]; + assert(descriptor.toCompute != nullptr); + descriptor.toCompute.release(); + } + ////////////////////////////////////////////////////////////////////// + /// Merge + ////////////////////////////////////////////////////////////////////// + if(releasedAction.first == MERGE_PARTICLES && more_than_one_thread == false){ + NeighborDescriptor& descriptor = neigDescriptors[releasedAction.second]; + + if(descriptor.isLower){ + TIMEZONE("reduce"); + assert(descriptor.toRecvAndMerge != nullptr); + in_computer.template reduce_particles_rhs<size_particle_rhs>(&particles_current_rhs[0], descriptor.toRecvAndMerge.get(), descriptor.nbParticlesToSend); + descriptor.toRecvAndMerge.release(); + } + else { + TIMEZONE("reduce"); + assert(descriptor.toRecvAndMerge != nullptr); + in_computer.template reduce_particles_rhs<size_particle_rhs>(&particles_current_rhs[(current_offset_particles_for_partition[current_partition_size]-descriptor.nbParticlesToSend)*size_particle_rhs], + descriptor.toRecvAndMerge.get(), descriptor.nbParticlesToSend); + descriptor.toRecvAndMerge.release(); + } + } + } + } + if(more_than_one_thread && omp_get_thread_num() == 1){ + TIMEZONE_OMP_INIT_PRETASK(timeZoneTaskKey) + #pragma omp taskgroup + { + // Do for all partitions except the first and last one + for(int idxPartition = 0 ; idxPartition < current_partition_size ; ++idxPartition){ + for(partsize_t 
idxPart = current_offset_particles_for_partition[idxPartition] ; + idxPart < current_offset_particles_for_partition[idxPartition+1] ; idxPart += 300){ + + const partsize_t sizeToDo = std::min(partsize_t(300), current_offset_particles_for_partition[idxPartition+1]-idxPart); + + // Low priority to help master thread when possible + #pragma omp task default(shared) firstprivate(idxPart, sizeToDo) priority(0) TIMEZONE_OMP_PRAGMA_TASK_KEY(timeZoneTaskKey) + { + TIMEZONE_OMP_TASK("in_computer.apply_computation", timeZoneTaskKey); + in_computer.template apply_computation<field_class, size_particle_rhs>(in_field, &particles_positions[idxPart*size_particle_positions], + &particles_current_rhs[idxPart*size_particle_rhs], + sizeToDo); + } + } + } + } + } + } + + if(more_than_one_thread == true){ + for(int idxDescr = 0 ; idxDescr < int(neigDescriptors.size()) ; ++idxDescr){ + NeighborDescriptor& descriptor = neigDescriptors[idxDescr]; + if(descriptor.nbParticlesToSend){ + if(descriptor.isLower){ + TIMEZONE("reduce_later"); + assert(descriptor.toRecvAndMerge != nullptr); + in_computer.template reduce_particles_rhs<size_particle_rhs>(&particles_current_rhs[0], descriptor.toRecvAndMerge.get(), descriptor.nbParticlesToSend); + descriptor.toRecvAndMerge.release(); + } + else { + TIMEZONE("reduce_later"); + assert(descriptor.toRecvAndMerge != nullptr); + in_computer.template reduce_particles_rhs<size_particle_rhs>(&particles_current_rhs[(current_offset_particles_for_partition[current_partition_size]-descriptor.nbParticlesToSend)*size_particle_rhs], + descriptor.toRecvAndMerge.get(), descriptor.nbParticlesToSend); + descriptor.toRecvAndMerge.release(); + } + } + } + } + + // Do my own computation if not threaded + if(more_than_one_thread == false){ + TIMEZONE("compute-my_compute"); + // Compute my particles + if(myTotalNbParticles){ + in_computer.template apply_computation<field_class, size_particle_rhs>(in_field, particles_positions, particles_current_rhs, myTotalNbParticles); + } + 
} + + assert(whatNext.size() == 0); + assert(mpiRequests.size() == 0); + } + + + //////////////////////////////////////////////////////////////////////////// + + template <class computer_class, int size_particle_positions, int size_particle_rhs, int size_particle_index> + void redistribute(computer_class& in_computer, + partsize_t current_my_nb_particles_per_partition[], + partsize_t* nb_particles, + std::unique_ptr<real_number[]>* inout_positions_particles, + std::unique_ptr<real_number[]> inout_rhs_particles[], const int in_nb_rhs, + std::unique_ptr<partsize_t[]>* inout_index_particles){ + TIMEZONE("redistribute"); + + // Some latest processes might not be involved + if(nb_processes_involved <= my_rank){ + return; + } + + current_offset_particles_for_partition[0] = 0; + partsize_t myTotalNbParticles = 0; + for(int idxPartition = 0 ; idxPartition < current_partition_size ; ++idxPartition){ + myTotalNbParticles += current_my_nb_particles_per_partition[idxPartition]; + current_offset_particles_for_partition[idxPartition+1] = current_offset_particles_for_partition[idxPartition] + current_my_nb_particles_per_partition[idxPartition]; + } + assert((*nb_particles) == myTotalNbParticles); + + // Find particles outside my interval + const partsize_t nbOutLower = particles_utils::partition_extra<partsize_t, size_particle_positions>(&(*inout_positions_particles)[0], current_my_nb_particles_per_partition[0], + [&](const real_number val[]){ + const int partition_level = in_computer.pbc_field_layer(val[IDX_Z], IDX_Z); + assert(partition_level == current_partition_interval.first + || partition_level == (current_partition_interval.first-1+int(field_grid_dim[IDX_Z]))%int(field_grid_dim[IDX_Z]) + || partition_level == (current_partition_interval.first+1)%int(field_grid_dim[IDX_Z])); + const bool isLower = partition_level == (current_partition_interval.first-1+int(field_grid_dim[IDX_Z]))%int(field_grid_dim[IDX_Z]); + return isLower; + }, + [&](const partsize_t idx1, const partsize_t 
idx2){ + for(int idx_val = 0 ; idx_val < size_particle_index ; ++idx_val){ + std::swap((*inout_index_particles)[idx1], (*inout_index_particles)[idx2]); + } + + for(int idx_rhs = 0 ; idx_rhs < in_nb_rhs ; ++idx_rhs){ + for(int idx_val = 0 ; idx_val < size_particle_rhs ; ++idx_val){ + std::swap(inout_rhs_particles[idx_rhs][idx1*size_particle_rhs + idx_val], + inout_rhs_particles[idx_rhs][idx2*size_particle_rhs + idx_val]); + } + } + }); + const partsize_t offesetOutLow = (current_partition_size==1? nbOutLower : 0); + + const partsize_t nbOutUpper = current_my_nb_particles_per_partition[current_partition_size-1] - offesetOutLow - particles_utils::partition_extra<partsize_t, size_particle_positions>( + &(*inout_positions_particles)[(current_offset_particles_for_partition[current_partition_size-1]+offesetOutLow)*size_particle_positions], + myTotalNbParticles - (current_offset_particles_for_partition[current_partition_size-1]+offesetOutLow), + [&](const real_number val[]){ + const int partition_level = in_computer.pbc_field_layer(val[IDX_Z], IDX_Z); + assert(partition_level == (current_partition_interval.second-1) + || partition_level == ((current_partition_interval.second-1)-1+int(field_grid_dim[IDX_Z]))%int(field_grid_dim[IDX_Z]) + || partition_level == ((current_partition_interval.second-1)+1)%int(field_grid_dim[IDX_Z])); + const bool isUpper = (partition_level == ((current_partition_interval.second-1)+1)%int(field_grid_dim[IDX_Z])); + return !isUpper; + }, + [&](const partsize_t idx1, const partsize_t idx2){ + for(int idx_val = 0 ; idx_val < size_particle_index ; ++idx_val){ + std::swap((*inout_index_particles)[idx1], (*inout_index_particles)[idx2]); + } + + for(int idx_rhs = 0 ; idx_rhs < in_nb_rhs ; ++idx_rhs){ + for(int idx_val = 0 ; idx_val < size_particle_rhs ; ++idx_val){ + std::swap(inout_rhs_particles[idx_rhs][idx1*size_particle_rhs + idx_val], + inout_rhs_particles[idx_rhs][idx2*size_particle_rhs + idx_val]); + } + } + }, 
(current_offset_particles_for_partition[current_partition_size-1]+offesetOutLow)); + + // Exchange number + int eventsBeforeWaitall = 0; + partsize_t nbNewFromLow = 0; + partsize_t nbNewFromUp = 0; + std::unique_ptr<real_number[]> newParticlesLow; + std::unique_ptr<real_number[]> newParticlesUp; + std::unique_ptr<partsize_t[]> newParticlesLowIndexes; + std::unique_ptr<partsize_t[]> newParticlesUpIndexes; + std::vector<std::unique_ptr<real_number[]>> newParticlesLowRhs(in_nb_rhs); + std::vector<std::unique_ptr<real_number[]>> newParticlesUpRhs(in_nb_rhs); + + { + assert(whatNext.size() == 0); + assert(mpiRequests.size() == 0); + + whatNext.emplace_back(std::pair<Action,int>{RECV_MOVE_NB_LOW, -1}); + mpiRequests.emplace_back(); + AssertMpi(MPI_Irecv(&nbNewFromLow, 1, particles_utils::GetMpiType(partsize_t()), + (my_rank-1+nb_processes_involved)%nb_processes_involved, TAG_UP_LOW_MOVED_NB_PARTICLES, + MPI_COMM_WORLD, &mpiRequests.back())); + eventsBeforeWaitall += 1; + + whatNext.emplace_back(std::pair<Action,int>{NOTHING_TODO, -1}); + mpiRequests.emplace_back(); + AssertMpi(MPI_Isend(const_cast<partsize_t*>(&nbOutLower), 1, particles_utils::GetMpiType(partsize_t()), + (my_rank-1+nb_processes_involved)%nb_processes_involved, TAG_LOW_UP_MOVED_NB_PARTICLES, + MPI_COMM_WORLD, &mpiRequests.back())); + + if(nbOutLower){ + whatNext.emplace_back(std::pair<Action,int>{NOTHING_TODO, -1}); + mpiRequests.emplace_back(); + assert(nbOutLower*size_particle_positions < std::numeric_limits<int>::max()); + AssertMpi(MPI_Isend(&(*inout_positions_particles)[0], int(nbOutLower*size_particle_positions), particles_utils::GetMpiType(real_number()), (my_rank-1+nb_processes_involved)%nb_processes_involved, TAG_LOW_UP_MOVED_PARTICLES, + MPI_COMM_WORLD, &mpiRequests.back())); + whatNext.emplace_back(std::pair<Action,int>{NOTHING_TODO, -1}); + mpiRequests.emplace_back(); + assert(nbOutLower < std::numeric_limits<int>::max()); + AssertMpi(MPI_Isend(&(*inout_index_particles)[0], int(nbOutLower), 
particles_utils::GetMpiType(partsize_t()), + (my_rank-1+nb_processes_involved)%nb_processes_involved, TAG_LOW_UP_MOVED_PARTICLES_INDEXES, + MPI_COMM_WORLD, &mpiRequests.back())); + + for(int idx_rhs = 0 ; idx_rhs < in_nb_rhs ; ++idx_rhs){ + whatNext.emplace_back(std::pair<Action,int>{NOTHING_TODO, -1}); + mpiRequests.emplace_back(); + assert(nbOutLower*size_particle_rhs < std::numeric_limits<int>::max()); + AssertMpi(MPI_Isend(&inout_rhs_particles[idx_rhs][0], int(nbOutLower*size_particle_rhs), particles_utils::GetMpiType(real_number()), (my_rank-1+nb_processes_involved)%nb_processes_involved, TAG_LOW_UP_MOVED_PARTICLES_RHS+idx_rhs, + MPI_COMM_WORLD, &mpiRequests.back())); + } + } + + whatNext.emplace_back(std::pair<Action,int>{RECV_MOVE_NB_UP, -1}); + mpiRequests.emplace_back(); + AssertMpi(MPI_Irecv(&nbNewFromUp, 1, particles_utils::GetMpiType(partsize_t()), (my_rank+1)%nb_processes_involved, + TAG_LOW_UP_MOVED_NB_PARTICLES, + MPI_COMM_WORLD, &mpiRequests.back())); + eventsBeforeWaitall += 1; + + whatNext.emplace_back(std::pair<Action,int>{NOTHING_TODO, -1}); + mpiRequests.emplace_back(); + AssertMpi(MPI_Isend(const_cast<partsize_t*>(&nbOutUpper), 1, particles_utils::GetMpiType(partsize_t()), + (my_rank+1)%nb_processes_involved, TAG_UP_LOW_MOVED_NB_PARTICLES, + MPI_COMM_WORLD, &mpiRequests.back())); + + if(nbOutUpper){ + whatNext.emplace_back(std::pair<Action,int>{NOTHING_TODO, -1}); + mpiRequests.emplace_back(); + assert(nbOutUpper*size_particle_positions < std::numeric_limits<int>::max()); + AssertMpi(MPI_Isend(&(*inout_positions_particles)[(myTotalNbParticles-nbOutUpper)*size_particle_positions], + int(nbOutUpper*size_particle_positions), particles_utils::GetMpiType(real_number()), (my_rank+1)%nb_processes_involved, TAG_UP_LOW_MOVED_PARTICLES, + MPI_COMM_WORLD, &mpiRequests.back())); + whatNext.emplace_back(std::pair<Action,int>{NOTHING_TODO, -1}); + mpiRequests.emplace_back(); + assert(nbOutUpper < std::numeric_limits<int>::max()); + 
AssertMpi(MPI_Isend(&(*inout_index_particles)[(myTotalNbParticles-nbOutUpper)], int(nbOutUpper), + particles_utils::GetMpiType(partsize_t()), (my_rank+1)%nb_processes_involved, TAG_UP_LOW_MOVED_PARTICLES_INDEXES, + MPI_COMM_WORLD, &mpiRequests.back())); + + + for(int idx_rhs = 0 ; idx_rhs < in_nb_rhs ; ++idx_rhs){ + whatNext.emplace_back(std::pair<Action,int>{NOTHING_TODO, -1}); + mpiRequests.emplace_back(); + assert(nbOutUpper*size_particle_rhs < std::numeric_limits<int>::max()); + AssertMpi(MPI_Isend(&inout_rhs_particles[idx_rhs][(myTotalNbParticles-nbOutUpper)*size_particle_rhs], + int(nbOutUpper*size_particle_rhs), particles_utils::GetMpiType(real_number()), (my_rank+1)%nb_processes_involved, TAG_UP_LOW_MOVED_PARTICLES_RHS+idx_rhs, + MPI_COMM_WORLD, &mpiRequests.back())); + } + } + + while(mpiRequests.size() && eventsBeforeWaitall){ + int idxDone = int(mpiRequests.size()); + { + TIMEZONE("waitany_move"); + AssertMpi(MPI_Waitany(int(mpiRequests.size()), mpiRequests.data(), &idxDone, MPI_STATUSES_IGNORE)); + } + const std::pair<Action, int> releasedAction = whatNext[idxDone]; + std::swap(mpiRequests[idxDone], mpiRequests[mpiRequests.size()-1]); + std::swap(whatNext[idxDone], whatNext[mpiRequests.size()-1]); + mpiRequests.pop_back(); + whatNext.pop_back(); + + if(releasedAction.first == RECV_MOVE_NB_LOW){ + if(nbNewFromLow){ + assert(newParticlesLow == nullptr); + newParticlesLow.reset(new real_number[nbNewFromLow*size_particle_positions]); + whatNext.emplace_back(std::pair<Action,int>{RECV_MOVE_LOW, -1}); + mpiRequests.emplace_back(); + assert(nbNewFromLow*size_particle_positions < std::numeric_limits<int>::max()); + AssertMpi(MPI_Irecv(&newParticlesLow[0], int(nbNewFromLow*size_particle_positions), particles_utils::GetMpiType(real_number()), + (my_rank-1+nb_processes_involved)%nb_processes_involved, TAG_UP_LOW_MOVED_PARTICLES, + MPI_COMM_WORLD, &mpiRequests.back())); + + newParticlesLowIndexes.reset(new partsize_t[nbNewFromLow]); + 
whatNext.emplace_back(std::pair<Action,int>{NOTHING_TODO, -1}); + mpiRequests.emplace_back(); + assert(nbNewFromLow < std::numeric_limits<int>::max()); + AssertMpi(MPI_Irecv(&newParticlesLowIndexes[0], int(nbNewFromLow), particles_utils::GetMpiType(partsize_t()), + (my_rank-1+nb_processes_involved)%nb_processes_involved, TAG_UP_LOW_MOVED_PARTICLES_INDEXES, + MPI_COMM_WORLD, &mpiRequests.back())); + + for(int idx_rhs = 0 ; idx_rhs < in_nb_rhs ; ++idx_rhs){ + newParticlesLowRhs[idx_rhs].reset(new real_number[nbNewFromLow*size_particle_rhs]); + whatNext.emplace_back(std::pair<Action,int>{NOTHING_TODO, -1}); + mpiRequests.emplace_back(); + assert(nbNewFromLow*size_particle_rhs < std::numeric_limits<int>::max()); + AssertMpi(MPI_Irecv(&newParticlesLowRhs[idx_rhs][0], int(nbNewFromLow*size_particle_rhs), particles_utils::GetMpiType(real_number()), (my_rank-1+nb_processes_involved)%nb_processes_involved, TAG_UP_LOW_MOVED_PARTICLES_RHS+idx_rhs, + MPI_COMM_WORLD, &mpiRequests.back())); + } + } + eventsBeforeWaitall -= 1; + } + else if(releasedAction.first == RECV_MOVE_NB_UP){ + if(nbNewFromUp){ + assert(newParticlesUp == nullptr); + newParticlesUp.reset(new real_number[nbNewFromUp*size_particle_positions]); + whatNext.emplace_back(std::pair<Action,int>{RECV_MOVE_UP, -1}); + mpiRequests.emplace_back(); + assert(nbNewFromUp*size_particle_positions < std::numeric_limits<int>::max()); + AssertMpi(MPI_Irecv(&newParticlesUp[0], int(nbNewFromUp*size_particle_positions), particles_utils::GetMpiType(real_number()), (my_rank+1)%nb_processes_involved, TAG_LOW_UP_MOVED_PARTICLES, + MPI_COMM_WORLD, &mpiRequests.back())); + + newParticlesUpIndexes.reset(new partsize_t[nbNewFromUp]); + whatNext.emplace_back(std::pair<Action,int>{NOTHING_TODO, -1}); + mpiRequests.emplace_back(); + assert(nbNewFromUp < std::numeric_limits<int>::max()); + AssertMpi(MPI_Irecv(&newParticlesUpIndexes[0], int(nbNewFromUp), particles_utils::GetMpiType(partsize_t()), + (my_rank+1)%nb_processes_involved, 
TAG_LOW_UP_MOVED_PARTICLES_INDEXES, + MPI_COMM_WORLD, &mpiRequests.back())); + + for(int idx_rhs = 0 ; idx_rhs < in_nb_rhs ; ++idx_rhs){ + newParticlesUpRhs[idx_rhs].reset(new real_number[nbNewFromUp*size_particle_rhs]); + whatNext.emplace_back(std::pair<Action,int>{NOTHING_TODO, -1}); + mpiRequests.emplace_back(); + assert(nbNewFromUp*size_particle_rhs < std::numeric_limits<int>::max()); + AssertMpi(MPI_Irecv(&newParticlesUpRhs[idx_rhs][0], int(nbNewFromUp*size_particle_rhs), particles_utils::GetMpiType(real_number()), (my_rank+1)%nb_processes_involved, TAG_LOW_UP_MOVED_PARTICLES_RHS+idx_rhs, + MPI_COMM_WORLD, &mpiRequests.back())); + } + } + eventsBeforeWaitall -= 1; + } + } + + if(mpiRequests.size()){ + // TODO Proceed when received + TIMEZONE("waitall-move"); + AssertMpi(MPI_Waitall(int(mpiRequests.size()), mpiRequests.data(), MPI_STATUSES_IGNORE)); + mpiRequests.clear(); + whatNext.clear(); + } + } + + // Realloc an merge + { + TIMEZONE("realloc_copy"); + const partsize_t nbOldParticlesInside = myTotalNbParticles - nbOutLower - nbOutUpper; + const partsize_t myTotalNewNbParticles = nbOldParticlesInside + nbNewFromLow + nbNewFromUp; + + std::unique_ptr<real_number[]> newArray(new real_number[myTotalNewNbParticles*size_particle_positions]); + std::unique_ptr<partsize_t[]> newArrayIndexes(new partsize_t[myTotalNewNbParticles]); + std::vector<std::unique_ptr<real_number[]>> newArrayRhs(in_nb_rhs); + for(int idx_rhs = 0 ; idx_rhs < in_nb_rhs ; ++idx_rhs){ + newArrayRhs[idx_rhs].reset(new real_number[myTotalNewNbParticles*size_particle_rhs]); + } + + // Copy new particles recv form lower first + if(nbNewFromLow){ + const particles_utils::fixed_copy fcp(0, 0, nbNewFromLow); + fcp.copy(newArray, newParticlesLow, size_particle_positions); + fcp.copy(newArrayIndexes, newParticlesLowIndexes); + for(int idx_rhs = 0 ; idx_rhs < in_nb_rhs ; ++idx_rhs){ + fcp.copy(newArrayRhs[idx_rhs], newParticlesLowRhs[idx_rhs], size_particle_rhs); + } + } + + // Copy my own particles + { 
+ const particles_utils::fixed_copy fcp(nbNewFromLow, nbOutLower, nbOldParticlesInside); + fcp.copy(newArray, (*inout_positions_particles), size_particle_positions); + fcp.copy(newArrayIndexes, (*inout_index_particles)); + for(int idx_rhs = 0 ; idx_rhs < in_nb_rhs ; ++idx_rhs){ + fcp.copy(newArrayRhs[idx_rhs], inout_rhs_particles[idx_rhs], size_particle_rhs); + } + } + + // Copy new particles from upper at the back + if(nbNewFromUp){ + const particles_utils::fixed_copy fcp(nbNewFromLow+nbOldParticlesInside, 0, nbNewFromUp); + fcp.copy(newArray, newParticlesUp, size_particle_positions); + fcp.copy(newArrayIndexes, newParticlesUpIndexes); + for(int idx_rhs = 0 ; idx_rhs < in_nb_rhs ; ++idx_rhs){ + fcp.copy(newArrayRhs[idx_rhs], newParticlesUpRhs[idx_rhs], size_particle_rhs); + } + } + + (*inout_positions_particles) = std::move(newArray); + (*inout_index_particles) = std::move(newArrayIndexes); + for(int idx_rhs = 0 ; idx_rhs < in_nb_rhs ; ++idx_rhs){ + inout_rhs_particles[idx_rhs] = std::move(newArrayRhs[idx_rhs]); + } + + myTotalNbParticles = myTotalNewNbParticles; + } + + // Partitions all particles + { + TIMEZONE("repartition"); + particles_utils::partition_extra_z<partsize_t, size_particle_positions>(&(*inout_positions_particles)[0], + myTotalNbParticles,current_partition_size, + current_my_nb_particles_per_partition, current_offset_particles_for_partition.get(), + [&](const real_number& z_pos){ + const int partition_level = in_computer.pbc_field_layer(z_pos, IDX_Z); + assert(current_partition_interval.first <= partition_level && partition_level < current_partition_interval.second); + return partition_level - current_partition_interval.first; + }, + [&](const partsize_t idx1, const partsize_t idx2){ + for(int idx_val = 0 ; idx_val < size_particle_index ; ++idx_val){ + std::swap((*inout_index_particles)[idx1], (*inout_index_particles)[idx2]); + } + + for(int idx_rhs = 0 ; idx_rhs < in_nb_rhs ; ++idx_rhs){ + for(int idx_val = 0 ; idx_val < size_particle_rhs ; 
++idx_val){ + std::swap(inout_rhs_particles[idx_rhs][idx1*size_particle_rhs + idx_val], + inout_rhs_particles[idx_rhs][idx2*size_particle_rhs + idx_val]); + } + } + }); + + {// TODO remove + for(int idxPartition = 0 ; idxPartition < current_partition_size ; ++idxPartition){ + assert(current_my_nb_particles_per_partition[idxPartition] == + current_offset_particles_for_partition[idxPartition+1] - current_offset_particles_for_partition[idxPartition]); + for(partsize_t idx = current_offset_particles_for_partition[idxPartition] ; idx < current_offset_particles_for_partition[idxPartition+1] ; ++idx){ + assert(in_computer.pbc_field_layer((*inout_positions_particles)[idx*3+IDX_Z], IDX_Z)-current_partition_interval.first == idxPartition); + } + } + } + } + (*nb_particles) = myTotalNbParticles; + + assert(mpiRequests.size() == 0); + } +}; + +#endif diff --git a/bfps/cpp/particles/particles_field_computer.hpp b/bfps/cpp/particles/particles_field_computer.hpp new file mode 100644 index 0000000000000000000000000000000000000000..f68f2fc02b4ee40aa9583385c0bd18195b92b6dc --- /dev/null +++ b/bfps/cpp/particles/particles_field_computer.hpp @@ -0,0 +1,188 @@ +#ifndef PARTICLES_FIELD_COMPUTER_HPP +#define PARTICLES_FIELD_COMPUTER_HPP + +#include <array> +#include <utility> + +#include "scope_timer.hpp" +#include "particles_utils.hpp" + +template <class partsize_t, + class real_number, + class interpolator_class, + int interp_neighbours> +class particles_field_computer { + + const std::array<int,3> field_grid_dim; + const std::pair<int,int> current_partition_interval; + + const interpolator_class& interpolator; + + const std::array<real_number,3> spatial_box_width; + const std::array<real_number,3> spatial_box_offset; + const std::array<real_number,3> box_step_width; + + int deriv[3]; + +public: + + particles_field_computer(const std::array<size_t,3>& in_field_grid_dim, + const std::pair<int,int>& in_current_partitions, + const interpolator_class& in_interpolator, + const 
std::array<real_number,3>& in_spatial_box_width, const std::array<real_number,3>& in_spatial_box_offset, + const std::array<real_number,3>& in_box_step_width) + : field_grid_dim({{int(in_field_grid_dim[0]),int(in_field_grid_dim[1]),int(in_field_grid_dim[2])}}), current_partition_interval(in_current_partitions), + interpolator(in_interpolator), + spatial_box_width(in_spatial_box_width), spatial_box_offset(in_spatial_box_offset), box_step_width(in_box_step_width){ + deriv[IDX_X] = 0; + deriv[IDX_Y] = 0; + deriv[IDX_Z] = 0; + } + + //////////////////////////////////////////////////////////////////////// + /// Computation related + //////////////////////////////////////////////////////////////////////// + + template <int size_particle_rhs> + void init_result_array(real_number particles_current_rhs[], + const partsize_t nb_particles) const { + // Set values to zero initialy + std::fill(particles_current_rhs, + particles_current_rhs+nb_particles*size_particle_rhs, + 0); + } + + real_number get_norm_pos_in_cell(const real_number in_pos, const int idx_pos) const { + const real_number shifted_pos = in_pos - spatial_box_offset[idx_pos]; + const real_number nb_box_repeat = floor(shifted_pos/spatial_box_width[idx_pos]); + const real_number pos_in_box = shifted_pos - nb_box_repeat*spatial_box_width[idx_pos]; + const real_number cell_idx = floor(pos_in_box/box_step_width[idx_pos]); + const real_number pos_in_cell = (pos_in_box - cell_idx*box_step_width[idx_pos]) / box_step_width[idx_pos]; + assert(0 <= pos_in_cell && pos_in_cell < 1); + return pos_in_cell; + } + + template <class field_class, int size_particle_rhs> + void apply_computation(const field_class& field, + const real_number particles_positions[], + real_number particles_current_rhs[], + const partsize_t nb_particles) const { + TIMEZONE("particles_field_computer::apply_computation"); + //DEBUG_MSG("just entered particles_field_computer::apply_computation\n"); + for(partsize_t idxPart = 0 ; idxPart < nb_particles ; 
++idxPart){ + const real_number reltv_x = get_norm_pos_in_cell(particles_positions[idxPart*3+IDX_X], IDX_X); + const real_number reltv_y = get_norm_pos_in_cell(particles_positions[idxPart*3+IDX_Y], IDX_Y); + const real_number reltv_z = get_norm_pos_in_cell(particles_positions[idxPart*3+IDX_Z], IDX_Z); + + typename interpolator_class::real_number + bx[interp_neighbours*2+2], + by[interp_neighbours*2+2], + bz[interp_neighbours*2+2]; + interpolator.compute_beta(deriv[IDX_X], reltv_x, bx); + interpolator.compute_beta(deriv[IDX_Y], reltv_y, by); + interpolator.compute_beta(deriv[IDX_Z], reltv_z, bz); + + const int partGridIdx_x = pbc_field_layer(particles_positions[idxPart*3+IDX_X], IDX_X); + const int partGridIdx_y = pbc_field_layer(particles_positions[idxPart*3+IDX_Y], IDX_Y); + const int partGridIdx_z = pbc_field_layer(particles_positions[idxPart*3+IDX_Z], IDX_Z); + + assert(0 <= partGridIdx_x && partGridIdx_x < int(field_grid_dim[IDX_X])); + assert(0 <= partGridIdx_y && partGridIdx_y < int(field_grid_dim[IDX_Y])); + assert(0 <= partGridIdx_z && partGridIdx_z < int(field_grid_dim[IDX_Z])); + + const int interp_limit_mx = partGridIdx_x-interp_neighbours; + const int interp_limit_x = partGridIdx_x+interp_neighbours+1; + const int interp_limit_my = partGridIdx_y-interp_neighbours; + const int interp_limit_y = partGridIdx_y+interp_neighbours+1; + const int interp_limit_mz_bz = partGridIdx_z-interp_neighbours; + + int interp_limit_mz[2]; + int interp_limit_z[2]; + int nb_z_intervals; + + if((partGridIdx_z-interp_neighbours) < 0){ + assert(partGridIdx_z+interp_neighbours+1 < int(field_grid_dim[IDX_Z])); + interp_limit_mz[0] = std::max(current_partition_interval.first, partGridIdx_z-interp_neighbours+int(field_grid_dim[IDX_Z])); + interp_limit_z[0] = current_partition_interval.second-1; + + interp_limit_mz[1] = std::max(0, current_partition_interval.first); + interp_limit_z[1] = std::min(partGridIdx_z+interp_neighbours+1, current_partition_interval.second-1); + + 
nb_z_intervals = 2; + } + else if(int(field_grid_dim[IDX_Z]) <= (partGridIdx_z+interp_neighbours+1)){ + interp_limit_mz[0] = std::max(current_partition_interval.first, partGridIdx_z-interp_neighbours); + interp_limit_z[0] = std::min(int(field_grid_dim[IDX_Z])-1,current_partition_interval.second-1); + + interp_limit_mz[1] = std::max(0, current_partition_interval.first); + interp_limit_z[1] = std::min(partGridIdx_z+interp_neighbours+1-int(field_grid_dim[IDX_Z]), current_partition_interval.second-1); + + nb_z_intervals = 2; + } + else{ + interp_limit_mz[0] = std::max(partGridIdx_z-interp_neighbours, current_partition_interval.first); + interp_limit_z[0] = std::min(partGridIdx_z+interp_neighbours+1, current_partition_interval.second-1); + nb_z_intervals = 1; + } + + for(int idx_inter = 0 ; idx_inter < nb_z_intervals ; ++idx_inter){ + for(int idx_z = interp_limit_mz[idx_inter] ; idx_z <= interp_limit_z[idx_inter] ; ++idx_z ){ + const int idx_z_pbc = (idx_z + field_grid_dim[IDX_Z])%field_grid_dim[IDX_Z]; + assert(current_partition_interval.first <= idx_z_pbc && idx_z_pbc < current_partition_interval.second); + assert(((idx_z+field_grid_dim[IDX_Z]-interp_limit_mz_bz)%field_grid_dim[IDX_Z]) < interp_neighbours*2+2); + + for(int idx_x = interp_limit_mx ; idx_x <= interp_limit_x ; ++idx_x ){ + const int idx_x_pbc = (idx_x + field_grid_dim[IDX_X])%field_grid_dim[IDX_X]; + assert(idx_x-interp_limit_mx < interp_neighbours*2+2); + + for(int idx_y = interp_limit_my ; idx_y <= interp_limit_y ; ++idx_y ){ + const int idx_y_pbc = (idx_y + field_grid_dim[IDX_Y])%field_grid_dim[IDX_Y]; + assert(idx_y-interp_limit_my < interp_neighbours*2+2); + + const real_number coef = (bz[((idx_z+field_grid_dim[IDX_Z]-interp_limit_mz_bz)%field_grid_dim[IDX_Z])] + * by[idx_y-interp_limit_my] + * bx[idx_x-interp_limit_mx]); + + const ptrdiff_t tindex = field.get_rindex_from_global(idx_x_pbc, idx_y_pbc, idx_z_pbc); + + // getValue does not necessary return real_number + for(int idx_rhs_val = 0 ; 
idx_rhs_val < size_particle_rhs ; ++idx_rhs_val){ + particles_current_rhs[idxPart*size_particle_rhs+idx_rhs_val] += real_number(field.rval(tindex,idx_rhs_val))*coef; + } + } + } + } + } + } + } + + template <int size_particle_rhs> + void reduce_particles_rhs(real_number particles_current_rhs[], + const real_number extra_particles_current_rhs[], + const partsize_t nb_particles) const { + TIMEZONE("particles_field_computer::reduce_particles"); + // Simply sum values + for(partsize_t idxPart = 0 ; idxPart < nb_particles ; ++idxPart){ + for(int idx_rhs_val = 0 ; idx_rhs_val < size_particle_rhs ; ++idx_rhs_val){ + particles_current_rhs[idxPart*size_particle_rhs+idx_rhs_val] += extra_particles_current_rhs[idxPart*size_particle_rhs+idx_rhs_val]; + } + } + } + + //////////////////////////////////////////////////////////////////////// + /// Re-distribution related + //////////////////////////////////////////////////////////////////////// + + int pbc_field_layer(const real_number& a_z_pos, const int idx_dim) const { + const real_number shifted_pos = a_z_pos - spatial_box_offset[idx_dim]; + const int nb_level_to_pos = int(floor(shifted_pos/box_step_width[idx_dim])); + const int int_field_grid_dim = int(field_grid_dim[idx_dim]); + const int pbc_level = ((nb_level_to_pos%int_field_grid_dim)+int_field_grid_dim)%int_field_grid_dim; + assert(0 <= pbc_level && pbc_level < int(field_grid_dim[idx_dim])); + return pbc_level; + } + +}; + + +#endif diff --git a/bfps/cpp/particles/particles_generic_interp.hpp b/bfps/cpp/particles/particles_generic_interp.hpp new file mode 100644 index 0000000000000000000000000000000000000000..98d0363d4fcfae8c05b6ceabef620e17c1263eee --- /dev/null +++ b/bfps/cpp/particles/particles_generic_interp.hpp @@ -0,0 +1,316 @@ +#ifndef PARTICLES_GENERIC_INTERP_HPP +#define PARTICLES_GENERIC_INTERP_HPP + +template <class real_number, int interp_neighbours, int mode> +class particles_generic_interp; + +#include "Lagrange_polys.hpp" +#include "spline.hpp" + +template 
<> +class particles_generic_interp<double, 1,0>{ +public: + using real_number = double; + + void compute_beta(const int in_derivative, const double in_part_val, double poly_val[]) const { + beta_Lagrange_n1(in_derivative, in_part_val, poly_val); + } +}; + +template <> +class particles_generic_interp<double, 1,1>{ +public: + using real_number = double; + + void compute_beta(const int in_derivative, const double in_part_val, double poly_val[]) const { + beta_n1_m1(in_derivative, in_part_val, poly_val); + } +}; + +template <> +class particles_generic_interp<double, 1,2>{ +public: + using real_number = double; + + void compute_beta(const int in_derivative, const double in_part_val, double poly_val[]) const { + beta_n1_m2(in_derivative, in_part_val, poly_val); + } +}; + +template <> +class particles_generic_interp<double, 2,0>{ +public: + using real_number = double; + + void compute_beta(const int in_derivative, const double in_part_val, double poly_val[]) const { + beta_Lagrange_n2(in_derivative, in_part_val, poly_val); + } +}; + +template <> +class particles_generic_interp<double, 2,1>{ +public: + using real_number = double; + + void compute_beta(const int in_derivative, const double in_part_val, double poly_val[]) const { + beta_n2_m1(in_derivative, in_part_val, poly_val); + } +}; + +template <> +class particles_generic_interp<double, 2,2>{ +public: + using real_number = double; + + void compute_beta(const int in_derivative, const double in_part_val, double poly_val[]) const { + beta_n2_m2(in_derivative, in_part_val, poly_val); + } +}; + +template <> +class particles_generic_interp<double, 3,0>{ +public: + using real_number = double; + + void compute_beta(const int in_derivative, const double in_part_val, double poly_val[]) const { + beta_Lagrange_n3(in_derivative, in_part_val, poly_val); + } +}; + +template <> +class particles_generic_interp<double, 3,1>{ +public: + using real_number = double; + + void compute_beta(const int in_derivative, const double in_part_val, 
double poly_val[]) const { + beta_n3_m1(in_derivative, in_part_val, poly_val); + } +}; + +template <> +class particles_generic_interp<double, 3,2>{ +public: + using real_number = double; + + void compute_beta(const int in_derivative, const double in_part_val, double poly_val[]) const { + beta_n3_m2(in_derivative, in_part_val, poly_val); + } +}; + +template <> +class particles_generic_interp<double, 4,0>{ +public: + using real_number = double; + + void compute_beta(const int in_derivative, const double in_part_val, double poly_val[]) const { + beta_Lagrange_n4(in_derivative, in_part_val, poly_val); + } +}; + +template <> +class particles_generic_interp<double, 4,1>{ +public: + using real_number = double; + + void compute_beta(const int in_derivative, const double in_part_val, double poly_val[]) const { + beta_n4_m1(in_derivative, in_part_val, poly_val); + } +}; + +template <> +class particles_generic_interp<double, 4,2>{ +public: + using real_number = double; + + void compute_beta(const int in_derivative, const double in_part_val, double poly_val[]) const { + beta_n4_m2(in_derivative, in_part_val, poly_val); + } +}; + +template <> +class particles_generic_interp<double, 5,0>{ +public: + using real_number = double; + + void compute_beta(const int in_derivative, const double in_part_val, double poly_val[]) const { + beta_Lagrange_n5(in_derivative, in_part_val, poly_val); + } +}; + +template <> +class particles_generic_interp<double, 5,1>{ +public: + using real_number = double; + + void compute_beta(const int in_derivative, const double in_part_val, double poly_val[]) const { + beta_n5_m1(in_derivative, in_part_val, poly_val); + } +}; + +template <> +class particles_generic_interp<double, 5,2>{ +public: + using real_number = double; + + void compute_beta(const int in_derivative, const double in_part_val, double poly_val[]) const { + beta_n5_m2(in_derivative, in_part_val, poly_val); + } +}; + + +template <> +class particles_generic_interp<double, 6,0>{ +public: + using 
real_number = double; + + void compute_beta(const int in_derivative, const double in_part_val, double poly_val[]) const { + beta_Lagrange_n6(in_derivative, in_part_val, poly_val); + } +}; + +template <> +class particles_generic_interp<double, 6,1>{ +public: + using real_number = double; + + void compute_beta(const int in_derivative, const double in_part_val, double poly_val[]) const { + beta_n6_m1(in_derivative, in_part_val, poly_val); + } +}; + +template <> +class particles_generic_interp<double, 6,2>{ +public: + using real_number = double; + + void compute_beta(const int in_derivative, const double in_part_val, double poly_val[]) const { + beta_n6_m2(in_derivative, in_part_val, poly_val); + } +}; + + +template <> +class particles_generic_interp<double, 7,0>{ +public: + using real_number = double; + + void compute_beta(const int in_derivative, const double in_part_val, double poly_val[]) const { + beta_Lagrange_n7(in_derivative, in_part_val, poly_val); + } +}; + +template <> +class particles_generic_interp<double, 7,1>{ +public: + using real_number = double; + + void compute_beta(const int in_derivative, const double in_part_val, double poly_val[]) const { + beta_n7_m1(in_derivative, in_part_val, poly_val); + } +}; + +template <> +class particles_generic_interp<double, 7,2>{ +public: + using real_number = double; + + void compute_beta(const int in_derivative, const double in_part_val, double poly_val[]) const { + beta_n7_m2(in_derivative, in_part_val, poly_val); + } +}; + + +template <> +class particles_generic_interp<double, 8,0>{ +public: + using real_number = double; + + void compute_beta(const int in_derivative, const double in_part_val, double poly_val[]) const { + beta_Lagrange_n8(in_derivative, in_part_val, poly_val); + } +}; + +template <> +class particles_generic_interp<double, 8,1>{ +public: + using real_number = double; + + void compute_beta(const int in_derivative, const double in_part_val, double poly_val[]) const { + beta_n8_m1(in_derivative, 
in_part_val, poly_val); + } +}; + +template <> +class particles_generic_interp<double, 8,2>{ +public: + using real_number = double; + + void compute_beta(const int in_derivative, const double in_part_val, double poly_val[]) const { + beta_n8_m2(in_derivative, in_part_val, poly_val); + } +}; + + +template <> +class particles_generic_interp<double, 9, 0>{ +public: + using real_number = double; + + void compute_beta(const int in_derivative, const double in_part_val, double poly_val[]) const { + beta_Lagrange_n9(in_derivative, in_part_val, poly_val); + } +}; + +template <> +class particles_generic_interp<double, 9,1>{ +public: + using real_number = double; + + void compute_beta(const int in_derivative, const double in_part_val, double poly_val[]) const { + beta_n9_m1(in_derivative, in_part_val, poly_val); + } +}; + +template <> +class particles_generic_interp<double, 9,2>{ +public: + using real_number = double; + + void compute_beta(const int in_derivative, const double in_part_val, double poly_val[]) const { + beta_n9_m2(in_derivative, in_part_val, poly_val); + } +}; + + +template <> +class particles_generic_interp<double, 10,0>{ +public: + using real_number = double; + + void compute_beta(const int in_derivative, const double in_part_val, double poly_val[]) const { + beta_Lagrange_n10(in_derivative, in_part_val, poly_val); + } +}; + +template <> +class particles_generic_interp<double, 10,1>{ +public: + using real_number = double; + + void compute_beta(const int in_derivative, const double in_part_val, double poly_val[]) const { + beta_n10_m1(in_derivative, in_part_val, poly_val); + } +}; + +template <> +class particles_generic_interp<double, 10,2>{ +public: + using real_number = double; + + void compute_beta(const int in_derivative, const double in_part_val, double poly_val[]) const { + beta_n10_m2(in_derivative, in_part_val, poly_val); + } +}; + +#endif//PARTICLES_INTERP_SPLINE_HPP + diff --git a/bfps/cpp/particles/particles_input_hdf5.hpp 
b/bfps/cpp/particles/particles_input_hdf5.hpp new file mode 100644 index 0000000000000000000000000000000000000000..32cfec05ad854cd7f3ffd88d771418d0552237d8 --- /dev/null +++ b/bfps/cpp/particles/particles_input_hdf5.hpp @@ -0,0 +1,312 @@ +#ifndef PARTICLES_INPUT_HDF5_HPP +#define PARTICLES_INPUT_HDF5_HPP + +#include <tuple> +#include <mpi.h> +#include <hdf5.h> +#include <cassert> +#include <vector> + +#include "abstract_particles_input.hpp" +#include "base.hpp" +#include "alltoall_exchanger.hpp" +#include "particles_utils.hpp" +#include "scope_timer.hpp" + + +// why is "size_particle_rhs" a template parameter? +// I think it's safe to assume this will always be 3. +template <class partsize_t, class real_number, int size_particle_positions, int size_particle_rhs> +class particles_input_hdf5 : public abstract_particles_input<partsize_t, real_number> { + const std::string filename; + + MPI_Comm mpi_comm; + int my_rank; + int nb_processes; + + hsize_t nb_total_particles; + hsize_t nb_rhs; + partsize_t nb_particles_for_me; + + std::unique_ptr<real_number[]> my_particles_positions; + std::unique_ptr<partsize_t[]> my_particles_indexes; + std::vector<std::unique_ptr<real_number[]>> my_particles_rhs; + + static std::vector<real_number> BuildLimitsAllProcesses(MPI_Comm mpi_comm, + const real_number my_spatial_low_limit, const real_number my_spatial_up_limit){ + int my_rank; + int nb_processes; + + AssertMpi(MPI_Comm_rank(mpi_comm, &my_rank)); + AssertMpi(MPI_Comm_size(mpi_comm, &nb_processes)); + + std::vector<real_number> spatial_limit_per_proc(nb_processes*2); + + real_number intervalToSend[2] = {my_spatial_low_limit, my_spatial_up_limit}; + AssertMpi(MPI_Allgather(intervalToSend, 2, particles_utils::GetMpiType(real_number()), + spatial_limit_per_proc.data(), 2, particles_utils::GetMpiType(real_number()), mpi_comm)); + + for(int idx_proc = 0; idx_proc < nb_processes-1 ; ++idx_proc){ + assert(spatial_limit_per_proc[idx_proc*2] <= spatial_limit_per_proc[idx_proc*2+1]); + 
assert(spatial_limit_per_proc[idx_proc*2+1] == spatial_limit_per_proc[(idx_proc+1)*2]); + spatial_limit_per_proc[idx_proc+1] = spatial_limit_per_proc[idx_proc*2+1]; + } + spatial_limit_per_proc[nb_processes] = spatial_limit_per_proc[(nb_processes-1)*2+1]; + spatial_limit_per_proc.resize(nb_processes+1); + + return spatial_limit_per_proc; + } + +public: + particles_input_hdf5(const MPI_Comm in_mpi_comm,const std::string& inFilename, + const std::string& inDatanameState, const std::string& inDatanameRhs, + const real_number my_spatial_low_limit, const real_number my_spatial_up_limit) + : particles_input_hdf5(in_mpi_comm, inFilename, inDatanameState, inDatanameRhs, + BuildLimitsAllProcesses(in_mpi_comm, my_spatial_low_limit, my_spatial_up_limit)){ + } + + particles_input_hdf5(const MPI_Comm in_mpi_comm,const std::string& inFilename, + const std::string& inDatanameState, const std::string& inDatanameRhs, + const std::vector<real_number>& in_spatial_limit_per_proc) + : filename(inFilename), + mpi_comm(in_mpi_comm), my_rank(-1), nb_processes(-1), nb_total_particles(0), + nb_particles_for_me(0){ + TIMEZONE("particles_input_hdf5"); + + AssertMpi(MPI_Comm_rank(mpi_comm, &my_rank)); + AssertMpi(MPI_Comm_size(mpi_comm, &nb_processes)); + assert(int(in_spatial_limit_per_proc.size()) == nb_processes+1); + + hid_t plist_id_par = H5Pcreate(H5P_FILE_ACCESS); + assert(plist_id_par >= 0); + { + int retTest = H5Pset_fapl_mpio(plist_id_par, mpi_comm, MPI_INFO_NULL); + assert(retTest >= 0); + } + + hid_t particle_file = H5Fopen(filename.c_str(), H5F_ACC_RDONLY, plist_id_par); + assert(particle_file >= 0); + + { + TIMEZONE("state"); + hid_t dset = H5Dopen(particle_file, inDatanameState.c_str(), H5P_DEFAULT); + assert(dset >= 0); + + hid_t dspace = H5Dget_space(dset); // copy? 
+ assert(dspace >= 0); + + hid_t space_dim = H5Sget_simple_extent_ndims(dspace); + assert(space_dim >= 2); + + std::vector<hsize_t> state_dim_array(space_dim); + int hdfret = H5Sget_simple_extent_dims(dspace, &state_dim_array[0], NULL); + assert(hdfret >= 0); + // Last value is the position dim of the particles + assert(state_dim_array.back() == size_particle_positions); + + nb_total_particles = 1; + for (size_t idx_dim = 0; idx_dim < state_dim_array.size()-1; ++idx_dim){ + nb_total_particles *= state_dim_array[idx_dim]; + } + + hdfret = H5Sclose(dspace); + assert(hdfret >= 0); + hdfret = H5Dclose(dset); + assert(hdfret >= 0); + } + { + TIMEZONE("rhs"); + hid_t dset = H5Dopen(particle_file, inDatanameRhs.c_str(), H5P_DEFAULT); + assert(dset >= 0); + hid_t dspace = H5Dget_space(dset); // copy? + assert(dspace >= 0); + + hid_t rhs_dim = H5Sget_simple_extent_ndims(dspace); + // Chichi comment: this assertion will fail in general, there's no reason for it. + //assert(rhs_dim == 4); + std::vector<hsize_t> rhs_dim_array(rhs_dim); + + // Chichi comment: wouldn't &rhs_dim_array.front() be safer? 
+ int hdfret = H5Sget_simple_extent_dims(dspace, &rhs_dim_array[0], NULL); + assert(hdfret >= 0); + assert(rhs_dim_array.back() == size_particle_rhs); + // Chichi comment: this assertion will fail in general + //assert(rhs_dim_array.front() == 1); + nb_rhs = rhs_dim_array[0]; + + hdfret = H5Sclose(dspace); + assert(hdfret >= 0); + hdfret = H5Dclose(dset); + assert(hdfret >= 0); + } + + particles_utils::IntervalSplitter<hsize_t> load_splitter(nb_total_particles, nb_processes, my_rank); + + static_assert(std::is_same<real_number, double>::value + || std::is_same<real_number, float>::value, "real_number must be double or float"); + const hid_t type_id = (sizeof(real_number) == 8?H5T_NATIVE_DOUBLE:H5T_NATIVE_FLOAT); + + /// Load the data + std::unique_ptr<real_number[]> split_particles_positions(new real_number[load_splitter.getMySize()*size_particle_positions]); + { + TIMEZONE("state-read"); + hid_t dset = H5Dopen(particle_file, inDatanameState.c_str(), H5P_DEFAULT); + assert(dset >= 0); + + hid_t rspace = H5Dget_space(dset); + assert(rspace >= 0); + + hsize_t offset[2] = {load_splitter.getMyOffset(), 0}; + hsize_t mem_dims[2] = {load_splitter.getMySize(), 3}; + + hid_t mspace = H5Screate_simple(2, &mem_dims[0], NULL); + assert(mspace >= 0); + + int rethdf = H5Sselect_hyperslab(rspace, H5S_SELECT_SET, offset, + NULL, mem_dims, NULL); + assert(rethdf >= 0); + rethdf = H5Dread(dset, type_id, mspace, rspace, H5P_DEFAULT, split_particles_positions.get()); + assert(rethdf >= 0); + + rethdf = H5Sclose(rspace); + assert(rethdf >= 0); + rethdf = H5Dclose(dset); + assert(rethdf >= 0); + } + std::vector<std::unique_ptr<real_number[]>> split_particles_rhs(nb_rhs); + { + TIMEZONE("rhs-read"); + hid_t dset = H5Dopen(particle_file, inDatanameRhs.c_str(), H5P_DEFAULT); + assert(dset >= 0); + + for(hsize_t idx_rhs = 0 ; idx_rhs < nb_rhs ; ++idx_rhs){ + hid_t rspace = H5Dget_space(dset); + assert(rspace >= 0); + + split_particles_rhs[idx_rhs].reset(new 
real_number[load_splitter.getMySize()*size_particle_rhs]); + + hsize_t offset[3] = {idx_rhs, load_splitter.getMyOffset(), 0}; + hsize_t mem_dims[3] = {1, load_splitter.getMySize(), size_particle_rhs}; + + hid_t mspace = H5Screate_simple( 3, &mem_dims[0], NULL); + assert(mspace >= 0); + + int rethdf = H5Sselect_hyperslab( rspace, H5S_SELECT_SET, offset, + NULL, mem_dims, NULL); + assert(rethdf >= 0); + rethdf = H5Dread(dset, type_id, mspace, rspace, H5P_DEFAULT, split_particles_rhs[idx_rhs].get()); + assert(rethdf >= 0); + + rethdf = H5Sclose(mspace); + assert(rethdf >= 0); + + rethdf = H5Sclose(rspace); + assert(rethdf >= 0); + } + int rethdf = H5Dclose(dset); + assert(rethdf >= 0); + } + + std::unique_ptr<partsize_t[]> split_particles_indexes(new partsize_t[load_splitter.getMySize()]); + for(partsize_t idx_part = 0 ; idx_part < partsize_t(load_splitter.getMySize()) ; ++idx_part){ + split_particles_indexes[idx_part] = idx_part + partsize_t(load_splitter.getMyOffset()); + } + + // Permute + std::vector<partsize_t> nb_particles_per_proc(nb_processes); + { + TIMEZONE("partition"); + + const real_number spatial_box_offset = in_spatial_limit_per_proc[0]; + const real_number spatial_box_width = in_spatial_limit_per_proc[nb_processes] - in_spatial_limit_per_proc[0]; + + partsize_t previousOffset = 0; + for(int idx_proc = 0 ; idx_proc < nb_processes-1 ; ++idx_proc){ + const real_number limitPartitionShifted = in_spatial_limit_per_proc[idx_proc+1]-spatial_box_offset; + const partsize_t localOffset = particles_utils::partition_extra<partsize_t, size_particle_positions>( + &split_particles_positions[previousOffset*size_particle_positions], + partsize_t(load_splitter.getMySize())-previousOffset, + [&](const real_number val[]){ + const real_number shiftPos = val[IDX_Z]-spatial_box_offset; + const real_number nbRepeat = floor(shiftPos/spatial_box_width); + const real_number posInBox = shiftPos - (spatial_box_width*nbRepeat); + return posInBox < limitPartitionShifted; + }, + 
[&](const partsize_t idx1, const partsize_t idx2){ + std::swap(split_particles_indexes[idx1], split_particles_indexes[idx2]); + for(int idx_rhs = 0 ; idx_rhs < int(nb_rhs) ; ++idx_rhs){ + for(int idx_val = 0 ; idx_val < size_particle_rhs ; ++idx_val){ + std::swap(split_particles_rhs[idx_rhs][idx1*size_particle_rhs + idx_val], + split_particles_rhs[idx_rhs][idx2*size_particle_rhs + idx_val]); + } + } + }, previousOffset); + + nb_particles_per_proc[idx_proc] = localOffset; + previousOffset += localOffset; + } + nb_particles_per_proc[nb_processes-1] = partsize_t(load_splitter.getMySize()) - previousOffset; + } + + { + TIMEZONE("exchanger"); + alltoall_exchanger exchanger(mpi_comm, std::move(nb_particles_per_proc)); + // nb_particles_per_processes cannot be used after due to move + nb_particles_for_me = exchanger.getTotalToRecv(); + + my_particles_positions.reset(new real_number[exchanger.getTotalToRecv()*size_particle_positions]); + exchanger.alltoallv<real_number>(split_particles_positions.get(), my_particles_positions.get(), size_particle_positions); + split_particles_positions.release(); + + my_particles_indexes.reset(new partsize_t[exchanger.getTotalToRecv()]); + exchanger.alltoallv<partsize_t>(split_particles_indexes.get(), my_particles_indexes.get()); + split_particles_indexes.release(); + + my_particles_rhs.resize(nb_rhs); + for(int idx_rhs = 0 ; idx_rhs < int(nb_rhs) ; ++idx_rhs){ + my_particles_rhs[idx_rhs].reset(new real_number[exchanger.getTotalToRecv()*size_particle_rhs]); + exchanger.alltoallv<real_number>(split_particles_rhs[idx_rhs].get(), my_particles_rhs[idx_rhs].get(), size_particle_rhs); + } + } + + { + TIMEZONE("close"); + int hdfret = H5Fclose(particle_file); + assert(hdfret >= 0); + hdfret = H5Pclose(plist_id_par); + assert(hdfret >= 0); + } + } + + ~particles_input_hdf5(){ + } + + partsize_t getTotalNbParticles() final{ + return partsize_t(nb_total_particles); + } + + partsize_t getLocalNbParticles() final{ + return nb_particles_for_me; + } + + 
int getNbRhs() final{ + return int(nb_rhs); + } + + std::unique_ptr<real_number[]> getMyParticles() final { + assert(my_particles_positions != nullptr); + return std::move(my_particles_positions); + } + + std::vector<std::unique_ptr<real_number[]>> getMyRhs() final { + assert(my_particles_rhs.size() == nb_rhs); + return std::move(my_particles_rhs); + } + + std::unique_ptr<partsize_t[]> getMyParticlesIndexes() final { + assert(my_particles_indexes != nullptr); + return std::move(my_particles_indexes); + } +}; + +#endif diff --git a/bfps/cpp/particles/particles_output_hdf5.hpp b/bfps/cpp/particles/particles_output_hdf5.hpp new file mode 100644 index 0000000000000000000000000000000000000000..bc0a03690293668203dd78978680fdea03ab3a28 --- /dev/null +++ b/bfps/cpp/particles/particles_output_hdf5.hpp @@ -0,0 +1,316 @@ +#ifndef PARTICLES_OUTPUT_HDF5_HPP +#define PARTICLES_OUTPUT_HDF5_HPP + +#include <memory> +#include <vector> +#include <hdf5.h> + +#include "abstract_particles_output.hpp" +#include "scope_timer.hpp" + +template <class partsize_t, + class real_number, + int size_particle_positions, + int size_particle_rhs> +class particles_output_hdf5 : public abstract_particles_output<partsize_t, + real_number, + size_particle_positions, + size_particle_rhs>{ + using Parent = abstract_particles_output<partsize_t, + real_number, + size_particle_positions, + size_particle_rhs>; + + const std::string particle_species_name; + + hid_t file_id; + const partsize_t total_nb_particles; + + hid_t dset_id_state; + hid_t dset_id_rhs; + + bool use_collective_io; + +public: + particles_output_hdf5(MPI_Comm in_mpi_com, + const std::string ps_name, + const partsize_t inTotalNbParticles, + const int in_nb_rhs, + const bool in_use_collective_io = false) + : abstract_particles_output<partsize_t, + real_number, + size_particle_positions, + size_particle_rhs>( + in_mpi_com, + inTotalNbParticles, + in_nb_rhs), + particle_species_name(ps_name), + file_id(0), + 
total_nb_particles(inTotalNbParticles), + dset_id_state(0), + dset_id_rhs(0), + use_collective_io(in_use_collective_io){} + + int open_file(std::string filename){ + if(Parent::isInvolved()){ + TIMEZONE("particles_output_hdf5::open_file"); + + this->require_checkpoint_groups(filename); + + hid_t plist_id_par = H5Pcreate(H5P_FILE_ACCESS); + assert(plist_id_par >= 0); + int retTest = H5Pset_fapl_mpio( + plist_id_par, + Parent::getComWriter(), + MPI_INFO_NULL); + assert(retTest >= 0); + + // Parallel HDF5 write + file_id = H5Fopen( + filename.c_str(), + H5F_ACC_RDWR | H5F_ACC_DEBUG, + plist_id_par); + // file_id = H5Fcreate(filename.c_str(), H5F_ACC_TRUNC | H5F_ACC_DEBUG/*H5F_ACC_EXCL*/, H5P_DEFAULT/*H5F_ACC_RDWR*/, plist_id_par); + assert(file_id >= 0); + H5Pclose(plist_id_par); + + dset_id_state = H5Gopen( + file_id, + (this->particle_species_name + std::string("/state")).c_str(), + H5P_DEFAULT); + assert(dset_id_state >= 0); + dset_id_rhs = H5Gopen( + file_id, + (this->particle_species_name + std::string("/rhs")).c_str(), + H5P_DEFAULT); + assert(dset_id_rhs >= 0); + } + return EXIT_SUCCESS; + } + + ~particles_output_hdf5(){} + + int close_file(void){ + if(Parent::isInvolved()){ + TIMEZONE("particles_output_hdf5::close_file"); + + int rethdf = H5Gclose(dset_id_state); + assert(rethdf >= 0); + + rethdf = H5Gclose(dset_id_rhs); + assert(rethdf >= 0); + + rethdf = H5Fclose(file_id); + assert(rethdf >= 0); + } + return EXIT_SUCCESS; + } + + void require_checkpoint_groups(std::string filename){ + if(Parent::isInvolved()){ + if (Parent::getMyRank() == 0) + { + hid_t file_id = H5Fopen( + filename.c_str(), + H5F_ACC_RDWR | H5F_ACC_DEBUG, + H5P_DEFAULT); + assert(file_id >= 0); + bool group_exists = H5Lexists( + file_id, + this->particle_species_name.c_str(), + H5P_DEFAULT); + if (!group_exists) + { + hid_t gg = H5Gcreate( + file_id, + this->particle_species_name.c_str(), + H5P_DEFAULT, + H5P_DEFAULT, + H5P_DEFAULT); + assert(gg >= 0); + H5Gclose(gg); + } + hid_t gg = 
H5Gopen( + file_id, + this->particle_species_name.c_str(), + H5P_DEFAULT); + assert(gg >= 0); + group_exists = H5Lexists( + gg, + "state", + H5P_DEFAULT); + if (!group_exists) + { + hid_t ggg = H5Gcreate( + gg, + "state", + H5P_DEFAULT, + H5P_DEFAULT, + H5P_DEFAULT); + assert(ggg >= 0); + H5Gclose(ggg); + } + group_exists = H5Lexists( + gg, + "rhs", + H5P_DEFAULT); + if (!group_exists) + { + hid_t ggg = H5Gcreate( + gg, + "rhs", + H5P_DEFAULT, + H5P_DEFAULT, + H5P_DEFAULT); + assert(ggg >= 0); + H5Gclose(ggg); + } + H5Gclose(gg); + H5Fclose(file_id); + } + MPI_Barrier(Parent::getComWriter()); + } + } + + void write( + const int idx_time_step, + const real_number* particles_positions, + const std::unique_ptr<real_number[]>* particles_rhs, + const partsize_t nb_particles, + const partsize_t particles_idx_offset) final{ + assert(Parent::isInvolved()); + + TIMEZONE("particles_output_hdf5::write"); + + assert(particles_idx_offset < Parent::getTotalNbParticles() || (particles_idx_offset == Parent::getTotalNbParticles() && nb_particles == 0)); + assert(particles_idx_offset+nb_particles <= Parent::getTotalNbParticles()); + + static_assert(std::is_same<real_number, double>::value || + std::is_same<real_number, float>::value, + "real_number must be double or float"); + const hid_t type_id = (sizeof(real_number) == 8 ? H5T_NATIVE_DOUBLE : H5T_NATIVE_FLOAT); + + hid_t plist_id = H5Pcreate(H5P_DATASET_XFER); + assert(plist_id >= 0); + { + int rethdf = H5Pset_dxpl_mpio(plist_id, use_collective_io ? 
H5FD_MPIO_COLLECTIVE : H5FD_MPIO_INDEPENDENT); + assert(rethdf >= 0); + } + + { + assert(total_nb_particles >= 0); + assert(size_particle_positions >= 0); + const hsize_t datacount[2] = { + hsize_t(total_nb_particles), + hsize_t(size_particle_positions)}; + hid_t dataspace = H5Screate_simple(2, datacount, NULL); + assert(dataspace >= 0); + + hid_t dataset_id = H5Dcreate( dset_id_state, + std::to_string(idx_time_step).c_str(), + type_id, + dataspace, + H5P_DEFAULT, + H5P_DEFAULT, + H5P_DEFAULT); + assert(dataset_id >= 0); + + assert(nb_particles >= 0); + assert(particles_idx_offset >= 0); + const hsize_t count[2] = {hsize_t(nb_particles), size_particle_positions}; + const hsize_t offset[2] = {hsize_t(particles_idx_offset), 0}; + hid_t memspace = H5Screate_simple(2, count, NULL); + assert(memspace >= 0); + + hid_t filespace = H5Dget_space(dataset_id); + int rethdf = H5Sselect_hyperslab( + filespace, + H5S_SELECT_SET, + offset, + NULL, + count, + NULL); + assert(rethdf >= 0); + + herr_t status = H5Dwrite( + dataset_id, + type_id, + memspace, + filespace, + plist_id, + particles_positions); + assert(status >= 0); + rethdf = H5Sclose(memspace); + assert(rethdf >= 0); + rethdf = H5Dclose(dataset_id); + assert(rethdf >= 0); + rethdf = H5Sclose(filespace); + assert(rethdf >= 0); + } + { + assert(size_particle_rhs >= 0); + const hsize_t datacount[3] = {hsize_t(Parent::getNbRhs()), + hsize_t(total_nb_particles), + hsize_t(size_particle_rhs)}; + hid_t dataspace = H5Screate_simple(3, datacount, NULL); + assert(dataspace >= 0); + + hid_t dataset_id = H5Dcreate( dset_id_rhs, + std::to_string(idx_time_step).c_str(), + type_id, + dataspace, + H5P_DEFAULT, + H5P_DEFAULT, + H5P_DEFAULT); + assert(dataset_id >= 0); + + assert(particles_idx_offset >= 0); + for(int idx_rhs = 0 ; idx_rhs < Parent::getNbRhs() ; ++idx_rhs){ + const hsize_t count[3] = { + 1, + hsize_t(nb_particles), + hsize_t(size_particle_rhs)}; + const hsize_t offset[3] = { + hsize_t(idx_rhs), + 
hsize_t(particles_idx_offset), + 0}; + hid_t memspace = H5Screate_simple(3, count, NULL); + assert(memspace >= 0); + + hid_t filespace = H5Dget_space(dataset_id); + assert(filespace >= 0); + int rethdf = H5Sselect_hyperslab( + filespace, + H5S_SELECT_SET, + offset, + NULL, + count, + NULL); + assert(rethdf >= 0); + + herr_t status = H5Dwrite( + dataset_id, + type_id, + memspace, + filespace, + plist_id, + particles_rhs[idx_rhs].get()); + assert(status >= 0); + rethdf = H5Sclose(filespace); + assert(rethdf >= 0); + rethdf = H5Sclose(memspace); + assert(rethdf >= 0); + } + int rethdf = H5Dclose(dataset_id); + assert(rethdf >= 0); + } + + { + int rethdf = H5Pclose(plist_id); + assert(rethdf >= 0); + } + } +}; + +#endif//PARTICLES_OUTPUT_HDF5_HPP + diff --git a/bfps/cpp/particles/particles_output_mpiio.hpp b/bfps/cpp/particles/particles_output_mpiio.hpp new file mode 100644 index 0000000000000000000000000000000000000000..77dae6ca2f9441948ccf04f8a72e4a53d249894b --- /dev/null +++ b/bfps/cpp/particles/particles_output_mpiio.hpp @@ -0,0 +1,92 @@ +#ifndef PARTICLES_OUTPUT_MPIIO +#define PARTICLES_OUTPUT_MPIIO + +#include <memory> +#include <vector> +#include <string> +#include <cassert> + +#include "abstract_particles_output.hpp" +#include "scope_timer.hpp" +#include "particles_utils.hpp" + +template <class partsize_t, class real_number, int size_particle_positions, int size_particle_rhs> +class particles_output_mpiio : public abstract_particles_output<partsize_t, real_number, size_particle_positions, size_particle_rhs>{ + using Parent = abstract_particles_output<partsize_t, real_number, size_particle_positions, size_particle_rhs>; + + const std::string filename; + const int nb_step_prealloc; + + int current_step_in_file; + + MPI_File mpi_file; + +public: + particles_output_mpiio(MPI_Comm in_mpi_com, const std::string in_filename, const partsize_t inTotalNbParticles, + const int in_nb_rhs, const int in_nb_step_prealloc = -1) + : abstract_particles_output<partsize_t, 
real_number, size_particle_positions, size_particle_rhs>(in_mpi_com, inTotalNbParticles, in_nb_rhs), + filename(in_filename), nb_step_prealloc(in_nb_step_prealloc), current_step_in_file(0){ + if(Parent::isInvolved()){ + { + TIMEZONE("particles_output_mpiio::MPI_File_open"); + AssertMpi(MPI_File_open(Parent::getComWriter(), const_cast<char*>(filename.c_str()), + MPI_MODE_CREATE|MPI_MODE_WRONLY, MPI_INFO_NULL, &mpi_file)); + } + if(nb_step_prealloc != -1){ + TIMEZONE("particles_output_mpiio::MPI_File_set_size"); + AssertMpi(MPI_File_set_size(mpi_file, + nb_step_prealloc*Parent::getTotalNbParticles()*sizeof(real_number)*(size_particle_positions+size_particle_rhs*Parent::getNbRhs()))); + } + } + } + + ~particles_output_mpiio(){ + if(Parent::isInvolved()){ + TIMEZONE("particles_output_mpiio::MPI_File_close"); + AssertMpi(MPI_File_close(&mpi_file)); + } + } + + void write(const int /*time_step*/, const real_number* particles_positions, const std::unique_ptr<real_number[]>* particles_rhs, + const partsize_t nb_particles, const partsize_t particles_idx_offset) final{ + assert(Parent::isInvolved()); + + TIMEZONE("particles_output_mpiio::write"); + + assert(nb_step_prealloc == -1 || current_step_in_file < nb_step_prealloc); + assert(particles_idx_offset < Parent::getTotalNbParticles()); + assert(particles_idx_offset+nb_particles <= Parent::getTotalNbParticles()); + + if(nb_step_prealloc == -1){ + TIMEZONE("particles_output_mpiio::write::MPI_File_set_size"); + AssertMpi(MPI_File_set_size(mpi_file, + (current_step_in_file+1)*Parent::getTotalNbParticles()*sizeof(real_number)*(size_particle_positions+size_particle_rhs*Parent::getNbRhs()))); + } + + const MPI_Offset globalParticlesOffset = current_step_in_file*Parent::getTotalNbParticles()*(size_particle_positions+size_particle_rhs*Parent::getNbRhs()) + + nb_particles*size_particle_positions; + + const MPI_Offset writingOffset = globalParticlesOffset * sizeof(real_number); + + AssertMpi(MPI_File_write_at(mpi_file, writingOffset, 
+ const_cast<real_number*>(particles_positions), nb_particles*size_particle_positions, particles_utils::GetMpiType(real_number()), + MPI_STATUS_IGNORE)); + + for(int idx_rsh = 0 ; idx_rsh < Parent::getNbRhs() ; ++idx_rsh){ + const MPI_Offset globalParticlesOffsetOutput = current_step_in_file*Parent::getTotalNbParticles()*(size_particle_positions+size_particle_rhs) + + Parent::getTotalNbParticles()*size_particle_positions + + idx_rsh*Parent::getTotalNbParticles()*size_particle_rhs + + nb_particles*size_particle_rhs; + + const MPI_Offset writingOffsetOutput = globalParticlesOffsetOutput * sizeof(real_number); + + AssertMpi(MPI_File_write_at(mpi_file, writingOffsetOutput, + const_cast<real_number*>(particles_rhs[idx_rsh].get()), nb_particles*size_particle_rhs, particles_utils::GetMpiType(real_number()), + MPI_STATUS_IGNORE)); + } + + current_step_in_file += 1; + } +}; + +#endif diff --git a/bfps/cpp/particles/particles_output_sampling_hdf5.hpp b/bfps/cpp/particles/particles_output_sampling_hdf5.hpp new file mode 100644 index 0000000000000000000000000000000000000000..238c9acf9a16db9c36b81d3c6eb6dc2388bbf117 --- /dev/null +++ b/bfps/cpp/particles/particles_output_sampling_hdf5.hpp @@ -0,0 +1,188 @@ +#ifndef PARTICLES_OUTPUT_SAMPLING_HDF5_HPP +#define PARTICLES_OUTPUT_SAMPLING_HDF5_HPP + +#include "abstract_particles_output.hpp" + +#include <hdf5.h> + +template <class partsize_t, + class real_number, + int size_particle_positions, + int size_particle_rhs> +class particles_output_sampling_hdf5 : public abstract_particles_output<partsize_t, + real_number, + size_particle_positions, + size_particle_rhs>{ + using Parent = abstract_particles_output<partsize_t, + real_number, + size_particle_positions, + size_particle_rhs>; + + hid_t file_id, pgroup_id; + + const std::string dataset_name; + const bool use_collective_io; + +public: + static bool DatasetExistsCol(MPI_Comm in_mpi_com, + const std::string& in_filename, + const std::string& in_groupname, + const std::string& 
in_dataset_name){ + int my_rank; + AssertMpi(MPI_Comm_rank(in_mpi_com, &my_rank)); + + int dataset_exists = -1; + + if(my_rank == 0){ + // Parallel HDF5 write + hid_t file_id = H5Fopen( + in_filename.c_str(), + H5F_ACC_RDWR | H5F_ACC_DEBUG, + H5P_DEFAULT); + assert(file_id >= 0); + + dataset_exists = H5Lexists( + file_id, + (in_groupname + "/" + in_dataset_name).c_str(), + H5P_DEFAULT); + + int retTest = H5Fclose(file_id); + assert(retTest >= 0); + } + + AssertMpi(MPI_Bcast( &dataset_exists, 1, MPI_INT, 0, in_mpi_com )); + return dataset_exists; + } + + particles_output_sampling_hdf5(MPI_Comm in_mpi_com, + const partsize_t inTotalNbParticles, + const std::string& in_filename, + const std::string& in_groupname, + const std::string& in_dataset_name, + const bool in_use_collective_io = false) + : Parent(in_mpi_com, inTotalNbParticles, 1), + dataset_name(in_dataset_name), + use_collective_io(in_use_collective_io){ + if(Parent::isInvolved()){ + hid_t plist_id_par = H5Pcreate(H5P_FILE_ACCESS); + assert(plist_id_par >= 0); + int retTest = H5Pset_fapl_mpio( + plist_id_par, + Parent::getComWriter(), + MPI_INFO_NULL); + assert(retTest >= 0); + + // Parallel HDF5 write + file_id = H5Fopen( + in_filename.c_str(), + H5F_ACC_RDWR | H5F_ACC_DEBUG, + plist_id_par); + assert(file_id >= 0); + retTest = H5Pclose(plist_id_par); + assert(retTest >= 0); + + pgroup_id = H5Gopen( + file_id, + in_groupname.c_str(), + H5P_DEFAULT); + assert(pgroup_id >= 0); + } + } + + ~particles_output_sampling_hdf5(){ + if(Parent::isInvolved()){ + int retTest = H5Gclose(pgroup_id); + assert(retTest >= 0); + retTest = H5Fclose(file_id); + assert(retTest >= 0); + } + } + + void write( + const int /*idx_time_step*/, + const real_number* /*particles_positions*/, + const std::unique_ptr<real_number[]>* particles_rhs, + const partsize_t nb_particles, + const partsize_t particles_idx_offset) final{ + assert(Parent::isInvolved()); + + TIMEZONE("particles_output_hdf5::write"); + + assert(particles_idx_offset < 
Parent::getTotalNbParticles() || (particles_idx_offset == Parent::getTotalNbParticles() && nb_particles == 0)); + assert(particles_idx_offset+nb_particles <= Parent::getTotalNbParticles()); + + static_assert(std::is_same<real_number, double>::value || + std::is_same<real_number, float>::value, + "real_number must be double or float"); + const hid_t type_id = (sizeof(real_number) == 8 ? H5T_NATIVE_DOUBLE : H5T_NATIVE_FLOAT); + + hid_t plist_id = H5Pcreate(H5P_DATASET_XFER); + assert(plist_id >= 0); + { + int rethdf = H5Pset_dxpl_mpio(plist_id, use_collective_io ? H5FD_MPIO_COLLECTIVE : H5FD_MPIO_INDEPENDENT); + assert(rethdf >= 0); + } + { + assert(size_particle_rhs >= 0); + const hsize_t datacount[3] = {hsize_t(Parent::getNbRhs()), + hsize_t(Parent::getTotalNbParticles()), + hsize_t(size_particle_rhs)}; + hid_t dataspace = H5Screate_simple(3, datacount, NULL); + assert(dataspace >= 0); + + hid_t dataset_id = H5Dcreate( pgroup_id, + dataset_name.c_str(), + type_id, + dataspace, + H5P_DEFAULT, + H5P_DEFAULT, + H5P_DEFAULT); + assert(dataset_id >= 0); + + assert(particles_idx_offset >= 0); + const hsize_t count[3] = { + 1, + hsize_t(nb_particles), + hsize_t(size_particle_rhs)}; + const hsize_t offset[3] = { + 0, + hsize_t(particles_idx_offset), + 0}; + hid_t memspace = H5Screate_simple(3, count, NULL); + assert(memspace >= 0); + + hid_t filespace = H5Dget_space(dataset_id); + assert(filespace >= 0); + int rethdf = H5Sselect_hyperslab( + filespace, + H5S_SELECT_SET, + offset, + NULL, + count, + NULL); + assert(rethdf >= 0); + + herr_t status = H5Dwrite( + dataset_id, + type_id, + memspace, + filespace, + plist_id, + particles_rhs[0].get()); + assert(status >= 0); + rethdf = H5Sclose(filespace); + assert(rethdf >= 0); + rethdf = H5Sclose(memspace); + assert(rethdf >= 0); + rethdf = H5Dclose(dataset_id); + assert(rethdf >= 0); + } + + { + int rethdf = H5Pclose(plist_id); + assert(rethdf >= 0); + } + } +}; + +#endif diff --git a/bfps/cpp/particles/particles_sampling.hpp 
b/bfps/cpp/particles/particles_sampling.hpp new file mode 100644 index 0000000000000000000000000000000000000000..3adc255341f3ca879d5cae1445124091f31b4394 --- /dev/null +++ b/bfps/cpp/particles/particles_sampling.hpp @@ -0,0 +1,52 @@ +#ifndef PARTICLES_SAMPLING_HPP +#define PARTICLES_SAMPLING_HPP + +#include <memory> +#include <string> + +#include "abstract_particles_system.hpp" +#include "particles_output_sampling_hdf5.hpp" + +#include "field.hpp" +#include "kspace.hpp" + + +template <class partsize_t, class particles_rnumber, class rnumber, field_backend be, field_components fc> +void sample_from_particles_system(const field<rnumber, be, fc>& in_field, // a pointer to a field<rnumber, FFTW, fc> + std::unique_ptr<abstract_particles_system<partsize_t, particles_rnumber>>& ps, // a pointer to an particles_system<double> + const std::string& filename, + const std::string& parent_groupname, + const std::string& fname){ + const std::string datasetname = fname + std::string("/") + std::to_string(ps->get_step_idx()); + const int size_particle_rhs = ncomp(fc); + + // Stop here if already exists + if(particles_output_sampling_hdf5<partsize_t, particles_rnumber, 3, size_particle_rhs>::DatasetExistsCol(MPI_COMM_WORLD, + filename, + parent_groupname, + datasetname)){ + return; + } + + const partsize_t nb_particles = ps->getLocalNbParticles(); + std::unique_ptr<particles_rnumber[]> sample_rhs(new particles_rnumber[size_particle_rhs*nb_particles]); + std::fill_n(sample_rhs.get(), size_particle_rhs*nb_particles, 0); + + ps->sample_compute_field(in_field, sample_rhs.get()); + + + + particles_output_sampling_hdf5<partsize_t, particles_rnumber, 3, size_particle_rhs> outputclass(MPI_COMM_WORLD, + ps->getGlobalNbParticles(), + filename, + parent_groupname, + datasetname); + outputclass.save(ps->getParticlesPositions(), + &sample_rhs, + ps->getParticlesIndexes(), + ps->getLocalNbParticles(), + ps->get_step_idx()); +} + +#endif + diff --git a/bfps/cpp/particles/particles_system.hpp 
b/bfps/cpp/particles/particles_system.hpp new file mode 100644 index 0000000000000000000000000000000000000000..02767a8b433ecb8365f4a0577d1c0d6508c2bed1 --- /dev/null +++ b/bfps/cpp/particles/particles_system.hpp @@ -0,0 +1,259 @@ +#ifndef PARTICLES_SYSTEM_HPP +#define PARTICLES_SYSTEM_HPP + +#include <array> + +#include "abstract_particles_system.hpp" +#include "particles_distr_mpi.hpp" +#include "particles_output_hdf5.hpp" +#include "particles_output_mpiio.hpp" +#include "particles_field_computer.hpp" +#include "abstract_particles_input.hpp" +#include "particles_adams_bashforth.hpp" +#include "scope_timer.hpp" + +template <class partsize_t, class real_number, class field_rnumber, class field_class, class interpolator_class, int interp_neighbours, + int size_particle_rhs> +class particles_system : public abstract_particles_system<partsize_t, real_number> { + MPI_Comm mpi_com; + + const std::pair<int,int> current_partition_interval; + const int partition_interval_size; + + interpolator_class interpolator; + + particles_distr_mpi<partsize_t, real_number> particles_distr; + + particles_adams_bashforth<partsize_t, real_number, 3, size_particle_rhs> positions_updater; + + using computer_class = particles_field_computer<partsize_t, real_number, interpolator_class, interp_neighbours>; + computer_class computer; + + field_class default_field; + + std::unique_ptr<partsize_t[]> current_my_nb_particles_per_partition; + std::unique_ptr<partsize_t[]> current_offset_particles_for_partition; + + const std::array<real_number,3> spatial_box_width; + const std::array<real_number,3> spatial_partition_width; + const real_number my_spatial_low_limit; + const real_number my_spatial_up_limit; + + std::unique_ptr<real_number[]> my_particles_positions; + std::unique_ptr<partsize_t[]> my_particles_positions_indexes; + partsize_t my_nb_particles; + const partsize_t total_nb_particles; + std::vector<std::unique_ptr<real_number[]>> my_particles_rhs; + + int step_idx; + +public: + 
particles_system(const std::array<size_t,3>& field_grid_dim, const std::array<real_number,3>& in_spatial_box_width, + const std::array<real_number,3>& in_spatial_box_offset, + const std::array<real_number,3>& in_spatial_partition_width, + const real_number in_my_spatial_low_limit, const real_number in_my_spatial_up_limit, + const std::array<size_t,3>& in_local_field_dims, + const std::array<size_t,3>& in_local_field_offset, + const field_class& in_field, + MPI_Comm in_mpi_com, + const partsize_t in_total_nb_particles, + const int in_current_iteration = 1) + : mpi_com(in_mpi_com), + current_partition_interval({in_local_field_offset[IDX_Z], in_local_field_offset[IDX_Z] + in_local_field_dims[IDX_Z]}), + partition_interval_size(current_partition_interval.second - current_partition_interval.first), + interpolator(), + particles_distr(in_mpi_com, current_partition_interval,field_grid_dim), + positions_updater(), + computer(field_grid_dim, current_partition_interval, + interpolator, in_spatial_box_width, in_spatial_box_offset, in_spatial_partition_width), + default_field(in_field), + spatial_box_width(in_spatial_box_width), spatial_partition_width(in_spatial_partition_width), + my_spatial_low_limit(in_my_spatial_low_limit), my_spatial_up_limit(in_my_spatial_up_limit), + my_nb_particles(0), total_nb_particles(in_total_nb_particles), step_idx(in_current_iteration){ + + current_my_nb_particles_per_partition.reset(new partsize_t[partition_interval_size]); + current_offset_particles_for_partition.reset(new partsize_t[partition_interval_size+1]); + } + + ~particles_system(){ + } + + void init(abstract_particles_input<partsize_t, real_number>& particles_input) { + TIMEZONE("particles_system::init"); + + my_particles_positions = particles_input.getMyParticles(); + my_particles_positions_indexes = particles_input.getMyParticlesIndexes(); + my_particles_rhs = particles_input.getMyRhs(); + my_nb_particles = particles_input.getLocalNbParticles(); + + for(partsize_t idx_part = 0 ; 
idx_part < my_nb_particles ; ++idx_part){ // TODO remove me + const int partition_level = computer.pbc_field_layer(my_particles_positions[idx_part*3+IDX_Z], IDX_Z); + assert(partition_level >= current_partition_interval.first); + assert(partition_level < current_partition_interval.second); + } + + particles_utils::partition_extra_z<partsize_t, 3>(&my_particles_positions[0], my_nb_particles, partition_interval_size, + current_my_nb_particles_per_partition.get(), current_offset_particles_for_partition.get(), + [&](const real_number& z_pos){ + const int partition_level = computer.pbc_field_layer(z_pos, IDX_Z); + assert(current_partition_interval.first <= partition_level && partition_level < current_partition_interval.second); + return partition_level - current_partition_interval.first; + }, + [&](const partsize_t idx1, const partsize_t idx2){ + std::swap(my_particles_positions_indexes[idx1], my_particles_positions_indexes[idx2]); + for(int idx_rhs = 0 ; idx_rhs < int(my_particles_rhs.size()) ; ++idx_rhs){ + for(int idx_val = 0 ; idx_val < size_particle_rhs ; ++idx_val){ + std::swap(my_particles_rhs[idx_rhs][idx1*size_particle_rhs + idx_val], + my_particles_rhs[idx_rhs][idx2*size_particle_rhs + idx_val]); + } + } + }); + + {// TODO remove + for(int idxPartition = 0 ; idxPartition < partition_interval_size ; ++idxPartition){ + assert(current_my_nb_particles_per_partition[idxPartition] == + current_offset_particles_for_partition[idxPartition+1] - current_offset_particles_for_partition[idxPartition]); + for(partsize_t idx = current_offset_particles_for_partition[idxPartition] ; idx < current_offset_particles_for_partition[idxPartition+1] ; ++idx){ + assert(computer.pbc_field_layer(my_particles_positions[idx*3+IDX_Z], IDX_Z)-current_partition_interval.first == idxPartition); + } + } + } + } + + + void compute() final { + TIMEZONE("particles_system::compute"); + particles_distr.template compute_distr<computer_class, field_class, 3, size_particle_rhs>( + computer, 
default_field, + current_my_nb_particles_per_partition.get(), + my_particles_positions.get(), + my_particles_rhs.front().get(), + interp_neighbours); + } + + template <class sample_field_class, int sample_size_particle_rhs> + void sample_compute(const sample_field_class& sample_field, + real_number sample_rhs[]) { + TIMEZONE("particles_system::compute"); + particles_distr.template compute_distr<computer_class, sample_field_class, 3, sample_size_particle_rhs>( + computer, sample_field, + current_my_nb_particles_per_partition.get(), + my_particles_positions.get(), + sample_rhs, + interp_neighbours); + } + + //- Not generic to enable sampling begin + void sample_compute_field(const field<float, FFTW, ONE>& sample_field, + real_number sample_rhs[]) final { + // sample_compute<decltype(sample_field), 1>(sample_field, sample_rhs); + } + void sample_compute_field(const field<float, FFTW, THREE>& sample_field, + real_number sample_rhs[]) final { + sample_compute<decltype(sample_field), 3>(sample_field, sample_rhs); + } + void sample_compute_field(const field<float, FFTW, THREExTHREE>& sample_field, + real_number sample_rhs[]) final { + sample_compute<decltype(sample_field), 9>(sample_field, sample_rhs); + } + void sample_compute_field(const field<double, FFTW, ONE>& sample_field, + real_number sample_rhs[]) final { + sample_compute<decltype(sample_field), 1>(sample_field, sample_rhs); + } + void sample_compute_field(const field<double, FFTW, THREE>& sample_field, + real_number sample_rhs[]) final { + sample_compute<decltype(sample_field), 3>(sample_field, sample_rhs); + } + void sample_compute_field(const field<double, FFTW, THREExTHREE>& sample_field, + real_number sample_rhs[]) final { + sample_compute<decltype(sample_field), 9>(sample_field, sample_rhs); + } + //- Not generic to enable sampling end + + void move(const real_number dt) final { + TIMEZONE("particles_system::move"); + positions_updater.move_particles(my_particles_positions.get(), my_nb_particles, + 
my_particles_rhs.data(), std::min(step_idx,int(my_particles_rhs.size())), + dt); + } + + void redistribute() final { + TIMEZONE("particles_system::redistribute"); + particles_distr.template redistribute<computer_class, 3, size_particle_rhs, 1>( + computer, + current_my_nb_particles_per_partition.get(), + &my_nb_particles, + &my_particles_positions, + my_particles_rhs.data(), int(my_particles_rhs.size()), + &my_particles_positions_indexes); + } + + void inc_step_idx() final { + step_idx += 1; + } + + int get_step_idx() const final { + return step_idx; + } + + void shift_rhs_vectors() final { + if(my_particles_rhs.size()){ + std::unique_ptr<real_number[]> next_current(std::move(my_particles_rhs.back())); + for(int idx_rhs = int(my_particles_rhs.size())-1 ; idx_rhs > 0 ; --idx_rhs){ + my_particles_rhs[idx_rhs] = std::move(my_particles_rhs[idx_rhs-1]); + } + my_particles_rhs[0] = std::move(next_current); + particles_utils::memzero(my_particles_rhs[0], size_particle_rhs*my_nb_particles); + } + } + + void completeLoop(const real_number dt) final { + TIMEZONE("particles_system::completeLoop"); + compute(); + move(dt); + redistribute(); + inc_step_idx(); + shift_rhs_vectors(); + } + + const real_number* getParticlesPositions() const final { + return my_particles_positions.get(); + } + + const std::unique_ptr<real_number[]>* getParticlesRhs() const final { + return my_particles_rhs.data(); + } + + const partsize_t* getParticlesIndexes() const final { + return my_particles_positions_indexes.get(); + } + + partsize_t getLocalNbParticles() const final { + return my_nb_particles; + } + + partsize_t getGlobalNbParticles() const final { + return total_nb_particles; + } + + int getNbRhs() const final { + return int(my_particles_rhs.size()); + } + + void checkNan() const { // TODO remove + for(partsize_t idx_part = 0 ; idx_part < my_nb_particles ; ++idx_part){ // TODO remove me + assert(std::isnan(my_particles_positions[idx_part*3+IDX_X]) == false); + 
assert(std::isnan(my_particles_positions[idx_part*3+IDX_Y]) == false); + assert(std::isnan(my_particles_positions[idx_part*3+IDX_Z]) == false); + + for(int idx_rhs = 0 ; idx_rhs < my_particles_rhs.size() ; ++idx_rhs){ + for(int idx_rhs_val = 0 ; idx_rhs_val < size_particle_rhs ; ++idx_rhs_val){ + assert(std::isnan(my_particles_rhs[idx_rhs][idx_part*size_particle_rhs+idx_rhs_val]) == false); + } + } + } + } +}; + + +#endif diff --git a/bfps/cpp/particles/particles_system_builder.hpp b/bfps/cpp/particles/particles_system_builder.hpp new file mode 100644 index 0000000000000000000000000000000000000000..7a2d49c07c3a6de21fb93d83b338609be858f0dc --- /dev/null +++ b/bfps/cpp/particles/particles_system_builder.hpp @@ -0,0 +1,260 @@ +#ifndef PARTICLES_SYSTEM_BUILDER_HPP +#define PARTICLES_SYSTEM_BUILDER_HPP + +#include <string> + +#include "abstract_particles_system.hpp" +#include "particles_system.hpp" +#include "particles_input_hdf5.hpp" +#include "particles_generic_interp.hpp" + +#include "field.hpp" +#include "kspace.hpp" + + + +////////////////////////////////////////////////////////////////////////////// +/// +/// Double template "for" +/// +////////////////////////////////////////////////////////////////////////////// + +namespace Template_double_for_if{ + +template <class RetType, + class IterType1, IterType1 CurrentIter1, + class IterType2, const IterType2 CurrentIter2, const IterType2 iterTo2, const IterType2 IterStep2, + class Func, bool IsNotOver, typename... Args> +struct For2{ + static RetType evaluate(IterType2 value2, Args... 
args){ + if(CurrentIter2 == value2){ + return std::move(Func::template instanciate<CurrentIter1, CurrentIter2>(args...)); + } + else{ + return std::move(For2<RetType, + IterType1, CurrentIter1, + IterType2, CurrentIter2+IterStep2, iterTo2, IterStep2, + Func, (CurrentIter2+IterStep2 < iterTo2), Args...>::evaluate(value2, args...)); + } + } +}; + +template <class RetType, + class IterType1, IterType1 CurrentIter1, + class IterType2, const IterType2 CurrentIter2, const IterType2 iterTo2, const IterType2 IterStep2, + class Func, typename... Args> +struct For2<RetType, + IterType1, CurrentIter1, + IterType2, CurrentIter2, iterTo2, IterStep2, + Func, false, Args...>{ + static RetType evaluate(IterType2 value2, Args... args){ + std::cout << __FUNCTION__ << "[ERROR] template values for loop 2 " << value2 << " does not exist\n"; + return RetType(); + } +}; + +template <class RetType, + class IterType1, const IterType1 CurrentIter1, const IterType1 iterTo1, const IterType1 IterStep1, + class IterType2, const IterType2 IterFrom2, const IterType2 iterTo2, const IterType2 IterStep2, + class Func, bool IsNotOver, typename... Args> +struct For1{ + static RetType evaluate(IterType1 value1, IterType2 value2, Args... args){ + if(CurrentIter1 == value1){ + return std::move(For2<RetType, + IterType1, CurrentIter1, + IterType2, IterFrom2, iterTo2, IterStep2, + Func, (IterFrom2<iterTo2), Args...>::evaluate(value2, args...)); + } + else{ + return std::move(For1<RetType, + IterType1, CurrentIter1+IterStep1, iterTo1, IterStep1, + IterType2, IterFrom2, iterTo2, IterStep2, + Func, (CurrentIter1+IterStep1 < iterTo1), Args...>::evaluate(value1, value2, args...)); + } + } +}; + +template <class RetType, + class IterType1, const IterType1 IterFrom1, const IterType1 iterTo1, const IterType1 IterStep1, + class IterType2, const IterType2 IterFrom2, const IterType2 iterTo2, const IterType2 IterStep2, + class Func, typename... 
Args> +struct For1<RetType, + IterType1, IterFrom1, iterTo1, IterStep1, + IterType2, IterFrom2, iterTo2, IterStep2, + Func, false, Args...>{ + static RetType evaluate(IterType1 value1, IterType2 value2, Args... args){ + std::cout << __FUNCTION__ << "[ERROR] template values for loop 1 " << value1 << " does not exist\n"; + return RetType(); + } +}; + +template <class RetType, + class IterType1, const IterType1 IterFrom1, const IterType1 iterTo1, const IterType1 IterStep1, + class IterType2, const IterType2 IterFrom2, const IterType2 iterTo2, const IterType2 IterStep2, + class Func, typename... Args> +inline RetType evaluate(IterType1 value1, IterType2 value2, Args... args){ + return std::move(For1<RetType, + IterType1, IterFrom1, iterTo1, IterStep1, + IterType2, IterFrom2, iterTo2, IterStep2, + Func, (IterFrom1<iterTo1), Args...>::evaluate(value1, value2, args...)); +} + +} + + +////////////////////////////////////////////////////////////////////////////// +/// +/// Builder Functions +/// +////////////////////////////////////////////////////////////////////////////// + +template <class partsize_t, class field_rnumber, field_backend be, field_components fc, class particles_rnumber> +struct particles_system_build_container { + template <const int interpolation_size, const int spline_mode> + static std::unique_ptr<abstract_particles_system<partsize_t, particles_rnumber>> instanciate( + const field<field_rnumber, be, fc>* fs_field, // (field object) + const kspace<be, SMOOTH>* fs_kk, // (kspace object, contains dkx, dky, dkz) + const int nsteps, // to check coherency between parameters and hdf input file (nb rhs) + const partsize_t nparticles, // to check coherency between parameters and hdf input file + const std::string& fname_input, // particles input filename + const std::string& inDatanameState, const std::string& inDatanameRhs, // input dataset names + MPI_Comm mpi_comm, + const int in_current_iteration){ + + // The size of the field grid (global size) all_size 
seems + std::array<size_t,3> field_grid_dim; + field_grid_dim[IDX_X] = fs_field->rlayout->sizes[FIELD_IDX_X];// nx + field_grid_dim[IDX_Y] = fs_field->rlayout->sizes[FIELD_IDX_Y];// ny + field_grid_dim[IDX_Z] = fs_field->rlayout->sizes[FIELD_IDX_Z];// nz + + // The size of the local field grid (the field nodes that belong to current process) + std::array<size_t,3> local_field_dims; + local_field_dims[IDX_X] = fs_field->rlayout->subsizes[FIELD_IDX_X]; + local_field_dims[IDX_Y] = fs_field->rlayout->subsizes[FIELD_IDX_Y]; + local_field_dims[IDX_Z] = fs_field->rlayout->subsizes[FIELD_IDX_Z]; + + // The offset of the local field grid + std::array<size_t,3> local_field_offset; + local_field_offset[IDX_X] = fs_field->rlayout->starts[FIELD_IDX_X]; + local_field_offset[IDX_Y] = fs_field->rlayout->starts[FIELD_IDX_Y]; + local_field_offset[IDX_Z] = fs_field->rlayout->starts[FIELD_IDX_Z]; + + + // Retrieve the split from fftw to know which processes have no work + int my_rank, nb_processes; + AssertMpi(MPI_Comm_rank(mpi_comm, &my_rank)); + AssertMpi(MPI_Comm_size(mpi_comm, &nb_processes)); + + const int split_step = (int(field_grid_dim[IDX_Z])+nb_processes-1)/nb_processes; + const int nb_processes_involved = (int(field_grid_dim[IDX_Z])+split_step-1)/split_step; + + assert((my_rank < nb_processes_involved && local_field_dims[IDX_Z] != 0) + || (nb_processes_involved <= my_rank && local_field_dims[IDX_Z] == 0)); + assert(nb_processes_involved <= int(field_grid_dim[IDX_Z])); + + // Make the idle processes start from the limit (and not 0 as set by fftw) + if(nb_processes_involved <= my_rank){ + local_field_offset[IDX_Z] = field_grid_dim[IDX_Z]; + } + + // Ensure that 1D partitioning is used + { + assert(local_field_offset[IDX_X] == 0); + assert(local_field_offset[IDX_Y] == 0); + assert(local_field_dims[IDX_X] == field_grid_dim[IDX_X]); + assert(local_field_dims[IDX_Y] == field_grid_dim[IDX_Y]); + + assert(my_rank >= nb_processes_involved || ((my_rank == 0 && local_field_offset[IDX_Z] 
== 0) + || (my_rank != 0 && local_field_offset[IDX_Z] != 0))); + assert(my_rank >= nb_processes_involved || ((my_rank == nb_processes_involved-1 && local_field_offset[IDX_Z]+local_field_dims[IDX_Z] == field_grid_dim[IDX_Z]) + || (my_rank != nb_processes_involved-1 && local_field_offset[IDX_Z]+local_field_dims[IDX_Z] != field_grid_dim[IDX_Z]))); + } + + // The spatial box size (all particles should be included inside) + std::array<particles_rnumber,3> spatial_box_width; + spatial_box_width[IDX_X] = 4 * acos(0) / (fs_kk->dkx); + spatial_box_width[IDX_Y] = 4 * acos(0) / (fs_kk->dky); + spatial_box_width[IDX_Z] = 4 * acos(0) / (fs_kk->dkz); + + // Box is in the corner + std::array<particles_rnumber,3> spatial_box_offset; + spatial_box_offset[IDX_X] = 0; + spatial_box_offset[IDX_Y] = 0; + spatial_box_offset[IDX_Z] = 0; + + // The distance between two field nodes in z + std::array<particles_rnumber,3> spatial_partition_width; + spatial_partition_width[IDX_X] = spatial_box_width[IDX_X]/particles_rnumber(field_grid_dim[IDX_X]); + spatial_partition_width[IDX_Y] = spatial_box_width[IDX_Y]/particles_rnumber(field_grid_dim[IDX_Y]); + spatial_partition_width[IDX_Z] = spatial_box_width[IDX_Z]/particles_rnumber(field_grid_dim[IDX_Z]); + // The spatial interval of the current process + const particles_rnumber my_spatial_low_limit_z = particles_rnumber(local_field_offset[IDX_Z])*spatial_partition_width[IDX_Z]; + const particles_rnumber my_spatial_up_limit_z = particles_rnumber(local_field_offset[IDX_Z]+local_field_dims[IDX_Z])*spatial_partition_width[IDX_Z]; + + // Create the particles system + using particles_system_type = particles_system<partsize_t, particles_rnumber, field_rnumber, + field<field_rnumber, be, fc>, + particles_generic_interp<particles_rnumber, interpolation_size,spline_mode>, + interpolation_size, ncomp(fc)>; + particles_system_type* part_sys = new particles_system_type(field_grid_dim, + spatial_box_width, + spatial_box_offset, + spatial_partition_width, + 
my_spatial_low_limit_z, + my_spatial_up_limit_z, + local_field_dims, + local_field_offset, + (*fs_field), + mpi_comm, + nparticles, + in_current_iteration); + + // Load particles from hdf5 + particles_input_hdf5<partsize_t, particles_rnumber, 3,3> generator(mpi_comm, fname_input, + inDatanameState, inDatanameRhs, my_spatial_low_limit_z, my_spatial_up_limit_z); + + // Ensure the number of rhs steps in the parameters matches the input file (throws std::runtime_error otherwise) + if(generator.getNbRhs() != nsteps){ + throw std::runtime_error(std::string("Nb steps is ") + std::to_string(nsteps) + + " in the parameters but " + std::to_string(generator.getNbRhs()) + " in the particles file."); + } + // Ensure the number of particles in the parameters matches the input file (throws std::runtime_error otherwise) + if(generator.getTotalNbParticles() != nparticles){ + throw std::runtime_error(std::string("Nb particles is ") + std::to_string(nparticles) + + " in the parameters but " + std::to_string(generator.getTotalNbParticles()) + " in the particles file."); + } + + // Load the particles and move them to the particles system + part_sys->init(generator); + + assert(part_sys->getNbRhs() == nsteps); + + // Return the created particles system + return std::unique_ptr<abstract_particles_system<partsize_t, particles_rnumber>>(part_sys); + } +}; + + +template <class partsize_t, class field_rnumber, field_backend be, field_components fc, class particles_rnumber = double> +inline std::unique_ptr<abstract_particles_system<partsize_t, particles_rnumber>> particles_system_builder( + const field<field_rnumber, be, fc>* fs_field, // (field object) + const kspace<be, SMOOTH>* fs_kk, // (kspace object, contains dkx, dky, dkz) + const int nsteps, // to check coherency between parameters and hdf input file (nb rhs) + const partsize_t nparticles, // to check coherency between parameters and hdf input file + const std::string& fname_input, // particles input filename + const std::string& inDatanameState, const std::string& inDatanameRhs, // input dataset names + const int interpolation_size, + const int spline_mode, + MPI_Comm mpi_comm, + 
const int in_current_iteration){ + return Template_double_for_if::evaluate<std::unique_ptr<abstract_particles_system<partsize_t, particles_rnumber>>, + int, 1, 11, 1, // interpolation_size + int, 0, 3, 1, // spline_mode + particles_system_build_container<partsize_t, field_rnumber,be,fc,particles_rnumber>>( + interpolation_size, // template iterator 1 + spline_mode, // template iterator 2 + fs_field,fs_kk, nsteps, nparticles, fname_input, inDatanameState, inDatanameRhs, mpi_comm, in_current_iteration); +} + + +#endif diff --git a/bfps/cpp/particles/particles_utils.hpp b/bfps/cpp/particles/particles_utils.hpp new file mode 100644 index 0000000000000000000000000000000000000000..146dc4399477b72c30329edff587d35d7b44d69d --- /dev/null +++ b/bfps/cpp/particles/particles_utils.hpp @@ -0,0 +1,317 @@ +#ifndef PARTICLES_UTILS_HPP +#define PARTICLES_UTILS_HPP + +#include <mpi.h> + +#include <cassert> +#include <stack> +#include <vector> +#include <memory> +#include <cstring> + +#if _OPENMP < 201511 +#warning Openmp priority is not supported here +#define priority(x) +#endif + + +#ifndef AssertMpi +#define AssertMpi(X) if(MPI_SUCCESS != (X)) { printf("MPI Error at line %d\n",__LINE__); fflush(stdout) ; throw std::runtime_error("Stop from from mpi erro"); } +#endif + +enum IDXS_3D { + IDX_X = 0, + IDX_Y = 1, + IDX_Z = 2 +}; + +enum FIELD_IDXS_3D { + FIELD_IDX_X = 2, + FIELD_IDX_Y = 1, + FIELD_IDX_Z = 0 +}; + +namespace particles_utils { + +class GetMpiType{ + const MPI_Datatype type; +public: + explicit GetMpiType(const long long int&) : type(MPI_LONG_LONG_INT){} + explicit GetMpiType(const unsigned char&) : type(MPI_UNSIGNED_CHAR){} + explicit GetMpiType(const unsigned short&) : type(MPI_UNSIGNED_SHORT){} + explicit GetMpiType(const unsigned int&) : type(MPI_UNSIGNED){} + explicit GetMpiType(const unsigned long&) : type(MPI_UNSIGNED_LONG){} + explicit GetMpiType(const char&) : type(MPI_CHAR){} + explicit GetMpiType(const short&) : type(MPI_SHORT){} + explicit GetMpiType(const 
int&) : type(MPI_INT){} + explicit GetMpiType(const long&) : type(MPI_LONG){} + explicit GetMpiType(const long double&) : type(MPI_LONG_DOUBLE){} + explicit GetMpiType(const double&) : type(MPI_DOUBLE){} + explicit GetMpiType(const float&) : type(MPI_FLOAT){} + + /*do not make it explicit*/ operator MPI_Datatype() const { return type; } +}; + + +template <class partsize_t, int nb_values, class real_number, class Predicate> +inline partsize_t partition(real_number* array, const partsize_t size, Predicate pdc) +{ + if(size == 0) return 0; + if(size == 1) return (pdc(&array[0])?1:0); + + partsize_t idxInsert = 0; + + for(partsize_t idx = 0 ; idx < size && pdc(&array[idx*nb_values]); ++idx){ + idxInsert += 1; + } + + for(partsize_t idx = idxInsert ; idx < size ; ++idx){ + if(pdc(&array[idx*nb_values])){ + for(int idxVal = 0 ; idxVal < nb_values ; ++idxVal){ + std::swap(array[idx*nb_values + idxVal], array[idxInsert*nb_values + idxVal]); + } + idxInsert += 1; + } + } + + return idxInsert; +} + + +template <class partsize_t, int nb_values, class real_number, class Predicate1, class Predicate2> +inline partsize_t partition_extra(real_number* array, const partsize_t size, Predicate1 pdc, Predicate2 pdcswap, const partsize_t offset_idx_swap = 0) +{ + if(size == 0) return 0; + if(size == 1) return (pdc(&array[0])?1:0); + + partsize_t idxInsert = 0; + + for(partsize_t idx = 0 ; idx < size && pdc(&array[idx*nb_values]); ++idx){ + idxInsert += 1; + } + + for(partsize_t idx = idxInsert ; idx < size ; ++idx){ + if(pdc(&array[idx*nb_values])){ + for(int idxVal = 0 ; idxVal < nb_values ; ++idxVal){ + std::swap(array[idx*nb_values + idxVal], array[idxInsert*nb_values + idxVal]); + } + pdcswap(idx+offset_idx_swap, idxInsert+offset_idx_swap); + idxInsert += 1; + } + } + + return idxInsert; +} + +template <class partsize_t, int nb_values, class real_number, class Predicate1, class Predicate2> +inline void partition_extra_z(real_number* array, const partsize_t size, const int 
nb_partitions, + partsize_t partitions_size[], partsize_t partitions_offset[], + Predicate1 partitions_levels, Predicate2 pdcswap) +{ + if(nb_partitions == 0){ + return ; + } + + partitions_offset[0] = 0; + partitions_offset[nb_partitions] = size; + + if(nb_partitions == 1){ + partitions_size[0] = size; + return; + } + + if(nb_partitions == 2){ + const partsize_t size_current = partition_extra<partsize_t, nb_values>(array, size, + [&](const real_number inval[]){ + return partitions_levels(inval[IDX_Z]) == 0; + }, pdcswap); + partitions_size[0] = size_current; + partitions_size[1] = size-size_current; + partitions_offset[1] = size_current; + return; + } + + std::stack<std::pair<int,int>> toproceed; + + toproceed.push({0, nb_partitions}); + + while(toproceed.size()){ + const std::pair<int,int> current_part = toproceed.top(); + toproceed.pop(); + + assert(current_part.second-current_part.first >= 1); + + if(current_part.second-current_part.first == 1){ + partitions_size[current_part.first] = partitions_offset[current_part.first+1] - partitions_offset[current_part.first]; + } + else{ + const int idx_middle = (current_part.second-current_part.first)/2 + current_part.first - 1; + + const partsize_t size_unpart = partitions_offset[current_part.second]- partitions_offset[current_part.first]; + + const partsize_t size_current = partition_extra<partsize_t, nb_values>(&array[partitions_offset[current_part.first]*nb_values], + size_unpart, + [&](const real_number inval[]){ + return partitions_levels(inval[IDX_Z]) <= idx_middle; + }, pdcswap, partitions_offset[current_part.first]); + + partitions_offset[idx_middle+1] = size_current + partitions_offset[current_part.first]; + + toproceed.push({current_part.first, idx_middle+1}); + + toproceed.push({idx_middle+1, current_part.second}); + } + } +} + +template <class partsize_t, int nb_values, class real_number, class Predicate1, class Predicate2> +inline std::pair<std::vector<partsize_t>,std::vector<partsize_t>> 
partition_extra_z(real_number* array, const partsize_t size, + const int nb_partitions, Predicate1 partitions_levels, + Predicate2 pdcswap){ + + std::vector<partsize_t> partitions_size(nb_partitions); + std::vector<partsize_t> partitions_offset(nb_partitions+1); + partition_extra_z<partsize_t, nb_values, real_number, Predicate1, Predicate2>(array, size, nb_partitions, + partitions_size.data(), partitions_offset.data(), + partitions_levels, pdcswap); + return {std::move(partitions_size), std::move(partitions_offset)}; +} + + +template <class NumType = int> +class IntervalSplitter { + const NumType nb_items; + const NumType nb_intervals; + const NumType my_idx; + + double step_split; + NumType offset_mine; + NumType size_mine; +public: + IntervalSplitter(const NumType in_nb_items, + const NumType in_nb_intervals, + const NumType in_my_idx) + : nb_items(in_nb_items), nb_intervals(in_nb_intervals), my_idx(in_my_idx), + step_split(0), offset_mine(0), size_mine(0){ + if(nb_items <= nb_intervals){ + step_split = 1; + if(my_idx < nb_items){ + offset_mine = my_idx; + size_mine = 1; + } + else{ + offset_mine = nb_items; + size_mine = 0; + } + } + else{ + step_split = double(nb_items)/double(nb_intervals); + if(nb_intervals <= my_idx){ + offset_mine = nb_items; + size_mine = 0; + } + else{ + offset_mine = NumType(step_split*double(my_idx)); + size_mine = (my_idx != nb_intervals-1 ? 
NumType(step_split*double(my_idx+1)) : nb_items) -offset_mine; + } + } + } + + NumType getMySize() const { + return size_mine; + } + + NumType getMyOffset() const { + return offset_mine; + } + + NumType getSizeOther(const NumType in_idx_other) const { + return IntervalSplitter<NumType>(nb_items, nb_intervals, in_idx_other).getMySize(); + } + + NumType getOffsetOther(const NumType in_idx_other) const { + return IntervalSplitter<NumType>(nb_items, nb_intervals, in_idx_other).getMyOffset(); + } + + NumType getOwner(const NumType in_item_idx) const { + NumType owner = NumType(double(in_item_idx)/step_split); + if(owner != nb_intervals-1 && NumType(step_split*double(owner+1)) <= in_item_idx){ + owner += 1; + } + assert(owner < nb_intervals); + assert(IntervalSplitter(nb_items, nb_intervals, owner).getMyOffset() <= in_item_idx); + assert(in_item_idx < IntervalSplitter(nb_items, nb_intervals, owner).getMySize()+IntervalSplitter(nb_items, nb_intervals, owner).getMyOffset()); + return owner; + } +}; + +// http://en.cppreference.com/w/cpp/algorithm/transform +template<class InputIt, class OutputIt, class UnaryOperation> +OutputIt transform(InputIt first1, InputIt last1, OutputIt d_first, + UnaryOperation unary_op) +{ + while (first1 != last1) { + *d_first++ = unary_op(*first1++); + } + return d_first; +} + + +template <class NumType> +void memzero(NumType* array, size_t size){ + memset(array, 0, size*sizeof(NumType)); +} + +template <class NumType> +void memzero(std::unique_ptr<NumType[]>& array, size_t size){ + memset(array.get(), 0, size*sizeof(NumType)); +} + + +class fixed_copy { + const size_t to_idx; + const size_t from_idx; + const size_t nb_elements_to_copy; + +public: + fixed_copy(const size_t in_to_idx, const size_t in_from_idx, const size_t in_nb_elements_to_copy) + : to_idx(in_to_idx), from_idx(in_from_idx), nb_elements_to_copy(in_nb_elements_to_copy){ + } + + fixed_copy(const size_t in_to_idx, const size_t in_nb_elements_to_copy) + : fixed_copy(in_to_idx, 0, 
in_nb_elements_to_copy){ + } + + fixed_copy(const size_t in_nb_elements_to_copy) + : fixed_copy(0, in_nb_elements_to_copy){ + } + + template <class ItemType> + const fixed_copy& copy(ItemType dest[], const ItemType source[]) const { + memcpy(&dest[to_idx], &source[from_idx], sizeof(ItemType)*nb_elements_to_copy); + return *this; + } + + template <class ItemType> + const fixed_copy& copy(ItemType dest[], const ItemType source[], const size_t nb_values_per_element) const { + memcpy(&dest[to_idx*nb_values_per_element], &source[from_idx*nb_values_per_element], sizeof(ItemType)*nb_elements_to_copy*nb_values_per_element); + return *this; + } + + template <class ItemType> + const fixed_copy& copy(std::unique_ptr<ItemType[]>& dest, const std::unique_ptr<ItemType[]>& source) const { + memcpy(&dest[to_idx], &source[from_idx], sizeof(ItemType)*nb_elements_to_copy); + return *this; + } + + template <class ItemType> + const fixed_copy& copy(std::unique_ptr<ItemType[]>& dest, const std::unique_ptr<ItemType[]>& source, const size_t nb_values_per_element) const { + memcpy(&dest[to_idx*nb_values_per_element], &source[from_idx*nb_values_per_element], sizeof(ItemType)*nb_elements_to_copy*nb_values_per_element); + return *this; + } +}; + + +} + +#endif diff --git a/bfps/cpp/particles_base.cpp b/bfps/cpp/particles_base.cpp index ff0fec32d4f0493814351788ca25081adfb27a12..1410488410a429ff463a1751e86f78cc2157679b 100644 --- a/bfps/cpp/particles_base.cpp +++ b/bfps/cpp/particles_base.cpp @@ -29,6 +29,7 @@ #include <algorithm> #include <cassert> #include "particles_base.hpp" +#include "scope_timer.hpp" template <particle_types particle_type> single_particle_state<particle_type>::single_particle_state() @@ -88,6 +89,7 @@ int get_chunk_offsets( std::vector<hsize_t> chnk_dims, std::vector<std::vector<hsize_t>> &co) { + TIMEZONE("get_chunk_offsets"); std::vector<hsize_t> nchunks(data_dims); int total_number_of_chunks = 1; for (unsigned i=0; i<nchunks.size(); i++) @@ -121,6 +123,7 @@ 
particles_io_base<particle_type>::particles_io_base( const hid_t data_file_id, MPI_Comm COMM) { + TIMEZONE("particles_io_base::particles_io_base"); this->name = std::string(NAME); this->traj_skip = TRAJ_SKIP; this->comm = COMM; @@ -233,6 +236,7 @@ void particles_io_base<particle_type>::read_state_chunk( const int cindex, double *data) { + TIMEZONE("particles_io_base::read_state_chunk"); DEBUG_MSG("entered read_state_chunk\n"); hid_t dset = H5Dopen(this->hdf5_group_id, "state", H5P_DEFAULT); hid_t rspace = H5Dget_space(dset); @@ -267,6 +271,7 @@ void particles_io_base<particle_type>::write_state_chunk( const int cindex, const double *data) { + TIMEZONE("particles_io_base::write_state_chunk"); hid_t dset = H5Dopen(this->hdf5_group_id, "state", H5P_DEFAULT); hid_t rspace = H5Dget_space(dset); std::vector<hsize_t> mem_dims(this->hdf5_state_chunks); @@ -300,6 +305,7 @@ void particles_io_base<particle_type>::read_rhs_chunk( const int rhsindex, double *data) { + TIMEZONE("particles_io_base::read_rhs_chunk"); //DEBUG_MSG("entered read_rhs_chunk\n"); hid_t dset = H5Dopen(this->hdf5_group_id, "rhs", H5P_DEFAULT); hid_t rspace = H5Dget_space(dset); @@ -342,6 +348,7 @@ void particles_io_base<particle_type>::write_rhs_chunk( const int rhsindex, const double *data) { + TIMEZONE("particles_io_base::write_rhs_chunk"); hid_t dset = H5Dopen(this->hdf5_group_id, "rhs", H5P_DEFAULT); hid_t rspace = H5Dget_space(dset); std::vector<hsize_t> mem_dims(this->hdf5_rhs_chunks); @@ -379,6 +386,7 @@ void particles_io_base<particle_type>::write_point3D_chunk( const int cindex, const double *data) { + TIMEZONE("particles_io_base::write_point3D_chunk"); hid_t dset = H5Dopen(this->hdf5_group_id, dset_name.c_str(), H5P_DEFAULT); hid_t rspace = H5Dget_space(dset); std::vector<hsize_t> mem_dims(this->hdf5_state_chunks); diff --git a/bfps/cpp/rFFTW_distributed_particles.cpp b/bfps/cpp/rFFTW_distributed_particles.cpp index 
ab694ab3cc226c4690970cf3959bb2c480207c61..265975f8c817a1b40942e076bd016c2921618bbc 100644 --- a/bfps/cpp/rFFTW_distributed_particles.cpp +++ b/bfps/cpp/rFFTW_distributed_particles.cpp @@ -32,10 +32,13 @@ #include <string> #include <sstream> #include <set> +#include <algorithm> +#include <ctime> #include "base.hpp" #include "rFFTW_distributed_particles.hpp" #include "fftw_tools.hpp" +#include "scope_timer.hpp" extern int myrank, nprocs; @@ -44,14 +47,15 @@ template <particle_types particle_type, class rnumber, int interp_neighbours> rFFTW_distributed_particles<particle_type, rnumber, interp_neighbours>::rFFTW_distributed_particles( const char *NAME, const hid_t data_file_id, - rFFTW_interpolator<rnumber, interp_neighbours> *FIELD, + rFFTW_interpolator<rnumber, interp_neighbours> *VEL, const int TRAJ_SKIP, const int INTEGRATION_STEPS) : particles_io_base<particle_type>( NAME, TRAJ_SKIP, data_file_id, - FIELD->descriptor->comm) + VEL->descriptor->comm) { + TIMEZONE("rFFTW_distributed_particles::rFFTW_distributed_particles"); /* check that integration_steps has a valid value. * If NDEBUG is defined, "assert" doesn't do anything. * With NDEBUG defined, and an invalid INTEGRATION_STEPS, @@ -65,18 +69,21 @@ rFFTW_distributed_particles<particle_type, rnumber, interp_neighbours>::rFFTW_di * therefore I prefer to just kill the code at this point, * no matter whether or not NDEBUG is present. * */ - if (interp_neighbours*2+2 > FIELD->descriptor->subsizes[0]) + if (interp_neighbours*2+2 > VEL->descriptor->subsizes[0]) { DEBUG_MSG("parameters incompatible with rFFTW_distributed_particles.\n" "interp kernel size is %d, local_z_size is %d\n", - interp_neighbours*2+2, FIELD->descriptor->subsizes[0]); - if (FIELD->descriptor->myrank == 0) + interp_neighbours*2+2, VEL->descriptor->subsizes[0]); + if (VEL->descriptor->myrank == 0) std::cerr << "parameters incompatible with rFFTW_distributed_particles." 
<< std::endl; exit(0); } - this->vel = FIELD; + this->vel = VEL; this->rhs.resize(INTEGRATION_STEPS); this->integration_steps = INTEGRATION_STEPS; + /* the particles are expected to be evenly distributed among processes. + * therefore allocating twice that amount of memory seems enough. + * */ this->state.reserve(2*this->nparticles / this->nprocs); for (unsigned int i=0; i<this->rhs.size(); i++) this->rhs[i].reserve(2*this->nparticles / this->nprocs); @@ -157,6 +164,7 @@ void rFFTW_distributed_particles<particle_type, rnumber, interp_neighbours>::sam const std::unordered_map<int, std::unordered_set<int>> &dp, std::unordered_map<int, single_particle_state<POINT3D>> &y) { + TIMEZONE("rFFTW_distributed_particles::sample"); double *yyy; double *yy; y.clear(); @@ -184,24 +192,35 @@ void rFFTW_distributed_particles<particle_type, rnumber, interp_neighbours>::sam int tindex; tindex = 0; // can this sorting be done more efficiently? - std::set<int> ordered_dp; + std::vector<int> ordered_dp; + { + TIMEZONE("rFFTW_distributed_particles::sample::ordered_dp"); + ordered_dp.reserve(dp.at(domain_index).size()); for (auto p: dp.at(domain_index)) - ordered_dp.insert(p); + ordered_dp.push_back(p); + //std::set<int> ordered_dp(dp.at(domain_index)); + std::sort(ordered_dp.begin(), ordered_dp.end()); + } for (auto p: ordered_dp) + //for (auto p: dp.at(domain_index)) { (*field)(x.at(p).data, yy + tindex*3); tindex++; } - MPI_Allreduce( + { + TIMEZONE("rFFTW_distributed_particles::sample::MPI_Allreduce"); + MPI_Allreduce( yy, yyy, 3*dp.at(domain_index).size(), MPI_DOUBLE, MPI_SUM, this->domain_comm[domain_index]); + } tindex = 0; for (auto p: ordered_dp) + //for (auto p: dp.at(domain_index)) { y[p] = yyy + tindex*3; tindex++; @@ -224,8 +243,10 @@ void rFFTW_distributed_particles<particle_type, rnumber, interp_neighbours>::get case VELOCITY_TRACER: this->sample(this->vel, x, dp, yy); y.clear(); - for (auto &pp: x) - y[pp.first] = yy[pp.first].data; + y.reserve(yy.size()); + 
y.rehash(this->nparticles); + for (auto &pp: yy) + y[pp.first] = pp.second.data; break; } } @@ -253,31 +274,38 @@ void rFFTW_distributed_particles<particle_type, rnumber, interp_neighbours>::red std::vector<std::unordered_map<int, single_particle_state<particle_type>>> &vals, std::unordered_map<int, std::unordered_set<int>> &dp) { + TIMEZONE("rFFTW_distributed_particles::redistribute"); //DEBUG_MSG("entered redistribute\n"); /* get new distribution of particles */ std::unordered_map<int, std::unordered_set<int>> newdp; - this->sort_into_domains(x, newdp); + { + TIMEZONE("sort_into_domains"); + this->sort_into_domains(x, newdp); + } /* take care of particles that are leaving the shared domains */ int dindex[2] = {-1, 1}; // for each D of the 2 shared domains - for (int di=0; di<2; di++) - // for all particles previously in D - for (auto p: dp[dindex[di]]) - { - // if the particle is no longer in D - if (newdp[dindex[di]].find(p) == newdp[dindex[di]].end()) + { + TIMEZONE("Loop1"); + for (int di=0; di<2; di++) + // for all particles previously in D + for (auto p: dp[dindex[di]]) { - // and the particle is not in the local domain - if (newdp[0].find(p) == newdp[0].end()) + // if the particle is no longer in D + if (newdp[dindex[di]].find(p) == newdp[dindex[di]].end()) { - // remove the particle from the local list - x.erase(p); - for (unsigned int i=0; i<vals.size(); i++) - vals[i].erase(p); + // and the particle is not in the local domain + if (newdp[0].find(p) == newdp[0].end()) + { + // remove the particle from the local list + x.erase(p); + for (unsigned int i=0; i<vals.size(); i++) + vals[i].erase(p); + } + // if the particle is in the local domain, do nothing } - // if the particle is in the local domain, do nothing } - } + } /* take care of particles that are entering the shared domains */ /* neighbouring rank offsets */ int ro[2]; @@ -285,16 +313,23 @@ void rFFTW_distributed_particles<particle_type, rnumber, interp_neighbours>::red ro[1] = 1; /* particles to 
send, particles to receive */ std::vector<int> ps[2], pr[2]; + for (int tcounter = 0; tcounter < 2; tcounter++) + { + ps[tcounter].reserve(newdp[dindex[tcounter]].size()); + } /* number of particles to send, number of particles to receive */ int nps[2], npr[2]; int rsrc, rdst; /* get list of id-s to send */ - for (auto &p: dp[0]) { - for (int di=0; di<2; di++) + TIMEZONE("Loop2"); + for (auto &p: dp[0]) { - if (newdp[dindex[di]].find(p) != newdp[dindex[di]].end()) - ps[di].push_back(p); + for (int di=0; di<2; di++) + { + if (newdp[dindex[di]].find(p) != newdp[dindex[di]].end()) + ps[di].push_back(p); + } } } /* prepare data for send recv */ @@ -304,7 +339,8 @@ void rFFTW_distributed_particles<particle_type, rnumber, interp_neighbours>::red for (int i=0; i<2; i++) { rdst = MOD(rsrc+ro[i], this->nprocs); - if (this->myrank == rsrc) + if (this->myrank == rsrc){ + TIMEZONE("MPI_Send"); MPI_Send( nps+i, 1, @@ -312,7 +348,9 @@ void rFFTW_distributed_particles<particle_type, rnumber, interp_neighbours>::red rdst, 2*(rsrc*this->nprocs + rdst)+i, this->comm); - if (this->myrank == rdst) + } + if (this->myrank == rdst){ + TIMEZONE("MPI_Recv"); MPI_Recv( npr+1-i, 1, @@ -321,6 +359,7 @@ void rFFTW_distributed_particles<particle_type, rnumber, interp_neighbours>::red 2*(rsrc*this->nprocs + rdst)+i, this->comm, MPI_STATUS_IGNORE); + } } //DEBUG_MSG("I have to send %d %d particles\n", nps[0], nps[1]); //DEBUG_MSG("I have to recv %d %d particles\n", npr[0], npr[1]); @@ -338,6 +377,7 @@ void rFFTW_distributed_particles<particle_type, rnumber, interp_neighbours>::red rdst = MOD(rsrc+ro[i], this->nprocs); if (this->myrank == rsrc && nps[i] > 0) { + TIMEZONE("this->myrank == rsrc && nps[i] > 0"); MPI_Send( &ps[i].front(), nps[i], @@ -369,6 +409,7 @@ void rFFTW_distributed_particles<particle_type, rnumber, interp_neighbours>::red } if (this->myrank == rdst && npr[1-i] > 0) { + TIMEZONE("this->myrank == rdst && npr[1-i] > 0"); MPI_Recv( &pr[1-i].front(), npr[1-i], @@ -401,8 +442,10 @@ 
void rFFTW_distributed_particles<particle_type, rnumber, interp_neighbours>::red delete[] buffer; // x has been changed, so newdp is obsolete // we need to sort into domains again - this->sort_into_domains(x, dp); - + { + TIMEZONE("sort_into_domains2"); + this->sort_into_domains(x, dp); + } #ifndef NDEBUG /* check that all particles at x are local */ @@ -425,44 +468,51 @@ void rFFTW_distributed_particles<particle_type, rnumber, interp_neighbours>::Ada const int nsteps) { this->get_rhs(this->state, this->domain_particles, this->rhs[0]); - for (auto &pp: this->state) +#define AdamsBashforth_LOOP_PREAMBLE \ + for (auto &pp: this->state) \ for (unsigned int i=0; i<state_dimension(particle_type); i++) - switch(nsteps) - { - case 1: - pp.second[i] += this->dt*this->rhs[0][pp.first][i]; - break; - case 2: - pp.second[i] += this->dt*(3*this->rhs[0][pp.first][i] - - this->rhs[1][pp.first][i])/2; - break; - case 3: - pp.second[i] += this->dt*(23*this->rhs[0][pp.first][i] - - 16*this->rhs[1][pp.first][i] - + 5*this->rhs[2][pp.first][i])/12; - break; - case 4: - pp.second[i] += this->dt*(55*this->rhs[0][pp.first][i] - - 59*this->rhs[1][pp.first][i] - + 37*this->rhs[2][pp.first][i] - - 9*this->rhs[3][pp.first][i])/24; - break; - case 5: - pp.second[i] += this->dt*(1901*this->rhs[0][pp.first][i] - - 2774*this->rhs[1][pp.first][i] - + 2616*this->rhs[2][pp.first][i] - - 1274*this->rhs[3][pp.first][i] - + 251*this->rhs[4][pp.first][i])/720; - break; - case 6: - pp.second[i] += this->dt*(4277*this->rhs[0][pp.first][i] - - 7923*this->rhs[1][pp.first][i] - + 9982*this->rhs[2][pp.first][i] - - 7298*this->rhs[3][pp.first][i] - + 2877*this->rhs[4][pp.first][i] - - 475*this->rhs[5][pp.first][i])/1440; - break; - } + switch(nsteps) + { + case 1: + AdamsBashforth_LOOP_PREAMBLE + pp.second[i] += this->dt*this->rhs[0][pp.first][i]; + break; + case 2: + AdamsBashforth_LOOP_PREAMBLE + pp.second[i] += this->dt*(3*this->rhs[0][pp.first][i] + - this->rhs[1][pp.first][i])/2; + break; + case 3: + 
AdamsBashforth_LOOP_PREAMBLE + pp.second[i] += this->dt*(23*this->rhs[0][pp.first][i] + - 16*this->rhs[1][pp.first][i] + + 5*this->rhs[2][pp.first][i])/12; + break; + case 4: + AdamsBashforth_LOOP_PREAMBLE + pp.second[i] += this->dt*(55*this->rhs[0][pp.first][i] + - 59*this->rhs[1][pp.first][i] + + 37*this->rhs[2][pp.first][i] + - 9*this->rhs[3][pp.first][i])/24; + break; + case 5: + AdamsBashforth_LOOP_PREAMBLE + pp.second[i] += this->dt*(1901*this->rhs[0][pp.first][i] + - 2774*this->rhs[1][pp.first][i] + + 2616*this->rhs[2][pp.first][i] + - 1274*this->rhs[3][pp.first][i] + + 251*this->rhs[4][pp.first][i])/720; + break; + case 6: + AdamsBashforth_LOOP_PREAMBLE + pp.second[i] += this->dt*(4277*this->rhs[0][pp.first][i] + - 7923*this->rhs[1][pp.first][i] + + 9982*this->rhs[2][pp.first][i] + - 7298*this->rhs[3][pp.first][i] + + 2877*this->rhs[4][pp.first][i] + - 475*this->rhs[5][pp.first][i])/1440; + break; + } this->redistribute(this->state, this->rhs, this->domain_particles); this->roll_rhs(); } @@ -471,6 +521,7 @@ void rFFTW_distributed_particles<particle_type, rnumber, interp_neighbours>::Ada template <particle_types particle_type, class rnumber, int interp_neighbours> void rFFTW_distributed_particles<particle_type, rnumber, interp_neighbours>::step() { + TIMEZONE("rFFTW_distributed_particles::step"); this->AdamsBashforth((this->iteration < this->integration_steps) ? 
this->iteration+1 : this->integration_steps); @@ -483,6 +534,7 @@ void rFFTW_distributed_particles<particle_type, rnumber, interp_neighbours>::sor const std::unordered_map<int, single_particle_state<particle_type>> &x, std::unordered_map<int, std::unordered_set<int>> &dp) { + TIMEZONE("rFFTW_distributed_particles::sort_into_domains"); int tmpint1, tmpint2; dp.clear(); dp[-1] = std::unordered_set<int>(); @@ -521,19 +573,25 @@ void rFFTW_distributed_particles<particle_type, rnumber, interp_neighbours>::sor template <particle_types particle_type, class rnumber, int interp_neighbours> void rFFTW_distributed_particles<particle_type, rnumber, interp_neighbours>::read() { + TIMEZONE("rFFTW_distributed_particles::read"); double *temp = new double[this->chunk_size*state_dimension(particle_type)]; int tmpint1, tmpint2; for (unsigned int cindex=0; cindex<this->get_number_of_chunks(); cindex++) { //read state - if (this->myrank == 0) + if (this->myrank == 0){ + TIMEZONE("read_state_chunk"); this->read_state_chunk(cindex, temp); - MPI_Bcast( + } + { + TIMEZONE("MPI_Bcast"); + MPI_Bcast( temp, this->chunk_size*state_dimension(particle_type), MPI_DOUBLE, 0, this->comm); + } for (unsigned int p=0; p<this->chunk_size; p++) { if (this->vel->get_rank_info(temp[state_dimension(particle_type)*p+2], tmpint1, tmpint2)) @@ -542,17 +600,23 @@ void rFFTW_distributed_particles<particle_type, rnumber, interp_neighbours>::rea } } //read rhs - if (this->iteration > 0) + if (this->iteration > 0){ + TIMEZONE("this->iteration > 0"); for (int i=0; i<this->integration_steps; i++) { - if (this->myrank == 0) + if (this->myrank == 0){ + TIMEZONE("read_rhs_chunk"); this->read_rhs_chunk(cindex, i, temp); - MPI_Bcast( + } + { + TIMEZONE("MPI_Bcast"); + MPI_Bcast( temp, this->chunk_size*state_dimension(particle_type), MPI_DOUBLE, 0, this->comm); + } for (unsigned int p=0; p<this->chunk_size; p++) { auto pp = this->state.find(p+cindex*this->chunk_size); @@ -560,6 +624,7 @@ void 
rFFTW_distributed_particles<particle_type, rnumber, interp_neighbours>::rea this->rhs[i][p+cindex*this->chunk_size] = temp + state_dimension(particle_type)*p; } } + } } this->sort_into_domains(this->state, this->domain_particles); DEBUG_MSG("%s->state.size = %ld\n", this->name.c_str(), this->state.size()); @@ -575,31 +640,48 @@ void rFFTW_distributed_particles<particle_type, rnumber, interp_neighbours>::wri const char *dset_name, std::unordered_map<int, single_particle_state<POINT3D>> &y) { - double *data = new double[this->nparticles*3]; - double *yy = new double[this->nparticles*3]; - int pindex = 0; - for (unsigned int cindex=0; cindex<this->get_number_of_chunks(); cindex++) + TIMEZONE("rFFTW_distributed_particles::write"); + double *data = new double[this->chunk_size*3]; + double *yy = new double[this->chunk_size*3]; + //int pindex = 0; + for (unsigned int cindex=0; cindex<this->get_number_of_chunks(); cindex++) { std::fill_n(yy, this->chunk_size*3, 0); - for (unsigned int p=0; p<this->chunk_size; p++, pindex++) + //for (unsigned int p=0; p<this->chunk_size; p++, pindex++) + //{ + // if (this->domain_particles[-1].find(pindex) != this->domain_particles[-1].end() || + // this->domain_particles[ 0].find(pindex) != this->domain_particles[ 0].end()) + // { + // std::copy(y[pindex].data, + // y[pindex].data + 3, + // yy + p*3); + // } + //} + for (int s = -1; s <= 0; s++) + for (auto &pp: this->domain_particles[s]) + { + if (pp >= int(cindex*this->chunk_size) && + pp < int((cindex+1)*this->chunk_size)) + { + std::copy(y[pp].data, + y[pp].data + 3, + yy + (pp-cindex*this->chunk_size)*3); + } + } { - if (this->domain_particles[-1].find(pindex) != this->domain_particles[-1].end() || - this->domain_particles[ 0].find(pindex) != this->domain_particles[ 0].end()) - { - std::copy(y[pindex].data, - y[pindex].data + 3, - yy + p*3); - } - } - MPI_Allreduce( + TIMEZONE("MPI_Allreduce"); + MPI_Allreduce( yy, data, 3*this->chunk_size, MPI_DOUBLE, MPI_SUM, this->comm); - if 
(this->myrank == 0) + } + if (this->myrank == 0){ + TIMEZONE("write_point3D_chunk"); this->write_point3D_chunk(dset_name, cindex, data); + } } delete[] yy; delete[] data; @@ -609,59 +691,96 @@ template <particle_types particle_type, class rnumber, int interp_neighbours> void rFFTW_distributed_particles<particle_type, rnumber, interp_neighbours>::write( const bool write_rhs) { + TIMEZONE("rFFTW_distributed_particles::write2"); double *temp0 = new double[this->chunk_size*state_dimension(particle_type)]; double *temp1 = new double[this->chunk_size*state_dimension(particle_type)]; - int pindex = 0; + //int pindex = 0; for (unsigned int cindex=0; cindex<this->get_number_of_chunks(); cindex++) { //write state std::fill_n(temp0, state_dimension(particle_type)*this->chunk_size, 0); - pindex = cindex*this->chunk_size; - for (unsigned int p=0; p<this->chunk_size; p++, pindex++) + //pindex = cindex*this->chunk_size; + //for (unsigned int p=0; p<this->chunk_size; p++, pindex++) + //{ + // if (this->domain_particles[-1].find(pindex) != this->domain_particles[-1].end() || + // this->domain_particles[ 0].find(pindex) != this->domain_particles[ 0].end()) + // { + // TIMEZONE("std::copy"); + // std::copy(this->state[pindex].data, + // this->state[pindex].data + state_dimension(particle_type), + // temp0 + p*state_dimension(particle_type)); + // } + //} + for (int s = -1; s <= 0; s++) + for (auto &pp: this->domain_particles[s]) + { + if (pp >= int(cindex*this->chunk_size) && + pp < int((cindex+1)*this->chunk_size)) + { + std::copy(this->state[pp].data, + this->state[pp].data + state_dimension(particle_type), + temp0 + (pp-cindex*this->chunk_size)*state_dimension(particle_type)); + } + } { - if (this->domain_particles[-1].find(pindex) != this->domain_particles[-1].end() || - this->domain_particles[ 0].find(pindex) != this->domain_particles[ 0].end()) - { - std::copy(this->state[pindex].data, - this->state[pindex].data + state_dimension(particle_type), - temp0 + 
p*state_dimension(particle_type)); - } + TIMEZONE("MPI_Allreduce"); + MPI_Allreduce( + temp0, + temp1, + state_dimension(particle_type)*this->chunk_size, + MPI_DOUBLE, + MPI_SUM, + this->comm); } - MPI_Allreduce( - temp0, - temp1, - state_dimension(particle_type)*this->chunk_size, - MPI_DOUBLE, - MPI_SUM, - this->comm); - if (this->myrank == 0) + if (this->myrank == 0){ + TIMEZONE("write_state_chunk"); this->write_state_chunk(cindex, temp1); + } //write rhs - if (write_rhs) + if (write_rhs){ + TIMEZONE("write_rhs"); for (int i=0; i<this->integration_steps; i++) { std::fill_n(temp0, state_dimension(particle_type)*this->chunk_size, 0); - pindex = cindex*this->chunk_size; - for (unsigned int p=0; p<this->chunk_size; p++, pindex++) + //pindex = cindex*this->chunk_size; + //for (unsigned int p=0; p<this->chunk_size; p++, pindex++) + //{ + // if (this->domain_particles[-1].find(pindex) != this->domain_particles[-1].end() || + // this->domain_particles[ 0].find(pindex) != this->domain_particles[ 0].end()) + // { + // TIMEZONE("std::copy"); + // std::copy(this->rhs[i][pindex].data, + // this->rhs[i][pindex].data + state_dimension(particle_type), + // temp0 + p*state_dimension(particle_type)); + // } + //} + for (int s = -1; s <= 0; s++) + for (auto &pp: this->domain_particles[s]) + { + if (pp >= int(cindex*this->chunk_size) && + pp < int((cindex+1)*this->chunk_size)) + { + std::copy(this->rhs[i][pp].data, + this->rhs[i][pp].data + state_dimension(particle_type), + temp0 + (pp-cindex*this->chunk_size)*state_dimension(particle_type)); + } + } { - if (this->domain_particles[-1].find(pindex) != this->domain_particles[-1].end() || - this->domain_particles[ 0].find(pindex) != this->domain_particles[ 0].end()) - { - std::copy(this->rhs[i][pindex].data, - this->rhs[i][pindex].data + state_dimension(particle_type), - temp0 + p*state_dimension(particle_type)); - } - } - MPI_Allreduce( + TIMEZONE("MPI_Allreduce"); + MPI_Allreduce( temp0, temp1, 
state_dimension(particle_type)*this->chunk_size, MPI_DOUBLE, MPI_SUM, this->comm); - if (this->myrank == 0) + } + if (this->myrank == 0){ + TIMEZONE("write_rhs_chunk"); this->write_rhs_chunk(cindex, i, temp1); + } } + } } delete[] temp0; delete[] temp1; diff --git a/bfps/cpp/rFFTW_distributed_particles.hpp b/bfps/cpp/rFFTW_distributed_particles.hpp index e271bbfae56c0d49bf66cebcb5e8e8158f81940b..400411d5f1fd6e597714be494a72272a76e01206 100644 --- a/bfps/cpp/rFFTW_distributed_particles.hpp +++ b/bfps/cpp/rFFTW_distributed_particles.hpp @@ -44,12 +44,25 @@ template <particle_types particle_type, class rnumber, int interp_neighbours> class rFFTW_distributed_particles: public particles_io_base<particle_type> { private: - std::unordered_map<int, single_particle_state<particle_type>> state; - std::vector<std::unordered_map<int, single_particle_state<particle_type>>> rhs; + // a "domain" corresponds to a region in 3D real space where a fixed set + // of MPI processes are required to participate in the interpolation + // formula (i.e. they all contain required information). + // we need to know how many processes there are for each of the domains + // to which the local process belongs. 
std::unordered_map<int, int> domain_nprocs; + // each domain has an associated communicator, and we keep a list of the + // communicators to which the local process belongs std::unordered_map<int, MPI_Comm> domain_comm; + // for each domain, we need a list of the IDs of the particles located + // in that domain std::unordered_map<int, std::unordered_set<int>> domain_particles; + // for each domain, we need the state of each particle + std::unordered_map<int, single_particle_state<particle_type>> state; + // for each domain, we also need the last few values of the right hand + // side of the ODE, since we use Adams-Bashforth integration + std::vector<std::unordered_map<int, single_particle_state<particle_type>>> rhs; + public: int integration_steps; // this class only works with rFFTW interpolator @@ -87,9 +100,24 @@ class rFFTW_distributed_particles: public particles_io_base<particle_type> std::unordered_map<int, single_particle_state<particle_type>> &y); + /* given a list of particle positions, + * figure out which go into what local domain, and construct the relevant + * map of ID lists "dp" (for domain particles). + * */ void sort_into_domains( const std::unordered_map<int, single_particle_state<particle_type>> &x, std::unordered_map<int, std::unordered_set<int>> &dp); + /* suppose the particles are currently badly distributed, and some + * arbitrary quantities (stored in "vals") are associated to the particles, + * and we need to properly distribute them among processes. + * that's what this function does. + * In practice it's only used to redistribute the rhs values (and it + * automatically redistributes the state x being passed). + * Some more comments are present in the .cpp file, but, in brief: the + * particles are simply moved from one domain to another. + * If it turns out that the new domain contains a process which does not + * know about a particle, that information is sent from the closest process. 
+ * */ void redistribute( std::unordered_map<int, single_particle_state<particle_type>> &x, std::vector<std::unordered_map<int, single_particle_state<particle_type>>> &vals, diff --git a/bfps/cpp/rFFTW_interpolator.cpp b/bfps/cpp/rFFTW_interpolator.cpp index bffae44f5986f9873a231442e92cba6cf005d3a4..b8b21e8811d7f5286dc4edd00833c205539ea89c 100644 --- a/bfps/cpp/rFFTW_interpolator.cpp +++ b/bfps/cpp/rFFTW_interpolator.cpp @@ -28,15 +28,15 @@ #include <cmath> #include "rFFTW_interpolator.hpp" +#include "scope_timer.hpp" template <class rnumber, int interp_neighbours> rFFTW_interpolator<rnumber, interp_neighbours>::rFFTW_interpolator( fluid_solver_base<rnumber> *fs, base_polynomial_values BETA_POLYS, - rnumber *FIELD) : interpolator_base<rnumber, interp_neighbours>(fs, BETA_POLYS) + rnumber *FIELD_POINTER) : interpolator_base<rnumber, interp_neighbours>(fs, BETA_POLYS) { - this->field_size = 2*fs->cd->local_size; - this->field = FIELD; + this->field = FIELD_POINTER; // generate compute array @@ -48,6 +48,24 @@ rFFTW_interpolator<rnumber, interp_neighbours>::rFFTW_interpolator( this->compute[((iz + this->descriptor->sizes[0]) % this->descriptor->sizes[0])] = true; } +template <class rnumber, int interp_neighbours> +rFFTW_interpolator<rnumber, interp_neighbours>::rFFTW_interpolator( + vorticity_equation<rnumber, FFTW> *fs, + base_polynomial_values BETA_POLYS, + rnumber *FIELD_POINTER) : interpolator_base<rnumber, interp_neighbours>(fs, BETA_POLYS) +{ +// this->field = FIELD_POINTER; +// +// +// // generate compute array +// this->compute = new bool[this->descriptor->sizes[0]]; +// std::fill_n(this->compute, this->descriptor->sizes[0], false); +// for (int iz = this->descriptor->starts[0]-interp_neighbours-1; +// iz <= this->descriptor->starts[0]+this->descriptor->subsizes[0]+interp_neighbours; +// iz++) +// this->compute[((iz + this->descriptor->sizes[0]) % this->descriptor->sizes[0])] = true; +} + template <class rnumber, int interp_neighbours> 
rFFTW_interpolator<rnumber, interp_neighbours>::~rFFTW_interpolator() { @@ -80,6 +98,7 @@ void rFFTW_interpolator<rnumber, interp_neighbours>::sample( double *__restrict__ y, const int *deriv) { + TIMEZONE("rFFTW_interpolator::sample"); /* get grid coordinates */ int *xg = new int[3*nparticles]; double *xx = new double[3*nparticles]; @@ -109,7 +128,14 @@ void rFFTW_interpolator<rnumber, interp_neighbours>::operator()( double *dest, const int *deriv) { + TIMEZONE("rFFTW_interpolator::operator()"); double bx[interp_neighbours*2+2], by[interp_neighbours*2+2], bz[interp_neighbours*2+2]; + /* please note that the polynomials in z are computed for all the different + * iz values, independently of whether or not "myrank" will perform the + * computation for all the different iz slices. + * I don't know how big a deal this really is, but it is something that we can + * optimize. + * */ if (deriv == NULL) { this->compute_beta(0, xx[0], bx); @@ -124,17 +150,30 @@ void rFFTW_interpolator<rnumber, interp_neighbours>::operator()( } std::fill_n(dest, 3, 0); ptrdiff_t bigiz, bigiy, bigix; + // loop over the 2*interp_neighbours + 2 z slices for (int iz = -interp_neighbours; iz <= interp_neighbours+1; iz++) { + // bigiz is the z index of the cell containing the particles + // this->descriptor->sizes[0] is added before taking the modulo + // because we want to be sure that "bigiz" is a positive number. + // I'm no longer sure why I don't use the MOD function here. bigiz = ptrdiff_t(((xg[2]+iz) + this->descriptor->sizes[0]) % this->descriptor->sizes[0]); + // once we know bigiz, we know whether "myrank" has the relevant slice. 
+ // if not, go to next value of bigiz if (this->descriptor->myrank == this->descriptor->rank[bigiz]) { for (int iy = -interp_neighbours; iy <= interp_neighbours+1; iy++) { + // bigiy is the y index of the cell + // since we have all the y indices in myrank, we can safely use the + // modulo value bigiy = ptrdiff_t(MOD(xg[1]+iy, this->descriptor->sizes[1])); for (int ix = -interp_neighbours; ix <= interp_neighbours+1; ix++) { + // bigix is the x index of the cell bigix = ptrdiff_t(MOD(xg[0]+ix, this->descriptor->sizes[2])); + // here we create the index to the current grid node + // note the removal of local_z_start from bigiz. ptrdiff_t tindex = (((bigiz-this->descriptor->starts[0])*this->descriptor->sizes[1] + bigiy)*(this->descriptor->sizes[2]+2) + bigix)*3; @@ -154,10 +193,18 @@ template class rFFTW_interpolator<float, 3>; template class rFFTW_interpolator<float, 4>; template class rFFTW_interpolator<float, 5>; template class rFFTW_interpolator<float, 6>; +template class rFFTW_interpolator<float, 7>; +template class rFFTW_interpolator<float, 8>; +template class rFFTW_interpolator<float, 9>; +template class rFFTW_interpolator<float, 10>; template class rFFTW_interpolator<double, 1>; template class rFFTW_interpolator<double, 2>; template class rFFTW_interpolator<double, 3>; template class rFFTW_interpolator<double, 4>; template class rFFTW_interpolator<double, 5>; template class rFFTW_interpolator<double, 6>; +template class rFFTW_interpolator<double, 7>; +template class rFFTW_interpolator<double, 8>; +template class rFFTW_interpolator<double, 9>; +template class rFFTW_interpolator<double, 10>; diff --git a/bfps/cpp/rFFTW_interpolator.hpp b/bfps/cpp/rFFTW_interpolator.hpp index 795257d2744e432d9c346b93848cadfbd8cc85dc..5088be8b2f3094fd96332af0c923d7cc905e4f3f 100644 --- a/bfps/cpp/rFFTW_interpolator.hpp +++ b/bfps/cpp/rFFTW_interpolator.hpp @@ -27,6 +27,7 @@ #include "field_descriptor.hpp" #include "fftw_tools.hpp" #include "fluid_solver_base.hpp" +#include 
"vorticity_equation.hpp" #include "interpolator_base.hpp" #ifndef RFFTW_INTERPOLATOR @@ -38,41 +39,74 @@ class rFFTW_interpolator:public interpolator_base<rnumber, interp_neighbours> { public: using interpolator_base<rnumber, interp_neighbours>::operator(); - /* size of field to interpolate */ - ptrdiff_t field_size; - /* pointers to fields that are to be interpolated + /* pointer to field that has to be interpolated + * The reason this is a member variable is because I want this class + * to be consistent with the "interpolator" class, where a member + * variable is absolutely required (since that class uses padding). * */ rnumber *field; - /* compute[iz] is true if . + /* compute[iz] is an array that says whether or not the current MPI + * process is involved in the interpolation formula for a particle + * located in cell "iz". + * It is mostly used in the formula itself. + * This translates as the following condition: * local_zstart - neighbours <= iz <= local_zend + 1 + neighbours + * I think it's cleaner to keep things in an array, especially since + * "local_zend" is shorthand for another arithmetic operation anyway. * */ bool *compute; + + /* Constructors */ rFFTW_interpolator( fluid_solver_base<rnumber> *FSOLVER, base_polynomial_values BETA_POLYS, rnumber *FIELD_DATA); + + /* this constructor is empty, I just needed for a quick hack of the + * "vorticity_equation" class. + * It should be removed soon. + * */ + rFFTW_interpolator( + vorticity_equation<rnumber, FFTW> *FSOLVER, + base_polynomial_values BETA_POLYS, + rnumber *FIELD_DATA); ~rFFTW_interpolator(); - /* does not destroy input */ + /* This method is provided for consistency with "interpolator", and it + * does not destroy input */ inline int read_rFFTW(const void *src) { this->field = (rnumber*)src; return EXIT_SUCCESS; } + /* This is used when "compute" is not enough. + * For a given z location, it gives the outermost ranks that are relevant + * for the interpolation formula. 
+ * */ bool get_rank_info(double z, int &maxz_rank, int &minz_rank); - /* interpolate field at an array of locations */ + /* interpolate field at an array of locations. + * After interpolation is performed, call Allreduce for "y", over + * this->descriptor->comm --- generally MPI_COMM_WORLD. + * This is useful for the simple "particles" class, where particle + * information is synchronized across all processes. + * */ void sample( const int nparticles, const int pdimension, const double *__restrict__ x, double *__restrict__ y, const int *deriv = NULL); - /* interpolate 1 point */ + /* interpolate 1 point. + * Result is kept local. + * This is used in the "rFFTW_distributed_particles" class, with the + * result being synchronized across the relevant "local particle + * communicator". + * */ void operator()( const int *__restrict__ xg, const double *__restrict__ xx, diff --git a/bfps/cpp/scope_timer.cpp b/bfps/cpp/scope_timer.cpp new file mode 100644 index 0000000000000000000000000000000000000000..61ddd89583fe8d53cee328c4267df603e128d417 --- /dev/null +++ b/bfps/cpp/scope_timer.cpp @@ -0,0 +1,8 @@ + + +#include "scope_timer.hpp" + + +#ifdef USE_TIMINGOUTPUT +EventManager global_timer_manager("BFPS", std::cout); +#endif diff --git a/bfps/cpp/scope_timer.hpp b/bfps/cpp/scope_timer.hpp new file mode 100644 index 0000000000000000000000000000000000000000..2c48e2eda06ded74e668825181f0444eef22f647 --- /dev/null +++ b/bfps/cpp/scope_timer.hpp @@ -0,0 +1,823 @@ +/********************************************************************** +* * +* Copyright 2015 Max Planck Institute * +* for Dynamics and Self-Organization * +* * +* This file is part of bfps. * +* * +* bfps is free software: you can redistribute it and/or modify * +* it under the terms of the GNU General Public License as published * +* by the Free Software Foundation, either version 3 of the License, * +* or (at your option) any later version. 
* +* * +* bfps is distributed in the hope that it will be useful, * +* but WITHOUT ANY WARRANTY; without even the implied warranty of * +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * +* GNU General Public License for more details. * +* * +* You should have received a copy of the GNU General Public License * +* along with bfps. If not, see <http://www.gnu.org/licenses/> * +* * +* Contact: Cristian.Lalescu@ds.mpg.de * +* * +**********************************************************************/ + +#ifndef SCOPE_TIMER_HPP +#define SCOPE_TIMER_HPP + +#include <memory> +#include <iostream> +#include <vector> +#include <stack> +#include <string> +#include <limits> +#include <cassert> +#include <sstream> +#include <unordered_map> +#include <mpi.h> +#include <cstring> +#include <stdexcept> +#include <omp.h> +#include <iomanip> +#include <fstream> + +#include "base.hpp" +#include "bfps_timer.hpp" + +//< To add it as friend of EventManager +class ScopeEvent; + +class EventManager { +protected: + + class CoreEvent { + protected: + //< Name of the event (from the user) + const std::string m_name; + //< Previous events (stack of parents) + std::stack<CoreEvent*> m_parentStack; + //< Current event children + std::vector<CoreEvent*> m_children; + + //< Total execution time + double m_totalTime; + //< Minimum execution time + double m_minTime; + //< Maximum execution time + double m_maxTime; + //< Number of occurrence for this event + int m_occurrence; + //< Number of occurrence that are tasks for this event + int m_nbTasks; + //< Children lock + omp_lock_t m_childrenLock; + //< Children lock + omp_lock_t m_updateLock; + + public: + /** Create a core-event from the name and the current stack */ + CoreEvent(const std::string& inName, + const std::stack<CoreEvent*>& inParentStack) + : m_name(inName), + m_parentStack(inParentStack), + m_totalTime(0), + m_minTime(std::numeric_limits<double>::max()), + m_maxTime(std::numeric_limits<double>::min()), + m_occurrence(0), 
+ m_nbTasks(0) { + omp_init_lock(&m_childrenLock); + omp_init_lock(&m_updateLock); + } + + ~CoreEvent() { + omp_destroy_lock(&m_childrenLock); + omp_destroy_lock(&m_updateLock); + } + + /** Add a record */ + void addRecord(const double inDuration, const bool isTask) { + #pragma omp atomic update + m_totalTime += inDuration; + #pragma omp atomic update + m_occurrence += 1; + #pragma omp flush // (m_minTime, m_maxTime) + if (inDuration < m_minTime || m_maxTime < inDuration) { + omp_set_lock(&m_updateLock); + m_minTime = std::min(m_minTime, inDuration); + m_maxTime = std::max(m_maxTime, inDuration); + omp_unset_lock(&m_updateLock); + } + if (isTask) { + #pragma omp atomic update + m_nbTasks += 1; + } + } + + const std::stack<CoreEvent*>& getParents() const { return m_parentStack; } + + std::stack<CoreEvent*>& getParents() { return m_parentStack; } + + void addChild(CoreEvent* inChild) { + omp_set_lock(&m_childrenLock); + m_children.push_back(inChild); + omp_unset_lock(&m_childrenLock); + } + + //! 
Must not be called during a paralle execution + const std::vector<CoreEvent*>& getChildren() const { + assert(omp_in_parallel() == 0); + return m_children; + } + + const std::string& getName() const { return m_name; } + + double getMin() const { return m_minTime; } + + double getMax() const { return m_maxTime; } + + int getOccurrence() const { return m_occurrence; } + + double getAverage() const { + return m_totalTime / static_cast<double>(m_occurrence); + } + + double getDuration() const { return m_totalTime; } + + int getNbTasks() const { return m_nbTasks; } + }; + + /////////////////////////////////////////////////////////////// + + //< The main node + std::unique_ptr<CoreEvent> m_root; + //< Output stream to print out + std::ostream& m_outputStream; + + //< Current stack, there are one stack of stack per thread + std::vector<std::stack<std::stack<CoreEvent*>>> m_currentEventsStackPerThread; + //< All recorded events (that will then be delete at the end) + std::unordered_multimap<std::string, CoreEvent*> m_records; + //< Lock for m_records + omp_lock_t m_recordsLock; + + /** Find a event from its name. If such even does not exist + * the function creates one. If an event with the same name exists + * but with a different stack, a new one is created. + * It pushes the returned event in the stack. 
+ */ + CoreEvent* getEvent(const std::string& inName, + const std::string& inUniqueKey) { + const std::string completeName = inName + inUniqueKey; + CoreEvent* foundEvent = nullptr; + + omp_set_lock(&m_recordsLock); + // find all events with this name + auto range = m_records.equal_range(completeName); + for (auto iter = range.first; iter != range.second; ++iter) { + // events are equal if same name and same parents + if ((*iter).second->getParents() == + m_currentEventsStackPerThread[omp_get_thread_num()].top()) { + foundEvent = (*iter).second; + break; + } + } + + // Keep the lock to ensure that not two threads create the same event + + if (!foundEvent) { + // create this event + foundEvent = new CoreEvent( + inName, m_currentEventsStackPerThread[omp_get_thread_num()].top()); + m_currentEventsStackPerThread[omp_get_thread_num()].top().top()->addChild( + foundEvent); + m_records.insert({completeName, foundEvent}); + } + omp_unset_lock(&m_recordsLock); + + m_currentEventsStackPerThread[omp_get_thread_num()].top().push(foundEvent); + return foundEvent; + } + + CoreEvent* getEventFromContext(const std::string& inName, + const std::string& inUniqueKey, + const std::stack<CoreEvent*>& inParentStack) { + m_currentEventsStackPerThread[omp_get_thread_num()].push(inParentStack); + return getEvent(inName, inUniqueKey); + } + + /** Pop current event */ + void popEvent(const CoreEvent* eventToRemove) { + assert(m_currentEventsStackPerThread[omp_get_thread_num()].top().size() > 1); + // Comparing address is cheaper + if (m_currentEventsStackPerThread[omp_get_thread_num()].top().top() != + eventToRemove) { + throw std::runtime_error( + "You must end events (ScopeEvent/TIMEZONE) in order.\n" + "Please make sure that you only ask to the last event to finish."); + } + m_currentEventsStackPerThread[omp_get_thread_num()].top().pop(); + } + + /** Pop current context */ + void popContext(const CoreEvent* eventToRemove) { + 
assert(m_currentEventsStackPerThread[omp_get_thread_num()].size() > 1); + assert(m_currentEventsStackPerThread[omp_get_thread_num()].top().size() > 1); + // Comparing address is cheaper + if (m_currentEventsStackPerThread[omp_get_thread_num()].top().top() != + eventToRemove) { + throw std::runtime_error( + "You must end events (ScopeEvent/TIMEZONE) in order.\n" + "Please make sure that you only ask to the last event to finish."); + } + m_currentEventsStackPerThread[omp_get_thread_num()].pop(); + } + +public: + /** Create an event manager */ + EventManager(const std::string& inAppName, std::ostream& inOutputStream) + : m_root(new CoreEvent(inAppName, std::stack<CoreEvent*>())), + m_outputStream(inOutputStream), + m_currentEventsStackPerThread(1) { + m_currentEventsStackPerThread[0].emplace(); + m_currentEventsStackPerThread[0].top().push(m_root.get()); + omp_init_lock(&m_recordsLock); + } + + ~EventManager() throw() { + assert(m_currentEventsStackPerThread[0].size() == 1); + + assert(m_currentEventsStackPerThread[0].top().size() == 1); + + omp_destroy_lock(&m_recordsLock); + + for (auto event : m_records) { + delete event.second; + } + } + + void startParallelRegion(const int inNbThreads) { + m_currentEventsStackPerThread.resize(1); + m_currentEventsStackPerThread.resize(inNbThreads, + m_currentEventsStackPerThread[0]); + } + + void showDistributed(const MPI_Comm inComm) const { + int myRank, nbProcess; + int retMpi = MPI_Comm_rank( inComm, &myRank); + variable_used_only_in_assert(retMpi); + assert(retMpi == MPI_SUCCESS); + retMpi = MPI_Comm_size( inComm, &nbProcess); + assert(retMpi == MPI_SUCCESS); + + if((&m_outputStream == &std::cout || &m_outputStream == &std::clog) && myrank != nbProcess-1){ + // Print in reverse order + char tmp; + retMpi = MPI_Recv(&tmp, 1, MPI_BYTE, myrank+1, 99, inComm, MPI_STATUS_IGNORE); + assert(retMpi == MPI_SUCCESS); + } + m_outputStream.flush(); + + std::stack<std::pair<int, const CoreEvent*>> events; + + for (int idx = 
static_cast<int>(m_root->getChildren().size()) - 1; idx >= 0; --idx) { + events.push({0, m_root->getChildren()[idx]}); + } + + m_outputStream << "[TIMING-" << myRank<< "] Local times.\n"; + m_outputStream << "[TIMING-" << myRank<< "] :" << m_root->getName() << "\n"; + + while (events.size()) { + const std::pair<int, const CoreEvent*> eventToShow = + events.top(); + events.pop(); + + m_outputStream << "[TIMING-" << myRank<< "] "; + + int offsetTab = eventToShow.first; + while (offsetTab--) { + m_outputStream << "\t"; + } + m_outputStream << "@" << eventToShow.second->getName() << " = " << eventToShow.second->getDuration() << "s"; + if (eventToShow.second->getOccurrence() != 1) { + m_outputStream << " (Min = " << eventToShow.second->getMin() << "s ; Max = " << eventToShow.second->getMax() + << "s ; Average = " << eventToShow.second->getAverage() << "s ; Occurrence = " + << eventToShow.second->getOccurrence() << ")"; + } + + m_outputStream << "\n"; + for (int idx = + static_cast<int>(eventToShow.second->getChildren().size()) - 1; + idx >= 0; --idx) { + events.push( + {eventToShow.first + 1, eventToShow.second->getChildren()[idx]}); + } + } + m_outputStream.flush(); + + if((&m_outputStream == &std::cout || &m_outputStream == &std::clog) && myrank != 0){ + // Print in reverse order + char tmp; + retMpi = MPI_Send(&tmp, 1, MPI_BYTE, myrank-1, 99, inComm); + assert(retMpi == MPI_SUCCESS); + } + } + + void show(const MPI_Comm inComm, const bool onlyP0 = true) const { + int myRank, nbProcess; + int retMpi = MPI_Comm_rank( inComm, &myRank); + variable_used_only_in_assert(retMpi); + assert(retMpi == MPI_SUCCESS); + retMpi = MPI_Comm_size( inComm, &nbProcess); + assert(retMpi == MPI_SUCCESS); + + if(onlyP0 && myRank != 0){ + return; + } + + std::stringstream myResults; + + std::stack<std::pair<int, const CoreEvent*>> events; + + for (int idx = static_cast<int>(m_root->getChildren().size()) - 1; idx >= 0; --idx) { + events.push({0, m_root->getChildren()[idx]}); + } + + 
myResults << "[TIMING-" << myRank<< "] Local times.\n"; + myResults << "[TIMING-" << myRank<< "] :" << m_root->getName() << "\n"; + + while (events.size()) { + const std::pair<int, const CoreEvent*> eventToShow = + events.top(); + events.pop(); + + myResults << "[TIMING-" << myRank<< "] "; + + int offsetTab = eventToShow.first; + while (offsetTab--) { + myResults << "\t"; + } + myResults << "@" << eventToShow.second->getName() << " = " << eventToShow.second->getDuration() << "s"; + if (eventToShow.second->getOccurrence() != 1) { + myResults << " (Min = " << eventToShow.second->getMin() << "s ; Max = " << eventToShow.second->getMax() + << "s ; Average = " << eventToShow.second->getAverage() << "s ; Occurrence = " + << eventToShow.second->getOccurrence() << ")"; + } + + myResults << "\n"; + for (int idx = + static_cast<int>(eventToShow.second->getChildren().size()) - 1; + idx >= 0; --idx) { + events.push( + {eventToShow.first + 1, eventToShow.second->getChildren()[idx]}); + } + } + + if(myrank != 0){ + const std::string strOutput = myResults.str(); + assert(strOutput.length() <= std::numeric_limits<int>::max()); + int sizeOutput = int(strOutput.length()); + retMpi = MPI_Send(&sizeOutput, 1, MPI_INT, 0, 99, inComm); + assert(retMpi == MPI_SUCCESS); + retMpi = MPI_Send(const_cast<char*>(strOutput.data()), sizeOutput, MPI_CHAR, 0, 100, inComm); + assert(retMpi == MPI_SUCCESS); + } + else{ + if(onlyP0 == false){ + std::vector<char> buffer; + for(int idxProc = nbProcess-1 ; idxProc > 0 ; --idxProc){ + int sizeRecv; + retMpi = MPI_Recv(&sizeRecv, 1, MPI_INT, idxProc, 99, inComm, MPI_STATUS_IGNORE); + assert(retMpi == MPI_SUCCESS); + buffer.resize(sizeRecv+1); + retMpi = MPI_Recv(buffer.data(), sizeRecv, MPI_CHAR, idxProc, 100, inComm, MPI_STATUS_IGNORE); + assert(retMpi == MPI_SUCCESS); + buffer[sizeRecv]='\0'; + m_outputStream << buffer.data(); + } + } + m_outputStream << myResults.str(); + m_outputStream.flush(); + } + } + + void showMpi(const MPI_Comm inComm) const { + 
struct SerializedEvent { + char path[512]; + char name[128]; + double totalTime; + double minTime; + double maxTime; + int occurrence; + }; + + // Convert my events into sendable object + + std::vector<SerializedEvent> myEvents; + myEvents.reserve(m_records.size()); + + for(const std::pair<std::string, const CoreEvent*>& event : m_records){ + myEvents.emplace_back(); + SerializedEvent& current_event = myEvents.back(); + + current_event.totalTime = event.second->getDuration(); + current_event.minTime = event.second->getMin(); + current_event.maxTime = event.second->getMax(); + current_event.occurrence = event.second->getOccurrence(); + + strncpy(current_event.name, event.second->getName().c_str(), 128); + std::stringstream path; + std::stack<CoreEvent*> parents = event.second->getParents(); + while(parents.size()){ + path << parents.top()->getName() << " << "; + parents.pop(); + } + + strncpy(current_event.path, path.str().c_str(), 512); + } + + // Send to process 0 + int myRank, nbProcess; + int retMpi = MPI_Comm_rank( inComm, &myRank); + variable_used_only_in_assert(retMpi); + assert(retMpi == MPI_SUCCESS); + retMpi = MPI_Comm_size( inComm, &nbProcess); + assert(retMpi == MPI_SUCCESS); + std::unique_ptr<int[]> nbEventsPerProc; + if(myRank == 0){ + nbEventsPerProc.reset(new int[nbProcess]); + } + const int myNbEvents = int(myEvents.size()); + retMpi = MPI_Gather(const_cast<int*>(&myNbEvents), 1, MPI_INT, + nbEventsPerProc.get(), 1, MPI_INT, + 0, inComm); + assert(retMpi == MPI_SUCCESS); + // Process 0 merge and print results + std::unique_ptr<int[]> dipls; + std::unique_ptr<SerializedEvent[]> allEvents; + std::unique_ptr<int[]> nbEventsPerProcByte; + std::unique_ptr<int[]> diplsByte; + if(myRank == 0){ + dipls.reset(new int[nbProcess+1]); + diplsByte.reset(new int[nbProcess+1]); + nbEventsPerProcByte.reset(new int[nbProcess]); + dipls[0] = 0; + diplsByte[0] = 0; + for(int idx = 1 ; idx <= nbProcess ; ++idx){ + dipls[idx] = dipls[idx-1] + nbEventsPerProc[idx-1]; + 
diplsByte[idx] = dipls[idx] * int(sizeof(SerializedEvent)); + nbEventsPerProcByte[idx-1] = nbEventsPerProc[idx-1] * int(sizeof(SerializedEvent)); + } + allEvents.reset(new SerializedEvent[dipls[nbProcess]]); + } + + retMpi = MPI_Gatherv(myEvents.data(), myNbEvents * int(sizeof(SerializedEvent)), MPI_BYTE, + allEvents.get(), nbEventsPerProcByte.get(), diplsByte.get(), + MPI_BYTE, 0, inComm); + assert(retMpi == MPI_SUCCESS); + + if(myRank == 0){ + struct GlobalEvent { + char path[512]; + char name[128]; + double totalTime; + double minTime; + double maxTime; + int occurrence; + int nbProcess; + double minTimeProcess; + double maxTimeProcess; + }; + + std::unordered_map<std::string, GlobalEvent> mapEvents; + for(int idxEvent = 0 ; idxEvent < dipls[nbProcess] ; ++idxEvent){ + const std::string key = std::string(allEvents[idxEvent].path) + std::string(allEvents[idxEvent].name); + if(mapEvents.find(key) == mapEvents.end()){ + GlobalEvent& newEvent = mapEvents[key]; + strncpy(newEvent.path, allEvents[idxEvent].path, 512); + strncpy(newEvent.name, allEvents[idxEvent].name, 128); + newEvent.totalTime = allEvents[idxEvent].totalTime; + newEvent.minTime = allEvents[idxEvent].minTime; + newEvent.maxTime = allEvents[idxEvent].maxTime; + newEvent.occurrence = allEvents[idxEvent].occurrence; + newEvent.nbProcess = 1; + newEvent.minTimeProcess = allEvents[idxEvent].totalTime; + newEvent.maxTimeProcess = allEvents[idxEvent].totalTime; + } + else{ + GlobalEvent& newEvent = mapEvents[key]; + assert(strcmp(newEvent.path, allEvents[idxEvent].path) == 0); + assert(strcmp(newEvent.name, allEvents[idxEvent].name) == 0); + newEvent.totalTime += allEvents[idxEvent].totalTime; + newEvent.minTime = std::min(newEvent.minTime, allEvents[idxEvent].minTime); + newEvent.maxTime = std::max(newEvent.maxTime, allEvents[idxEvent].maxTime); + newEvent.occurrence += allEvents[idxEvent].occurrence; + newEvent.nbProcess += 1; + newEvent.minTimeProcess = std::min(newEvent.minTimeProcess, + 
allEvents[idxEvent].totalTime); + newEvent.maxTimeProcess = std::max(newEvent.maxTimeProcess, + allEvents[idxEvent].totalTime); + } + } + + m_outputStream << "[MPI-TIMING] Mpi times.\n"; + for(const auto& iter : mapEvents){ + const GlobalEvent& gevent = iter.second; + m_outputStream << "[MPI-TIMING] @" << gevent.name << "\n"; + m_outputStream << "[MPI-TIMING] Stack => " << gevent.path << "\n"; + m_outputStream << "[MPI-TIMING] \t Done by " << gevent.nbProcess << " processes\n"; + m_outputStream << "[MPI-TIMING] \t Total time for all " << gevent.totalTime + << "s (average per process " << gevent.totalTime/gevent.nbProcess << "s)\n"; + m_outputStream << "[MPI-TIMING] \t Min time for a process " << gevent.minTimeProcess + << "s Max time for a process " << gevent.maxTimeProcess << "s\n"; + m_outputStream << "[MPI-TIMING] \t The same call has been done " << gevent.occurrence + << " times by all process (duration min " << gevent.minTime << "s max " << gevent.maxTime << "s avg " + << gevent.totalTime/gevent.occurrence << "s)\n"; + } + } + m_outputStream.flush(); + } + + void showHtml(const MPI_Comm inComm, const bool onlyP0 = true) const { + int myRank, nbProcess; + int retMpi = MPI_Comm_rank( inComm, &myRank); + assert(retMpi == MPI_SUCCESS); + variable_used_only_in_assert(retMpi); + retMpi = MPI_Comm_size( inComm, &nbProcess); + assert(retMpi == MPI_SUCCESS); + + if(onlyP0 && myRank != 0){ + return; + } + + std::stringstream myResults; + + std::stack<std::pair<int, const CoreEvent*>> events; + + for (int idx = static_cast<int>(m_root->getChildren().size()) - 1; idx >= 0; --idx) { + events.push({0, m_root->getChildren()[idx]}); + } + + myResults << "<h1>Process : " << myRank << "</h1>\n"; + + double totalDuration = 0; + for (int idx = + static_cast<int>(m_root->getChildren().size()) - 1; + idx >= 0; --idx) { + totalDuration += m_root->getChildren()[idx]->getDuration(); + } + + myResults << "<h2> " << m_root->getName() << " (" << totalDuration << "s)</h2>\n"; + myResults 
<< "<ul>\n"; + int idxBox = myRank*100000; + + while (events.size()) { + const std::pair<int, const CoreEvent*> eventToShow = + events.top(); + events.pop(); + + if(eventToShow.first == -1){ + myResults << "</ul>\n"; + myResults << "</li>\n"; + } + else if(eventToShow.second->getChildren().size() == 0){ + myResults << "<li>● <span title=\""; + if (eventToShow.second->getOccurrence() != 1) { + myResults << "Min = " << eventToShow.second->getMin() << "s ; Max = " << eventToShow.second->getMax() + << "s ; Average = " << eventToShow.second->getAverage() << "s ; Occurrence = " + << eventToShow.second->getOccurrence(); + } + myResults << "\">" << eventToShow.second->getName(); + const double percentage = 100*eventToShow.second->getDuration()/totalDuration; + if( percentage < 0.001 ){ + myResults << " (< 0.001% -- " ; + } + else{ + myResults << " (" << std::fixed << std::setprecision(3) << percentage << "% -- " ; + } + if(eventToShow.second->getParents().size()){ + const double percentageParent = 100*eventToShow.second->getDuration()/eventToShow.second->getParents().top()->getDuration(); + myResults << "[" << std::fixed << std::setprecision(3) << percentageParent << "%] -- " ; + } + myResults << eventToShow.second->getDuration() <<"s)</span></li>\n"; + } + else{ + myResults << "<li><input type=\"checkbox\" id=\"c" << idxBox << "\" />\n"; + myResults << " <i class=\"fa fa-angle-double-right\">→ </i>\n"; + myResults << " <i class=\"fa fa-angle-double-down\">↓ </i>\n"; + myResults << " <label for=\"c" << idxBox++ << "\"><span title=\""; + if (eventToShow.second->getOccurrence() != 1) { + myResults << "Min = " << eventToShow.second->getMin() << "s ; Max = " << eventToShow.second->getMax() + << "s ; Average = " << eventToShow.second->getAverage() << "s ; Occurrence = " + << eventToShow.second->getOccurrence(); + } + myResults << "\">" << eventToShow.second->getName(); + const double percentage = 100*eventToShow.second->getDuration()/totalDuration; + if( percentage < 0.001 ){ + 
myResults << " (< 0.001% -- " ; + } + else{ + myResults << " (" << std::fixed << std::setprecision(3) << percentage << "% -- " ; + } + if(eventToShow.second->getParents().size()){ + const double percentageParent = 100*eventToShow.second->getDuration()/eventToShow.second->getParents().top()->getDuration(); + myResults << "[" << std::fixed << std::setprecision(3) << percentageParent << "%] -- " ; + } + myResults << eventToShow.second->getDuration() <<"s)</span></label>\n"; + myResults << "<ul>\n"; + events.push({-1, nullptr}); + + for (int idx = + static_cast<int>(eventToShow.second->getChildren().size()) - 1; + idx >= 0; --idx) { + events.push( + {eventToShow.first + 1, eventToShow.second->getChildren()[idx]}); + } + } + } + + myResults << "</ul>\n"; + + if(myRank != 0){ + const std::string strOutput = myResults.str(); + assert(strOutput.length() <= std::numeric_limits<int>::max()); + int sizeOutput = int(strOutput.length()); + retMpi = MPI_Send(&sizeOutput, 1, MPI_INT, 0, 99, inComm); + assert(retMpi == MPI_SUCCESS); + retMpi = MPI_Send(const_cast<char*>(strOutput.data()), sizeOutput, MPI_CHAR, 0, 100, inComm); + assert(retMpi == MPI_SUCCESS); + } + else{ + const std::string htmlOutput = (getenv("HTMLOUTPUT")?getenv("HTMLOUTPUT"):"timings.html"); + + std::cout << "Timing output html set to : " << htmlOutput << std::endl; + + std::ofstream htmlfile(htmlOutput); + + htmlfile << "<html>\ + <head>\ + <style>\ + input {\ + display: none;\ + }\ + input ~ ul {\ + display: none;\ + }\ + input:checked ~ ul {\ + display: block;\ + }\ + input ~ .fa-angle-double-down {\ + display: none;\ + }\ + input:checked ~ .fa-angle-double-right {\ + display: none;\ + }\ + input:checked ~ .fa-angle-double-down {\ + display: inline;\ + }\ + li {\ + display: block;\ + font-family: 'Arial';\ + font-size: 15px;\ + padding: 0.2em;\ + border: 1px solid transparent;\ + }\ + li:hover {\ + border: 1px solid grey;\ + border-radius: 3px;\ + background-color: lightgrey;\ + }\ + span:hover {\ + color: 
blue;\ + }\ + </style>\ + </head>\ + <body>"; + + if(onlyP0 == false){ + std::vector<char> buffer; + for(int idxProc = nbProcess-1 ; idxProc > 0 ; --idxProc){ + int sizeRecv; + retMpi = MPI_Recv(&sizeRecv, 1, MPI_INT, idxProc, 99, inComm, MPI_STATUS_IGNORE); + assert(retMpi == MPI_SUCCESS); + buffer.resize(sizeRecv+1); + retMpi = MPI_Recv(buffer.data(), sizeRecv, MPI_CHAR, idxProc, 100, inComm, MPI_STATUS_IGNORE); + assert(retMpi == MPI_SUCCESS); + buffer[sizeRecv]='\0'; + htmlfile << buffer.data(); + } + } + htmlfile << myResults.str(); + htmlfile << "</body>\ + </html>"; + } + } + + + std::stack<CoreEvent*> getCurrentThreadEvent() const { + return m_currentEventsStackPerThread[omp_get_thread_num()].top(); + } + + friend ScopeEvent; +}; + +/////////////////////////////////////////////////////////////// + +/** A scope event should be used + * to record the duration of a part of the code + * (section, scope, etc.). + * The timer is stoped automatically when the object is destroyed + * or when "finish" is explicitely called. + * The object cannot be copied/moved to ensure coherency in the + * events hierarchy. 
+ */ +class ScopeEvent { +protected: + //< The manager to refer to + EventManager& m_manager; + //< The core event + EventManager::CoreEvent* m_event; + //< Time to get elapsed time + bfps_timer m_timer; + //< Is true if it has been created for task + bool m_isTask; + +public: + ScopeEvent(const std::string& inName, EventManager& inManager, + const std::string& inUniqueKey) + : m_manager(inManager), + m_event(inManager.getEvent(inName, inUniqueKey)), + m_isTask(false) { + m_timer.start(); + } + + ScopeEvent(const std::string& inName, EventManager& inManager, + const std::string& inUniqueKey, + const std::stack<EventManager::CoreEvent*>& inParentStack) + : m_manager(inManager), + m_event( + inManager.getEventFromContext(inName, inUniqueKey, inParentStack)), + m_isTask(true) { + m_timer.start(); + } + + ~ScopeEvent() { + m_event->addRecord(m_timer.stopAndGetElapsed(), m_isTask); + if (m_isTask == false) { + m_manager.popEvent(m_event); + } else { + m_manager.popContext(m_event); + } + } + + ScopeEvent(const ScopeEvent&) = delete; + ScopeEvent& operator=(const ScopeEvent&) = delete; + ScopeEvent(ScopeEvent&&) = delete; + ScopeEvent& operator=(ScopeEvent&&) = delete; +}; + +#define ScopeEventUniqueKey_Core_To_Str_Ext(X) #X +#define ScopeEventUniqueKey_Core_To_Str(X) \ + ScopeEventUniqueKey_Core_To_Str_Ext(X) +#define ScopeEventUniqueKey __FILE__ ScopeEventUniqueKey_Core_To_Str(__LINE__) + +#define ScopeEventMultiRefKey std::string("-- multiref event --") + +#ifdef USE_TIMINGOUTPUT + +extern EventManager global_timer_manager; + +#define TIMEZONE_Core_Merge(x, y) x##y +#define TIMEZONE_Core_Pre_Merge(x, y) TIMEZONE_Core_Merge(x, y) + +#define TIMEZONE(NAME) \ + ScopeEvent TIMEZONE_Core_Pre_Merge(____TIMEZONE_AUTO_ID, __LINE__)( \ + NAME, global_timer_manager, ScopeEventUniqueKey); +#define TIMEZONE_MULTI_REF(NAME) \ + ScopeEvent TIMEZONE_Core_Pre_Merge(____TIMEZONE_AUTO_ID, __LINE__)( \ + NAME, global_timer_manager, ScopeEventMultiRefKey); + +#define 
TIMEZONE_OMP_INIT_PRETASK(VARNAME) \ + auto VARNAME##core = global_timer_manager.getCurrentThreadEvent(); \ + auto VARNAME = &VARNAME##core; +#define TIMEZONE_OMP_TASK(NAME, VARNAME) \ + ScopeEvent TIMEZONE_Core_Pre_Merge(____TIMEZONE_AUTO_ID, __LINE__)( \ + NAME, global_timer_manager, ScopeEventUniqueKey, *VARNAME); +#define TIMEZONE_OMP_PRAGMA_TASK_KEY(VARNAME) \ + shared(global_timer_manager) firstprivate(VARNAME) + +#define TIMEZONE_OMP_INIT_PREPARALLEL(NBTHREADS) \ + global_timer_manager.startParallelRegion(NBTHREADS); + +#else + +#define TIMEZONE(NAME) +#define TIMEZONE_MULTI_REF(NAME) +#define TIMEZONE_OMP_INIT_PRETASK(VARNAME) +#define TIMEZONE_OMP_TASK(NAME, VARNAME) +#define TIMEZONE_OMP_PRAGMA_TASK_KEY(VARNAME) +#define TIMEZONE_OMP_INIT_PREPARALLEL(NBTHREADS) + +#endif + + +#endif diff --git a/bfps/cpp/shared_array.hpp b/bfps/cpp/shared_array.hpp new file mode 100644 index 0000000000000000000000000000000000000000..1951e2f9838ccf37367d859206453d3db91e8e19 --- /dev/null +++ b/bfps/cpp/shared_array.hpp @@ -0,0 +1,110 @@ +#ifndef SHAREDARRAY_HPP +#define SHAREDARRAY_HPP + +#include <omp.h> +#include <functional> +#include <iostream> + +// Cannot be used by different parallel section at the same time +template <class ValueType> +class shared_array{ + int currentNbThreads; + ValueType** __restrict__ values; + size_t dim; + + std::function<void(ValueType*)> initFunc; + + bool hasBeenMerged; + +public: + shared_array(const size_t inDim) + : currentNbThreads(omp_get_max_threads()), + values(nullptr), dim(inDim), hasBeenMerged(false){ + values = new ValueType*[currentNbThreads]; + values[0] = new ValueType[dim]; + for(int idxThread = 1 ; idxThread < currentNbThreads ; ++idxThread){ + values[idxThread] = nullptr; + } + } + + shared_array(const size_t inDim, std::function<void(ValueType*)> inInitFunc) + : shared_array(inDim){ + setInitFunction(inInitFunc); + } + + ~shared_array(){ + for(int idxThread = 0 ; idxThread < currentNbThreads ; ++idxThread){ + delete[] 
values[idxThread]; + } + delete[] values; + if(hasBeenMerged == false){ + } + } + + ValueType* getMasterData(){ + return values[0]; + } + + const ValueType* getMasterData() const{ + return values[0]; + } + + void merge(){ + ValueType* __restrict__ dest = values[0]; + for(int idxThread = 1 ; idxThread < currentNbThreads ; ++idxThread){ + if(values[idxThread]){ + const ValueType* __restrict__ src = values[idxThread]; + for( size_t idxVal = 0 ; idxVal < dim ; ++idxVal){ + dest[idxVal] += src[idxVal]; + } + } + } + hasBeenMerged = true; + } + + template <class Func> + void merge(Func func){ + ValueType* __restrict__ dest = values[0]; + for(int idxThread = 1 ; idxThread < currentNbThreads ; ++idxThread){ + if(values[idxThread]){ + const ValueType* __restrict__ src = values[idxThread]; + for( size_t idxVal = 0 ; idxVal < dim ; ++idxVal){ + dest[idxVal] = func(idxVal, dest[idxVal], src[idxVal]); + } + } + } + hasBeenMerged = true; + } + + void mergeParallel(){ + merge(); // not done yet + } + + template <class Func> + void mergeParallel(Func func){ + merge(func); // not done yet + } + + void setInitFunction(std::function<void(ValueType*)> inInitFunc){ + initFunc = inInitFunc; + initFunc(values[0]); + } + + ValueType* getMine(){ + assert(omp_get_thread_num() < currentNbThreads); + + if(values[omp_get_thread_num()] == nullptr){ + ValueType* myValue = new ValueType[dim]; + if(initFunc){ + initFunc(myValue); + } + + values[omp_get_thread_num()] = myValue; + return myValue; + } + + return values[omp_get_thread_num()]; + } +}; + +#endif diff --git a/bfps/cpp/spline.hpp b/bfps/cpp/spline.hpp new file mode 100644 index 0000000000000000000000000000000000000000..d66d2b1eb42278b987072ffff24d0123c86a1e2f --- /dev/null +++ b/bfps/cpp/spline.hpp @@ -0,0 +1,15 @@ +#ifndef SPLINE_HPP +#define SPLINE_HPP + +#include "spline_n1.hpp" +#include "spline_n2.hpp" +#include "spline_n3.hpp" +#include "spline_n4.hpp" +#include "spline_n5.hpp" +#include "spline_n6.hpp" +#include "spline_n7.hpp" 
+#include "spline_n8.hpp" +#include "spline_n9.hpp" +#include "spline_n10.hpp" + +#endif diff --git a/bfps/cpp/spline_n10.cpp b/bfps/cpp/spline_n10.cpp new file mode 100644 index 0000000000000000000000000000000000000000..1712663933addeecba17955f8632d1a20e8c32c6 --- /dev/null +++ b/bfps/cpp/spline_n10.cpp @@ -0,0 +1,414 @@ +/********************************************************************** +* * +* Copyright 2015 Max Planck Institute * +* for Dynamics and Self-Organization * +* * +* This file is part of bfps. * +* * +* bfps is free software: you can redistribute it and/or modify * +* it under the terms of the GNU General Public License as published * +* by the Free Software Foundation, either version 3 of the License, * +* or (at your option) any later version. * +* * +* bfps is distributed in the hope that it will be useful, * +* but WITHOUT ANY WARRANTY; without even the implied warranty of * +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * +* GNU General Public License for more details. * +* * +* You should have received a copy of the GNU General Public License * +* along with bfps. 
If not, see <http://www.gnu.org/licenses/> * +* * +* Contact: Cristian.Lalescu@ds.mpg.de * +* * +**********************************************************************/ + + + +#include "spline_n10.hpp" +#include <cmath> + +void beta_n10_m0(int deriv, double x, double *poly_val) { + switch(deriv) + { + case 0: + poly_val[0] = 0; + poly_val[1] = 0; + poly_val[2] = 0; + poly_val[3] = 0; + poly_val[4] = 0; + poly_val[5] = 0; + poly_val[6] = 0; + poly_val[7] = 0; + poly_val[8] = 0; + poly_val[9] = 0; + poly_val[10] = -x + 1; + poly_val[11] = x; + poly_val[12] = 0; + poly_val[13] = 0; + poly_val[14] = 0; + poly_val[15] = 0; + poly_val[16] = 0; + poly_val[17] = 0; + poly_val[18] = 0; + poly_val[19] = 0; + poly_val[20] = 0; + poly_val[21] = 0; + break; + case 1: + poly_val[0] = 0; + poly_val[1] = 0; + poly_val[2] = 0; + poly_val[3] = 0; + poly_val[4] = 0; + poly_val[5] = 0; + poly_val[6] = 0; + poly_val[7] = 0; + poly_val[8] = 0; + poly_val[9] = 0; + poly_val[10] = -1; + poly_val[11] = 1; + poly_val[12] = 0; + poly_val[13] = 0; + poly_val[14] = 0; + poly_val[15] = 0; + poly_val[16] = 0; + poly_val[17] = 0; + poly_val[18] = 0; + poly_val[19] = 0; + poly_val[20] = 0; + poly_val[21] = 0; + break; + case 2: + poly_val[0] = 0; + poly_val[1] = 0; + poly_val[2] = 0; + poly_val[3] = 0; + poly_val[4] = 0; + poly_val[5] = 0; + poly_val[6] = 0; + poly_val[7] = 0; + poly_val[8] = 0; + poly_val[9] = 0; + poly_val[10] = 0; + poly_val[11] = 0; + poly_val[12] = 0; + poly_val[13] = 0; + poly_val[14] = 0; + poly_val[15] = 0; + poly_val[16] = 0; + poly_val[17] = 0; + poly_val[18] = 0; + poly_val[19] = 0; + poly_val[20] = 0; + poly_val[21] = 0; + break; + } +} +void beta_n10_m1(int deriv, double x, double *poly_val) { + switch(deriv) + { + case 0: + poly_val[0] = x*(x*((1.0L/1847560.0L)*x - 1.0L/923780.0L) + 1.0L/1847560.0L); + poly_val[1] = x*(x*(-191.0L/16628040.0L*x + 23.0L/978120.0L) - 5.0L/415701.0L); + poly_val[2] = x*(x*((775.0L/6651216.0L)*x - 815.0L/3325608.0L) + 5.0L/38896.0L); + 
poly_val[3] = x*(x*(-205.0L/272272.0L*x + 445.0L/272272.0L) - 15.0L/17017.0L); + poly_val[4] = x*(x*((475.0L/136136.0L)*x - 535.0L/68068.0L) + 5.0L/1144.0L); + poly_val[5] = x*(x*(-71.0L/5720.0L*x + 167.0L/5720.0L) - 12.0L/715.0L); + poly_val[6] = x*(x*((51.0L/1430.0L)*x - 63.0L/715.0L) + 15.0L/286.0L); + poly_val[7] = x*(x*(-25.0L/286.0L*x + 5.0L/22.0L) - 20.0L/143.0L); + poly_val[8] = x*(x*((115.0L/572.0L)*x - 155.0L/286.0L) + 15.0L/44.0L); + poly_val[9] = x*(x*(-25.0L/44.0L*x + 65.0L/44.0L) - 10.0L/11.0L); + poly_val[10] = pow(x, 2)*((12.0L/11.0L)*x - 23.0L/11.0L) + 1; + poly_val[11] = x*(x*(-12.0L/11.0L*x + 13.0L/11.0L) + 10.0L/11.0L); + poly_val[12] = x*(x*((25.0L/44.0L)*x - 5.0L/22.0L) - 15.0L/44.0L); + poly_val[13] = x*(x*(-115.0L/572.0L*x + 35.0L/572.0L) + 20.0L/143.0L); + poly_val[14] = x*(x*((25.0L/286.0L)*x - 5.0L/143.0L) - 15.0L/286.0L); + poly_val[15] = x*(x*(-51.0L/1430.0L*x + 27.0L/1430.0L) + 12.0L/715.0L); + poly_val[16] = x*(x*((71.0L/5720.0L)*x - 23.0L/2860.0L) - 5.0L/1144.0L); + poly_val[17] = x*(x*(-475.0L/136136.0L*x + 355.0L/136136.0L) + 15.0L/17017.0L); + poly_val[18] = x*(x*((205.0L/272272.0L)*x - 5.0L/8008.0L) - 5.0L/38896.0L); + poly_val[19] = x*(x*(-775.0L/6651216.0L*x + 695.0L/6651216.0L) + 5.0L/415701.0L); + poly_val[20] = x*(x*((191.0L/16628040.0L)*x - 7.0L/639540.0L) - 1.0L/1847560.0L); + poly_val[21] = pow(x, 2)*(-1.0L/1847560.0L*x + 1.0L/1847560.0L); + break; + case 1: + poly_val[0] = x*((3.0L/1847560.0L)*x - 1.0L/461890.0L) + 1.0L/1847560.0L; + poly_val[1] = x*(-191.0L/5542680.0L*x + 23.0L/489060.0L) - 5.0L/415701.0L; + poly_val[2] = x*((775.0L/2217072.0L)*x - 815.0L/1662804.0L) + 5.0L/38896.0L; + poly_val[3] = x*(-615.0L/272272.0L*x + 445.0L/136136.0L) - 15.0L/17017.0L; + poly_val[4] = x*((1425.0L/136136.0L)*x - 535.0L/34034.0L) + 5.0L/1144.0L; + poly_val[5] = x*(-213.0L/5720.0L*x + 167.0L/2860.0L) - 12.0L/715.0L; + poly_val[6] = x*((153.0L/1430.0L)*x - 126.0L/715.0L) + 15.0L/286.0L; + poly_val[7] = x*(-75.0L/286.0L*x + 
5.0L/11.0L) - 20.0L/143.0L; + poly_val[8] = x*((345.0L/572.0L)*x - 155.0L/143.0L) + 15.0L/44.0L; + poly_val[9] = x*(-75.0L/44.0L*x + 65.0L/22.0L) - 10.0L/11.0L; + poly_val[10] = x*((36.0L/11.0L)*x - 46.0L/11.0L); + poly_val[11] = x*(-36.0L/11.0L*x + 26.0L/11.0L) + 10.0L/11.0L; + poly_val[12] = x*((75.0L/44.0L)*x - 5.0L/11.0L) - 15.0L/44.0L; + poly_val[13] = x*(-345.0L/572.0L*x + 35.0L/286.0L) + 20.0L/143.0L; + poly_val[14] = x*((75.0L/286.0L)*x - 10.0L/143.0L) - 15.0L/286.0L; + poly_val[15] = x*(-153.0L/1430.0L*x + 27.0L/715.0L) + 12.0L/715.0L; + poly_val[16] = x*((213.0L/5720.0L)*x - 23.0L/1430.0L) - 5.0L/1144.0L; + poly_val[17] = x*(-1425.0L/136136.0L*x + 355.0L/68068.0L) + 15.0L/17017.0L; + poly_val[18] = x*((615.0L/272272.0L)*x - 5.0L/4004.0L) - 5.0L/38896.0L; + poly_val[19] = x*(-775.0L/2217072.0L*x + 695.0L/3325608.0L) + 5.0L/415701.0L; + poly_val[20] = x*((191.0L/5542680.0L)*x - 7.0L/319770.0L) - 1.0L/1847560.0L; + poly_val[21] = x*(-3.0L/1847560.0L*x + 1.0L/923780.0L); + break; + case 2: + poly_val[0] = (3.0L/923780.0L)*x - 1.0L/461890.0L; + poly_val[1] = -191.0L/2771340.0L*x + 23.0L/489060.0L; + poly_val[2] = (775.0L/1108536.0L)*x - 815.0L/1662804.0L; + poly_val[3] = -615.0L/136136.0L*x + 445.0L/136136.0L; + poly_val[4] = (1425.0L/68068.0L)*x - 535.0L/34034.0L; + poly_val[5] = -213.0L/2860.0L*x + 167.0L/2860.0L; + poly_val[6] = (153.0L/715.0L)*x - 126.0L/715.0L; + poly_val[7] = -75.0L/143.0L*x + 5.0L/11.0L; + poly_val[8] = (345.0L/286.0L)*x - 155.0L/143.0L; + poly_val[9] = -75.0L/22.0L*x + 65.0L/22.0L; + poly_val[10] = (72.0L/11.0L)*x - 46.0L/11.0L; + poly_val[11] = -72.0L/11.0L*x + 26.0L/11.0L; + poly_val[12] = (75.0L/22.0L)*x - 5.0L/11.0L; + poly_val[13] = -345.0L/286.0L*x + 35.0L/286.0L; + poly_val[14] = (75.0L/143.0L)*x - 10.0L/143.0L; + poly_val[15] = -153.0L/715.0L*x + 27.0L/715.0L; + poly_val[16] = (213.0L/2860.0L)*x - 23.0L/1430.0L; + poly_val[17] = -1425.0L/68068.0L*x + 355.0L/68068.0L; + poly_val[18] = (615.0L/136136.0L)*x - 5.0L/4004.0L; + 
poly_val[19] = -775.0L/1108536.0L*x + 695.0L/3325608.0L; + poly_val[20] = (191.0L/2771340.0L)*x - 7.0L/319770.0L; + poly_val[21] = -3.0L/923780.0L*x + 1.0L/923780.0L; + break; + } +} +void beta_n10_m2(int deriv, double x, double *poly_val) { + switch(deriv) + { + case 0: + poly_val[0] = x*(x*(x*(x*(-29.0L/18475600.0L*x + 7.0L/1679600.0L) - 3.0L/972400.0L) - 1.0L/18475600.0L) + 1.0L/1847560.0L); + poly_val[1] = x*(x*(x*(x*((4499.0L/136047600.0L)*x - 5507.0L/62355150.0L) + 32893.0L/498841200.0L) + 5.0L/3741309.0L) - 5.0L/415701.0L); + poly_val[2] = x*(x*(x*(x*(-159065.0L/478887552.0L*x + 427795.0L/478887552.0L) - 24815.0L/36837504.0L) - 5.0L/311168.0L) + 5.0L/38896.0L); + poly_val[3] = x*(x*(x*(x*((32275.0L/15247232.0L)*x - 2575.0L/448448.0L) + 66795.0L/15247232.0L) + 15.0L/119119.0L) - 15.0L/17017.0L); + poly_val[4] = x*(x*(x*(x*(-54965.0L/5717712.0L*x + 2955.0L/112112.0L) - 38855.0L/1905904.0L) - 5.0L/6864.0L) + 5.0L/1144.0L); + poly_val[5] = x*(x*(x*(x*((5689.0L/171600.0L)*x - 3953.0L/42900.0L) + 12427.0L/171600.0L) + 12.0L/3575.0L) - 12.0L/715.0L); + poly_val[6] = x*(x*(x*(x*(-2589.0L/28600.0L*x + 7323.0L/28600.0L) - 5859.0L/28600.0L) - 15.0L/1144.0L) + 15.0L/286.0L); + poly_val[7] = x*(x*(x*(x*((695.0L/3432.0L)*x - 335.0L/572.0L) + 545.0L/1144.0L) + 20.0L/429.0L) - 20.0L/143.0L); + poly_val[8] = x*(x*(x*(x*(-1325.0L/3432.0L*x + 3925.0L/3432.0L) - 245.0L/264.0L) - 15.0L/88.0L) + 15.0L/44.0L); + poly_val[9] = x*(x*(x*(x*((5.0L/8.0L)*x - 20.0L/11.0L) + 105.0L/88.0L) + 10.0L/11.0L) - 10.0L/11.0L); + poly_val[10] = pow(x, 2)*(x*(x*(-11370461.0L/13970880.0L*x + 10100381.0L/4656960.0L) - 3749981.0L/4656960.0L) - 1968329.0L/1270080.0L) + 1; + poly_val[11] = x*(x*(x*(x*((11370461.0L/13970880.0L)*x - 1206871.0L/635040.0L) + 3749981.0L/13970880.0L) + 10.0L/11.0L) + 10.0L/11.0L); + poly_val[12] = x*(x*(x*(x*(-5.0L/8.0L*x + 115.0L/88.0L) - 15.0L/88.0L) - 15.0L/88.0L) - 15.0L/44.0L); + poly_val[13] = x*(x*(x*(x*((1325.0L/3432.0L)*x - 225.0L/286.0L) + 245.0L/1144.0L) + 
20.0L/429.0L) + 20.0L/143.0L); + poly_val[14] = x*(x*(x*(x*(-695.0L/3432.0L*x + 1465.0L/3432.0L) - 545.0L/3432.0L) - 15.0L/1144.0L) - 15.0L/286.0L); + poly_val[15] = x*(x*(x*(x*((2589.0L/28600.0L)*x - 2811.0L/14300.0L) + 189.0L/2200.0L) + 12.0L/3575.0L) + 12.0L/715.0L); + poly_val[16] = x*(x*(x*(x*(-5689.0L/171600.0L*x + 4211.0L/57200.0L) - 2023.0L/57200.0L) - 5.0L/6864.0L) - 5.0L/1144.0L); + poly_val[17] = x*(x*(x*(x*((54965.0L/5717712.0L)*x - 15515.0L/714714.0L) + 63395.0L/5717712.0L) + 15.0L/119119.0L) + 15.0L/17017.0L); + poly_val[18] = x*(x*(x*(x*(-32275.0L/15247232.0L*x + 73825.0L/15247232.0L) - 39345.0L/15247232.0L) - 5.0L/311168.0L) - 5.0L/38896.0L); + poly_val[19] = x*(x*(x*(x*((159065.0L/478887552.0L)*x - 61255.0L/79814592.0L) + 3545.0L/8401536.0L) + 5.0L/3741309.0L) + 5.0L/415701.0L); + poly_val[20] = x*(x*(x*(x*(-4499.0L/136047600.0L*x + 6781.0L/88030800.0L) - 64897.0L/1496523600.0L) - 1.0L/18475600.0L) - 1.0L/1847560.0L); + poly_val[21] = pow(x, 3)*(x*((29.0L/18475600.0L)*x - 1.0L/271700.0L) + 3.0L/1421200.0L); + break; + case 1: + poly_val[0] = x*(x*(x*(-29.0L/3695120.0L*x + 7.0L/419900.0L) - 9.0L/972400.0L) - 1.0L/9237800.0L) + 1.0L/1847560.0L; + poly_val[1] = x*(x*(x*((4499.0L/27209520.0L)*x - 11014.0L/31177575.0L) + 32893.0L/166280400.0L) + 10.0L/3741309.0L) - 5.0L/415701.0L; + poly_val[2] = x*(x*(x*(-795325.0L/478887552.0L*x + 427795.0L/119721888.0L) - 24815.0L/12279168.0L) - 5.0L/155584.0L) + 5.0L/38896.0L; + poly_val[3] = x*(x*(x*((161375.0L/15247232.0L)*x - 2575.0L/112112.0L) + 200385.0L/15247232.0L) + 30.0L/119119.0L) - 15.0L/17017.0L; + poly_val[4] = x*(x*(x*(-274825.0L/5717712.0L*x + 2955.0L/28028.0L) - 116565.0L/1905904.0L) - 5.0L/3432.0L) + 5.0L/1144.0L; + poly_val[5] = x*(x*(x*((5689.0L/34320.0L)*x - 3953.0L/10725.0L) + 12427.0L/57200.0L) + 24.0L/3575.0L) - 12.0L/715.0L; + poly_val[6] = x*(x*(x*(-2589.0L/5720.0L*x + 7323.0L/7150.0L) - 17577.0L/28600.0L) - 15.0L/572.0L) + 15.0L/286.0L; + poly_val[7] = x*(x*(x*((3475.0L/3432.0L)*x - 
335.0L/143.0L) + 1635.0L/1144.0L) + 40.0L/429.0L) - 20.0L/143.0L; + poly_val[8] = x*(x*(x*(-6625.0L/3432.0L*x + 3925.0L/858.0L) - 245.0L/88.0L) - 15.0L/44.0L) + 15.0L/44.0L; + poly_val[9] = x*(x*(x*((25.0L/8.0L)*x - 80.0L/11.0L) + 315.0L/88.0L) + 20.0L/11.0L) - 10.0L/11.0L; + poly_val[10] = x*(x*(x*(-11370461.0L/2794176.0L*x + 10100381.0L/1164240.0L) - 3749981.0L/1552320.0L) - 1968329.0L/635040.0L); + poly_val[11] = x*(x*(x*((11370461.0L/2794176.0L)*x - 1206871.0L/158760.0L) + 3749981.0L/4656960.0L) + 20.0L/11.0L) + 10.0L/11.0L; + poly_val[12] = x*(x*(x*(-25.0L/8.0L*x + 115.0L/22.0L) - 45.0L/88.0L) - 15.0L/44.0L) - 15.0L/44.0L; + poly_val[13] = x*(x*(x*((6625.0L/3432.0L)*x - 450.0L/143.0L) + 735.0L/1144.0L) + 40.0L/429.0L) + 20.0L/143.0L; + poly_val[14] = x*(x*(x*(-3475.0L/3432.0L*x + 1465.0L/858.0L) - 545.0L/1144.0L) - 15.0L/572.0L) - 15.0L/286.0L; + poly_val[15] = x*(x*(x*((2589.0L/5720.0L)*x - 2811.0L/3575.0L) + 567.0L/2200.0L) + 24.0L/3575.0L) + 12.0L/715.0L; + poly_val[16] = x*(x*(x*(-5689.0L/34320.0L*x + 4211.0L/14300.0L) - 6069.0L/57200.0L) - 5.0L/3432.0L) - 5.0L/1144.0L; + poly_val[17] = x*(x*(x*((274825.0L/5717712.0L)*x - 31030.0L/357357.0L) + 63395.0L/1905904.0L) + 30.0L/119119.0L) + 15.0L/17017.0L; + poly_val[18] = x*(x*(x*(-161375.0L/15247232.0L*x + 73825.0L/3811808.0L) - 118035.0L/15247232.0L) - 5.0L/155584.0L) - 5.0L/38896.0L; + poly_val[19] = x*(x*(x*((795325.0L/478887552.0L)*x - 61255.0L/19953648.0L) + 3545.0L/2800512.0L) + 10.0L/3741309.0L) + 5.0L/415701.0L; + poly_val[20] = x*(x*(x*(-4499.0L/27209520.0L*x + 6781.0L/22007700.0L) - 64897.0L/498841200.0L) - 1.0L/9237800.0L) - 1.0L/1847560.0L; + poly_val[21] = pow(x, 2)*(x*((29.0L/3695120.0L)*x - 1.0L/67925.0L) + 9.0L/1421200.0L); + break; + case 2: + poly_val[0] = x*(x*(-29.0L/923780.0L*x + 21.0L/419900.0L) - 9.0L/486200.0L) - 1.0L/9237800.0L; + poly_val[1] = x*(x*((4499.0L/6802380.0L)*x - 11014.0L/10392525.0L) + 32893.0L/83140200.0L) + 10.0L/3741309.0L; + poly_val[2] = 
x*(x*(-795325.0L/119721888.0L*x + 427795.0L/39907296.0L) - 24815.0L/6139584.0L) - 5.0L/155584.0L; + poly_val[3] = x*(x*((161375.0L/3811808.0L)*x - 7725.0L/112112.0L) + 200385.0L/7623616.0L) + 30.0L/119119.0L; + poly_val[4] = x*(x*(-274825.0L/1429428.0L*x + 8865.0L/28028.0L) - 116565.0L/952952.0L) - 5.0L/3432.0L; + poly_val[5] = x*(x*((5689.0L/8580.0L)*x - 3953.0L/3575.0L) + 12427.0L/28600.0L) + 24.0L/3575.0L; + poly_val[6] = x*(x*(-2589.0L/1430.0L*x + 21969.0L/7150.0L) - 17577.0L/14300.0L) - 15.0L/572.0L; + poly_val[7] = x*(x*((3475.0L/858.0L)*x - 1005.0L/143.0L) + 1635.0L/572.0L) + 40.0L/429.0L; + poly_val[8] = x*(x*(-6625.0L/858.0L*x + 3925.0L/286.0L) - 245.0L/44.0L) - 15.0L/44.0L; + poly_val[9] = x*(x*((25.0L/2.0L)*x - 240.0L/11.0L) + 315.0L/44.0L) + 20.0L/11.0L; + poly_val[10] = x*(x*(-11370461.0L/698544.0L*x + 10100381.0L/388080.0L) - 3749981.0L/776160.0L) - 1968329.0L/635040.0L; + poly_val[11] = x*(x*((11370461.0L/698544.0L)*x - 1206871.0L/52920.0L) + 3749981.0L/2328480.0L) + 20.0L/11.0L; + poly_val[12] = x*(x*(-25.0L/2.0L*x + 345.0L/22.0L) - 45.0L/44.0L) - 15.0L/44.0L; + poly_val[13] = x*(x*((6625.0L/858.0L)*x - 1350.0L/143.0L) + 735.0L/572.0L) + 40.0L/429.0L; + poly_val[14] = x*(x*(-3475.0L/858.0L*x + 1465.0L/286.0L) - 545.0L/572.0L) - 15.0L/572.0L; + poly_val[15] = x*(x*((2589.0L/1430.0L)*x - 8433.0L/3575.0L) + 567.0L/1100.0L) + 24.0L/3575.0L; + poly_val[16] = x*(x*(-5689.0L/8580.0L*x + 12633.0L/14300.0L) - 6069.0L/28600.0L) - 5.0L/3432.0L; + poly_val[17] = x*(x*((274825.0L/1429428.0L)*x - 31030.0L/119119.0L) + 63395.0L/952952.0L) + 30.0L/119119.0L; + poly_val[18] = x*(x*(-161375.0L/3811808.0L*x + 221475.0L/3811808.0L) - 118035.0L/7623616.0L) - 5.0L/155584.0L; + poly_val[19] = x*(x*((795325.0L/119721888.0L)*x - 61255.0L/6651216.0L) + 3545.0L/1400256.0L) + 10.0L/3741309.0L; + poly_val[20] = x*(x*(-4499.0L/6802380.0L*x + 6781.0L/7335900.0L) - 64897.0L/249420600.0L) - 1.0L/9237800.0L; + poly_val[21] = x*(x*((29.0L/923780.0L)*x - 3.0L/67925.0L) + 
9.0L/710600.0L); + break; + } +} +void beta_n10_m3(int deriv, double x, double *poly_val) { + switch(deriv) + { + case 0: + poly_val[0] = x*(x*(x*(x*(x*(x*((51185699.0L/11732745024000.0L)*x - 4090369.0L/266653296000.0L) + 35733059.0L/1955457504000.0L) - 1072961.0L/154378224000.0L) - 514639.0L/617512896000.0L) - 1.0L/18475600.0L) + 1.0L/1847560.0L); + poly_val[1] = x*(x*(x*(x*(x*(x*(-97750271.0L/1066613184000.0L*x + 3782441303.0L/11732745024000.0L) - 237253277.0L/617512896000.0L) + 18781351.0L/128931264000.0L) + 11419.0L/617512896.0L) + 5.0L/3741309.0L) - 5.0L/415701.0L); + poly_val[2] = x*(x*(x*(x*(x*(x*((531225.0L/579394816.0L)*x - 75723343.0L/23465490048.0L) + 90288169.0L/23465490048.0L) - 1311199.0L/902518848.0L) - 487121.0L/2470051584.0L) - 5.0L/311168.0L) + 5.0L/38896.0L); + poly_val[3] = x*(x*(x*(x*(x*(x*(-14351957.0L/2470051584.0L*x + 5616313.0L/274450176.0L) - 20100053.0L/823350528.0L) + 22641695.0L/2470051584.0L) + 5663.0L/4200768.0L) + 15.0L/119119.0L) - 15.0L/17017.0L); + poly_val[4] = x*(x*(x*(x*(x*(x*((129283295.0L/4940103168.0L)*x - 10356823.0L/112275072.0L) + 90651851.0L/823350528.0L) - 50656289.0L/1235025792.0L) - 1933049.0L/290594304.0L) - 5.0L/6864.0L) + 5.0L/1144.0L); + poly_val[5] = x*(x*(x*(x*(x*(x*(-294222911.0L/3302208000.0L*x + 346045183.0L/1100736000.0L) - 4545171157.0L/12108096000.0L) + 5019772789.0L/36324288000.0L) + 9587629.0L/378378000.0L) + 12.0L/3575.0L) - 12.0L/715.0L); + poly_val[6] = x*(x*(x*(x*(x*(x*((720747847.0L/3027024000.0L)*x - 636560369.0L/756756000.0L) + 24128711.0L/24024000.0L) - 21109141.0L/58212000.0L) - 1888949.0L/24216192.0L) - 15.0L/1144.0L) + 15.0L/286.0L); + poly_val[7] = x*(x*(x*(x*(x*(x*(-37195415.0L/72648576.0L*x + 131578253.0L/72648576.0L) - 52283503.0L/24216192.0L) + 54623759.0L/72648576.0L) + 1827209.0L/9081072.0L) + 20.0L/429.0L) - 20.0L/143.0L); + poly_val[8] = x*(x*(x*(x*(x*(x*((130810913.0L/145297152.0L)*x - 1428677.0L/448448.0L) + 91285561.0L/24216192.0L) - 6286355.0L/5189184.0L) - 1650809.0L/3725568.0L) 
- 15.0L/88.0L) + 15.0L/44.0L); + poly_val[9] = x*(x*(x*(x*(x*(x*(-1327945.0L/1016064.0L*x + 51572033.0L/11176704.0L) - 20028763.0L/3725568.0L) + 17535659.0L/11176704.0L) + 698249.0L/1397088.0L) + 10.0L/11.0L) - 10.0L/11.0L); + poly_val[10] = pow(x, 2)*(pow(x, 2)*(x*(x*((10991207.0L/6985440.0L)*x - 571831.0L/103488.0L) + 8832661.0L/1397088.0L) - 1271555.0L/698544.0L) - 1968329.0L/1270080.0L) + 1; + poly_val[11] = x*(x*(x*(x*(x*(x*(-10991207.0L/6985440.0L*x + 6970883.0L/1270080.0L) - 43387097.0L/6985440.0L) + 1841911.0L/931392.0L) - 698249.0L/1397088.0L) + 10.0L/11.0L) + 10.0L/11.0L); + poly_val[12] = x*(x*(x*(x*(x*(x*((1327945.0L/1016064.0L)*x - 4223311.0L/931392.0L) + 9568231.0L/1862784.0L) - 5143529.0L/2794176.0L) + 1650809.0L/3725568.0L) - 15.0L/88.0L) - 15.0L/44.0L); + poly_val[13] = x*(x*(x*(x*(x*(x*(-130810913.0L/145297152.0L*x + 452785043.0L/145297152.0L) - 24637831.0L/6918912.0L) + 197560625.0L/145297152.0L) - 1827209.0L/9081072.0L) + 20.0L/429.0L) + 20.0L/143.0L); + poly_val[14] = x*(x*(x*(x*(x*(x*((37195415.0L/72648576.0L)*x - 10732471.0L/6054048.0L) + 24747451.0L/12108096.0L) - 1111433.0L/1397088.0L) + 1888949.0L/24216192.0L) - 15.0L/1144.0L) - 15.0L/286.0L); + poly_val[15] = x*(x*(x*(x*(x*(x*(-720747847.0L/3027024000.0L*x + 227181223.0L/275184000.0L) - 107350871.0L/112112000.0L) + 12483133.0L/33264000.0L) - 9587629.0L/378378000.0L) + 12.0L/3575.0L) + 12.0L/715.0L); + poly_val[16] = x*(x*(x*(x*(x*(x*((294222911.0L/3302208000.0L)*x - 255356207.0L/825552000.0L) + 2180676613.0L/6054048000.0L) - 1285312429.0L/9081072000.0L) + 1933049.0L/290594304.0L) - 5.0L/6864.0L) - 5.0L/1144.0L); + poly_val[17] = x*(x*(x*(x*(x*(x*(-129283295.0L/4940103168.0L*x + 49920317.0L/548900352.0L) - 174886343.0L/1646701056.0L) + 206342519.0L/4940103168.0L) - 5663.0L/4200768.0L) + 15.0L/119119.0L) + 15.0L/17017.0L); + poly_val[18] = x*(x*(x*(x*(x*(x*((14351957.0L/2470051584.0L)*x - 24958441.0L/1235025792.0L) + 9735059.0L/411675264.0L) - 5743835.0L/617512896.0L) + 
487121.0L/2470051584.0L) - 5.0L/311168.0L) - 5.0L/38896.0L); + poly_val[19] = x*(x*(x*(x*(x*(x*(-531225.0L/579394816.0L*x + 149757889.0L/46930980096.0L) - 175509947.0L/46930980096.0L) + 31049.0L/21111552.0L) - 11419.0L/617512896.0L) + 5.0L/3741309.0L) + 5.0L/415701.0L); + poly_val[20] = x*(x*(x*(x*(x*(x*((97750271.0L/1066613184000.0L)*x - 936082391.0L/2933186256000.0L) + 16516831.0L/44108064000.0L) - 431798291.0L/2933186256000.0L) + 514639.0L/617512896000.0L) - 1.0L/18475600.0L) - 1.0L/1847560.0L); + poly_val[21] = pow(x, 4)*(x*(x*(-51185699.0L/11732745024000.0L*x + 59441219.0L/3910915008000.0L) - 69813539.0L/3910915008000.0L) + 82302659.0L/11732745024000.0L); + break; + case 1: + poly_val[0] = x*(x*(x*(x*(x*((51185699.0L/1676106432000.0L)*x - 4090369.0L/44442216000.0L) + 35733059.0L/391091500800.0L) - 1072961.0L/38594556000.0L) - 514639.0L/205837632000.0L) - 1.0L/9237800.0L) + 1.0L/1847560.0L; + poly_val[1] = x*(x*(x*(x*(x*(-97750271.0L/152373312000.0L*x + 3782441303.0L/1955457504000.0L) - 237253277.0L/123502579200.0L) + 18781351.0L/32232816000.0L) + 11419.0L/205837632.0L) + 10.0L/3741309.0L) - 5.0L/415701.0L; + poly_val[2] = x*(x*(x*(x*(x*((531225.0L/82770688.0L)*x - 75723343.0L/3910915008.0L) + 451440845.0L/23465490048.0L) - 1311199.0L/225629712.0L) - 487121.0L/823350528.0L) - 5.0L/155584.0L) + 5.0L/38896.0L; + poly_val[3] = x*(x*(x*(x*(x*(-14351957.0L/352864512.0L*x + 5616313.0L/45741696.0L) - 100500265.0L/823350528.0L) + 22641695.0L/617512896.0L) + 5663.0L/1400256.0L) + 30.0L/119119.0L) - 15.0L/17017.0L; + poly_val[4] = x*(x*(x*(x*(x*((129283295.0L/705729024.0L)*x - 10356823.0L/18712512.0L) + 453259255.0L/823350528.0L) - 50656289.0L/308756448.0L) - 1933049.0L/96864768.0L) - 5.0L/3432.0L) + 5.0L/1144.0L; + poly_val[5] = x*(x*(x*(x*(x*(-294222911.0L/471744000.0L*x + 346045183.0L/183456000.0L) - 4545171157.0L/2421619200.0L) + 5019772789.0L/9081072000.0L) + 9587629.0L/126126000.0L) + 24.0L/3575.0L) - 12.0L/715.0L; + poly_val[6] = 
x*(x*(x*(x*(x*((720747847.0L/432432000.0L)*x - 636560369.0L/126126000.0L) + 24128711.0L/4804800.0L) - 21109141.0L/14553000.0L) - 1888949.0L/8072064.0L) - 15.0L/572.0L) + 15.0L/286.0L; + poly_val[7] = x*(x*(x*(x*(x*(-37195415.0L/10378368.0L*x + 131578253.0L/12108096.0L) - 261417515.0L/24216192.0L) + 54623759.0L/18162144.0L) + 1827209.0L/3027024.0L) + 40.0L/429.0L) - 20.0L/143.0L; + poly_val[8] = x*(x*(x*(x*(x*((130810913.0L/20756736.0L)*x - 4286031.0L/224224.0L) + 456427805.0L/24216192.0L) - 6286355.0L/1297296.0L) - 1650809.0L/1241856.0L) - 15.0L/44.0L) + 15.0L/44.0L; + poly_val[9] = x*(x*(x*(x*(x*(-1327945.0L/145152.0L*x + 51572033.0L/1862784.0L) - 100143815.0L/3725568.0L) + 17535659.0L/2794176.0L) + 698249.0L/465696.0L) + 20.0L/11.0L) - 10.0L/11.0L; + poly_val[10] = x*(pow(x, 2)*(x*(x*((10991207.0L/997920.0L)*x - 571831.0L/17248.0L) + 44163305.0L/1397088.0L) - 1271555.0L/174636.0L) - 1968329.0L/635040.0L); + poly_val[11] = x*(x*(x*(x*(x*(-10991207.0L/997920.0L*x + 6970883.0L/211680.0L) - 43387097.0L/1397088.0L) + 1841911.0L/232848.0L) - 698249.0L/465696.0L) + 20.0L/11.0L) + 10.0L/11.0L; + poly_val[12] = x*(x*(x*(x*(x*((1327945.0L/145152.0L)*x - 4223311.0L/155232.0L) + 47841155.0L/1862784.0L) - 5143529.0L/698544.0L) + 1650809.0L/1241856.0L) - 15.0L/44.0L) - 15.0L/44.0L; + poly_val[13] = x*(x*(x*(x*(x*(-130810913.0L/20756736.0L*x + 452785043.0L/24216192.0L) - 123189155.0L/6918912.0L) + 197560625.0L/36324288.0L) - 1827209.0L/3027024.0L) + 40.0L/429.0L) + 20.0L/143.0L; + poly_val[14] = x*(x*(x*(x*(x*((37195415.0L/10378368.0L)*x - 10732471.0L/1009008.0L) + 123737255.0L/12108096.0L) - 1111433.0L/349272.0L) + 1888949.0L/8072064.0L) - 15.0L/572.0L) - 15.0L/286.0L; + poly_val[15] = x*(x*(x*(x*(x*(-720747847.0L/432432000.0L*x + 227181223.0L/45864000.0L) - 107350871.0L/22422400.0L) + 12483133.0L/8316000.0L) - 9587629.0L/126126000.0L) + 24.0L/3575.0L) + 12.0L/715.0L; + poly_val[16] = x*(x*(x*(x*(x*((294222911.0L/471744000.0L)*x - 255356207.0L/137592000.0L) + 
2180676613.0L/1210809600.0L) - 1285312429.0L/2270268000.0L) + 1933049.0L/96864768.0L) - 5.0L/3432.0L) - 5.0L/1144.0L; + poly_val[17] = x*(x*(x*(x*(x*(-129283295.0L/705729024.0L*x + 49920317.0L/91483392.0L) - 874431715.0L/1646701056.0L) + 206342519.0L/1235025792.0L) - 5663.0L/1400256.0L) + 30.0L/119119.0L) + 15.0L/17017.0L; + poly_val[18] = x*(x*(x*(x*(x*((14351957.0L/352864512.0L)*x - 24958441.0L/205837632.0L) + 48675295.0L/411675264.0L) - 5743835.0L/154378224.0L) + 487121.0L/823350528.0L) - 5.0L/155584.0L) - 5.0L/38896.0L; + poly_val[19] = x*(x*(x*(x*(x*(-531225.0L/82770688.0L*x + 149757889.0L/7821830016.0L) - 877549735.0L/46930980096.0L) + 31049.0L/5277888.0L) - 11419.0L/205837632.0L) + 10.0L/3741309.0L) + 5.0L/415701.0L; + poly_val[20] = x*(x*(x*(x*(x*((97750271.0L/152373312000.0L)*x - 936082391.0L/488864376000.0L) + 16516831.0L/8821612800.0L) - 431798291.0L/733296564000.0L) + 514639.0L/205837632000.0L) - 1.0L/9237800.0L) - 1.0L/1847560.0L; + poly_val[21] = pow(x, 3)*(x*(x*(-51185699.0L/1676106432000.0L*x + 59441219.0L/651819168000.0L) - 69813539.0L/782183001600.0L) + 82302659.0L/2933186256000.0L); + break; + case 2: + poly_val[0] = x*(x*(x*(x*((51185699.0L/279351072000.0L)*x - 4090369.0L/8888443200.0L) + 35733059.0L/97772875200.0L) - 1072961.0L/12864852000.0L) - 514639.0L/102918816000.0L) - 1.0L/9237800.0L; + poly_val[1] = x*(x*(x*(x*(-97750271.0L/25395552000.0L*x + 3782441303.0L/391091500800.0L) - 237253277.0L/30875644800.0L) + 18781351.0L/10744272000.0L) + 11419.0L/102918816.0L) + 10.0L/3741309.0L; + poly_val[2] = x*(x*(x*(x*((1593675.0L/41385344.0L)*x - 378616715.0L/3910915008.0L) + 451440845.0L/5866372512.0L) - 1311199.0L/75209904.0L) - 487121.0L/411675264.0L) - 5.0L/155584.0L; + poly_val[3] = x*(x*(x*(x*(-14351957.0L/58810752.0L*x + 28081565.0L/45741696.0L) - 100500265.0L/205837632.0L) + 22641695.0L/205837632.0L) + 5663.0L/700128.0L) + 30.0L/119119.0L; + poly_val[4] = x*(x*(x*(x*((129283295.0L/117621504.0L)*x - 51784115.0L/18712512.0L) + 
453259255.0L/205837632.0L) - 50656289.0L/102918816.0L) - 1933049.0L/48432384.0L) - 5.0L/3432.0L; + poly_val[5] = x*(x*(x*(x*(-294222911.0L/78624000.0L*x + 346045183.0L/36691200.0L) - 4545171157.0L/605404800.0L) + 5019772789.0L/3027024000.0L) + 9587629.0L/63063000.0L) + 24.0L/3575.0L; + poly_val[6] = x*(x*(x*(x*((720747847.0L/72072000.0L)*x - 636560369.0L/25225200.0L) + 24128711.0L/1201200.0L) - 21109141.0L/4851000.0L) - 1888949.0L/4036032.0L) - 15.0L/572.0L; + poly_val[7] = x*(x*(x*(x*(-37195415.0L/1729728.0L*x + 657891265.0L/12108096.0L) - 261417515.0L/6054048.0L) + 54623759.0L/6054048.0L) + 1827209.0L/1513512.0L) + 40.0L/429.0L; + poly_val[8] = x*(x*(x*(x*((130810913.0L/3459456.0L)*x - 21430155.0L/224224.0L) + 456427805.0L/6054048.0L) - 6286355.0L/432432.0L) - 1650809.0L/620928.0L) - 15.0L/44.0L; + poly_val[9] = x*(x*(x*(x*(-1327945.0L/24192.0L*x + 257860165.0L/1862784.0L) - 100143815.0L/931392.0L) + 17535659.0L/931392.0L) + 698249.0L/232848.0L) + 20.0L/11.0L; + poly_val[10] = pow(x, 2)*(x*(x*((10991207.0L/166320.0L)*x - 2859155.0L/17248.0L) + 44163305.0L/349272.0L) - 1271555.0L/58212.0L) - 1968329.0L/635040.0L; + poly_val[11] = x*(x*(x*(x*(-10991207.0L/166320.0L*x + 6970883.0L/42336.0L) - 43387097.0L/349272.0L) + 1841911.0L/77616.0L) - 698249.0L/232848.0L) + 20.0L/11.0L; + poly_val[12] = x*(x*(x*(x*((1327945.0L/24192.0L)*x - 21116555.0L/155232.0L) + 47841155.0L/465696.0L) - 5143529.0L/232848.0L) + 1650809.0L/620928.0L) - 15.0L/44.0L; + poly_val[13] = x*(x*(x*(x*(-130810913.0L/3459456.0L*x + 2263925215.0L/24216192.0L) - 123189155.0L/1729728.0L) + 197560625.0L/12108096.0L) - 1827209.0L/1513512.0L) + 40.0L/429.0L; + poly_val[14] = x*(x*(x*(x*((37195415.0L/1729728.0L)*x - 53662355.0L/1009008.0L) + 123737255.0L/3027024.0L) - 1111433.0L/116424.0L) + 1888949.0L/4036032.0L) - 15.0L/572.0L; + poly_val[15] = x*(x*(x*(x*(-720747847.0L/72072000.0L*x + 227181223.0L/9172800.0L) - 107350871.0L/5605600.0L) + 12483133.0L/2772000.0L) - 9587629.0L/63063000.0L) + 24.0L/3575.0L; + 
poly_val[16] = x*(x*(x*(x*((294222911.0L/78624000.0L)*x - 255356207.0L/27518400.0L) + 2180676613.0L/302702400.0L) - 1285312429.0L/756756000.0L) + 1933049.0L/48432384.0L) - 5.0L/3432.0L; + poly_val[17] = x*(x*(x*(x*(-129283295.0L/117621504.0L*x + 249601585.0L/91483392.0L) - 874431715.0L/411675264.0L) + 206342519.0L/411675264.0L) - 5663.0L/700128.0L) + 30.0L/119119.0L; + poly_val[18] = x*(x*(x*(x*((14351957.0L/58810752.0L)*x - 124792205.0L/205837632.0L) + 48675295.0L/102918816.0L) - 5743835.0L/51459408.0L) + 487121.0L/411675264.0L) - 5.0L/155584.0L; + poly_val[19] = x*(x*(x*(x*(-1593675.0L/41385344.0L*x + 748789445.0L/7821830016.0L) - 877549735.0L/11732745024.0L) + 31049.0L/1759296.0L) - 11419.0L/102918816.0L) + 10.0L/3741309.0L; + poly_val[20] = x*(x*(x*(x*((97750271.0L/25395552000.0L)*x - 936082391.0L/97772875200.0L) + 16516831.0L/2205403200.0L) - 431798291.0L/244432188000.0L) + 514639.0L/102918816000.0L) - 1.0L/9237800.0L; + poly_val[21] = pow(x, 2)*(x*(x*(-51185699.0L/279351072000.0L*x + 59441219.0L/130363833600.0L) - 69813539.0L/195545750400.0L) + 82302659.0L/977728752000.0L); + break; + } +} +void beta_n10_m4(int deriv, double x, double *poly_val) { + switch(deriv) + { + case 0: + poly_val[0] = x*(x*(x*(x*(x*(x*(x*(x*(-33637859.0L/2394437760000.0L*x + 1483649773.0L/23465490048000.0L) - 316964689.0L/2933186256000.0L) + 324820967.0L/3910915008000.0L) - 1736957.0L/72648576000.0L) + 514639.0L/6175128960000.0L) - 514639.0L/617512896000.0L) - 1.0L/18475600.0L) + 1.0L/1847560.0L); + poly_val[1] = x*(x*(x*(x*(x*(x*(x*(x*((311524262719.0L/1055947052160000.0L)*x - 701042962783.0L/527973526080000.0L) + 74885259587.0L/32998345380000.0L) - 70831176221.0L/40613348160000.0L) + 75682379687.0L/150849578880000.0L) - 11419.0L/5557616064.0L) + 11419.0L/617512896.0L) + 5.0L/3741309.0L) - 5.0L/415701.0L); + poly_val[2] = x*(x*(x*(x*(x*(x*(x*(x*(-9968948243.0L/3379030566912.0L*x + 44868219719.0L/3379030566912.0L) - 12780882041.0L/563171761152.0L) + 29463096031.0L/1689515283456.0L) - 
16937158895.0L/3379030566912.0L) + 487121.0L/19760412672.0L) - 487121.0L/2470051584.0L) - 5.0L/311168.0L) + 5.0L/38896.0L); + poly_val[3] = x*(x*(x*(x*(x*(x*(x*(x*((4795181.0L/256628736.0L)*x - 31958747.0L/380007936.0L) + 473385725.0L/3293402112.0L) - 41964919.0L/380007936.0L) + 6885539.0L/217147392.0L) - 809.0L/4200768.0L) + 5663.0L/4200768.0L) + 15.0L/119119.0L) - 15.0L/17017.0L); + poly_val[4] = x*(x*(x*(x*(x*(x*(x*(x*(-356053741.0L/4234374144.0L*x + 11218117517.0L/29640619008.0L) - 684750301.0L/1058593536.0L) + 7363490045.0L/14820309504.0L) - 4223360627.0L/29640619008.0L) + 1933049.0L/1743565824.0L) - 1933049.0L/290594304.0L) - 5.0L/6864.0L) + 5.0L/1144.0L); + poly_val[5] = x*(x*(x*(x*(x*(x*(x*(x*((311561305079.0L/1089728640000.0L)*x - 140236087829.0L/108972864000.0L) + 39945615029.0L/18162144000.0L) - 184035443699.0L/108972864000.0L) + 105391706569.0L/217945728000.0L) - 9587629.0L/1891890000.0L) + 9587629.0L/378378000.0L) + 12.0L/3575.0L) - 12.0L/715.0L); + poly_val[6] = x*(x*(x*(x*(x*(x*(x*(x*(-46160205953.0L/60540480000.0L*x + 5327562143.0L/1552320000.0L) - 3623292631.0L/617760000.0L) + 136260296441.0L/30270240000.0L) - 25964084431.0L/20180160000.0L) + 1888949.0L/96864768.0L) - 1888949.0L/24216192.0L) - 15.0L/1144.0L) + 15.0L/286.0L); + poly_val[7] = x*(x*(x*(x*(x*(x*(x*(x*((18499877.0L/11321856.0L)*x - 801489739.0L/108972864.0L) + 782601527.0L/62270208.0L) - 262586183.0L/27243216.0L) + 2398107605.0L/871782912.0L) - 1827209.0L/27243216.0L) + 1827209.0L/9081072.0L) + 20.0L/429.0L) - 20.0L/143.0L); + poly_val[8] = x*(x*(x*(x*(x*(x*(x*(x*(-2493086731.0L/871782912.0L*x + 11221599217.0L/871782912.0L) - 399388835.0L/18162144.0L) + 7345256297.0L/435891456.0L) - 46195777.0L/9580032.0L) + 1650809.0L/7451136.0L) - 1650809.0L/3725568.0L) - 15.0L/88.0L) + 15.0L/44.0L); + poly_val[9] = x*(x*(x*(x*(x*(x*(x*(x*((92343961.0L/22353408.0L)*x - 207809573.0L/11176704.0L) + 88727773.0L/2794176.0L) - 90615265.0L/3725568.0L) + 157144591.0L/22353408.0L) - 698249.0L/1397088.0L) + 
698249.0L/1397088.0L) + 10.0L/11.0L) - 10.0L/11.0L); + poly_val[10] = pow(x, 2)*(pow(x, 2)*(x*(x*(x*(x*(-178100340673.0L/35925120000.0L*x + 1122102226711.0L/50295168000.0L) - 957933723511.0L/25147584000.0L) + 733946977711.0L/25147584000.0L) - 61475136673.0L/7185024000.0L) + 307869749.0L/466560000.0L) - 1968329.0L/1270080.0L) + 1; + poly_val[11] = x*(x*(x*(x*(x*(x*(x*(x*((178100340673.0L/35925120000.0L)*x - 1402452582211.0L/62868960000.0L) + 228012714673.0L/5987520000.0L) - 1836144861211.0L/62868960000.0L) + 2176770052711.0L/251475840000.0L) - 698249.0L/1397088.0L) - 698249.0L/1397088.0L) + 10.0L/11.0L) + 10.0L/11.0L); + poly_val[12] = x*(x*(x*(x*(x*(x*(x*(x*(-92343961.0L/22353408.0L*x + 415476503.0L/22353408.0L) - 59104301.0L/1862784.0L) + 272310167.0L/11176704.0L) - 161927825.0L/22353408.0L) + 1650809.0L/7451136.0L) + 1650809.0L/3725568.0L) - 15.0L/88.0L) - 15.0L/44.0L); + poly_val[13] = x*(x*(x*(x*(x*(x*(x*(x*((2493086731.0L/871782912.0L)*x - 5608090681.0L/435891456.0L) + 683892595.0L/31135104.0L) - 7359321647.0L/435891456.0L) + 4364057777.0L/871782912.0L) - 1827209.0L/27243216.0L) - 1827209.0L/9081072.0L) + 20.0L/429.0L) + 20.0L/143.0L); + poly_val[14] = x*(x*(x*(x*(x*(x*(x*(x*(-18499877.0L/11321856.0L*x + 6408496849.0L/871782912.0L) - 1823789521.0L/145297152.0L) + 323680565.0L/33530112.0L) - 2484812989.0L/871782912.0L) + 1888949.0L/96864768.0L) + 1888949.0L/24216192.0L) - 15.0L/1144.0L) - 15.0L/286.0L); + poly_val[15] = x*(x*(x*(x*(x*(x*(x*(x*((46160205953.0L/60540480000.0L)*x - 6922231.0L/2018016.0L) + 35465070353.0L/6054048000.0L) - 6820439797.0L/1513512000.0L) + 254905271.0L/192192000.0L) - 9587629.0L/1891890000.0L) - 9587629.0L/378378000.0L) + 12.0L/3575.0L) + 12.0L/715.0L); + poly_val[16] = x*(x*(x*(x*(x*(x*(x*(x*(-311561305079.0L/1089728640000.0L*x + 107822374417.0L/83825280000.0L) - 299257107283.0L/136216080000.0L) + 70840342681.0L/41912640000.0L) - 41601700243.0L/83825280000.0L) + 1933049.0L/1743565824.0L) + 1933049.0L/290594304.0L) - 5.0L/6864.0L) - 
5.0L/1144.0L); + poly_val[17] = x*(x*(x*(x*(x*(x*(x*(x*((356053741.0L/4234374144.0L)*x - 5606634083.0L/14820309504.0L) + 399033563.0L/617512896.0L) - 7368194069.0L/14820309504.0L) + 5185445.0L/35582976.0L) - 809.0L/4200768.0L) - 5663.0L/4200768.0L) + 15.0L/119119.0L) + 15.0L/17017.0L); + poly_val[18] = x*(x*(x*(x*(x*(x*(x*(x*(-4795181.0L/256628736.0L*x + 1661205589.0L/19760412672.0L) - 202694095.0L/1411458048.0L) + 40431847.0L/365933568.0L) - 639105469.0L/19760412672.0L) + 487121.0L/19760412672.0L) + 487121.0L/2470051584.0L) - 5.0L/311168.0L) - 5.0L/38896.0L); + poly_val[19] = x*(x*(x*(x*(x*(x*(x*(x*((9968948243.0L/3379030566912.0L)*x - 862544509.0L/64981357056.0L) + 38310835621.0L/1689515283456.0L) - 14738088485.0L/844757641728.0L) + 907279897.0L/177843714048.0L) - 11419.0L/5557616064.0L) - 11419.0L/617512896.0L) + 5.0L/3741309.0L) + 5.0L/415701.0L); + poly_val[20] = x*(x*(x*(x*(x*(x*(x*(x*(-311524262719.0L/1055947052160000.0L*x + 280326487781.0L/211189410432000.0L) - 5701224667.0L/2514159648000.0L) + 184231971829.0L/105594705216000.0L) - 107650576139.0L/211189410432000.0L) + 514639.0L/6175128960000.0L) + 514639.0L/617512896000.0L) - 1.0L/18475600.0L) - 1.0L/1847560.0L); + poly_val[21] = pow(x, 5)*(x*(x*(x*((33637859.0L/2394437760000.0L)*x - 3708023477.0L/58663725120000.0L) + 39104259.0L/362121760000.0L) - 4873971667.0L/58663725120000.0L) + 218919637.0L/9025188480000.0L); + break; + case 1: + poly_val[0] = x*(x*(x*(x*(x*(x*(x*(-33637859.0L/266048640000.0L*x + 1483649773.0L/2933186256000.0L) - 316964689.0L/419026608000.0L) + 324820967.0L/651819168000.0L) - 1736957.0L/14529715200.0L) + 514639.0L/1543782240000.0L) - 514639.0L/205837632000.0L) - 1.0L/9237800.0L) + 1.0L/1847560.0L; + poly_val[1] = x*(x*(x*(x*(x*(x*(x*((311524262719.0L/117327450240000.0L)*x - 701042962783.0L/65996690760000.0L) + 74885259587.0L/4714049340000.0L) - 70831176221.0L/6768891360000.0L) + 75682379687.0L/30169915776000.0L) - 11419.0L/1389404016.0L) + 11419.0L/205837632.0L) + 10.0L/3741309.0L) - 
5.0L/415701.0L; + poly_val[2] = x*(x*(x*(x*(x*(x*(x*(-9968948243.0L/375447840768.0L*x + 44868219719.0L/422378820864.0L) - 12780882041.0L/80453108736.0L) + 29463096031.0L/281585880576.0L) - 84685794475.0L/3379030566912.0L) + 487121.0L/4940103168.0L) - 487121.0L/823350528.0L) - 5.0L/155584.0L) + 5.0L/38896.0L; + poly_val[3] = x*(x*(x*(x*(x*(x*(x*((4795181.0L/28514304.0L)*x - 31958747.0L/47500992.0L) + 473385725.0L/470486016.0L) - 41964919.0L/63334656.0L) + 34427695.0L/217147392.0L) - 809.0L/1050192.0L) + 5663.0L/1400256.0L) + 30.0L/119119.0L) - 15.0L/17017.0L; + poly_val[4] = x*(x*(x*(x*(x*(x*(x*(-356053741.0L/470486016.0L*x + 11218117517.0L/3705077376.0L) - 684750301.0L/151227648.0L) + 7363490045.0L/2470051584.0L) - 21116803135.0L/29640619008.0L) + 1933049.0L/435891456.0L) - 1933049.0L/96864768.0L) - 5.0L/3432.0L) + 5.0L/1144.0L; + poly_val[5] = x*(x*(x*(x*(x*(x*(x*((311561305079.0L/121080960000.0L)*x - 140236087829.0L/13621608000.0L) + 39945615029.0L/2594592000.0L) - 184035443699.0L/18162144000.0L) + 105391706569.0L/43589145600.0L) - 9587629.0L/472972500.0L) + 9587629.0L/126126000.0L) + 24.0L/3575.0L) - 12.0L/715.0L; + poly_val[6] = x*(x*(x*(x*(x*(x*(x*(-46160205953.0L/6726720000.0L*x + 5327562143.0L/194040000.0L) - 25363048417.0L/617760000.0L) + 136260296441.0L/5045040000.0L) - 25964084431.0L/4036032000.0L) + 1888949.0L/24216192.0L) - 1888949.0L/8072064.0L) - 15.0L/572.0L) + 15.0L/286.0L; + poly_val[7] = x*(x*(x*(x*(x*(x*(x*((18499877.0L/1257984.0L)*x - 801489739.0L/13621608.0L) + 782601527.0L/8895744.0L) - 262586183.0L/4540536.0L) + 11990538025.0L/871782912.0L) - 1827209.0L/6810804.0L) + 1827209.0L/3027024.0L) + 40.0L/429.0L) - 20.0L/143.0L; + poly_val[8] = x*(x*(x*(x*(x*(x*(x*(-2493086731.0L/96864768.0L*x + 11221599217.0L/108972864.0L) - 399388835.0L/2594592.0L) + 7345256297.0L/72648576.0L) - 230978885.0L/9580032.0L) + 1650809.0L/1862784.0L) - 1650809.0L/1241856.0L) - 15.0L/44.0L) + 15.0L/44.0L; + poly_val[9] = x*(x*(x*(x*(x*(x*(x*((92343961.0L/2483712.0L)*x - 
207809573.0L/1397088.0L) + 88727773.0L/399168.0L) - 90615265.0L/620928.0L) + 785722955.0L/22353408.0L) - 698249.0L/349272.0L) + 698249.0L/465696.0L) + 20.0L/11.0L) - 10.0L/11.0L; + poly_val[10] = x*(pow(x, 2)*(x*(x*(x*(x*(-178100340673.0L/3991680000.0L*x + 1122102226711.0L/6286896000.0L) - 957933723511.0L/3592512000.0L) + 733946977711.0L/4191264000.0L) - 61475136673.0L/1437004800.0L) + 307869749.0L/116640000.0L) - 1968329.0L/635040.0L); + poly_val[11] = x*(x*(x*(x*(x*(x*(x*((178100340673.0L/3991680000.0L)*x - 1402452582211.0L/7858620000.0L) + 228012714673.0L/855360000.0L) - 1836144861211.0L/10478160000.0L) + 2176770052711.0L/50295168000.0L) - 698249.0L/349272.0L) - 698249.0L/465696.0L) + 20.0L/11.0L) + 10.0L/11.0L; + poly_val[12] = x*(x*(x*(x*(x*(x*(x*(-92343961.0L/2483712.0L*x + 415476503.0L/2794176.0L) - 59104301.0L/266112.0L) + 272310167.0L/1862784.0L) - 809639125.0L/22353408.0L) + 1650809.0L/1862784.0L) + 1650809.0L/1241856.0L) - 15.0L/44.0L) - 15.0L/44.0L; + poly_val[13] = x*(x*(x*(x*(x*(x*(x*((2493086731.0L/96864768.0L)*x - 5608090681.0L/54486432.0L) + 683892595.0L/4447872.0L) - 7359321647.0L/72648576.0L) + 21820288885.0L/871782912.0L) - 1827209.0L/6810804.0L) - 1827209.0L/3027024.0L) + 40.0L/429.0L) + 20.0L/143.0L; + poly_val[14] = x*(x*(x*(x*(x*(x*(x*(-18499877.0L/1257984.0L*x + 6408496849.0L/108972864.0L) - 1823789521.0L/20756736.0L) + 323680565.0L/5588352.0L) - 12424064945.0L/871782912.0L) + 1888949.0L/24216192.0L) + 1888949.0L/8072064.0L) - 15.0L/572.0L) - 15.0L/286.0L; + poly_val[15] = x*(x*(x*(x*(x*(x*(x*((46160205953.0L/6726720000.0L)*x - 6922231.0L/252252.0L) + 35465070353.0L/864864000.0L) - 6820439797.0L/252252000.0L) + 254905271.0L/38438400.0L) - 9587629.0L/472972500.0L) - 9587629.0L/126126000.0L) + 24.0L/3575.0L) + 12.0L/715.0L; + poly_val[16] = x*(x*(x*(x*(x*(x*(x*(-311561305079.0L/121080960000.0L*x + 107822374417.0L/10478160000.0L) - 299257107283.0L/19459440000.0L) + 70840342681.0L/6985440000.0L) - 41601700243.0L/16765056000.0L) + 
1933049.0L/435891456.0L) + 1933049.0L/96864768.0L) - 5.0L/3432.0L) - 5.0L/1144.0L; + poly_val[17] = x*(x*(x*(x*(x*(x*(x*((356053741.0L/470486016.0L)*x - 5606634083.0L/1852538688.0L) + 399033563.0L/88216128.0L) - 7368194069.0L/2470051584.0L) + 25927225.0L/35582976.0L) - 809.0L/1050192.0L) - 5663.0L/1400256.0L) + 30.0L/119119.0L) + 15.0L/17017.0L; + poly_val[18] = x*(x*(x*(x*(x*(x*(x*(-4795181.0L/28514304.0L*x + 1661205589.0L/2470051584.0L) - 202694095.0L/201636864.0L) + 40431847.0L/60988928.0L) - 3195527345.0L/19760412672.0L) + 487121.0L/4940103168.0L) + 487121.0L/823350528.0L) - 5.0L/155584.0L) - 5.0L/38896.0L; + poly_val[19] = x*(x*(x*(x*(x*(x*(x*((9968948243.0L/375447840768.0L)*x - 862544509.0L/8122669632.0L) + 38310835621.0L/241359326208.0L) - 14738088485.0L/140792940288.0L) + 4536399485.0L/177843714048.0L) - 11419.0L/1389404016.0L) - 11419.0L/205837632.0L) + 10.0L/3741309.0L) + 5.0L/415701.0L; + poly_val[20] = x*(x*(x*(x*(x*(x*(x*(-311524262719.0L/117327450240000.0L*x + 280326487781.0L/26398676304000.0L) - 5701224667.0L/359165664000.0L) + 184231971829.0L/17599117536000.0L) - 107650576139.0L/42237882086400.0L) + 514639.0L/1543782240000.0L) + 514639.0L/205837632000.0L) - 1.0L/9237800.0L) - 1.0L/1847560.0L; + poly_val[21] = pow(x, 4)*(x*(x*(x*((33637859.0L/266048640000.0L)*x - 3708023477.0L/7332965640000.0L) + 39104259.0L/51731680000.0L) - 4873971667.0L/9777287520000.0L) + 218919637.0L/1805037696000.0L); + break; + case 2: + poly_val[0] = x*(x*(x*(x*(x*(x*(-33637859.0L/33256080000.0L*x + 1483649773.0L/419026608000.0L) - 316964689.0L/69837768000.0L) + 324820967.0L/130363833600.0L) - 1736957.0L/3632428800.0L) + 514639.0L/514594080000.0L) - 514639.0L/102918816000.0L) - 1.0L/9237800.0L; + poly_val[1] = x*(x*(x*(x*(x*(x*((311524262719.0L/14665931280000.0L)*x - 701042962783.0L/9428098680000.0L) + 74885259587.0L/785674890000.0L) - 70831176221.0L/1353778272000.0L) + 75682379687.0L/7542478944000.0L) - 11419.0L/463134672.0L) + 11419.0L/102918816.0L) + 10.0L/3741309.0L; + 
poly_val[2] = x*(x*(x*(x*(x*(x*(-9968948243.0L/46930980096.0L*x + 44868219719.0L/60339831552.0L) - 12780882041.0L/13408851456.0L) + 147315480155.0L/281585880576.0L) - 84685794475.0L/844757641728.0L) + 487121.0L/1646701056.0L) - 487121.0L/411675264.0L) - 5.0L/155584.0L; + poly_val[3] = x*(x*(x*(x*(x*(x*((4795181.0L/3564288.0L)*x - 31958747.0L/6785856.0L) + 473385725.0L/78414336.0L) - 209824595.0L/63334656.0L) + 34427695.0L/54286848.0L) - 809.0L/350064.0L) + 5663.0L/700128.0L) + 30.0L/119119.0L; + poly_val[4] = x*(x*(x*(x*(x*(x*(-356053741.0L/58810752.0L*x + 11218117517.0L/529296768.0L) - 684750301.0L/25204608.0L) + 36817450225.0L/2470051584.0L) - 21116803135.0L/7410154752.0L) + 1933049.0L/145297152.0L) - 1933049.0L/48432384.0L) - 5.0L/3432.0L; + poly_val[5] = x*(x*(x*(x*(x*(x*((311561305079.0L/15135120000.0L)*x - 140236087829.0L/1945944000.0L) + 39945615029.0L/432432000.0L) - 184035443699.0L/3632428800.0L) + 105391706569.0L/10897286400.0L) - 9587629.0L/157657500.0L) + 9587629.0L/63063000.0L) + 24.0L/3575.0L; + poly_val[6] = x*(x*(x*(x*(x*(x*(-46160205953.0L/840840000.0L*x + 5327562143.0L/27720000.0L) - 25363048417.0L/102960000.0L) + 136260296441.0L/1009008000.0L) - 25964084431.0L/1009008000.0L) + 1888949.0L/8072064.0L) - 1888949.0L/4036032.0L) - 15.0L/572.0L; + poly_val[7] = x*(x*(x*(x*(x*(x*((18499877.0L/157248.0L)*x - 801489739.0L/1945944.0L) + 782601527.0L/1482624.0L) - 1312930915.0L/4540536.0L) + 11990538025.0L/217945728.0L) - 1827209.0L/2270268.0L) + 1827209.0L/1513512.0L) + 40.0L/429.0L; + poly_val[8] = x*(x*(x*(x*(x*(x*(-2493086731.0L/12108096.0L*x + 11221599217.0L/15567552.0L) - 399388835.0L/432432.0L) + 36726281485.0L/72648576.0L) - 230978885.0L/2395008.0L) + 1650809.0L/620928.0L) - 1650809.0L/620928.0L) - 15.0L/44.0L; + poly_val[9] = x*(x*(x*(x*(x*(x*((92343961.0L/310464.0L)*x - 207809573.0L/199584.0L) + 88727773.0L/66528.0L) - 453076325.0L/620928.0L) + 785722955.0L/5588352.0L) - 698249.0L/116424.0L) + 698249.0L/232848.0L) + 20.0L/11.0L; + poly_val[10] = 
pow(x, 2)*(x*(x*(x*(x*(-178100340673.0L/498960000.0L*x + 1122102226711.0L/898128000.0L) - 957933723511.0L/598752000.0L) + 733946977711.0L/838252800.0L) - 61475136673.0L/359251200.0L) + 307869749.0L/38880000.0L) - 1968329.0L/635040.0L; + poly_val[11] = x*(x*(x*(x*(x*(x*((178100340673.0L/498960000.0L)*x - 1402452582211.0L/1122660000.0L) + 228012714673.0L/142560000.0L) - 1836144861211.0L/2095632000.0L) + 2176770052711.0L/12573792000.0L) - 698249.0L/116424.0L) - 698249.0L/232848.0L) + 20.0L/11.0L; + poly_val[12] = x*(x*(x*(x*(x*(x*(-92343961.0L/310464.0L*x + 415476503.0L/399168.0L) - 59104301.0L/44352.0L) + 1361550835.0L/1862784.0L) - 809639125.0L/5588352.0L) + 1650809.0L/620928.0L) + 1650809.0L/620928.0L) - 15.0L/44.0L; + poly_val[13] = x*(x*(x*(x*(x*(x*((2493086731.0L/12108096.0L)*x - 5608090681.0L/7783776.0L) + 683892595.0L/741312.0L) - 36796608235.0L/72648576.0L) + 21820288885.0L/217945728.0L) - 1827209.0L/2270268.0L) - 1827209.0L/1513512.0L) + 40.0L/429.0L; + poly_val[14] = x*(x*(x*(x*(x*(x*(-18499877.0L/157248.0L*x + 6408496849.0L/15567552.0L) - 1823789521.0L/3459456.0L) + 1618402825.0L/5588352.0L) - 12424064945.0L/217945728.0L) + 1888949.0L/8072064.0L) + 1888949.0L/4036032.0L) - 15.0L/572.0L; + poly_val[15] = x*(x*(x*(x*(x*(x*((46160205953.0L/840840000.0L)*x - 6922231.0L/36036.0L) + 35465070353.0L/144144000.0L) - 6820439797.0L/50450400.0L) + 254905271.0L/9609600.0L) - 9587629.0L/157657500.0L) - 9587629.0L/63063000.0L) + 24.0L/3575.0L; + poly_val[16] = x*(x*(x*(x*(x*(x*(-311561305079.0L/15135120000.0L*x + 107822374417.0L/1496880000.0L) - 299257107283.0L/3243240000.0L) + 70840342681.0L/1397088000.0L) - 41601700243.0L/4191264000.0L) + 1933049.0L/145297152.0L) + 1933049.0L/48432384.0L) - 5.0L/3432.0L; + poly_val[17] = x*(x*(x*(x*(x*(x*((356053741.0L/58810752.0L)*x - 5606634083.0L/264648384.0L) + 399033563.0L/14702688.0L) - 36840970345.0L/2470051584.0L) + 25927225.0L/8895744.0L) - 809.0L/350064.0L) - 5663.0L/700128.0L) + 30.0L/119119.0L; + poly_val[18] = 
x*(x*(x*(x*(x*(x*(-4795181.0L/3564288.0L*x + 1661205589.0L/352864512.0L) - 202694095.0L/33606144.0L) + 202159235.0L/60988928.0L) - 3195527345.0L/4940103168.0L) + 487121.0L/1646701056.0L) + 487121.0L/411675264.0L) - 5.0L/155584.0L; + poly_val[19] = x*(x*(x*(x*(x*(x*((9968948243.0L/46930980096.0L)*x - 862544509.0L/1160381376.0L) + 38310835621.0L/40226554368.0L) - 73690442425.0L/140792940288.0L) + 4536399485.0L/44460928512.0L) - 11419.0L/463134672.0L) - 11419.0L/102918816.0L) + 10.0L/3741309.0L; + poly_val[20] = x*(x*(x*(x*(x*(x*(-311524262719.0L/14665931280000.0L*x + 280326487781.0L/3771239472000.0L) - 5701224667.0L/59860944000.0L) + 184231971829.0L/3519823507200.0L) - 107650576139.0L/10559470521600.0L) + 514639.0L/514594080000.0L) + 514639.0L/102918816000.0L) - 1.0L/9237800.0L; + poly_val[21] = pow(x, 3)*(x*(x*(x*((33637859.0L/33256080000.0L)*x - 3708023477.0L/1047566520000.0L) + 117312777.0L/25865840000.0L) - 4873971667.0L/1955457504000.0L) + 218919637.0L/451259424000.0L); + break; + } +} diff --git a/bfps/cpp/io_tools.hpp b/bfps/cpp/spline_n10.hpp similarity index 78% rename from bfps/cpp/io_tools.hpp rename to bfps/cpp/spline_n10.hpp index 69c0e8bb6ce723f5497a9a913411e2f51af8592f..4853f1975e8759fbcaf4c499df02a5b3bc934a91 100644 --- a/bfps/cpp/io_tools.hpp +++ b/bfps/cpp/spline_n10.hpp @@ -24,18 +24,15 @@ -#include <hdf5.h> -#include <vector> -#include <string> +#ifndef SPLINE_N10 -#ifndef IO_TOOLS +#define SPLINE_N10 -#define IO_TOOLS +void beta_n10_m0(const int deriv, const double x, double *__restrict__ poly_val); +void beta_n10_m1(const int deriv, const double x, double *__restrict__ poly_val); +void beta_n10_m2(const int deriv, const double x, double *__restrict__ poly_val); +void beta_n10_m3(const int deriv, const double x, double *__restrict__ poly_val); +void beta_n10_m4(const int deriv, const double x, double *__restrict__ poly_val); -template <typename number> -std::vector<number> read_vector( - hid_t group, - std::string dset_name); - -#endif//IO_TOOLS 
+#endif//SPLINE_N10 diff --git a/bfps/cpp/spline_n7.cpp b/bfps/cpp/spline_n7.cpp new file mode 100644 index 0000000000000000000000000000000000000000..84a3fdbc99a96cd3f5a13501414cbaa0a908096d --- /dev/null +++ b/bfps/cpp/spline_n7.cpp @@ -0,0 +1,324 @@ +/********************************************************************** +* * +* Copyright 2015 Max Planck Institute * +* for Dynamics and Self-Organization * +* * +* This file is part of bfps. * +* * +* bfps is free software: you can redistribute it and/or modify * +* it under the terms of the GNU General Public License as published * +* by the Free Software Foundation, either version 3 of the License, * +* or (at your option) any later version. * +* * +* bfps is distributed in the hope that it will be useful, * +* but WITHOUT ANY WARRANTY; without even the implied warranty of * +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * +* GNU General Public License for more details. * +* * +* You should have received a copy of the GNU General Public License * +* along with bfps. 
If not, see <http://www.gnu.org/licenses/> * +* * +* Contact: Cristian.Lalescu@ds.mpg.de * +* * +**********************************************************************/ + + + +#include "spline_n7.hpp" +#include <cmath> + +void beta_n7_m0(int deriv, double x, double *poly_val) { + switch(deriv) + { + case 0: + poly_val[0] = 0; + poly_val[1] = 0; + poly_val[2] = 0; + poly_val[3] = 0; + poly_val[4] = 0; + poly_val[5] = 0; + poly_val[6] = 0; + poly_val[7] = -x + 1; + poly_val[8] = x; + poly_val[9] = 0; + poly_val[10] = 0; + poly_val[11] = 0; + poly_val[12] = 0; + poly_val[13] = 0; + poly_val[14] = 0; + poly_val[15] = 0; + break; + case 1: + poly_val[0] = 0; + poly_val[1] = 0; + poly_val[2] = 0; + poly_val[3] = 0; + poly_val[4] = 0; + poly_val[5] = 0; + poly_val[6] = 0; + poly_val[7] = -1; + poly_val[8] = 1; + poly_val[9] = 0; + poly_val[10] = 0; + poly_val[11] = 0; + poly_val[12] = 0; + poly_val[13] = 0; + poly_val[14] = 0; + poly_val[15] = 0; + break; + case 2: + poly_val[0] = 0; + poly_val[1] = 0; + poly_val[2] = 0; + poly_val[3] = 0; + poly_val[4] = 0; + poly_val[5] = 0; + poly_val[6] = 0; + poly_val[7] = 0; + poly_val[8] = 0; + poly_val[9] = 0; + poly_val[10] = 0; + poly_val[11] = 0; + poly_val[12] = 0; + poly_val[13] = 0; + poly_val[14] = 0; + poly_val[15] = 0; + break; + } +} +void beta_n7_m1(int deriv, double x, double *poly_val) { + switch(deriv) + { + case 0: + poly_val[0] = x*(x*(-1.0L/24024.0L*x + 1.0L/12012.0L) - 1.0L/24024.0L); + poly_val[1] = x*(x*((23.0L/36036.0L)*x - 95.0L/72072.0L) + 7.0L/10296.0L); + poly_val[2] = x*(x*(-119.0L/25740.0L*x + 511.0L/51480.0L) - 7.0L/1320.0L); + poly_val[3] = x*(x*((7.0L/330.0L)*x - 21.0L/440.0L) + 7.0L/264.0L); + poly_val[4] = x*(x*(-7.0L/99.0L*x + 133.0L/792.0L) - 7.0L/72.0L); + poly_val[5] = x*(x*((7.0L/36.0L)*x - 35.0L/72.0L) + 7.0L/24.0L); + poly_val[6] = x*(x*(-7.0L/12.0L*x + 35.0L/24.0L) - 7.0L/8.0L); + poly_val[7] = pow(x, 2)*((9.0L/8.0L)*x - 17.0L/8.0L) + 1; + poly_val[8] = x*(x*(-9.0L/8.0L*x + 5.0L/4.0L) + 
7.0L/8.0L); + poly_val[9] = x*(x*((7.0L/12.0L)*x - 7.0L/24.0L) - 7.0L/24.0L); + poly_val[10] = x*(x*(-7.0L/36.0L*x + 7.0L/72.0L) + 7.0L/72.0L); + poly_val[11] = x*(x*((7.0L/99.0L)*x - 35.0L/792.0L) - 7.0L/264.0L); + poly_val[12] = x*(x*(-7.0L/330.0L*x + 7.0L/440.0L) + 7.0L/1320.0L); + poly_val[13] = x*(x*((119.0L/25740.0L)*x - 203.0L/51480.0L) - 7.0L/10296.0L); + poly_val[14] = x*(x*(-23.0L/36036.0L*x + 43.0L/72072.0L) + 1.0L/24024.0L); + poly_val[15] = pow(x, 2)*((1.0L/24024.0L)*x - 1.0L/24024.0L); + break; + case 1: + poly_val[0] = x*(-1.0L/8008.0L*x + 1.0L/6006.0L) - 1.0L/24024.0L; + poly_val[1] = x*((23.0L/12012.0L)*x - 95.0L/36036.0L) + 7.0L/10296.0L; + poly_val[2] = x*(-119.0L/8580.0L*x + 511.0L/25740.0L) - 7.0L/1320.0L; + poly_val[3] = x*((7.0L/110.0L)*x - 21.0L/220.0L) + 7.0L/264.0L; + poly_val[4] = x*(-7.0L/33.0L*x + 133.0L/396.0L) - 7.0L/72.0L; + poly_val[5] = x*((7.0L/12.0L)*x - 35.0L/36.0L) + 7.0L/24.0L; + poly_val[6] = x*(-7.0L/4.0L*x + 35.0L/12.0L) - 7.0L/8.0L; + poly_val[7] = x*((27.0L/8.0L)*x - 17.0L/4.0L); + poly_val[8] = x*(-27.0L/8.0L*x + 5.0L/2.0L) + 7.0L/8.0L; + poly_val[9] = x*((7.0L/4.0L)*x - 7.0L/12.0L) - 7.0L/24.0L; + poly_val[10] = x*(-7.0L/12.0L*x + 7.0L/36.0L) + 7.0L/72.0L; + poly_val[11] = x*((7.0L/33.0L)*x - 35.0L/396.0L) - 7.0L/264.0L; + poly_val[12] = x*(-7.0L/110.0L*x + 7.0L/220.0L) + 7.0L/1320.0L; + poly_val[13] = x*((119.0L/8580.0L)*x - 203.0L/25740.0L) - 7.0L/10296.0L; + poly_val[14] = x*(-23.0L/12012.0L*x + 43.0L/36036.0L) + 1.0L/24024.0L; + poly_val[15] = x*((1.0L/8008.0L)*x - 1.0L/12012.0L); + break; + case 2: + poly_val[0] = -1.0L/4004.0L*x + 1.0L/6006.0L; + poly_val[1] = (23.0L/6006.0L)*x - 95.0L/36036.0L; + poly_val[2] = -119.0L/4290.0L*x + 511.0L/25740.0L; + poly_val[3] = (7.0L/55.0L)*x - 21.0L/220.0L; + poly_val[4] = -14.0L/33.0L*x + 133.0L/396.0L; + poly_val[5] = (7.0L/6.0L)*x - 35.0L/36.0L; + poly_val[6] = -7.0L/2.0L*x + 35.0L/12.0L; + poly_val[7] = (27.0L/4.0L)*x - 17.0L/4.0L; + poly_val[8] = -27.0L/4.0L*x + 5.0L/2.0L; 
+ poly_val[9] = (7.0L/2.0L)*x - 7.0L/12.0L; + poly_val[10] = -7.0L/6.0L*x + 7.0L/36.0L; + poly_val[11] = (14.0L/33.0L)*x - 35.0L/396.0L; + poly_val[12] = -7.0L/55.0L*x + 7.0L/220.0L; + poly_val[13] = (119.0L/4290.0L)*x - 203.0L/25740.0L; + poly_val[14] = -23.0L/6006.0L*x + 43.0L/36036.0L; + poly_val[15] = (1.0L/4004.0L)*x - 1.0L/12012.0L; + break; + } +} +void beta_n7_m2(int deriv, double x, double *poly_val) { + switch(deriv) + { + case 0: + poly_val[0] = x*(x*(x*(x*((5.0L/42042.0L)*x - 53.0L/168168.0L) + 1.0L/4312.0L) + 1.0L/168168.0L) - 1.0L/24024.0L); + poly_val[1] = x*(x*(x*(x*(-5435.0L/3027024.0L*x + 1613.0L/336336.0L) - 3599.0L/1009008.0L) - 7.0L/61776.0L) + 7.0L/10296.0L); + poly_val[2] = x*(x*(x*(x*((19607.0L/1544400.0L)*x - 26453.0L/772200.0L) + 39851.0L/1544400.0L) + 7.0L/6600.0L) - 7.0L/1320.0L); + poly_val[3] = x*(x*(x*(x*(-1477.0L/26400.0L*x + 4039.0L/26400.0L) - 1029.0L/8800.0L) - 7.0L/1056.0L) + 7.0L/264.0L); + poly_val[4] = x*(x*(x*(x*((1645.0L/9504.0L)*x - 763.0L/1584.0L) + 1183.0L/3168.0L) + 7.0L/216.0L) - 7.0L/72.0L); + poly_val[5] = x*(x*(x*(x*(-175.0L/432.0L*x + 497.0L/432.0L) - 385.0L/432.0L) - 7.0L/48.0L) + 7.0L/24.0L); + poly_val[6] = x*(x*(x*(x*((35.0L/48.0L)*x - 49.0L/24.0L) + 21.0L/16.0L) + 7.0L/8.0L) - 7.0L/8.0L); + poly_val[7] = pow(x, 2)*(x*(x*(-174319.0L/176400.0L*x + 152269.0L/58800.0L) - 64069.0L/58800.0L) - 266681.0L/176400.0L) + 1; + poly_val[8] = x*(x*(x*(x*((174319.0L/176400.0L)*x - 103697.0L/44100.0L) + 108169.0L/176400.0L) + 7.0L/8.0L) + 7.0L/8.0L); + poly_val[9] = x*(x*(x*(x*(-35.0L/48.0L*x + 77.0L/48.0L) - 7.0L/16.0L) - 7.0L/48.0L) - 7.0L/24.0L); + poly_val[10] = x*(x*(x*(x*((175.0L/432.0L)*x - 7.0L/8.0L) + 49.0L/144.0L) + 7.0L/216.0L) + 7.0L/72.0L); + poly_val[11] = x*(x*(x*(x*(-1645.0L/9504.0L*x + 3647.0L/9504.0L) - 1687.0L/9504.0L) - 7.0L/1056.0L) - 7.0L/264.0L); + poly_val[12] = x*(x*(x*(x*((1477.0L/26400.0L)*x - 1673.0L/13200.0L) + 567.0L/8800.0L) + 7.0L/6600.0L) + 7.0L/1320.0L); + poly_val[13] = 
x*(x*(x*(x*(-19607.0L/1544400.0L*x + 15043.0L/514800.0L) - 623.0L/39600.0L) - 7.0L/61776.0L) - 7.0L/10296.0L); + poly_val[14] = x*(x*(x*(x*((5435.0L/3027024.0L)*x - 6329.0L/1513512.0L) + 7079.0L/3027024.0L) + 1.0L/168168.0L) + 1.0L/24024.0L); + poly_val[15] = pow(x, 3)*(x*(-5.0L/42042.0L*x + 47.0L/168168.0L) - 9.0L/56056.0L); + break; + case 1: + poly_val[0] = x*(x*(x*((25.0L/42042.0L)*x - 53.0L/42042.0L) + 3.0L/4312.0L) + 1.0L/84084.0L) - 1.0L/24024.0L; + poly_val[1] = x*(x*(x*(-27175.0L/3027024.0L*x + 1613.0L/84084.0L) - 3599.0L/336336.0L) - 7.0L/30888.0L) + 7.0L/10296.0L; + poly_val[2] = x*(x*(x*((19607.0L/308880.0L)*x - 26453.0L/193050.0L) + 39851.0L/514800.0L) + 7.0L/3300.0L) - 7.0L/1320.0L; + poly_val[3] = x*(x*(x*(-1477.0L/5280.0L*x + 4039.0L/6600.0L) - 3087.0L/8800.0L) - 7.0L/528.0L) + 7.0L/264.0L; + poly_val[4] = x*(x*(x*((8225.0L/9504.0L)*x - 763.0L/396.0L) + 1183.0L/1056.0L) + 7.0L/108.0L) - 7.0L/72.0L; + poly_val[5] = x*(x*(x*(-875.0L/432.0L*x + 497.0L/108.0L) - 385.0L/144.0L) - 7.0L/24.0L) + 7.0L/24.0L; + poly_val[6] = x*(x*(x*((175.0L/48.0L)*x - 49.0L/6.0L) + 63.0L/16.0L) + 7.0L/4.0L) - 7.0L/8.0L; + poly_val[7] = x*(x*(x*(-174319.0L/35280.0L*x + 152269.0L/14700.0L) - 64069.0L/19600.0L) - 266681.0L/88200.0L); + poly_val[8] = x*(x*(x*((174319.0L/35280.0L)*x - 103697.0L/11025.0L) + 108169.0L/58800.0L) + 7.0L/4.0L) + 7.0L/8.0L; + poly_val[9] = x*(x*(x*(-175.0L/48.0L*x + 77.0L/12.0L) - 21.0L/16.0L) - 7.0L/24.0L) - 7.0L/24.0L; + poly_val[10] = x*(x*(x*((875.0L/432.0L)*x - 7.0L/2.0L) + 49.0L/48.0L) + 7.0L/108.0L) + 7.0L/72.0L; + poly_val[11] = x*(x*(x*(-8225.0L/9504.0L*x + 3647.0L/2376.0L) - 1687.0L/3168.0L) - 7.0L/528.0L) - 7.0L/264.0L; + poly_val[12] = x*(x*(x*((1477.0L/5280.0L)*x - 1673.0L/3300.0L) + 1701.0L/8800.0L) + 7.0L/3300.0L) + 7.0L/1320.0L; + poly_val[13] = x*(x*(x*(-19607.0L/308880.0L*x + 15043.0L/128700.0L) - 623.0L/13200.0L) - 7.0L/30888.0L) - 7.0L/10296.0L; + poly_val[14] = x*(x*(x*((27175.0L/3027024.0L)*x - 6329.0L/378378.0L) + 
7079.0L/1009008.0L) + 1.0L/84084.0L) + 1.0L/24024.0L; + poly_val[15] = pow(x, 2)*(x*(-25.0L/42042.0L*x + 47.0L/42042.0L) - 27.0L/56056.0L); + break; + case 2: + poly_val[0] = x*(x*((50.0L/21021.0L)*x - 53.0L/14014.0L) + 3.0L/2156.0L) + 1.0L/84084.0L; + poly_val[1] = x*(x*(-27175.0L/756756.0L*x + 1613.0L/28028.0L) - 3599.0L/168168.0L) - 7.0L/30888.0L; + poly_val[2] = x*(x*((19607.0L/77220.0L)*x - 26453.0L/64350.0L) + 39851.0L/257400.0L) + 7.0L/3300.0L; + poly_val[3] = x*(x*(-1477.0L/1320.0L*x + 4039.0L/2200.0L) - 3087.0L/4400.0L) - 7.0L/528.0L; + poly_val[4] = x*(x*((8225.0L/2376.0L)*x - 763.0L/132.0L) + 1183.0L/528.0L) + 7.0L/108.0L; + poly_val[5] = x*(x*(-875.0L/108.0L*x + 497.0L/36.0L) - 385.0L/72.0L) - 7.0L/24.0L; + poly_val[6] = x*(x*((175.0L/12.0L)*x - 49.0L/2.0L) + 63.0L/8.0L) + 7.0L/4.0L; + poly_val[7] = x*(x*(-174319.0L/8820.0L*x + 152269.0L/4900.0L) - 64069.0L/9800.0L) - 266681.0L/88200.0L; + poly_val[8] = x*(x*((174319.0L/8820.0L)*x - 103697.0L/3675.0L) + 108169.0L/29400.0L) + 7.0L/4.0L; + poly_val[9] = x*(x*(-175.0L/12.0L*x + 77.0L/4.0L) - 21.0L/8.0L) - 7.0L/24.0L; + poly_val[10] = x*(x*((875.0L/108.0L)*x - 21.0L/2.0L) + 49.0L/24.0L) + 7.0L/108.0L; + poly_val[11] = x*(x*(-8225.0L/2376.0L*x + 3647.0L/792.0L) - 1687.0L/1584.0L) - 7.0L/528.0L; + poly_val[12] = x*(x*((1477.0L/1320.0L)*x - 1673.0L/1100.0L) + 1701.0L/4400.0L) + 7.0L/3300.0L; + poly_val[13] = x*(x*(-19607.0L/77220.0L*x + 15043.0L/42900.0L) - 623.0L/6600.0L) - 7.0L/30888.0L; + poly_val[14] = x*(x*((27175.0L/756756.0L)*x - 6329.0L/126126.0L) + 7079.0L/504504.0L) + 1.0L/84084.0L; + poly_val[15] = x*(x*(-50.0L/21021.0L*x + 47.0L/14014.0L) - 27.0L/28028.0L); + break; + } +} +void beta_n7_m3(int deriv, double x, double *poly_val) { + switch(deriv) + { + case 0: + poly_val[0] = x*(x*(x*(x*(x*(x*(-200017.0L/605404800.0L*x + 175717.0L/151351200.0L) - 139417.0L/100900800.0L) + 6109.0L/11642400.0L) + 59.0L/950400.0L) + 1.0L/168168.0L) - 1.0L/24024.0L); + poly_val[1] = 
x*(x*(x*(x*(x*(x*((4502941.0L/908107200.0L)*x - 2878489.0L/165110400.0L) + 119663.0L/5765760.0L) - 1094837.0L/139708800.0L) - 20137.0L/19958400.0L) - 7.0L/61776.0L) + 7.0L/10296.0L); + poly_val[2] = x*(x*(x*(x*(x*(x*(-901357.0L/25945920.0L*x + 10570201.0L/86486400.0L) - 4195157.0L/28828800.0L) + 14135041.0L/259459200.0L) + 2077.0L/266112.0L) + 7.0L/6600.0L) - 7.0L/1320.0L); + poly_val[3] = x*(x*(x*(x*(x*(x*((200569.0L/1330560.0L)*x - 3530803.0L/6652800.0L) + 93403.0L/147840.0L) - 1551821.0L/6652800.0L) - 31957.0L/831600.0L) - 7.0L/1056.0L) + 7.0L/264.0L); + poly_val[4] = x*(x*(x*(x*(x*(x*(-9044677.0L/19958400.0L*x + 56909.0L/35640.0L) - 6314093.0L/3326400.0L) + 213061.0L/311850.0L) + 247081.0L/1814400.0L) + 7.0L/216.0L) - 7.0L/72.0L); + poly_val[5] = x*(x*(x*(x*(x*(x*((906869.0L/907200.0L)*x - 2130157.0L/604800.0L) + 503599.0L/120960.0L) - 2574109.0L/1814400.0L) - 222581.0L/604800.0L) - 7.0L/48.0L) + 7.0L/24.0L); + poly_val[6] = x*(x*(x*(x*(x*(x*(-505069.0L/302400.0L*x + 394819.0L/67200.0L) - 277219.0L/40320.0L) + 1344209.0L/604800.0L) + 90281.0L/201600.0L) + 7.0L/8.0L) - 7.0L/8.0L); + poly_val[7] = pow(x, 2)*(pow(x, 2)*(x*(x*((121399.0L/56448.0L)*x - 1182509.0L/156800.0L) + 12323341.0L/1411200.0L) - 3993487.0L/1411200.0L) - 266681.0L/176400.0L) + 1; + poly_val[8] = x*(x*(x*(x*(x*(x*(-121399.0L/56448.0L*x + 2650561.0L/352800.0L) - 174319.0L/20160.0L) + 350719.0L/117600.0L) - 90281.0L/201600.0L) + 7.0L/8.0L) + 7.0L/8.0L); + poly_val[9] = x*(x*(x*(x*(x*(x*((505069.0L/302400.0L)*x - 703519.0L/120960.0L) + 1350319.0L/201600.0L) - 1501481.0L/604800.0L) + 222581.0L/604800.0L) - 7.0L/48.0L) - 7.0L/24.0L); + poly_val[10] = x*(x*(x*(x*(x*(x*(-906869.0L/907200.0L*x + 1261139.0L/362880.0L) - 811073.0L/201600.0L) + 2819581.0L/1814400.0L) - 247081.0L/1814400.0L) + 7.0L/216.0L) + 7.0L/72.0L); + poly_val[11] = x*(x*(x*(x*(x*(x*((9044677.0L/19958400.0L)*x - 1497319.0L/950400.0L) + 813523.0L/443520.0L) - 14314981.0L/19958400.0L) + 31957.0L/831600.0L) - 7.0L/1056.0L) - 7.0L/264.0L); 
+ poly_val[12] = x*(x*(x*(x*(x*(x*(-200569.0L/1330560.0L*x + 39649.0L/75600.0L) - 226559.0L/369600.0L) + 200173.0L/831600.0L) - 2077.0L/266112.0L) + 7.0L/6600.0L) + 7.0L/1320.0L); + poly_val[13] = x*(x*(x*(x*(x*(x*((901357.0L/25945920.0L)*x - 31384387.0L/259459200.0L) + 2451851.0L/17297280.0L) - 1112533.0L/19958400.0L) + 20137.0L/19958400.0L) - 7.0L/61776.0L) - 7.0L/10296.0L); + poly_val[14] = x*(x*(x*(x*(x*(x*(-4502941.0L/908107200.0L*x + 2091853.0L/121080960.0L) - 12279031.0L/605404800.0L) + 1114733.0L/139708800.0L) - 59.0L/950400.0L) + 1.0L/168168.0L) + 1.0L/24024.0L); + poly_val[15] = pow(x, 4)*(x*(x*((200017.0L/605404800.0L)*x - 232417.0L/201801600.0L) + 39031.0L/28828800.0L) - 322417.0L/605404800.0L); + break; + case 1: + poly_val[0] = x*(x*(x*(x*(x*(-200017.0L/86486400.0L*x + 175717.0L/25225200.0L) - 139417.0L/20180160.0L) + 6109.0L/2910600.0L) + 59.0L/316800.0L) + 1.0L/84084.0L) - 1.0L/24024.0L; + poly_val[1] = x*(x*(x*(x*(x*((4502941.0L/129729600.0L)*x - 2878489.0L/27518400.0L) + 119663.0L/1153152.0L) - 1094837.0L/34927200.0L) - 20137.0L/6652800.0L) - 7.0L/30888.0L) + 7.0L/10296.0L; + poly_val[2] = x*(x*(x*(x*(x*(-901357.0L/3706560.0L*x + 10570201.0L/14414400.0L) - 4195157.0L/5765760.0L) + 14135041.0L/64864800.0L) + 2077.0L/88704.0L) + 7.0L/3300.0L) - 7.0L/1320.0L; + poly_val[3] = x*(x*(x*(x*(x*((200569.0L/190080.0L)*x - 3530803.0L/1108800.0L) + 93403.0L/29568.0L) - 1551821.0L/1663200.0L) - 31957.0L/277200.0L) - 7.0L/528.0L) + 7.0L/264.0L; + poly_val[4] = x*(x*(x*(x*(x*(-9044677.0L/2851200.0L*x + 56909.0L/5940.0L) - 6314093.0L/665280.0L) + 426122.0L/155925.0L) + 247081.0L/604800.0L) + 7.0L/108.0L) - 7.0L/72.0L; + poly_val[5] = x*(x*(x*(x*(x*((906869.0L/129600.0L)*x - 2130157.0L/100800.0L) + 503599.0L/24192.0L) - 2574109.0L/453600.0L) - 222581.0L/201600.0L) - 7.0L/24.0L) + 7.0L/24.0L; + poly_val[6] = x*(x*(x*(x*(x*(-505069.0L/43200.0L*x + 394819.0L/11200.0L) - 277219.0L/8064.0L) + 1344209.0L/151200.0L) + 90281.0L/67200.0L) + 7.0L/4.0L) - 7.0L/8.0L; + 
poly_val[7] = x*(pow(x, 2)*(x*(x*((121399.0L/8064.0L)*x - 3547527.0L/78400.0L) + 12323341.0L/282240.0L) - 3993487.0L/352800.0L) - 266681.0L/88200.0L); + poly_val[8] = x*(x*(x*(x*(x*(-121399.0L/8064.0L*x + 2650561.0L/58800.0L) - 174319.0L/4032.0L) + 350719.0L/29400.0L) - 90281.0L/67200.0L) + 7.0L/4.0L) + 7.0L/8.0L; + poly_val[9] = x*(x*(x*(x*(x*((505069.0L/43200.0L)*x - 703519.0L/20160.0L) + 1350319.0L/40320.0L) - 1501481.0L/151200.0L) + 222581.0L/201600.0L) - 7.0L/24.0L) - 7.0L/24.0L; + poly_val[10] = x*(x*(x*(x*(x*(-906869.0L/129600.0L*x + 1261139.0L/60480.0L) - 811073.0L/40320.0L) + 2819581.0L/453600.0L) - 247081.0L/604800.0L) + 7.0L/108.0L) + 7.0L/72.0L; + poly_val[11] = x*(x*(x*(x*(x*((9044677.0L/2851200.0L)*x - 1497319.0L/158400.0L) + 813523.0L/88704.0L) - 14314981.0L/4989600.0L) + 31957.0L/277200.0L) - 7.0L/528.0L) - 7.0L/264.0L; + poly_val[12] = x*(x*(x*(x*(x*(-200569.0L/190080.0L*x + 39649.0L/12600.0L) - 226559.0L/73920.0L) + 200173.0L/207900.0L) - 2077.0L/88704.0L) + 7.0L/3300.0L) + 7.0L/1320.0L; + poly_val[13] = x*(x*(x*(x*(x*((901357.0L/3706560.0L)*x - 31384387.0L/43243200.0L) + 2451851.0L/3459456.0L) - 1112533.0L/4989600.0L) + 20137.0L/6652800.0L) - 7.0L/30888.0L) - 7.0L/10296.0L; + poly_val[14] = x*(x*(x*(x*(x*(-4502941.0L/129729600.0L*x + 2091853.0L/20180160.0L) - 12279031.0L/121080960.0L) + 1114733.0L/34927200.0L) - 59.0L/316800.0L) + 1.0L/84084.0L) + 1.0L/24024.0L; + poly_val[15] = pow(x, 3)*(x*(x*((200017.0L/86486400.0L)*x - 232417.0L/33633600.0L) + 39031.0L/5765760.0L) - 322417.0L/151351200.0L); + break; + case 2: + poly_val[0] = x*(x*(x*(x*(-200017.0L/14414400.0L*x + 175717.0L/5045040.0L) - 139417.0L/5045040.0L) + 6109.0L/970200.0L) + 59.0L/158400.0L) + 1.0L/84084.0L; + poly_val[1] = x*(x*(x*(x*((4502941.0L/21621600.0L)*x - 2878489.0L/5503680.0L) + 119663.0L/288288.0L) - 1094837.0L/11642400.0L) - 20137.0L/3326400.0L) - 7.0L/30888.0L; + poly_val[2] = x*(x*(x*(x*(-901357.0L/617760.0L*x + 10570201.0L/2882880.0L) - 4195157.0L/1441440.0L) + 
14135041.0L/21621600.0L) + 2077.0L/44352.0L) + 7.0L/3300.0L; + poly_val[3] = x*(x*(x*(x*((200569.0L/31680.0L)*x - 3530803.0L/221760.0L) + 93403.0L/7392.0L) - 1551821.0L/554400.0L) - 31957.0L/138600.0L) - 7.0L/528.0L; + poly_val[4] = x*(x*(x*(x*(-9044677.0L/475200.0L*x + 56909.0L/1188.0L) - 6314093.0L/166320.0L) + 426122.0L/51975.0L) + 247081.0L/302400.0L) + 7.0L/108.0L; + poly_val[5] = x*(x*(x*(x*((906869.0L/21600.0L)*x - 2130157.0L/20160.0L) + 503599.0L/6048.0L) - 2574109.0L/151200.0L) - 222581.0L/100800.0L) - 7.0L/24.0L; + poly_val[6] = x*(x*(x*(x*(-505069.0L/7200.0L*x + 394819.0L/2240.0L) - 277219.0L/2016.0L) + 1344209.0L/50400.0L) + 90281.0L/33600.0L) + 7.0L/4.0L; + poly_val[7] = pow(x, 2)*(x*(x*((121399.0L/1344.0L)*x - 3547527.0L/15680.0L) + 12323341.0L/70560.0L) - 3993487.0L/117600.0L) - 266681.0L/88200.0L; + poly_val[8] = x*(x*(x*(x*(-121399.0L/1344.0L*x + 2650561.0L/11760.0L) - 174319.0L/1008.0L) + 350719.0L/9800.0L) - 90281.0L/33600.0L) + 7.0L/4.0L; + poly_val[9] = x*(x*(x*(x*((505069.0L/7200.0L)*x - 703519.0L/4032.0L) + 1350319.0L/10080.0L) - 1501481.0L/50400.0L) + 222581.0L/100800.0L) - 7.0L/24.0L; + poly_val[10] = x*(x*(x*(x*(-906869.0L/21600.0L*x + 1261139.0L/12096.0L) - 811073.0L/10080.0L) + 2819581.0L/151200.0L) - 247081.0L/302400.0L) + 7.0L/108.0L; + poly_val[11] = x*(x*(x*(x*((9044677.0L/475200.0L)*x - 1497319.0L/31680.0L) + 813523.0L/22176.0L) - 14314981.0L/1663200.0L) + 31957.0L/138600.0L) - 7.0L/528.0L; + poly_val[12] = x*(x*(x*(x*(-200569.0L/31680.0L*x + 39649.0L/2520.0L) - 226559.0L/18480.0L) + 200173.0L/69300.0L) - 2077.0L/44352.0L) + 7.0L/3300.0L; + poly_val[13] = x*(x*(x*(x*((901357.0L/617760.0L)*x - 31384387.0L/8648640.0L) + 2451851.0L/864864.0L) - 1112533.0L/1663200.0L) + 20137.0L/3326400.0L) - 7.0L/30888.0L; + poly_val[14] = x*(x*(x*(x*(-4502941.0L/21621600.0L*x + 2091853.0L/4036032.0L) - 12279031.0L/30270240.0L) + 1114733.0L/11642400.0L) - 59.0L/158400.0L) + 1.0L/84084.0L; + poly_val[15] = pow(x, 2)*(x*(x*((200017.0L/14414400.0L)*x - 
232417.0L/6726720.0L) + 39031.0L/1441440.0L) - 322417.0L/50450400.0L); + break; + } +} +void beta_n7_m4(int deriv, double x, double *poly_val) { + switch(deriv) + { + case 0: + poly_val[0] = x*(x*(x*(x*(x*(x*(x*(x*((322727.0L/302702400.0L)*x - 414979.0L/86486400.0L) + 141853.0L/17297280.0L) - 6059.0L/960960.0L) + 11.0L/6048.0L) - 59.0L/6652800.0L) + 59.0L/950400.0L) + 1.0L/168168.0L) - 1.0L/24024.0L); + poly_val[1] = x*(x*(x*(x*(x*(x*(x*(x*(-13405813.0L/838252800.0L*x + 60333233.0L/838252800.0L) - 670268567.0L/5448643200.0L) + 3680359.0L/38918880.0L) - 42433073.0L/1556755200.0L) + 20137.0L/119750400.0L) - 20137.0L/19958400.0L) - 7.0L/61776.0L) + 7.0L/10296.0L); + poly_val[2] = x*(x*(x*(x*(x*(x*(x*(x*((174279691.0L/1556755200.0L)*x - 1120513.0L/2223936.0L) + 44685269.0L/51891840.0L) - 515148581.0L/778377600.0L) + 296702557.0L/1556755200.0L) - 2077.0L/1330560.0L) + 2077.0L/266112.0L) + 7.0L/6600.0L) - 7.0L/1320.0L); + poly_val[3] = x*(x*(x*(x*(x*(x*(x*(x*(-28817.0L/59400.0L*x + 4841917.0L/2217600.0L) - 2256829.0L/604800.0L) + 19073623.0L/6652800.0L) - 609701.0L/739200.0L) + 31957.0L/3326400.0L) - 31957.0L/831600.0L) - 7.0L/1056.0L) + 7.0L/264.0L); + poly_val[4] = x*(x*(x*(x*(x*(x*(x*(x*((3112349.0L/2138400.0L)*x - 56029813.0L/8553600.0L) + 60931183.0L/5443200.0L) - 36766399.0L/4276800.0L) + 14799451.0L/5987520.0L) - 247081.0L/5443200.0L) + 247081.0L/1814400.0L) + 7.0L/216.0L) - 7.0L/72.0L); + poly_val[5] = x*(x*(x*(x*(x*(x*(x*(x*(-4979933.0L/1555200.0L*x + 156886007.0L/10886400.0L) - 8935543.0L/362880.0L) + 51445649.0L/2721600.0L) - 59327153.0L/10886400.0L) + 222581.0L/1209600.0L) - 222581.0L/604800.0L) - 7.0L/48.0L) + 7.0L/24.0L); + poly_val[6] = x*(x*(x*(x*(x*(x*(x*(x*((6455647.0L/1209600.0L)*x - 7263173.0L/302400.0L) + 8272337.0L/201600.0L) - 3809647.0L/120960.0L) + 11059493.0L/1209600.0L) - 90281.0L/201600.0L) + 90281.0L/201600.0L) + 7.0L/8.0L) - 7.0L/8.0L); + poly_val[7] = pow(x, 2)*(pow(x, 2)*(x*(x*(x*(x*(-43576279.0L/6350400.0L*x + 11205641.0L/362880.0L) - 
19141411.0L/362880.0L) + 73478603.0L/1814400.0L) - 21513043.0L/1814400.0L) + 54613.0L/90720.0L) - 266681.0L/176400.0L) + 1; + poly_val[8] = x*(x*(x*(x*(x*(x*(x*(x*((43576279.0L/6350400.0L)*x - 392175587.0L/12700800.0L) + 223287331.0L/4233600.0L) - 36755869.0L/907200.0L) + 1353509.0L/113400.0L) - 90281.0L/201600.0L) - 90281.0L/201600.0L) + 7.0L/8.0L) + 7.0L/8.0L); + poly_val[9] = x*(x*(x*(x*(x*(x*(x*(x*(-6455647.0L/1209600.0L*x + 4149733.0L/172800.0L) - 24807889.0L/604800.0L) + 794597.0L/25200.0L) - 2251181.0L/241920.0L) + 222581.0L/1209600.0L) + 222581.0L/604800.0L) - 7.0L/48.0L) - 7.0L/24.0L); + poly_val[10] = x*(x*(x*(x*(x*(x*(x*(x*((4979933.0L/1555200.0L)*x - 39212443.0L/2721600.0L) + 5358427.0L/217728.0L) - 103036921.0L/5443200.0L) + 60708181.0L/10886400.0L) - 247081.0L/5443200.0L) - 247081.0L/1814400.0L) + 7.0L/216.0L) + 7.0L/72.0L); + poly_val[11] = x*(x*(x*(x*(x*(x*(x*(x*(-3112349.0L/2138400.0L*x + 56014751.0L/8553600.0L) - 223273759.0L/19958400.0L) + 14724943.0L/1710720.0L) - 13763713.0L/5443200.0L) + 31957.0L/3326400.0L) + 31957.0L/831600.0L) - 7.0L/1056.0L) - 7.0L/264.0L); + poly_val[12] = x*(x*(x*(x*(x*(x*(x*(x*((28817.0L/59400.0L)*x - 968119.0L/443520.0L) + 4961851.0L/1330560.0L) - 9545759.0L/3326400.0L) + 932753.0L/1108800.0L) - 2077.0L/1330560.0L) - 2077.0L/266112.0L) + 7.0L/6600.0L) + 7.0L/1320.0L); + poly_val[13] = x*(x*(x*(x*(x*(x*(x*(x*(-174279691.0L/1556755200.0L*x + 784158119.0L/1556755200.0L) - 669877073.0L/778377600.0L) + 257762143.0L/389188800.0L) - 23213117.0L/119750400.0L) + 20137.0L/119750400.0L) + 20137.0L/19958400.0L) - 7.0L/61776.0L) - 7.0L/10296.0L); + poly_val[14] = x*(x*(x*(x*(x*(x*(x*(x*((13405813.0L/838252800.0L)*x - 2154253.0L/29937600.0L) + 2900003.0L/23587200.0L) - 73650743.0L/778377600.0L) + 1722493.0L/62270208.0L) - 59.0L/6652800.0L) - 59.0L/950400.0L) + 1.0L/168168.0L) + 1.0L/24024.0L); + poly_val[15] = pow(x, 5)*(x*(x*(x*(-322727.0L/302702400.0L*x + 2904233.0L/605404800.0L) - 2005.0L/244608.0L) + 545581.0L/86486400.0L) - 
159353.0L/86486400.0L); + break; + case 1: + poly_val[0] = x*(x*(x*(x*(x*(x*(x*((322727.0L/33633600.0L)*x - 414979.0L/10810800.0L) + 141853.0L/2471040.0L) - 6059.0L/160160.0L) + 55.0L/6048.0L) - 59.0L/1663200.0L) + 59.0L/316800.0L) + 1.0L/84084.0L) - 1.0L/24024.0L; + poly_val[1] = x*(x*(x*(x*(x*(x*(x*(-13405813.0L/93139200.0L*x + 60333233.0L/104781600.0L) - 670268567.0L/778377600.0L) + 3680359.0L/6486480.0L) - 42433073.0L/311351040.0L) + 20137.0L/29937600.0L) - 20137.0L/6652800.0L) - 7.0L/30888.0L) + 7.0L/10296.0L; + poly_val[2] = x*(x*(x*(x*(x*(x*(x*((174279691.0L/172972800.0L)*x - 1120513.0L/277992.0L) + 44685269.0L/7413120.0L) - 515148581.0L/129729600.0L) + 296702557.0L/311351040.0L) - 2077.0L/332640.0L) + 2077.0L/88704.0L) + 7.0L/3300.0L) - 7.0L/1320.0L; + poly_val[3] = x*(x*(x*(x*(x*(x*(x*(-28817.0L/6600.0L*x + 4841917.0L/277200.0L) - 2256829.0L/86400.0L) + 19073623.0L/1108800.0L) - 609701.0L/147840.0L) + 31957.0L/831600.0L) - 31957.0L/277200.0L) - 7.0L/528.0L) + 7.0L/264.0L; + poly_val[4] = x*(x*(x*(x*(x*(x*(x*((3112349.0L/237600.0L)*x - 56029813.0L/1069200.0L) + 60931183.0L/777600.0L) - 36766399.0L/712800.0L) + 14799451.0L/1197504.0L) - 247081.0L/1360800.0L) + 247081.0L/604800.0L) + 7.0L/108.0L) - 7.0L/72.0L; + poly_val[5] = x*(x*(x*(x*(x*(x*(x*(-4979933.0L/172800.0L*x + 156886007.0L/1360800.0L) - 8935543.0L/51840.0L) + 51445649.0L/453600.0L) - 59327153.0L/2177280.0L) + 222581.0L/302400.0L) - 222581.0L/201600.0L) - 7.0L/24.0L) + 7.0L/24.0L; + poly_val[6] = x*(x*(x*(x*(x*(x*(x*((6455647.0L/134400.0L)*x - 7263173.0L/37800.0L) + 8272337.0L/28800.0L) - 3809647.0L/20160.0L) + 11059493.0L/241920.0L) - 90281.0L/50400.0L) + 90281.0L/67200.0L) + 7.0L/4.0L) - 7.0L/8.0L; + poly_val[7] = x*(pow(x, 2)*(x*(x*(x*(x*(-43576279.0L/705600.0L*x + 11205641.0L/45360.0L) - 19141411.0L/51840.0L) + 73478603.0L/302400.0L) - 21513043.0L/362880.0L) + 54613.0L/22680.0L) - 266681.0L/88200.0L); + poly_val[8] = x*(x*(x*(x*(x*(x*(x*((43576279.0L/705600.0L)*x - 392175587.0L/1587600.0L) + 
223287331.0L/604800.0L) - 36755869.0L/151200.0L) + 1353509.0L/22680.0L) - 90281.0L/50400.0L) - 90281.0L/67200.0L) + 7.0L/4.0L) + 7.0L/8.0L; + poly_val[9] = x*(x*(x*(x*(x*(x*(x*(-6455647.0L/134400.0L*x + 4149733.0L/21600.0L) - 24807889.0L/86400.0L) + 794597.0L/4200.0L) - 2251181.0L/48384.0L) + 222581.0L/302400.0L) + 222581.0L/201600.0L) - 7.0L/24.0L) - 7.0L/24.0L; + poly_val[10] = x*(x*(x*(x*(x*(x*(x*((4979933.0L/172800.0L)*x - 39212443.0L/340200.0L) + 5358427.0L/31104.0L) - 103036921.0L/907200.0L) + 60708181.0L/2177280.0L) - 247081.0L/1360800.0L) - 247081.0L/604800.0L) + 7.0L/108.0L) + 7.0L/72.0L; + poly_val[11] = x*(x*(x*(x*(x*(x*(x*(-3112349.0L/237600.0L*x + 56014751.0L/1069200.0L) - 223273759.0L/2851200.0L) + 14724943.0L/285120.0L) - 13763713.0L/1088640.0L) + 31957.0L/831600.0L) + 31957.0L/277200.0L) - 7.0L/528.0L) - 7.0L/264.0L; + poly_val[12] = x*(x*(x*(x*(x*(x*(x*((28817.0L/6600.0L)*x - 968119.0L/55440.0L) + 4961851.0L/190080.0L) - 9545759.0L/554400.0L) + 932753.0L/221760.0L) - 2077.0L/332640.0L) - 2077.0L/88704.0L) + 7.0L/3300.0L) + 7.0L/1320.0L; + poly_val[13] = x*(x*(x*(x*(x*(x*(x*(-174279691.0L/172972800.0L*x + 784158119.0L/194594400.0L) - 669877073.0L/111196800.0L) + 257762143.0L/64864800.0L) - 23213117.0L/23950080.0L) + 20137.0L/29937600.0L) + 20137.0L/6652800.0L) - 7.0L/30888.0L) - 7.0L/10296.0L; + poly_val[14] = x*(x*(x*(x*(x*(x*(x*((13405813.0L/93139200.0L)*x - 2154253.0L/3742200.0L) + 2900003.0L/3369600.0L) - 73650743.0L/129729600.0L) + 8612465.0L/62270208.0L) - 59.0L/1663200.0L) - 59.0L/316800.0L) + 1.0L/84084.0L) + 1.0L/24024.0L; + poly_val[15] = pow(x, 4)*(x*(x*(x*(-322727.0L/33633600.0L*x + 2904233.0L/75675600.0L) - 2005.0L/34944.0L) + 545581.0L/14414400.0L) - 159353.0L/17297280.0L); + break; + case 2: + poly_val[0] = x*(x*(x*(x*(x*(x*((322727.0L/4204200.0L)*x - 414979.0L/1544400.0L) + 141853.0L/411840.0L) - 6059.0L/32032.0L) + 55.0L/1512.0L) - 59.0L/554400.0L) + 59.0L/158400.0L) + 1.0L/84084.0L; + poly_val[1] = 
x*(x*(x*(x*(x*(x*(-13405813.0L/11642400.0L*x + 60333233.0L/14968800.0L) - 670268567.0L/129729600.0L) + 3680359.0L/1297296.0L) - 42433073.0L/77837760.0L) + 20137.0L/9979200.0L) - 20137.0L/3326400.0L) - 7.0L/30888.0L; + poly_val[2] = x*(x*(x*(x*(x*(x*((174279691.0L/21621600.0L)*x - 7843591.0L/277992.0L) + 44685269.0L/1235520.0L) - 515148581.0L/25945920.0L) + 296702557.0L/77837760.0L) - 2077.0L/110880.0L) + 2077.0L/44352.0L) + 7.0L/3300.0L; + poly_val[3] = x*(x*(x*(x*(x*(x*(-28817.0L/825.0L*x + 4841917.0L/39600.0L) - 2256829.0L/14400.0L) + 19073623.0L/221760.0L) - 609701.0L/36960.0L) + 31957.0L/277200.0L) - 31957.0L/138600.0L) - 7.0L/528.0L; + poly_val[4] = x*(x*(x*(x*(x*(x*((3112349.0L/29700.0L)*x - 392208691.0L/1069200.0L) + 60931183.0L/129600.0L) - 36766399.0L/142560.0L) + 14799451.0L/299376.0L) - 247081.0L/453600.0L) + 247081.0L/302400.0L) + 7.0L/108.0L; + poly_val[5] = x*(x*(x*(x*(x*(x*(-4979933.0L/21600.0L*x + 156886007.0L/194400.0L) - 8935543.0L/8640.0L) + 51445649.0L/90720.0L) - 59327153.0L/544320.0L) + 222581.0L/100800.0L) - 222581.0L/100800.0L) - 7.0L/24.0L; + poly_val[6] = x*(x*(x*(x*(x*(x*((6455647.0L/16800.0L)*x - 7263173.0L/5400.0L) + 8272337.0L/4800.0L) - 3809647.0L/4032.0L) + 11059493.0L/60480.0L) - 90281.0L/16800.0L) + 90281.0L/33600.0L) + 7.0L/4.0L; + poly_val[7] = pow(x, 2)*(x*(x*(x*(x*(-43576279.0L/88200.0L*x + 11205641.0L/6480.0L) - 19141411.0L/8640.0L) + 73478603.0L/60480.0L) - 21513043.0L/90720.0L) + 54613.0L/7560.0L) - 266681.0L/88200.0L; + poly_val[8] = x*(x*(x*(x*(x*(x*((43576279.0L/88200.0L)*x - 392175587.0L/226800.0L) + 223287331.0L/100800.0L) - 36755869.0L/30240.0L) + 1353509.0L/5670.0L) - 90281.0L/16800.0L) - 90281.0L/33600.0L) + 7.0L/4.0L; + poly_val[9] = x*(x*(x*(x*(x*(x*(-6455647.0L/16800.0L*x + 29048131.0L/21600.0L) - 24807889.0L/14400.0L) + 794597.0L/840.0L) - 2251181.0L/12096.0L) + 222581.0L/100800.0L) + 222581.0L/100800.0L) - 7.0L/24.0L; + poly_val[10] = x*(x*(x*(x*(x*(x*((4979933.0L/21600.0L)*x - 39212443.0L/48600.0L) + 
5358427.0L/5184.0L) - 103036921.0L/181440.0L) + 60708181.0L/544320.0L) - 247081.0L/453600.0L) - 247081.0L/302400.0L) + 7.0L/108.0L; + poly_val[11] = x*(x*(x*(x*(x*(x*(-3112349.0L/29700.0L*x + 392103257.0L/1069200.0L) - 223273759.0L/475200.0L) + 14724943.0L/57024.0L) - 13763713.0L/272160.0L) + 31957.0L/277200.0L) + 31957.0L/138600.0L) - 7.0L/528.0L; + poly_val[12] = x*(x*(x*(x*(x*(x*((28817.0L/825.0L)*x - 968119.0L/7920.0L) + 4961851.0L/31680.0L) - 9545759.0L/110880.0L) + 932753.0L/55440.0L) - 2077.0L/110880.0L) - 2077.0L/44352.0L) + 7.0L/3300.0L; + poly_val[13] = x*(x*(x*(x*(x*(x*(-174279691.0L/21621600.0L*x + 784158119.0L/27799200.0L) - 669877073.0L/18532800.0L) + 257762143.0L/12972960.0L) - 23213117.0L/5987520.0L) + 20137.0L/9979200.0L) + 20137.0L/3326400.0L) - 7.0L/30888.0L; + poly_val[14] = x*(x*(x*(x*(x*(x*((13405813.0L/11642400.0L)*x - 2154253.0L/534600.0L) + 2900003.0L/561600.0L) - 73650743.0L/25945920.0L) + 8612465.0L/15567552.0L) - 59.0L/554400.0L) - 59.0L/158400.0L) + 1.0L/84084.0L; + poly_val[15] = pow(x, 3)*(x*(x*(x*(-322727.0L/4204200.0L*x + 2904233.0L/10810800.0L) - 2005.0L/5824.0L) + 545581.0L/2882880.0L) - 159353.0L/4324320.0L); + break; + } +} diff --git a/bfps/cpp/spline_n7.hpp b/bfps/cpp/spline_n7.hpp new file mode 100644 index 0000000000000000000000000000000000000000..2c0b86f6ff8bd722fd6bd038d8dfee39727d5fc1 --- /dev/null +++ b/bfps/cpp/spline_n7.hpp @@ -0,0 +1,38 @@ +/********************************************************************** +* * +* Copyright 2017 Max Planck Institute * +* for Dynamics and Self-Organization * +* * +* This file is part of bfps. * +* * +* bfps is free software: you can redistribute it and/or modify * +* it under the terms of the GNU General Public License as published * +* by the Free Software Foundation, either version 3 of the License, * +* or (at your option) any later version. 
* +* * +* bfps is distributed in the hope that it will be useful, * +* but WITHOUT ANY WARRANTY; without even the implied warranty of * +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * +* GNU General Public License for more details. * +* * +* You should have received a copy of the GNU General Public License * +* along with bfps. If not, see <http://www.gnu.org/licenses/> * +* * +* Contact: Cristian.Lalescu@ds.mpg.de * +* * +**********************************************************************/ + + + +#ifndef SPLINE_N7 + +#define SPLINE_N7 + +void beta_n7_m0(const int deriv, const double x, double *__restrict__ poly_val); +void beta_n7_m1(const int deriv, const double x, double *__restrict__ poly_val); +void beta_n7_m2(const int deriv, const double x, double *__restrict__ poly_val); +void beta_n7_m3(const int deriv, const double x, double *__restrict__ poly_val); +void beta_n7_m4(const int deriv, const double x, double *__restrict__ poly_val); + +#endif//SPLINE_N7 + diff --git a/bfps/cpp/spline_n8.cpp b/bfps/cpp/spline_n8.cpp new file mode 100644 index 0000000000000000000000000000000000000000..d6cea769acbc3a74eee1dfd53933a45441318016 --- /dev/null +++ b/bfps/cpp/spline_n8.cpp @@ -0,0 +1,354 @@ +/********************************************************************** +* * +* Copyright 2015 Max Planck Institute * +* for Dynamics and Self-Organization * +* * +* This file is part of bfps. * +* * +* bfps is free software: you can redistribute it and/or modify * +* it under the terms of the GNU General Public License as published * +* by the Free Software Foundation, either version 3 of the License, * +* or (at your option) any later version. * +* * +* bfps is distributed in the hope that it will be useful, * +* but WITHOUT ANY WARRANTY; without even the implied warranty of * +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * +* GNU General Public License for more details. 
* +* * +* You should have received a copy of the GNU General Public License * +* along with bfps. If not, see <http://www.gnu.org/licenses/> * +* * +* Contact: Cristian.Lalescu@ds.mpg.de * +* * +**********************************************************************/ + + + +#include "spline_n8.hpp" +#include <cmath> + +void beta_n8_m0(int deriv, double x, double *poly_val) { + switch(deriv) + { + case 0: + poly_val[0] = 0; + poly_val[1] = 0; + poly_val[2] = 0; + poly_val[3] = 0; + poly_val[4] = 0; + poly_val[5] = 0; + poly_val[6] = 0; + poly_val[7] = 0; + poly_val[8] = -x + 1; + poly_val[9] = x; + poly_val[10] = 0; + poly_val[11] = 0; + poly_val[12] = 0; + poly_val[13] = 0; + poly_val[14] = 0; + poly_val[15] = 0; + poly_val[16] = 0; + poly_val[17] = 0; + break; + case 1: + poly_val[0] = 0; + poly_val[1] = 0; + poly_val[2] = 0; + poly_val[3] = 0; + poly_val[4] = 0; + poly_val[5] = 0; + poly_val[6] = 0; + poly_val[7] = 0; + poly_val[8] = -1; + poly_val[9] = 1; + poly_val[10] = 0; + poly_val[11] = 0; + poly_val[12] = 0; + poly_val[13] = 0; + poly_val[14] = 0; + poly_val[15] = 0; + poly_val[16] = 0; + poly_val[17] = 0; + break; + case 2: + poly_val[0] = 0; + poly_val[1] = 0; + poly_val[2] = 0; + poly_val[3] = 0; + poly_val[4] = 0; + poly_val[5] = 0; + poly_val[6] = 0; + poly_val[7] = 0; + poly_val[8] = 0; + poly_val[9] = 0; + poly_val[10] = 0; + poly_val[11] = 0; + poly_val[12] = 0; + poly_val[13] = 0; + poly_val[14] = 0; + poly_val[15] = 0; + poly_val[16] = 0; + poly_val[17] = 0; + break; + } +} +void beta_n8_m1(int deriv, double x, double *poly_val) { + switch(deriv) + { + case 0: + poly_val[0] = x*(x*((1.0L/102960.0L)*x - 1.0L/51480.0L) + 1.0L/102960.0L); + poly_val[1] = x*(x*(-11.0L/65520.0L*x + 83.0L/240240.0L) - 8.0L/45045.0L); + poly_val[2] = x*(x*((62.0L/45045.0L)*x - 4.0L/1365.0L) + 2.0L/1287.0L); + poly_val[3] = x*(x*(-46.0L/6435.0L*x + 34.0L/2145.0L) - 56.0L/6435.0L); + poly_val[4] = x*(x*((343.0L/12870.0L)*x - 133.0L/2145.0L) + 7.0L/198.0L); + poly_val[5] = 
x*(x*(-7.0L/90.0L*x + 21.0L/110.0L) - 56.0L/495.0L); + poly_val[6] = x*(x*((98.0L/495.0L)*x - 28.0L/55.0L) + 14.0L/45.0L); + poly_val[7] = x*(x*(-26.0L/45.0L*x + 22.0L/15.0L) - 8.0L/9.0L); + poly_val[8] = pow(x, 2)*((10.0L/9.0L)*x - 19.0L/9.0L) + 1; + poly_val[9] = x*(x*(-10.0L/9.0L*x + 11.0L/9.0L) + 8.0L/9.0L); + poly_val[10] = x*(x*((26.0L/45.0L)*x - 4.0L/15.0L) - 14.0L/45.0L); + poly_val[11] = x*(x*(-98.0L/495.0L*x + 14.0L/165.0L) + 56.0L/495.0L); + poly_val[12] = x*(x*((7.0L/90.0L)*x - 7.0L/165.0L) - 7.0L/198.0L); + poly_val[13] = x*(x*(-343.0L/12870.0L*x + 7.0L/390.0L) + 56.0L/6435.0L); + poly_val[14] = x*(x*((46.0L/6435.0L)*x - 4.0L/715.0L) - 2.0L/1287.0L); + poly_val[15] = x*(x*(-62.0L/45045.0L*x + 6.0L/5005.0L) + 8.0L/45045.0L); + poly_val[16] = x*(x*((11.0L/65520.0L)*x - 19.0L/120120.0L) - 1.0L/102960.0L); + poly_val[17] = pow(x, 2)*(-1.0L/102960.0L*x + 1.0L/102960.0L); + break; + case 1: + poly_val[0] = x*((1.0L/34320.0L)*x - 1.0L/25740.0L) + 1.0L/102960.0L; + poly_val[1] = x*(-11.0L/21840.0L*x + 83.0L/120120.0L) - 8.0L/45045.0L; + poly_val[2] = x*((62.0L/15015.0L)*x - 8.0L/1365.0L) + 2.0L/1287.0L; + poly_val[3] = x*(-46.0L/2145.0L*x + 68.0L/2145.0L) - 56.0L/6435.0L; + poly_val[4] = x*((343.0L/4290.0L)*x - 266.0L/2145.0L) + 7.0L/198.0L; + poly_val[5] = x*(-7.0L/30.0L*x + 21.0L/55.0L) - 56.0L/495.0L; + poly_val[6] = x*((98.0L/165.0L)*x - 56.0L/55.0L) + 14.0L/45.0L; + poly_val[7] = x*(-26.0L/15.0L*x + 44.0L/15.0L) - 8.0L/9.0L; + poly_val[8] = x*((10.0L/3.0L)*x - 38.0L/9.0L); + poly_val[9] = x*(-10.0L/3.0L*x + 22.0L/9.0L) + 8.0L/9.0L; + poly_val[10] = x*((26.0L/15.0L)*x - 8.0L/15.0L) - 14.0L/45.0L; + poly_val[11] = x*(-98.0L/165.0L*x + 28.0L/165.0L) + 56.0L/495.0L; + poly_val[12] = x*((7.0L/30.0L)*x - 14.0L/165.0L) - 7.0L/198.0L; + poly_val[13] = x*(-343.0L/4290.0L*x + 7.0L/195.0L) + 56.0L/6435.0L; + poly_val[14] = x*((46.0L/2145.0L)*x - 8.0L/715.0L) - 2.0L/1287.0L; + poly_val[15] = x*(-62.0L/15015.0L*x + 12.0L/5005.0L) + 8.0L/45045.0L; + poly_val[16] = 
x*((11.0L/21840.0L)*x - 19.0L/60060.0L) - 1.0L/102960.0L; + poly_val[17] = x*(-1.0L/34320.0L*x + 1.0L/51480.0L); + break; + case 2: + poly_val[0] = (1.0L/17160.0L)*x - 1.0L/25740.0L; + poly_val[1] = -11.0L/10920.0L*x + 83.0L/120120.0L; + poly_val[2] = (124.0L/15015.0L)*x - 8.0L/1365.0L; + poly_val[3] = -92.0L/2145.0L*x + 68.0L/2145.0L; + poly_val[4] = (343.0L/2145.0L)*x - 266.0L/2145.0L; + poly_val[5] = -7.0L/15.0L*x + 21.0L/55.0L; + poly_val[6] = (196.0L/165.0L)*x - 56.0L/55.0L; + poly_val[7] = -52.0L/15.0L*x + 44.0L/15.0L; + poly_val[8] = (20.0L/3.0L)*x - 38.0L/9.0L; + poly_val[9] = -20.0L/3.0L*x + 22.0L/9.0L; + poly_val[10] = (52.0L/15.0L)*x - 8.0L/15.0L; + poly_val[11] = -196.0L/165.0L*x + 28.0L/165.0L; + poly_val[12] = (7.0L/15.0L)*x - 14.0L/165.0L; + poly_val[13] = -343.0L/2145.0L*x + 7.0L/195.0L; + poly_val[14] = (92.0L/2145.0L)*x - 8.0L/715.0L; + poly_val[15] = -124.0L/15015.0L*x + 12.0L/5005.0L; + poly_val[16] = (11.0L/10920.0L)*x - 19.0L/60060.0L; + poly_val[17] = -1.0L/17160.0L*x + 1.0L/51480.0L; + break; + } +} +void beta_n8_m2(int deriv, double x, double *poly_val) { + switch(deriv) + { + case 0: + poly_val[0] = x*(x*(x*(x*(-23.0L/823680.0L*x + 61.0L/823680.0L) - 1.0L/18304.0L) - 1.0L/823680.0L) + 1.0L/102960.0L); + poly_val[1] = x*(x*(x*(x*((3851.0L/8072064.0L)*x - 5143.0L/4036032.0L) + 12773.0L/13453440.0L) + 8.0L/315315.0L) - 8.0L/45045.0L); + poly_val[2] = x*(x*(x*(x*(-3637.0L/945945.0L*x + 33.0L/3185.0L) - 821.0L/105105.0L) - 1.0L/3861.0L) + 2.0L/1287.0L); + poly_val[3] = x*(x*(x*(x*((1877.0L/96525.0L)*x - 5116.0L/96525.0L) + 3911.0L/96525.0L) + 56.0L/32175.0L) - 56.0L/6435.0L); + poly_val[4] = x*(x*(x*(x*(-17857.0L/257400.0L*x + 49399.0L/257400.0L) - 4263.0L/28600.0L) - 7.0L/792.0L) + 7.0L/198.0L); + poly_val[5] = x*(x*(x*(x*((2219.0L/11880.0L)*x - 1043.0L/1980.0L) + 329.0L/792.0L) + 56.0L/1485.0L) - 56.0L/495.0L); + poly_val[6] = x*(x*(x*(x*(-119.0L/297.0L*x + 343.0L/297.0L) - 1351.0L/1485.0L) - 7.0L/45.0L) + 14.0L/45.0L); + poly_val[7] = 
x*(x*(x*(x*((31.0L/45.0L)*x - 88.0L/45.0L) + 19.0L/15.0L) + 8.0L/9.0L) - 8.0L/9.0L); + poly_val[8] = pow(x, 2)*(x*(x*(-647051.0L/705600.0L*x + 568651.0L/235200.0L) - 686753.0L/705600.0L) - 1077749.0L/705600.0L) + 1; + poly_val[9] = x*(x*(x*(x*((647051.0L/705600.0L)*x - 764651.0L/352800.0L) + 333451.0L/705600.0L) + 8.0L/9.0L) + 8.0L/9.0L); + poly_val[10] = x*(x*(x*(x*(-31.0L/45.0L*x + 67.0L/45.0L) - 1.0L/3.0L) - 7.0L/45.0L) - 14.0L/45.0L); + poly_val[11] = x*(x*(x*(x*((119.0L/297.0L)*x - 28.0L/33.0L) + 49.0L/165.0L) + 56.0L/1485.0L) + 56.0L/495.0L); + poly_val[12] = x*(x*(x*(x*(-2219.0L/11880.0L*x + 4837.0L/11880.0L) - 2093.0L/11880.0L) - 7.0L/792.0L) - 7.0L/198.0L); + poly_val[13] = x*(x*(x*(x*((17857.0L/257400.0L)*x - 1813.0L/11700.0L) + 2149.0L/28600.0L) + 56.0L/32175.0L) + 56.0L/6435.0L); + poly_val[14] = x*(x*(x*(x*(-1877.0L/96525.0L*x + 1423.0L/32175.0L) - 739.0L/32175.0L) - 1.0L/3861.0L) - 2.0L/1287.0L); + poly_val[15] = x*(x*(x*(x*((3637.0L/945945.0L)*x - 8384.0L/945945.0L) + 911.0L/189189.0L) + 8.0L/315315.0L) + 8.0L/45045.0L); + poly_val[16] = x*(x*(x*(x*(-3851.0L/8072064.0L*x + 8969.0L/8072064.0L) - 8383.0L/13453440.0L) - 1.0L/823680.0L) - 1.0L/102960.0L); + poly_val[17] = pow(x, 3)*(x*((23.0L/823680.0L)*x - 3.0L/45760.0L) + 31.0L/823680.0L); + break; + case 1: + poly_val[0] = x*(x*(x*(-23.0L/164736.0L*x + 61.0L/205920.0L) - 3.0L/18304.0L) - 1.0L/411840.0L) + 1.0L/102960.0L; + poly_val[1] = x*(x*(x*((19255.0L/8072064.0L)*x - 5143.0L/1009008.0L) + 12773.0L/4484480.0L) + 16.0L/315315.0L) - 8.0L/45045.0L; + poly_val[2] = x*(x*(x*(-3637.0L/189189.0L*x + 132.0L/3185.0L) - 821.0L/35035.0L) - 2.0L/3861.0L) + 2.0L/1287.0L; + poly_val[3] = x*(x*(x*((1877.0L/19305.0L)*x - 20464.0L/96525.0L) + 3911.0L/32175.0L) + 112.0L/32175.0L) - 56.0L/6435.0L; + poly_val[4] = x*(x*(x*(-17857.0L/51480.0L*x + 49399.0L/64350.0L) - 12789.0L/28600.0L) - 7.0L/396.0L) + 7.0L/198.0L; + poly_val[5] = x*(x*(x*((2219.0L/2376.0L)*x - 1043.0L/495.0L) + 329.0L/264.0L) + 112.0L/1485.0L) - 
56.0L/495.0L; + poly_val[6] = x*(x*(x*(-595.0L/297.0L*x + 1372.0L/297.0L) - 1351.0L/495.0L) - 14.0L/45.0L) + 14.0L/45.0L; + poly_val[7] = x*(x*(x*((31.0L/9.0L)*x - 352.0L/45.0L) + 19.0L/5.0L) + 16.0L/9.0L) - 8.0L/9.0L; + poly_val[8] = x*(x*(x*(-647051.0L/141120.0L*x + 568651.0L/58800.0L) - 686753.0L/235200.0L) - 1077749.0L/352800.0L); + poly_val[9] = x*(x*(x*((647051.0L/141120.0L)*x - 764651.0L/88200.0L) + 333451.0L/235200.0L) + 16.0L/9.0L) + 8.0L/9.0L; + poly_val[10] = x*(x*(x*(-31.0L/9.0L*x + 268.0L/45.0L) - 1) - 14.0L/45.0L) - 14.0L/45.0L; + poly_val[11] = x*(x*(x*((595.0L/297.0L)*x - 112.0L/33.0L) + 49.0L/55.0L) + 112.0L/1485.0L) + 56.0L/495.0L; + poly_val[12] = x*(x*(x*(-2219.0L/2376.0L*x + 4837.0L/2970.0L) - 2093.0L/3960.0L) - 7.0L/396.0L) - 7.0L/198.0L; + poly_val[13] = x*(x*(x*((17857.0L/51480.0L)*x - 1813.0L/2925.0L) + 6447.0L/28600.0L) + 112.0L/32175.0L) + 56.0L/6435.0L; + poly_val[14] = x*(x*(x*(-1877.0L/19305.0L*x + 5692.0L/32175.0L) - 739.0L/10725.0L) - 2.0L/3861.0L) - 2.0L/1287.0L; + poly_val[15] = x*(x*(x*((3637.0L/189189.0L)*x - 33536.0L/945945.0L) + 911.0L/63063.0L) + 16.0L/315315.0L) + 8.0L/45045.0L; + poly_val[16] = x*(x*(x*(-19255.0L/8072064.0L*x + 8969.0L/2018016.0L) - 8383.0L/4484480.0L) - 1.0L/411840.0L) - 1.0L/102960.0L; + poly_val[17] = pow(x, 2)*(x*((23.0L/164736.0L)*x - 3.0L/11440.0L) + 31.0L/274560.0L); + break; + case 2: + poly_val[0] = x*(x*(-23.0L/41184.0L*x + 61.0L/68640.0L) - 3.0L/9152.0L) - 1.0L/411840.0L; + poly_val[1] = x*(x*((19255.0L/2018016.0L)*x - 5143.0L/336336.0L) + 12773.0L/2242240.0L) + 16.0L/315315.0L; + poly_val[2] = x*(x*(-14548.0L/189189.0L*x + 396.0L/3185.0L) - 1642.0L/35035.0L) - 2.0L/3861.0L; + poly_val[3] = x*(x*((7508.0L/19305.0L)*x - 20464.0L/32175.0L) + 7822.0L/32175.0L) + 112.0L/32175.0L; + poly_val[4] = x*(x*(-17857.0L/12870.0L*x + 49399.0L/21450.0L) - 12789.0L/14300.0L) - 7.0L/396.0L; + poly_val[5] = x*(x*((2219.0L/594.0L)*x - 1043.0L/165.0L) + 329.0L/132.0L) + 112.0L/1485.0L; + poly_val[6] = 
x*(x*(-2380.0L/297.0L*x + 1372.0L/99.0L) - 2702.0L/495.0L) - 14.0L/45.0L; + poly_val[7] = x*(x*((124.0L/9.0L)*x - 352.0L/15.0L) + 38.0L/5.0L) + 16.0L/9.0L; + poly_val[8] = x*(x*(-647051.0L/35280.0L*x + 568651.0L/19600.0L) - 686753.0L/117600.0L) - 1077749.0L/352800.0L; + poly_val[9] = x*(x*((647051.0L/35280.0L)*x - 764651.0L/29400.0L) + 333451.0L/117600.0L) + 16.0L/9.0L; + poly_val[10] = x*(x*(-124.0L/9.0L*x + 268.0L/15.0L) - 2) - 14.0L/45.0L; + poly_val[11] = x*(x*((2380.0L/297.0L)*x - 112.0L/11.0L) + 98.0L/55.0L) + 112.0L/1485.0L; + poly_val[12] = x*(x*(-2219.0L/594.0L*x + 4837.0L/990.0L) - 2093.0L/1980.0L) - 7.0L/396.0L; + poly_val[13] = x*(x*((17857.0L/12870.0L)*x - 1813.0L/975.0L) + 6447.0L/14300.0L) + 112.0L/32175.0L; + poly_val[14] = x*(x*(-7508.0L/19305.0L*x + 5692.0L/10725.0L) - 1478.0L/10725.0L) - 2.0L/3861.0L; + poly_val[15] = x*(x*((14548.0L/189189.0L)*x - 33536.0L/315315.0L) + 1822.0L/63063.0L) + 16.0L/315315.0L; + poly_val[16] = x*(x*(-19255.0L/2018016.0L*x + 8969.0L/672672.0L) - 8383.0L/2242240.0L) - 1.0L/411840.0L; + poly_val[17] = x*(x*((23.0L/41184.0L)*x - 9.0L/11440.0L) + 31.0L/137280.0L); + break; + } +} +void beta_n8_m3(int deriv, double x, double *poly_val) { + switch(deriv) + { + case 0: + poly_val[0] = x*(x*(x*(x*(x*(x*((1409119.0L/18162144000.0L)*x - 225133.0L/825552000.0L) + 982819.0L/3027024000.0L) - 280097.0L/2270268000.0L) - 266681.0L/18162144000.0L) - 1.0L/823680.0L) + 1.0L/102960.0L); + poly_val[1] = x*(x*(x*(x*(x*(x*(-3423751.0L/2594592000.0L*x + 84281297.0L/18162144000.0L) - 1239279.0L/224224000.0L) + 584159.0L/279417600.0L) + 21701.0L/81081000.0L) + 8.0L/315315.0L) - 8.0L/45045.0L); + poly_val[2] = x*(x*(x*(x*(x*(x*((1142023.0L/108108000.0L)*x - 2637197.0L/70945875.0L) + 16757869.0L/378378000.0L) - 1574527.0L/94594500.0L) - 1058149.0L/454053600.0L) - 1.0L/3861.0L) + 2.0L/1287.0L); + poly_val[3] = x*(x*(x*(x*(x*(x*(-8002307.0L/151351200.0L*x + 84540767.0L/454053600.0L) - 33583789.0L/151351200.0L) + 961571.0L/11642400.0L) + 
41981.0L/3243240.0L) + 56.0L/32175.0L) - 56.0L/6435.0L); + poly_val[4] = x*(x*(x*(x*(x*(x*((6870101.0L/37065600.0L)*x - 8475431.0L/12972960.0L) + 1246801.0L/1601600.0L) - 18501503.0L/64864800.0L) - 1033649.0L/19958400.0L) - 7.0L/792.0L) + 7.0L/198.0L); + poly_val[5] = x*(x*(x*(x*(x*(x*(-16071287.0L/33264000.0L*x + 24300857.0L/14256000.0L) - 22488359.0L/11088000.0L) + 24013303.0L/33264000.0L) + 999349.0L/6237000.0L) + 56.0L/1485.0L) - 56.0L/495.0L); + poly_val[6] = x*(x*(x*(x*(x*(x*((383939.0L/396000.0L)*x - 2667076.0L/779625.0L) + 5606353.0L/1386000.0L) - 25547.0L/18900.0L) - 901349.0L/2268000.0L) - 7.0L/45.0L) + 14.0L/45.0L); + poly_val[7] = x*(x*(x*(x*(x*(x*(-24268063.0L/15876000.0L*x + 85470809.0L/15876000.0L) - 1232929.0L/196000.0L) + 31221523.0L/15876000.0L) + 372149.0L/793800.0L) + 8.0L/9.0L) - 8.0L/9.0L); + poly_val[8] = pow(x, 2)*(pow(x, 2)*(x*(x*((434251.0L/226800.0L)*x - 14219287.0L/2116800.0L) + 4099663.0L/529200.0L) - 7673891.0L/3175200.0L) - 1077749.0L/705600.0L) + 1; + poly_val[9] = x*(x*(x*(x*(x*(x*(-434251.0L/226800.0L*x + 8491067.0L/1270080.0L) - 8098063.0L/1058400.0L) + 16330063.0L/6350400.0L) - 372149.0L/793800.0L) + 8.0L/9.0L) + 8.0L/9.0L); + poly_val[10] = x*(x*(x*(x*(x*(x*((24268063.0L/15876000.0L)*x - 2637676.0L/496125.0L) + 596739.0L/98000.0L) - 4429349.0L/1984500.0L) + 901349.0L/2268000.0L) - 7.0L/45.0L) - 14.0L/45.0L); + poly_val[11] = x*(x*(x*(x*(x*(x*(-383939.0L/396000.0L*x + 7633697.0L/2268000.0L) - 32262353.0L/8316000.0L) + 2482583.0L/1663200.0L) - 999349.0L/6237000.0L) + 56.0L/1485.0L) + 56.0L/495.0L); + poly_val[12] = x*(x*(x*(x*(x*(x*((16071287.0L/33264000.0L)*x - 5978251.0L/3564000.0L) + 32375053.0L/16632000.0L) - 6319283.0L/8316000.0L) + 1033649.0L/19958400.0L) - 7.0L/792.0L) - 7.0L/198.0L); + poly_val[13] = x*(x*(x*(x*(x*(x*(-6870101.0L/37065600.0L*x + 167126329.0L/259459200.0L) - 2405369.0L/3203200.0L) + 76448243.0L/259459200.0L) - 41981.0L/3243240.0L) + 56.0L/32175.0L) + 56.0L/6435.0L); + poly_val[14] = 
x*(x*(x*(x*(x*(x*((8002307.0L/151351200.0L)*x - 521923.0L/2837835.0L) + 5425117.0L/25225200.0L) - 41011.0L/485100.0L) + 1058149.0L/454053600.0L) - 1.0L/3861.0L) - 2.0L/1287.0L); + poly_val[15] = x*(x*(x*(x*(x*(x*(-1142023.0L/108108000.0L*x + 83487077.0L/2270268000.0L) - 10870837.0L/252252000.0L) + 12826589.0L/756756000.0L) - 21701.0L/81081000.0L) + 8.0L/315315.0L) + 8.0L/45045.0L); + poly_val[16] = x*(x*(x*(x*(x*(x*((3423751.0L/2594592000.0L)*x - 41741251.0L/9081072000.0L) + 604847.0L/112112000.0L) - 7411.0L/3492720.0L) + 266681.0L/18162144000.0L) - 1.0L/823680.0L) - 1.0L/102960.0L); + poly_val[17] = pow(x, 4)*(x*(x*(-1409119.0L/18162144000.0L*x + 1636969.0L/6054048000.0L) - 1923619.0L/6054048000.0L) + 206279.0L/1651104000.0L); + break; + case 1: + poly_val[0] = x*(x*(x*(x*(x*((1409119.0L/2594592000.0L)*x - 225133.0L/137592000.0L) + 982819.0L/605404800.0L) - 280097.0L/567567000.0L) - 266681.0L/6054048000.0L) - 1.0L/411840.0L) + 1.0L/102960.0L; + poly_val[1] = x*(x*(x*(x*(x*(-3423751.0L/370656000.0L*x + 84281297.0L/3027024000.0L) - 1239279.0L/44844800.0L) + 584159.0L/69854400.0L) + 21701.0L/27027000.0L) + 16.0L/315315.0L) - 8.0L/45045.0L; + poly_val[2] = x*(x*(x*(x*(x*((1142023.0L/15444000.0L)*x - 5274394.0L/23648625.0L) + 16757869.0L/75675600.0L) - 1574527.0L/23648625.0L) - 1058149.0L/151351200.0L) - 2.0L/3861.0L) + 2.0L/1287.0L; + poly_val[3] = x*(x*(x*(x*(x*(-8002307.0L/21621600.0L*x + 84540767.0L/75675600.0L) - 33583789.0L/30270240.0L) + 961571.0L/2910600.0L) + 41981.0L/1081080.0L) + 112.0L/32175.0L) - 56.0L/6435.0L; + poly_val[4] = x*(x*(x*(x*(x*((48090707.0L/37065600.0L)*x - 8475431.0L/2162160.0L) + 1246801.0L/320320.0L) - 18501503.0L/16216200.0L) - 1033649.0L/6652800.0L) - 7.0L/396.0L) + 7.0L/198.0L; + poly_val[5] = x*(x*(x*(x*(x*(-16071287.0L/4752000.0L*x + 24300857.0L/2376000.0L) - 22488359.0L/2217600.0L) + 24013303.0L/8316000.0L) + 999349.0L/2079000.0L) + 112.0L/1485.0L) - 56.0L/495.0L; + poly_val[6] = x*(x*(x*(x*(x*((2687573.0L/396000.0L)*x - 
5334152.0L/259875.0L) + 5606353.0L/277200.0L) - 25547.0L/4725.0L) - 901349.0L/756000.0L) - 14.0L/45.0L) + 14.0L/45.0L; + poly_val[7] = x*(x*(x*(x*(x*(-24268063.0L/2268000.0L*x + 85470809.0L/2646000.0L) - 1232929.0L/39200.0L) + 31221523.0L/3969000.0L) + 372149.0L/264600.0L) + 16.0L/9.0L) - 8.0L/9.0L; + poly_val[8] = x*(pow(x, 2)*(x*(x*((434251.0L/32400.0L)*x - 14219287.0L/352800.0L) + 4099663.0L/105840.0L) - 7673891.0L/793800.0L) - 1077749.0L/352800.0L); + poly_val[9] = x*(x*(x*(x*(x*(-434251.0L/32400.0L*x + 8491067.0L/211680.0L) - 8098063.0L/211680.0L) + 16330063.0L/1587600.0L) - 372149.0L/264600.0L) + 16.0L/9.0L) + 8.0L/9.0L; + poly_val[10] = x*(x*(x*(x*(x*((24268063.0L/2268000.0L)*x - 5275352.0L/165375.0L) + 596739.0L/19600.0L) - 4429349.0L/496125.0L) + 901349.0L/756000.0L) - 14.0L/45.0L) - 14.0L/45.0L; + poly_val[11] = x*(x*(x*(x*(x*(-2687573.0L/396000.0L*x + 7633697.0L/378000.0L) - 32262353.0L/1663200.0L) + 2482583.0L/415800.0L) - 999349.0L/2079000.0L) + 112.0L/1485.0L) + 56.0L/495.0L; + poly_val[12] = x*(x*(x*(x*(x*((16071287.0L/4752000.0L)*x - 5978251.0L/594000.0L) + 32375053.0L/3326400.0L) - 6319283.0L/2079000.0L) + 1033649.0L/6652800.0L) - 7.0L/396.0L) - 7.0L/198.0L; + poly_val[13] = x*(x*(x*(x*(x*(-48090707.0L/37065600.0L*x + 167126329.0L/43243200.0L) - 2405369.0L/640640.0L) + 76448243.0L/64864800.0L) - 41981.0L/1081080.0L) + 112.0L/32175.0L) + 56.0L/6435.0L; + poly_val[14] = x*(x*(x*(x*(x*((8002307.0L/21621600.0L)*x - 1043846.0L/945945.0L) + 5425117.0L/5045040.0L) - 41011.0L/121275.0L) + 1058149.0L/151351200.0L) - 2.0L/3861.0L) - 2.0L/1287.0L; + poly_val[15] = x*(x*(x*(x*(x*(-1142023.0L/15444000.0L*x + 83487077.0L/378378000.0L) - 10870837.0L/50450400.0L) + 12826589.0L/189189000.0L) - 21701.0L/27027000.0L) + 16.0L/315315.0L) + 8.0L/45045.0L; + poly_val[16] = x*(x*(x*(x*(x*((3423751.0L/370656000.0L)*x - 41741251.0L/1513512000.0L) + 604847.0L/22422400.0L) - 7411.0L/873180.0L) + 266681.0L/6054048000.0L) - 1.0L/411840.0L) - 1.0L/102960.0L; + poly_val[17] = 
pow(x, 3)*(x*(x*(-1409119.0L/2594592000.0L*x + 1636969.0L/1009008000.0L) - 1923619.0L/1210809600.0L) + 206279.0L/412776000.0L); + break; + case 2: + poly_val[0] = x*(x*(x*(x*((1409119.0L/432432000.0L)*x - 225133.0L/27518400.0L) + 982819.0L/151351200.0L) - 280097.0L/189189000.0L) - 266681.0L/3027024000.0L) - 1.0L/411840.0L; + poly_val[1] = x*(x*(x*(x*(-3423751.0L/61776000.0L*x + 84281297.0L/605404800.0L) - 1239279.0L/11211200.0L) + 584159.0L/23284800.0L) + 21701.0L/13513500.0L) + 16.0L/315315.0L; + poly_val[2] = x*(x*(x*(x*((1142023.0L/2574000.0L)*x - 5274394.0L/4729725.0L) + 16757869.0L/18918900.0L) - 1574527.0L/7882875.0L) - 1058149.0L/75675600.0L) - 2.0L/3861.0L; + poly_val[3] = x*(x*(x*(x*(-8002307.0L/3603600.0L*x + 84540767.0L/15135120.0L) - 33583789.0L/7567560.0L) + 961571.0L/970200.0L) + 41981.0L/540540.0L) + 112.0L/32175.0L; + poly_val[4] = x*(x*(x*(x*((48090707.0L/6177600.0L)*x - 8475431.0L/432432.0L) + 1246801.0L/80080.0L) - 18501503.0L/5405400.0L) - 1033649.0L/3326400.0L) - 7.0L/396.0L; + poly_val[5] = x*(x*(x*(x*(-16071287.0L/792000.0L*x + 24300857.0L/475200.0L) - 22488359.0L/554400.0L) + 24013303.0L/2772000.0L) + 999349.0L/1039500.0L) + 112.0L/1485.0L; + poly_val[6] = x*(x*(x*(x*((2687573.0L/66000.0L)*x - 5334152.0L/51975.0L) + 5606353.0L/69300.0L) - 25547.0L/1575.0L) - 901349.0L/378000.0L) - 14.0L/45.0L; + poly_val[7] = x*(x*(x*(x*(-24268063.0L/378000.0L*x + 85470809.0L/529200.0L) - 1232929.0L/9800.0L) + 31221523.0L/1323000.0L) + 372149.0L/132300.0L) + 16.0L/9.0L; + poly_val[8] = pow(x, 2)*(x*(x*((434251.0L/5400.0L)*x - 14219287.0L/70560.0L) + 4099663.0L/26460.0L) - 7673891.0L/264600.0L) - 1077749.0L/352800.0L; + poly_val[9] = x*(x*(x*(x*(-434251.0L/5400.0L*x + 8491067.0L/42336.0L) - 8098063.0L/52920.0L) + 16330063.0L/529200.0L) - 372149.0L/132300.0L) + 16.0L/9.0L; + poly_val[10] = x*(x*(x*(x*((24268063.0L/378000.0L)*x - 5275352.0L/33075.0L) + 596739.0L/4900.0L) - 4429349.0L/165375.0L) + 901349.0L/378000.0L) - 14.0L/45.0L; + poly_val[11] = 
x*(x*(x*(x*(-2687573.0L/66000.0L*x + 7633697.0L/75600.0L) - 32262353.0L/415800.0L) + 2482583.0L/138600.0L) - 999349.0L/1039500.0L) + 112.0L/1485.0L; + poly_val[12] = x*(x*(x*(x*((16071287.0L/792000.0L)*x - 5978251.0L/118800.0L) + 32375053.0L/831600.0L) - 6319283.0L/693000.0L) + 1033649.0L/3326400.0L) - 7.0L/396.0L; + poly_val[13] = x*(x*(x*(x*(-48090707.0L/6177600.0L*x + 167126329.0L/8648640.0L) - 2405369.0L/160160.0L) + 76448243.0L/21621600.0L) - 41981.0L/540540.0L) + 112.0L/32175.0L; + poly_val[14] = x*(x*(x*(x*((8002307.0L/3603600.0L)*x - 1043846.0L/189189.0L) + 5425117.0L/1261260.0L) - 41011.0L/40425.0L) + 1058149.0L/75675600.0L) - 2.0L/3861.0L; + poly_val[15] = x*(x*(x*(x*(-1142023.0L/2574000.0L*x + 83487077.0L/75675600.0L) - 10870837.0L/12612600.0L) + 12826589.0L/63063000.0L) - 21701.0L/13513500.0L) + 16.0L/315315.0L; + poly_val[16] = x*(x*(x*(x*((3423751.0L/61776000.0L)*x - 41741251.0L/302702400.0L) + 604847.0L/5605600.0L) - 7411.0L/291060.0L) + 266681.0L/3027024000.0L) - 1.0L/411840.0L; + poly_val[17] = pow(x, 2)*(x*(x*(-1409119.0L/432432000.0L*x + 1636969.0L/201801600.0L) - 1923619.0L/302702400.0L) + 206279.0L/137592000.0L); + break; + } +} +void beta_n8_m4(int deriv, double x, double *poly_val) { + switch(deriv) + { + case 0: + poly_val[0] = x*(x*(x*(x*(x*(x*(x*(x*(-1101377.0L/4402944000.0L*x + 14870423.0L/13208832000.0L) - 159755.0L/83026944.0L) + 1953779.0L/1320883200.0L) - 952949.0L/2235340800.0L) + 266681.0L/145297152000.0L) - 266681.0L/18162144000.0L) - 1.0L/823680.0L) + 1.0L/102960.0L); + poly_val[1] = x*(x*(x*(x*(x*(x*(x*(x*((617882417.0L/145297152000.0L)*x - 4729333.0L/247104000.0L) + 43207609.0L/1320883200.0L) - 913297027.0L/36324288000.0L) + 38966747.0L/5381376000.0L) - 21701.0L/567567000.0L) + 21701.0L/81081000.0L) + 8.0L/315315.0L) - 8.0L/45045.0L); + poly_val[2] = x*(x*(x*(x*(x*(x*(x*(x*(-463422059.0L/13621608000.0L*x + 189610079.0L/1238328000.0L) - 891181997.0L/3405402000.0L) + 273948221.0L/1362160800.0L) - 788229307.0L/13621608000.0L) + 
1058149.0L/2724321600.0L) - 1058149.0L/454053600.0L) - 1.0L/3861.0L) + 2.0L/1287.0L); + poly_val[3] = x*(x*(x*(x*(x*(x*(x*(x*((42130577.0L/247665600.0L)*x - 18961871.0L/24766560.0L) + 7617133.0L/5821200.0L) - 1369431659.0L/1362160800.0L) + 787169833.0L/2724321600.0L) - 41981.0L/16216200.0L) + 41981.0L/3243240.0L) + 56.0L/32175.0L) - 56.0L/6435.0L); + poly_val[4] = x*(x*(x*(x*(x*(x*(x*(x*(-617941241.0L/1037836800.0L*x + 34336129.0L/12812800.0L) - 216051389.0L/47174400.0L) + 1825249697.0L/518918400.0L) - 349286711.0L/345945600.0L) + 1033649.0L/79833600.0L) - 1033649.0L/19958400.0L) - 7.0L/792.0L) + 7.0L/198.0L); + poly_val[5] = x*(x*(x*(x*(x*(x*(x*(x*((264845533.0L/171072000.0L)*x - 149002003.0L/21384000.0L) + 1425869503.0L/119750400.0L) - 177679.0L/19440.0L) + 627784987.0L/239500800.0L) - 999349.0L/18711000.0L) + 999349.0L/6237000.0L) + 56.0L/1485.0L) - 56.0L/495.0L); + poly_val[6] = x*(x*(x*(x*(x*(x*(x*(x*(-42136729.0L/13608000.0L*x + 189644803.0L/13608000.0L) - 5940337.0L/249480.0L) + 1367199013.0L/74844000.0L) - 786324923.0L/149688000.0L) + 901349.0L/4536000.0L) - 901349.0L/2268000.0L) - 7.0L/45.0L) + 14.0L/45.0L); + poly_val[7] = x*(x*(x*(x*(x*(x*(x*(x*((154508033.0L/31752000.0L)*x - 115893523.0L/5292000.0L) + 2651402.0L/70875.0L) - 18221291.0L/635040.0L) + 88029853.0L/10584000.0L) - 372149.0L/793800.0L) + 372149.0L/793800.0L) + 8.0L/9.0L) - 8.0L/9.0L); + poly_val[8] = pow(x, 2)*(pow(x, 2)*(x*(x*(x*(x*(-618047281.0L/101606400.0L*x + 5297753.0L/193536.0L) - 2375132021.0L/50803200.0L) + 1822225277.0L/50803200.0L) - 355812263.0L/33868800.0L) + 63566689.0L/101606400.0L) - 1077749.0L/705600.0L) + 1; + poly_val[9] = x*(x*(x*(x*(x*(x*(x*(x*((618047281.0L/101606400.0L)*x - 231758767.0L/8467200.0L) + 2374701779.0L/50803200.0L) - 303866687.0L/8467200.0L) + 119588617.0L/11289600.0L) - 372149.0L/793800.0L) - 372149.0L/793800.0L) + 8.0L/9.0L) + 8.0L/9.0L); + poly_val[10] = x*(x*(x*(x*(x*(x*(x*(x*(-154508033.0L/31752000.0L*x + 231737053.0L/10584000.0L) - 
59361409.0L/1587600.0L) + 91229503.0L/3175200.0L) - 5997349.0L/705600.0L) + 901349.0L/4536000.0L) + 901349.0L/2268000.0L) - 7.0L/45.0L) - 14.0L/45.0L); + poly_val[11] = x*(x*(x*(x*(x*(x*(x*(x*((42136729.0L/13608000.0L)*x - 94792879.0L/6804000.0L) + 178080211.0L/7484400.0L) - 124488893.0L/6804000.0L) + 808490713.0L/149688000.0L) - 999349.0L/18711000.0L) - 999349.0L/6237000.0L) + 56.0L/1485.0L) + 56.0L/495.0L); + poly_val[12] = x*(x*(x*(x*(x*(x*(x*(x*(-264845533.0L/171072000.0L*x + 1191593773.0L/171072000.0L) - 23984633.0L/2016000.0L) + 156569593.0L/17107200.0L) - 3224840863.0L/1197504000.0L) + 1033649.0L/79833600.0L) + 1033649.0L/19958400.0L) - 7.0L/792.0L) - 7.0L/198.0L); + poly_val[13] = x*(x*(x*(x*(x*(x*(x*(x*((617941241.0L/1037836800.0L)*x - 351041.0L/131040.0L) + 2374601821.0L/518918400.0L) - 456767273.0L/129729600.0L) + 27500531.0L/26611200.0L) - 41981.0L/16216200.0L) - 41981.0L/3243240.0L) + 56.0L/32175.0L) + 56.0L/6435.0L); + poly_val[14] = x*(x*(x*(x*(x*(x*(x*(x*(-42130577.0L/247665600.0L*x + 189556483.0L/247665600.0L) - 7951072.0L/6081075.0L) + 1370477429.0L/1362160800.0L) - 803027411.0L/2724321600.0L) + 1058149.0L/2724321600.0L) + 1058149.0L/454053600.0L) - 1.0L/3861.0L) - 2.0L/1287.0L); + poly_val[15] = x*(x*(x*(x*(x*(x*(x*(x*((463422059.0L/13621608000.0L)*x - 148934833.0L/972972000.0L) + 2698663.0L/10319400.0L) - 274116233.0L/1362160800.0L) + 160398913.0L/2724321600.0L) - 21701.0L/567567000.0L) - 21701.0L/81081000.0L) + 8.0L/315315.0L) + 8.0L/45045.0L); + poly_val[16] = x*(x*(x*(x*(x*(x*(x*(x*(-617882417.0L/145297152000.0L*x + 926697983.0L/48432384000.0L) - 36537089.0L/1117670400.0L) + 166138879.0L/6604416000.0L) - 356085943.0L/48432384000.0L) + 266681.0L/145297152000.0L) + 266681.0L/18162144000.0L) - 1.0L/823680.0L) - 1.0L/102960.0L); + poly_val[17] = pow(x, 5)*(x*(x*(x*((1101377.0L/4402944000.0L)*x - 3716689.0L/3302208000.0L) + 46568317.0L/24216192000.0L) - 1194499.0L/807206400.0L) + 62788793.0L/145297152000.0L); + break; + case 1: + poly_val[0] = 
x*(x*(x*(x*(x*(x*(x*(-1101377.0L/489216000.0L*x + 14870423.0L/1651104000.0L) - 159755.0L/11860992.0L) + 1953779.0L/220147200.0L) - 952949.0L/447068160.0L) + 266681.0L/36324288000.0L) - 266681.0L/6054048000.0L) - 1.0L/411840.0L) + 1.0L/102960.0L; + poly_val[1] = x*(x*(x*(x*(x*(x*(x*((617882417.0L/16144128000.0L)*x - 4729333.0L/30888000.0L) + 43207609.0L/188697600.0L) - 913297027.0L/6054048000.0L) + 38966747.0L/1076275200.0L) - 21701.0L/141891750.0L) + 21701.0L/27027000.0L) + 16.0L/315315.0L) - 8.0L/45045.0L; + poly_val[2] = x*(x*(x*(x*(x*(x*(x*(-463422059.0L/1513512000.0L*x + 189610079.0L/154791000.0L) - 891181997.0L/486486000.0L) + 273948221.0L/227026800.0L) - 788229307.0L/2724321600.0L) + 1058149.0L/681080400.0L) - 1058149.0L/151351200.0L) - 2.0L/3861.0L) + 2.0L/1287.0L; + poly_val[3] = x*(x*(x*(x*(x*(x*(x*((42130577.0L/27518400.0L)*x - 18961871.0L/3095820.0L) + 7617133.0L/831600.0L) - 1369431659.0L/227026800.0L) + 787169833.0L/544864320.0L) - 41981.0L/4054050.0L) + 41981.0L/1081080.0L) + 112.0L/32175.0L) - 56.0L/6435.0L; + poly_val[4] = x*(x*(x*(x*(x*(x*(x*(-617941241.0L/115315200.0L*x + 34336129.0L/1601600.0L) - 216051389.0L/6739200.0L) + 1825249697.0L/86486400.0L) - 349286711.0L/69189120.0L) + 1033649.0L/19958400.0L) - 1033649.0L/6652800.0L) - 7.0L/396.0L) + 7.0L/198.0L; + poly_val[5] = x*(x*(x*(x*(x*(x*(x*((264845533.0L/19008000.0L)*x - 149002003.0L/2673000.0L) + 1425869503.0L/17107200.0L) - 177679.0L/3240.0L) + 627784987.0L/47900160.0L) - 999349.0L/4677750.0L) + 999349.0L/2079000.0L) + 112.0L/1485.0L) - 56.0L/495.0L; + poly_val[6] = x*(x*(x*(x*(x*(x*(x*(-42136729.0L/1512000.0L*x + 189644803.0L/1701000.0L) - 5940337.0L/35640.0L) + 1367199013.0L/12474000.0L) - 786324923.0L/29937600.0L) + 901349.0L/1134000.0L) - 901349.0L/756000.0L) - 14.0L/45.0L) + 14.0L/45.0L; + poly_val[7] = x*(x*(x*(x*(x*(x*(x*((154508033.0L/3528000.0L)*x - 115893523.0L/661500.0L) + 2651402.0L/10125.0L) - 18221291.0L/105840.0L) + 88029853.0L/2116800.0L) - 372149.0L/198450.0L) + 
372149.0L/264600.0L) + 16.0L/9.0L) - 8.0L/9.0L; + poly_val[8] = x*(pow(x, 2)*(x*(x*(x*(x*(-618047281.0L/11289600.0L*x + 5297753.0L/24192.0L) - 2375132021.0L/7257600.0L) + 1822225277.0L/8467200.0L) - 355812263.0L/6773760.0L) + 63566689.0L/25401600.0L) - 1077749.0L/352800.0L); + poly_val[9] = x*(x*(x*(x*(x*(x*(x*((618047281.0L/11289600.0L)*x - 231758767.0L/1058400.0L) + 2374701779.0L/7257600.0L) - 303866687.0L/1411200.0L) + 119588617.0L/2257920.0L) - 372149.0L/198450.0L) - 372149.0L/264600.0L) + 16.0L/9.0L) + 8.0L/9.0L; + poly_val[10] = x*(x*(x*(x*(x*(x*(x*(-154508033.0L/3528000.0L*x + 231737053.0L/1323000.0L) - 59361409.0L/226800.0L) + 91229503.0L/529200.0L) - 5997349.0L/141120.0L) + 901349.0L/1134000.0L) + 901349.0L/756000.0L) - 14.0L/45.0L) - 14.0L/45.0L; + poly_val[11] = x*(x*(x*(x*(x*(x*(x*((42136729.0L/1512000.0L)*x - 94792879.0L/850500.0L) + 178080211.0L/1069200.0L) - 124488893.0L/1134000.0L) + 808490713.0L/29937600.0L) - 999349.0L/4677750.0L) - 999349.0L/2079000.0L) + 112.0L/1485.0L) + 56.0L/495.0L; + poly_val[12] = x*(x*(x*(x*(x*(x*(x*(-264845533.0L/19008000.0L*x + 1191593773.0L/21384000.0L) - 23984633.0L/288000.0L) + 156569593.0L/2851200.0L) - 3224840863.0L/239500800.0L) + 1033649.0L/19958400.0L) + 1033649.0L/6652800.0L) - 7.0L/396.0L) - 7.0L/198.0L; + poly_val[13] = x*(x*(x*(x*(x*(x*(x*((617941241.0L/115315200.0L)*x - 351041.0L/16380.0L) + 2374601821.0L/74131200.0L) - 456767273.0L/21621600.0L) + 27500531.0L/5322240.0L) - 41981.0L/4054050.0L) - 41981.0L/1081080.0L) + 112.0L/32175.0L) + 56.0L/6435.0L; + poly_val[14] = x*(x*(x*(x*(x*(x*(x*(-42130577.0L/27518400.0L*x + 189556483.0L/30958200.0L) - 7951072.0L/868725.0L) + 1370477429.0L/227026800.0L) - 803027411.0L/544864320.0L) + 1058149.0L/681080400.0L) + 1058149.0L/151351200.0L) - 2.0L/3861.0L) - 2.0L/1287.0L; + poly_val[15] = x*(x*(x*(x*(x*(x*(x*((463422059.0L/1513512000.0L)*x - 148934833.0L/121621500.0L) + 2698663.0L/1474200.0L) - 274116233.0L/227026800.0L) + 160398913.0L/544864320.0L) - 
21701.0L/141891750.0L) - 21701.0L/27027000.0L) + 16.0L/315315.0L) + 8.0L/45045.0L; + poly_val[16] = x*(x*(x*(x*(x*(x*(x*(-617882417.0L/16144128000.0L*x + 926697983.0L/6054048000.0L) - 36537089.0L/159667200.0L) + 166138879.0L/1100736000.0L) - 356085943.0L/9686476800.0L) + 266681.0L/36324288000.0L) + 266681.0L/6054048000.0L) - 1.0L/411840.0L) - 1.0L/102960.0L; + poly_val[17] = pow(x, 4)*(x*(x*(x*((1101377.0L/489216000.0L)*x - 3716689.0L/412776000.0L) + 46568317.0L/3459456000.0L) - 1194499.0L/134534400.0L) + 62788793.0L/29059430400.0L); + break; + case 2: + poly_val[0] = x*(x*(x*(x*(x*(x*(-1101377.0L/61152000.0L*x + 14870423.0L/235872000.0L) - 159755.0L/1976832.0L) + 1953779.0L/44029440.0L) - 952949.0L/111767040.0L) + 266681.0L/12108096000.0L) - 266681.0L/3027024000.0L) - 1.0L/411840.0L; + poly_val[1] = x*(x*(x*(x*(x*(x*((617882417.0L/2018016000.0L)*x - 33105331.0L/30888000.0L) + 43207609.0L/31449600.0L) - 913297027.0L/1210809600.0L) + 38966747.0L/269068800.0L) - 21701.0L/47297250.0L) + 21701.0L/13513500.0L) + 16.0L/315315.0L; + poly_val[2] = x*(x*(x*(x*(x*(x*(-463422059.0L/189189000.0L*x + 189610079.0L/22113000.0L) - 891181997.0L/81081000.0L) + 273948221.0L/45405360.0L) - 788229307.0L/681080400.0L) + 1058149.0L/227026800.0L) - 1058149.0L/75675600.0L) - 2.0L/3861.0L; + poly_val[3] = x*(x*(x*(x*(x*(x*((42130577.0L/3439800.0L)*x - 18961871.0L/442260.0L) + 7617133.0L/138600.0L) - 1369431659.0L/45405360.0L) + 787169833.0L/136216080.0L) - 41981.0L/1351350.0L) + 41981.0L/540540.0L) + 112.0L/32175.0L; + poly_val[4] = x*(x*(x*(x*(x*(x*(-617941241.0L/14414400.0L*x + 34336129.0L/228800.0L) - 216051389.0L/1123200.0L) + 1825249697.0L/17297280.0L) - 349286711.0L/17297280.0L) + 1033649.0L/6652800.0L) - 1033649.0L/3326400.0L) - 7.0L/396.0L; + poly_val[5] = x*(x*(x*(x*(x*(x*((264845533.0L/2376000.0L)*x - 1043014021.0L/2673000.0L) + 1425869503.0L/2851200.0L) - 177679.0L/648.0L) + 627784987.0L/11975040.0L) - 999349.0L/1559250.0L) + 999349.0L/1039500.0L) + 112.0L/1485.0L; + poly_val[6] 
= x*(x*(x*(x*(x*(x*(-42136729.0L/189000.0L*x + 189644803.0L/243000.0L) - 5940337.0L/5940.0L) + 1367199013.0L/2494800.0L) - 786324923.0L/7484400.0L) + 901349.0L/378000.0L) - 901349.0L/378000.0L) - 14.0L/45.0L; + poly_val[7] = x*(x*(x*(x*(x*(x*((154508033.0L/441000.0L)*x - 115893523.0L/94500.0L) + 5302804.0L/3375.0L) - 18221291.0L/21168.0L) + 88029853.0L/529200.0L) - 372149.0L/66150.0L) + 372149.0L/132300.0L) + 16.0L/9.0L; + poly_val[8] = pow(x, 2)*(x*(x*(x*(x*(-618047281.0L/1411200.0L*x + 5297753.0L/3456.0L) - 2375132021.0L/1209600.0L) + 1822225277.0L/1693440.0L) - 355812263.0L/1693440.0L) + 63566689.0L/8467200.0L) - 1077749.0L/352800.0L; + poly_val[9] = x*(x*(x*(x*(x*(x*((618047281.0L/1411200.0L)*x - 231758767.0L/151200.0L) + 2374701779.0L/1209600.0L) - 303866687.0L/282240.0L) + 119588617.0L/564480.0L) - 372149.0L/66150.0L) - 372149.0L/132300.0L) + 16.0L/9.0L; + poly_val[10] = x*(x*(x*(x*(x*(x*(-154508033.0L/441000.0L*x + 231737053.0L/189000.0L) - 59361409.0L/37800.0L) + 91229503.0L/105840.0L) - 5997349.0L/35280.0L) + 901349.0L/378000.0L) + 901349.0L/378000.0L) - 14.0L/45.0L; + poly_val[11] = x*(x*(x*(x*(x*(x*((42136729.0L/189000.0L)*x - 94792879.0L/121500.0L) + 178080211.0L/178200.0L) - 124488893.0L/226800.0L) + 808490713.0L/7484400.0L) - 999349.0L/1559250.0L) - 999349.0L/1039500.0L) + 112.0L/1485.0L; + poly_val[12] = x*(x*(x*(x*(x*(x*(-264845533.0L/2376000.0L*x + 8341156411.0L/21384000.0L) - 23984633.0L/48000.0L) + 156569593.0L/570240.0L) - 3224840863.0L/59875200.0L) + 1033649.0L/6652800.0L) + 1033649.0L/3326400.0L) - 7.0L/396.0L; + poly_val[13] = x*(x*(x*(x*(x*(x*((617941241.0L/14414400.0L)*x - 351041.0L/2340.0L) + 2374601821.0L/12355200.0L) - 456767273.0L/4324320.0L) + 27500531.0L/1330560.0L) - 41981.0L/1351350.0L) - 41981.0L/540540.0L) + 112.0L/32175.0L; + poly_val[14] = x*(x*(x*(x*(x*(x*(-42130577.0L/3439800.0L*x + 189556483.0L/4422600.0L) - 15902144.0L/289575.0L) + 1370477429.0L/45405360.0L) - 803027411.0L/136216080.0L) + 1058149.0L/227026800.0L) + 
1058149.0L/75675600.0L) - 2.0L/3861.0L; + poly_val[15] = x*(x*(x*(x*(x*(x*((463422059.0L/189189000.0L)*x - 148934833.0L/17374500.0L) + 2698663.0L/245700.0L) - 274116233.0L/45405360.0L) + 160398913.0L/136216080.0L) - 21701.0L/47297250.0L) - 21701.0L/13513500.0L) + 16.0L/315315.0L; + poly_val[16] = x*(x*(x*(x*(x*(x*(-617882417.0L/2018016000.0L*x + 926697983.0L/864864000.0L) - 36537089.0L/26611200.0L) + 166138879.0L/220147200.0L) - 356085943.0L/2421619200.0L) + 266681.0L/12108096000.0L) + 266681.0L/3027024000.0L) - 1.0L/411840.0L; + poly_val[17] = pow(x, 3)*(x*(x*(x*((1101377.0L/61152000.0L)*x - 3716689.0L/58968000.0L) + 46568317.0L/576576000.0L) - 1194499.0L/26906880.0L) + 62788793.0L/7264857600.0L); + break; + } +} diff --git a/bfps/cpp/spline_n8.hpp b/bfps/cpp/spline_n8.hpp new file mode 100644 index 0000000000000000000000000000000000000000..39b2e03ad321b495a1da04f1fae8526daa4d3536 --- /dev/null +++ b/bfps/cpp/spline_n8.hpp @@ -0,0 +1,38 @@ +/********************************************************************** +* * +* Copyright 2017 Max Planck Institute * +* for Dynamics and Self-Organization * +* * +* This file is part of bfps. * +* * +* bfps is free software: you can redistribute it and/or modify * +* it under the terms of the GNU General Public License as published * +* by the Free Software Foundation, either version 3 of the License, * +* or (at your option) any later version. * +* * +* bfps is distributed in the hope that it will be useful, * +* but WITHOUT ANY WARRANTY; without even the implied warranty of * +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * +* GNU General Public License for more details. * +* * +* You should have received a copy of the GNU General Public License * +* along with bfps. 
If not, see <http://www.gnu.org/licenses/> * +* * +* Contact: Cristian.Lalescu@ds.mpg.de * +* * +**********************************************************************/ + + + +#ifndef SPLINE_N8 + +#define SPLINE_N8 + +void beta_n8_m0(const int deriv, const double x, double *__restrict__ poly_val); +void beta_n8_m1(const int deriv, const double x, double *__restrict__ poly_val); +void beta_n8_m2(const int deriv, const double x, double *__restrict__ poly_val); +void beta_n8_m3(const int deriv, const double x, double *__restrict__ poly_val); +void beta_n8_m4(const int deriv, const double x, double *__restrict__ poly_val); + +#endif//SPLINE_N8 + diff --git a/bfps/cpp/spline_n9.cpp b/bfps/cpp/spline_n9.cpp new file mode 100644 index 0000000000000000000000000000000000000000..4587a3f91c27937f553430463d19ea2221a173dc --- /dev/null +++ b/bfps/cpp/spline_n9.cpp @@ -0,0 +1,384 @@ +/********************************************************************** +* * +* Copyright 2015 Max Planck Institute * +* for Dynamics and Self-Organization * +* * +* This file is part of bfps. * +* * +* bfps is free software: you can redistribute it and/or modify * +* it under the terms of the GNU General Public License as published * +* by the Free Software Foundation, either version 3 of the License, * +* or (at your option) any later version. * +* * +* bfps is distributed in the hope that it will be useful, * +* but WITHOUT ANY WARRANTY; without even the implied warranty of * +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * +* GNU General Public License for more details. * +* * +* You should have received a copy of the GNU General Public License * +* along with bfps. 
If not, see <http://www.gnu.org/licenses/> * +* * +* Contact: Cristian.Lalescu@ds.mpg.de * +* * +**********************************************************************/ + + + +#include "spline_n9.hpp" +#include <cmath> + +void beta_n9_m0(int deriv, double x, double *poly_val) { + switch(deriv) + { + case 0: + poly_val[0] = 0; + poly_val[1] = 0; + poly_val[2] = 0; + poly_val[3] = 0; + poly_val[4] = 0; + poly_val[5] = 0; + poly_val[6] = 0; + poly_val[7] = 0; + poly_val[8] = 0; + poly_val[9] = -x + 1; + poly_val[10] = x; + poly_val[11] = 0; + poly_val[12] = 0; + poly_val[13] = 0; + poly_val[14] = 0; + poly_val[15] = 0; + poly_val[16] = 0; + poly_val[17] = 0; + poly_val[18] = 0; + poly_val[19] = 0; + break; + case 1: + poly_val[0] = 0; + poly_val[1] = 0; + poly_val[2] = 0; + poly_val[3] = 0; + poly_val[4] = 0; + poly_val[5] = 0; + poly_val[6] = 0; + poly_val[7] = 0; + poly_val[8] = 0; + poly_val[9] = -1; + poly_val[10] = 1; + poly_val[11] = 0; + poly_val[12] = 0; + poly_val[13] = 0; + poly_val[14] = 0; + poly_val[15] = 0; + poly_val[16] = 0; + poly_val[17] = 0; + poly_val[18] = 0; + poly_val[19] = 0; + break; + case 2: + poly_val[0] = 0; + poly_val[1] = 0; + poly_val[2] = 0; + poly_val[3] = 0; + poly_val[4] = 0; + poly_val[5] = 0; + poly_val[6] = 0; + poly_val[7] = 0; + poly_val[8] = 0; + poly_val[9] = 0; + poly_val[10] = 0; + poly_val[11] = 0; + poly_val[12] = 0; + poly_val[13] = 0; + poly_val[14] = 0; + poly_val[15] = 0; + poly_val[16] = 0; + poly_val[17] = 0; + poly_val[18] = 0; + poly_val[19] = 0; + break; + } +} +void beta_n9_m1(int deriv, double x, double *poly_val) { + switch(deriv) + { + case 0: + poly_val[0] = x*(x*(-1.0L/437580.0L*x + 1.0L/218790.0L) - 1.0L/437580.0L); + poly_val[1] = x*(x*((7.0L/159120.0L)*x - 79.0L/875160.0L) + 9.0L/194480.0L); + poly_val[2] = x*(x*(-549.0L/1361360.0L*x + 1161.0L/1361360.0L) - 9.0L/20020.0L); + poly_val[3] = x*(x*((47.0L/20020.0L)*x - 103.0L/20020.0L) + 2.0L/715.0L); + poly_val[4] = x*(x*(-7.0L/715.0L*x + 16.0L/715.0L) - 
9.0L/715.0L); + poly_val[5] = x*(x*((9.0L/286.0L)*x - 54.0L/715.0L) + 63.0L/1430.0L); + poly_val[6] = x*(x*(-119.0L/1430.0L*x + 301.0L/1430.0L) - 7.0L/55.0L); + poly_val[7] = x*(x*((1.0L/5.0L)*x - 29.0L/55.0L) + 18.0L/55.0L); + poly_val[8] = x*(x*(-63.0L/110.0L*x + 81.0L/55.0L) - 9.0L/10.0L); + poly_val[9] = pow(x, 2)*((11.0L/10.0L)*x - 21.0L/10.0L) + 1; + poly_val[10] = x*(x*(-11.0L/10.0L*x + 6.0L/5.0L) + 9.0L/10.0L); + poly_val[11] = x*(x*((63.0L/110.0L)*x - 27.0L/110.0L) - 18.0L/55.0L); + poly_val[12] = x*(x*(-1.0L/5.0L*x + 4.0L/55.0L) + 7.0L/55.0L); + poly_val[13] = x*(x*((119.0L/1430.0L)*x - 28.0L/715.0L) - 63.0L/1430.0L); + poly_val[14] = x*(x*(-9.0L/286.0L*x + 27.0L/1430.0L) + 9.0L/715.0L); + poly_val[15] = x*(x*((7.0L/715.0L)*x - 1.0L/143.0L) - 2.0L/715.0L); + poly_val[16] = x*(x*(-47.0L/20020.0L*x + 19.0L/10010.0L) + 9.0L/20020.0L); + poly_val[17] = x*(x*((549.0L/1361360.0L)*x - 243.0L/680680.0L) - 9.0L/194480.0L); + poly_val[18] = x*(x*(-7.0L/159120.0L*x + 73.0L/1750320.0L) + 1.0L/437580.0L); + poly_val[19] = pow(x, 2)*((1.0L/437580.0L)*x - 1.0L/437580.0L); + break; + case 1: + poly_val[0] = x*(-1.0L/145860.0L*x + 1.0L/109395.0L) - 1.0L/437580.0L; + poly_val[1] = x*((7.0L/53040.0L)*x - 79.0L/437580.0L) + 9.0L/194480.0L; + poly_val[2] = x*(-1647.0L/1361360.0L*x + 1161.0L/680680.0L) - 9.0L/20020.0L; + poly_val[3] = x*((141.0L/20020.0L)*x - 103.0L/10010.0L) + 2.0L/715.0L; + poly_val[4] = x*(-21.0L/715.0L*x + 32.0L/715.0L) - 9.0L/715.0L; + poly_val[5] = x*((27.0L/286.0L)*x - 108.0L/715.0L) + 63.0L/1430.0L; + poly_val[6] = x*(-357.0L/1430.0L*x + 301.0L/715.0L) - 7.0L/55.0L; + poly_val[7] = x*((3.0L/5.0L)*x - 58.0L/55.0L) + 18.0L/55.0L; + poly_val[8] = x*(-189.0L/110.0L*x + 162.0L/55.0L) - 9.0L/10.0L; + poly_val[9] = x*((33.0L/10.0L)*x - 21.0L/5.0L); + poly_val[10] = x*(-33.0L/10.0L*x + 12.0L/5.0L) + 9.0L/10.0L; + poly_val[11] = x*((189.0L/110.0L)*x - 27.0L/55.0L) - 18.0L/55.0L; + poly_val[12] = x*(-3.0L/5.0L*x + 8.0L/55.0L) + 7.0L/55.0L; + poly_val[13] = 
x*((357.0L/1430.0L)*x - 56.0L/715.0L) - 63.0L/1430.0L; + poly_val[14] = x*(-27.0L/286.0L*x + 27.0L/715.0L) + 9.0L/715.0L; + poly_val[15] = x*((21.0L/715.0L)*x - 2.0L/143.0L) - 2.0L/715.0L; + poly_val[16] = x*(-141.0L/20020.0L*x + 19.0L/5005.0L) + 9.0L/20020.0L; + poly_val[17] = x*((1647.0L/1361360.0L)*x - 243.0L/340340.0L) - 9.0L/194480.0L; + poly_val[18] = x*(-7.0L/53040.0L*x + 73.0L/875160.0L) + 1.0L/437580.0L; + poly_val[19] = x*((1.0L/145860.0L)*x - 1.0L/218790.0L); + break; + case 2: + poly_val[0] = -1.0L/72930.0L*x + 1.0L/109395.0L; + poly_val[1] = (7.0L/26520.0L)*x - 79.0L/437580.0L; + poly_val[2] = -1647.0L/680680.0L*x + 1161.0L/680680.0L; + poly_val[3] = (141.0L/10010.0L)*x - 103.0L/10010.0L; + poly_val[4] = -42.0L/715.0L*x + 32.0L/715.0L; + poly_val[5] = (27.0L/143.0L)*x - 108.0L/715.0L; + poly_val[6] = -357.0L/715.0L*x + 301.0L/715.0L; + poly_val[7] = (6.0L/5.0L)*x - 58.0L/55.0L; + poly_val[8] = -189.0L/55.0L*x + 162.0L/55.0L; + poly_val[9] = (33.0L/5.0L)*x - 21.0L/5.0L; + poly_val[10] = -33.0L/5.0L*x + 12.0L/5.0L; + poly_val[11] = (189.0L/55.0L)*x - 27.0L/55.0L; + poly_val[12] = -6.0L/5.0L*x + 8.0L/55.0L; + poly_val[13] = (357.0L/715.0L)*x - 56.0L/715.0L; + poly_val[14] = -27.0L/143.0L*x + 27.0L/715.0L; + poly_val[15] = (42.0L/715.0L)*x - 2.0L/143.0L; + poly_val[16] = -141.0L/10010.0L*x + 19.0L/5005.0L; + poly_val[17] = (1647.0L/680680.0L)*x - 243.0L/340340.0L; + poly_val[18] = -7.0L/26520.0L*x + 73.0L/875160.0L; + poly_val[19] = (1.0L/72930.0L)*x - 1.0L/218790.0L; + break; + } +} +void beta_n9_m2(int deriv, double x, double *poly_val) { + switch(deriv) + { + case 0: + poly_val[0] = x*(x*(x*(x*((1.0L/151470.0L)*x - 23.0L/1312740.0L) + 1.0L/77220.0L) + 1.0L/3938220.0L) - 1.0L/437580.0L); + poly_val[1] = x*(x*(x*(x*(-15871.0L/126023040.0L*x + 42389.0L/126023040.0L) - 31621.0L/126023040.0L) - 9.0L/1555840.0L) + 9.0L/194480.0L); + poly_val[2] = x*(x*(x*(x*((17379.0L/15247232.0L)*x - 23391.0L/7623616.0L) + 176391.0L/76236160.0L) + 9.0L/140140.0L) - 
9.0L/20020.0L); + poly_val[3] = x*(x*(x*(x*(-1369.0L/210210.0L*x + 2481.0L/140140.0L) - 379.0L/28028.0L) - 1.0L/2145.0L) + 2.0L/715.0L); + poly_val[4] = x*(x*(x*(x*((283.0L/10725.0L)*x - 779.0L/10725.0L) + 604.0L/10725.0L) + 9.0L/3575.0L) - 9.0L/715.0L); + poly_val[5] = x*(x*(x*(x*(-2313.0L/28600.0L*x + 6471.0L/28600.0L) - 5103.0L/28600.0L) - 63.0L/5720.0L) + 63.0L/1430.0L); + poly_val[6] = x*(x*(x*(x*((259.0L/1320.0L)*x - 1603.0L/2860.0L) + 2569.0L/5720.0L) + 7.0L/165.0L) - 7.0L/55.0L); + poly_val[7] = x*(x*(x*(x*(-13.0L/33.0L*x + 38.0L/33.0L) - 152.0L/165.0L) - 9.0L/55.0L) + 18.0L/55.0L); + poly_val[8] = x*(x*(x*(x*((36.0L/55.0L)*x - 207.0L/110.0L) + 27.0L/22.0L) + 9.0L/10.0L) - 9.0L/10.0L); + poly_val[9] = pow(x, 2)*(x*(x*(-5462819.0L/6350400.0L*x + 4827779.0L/2116800.0L) - 1864259.0L/2116800.0L) - 9778141.0L/6350400.0L) + 1; + poly_val[10] = x*(x*(x*(x*((5462819.0L/6350400.0L)*x - 6415379.0L/3175200.0L) + 2287619.0L/6350400.0L) + 9.0L/10.0L) + 9.0L/10.0L); + poly_val[11] = x*(x*(x*(x*(-36.0L/55.0L*x + 153.0L/110.0L) - 27.0L/110.0L) - 9.0L/55.0L) - 18.0L/55.0L); + poly_val[12] = x*(x*(x*(x*((13.0L/33.0L)*x - 9.0L/11.0L) + 14.0L/55.0L) + 7.0L/165.0L) + 7.0L/55.0L); + poly_val[13] = x*(x*(x*(x*(-259.0L/1320.0L*x + 7217.0L/17160.0L) - 581.0L/3432.0L) - 63.0L/5720.0L) - 63.0L/1430.0L); + poly_val[14] = x*(x*(x*(x*((2313.0L/28600.0L)*x - 2547.0L/14300.0L) + 2349.0L/28600.0L) + 9.0L/3575.0L) + 9.0L/715.0L); + poly_val[15] = x*(x*(x*(x*(-283.0L/10725.0L*x + 212.0L/3575.0L) - 106.0L/3575.0L) - 1.0L/2145.0L) - 2.0L/715.0L); + poly_val[16] = x*(x*(x*(x*((1369.0L/210210.0L)*x - 6247.0L/420420.0L) + 3293.0L/420420.0L) + 9.0L/140140.0L) + 9.0L/20020.0L); + poly_val[17] = x*(x*(x*(x*(-17379.0L/15247232.0L*x + 40113.0L/15247232.0L) - 6453.0L/4484480.0L) - 9.0L/1555840.0L) - 9.0L/194480.0L); + poly_val[18] = x*(x*(x*(x*((15871.0L/126023040.0L)*x - 6161.0L/21003840.0L) + 1385.0L/8401536.0L) + 1.0L/3938220.0L) + 1.0L/437580.0L); + poly_val[19] = pow(x, 3)*(x*(-1.0L/151470.0L*x + 
61.0L/3938220.0L) - 7.0L/787644.0L); + break; + case 1: + poly_val[0] = x*(x*(x*((1.0L/30294.0L)*x - 23.0L/328185.0L) + 1.0L/25740.0L) + 1.0L/1969110.0L) - 1.0L/437580.0L; + poly_val[1] = x*(x*(x*(-15871.0L/25204608.0L*x + 42389.0L/31505760.0L) - 31621.0L/42007680.0L) - 9.0L/777920.0L) + 9.0L/194480.0L; + poly_val[2] = x*(x*(x*((86895.0L/15247232.0L)*x - 23391.0L/1905904.0L) + 529173.0L/76236160.0L) + 9.0L/70070.0L) - 9.0L/20020.0L; + poly_val[3] = x*(x*(x*(-1369.0L/42042.0L*x + 2481.0L/35035.0L) - 1137.0L/28028.0L) - 2.0L/2145.0L) + 2.0L/715.0L; + poly_val[4] = x*(x*(x*((283.0L/2145.0L)*x - 3116.0L/10725.0L) + 604.0L/3575.0L) + 18.0L/3575.0L) - 9.0L/715.0L; + poly_val[5] = x*(x*(x*(-2313.0L/5720.0L*x + 6471.0L/7150.0L) - 15309.0L/28600.0L) - 63.0L/2860.0L) + 63.0L/1430.0L; + poly_val[6] = x*(x*(x*((259.0L/264.0L)*x - 1603.0L/715.0L) + 7707.0L/5720.0L) + 14.0L/165.0L) - 7.0L/55.0L; + poly_val[7] = x*(x*(x*(-65.0L/33.0L*x + 152.0L/33.0L) - 152.0L/55.0L) - 18.0L/55.0L) + 18.0L/55.0L; + poly_val[8] = x*(x*(x*((36.0L/11.0L)*x - 414.0L/55.0L) + 81.0L/22.0L) + 9.0L/5.0L) - 9.0L/10.0L; + poly_val[9] = x*(x*(x*(-5462819.0L/1270080.0L*x + 4827779.0L/529200.0L) - 1864259.0L/705600.0L) - 9778141.0L/3175200.0L); + poly_val[10] = x*(x*(x*((5462819.0L/1270080.0L)*x - 6415379.0L/793800.0L) + 2287619.0L/2116800.0L) + 9.0L/5.0L) + 9.0L/10.0L; + poly_val[11] = x*(x*(x*(-36.0L/11.0L*x + 306.0L/55.0L) - 81.0L/110.0L) - 18.0L/55.0L) - 18.0L/55.0L; + poly_val[12] = x*(x*(x*((65.0L/33.0L)*x - 36.0L/11.0L) + 42.0L/55.0L) + 14.0L/165.0L) + 7.0L/55.0L; + poly_val[13] = x*(x*(x*(-259.0L/264.0L*x + 7217.0L/4290.0L) - 581.0L/1144.0L) - 63.0L/2860.0L) - 63.0L/1430.0L; + poly_val[14] = x*(x*(x*((2313.0L/5720.0L)*x - 2547.0L/3575.0L) + 7047.0L/28600.0L) + 18.0L/3575.0L) + 9.0L/715.0L; + poly_val[15] = x*(x*(x*(-283.0L/2145.0L*x + 848.0L/3575.0L) - 318.0L/3575.0L) - 2.0L/2145.0L) - 2.0L/715.0L; + poly_val[16] = x*(x*(x*((1369.0L/42042.0L)*x - 6247.0L/105105.0L) + 3293.0L/140140.0L) + 
9.0L/70070.0L) + 9.0L/20020.0L; + poly_val[17] = x*(x*(x*(-86895.0L/15247232.0L*x + 40113.0L/3811808.0L) - 19359.0L/4484480.0L) - 9.0L/777920.0L) - 9.0L/194480.0L; + poly_val[18] = x*(x*(x*((15871.0L/25204608.0L)*x - 6161.0L/5250960.0L) + 1385.0L/2800512.0L) + 1.0L/1969110.0L) + 1.0L/437580.0L; + poly_val[19] = pow(x, 2)*(x*(-1.0L/30294.0L*x + 61.0L/984555.0L) - 7.0L/262548.0L); + break; + case 2: + poly_val[0] = x*(x*((2.0L/15147.0L)*x - 23.0L/109395.0L) + 1.0L/12870.0L) + 1.0L/1969110.0L; + poly_val[1] = x*(x*(-15871.0L/6301152.0L*x + 42389.0L/10501920.0L) - 31621.0L/21003840.0L) - 9.0L/777920.0L; + poly_val[2] = x*(x*((86895.0L/3811808.0L)*x - 70173.0L/1905904.0L) + 529173.0L/38118080.0L) + 9.0L/70070.0L; + poly_val[3] = x*(x*(-2738.0L/21021.0L*x + 7443.0L/35035.0L) - 1137.0L/14014.0L) - 2.0L/2145.0L; + poly_val[4] = x*(x*((1132.0L/2145.0L)*x - 3116.0L/3575.0L) + 1208.0L/3575.0L) + 18.0L/3575.0L; + poly_val[5] = x*(x*(-2313.0L/1430.0L*x + 19413.0L/7150.0L) - 15309.0L/14300.0L) - 63.0L/2860.0L; + poly_val[6] = x*(x*((259.0L/66.0L)*x - 4809.0L/715.0L) + 7707.0L/2860.0L) + 14.0L/165.0L; + poly_val[7] = x*(x*(-260.0L/33.0L*x + 152.0L/11.0L) - 304.0L/55.0L) - 18.0L/55.0L; + poly_val[8] = x*(x*((144.0L/11.0L)*x - 1242.0L/55.0L) + 81.0L/11.0L) + 9.0L/5.0L; + poly_val[9] = x*(x*(-5462819.0L/317520.0L*x + 4827779.0L/176400.0L) - 1864259.0L/352800.0L) - 9778141.0L/3175200.0L; + poly_val[10] = x*(x*((5462819.0L/317520.0L)*x - 6415379.0L/264600.0L) + 2287619.0L/1058400.0L) + 9.0L/5.0L; + poly_val[11] = x*(x*(-144.0L/11.0L*x + 918.0L/55.0L) - 81.0L/55.0L) - 18.0L/55.0L; + poly_val[12] = x*(x*((260.0L/33.0L)*x - 108.0L/11.0L) + 84.0L/55.0L) + 14.0L/165.0L; + poly_val[13] = x*(x*(-259.0L/66.0L*x + 7217.0L/1430.0L) - 581.0L/572.0L) - 63.0L/2860.0L; + poly_val[14] = x*(x*((2313.0L/1430.0L)*x - 7641.0L/3575.0L) + 7047.0L/14300.0L) + 18.0L/3575.0L; + poly_val[15] = x*(x*(-1132.0L/2145.0L*x + 2544.0L/3575.0L) - 636.0L/3575.0L) - 2.0L/2145.0L; + poly_val[16] = 
x*(x*((2738.0L/21021.0L)*x - 6247.0L/35035.0L) + 3293.0L/70070.0L) + 9.0L/70070.0L; + poly_val[17] = x*(x*(-86895.0L/3811808.0L*x + 120339.0L/3811808.0L) - 19359.0L/2242240.0L) - 9.0L/777920.0L; + poly_val[18] = x*(x*((15871.0L/6301152.0L)*x - 6161.0L/1750320.0L) + 1385.0L/1400256.0L) + 1.0L/1969110.0L; + poly_val[19] = x*(x*(-2.0L/15147.0L*x + 61.0L/328185.0L) - 7.0L/131274.0L); + break; + } +} +void beta_n9_m3(int deriv, double x, double *poly_val) { + switch(deriv) + { + case 0: + poly_val[0] = x*(x*(x*(x*(x*(x*(-1888217.0L/102918816000.0L*x + 4978651.0L/77189112000.0L) - 11858753.0L/154378224000.0L) + 44201.0L/1513512000.0L) + 63397.0L/18162144000.0L) + 1.0L/3938220.0L) - 1.0L/437580.0L); + poly_val[1] = x*(x*(x*(x*(x*(x*((3845371.0L/11027016000.0L)*x - 378705053.0L/308756448000.0L) + 451208881.0L/308756448000.0L) - 11393267.0L/20583763200.0L) - 10949.0L/155232000.0L) - 9.0L/1555840.0L) + 9.0L/194480.0L); + poly_val[2] = x*(x*(x*(x*(x*(x*(-53864479.0L/17153136000.0L*x + 34466423.0L/3118752000.0L) - 50212829.0L/3811808000.0L) + 4865737.0L/980179200.0L) + 196909.0L/288288000.0L) + 9.0L/140140.0L) - 9.0L/20020.0L); + poly_val[3] = x*(x*(x*(x*(x*(x*((323440939.0L/18162144000.0L)*x - 1139100889.0L/18162144000.0L) + 452791811.0L/6054048000.0L) - 2074439.0L/74131200.0L) - 9601741.0L/2270268000.0L) - 1.0L/2145.0L) + 2.0L/715.0L); + poly_val[4] = x*(x*(x*(x*(x*(x*(-46260611.0L/648648000.0L*x + 190244191.0L/756756000.0L) - 28367477.0L/94594500.0L) + 252001891.0L/2270268000.0L) + 5861.0L/310464.0L) + 9.0L/3575.0L) - 9.0L/715.0L); + poly_val[5] = x*(x*(x*(x*(x*(x*((216286123.0L/1009008000.0L)*x - 381611761.0L/504504000.0L) + 25287533.0L/28028000.0L) - 12734027.0L/38808000.0L) - 9381241.0L/144144000.0L) - 63.0L/5720.0L) + 63.0L/1430.0L); + poly_val[6] = x*(x*(x*(x*(x*(x*(-650812303.0L/1297296000.0L*x + 328457149.0L/185328000.0L) - 912727037.0L/432432000.0L) + 192796181.0L/259459200.0L) + 9072541.0L/49896000.0L) + 7.0L/165.0L) - 7.0L/55.0L); + poly_val[7] = 
x*(x*(x*(x*(x*(x*((326737249.0L/349272000.0L)*x - 384845203.0L/116424000.0L) + 455197111.0L/116424000.0L) - 89503367.0L/69854400.0L) - 8190541.0L/19404000.0L) - 9.0L/55.0L) + 18.0L/55.0L); + poly_val[8] = x*(x*(x*(x*(x*(x*(-15623459.0L/11088000.0L*x + 12051359.0L/2425500.0L) - 24996777.0L/4312000.0L) + 3398939.0L/1940400.0L) + 3427741.0L/7056000.0L) + 9.0L/10.0L) - 9.0L/10.0L); + poly_val[9] = pow(x, 2)*(pow(x, 2)*(x*(x*((109653629.0L/63504000.0L)*x - 128296219.0L/21168000.0L) + 442116007.0L/63504000.0L) - 18943367.0L/9072000.0L) - 9778141.0L/6350400.0L) + 1; + poly_val[10] = x*(x*(x*(x*(x*(x*(-109653629.0L/63504000.0L*x + 191343373.0L/31752000.0L) - 217755137.0L/31752000.0L) + 10180259.0L/4536000.0L) - 3427741.0L/7056000.0L) + 9.0L/10.0L) + 9.0L/10.0L); + poly_val[11] = x*(x*(x*(x*(x*(x*((15623459.0L/11088000.0L)*x - 379906003.0L/77616000.0L) + 48081059.0L/8624000.0L) - 31369501.0L/15523200.0L) + 8190541.0L/19404000.0L) - 9.0L/55.0L) - 18.0L/55.0L); + poly_val[12] = x*(x*(x*(x*(x*(x*(-326737249.0L/349272000.0L*x + 566312567.0L/174636000.0L) - 108321659.0L/29106000.0L) + 49820941.0L/34927200.0L) - 9072541.0L/49896000.0L) + 7.0L/165.0L) + 7.0L/55.0L); + poly_val[13] = x*(x*(x*(x*(x*(x*((650812303.0L/1297296000.0L)*x - 4884169.0L/2808000.0L) + 54375817.0L/27027000.0L) - 101735461.0L/129729600.0L) + 9381241.0L/144144000.0L) - 63.0L/5720.0L) - 63.0L/1430.0L); + poly_val[14] = x*(x*(x*(x*(x*(x*(-216286123.0L/1009008000.0L*x + 750779339.0L/1009008000.0L) - 97002071.0L/112112000.0L) + 342332713.0L/1009008000.0L) - 5861.0L/310464.0L) + 9.0L/3575.0L) + 9.0L/715.0L); + poly_val[15] = x*(x*(x*(x*(x*(x*((46260611.0L/648648000.0L)*x - 1125304793.0L/4540536000.0L) + 437719279.0L/1513512000.0L) - 516063203.0L/4540536000.0L) + 9601741.0L/2270268000.0L) - 1.0L/2145.0L) - 2.0L/715.0L); + poly_val[16] = x*(x*(x*(x*(x*(x*(-323440939.0L/18162144000.0L*x + 93748807.0L/1513512000.0L) - 219338303.0L/3027024000.0L) + 3696851.0L/129729600.0L) - 196909.0L/288288000.0L) + 9.0L/140140.0L) + 
9.0L/20020.0L); + poly_val[17] = x*(x*(x*(x*(x*(x*((53864479.0L/17153136000.0L)*x - 374972053.0L/34306272000.0L) + 48826629.0L/3811808000.0L) - 290471.0L/57657600.0L) + 10949.0L/155232000.0L) - 9.0L/1555840.0L) - 9.0L/194480.0L); + poly_val[18] = x*(x*(x*(x*(x*(x*(-3845371.0L/11027016000.0L*x + 374987663.0L/308756448000.0L) - 440056711.0L/308756448000.0L) + 2662049.0L/4750099200.0L) - 63397.0L/18162144000.0L) + 1.0L/3938220.0L) + 1.0L/437580.0L); + poly_val[19] = pow(x, 4)*(x*(x*((1888217.0L/102918816000.0L)*x - 19737953.0L/308756448000.0L) + 23187553.0L/308756448000.0L) - 9114251.0L/308756448000.0L); + break; + case 1: + poly_val[0] = x*(x*(x*(x*(x*(-1888217.0L/14702688000.0L*x + 4978651.0L/12864852000.0L) - 11858753.0L/30875644800.0L) + 44201.0L/378378000.0L) + 63397.0L/6054048000.0L) + 1.0L/1969110.0L) - 1.0L/437580.0L; + poly_val[1] = x*(x*(x*(x*(x*((3845371.0L/1575288000.0L)*x - 378705053.0L/51459408000.0L) + 451208881.0L/61751289600.0L) - 11393267.0L/5145940800.0L) - 10949.0L/51744000.0L) - 9.0L/777920.0L) + 9.0L/194480.0L; + poly_val[2] = x*(x*(x*(x*(x*(-53864479.0L/2450448000.0L*x + 34466423.0L/519792000.0L) - 50212829.0L/762361600.0L) + 4865737.0L/245044800.0L) + 196909.0L/96096000.0L) + 9.0L/70070.0L) - 9.0L/20020.0L; + poly_val[3] = x*(x*(x*(x*(x*((323440939.0L/2594592000.0L)*x - 1139100889.0L/3027024000.0L) + 452791811.0L/1210809600.0L) - 2074439.0L/18532800.0L) - 9601741.0L/756756000.0L) - 2.0L/2145.0L) + 2.0L/715.0L; + poly_val[4] = x*(x*(x*(x*(x*(-46260611.0L/92664000.0L*x + 190244191.0L/126126000.0L) - 28367477.0L/18918900.0L) + 252001891.0L/567567000.0L) + 5861.0L/103488.0L) + 18.0L/3575.0L) - 9.0L/715.0L; + poly_val[5] = x*(x*(x*(x*(x*((216286123.0L/144144000.0L)*x - 381611761.0L/84084000.0L) + 25287533.0L/5605600.0L) - 12734027.0L/9702000.0L) - 9381241.0L/48048000.0L) - 63.0L/2860.0L) + 63.0L/1430.0L; + poly_val[6] = x*(x*(x*(x*(x*(-650812303.0L/185328000.0L*x + 328457149.0L/30888000.0L) - 912727037.0L/86486400.0L) + 192796181.0L/64864800.0L) + 
9072541.0L/16632000.0L) + 14.0L/165.0L) - 7.0L/55.0L; + poly_val[7] = x*(x*(x*(x*(x*((326737249.0L/49896000.0L)*x - 384845203.0L/19404000.0L) + 455197111.0L/23284800.0L) - 89503367.0L/17463600.0L) - 8190541.0L/6468000.0L) - 18.0L/55.0L) + 18.0L/55.0L; + poly_val[8] = x*(x*(x*(x*(x*(-15623459.0L/1584000.0L*x + 12051359.0L/404250.0L) - 24996777.0L/862400.0L) + 3398939.0L/485100.0L) + 3427741.0L/2352000.0L) + 9.0L/5.0L) - 9.0L/10.0L; + poly_val[9] = x*(pow(x, 2)*(x*(x*((109653629.0L/9072000.0L)*x - 128296219.0L/3528000.0L) + 442116007.0L/12700800.0L) - 18943367.0L/2268000.0L) - 9778141.0L/3175200.0L); + poly_val[10] = x*(x*(x*(x*(x*(-109653629.0L/9072000.0L*x + 191343373.0L/5292000.0L) - 217755137.0L/6350400.0L) + 10180259.0L/1134000.0L) - 3427741.0L/2352000.0L) + 9.0L/5.0L) + 9.0L/10.0L; + poly_val[11] = x*(x*(x*(x*(x*((15623459.0L/1584000.0L)*x - 379906003.0L/12936000.0L) + 48081059.0L/1724800.0L) - 31369501.0L/3880800.0L) + 8190541.0L/6468000.0L) - 18.0L/55.0L) - 18.0L/55.0L; + poly_val[12] = x*(x*(x*(x*(x*(-326737249.0L/49896000.0L*x + 566312567.0L/29106000.0L) - 108321659.0L/5821200.0L) + 49820941.0L/8731800.0L) - 9072541.0L/16632000.0L) + 14.0L/165.0L) + 7.0L/55.0L; + poly_val[13] = x*(x*(x*(x*(x*((650812303.0L/185328000.0L)*x - 4884169.0L/468000.0L) + 54375817.0L/5405400.0L) - 101735461.0L/32432400.0L) + 9381241.0L/48048000.0L) - 63.0L/2860.0L) - 63.0L/1430.0L; + poly_val[14] = x*(x*(x*(x*(x*(-216286123.0L/144144000.0L*x + 750779339.0L/168168000.0L) - 97002071.0L/22422400.0L) + 342332713.0L/252252000.0L) - 5861.0L/103488.0L) + 18.0L/3575.0L) + 9.0L/715.0L; + poly_val[15] = x*(x*(x*(x*(x*((46260611.0L/92664000.0L)*x - 1125304793.0L/756756000.0L) + 437719279.0L/302702400.0L) - 516063203.0L/1135134000.0L) + 9601741.0L/756756000.0L) - 2.0L/2145.0L) - 2.0L/715.0L; + poly_val[16] = x*(x*(x*(x*(x*(-323440939.0L/2594592000.0L*x + 93748807.0L/252252000.0L) - 219338303.0L/605404800.0L) + 3696851.0L/32432400.0L) - 196909.0L/96096000.0L) + 9.0L/70070.0L) + 9.0L/20020.0L; + 
poly_val[17] = x*(x*(x*(x*(x*((53864479.0L/2450448000.0L)*x - 374972053.0L/5717712000.0L) + 48826629.0L/762361600.0L) - 290471.0L/14414400.0L) + 10949.0L/51744000.0L) - 9.0L/777920.0L) - 9.0L/194480.0L; + poly_val[18] = x*(x*(x*(x*(x*(-3845371.0L/1575288000.0L*x + 374987663.0L/51459408000.0L) - 440056711.0L/61751289600.0L) + 2662049.0L/1187524800.0L) - 63397.0L/6054048000.0L) + 1.0L/1969110.0L) + 1.0L/437580.0L; + poly_val[19] = pow(x, 3)*(x*(x*((1888217.0L/14702688000.0L)*x - 19737953.0L/51459408000.0L) + 23187553.0L/61751289600.0L) - 9114251.0L/77189112000.0L); + break; + case 2: + poly_val[0] = x*(x*(x*(x*(-1888217.0L/2450448000.0L*x + 4978651.0L/2572970400.0L) - 11858753.0L/7718911200.0L) + 44201.0L/126126000.0L) + 63397.0L/3027024000.0L) + 1.0L/1969110.0L; + poly_val[1] = x*(x*(x*(x*((3845371.0L/262548000.0L)*x - 378705053.0L/10291881600.0L) + 451208881.0L/15437822400.0L) - 11393267.0L/1715313600.0L) - 10949.0L/25872000.0L) - 9.0L/777920.0L; + poly_val[2] = x*(x*(x*(x*(-53864479.0L/408408000.0L*x + 34466423.0L/103958400.0L) - 50212829.0L/190590400.0L) + 4865737.0L/81681600.0L) + 196909.0L/48048000.0L) + 9.0L/70070.0L; + poly_val[3] = x*(x*(x*(x*((323440939.0L/432432000.0L)*x - 1139100889.0L/605404800.0L) + 452791811.0L/302702400.0L) - 2074439.0L/6177600.0L) - 9601741.0L/378378000.0L) - 2.0L/2145.0L; + poly_val[4] = x*(x*(x*(x*(-46260611.0L/15444000.0L*x + 190244191.0L/25225200.0L) - 28367477.0L/4729725.0L) + 252001891.0L/189189000.0L) + 5861.0L/51744.0L) + 18.0L/3575.0L; + poly_val[5] = x*(x*(x*(x*((216286123.0L/24024000.0L)*x - 381611761.0L/16816800.0L) + 25287533.0L/1401400.0L) - 12734027.0L/3234000.0L) - 9381241.0L/24024000.0L) - 63.0L/2860.0L; + poly_val[6] = x*(x*(x*(x*(-650812303.0L/30888000.0L*x + 328457149.0L/6177600.0L) - 912727037.0L/21621600.0L) + 192796181.0L/21621600.0L) + 9072541.0L/8316000.0L) + 14.0L/165.0L; + poly_val[7] = x*(x*(x*(x*((326737249.0L/8316000.0L)*x - 384845203.0L/3880800.0L) + 455197111.0L/5821200.0L) - 89503367.0L/5821200.0L) - 
8190541.0L/3234000.0L) - 18.0L/55.0L; + poly_val[8] = x*(x*(x*(x*(-15623459.0L/264000.0L*x + 12051359.0L/80850.0L) - 24996777.0L/215600.0L) + 3398939.0L/161700.0L) + 3427741.0L/1176000.0L) + 9.0L/5.0L; + poly_val[9] = pow(x, 2)*(x*(x*((109653629.0L/1512000.0L)*x - 128296219.0L/705600.0L) + 442116007.0L/3175200.0L) - 18943367.0L/756000.0L) - 9778141.0L/3175200.0L; + poly_val[10] = x*(x*(x*(x*(-109653629.0L/1512000.0L*x + 191343373.0L/1058400.0L) - 217755137.0L/1587600.0L) + 10180259.0L/378000.0L) - 3427741.0L/1176000.0L) + 9.0L/5.0L; + poly_val[11] = x*(x*(x*(x*((15623459.0L/264000.0L)*x - 379906003.0L/2587200.0L) + 48081059.0L/431200.0L) - 31369501.0L/1293600.0L) + 8190541.0L/3234000.0L) - 18.0L/55.0L; + poly_val[12] = x*(x*(x*(x*(-326737249.0L/8316000.0L*x + 566312567.0L/5821200.0L) - 108321659.0L/1455300.0L) + 49820941.0L/2910600.0L) - 9072541.0L/8316000.0L) + 14.0L/165.0L; + poly_val[13] = x*(x*(x*(x*((650812303.0L/30888000.0L)*x - 4884169.0L/93600.0L) + 54375817.0L/1351350.0L) - 101735461.0L/10810800.0L) + 9381241.0L/24024000.0L) - 63.0L/2860.0L; + poly_val[14] = x*(x*(x*(x*(-216286123.0L/24024000.0L*x + 750779339.0L/33633600.0L) - 97002071.0L/5605600.0L) + 342332713.0L/84084000.0L) - 5861.0L/51744.0L) + 18.0L/3575.0L; + poly_val[15] = x*(x*(x*(x*((46260611.0L/15444000.0L)*x - 1125304793.0L/151351200.0L) + 437719279.0L/75675600.0L) - 516063203.0L/378378000.0L) + 9601741.0L/378378000.0L) - 2.0L/2145.0L; + poly_val[16] = x*(x*(x*(x*(-323440939.0L/432432000.0L*x + 93748807.0L/50450400.0L) - 219338303.0L/151351200.0L) + 3696851.0L/10810800.0L) - 196909.0L/48048000.0L) + 9.0L/70070.0L; + poly_val[17] = x*(x*(x*(x*((53864479.0L/408408000.0L)*x - 374972053.0L/1143542400.0L) + 48826629.0L/190590400.0L) - 290471.0L/4804800.0L) + 10949.0L/25872000.0L) - 9.0L/777920.0L; + poly_val[18] = x*(x*(x*(x*(-3845371.0L/262548000.0L*x + 374987663.0L/10291881600.0L) - 440056711.0L/15437822400.0L) + 2662049.0L/395841600.0L) - 63397.0L/3027024000.0L) + 1.0L/1969110.0L; + poly_val[19] 
/* Evaluate 20 polynomials at `x` and store the results in poly_val[0..19].
 *
 * `deriv` selects which table is evaluated:
 *   0 -- the polynomials themselves,
 *   1 -- their first derivatives with respect to `x`,
 *   2 -- their second derivatives with respect to `x`.
 * For any other value of `deriv` nothing is written to `poly_val`.
 *
 * All coefficients are exact rationals written as long double literals and
 * the polynomials are evaluated in Horner form.
 * NOTE(review): judging by the name, these are presumably the weights of a
 * spline interpolation kernel with 9 neighbours on each side and smoothness
 * 4 --- confirm against the code generator that produced this file.
 */
void beta_n9_m4(int deriv, double x, double *poly_val) {
    switch(deriv)
    {
    case 0:
        /* values; poly_val[9] is the only entry equal to 1 at x = 0 */
        poly_val[0] = x*(x*(x*(x*(x*(x*(x*(x*((41064761.0L/694702008000.0L)*x - 739266961.0L/2778808032000.0L) + 19438393.0L/42750892800.0L) - 252257.0L/721768320.0L) + 1645439.0L/16345929600.0L) - 63397.0L/163459296000.0L) + 63397.0L/18162144000.0L) + 1.0L/3938220.0L) - 1.0L/437580.0L);
        poly_val[1] = x*(x*(x*(x*(x*(x*(x*(x*(-24967737721.0L/22230464256000.0L*x + 112371677293.0L/22230464256000.0L) - 32009510671.0L/3705077376000.0L) + 73804272761.0L/11115232128000.0L) - 2040029.0L/1067489280.0L) + 10949.0L/1241856000.0L) - 10949.0L/155232000.0L) - 9.0L/1555840.0L) + 9.0L/194480.0L);
        poly_val[2] = x*(x*(x*(x*(x*(x*(x*(x*((2774247673.0L/274450176000.0L)*x - 346838241.0L/7623616000.0L) + 27714803.0L/356428800.0L) - 4099765343.0L/68612544000.0L) + 1571889307.0L/91483392000.0L) - 196909.0L/2018016000.0L) + 196909.0L/288288000.0L) + 9.0L/140140.0L) - 9.0L/20020.0L);
        poly_val[3] = x*(x*(x*(x*(x*(x*(x*(x*(-390139199.0L/6810804000.0L*x + 14047573997.0L/54486432000.0L) - 3429833893.0L/7783776000.0L) + 67075889.0L/198132480.0L) - 58224899.0L/598752000.0L) + 9601741.0L/13621608000.0L) - 9601741.0L/2270268000.0L) - 1.0L/2145.0L) + 2.0L/715.0L);
        poly_val[4] = x*(x*(x*(x*(x*(x*(x*(x*((3121232971.0L/13621608000.0L)*x - 2809674701.0L/2724321600.0L) + 8003230493.0L/4540536000.0L) - 4610296543.0L/3405402000.0L) + 5290082767.0L/13621608000.0L) - 5861.0L/1552320.0L) + 5861.0L/310464.0L) + 9.0L/3575.0L) - 9.0L/715.0L);
        poly_val[5] = x*(x*(x*(x*(x*(x*(x*(x*(-2774576947.0L/4036032000.0L*x + 1387590587.0L/448448000.0L) - 2134234717.0L/403603200.0L) + 234077981.0L/57657600.0L) - 62576029.0L/53813760.0L) + 9381241.0L/576576000.0L) - 9381241.0L/144144000.0L) - 63.0L/5720.0L) + 63.0L/1430.0L);
        poly_val[6] = x*(x*(x*(x*(x*(x*(x*(x*((3567551993.0L/2223936000.0L)*x - 1003596169.0L/138996000.0L) + 96036348283.0L/7783776000.0L) - 119617651.0L/12636000.0L) + 8431990223.0L/3113510400.0L) - 9072541.0L/149688000.0L) + 9072541.0L/49896000.0L) + 7.0L/165.0L) - 7.0L/55.0L);
        poly_val[7] = x*(x*(x*(x*(x*(x*(x*(x*(-3121831129.0L/1047816000.0L*x + 3512754907.0L/261954000.0L) - 29097643.0L/1270080.0L) + 2629412623.0L/149688000.0L) - 2639918789.0L/523908000.0L) + 8190541.0L/38808000.0L) - 8190541.0L/19404000.0L) - 9.0L/55.0L) + 18.0L/55.0L);
        poly_val[8] = x*(x*(x*(x*(x*(x*(x*(x*((173445649.0L/38808000.0L)*x - 173469767.0L/8624000.0L) + 2666634419.0L/77616000.0L) - 204389797.0L/7761600.0L) + 7042859.0L/924000.0L) - 3427741.0L/7056000.0L) + 3427741.0L/7056000.0L) + 9.0L/10.0L) - 9.0L/10.0L);
        poly_val[9] = pow(x, 2)*(pow(x, 2)*(x*(x*(x*(x*(-24977055133.0L/4572288000.0L*x + 4496095937.0L/182891520.0L) - 13710284363.0L/326592000.0L) + 73580652293.0L/2286144000.0L) - 43121414201.0L/4572288000.0L) + 117868837.0L/182891520.0L) - 9778141.0L/6350400.0L) + 1;
        poly_val[10] = x*(x*(x*(x*(x*(x*(x*(x*((24977055133.0L/4572288000.0L)*x - 28097774443.0L/1143072000.0L) + 913803707.0L/21772800.0L) - 7362601913.0L/228614400.0L) + 1244337839.0L/130636800.0L) - 3427741.0L/7056000.0L) - 3427741.0L/7056000.0L) + 9.0L/10.0L) + 9.0L/10.0L);
        poly_val[11] = x*(x*(x*(x*(x*(x*(x*(x*(-173445649.0L/38808000.0L*x + 173421531.0L/8624000.0L) - 2664897923.0L/77616000.0L) + 186093701.0L/7056000.0L) - 40471741.0L/5174400.0L) + 8190541.0L/38808000.0L) + 8190541.0L/19404000.0L) - 9.0L/55.0L) - 18.0L/55.0L);
        poly_val[12] = x*(x*(x*(x*(x*(x*(x*(x*((3121831129.0L/1047816000.0L)*x - 14045460533.0L/1047816000.0L) + 4796663819.0L/209563200.0L) - 41156842.0L/2338875.0L) + 5454795473.0L/1047816000.0L) - 9072541.0L/149688000.0L) - 9072541.0L/49896000.0L) + 7.0L/165.0L) + 7.0L/55.0L);
        poly_val[13] = x*(x*(x*(x*(x*(x*(x*(x*(-3567551993.0L/2223936000.0L*x + 16050429233.0L/2223936000.0L) - 223628941.0L/18144000.0L) + 2108327369.0L/222393600.0L) - 43498588667.0L/15567552000.0L) + 9381241.0L/576576000.0L) + 9381241.0L/144144000.0L) - 63.0L/5720.0L) - 63.0L/1430.0L);
        poly_val[14] = x*(x*(x*(x*(x*(x*(x*(x*((2774576947.0L/4036032000.0L)*x - 34674659.0L/11211200.0L) + 10660297499.0L/2018016000.0L) - 26627609.0L/6552000.0L) + 1606926733.0L/1345344000.0L) - 5861.0L/1552320.0L) - 5861.0L/310464.0L) + 9.0L/3575.0L) + 9.0L/715.0L);
        poly_val[15] = x*(x*(x*(x*(x*(x*(x*(x*(-3121232971.0L/13621608000.0L*x + 7021361617.0L/6810804000.0L) - 4797418079.0L/2724321600.0L) + 3691153121.0L/2724321600.0L) - 38663749.0L/97297200.0L) + 9601741.0L/13621608000.0L) + 9601741.0L/2270268000.0L) - 1.0L/2145.0L) - 2.0L/715.0L);
        poly_val[16] = x*(x*(x*(x*(x*(x*(x*(x*((390139199.0L/6810804000.0L)*x - 14042448331.0L/54486432000.0L) + 1142301647.0L/2594592000.0L) - 9228730547.0L/27243216000.0L) + 41576923.0L/419126400.0L) - 196909.0L/2018016000.0L) - 196909.0L/288288000.0L) + 9.0L/140140.0L) + 9.0L/20020.0L);
        poly_val[17] = x*(x*(x*(x*(x*(x*(x*(x*(-2774247673.0L/274450176000.0L*x + 1386894709.0L/30494464000.0L) - 304627159.0L/3920716800.0L) + 745799291.0L/12475008000.0L) - 1920359.0L/109824000.0L) + 10949.0L/1241856000.0L) + 10949.0L/155232000.0L) - 9.0L/1555840.0L) - 9.0L/194480.0L);
        poly_val[18] = x*(x*(x*(x*(x*(x*(x*(x*((24967737721.0L/22230464256000.0L)*x - 28084490549.0L/5557616064000.0L) + 8723736529.0L/1010475648000.0L) - 7383695351.0L/1111523212800.0L) + 2538335281.0L/1307674368000.0L) - 63397.0L/163459296000.0L) - 63397.0L/18162144000.0L) + 1.0L/3938220.0L) + 1.0L/437580.0L);
        poly_val[19] = pow(x, 5)*(x*(x*(x*(-41064761.0L/694702008000.0L*x + 147812887.0L/555761606400.0L) - 420895147.0L/926269344000.0L) + 138794879.0L/396972576000.0L) - 283684103.0L/2778808032000.0L);
        break;
    case 1:
        /* first derivatives of the `case 0` polynomials */
        poly_val[0] = x*(x*(x*(x*(x*(x*(x*((41064761.0L/77189112000.0L)*x - 739266961.0L/347351004000.0L) + 19438393.0L/6107270400.0L) - 252257.0L/120294720.0L) + 1645439.0L/3269185920.0L) - 63397.0L/40864824000.0L) + 63397.0L/6054048000.0L) + 1.0L/1969110.0L) - 1.0L/437580.0L;
        poly_val[1] = x*(x*(x*(x*(x*(x*(x*(-24967737721.0L/2470051584000.0L*x + 112371677293.0L/2778808032000.0L) - 32009510671.0L/529296768000.0L) + 73804272761.0L/1852538688000.0L) - 2040029.0L/213497856.0L) + 10949.0L/310464000.0L) - 10949.0L/51744000.0L) - 9.0L/777920.0L) + 9.0L/194480.0L;
        poly_val[2] = x*(x*(x*(x*(x*(x*(x*((2774247673.0L/30494464000.0L)*x - 346838241.0L/952952000.0L) + 27714803.0L/50918400.0L) - 4099765343.0L/11435424000.0L) + 1571889307.0L/18296678400.0L) - 196909.0L/504504000.0L) + 196909.0L/96096000.0L) + 9.0L/70070.0L) - 9.0L/20020.0L;
        poly_val[3] = x*(x*(x*(x*(x*(x*(x*(-390139199.0L/756756000.0L*x + 14047573997.0L/6810804000.0L) - 3429833893.0L/1111968000.0L) + 67075889.0L/33022080.0L) - 58224899.0L/119750400.0L) + 9601741.0L/3405402000.0L) - 9601741.0L/756756000.0L) - 2.0L/2145.0L) + 2.0L/715.0L;
        poly_val[4] = x*(x*(x*(x*(x*(x*(x*((3121232971.0L/1513512000.0L)*x - 2809674701.0L/340540200.0L) + 8003230493.0L/648648000.0L) - 4610296543.0L/567567000.0L) + 5290082767.0L/2724321600.0L) - 5861.0L/388080.0L) + 5861.0L/103488.0L) + 18.0L/3575.0L) - 9.0L/715.0L;
        poly_val[5] = x*(x*(x*(x*(x*(x*(x*(-2774576947.0L/448448000.0L*x + 1387590587.0L/56056000.0L) - 2134234717.0L/57657600.0L) + 234077981.0L/9609600.0L) - 62576029.0L/10762752.0L) + 9381241.0L/144144000.0L) - 9381241.0L/48048000.0L) - 63.0L/2860.0L) + 63.0L/1430.0L;
        poly_val[6] = x*(x*(x*(x*(x*(x*(x*((3567551993.0L/247104000.0L)*x - 1003596169.0L/17374500.0L) + 96036348283.0L/1111968000.0L) - 119617651.0L/2106000.0L) + 8431990223.0L/622702080.0L) - 9072541.0L/37422000.0L) + 9072541.0L/16632000.0L) + 14.0L/165.0L) - 7.0L/55.0L;
        poly_val[7] = x*(x*(x*(x*(x*(x*(x*(-3121831129.0L/116424000.0L*x + 3512754907.0L/32744250.0L) - 29097643.0L/181440.0L) + 2629412623.0L/24948000.0L) - 2639918789.0L/104781600.0L) + 8190541.0L/9702000.0L) - 8190541.0L/6468000.0L) - 18.0L/55.0L) + 18.0L/55.0L;
        poly_val[8] = x*(x*(x*(x*(x*(x*(x*((173445649.0L/4312000.0L)*x - 173469767.0L/1078000.0L) + 2666634419.0L/11088000.0L) - 204389797.0L/1293600.0L) + 7042859.0L/184800.0L) - 3427741.0L/1764000.0L) + 3427741.0L/2352000.0L) + 9.0L/5.0L) - 9.0L/10.0L;
        poly_val[9] = x*(pow(x, 2)*(x*(x*(x*(x*(-24977055133.0L/508032000.0L*x + 4496095937.0L/22861440.0L) - 13710284363.0L/46656000.0L) + 73580652293.0L/381024000.0L) - 43121414201.0L/914457600.0L) + 117868837.0L/45722880.0L) - 9778141.0L/3175200.0L);
        poly_val[10] = x*(x*(x*(x*(x*(x*(x*((24977055133.0L/508032000.0L)*x - 28097774443.0L/142884000.0L) + 913803707.0L/3110400.0L) - 7362601913.0L/38102400.0L) + 1244337839.0L/26127360.0L) - 3427741.0L/1764000.0L) - 3427741.0L/2352000.0L) + 9.0L/5.0L) + 9.0L/10.0L;
        poly_val[11] = x*(x*(x*(x*(x*(x*(x*(-173445649.0L/4312000.0L*x + 173421531.0L/1078000.0L) - 2664897923.0L/11088000.0L) + 186093701.0L/1176000.0L) - 40471741.0L/1034880.0L) + 8190541.0L/9702000.0L) + 8190541.0L/6468000.0L) - 18.0L/55.0L) - 18.0L/55.0L;
        poly_val[12] = x*(x*(x*(x*(x*(x*(x*((3121831129.0L/116424000.0L)*x - 14045460533.0L/130977000.0L) + 4796663819.0L/29937600.0L) - 82313684.0L/779625.0L) + 5454795473.0L/209563200.0L) - 9072541.0L/37422000.0L) - 9072541.0L/16632000.0L) + 14.0L/165.0L) + 7.0L/55.0L;
        poly_val[13] = x*(x*(x*(x*(x*(x*(x*(-3567551993.0L/247104000.0L*x + 16050429233.0L/277992000.0L) - 223628941.0L/2592000.0L) + 2108327369.0L/37065600.0L) - 43498588667.0L/3113510400.0L) + 9381241.0L/144144000.0L) + 9381241.0L/48048000.0L) - 63.0L/2860.0L) - 63.0L/1430.0L;
        poly_val[14] = x*(x*(x*(x*(x*(x*(x*((2774576947.0L/448448000.0L)*x - 34674659.0L/1401400.0L) + 10660297499.0L/288288000.0L) - 26627609.0L/1092000.0L) + 1606926733.0L/269068800.0L) - 5861.0L/388080.0L) - 5861.0L/103488.0L) + 18.0L/3575.0L) + 9.0L/715.0L;
        poly_val[15] = x*(x*(x*(x*(x*(x*(x*(-3121232971.0L/1513512000.0L*x + 7021361617.0L/851350500.0L) - 4797418079.0L/389188800.0L) + 3691153121.0L/454053600.0L) - 38663749.0L/19459440.0L) + 9601741.0L/3405402000.0L) + 9601741.0L/756756000.0L) - 2.0L/2145.0L) - 2.0L/715.0L;
        poly_val[16] = x*(x*(x*(x*(x*(x*(x*((390139199.0L/756756000.0L)*x - 14042448331.0L/6810804000.0L) + 1142301647.0L/370656000.0L) - 9228730547.0L/4540536000.0L) + 41576923.0L/83825280.0L) - 196909.0L/504504000.0L) - 196909.0L/96096000.0L) + 9.0L/70070.0L) + 9.0L/20020.0L;
        poly_val[17] = x*(x*(x*(x*(x*(x*(x*(-2774247673.0L/30494464000.0L*x + 1386894709.0L/3811808000.0L) - 304627159.0L/560102400.0L) + 745799291.0L/2079168000.0L) - 1920359.0L/21964800.0L) + 10949.0L/310464000.0L) + 10949.0L/51744000.0L) - 9.0L/777920.0L) - 9.0L/194480.0L;
        poly_val[18] = x*(x*(x*(x*(x*(x*(x*((24967737721.0L/2470051584000.0L)*x - 28084490549.0L/694702008000.0L) + 8723736529.0L/144353664000.0L) - 7383695351.0L/185253868800.0L) + 2538335281.0L/261534873600.0L) - 63397.0L/40864824000.0L) - 63397.0L/6054048000.0L) + 1.0L/1969110.0L) + 1.0L/437580.0L;
        poly_val[19] = pow(x, 4)*(x*(x*(x*(-41064761.0L/77189112000.0L*x + 147812887.0L/69470200800.0L) - 420895147.0L/132324192000.0L) + 138794879.0L/66162096000.0L) - 283684103.0L/555761606400.0L);
        break;
    case 2:
        /* second derivatives of the `case 0` polynomials */
        poly_val[0] = x*(x*(x*(x*(x*(x*((41064761.0L/9648639000.0L)*x - 739266961.0L/49621572000.0L) + 19438393.0L/1017878400.0L) - 252257.0L/24058944.0L) + 1645439.0L/817296480.0L) - 63397.0L/13621608000.0L) + 63397.0L/3027024000.0L) + 1.0L/1969110.0L;
        poly_val[1] = x*(x*(x*(x*(x*(x*(-24967737721.0L/308756448000.0L*x + 112371677293.0L/396972576000.0L) - 32009510671.0L/88216128000.0L) + 73804272761.0L/370507737600.0L) - 2040029.0L/53374464.0L) + 10949.0L/103488000.0L) - 10949.0L/25872000.0L) - 9.0L/777920.0L;
        poly_val[2] = x*(x*(x*(x*(x*(x*((2774247673.0L/3811808000.0L)*x - 346838241.0L/136136000.0L) + 27714803.0L/8486400.0L) - 4099765343.0L/2287084800.0L) + 1571889307.0L/4574169600.0L) - 196909.0L/168168000.0L) + 196909.0L/48048000.0L) + 9.0L/70070.0L;
        poly_val[3] = x*(x*(x*(x*(x*(x*(-390139199.0L/94594500.0L*x + 14047573997.0L/972972000.0L) - 3429833893.0L/185328000.0L) + 67075889.0L/6604416.0L) - 58224899.0L/29937600.0L) + 9601741.0L/1135134000.0L) - 9601741.0L/378378000.0L) - 2.0L/2145.0L;
        poly_val[4] = x*(x*(x*(x*(x*(x*((3121232971.0L/189189000.0L)*x - 2809674701.0L/48648600.0L) + 8003230493.0L/108108000.0L) - 4610296543.0L/113513400.0L) + 5290082767.0L/681080400.0L) - 5861.0L/129360.0L) + 5861.0L/51744.0L) + 18.0L/3575.0L;
        poly_val[5] = x*(x*(x*(x*(x*(x*(-2774576947.0L/56056000.0L*x + 1387590587.0L/8008000.0L) - 2134234717.0L/9609600.0L) + 234077981.0L/1921920.0L) - 62576029.0L/2690688.0L) + 9381241.0L/48048000.0L) - 9381241.0L/24024000.0L) - 63.0L/2860.0L;
        poly_val[6] = x*(x*(x*(x*(x*(x*((3567551993.0L/30888000.0L)*x - 7025173183.0L/17374500.0L) + 96036348283.0L/185328000.0L) - 119617651.0L/421200.0L) + 8431990223.0L/155675520.0L) - 9072541.0L/12474000.0L) + 9072541.0L/8316000.0L) + 14.0L/165.0L;
        poly_val[7] = x*(x*(x*(x*(x*(x*(-3121831129.0L/14553000.0L*x + 3512754907.0L/4677750.0L) - 29097643.0L/30240.0L) + 2629412623.0L/4989600.0L) - 2639918789.0L/26195400.0L) + 8190541.0L/3234000.0L) - 8190541.0L/3234000.0L) - 18.0L/55.0L;
        poly_val[8] = x*(x*(x*(x*(x*(x*((173445649.0L/539000.0L)*x - 173469767.0L/154000.0L) + 2666634419.0L/1848000.0L) - 204389797.0L/258720.0L) + 7042859.0L/46200.0L) - 3427741.0L/588000.0L) + 3427741.0L/1176000.0L) + 9.0L/5.0L;
        poly_val[9] = pow(x, 2)*(x*(x*(x*(x*(-24977055133.0L/63504000.0L*x + 4496095937.0L/3265920.0L) - 13710284363.0L/7776000.0L) + 73580652293.0L/76204800.0L) - 43121414201.0L/228614400.0L) + 117868837.0L/15240960.0L) - 9778141.0L/3175200.0L;
        poly_val[10] = x*(x*(x*(x*(x*(x*((24977055133.0L/63504000.0L)*x - 28097774443.0L/20412000.0L) + 913803707.0L/518400.0L) - 7362601913.0L/7620480.0L) + 1244337839.0L/6531840.0L) - 3427741.0L/588000.0L) - 3427741.0L/1176000.0L) + 9.0L/5.0L;
        poly_val[11] = x*(x*(x*(x*(x*(x*(-173445649.0L/539000.0L*x + 173421531.0L/154000.0L) - 2664897923.0L/1848000.0L) + 186093701.0L/235200.0L) - 40471741.0L/258720.0L) + 8190541.0L/3234000.0L) + 8190541.0L/3234000.0L) - 18.0L/55.0L;
        poly_val[12] = x*(x*(x*(x*(x*(x*((3121831129.0L/14553000.0L)*x - 14045460533.0L/18711000.0L) + 4796663819.0L/4989600.0L) - 82313684.0L/155925.0L) + 5454795473.0L/52390800.0L) - 9072541.0L/12474000.0L) - 9072541.0L/8316000.0L) + 14.0L/165.0L;
        poly_val[13] = x*(x*(x*(x*(x*(x*(-3567551993.0L/30888000.0L*x + 112353004631.0L/277992000.0L) - 223628941.0L/432000.0L) + 2108327369.0L/7413120.0L) - 43498588667.0L/778377600.0L) + 9381241.0L/48048000.0L) + 9381241.0L/24024000.0L) - 63.0L/2860.0L;
        poly_val[14] = x*(x*(x*(x*(x*(x*((2774576947.0L/56056000.0L)*x - 34674659.0L/200200.0L) + 10660297499.0L/48048000.0L) - 26627609.0L/218400.0L) + 1606926733.0L/67267200.0L) - 5861.0L/129360.0L) - 5861.0L/51744.0L) + 18.0L/3575.0L;
        poly_val[15] = x*(x*(x*(x*(x*(x*(-3121232971.0L/189189000.0L*x + 7021361617.0L/121621500.0L) - 4797418079.0L/64864800.0L) + 3691153121.0L/90810720.0L) - 38663749.0L/4864860.0L) + 9601741.0L/1135134000.0L) + 9601741.0L/378378000.0L) - 2.0L/2145.0L;
        poly_val[16] = x*(x*(x*(x*(x*(x*((390139199.0L/94594500.0L)*x - 14042448331.0L/972972000.0L) + 1142301647.0L/61776000.0L) - 9228730547.0L/908107200.0L) + 41576923.0L/20956320.0L) - 196909.0L/168168000.0L) - 196909.0L/48048000.0L) + 9.0L/70070.0L;
        poly_val[17] = x*(x*(x*(x*(x*(x*(-2774247673.0L/3811808000.0L*x + 1386894709.0L/544544000.0L) - 304627159.0L/93350400.0L) + 745799291.0L/415833600.0L) - 1920359.0L/5491200.0L) + 10949.0L/103488000.0L) + 10949.0L/25872000.0L) - 9.0L/777920.0L;
        poly_val[18] = x*(x*(x*(x*(x*(x*((24967737721.0L/308756448000.0L)*x - 28084490549.0L/99243144000.0L) + 8723736529.0L/24058944000.0L) - 7383695351.0L/37050773760.0L) + 2538335281.0L/65383718400.0L) - 63397.0L/13621608000.0L) - 63397.0L/3027024000.0L) + 1.0L/1969110.0L;
        poly_val[19] = pow(x, 3)*(x*(x*(x*(-41064761.0L/9648639000.0L*x + 147812887.0L/9924314400.0L) - 420895147.0L/22054032000.0L) + 138794879.0L/13232419200.0L) - 283684103.0L/138940401600.0L);
        break;
    }
}
#ifndef SPLINE_N9

#define SPLINE_N9

/* Polynomial evaluators of the "n9" family, one function per "m" value.
 *
 * Every function takes the same arguments:
 *   deriv    -- selects which table of polynomials is evaluated,
 *   x        -- the point at which the polynomials are evaluated,
 *   poly_val -- output array, filled by the function.
 * NOTE(review): for beta_n9_m4 the accepted `deriv` values are 0, 1 and 2
 * and 20 entries of `poly_val` are written; the other four functions
 * presumably follow the same convention --- confirm in spline_n9.cpp.
 */
void beta_n9_m0(const int deriv, const double x, double *__restrict__ poly_val);
void beta_n9_m1(const int deriv, const double x, double *__restrict__ poly_val);
void beta_n9_m2(const int deriv, const double x, double *__restrict__ poly_val);
void beta_n9_m3(const int deriv, const double x, double *__restrict__ poly_val);
void beta_n9_m4(const int deriv, const double x, double *__restrict__ poly_val);

#endif//SPLINE_N9
If not, see <http://www.gnu.org/licenses/> * +* * +* Contact: Cristian.Lalescu@ds.mpg.de * +* * +**********************************************************************/ + + + +#define NDEBUG + +#include <cassert> +#include <cmath> +#include <cstring> +#include "fftw_tools.hpp" +#include "vorticity_equation.hpp" +#include "scope_timer.hpp" + + + +template <class rnumber, + field_backend be> +void vorticity_equation<rnumber, be>::impose_zero_modes() +{ + TIMEZONE("vorticity_equation::impose_zero_modes"); + this->u->impose_zero_mode(); + this->v[0]->impose_zero_mode(); + this->v[1]->impose_zero_mode(); + this->v[2]->impose_zero_mode(); +} + +template <class rnumber, + field_backend be> +void vorticity_equation<rnumber, be>::update_checkpoint() +{ + std::string fname = this->get_current_fname(); + if (this->kk->layout->myrank == 0) + { + bool file_exists = false; + { + struct stat file_buffer; + file_exists = (stat(fname.c_str(), &file_buffer) == 0); + } + if (file_exists) + { + // check how many fields there are in the checkpoint file + // increment checkpoint if needed + hsize_t fields_stored; + hid_t fid, group_id; + fid = H5Fopen(fname.c_str(), H5F_ACC_RDONLY, H5P_DEFAULT); + group_id = H5Gopen(fid, "vorticity/complex", H5P_DEFAULT); + H5Gget_num_objs( + group_id, + &fields_stored); + bool dset_exists = H5Lexists( + group_id, + std::to_string(this->iteration).c_str(), + H5P_DEFAULT); + H5Gclose(group_id); + H5Fclose(fid); + if ((int(fields_stored) >= this->checkpoints_per_file) && + !dset_exists) + this->checkpoint++; + } + else + { + // create file, create fields_stored dset + hid_t fid = H5Fcreate( + fname.c_str(), + H5F_ACC_EXCL, + H5P_DEFAULT, + H5P_DEFAULT); + hid_t gg = H5Gcreate( + fid, + "vorticity", + H5P_DEFAULT, + H5P_DEFAULT, + H5P_DEFAULT); + hid_t ggg = H5Gcreate( + gg, + "complex", + H5P_DEFAULT, + H5P_DEFAULT, + H5P_DEFAULT); + H5Gclose(ggg); + H5Gclose(gg); + H5Fclose(fid); + } + } + MPI_Bcast(&this->checkpoint, 1, MPI_INT, 0, 
this->kk->layout->comm); +} + +template <class rnumber, + field_backend be> +vorticity_equation<rnumber, be>::vorticity_equation( + const char *NAME, + int nx, + int ny, + int nz, + double DKX, + double DKY, + double DKZ, + unsigned FFTW_PLAN_RIGOR) +{ + TIMEZONE("vorticity_equation::vorticity_equation"); + /* initialize name and basic stuff */ + strncpy(this->name, NAME, 256); + this->name[255] = '\0'; + this->iteration = 0; + this->checkpoint = 0; + + /* initialize fields */ + this->cvorticity = new field<rnumber, be, THREE>( + nx, ny, nz, MPI_COMM_WORLD, FFTW_PLAN_RIGOR); + this->rvorticity = new field<rnumber, be, THREE>( + nx, ny, nz, MPI_COMM_WORLD, FFTW_PLAN_RIGOR); + this->v[1] = new field<rnumber, be, THREE>( + nx, ny, nz, MPI_COMM_WORLD, FFTW_PLAN_RIGOR); + this->v[2] = new field<rnumber, be, THREE>( + nx, ny, nz, MPI_COMM_WORLD, FFTW_PLAN_RIGOR); + this->v[0] = this->cvorticity; + this->v[3] = this->cvorticity; + + this->cvelocity = new field<rnumber, be, THREE>( + nx, ny, nz, MPI_COMM_WORLD, FFTW_PLAN_RIGOR); + this->rvelocity = new field<rnumber, be, THREE>( + nx, ny, nz, MPI_COMM_WORLD, FFTW_PLAN_RIGOR); + this->u = this->cvelocity; + + /* initialize kspace */ + this->kk = new kspace<be, SMOOTH>( + this->cvorticity->clayout, DKX, DKY, DKZ); + + /* ``physical'' parameters etc, initialized here just in case */ + + this->nu = 0.1; + this->fmode = 1; + this->famplitude = 1.0; + this->fk0 = 2.0; + this->fk1 = 4.0; +} + +template <class rnumber, + field_backend be> +vorticity_equation<rnumber, be>::~vorticity_equation() +{ + TIMEZONE("vorticity_equation::~vorticity_equation"); + delete this->kk; + delete this->cvorticity; + delete this->rvorticity; + delete this->v[1]; + delete this->v[2]; + delete this->cvelocity; + delete this->rvelocity; +} + +template <class rnumber, + field_backend be> +void vorticity_equation<rnumber, be>::compute_vorticity() +{ + TIMEZONE("vorticity_equation::compute_vorticity"); + this->cvorticity->real_space_representation = false; 
+ this->kk->CLOOP_K2( + [&](ptrdiff_t cindex, + ptrdiff_t xindex, + ptrdiff_t yindex, + ptrdiff_t zindex, + double k2){ + if (k2 <= this->kk->kM2) + { + this->cvorticity->cval(cindex,0,0) = -(this->kk->ky[yindex]*this->u->cval(cindex,2,1) - this->kk->kz[zindex]*this->u->cval(cindex,1,1)); + this->cvorticity->cval(cindex,0,1) = (this->kk->ky[yindex]*this->u->cval(cindex,2,0) - this->kk->kz[zindex]*this->u->cval(cindex,1,0)); + this->cvorticity->cval(cindex,1,0) = -(this->kk->kz[zindex]*this->u->cval(cindex,0,1) - this->kk->kx[xindex]*this->u->cval(cindex,2,1)); + this->cvorticity->cval(cindex,1,1) = (this->kk->kz[zindex]*this->u->cval(cindex,0,0) - this->kk->kx[xindex]*this->u->cval(cindex,2,0)); + this->cvorticity->cval(cindex,2,0) = -(this->kk->kx[xindex]*this->u->cval(cindex,1,1) - this->kk->ky[yindex]*this->u->cval(cindex,0,1)); + this->cvorticity->cval(cindex,2,1) = (this->kk->kx[xindex]*this->u->cval(cindex,1,0) - this->kk->ky[yindex]*this->u->cval(cindex,0,0)); + //ptrdiff_t tindex = 3*cindex; + //this->cvorticity->get_cdata()[tindex+0][0] = -(this->kk->ky[yindex]*this->u->get_cdata()[tindex+2][1] - this->kk->kz[zindex]*this->u->get_cdata()[tindex+1][1]); + //this->cvorticity->get_cdata()[tindex+1][0] = -(this->kk->kz[zindex]*this->u->get_cdata()[tindex+0][1] - this->kk->kx[xindex]*this->u->get_cdata()[tindex+2][1]); + //this->cvorticity->get_cdata()[tindex+2][0] = -(this->kk->kx[xindex]*this->u->get_cdata()[tindex+1][1] - this->kk->ky[yindex]*this->u->get_cdata()[tindex+0][1]); + //this->cvorticity->get_cdata()[tindex+0][1] = (this->kk->ky[yindex]*this->u->get_cdata()[tindex+2][0] - this->kk->kz[zindex]*this->u->get_cdata()[tindex+1][0]); + //this->cvorticity->get_cdata()[tindex+1][1] = (this->kk->kz[zindex]*this->u->get_cdata()[tindex+0][0] - this->kk->kx[xindex]*this->u->get_cdata()[tindex+2][0]); + //this->cvorticity->get_cdata()[tindex+2][1] = (this->kk->kx[xindex]*this->u->get_cdata()[tindex+1][0] - 
this->kk->ky[yindex]*this->u->get_cdata()[tindex+0][0]); + } + else + std::fill_n((rnumber*)(this->cvorticity->get_cdata()+3*cindex), 6, 0.0); + } + ); + this->cvorticity->symmetrize(); +} + +template <class rnumber, + field_backend be> +void vorticity_equation<rnumber, be>::compute_velocity(field<rnumber, be, THREE> *vorticity) +{ + TIMEZONE("vorticity_equation::compute_velocity"); + this->u->real_space_representation = false; + this->kk->CLOOP_K2( + [&](ptrdiff_t cindex, + ptrdiff_t xindex, + ptrdiff_t yindex, + ptrdiff_t zindex, + double k2){ + if (k2 <= this->kk->kM2 && k2 > 0) + { + this->u->cval(cindex,0,0) = -(this->kk->ky[yindex]*vorticity->cval(cindex,2,1) - this->kk->kz[zindex]*vorticity->cval(cindex,1,1)) / k2; + this->u->cval(cindex,0,1) = (this->kk->ky[yindex]*vorticity->cval(cindex,2,0) - this->kk->kz[zindex]*vorticity->cval(cindex,1,0)) / k2; + this->u->cval(cindex,1,0) = -(this->kk->kz[zindex]*vorticity->cval(cindex,0,1) - this->kk->kx[xindex]*vorticity->cval(cindex,2,1)) / k2; + this->u->cval(cindex,1,1) = (this->kk->kz[zindex]*vorticity->cval(cindex,0,0) - this->kk->kx[xindex]*vorticity->cval(cindex,2,0)) / k2; + this->u->cval(cindex,2,0) = -(this->kk->kx[xindex]*vorticity->cval(cindex,1,1) - this->kk->ky[yindex]*vorticity->cval(cindex,0,1)) / k2; + this->u->cval(cindex,2,1) = (this->kk->kx[xindex]*vorticity->cval(cindex,1,0) - this->kk->ky[yindex]*vorticity->cval(cindex,0,0)) / k2; + //ptrdiff_t tindex = 3*cindex; + //this->u->get_cdata()[tindex+0][0] = -(this->kk->ky[yindex]*vorticity->get_cdata()[tindex+2][1] - this->kk->kz[zindex]*vorticity->get_cdata()[tindex+1][1]) / k2; + //this->u->get_cdata()[tindex+0][1] = (this->kk->ky[yindex]*vorticity->get_cdata()[tindex+2][0] - this->kk->kz[zindex]*vorticity->get_cdata()[tindex+1][0]) / k2; + //this->u->get_cdata()[tindex+1][0] = -(this->kk->kz[zindex]*vorticity->get_cdata()[tindex+0][1] - this->kk->kx[xindex]*vorticity->get_cdata()[tindex+2][1]) / k2; + //this->u->get_cdata()[tindex+1][1] = 
(this->kk->kz[zindex]*vorticity->get_cdata()[tindex+0][0] - this->kk->kx[xindex]*vorticity->get_cdata()[tindex+2][0]) / k2; + //this->u->get_cdata()[tindex+2][0] = -(this->kk->kx[xindex]*vorticity->get_cdata()[tindex+1][1] - this->kk->ky[yindex]*vorticity->get_cdata()[tindex+0][1]) / k2; + //this->u->get_cdata()[tindex+2][1] = (this->kk->kx[xindex]*vorticity->get_cdata()[tindex+1][0] - this->kk->ky[yindex]*vorticity->get_cdata()[tindex+0][0]) / k2; + } + else + std::fill_n((rnumber*)(this->u->get_cdata()+3*cindex), 6, 0.0); + } + ); + this->u->symmetrize(); +} + +template <class rnumber, + field_backend be> +void vorticity_equation<rnumber, be>::add_forcing( + field<rnumber, be, THREE> *dst, + field<rnumber, be, THREE> *vort_field, + rnumber factor) +{ + TIMEZONE("vorticity_equation::add_forcing"); + if (strcmp(this->forcing_type, "none") == 0) + return; + if (strcmp(this->forcing_type, "Kolmogorov") == 0) + { + ptrdiff_t cindex; + if (this->cvorticity->clayout->myrank == this->cvorticity->clayout->rank[0][this->fmode]) + { + cindex = ((this->fmode - this->cvorticity->clayout->starts[0]) * this->cvorticity->clayout->sizes[1])*this->cvorticity->clayout->sizes[2]; + dst->cval(cindex,2, 0) -= this->famplitude*factor/2; + //dst->get_cdata()[cindex*3+2][0] -= this->famplitude*factor/2; + } + if (this->cvorticity->clayout->myrank == this->cvorticity->clayout->rank[0][this->cvorticity->clayout->sizes[0] - this->fmode]) + { + cindex = ((this->cvorticity->clayout->sizes[0] - this->fmode - this->cvorticity->clayout->starts[0]) * this->cvorticity->clayout->sizes[1])*this->cvorticity->clayout->sizes[2]; + dst->cval(cindex, 2, 0) -= this->famplitude*factor/2; + //dst->get_cdata()[cindex*3+2][0] -= this->famplitude*factor/2; + } + return; + } + if (strcmp(this->forcing_type, "linear") == 0) + { + this->kk->CLOOP( + [&](ptrdiff_t cindex, + ptrdiff_t xindex, + ptrdiff_t yindex, + ptrdiff_t zindex){ + double knorm = sqrt(this->kk->kx[xindex]*this->kk->kx[xindex] + + 
this->kk->ky[yindex]*this->kk->ky[yindex] + + this->kk->kz[zindex]*this->kk->kz[zindex]); + if ((this->fk0 <= knorm) && + (this->fk1 >= knorm)) + for (int c=0; c<3; c++) + for (int i=0; i<2; i++) + dst->cval(cindex,c,i) += this->famplitude*vort_field->cval(cindex,c,i)*factor; + //dst->get_cdata()[cindex*3+c][i] += this->famplitude*vort_field->get_cdata()[cindex*3+c][i]*factor; + } + ); + return; + } +} + +template <class rnumber, + field_backend be> +void vorticity_equation<rnumber, be>::omega_nonlin( + int src) +{ + DEBUG_MSG("vorticity_equation::omega_nonlin(%d)\n", src); + assert(src >= 0 && src < 3); + this->compute_velocity(this->v[src]); + /* get fields from Fourier space to real space */ + this->u->ift(); + this->rvorticity->real_space_representation = false; + *this->rvorticity = this->v[src]->get_cdata(); + this->rvorticity->ift(); + /* compute cross product $u \times \omega$, and normalize */ + this->u->RLOOP( + [&](ptrdiff_t rindex, + ptrdiff_t xindex, + ptrdiff_t yindex, + ptrdiff_t zindex){ + //ptrdiff_t tindex = 3*rindex; + rnumber tmp[3]; + for (int cc=0; cc<3; cc++) + tmp[cc] = (this->u->rval(rindex,(cc+1)%3)*this->rvorticity->rval(rindex,(cc+2)%3) - + this->u->rval(rindex,(cc+2)%3)*this->rvorticity->rval(rindex,(cc+1)%3)); + //tmp[cc][0] = (this->u->get_rdata()[tindex+(cc+1)%3]*this->rvorticity->get_rdata()[tindex+(cc+2)%3] - + // this->u->get_rdata()[tindex+(cc+2)%3]*this->rvorticity->get_rdata()[tindex+(cc+1)%3]); + for (int cc=0; cc<3; cc++) + this->u->rval(rindex,cc) = tmp[cc] / this->u->npoints; + //this->u->get_rdata()[(3*rindex)+cc] = tmp[cc][0] / this->u->npoints; + } + ); + /* go back to Fourier space */ + //this->clean_up_real_space(this->ru, 3); + this->u->dft(); + this->kk->template dealias<rnumber, THREE>(this->u->get_cdata()); + /* $\imath k \times Fourier(u \times \omega)$ */ + this->kk->CLOOP( + [&](ptrdiff_t cindex, + ptrdiff_t xindex, + ptrdiff_t yindex, + ptrdiff_t zindex){ + rnumber tmp[3][2]; + { + tmp[0][0] = 
-(this->kk->ky[yindex]*this->u->cval(cindex,2,1) - this->kk->kz[zindex]*this->u->cval(cindex,1,1)); + tmp[1][0] = -(this->kk->kz[zindex]*this->u->cval(cindex,0,1) - this->kk->kx[xindex]*this->u->cval(cindex,2,1)); + tmp[2][0] = -(this->kk->kx[xindex]*this->u->cval(cindex,1,1) - this->kk->ky[yindex]*this->u->cval(cindex,0,1)); + tmp[0][1] = (this->kk->ky[yindex]*this->u->cval(cindex,2,0) - this->kk->kz[zindex]*this->u->cval(cindex,1,0)); + tmp[1][1] = (this->kk->kz[zindex]*this->u->cval(cindex,0,0) - this->kk->kx[xindex]*this->u->cval(cindex,2,0)); + tmp[2][1] = (this->kk->kx[xindex]*this->u->cval(cindex,1,0) - this->kk->ky[yindex]*this->u->cval(cindex,0,0)); + } + //ptrdiff_t tindex = 3*cindex; + //{ + // tmp[0][0] = -(this->kk->ky[yindex]*this->u->get_cdata()[tindex+2][1] - this->kk->kz[zindex]*this->u->get_cdata()[tindex+1][1]); + // tmp[1][0] = -(this->kk->kz[zindex]*this->u->get_cdata()[tindex+0][1] - this->kk->kx[xindex]*this->u->get_cdata()[tindex+2][1]); + // tmp[2][0] = -(this->kk->kx[xindex]*this->u->get_cdata()[tindex+1][1] - this->kk->ky[yindex]*this->u->get_cdata()[tindex+0][1]); + // tmp[0][1] = (this->kk->ky[yindex]*this->u->get_cdata()[tindex+2][0] - this->kk->kz[zindex]*this->u->get_cdata()[tindex+1][0]); + // tmp[1][1] = (this->kk->kz[zindex]*this->u->get_cdata()[tindex+0][0] - this->kk->kx[xindex]*this->u->get_cdata()[tindex+2][0]); + // tmp[2][1] = (this->kk->kx[xindex]*this->u->get_cdata()[tindex+1][0] - this->kk->ky[yindex]*this->u->get_cdata()[tindex+0][0]); + //} + for (int cc=0; cc<3; cc++) for (int i=0; i<2; i++) + this->u->cval(cindex, cc, i) = tmp[cc][i]; + //this->u->get_cdata()[3*cindex+cc][i] = tmp[cc][i]; + } + ); + this->add_forcing(this->u, this->v[src], 1.0); + this->kk->template force_divfree<rnumber>(this->u->get_cdata()); +} + +template <class rnumber, + field_backend be> +void vorticity_equation<rnumber, be>::step(double dt) +{ + DEBUG_MSG("vorticity_equation::step\n"); + TIMEZONE("vorticity_equation::step"); + *this->v[1] = 
0.0; + this->omega_nonlin(0); + this->kk->CLOOP_K2( + [&](ptrdiff_t cindex, + ptrdiff_t xindex, + ptrdiff_t yindex, + ptrdiff_t zindex, + double k2){ + if (k2 <= this->kk->kM2) + { + double factor0; + factor0 = exp(-this->nu * k2 * dt); + for (int cc=0; cc<3; cc++) for (int i=0; i<2; i++) + this->v[1]->cval(cindex,cc,i) = ( + this->v[0]->cval(cindex,cc,i) + + dt*this->u->cval(cindex,cc,i))*factor0; + //this->v[1]->get_cdata()[3*cindex+cc][i] = ( + // this->v[0]->get_cdata()[3*cindex+cc][i] + + // dt*this->u->get_cdata()[3*cindex+cc][i])*factor0; + } + } + ); + + this->omega_nonlin(1); + this->kk->CLOOP_K2( + [&](ptrdiff_t cindex, + ptrdiff_t xindex, + ptrdiff_t yindex, + ptrdiff_t zindex, + double k2){ + if (k2 <= this->kk->kM2) + { + double factor0, factor1; + factor0 = exp(-this->nu * k2 * dt/2); + factor1 = exp( this->nu * k2 * dt/2); + for (int cc=0; cc<3; cc++) for (int i=0; i<2; i++) + this->v[2]->cval(cindex, cc, i) = ( + 3*this->v[0]->cval(cindex,cc,i)*factor0 + + ( this->v[1]->cval(cindex,cc,i) + + dt*this->u->cval(cindex,cc,i))*factor1)*0.25; + //this->v[2]->get_cdata()[3*cindex+cc][i] = ( + // 3*this->v[0]->get_cdata()[3*cindex+cc][i]*factor0 + + // (this->v[1]->get_cdata()[3*cindex+cc][i] + + // dt*this->u->get_cdata()[3*cindex+cc][i])*factor1)*0.25; + } + } + ); + + this->omega_nonlin(2); + this->kk->CLOOP_K2( + [&](ptrdiff_t cindex, + ptrdiff_t xindex, + ptrdiff_t yindex, + ptrdiff_t zindex, + double k2){ + if (k2 <= this->kk->kM2) + { + double factor0; + factor0 = exp(-this->nu * k2 * dt * 0.5); + for (int cc=0; cc<3; cc++) for (int i=0; i<2; i++) + this->v[3]->cval(cindex,cc,i) = ( + this->v[0]->cval(cindex,cc,i)*factor0 + + 2*(this->v[2]->cval(cindex,cc,i) + + dt*this->u->cval(cindex,cc,i)))*factor0/3; + //this->v[3]->get_cdata()[3*cindex+cc][i] = ( + // this->v[0]->get_cdata()[3*cindex+cc][i]*factor0 + + // 2*(this->v[2]->get_cdata()[3*cindex+cc][i] + + // dt*this->u->get_cdata()[3*cindex+cc][i]))*factor0/3; + } + } + ); + + this->kk->template 
force_divfree<rnumber>(this->cvorticity->get_cdata()); + this->cvorticity->symmetrize(); + this->iteration++; +} + +template <class rnumber, + field_backend be> +void vorticity_equation<rnumber, be>::compute_pressure(field<rnumber, be, ONE> *pressure) +{ + TIMEZONE("vorticity_equation::compute_pressure"); + /* assume velocity is already in real space representation */ + + this->v[1]->real_space_representation = true; + /* diagonal terms 11 22 33 */ + this->v[1]->RLOOP ( + [&](ptrdiff_t rindex, + ptrdiff_t xindex, + ptrdiff_t yindex, + ptrdiff_t zindex){ + //ptrdiff_t tindex = 3*rindex; + for (int cc=0; cc<3; cc++) + this->v[1]->rval(rindex,cc) = this->u->rval(rindex,cc)*this->u->rval(rindex,cc); + //this->v[1]->get_rdata()[tindex+cc] = this->u->get_rdata()[tindex+cc]*this->u->get_rdata()[tindex+cc]; + } + ); + //this->clean_up_real_space(this->rv[1], 3); + this->v[1]->dft(); + this->kk->template dealias<rnumber, THREE>(this->v[1]->get_cdata()); + this->kk->CLOOP_K2( + [&](ptrdiff_t cindex, + ptrdiff_t xindex, + ptrdiff_t yindex, + ptrdiff_t zindex, + double k2){ + if (k2 <= this->kk->kM2 && k2 > 0) + { + ptrdiff_t tindex = 3*cindex; + for (int i=0; i<2; i++) + { + pressure->get_cdata()[cindex][i] = \ + -(this->kk->kx[xindex]*this->kk->kx[xindex]*this->v[1]->get_cdata()[tindex+0][i] + + this->kk->ky[yindex]*this->kk->ky[yindex]*this->v[1]->get_cdata()[tindex+1][i] + + this->kk->kz[zindex]*this->kk->kz[zindex]*this->v[1]->get_cdata()[tindex+2][i]); + } + } + else + std::fill_n((rnumber*)(pressure->get_cdata()+cindex), 2, 0.0); + } + ); + /* off-diagonal terms 12 23 31 */ + this->v[1]->real_space_representation = true; + this->v[1]->RLOOP ( + [&](ptrdiff_t rindex, + ptrdiff_t xindex, + ptrdiff_t yindex, + ptrdiff_t zindex){ + //ptrdiff_t tindex = 3*rindex; + for (int cc=0; cc<3; cc++) + this->v[1]->rval(rindex,cc) = this->u->rval(rindex,cc)*this->u->rval(rindex,(cc+1)%3); + //this->v[1]->get_rdata()[tindex+cc] = 
this->u->get_rdata()[tindex+cc]*this->u->get_rdata()[tindex+(cc+1)%3]; + } + ); + //this->clean_up_real_space(this->rv[1], 3); + this->v[1]->dft(); + this->kk->template dealias<rnumber, THREE>(this->v[1]->get_cdata()); + this->kk->CLOOP_K2( + [&](ptrdiff_t cindex, + ptrdiff_t xindex, + ptrdiff_t yindex, + ptrdiff_t zindex, + double k2){ + if (k2 <= this->kk->kM2 && k2 > 0) + { + ptrdiff_t tindex = 3*cindex; + for (int i=0; i<2; i++) + { + pressure->get_cdata()[cindex][i] -= \ + 2*(this->kk->kx[xindex]*this->kk->ky[yindex]*this->v[1]->get_cdata()[tindex+0][i] + + this->kk->ky[yindex]*this->kk->kz[zindex]*this->v[1]->get_cdata()[tindex+1][i] + + this->kk->kz[zindex]*this->kk->kx[xindex]*this->v[1]->get_cdata()[tindex+2][i]); + pressure->get_cdata()[cindex][i] /= pressure->npoints*k2; + } + } + } + ); +} + + +/** \brief Compute Lagrangian acceleration. + * + * Acceleration is put in `acceleration` in the Fourier space representation. + */ +template <class rnumber, + field_backend be> +void vorticity_equation<rnumber, be>::compute_Lagrangian_acceleration( + field<rnumber, be, THREE> *acceleration) +{ + field<rnumber, be, ONE> *pressure = new field<rnumber, be, ONE>( + this->cvelocity->rlayout->sizes[2], + this->cvelocity->rlayout->sizes[1], + this->cvelocity->rlayout->sizes[0], + this->cvelocity->rlayout->comm, + this->cvelocity->fftw_plan_rigor); + this->compute_velocity(this->cvorticity); + this->cvelocity->ift(); + this->compute_pressure(pressure); + this->compute_velocity(this->cvorticity); + acceleration->real_space_representation = false; + *acceleration = 0.0; + this->kk->CLOOP_K2( + [&](ptrdiff_t cindex, + ptrdiff_t xindex, + ptrdiff_t yindex, + ptrdiff_t zindex, + double k2){ + if (k2 <= this->kk->kM2) + { + ptrdiff_t tindex = 3*cindex; + for (int cc=0; cc<3; cc++) + for (int i=0; i<2; i++) + acceleration->get_cdata()[tindex+cc][i] = \ + - this->nu*k2*this->cvelocity->get_cdata()[tindex+cc][i]; + if (strcmp(this->forcing_type, "linear") == 0) + { + double 
knorm = sqrt(k2); + if ((this->fk0 <= knorm) && + (this->fk1 >= knorm)) + for (int c=0; c<3; c++) + for (int i=0; i<2; i++) + acceleration->get_cdata()[tindex+c][i] += \ + this->famplitude*this->cvelocity->get_cdata()[tindex+c][i]; + } + acceleration->get_cdata()[tindex+0][0] += this->kk->kx[xindex]*pressure->get_cdata()[cindex][1]; + acceleration->get_cdata()[tindex+1][0] += this->kk->ky[yindex]*pressure->get_cdata()[cindex][1]; + acceleration->get_cdata()[tindex+2][0] += this->kk->kz[zindex]*pressure->get_cdata()[cindex][1]; + acceleration->get_cdata()[tindex+0][1] -= this->kk->kx[xindex]*pressure->get_cdata()[cindex][0]; + acceleration->get_cdata()[tindex+1][1] -= this->kk->ky[yindex]*pressure->get_cdata()[cindex][0]; + acceleration->get_cdata()[tindex+2][1] -= this->kk->kz[zindex]*pressure->get_cdata()[cindex][0]; + } + }); + delete pressure; +} + +template <class rnumber, + field_backend be> +void vorticity_equation<rnumber, be>::compute_Eulerian_acceleration( + field<rnumber, be, THREE> *acceleration) +{ + this->compute_velocity(this->cvorticity); + acceleration->real_space_representation = false; + /* put in linear terms */ + this->kk->CLOOP_K2( + [&](ptrdiff_t cindex, + ptrdiff_t xindex, + ptrdiff_t yindex, + ptrdiff_t zindex, + double k2){ + if (k2 <= this->kk->kM2) + { + ptrdiff_t tindex = 3*cindex; + for (int cc=0; cc<3; cc++) + for (int i=0; i<2; i++) + acceleration->get_cdata()[tindex+cc][i] = \ + - this->nu*k2*this->cvelocity->get_cdata()[tindex+cc][i]; + if (strcmp(this->forcing_type, "linear") == 0) + { + double knorm = sqrt(k2); + if ((this->fk0 <= knorm) && + (this->fk1 >= knorm)) + { + for (int c=0; c<3; c++) + for (int i=0; i<2; i++) + acceleration->get_cdata()[tindex+c][i] += \ + this->famplitude*this->cvelocity->get_cdata()[tindex+c][i]; + } + } + } + } + ); + this->cvelocity->ift(); + /* compute uu */ + /* 11 22 33 */ + this->v[1]->real_space_representation = true; + this->cvelocity->RLOOP ( + [&](ptrdiff_t rindex, + ptrdiff_t xindex, + 
ptrdiff_t yindex, + ptrdiff_t zindex){ + //ptrdiff_t tindex = 3*rindex; + for (int cc=0; cc<3; cc++) + this->v[1]->rval(rindex,cc) = \ + this->cvelocity->rval(rindex,cc)*this->cvelocity->rval(rindex,cc) / this->cvelocity->npoints; + //this->v[1]->get_rdata()[tindex+cc] = this->cvelocity->get_rdata()[tindex+cc]*this->cvelocity->get_rdata()[tindex+cc] / this->cvelocity->npoints; + } + ); + this->v[1]->dft(); + this->kk->template dealias<rnumber, THREE>(this->v[1]->get_cdata()); + this->kk->CLOOP_K2( + [&](ptrdiff_t cindex, + ptrdiff_t xindex, + ptrdiff_t yindex, + ptrdiff_t zindex, + double k2){ + if (k2 <= this->kk->kM2) + { + ptrdiff_t tindex = 3*cindex; + acceleration->get_cdata()[tindex+0][0] += + this->kk->kx[xindex]*this->v[1]->get_cdata()[tindex+0][1]; + acceleration->get_cdata()[tindex+0][1] += + -this->kk->kx[xindex]*this->v[1]->get_cdata()[tindex+0][0]; + acceleration->get_cdata()[tindex+1][0] += + this->kk->ky[yindex]*this->v[1]->get_cdata()[tindex+1][1]; + acceleration->get_cdata()[tindex+1][1] += + -this->kk->ky[yindex]*this->v[1]->get_cdata()[tindex+1][0]; + acceleration->get_cdata()[tindex+2][0] += + this->kk->kz[zindex]*this->v[1]->get_cdata()[tindex+2][1]; + acceleration->get_cdata()[tindex+2][1] += + -this->kk->kz[zindex]*this->v[1]->get_cdata()[tindex+2][0]; + } + } + ); + /* 12 23 31 */ + this->v[1]->real_space_representation = true; + this->cvelocity->RLOOP ( + [&](ptrdiff_t rindex, + ptrdiff_t xindex, + ptrdiff_t yindex, + ptrdiff_t zindex){ + //ptrdiff_t tindex = 3*rindex; + for (int cc=0; cc<3; cc++) + this->v[1]->rval(rindex,cc) = \ + this->cvelocity->rval(rindex,cc)*this->cvelocity->rval(rindex,(cc+1)%3) / this->cvelocity->npoints; + //this->v[1]->get_rdata()[tindex+cc] = this->cvelocity->get_rdata()[tindex+cc]*this->cvelocity->get_rdata()[tindex+(cc+1)%3] / this->cvelocity->npoints; + } + ); + this->v[1]->dft(); + this->kk->template dealias<rnumber, THREE>(this->v[1]->get_cdata()); + this->kk->CLOOP_K2( + [&](ptrdiff_t cindex, + ptrdiff_t 
xindex, + ptrdiff_t yindex, + ptrdiff_t zindex, + double k2){ + if (k2 <= this->kk->kM2) + { + ptrdiff_t tindex = 3*cindex; + acceleration->get_cdata()[tindex+0][0] += + (this->kk->ky[yindex]*this->v[1]->get_cdata()[tindex+0][1] + + this->kk->kz[zindex]*this->v[1]->get_cdata()[tindex+2][1]); + acceleration->get_cdata()[tindex+0][1] += + - (this->kk->ky[yindex]*this->v[1]->get_cdata()[tindex+0][0] + + this->kk->kz[zindex]*this->v[1]->get_cdata()[tindex+2][0]); + acceleration->get_cdata()[tindex+1][0] += + (this->kk->kz[zindex]*this->v[1]->get_cdata()[tindex+1][1] + + this->kk->kx[xindex]*this->v[1]->get_cdata()[tindex+0][1]); + acceleration->get_cdata()[tindex+1][1] += + - (this->kk->kz[zindex]*this->v[1]->get_cdata()[tindex+1][0] + + this->kk->kx[xindex]*this->v[1]->get_cdata()[tindex+0][0]); + acceleration->get_cdata()[tindex+2][0] += + (this->kk->kx[xindex]*this->v[1]->get_cdata()[tindex+2][1] + + this->kk->ky[yindex]*this->v[1]->get_cdata()[tindex+1][1]); + acceleration->get_cdata()[tindex+2][1] += + - (this->kk->kx[xindex]*this->v[1]->get_cdata()[tindex+2][0] + + this->kk->ky[yindex]*this->v[1]->get_cdata()[tindex+1][0]); + } + } + ); + if (this->kk->layout->myrank == this->kk->layout->rank[0][0]) + std::fill_n((rnumber*)(acceleration->get_cdata()), 6, 0.0); + this->kk->template force_divfree<rnumber>(acceleration->get_cdata()); +} + + +/*****************************************************************************/ + + + + +/*****************************************************************************/ +/* finally, force generation of code for single and double precision */ +template class vorticity_equation<float, FFTW>; +template class vorticity_equation<double, FFTW>; +/*****************************************************************************/ + diff --git a/bfps/cpp/vorticity_equation.hpp b/bfps/cpp/vorticity_equation.hpp new file mode 100644 index 0000000000000000000000000000000000000000..880f05d2d93ada14390f9ed0a003c5cc299633e2 --- /dev/null +++
b/bfps/cpp/vorticity_equation.hpp @@ -0,0 +1,137 @@ +/********************************************************************** +* * +* Copyright 2015 Max Planck Institute * +* for Dynamics and Self-Organization * +* * +* This file is part of bfps. * +* * +* bfps is free software: you can redistribute it and/or modify * +* it under the terms of the GNU General Public License as published * +* by the Free Software Foundation, either version 3 of the License, * +* or (at your option) any later version. * +* * +* bfps is distributed in the hope that it will be useful, * +* but WITHOUT ANY WARRANTY; without even the implied warranty of * +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * +* GNU General Public License for more details. * +* * +* You should have received a copy of the GNU General Public License * +* along with bfps. If not, see <http://www.gnu.org/licenses/> * +* * +* Contact: Cristian.Lalescu@ds.mpg.de * +* * +**********************************************************************/ + +#include <sys/stat.h> +#include <stdio.h> +#include <stdlib.h> +#include <iostream> + +#include "field.hpp" +#include "field_descriptor.hpp" + +#ifndef VORTICITY_EQUATION + +#define VORTICITY_EQUATION + +extern int myrank, nprocs; + + +/* container for field descriptor, fields themselves, parameters, etc + * This particular class is only meant as a stepping stone to a proper solver + * that only uses the field class (and related layout and kspace classes), and + * HDF5 for I/O. 
+ * */ + +template <typename rnumber, + field_backend be> +class vorticity_equation +{ + public: + /* name */ + char name[256]; + + /* iteration */ + int iteration; + int checkpoint; + int checkpoints_per_file; + + /* fields */ + field<rnumber, be, THREE> *cvorticity, *cvelocity; + field<rnumber, be, THREE> *rvorticity, *rvelocity; + kspace<be, SMOOTH> *kk; + + + /* short names for velocity, and 4 vorticity fields */ + field<rnumber, be, THREE> *u, *v[4]; + + /* physical parameters */ + double nu; + int fmode; // for Kolmogorov flow + double famplitude; // both for Kflow and band forcing + double fk0, fk1; // for band forcing + char forcing_type[128]; + + /* constructor, destructor */ + vorticity_equation( + const char *NAME, + int nx, + int ny, + int nz, + double DKX = 1.0, + double DKY = 1.0, + double DKZ = 1.0, + unsigned FFTW_PLAN_RIGOR = FFTW_MEASURE); + ~vorticity_equation(void); + + /* solver essential methods */ + void omega_nonlin(int src); + void step(double dt); + void impose_zero_modes(void); + void add_forcing(field<rnumber, be, THREE> *dst, + field<rnumber, be, THREE> *src_vorticity, + rnumber factor); + void compute_vorticity(void); + void compute_velocity(field<rnumber, be, THREE> *vorticity); + + /* I/O stuff */ + inline std::string get_current_fname() + { + return ( + std::string(this->name) + + std::string("_checkpoint_") + + std::to_string(this->checkpoint) + + std::string(".h5")); + } + void update_checkpoint(void); + inline void io_checkpoint(bool read = true) + { + assert(!this->cvorticity->real_space_representation); + if (!read) + this->update_checkpoint(); + std::string fname = this->get_current_fname(); + this->cvorticity->io( + fname, + "vorticity", + this->iteration, + read); + if (read) + { + #if (__GNUC__ <= 4 && __GNUC_MINOR__ < 7) + this->kk->low_pass<rnumber, THREE>(this->cvorticity->get_cdata(), this->kk->kM); + this->kk->force_divfree<rnumber>(this->cvorticity->get_cdata()); + #else + this->kk->template low_pass<rnumber, 
THREE>(this->cvorticity->get_cdata(), this->kk->kM); + this->kk->template force_divfree<rnumber>(this->cvorticity->get_cdata()); + #endif + } + } + + /* statistics and general postprocessing */ + void compute_pressure(field<rnumber, be, ONE> *pressure); + void compute_Eulerian_acceleration(field<rnumber, be, THREE> *acceleration); + void compute_Lagrangian_acceleration(field<rnumber, be, THREE> *acceleration); +}; + +#endif//VORTICITY_EQUATION + diff --git a/bfps/test/B32p1e4_checkpoint_0.h5 b/bfps/test/B32p1e4_checkpoint_0.h5 new file mode 100644 index 0000000000000000000000000000000000000000..236da62d4dffeb15b056ea3869f6343c74298e94 Binary files /dev/null and b/bfps/test/B32p1e4_checkpoint_0.h5 differ diff --git a/bfps/test/__init__.py b/bfps/test/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/bfps/test/test_bfps_NSVEparticles.py b/bfps/test/test_bfps_NSVEparticles.py new file mode 100644 index 0000000000000000000000000000000000000000..ab77e2103ccda7685cebe759f8e11cfe2a5b5ec9 --- /dev/null +++ b/bfps/test/test_bfps_NSVEparticles.py @@ -0,0 +1,56 @@ +#! 
/usr/bin/env python + +import os +import numpy as np +import h5py +import sys + +import bfps +from bfps import DNS + + +def main(): + niterations = 32 + nparticles = 10000 + njobs = 2 + c = DNS() + c.launch( + ['NSVEparticles', + '-n', '32', + '--src-simname', 'B32p1e4', + '--src-wd', bfps.lib_dir + '/test', + '--src-iteration', '0', + '--simname', 'dns_nsveparticles', + '--np', '4', + '--ntpp', '1', + '--niter_todo', '{0}'.format(niterations), + '--niter_out', '{0}'.format(niterations), + '--niter_stat', '1', + '--checkpoints_per_file', '{0}'.format(3), + '--nparticles', '{0}'.format(nparticles), + '--particle-rand-seed', '2', + '--njobs', '{0}'.format(njobs), + '--wd', './'] + + sys.argv[1:]) + f0 = h5py.File( + os.path.join( + os.path.join(bfps.lib_dir, 'test'), + 'B32p1e4_checkpoint_0.h5'), + 'r') + f1 = h5py.File(c.get_checkpoint_0_fname(), 'r') + for iteration in [0, 32, 64]: + field0 = f0['vorticity/complex/{0}'.format(iteration)].value + field1 = f1['vorticity/complex/{0}'.format(iteration)].value + assert(np.max(np.abs(field0 - field1)) < 1e-5) + x0 = f0['tracers0/state/{0}'.format(iteration)].value + x1 = f1['tracers0/state/{0}'.format(iteration)].value + assert(np.max(np.abs(x0 - x1)) < 1e-5) + y0 = f0['tracers0/rhs/{0}'.format(iteration)].value + y1 = f1['tracers0/rhs/{0}'.format(iteration)].value + assert(np.max(np.abs(y0 - y1)) < 1e-5) + print('SUCCESS! Basic test passed.') + return None + +if __name__ == '__main__': + main() + diff --git a/bfps/tools.py b/bfps/tools.py index ff5d365aa979fd0c98b9ab64fe8a2a5404f05474..69756ec648409ab52d57930d26b1ab1ca8b942c1 100644 --- a/bfps/tools.py +++ b/bfps/tools.py @@ -28,6 +28,36 @@ import sys import math import numpy as np +import h5py + +def create_alloc_early_dataset( + data_file, + dset_name, + dset_shape, + dset_maxshape, + dset_chunks, + # maybe something more general can be used here + dset_dtype = h5py.h5t.IEEE_F64LE): + # create the dataspace. 
+ space_id = h5py.h5s.create_simple( + dset_shape, + dset_maxshape) + # create the dataset creation property list. + dcpl = h5py.h5p.create(h5py.h5p.DATASET_CREATE) + # set the allocation time to "early". + dcpl.set_alloc_time(h5py.h5d.ALLOC_TIME_EARLY) + dcpl.set_chunk(dset_chunks) + # and now create dataset + if sys.version_info[0] == 3: + dset_name = dset_name.encode() + return h5py.h5d.create( + data_file.id, + dset_name, + dset_dtype, + space_id, + dcpl, + h5py.h5p.DEFAULT) + def generate_data_3D_uniform( n0, n1, n2, dtype = np.complex128, diff --git a/documentation/_static/overview.rst b/documentation/_static/overview.rst index 607cfcc4774cbfd583240d2e7f9bad3cc766af80..afe7a753666e6ea5911ce1266d0803aa25ea5c45 100644 --- a/documentation/_static/overview.rst +++ b/documentation/_static/overview.rst @@ -2,6 +2,65 @@ Overview and Tutorial ===================== +---------------- +General comments +---------------- + +The purpose of this code is to run pseudo-spectral DNS of turbulence, +and integrate particle trajectories in the resulting fields. +In brief, the main aim of the code is to simplify the launching of +compute jobs and postprocessing, up to and including the generation of +publication-ready figures. + +For research, people routinely write code from scratch because research +goals change to a point where modifying the previous code is too +expensive. +With bfps, the desire is to identify core functionality that should be +implemented in a library. +The core library can then be used by many problem-specific codes. + +In this sense, the structuring of the code-base is non-standard. +The core functionality is implemented in C++ (classes useful for +describing and working with fields or sets of particles), while a python +wrapper is used for generating "main" programmes to be linked against +the core library. +The core library uses MPI for parallelization, and the python wrapper +compiles this core library when being installed.
+The compilation environment can be configured for different +machines as required. + +Python3 "wrapper" +----------------- + +In principle, users of the code should only need to use python3 for +launching jobs and postprocessing data. +While python2 compatibility should not be too hard to maintain, the +usage of strings makes it a bit cumbersome --- +the code makes extensive usage of strings for `HDF5` I/O. + +Classes defined in the python package can be used to generate executable +codes, compile/launch them, and then for accessing and postprocessing +data. +Obviously, postprocessing methods can be optimized with C extensions or +otherwise, as needed. + +Code generation is quite straightforward, with C++ code snippets handled +as strings in the python code, such that they can be combined in +different ways. + +Once a "main" file has been written, it is compiled and linked against +the core library. +Depending on machine-specific settings, the code can then be launched +directly, or job scripts appropriate for queueing systems are generated +and submitted. + +C++ core library +---------------- + +A small set of base classes are implemented. + +[ some details to be added here ] + --------- Equations --------- diff --git a/documentation/cpp/cpp_config b/documentation/cpp/cpp_config new file mode 100644 index 0000000000000000000000000000000000000000..02cf369779ec1370da3303b018d39e11be9613d4 --- /dev/null +++ b/documentation/cpp/cpp_config @@ -0,0 +1,2494 @@ +# Doxyfile 1.8.13 + +# This file describes the settings to be used by the documentation system +# doxygen (www.doxygen.org) for a project. +# +# All text after a double hash (##) is considered a comment and is placed in +# front of the TAG it is preceding. +# +# All text after a single hash (#) is considered a comment and will be ignored. +# The format is: +# TAG = value [value, ...] +# For lists, items can also be appended using: +# TAG += value [value, ...] 
+# Values that contain spaces should be placed between quotes (\" \"). + +#--------------------------------------------------------------------------- +# Project related configuration options +#--------------------------------------------------------------------------- + +# This tag specifies the encoding used for all characters in the config file +# that follow. The default is UTF-8 which is also the encoding used for all text +# before the first occurrence of this tag. Doxygen uses libiconv (or the iconv +# built into libc) for the transcoding. See http://www.gnu.org/software/libiconv +# for the list of possible encodings. +# The default value is: UTF-8. + +DOXYFILE_ENCODING = UTF-8 + +# The PROJECT_NAME tag is a single word (or a sequence of words surrounded by +# double-quotes, unless you are using Doxywizard) that should identify the +# project for which the documentation is generated. This name is used in the +# title of most generated pages and in a few other places. +# The default value is: My Project. + +PROJECT_NAME = "BFPS" + +# The PROJECT_NUMBER tag can be used to enter a project or revision number. This +# could be handy for archiving the generated documentation or if some version +# control system is used. + +PROJECT_NUMBER = + +# Using the PROJECT_BRIEF tag one can provide an optional one line description +# for a project that appears at the top of each page and should give viewer a +# quick idea about the purpose of the project. Keep the description short. + +PROJECT_BRIEF = + +# With the PROJECT_LOGO tag one can specify a logo or an icon that is included +# in the documentation. The maximum height of the logo should not exceed 55 +# pixels and the maximum width should not exceed 200 pixels. Doxygen will copy +# the logo to the output directory. + +PROJECT_LOGO = + +# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute) path +# into which the generated documentation will be written. 
If a relative path is +# entered, it will be relative to the location where doxygen was started. If +# left blank the current directory will be used. + +OUTPUT_DIRECTORY = + +# If the CREATE_SUBDIRS tag is set to YES then doxygen will create 4096 sub- +# directories (in 2 levels) under the output directory of each output format and +# will distribute the generated files over these directories. Enabling this +# option can be useful when feeding doxygen a huge amount of source files, where +# putting all generated files in the same directory would otherwise causes +# performance problems for the file system. +# The default value is: NO. + +CREATE_SUBDIRS = NO + +# If the ALLOW_UNICODE_NAMES tag is set to YES, doxygen will allow non-ASCII +# characters to appear in the names of generated files. If set to NO, non-ASCII +# characters will be escaped, for example _xE3_x81_x84 will be used for Unicode +# U+3044. +# The default value is: NO. + +ALLOW_UNICODE_NAMES = NO + +# The OUTPUT_LANGUAGE tag is used to specify the language in which all +# documentation generated by doxygen is written. Doxygen will use this +# information to generate all constant output in the proper language. +# Possible values are: Afrikaans, Arabic, Armenian, Brazilian, Catalan, Chinese, +# Chinese-Traditional, Croatian, Czech, Danish, Dutch, English (United States), +# Esperanto, Farsi (Persian), Finnish, French, German, Greek, Hungarian, +# Indonesian, Italian, Japanese, Japanese-en (Japanese with English messages), +# Korean, Korean-en (Korean with English messages), Latvian, Lithuanian, +# Macedonian, Norwegian, Persian (Farsi), Polish, Portuguese, Romanian, Russian, +# Serbian, Serbian-Cyrillic, Slovak, Slovene, Spanish, Swedish, Turkish, +# Ukrainian and Vietnamese. +# The default value is: English. 
+ +OUTPUT_LANGUAGE = English + +# If the BRIEF_MEMBER_DESC tag is set to YES, doxygen will include brief member +# descriptions after the members that are listed in the file and class +# documentation (similar to Javadoc). Set to NO to disable this. +# The default value is: YES. + +BRIEF_MEMBER_DESC = YES + +# If the REPEAT_BRIEF tag is set to YES, doxygen will prepend the brief +# description of a member or function before the detailed description +# +# Note: If both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the +# brief descriptions will be completely suppressed. +# The default value is: YES. + +REPEAT_BRIEF = YES + +# This tag implements a quasi-intelligent brief description abbreviator that is +# used to form the text in various listings. Each string in this list, if found +# as the leading text of the brief description, will be stripped from the text +# and the result, after processing the whole list, is used as the annotated +# text. Otherwise, the brief description is used as-is. If left blank, the +# following values are used ($name is automatically replaced with the name of +# the entity):The $name class, The $name widget, The $name file, is, provides, +# specifies, contains, represents, a, an and the. + +ABBREVIATE_BRIEF = "The $name class" \ + "The $name widget" \ + "The $name file" \ + is \ + provides \ + specifies \ + contains \ + represents \ + a \ + an \ + the + +# If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then +# doxygen will generate a detailed section even if there is only a brief +# description. +# The default value is: NO. + +ALWAYS_DETAILED_SEC = NO + +# If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all +# inherited members of a class in the documentation of that class as if those +# members were ordinary class members. Constructors, destructors and assignment +# operators of the base classes will not be shown. +# The default value is: NO. 
+ +INLINE_INHERITED_MEMB = NO + +# If the FULL_PATH_NAMES tag is set to YES, doxygen will prepend the full path +# before files name in the file list and in the header files. If set to NO the +# shortest path that makes the file name unique will be used +# The default value is: YES. + +FULL_PATH_NAMES = YES + +# The STRIP_FROM_PATH tag can be used to strip a user-defined part of the path. +# Stripping is only done if one of the specified strings matches the left-hand +# part of the path. The tag can be used to show relative paths in the file list. +# If left blank the directory from which doxygen is run is used as the path to +# strip. +# +# Note that you can specify absolute paths here, but also relative paths, which +# will be relative from the directory where doxygen is started. +# This tag requires that the tag FULL_PATH_NAMES is set to YES. + +STRIP_FROM_PATH = + +# The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of the +# path mentioned in the documentation of a class, which tells the reader which +# header file to include in order to use a class. If left blank only the name of +# the header file containing the class definition is used. Otherwise one should +# specify the list of include paths that are normally passed to the compiler +# using the -I flag. + +STRIP_FROM_INC_PATH = + +# If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter (but +# less readable) file names. This can be useful is your file systems doesn't +# support long names like on DOS, Mac, or CD-ROM. +# The default value is: NO. + +SHORT_NAMES = NO + +# If the JAVADOC_AUTOBRIEF tag is set to YES then doxygen will interpret the +# first line (until the first dot) of a Javadoc-style comment as the brief +# description. If set to NO, the Javadoc-style will behave just like regular Qt- +# style comments (thus requiring an explicit @brief command for a brief +# description.) +# The default value is: NO. 
+ +JAVADOC_AUTOBRIEF = NO + +# If the QT_AUTOBRIEF tag is set to YES then doxygen will interpret the first +# line (until the first dot) of a Qt-style comment as the brief description. If +# set to NO, the Qt-style will behave just like regular Qt-style comments (thus +# requiring an explicit \brief command for a brief description.) +# The default value is: NO. + +QT_AUTOBRIEF = NO + +# The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make doxygen treat a +# multi-line C++ special comment block (i.e. a block of //! or /// comments) as +# a brief description. This used to be the default behavior. The new default is +# to treat a multi-line C++ comment block as a detailed description. Set this +# tag to YES if you prefer the old behavior instead. +# +# Note that setting this tag to YES also means that rational rose comments are +# not recognized any more. +# The default value is: NO. + +MULTILINE_CPP_IS_BRIEF = NO + +# If the INHERIT_DOCS tag is set to YES then an undocumented member inherits the +# documentation from any documented member that it re-implements. +# The default value is: YES. + +INHERIT_DOCS = YES + +# If the SEPARATE_MEMBER_PAGES tag is set to YES then doxygen will produce a new +# page for each member. If set to NO, the documentation of a member will be part +# of the file/class/namespace that contains it. +# The default value is: NO. + +SEPARATE_MEMBER_PAGES = NO + +# The TAB_SIZE tag can be used to set the number of spaces in a tab. Doxygen +# uses this value to replace tabs by spaces in code fragments. +# Minimum value: 1, maximum value: 16, default value: 4. + +TAB_SIZE = 4 + +# This tag can be used to specify a number of aliases that act as commands in +# the documentation. An alias has the form: +# name=value +# For example adding +# "sideeffect=@par Side Effects:\n" +# will allow you to put the command \sideeffect (or @sideeffect) in the +# documentation, which will result in a user-defined paragraph with heading +# "Side Effects:". 
You can put \n's in the value part of an alias to insert +# newlines. + +ALIASES = + +# This tag can be used to specify a number of word-keyword mappings (TCL only). +# A mapping has the form "name=value". For example adding "class=itcl::class" +# will allow you to use the command class in the itcl::class meaning. + +TCL_SUBST = + +# Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C sources +# only. Doxygen will then generate output that is more tailored for C. For +# instance, some of the names that are used will be different. The list of all +# members will be omitted, etc. +# The default value is: NO. + +OPTIMIZE_OUTPUT_FOR_C = YES + +# Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java or +# Python sources only. Doxygen will then generate output that is more tailored +# for that language. For instance, namespaces will be presented as packages, +# qualified scopes will look different, etc. +# The default value is: NO. + +OPTIMIZE_OUTPUT_JAVA = NO + +# Set the OPTIMIZE_FOR_FORTRAN tag to YES if your project consists of Fortran +# sources. Doxygen will then generate output that is tailored for Fortran. +# The default value is: NO. + +OPTIMIZE_FOR_FORTRAN = NO + +# Set the OPTIMIZE_OUTPUT_VHDL tag to YES if your project consists of VHDL +# sources. Doxygen will then generate output that is tailored for VHDL. +# The default value is: NO. + +OPTIMIZE_OUTPUT_VHDL = NO + +# Doxygen selects the parser to use depending on the extension of the files it +# parses. With this tag you can assign which parser to use for a given +# extension. Doxygen has a built-in mapping, but you can override or extend it +# using this tag. The format is ext=language, where ext is a file extension, and +# language is one of the parsers supported by doxygen: IDL, Java, Javascript, +# C#, C, C++, D, PHP, Objective-C, Python, Fortran (fixed format Fortran: +# FortranFixed, free formatted Fortran: FortranFree, unknown formatted Fortran: +# Fortran. 
In the later case the parser tries to guess whether the code is fixed +# or free formatted code, this is the default for Fortran type files), VHDL. For +# instance to make doxygen treat .inc files as Fortran files (default is PHP), +# and .f files as C (default is Fortran), use: inc=Fortran f=C. +# +# Note: For files without extension you can use no_extension as a placeholder. +# +# Note that for custom extensions you also need to set FILE_PATTERNS otherwise +# the files are not read by doxygen. + +EXTENSION_MAPPING = + +# If the MARKDOWN_SUPPORT tag is enabled then doxygen pre-processes all comments +# according to the Markdown format, which allows for more readable +# documentation. See http://daringfireball.net/projects/markdown/ for details. +# The output of markdown processing is further processed by doxygen, so you can +# mix doxygen, HTML, and XML commands with Markdown formatting. Disable only in +# case of backward compatibilities issues. +# The default value is: YES. + +MARKDOWN_SUPPORT = YES + +# When the TOC_INCLUDE_HEADINGS tag is set to a non-zero value, all headings up +# to that level are automatically included in the table of contents, even if +# they do not have an id attribute. +# Note: This feature currently applies only to Markdown headings. +# Minimum value: 0, maximum value: 99, default value: 0. +# This tag requires that the tag MARKDOWN_SUPPORT is set to YES. + +TOC_INCLUDE_HEADINGS = 0 + +# When enabled doxygen tries to link words that correspond to documented +# classes, or namespaces to their corresponding documentation. Such a link can +# be prevented in individual cases by putting a % sign in front of the word or +# globally by setting AUTOLINK_SUPPORT to NO. +# The default value is: YES. + +AUTOLINK_SUPPORT = YES + +# If you use STL classes (i.e. std::string, std::vector, etc.) 
but do not want +# to include (a tag file for) the STL sources as input, then you should set this +# tag to YES in order to let doxygen match functions declarations and +# definitions whose arguments contain STL classes (e.g. func(std::string); +# versus func(std::string) {}). This also make the inheritance and collaboration +# diagrams that involve STL classes more complete and accurate. +# The default value is: NO. + +BUILTIN_STL_SUPPORT = NO + +# If you use Microsoft's C++/CLI language, you should set this option to YES to +# enable parsing support. +# The default value is: NO. + +CPP_CLI_SUPPORT = NO + +# Set the SIP_SUPPORT tag to YES if your project consists of sip (see: +# http://www.riverbankcomputing.co.uk/software/sip/intro) sources only. Doxygen +# will parse them like normal C++ but will assume all classes use public instead +# of private inheritance when no explicit protection keyword is present. +# The default value is: NO. + +SIP_SUPPORT = NO + +# For Microsoft's IDL there are propget and propput attributes to indicate +# getter and setter methods for a property. Setting this option to YES will make +# doxygen to replace the get and set methods by a property in the documentation. +# This will only work if the methods are indeed getting or setting a simple +# type. If this is not the case, or you want to show the methods anyway, you +# should set this option to NO. +# The default value is: YES. + +IDL_PROPERTY_SUPPORT = YES + +# If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC +# tag is set to YES then doxygen will reuse the documentation of the first +# member in the group (if any) for the other members of the group. By default +# all members of a group must be documented explicitly. +# The default value is: NO. + +DISTRIBUTE_GROUP_DOC = NO + +# If one adds a struct or class to a group and this option is enabled, then also +# any nested class or struct is added to the same group. 
By default this option +# is disabled and one has to add nested compounds explicitly via \ingroup. +# The default value is: NO. + +GROUP_NESTED_COMPOUNDS = NO + +# Set the SUBGROUPING tag to YES to allow class member groups of the same type +# (for instance a group of public functions) to be put as a subgroup of that +# type (e.g. under the Public Functions section). Set it to NO to prevent +# subgrouping. Alternatively, this can be done per class using the +# \nosubgrouping command. +# The default value is: YES. + +SUBGROUPING = YES + +# When the INLINE_GROUPED_CLASSES tag is set to YES, classes, structs and unions +# are shown inside the group in which they are included (e.g. using \ingroup) +# instead of on a separate page (for HTML and Man pages) or section (for LaTeX +# and RTF). +# +# Note that this feature does not work in combination with +# SEPARATE_MEMBER_PAGES. +# The default value is: NO. + +INLINE_GROUPED_CLASSES = NO + +# When the INLINE_SIMPLE_STRUCTS tag is set to YES, structs, classes, and unions +# with only public data fields or simple typedef fields will be shown inline in +# the documentation of the scope in which they are defined (i.e. file, +# namespace, or group documentation), provided this scope is documented. If set +# to NO, structs, classes, and unions are shown on a separate page (for HTML and +# Man pages) or section (for LaTeX and RTF). +# The default value is: NO. + +INLINE_SIMPLE_STRUCTS = NO + +# When TYPEDEF_HIDES_STRUCT tag is enabled, a typedef of a struct, union, or +# enum is documented as struct, union, or enum with the name of the typedef. So +# typedef struct TypeS {} TypeT, will appear in the documentation as a struct +# with name TypeT. When disabled the typedef will appear as a member of a file, +# namespace, or class. And the struct will be named TypeS. 
This can typically be +# useful for C code in case the coding convention dictates that all compound +# types are typedef'ed and only the typedef is referenced, never the tag name. +# The default value is: NO. + +TYPEDEF_HIDES_STRUCT = NO + +# The size of the symbol lookup cache can be set using LOOKUP_CACHE_SIZE. This +# cache is used to resolve symbols given their name and scope. Since this can be +# an expensive process and often the same symbol appears multiple times in the +# code, doxygen keeps a cache of pre-resolved symbols. If the cache is too small +# doxygen will become slower. If the cache is too large, memory is wasted. The +# cache size is given by this formula: 2^(16+LOOKUP_CACHE_SIZE). The valid range +# is 0..9, the default is 0, corresponding to a cache size of 2^16=65536 +# symbols. At the end of a run doxygen will report the cache usage and suggest +# the optimal cache size from a speed point of view. +# Minimum value: 0, maximum value: 9, default value: 0. + +LOOKUP_CACHE_SIZE = 0 + +#--------------------------------------------------------------------------- +# Build related configuration options +#--------------------------------------------------------------------------- + +# If the EXTRACT_ALL tag is set to YES, doxygen will assume all entities in +# documentation are documented, even if no documentation was available. Private +# class members and static file members will be hidden unless the +# EXTRACT_PRIVATE respectively EXTRACT_STATIC tags are set to YES. +# Note: This will also disable the warnings about undocumented members that are +# normally produced when WARNINGS is set to YES. +# The default value is: NO. + +EXTRACT_ALL = NO + +# If the EXTRACT_PRIVATE tag is set to YES, all private members of a class will +# be included in the documentation. +# The default value is: NO. + +EXTRACT_PRIVATE = NO + +# If the EXTRACT_PACKAGE tag is set to YES, all members with package or internal +# scope will be included in the documentation. 
+# The default value is: NO. + +EXTRACT_PACKAGE = NO + +# If the EXTRACT_STATIC tag is set to YES, all static members of a file will be +# included in the documentation. +# The default value is: NO. + +EXTRACT_STATIC = NO + +# If the EXTRACT_LOCAL_CLASSES tag is set to YES, classes (and structs) defined +# locally in source files will be included in the documentation. If set to NO, +# only classes defined in header files are included. Does not have any effect +# for Java sources. +# The default value is: YES. + +EXTRACT_LOCAL_CLASSES = YES + +# This flag is only useful for Objective-C code. If set to YES, local methods, +# which are defined in the implementation section but not in the interface are +# included in the documentation. If set to NO, only methods in the interface are +# included. +# The default value is: NO. + +EXTRACT_LOCAL_METHODS = NO + +# If this flag is set to YES, the members of anonymous namespaces will be +# extracted and appear in the documentation as a namespace called +# 'anonymous_namespace{file}', where file will be replaced with the base name of +# the file that contains the anonymous namespace. By default anonymous namespace +# are hidden. +# The default value is: NO. + +EXTRACT_ANON_NSPACES = NO + +# If the HIDE_UNDOC_MEMBERS tag is set to YES, doxygen will hide all +# undocumented members inside documented classes or files. If set to NO these +# members will be included in the various overviews, but no documentation +# section is generated. This option has no effect if EXTRACT_ALL is enabled. +# The default value is: NO. + +HIDE_UNDOC_MEMBERS = NO + +# If the HIDE_UNDOC_CLASSES tag is set to YES, doxygen will hide all +# undocumented classes that are normally visible in the class hierarchy. If set +# to NO, these classes will be included in the various overviews. This option +# has no effect if EXTRACT_ALL is enabled. +# The default value is: NO. 
+ +HIDE_UNDOC_CLASSES = NO + +# If the HIDE_FRIEND_COMPOUNDS tag is set to YES, doxygen will hide all friend +# (class|struct|union) declarations. If set to NO, these declarations will be +# included in the documentation. +# The default value is: NO. + +HIDE_FRIEND_COMPOUNDS = NO + +# If the HIDE_IN_BODY_DOCS tag is set to YES, doxygen will hide any +# documentation blocks found inside the body of a function. If set to NO, these +# blocks will be appended to the function's detailed documentation block. +# The default value is: NO. + +HIDE_IN_BODY_DOCS = NO + +# The INTERNAL_DOCS tag determines if documentation that is typed after a +# \internal command is included. If the tag is set to NO then the documentation +# will be excluded. Set it to YES to include the internal documentation. +# The default value is: NO. + +INTERNAL_DOCS = NO + +# If the CASE_SENSE_NAMES tag is set to NO then doxygen will only generate file +# names in lower-case letters. If set to YES, upper-case letters are also +# allowed. This is useful if you have classes or files whose names only differ +# in case and if your file system supports case sensitive file names. Windows +# and Mac users are advised to set this option to NO. +# The default value is: system dependent. + +CASE_SENSE_NAMES = YES + +# If the HIDE_SCOPE_NAMES tag is set to NO then doxygen will show members with +# their full class and namespace scopes in the documentation. If set to YES, the +# scope will be hidden. +# The default value is: NO. + +HIDE_SCOPE_NAMES = NO + +# If the HIDE_COMPOUND_REFERENCE tag is set to NO (default) then doxygen will +# append additional text to a page's title, such as Class Reference. If set to +# YES the compound reference will be hidden. +# The default value is: NO. + +HIDE_COMPOUND_REFERENCE= NO + +# If the SHOW_INCLUDE_FILES tag is set to YES then doxygen will put a list of +# the files that are included by a file in the documentation of that file. +# The default value is: YES. 
+ +SHOW_INCLUDE_FILES = YES + +# If the SHOW_GROUPED_MEMB_INC tag is set to YES then Doxygen will add for each +# grouped member an include statement to the documentation, telling the reader +# which file to include in order to use the member. +# The default value is: NO. + +SHOW_GROUPED_MEMB_INC = NO + +# If the FORCE_LOCAL_INCLUDES tag is set to YES then doxygen will list include +# files with double quotes in the documentation rather than with sharp brackets. +# The default value is: NO. + +FORCE_LOCAL_INCLUDES = NO + +# If the INLINE_INFO tag is set to YES then a tag [inline] is inserted in the +# documentation for inline members. +# The default value is: YES. + +INLINE_INFO = YES + +# If the SORT_MEMBER_DOCS tag is set to YES then doxygen will sort the +# (detailed) documentation of file and class members alphabetically by member +# name. If set to NO, the members will appear in declaration order. +# The default value is: YES. + +SORT_MEMBER_DOCS = YES + +# If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the brief +# descriptions of file, namespace and class members alphabetically by member +# name. If set to NO, the members will appear in declaration order. Note that +# this will also influence the order of the classes in the class list. +# The default value is: NO. + +SORT_BRIEF_DOCS = NO + +# If the SORT_MEMBERS_CTORS_1ST tag is set to YES then doxygen will sort the +# (brief and detailed) documentation of class members so that constructors and +# destructors are listed first. If set to NO the constructors will appear in the +# respective orders defined by SORT_BRIEF_DOCS and SORT_MEMBER_DOCS. +# Note: If SORT_BRIEF_DOCS is set to NO this option is ignored for sorting brief +# member documentation. +# Note: If SORT_MEMBER_DOCS is set to NO this option is ignored for sorting +# detailed member documentation. +# The default value is: NO. 
+ +SORT_MEMBERS_CTORS_1ST = NO + +# If the SORT_GROUP_NAMES tag is set to YES then doxygen will sort the hierarchy +# of group names into alphabetical order. If set to NO the group names will +# appear in their defined order. +# The default value is: NO. + +SORT_GROUP_NAMES = NO + +# If the SORT_BY_SCOPE_NAME tag is set to YES, the class list will be sorted by +# fully-qualified names, including namespaces. If set to NO, the class list will +# be sorted only by class name, not including the namespace part. +# Note: This option is not very useful if HIDE_SCOPE_NAMES is set to YES. +# Note: This option applies only to the class list, not to the alphabetical +# list. +# The default value is: NO. + +SORT_BY_SCOPE_NAME = NO + +# If the STRICT_PROTO_MATCHING option is enabled and doxygen fails to do proper +# type resolution of all parameters of a function it will reject a match between +# the prototype and the implementation of a member function even if there is +# only one candidate or it is obvious which candidate to choose by doing a +# simple string match. By disabling STRICT_PROTO_MATCHING doxygen will still +# accept a match between prototype and implementation in such cases. +# The default value is: NO. + +STRICT_PROTO_MATCHING = NO + +# The GENERATE_TODOLIST tag can be used to enable (YES) or disable (NO) the todo +# list. This list is created by putting \todo commands in the documentation. +# The default value is: YES. + +GENERATE_TODOLIST = YES + +# The GENERATE_TESTLIST tag can be used to enable (YES) or disable (NO) the test +# list. This list is created by putting \test commands in the documentation. +# The default value is: YES. + +GENERATE_TESTLIST = YES + +# The GENERATE_BUGLIST tag can be used to enable (YES) or disable (NO) the bug +# list. This list is created by putting \bug commands in the documentation. +# The default value is: YES. 
+ +GENERATE_BUGLIST = YES + +# The GENERATE_DEPRECATEDLIST tag can be used to enable (YES) or disable (NO) +# the deprecated list. This list is created by putting \deprecated commands in +# the documentation. +# The default value is: YES. + +GENERATE_DEPRECATEDLIST= YES + +# The ENABLED_SECTIONS tag can be used to enable conditional documentation +# sections, marked by \if <section_label> ... \endif and \cond <section_label> +# ... \endcond blocks. + +ENABLED_SECTIONS = + +# The MAX_INITIALIZER_LINES tag determines the maximum number of lines that the +# initial value of a variable or macro / define can have for it to appear in the +# documentation. If the initializer consists of more lines than specified here +# it will be hidden. Use a value of 0 to hide initializers completely. The +# appearance of the value of individual variables and macros / defines can be +# controlled using \showinitializer or \hideinitializer command in the +# documentation regardless of this setting. +# Minimum value: 0, maximum value: 10000, default value: 30. + +MAX_INITIALIZER_LINES = 30 + +# Set the SHOW_USED_FILES tag to NO to disable the list of files generated at +# the bottom of the documentation of classes and structs. If set to YES, the +# list will mention the files that were used to generate the documentation. +# The default value is: YES. + +SHOW_USED_FILES = YES + +# Set the SHOW_FILES tag to NO to disable the generation of the Files page. This +# will remove the Files entry from the Quick Index and from the Folder Tree View +# (if specified). +# The default value is: YES. + +SHOW_FILES = YES + +# Set the SHOW_NAMESPACES tag to NO to disable the generation of the Namespaces +# page. This will remove the Namespaces entry from the Quick Index and from the +# Folder Tree View (if specified). +# The default value is: YES. 
+ +SHOW_NAMESPACES = YES + +# The FILE_VERSION_FILTER tag can be used to specify a program or script that +# doxygen should invoke to get the current version for each file (typically from +# the version control system). Doxygen will invoke the program by executing (via +# popen()) the command command input-file, where command is the value of the +# FILE_VERSION_FILTER tag, and input-file is the name of an input file provided +# by doxygen. Whatever the program writes to standard output is used as the file +# version. For an example see the documentation. + +FILE_VERSION_FILTER = + +# The LAYOUT_FILE tag can be used to specify a layout file which will be parsed +# by doxygen. The layout file controls the global structure of the generated +# output files in an output format independent way. To create the layout file +# that represents doxygen's defaults, run doxygen with the -l option. You can +# optionally specify a file name after the option, if omitted DoxygenLayout.xml +# will be used as the name of the layout file. +# +# Note that if you run doxygen from a directory containing a file called +# DoxygenLayout.xml, doxygen will parse it automatically even if the LAYOUT_FILE +# tag is left empty. + +LAYOUT_FILE = + +# The CITE_BIB_FILES tag can be used to specify one or more bib files containing +# the reference definitions. This must be a list of .bib files. The .bib +# extension is automatically appended if omitted. This requires the bibtex tool +# to be installed. See also http://en.wikipedia.org/wiki/BibTeX for more info. +# For LaTeX the style of the bibliography can be controlled using +# LATEX_BIB_STYLE. To use this feature you need bibtex and perl available in the +# search path. See also \cite for info how to create references. 
+ +CITE_BIB_FILES = references + +#--------------------------------------------------------------------------- +# Configuration options related to warning and progress messages +#--------------------------------------------------------------------------- + +# The QUIET tag can be used to turn on/off the messages that are generated to +# standard output by doxygen. If QUIET is set to YES this implies that the +# messages are off. +# The default value is: NO. + +QUIET = NO + +# The WARNINGS tag can be used to turn on/off the warning messages that are +# generated to standard error (stderr) by doxygen. If WARNINGS is set to YES +# this implies that the warnings are on. +# +# Tip: Turn warnings on while writing the documentation. +# The default value is: YES. + +WARNINGS = YES + +# If the WARN_IF_UNDOCUMENTED tag is set to YES then doxygen will generate +# warnings for undocumented members. If EXTRACT_ALL is set to YES then this flag +# will automatically be disabled. +# The default value is: YES. + +WARN_IF_UNDOCUMENTED = YES + +# If the WARN_IF_DOC_ERROR tag is set to YES, doxygen will generate warnings for +# potential errors in the documentation, such as not documenting some parameters +# in a documented function, or documenting parameters that don't exist or using +# markup commands wrongly. +# The default value is: YES. + +WARN_IF_DOC_ERROR = YES + +# This WARN_NO_PARAMDOC option can be enabled to get warnings for functions that +# are documented, but have no documentation for their parameters or return +# value. If set to NO, doxygen will only warn about wrong or incomplete +# parameter documentation, but not about the absence of documentation. +# The default value is: NO. + +WARN_NO_PARAMDOC = NO + +# If the WARN_AS_ERROR tag is set to YES then doxygen will immediately stop when +# a warning is encountered. +# The default value is: NO. + +WARN_AS_ERROR = NO + +# The WARN_FORMAT tag determines the format of the warning messages that doxygen +# can produce. 
The string should contain the $file, $line, and $text tags, which +# will be replaced by the file and line number from which the warning originated +# and the warning text. Optionally the format may contain $version, which will +# be replaced by the version of the file (if it could be obtained via +# FILE_VERSION_FILTER) +# The default value is: $file:$line: $text. + +WARN_FORMAT = "$file:$line: $text" + +# The WARN_LOGFILE tag can be used to specify a file to which warning and error +# messages should be written. If left blank the output is written to standard +# error (stderr). + +WARN_LOGFILE = + +#--------------------------------------------------------------------------- +# Configuration options related to the input files +#--------------------------------------------------------------------------- + +# The INPUT tag is used to specify the files and/or directories that contain +# documented source files. You may enter file names like myfile.cpp or +# directories like /usr/src/myproject. Separate the files or directories with +# spaces. See also FILE_PATTERNS and EXTENSION_MAPPING +# Note: If this tag is empty the current directory is searched. + +INPUT = ../../bfps/cpp + +# This tag can be used to specify the character encoding of the source files +# that doxygen parses. Internally doxygen uses the UTF-8 encoding. Doxygen uses +# libiconv (or the iconv built into libc) for the transcoding. See the libiconv +# documentation (see: http://www.gnu.org/software/libiconv) for the list of +# possible encodings. +# The default value is: UTF-8. + +INPUT_ENCODING = UTF-8 + +# If the value of the INPUT tag contains directories, you can use the +# FILE_PATTERNS tag to specify one or more wildcard patterns (like *.cpp and +# *.h) to filter out the source-files in the directories. +# +# Note that for custom extensions or not directly supported extensions you also +# need to set EXTENSION_MAPPING for the extension otherwise the files are not +# read by doxygen. 
+# +# If left blank the following patterns are tested:*.c, *.cc, *.cxx, *.cpp, +# *.c++, *.java, *.ii, *.ixx, *.ipp, *.i++, *.inl, *.idl, *.ddl, *.odl, *.h, +# *.hh, *.hxx, *.hpp, *.h++, *.cs, *.d, *.php, *.php4, *.php5, *.phtml, *.inc, +# *.m, *.markdown, *.md, *.mm, *.dox, *.py, *.pyw, *.f90, *.f95, *.f03, *.f08, +# *.f, *.for, *.tcl, *.vhd, *.vhdl, *.ucf and *.qsf. + +FILE_PATTERNS = *.c \ + *.cc \ + *.cxx \ + *.cpp \ + *.c++ \ + *.java \ + *.ii \ + *.ixx \ + *.ipp \ + *.i++ \ + *.inl \ + *.idl \ + *.ddl \ + *.odl \ + *.h \ + *.hh \ + *.hxx \ + *.hpp \ + *.h++ \ + *.cs \ + *.d \ + *.php \ + *.php4 \ + *.php5 \ + *.phtml \ + *.inc \ + *.m \ + *.markdown \ + *.md \ + *.mm \ + *.dox \ + *.py \ + *.pyw \ + *.f90 \ + *.f95 \ + *.f03 \ + *.f08 \ + *.f \ + *.for \ + *.tcl \ + *.vhd \ + *.vhdl \ + *.ucf \ + *.qsf + +# The RECURSIVE tag can be used to specify whether or not subdirectories should +# be searched for input files as well. +# The default value is: NO. + +RECURSIVE = YES + +# The EXCLUDE tag can be used to specify files and/or directories that should be +# excluded from the INPUT source files. This way you can easily exclude a +# subdirectory from a directory tree whose root is specified with the INPUT tag. +# +# Note that relative paths are relative to the directory from which doxygen is +# run. + +EXCLUDE = + +# The EXCLUDE_SYMLINKS tag can be used to select whether or not files or +# directories that are symbolic links (a Unix file system feature) are excluded +# from the input. +# The default value is: NO. + +EXCLUDE_SYMLINKS = NO + +# If the value of the INPUT tag contains directories, you can use the +# EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude +# certain files from those directories. 
+# +# Note that the wildcards are matched against the file with absolute path, so to +# exclude all test directories for example use the pattern */test/* + +EXCLUDE_PATTERNS = + +# The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names +# (namespaces, classes, functions, etc.) that should be excluded from the +# output. The symbol name can be a fully qualified name, a word, or if the +# wildcard * is used, a substring. Examples: ANamespace, AClass, +# AClass::ANamespace, ANamespace::*Test +# +# Note that the wildcards are matched against the file with absolute path, so to +# exclude all test directories use the pattern */test/* + +EXCLUDE_SYMBOLS = + +# The EXAMPLE_PATH tag can be used to specify one or more files or directories +# that contain example code fragments that are included (see the \include +# command). + +EXAMPLE_PATH = + +# If the value of the EXAMPLE_PATH tag contains directories, you can use the +# EXAMPLE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp and +# *.h) to filter out the source-files in the directories. If left blank all +# files are included. + +EXAMPLE_PATTERNS = * + +# If the EXAMPLE_RECURSIVE tag is set to YES then subdirectories will be +# searched for input files to be used with the \include or \dontinclude commands +# irrespective of the value of the RECURSIVE tag. +# The default value is: NO. + +EXAMPLE_RECURSIVE = NO + +# The IMAGE_PATH tag can be used to specify one or more files or directories +# that contain images that are to be included in the documentation (see the +# \image command). + +IMAGE_PATH = + +# The INPUT_FILTER tag can be used to specify a program that doxygen should +# invoke to filter for each input file. Doxygen will invoke the filter program +# by executing (via popen()) the command: +# +# <filter> <input-file> +# +# where <filter> is the value of the INPUT_FILTER tag, and <input-file> is the +# name of an input file. 
Doxygen will then use the output that the filter +# program writes to standard output. If FILTER_PATTERNS is specified, this tag +# will be ignored. +# +# Note that the filter must not add or remove lines; it is applied before the +# code is scanned, but not when the output code is generated. If lines are added +# or removed, the anchors will not be placed correctly. +# +# Note that for custom extensions or not directly supported extensions you also +# need to set EXTENSION_MAPPING for the extension otherwise the files are not +# properly processed by doxygen. + +INPUT_FILTER = + +# The FILTER_PATTERNS tag can be used to specify filters on a per file pattern +# basis. Doxygen will compare the file name with each pattern and apply the +# filter if there is a match. The filters are a list of the form: pattern=filter +# (like *.cpp=my_cpp_filter). See INPUT_FILTER for further information on how +# filters are used. If the FILTER_PATTERNS tag is empty or if none of the +# patterns match the file name, INPUT_FILTER is applied. +# +# Note that for custom extensions or not directly supported extensions you also +# need to set EXTENSION_MAPPING for the extension otherwise the files are not +# properly processed by doxygen. + +FILTER_PATTERNS = + +# If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using +# INPUT_FILTER) will also be used to filter the input files that are used for +# producing the source files to browse (i.e. when SOURCE_BROWSER is set to YES). +# The default value is: NO. + +FILTER_SOURCE_FILES = NO + +# The FILTER_SOURCE_PATTERNS tag can be used to specify source filters per file +# pattern. A pattern will override the setting for FILTER_PATTERN (if any) and +# it is also possible to disable source filtering for a specific pattern using +# *.ext= (so without naming a filter). +# This tag requires that the tag FILTER_SOURCE_FILES is set to YES. 
+ +FILTER_SOURCE_PATTERNS = + +# If the USE_MDFILE_AS_MAINPAGE tag refers to the name of a markdown file that +# is part of the input, its contents will be placed on the main page +# (index.html). This can be useful if you have a project on for instance GitHub +# and want to reuse the introduction page also for the doxygen output. + +USE_MDFILE_AS_MAINPAGE = + +#--------------------------------------------------------------------------- +# Configuration options related to source browsing +#--------------------------------------------------------------------------- + +# If the SOURCE_BROWSER tag is set to YES then a list of source files will be +# generated. Documented entities will be cross-referenced with these sources. +# +# Note: To get rid of all source code in the generated output, make sure that +# also VERBATIM_HEADERS is set to NO. +# The default value is: NO. + +SOURCE_BROWSER = NO + +# Setting the INLINE_SOURCES tag to YES will include the body of functions, +# classes and enums directly into the documentation. +# The default value is: NO. + +INLINE_SOURCES = NO + +# Setting the STRIP_CODE_COMMENTS tag to YES will instruct doxygen to hide any +# special comment blocks from generated source code fragments. Normal C, C++ and +# Fortran comments will always remain visible. +# The default value is: YES. + +STRIP_CODE_COMMENTS = YES + +# If the REFERENCED_BY_RELATION tag is set to YES then for each documented +# function all documented functions referencing it will be listed. +# The default value is: NO. + +REFERENCED_BY_RELATION = NO + +# If the REFERENCES_RELATION tag is set to YES then for each documented function +# all documented entities called/used by that function will be listed. +# The default value is: NO. + +REFERENCES_RELATION = NO + +# If the REFERENCES_LINK_SOURCE tag is set to YES and SOURCE_BROWSER tag is set +# to YES then the hyperlinks from functions in REFERENCES_RELATION and +# REFERENCED_BY_RELATION lists will link to the source code. 
Otherwise they will +# link to the documentation. +# The default value is: YES. + +REFERENCES_LINK_SOURCE = YES + +# If SOURCE_TOOLTIPS is enabled (the default) then hovering a hyperlink in the +# source code will show a tooltip with additional information such as prototype, +# brief description and links to the definition and documentation. Since this +# will make the HTML file larger and loading of large files a bit slower, you +# can opt to disable this feature. +# The default value is: YES. +# This tag requires that the tag SOURCE_BROWSER is set to YES. + +SOURCE_TOOLTIPS = YES + +# If the USE_HTAGS tag is set to YES then the references to source code will +# point to the HTML generated by the htags(1) tool instead of doxygen built-in +# source browser. The htags tool is part of GNU's global source tagging system +# (see http://www.gnu.org/software/global/global.html). You will need version +# 4.8.6 or higher. +# +# To use it do the following: +# - Install the latest version of global +# - Enable SOURCE_BROWSER and USE_HTAGS in the config file +# - Make sure the INPUT points to the root of the source tree +# - Run doxygen as normal +# +# Doxygen will invoke htags (and that will in turn invoke gtags), so these +# tools must be available from the command line (i.e. in the search path). +# +# The result: instead of the source browser generated by doxygen, the links to +# source code will now point to the output of htags. +# The default value is: NO. +# This tag requires that the tag SOURCE_BROWSER is set to YES. + +USE_HTAGS = NO + +# If the VERBATIM_HEADERS tag is set to YES then doxygen will generate a +# verbatim copy of the header file for each class for which an include is +# specified. Set to NO to disable this. +# See also: Section \class. +# The default value is: YES. 
+ +VERBATIM_HEADERS = YES + +# If the CLANG_ASSISTED_PARSING tag is set to YES then doxygen will use the +# clang parser (see: http://clang.llvm.org/) for more accurate parsing at the +# cost of reduced performance. This can be particularly helpful with template +# rich C++ code for which doxygen's built-in parser lacks the necessary type +# information. +# Note: The availability of this option depends on whether or not doxygen was +# generated with the -Duse-libclang=ON option for CMake. +# The default value is: NO. + +CLANG_ASSISTED_PARSING = NO + +# If clang assisted parsing is enabled you can provide the compiler with command +# line options that you would normally use when invoking the compiler. Note that +# the include paths will already be set by doxygen for the files and directories +# specified with INPUT and INCLUDE_PATH. +# This tag requires that the tag CLANG_ASSISTED_PARSING is set to YES. + +CLANG_OPTIONS = + +#--------------------------------------------------------------------------- +# Configuration options related to the alphabetical class index +#--------------------------------------------------------------------------- + +# If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index of all +# compounds will be generated. Enable this if the project contains a lot of +# classes, structs, unions or interfaces. +# The default value is: YES. + +ALPHABETICAL_INDEX = YES + +# The COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns in +# which the alphabetical index list will be split. +# Minimum value: 1, maximum value: 20, default value: 5. +# This tag requires that the tag ALPHABETICAL_INDEX is set to YES. + +COLS_IN_ALPHA_INDEX = 5 + +# In case all classes in a project start with a common prefix, all classes will +# be put under the same header in the alphabetical index. The IGNORE_PREFIX tag +# can be used to specify a prefix (or a list of prefixes) that should be ignored +# while generating the index headers. 
+# This tag requires that the tag ALPHABETICAL_INDEX is set to YES. + +IGNORE_PREFIX = + +#--------------------------------------------------------------------------- +# Configuration options related to the HTML output +#--------------------------------------------------------------------------- + +# If the GENERATE_HTML tag is set to YES, doxygen will generate HTML output +# The default value is: YES. + +GENERATE_HTML = YES + +# The HTML_OUTPUT tag is used to specify where the HTML docs will be put. If a +# relative path is entered the value of OUTPUT_DIRECTORY will be put in front of +# it. +# The default directory is: html. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_OUTPUT = html + +# The HTML_FILE_EXTENSION tag can be used to specify the file extension for each +# generated HTML page (for example: .htm, .php, .asp). +# The default value is: .html. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_FILE_EXTENSION = .html + +# The HTML_HEADER tag can be used to specify a user-defined HTML header file for +# each generated HTML page. If the tag is left blank doxygen will generate a +# standard header. +# +# To get valid HTML the header file must include any scripts and style sheets +# that doxygen needs, which depends on the configuration options used (e.g. +# the setting GENERATE_TREEVIEW). It is highly recommended to start with a +# default header using +# doxygen -w html new_header.html new_footer.html new_stylesheet.css +# YourConfigFile +# and then modify the file new_header.html. See also section "Doxygen usage" +# for information on how to generate the default header that doxygen normally +# uses. +# Note: The header is subject to change so you typically have to regenerate the +# default header when upgrading to a newer version of doxygen. For a description +# of the possible markers and block names see the documentation. +# This tag requires that the tag GENERATE_HTML is set to YES. 
+ +HTML_HEADER = + +# The HTML_FOOTER tag can be used to specify a user-defined HTML footer for each +# generated HTML page. If the tag is left blank doxygen will generate a standard +# footer. See HTML_HEADER for more information on how to generate a default +# footer and what special commands can be used inside the footer. See also +# section "Doxygen usage" for information on how to generate the default footer +# that doxygen normally uses. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_FOOTER = + +# The HTML_STYLESHEET tag can be used to specify a user-defined cascading style +# sheet that is used by each HTML page. It can be used to fine-tune the look of +# the HTML output. If left blank doxygen will generate a default style sheet. +# See also section "Doxygen usage" for information on how to generate the style +# sheet that doxygen normally uses. +# Note: It is recommended to use HTML_EXTRA_STYLESHEET instead of this tag, as +# it is more robust and this tag (HTML_STYLESHEET) will in the future become +# obsolete. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_STYLESHEET = + +# The HTML_EXTRA_STYLESHEET tag can be used to specify additional user-defined +# cascading style sheets that are included after the standard style sheets +# created by doxygen. Using this option one can overrule certain style aspects. +# This is preferred over using HTML_STYLESHEET since it does not replace the +# standard style sheet and is therefore more robust against future updates. +# Doxygen will copy the style sheet files to the output directory. +# Note: The order of the extra style sheet files is of importance (e.g. the last +# style sheet in the list overrules the setting of the previous ones in the +# list). For an example see the documentation. +# This tag requires that the tag GENERATE_HTML is set to YES. 
+ +HTML_EXTRA_STYLESHEET = + +# The HTML_EXTRA_FILES tag can be used to specify one or more extra images or +# other source files which should be copied to the HTML output directory. Note +# that these files will be copied to the base HTML output directory. Use the +# $relpath^ marker in the HTML_HEADER and/or HTML_FOOTER files to load these +# files. In the HTML_STYLESHEET file, use the file name only. Also note that the +# files will be copied as-is; there are no commands or markers available. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_EXTRA_FILES = + +# The HTML_COLORSTYLE_HUE tag controls the color of the HTML output. Doxygen +# will adjust the colors in the style sheet and background images according to +# this color. Hue is specified as an angle on a colorwheel, see +# http://en.wikipedia.org/wiki/Hue for more information. For instance the value +# 0 represents red, 60 is yellow, 120 is green, 180 is cyan, 240 is blue, 300 +# purple, and 360 is red again. +# Minimum value: 0, maximum value: 359, default value: 220. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_COLORSTYLE_HUE = 220 + +# The HTML_COLORSTYLE_SAT tag controls the purity (or saturation) of the colors +# in the HTML output. For a value of 0 the output will use grayscales only. A +# value of 255 will produce the most vivid colors. +# Minimum value: 0, maximum value: 255, default value: 100. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_COLORSTYLE_SAT = 100 + +# The HTML_COLORSTYLE_GAMMA tag controls the gamma correction applied to the +# luminance component of the colors in the HTML output. Values below 100 +# gradually make the output lighter, whereas values above 100 make the output +# darker. The value divided by 100 is the actual gamma applied, so 80 represents +# a gamma of 0.8, the value 220 represents a gamma of 2.2, and 100 does not +# change the gamma. +# Minimum value: 40, maximum value: 240, default value: 80. 
+# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_COLORSTYLE_GAMMA = 80 + +# If the HTML_TIMESTAMP tag is set to YES then the footer of each generated HTML +# page will contain the date and time when the page was generated. Setting this +# to YES can help to show when doxygen was last run and thus if the +# documentation is up to date. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_TIMESTAMP = NO + +# If the HTML_DYNAMIC_SECTIONS tag is set to YES then the generated HTML +# documentation will contain sections that can be hidden and shown after the +# page has loaded. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_DYNAMIC_SECTIONS = NO + +# With HTML_INDEX_NUM_ENTRIES one can control the preferred number of entries +# shown in the various tree structured indices initially; the user can expand +# and collapse entries dynamically later on. Doxygen will expand the tree to +# such a level that at most the specified number of entries are visible (unless +# a fully collapsed tree already exceeds this amount). So setting the number of +# entries 1 will produce a full collapsed tree by default. 0 is a special value +# representing an infinite number of entries and will result in a full expanded +# tree by default. +# Minimum value: 0, maximum value: 9999, default value: 100. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_INDEX_NUM_ENTRIES = 100 + +# If the GENERATE_DOCSET tag is set to YES, additional index files will be +# generated that can be used as input for Apple's Xcode 3 integrated development +# environment (see: http://developer.apple.com/tools/xcode/), introduced with +# OSX 10.5 (Leopard). To create a documentation set, doxygen will generate a +# Makefile in the HTML output directory. 
Running make will produce the docset in +# that directory and running make install will install the docset in +# ~/Library/Developer/Shared/Documentation/DocSets so that Xcode will find it at +# startup. See http://developer.apple.com/tools/creatingdocsetswithdoxygen.html +# for more information. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. + +GENERATE_DOCSET = NO + +# This tag determines the name of the docset feed. A documentation feed provides +# an umbrella under which multiple documentation sets from a single provider +# (such as a company or product suite) can be grouped. +# The default value is: Doxygen generated docs. +# This tag requires that the tag GENERATE_DOCSET is set to YES. + +DOCSET_FEEDNAME = "Doxygen generated docs" + +# This tag specifies a string that should uniquely identify the documentation +# set bundle. This should be a reverse domain-name style string, e.g. +# com.mycompany.MyDocSet. Doxygen will append .docset to the name. +# The default value is: org.doxygen.Project. +# This tag requires that the tag GENERATE_DOCSET is set to YES. + +DOCSET_BUNDLE_ID = org.doxygen.Project + +# The DOCSET_PUBLISHER_ID tag specifies a string that should uniquely identify +# the documentation publisher. This should be a reverse domain-name style +# string, e.g. com.mycompany.MyDocSet.documentation. +# The default value is: org.doxygen.Publisher. +# This tag requires that the tag GENERATE_DOCSET is set to YES. + +DOCSET_PUBLISHER_ID = org.doxygen.Publisher + +# The DOCSET_PUBLISHER_NAME tag identifies the documentation publisher. +# The default value is: Publisher. +# This tag requires that the tag GENERATE_DOCSET is set to YES. + +DOCSET_PUBLISHER_NAME = Publisher + +# If the GENERATE_HTMLHELP tag is set to YES then doxygen generates three +# additional HTML index files: index.hhp, index.hhc, and index.hhk. 
The +# index.hhp is a project file that can be read by Microsoft's HTML Help Workshop +# (see: http://www.microsoft.com/en-us/download/details.aspx?id=21138) on +# Windows. +# +# The HTML Help Workshop contains a compiler that can convert all HTML output +# generated by doxygen into a single compiled HTML file (.chm). Compiled HTML +# files are now used as the Windows 98 help format, and will replace the old +# Windows help format (.hlp) on all Windows platforms in the future. Compressed +# HTML files also contain an index, a table of contents, and you can search for +# words in the documentation. The HTML workshop also contains a viewer for +# compressed HTML files. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. + +GENERATE_HTMLHELP = NO + +# The CHM_FILE tag can be used to specify the file name of the resulting .chm +# file. You can add a path in front of the file if the result should not be +# written to the html output directory. +# This tag requires that the tag GENERATE_HTMLHELP is set to YES. + +CHM_FILE = + +# The HHC_LOCATION tag can be used to specify the location (absolute path +# including file name) of the HTML help compiler (hhc.exe). If non-empty, +# doxygen will try to run the HTML help compiler on the generated index.hhp. +# The file has to be specified with full path. +# This tag requires that the tag GENERATE_HTMLHELP is set to YES. + +HHC_LOCATION = + +# The GENERATE_CHI flag controls if a separate .chi index file is generated +# (YES) or that it should be included in the master .chm file (NO). +# The default value is: NO. +# This tag requires that the tag GENERATE_HTMLHELP is set to YES. + +GENERATE_CHI = NO + +# The CHM_INDEX_ENCODING is used to encode HtmlHelp index (hhk), content (hhc) +# and project file content. +# This tag requires that the tag GENERATE_HTMLHELP is set to YES. 
+ +CHM_INDEX_ENCODING = + +# The BINARY_TOC flag controls whether a binary table of contents is generated +# (YES) or a normal table of contents (NO) in the .chm file. Furthermore it +# enables the Previous and Next buttons. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTMLHELP is set to YES. + +BINARY_TOC = NO + +# The TOC_EXPAND flag can be set to YES to add extra items for group members to +# the table of contents of the HTML help documentation and to the tree view. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTMLHELP is set to YES. + +TOC_EXPAND = NO + +# If the GENERATE_QHP tag is set to YES and both QHP_NAMESPACE and +# QHP_VIRTUAL_FOLDER are set, an additional index file will be generated that +# can be used as input for Qt's qhelpgenerator to generate a Qt Compressed Help +# (.qch) of the generated HTML documentation. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. + +GENERATE_QHP = NO + +# If the QHG_LOCATION tag is specified, the QCH_FILE tag can be used to specify +# the file name of the resulting .qch file. The path specified is relative to +# the HTML output folder. +# This tag requires that the tag GENERATE_QHP is set to YES. + +QCH_FILE = + +# The QHP_NAMESPACE tag specifies the namespace to use when generating Qt Help +# Project output. For more information please see Qt Help Project / Namespace +# (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#namespace). +# The default value is: org.doxygen.Project. +# This tag requires that the tag GENERATE_QHP is set to YES. + +QHP_NAMESPACE = org.doxygen.Project + +# The QHP_VIRTUAL_FOLDER tag specifies the namespace to use when generating Qt +# Help Project output. For more information please see Qt Help Project / Virtual +# Folders (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#virtual- +# folders). +# The default value is: doc. +# This tag requires that the tag GENERATE_QHP is set to YES. 
+ +QHP_VIRTUAL_FOLDER = doc + +# If the QHP_CUST_FILTER_NAME tag is set, it specifies the name of a custom +# filter to add. For more information please see Qt Help Project / Custom +# Filters (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#custom- +# filters). +# This tag requires that the tag GENERATE_QHP is set to YES. + +QHP_CUST_FILTER_NAME = + +# The QHP_CUST_FILTER_ATTRS tag specifies the list of the attributes of the +# custom filter to add. For more information please see Qt Help Project / Custom +# Filters (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#custom- +# filters). +# This tag requires that the tag GENERATE_QHP is set to YES. + +QHP_CUST_FILTER_ATTRS = + +# The QHP_SECT_FILTER_ATTRS tag specifies the list of the attributes this +# project's filter section matches. Qt Help Project / Filter Attributes (see: +# http://qt-project.org/doc/qt-4.8/qthelpproject.html#filter-attributes). +# This tag requires that the tag GENERATE_QHP is set to YES. + +QHP_SECT_FILTER_ATTRS = + +# The QHG_LOCATION tag can be used to specify the location of Qt's +# qhelpgenerator. If non-empty doxygen will try to run qhelpgenerator on the +# generated .qhp file. +# This tag requires that the tag GENERATE_QHP is set to YES. + +QHG_LOCATION = + +# If the GENERATE_ECLIPSEHELP tag is set to YES, additional index files will be +# generated, together with the HTML files, they form an Eclipse help plugin. To +# install this plugin and make it available under the help contents menu in +# Eclipse, the contents of the directory containing the HTML and XML files needs +# to be copied into the plugins directory of eclipse. The name of the directory +# within the plugins directory should be the same as the ECLIPSE_DOC_ID value. +# After copying Eclipse needs to be restarted before the help appears. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. 
+ +GENERATE_ECLIPSEHELP = NO + +# A unique identifier for the Eclipse help plugin. When installing the plugin +# the directory name containing the HTML and XML files should also have this +# name. Each documentation set should have its own identifier. +# The default value is: org.doxygen.Project. +# This tag requires that the tag GENERATE_ECLIPSEHELP is set to YES. + +ECLIPSE_DOC_ID = org.doxygen.Project + +# If you want full control over the layout of the generated HTML pages it might +# be necessary to disable the index and replace it with your own. The +# DISABLE_INDEX tag can be used to turn on/off the condensed index (tabs) at top +# of each HTML page. A value of NO enables the index and the value YES disables +# it. Since the tabs in the index contain the same information as the navigation +# tree, you can set this option to YES if you also set GENERATE_TREEVIEW to YES. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. + +DISABLE_INDEX = NO + +# The GENERATE_TREEVIEW tag is used to specify whether a tree-like index +# structure should be generated to display hierarchical information. If the tag +# value is set to YES, a side panel will be generated containing a tree-like +# index structure (just like the one that is generated for HTML Help). For this +# to work a browser that supports JavaScript, DHTML, CSS and frames is required +# (i.e. any modern browser). Windows users are probably better off using the +# HTML help feature. Via custom style sheets (see HTML_EXTRA_STYLESHEET) one can +# further fine-tune the look of the index. As an example, the default style +# sheet generated by doxygen has an example that shows how to put an image at +# the root of the tree instead of the PROJECT_NAME. Since the tree basically has +# the same information as the tab index, you could consider setting +# DISABLE_INDEX to YES when enabling this option. +# The default value is: NO. 
+# This tag requires that the tag GENERATE_HTML is set to YES. + +GENERATE_TREEVIEW = NO + +# The ENUM_VALUES_PER_LINE tag can be used to set the number of enum values that +# doxygen will group on one line in the generated HTML documentation. +# +# Note that a value of 0 will completely suppress the enum values from appearing +# in the overview section. +# Minimum value: 0, maximum value: 20, default value: 4. +# This tag requires that the tag GENERATE_HTML is set to YES. + +ENUM_VALUES_PER_LINE = 4 + +# If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be used +# to set the initial width (in pixels) of the frame in which the tree is shown. +# Minimum value: 0, maximum value: 1500, default value: 250. +# This tag requires that the tag GENERATE_HTML is set to YES. + +TREEVIEW_WIDTH = 250 + +# If the EXT_LINKS_IN_WINDOW option is set to YES, doxygen will open links to +# external symbols imported via tag files in a separate window. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. + +EXT_LINKS_IN_WINDOW = NO + +# Use this tag to change the font size of LaTeX formulas included as images in +# the HTML documentation. When you change the font size after a successful +# doxygen run you need to manually remove any form_*.png images from the HTML +# output directory to force them to be regenerated. +# Minimum value: 8, maximum value: 50, default value: 10. +# This tag requires that the tag GENERATE_HTML is set to YES. + +FORMULA_FONTSIZE = 10 + +# Use the FORMULA_TRANPARENT tag to determine whether or not the images +# generated for formulas are transparent PNGs. Transparent PNGs are not +# supported properly for IE 6.0, but are supported on all modern browsers. +# +# Note that when changing this option you need to delete any form_*.png files in +# the HTML output directory before the changes have effect. +# The default value is: YES. +# This tag requires that the tag GENERATE_HTML is set to YES. 
+ +FORMULA_TRANSPARENT = YES + +# Enable the USE_MATHJAX option to render LaTeX formulas using MathJax (see +# http://www.mathjax.org) which uses client side Javascript for the rendering +# instead of using pre-rendered bitmaps. Use this if you do not have LaTeX +# installed or if you want the formulas to look prettier in the HTML output. When +# enabled you may also need to install MathJax separately and configure the path +# to it using the MATHJAX_RELPATH option. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. + +USE_MATHJAX = NO + +# When MathJax is enabled you can set the default output format to be used for +# the MathJax output. See the MathJax site (see: +# http://docs.mathjax.org/en/latest/output.html) for more details. +# Possible values are: HTML-CSS (which is slower, but has the best +# compatibility), NativeMML (i.e. MathML) and SVG. +# The default value is: HTML-CSS. +# This tag requires that the tag USE_MATHJAX is set to YES. + +MATHJAX_FORMAT = HTML-CSS + +# When MathJax is enabled you need to specify the location relative to the HTML +# output directory using the MATHJAX_RELPATH option. The destination directory +# should contain the MathJax.js script. For instance, if the mathjax directory +# is located at the same level as the HTML output directory, then +# MATHJAX_RELPATH should be ../mathjax. The default value points to the MathJax +# Content Delivery Network so you can quickly see the result without installing +# MathJax. However, it is strongly recommended to install a local copy of +# MathJax from http://www.mathjax.org before deployment. +# The default value is: http://cdn.mathjax.org/mathjax/latest. +# This tag requires that the tag USE_MATHJAX is set to YES. + +MATHJAX_RELPATH = http://cdn.mathjax.org/mathjax/latest + +# The MATHJAX_EXTENSIONS tag can be used to specify one or more MathJax +# extension names that should be enabled during MathJax rendering. 
For example +# MATHJAX_EXTENSIONS = TeX/AMSmath TeX/AMSsymbols +# This tag requires that the tag USE_MATHJAX is set to YES. + +MATHJAX_EXTENSIONS = + +# The MATHJAX_CODEFILE tag can be used to specify a file with javascript pieces +# of code that will be used on startup of the MathJax code. See the MathJax site +# (see: http://docs.mathjax.org/en/latest/output.html) for more details. For an +# example see the documentation. +# This tag requires that the tag USE_MATHJAX is set to YES. + +MATHJAX_CODEFILE = + +# When the SEARCHENGINE tag is enabled doxygen will generate a search box for +# the HTML output. The underlying search engine uses javascript and DHTML and +# should work on any modern browser. Note that when using HTML help +# (GENERATE_HTMLHELP), Qt help (GENERATE_QHP), or docsets (GENERATE_DOCSET) +# there is already a search function so this one should typically be disabled. +# For large projects the javascript based search engine can be slow, then +# enabling SERVER_BASED_SEARCH may provide a better solution. It is possible to +# search using the keyboard; to jump to the search box use <access key> + S +# (what the <access key> is depends on the OS and browser, but it is typically +# <CTRL>, <ALT>/<option>, or both). Inside the search box use the <cursor down +# key> to jump into the search results window, the results can be navigated +# using the <cursor keys>. Press <Enter> to select an item or <escape> to cancel +# the search. The filter options can be selected when the cursor is inside the +# search box by pressing <Shift>+<cursor down>. Also here use the <cursor keys> +# to select a filter and <Enter> or <escape> to activate or cancel the filter +# option. +# The default value is: YES. +# This tag requires that the tag GENERATE_HTML is set to YES. + +SEARCHENGINE = YES + +# When the SERVER_BASED_SEARCH tag is enabled the search engine will be +# implemented using a web server instead of a web client using Javascript. 
There +# are two flavors of web server based searching depending on the EXTERNAL_SEARCH +# setting. When disabled, doxygen will generate a PHP script for searching and +# an index file used by the script. When EXTERNAL_SEARCH is enabled the indexing +# and searching needs to be provided by external tools. See the section +# "External Indexing and Searching" for details. +# The default value is: NO. +# This tag requires that the tag SEARCHENGINE is set to YES. + +SERVER_BASED_SEARCH = NO + +# When EXTERNAL_SEARCH tag is enabled doxygen will no longer generate the PHP +# script for searching. Instead the search results are written to an XML file +# which needs to be processed by an external indexer. Doxygen will invoke an +# external search engine pointed to by the SEARCHENGINE_URL option to obtain the +# search results. +# +# Doxygen ships with an example indexer (doxyindexer) and search engine +# (doxysearch.cgi) which are based on the open source search engine library +# Xapian (see: http://xapian.org/). +# +# See the section "External Indexing and Searching" for details. +# The default value is: NO. +# This tag requires that the tag SEARCHENGINE is set to YES. + +EXTERNAL_SEARCH = NO + +# The SEARCHENGINE_URL should point to a search engine hosted by a web server +# which will return the search results when EXTERNAL_SEARCH is enabled. +# +# Doxygen ships with an example indexer (doxyindexer) and search engine +# (doxysearch.cgi) which are based on the open source search engine library +# Xapian (see: http://xapian.org/). See the section "External Indexing and +# Searching" for details. +# This tag requires that the tag SEARCHENGINE is set to YES. + +SEARCHENGINE_URL = + +# When SERVER_BASED_SEARCH and EXTERNAL_SEARCH are both enabled the unindexed +# search data is written to a file for indexing by an external tool. With the +# SEARCHDATA_FILE tag the name of this file can be specified. +# The default file is: searchdata.xml. 
+# This tag requires that the tag SEARCHENGINE is set to YES. + +SEARCHDATA_FILE = searchdata.xml + +# When SERVER_BASED_SEARCH and EXTERNAL_SEARCH are both enabled the +# EXTERNAL_SEARCH_ID tag can be used as an identifier for the project. This is +# useful in combination with EXTRA_SEARCH_MAPPINGS to search through multiple +# projects and redirect the results back to the right project. +# This tag requires that the tag SEARCHENGINE is set to YES. + +EXTERNAL_SEARCH_ID = + +# The EXTRA_SEARCH_MAPPINGS tag can be used to enable searching through doxygen +# projects other than the one defined by this configuration file, but that are +# all added to the same external search index. Each project needs to have a +# unique id set via EXTERNAL_SEARCH_ID. The search mapping then maps the id +# to a relative location where the documentation can be found. The format is: +# EXTRA_SEARCH_MAPPINGS = tagname1=loc1 tagname2=loc2 ... +# This tag requires that the tag SEARCHENGINE is set to YES. + +EXTRA_SEARCH_MAPPINGS = + +#--------------------------------------------------------------------------- +# Configuration options related to the LaTeX output +#--------------------------------------------------------------------------- + +# If the GENERATE_LATEX tag is set to YES, doxygen will generate LaTeX output. +# The default value is: YES. + +GENERATE_LATEX = YES + +# The LATEX_OUTPUT tag is used to specify where the LaTeX docs will be put. If a +# relative path is entered the value of OUTPUT_DIRECTORY will be put in front of +# it. +# The default directory is: latex. +# This tag requires that the tag GENERATE_LATEX is set to YES. + +LATEX_OUTPUT = latex + +# The LATEX_CMD_NAME tag can be used to specify the LaTeX command name to be +# invoked. +# +# Note that when enabling USE_PDFLATEX this option is only used for generating +# bitmaps for formulas in the HTML output, but not in the Makefile that is +# written to the output directory. +# The default file is: latex. 
+# This tag requires that the tag GENERATE_LATEX is set to YES. + +LATEX_CMD_NAME = latex + +# The MAKEINDEX_CMD_NAME tag can be used to specify the command name to generate +# index for LaTeX. +# The default file is: makeindex. +# This tag requires that the tag GENERATE_LATEX is set to YES. + +MAKEINDEX_CMD_NAME = makeindex + +# If the COMPACT_LATEX tag is set to YES, doxygen generates more compact LaTeX +# documents. This may be useful for small projects and may help to save some +# trees in general. +# The default value is: NO. +# This tag requires that the tag GENERATE_LATEX is set to YES. + +COMPACT_LATEX = NO + +# The PAPER_TYPE tag can be used to set the paper type that is used by the +# printer. +# Possible values are: a4 (210 x 297 mm), letter (8.5 x 11 inches), legal (8.5 x +# 14 inches) and executive (7.25 x 10.5 inches). +# The default value is: a4. +# This tag requires that the tag GENERATE_LATEX is set to YES. + +PAPER_TYPE = a4 + +# The EXTRA_PACKAGES tag can be used to specify one or more LaTeX package names +# that should be included in the LaTeX output. The package can be specified just +# by its name or with the correct syntax as to be used with the LaTeX +# \usepackage command. To get the times font for instance you can specify : +# EXTRA_PACKAGES=times or EXTRA_PACKAGES={times} +# To use the option intlimits with the amsmath package you can specify: +# EXTRA_PACKAGES=[intlimits]{amsmath} +# If left blank no extra packages will be included. +# This tag requires that the tag GENERATE_LATEX is set to YES. + +EXTRA_PACKAGES = + +# The LATEX_HEADER tag can be used to specify a personal LaTeX header for the +# generated LaTeX document. The header should contain everything until the first +# chapter. If it is left blank doxygen will generate a standard header. See +# section "Doxygen usage" for information on how to let doxygen write the +# default header to a separate file. +# +# Note: Only use a user-defined header if you know what you are doing! 
The +# following commands have a special meaning inside the header: $title, +# $datetime, $date, $doxygenversion, $projectname, $projectnumber, +# $projectbrief, $projectlogo. Doxygen will replace $title with the empty +# string, for the replacement values of the other commands the user is referred +# to HTML_HEADER. +# This tag requires that the tag GENERATE_LATEX is set to YES. + +LATEX_HEADER = + +# The LATEX_FOOTER tag can be used to specify a personal LaTeX footer for the +# generated LaTeX document. The footer should contain everything after the last +# chapter. If it is left blank doxygen will generate a standard footer. See +# LATEX_HEADER for more information on how to generate a default footer and what +# special commands can be used inside the footer. +# +# Note: Only use a user-defined footer if you know what you are doing! +# This tag requires that the tag GENERATE_LATEX is set to YES. + +LATEX_FOOTER = + +# The LATEX_EXTRA_STYLESHEET tag can be used to specify additional user-defined +# LaTeX style sheets that are included after the standard style sheets created +# by doxygen. Using this option one can overrule certain style aspects. Doxygen +# will copy the style sheet files to the output directory. +# Note: The order of the extra style sheet files is of importance (e.g. the last +# style sheet in the list overrules the setting of the previous ones in the +# list). +# This tag requires that the tag GENERATE_LATEX is set to YES. + +LATEX_EXTRA_STYLESHEET = + +# The LATEX_EXTRA_FILES tag can be used to specify one or more extra images or +# other source files which should be copied to the LATEX_OUTPUT output +# directory. Note that the files will be copied as-is; there are no commands or +# markers available. +# This tag requires that the tag GENERATE_LATEX is set to YES. + +LATEX_EXTRA_FILES = + +# If the PDF_HYPERLINKS tag is set to YES, the LaTeX that is generated is +# prepared for conversion to PDF (using ps2pdf or pdflatex). 
The PDF file will +# contain links (just like the HTML output) instead of page references. This +# makes the output suitable for online browsing using a PDF viewer. +# The default value is: YES. +# This tag requires that the tag GENERATE_LATEX is set to YES. + +PDF_HYPERLINKS = YES + +# If the USE_PDFLATEX tag is set to YES, doxygen will use pdflatex to generate +# the PDF file directly from the LaTeX files. Set this option to YES, to get a +# higher quality PDF documentation. +# The default value is: YES. +# This tag requires that the tag GENERATE_LATEX is set to YES. + +USE_PDFLATEX = YES + +# If the LATEX_BATCHMODE tag is set to YES, doxygen will add the \batchmode +# command to the generated LaTeX files. This will instruct LaTeX to keep running +# if errors occur, instead of asking the user for help. This option is also used +# when generating formulas in HTML. +# The default value is: NO. +# This tag requires that the tag GENERATE_LATEX is set to YES. + +LATEX_BATCHMODE = NO + +# If the LATEX_HIDE_INDICES tag is set to YES then doxygen will not include the +# index chapters (such as File Index, Compound Index, etc.) in the output. +# The default value is: NO. +# This tag requires that the tag GENERATE_LATEX is set to YES. + +LATEX_HIDE_INDICES = NO + +# If the LATEX_SOURCE_CODE tag is set to YES then doxygen will include source +# code with syntax highlighting in the LaTeX output. +# +# Note that which sources are shown also depends on other settings such as +# SOURCE_BROWSER. +# The default value is: NO. +# This tag requires that the tag GENERATE_LATEX is set to YES. + +LATEX_SOURCE_CODE = NO + +# The LATEX_BIB_STYLE tag can be used to specify the style to use for the +# bibliography, e.g. plainnat, or ieeetr. See +# http://en.wikipedia.org/wiki/BibTeX and \cite for more info. +# The default value is: plain. +# This tag requires that the tag GENERATE_LATEX is set to YES. 
+ +LATEX_BIB_STYLE = plain + +# If the LATEX_TIMESTAMP tag is set to YES then the footer of each generated +# page will contain the date and time when the page was generated. Setting this +# to NO can help when comparing the output of multiple runs. +# The default value is: NO. +# This tag requires that the tag GENERATE_LATEX is set to YES. + +LATEX_TIMESTAMP = NO + +#--------------------------------------------------------------------------- +# Configuration options related to the RTF output +#--------------------------------------------------------------------------- + +# If the GENERATE_RTF tag is set to YES, doxygen will generate RTF output. The +# RTF output is optimized for Word 97 and may not look too pretty with other RTF +# readers/editors. +# The default value is: NO. + +GENERATE_RTF = NO + +# The RTF_OUTPUT tag is used to specify where the RTF docs will be put. If a +# relative path is entered the value of OUTPUT_DIRECTORY will be put in front of +# it. +# The default directory is: rtf. +# This tag requires that the tag GENERATE_RTF is set to YES. + +RTF_OUTPUT = rtf + +# If the COMPACT_RTF tag is set to YES, doxygen generates more compact RTF +# documents. This may be useful for small projects and may help to save some +# trees in general. +# The default value is: NO. +# This tag requires that the tag GENERATE_RTF is set to YES. + +COMPACT_RTF = NO + +# If the RTF_HYPERLINKS tag is set to YES, the RTF that is generated will +# contain hyperlink fields. The RTF file will contain links (just like the HTML +# output) instead of page references. This makes the output suitable for online +# browsing using Word or some other Word compatible readers that support those +# fields. +# +# Note: WordPad (write) and others do not support links. +# The default value is: NO. +# This tag requires that the tag GENERATE_RTF is set to YES. + +RTF_HYPERLINKS = NO + +# Load stylesheet definitions from file. Syntax is similar to doxygen's config +# file, i.e. 
a series of assignments. You only have to provide replacements, +# missing definitions are set to their default value. +# +# See also section "Doxygen usage" for information on how to generate the +# default style sheet that doxygen normally uses. +# This tag requires that the tag GENERATE_RTF is set to YES. + +RTF_STYLESHEET_FILE = + +# Set optional variables used in the generation of an RTF document. Syntax is +# similar to doxygen's config file. A template extensions file can be generated +# using doxygen -e rtf extensionFile. +# This tag requires that the tag GENERATE_RTF is set to YES. + +RTF_EXTENSIONS_FILE = + +# If the RTF_SOURCE_CODE tag is set to YES then doxygen will include source code +# with syntax highlighting in the RTF output. +# +# Note that which sources are shown also depends on other settings such as +# SOURCE_BROWSER. +# The default value is: NO. +# This tag requires that the tag GENERATE_RTF is set to YES. + +RTF_SOURCE_CODE = NO + +#--------------------------------------------------------------------------- +# Configuration options related to the man page output +#--------------------------------------------------------------------------- + +# If the GENERATE_MAN tag is set to YES, doxygen will generate man pages for +# classes and files. +# The default value is: NO. + +GENERATE_MAN = NO + +# The MAN_OUTPUT tag is used to specify where the man pages will be put. If a +# relative path is entered the value of OUTPUT_DIRECTORY will be put in front of +# it. A directory man3 will be created inside the directory specified by +# MAN_OUTPUT. +# The default directory is: man. +# This tag requires that the tag GENERATE_MAN is set to YES. + +MAN_OUTPUT = man + +# The MAN_EXTENSION tag determines the extension that is added to the generated +# man pages. In case the manual section does not start with a number, the number +# 3 is prepended. The dot (.) at the beginning of the MAN_EXTENSION tag is +# optional. +# The default value is: .3. 
+# This tag requires that the tag GENERATE_MAN is set to YES. + +MAN_EXTENSION = .3 + +# The MAN_SUBDIR tag determines the name of the directory created within +# MAN_OUTPUT in which the man pages are placed. It defaults to man followed by +# MAN_EXTENSION with the initial . removed. +# This tag requires that the tag GENERATE_MAN is set to YES. + +MAN_SUBDIR = + +# If the MAN_LINKS tag is set to YES and doxygen generates man output, then it +# will generate one additional man file for each entity documented in the real +# man page(s). These additional files only source the real man page, but without +# them the man command would be unable to find the correct page. +# The default value is: NO. +# This tag requires that the tag GENERATE_MAN is set to YES. + +MAN_LINKS = NO + +#--------------------------------------------------------------------------- +# Configuration options related to the XML output +#--------------------------------------------------------------------------- + +# If the GENERATE_XML tag is set to YES, doxygen will generate an XML file that +# captures the structure of the code including all documentation. +# The default value is: NO. + +GENERATE_XML = NO + +# The XML_OUTPUT tag is used to specify where the XML pages will be put. If a +# relative path is entered the value of OUTPUT_DIRECTORY will be put in front of +# it. +# The default directory is: xml. +# This tag requires that the tag GENERATE_XML is set to YES. + +XML_OUTPUT = xml + +# If the XML_PROGRAMLISTING tag is set to YES, doxygen will dump the program +# listings (including syntax highlighting and cross-referencing information) to +# the XML output. Note that enabling this will significantly increase the size +# of the XML output. +# The default value is: YES. +# This tag requires that the tag GENERATE_XML is set to YES.
+ +XML_PROGRAMLISTING = YES + +#--------------------------------------------------------------------------- +# Configuration options related to the DOCBOOK output +#--------------------------------------------------------------------------- + +# If the GENERATE_DOCBOOK tag is set to YES, doxygen will generate Docbook files +# that can be used to generate PDF. +# The default value is: NO. + +GENERATE_DOCBOOK = NO + +# The DOCBOOK_OUTPUT tag is used to specify where the Docbook pages will be put. +# If a relative path is entered the value of OUTPUT_DIRECTORY will be put in +# front of it. +# The default directory is: docbook. +# This tag requires that the tag GENERATE_DOCBOOK is set to YES. + +DOCBOOK_OUTPUT = docbook + +# If the DOCBOOK_PROGRAMLISTING tag is set to YES, doxygen will include the +# program listings (including syntax highlighting and cross-referencing +# information) to the DOCBOOK output. Note that enabling this will significantly +# increase the size of the DOCBOOK output. +# The default value is: NO. +# This tag requires that the tag GENERATE_DOCBOOK is set to YES. + +DOCBOOK_PROGRAMLISTING = NO + +#--------------------------------------------------------------------------- +# Configuration options for the AutoGen Definitions output +#--------------------------------------------------------------------------- + +# If the GENERATE_AUTOGEN_DEF tag is set to YES, doxygen will generate an +# AutoGen Definitions (see http://autogen.sf.net) file that captures the +# structure of the code including all documentation. Note that this feature is +# still experimental and incomplete at the moment. +# The default value is: NO. 
+ +GENERATE_AUTOGEN_DEF = NO + +#--------------------------------------------------------------------------- +# Configuration options related to the Perl module output +#--------------------------------------------------------------------------- + +# If the GENERATE_PERLMOD tag is set to YES, doxygen will generate a Perl module +# file that captures the structure of the code including all documentation. +# +# Note that this feature is still experimental and incomplete at the moment. +# The default value is: NO. + +GENERATE_PERLMOD = NO + +# If the PERLMOD_LATEX tag is set to YES, doxygen will generate the necessary +# Makefile rules, Perl scripts and LaTeX code to be able to generate PDF and DVI +# output from the Perl module output. +# The default value is: NO. +# This tag requires that the tag GENERATE_PERLMOD is set to YES. + +PERLMOD_LATEX = NO + +# If the PERLMOD_PRETTY tag is set to YES, the Perl module output will be nicely +# formatted so it can be parsed by a human reader. This is useful if you want to +# understand what is going on. On the other hand, if this tag is set to NO, the +# size of the Perl module output will be much smaller and Perl will parse it +# just the same. +# The default value is: YES. +# This tag requires that the tag GENERATE_PERLMOD is set to YES. + +PERLMOD_PRETTY = YES + +# The names of the make variables in the generated doxyrules.make file are +# prefixed with the string contained in PERLMOD_MAKEVAR_PREFIX. This is useful +# so different doxyrules.make files included by the same Makefile don't +# overwrite each other's variables. +# This tag requires that the tag GENERATE_PERLMOD is set to YES. 
+ +PERLMOD_MAKEVAR_PREFIX = + +#--------------------------------------------------------------------------- +# Configuration options related to the preprocessor +#--------------------------------------------------------------------------- + +# If the ENABLE_PREPROCESSING tag is set to YES, doxygen will evaluate all +# C-preprocessor directives found in the sources and include files. +# The default value is: YES. + +ENABLE_PREPROCESSING = YES + +# If the MACRO_EXPANSION tag is set to YES, doxygen will expand all macro names +# in the source code. If set to NO, only conditional compilation will be +# performed. Macro expansion can be done in a controlled way by setting +# EXPAND_ONLY_PREDEF to YES. +# The default value is: NO. +# This tag requires that the tag ENABLE_PREPROCESSING is set to YES. + +MACRO_EXPANSION = NO + +# If the EXPAND_ONLY_PREDEF and MACRO_EXPANSION tags are both set to YES then +# the macro expansion is limited to the macros specified with the PREDEFINED and +# EXPAND_AS_DEFINED tags. +# The default value is: NO. +# This tag requires that the tag ENABLE_PREPROCESSING is set to YES. + +EXPAND_ONLY_PREDEF = NO + +# If the SEARCH_INCLUDES tag is set to YES, the include files in the +# INCLUDE_PATH will be searched if a #include is found. +# The default value is: YES. +# This tag requires that the tag ENABLE_PREPROCESSING is set to YES. + +SEARCH_INCLUDES = YES + +# The INCLUDE_PATH tag can be used to specify one or more directories that +# contain include files that are not input files but should be processed by the +# preprocessor. +# This tag requires that the tag SEARCH_INCLUDES is set to YES. + +INCLUDE_PATH = + +# You can use the INCLUDE_FILE_PATTERNS tag to specify one or more wildcard +# patterns (like *.h and *.hpp) to filter out the header-files in the +# directories. If left blank, the patterns specified with FILE_PATTERNS will be +# used. +# This tag requires that the tag ENABLE_PREPROCESSING is set to YES. 
+ +INCLUDE_FILE_PATTERNS = + +# The PREDEFINED tag can be used to specify one or more macro names that are +# defined before the preprocessor is started (similar to the -D option of e.g. +# gcc). The argument of the tag is a list of macros of the form: name or +# name=definition (no spaces). If the definition and the "=" are omitted, "=1" +# is assumed. To prevent a macro definition from being undefined via #undef or +# recursively expanded use the := operator instead of the = operator. +# This tag requires that the tag ENABLE_PREPROCESSING is set to YES. + +PREDEFINED = + +# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then this +# tag can be used to specify a list of macro names that should be expanded. The +# macro definition that is found in the sources will be used. Use the PREDEFINED +# tag if you want to use a different macro definition that overrules the +# definition found in the source code. +# This tag requires that the tag ENABLE_PREPROCESSING is set to YES. + +EXPAND_AS_DEFINED = + +# If the SKIP_FUNCTION_MACROS tag is set to YES then doxygen's preprocessor will +# remove all references to function-like macros that are alone on a line, have +# an all uppercase name, and do not end with a semicolon. Such function macros +# are typically used for boiler-plate code, and will confuse the parser if not +# removed. +# The default value is: YES. +# This tag requires that the tag ENABLE_PREPROCESSING is set to YES. + +SKIP_FUNCTION_MACROS = YES + +#--------------------------------------------------------------------------- +# Configuration options related to external references +#--------------------------------------------------------------------------- + +# The TAGFILES tag can be used to specify one or more tag files. For each tag +# file the location of the external documentation should be added. The format of +# a tag file without this location is as follows: +# TAGFILES = file1 file2 ... 
+# Adding location for the tag files is done as follows: +# TAGFILES = file1=loc1 "file2 = loc2" ... +# where loc1 and loc2 can be relative or absolute paths or URLs. See the +# section "Linking to external documentation" for more information about the use +# of tag files. +# Note: Each tag file must have a unique name (where the name does NOT include +# the path). If a tag file is not located in the directory in which doxygen is +# run, you must also specify the path to the tagfile here. + +TAGFILES = + +# When a file name is specified after GENERATE_TAGFILE, doxygen will create a +# tag file that is based on the input files it reads. See section "Linking to +# external documentation" for more information about the usage of tag files. + +GENERATE_TAGFILE = + +# If the ALLEXTERNALS tag is set to YES, all external classes will be listed in +# the class index. If set to NO, only the inherited external classes will be +# listed. +# The default value is: NO. + +ALLEXTERNALS = NO + +# If the EXTERNAL_GROUPS tag is set to YES, all external groups will be listed +# in the modules index. If set to NO, only the current project's groups will be +# listed. +# The default value is: YES. + +EXTERNAL_GROUPS = YES + +# If the EXTERNAL_PAGES tag is set to YES, all external pages will be listed in +# the related pages index. If set to NO, only the current project's pages will +# be listed. +# The default value is: YES. + +EXTERNAL_PAGES = YES + +# The PERL_PATH should be the absolute path and name of the perl script +# interpreter (i.e. the result of 'which perl'). +# The default file (with absolute path) is: /usr/bin/perl.
+ +PERL_PATH = /usr/bin/perl + +#--------------------------------------------------------------------------- +# Configuration options related to the dot tool +#--------------------------------------------------------------------------- + +# If the CLASS_DIAGRAMS tag is set to YES, doxygen will generate a class diagram +# (in HTML and LaTeX) for classes with base or super classes. Setting the tag to +# NO turns the diagrams off. Note that this option also works with HAVE_DOT +# disabled, but it is recommended to install and use dot, since it yields more +# powerful graphs. +# The default value is: YES. + +CLASS_DIAGRAMS = YES + +# You can define message sequence charts within doxygen comments using the \msc +# command. Doxygen will then run the mscgen tool (see: +# http://www.mcternan.me.uk/mscgen/) to produce the chart and insert it in the +# documentation. The MSCGEN_PATH tag allows you to specify the directory where +# the mscgen tool resides. If left empty the tool is assumed to be found in the +# default search path. + +MSCGEN_PATH = + +# You can include diagrams made with dia in doxygen documentation. Doxygen will +# then run dia to produce the diagram and insert it in the documentation. The +# DIA_PATH tag allows you to specify the directory where the dia binary resides. +# If left empty dia is assumed to be found in the default search path. + +DIA_PATH = + +# If set to YES the inheritance and collaboration graphs will hide inheritance +# and usage relations if the target is undocumented or is not a class. +# The default value is: YES. + +HIDE_UNDOC_RELATIONS = YES + +# If you set the HAVE_DOT tag to YES then doxygen will assume the dot tool is +# available from the path. This tool is part of Graphviz (see: +# http://www.graphviz.org/), a graph visualization toolkit from AT&T and Lucent +# Bell Labs. The other options in this section have no effect if this option is +# set to NO. +# The default value is: YES.
+ +HAVE_DOT = YES + +# The DOT_NUM_THREADS specifies the number of dot invocations doxygen is allowed +# to run in parallel. When set to 0 doxygen will base this on the number of +# processors available in the system. You can set it explicitly to a value +# larger than 0 to get control over the balance between CPU load and processing +# speed. +# Minimum value: 0, maximum value: 32, default value: 0. +# This tag requires that the tag HAVE_DOT is set to YES. + +DOT_NUM_THREADS = 0 + +# When you want a differently looking font in the dot files that doxygen +# generates you can specify the font name using DOT_FONTNAME. You need to make +# sure dot is able to find the font, which can be done by putting it in a +# standard location or by setting the DOTFONTPATH environment variable or by +# setting DOT_FONTPATH to the directory containing the font. +# The default value is: Helvetica. +# This tag requires that the tag HAVE_DOT is set to YES. + +DOT_FONTNAME = Helvetica + +# The DOT_FONTSIZE tag can be used to set the size (in points) of the font of +# dot graphs. +# Minimum value: 4, maximum value: 24, default value: 10. +# This tag requires that the tag HAVE_DOT is set to YES. + +DOT_FONTSIZE = 10 + +# By default doxygen will tell dot to use the default font as specified with +# DOT_FONTNAME. If you specify a different font using DOT_FONTNAME you can set +# the path where dot can find it using this tag. +# This tag requires that the tag HAVE_DOT is set to YES. + +DOT_FONTPATH = + +# If the CLASS_GRAPH tag is set to YES then doxygen will generate a graph for +# each documented class showing the direct and indirect inheritance relations. +# Setting this tag to YES will force the CLASS_DIAGRAMS tag to NO. +# The default value is: YES. +# This tag requires that the tag HAVE_DOT is set to YES. 
+ +CLASS_GRAPH = YES + +# If the COLLABORATION_GRAPH tag is set to YES then doxygen will generate a +# graph for each documented class showing the direct and indirect implementation +# dependencies (inheritance, containment, and class references variables) of the +# class with other documented classes. +# The default value is: YES. +# This tag requires that the tag HAVE_DOT is set to YES. + +COLLABORATION_GRAPH = YES + +# If the GROUP_GRAPHS tag is set to YES then doxygen will generate a graph for +# groups, showing the direct groups dependencies. +# The default value is: YES. +# This tag requires that the tag HAVE_DOT is set to YES. + +GROUP_GRAPHS = YES + +# If the UML_LOOK tag is set to YES, doxygen will generate inheritance and +# collaboration diagrams in a style similar to the OMG's Unified Modeling +# Language. +# The default value is: NO. +# This tag requires that the tag HAVE_DOT is set to YES. + +UML_LOOK = NO + +# If the UML_LOOK tag is enabled, the fields and methods are shown inside the +# class node. If there are many fields or methods and many nodes the graph may +# become too big to be useful. The UML_LIMIT_NUM_FIELDS threshold limits the +# number of items for each type to make the size more manageable. Set this to 0 +# for no limit. Note that the threshold may be exceeded by 50% before the limit +# is enforced. So when you set the threshold to 10, up to 15 fields may appear, +# but if the number exceeds 15, the total amount of fields shown is limited to +# 10. +# Minimum value: 0, maximum value: 100, default value: 10. +# This tag requires that the tag HAVE_DOT is set to YES. + +UML_LIMIT_NUM_FIELDS = 10 + +# If the TEMPLATE_RELATIONS tag is set to YES then the inheritance and +# collaboration graphs will show the relations between templates and their +# instances. +# The default value is: NO. +# This tag requires that the tag HAVE_DOT is set to YES. 
+ +TEMPLATE_RELATIONS = NO + +# If the INCLUDE_GRAPH, ENABLE_PREPROCESSING and SEARCH_INCLUDES tags are set to +# YES then doxygen will generate a graph for each documented file showing the +# direct and indirect include dependencies of the file with other documented +# files. +# The default value is: YES. +# This tag requires that the tag HAVE_DOT is set to YES. + +INCLUDE_GRAPH = YES + +# If the INCLUDED_BY_GRAPH, ENABLE_PREPROCESSING and SEARCH_INCLUDES tags are +# set to YES then doxygen will generate a graph for each documented file showing +# the direct and indirect include dependencies of the file with other documented +# files. +# The default value is: YES. +# This tag requires that the tag HAVE_DOT is set to YES. + +INCLUDED_BY_GRAPH = YES + +# If the CALL_GRAPH tag is set to YES then doxygen will generate a call +# dependency graph for every global function or class method. +# +# Note that enabling this option will significantly increase the time of a run. +# So in most cases it will be better to enable call graphs for selected +# functions only using the \callgraph command. Disabling a call graph can be +# accomplished by means of the command \hidecallgraph. +# The default value is: NO. +# This tag requires that the tag HAVE_DOT is set to YES. + +CALL_GRAPH = NO + +# If the CALLER_GRAPH tag is set to YES then doxygen will generate a caller +# dependency graph for every global function or class method. +# +# Note that enabling this option will significantly increase the time of a run. +# So in most cases it will be better to enable caller graphs for selected +# functions only using the \callergraph command. Disabling a caller graph can be +# accomplished by means of the command \hidecallergraph. +# The default value is: NO. +# This tag requires that the tag HAVE_DOT is set to YES. + +CALLER_GRAPH = NO + +# If the GRAPHICAL_HIERARCHY tag is set to YES then doxygen will generate a graphical +# hierarchy of all classes instead of a textual one.
+# The default value is: YES. +# This tag requires that the tag HAVE_DOT is set to YES. + +GRAPHICAL_HIERARCHY = YES + +# If the DIRECTORY_GRAPH tag is set to YES then doxygen will show the +# dependencies a directory has on other directories in a graphical way. The +# dependency relations are determined by the #include relations between the +# files in the directories. +# The default value is: YES. +# This tag requires that the tag HAVE_DOT is set to YES. + +DIRECTORY_GRAPH = YES + +# The DOT_IMAGE_FORMAT tag can be used to set the image format of the images +# generated by dot. For an explanation of the image formats see the section +# output formats in the documentation of the dot tool (Graphviz (see: +# http://www.graphviz.org/)). +# Note: If you choose svg you need to set HTML_FILE_EXTENSION to xhtml in order +# to make the SVG files visible in IE 9+ (other browsers do not have this +# requirement). +# Possible values are: png, png:cairo, png:cairo:cairo, png:cairo:gd, png:gd, +# png:gd:gd, jpg, jpg:cairo, jpg:cairo:gd, jpg:gd, jpg:gd:gd, gif, gif:cairo, +# gif:cairo:gd, gif:gd, gif:gd:gd, svg, png:gd, png:gd:gd, png:cairo, +# png:cairo:gd, png:cairo:cairo, png:cairo:gdiplus, png:gdiplus and +# png:gdiplus:gdiplus. +# The default value is: png. +# This tag requires that the tag HAVE_DOT is set to YES. + +DOT_IMAGE_FORMAT = png + +# If DOT_IMAGE_FORMAT is set to svg, then this option can be set to YES to +# enable generation of interactive SVG images that allow zooming and panning. +# +# Note that this requires a modern browser other than Internet Explorer. Tested +# and working are Firefox, Chrome, Safari, and Opera. +# Note: For IE 9+ you need to set HTML_FILE_EXTENSION to xhtml in order to make +# the SVG files visible. Older versions of IE do not have SVG support. +# The default value is: NO. +# This tag requires that the tag HAVE_DOT is set to YES. 
+ +INTERACTIVE_SVG = NO + +# The DOT_PATH tag can be used to specify the path where the dot tool can be +# found. If left blank, it is assumed the dot tool can be found in the path. +# This tag requires that the tag HAVE_DOT is set to YES. + +DOT_PATH = + +# The DOTFILE_DIRS tag can be used to specify one or more directories that +# contain dot files that are included in the documentation (see the \dotfile +# command). +# This tag requires that the tag HAVE_DOT is set to YES. + +DOTFILE_DIRS = + +# The MSCFILE_DIRS tag can be used to specify one or more directories that +# contain msc files that are included in the documentation (see the \mscfile +# command). + +MSCFILE_DIRS = + +# The DIAFILE_DIRS tag can be used to specify one or more directories that +# contain dia files that are included in the documentation (see the \diafile +# command). + +DIAFILE_DIRS = + +# When using plantuml, the PLANTUML_JAR_PATH tag should be used to specify the +# path where java can find the plantuml.jar file. If left blank, it is assumed +# PlantUML is not used or called during a preprocessing step. Doxygen will +# generate a warning when it encounters a \startuml command in this case and +# will not generate output for the diagram. + +PLANTUML_JAR_PATH = + +# When using plantuml, the PLANTUML_CFG_FILE tag can be used to specify a +# configuration file for plantuml. + +PLANTUML_CFG_FILE = + +# When using plantuml, the specified paths are searched for files specified by +# the !include statement in a plantuml block. + +PLANTUML_INCLUDE_PATH = + +# The DOT_GRAPH_MAX_NODES tag can be used to set the maximum number of nodes +# that will be shown in the graph. If the number of nodes in a graph becomes +# larger than this value, doxygen will truncate the graph, which is visualized +# by representing a node as a red box. Note that if the number of direct +# children of the root node in a graph is already larger than +# DOT_GRAPH_MAX_NODES then the graph will not be shown at all.
Also note that +# the size of a graph can be further restricted by MAX_DOT_GRAPH_DEPTH. +# Minimum value: 0, maximum value: 10000, default value: 50. +# This tag requires that the tag HAVE_DOT is set to YES. + +DOT_GRAPH_MAX_NODES = 50 + +# The MAX_DOT_GRAPH_DEPTH tag can be used to set the maximum depth of the graphs +# generated by dot. A depth value of 3 means that only nodes reachable from the +# root by following a path via at most 3 edges will be shown. Nodes that lie +# further from the root node will be omitted. Note that setting this option to 1 +# or 2 may greatly reduce the computation time needed for large code bases. Also +# note that the size of a graph can be further restricted by +# DOT_GRAPH_MAX_NODES. Using a depth of 0 means no depth restriction. +# Minimum value: 0, maximum value: 1000, default value: 0. +# This tag requires that the tag HAVE_DOT is set to YES. + +MAX_DOT_GRAPH_DEPTH = 0 + +# Set the DOT_TRANSPARENT tag to YES to generate images with a transparent +# background. This is disabled by default, because dot on Windows does not seem +# to support this out of the box. +# +# Warning: Depending on the platform used, enabling this option may lead to +# badly anti-aliased labels on the edges of a graph (i.e. they become hard to +# read). +# The default value is: NO. +# This tag requires that the tag HAVE_DOT is set to YES. + +DOT_TRANSPARENT = NO + +# Set the DOT_MULTI_TARGETS tag to YES to allow dot to generate multiple output +# files in one run (i.e. multiple -o and -T options on the command line). This +# makes dot run faster, but since only newer versions of dot (>1.8.10) support +# this, this feature is disabled by default. +# The default value is: NO. +# This tag requires that the tag HAVE_DOT is set to YES. + +DOT_MULTI_TARGETS = NO + +# If the GENERATE_LEGEND tag is set to YES doxygen will generate a legend page +# explaining the meaning of the various boxes and arrows in the dot generated +# graphs. +# The default value is: YES.
+# This tag requires that the tag HAVE_DOT is set to YES. + +GENERATE_LEGEND = YES + +# If the DOT_CLEANUP tag is set to YES, doxygen will remove the intermediate dot +# files that are used to generate the various graphs. +# The default value is: YES. +# This tag requires that the tag HAVE_DOT is set to YES. + +DOT_CLEANUP = YES diff --git a/documentation/cpp/references.bib b/documentation/cpp/references.bib new file mode 100644 index 0000000000000000000000000000000000000000..c8bdd9453c54609cd8eac207a9d4369e275baf8e --- /dev/null +++ b/documentation/cpp/references.bib @@ -0,0 +1,14 @@ +@ARTICLE{Buzzicotti2017, + author = {{Buzzicotti}, M. and {Linkmann}, M. and {Aluie}, H. and {Biferale}, L. and + {Brasseur}, J. and {Meneveau}, C.}, + title = "{Effect of filter type on the statistics of energy transfer between resolved and subfilter scales from a-priori analysis of direct numerical simulations of isotropic turbulence}", + journal = {ArXiv e-prints}, +archivePrefix = "arXiv", + eprint = {1706.03219}, + primaryClass = "physics.flu-dyn", + keywords = {Physics - Fluid Dynamics}, + year = 2017, + month = jun, + adsurl = {http://adsabs.harvard.edu/abs/2017arXiv170603219B}, + adsnote = {Provided by the SAO/NASA Astrophysics Data System} +} diff --git a/documentation/figs/interpolation.py b/documentation/figs/interpolation.py new file mode 100644 index 0000000000000000000000000000000000000000..302efcc157971b8b0407bb76bd3e7be6437f1206 --- /dev/null +++ b/documentation/figs/interpolation.py @@ -0,0 +1,52 @@ +import matplotlib.pyplot as plt +import matplotlib.patches as mpatches +import math + +def main(): + slab = 2 + nproc = 5 + f = plt.figure(figsize = (6, 4.5)) + a = f.add_subplot(111) + for p in range(nproc): + color = plt.get_cmap('plasma')(p*1./nproc) + a.add_patch( + mpatches.Rectangle( + [0, p*slab], + slab*(nproc+2)-1, 1, + color = color, + alpha = .2)) + a.text(-.5, p*slab+.5, '$p_{0}$'.format(p), + verticalalignment = 'center') + for y in range((nproc+2)*slab): + 
a.plot([y, y], + range(p*slab, (p+1)*slab), + marker = '.', + linestyle = 'none', + color = color) + for X, Y in [(9.9, 6.3), + (3.3, 3.7)]: + a.plot([X], [Y], + color = 'black', + marker = 'x') + for n in [1, 2]: + a.add_patch( + mpatches.Rectangle( + [math.floor(X-n), math.floor(Y-n)], + 2*n+1, 2*n+1, + color = 'green', + alpha = .2)) + a.text(math.floor(X)+.5, math.floor(Y - n)-.3, + '$n = {0}$'.format(n), + horizontalalignment = 'center') + a.set_ylim(bottom = -1, top = 10) + a.set_xlim(left = -1) + a.set_ylabel('$z$') + a.set_xlabel('$x,y$') + a.set_aspect('equal') + f.tight_layout() + f.savefig('interp_problem.pdf') + return None + +if __name__ == '__main__': + main() + diff --git a/done.txt b/done.txt deleted file mode 100644 index 2064592cc9dd7a6e278c9980770882e636b8a2be..0000000000000000000000000000000000000000 --- a/done.txt +++ /dev/null @@ -1,21 +0,0 @@ -x 2015-12-04 make code py3 compatible @python3 -x 2015-12-23 decide on versioning system +merge0 -x 2015-12-24 move get_grid coords to interpolator @optimization +v1.0 -x 2015-12-25 get rid of temporal interpolation @optimization +v1.0 -x 2015-12-26 call interpolation only when needed @optimization +v1.0 -x 2015-12-26 clean up tox files, make sure all tests run @tests +v1.0 -x 2016-01-03 check divfree function -x 2016-01-03 compute kMeta(t) as well -x 2016-01-03 split library into core and extra @optimization +v1.0 -x 2016-01-07 FFTW interpolator doesn't need its own field @optimization +v1.0 +particle_api -x 2016-01-08 simplify tracer/field addition mechanism @design +v1.0 +particle_api -x 2016-01-08 add stat choice parameter to add_particles @design +v1.0 +particle_api -x 2016-01-15 particle output is broken when niter_part != 1 @bugfix -x 2016-01-19 clean up machine_settings mess @design @documentation +v2.0 -x 2016-01-24 clear delimitation of public API @documentation +v1.0 -x 2016-01-24 document coordinate conventions @documentation +v1.0 -x 2016-01-24 move parameters from _fluid_particle_base to 
NavierStokes etc @design -x 2016-01-29 install_info should be renamed to bfps_info in data file -x 2016-02-01 tweak HDF5 settings @optimization @HDF5 +I/O -x 2016-03-02 code overview @documentation -x 2016-04-29 use HDF5 io for fields @design @HDF5 +I/O diff --git a/examples/NS0SliceParticles.py b/examples/NS0SliceParticles.py new file mode 100644 index 0000000000000000000000000000000000000000..7c089405988a1c6eef6a1c7649e11c7a4a6edcaa --- /dev/null +++ b/examples/NS0SliceParticles.py @@ -0,0 +1,126 @@ +####################################################################### +# # +# Copyright 2015 Max Planck Institute # +# for Dynamics and Self-Organization # +# # +# This file is part of bfps. # +# # +# bfps is free software: you can redistribute it and/or modify # +# it under the terms of the GNU General Public License as published # +# by the Free Software Foundation, either version 3 of the License, # +# or (at your option) any later version. # +# # +# bfps is distributed in the hope that it will be useful, # +# but WITHOUT ANY WARRANTY; without even the implied warranty of # +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # +# GNU General Public License for more details. # +# # +# You should have received a copy of the GNU General Public License # +# along with bfps. If not, see <http://www.gnu.org/licenses/> # +# # +# Contact: Cristian.Lalescu@ds.mpg.de # +# # +####################################################################### + + + +import os +import sys +import bfps +import numpy as np + +class NS0SliceParticles(bfps.NavierStokes): + """ + Example of how bfps is envisioned to be used. + Standard NavierStokes class is inherited, and then new functionality + added on top. + In particular, this class will run a DNS with particles starting on a square + grid in the z=0 slice of the field.
+ """ + standard_names = ['NS0SP', + 'NS0SP-single', + 'NS0SP-double'] + def __init__( + self, + name = 'NS0SliceParticles-v' + bfps.__version__, + **kwargs): + bfps.NavierStokes.__init__( + self, + name = name, + **kwargs) + return None + def specific_parser_arguments( + self, + parser): + bfps.NavierStokes.specific_parser_arguments(self, parser) + parser.add_argument( + '--pcloudX', + type = float, + dest = 'pcloudX', + default = 0.0) + parser.add_argument( + '--pcloudY', + type = float, + dest = 'pcloudY', + default = 0.0) + return None + def launch_jobs( + self, + opt = None): + if not os.path.exists(os.path.join(self.work_dir, self.simname + '.h5')): + particle_initial_condition = None + if self.parameters['nparticles'] > 0: + # the extra dimension of 1 is because I want + # a single chunk of particles. + particle_initial_condition = np.zeros( + (1, + self.parameters['nparticles'], + self.parameters['nparticles'], + 3), + dtype = np.float64) + xvals = (opt.pcloudX + + np.linspace(-opt.particle_cloud_size/2, + opt.particle_cloud_size/2, + self.parameters['nparticles'])) + yvals = (opt.pcloudY + + np.linspace(-opt.particle_cloud_size/2, + opt.particle_cloud_size/2, + self.parameters['nparticles'])) + particle_initial_condition[..., 0] = xvals[None, None, :] + particle_initial_condition[..., 1] = yvals[None, :, None] + self.write_par( + particle_ic = particle_initial_condition) + if self.parameters['nparticles'] > 0: + data = self.generate_tracer_state( + species = 0, + rseed = opt.particle_rand_seed, + data = particle_initial_condition) + init_condition_file = os.path.join( + self.work_dir, + self.simname + '_cvorticity_i{0:0>5x}'.format(0)) + if not os.path.exists(init_condition_file): + if len(opt.src_simname) > 0: + src_file = os.path.join( + os.path.realpath(opt.src_work_dir), + opt.src_simname + '_cvorticity_i{0:0>5x}'.format(opt.src_iteration)) + os.symlink(src_file, init_condition_file) + else: + self.generate_vector_field( + write_to_file = True, + 
spectra_slope = 2.0, + amplitude = 0.05) + self.run( + ncpu = opt.ncpu, + njobs = opt.njobs, + hours = opt.minutes // 60, + minutes = opt.minutes % 60) + return None + +def main(): + c = NS0SliceParticles() + c.launch(args = sys.argv[1:]) + return None + +if __name__ == '__main__': + main() + diff --git a/examples/NSBufferedParticles.py b/examples/NSBufferedParticles.py new file mode 100644 index 0000000000000000000000000000000000000000..34906576d62e2b2cac68f2d6c261129b23d667b7 --- /dev/null +++ b/examples/NSBufferedParticles.py @@ -0,0 +1,51 @@ +import bfps +import argparse +import sys + +class NSBufferedParticles(bfps.NavierStokes): + """ + Another example. + This class behaves identically to NavierStokes, except that it uses a + buffered interpolator, and the corresponding distributed_particles class. + """ + standard_names = ['NSBP', + 'NSBP-single', + 'NSBP-double'] + def launch( + self, + args = [], + noparticles = False, + **kwargs): + self.name = 'NSBufferedParticles-v' + bfps.__version__ + opt = self.prepare_launch(args = args) + self.fill_up_fluid_code() + if noparticles: + opt.nparticles = 0 + elif type(opt.nparticles) == int: + if opt.nparticles > 0: + self.name += '-particles' + self.add_3D_rFFTW_field( + name = 'rFFTW_acc') + self.add_interpolator( + name = 'cubic_spline', + neighbours = opt.neighbours, + smoothness = opt.smoothness, + class_name = 'interpolator') + self.add_particles( + integration_steps = [4], + interpolator = 'cubic_spline', + acc_name = 'rFFTW_acc', + class_name = 'distributed_particles') + self.finalize_code() + self.launch_jobs(opt = opt) + return None + +if __name__ == '__main__': + parser = argparse.ArgumentParser(prog = 'NSBufferedParticles') + parser.add_argument( + '-v', '--version', + action = 'version', + version = '%(prog)s ' + bfps.__version__) + c = NSBufferedParticles(fluid_precision = 'single') + c.launch(args = sys.argv[1:]) + diff --git a/examples/NavierStokesDB.py b/examples/NavierStokesDB.py new file mode 100644 
index 0000000000000000000000000000000000000000..d099ad308e8fa47aea08275bc80694da796465b2 --- /dev/null +++ b/examples/NavierStokesDB.py @@ -0,0 +1,112 @@ +####################################################################### +# # +# Copyright 2015 Max Planck Institute # +# for Dynamics and Self-Organization # +# # +# This file is part of bfps. # +# # +# bfps is free software: you can redistribute it and/or modify # +# it under the terms of the GNU General Public License as published # +# by the Free Software Foundation, either version 3 of the License, # +# or (at your option) any later version. # +# # +# bfps is distributed in the hope that it will be useful, # +# but WITHOUT ANY WARRANTY; without even the implied warranty of # +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # +# GNU General Public License for more details. # +# # +# You should have received a copy of the GNU General Public License # +# along with bfps. If not, see <http://www.gnu.org/licenses/> # +# # +# Contact: Cristian.Lalescu@ds.mpg.de # +# # +####################################################################### + + + +import os +import h5py +import bfps + +class NavierStokesDB(bfps.NavierStokes): + """ + Example of how bfps is envisioned to be used. + Standard NavierStokes class is inherited, and then new functionality + added on top. + In particular, this class will generate an HDF5 file containing a 5D + array representing the time history of the velocity field. + Snapshots are saved every "niter_stat" iterations. + + No effort was spent on optimizing the HDF5 file access, since the code + was only used for a teeny DNS of 72^3 so far. 
+ """ + standard_names = ['NSDB', + 'NSDB-single', + 'NSDB-double'] + def __init__( + self, + name = 'NavierStokesDataBase-v' + bfps.__version__, + **kwargs): + bfps.NavierStokes.__init__( + self, + name = name, + **kwargs) + self.file_datasets_grow += """ + { + if (myrank == 0) + { + hid_t database_file; + char dbfname[256]; + sprintf(dbfname, "%s_field_database.h5", simname); + database_file = H5Fopen(dbfname, H5F_ACC_RDWR, H5P_DEFAULT); + hsize_t dset = H5Dopen(database_file, "rvelocity", H5P_DEFAULT); + grow_single_dataset(dset, niter_todo/niter_stat); + H5Dclose(dset); + H5Fclose(database_file); + } + } + """ + self.stat_src += """ + { + fs->compute_velocity(fs->cvorticity); + *tmp_vec_field = fs->cvelocity; + tmp_vec_field->ift(); + char dbfname[256]; + sprintf(dbfname, "%s_field_database.h5", simname); + tmp_vec_field->io(dbfname, "rvelocity", fs->iteration / niter_stat, false); + } + """ + return None + def get_database_file_name(self): + return os.path.join(self.work_dir, self.simname + '_field_database.h5') + def get_database_file(self): + return h5py.File(self.get_postprocess_file_name(), 'r') + def write_par( + self, + iter0 = 0, + **kwargs): + bfps.NavierStokes.write_par( + self, + iter0 = iter0, + **kwargs) + with h5py.File(self.get_database_file_name(), 'a') as ofile: + ofile.create_dataset( + 'rvelocity', + (1, + self.parameters['nz'], + self.parameters['ny'], + self.parameters['nx'], + 3), + chunks = (1, + self.parameters['nz'], + self.parameters['ny'], + self.parameters['nx'], + 3), + maxshape = (None, + self.parameters['nz'], + self.parameters['ny'], + self.parameters['nx'], + 3), + dtype = self.rtype) + return None + diff --git a/machine_settings_py.py b/machine_settings_py.py index 22123e391aa14151e2f1d4b4c8c0b5c8d6a1c435..787f1d5a10b9b0b260b42a1da18d35e67c56dacc 100644 --- a/machine_settings_py.py +++ b/machine_settings_py.py @@ -37,6 +37,7 @@ import os hostname = os.getenv('HOSTNAME') +compiler = 'g++' extra_compile_args = ['-Wall', '-O2', 
'-g', '-mtune=native', '-ffast-math', '-std=c++11'] extra_libraries = ['hdf5'] include_dirs = [] diff --git a/setup.py b/setup.py index c9bbc9c1d956d4d74d6344e19d1d220b1ff12b0b..1178f54781c647a3d49e4640a1e19b3092abf4c6 100644 --- a/setup.py +++ b/setup.py @@ -52,7 +52,7 @@ if not os.path.exists(os.path.join(bfpsfolder, 'host_information.py')): shutil.copyfile('./machine_settings_py.py', os.path.join(bfpsfolder, 'machine_settings.py')) sys.path.insert(0, bfpsfolder) # import stuff required for compilation of static library -from machine_settings import include_dirs, library_dirs, extra_compile_args, extra_libraries +from machine_settings import compiler, include_dirs, library_dirs, extra_compile_args, extra_libraries ### package versioning @@ -88,7 +88,20 @@ print('This is bfps version ' + VERSION) ### lists of files and MANIFEST.in -src_file_list = ['field', +src_file_list = ['hdf5_tools', + 'full_code/get_rfields', + 'full_code/NSVE_field_stats', + 'full_code/native_binary_to_hdf5', + 'full_code/postprocess', + 'full_code/code_base', + 'full_code/direct_numerical_simulation', + 'full_code/NSVE', + 'full_code/NSVEparticles', + 'field_binary_IO', + 'vorticity_equation', + 'field', + 'kspace', + 'field_layout', 'field_descriptor', 'rFFTW_distributed_particles', 'distributed_particles', @@ -99,7 +112,6 @@ src_file_list = ['field', 'interpolator_base', 'fluid_solver', 'fluid_solver_base', - 'io_tools', 'fftw_tools', 'spline_n1', 'spline_n2', @@ -107,11 +119,47 @@ src_file_list = ['field', 'spline_n4', 'spline_n5', 'spline_n6', - 'Lagrange_polys'] + 'spline_n7', + 'spline_n8', + 'spline_n9', + 'spline_n10', + 'Lagrange_polys', + 'scope_timer'] + +particle_headers = [ + 'cpp/particles/particles_distr_mpi.hpp', + 'cpp/particles/abstract_particles_input.hpp', + 'cpp/particles/abstract_particles_output.hpp', + 'cpp/particles/abstract_particles_system.hpp', + 'cpp/particles/alltoall_exchanger.hpp', + 'cpp/particles/particles_adams_bashforth.hpp', + 
'cpp/particles/particles_field_computer.hpp', + 'cpp/particles/particles_input_hdf5.hpp', + 'cpp/particles/particles_generic_interp.hpp', + 'cpp/particles/particles_output_hdf5.hpp', + 'cpp/particles/particles_output_mpiio.hpp', + 'cpp/particles/particles_system_builder.hpp', + 'cpp/particles/particles_system.hpp', + 'cpp/particles/particles_utils.hpp', + 'cpp/particles/particles_output_sampling_hdf5.hpp', + 'cpp/particles/particles_sampling.hpp', + 'cpp/particles/env_utils.hpp'] + +full_code_headers = ['cpp/full_code/main_code.hpp', + 'cpp/full_code/codes_with_no_output.hpp', + 'cpp/full_code/NSVE_no_output.hpp', + 'cpp/full_code/NSVEparticles_no_output.hpp'] header_list = (['cpp/base.hpp'] + + ['cpp/fftw_interface.hpp'] + + ['cpp/bfps_timer.hpp'] + + ['cpp/omputils.hpp'] + + ['cpp/shared_array.hpp'] + + ['cpp/spline.hpp'] + ['cpp/' + fname + '.hpp' - for fname in src_file_list]) + for fname in src_file_list] + + particle_headers + + full_code_headers) with open('MANIFEST.in', 'w') as manifest_in_file: for fname in (['bfps/cpp/' + ff + '.cpp' for ff in src_file_list] + @@ -121,91 +169,106 @@ with open('MANIFEST.in', 'w') as manifest_in_file: ### libraries -libraries = ['fftw3_mpi', - 'fftw3', - 'fftw3f_mpi', - 'fftw3f'] -libraries += extra_libraries - - - -### save compiling information -pickle.dump( - {'include_dirs' : include_dirs, - 'library_dirs' : library_dirs, - 'extra_compile_args' : extra_compile_args, - 'libraries' : libraries, - 'install_date' : now, - 'VERSION' : VERSION, - 'git_revision' : git_revision}, - open('bfps/install_info.pickle', 'wb'), - protocol = 2) +libraries = extra_libraries +import distutils.cmd -def compile_bfps_library(): - if not os.path.isdir('obj'): - os.makedirs('obj') - need_to_compile = True - else: - ofile = 'bfps/libbfps.a' - libtime = datetime.datetime.fromtimestamp(os.path.getctime(ofile)) - latest = libtime - for fname in header_list: - latest = max(latest, - datetime.datetime.fromtimestamp(os.path.getctime('bfps/' + 
fname))) - need_to_compile = (latest > libtime) - for fname in src_file_list: - ifile = 'bfps/cpp/' + fname + '.cpp' - ofile = 'obj/' + fname + '.o' - if not os.path.exists(ofile): - need_to_compile_file = True - else: - need_to_compile_file = (need_to_compile or - (datetime.datetime.fromtimestamp(os.path.getctime(ofile)) < - datetime.datetime.fromtimestamp(os.path.getctime(ifile)))) - if need_to_compile_file: - command_strings = ['g++', '-c'] - command_strings += ['bfps/cpp/' + fname + '.cpp'] - command_strings += ['-o', 'obj/' + fname + '.o'] - command_strings += extra_compile_args - command_strings += ['-I' + idir for idir in include_dirs] - command_strings.append('-Ibfps/cpp/') - print(' '.join(command_strings)) - assert(subprocess.call(command_strings) == 0) - command_strings = ['ar', 'rvs', 'bfps/libbfps.a'] - command_strings += ['obj/' + fname + '.o' for fname in src_file_list] - print(' '.join(command_strings)) - assert(subprocess.call(command_strings) == 0) - return None - -from distutils.command.build import build as DistutilsBuild -from distutils.command.install import install as DistutilsInstall - -class CustomBuild(DistutilsBuild): +class CompileLibCommand(distutils.cmd.Command): + description = 'Compile bfps library.' 
+ user_options = [ + ('timing-output=', None, 'Toggle timing output.'), + ('fftw-estimate=', None, 'Use FFTW ESTIMATE.'), + ('disable-fftw-omp=', None, 'Turn Off FFTW OpenMP.'), + ] + def initialize_options(self): + self.timing_output = 0 + self.fftw_estimate = 0 + self.disable_fftw_omp = 0 + return None + def finalize_options(self): + self.timing_output = (int(self.timing_output) == 1) + self.fftw_estimate = (int(self.fftw_estimate) == 1) + self.disable_fftw_omp = (int(self.disable_fftw_omp) == 1) + return None def run(self): - compile_bfps_library() - DistutilsBuild.run(self) + if not os.path.isdir('obj'): + os.makedirs('obj') + need_to_compile = True + if not os.path.isdir('obj/full_code'): + os.makedirs('obj/full_code') + need_to_compile = True + if not os.path.isfile('bfps/libbfps.a'): + need_to_compile = True + else: + ofile = 'bfps/libbfps.a' + libtime = datetime.datetime.fromtimestamp(os.path.getctime(ofile)) + latest = libtime + for fname in header_list: + latest = max(latest, + datetime.datetime.fromtimestamp(os.path.getctime('bfps/' + fname))) + need_to_compile = (latest > libtime) + eca = extra_compile_args + eca += ['-fPIC'] + if self.timing_output: + eca += ['-DUSE_TIMINGOUTPUT'] + if self.fftw_estimate: + eca += ['-DUSE_FFTWESTIMATE'] + if self.disable_fftw_omp: + eca += ['-DNO_FFTWOMP'] + for fname in src_file_list: + ifile = 'bfps/cpp/' + fname + '.cpp' + ofile = 'obj/' + fname + '.o' + if not os.path.exists(ofile): + need_to_compile_file = True + else: + need_to_compile_file = (need_to_compile or + (datetime.datetime.fromtimestamp(os.path.getctime(ofile)) < + datetime.datetime.fromtimestamp(os.path.getctime(ifile)))) + if need_to_compile_file: + command_strings = [compiler, '-c'] + command_strings += ['bfps/cpp/' + fname + '.cpp'] + command_strings += ['-o', 'obj/' + fname + '.o'] + command_strings += eca + command_strings += ['-I' + idir for idir in include_dirs] + command_strings.append('-Ibfps/cpp/') + print(' '.join(command_strings)) + 
subprocess.check_call(command_strings) + command_strings = ['ar', 'rvs', 'bfps/libbfps.a'] + command_strings += ['obj/' + fname + '.o' for fname in src_file_list] + print(' '.join(command_strings)) + subprocess.check_call(command_strings) -# this custom install leads to a broken installation. no idea why... -class CustomInstall(DistutilsInstall): - def run(self): - compile_bfps_library() - DistutilsInstall.run(self) + ### save compiling information + pickle.dump( + {'include_dirs' : include_dirs, + 'library_dirs' : library_dirs, + 'compiler' : compiler, + 'extra_compile_args' : eca, + 'libraries' : libraries, + 'install_date' : now, + 'VERSION' : VERSION, + 'git_revision' : git_revision}, + open('bfps/install_info.pickle', 'wb'), + protocol = 2) + return None from setuptools import setup setup( name = 'bfps', - packages = ['bfps'], + packages = ['bfps', 'bfps/test'], install_requires = ['numpy>=1.8', 'h5py>=2.2.1'], - cmdclass={'build' : CustomBuild}, - package_data = {'bfps': header_list + ['libbfps.a', - 'install_info.pickle']}, + cmdclass={'compile_library' : CompileLibCommand}, + package_data = {'bfps': header_list + + ['libbfps.a', + 'install_info.pickle'] + + ['test/B32p1e4_checkpoint_0.h5']}, entry_points = { 'console_scripts': [ 'bfps = bfps.__main__:main', - 'bfps1 = bfps.__main__:main'], + 'bfps1 = bfps.__main__:main', + 'bfps.test_NSVEparticles = bfps.test.test_bfps_NSVEparticles:main'], }, version = VERSION, ######################################################################## diff --git a/tests/DNS/test_against_old_code.py b/tests/DNS/test_against_old_code.py new file mode 100644 index 0000000000000000000000000000000000000000..861100f4a490ca683f09d699dfb9e867c8fcf108 --- /dev/null +++ b/tests/DNS/test_against_old_code.py @@ -0,0 +1,303 @@ +####################################################################### +# # +# Copyright 2015 Max Planck Institute # +# for Dynamics and Self-Organization # +# # +# This file is part of bfps. 
# +# # +# bfps is free software: you can redistribute it and/or modify # +# it under the terms of the GNU General Public License as published # +# by the Free Software Foundation, either version 3 of the License, # +# or (at your option) any later version. # +# # +# bfps is distributed in the hope that it will be useful, # +# but WITHOUT ANY WARRANTY; without even the implied warranty of # +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # +# GNU General Public License for more details. # +# # +# You should have received a copy of the GNU General Public License # +# along with bfps. If not, see <http://www.gnu.org/licenses/> # +# # +# Contact: Cristian.Lalescu@ds.mpg.de # +# # +####################################################################### + + + +import sys +import os +import numpy as np +import h5py +import argparse +import subprocess + +import bfps +import bfps.tools + +from bfps_addons import NSReader +import matplotlib.pyplot as plt + +def compare_moments( + c0, c1): + df0 = c0.get_data_file() + df1 = c1.get_data_file() + f = plt.figure(figsize=(6,10)) + a = f.add_subplot(211) + a.plot(df0['statistics/moments/vorticity'][:, 2, 3], + color = 'blue', + marker = '+') + a.plot(df1['statistics/moments/vorticity'][:, 2, 3], + color = 'red', + marker = 'x') + a = f.add_subplot(212) + a.plot(df0['statistics/moments/velocity'][:, 2, 3], + color = 'blue', + marker = '+') + a.plot(df1['statistics/moments/velocity'][:, 2, 3], + color = 'red', + marker = 'x') + f.tight_layout() + f.savefig('figs/moments.pdf') + return None + +def overlap_trajectories( + c0, c1): + """ + c0 is NSReader of NavierStokes data + c1 is NSReader of NSVorticityEquation data + """ + f = plt.figure(figsize = (6, 6)) + ntrajectories = 100 + + a = f.add_subplot(111) + pf = c0.get_particle_file() + a.scatter(pf['tracers0/state'][0, :ntrajectories, 0], + pf['tracers0/state'][0, :ntrajectories, 2], + marker = '+', + color = 'blue') + a.plot(pf['tracers0/state'][:, :ntrajectories, 
0], + pf['tracers0/state'][:, :ntrajectories, 2]) + a.set_xlabel('$x$') + a.set_ylabel('$z$') + c0_initial_condition = pf['tracers0/state'][0, :ntrajectories] + pf.close() + + pf = h5py.File(c1.simname + '_checkpoint_0.h5', 'r') + state = [] + nsteps = len(pf['tracers0/state'].keys()) + for ss in range(nsteps): + state.append(pf['tracers0/state/{0}'.format( + ss*c1.parameters['niter_out'])][:ntrajectories]) + state = np.array(state) + c1_initial_condition = state[0, :] + a.scatter(state[0, :, 0], + state[0, :, 2], + marker = 'x', + color = 'red') + a.plot(state[:, :, 0], + state[:, :, 2], + dashes = (1, 1)) + a.set_xlabel('$x$') + a.set_ylabel('$z$') + f.tight_layout() + f.savefig('figs/trajectories.pdf') + + print('difference between initial conditions is {0}'.format( + np.max(np.abs(c0_initial_condition - c1_initial_condition)))) + return None + +def overlap_worst_trajectory( + c0, c1): + """ + c0 is NSReader of NavierStokes data + c1 is NSReader of NSVorticityEquation data + """ + + ntrajectories = 100 + pf0 = c0.get_particle_file() + state0 = pf0['tracers0/state'][:, :ntrajectories] + pf0.close() + + pf1 = h5py.File(c1.simname + '_checkpoint_0.h5', 'r') + state1 = [] + nsteps = len(pf1['tracers0/state'].keys()) + for ss in range(nsteps): + state1.append(pf1['tracers0/state/{0}'.format( + ss*c1.parameters['niter_out'])][:ntrajectories]) + state1 = np.array(state1) + pf1.close() + + diff = np.abs(state0 - state1) + bad_index = np.argmax(np.sum(diff[-1]**2, axis = 1)) + + f = plt.figure(figsize = (6, 10)) + + ax = f.add_subplot(311) + ay = f.add_subplot(312) + az = f.add_subplot(313) + ax.set_ylabel('$x$') + ax.set_xlabel('iteration') + ay.set_ylabel('$y$') + ay.set_xlabel('iteration') + az.set_ylabel('$z$') + az.set_xlabel('iteration') + + ax.plot(state0[:, bad_index, 0]) + ay.plot(state0[:, bad_index, 1]) + az.plot(state0[:, bad_index, 2]) + ax.plot(state1[:, bad_index, 0], dashes = (1, 1)) + ay.plot(state1[:, bad_index, 1], dashes = (1, 1)) + az.plot(state1[:, 
bad_index, 2], dashes = (1, 1)) + f.tight_layout() + f.savefig('figs/trajectories.pdf') + return None + +def get_maximum_trajectory_error( + c0, c1): + """ + c0 is NSReader of NavierStokes data + c1 is NSReader of NSVorticityEquation data + """ + + ntrajectories = 100 + pf0 = c0.get_particle_file() + state0 = pf0['tracers0/state'][:, :ntrajectories] + pf0.close() + + pf1 = h5py.File(c1.simname + '_checkpoint_0.h5', 'r') + state1 = [] + nsteps = len(pf1['tracers0/state'].keys()) + for ss in range(nsteps): + state1.append(pf1['tracers0/state/{0}'.format( + ss*c1.parameters['niter_out'])][:ntrajectories]) + state1 = np.array(state1) + pf1.close() + + diff = np.abs(state0 - state1) + max_distance = np.max(diff, axis = 1) + print(max_distance) + + #f = plt.figure(figsize = (6, 10)) + + #a = f.add_subplot(111) + #a.set_xlabel('iteration') + + #a.plot(max_distance[:, 0], label = '$x$ difference') + #a.plot(max_distance[:, 1], label = '$y$ difference') + #a.plot(max_distance[:, 2], label = '$z$ difference') + #a.legend(loc = 'best') + + #f.tight_layout() + #f.savefig('figs/trajectories.pdf') + return None + +def check_interpolation( + c0, c1, + nparticles = 2): + """ + c0 is NSReader of NavierStokes data + c1 is NSReader of NSVorticityEquation data + """ + f = plt.figure(figsize = (6, 10)) + + a = f.add_subplot(211) + pf = c0.get_particle_file() + x0 = pf['tracers0/state'][0, :, 0] + y0 = pf['tracers0/state'][0, :, 2] + v0 = np.sum( + pf['tracers0/rhs'][1, 1]**2, + axis = 1)**.5 + a.scatter( + x0, y0, + c = v0, + vmin = v0.min(), + vmax = v0.max(), + edgecolors = 'none', + s = 5., + cmap = plt.get_cmap('magma')) + a.set_xlabel('$x$') + a.set_ylabel('$z$') + pf.close() + + a = f.add_subplot(212) + pf = h5py.File(c1.simname + '_checkpoint_0.h5', 'r') + state = pf['tracers0/state/0'] + x1 = state[:, 0] + y1 = state[:, 2] + v1 = np.sum( + pf['tracers0/rhs/1'][1]**2, + axis = 1)**.5 + # using v0 for colors on purpose, because we want the velocity to be the same, + # so v1.min() 
should be equal to v0.min() etc. + a.scatter( + x1, y1, + c = v1, + vmin = v0.min(), + vmax = v0.max(), + edgecolors = 'none', + s = 5., + cmap = plt.get_cmap('magma')) + a.set_xlabel('$x$') + a.set_ylabel('$z$') + f.tight_layout() + f.savefig('figs/trajectories.pdf') + return None + +def main(): + niterations = 32 + particle_initial_condition = None + nparticles = 10000 + run_NS = False + run_NSVE = False + c = bfps.NavierStokes(simname = 'fluid_solver') + if run_NS: + run_NSVE = True + subprocess.call('rm *fluid_solver* NavierStokes*', shell = True) + c.launch( + ['-n', '32', + '--simname', 'fluid_solver', + '--ncpu', '4', + '--niter_todo', '{0}'.format(niterations), + '--niter_out', '{0}'.format(niterations), + '--niter_stat', '1', + '--nparticles', '{0}'.format(nparticles), + '--particle-rand-seed', '2', + '--niter_part', '{0}'.format(niterations), + '--njobs', '2', + '--wd', './'] + + sys.argv[1:], + particle_initial_condition = particle_initial_condition) + subprocess.call('cat err_file_fluid_solver_0', shell = True) + subprocess.call('rm *vorticity_equation* NSVE*', shell = True) + if run_NSVE: + data = c.read_cfield(iteration = 0) + f = h5py.File('vorticity_equation_checkpoint_0.h5', 'w') + f['vorticity/complex/0'] = data + f.close() + c = bfps.DNS() + c.launch( + ['NSVEparticles', + '-n', '32', + '--simname', 'vorticity_equation', + '--np', '4', + '--ntpp', '1', + '--niter_todo', '{0}'.format(niterations), + '--niter_out', '{0}'.format(niterations), + '--niter_stat', '1', + '--checkpoints_per_file', '{0}'.format(3), + '--nparticles', '{0}'.format(nparticles), + '--particle-rand-seed', '2', + '--njobs', '2', + '--wd', './'] + + sys.argv[1:], + particle_initial_condition = particle_initial_condition) + subprocess.call('cat err_file_vorticity_equation_0', shell = True) + c0 = NSReader(simname = 'fluid_solver') + c1 = NSReader(simname = 'vorticity_equation') + get_maximum_trajectory_error(c0, c1) + overlap_worst_trajectory(c0, c1) + return None + +if __name__ 
== '__main__': + main() + diff --git a/tests/DNS/test_convergence.py b/tests/DNS/test_convergence.py new file mode 100644 index 0000000000000000000000000000000000000000..056ee3ca7c3936c15e7cf58bcc161d86c7449bb9 --- /dev/null +++ b/tests/DNS/test_convergence.py @@ -0,0 +1,163 @@ +####################################################################### +# # +# Copyright 2015 Max Planck Institute # +# for Dynamics and Self-Organization # +# # +# This file is part of bfps. # +# # +# bfps is free software: you can redistribute it and/or modify # +# it under the terms of the GNU General Public License as published # +# by the Free Software Foundation, either version 3 of the License, # +# or (at your option) any later version. # +# # +# bfps is distributed in the hope that it will be useful, # +# but WITHOUT ANY WARRANTY; without even the implied warranty of # +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # +# GNU General Public License for more details. # +# # +# You should have received a copy of the GNU General Public License # +# along with bfps. 
If not, see <http://www.gnu.org/licenses/> # +# # +# Contact: Cristian.Lalescu@ds.mpg.de # +# # +####################################################################### + + + +from bfps.DNS import DNS +import numpy as np +import h5py +import matplotlib.pyplot as plt + +def main_fluid( + launch = True): + niterations = 8 + divisions_to_make = 3 + if launch: + c = [DNS() for i in range(divisions_to_make)] + c[0].launch( + ['NSVE', + '-n', '32', + '--simname', 'div0', + '--np', '2', + '--ntpp', '2', + '--niter_todo', '{0}'.format(niterations), + '--niter_out', '{0}'.format(niterations), + '--niter_stat', '1', + '--wd', './']) + for div in range(1, divisions_to_make): + c[div].launch( + ['NSVE', + '-n', '{0}'.format(32*2**div), + '--simname', 'div{0}'.format(div), + '--src-simname', 'div0', + '--np', '2', + '--ntpp', '2', + '--niter_todo', '{0}'.format(niterations * 2**div), + '--niter_out', '{0}'.format(niterations * 2**div), + '--niter_stat', '{0}'.format(2**div), + '--wd', './']) + # dumbest test + # just look at moments of a field, check that they converge + field = 'velocity' + flist = [h5py.File('div{0}.h5'.format(div), 'r') + for div in range(divisions_to_make)] + err = [(np.abs(flist[div]['/statistics/moments/vorticity'][:, :, 3] - + flist[div-1]['/statistics/moments/vorticity'][:, :, 3]) / + np.abs(flist[div]['/statistics/moments/vorticity'][:, :, 3])) + for div in range(1, divisions_to_make)] + f = plt.figure() + a = f.add_subplot(111) + a.plot(err[0][:, 9]) + a.plot(err[1][:, 9]) + a.set_yscale('log') + f.tight_layout() + f.savefig('moments.pdf') + plt.close(f) + # look at common Fourier amplitudes + flist = [h5py.File('div{0}_checkpoint_0.h5'.format(div), 'r') + for div in range(divisions_to_make)] + err = [] + dt = [] + for div in range(1, divisions_to_make): + n = int(32*2**(div-1)) + f0 = flist[div]['/vorticity/complex/{0}'.format(niterations*2**div)][:n//4,:n//4, :n//4] + f1 = 
flist[div-1]['/vorticity/complex/{0}'.format(niterations*2**(div-1))][:n//4,:n//4, :n//4] + good_indices = np.where(np.abs(f0) > 0) + err.append(np.mean(np.abs((f0 - f1)[good_indices]) / np.abs(f0[good_indices]))) + dt.append(h5py.File('div{0}.h5'.format(div-1), 'r')['parameters/dt'].value) + err = np.array(err) + dt = np.array(dt) + f = plt.figure() + a = f.add_subplot(111) + a.plot(dt, err) + a.plot(dt, dt) + a.set_yscale('log') + a.set_xscale('log') + f.tight_layout() + f.savefig('wavenumber_evdt.pdf') + plt.close(f) + return None + +def main_particles( + launch = False): + niterations = 8 + divisions_to_make = 3 + nparticles = int(1e5) + if launch: + c = [DNS() for i in range(divisions_to_make)] + c[0].launch( + ['NSVEparticles', + '-n', '32', + '--simname', 'div0', + '--np', '2', + '--ntpp', '2', + '--niter_todo', '{0}'.format(niterations), + '--niter_out', '{0}'.format(niterations), + '--niter_stat', '1', + '--nparticles', '{0}'.format(nparticles), + '--particle-rand-seed', '13', + '--wd', './']) + for div in range(1, divisions_to_make): + c[div].launch( + ['NSVEparticles', + '-n', '{0}'.format(32*2**div), + '--simname', 'div{0}'.format(div), + '--src-simname', 'div0', + '--np', '2', + '--ntpp', '2', + '--niter_todo', '{0}'.format(niterations * 2**div), + '--niter_out', '{0}'.format(niterations * 2**div), + '--niter_stat', '{0}'.format(2**div), + '--nparticles', '{0}'.format(nparticles), + '--particle-rand-seed', '13', + '--wd', './']) + + # check distance between particles + flist = [h5py.File('div{0}_checkpoint_0.h5'.format(div), 'r') + for div in range(divisions_to_make)] + err = [] + dt = [] + for div in range(1, divisions_to_make): + n = int(32*2**(div-1)) + p0 = flist[div]['/tracers0/state/{0}'.format(niterations*2**div)][:] + p1 = flist[div-1]['/tracers0/state/{0}'.format(niterations*2**(div-1))][:] + err.append(np.mean( + np.sum((p0 - p1)**2, axis = 1)**.5 / np.sum((p0)**2, axis = 1)**.5)) + dt.append(h5py.File('div{0}.h5'.format(div-1), 
'r')['parameters/dt'].value) + err = np.array(err) + dt = np.array(dt) + f = plt.figure() + a = f.add_subplot(111) + a.plot(dt, err, marker = '.') + a.plot(dt, 2*1e4*dt**4, dashes = (1, 1), color = 'black') + a.set_yscale('log') + a.set_xscale('log') + f.tight_layout() + f.savefig('particle_position_evdt.pdf') + plt.close(f) + return None + +if __name__ == '__main__': + main_particles() + diff --git a/tests/DNS/test_scaling.py b/tests/DNS/test_scaling.py new file mode 100644 index 0000000000000000000000000000000000000000..1d4b12a5e3eb4aa322d68ba276437d1a641f7eae --- /dev/null +++ b/tests/DNS/test_scaling.py @@ -0,0 +1,132 @@ +import sys +import numpy as np +import argparse +import os + +import bfps + +def get_DNS_parameters( + DNS_type = 'A', + N = 512, + nnodes = 1, + nprocesses = 1, + output_on = False, + cores_per_node = 16, + nparticles = int(1e5)): + simname = (DNS_type + '{0:0>4d}'.format(N)) + if output_on: + simname = DNS_type + simname + class_name = 'NSVE' + if DNS_type != 'A': + simname += 'p{0}e{1}'.format( + int(nparticles / 10**np.log10(nparticles)), + int(np.log10(nparticles))) + class_name += 'particles' + work_dir = 'nn{0:0>4d}np{1}'.format(nnodes, nprocesses) + if not output_on: + class_name += '_no_output' + src_simname = 'N{0:0>4d}_kMeta2'.format(N) + src_iteration = -1 + if N == 512: + src_iteration = 3072 + if N == 1024: + src_iteration = 0x4000 + if N == 2048: + src_iteration = 0x6000 + if N == 4096: + src_iteration = 0 + DNS_parameters = [ + class_name, + '-n', '{0}'.format(N), + '--np', '{0}'.format(nnodes*nprocesses), + '--ntpp', '{0}'.format(cores_per_node // nprocesses), + '--simname', simname, + '--wd', work_dir, + '--niter_todo', '12', + '--niter_out', '12', + '--niter_stat', '3'] + if src_iteration >= 0: + DNS_parameters += [ + '--src-wd', 'database', + '--src-simname', src_simname, + '--src-iteration', '{0}'.format(src_iteration)] + if DNS_type != 'A': + DNS_parameters += [ + '--nparticles', '{0}'.format(nparticles)] + nneighbours = 
np.where(np.array( + ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K']) == DNS_type)[0][0] + if nneighbours < 3: + smoothness = 1 + else: + smoothness = 2 + DNS_parameters += [ + '--tracers0_neighbours', '{0}'.format(nneighbours), + '--tracers0_smoothness', '{0}'.format(smoothness), + '--particle-rand-seed', '2'] + return simname, work_dir, DNS_parameters + +def main(): + #DNS_type = 'A', + #N = 512, + #nnodes = 1, + #nprocesses = 1, + #output_on = False, + #cores_per_node = 16, + #nparticles = 1e5) + parser = argparse.ArgumentParser(prog = 'launcher') + parser.add_argument( + 'DNS_setup', + choices = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K'], + type = str) + parser.add_argument( + '-n', + type = int, + dest = 'n', + default = 32) + parser.add_argument( + '--nnodes', + type = int, + dest = 'nnodes', + default = 1) + parser.add_argument( + '--nprocesses', + type = int, + dest = 'nprocesses', + default = 1) + parser.add_argument( + '--ncores', + type = int, + dest = 'ncores', + default = 4) + parser.add_argument( + '--output-on', + action = 'store_true', + dest = 'output_on') + parser.add_argument( + '--nparticles', + type = int, + dest = 'nparticles', + default = int(1e5)) + opt = parser.parse_args(sys.argv[1:]) + simname, work_dir, params = get_DNS_parameters( + DNS_type = opt.DNS_setup, + N = opt.n, + nnodes = opt.nnodes, + nprocesses = opt.nprocesses, + output_on = opt.output_on, + nparticles = opt.nparticles, + cores_per_node = opt.ncores) + print(work_dir + '/' + simname) + print(' '.join(params)) + # these following 2 lines actually launch something + # I'm not passing anything from sys.argv since we don't want to get + # parameter conflicts after the simname and work_dir have been decided + if not os.path.exists(work_dir): + os.makedirs(work_dir) + c = bfps.DNS() + c.launch(params) + return None + +if __name__ == '__main__': + main() + diff --git a/tests/ci-scripts/test.sh b/tests/ci-scripts/test.sh new file mode 100644 index 
#!/bin/bash
#
# CI smoke test: build the library, install the package into a throw-away
# prefix inside the checkout, run the NSVE particle test, then remove the
# prefix again.

# print command
set -x
# stops when fails
set -e

# Init
export destdir="$(pwd)/ci-installdir"
export pythonbin=/home/ubuntu/anaconda3/bin/python3
export bfpspythonpath="$destdir/lib/python3.6/site-packages/"
# Put the install location first.  The ':' separator is only emitted when
# PYTHONPATH already has content, so the two paths are never glued together
# and no empty entry is added to the search path.
export PYTHONPATH="$bfpspythonpath${PYTHONPATH:+:$PYTHONPATH}"
export PATH="$destdir/bin/:/home/ubuntu/hdf5/install/bin/:$PATH"
export LD_LIBRARY_PATH=/home/ubuntu/hdf5/install/lib/:/home/ubuntu/fftw/install/lib/

echo "destdir = $destdir"
echo "pythonbin = $pythonbin"
echo "bfpspythonpath = $bfpspythonpath"

# Remove possible previous installation
if [[ -d "$destdir" ]] ; then
    rm -rf "$destdir" ;
fi

# Create install path
if [[ ! -d "$bfpspythonpath" ]] ; then
    mkdir -p "$bfpspythonpath" ;
fi

# Build
"$pythonbin" setup.py compile_library --timing-output 1
# Install
"$pythonbin" setup.py install --prefix="$destdir"

# Test
ls "$destdir"
ls "$destdir/bin/"

"$pythonbin" "$destdir/bin/bfps.test_NSVEparticles"

# Clean
if [[ -d "$destdir" ]] ; then
    rm -rf "$destdir" ;
fi
true; + f->io("field.h5", "scal", 0, true); // go to fourier space, write into cdata_tmp f->dft(); - f->io("field.h5", "cdata_tmp", 0, false); + f->io("field.h5", "scal_tmp", 0, false); f->ift(); - f->io("field.h5", "rdata", 0, false); - f->io("field.h5", "cdata", 0, true); + f->io("field.h5", "scal", 0, false); + f->real_space_representation = false; + f->io("field.h5", "scal", 0, true); hid_t gg; if (f->myrank == 0) gg = H5Fopen("field.h5", H5F_ACC_RDWR, H5P_DEFAULT); kk->cospectrum<float, ONE>( - f->get_rdata(), - f->get_rdata(), + f->get_cdata(), + f->get_cdata(), gg, "scal", 0); f->ift(); - f->io("field.h5", "rdata_tmp", 0, false); + f->io("field.h5", "scal_tmp", 0, false); std::vector<double> me; me.resize(1); me[0] = 30; @@ -66,11 +71,15 @@ class TestField(_fluid_particle_base): 0, me); if (f->myrank == 0) H5Fclose(gg); + v->real_space_representation = false; + v->io("field.h5", "vec", 0, true); + v->io("field.h5", "vec_tmp", 0, false); //endcpp """.format(self.C_dtype) self.fluid_end += """ //begincpp delete f; + delete v; //endcpp """ return None @@ -92,7 +101,7 @@ class TestField(_fluid_particle_base): return None def main(): - n = 128 + n = 32 kdata = pyfftw.n_byte_align_empty( (n, n, n//2 + 1), pyfftw.simd_alignment, @@ -116,10 +125,10 @@ def main(): tf.parameters['ny'] = n tf.parameters['nz'] = n f = h5py.File('field.h5', 'w') - f['cdata'] = cdata.reshape((1,) + cdata.shape) - f['cdata_tmp'] = np.zeros(shape=(1,) + cdata.shape).astype(cdata.dtype) - f['rdata'] = rdata.reshape((1,) + rdata.shape) - f['rdata_tmp'] = np.zeros(shape=(1,) + rdata.shape).astype(rdata.dtype) + f['scal/complex/0'] = cdata + f['scal/real/0'] = rdata + f['vec/complex/0'] = np.array([cdata, cdata, cdata]).reshape(cdata.shape + (3,)) + f['vec/real/0'] = np.array([rdata, rdata, rdata]).reshape(rdata.shape + (3,)) f['moments/scal'] = np.zeros(shape = (1, 10)).astype(np.float) f['histograms/scal'] = np.zeros(shape = (1, 64)).astype(np.float) kspace = tf.get_kspace() @@ -133,35 
+142,60 @@ def main(): '--ncpu', '2']) f = h5py.File('field.h5', 'r') - err0 = np.max(np.abs(f['rdata_tmp'][0] - rdata)) / np.mean(np.abs(rdata)) - err1 = np.max(np.abs(f['rdata'][0]/(n**3) - rdata)) / np.mean(np.abs(rdata)) - err2 = np.max(np.abs(f['cdata_tmp'][0]/(n**3) - cdata)) / np.mean(np.abs(cdata)) - print(err0, err1, err2) - assert(err0 < 1e-5) - assert(err1 < 1e-5) - assert(err2 < 1e-4) - ### compare - #fig = plt.figure(figsize=(12, 6)) - #a = fig.add_subplot(121) - #a.set_axis_off() - #a.imshow(rdata[0, :, :], interpolation = 'none') - #a = fig.add_subplot(122) - #a.set_axis_off() - #a.imshow(f['rdata_tmp'][0, 0, :, :], interpolation = 'none') + #err0 = np.max(np.abs(f['scal_tmp/real/0'].value - rdata)) / np.mean(np.abs(rdata)) + #err1 = np.max(np.abs(f['scal/real/0'].value/(n**3) - rdata)) / np.mean(np.abs(rdata)) + #err2 = np.max(np.abs(f['scal_tmp/complex/0'].value/(n**3) - cdata)) / np.mean(np.abs(cdata)) + #print(err0, err1, err2) + #assert(err0 < 1e-5) + #assert(err1 < 1e-5) + #assert(err2 < 1e-4) + ## compare + fig = plt.figure(figsize=(18, 6)) + a = fig.add_subplot(131) + a.set_axis_off() + v0 = f['vec/complex/0'][:, :, 0, 0] + v1 = f['vec_tmp/complex/0'][:, :, 0, 0] + a.imshow(np.log(np.abs(v0 - v1)), + interpolation = 'none') + a = fig.add_subplot(132) + a.set_axis_off() + a.imshow(np.log(np.abs(v0)), + interpolation = 'none') + a = fig.add_subplot(133) + a.set_axis_off() + a.imshow(np.log(np.abs(v1)), + interpolation = 'none') + fig.tight_layout() + fig.savefig('tst_fields.pdf') + fig = plt.figure(figsize=(18, 6)) + a = fig.add_subplot(131) + a.set_axis_off() + v0 = f['scal/complex/0'][:, :, 0] + v1 = f['scal_tmp/complex/0'][:, :, 0] + a.imshow(np.log(np.abs(v0 - v1)), + interpolation = 'none') + a = fig.add_subplot(132) + a.set_axis_off() + a.imshow(np.log(np.abs(v0)), + interpolation = 'none') + a = fig.add_subplot(133) + a.set_axis_off() + a.imshow(np.log(np.abs(v1)), + interpolation = 'none') + fig.tight_layout() + 
fig.savefig('tst_sfields.pdf') + # look at moments and histogram + #print('moments are ', f['moments/scal'][0]) + #fig = plt.figure(figsize=(6,6)) + #a = fig.add_subplot(211) + #a.plot(f['histograms/scal'][0]) + #a.set_yscale('log') + #a = fig.add_subplot(212) + #a.plot(f['spectra/scal'][0]) + #a.set_xscale('log') + #a.set_yscale('log') #fig.tight_layout() #fig.savefig('tst.pdf') - # look at moments and histogram - print('moments are ', f['moments/scal'][0]) - fig = plt.figure(figsize=(6,6)) - a = fig.add_subplot(211) - a.plot(f['histograms/scal'][0]) - a.set_yscale('log') - a = fig.add_subplot(212) - a.plot(f['spectra/scal'][0]) - a.set_xscale('log') - a.set_yscale('log') - fig.tight_layout() - fig.savefig('tst.pdf') return None if __name__ == '__main__': diff --git a/tests/test_io.py b/tests/test_io.py index ce825c808785266c5199149aac5a4ab481ffedc2..624d357b0950eb8c3ae18c1f4a9ae7f47f45b0f8 100644 --- a/tests/test_io.py +++ b/tests/test_io.py @@ -54,6 +54,6 @@ if __name__ == '__main__': c = test_io(work_dir = opt.work_dir + '/io') c.write_src() c.write_par() - c.set_host_info({'type' : 'pc'}) - c.run(ncpu = opt.ncpu) + c.set_host_info(bfps.host_info) + c.run(opt.ncpu, 1) diff --git a/tests/test_io_00.py b/tests/test_io_00.py new file mode 100644 index 0000000000000000000000000000000000000000..f558cb8c6fc87be0518a7f63b4fadb0f06acd293 --- /dev/null +++ b/tests/test_io_00.py @@ -0,0 +1,37 @@ +####################################################################### +# # +# Copyright 2015 Max Planck Institute # +# for Dynamics and Self-Organization # +# # +# This file is part of bfps. # +# # +# bfps is free software: you can redistribute it and/or modify # +# it under the terms of the GNU General Public License as published # +# by the Free Software Foundation, either version 3 of the License, # +# or (at your option) any later version. 
from test_io import *

if __name__ == '__main__':
    # Stage 0 of the I/O test: only construct the test_io object.
    # Command-line arguments are appended after the built-in defaults.
    default_args = ['-n', '32', '--ncpu', '2']
    opt = parser.parse_args(default_args + sys.argv[1:])
    io_work_dir = opt.work_dir + '/io'
    print('about to create test_io object')
    c = test_io(work_dir = io_work_dir)
    print('congratulations, test_io object was created')
from test_io import *

if __name__ == '__main__':
    # Stage 1 of the I/O test: construct the object, then write the
    # generated source and the parameter file.
    # Command-line arguments are appended after the built-in defaults.
    default_args = ['-n', '32', '--ncpu', '2']
    opt = parser.parse_args(default_args + sys.argv[1:])
    io_work_dir = opt.work_dir + '/io'
    c = test_io(work_dir = io_work_dir)
    c.write_src()
    c.write_par()
from test_io import *

if __name__ == '__main__':
    # Stage 2 of the I/O test: write source and parameters, then compile
    # the generated code using the host settings shipped with bfps.
    # Command-line arguments are appended after the built-in defaults.
    default_args = ['-n', '32', '--ncpu', '2']
    opt = parser.parse_args(default_args + sys.argv[1:])
    io_work_dir = opt.work_dir + '/io'
    c = test_io(work_dir = io_work_dir)
    c.write_src()
    c.write_par()
    c.set_host_info(bfps.host_info)
    c.compile_code()
from test_io import *

if __name__ == '__main__':
    # Stage 3 of the I/O test: write source and parameters, then run.
    # Command-line arguments are appended after the built-in defaults.
    opt = parser.parse_args(
            ['-n', '32',
             '--ncpu', '2'] +
            sys.argv[1:])
    c = test_io(work_dir = opt.work_dir + '/io')
    c.write_src()
    c.write_par()
    c.set_host_info(bfps.host_info)
    # Pass the process count and one thread per process explicitly, the
    # same way tests/test_io.py calls run(); previously the parsed --ncpu
    # value was ignored and run() was called without arguments.
    c.run(opt.ncpu, 1)
import sys
import os
import glob
import numpy as np
import h5py
import argparse
import subprocess

import bfps
import bfps.tools

from bfps_addons import NSReader
import matplotlib.pyplot as plt


def _save_figure(fig, path):
    """Lay out `fig`, write it to `path` (creating the folder) and close it."""
    out_dir = os.path.dirname(path)
    if out_dir and not os.path.isdir(out_dir):
        os.makedirs(out_dir)
    fig.tight_layout()
    fig.savefig(path)
    plt.close(fig)
    return None


def _remove_files(*patterns):
    """Delete the regular files in the current folder matching any pattern."""
    for pattern in patterns:
        for path in glob.glob(pattern):
            if os.path.isfile(path):
                os.remove(path)
    return None


def _read_checkpoint_trajectories(c1, ntrajectories):
    """Read the first `ntrajectories` tracer states from the first checkpoint.

    The checkpoint file `<simname>_checkpoint_0.h5` stores one dataset per
    output iteration under `tracers0/state/<iteration>`; they are stacked
    along a new leading axis, in increasing iteration order.
    """
    with h5py.File(c1.simname + '_checkpoint_0.h5', 'r') as pf:
        nsteps = len(pf['tracers0/state'].keys())
        return np.array([
            pf['tracers0/state/{0}'.format(
                ss*c1.parameters['niter_out'])][:ntrajectories]
            for ss in range(nsteps)])


def compare_moments(
        c0, c1):
    """Plot one vorticity and one velocity moment of both runs.

    Writes `figs/moments.pdf`; `c0` is drawn in blue, `c1` in red.
    """
    df0 = c0.get_data_file()
    df1 = c1.get_data_file()
    f = plt.figure(figsize=(6,10))
    for subplot, field in ((211, 'vorticity'), (212, 'velocity')):
        a = f.add_subplot(subplot)
        dset = 'statistics/moments/' + field
        a.plot(df0[dset][:, 2, 3],
               color = 'blue',
               marker = '+')
        a.plot(df1[dset][:, 2, 3],
               color = 'red',
               marker = 'x')
    _save_figure(f, 'figs/moments.pdf')
    return None


def overlap_trajectories(
        c0, c1):
    """
    c0 is NSReader of NavierStokes data
    c1 is NSReader of NSVorticityEquation data

    Draws the x-z projection of the first 100 trajectories of both runs on
    top of each other, writes `figs/trajectories.pdf`, and prints the
    largest difference between the two initial conditions.
    """
    ntrajectories = 100

    pf = c0.get_particle_file()
    try:
        state0 = pf['tracers0/state'][:, :ntrajectories]
    finally:
        pf.close()
    state1 = _read_checkpoint_trajectories(c1, ntrajectories)

    f = plt.figure(figsize = (6, 6))
    a = f.add_subplot(111)
    a.scatter(state0[0, :, 0],
              state0[0, :, 2],
              marker = '+',
              color = 'blue')
    a.plot(state0[:, :, 0],
           state0[:, :, 2])
    a.scatter(state1[0, :, 0],
              state1[0, :, 2],
              marker = 'x',
              color = 'red')
    a.plot(state1[:, :, 0],
           state1[:, :, 2],
           dashes = (1, 1))
    a.set_xlabel('$x$')
    a.set_ylabel('$z$')
    _save_figure(f, 'figs/trajectories.pdf')

    print('difference between initial conditions is {0}'.format(
        np.max(np.abs(state0[0] - state1[0, :]))))
    return None


def overlap_worst_trajectory(
        c0, c1):
    """
    c0 is NSReader of NavierStokes data
    c1 is NSReader of NSVorticityEquation data

    Picks, among the first 100 trajectories, the one with the largest
    squared distance between the two runs at the last stored time, and
    plots its three coordinates for both runs.
    """
    ntrajectories = 100
    pf0 = c0.get_particle_file()
    try:
        state0 = pf0['tracers0/state'][:, :ntrajectories]
    finally:
        pf0.close()
    state1 = _read_checkpoint_trajectories(c1, ntrajectories)

    diff = np.abs(state0 - state1)
    bad_index = np.argmax(np.sum(diff[-1]**2, axis = 1))

    f = plt.figure(figsize = (6, 10))
    for component, (subplot, label) in enumerate(
            ((311, '$x$'), (312, '$y$'), (313, '$z$'))):
        a = f.add_subplot(subplot)
        a.set_ylabel(label)
        a.set_xlabel('iteration')
        a.plot(state0[:, bad_index, component])
        a.plot(state1[:, bad_index, component], dashes = (1, 1))
    # NOTE(review): same output file as overlap_trajectories, so calling
    # both overwrites the first figure -- confirm whether that is intended.
    _save_figure(f, 'figs/trajectories.pdf')
    return None


def get_maximum_trajectory_error(
        c0, c1):
    """
    c0 is NSReader of NavierStokes data
    c1 is NSReader of NSVorticityEquation data

    Prints, for every stored time and every coordinate, the largest
    absolute difference between the two runs over the first 100
    trajectories.
    """
    ntrajectories = 100
    pf0 = c0.get_particle_file()
    try:
        state0 = pf0['tracers0/state'][:, :ntrajectories]
    finally:
        pf0.close()
    state1 = _read_checkpoint_trajectories(c1, ntrajectories)

    diff = np.abs(state0 - state1)
    max_distance = np.max(diff, axis = 1)
    print(max_distance)
    return None


def check_interpolation(
        c0, c1,
        nparticles = 2):
    """
    c0 is NSReader of NavierStokes data
    c1 is NSReader of NSVorticityEquation data

    Scatter-plots the initial particle positions of both runs (x-z
    projection), coloured by the magnitude of the stored right-hand side.
    `nparticles` is accepted for interface compatibility but not used.
    """
    pf = c0.get_particle_file()
    try:
        x0 = pf['tracers0/state'][0, :, 0]
        y0 = pf['tracers0/state'][0, :, 2]
        v0 = np.sum(
            pf['tracers0/rhs'][1, 1]**2,
            axis = 1)**.5
    finally:
        pf.close()

    with h5py.File(c1.simname + '_checkpoint_0.h5', 'r') as pf:
        state = pf['tracers0/state/0']
        x1 = state[:, 0]
        y1 = state[:, 2]
        v1 = np.sum(
            pf['tracers0/rhs/1'][1]**2,
            axis = 1)**.5

    f = plt.figure(figsize = (6, 10))
    # using v0 for colors on purpose, because we want the velocity to be the
    # same, so v1.min() should be equal to v0.min() etc.
    for subplot, xx, yy, vv in ((211, x0, y0, v0),
                                (212, x1, y1, v1)):
        a = f.add_subplot(subplot)
        a.scatter(
            xx, yy,
            c = vv,
            vmin = v0.min(),
            vmax = v0.max(),
            edgecolors = 'none',
            s = 5.,
            cmap = plt.get_cmap('magma'))
        a.set_xlabel('$x$')
        a.set_ylabel('$z$')
    _save_figure(f, 'figs/trajectories.pdf')
    return None


def main():
    """Run the NavierStokes and NSVorticityEquation solvers and compare them.

    Both solvers start from the same field and the same particles; extra
    command-line arguments are appended to the arguments of both launches.
    """
    niterations = 32
    particle_initial_condition = None
    nparticles = 10000
    run_NS = True
    run_NSVE = False
    plain_interpolation_test = False
    if plain_interpolation_test:
        niterations = 1
        pcloudX = np.pi
        pcloudY = np.pi
        particle_cloud_size = np.pi
        nparticles = 32*4
        particle_initial_condition = np.zeros(
                (nparticles,
                 nparticles,
                 3),
                dtype = np.float64)
        xvals = (pcloudX +
                 np.linspace(-particle_cloud_size/2,
                              particle_cloud_size/2,
                              nparticles))
        yvals = (pcloudY +
                 np.linspace(-particle_cloud_size/2,
                              particle_cloud_size/2,
                              nparticles))
        particle_initial_condition[..., 0] = xvals[None, None, :]
        particle_initial_condition[..., 2] = yvals[None, :, None]
        particle_initial_condition = particle_initial_condition.reshape(-1, 3)
        nparticles = nparticles**2
    c = bfps.NavierStokes(simname = 'fluid_solver')
    if run_NS:
        # a fresh NavierStokes run invalidates any previous NSVE results
        run_NSVE = True
        _remove_files('*fluid_solver*', 'NavierStokes*')
        c.launch(
                ['-n', '32',
                 '--simname', 'fluid_solver',
                 '--ncpu', '4',
                 '--niter_todo', '{0}'.format(niterations),
                 '--niter_out', '{0}'.format(niterations),
                 '--niter_stat', '1',
                 '--nparticles', '{0}'.format(nparticles),
                 '--particle-rand-seed', '2',
                 '--niter_part', '{0}'.format(niterations),
                 '--njobs', '2',
                 '--wd', './'] +
                sys.argv[1:],
                particle_initial_condition = particle_initial_condition)
        subprocess.call(['cat', 'err_file_fluid_solver_0'])
        _remove_files('*vorticity_equation*', 'NSVE*')
    if run_NSVE:
        # seed the vorticity solver with the initial field of the NS run
        data = c.read_cfield(iteration = 0)
        with h5py.File('vorticity_equation_checkpoint_0.h5', 'w') as f:
            f['vorticity/complex/0'] = data
        c = bfps.NSVorticityEquation()
        c.launch(
                ['-n', '32',
                 '--simname', 'vorticity_equation',
                 '--np', '4',
                 '--ntpp', '1',
                 '--niter_todo', '{0}'.format(niterations),
                 '--niter_out', '{0}'.format(niterations),
                 '--niter_stat', '1',
                 '--checkpoints_per_file', '{0}'.format(3),
                 '--nparticles', '{0}'.format(nparticles),
                 '--particle-rand-seed', '2',
                 '--njobs', '2',
                 '--wd', './'] +
                sys.argv[1:],
                particle_initial_condition = particle_initial_condition)
        subprocess.call(['cat', 'err_file_vorticity_equation_0'])
    c0 = NSReader(simname = 'fluid_solver')
    c1 = NSReader(simname = 'vorticity_equation')
    if plain_interpolation_test:
        check_interpolation(c0, c1, nparticles = int(nparticles**.5))
    else:
        get_maximum_trajectory_error(c0, c1)
        #overlap_worst_trajectory(c0, c1)
    return None

if __name__ == '__main__':
    main()