From 23ed2f4afc4d8ecaff1bcfc46b2f3cf53923188b Mon Sep 17 00:00:00 2001 From: Cristian C Lalescu <Cristian.Lalescu@ds.mpg.de> Date: Tue, 25 Sep 2018 11:30:48 +0200 Subject: [PATCH] split inner_computer into hpp and cpp --- bfps/cpp/field.cpp | 7 + bfps/cpp/full_code/NSVE.cpp | 1 + .../particles/particles_inner_computer.cpp | 155 ++++++++++++++++++ .../particles/particles_inner_computer.hpp | 121 ++------------ setup.py | 8 +- 5 files changed, 185 insertions(+), 107 deletions(-) create mode 100644 bfps/cpp/particles/particles_inner_computer.cpp diff --git a/bfps/cpp/field.cpp b/bfps/cpp/field.cpp index d5bc78a5..87e18b67 100644 --- a/bfps/cpp/field.cpp +++ b/bfps/cpp/field.cpp @@ -79,6 +79,7 @@ field<rnumber, be, fc>::field( hsize_t tmp_local_size; ptrdiff_t local_n0, local_0_start; ptrdiff_t local_n1, local_1_start; + variable_used_only_in_assert(tmp_local_size); tmp_local_size = fftw_interface<rnumber>::mpi_local_size_many_transposed( 3, nfftw, ncomp(fc), FFTW_MPI_DEFAULT_BLOCK, FFTW_MPI_DEFAULT_BLOCK, this->comm, @@ -227,6 +228,7 @@ int field<rnumber, be, fc>::io( H5Tequal(dset_type, H5T_IEEE_F64LE) || H5Tequal(dset_type, H5T_INTEL_F64) || H5Tequal(dset_type, H5T_NATIVE_DOUBLE)); + variable_used_only_in_assert(io_for_real); H5Tclose(dset_type); assert(this->real_space_representation == io_for_real); } @@ -307,6 +309,7 @@ int field<rnumber, be, fc>::io( /* check file space */ int ndims_fspace = H5Sget_simple_extent_dims(fspace, dims, NULL); + variable_used_only_in_assert(ndims_fspace); assert(((unsigned int)(ndims_fspace)) == ndim(fc)); if (this->real_space_representation) { @@ -417,6 +420,7 @@ int field<rnumber, be, fc>::io_database( H5Tequal(dset_type, H5T_IEEE_F64LE) || H5Tequal(dset_type, H5T_INTEL_F64) || H5Tequal(dset_type, H5T_NATIVE_DOUBLE)); + variable_used_only_in_assert(io_for_real); H5Tclose(dset_type); assert(this->real_space_representation == io_for_real); } @@ -493,6 +497,7 @@ int field<rnumber, be, fc>::io_database( /* check file space */ int ndims_fspace = H5Sget_simple_extent_dims(fspace, dims, NULL); + variable_used_only_in_assert(ndims_fspace); assert(ndims_fspace == int(ndim(fc) + 1)); offset[0] = toffset; if (this->real_space_representation) @@ -714,6 +719,7 @@ int field<rnumber, be, fc>::write_filtered( } /* check file space */ int ndims_fspace = H5Sget_simple_extent_dims(fspace, fdims, NULL); + variable_used_only_in_assert(ndims_fspace); assert(((unsigned int)(ndims_fspace)) == ndim(fc)); for (unsigned int i=0; i<ndim(fc); i++) { @@ -1567,6 +1573,7 @@ int joint_rspace_PDF( hid_t dset, wspace; hsize_t dims[5]; int ndims; + variable_used_only_in_assert(ndims); if (fc == THREE) { dset = H5Dopen( diff --git a/bfps/cpp/full_code/NSVE.cpp b/bfps/cpp/full_code/NSVE.cpp index ecec7db3..efd82fb3 100644 --- a/bfps/cpp/full_code/NSVE.cpp +++ b/bfps/cpp/full_code/NSVE.cpp @@ -18,6 +18,7 @@ int NSVE<rnumber>::initialize(void) // set caching parameters hid_t fapl = H5Pcreate(H5P_FILE_ACCESS); herr_t cache_err = H5Pset_cache(fapl, 0, 521, 134217728, 1.0); + variable_used_only_in_assert(cache_err); DEBUG_MSG("when setting stat_file cache I got %d\n", cache_err); this->stat_file = H5Fopen( (this->simname + ".h5").c_str(), diff --git a/bfps/cpp/particles/particles_inner_computer.cpp b/bfps/cpp/particles/particles_inner_computer.cpp new file mode 100644 index 00000000..85286190 --- /dev/null +++ b/bfps/cpp/particles/particles_inner_computer.cpp @@ -0,0 +1,155 @@ +#include "base.hpp" +#include "particles_utils.hpp" +#include "particles_inner_computer.hpp" + +#include <cmath> + +template <class real_number, class partsize_t> +template <int size_particle_positions, int size_particle_rhs> +void particles_inner_computer<real_number, partsize_t>::compute_interaction( + const partsize_t nb_particles, + const real_number pos_part[], + real_number rhs_part[]) const{ + static_assert(size_particle_positions == 6, "This kernel works only with 6 values for one particle's position"); + static_assert(size_particle_rhs == 6, "This kernel works only with 6 values per particle's rhs"); + + #pragma omp parallel for + for(partsize_t idx_part = 0 ; idx_part < nb_particles ; ++idx_part){ + // Add attr × V0 to the field interpolation + rhs_part[idx_part*size_particle_rhs + IDX_X] += pos_part[idx_part*size_particle_positions + 3+IDX_X]*v0; + rhs_part[idx_part*size_particle_rhs + IDX_Y] += pos_part[idx_part*size_particle_positions + 3+IDX_Y]*v0; + rhs_part[idx_part*size_particle_rhs + IDX_Z] += pos_part[idx_part*size_particle_positions + 3+IDX_Z]*v0; + } +} + + // for given orientation and right-hand-side, recompute right-hand-side such + // that it is perpendicular to the current orientation. + // this is the job of the Lagrange multiplier terms, hence the + // "add_Lagrange_multipliers" name of the method. +template <class real_number, class partsize_t> +template <int size_particle_positions, int size_particle_rhs> +void particles_inner_computer<real_number, partsize_t>::add_Lagrange_multipliers( + const partsize_t nb_particles, + const real_number pos_part[], + real_number rhs_part[]) const{ + static_assert(size_particle_positions == 6, "This kernel works only with 6 values for one particle's position"); + static_assert(size_particle_rhs == 6, "This kernel works only with 6 values per particle's rhs"); + + #pragma omp parallel for + for(partsize_t idx_part = 0 ; idx_part < nb_particles ; ++idx_part){ + const partsize_t idx0 = idx_part*size_particle_positions + 3; + const partsize_t idx1 = idx_part*size_particle_rhs + 3; + // check that orientation is unit vector: + real_number orientation_size = sqrt( + pos_part[idx0+IDX_X]*pos_part[idx0+IDX_X] + + pos_part[idx0+IDX_Y]*pos_part[idx0+IDX_Y] + + pos_part[idx0+IDX_Z]*pos_part[idx0+IDX_Z]); + variable_used_only_in_assert(orientation_size); + assert(orientation_size > 0.99); + assert(orientation_size < 1.01); + // I call "rotation" to be the right hand side of the orientation part of the ODE + // project rotation on orientation: + real_number projection = ( + pos_part[idx0+IDX_X]*rhs_part[idx1+IDX_X] + + pos_part[idx0+IDX_Y]*rhs_part[idx1+IDX_Y] + + pos_part[idx0+IDX_Z]*rhs_part[idx1+IDX_Z]); + + // now remove parallel bit. + rhs_part[idx1+IDX_X] -= pos_part[idx0+IDX_X]*projection; + rhs_part[idx1+IDX_Y] -= pos_part[idx0+IDX_Y]*projection; + rhs_part[idx1+IDX_Z] -= pos_part[idx0+IDX_Z]*projection; + + // DEBUG + // sanity check, for debugging purposes + // compute dot product between orientation and orientation change + //real_number dotproduct = ( + // rhs_part[idx1 + IDX_X]*pos_part[idx0 + IDX_X] + + // rhs_part[idx1 + IDX_Y]*pos_part[idx0 + IDX_Y] + + // rhs_part[idx1 + IDX_Z]*pos_part[idx0 + IDX_Z]); + //if (dotproduct > 0.1) + //{ + // DEBUG_MSG("dotproduct = %g, projection = %g\n" + // "pos_part[%d] = %g, pos_part[%d] = %g, pos_part[%d] = %g\n" + // "rhs_part[%d] = %g, rhs_part[%d] = %g, rhs_part[%d] = %g\n", + // dotproduct, + // projection, + // IDX_X, pos_part[idx0 + IDX_X], + // IDX_Y, pos_part[idx0 + IDX_Y], + // IDX_Z, pos_part[idx0 + IDX_Z], + // IDX_X, rhs_part[idx1 + IDX_X], + // IDX_Y, rhs_part[idx1 + IDX_Y], + // IDX_Z, rhs_part[idx1 + IDX_Z]); + // assert(false); + //} + //assert(dotproduct <= 0.1); + } + } + +template <class real_number, class partsize_t> +template <int size_particle_positions, int size_particle_rhs, int size_particle_rhs_extra> +void particles_inner_computer<real_number, partsize_t>::compute_interaction_with_extra( + const partsize_t nb_particles, + const real_number pos_part[], + real_number rhs_part[], + const real_number rhs_part_extra[]) const{ + static_assert(size_particle_rhs_extra == 3, "This kernel works only with 3 values for one particle's rhs extra"); + + // call plain compute_interaction first + compute_interaction<size_particle_positions, size_particle_rhs>(nb_particles, pos_part, rhs_part); + + // now add vorticity term + #pragma omp parallel for + for(partsize_t idx_part = 0 ; idx_part < nb_particles ; ++idx_part){ + // Cross product vorticity/orientation + rhs_part[idx_part*size_particle_rhs + 3+IDX_X] += 0.5*(rhs_part_extra[idx_part*size_particle_rhs_extra + IDX_Y]*pos_part[idx_part*size_particle_positions + 3+IDX_Z] - + rhs_part_extra[idx_part*size_particle_rhs_extra + IDX_Z]*pos_part[idx_part*size_particle_positions + 3+IDX_Y]); + rhs_part[idx_part*size_particle_rhs + 3+IDX_Y] += 0.5*(rhs_part_extra[idx_part*size_particle_rhs_extra + IDX_Z]*pos_part[idx_part*size_particle_positions + 3+IDX_X] - + rhs_part_extra[idx_part*size_particle_rhs_extra + IDX_X]*pos_part[idx_part*size_particle_positions + 3+IDX_Z]); + rhs_part[idx_part*size_particle_rhs + 3+IDX_Z] += 0.5*(rhs_part_extra[idx_part*size_particle_rhs_extra + IDX_X]*pos_part[idx_part*size_particle_positions + 3+IDX_Y] - + rhs_part_extra[idx_part*size_particle_rhs_extra + IDX_Y]*pos_part[idx_part*size_particle_positions + 3+IDX_X]); + } +} + +// meant to be called AFTER executing the time-stepping operation. +// once the particles have been moved, ensure that the orientation is a unit vector. +template <class real_number, class partsize_t> +template <int size_particle_positions> +void particles_inner_computer<real_number, partsize_t>::enforce_unit_orientation( + const partsize_t nb_particles, + real_number pos_part[]) const{ + static_assert(size_particle_positions == 6, "This kernel works only with 6 values for one particle's position"); + + #pragma omp parallel for + for(partsize_t idx_part = 0 ; idx_part < nb_particles ; ++idx_part){ + const partsize_t idx0 = idx_part*size_particle_positions + 3; + // compute orientation size: + real_number orientation_size = sqrt( + pos_part[idx0+IDX_X]*pos_part[idx0+IDX_X] + + pos_part[idx0+IDX_Y]*pos_part[idx0+IDX_Y] + + pos_part[idx0+IDX_Z]*pos_part[idx0+IDX_Z]); + // now renormalize + pos_part[idx0 + IDX_X] /= orientation_size; + pos_part[idx0 + IDX_Y] /= orientation_size; + pos_part[idx0 + IDX_Z] /= orientation_size; + } +} + +template class particles_inner_computer<double, long long>; + +template void particles_inner_computer<double, long long>::compute_interaction<6, 6>( + const long long nb_particles, + const double pos_part[], + double rhs_part[]) const; +template void particles_inner_computer<double, long long>::add_Lagrange_multipliers<6, 6>( + const long long nb_particles, + const double pos_part[], + double rhs_part[]) const; +template void particles_inner_computer<double, long long>::compute_interaction_with_extra <6, 6, 3>( + const long long nb_particles, + const double pos_part[], + double rhs_part[], + const double rhs_part_extra[]) const; +template void particles_inner_computer<double, long long>:: enforce_unit_orientation<6>( + const long long nb_particles, + double pos_part[]) const; + diff --git a/bfps/cpp/particles/particles_inner_computer.hpp b/bfps/cpp/particles/particles_inner_computer.hpp index b2eb95dd..3e233370 100644 --- a/bfps/cpp/particles/particles_inner_computer.hpp +++ b/bfps/cpp/particles/particles_inner_computer.hpp @@ -15,120 +15,31 @@ public: } template <int size_particle_positions, int size_particle_rhs> - void compute_interaction(const partsize_t nb_particles, const real_number pos_part[], real_number rhs_part[]) const{ - static_assert(size_particle_positions == 6, "This kernel works only with 6 values for one particle's position"); - static_assert(size_particle_rhs == 6, "This kernel works only with 6 values per particle's rhs"); - - #pragma omp parallel for - for(partsize_t idx_part = 0 ; idx_part < nb_particles ; ++idx_part){ - // Add attr × V0 to the field interpolation - rhs_part[idx_part*size_particle_rhs + IDX_X] += pos_part[idx_part*size_particle_positions + 3+IDX_X]*v0; - rhs_part[idx_part*size_particle_rhs + IDX_Y] += pos_part[idx_part*size_particle_positions + 3+IDX_Y]*v0; - rhs_part[idx_part*size_particle_rhs + IDX_Z] += pos_part[idx_part*size_particle_positions + 3+IDX_Z]*v0; - } - } - + void compute_interaction( + const partsize_t nb_particles, + const real_number pos_part[], + real_number rhs_part[]) const; // for given orientation and right-hand-side, recompute right-hand-side such // that it is perpendicular to the current orientation. // this is the job of the Lagrange multiplier terms, hence the // "add_Lagrange_multipliers" name of the method. template <int size_particle_positions, int size_particle_rhs> - void add_Lagrange_multipliers(const partsize_t nb_particles, const real_number pos_part[], real_number rhs_part[]) const{ - static_assert(size_particle_positions == 6, "This kernel works only with 6 values for one particle's position"); - static_assert(size_particle_rhs == 6, "This kernel works only with 6 values per particle's rhs"); - - #pragma omp parallel for - for(partsize_t idx_part = 0 ; idx_part < nb_particles ; ++idx_part){ - const partsize_t idx0 = idx_part*size_particle_positions + 3; - const partsize_t idx1 = idx_part*size_particle_rhs + 3; - // check that orientation is unit vector: - real_number orientation_size = sqrt( - pos_part[idx0+IDX_X]*pos_part[idx0+IDX_X] + - pos_part[idx0+IDX_Y]*pos_part[idx0+IDX_Y] + - pos_part[idx0+IDX_Z]*pos_part[idx0+IDX_Z]); - variable_used_only_in_assert(orientation_size); - assert(orientation_size > 0.99); - assert(orientation_size < 1.01); - // I call "rotation" to be the right hand side of the orientation part of the ODE - // project rotation on orientation: - real_number projection = ( - pos_part[idx0+IDX_X]*rhs_part[idx1+IDX_X] + - pos_part[idx0+IDX_Y]*rhs_part[idx1+IDX_Y] + - pos_part[idx0+IDX_Z]*rhs_part[idx1+IDX_Z]); - - // now remove parallel bit. - rhs_part[idx1+IDX_X] -= pos_part[idx0+IDX_X]*projection; - rhs_part[idx1+IDX_Y] -= pos_part[idx0+IDX_Y]*projection; - rhs_part[idx1+IDX_Z] -= pos_part[idx0+IDX_Z]*projection; - - // DEBUG - // sanity check, for debugging purposes - // compute dot product between orientation and orientation change - //real_number dotproduct = ( - // rhs_part[idx1 + IDX_X]*pos_part[idx0 + IDX_X] + - // rhs_part[idx1 + IDX_Y]*pos_part[idx0 + IDX_Y] + - // rhs_part[idx1 + IDX_Z]*pos_part[idx0 + IDX_Z]); - //if (dotproduct > 0.1) - //{ - // DEBUG_MSG("dotproduct = %g, projection = %g\n" - // "pos_part[%d] = %g, pos_part[%d] = %g, pos_part[%d] = %g\n" - // "rhs_part[%d] = %g, rhs_part[%d] = %g, rhs_part[%d] = %g\n", - // dotproduct, - // projection, - // IDX_X, pos_part[idx0 + IDX_X], - // IDX_Y, pos_part[idx0 + IDX_Y], - // IDX_Z, pos_part[idx0 + IDX_Z], - // IDX_X, rhs_part[idx1 + IDX_X], - // IDX_Y, rhs_part[idx1 + IDX_Y], - // IDX_Z, rhs_part[idx1 + IDX_Z]); - // assert(false); - //} - //assert(dotproduct <= 0.1); - } - } - + void add_Lagrange_multipliers( + const partsize_t nb_particles, + const real_number pos_part[], + real_number rhs_part[]) const; template <int size_particle_positions, int size_particle_rhs, int size_particle_rhs_extra> - void compute_interaction_with_extra(const partsize_t nb_particles, const real_number pos_part[], real_number rhs_part[], - const real_number rhs_part_extra[]) const{ - static_assert(size_particle_rhs_extra == 3, "This kernel works only with 3 values for one particle's rhs extra"); - - // call plain compute_interaction first - compute_interaction<size_particle_positions, size_particle_rhs>(nb_particles, pos_part, rhs_part); - - // now add vorticity term - #pragma omp parallel for - for(partsize_t idx_part = 0 ; idx_part < nb_particles ; ++idx_part){ - // Cross product vorticity/orientation - rhs_part[idx_part*size_particle_rhs + 3+IDX_X] += 0.5*(rhs_part_extra[idx_part*size_particle_rhs_extra + IDX_Y]*pos_part[idx_part*size_particle_positions + 3+IDX_Z] - - rhs_part_extra[idx_part*size_particle_rhs_extra + IDX_Z]*pos_part[idx_part*size_particle_positions + 3+IDX_Y]); - rhs_part[idx_part*size_particle_rhs + 3+IDX_Y] += 0.5*(rhs_part_extra[idx_part*size_particle_rhs_extra + IDX_Z]*pos_part[idx_part*size_particle_positions + 3+IDX_X] - - rhs_part_extra[idx_part*size_particle_rhs_extra + IDX_X]*pos_part[idx_part*size_particle_positions + 3+IDX_Z]); - rhs_part[idx_part*size_particle_rhs + 3+IDX_Z] += 0.5*(rhs_part_extra[idx_part*size_particle_rhs_extra + IDX_X]*pos_part[idx_part*size_particle_positions + 3+IDX_Y] - - rhs_part_extra[idx_part*size_particle_rhs_extra + IDX_Y]*pos_part[idx_part*size_particle_positions + 3+IDX_X]); - } - } - + void compute_interaction_with_extra( + const partsize_t nb_particles, + const real_number pos_part[], + real_number rhs_part[], + const real_number rhs_part_extra[]) const; // meant to be called AFTER executing the time-stepping operation. // once the particles have been moved, ensure that the orientation is a unit vector. template <int size_particle_positions> - void enforce_unit_orientation(const partsize_t nb_particles, real_number pos_part[]) const{ - static_assert(size_particle_positions == 6, "This kernel works only with 6 values for one particle's position"); - - #pragma omp parallel for - for(partsize_t idx_part = 0 ; idx_part < nb_particles ; ++idx_part){ - const partsize_t idx0 = idx_part*size_particle_positions + 3; - // compute orientation size: - real_number orientation_size = sqrt( - pos_part[idx0+IDX_X]*pos_part[idx0+IDX_X] + - pos_part[idx0+IDX_Y]*pos_part[idx0+IDX_Y] + - pos_part[idx0+IDX_Z]*pos_part[idx0+IDX_Z]); - // now renormalize - pos_part[idx0 + IDX_X] /= orientation_size; - pos_part[idx0 + IDX_Y] /= orientation_size; - pos_part[idx0 + IDX_Z] /= orientation_size; - } - } - + void enforce_unit_orientation( + const partsize_t nb_particles, + real_number pos_part[]) const; bool isEnable() const { return isActive; diff --git a/setup.py b/setup.py index 3094c692..34f63082 100644 --- a/setup.py +++ b/setup.py @@ -126,7 +126,8 @@ src_file_list = [ 'full_code/test_interpolation', 'full_code/NSVEparticles', 'full_code/NSVEcomplex_particles', - 'full_code/NSVEp_extra_sampling'] + 'full_code/NSVEp_extra_sampling', + 'particles/particles_inner_computer'] particle_headers = [ 'cpp/particles/abstract_particles_input.hpp', @@ -144,7 +145,7 @@ particle_headers = [ 'cpp/particles/particles_field_computer.hpp', 'cpp/particles/particles_generic_interp.hpp', 'cpp/particles/particles_inner_computer_empty.hpp', - 'cpp/particles/particles_inner_computer.hpp', + #'cpp/particles/particles_inner_computer.hpp', 'cpp/particles/particles_input_hdf5.hpp', 'cpp/particles/particles_output_hdf5.hpp', 'cpp/particles/particles_output_mpiio.hpp', @@ -210,6 +211,9 @@ class CompileLibCommand(distutils.cmd.Command): if not os.path.isdir('obj/full_code'): os.makedirs('obj/full_code') need_to_compile = True + if not os.path.isdir('obj/particles'): + os.makedirs('obj/particles') + need_to_compile = True if not os.path.isfile('bfps/libbfps.a'): need_to_compile = True else: -- GitLab