diff --git a/README.rst b/README.rst
index ddb9f2447db919248100368a9a08b13297d5e3a4..5418d711b7846e2ebd2ca68270a95baf1f9e35b3 100644
--- a/README.rst
+++ b/README.rst
@@ -60,7 +60,8 @@ Use a console; navigate to the ``bfps`` folder, and type:

 If you want to run simulations on the machine where you're installing,
 you will need to call `compile_library` before installing.
 Your machine needs to have an MPI compiler installed, the HDF5 C library
-and FFTW >= 3.4.
+and FFTW >= 3.4 --- a detailed prerequisite installation list is
+included at the end of this document.
 The file `machine_settings_py.py` should be modified appropriately for your
 machine (otherwise the `compile_library` command will most likely fail).
@@ -102,7 +103,63 @@ Comments

 * particles: initialization of multistep solvers is done with lower
   order methods, so direct convergence tests will fail.

+* Code is only known to work with HDF5 1.8.x.
+
 * Code is used mainly with Python 3.4 and 3.5.
   In principle it should be easy to maintain compatibility with Python
   2.7.x, but as of `bfps 1.8` this is no longer a main concern.
+
+-------------------------------
+Installation with prerequisites
+-------------------------------
+
+These installation steps assume that you have a working MPI compiler,
+properly configured on your system (i.e. the various configure scripts
+are able to find it).
+If this is not the case, please consult the FFTW and HDF5 compilation
+instructions (`./configure --help` should be enough).
+
+1. Make a directory PREFIX on a local, fast partition.
+
+2. Download, compile and install FFTW (latest version 3.x from
+   http://www.fftw.org/).
+   Execute the following commands in order; feel free to customize the
+   optimisation flags for your own computer:
+
+.. code:: bash
+
+    ./configure --prefix=PREFIX --enable-single --enable-sse --enable-mpi --enable-openmp --enable-threads
+    make
+    make install
+    ./configure --prefix=PREFIX --enable-sse --enable-sse2 --enable-mpi --enable-openmp --enable-threads
+    make
+    make install
+
+3. Download, compile and install HDF5 (version 1.8.x, currently available
+   at https://support.hdfgroup.org/HDF5/release/obtainsrc518.html).
+   We are using parallel I/O; therefore we use the plain C interface of HDF5:
+
+.. code:: bash
+
+    ./configure --prefix=PREFIX --enable-parallel
+    make
+    make install
+
+4. This step may be omitted.
+   We recommend creating a virtual python3 environment (also under PREFIX)
+   that will be used for installing bfps and its dependencies.
+   Please see https://docs.python-guide.org/dev/virtualenvs/.
+
+5. Clone the bfps repository:
+
+.. code:: bash
+
+    git clone git@gitlab.mpcdf.mpg.de:clalescu/bfps.git
+
+6. Tweak `host_information.py` and `machine_settings.py` for your user and
+   your machine, and place them under `~/.config/bfps`.
+
+7. Activate the virtual environment.
+
+8. Go into the bfps repository and execute:
+
+.. code:: bash
+
+    python setup.py compile_library
+    python setup.py install
+
diff --git a/bfps/cpp/full_code/NSVEcomplex_particles.cpp b/bfps/cpp/full_code/NSVEcomplex_particles.cpp
index 4ffa148af2ba33fa6c412349d323efafe6035cae..9b910e9bb7a5aaf6b36d884858a095ff9971dffa 100644
--- a/bfps/cpp/full_code/NSVEcomplex_particles.cpp
+++ b/bfps/cpp/full_code/NSVEcomplex_particles.cpp
@@ -44,6 +44,7 @@ int NSVEcomplex_particles<rnumber>::initialize(void)
     particles_inner_computer<double, long long int> current_particles_inner_computer(inner_v0);
     current_particles_inner_computer.setEnable(enable_inner);
+
     this->ps = particles_system_builder_with_p2p(
                 this->fs->cvelocity,              // (field object)
                 this->fs->kk,                     // (kspace object, contains dkx, dky, dkz)
@@ -92,7 +93,11 @@ int NSVEcomplex_particles<rnumber>::step(void)
     if(this->enable_vorticity_omega){
         *this->tmp_vec_field = this->fs->cvorticity->get_cdata();
         this->tmp_vec_field->ift();
-        this->ps->completeLoopWithVorticity(this->dt, *this->tmp_vec_field);
+        std::unique_ptr<double[]> sampled_vorticity(new double[3*this->ps->getLocalNbParticles()]);
+        std::fill_n(sampled_vorticity.get(), 3*this->ps->getLocalNbParticles(), 0);
+        this->ps->sample_compute_field(*this->tmp_vec_field, sampled_vorticity.get());
+        DEBUG_MSG("sampled vorticity is %g\n", sampled_vorticity[0]);
+        this->ps->completeLoopWithVorticity(this->dt, sampled_vorticity.get());
     }
     else{
         this->ps->completeLoop(this->dt);
@@ -145,6 +150,7 @@ int NSVEcomplex_particles<rnumber>::do_stats()
         return EXIT_SUCCESS;

     /// allocate temporary data array
+    /// initialize pdata0 with the positions, and pdata1 with the orientations
    std::unique_ptr<double[]> pdata0 = this->ps->extractParticlesState(0, 3);
    std::unique_ptr<double[]> pdata1 = this->ps->extractParticlesState(3, 6);
    std::unique_ptr<double[]> pdata2(new double[9*this->ps->getLocalNbParticles()]);
@@ -170,6 +176,8 @@ int NSVEcomplex_particles<rnumber>::do_stats()
            this->ps->get_step_idx()-1);

    /// sample velocity
+    /// from now on, we need to clean up data arrays before interpolation
+    std::fill_n(pdata1.get(), 3*this->ps->getLocalNbParticles(), 0);
    this->ps->sample_compute_field(*this->tmp_vec_field, pdata1.get());
    this->particles_sample_writer_mpi->template save_dataset<3>(
            "tracers0",
@@ -188,6 +196,7 @@ int NSVEcomplex_particles<rnumber>::do_stats()
            this->fs->cvelocity,
            this->nabla_u);
    this->nabla_u->ift();
+    std::fill_n(pdata2.get(), 9*this->ps->getLocalNbParticles(), 0);
    this->ps->sample_compute_field(*this->nabla_u, pdata2.get());
    this->particles_sample_writer_mpi->template save_dataset<9>(
            "tracers0",
@@ -201,6 +210,7 @@
    /// compute acceleration and sample it
    this->fs->compute_Lagrangian_acceleration(this->tmp_vec_field);
    this->tmp_vec_field->ift();
+    std::fill_n(pdata1.get(), 3*this->ps->getLocalNbParticles(), 0);
    this->ps->sample_compute_field(*this->tmp_vec_field, pdata1.get());
    this->particles_sample_writer_mpi->template save_dataset<3>(
            "tracers0",
@@ -231,7 +241,10 @@ int NSVEcomplex_particles<rnumber>::read_parameters(void)
    this->tracers0_smoothness = hdf5_tools::read_value<int>(parameter_file, "parameters/tracers0_smoothness");
    this->enable_p2p = hdf5_tools::read_value<int>(parameter_file, "parameters/tracers0_enable_p2p");
    this->enable_inner = hdf5_tools::read_value<int>(parameter_file, "parameters/tracers0_enable_inner");
-    this->enable_vorticity_omega = hdf5_tools::read_value<int>(parameter_file, "parameters/tracers0_enable_vorticity_omega");
+    int tval = hdf5_tools::read_value<int>(parameter_file, "parameters/tracers0_enable_vorticity_omega");
+    this->enable_vorticity_omega = tval;
+    DEBUG_MSG("tracers0_enable_vorticity_omega = %d, this->enable_vorticity_omega = %d\n",
+            tval, this->enable_vorticity_omega);
    this->cutoff = hdf5_tools::read_value<double>(parameter_file, "parameters/tracers0_cutoff");
    this->inner_v0 = hdf5_tools::read_value<double>(parameter_file, "parameters/tracers0_inner_v0");
    H5Fclose(parameter_file);
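The `std::fill_n` calls added above zero each freshly allocated buffer before it is handed to `sample_compute_field`: plain `new double[n]` leaves the memory uninitialized, and the distributed interpolation appears to accumulate contributions into the output array rather than overwrite it. A minimal, self-contained sketch of that zero-then-sample pattern, with `accumulate_sample` as a hypothetical stand-in for the library's sampler:

.. code:: cpp

    // Zero-then-sample pattern: the buffer must start at 0 because the
    // (stand-in) sampler adds into it instead of assigning to it.
    #include <algorithm>
    #include <cstdio>
    #include <memory>

    // hypothetical stand-in for sample_compute_field
    void accumulate_sample(double *out, const long long nparticles)
    {
        for (long long i = 0; i < 3*nparticles; i++)
            out[i] += 1.0; // pretend this is an interpolated field value
    }

    int main()
    {
        const long long nparticles = 4;
        // new double[...] without trailing () leaves the values indeterminate
        std::unique_ptr<double[]> sampled(new double[3*nparticles]);
        std::fill_n(sampled.get(), 3*nparticles, 0);
        accumulate_sample(sampled.get(), nparticles);
        std::printf("first sampled component = %g\n", sampled[0]);
        return 0;
    }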
diff --git a/bfps/cpp/particles/abstract_particles_system.hpp b/bfps/cpp/particles/abstract_particles_system.hpp
index 871f045953bdb35cd7c21c84e2a024a9d85869ff..ffa89892d1d6e34b7bcfe1b43b44e02c20471779 100644
--- a/bfps/cpp/particles/abstract_particles_system.hpp
+++ b/bfps/cpp/particles/abstract_particles_system.hpp
@@ -75,6 +75,7 @@ public:
                                     const field<rnumber, be, fc>& in_field) {
        static_assert(fc == THREE, "only THREE is supported for now");
        std::unique_ptr<real_number[]> extra_rhs(new real_number[getLocalNbParticles()*3]());
+        std::fill_n(extra_rhs.get(), 3*getLocalNbParticles(), 0);
        sample_compute_field(in_field, extra_rhs.get());
        completeLoopWithVorticity(dt, extra_rhs.get());
    }
diff --git a/bfps/cpp/particles/particles_inner_computer.hpp b/bfps/cpp/particles/particles_inner_computer.hpp
index 5c855cf5d4f9c9ce69200212e4929420a0c37228..f1fe322a3c30d87c6e3244c6c85f1ff3002f9a00 100644
--- a/bfps/cpp/particles/particles_inner_computer.hpp
+++ b/bfps/cpp/particles/particles_inner_computer.hpp
@@ -98,12 +98,12 @@ public:
        #pragma omp parallel for
        for(partsize_t idx_part = 0 ; idx_part < nb_particles ; ++idx_part){
            // Cross product vorticity/orientation
-            rhs_part[idx_part*size_particle_rhs + 3+IDX_X] += (rhs_part_extra[idx_part*size_particle_rhs_extra + IDX_Y]*pos_part[idx_part*size_particle_positions + 3+IDX_Z] -
-                                                               rhs_part_extra[idx_part*size_particle_rhs_extra + IDX_Z]*pos_part[idx_part*size_particle_positions + 3+IDX_Y]);
-            rhs_part[idx_part*size_particle_rhs + 3+IDX_Y] += (rhs_part_extra[idx_part*size_particle_rhs_extra + IDX_Z]*pos_part[idx_part*size_particle_positions + 3+IDX_X] -
-                                                               rhs_part_extra[idx_part*size_particle_rhs_extra + IDX_X]*pos_part[idx_part*size_particle_positions + 3+IDX_Z]);
-            rhs_part[idx_part*size_particle_rhs + 3+IDX_Z] += (rhs_part_extra[idx_part*size_particle_rhs_extra + IDX_X]*pos_part[idx_part*size_particle_positions + 3+IDX_Y] -
-                                                               rhs_part_extra[idx_part*size_particle_rhs_extra + IDX_Y]*pos_part[idx_part*size_particle_positions + 3+IDX_X]);
+            rhs_part[idx_part*size_particle_rhs + 3+IDX_X] += 0.5*(rhs_part_extra[idx_part*size_particle_rhs_extra + IDX_Y]*pos_part[idx_part*size_particle_positions + 3+IDX_Z] -
+                                                                   rhs_part_extra[idx_part*size_particle_rhs_extra + IDX_Z]*pos_part[idx_part*size_particle_positions + 3+IDX_Y]);
+            rhs_part[idx_part*size_particle_rhs + 3+IDX_Y] += 0.5*(rhs_part_extra[idx_part*size_particle_rhs_extra + IDX_Z]*pos_part[idx_part*size_particle_positions + 3+IDX_X] -
+                                                                   rhs_part_extra[idx_part*size_particle_rhs_extra + IDX_X]*pos_part[idx_part*size_particle_positions + 3+IDX_Z]);
+            rhs_part[idx_part*size_particle_rhs + 3+IDX_Z] += 0.5*(rhs_part_extra[idx_part*size_particle_rhs_extra + IDX_X]*pos_part[idx_part*size_particle_positions + 3+IDX_Y] -
+                                                                   rhs_part_extra[idx_part*size_particle_rhs_extra + IDX_Y]*pos_part[idx_part*size_particle_positions + 3+IDX_X]);
        }
    }
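The factor 0.5 introduced above matches the standard result for a small rigid particle: its orientation p is rotated by half the local fluid vorticity, dp/dt = 0.5*(omega x p). A standalone sketch of one forward Euler update of that equation, using plain 3-component arrays rather than the library's interleaved rhs_part/pos_part layout (an assumption made only to keep the example short):

.. code:: cpp

    // Rotate an orientation vector p with half the sampled vorticity omega:
    // dp/dt = 0.5 * (omega x p), advanced with one forward Euler step.
    #include <cmath>
    #include <cstdio>

    int main()
    {
        double p[3] = {1.0, 0.0, 0.0};           // particle orientation
        const double omega[3] = {0.0, 0.0, 2.0}; // sampled fluid vorticity
        const double dt = 1e-3;

        // same algebra as the 0.5*(...) cross-product terms above
        const double rhs[3] = {
                0.5*(omega[1]*p[2] - omega[2]*p[1]),
                0.5*(omega[2]*p[0] - omega[0]*p[2]),
                0.5*(omega[0]*p[1] - omega[1]*p[0])};
        for (int i = 0; i < 3; i++)
            p[i] += dt*rhs[i];

        // renormalize, since explicit Euler steps slowly change |p|
        const double norm = std::sqrt(p[0]*p[0] + p[1]*p[1] + p[2]*p[2]);
        for (int i = 0; i < 3; i++)
            p[i] /= norm;
        std::printf("p = (%g, %g, %g)\n", p[0], p[1], p[2]);
        return 0;
    }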
diff --git a/bfps/cpp/particles/particles_system.hpp b/bfps/cpp/particles/particles_system.hpp
index db2f33fa3716a8b22c96817a8c2724434c17834a..e451f56a56b00a050d9d940085ad50ccf564ea30 100644
--- a/bfps/cpp/particles/particles_system.hpp
+++ b/bfps/cpp/particles/particles_system.hpp
@@ -278,7 +278,7 @@ public:
    void completeLoopWithVorticity(const real_number dt,
                                   const real_number particle_extra_rhs[]) final {
-        TIMEZONE("particles_system::completeLoop");
+        TIMEZONE("particles_system::completeLoopWithVorticity");
        compute();
        compute_p2p();
        compute_particles_inner(particle_extra_rhs);
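One language detail relevant to the buffers touched in abstract_particles_system.hpp and NSVEcomplex_particles.cpp above: `new real_number[n]()` (with trailing parentheses) value-initializes the array, so its elements start at zero, whereas `new double[n]` (without them) leaves the elements indeterminate, and an explicit `std::fill_n` is needed before anything accumulates into them. A standalone illustration of the rule:

.. code:: cpp

    // Value-initialized vs default-initialized heap arrays.
    #include <algorithm>
    #include <cstdio>
    #include <memory>

    int main()
    {
        const long long n = 8;
        // trailing (): every element is value-initialized to 0.0
        std::unique_ptr<double[]> zeroed(new double[n]());
        // no (): element values are indeterminate, so zero them explicitly
        std::unique_ptr<double[]> raw(new double[n]);
        std::fill_n(raw.get(), n, 0);

        std::printf("zeroed[0] = %g, raw[0] = %g\n", zeroed[0], raw[0]);
        return 0;
    }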