diff --git a/cpp/base.hpp b/cpp/base.hpp index 806e9b3415308b6dacf86bb36896aa86fdd7855f..ca3749762969876fea09cc9766948f9aa8052400 100644 --- a/cpp/base.hpp +++ b/cpp/base.hpp @@ -59,6 +59,7 @@ namespace turtle_mpi_pcontrol{ const int FFTW = 5; const int FIELD = 6; const int PARTICLES = 7; + const int HDF5 = 8; } #ifdef USE_TIMINGOUTPUT @@ -66,14 +67,16 @@ int start_mpi_profiling_zone(const int zone_name) { assert((zone_name == turtle_mpi_pcontrol::FFTW) || (zone_name == turtle_mpi_pcontrol::FIELD) || - (zone_name == turtle_mpi_pcontrol::PARTICLES)); + (zone_name == turtle_mpi_pcontrol::PARTICLES) || + (zone_name == turtle_mpi_pcontrol::HDF5)); return MPI_Pcontrol(zone_name); } int finish_mpi_profiling_zone(const int zone_name) { assert((zone_name == turtle_mpi_pcontrol::FFTW) || (zone_name == turtle_mpi_pcontrol::FIELD) || - (zone_name == turtle_mpi_pcontrol::PARTICLES)); + (zone_name == turtle_mpi_pcontrol::PARTICLES) || + (zone_name == turtle_mpi_pcontrol::HDF5)); return MPI_Pcontrol(-zone_name); } #else diff --git a/cpp/field.cpp b/cpp/field.cpp index d14c741406b79d94d2eaba2fb1eb651cb4446695..5282d8b8fc5e08f0e0b76a54921d4348ddf2494f 100644 --- a/cpp/field.cpp +++ b/cpp/field.cpp @@ -148,9 +148,9 @@ template <typename rnumber, void field<rnumber, be, fc>::ift() { TIMEZONE("field::ift"); - MPI_Pcontrol(5); + start_mpi_profiling_zone(turtle_mpi_pcontrol::FFTW); fftw_interface<rnumber>::execute(this->c2r_plan); - MPI_Pcontrol(-5); + finish_mpi_profiling_zone(turtle_mpi_pcontrol::FFTW); this->real_space_representation = true; } @@ -160,9 +160,9 @@ template <typename rnumber, void field<rnumber, be, fc>::dft() { TIMEZONE("field::dft"); - MPI_Pcontrol(5); + start_mpi_profiling_zone(turtle_mpi_pcontrol::FFTW); fftw_interface<rnumber>::execute(this->r2c_plan); - MPI_Pcontrol(-5); + finish_mpi_profiling_zone(turtle_mpi_pcontrol::FFTW); this->real_space_representation = false; } @@ -188,6 +188,7 @@ int field<rnumber, be, fc>::io( "/" + std::to_string(iteration)); /* open/create file */ + start_mpi_profiling_zone(turtle_mpi_pcontrol::HDF5); plist_id = H5Pcreate(H5P_FILE_ACCESS); H5Pset_fapl_mpio(plist_id, this->comm, MPI_INFO_NULL); bool file_exists = false; @@ -233,6 +234,7 @@ int field<rnumber, be, fc>::io( H5Tclose(dset_type); assert(this->real_space_representation == io_for_real); } + finish_mpi_profiling_zone(turtle_mpi_pcontrol::HDF5); /* generic space initialization */ hid_t fspace, mspace; @@ -261,6 +263,7 @@ int field<rnumber, be, fc>::io( memoffset[i] = 0; } } + start_mpi_profiling_zone(turtle_mpi_pcontrol::HDF5); mspace = H5Screate_simple(ndim(fc), memshape, NULL); H5Sselect_hyperslab(mspace, H5S_SELECT_SET, memoffset, NULL, count, NULL); @@ -306,12 +309,15 @@ int field<rnumber, be, fc>::io( H5P_DEFAULT); } } + finish_mpi_profiling_zone(turtle_mpi_pcontrol::HDF5); /* both dset_id and fspace should now have sane values */ /* if reading, first set local data to 0 */ if (read) *this = rnumber(0.0); + start_mpi_profiling_zone(turtle_mpi_pcontrol::HDF5); + /* both dset_id and fspace should now have sane values */ /* check file space */ int ndims_fspace = H5Sget_simple_extent_dims(fspace, dims, NULL); variable_used_only_in_assert(ndims_fspace); @@ -361,6 +367,7 @@ int field<rnumber, be, fc>::io( H5Dclose(dset_id); /* close file */ H5Fclose(file_id); + finish_mpi_profiling_zone(turtle_mpi_pcontrol::HDF5); return EXIT_SUCCESS; } @@ -565,6 +572,7 @@ int field<rnumber, be, fc>::write_0slice( const std::string field_name, const int iteration) { + start_mpi_profiling_zone(turtle_mpi_pcontrol::HDF5); // this should in principle work for any fc TIMEZONE("field::write_0slice"); assert(this->real_space_representation); @@ -622,6 +630,7 @@ int field<rnumber, be, fc>::write_0slice( H5Sclose(mspace); H5Sclose(wspace); } + finish_mpi_profiling_zone(turtle_mpi_pcontrol::HDF5); return EXIT_SUCCESS; } diff --git a/cpp/field.hpp b/cpp/field.hpp index d288795e112adb30890220e492f3ceda7e3f6071..15b298be48556b7c2ff2747ad5771ffddd99d7de 100644 --- a/cpp/field.hpp +++ b/cpp/field.hpp @@ -251,6 +251,7 @@ class field template <class func_type> void RLOOP_simd(func_type expression) { + start_mpi_profiling_zone(turtle_mpi_pcontrol::FIELD); switch(be) { case FFTW: @@ -274,10 +275,12 @@ class field } break; } + finish_mpi_profiling_zone(turtle_mpi_pcontrol::FIELD); } template <class func_type> void RLOOP(func_type expression) { + start_mpi_profiling_zone(turtle_mpi_pcontrol::FIELD); switch(be) { case FFTW: @@ -301,6 +304,7 @@ class field } break; } + finish_mpi_profiling_zone(turtle_mpi_pcontrol::FIELD); } ptrdiff_t get_cindex( ptrdiff_t xindex, diff --git a/cpp/full_code/NSVEparticles.cpp b/cpp/full_code/NSVEparticles.cpp index ecae213e3f43f90fbba91cbffe0e3e97bdb02f98..2572ab9ba18b310629a0b747492b20013b3d66b6 100644 --- a/cpp/full_code/NSVEparticles.cpp +++ b/cpp/full_code/NSVEparticles.cpp @@ -180,6 +180,7 @@ int NSVEparticles<rnumber>::do_stats() this->NSVE<rnumber>::do_stats(); + start_mpi_profiling_zone(turtle_mpi_pcontrol::PARTICLES); /// either one of two conditions suffices to compute statistics: /// 1) current iteration is a multiple of niter_part /// 2) we are within niter_part_fine_duration/2 of a multiple of niter_part_fine_period @@ -208,6 +209,7 @@ int NSVEparticles<rnumber>::do_stats() /// sample velocity std::fill_n(pdata.get(), 3*this->ps->getLocalNbParticles(), 0); + finish_mpi_profiling_zone(turtle_mpi_pcontrol::PARTICLES); if (!(this->iteration % this->niter_stat == 0)) { // we need to compute velocity field manually, because it didn't happen in NSVE::do_stats() @@ -215,6 +217,7 @@ int NSVEparticles<rnumber>::do_stats() *this->tmp_vec_field = this->fs->cvelocity->get_cdata(); this->tmp_vec_field->ift(); } + start_mpi_profiling_zone(turtle_mpi_pcontrol::PARTICLES); this->ps->sample_compute_field(*this->tmp_vec_field, pdata.get()); this->particles_sample_writer_mpi->template save_dataset<3>( "tracers0", @@ -224,10 +227,12 @@ int NSVEparticles<rnumber>::do_stats() this->ps->getParticlesIndexes(), this->ps->getLocalNbParticles(), this->ps->get_step_idx()-1); + finish_mpi_profiling_zone(turtle_mpi_pcontrol::PARTICLES); /// compute acceleration and sample it this->fs->compute_Lagrangian_acceleration(this->tmp_vec_field, this->pressure); this->tmp_vec_field->ift(); + start_mpi_profiling_zone(turtle_mpi_pcontrol::PARTICLES); std::fill_n(pdata.get(), 3*this->ps->getLocalNbParticles(), 0); this->ps->sample_compute_field(*this->tmp_vec_field, pdata.get()); this->particles_sample_writer_mpi->template save_dataset<3>( @@ -241,6 +246,7 @@ int NSVEparticles<rnumber>::do_stats() // deallocate temporary data array delete[] pdata.release(); + finish_mpi_profiling_zone(turtle_mpi_pcontrol::PARTICLES); return EXIT_SUCCESS; } diff --git a/cpp/kspace.hpp b/cpp/kspace.hpp index c5e36e19fe599617437e54c1ff7260b6595f303d..238bdc9e5375316ec14661fd26bc938b79b94e34 100644 --- a/cpp/kspace.hpp +++ b/cpp/kspace.hpp @@ -155,6 +155,7 @@ class kspace template <class func_type> void CLOOP(func_type expression) { + start_mpi_profiling_zone(turtle_mpi_pcontrol::FIELD); #pragma omp parallel { const hsize_t start = OmpUtils::ForIntervalStart(this->layout->subsizes[1]); @@ -172,10 +173,12 @@ class kspace } } } + finish_mpi_profiling_zone(turtle_mpi_pcontrol::FIELD); } template <class func_type> void CLOOP_simd(func_type expression) { + start_mpi_profiling_zone(turtle_mpi_pcontrol::FIELD); #pragma omp parallel { const hsize_t start = OmpUtils::ForIntervalStart(this->layout->subsizes[1]); @@ -194,10 +197,12 @@ class kspace } } } + finish_mpi_profiling_zone(turtle_mpi_pcontrol::FIELD); } template <class func_type> void CLOOP_K2(func_type expression) { + start_mpi_profiling_zone(turtle_mpi_pcontrol::FIELD); #pragma omp parallel { const hsize_t start = OmpUtils::ForIntervalStart(this->layout->subsizes[1]); @@ -221,10 +226,12 @@ class kspace } } } + finish_mpi_profiling_zone(turtle_mpi_pcontrol::FIELD); } template <class func_type> void CLOOP_K2_simd(func_type expression) { + start_mpi_profiling_zone(turtle_mpi_pcontrol::FIELD); #pragma omp parallel { const hsize_t start = OmpUtils::ForIntervalStart(this->layout->subsizes[1]); @@ -249,10 +256,12 @@ class kspace } } } + finish_mpi_profiling_zone(turtle_mpi_pcontrol::FIELD); } template <class func_type> void CLOOP_K2_NXMODES(func_type expression) { + start_mpi_profiling_zone(turtle_mpi_pcontrol::FIELD); #pragma omp parallel { const hsize_t start = OmpUtils::ForIntervalStart(this->layout->subsizes[1]); @@ -273,6 +282,7 @@ class kspace } } } + finish_mpi_profiling_zone(turtle_mpi_pcontrol::FIELD); } template <typename rnumber> void project_divfree( diff --git a/cpp/particles/particles_system.hpp b/cpp/particles/particles_system.hpp index fa6c0d952914ca5a3bbd145d7b74e3f66f9be277..9487f870db874731103e9f8cd5609fa334c9d89b 100644 --- a/cpp/particles/particles_system.hpp +++ b/cpp/particles/particles_system.hpp @@ -424,6 +424,7 @@ public: } void completeLoop(const real_number dt) final { + start_mpi_profiling_zone(turtle_mpi_pcontrol::PARTICLES); TIMEZONE("particles_system::completeLoop"); compute(); compute_particles_inner(); @@ -433,9 +434,11 @@ public: redistribute(); inc_step_idx(); shift_rhs_vectors(); + finish_mpi_profiling_zone(turtle_mpi_pcontrol::PARTICLES); } void complete2ndOrderLoop(const real_number dt) final { + start_mpi_profiling_zone(turtle_mpi_pcontrol::PARTICLES); assert((size_particle_positions == 6) || (size_particle_positions == 7)); assert(size_particle_rhs == size_particle_positions); std::unique_ptr<real_number[]> sampled_velocity(new real_number[getLocalNbParticles()*3]()); @@ -448,11 +451,13 @@ public: redistribute(); inc_step_idx(); shift_rhs_vectors(); + finish_mpi_profiling_zone(turtle_mpi_pcontrol::PARTICLES); } void completeLoopWithVorticity( const real_number dt, const real_number particle_extra_field[]) final { + start_mpi_profiling_zone(turtle_mpi_pcontrol::PARTICLES); TIMEZONE("particles_system::completeLoopWithVorticity"); compute(); compute_sphere_particles_inner(particle_extra_field); @@ -467,11 +472,13 @@ public: redistribute(); inc_step_idx(); shift_rhs_vectors(); + finish_mpi_profiling_zone(turtle_mpi_pcontrol::PARTICLES); } void completeLoopWithVelocityGradient( const real_number dt, const real_number particle_extra_field[]) final { + start_mpi_profiling_zone(turtle_mpi_pcontrol::PARTICLES); TIMEZONE("particles_system::completeLoopWithVelocityGradient"); compute(); compute_ellipsoid_particles_inner(particle_extra_field); @@ -486,6 +493,7 @@ public: redistribute(); inc_step_idx(); shift_rhs_vectors(); + finish_mpi_profiling_zone(turtle_mpi_pcontrol::PARTICLES); } const real_number* getParticlesState() const final {