From 5da5a947a99e75b6f2a7f1704cc3108a0d03a61d Mon Sep 17 00:00:00 2001 From: Berenger Bramas <bbramas@mpcdf.mpg.de> Date: Fri, 2 Dec 2016 12:03:51 +0100 Subject: [PATCH] update script generation and time per loops --- bfps/_code.py | 13 +++++++++++-- bfps/_fluid_base.py | 13 +++++++++++++ bfps/cpp/field.cpp | 2 ++ bfps/cpp/scope_timer.hpp | 9 ++++++++- 4 files changed, 34 insertions(+), 3 deletions(-) diff --git a/bfps/_code.py b/bfps/_code.py index a56acb40..36cb602a 100644 --- a/bfps/_code.py +++ b/bfps/_code.py @@ -321,6 +321,8 @@ class _code(_base): script_file.write('# @ output = ' + os.path.join(self.work_dir, out_file) + '\n') script_file.write('# @ job_type = parallel\n') script_file.write('# @ node_usage = not_shared\n') + script_file.write('# @ notification = complete\n') + script_file.write('# @ notify_user = $(user)@rzg.mpg.de\n') nb_cpus_per_node = 20 @@ -328,6 +330,7 @@ class _code(_base): nb_process_per_node = int(os.environ['NB_PROC_PER_NODE']) except : nb_process_per_node=nb_cpus_per_node + print('nb_cpu = {} '.format(nprocesses)) print('nb_process_per_node = {} (NB_PROC_PER_NODE)'.format(nb_process_per_node)) nb_cpus_per_task=int(nb_cpus_per_node/nb_process_per_node) @@ -336,9 +339,9 @@ class _code(_base): raise Exception('nb cpus {} should be devided per nb proce per node {}(NB_PROC_PER_NODE)'.format(nb_cpus_per_node, nb_process_per_node)) nb_tasks_per_node = int(nb_cpus_per_node/nb_cpus_per_task) - number_of_nodes = int((nprocesses+nb_process_per_node-1)/nb_process_per_node) + number_of_nodes = int((nprocesses+nb_cpus_per_node-1)/nb_cpus_per_node) - first_node_tasks = nprocesses - (number_of_nodes-1)*nb_process_per_node + first_node_tasks = int((nprocesses - (number_of_nodes-1)*nb_cpus_per_node)/nb_cpus_per_task) script_file.write('# @ resources = ConsumableCpus({})\n'.format(nb_cpus_per_task)) script_file.write('# @ network.MPI = sn_all,not_shared,us\n') @@ -349,10 +352,15 @@ class _code(_base): if (first_node_tasks > 0): script_file.write('# @ first_node_tasks = {0}\n'.format(first_node_tasks)) script_file.write('# @ queue\n') + + + script_file.write('export OMP_NUM_THREADS={}\n'.format(nb_cpus_per_task)) + script_file.write('LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:' + ':'.join([bfps.lib_dir] + bfps.install_info['library_dirs']) + '\n') script_file.write('echo "Start time is `date`"\n') + script_file.write('export HTMLOUTPUT={}.html\n'.format(command_atoms[-1])) script_file.write('cd ' + self.work_dir + '\n') # script_file.write('cp -s ../*.h5 ./\n') script_file.write('poe ' + @@ -425,6 +433,7 @@ class _code(_base): '\n') script_file.write('echo "This is step $LOADL_STEP_ID out of {0}"\n'.format(njobs)) script_file.write('echo "Start time is `date`"\n') + script_file.write('export HTMLOUTPUT={}.html\n'.format(command_atoms[-1])) # script_file.write('cp -s ../*.h5 ./\n') script_file.write('cd ' + self.work_dir + '\n') script_file.write('poe ' + diff --git a/bfps/_fluid_base.py b/bfps/_fluid_base.py index b5fbf077..79a823a9 100644 --- a/bfps/_fluid_base.py +++ b/bfps/_fluid_base.py @@ -266,6 +266,13 @@ class _fluid_particle_base(_code): if not postprocess_mode: self.main += 'for (int max_iter = iteration+niter_todo; iteration < max_iter; iteration++)\n' self.main += '{\n' + + self.main += """ + #ifdef USE_TIMINGOUTPUT + const std::string loopLabel = "code::main_start::loop-" + std::to_string(iteration); + TIMEZONE(loopLabel.c_str()); + #endif + """ self.main += 'if (iteration % niter_stat == 0) do_stats();\n' if self.particle_species > 0: self.main += 'if (iteration % niter_part == 0) do_particle_stats();\n' @@ -279,6 +286,12 @@ class _fluid_particle_base(_code): else: self.main += 'for (int frame_index = iter0; frame_index <= iter1; frame_index += niter_out)\n' self.main += '{\n' + self.main += """ + #ifdef USE_TIMINGOUTPUT + const std::string loopLabel = "code::main_start::loop-" + std::to_string(frame_index); + TIMEZONE(loopLabel.c_str()); + #endif + """ if self.particle_species > 0: self.main += self.particle_loop self.main += self.fluid_loop diff --git a/bfps/cpp/field.cpp b/bfps/cpp/field.cpp index 0d065ced..62941fc4 100644 --- a/bfps/cpp/field.cpp +++ b/bfps/cpp/field.cpp @@ -540,6 +540,8 @@ void field<rnumber, be, fc>::compute_rspace_stats( } } }); + + TIMEZONE("FIELD_RLOOP::Merge"); local_moments_threaded.mergeParallel(); local_hist_threaded.mergeParallel(); } diff --git a/bfps/cpp/scope_timer.hpp b/bfps/cpp/scope_timer.hpp index 76fc69f1..94cb7c2b 100644 --- a/bfps/cpp/scope_timer.hpp +++ b/bfps/cpp/scope_timer.hpp @@ -38,6 +38,7 @@ #include <cstring> #include <stdexcept> #include <fstream> +#include <iomanip> #include "base.hpp" #include "bfps_timer.hpp" @@ -503,7 +504,13 @@ public: << eventToShow.second->getOccurrence(); } myResults << "\">" << eventToShow.second->getName(); - myResults << " (" << 100*eventToShow.second->getDuration()/totalDuration << "% -- " ; + const double percentage = 100*eventToShow.second->getDuration()/totalDuration; + if( percentage < 0.001 ){ + myResults << " (< 0.001% -- " ; + } + else{ + myResults << " (" << std::fixed << std::setprecision(3) << percentage << "% -- " ; + } myResults << eventToShow.second->getDuration() <<"s)</span></label>\n"; myResults << "<ul>\n"; events.push({-1, std::shared_ptr<CoreEvent>()}); -- GitLab