From 5da5a947a99e75b6f2a7f1704cc3108a0d03a61d Mon Sep 17 00:00:00 2001
From: Berenger Bramas <bbramas@mpcdf.mpg.de>
Date: Fri, 2 Dec 2016 12:03:51 +0100
Subject: [PATCH] Update script generation and add per-loop timing

---
 bfps/_code.py            | 13 +++++++++++--
 bfps/_fluid_base.py      | 13 +++++++++++++
 bfps/cpp/field.cpp       |  2 ++
 bfps/cpp/scope_timer.hpp |  9 ++++++++-
 4 files changed, 34 insertions(+), 3 deletions(-)

diff --git a/bfps/_code.py b/bfps/_code.py
index a56acb40..36cb602a 100644
--- a/bfps/_code.py
+++ b/bfps/_code.py
@@ -321,6 +321,8 @@ class _code(_base):
         script_file.write('# @ output = ' + os.path.join(self.work_dir, out_file) + '\n')
         script_file.write('# @ job_type = parallel\n')
         script_file.write('# @ node_usage = not_shared\n')
+        script_file.write('# @ notification = complete\n')
+        script_file.write('# @ notify_user = $(user)@rzg.mpg.de\n')
 
         nb_cpus_per_node = 20
 
@@ -328,6 +330,7 @@ class _code(_base):
             nb_process_per_node = int(os.environ['NB_PROC_PER_NODE'])
         except :
            nb_process_per_node=nb_cpus_per_node
+        print('nb_cpu = {} '.format(nprocesses))
         print('nb_process_per_node = {} (NB_PROC_PER_NODE)'.format(nb_process_per_node))
         
         nb_cpus_per_task=int(nb_cpus_per_node/nb_process_per_node)
@@ -336,9 +339,9 @@ class _code(_base):
             raise Exception('nb cpus {} should be devided per nb proce per node {}(NB_PROC_PER_NODE)'.format(nb_cpus_per_node, nb_process_per_node))
 
         nb_tasks_per_node = int(nb_cpus_per_node/nb_cpus_per_task)
-        number_of_nodes = int((nprocesses+nb_process_per_node-1)/nb_process_per_node)
+        number_of_nodes = int((nprocesses+nb_cpus_per_node-1)/nb_cpus_per_node)
 
-        first_node_tasks = nprocesses - (number_of_nodes-1)*nb_process_per_node
+        first_node_tasks = int((nprocesses - (number_of_nodes-1)*nb_cpus_per_node)/nb_cpus_per_task)
 
         script_file.write('# @ resources = ConsumableCpus({})\n'.format(nb_cpus_per_task))
         script_file.write('# @ network.MPI = sn_all,not_shared,us\n')
@@ -349,10 +352,15 @@ class _code(_base):
         if (first_node_tasks > 0):
             script_file.write('# @ first_node_tasks = {0}\n'.format(first_node_tasks))
         script_file.write('# @ queue\n')
+
+
+        script_file.write('export OMP_NUM_THREADS={}\n'.format(nb_cpus_per_task))
+
         script_file.write('LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:' +
                           ':'.join([bfps.lib_dir] + bfps.install_info['library_dirs']) +
                           '\n')
         script_file.write('echo "Start time is `date`"\n')
+        script_file.write('export HTMLOUTPUT={}.html\n'.format(command_atoms[-1]))
         script_file.write('cd ' + self.work_dir + '\n')
 #        script_file.write('cp -s ../*.h5 ./\n')
         script_file.write('poe ' +
@@ -425,6 +433,7 @@ class _code(_base):
                           '\n')
         script_file.write('echo "This is step $LOADL_STEP_ID out of {0}"\n'.format(njobs))
         script_file.write('echo "Start time is `date`"\n')
+        script_file.write('export HTMLOUTPUT={}.html\n'.format(command_atoms[-1]))
 #        script_file.write('cp -s ../*.h5 ./\n')
         script_file.write('cd ' + self.work_dir + '\n')
         script_file.write('poe ' +
diff --git a/bfps/_fluid_base.py b/bfps/_fluid_base.py
index b5fbf077..79a823a9 100644
--- a/bfps/_fluid_base.py
+++ b/bfps/_fluid_base.py
@@ -266,6 +266,13 @@ class _fluid_particle_base(_code):
         if not postprocess_mode:
             self.main       += 'for (int max_iter = iteration+niter_todo; iteration < max_iter; iteration++)\n'
             self.main       += '{\n'
+
+            self.main       += """
+                                #ifdef USE_TIMINGOUTPUT
+                                const std::string loopLabel = "code::main_start::loop-" + std::to_string(iteration);                                
+                                TIMEZONE(loopLabel.c_str());
+                                #endif
+                                """
             self.main       += 'if (iteration % niter_stat == 0) do_stats();\n'
             if self.particle_species > 0:
                 self.main       += 'if (iteration % niter_part == 0) do_particle_stats();\n'
@@ -279,6 +286,12 @@ class _fluid_particle_base(_code):
         else:
             self.main       += 'for (int frame_index = iter0; frame_index <= iter1; frame_index += niter_out)\n'
             self.main       += '{\n'
+            self.main       += """
+                                #ifdef USE_TIMINGOUTPUT
+                                const std::string loopLabel = "code::main_start::loop-" + std::to_string(frame_index);                                
+                                TIMEZONE(loopLabel.c_str());
+                                #endif
+                                """
             if self.particle_species > 0:
                 self.main   += self.particle_loop
             self.main       += self.fluid_loop
diff --git a/bfps/cpp/field.cpp b/bfps/cpp/field.cpp
index 0d065ced..62941fc4 100644
--- a/bfps/cpp/field.cpp
+++ b/bfps/cpp/field.cpp
@@ -540,6 +540,8 @@ void field<rnumber, be, fc>::compute_rspace_stats(
                 }
             }
         });
+
+        TIMEZONE("FIELD_RLOOP::Merge");
         local_moments_threaded.mergeParallel();
         local_hist_threaded.mergeParallel();
     }
diff --git a/bfps/cpp/scope_timer.hpp b/bfps/cpp/scope_timer.hpp
index 76fc69f1..94cb7c2b 100644
--- a/bfps/cpp/scope_timer.hpp
+++ b/bfps/cpp/scope_timer.hpp
@@ -38,6 +38,7 @@
 #include <cstring>
 #include <stdexcept>
 #include <fstream>
+#include <iomanip>
 #include "base.hpp"
 #include "bfps_timer.hpp"
 
@@ -503,7 +504,13 @@ public:
                                  << eventToShow.second->getOccurrence();
                 }
                 myResults << "\">" << eventToShow.second->getName();
-                myResults << " (" << 100*eventToShow.second->getDuration()/totalDuration << "% -- " ;
+                const double percentage =  100*eventToShow.second->getDuration()/totalDuration;
+                if( percentage < 0.001 ){
+                    myResults << " (< 0.001% -- " ;
+                }
+                else{
+                    myResults << " (" << std::fixed << std::setprecision(3) << percentage << "% -- " ;
+                }
                 myResults << eventToShow.second->getDuration() <<"s)</span></label>\n";
                 myResults << "<ul>\n";
                 events.push({-1, std::shared_ptr<CoreEvent>()});
-- 
GitLab