From 9a7ad49b71ba80d7386adb8240babc408ac464c4 Mon Sep 17 00:00:00 2001
From: Cristian C Lalescu <Cristian.Lalescu@mpcdf.mpg.de>
Date: Fri, 7 Aug 2020 18:47:01 +0200
Subject: [PATCH] adds control option for pc, and comment

control option: allow fine-grained control over the parameters given to
mpirun/srun on the command line in interactive runs.

comment: adds a comment in `kspace.cpp` about the division-by-zero
problem, clarifying why it happens and listing possible solutions.
---
 TurTLE/_code.py |  2 ++
 cpp/kspace.cpp  | 23 ++++++++++++++++++++---
 2 files changed, 22 insertions(+), 3 deletions(-)

diff --git a/TurTLE/_code.py b/TurTLE/_code.py
index e9bac67e..00339f95 100644
--- a/TurTLE/_code.py
+++ b/TurTLE/_code.py
@@ -407,6 +407,8 @@ class _code(_base):
                 command_atoms = [self.host_info['executable_launcher']]
                 if self.host_info['executable_launcher'] == 'srun':
                     command_atoms += ['-p', 'interactive']
+                if 'executable_parameters' in self.host_info.keys():
+                    command_atoms += self.host_info['executable_parameters']
             else:
                 command_atoms = ['mpirun']
             command_atoms += ['-n',
diff --git a/cpp/kspace.cpp b/cpp/kspace.cpp
index cdb32d5d..64daa3c4 100644
--- a/cpp/kspace.cpp
+++ b/cpp/kspace.cpp
@@ -147,11 +147,28 @@ kspace<be, dt>::kspace(
             &this->kshell.front(),
             this->nshells,
             MPI_DOUBLE, MPI_SUM, this->layout->comm);
+
+    // 2020-08-07
+    // the following loop caused problems with the intel compiler:
+    // at the highest optimization level, the intel compiler performs aggressive
+    // vectorization of loops, including this one.
+    // it also turns on speculative execution, i.e. it computes both branches
+    // of the if clause in parallel, and then it picks out the useful result.
+    // the problem is that one of the branches is a division by 0, hence a
+    // floating point exception is raised.
+    // there are several possible solutions:
+    // * instead of dividing by `this->nshell[n]`, create a double `nnshell`
+    //   that takes the maximum value between 1.0 and `this->nshell[n]`, and
+    //   then divide by `nnshell`
+    // * use the '-fp-speculation=safe' intel compiler option
+    // * tell the compiler that we do not want to vectorize this particular
+    //   loop by using `#pragma novector`.
+    // I chose the last option because there's no reason to optimize this
+    // loop. Furthermore, it seems like the solution that's most readable,
+    // and with the least amount of side effects.
+    # pragma novector
     for (int n=0; n<this->nshells; n++){
-        // using the `volatile` line instead of the if gets the code to work
-        //volatile double nnshell = std::max(double(this->nshell[n]), 1.0);
         if (this->nshell[n] > 0)
-	        //this->kshell[n] /= nnshell;
 	        this->kshell[n] /= this->nshell[n];
     }
 }
-- 
GitLab