uses even distribution of threads within node

3ca6064f · Cristian Lalescu · 2cd0182d · 3ca6064f · 3ca6064f
Commit 3ca6064f authored 4 years ago by Cristian Lalescu
--- a/TurTLE/_code.py
+++ b/TurTLE/_code.py
@@ -34,6 +34,7 @@ import math
 import warnings
 import TurTLE
+import TurTLE.tools
 from TurTLE.host_info import host_info
 from ._base import _base
@@ -752,6 +753,14 @@ class _code(_base):
        if nb_threads_per_process > 1:
            script_file.write('export OMP_NUM_THREADS=${SLURM_CPUS_PER_TASK}\n')
+        # explicit binding options that can be used further on if needed
+        core_masks = TurTLE.tools.distribute_cores_evenly(
+                nprocesses = nb_processes_per_node,
+                nthreads_per_process = nb_threads_per_process,
+                total_number_of_cores = nb_cpus_per_node)
+        script_file.write('export SLURM_CPU_BIND_OPTION="--cpu-bind=verbose,mask_cpu:' +
+                          ','.join(['0x{0:x}'.format(mm) for mm in core_masks]) + '"\n')
        script_file.write('echo "Start time is `date`"\n')
        script_file.write('cd ' + self.work_dir + '\n')
        script_file.write('export HTMLOUTPUT={}.html\n'.format(command_atoms[-1]))
@@ -768,16 +777,16 @@ class _code(_base):
        if 'executable_launcher' in self.host_info.keys():
            executable_launcher = self.host_info['executable_launcher']
        else:
-            executable_launcher = 'srun'
+            executable_launcher = 'srun ${SLURM_CPU_BIND_OPTION}'
            if host_info['use_vtune']:
                if 'vtune_executable' not in host_info.keys():
                    host_info['vtune_executable'] = 'vtune'
-                executable_launcher = 'srun ' + host_info['vtune_executable'] + ' -collect hpc-performance -trace-mpi -quiet -result-dir=vtune_${SLURM_JOB_NAME}'
+                executable_launcher += ' ' + host_info['vtune_executable'] + ' -collect hpc-performance -trace-mpi -quiet -result-dir=vtune_${SLURM_JOB_NAME}'
            if host_info['use_aps']:
                if 'aps_executable' not in host_info.keys():
                    host_info['aps_executable'] = 'aps'
-                executable_launcher = 'srun ' + host_info['aps_executable'] + ' --result-dir=aps_${SLURM_JOB_NAME} --collection-mode=all'
+                executable_launcher += ' ' + host_info['aps_executable'] + ' --result-dir=aps_${SLURM_JOB_NAME} --collection-mode=all'
        script_file.write(executable_launcher + ' {0}\n'.format(' '.join(command_atoms)))
        script_file.write('echo "End time is `date`"\n')
        script_file.write('exit 0\n')

--- a/TurTLE/tools.py
+++ b/TurTLE/tools.py
@@ -389,3 +389,30 @@ def particle_finite_diff_test(
            plt.close(fig)
    return pid
+def distribute_cores_evenly(
+        nprocesses = 4,
+        nthreads_per_process = 3,
+        total_number_of_cores = 16):
+    """Compute one CPU bitmask per process, spreading threads evenly.
+
+    The cores are split into `nprocesses` contiguous slots of
+    `total_number_of_cores // nprocesses` cores each.
+    Within a slot, `nthreads_per_process` cores are selected at a
+    constant stride, starting from the first core of the slot.
+
+    Masks are built with plain Python integers (arbitrary precision),
+    so they stay exact for nodes with 64 or more cores and can be
+    formatted directly with `'{0:x}'`.
+
+    :param nprocesses: number of processes sharing the cores
+    :param nthreads_per_process: number of cores selected per process
+    :param total_number_of_cores: number of cores available
+    :return: list of `nprocesses` integers; bit `i` of the mask for a
+        process is set if core `i` is selected for that process
+    :raises ValueError: if `nprocesses` or `nthreads_per_process` is
+        smaller than 1
+    :raises AssertionError: if more threads are requested than there
+        are cores
+    """
+    if nprocesses < 1 or nthreads_per_process < 1:
+        raise ValueError(
+                'nprocesses and nthreads_per_process must be at least 1')
+    assert nprocesses*nthreads_per_process <= total_number_of_cores, \
+            'requested more threads than available cores'
+    # first, determine how many total cores we can allocate per process
+    max_cores_per_process = total_number_of_cores // nprocesses
+    # then spread useful cores evenly throughout the total cores per process;
+    # the assertion above guarantees that skip >= 1
+    skip = max_cores_per_process // nthreads_per_process
+    single_process_mask = sum(
+            1 << (t*skip)
+            for t in range(nthreads_per_process))
+    # now create full node mask:
+    # shift the single process pattern into the slot of each process
+    return [single_process_mask << (max_cores_per_process*p)
+            for p in range(nprocesses)]