Skip to content
Snippets Groups Projects
Commit 3ca6064f authored by Cristian Lalescu
Browse files

uses even distribution of threads within node

parent 2cd0182d
No related branches found
No related tags found
No related merge requests found
Pipeline #91484 passed
...@@ -34,6 +34,7 @@ import math ...@@ -34,6 +34,7 @@ import math
import warnings import warnings
import TurTLE import TurTLE
import TurTLE.tools
from TurTLE.host_info import host_info from TurTLE.host_info import host_info
from ._base import _base from ._base import _base
...@@ -752,6 +753,14 @@ class _code(_base): ...@@ -752,6 +753,14 @@ class _code(_base):
if nb_threads_per_process > 1: if nb_threads_per_process > 1:
script_file.write('export OMP_NUM_THREADS=${SLURM_CPUS_PER_TASK}\n') script_file.write('export OMP_NUM_THREADS=${SLURM_CPUS_PER_TASK}\n')
# explicit binding options that can be used further on if needed
core_masks = TurTLE.tools.distribute_cores_evenly(
nprocesses = nb_processes_per_node,
nthreads_per_process = nb_threads_per_process,
total_number_of_cores = nb_cpus_per_node)
script_file.write('export SLURM_CPU_BIND_OPTION="--cpu-bind=verbose,mask_cpu:' +
','.join(['0x{0:x}'.format(mm) for mm in core_masks]) + '"\n')
script_file.write('echo "Start time is `date`"\n') script_file.write('echo "Start time is `date`"\n')
script_file.write('cd ' + self.work_dir + '\n') script_file.write('cd ' + self.work_dir + '\n')
script_file.write('export HTMLOUTPUT={}.html\n'.format(command_atoms[-1])) script_file.write('export HTMLOUTPUT={}.html\n'.format(command_atoms[-1]))
...@@ -768,16 +777,16 @@ class _code(_base): ...@@ -768,16 +777,16 @@ class _code(_base):
if 'executable_launcher' in self.host_info.keys(): if 'executable_launcher' in self.host_info.keys():
executable_launcher = self.host_info['executable_launcher'] executable_launcher = self.host_info['executable_launcher']
else: else:
executable_launcher = 'srun' executable_launcher = 'srun ${SLURM_CPU_BIND_OPTION}'
if host_info['use_vtune']: if host_info['use_vtune']:
if 'vtune_executable' not in host_info.keys(): if 'vtune_executable' not in host_info.keys():
host_info['vtune_executable'] = 'vtune' host_info['vtune_executable'] = 'vtune'
executable_launcher = 'srun ' + host_info['vtune_executable'] + ' -collect hpc-performance -trace-mpi -quiet -result-dir=vtune_${SLURM_JOB_NAME}' executable_launcher += ' ' + host_info['vtune_executable'] + ' -collect hpc-performance -trace-mpi -quiet -result-dir=vtune_${SLURM_JOB_NAME}'
if host_info['use_aps']: if host_info['use_aps']:
if 'aps_executable' not in host_info.keys(): if 'aps_executable' not in host_info.keys():
host_info['aps_executable'] = 'aps' host_info['aps_executable'] = 'aps'
executable_launcher = 'srun ' + host_info['aps_executable'] + ' --result-dir=aps_${SLURM_JOB_NAME} --collection-mode=all' executable_launcher += ' ' + host_info['aps_executable'] + ' --result-dir=aps_${SLURM_JOB_NAME} --collection-mode=all'
script_file.write(executable_launcher + ' {0}\n'.format(' '.join(command_atoms))) script_file.write(executable_launcher + ' {0}\n'.format(' '.join(command_atoms)))
script_file.write('echo "End time is `date`"\n') script_file.write('echo "End time is `date`"\n')
script_file.write('exit 0\n') script_file.write('exit 0\n')
......
...@@ -389,3 +389,30 @@ def particle_finite_diff_test( ...@@ -389,3 +389,30 @@ def particle_finite_diff_test(
plt.close(fig) plt.close(fig)
return pid return pid
def distribute_cores_evenly(
        nprocesses=4,
        nthreads_per_process=3,
        total_number_of_cores=16):
    """Build per-process core bitmasks that spread threads evenly over a node.

    The cores are split into ``nprocesses`` contiguous slots of
    ``total_number_of_cores // nprocesses`` cores each.  Inside a slot, the
    ``nthreads_per_process`` selected cores are placed at a regular stride
    starting from the first core of the slot, so that the threads of one
    process are spaced out instead of packed together.

    Parameters
    ----------
    nprocesses : int
        Number of processes sharing the cores (must be >= 1).
    nthreads_per_process : int
        Number of cores to select for each process (must be >= 1).
    total_number_of_cores : int
        Number of cores available in total.

    Returns
    -------
    list of int
        One bitmask per process, in process order.  Bit ``i`` of a mask is
        set when core ``i`` is selected for that process.  The masks are
        plain Python integers, so they stay exact for any core count and can
        be formatted directly (e.g. as hexadecimal with ``'{0:x}'``).

    Raises
    ------
    ValueError
        If ``nprocesses`` or ``nthreads_per_process`` is smaller than 1.
    AssertionError
        If more cores are requested than ``total_number_of_cores`` provides.
    """
    if nprocesses < 1 or nthreads_per_process < 1:
        raise ValueError(
            'nprocesses and nthreads_per_process must both be at least 1 '
            '(got {0} and {1})'.format(nprocesses, nthreads_per_process))
    # kept as an assertion so the exception type seen by callers is unchanged
    assert nprocesses * nthreads_per_process <= total_number_of_cores, \
        'requested more cores than are available'
    # first, determine how many total cores we can allocate per process
    max_cores_per_process = total_number_of_cores // nprocesses
    # then spread useful cores evenly throughout the total cores per process;
    # the check above guarantees skip >= 1, so every thread gets its own bit
    skip = max_cores_per_process // nthreads_per_process
    # pure integer arithmetic: no fixed-width overflow for wide masks
    single_process_mask = sum(
        1 << (t * skip) for t in range(nthreads_per_process))
    # now create full node mask: shift the pattern into each process slot
    return [single_process_mask << (max_cores_per_process * p)
            for p in range(nprocesses)]
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment