diff --git a/bfps/_code.py b/bfps/_code.py
index 73af76610ef224422508bf8b7b820b4720363734..d8b32d5967aa256f5005f1e291ab741cb57b1236 100644
--- a/bfps/_code.py
+++ b/bfps/_code.py
@@ -62,7 +62,7 @@ class _code(_base):
                 #include <string>
                 #include <cstring>
                 #include <fftw3-mpi.h>
-				#include <omp.h>
+                #include <omp.h>
                 #include <fenv.h>
                 #include <cstdlib>
                 //endcpp
@@ -277,7 +277,8 @@ class _code(_base):
                     hours         = hours,
                     minutes       = minutes,
                     out_file      = out_file + '_' + suffix,
-                    err_file      = err_file + '_' + suffix)
+                    err_file      = err_file + '_' + suffix,
+                    nb_threads_per_process = nb_threads_per_process)
                 os.chdir(self.work_dir)
                 qsub_atoms = ['qsub']
                 if len(job_name_list) >= 1:
@@ -299,7 +300,7 @@ class _code(_base):
                     out_file      = out_file + '_' + suffix,
                     err_file      = err_file + '_' + suffix,
                     nb_mpi_processes = nb_processes,
-			        nb_threads_per_process = nb_threads_per_process)
+                    nb_threads_per_process = nb_threads_per_process)
                 os.chdir(self.work_dir)
                 qsub_atoms = ['sbatch']
 
@@ -326,7 +327,7 @@ class _code(_base):
                     out_file      = out_file + '_' + suffix,
                     err_file      = err_file + '_' + suffix,
                     nb_mpi_processes = nb_processes,
-			        nb_threads_per_process = nb_threads_per_process)
+                    nb_threads_per_process = nb_threads_per_process)
             else:
                 self.write_IBMLoadLeveler_file_many_job(
                     file_name     = os.path.join(self.work_dir, job_script_name),
@@ -338,7 +339,7 @@ class _code(_base):
                     err_file      = err_file + '_' + suffix,
                     njobs = njobs,
                     nb_mpi_processes = nb_processes,
-			        nb_threads_per_process = nb_threads_per_process)
+                    nb_threads_per_process = nb_threads_per_process)
             submit_atoms = ['llsubmit']
 
             if not no_submit:
@@ -367,8 +368,8 @@ class _code(_base):
             minutes = None,
             out_file = None,
             err_file = None,
-			nb_mpi_processes = None,
-			nb_threads_per_process = None):
+            nb_mpi_processes = None,
+            nb_threads_per_process = None):
 
         script_file = open(file_name, 'w')
         script_file.write('# @ shell=/bin/bash\n')
@@ -465,8 +466,8 @@ class _code(_base):
             out_file = None,
             err_file = None,
             njobs = 2,
-			nb_mpi_processes = None,
-			nb_threads_per_process = None):
+            nb_mpi_processes = None,
+            nb_threads_per_process = None):
         assert(type(self.host_info['environment']) != type(None))
         script_file = open(file_name, 'w')
         script_file.write('# @ shell=/bin/bash\n')
@@ -563,7 +564,8 @@ class _code(_base):
             hours = None,
             minutes = None,
             out_file = None,
-            err_file = None):
+            err_file = None,
+            nb_threads_per_process = 1):
         script_file = open(file_name, 'w')
         script_file.write('#!/bin/bash\n')
         # export all environment variables
@@ -586,6 +588,8 @@ class _code(_base):
         script_file.write('echo "got $NSLOTS slots."\n')
         script_file.write('echo "Start time is `date`"\n')
         script_file.write('mpiexec -machinefile $TMPDIR/machines ' +
+                          '-genv OMP_NUM_THREADS={0} '.format(nb_threads_per_process) +
+                          '-genv OMP_PLACES=cores ' +
                           '-genv LD_LIBRARY_PATH ' +
                           '"' +
                           ':'.join([bfps.lib_dir] + bfps.install_info['library_dirs']) +
@@ -604,8 +608,8 @@ class _code(_base):
             minutes = None,
             out_file = None,
             err_file = None,
-			nb_mpi_processes = None,
-			nb_threads_per_process = None):
+            nb_mpi_processes = None,
+            nb_threads_per_process = None):
         script_file = open(file_name, 'w')
         script_file.write('#!/bin/bash -l\n')
         # job name
diff --git a/bfps/cpp/full_code/symmetrize_test.cpp b/bfps/cpp/full_code/symmetrize_test.cpp
index 13d48d6040aef829d545d364e0565230fc7cedeb..3b4cd5a5b7304e233735cab87f603b5e164ef0eb 100644
--- a/bfps/cpp/full_code/symmetrize_test.cpp
+++ b/bfps/cpp/full_code/symmetrize_test.cpp
@@ -36,14 +36,18 @@ template <typename rnumber>
 int symmetrize_test<rnumber>::do_work(void)
 {
     // allocate
+    DEBUG_MSG("about to allocate field0\n");
     field<rnumber, FFTW, THREE> *test_field0 = new field<rnumber, FFTW, THREE>(
             this->nx, this->ny, this->nz,
             this->comm,
             DEFAULT_FFTW_FLAG);
+    DEBUG_MSG("finished allocating field0\n");
+    DEBUG_MSG("about to allocate field1\n");
     field<rnumber, FFTW, THREE> *test_field1 = new field<rnumber, FFTW, THREE>(
             this->nx, this->ny, this->nz,
             this->comm,
             DEFAULT_FFTW_FLAG);
+    DEBUG_MSG("finished allocating field1\n");
     std::default_random_engine rgen;
     std::normal_distribution<rnumber> rdist;
     rgen.seed(1);