diff --git a/cpp/field.cpp b/cpp/field.cpp
index 04eaa008e0c37b37b382335b6069425e1ce5d731..077427ce1c00594cb94d75f5d3dde3fe9d3cebea 100644
--- a/cpp/field.cpp
+++ b/cpp/field.cpp
@@ -24,8 +24,6 @@
 
 
 
-#define NDEBUG
-
 #include <sys/stat.h>
 #include <cmath>
 #include <cstdlib>
@@ -35,7 +33,7 @@
 #include "scope_timer.hpp"
 #include "shared_array.hpp"
 
-
+#define NDEBUG
 
 template <typename rnumber,
           field_backend be,
@@ -1783,7 +1781,7 @@ int joint_rspace_3PDF(
     assert(max_f1_estimate.size() == 1);
     assert(max_f2_estimate.size() == 1);
     assert(max_f3_estimate.size() == 1);
-    
+
     int nbins;
     std::string dsetc, dsetm;
     dsetc = "histograms/" + dset_name + "_components";
@@ -1799,6 +1797,7 @@ int joint_rspace_3PDF(
                 H5P_DEFAULT);
         wspace = H5Dget_space(dset);
         ndims = H5Sget_simple_extent_dims(wspace, dims, NULL);
+        variable_used_only_in_assert(ndims);
         assert(ndims == 4);
         H5Sclose(wspace);
         H5Dclose(dset);
@@ -1809,7 +1808,7 @@ int joint_rspace_3PDF(
         MPI_Bcast(&nbins, 1, MPI_INT, 0, f1->comm);
     }
 
-    
+
     /// histogram magnitudes
     shared_array<ptrdiff_t> local_histm_threaded(
             nbins*nbins*nbins,
@@ -1822,7 +1821,7 @@ int joint_rspace_3PDF(
     bin1size.resize(1);
     bin2size.resize(1);
     bin3size.resize(1);
-    
+
     bin1size[0] = 2*max_f1_estimate[0] / nbins;
     bin2size[0] = 2*max_f2_estimate[0] / nbins;
     bin3size[0] = 2*max_f3_estimate[0] / nbins;
@@ -1851,7 +1850,6 @@ int joint_rspace_3PDF(
     }
     local_histm_threaded.mergeParallel();
     ptrdiff_t *histm = new ptrdiff_t[nbins*nbins*nbins];
-    ptrdiff_t *histc = NULL;
     {
         MPI_Allreduce(
                 (void*)local_histm_threaded.getMasterData(),
@@ -1865,7 +1863,7 @@ int joint_rspace_3PDF(
         TIMEZONE("root-work");
         hid_t dset, wspace, mspace;
         hsize_t count[5], offset[5];
-        
+
         dset = H5Dopen(group, dsetm.c_str(), H5P_DEFAULT);
         assert(dset > 0);
         offset[0] = toffset;
diff --git a/cpp/full_code/NSVE.cpp b/cpp/full_code/NSVE.cpp
index 7b1b2d9550c45f9166c37e1b8132427fed046597..af4740035c5e139c609766e96552bfac0440a1be 100644
--- a/cpp/full_code/NSVE.cpp
+++ b/cpp/full_code/NSVE.cpp
@@ -23,7 +23,7 @@
 
 
 
-#define NDEBUG
+//#define NDEBUG
 
 #include <string>
 #include <cmath>
diff --git a/cpp/full_code/NSVEcomplex_particles.cpp b/cpp/full_code/NSVEcomplex_particles.cpp
index 3bd27102d7495b39dfa92bb5b7975b3f64d6cca5..701892f93dafaaf06ecdbd0c58bf3963c5b1c8e9 100644
--- a/cpp/full_code/NSVEcomplex_particles.cpp
+++ b/cpp/full_code/NSVEcomplex_particles.cpp
@@ -24,8 +24,6 @@
 
 
 
-#define NDEBUG
-
 #include <string>
 #include <cmath>
 #include "NSVEcomplex_particles.hpp"
@@ -34,6 +32,8 @@
 #include "particles/p2p_computer.hpp"
 #include "particles/particles_inner_computer.hpp"
 
+#define NDEBUG
+
 template <typename rnumber>
 int NSVEcomplex_particles<rnumber>::initialize(void)
 {
diff --git a/cpp/full_code/NSVEparticles.cpp b/cpp/full_code/NSVEparticles.cpp
index 9b8743cdb48a5f3575931dfcc200fe1f0362778d..1952bcfccb6255bb357a8d603a74efea086db3c2 100644
--- a/cpp/full_code/NSVEparticles.cpp
+++ b/cpp/full_code/NSVEparticles.cpp
@@ -24,13 +24,13 @@
 
 
 
-#define NDEBUG
-
 #include <string>
 #include <cmath>
 #include "NSVEparticles.hpp"
 #include "scope_timer.hpp"
 
+#define NDEBUG
+
 template <typename rnumber>
 int NSVEparticles<rnumber>::initialize(void)
 {
@@ -43,6 +43,7 @@ int NSVEparticles<rnumber>::initialize(void)
             this->fs->cvelocity->rlayout->comm,
             this->fs->cvelocity->fftw_plan_rigor);
 
+    DEBUG_MSG_WAIT(MPI_COMM_WORLD, "about to call particles_system_builder\n");
     this->ps = particles_system_builder(
                 this->fs->cvelocity,              // (field object)
                 this->fs->kk,                     // (kspace object, contains dkx, dky, dkz)
@@ -55,6 +56,7 @@ int NSVEparticles<rnumber>::initialize(void)
                 tracers0_smoothness,        // parameter
                 this->comm,
                 this->fs->iteration+1);
+    DEBUG_MSG_WAIT(MPI_COMM_WORLD, "after call to particles_system_builder\n");
     this->particles_output_writer_mpi = new particles_output_hdf5<
         long long int, double, 3>(
                 MPI_COMM_WORLD,
diff --git a/cpp/full_code/code_base.cpp b/cpp/full_code/code_base.cpp
index a6487c726de44b018392128f955ccebf7e7100a1..f412a792b642e70fdf36d88d0f327501a4d6ce0a 100644
--- a/cpp/full_code/code_base.cpp
+++ b/cpp/full_code/code_base.cpp
@@ -24,11 +24,10 @@
 
 
 
-#define NDEBUG
-
 #include "code_base.hpp"
 #include "scope_timer.hpp"
 
+#define NDEBUG
 
 code_base::code_base(
         const MPI_Comm COMMUNICATOR,
diff --git a/cpp/full_code/direct_numerical_simulation.cpp b/cpp/full_code/direct_numerical_simulation.cpp
index 5329e7034e082b32cbdad7b4aae3d81665156215..955c1f2e6bcb75bd1a7538da5edd2ac2d9520ccc 100644
--- a/cpp/full_code/direct_numerical_simulation.cpp
+++ b/cpp/full_code/direct_numerical_simulation.cpp
@@ -23,8 +23,6 @@
 
 
 
-#define NDEBUG
-
 #include <cstdlib>
 #include <sys/types.h>
 #include <sys/stat.h>
@@ -32,6 +30,7 @@
 #include "scope_timer.hpp"
 #include "hdf5_tools.hpp"
 
+#define NDEBUG
 
 int direct_numerical_simulation::grow_file_datasets()
 {
diff --git a/cpp/kspace.cpp b/cpp/kspace.cpp
index 5accb969cd93735b0cfb4d5deecfbb8ccd914910..452ca305dcf6730824d27aca23b25ffa567de8ff 100644
--- a/cpp/kspace.cpp
+++ b/cpp/kspace.cpp
@@ -24,8 +24,6 @@
 
 
 
-#define NDEBUG
-
 #include <cmath>
 #include <cstdlib>
 #include <algorithm>
@@ -34,7 +32,7 @@
 #include "scope_timer.hpp"
 #include "shared_array.hpp"
 
-
+#define NDEBUG
 
 template <field_backend be,
           kspace_dealias_type dt>
diff --git a/cpp/particles/particles_distr_mpi.hpp b/cpp/particles/particles_distr_mpi.hpp
index 43d61ca407af23e3cf3c3979d678af08cd7b5ff8..57d8067059a1408afb90c53e32c83c0731030bd2 100644
--- a/cpp/particles/particles_distr_mpi.hpp
+++ b/cpp/particles/particles_distr_mpi.hpp
@@ -308,7 +308,7 @@ public:
 
                 if(descriptor.nbParticlesToSend){
                     whatNext.emplace_back(std::pair<Action,int>{NOTHING_TODO, -1});
-                    mpiRequests.emplace_back();                    
+                    mpiRequests.emplace_back();
                     assert(descriptor.nbParticlesToSend*size_particle_positions < std::numeric_limits<int>::max());
                     AssertMpi(MPI_Isend(const_cast<real_number*>(&particles_positions[(current_offset_particles_for_partition[current_partition_size-descriptor.nbPartitionsToSend])*size_particle_positions]),
                                         int(descriptor.nbParticlesToSend*size_particle_positions), particles_utils::GetMpiType(real_number()),
@@ -335,6 +335,8 @@ public:
         }
 
         const bool more_than_one_thread = (omp_get_max_threads() > 1);
+        MPI_Barrier(MPI_COMM_WORLD);
+        //DEBUG_MSG_WAIT(MPI_COMM_WORLD, "line 338 of particles_distr_mpi.hpp\n");
 
         TIMEZONE_OMP_INIT_PREPARALLEL(omp_get_max_threads())
         #pragma omp parallel default(shared)
@@ -432,7 +434,7 @@ public:
                         const int destProc = descriptor.destProc;
                         whatNext.emplace_back(std::pair<Action,int>{RELEASE_BUFFER_PARTICLES, releasedAction.second});
                         mpiRequests.emplace_back();
-                        const int tag = descriptor.isLower? TAG_LOW_UP_RESULTS : TAG_UP_LOW_RESULTS;                        
+                        const int tag = descriptor.isLower? TAG_LOW_UP_RESULTS : TAG_UP_LOW_RESULTS;
                         assert(NbParticlesToReceive*size_particle_rhs < std::numeric_limits<int>::max());
                         AssertMpi(MPI_Isend(descriptor.results.get(), int(NbParticlesToReceive*size_particle_rhs), particles_utils::GetMpiType(real_number()), destProc, tag,
                                   current_com, &mpiRequests.back()));
@@ -645,7 +647,7 @@ public:
 
             if(nbOutLower){
                 whatNext.emplace_back(std::pair<Action,int>{NOTHING_TODO, -1});
-                mpiRequests.emplace_back();                
+                mpiRequests.emplace_back();
                 assert(nbOutLower*size_particle_positions < std::numeric_limits<int>::max());
                 AssertMpi(MPI_Isend(&(*inout_positions_particles)[0], int(nbOutLower*size_particle_positions), particles_utils::GetMpiType(real_number()), (my_rank-1+nb_processes_involved)%nb_processes_involved, TAG_LOW_UP_MOVED_PARTICLES,
                           MPI_COMM_WORLD, &mpiRequests.back()));
diff --git a/cpp/particles/particles_field_computer.hpp b/cpp/particles/particles_field_computer.hpp
index a59b40d5df8d025f63126c142677576ec5be965a..b0ad62afea6e80dcca85f5c567c66b48d10a71fe 100644
--- a/cpp/particles/particles_field_computer.hpp
+++ b/cpp/particles/particles_field_computer.hpp
@@ -182,6 +182,7 @@ public:
                 }
             }
         }
+        DEBUG_MSG("exiting apply_computation\n");
     }
 
     template <int size_particle_rhs>
diff --git a/cpp/particles/particles_system.hpp b/cpp/particles/particles_system.hpp
index a05175ca52c4c4b669f29f893913b3d7fcf6c484..f82f56763ebcabc32b30dcceff0d6f0fd0d8e027 100644
--- a/cpp/particles/particles_system.hpp
+++ b/cpp/particles/particles_system.hpp
@@ -271,6 +271,7 @@ public:
 
     void redistribute() final {
         TIMEZONE("particles_system::redistribute");
+        DEBUG_MSG("step index is %d\n", step_idx);
         particles_distr.template redistribute<computer_class, size_particle_positions, size_particle_rhs, 1>(
                               computer,
                               current_my_nb_particles_per_partition.get(),
diff --git a/cpp/vorticity_equation.cpp b/cpp/vorticity_equation.cpp
index ead9345af5a2f0555e7fa6e2b6ee45cecd9f3624..38ca8f6dc55b77e8ab66b91693076dc074a3affc 100644
--- a/cpp/vorticity_equation.cpp
+++ b/cpp/vorticity_equation.cpp
@@ -24,8 +24,6 @@
 
 
 
-#define NDEBUG
-
 #include <limits>
 #include <cassert>
 #include <cmath>
@@ -35,6 +33,8 @@
 #include "scope_timer.hpp"
 #include "shared_array.hpp"
 
+#define NDEBUG
+
 
 
 template <class rnumber,