diff --git a/cpp/particles/lock_free_bool_array.hpp b/cpp/particles/lock_free_bool_array.hpp index ae5eb0d4f97420f08954595cb1c7ecd8fa78b080..bfcd48880cfc6308f63afe235f0528b1a927f68b 100644 --- a/cpp/particles/lock_free_bool_array.hpp +++ b/cpp/particles/lock_free_bool_array.hpp @@ -32,18 +32,30 @@ #include <cstdio> #include <omp.h> - class lock_free_bool_array{ static const int Available = 0; static const int Busy = 1; static const int NoOwner = -1; +#ifdef __INTEL_COMPILER + struct Locker { + Locker(){ + omp_init_nest_lock(&lock); + } + ~Locker(){ + omp_destroy_nest_lock(&lock); + } + omp_nest_lock_t lock; + }; +#else struct Locker { - Locker() : lock(Available), ownerId(NoOwner), counter(0) {} + Locker() : lock(Available), ownerId(NoOwner), counter(0) { + } std::atomic_int lock; std::atomic_int ownerId; int counter; }; +#endif std::vector<std::unique_ptr<Locker>> keys; @@ -55,14 +67,39 @@ public: k.reset(new Locker()); } } +#ifndef NDEBUG + ~lock_free_bool_array(){ + for(auto& k : keys){ +#ifdef __INTEL_COMPILER +#else + assert(k->lock.load() == Available); + assert(k->ownerId.load() == NoOwner); +#endif + } + } +#endif + +#ifdef __INTEL_COMPILER + void lock(const long int inKey){ + Locker* k = keys[inKey%keys.size()].get(); + omp_set_nest_lock(&k->lock); + } + void unlock(const long int inKey){ + Locker* k = keys[inKey%keys.size()].get(); + omp_unset_nest_lock(&k->lock); + } +#else void lock(const long int inKey){ Locker* k = keys[inKey%keys.size()].get(); if(k->ownerId.load() != omp_get_thread_num()){ + int localBusy = Busy;// Intel complains if we pass a const as last param int expected = Available; - while(!std::atomic_compare_exchange_strong(&k->lock, &expected, Busy)){ + while(!std::atomic_compare_exchange_strong(&k->lock, &expected, localBusy)){ usleep(1); + expected = Available; } + assert(k->ownerId.load() == NoOwner); k->ownerId.store(omp_get_thread_num()); k->counter = 0; // must remain } @@ -83,6 +120,7 @@ public: k->lock.store(Available); } } +#endif }; #endif diff --git a/cpp/particles/p2p/p2p_distr_mpi.hpp b/cpp/particles/p2p/p2p_distr_mpi.hpp index 44dc644165eb8a4094d8dd9a4698d2180e09d319..ecf6aa4ddf7c3bd56a745f4ebec01009325cf725 100644 --- a/cpp/particles/p2p/p2p_distr_mpi.hpp +++ b/cpp/particles/p2p/p2p_distr_mpi.hpp @@ -231,7 +231,9 @@ public: const long double field_section_width_z = spatial_box_width[IDXC_Z]/(long double)(field_grid_dim[IDXC_Z]); const long int limite = static_cast<long int>((field_section_width_z*(long double)(partition_interval_offset_per_proc[dest_proc+1]) - std::numeric_limits<long double>::epsilon())/cutoff_radius); - if(limite == nb_cell_levels[IDXC_Z]){ + if(static_cast<long double>(limite)*cutoff_radius + == field_section_width_z*(long double)(partition_interval_offset_per_proc[dest_proc+1]) + || limite == nb_cell_levels[IDXC_Z]){ return limite-1; } return limite;