diff --git a/bfps/cpp/particles/lock_free_bool_array.hpp b/bfps/cpp/particles/lock_free_bool_array.hpp new file mode 100644 index 0000000000000000000000000000000000000000..1ae9968e810a64d463d821305579af2904f6e70f --- /dev/null +++ b/bfps/cpp/particles/lock_free_bool_array.hpp @@ -0,0 +1,35 @@ +#ifndef LOCK_FREE_BOOL_ARRAY_HPP +#define LOCK_FREE_BOOL_ARRAY_HPP + +#include <vector> +#include <memory> + +class lock_free_bool_array{ + std::vector<std::unique_ptr<long int>> keys; + +public: + explicit lock_free_bool_array(const long int inNbKeys = 512){ + keys.resize(inNbKeys); + for(std::unique_ptr<long int>& k : keys){ + k.reset(new long int(0)); + } + } + + void lock(const int inKey){ + volatile long int* k = keys[inKey%keys.size()].get(); + long int res = 1; + int cpt = 0; + while(res == 1){ + res = __sync_val_compare_and_swap(k, 0, res); + cpt++; + } + } + + void unlock(const int inKey){ + volatile long int* k = keys[inKey%keys.size()].get(); + assert(k && *k); + (*k) = 0; + } +}; + +#endif diff --git a/bfps/cpp/particles/p2p_distr_mpi.hpp b/bfps/cpp/particles/p2p_distr_mpi.hpp index 6ed8158ae38c060c0b500aaac14381b42c5e3b39..40be1a28274ea16571e0d6251b89a9bd4c8212a9 100644 --- a/bfps/cpp/particles/p2p_distr_mpi.hpp +++ b/bfps/cpp/particles/p2p_distr_mpi.hpp @@ -14,6 +14,7 @@ #include "scope_timer.hpp" #include "particles_utils.hpp" #include "p2p_tree.hpp" +#include "lock_free_bool_array.hpp" template <class partsize_t, class real_number> class p2p_distr_mpi { @@ -204,11 +205,21 @@ public: - std::numeric_limits<real_number>::epsilon())/cutoff_radius); } + real_number apply_pbc(real_number pos, IDXS_3D dim) const{ + while( pos < spatial_box_offset[dim] ){ + pos += spatial_box_width[dim]; + } + while( spatial_box_width[dim]+spatial_box_offset[dim] <= pos){ + pos -= spatial_box_width[dim]; + } + return pos; + } + std::array<long int,3> get_cell_coordinate(const real_number pos_x, const real_number pos_y, const real_number pos_z) const { - const real_number diff_x = pos_x - spatial_box_offset[IDX_X]; - const real_number diff_y = pos_y - spatial_box_offset[IDX_Y]; - const real_number diff_z = pos_z - spatial_box_offset[IDX_Z]; + const real_number diff_x = apply_pbc(pos_x,IDX_X) - spatial_box_offset[IDX_X]; + const real_number diff_y = apply_pbc(pos_y,IDX_Y) - spatial_box_offset[IDX_Y]; + const real_number diff_z = apply_pbc(pos_z,IDX_Z) - spatial_box_offset[IDX_Z]; std::array<long int,3> coord; coord[IDX_X] = static_cast<long int>(diff_x/cutoff_radius); coord[IDX_Y] = static_cast<long int>(diff_y/cutoff_radius); @@ -225,13 +236,13 @@ public: real_number compute_distance_r2(const real_number x1, const real_number y1, const real_number z1, const real_number x2, const real_number y2, const real_number z2, const real_number xshift_coef, const real_number yshift_coef, const real_number zshift_coef) const { - real_number diff_x = std::abs(x1-x2+xshift_coef*spatial_box_width[IDX_X]); + real_number diff_x = std::abs(apply_pbc(x1,IDX_X)-apply_pbc(x2,IDX_X)+xshift_coef*spatial_box_width[IDX_X]); assert(diff_x <= 2*cutoff_radius); - real_number diff_y = std::abs(y1-y2+yshift_coef*spatial_box_width[IDX_Y]); + real_number diff_y = std::abs(apply_pbc(y1,IDX_X)-apply_pbc(y2,IDX_X)+yshift_coef*spatial_box_width[IDX_Y]); assert(diff_y <= 2*cutoff_radius); - real_number diff_z = std::abs(z1-z2+zshift_coef*spatial_box_width[IDX_Z]); + real_number diff_z = std::abs(apply_pbc(z1,IDX_X)-apply_pbc(z2,IDX_X)+zshift_coef*spatial_box_width[IDX_Z]); assert(diff_z <= 2*cutoff_radius); return (diff_x*diff_x) + (diff_y*diff_y) + (diff_z*diff_z); @@ -561,6 +572,8 @@ public: } } + lock_free_bool_array cells_locker(512); + TIMEZONE_OMP_INIT_PREPARALLEL(omp_get_max_threads()) #pragma omp parallel default(shared) { @@ -648,10 +661,12 @@ public: nb_parts_in_cell += 1; } - const std::vector<std::pair<partsize_t,partsize_t>>* neighbors[27]; - long int neighbors_indexes[27]; - std::array<real_number,3> shift[27]; - const int nbNeighbors = my_tree.getNeighbors(current_cell_idx, neighbors, neighbors_indexes, shift, true); + #pragma omp task default(shared) firstprivate(idxPart, nb_parts_in_cell, current_cell_idx) + { + const std::vector<std::pair<partsize_t,partsize_t>>* neighbors[27]; + long int neighbors_indexes[27]; + std::array<real_number,3> shift[27]; + const int nbNeighbors = my_tree.getNeighbors(current_cell_idx, neighbors, neighbors_indexes, shift, true); // for(int idx_test = 0 ; idx_test < nb_parts_in_cell ; ++idx_test){ // TODO // real_number totest[3] = {8.570442e-01, 7.173084e-02, 8.279754e-03}; @@ -666,52 +681,59 @@ public: // } // } - // with other interval - for(size_t idx_neighbor = 0 ; idx_neighbor < nbNeighbors ; ++idx_neighbor){ - for(size_t idx_2 = 0 ; idx_2 < (*neighbors[idx_neighbor]).size() ; ++idx_2){ - for(partsize_t idx_p1 = 0 ; idx_p1 < nb_parts_in_cell ; ++idx_p1){ - for(partsize_t idx_p2 = 0 ; idx_p2 < (*neighbors[idx_neighbor])[idx_2].second ; ++idx_p2){ - const real_number dist_r2 = compute_distance_r2(descriptor.toCompute[(idxPart+idx_p1)*size_particle_positions + IDX_X], - descriptor.toCompute[(idxPart+idx_p1)*size_particle_positions + IDX_Y], - descriptor.toCompute[(idxPart+idx_p1)*size_particle_positions + IDX_Z], - particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions + IDX_X], - particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions + IDX_Y], - particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions + IDX_Z], - shift[idx_neighbor][IDX_X], shift[idx_neighbor][IDX_Y], shift[idx_neighbor][IDX_Z]); - if(dist_r2 < cutoff_radius_compute*cutoff_radius_compute){ - in_computer.template compute_interaction<size_particle_positions,size_particle_data, size_particle_rhs>( - &descriptor.toCompute[(idxPart+idx_p1)*size_particle_positions], - &descriptor.toData[(idxPart+idx_p1)*size_particle_data], - &descriptor.results[(idxPart+idx_p1)*size_particle_rhs], - &particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions], - &particles_data[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_data], - &particles_current_rhs[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_rhs], - dist_r2, shift[idx_neighbor][IDX_X], shift[idx_neighbor][IDX_Y], shift[idx_neighbor][IDX_Z]); + // with other interval + for(size_t idx_neighbor = 0 ; idx_neighbor < nbNeighbors ; ++idx_neighbor){ + cells_locker.lock(neighbors_indexes[idx_neighbor]); + + for(size_t idx_2 = 0 ; idx_2 < (*neighbors[idx_neighbor]).size() ; ++idx_2){ + for(partsize_t idx_p1 = 0 ; idx_p1 < nb_parts_in_cell ; ++idx_p1){ + for(partsize_t idx_p2 = 0 ; idx_p2 < (*neighbors[idx_neighbor])[idx_2].second ; ++idx_p2){ + const real_number dist_r2 = compute_distance_r2(descriptor.toCompute[(idxPart+idx_p1)*size_particle_positions + IDX_X], + descriptor.toCompute[(idxPart+idx_p1)*size_particle_positions + IDX_Y], + descriptor.toCompute[(idxPart+idx_p1)*size_particle_positions + IDX_Z], + particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions + IDX_X], + particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions + IDX_Y], + particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions + IDX_Z], + shift[idx_neighbor][IDX_X], shift[idx_neighbor][IDX_Y], shift[idx_neighbor][IDX_Z]); + if(dist_r2 < cutoff_radius_compute*cutoff_radius_compute){ + in_computer.template compute_interaction<size_particle_positions,size_particle_data, size_particle_rhs>( + &descriptor.toCompute[(idxPart+idx_p1)*size_particle_positions], + &descriptor.toData[(idxPart+idx_p1)*size_particle_data], + &descriptor.results[(idxPart+idx_p1)*size_particle_rhs], + &particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions], + &particles_data[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_data], + &particles_current_rhs[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_rhs], + dist_r2, shift[idx_neighbor][IDX_X], shift[idx_neighbor][IDX_Y], shift[idx_neighbor][IDX_Z]); + } + + // if(inout_index_particles[(*neighbors[idx_neighbor])[idx_2].first+idx_p2] == 356){// TODO + // printf("test interaction between :\n"); + // printf("index %ld (%ld) pos %e %e %e\n", + // (idxPart+idx_p1), -1L, + // descriptor.toCompute[(idxPart+idx_p1)*size_particle_positions + IDX_X], + // descriptor.toCompute[(idxPart+idx_p1)*size_particle_positions + IDX_Y], + // descriptor.toCompute[(idxPart+idx_p1)*size_particle_positions + IDX_Z]); + // printf("index %ld (%ld) pos %e %e %e\n", + // ((*neighbors[idx_neighbor])[idx_2].first+idx_p2), + // inout_index_particles[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)], + // particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions + IDX_X], + // particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions + IDX_Y], + // particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions + IDX_Z]); + // printf("Radius = %e (%e)\n", sqrt(dist_r2), dist_r2); + // } } - -// if(inout_index_particles[(*neighbors[idx_neighbor])[idx_2].first+idx_p2] == 356){// TODO -// printf("test interaction between :\n"); -// printf("index %ld (%ld) pos %e %e %e\n", -// (idxPart+idx_p1), -1L, -// descriptor.toCompute[(idxPart+idx_p1)*size_particle_positions + IDX_X], -// descriptor.toCompute[(idxPart+idx_p1)*size_particle_positions + IDX_Y], -// descriptor.toCompute[(idxPart+idx_p1)*size_particle_positions + IDX_Z]); -// printf("index %ld (%ld) pos %e %e %e\n", -// ((*neighbors[idx_neighbor])[idx_2].first+idx_p2), -// inout_index_particles[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)], -// particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions + IDX_X], -// particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions + IDX_Y], -// particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions + IDX_Z]); -// printf("Radius = %e (%e)\n", sqrt(dist_r2), dist_r2); -// } } } + + cells_locker.unlock(neighbors_indexes[idx_neighbor]); } } idxPart += nb_parts_in_cell; } + #pragma omp taskwait + // Send back const int destProc = descriptor.destProc; whatNext.emplace_back(std::pair<Action,int>{RELEASE_BUFFER_PARTICLES, releasedAction.second}); @@ -761,185 +783,196 @@ public: // Compute self data for(const auto& iter_cell : my_tree){ TIMEZONE("proceed-leaf"); - const std::vector<std::pair<partsize_t,partsize_t>>& intervals = iter_cell.second; - - for(size_t idx_1 = 0 ; idx_1 < intervals.size() ; ++idx_1){ - // self interval - for(partsize_t idx_p1 = 0 ; idx_p1 < intervals[idx_1].second ; ++idx_p1){ -// if(((inout_index_particles[(intervals[idx_1].first+idx_p1)] == 356))){// TODO -// printf("box %ld:\n", iter_cell.first); -// printf("intervals.size() %lu:\n", intervals.size()); -// printf("intervals[idx_1].second %ld:\n", intervals[idx_1].second); -// printf("index %ld (%ld) pos %e %e %e\n", -// (intervals[idx_1].first+idx_p1), inout_index_particles[(intervals[idx_1].first+idx_p1)], -// particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDX_X], -// particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDX_Y], -// particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDX_Z]); -// } -// if(((inout_index_particles[(intervals[idx_1].first+idx_p1)] == 547))){// TODO -// printf("box %ld:\n", iter_cell.first); -// printf("intervals.size() %lu:\n", intervals.size()); -// printf("intervals[idx_1].second %ld:\n", intervals[idx_1].second); -// printf("index %ld (%ld) pos %e %e %e\n", -// (intervals[idx_1].first+idx_p1), inout_index_particles[(intervals[idx_1].first+idx_p1)], -// particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDX_X], -// particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDX_Y], -// particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDX_Z]); -// } - + const long int currenct_cell_idx = iter_cell.first; + const std::vector<std::pair<partsize_t,partsize_t>>* intervals_ptr = &iter_cell.second; - for(partsize_t idx_p2 = idx_p1+1 ; idx_p2 < intervals[idx_1].second ; ++idx_p2){ - const real_number dist_r2 = compute_distance_r2(particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDX_X], - particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDX_Y], - particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDX_Z], - particles_positions[(intervals[idx_1].first+idx_p2)*size_particle_positions + IDX_X], - particles_positions[(intervals[idx_1].first+idx_p2)*size_particle_positions + IDX_Y], - particles_positions[(intervals[idx_1].first+idx_p2)*size_particle_positions + IDX_Z], - 0, 0, 0); - if(dist_r2 < cutoff_radius_compute*cutoff_radius_compute){ - in_computer.template compute_interaction<size_particle_positions,size_particle_data,size_particle_rhs>( - &particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions], - &particles_data[(intervals[idx_1].first+idx_p1)*size_particle_data], - &particles_current_rhs[(intervals[idx_1].first+idx_p1)*size_particle_rhs], - &particles_positions[(intervals[idx_1].first+idx_p2)*size_particle_positions], - &particles_data[(intervals[idx_1].first+idx_p2)*size_particle_data], - &particles_current_rhs[(intervals[idx_1].first+idx_p2)*size_particle_rhs], - dist_r2, 0, 0, 0); - } +#pragma omp task default(shared) firstprivate(currenct_cell_idx, intervals_ptr) + { + const std::vector<std::pair<partsize_t,partsize_t>>& intervals = (*intervals_ptr); -// if(((inout_index_particles[(intervals[idx_1].first+idx_p1)] == 356) -// || inout_index_particles[(intervals[idx_1].first+idx_p2)] == 356)/* -// && ((inout_index_particles[(intervals[idx_1].first+idx_p1)] == 1832) -// || inout_index_particles[(intervals[idx_1].first+idx_p2)] == 1832) -// && ((inout_index_particles[(intervals[idx_1].first+idx_p1)] == 547) -// || inout_index_particles[(intervals[idx_1].first+idx_p2)] == 547)*/){// TODO -// printf("print between :\n"); -// printf("index %ld (%ld) pos %e %e %e\n", -// (intervals[idx_1].first+idx_p1), inout_index_particles[(intervals[idx_1].first+idx_p1)], -// particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDX_X], -// particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDX_Y], -// particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDX_Z]); -// printf("index %ld (%ld) pos %e %e %e\n", -// (intervals[idx_1].first+idx_p2), -// inout_index_particles[(intervals[idx_1].first+idx_p2)], -// particles_positions[(intervals[idx_1].first+idx_p2)*size_particle_positions + IDX_X], -// particles_positions[(intervals[idx_1].first+idx_p2)*size_particle_positions + IDX_Y], -// particles_positions[(intervals[idx_1].first+idx_p2)*size_particle_positions + IDX_Z]); -// printf("Radius = %e (%e)\n", sqrt(dist_r2), dist_r2); -// } - } - } + cells_locker.lock(currenct_cell_idx); - // with other interval - for(size_t idx_2 = idx_1+1 ; idx_2 < intervals.size() ; ++idx_2){ + for(size_t idx_1 = 0 ; idx_1 < intervals.size() ; ++idx_1){ + // self interval for(partsize_t idx_p1 = 0 ; idx_p1 < intervals[idx_1].second ; ++idx_p1){ - for(partsize_t idx_p2 = 0 ; idx_p2 < intervals[idx_2].second ; ++idx_p2){ + // if(((inout_index_particles[(intervals[idx_1].first+idx_p1)] == 356))){// TODO + // printf("box %ld:\n", iter_cell.first); + // printf("intervals.size() %lu:\n", intervals.size()); + // printf("intervals[idx_1].second %ld:\n", intervals[idx_1].second); + // printf("index %ld (%ld) pos %e %e %e\n", + // (intervals[idx_1].first+idx_p1), inout_index_particles[(intervals[idx_1].first+idx_p1)], + // particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDX_X], + // particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDX_Y], + // particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDX_Z]); + // } + // if(((inout_index_particles[(intervals[idx_1].first+idx_p1)] == 547))){// TODO + // printf("box %ld:\n", iter_cell.first); + // printf("intervals.size() %lu:\n", intervals.size()); + // printf("intervals[idx_1].second %ld:\n", intervals[idx_1].second); + // printf("index %ld (%ld) pos %e %e %e\n", + // (intervals[idx_1].first+idx_p1), inout_index_particles[(intervals[idx_1].first+idx_p1)], + // particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDX_X], + // particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDX_Y], + // particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDX_Z]); + // } + + + for(partsize_t idx_p2 = idx_p1+1 ; idx_p2 < intervals[idx_1].second ; ++idx_p2){ const real_number dist_r2 = compute_distance_r2(particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDX_X], particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDX_Y], particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDX_Z], - particles_positions[(intervals[idx_2].first+idx_p2)*size_particle_positions + IDX_X], - particles_positions[(intervals[idx_2].first+idx_p2)*size_particle_positions + IDX_Y], - particles_positions[(intervals[idx_2].first+idx_p2)*size_particle_positions + IDX_Z], + particles_positions[(intervals[idx_1].first+idx_p2)*size_particle_positions + IDX_X], + particles_positions[(intervals[idx_1].first+idx_p2)*size_particle_positions + IDX_Y], + particles_positions[(intervals[idx_1].first+idx_p2)*size_particle_positions + IDX_Z], 0, 0, 0); if(dist_r2 < cutoff_radius_compute*cutoff_radius_compute){ in_computer.template compute_interaction<size_particle_positions,size_particle_data,size_particle_rhs>( &particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions], &particles_data[(intervals[idx_1].first+idx_p1)*size_particle_data], &particles_current_rhs[(intervals[idx_1].first+idx_p1)*size_particle_rhs], - &particles_positions[(intervals[idx_2].first+idx_p2)*size_particle_positions], - &particles_data[(intervals[idx_2].first+idx_p2)*size_particle_data], - &particles_current_rhs[(intervals[idx_2].first+idx_p2)*size_particle_rhs], + &particles_positions[(intervals[idx_1].first+idx_p2)*size_particle_positions], + &particles_data[(intervals[idx_1].first+idx_p2)*size_particle_data], + &particles_current_rhs[(intervals[idx_1].first+idx_p2)*size_particle_rhs], dist_r2, 0, 0, 0); } -// if(((inout_index_particles[(intervals[idx_1].first+idx_p1)] == 356) -// || inout_index_particles[(intervals[idx_2].first+idx_p2)] == 356)/* -// && ((inout_index_particles[(intervals[idx_1].first+idx_p1)] == 547) -// || inout_index_particles[(intervals[idx_2].first+idx_p2)] == 547) -// && ((inout_index_particles[(intervals[idx_1].first+idx_p1)] == 1832) -// || inout_index_particles[(intervals[idx_2].first+idx_p2)] == 1832)*/){// TODO -// printf("print between :\n"); -// printf("index %ld (%ld) pos %e %e %e\n", -// (intervals[idx_1].first+idx_p1), inout_index_particles[(intervals[idx_1].first+idx_p1)], -// particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDX_X], -// particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDX_Y], -// particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDX_Z]); -// printf("index %ld (%ld) pos %e %e %e\n", -// (intervals[idx_2].first+idx_p2), -// inout_index_particles[(intervals[idx_2].first+idx_p2)], -// particles_positions[(intervals[idx_2].first+idx_p2)*size_particle_positions + IDX_X], -// particles_positions[(intervals[idx_2].first+idx_p2)*size_particle_positions + IDX_Y], -// particles_positions[(intervals[idx_2].first+idx_p2)*size_particle_positions + IDX_Z]); -// printf("Radius = %e (%e)\n", sqrt(dist_r2), dist_r2); -// } + // if(((inout_index_particles[(intervals[idx_1].first+idx_p1)] == 356) + // || inout_index_particles[(intervals[idx_1].first+idx_p2)] == 356)/* + // && ((inout_index_particles[(intervals[idx_1].first+idx_p1)] == 1832) + // || inout_index_particles[(intervals[idx_1].first+idx_p2)] == 1832) + // && ((inout_index_particles[(intervals[idx_1].first+idx_p1)] == 547) + // || inout_index_particles[(intervals[idx_1].first+idx_p2)] == 547)*/){// TODO + // printf("print between :\n"); + // printf("index %ld (%ld) pos %e %e %e\n", + // (intervals[idx_1].first+idx_p1), inout_index_particles[(intervals[idx_1].first+idx_p1)], + // particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDX_X], + // particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDX_Y], + // particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDX_Z]); + // printf("index %ld (%ld) pos %e %e %e\n", + // (intervals[idx_1].first+idx_p2), + // inout_index_particles[(intervals[idx_1].first+idx_p2)], + // particles_positions[(intervals[idx_1].first+idx_p2)*size_particle_positions + IDX_X], + // particles_positions[(intervals[idx_1].first+idx_p2)*size_particle_positions + IDX_Y], + // particles_positions[(intervals[idx_1].first+idx_p2)*size_particle_positions + IDX_Z]); + // printf("Radius = %e (%e)\n", sqrt(dist_r2), dist_r2); + // } + } + } + + // with other interval + for(size_t idx_2 = idx_1+1 ; idx_2 < intervals.size() ; ++idx_2){ + for(partsize_t idx_p1 = 0 ; idx_p1 < intervals[idx_1].second ; ++idx_p1){ + for(partsize_t idx_p2 = 0 ; idx_p2 < intervals[idx_2].second ; ++idx_p2){ + const real_number dist_r2 = compute_distance_r2(particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDX_X], + particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDX_Y], + particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDX_Z], + particles_positions[(intervals[idx_2].first+idx_p2)*size_particle_positions + IDX_X], + particles_positions[(intervals[idx_2].first+idx_p2)*size_particle_positions + IDX_Y], + particles_positions[(intervals[idx_2].first+idx_p2)*size_particle_positions + IDX_Z], + 0, 0, 0); + if(dist_r2 < cutoff_radius_compute*cutoff_radius_compute){ + in_computer.template compute_interaction<size_particle_positions,size_particle_data,size_particle_rhs>( + &particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions], + &particles_data[(intervals[idx_1].first+idx_p1)*size_particle_data], + &particles_current_rhs[(intervals[idx_1].first+idx_p1)*size_particle_rhs], + &particles_positions[(intervals[idx_2].first+idx_p2)*size_particle_positions], + &particles_data[(intervals[idx_2].first+idx_p2)*size_particle_data], + &particles_current_rhs[(intervals[idx_2].first+idx_p2)*size_particle_rhs], + dist_r2, 0, 0, 0); + } + + // if(((inout_index_particles[(intervals[idx_1].first+idx_p1)] == 356) + // || inout_index_particles[(intervals[idx_2].first+idx_p2)] == 356)/* + // && ((inout_index_particles[(intervals[idx_1].first+idx_p1)] == 547) + // || inout_index_particles[(intervals[idx_2].first+idx_p2)] == 547) + // && ((inout_index_particles[(intervals[idx_1].first+idx_p1)] == 1832) + // || inout_index_particles[(intervals[idx_2].first+idx_p2)] == 1832)*/){// TODO + // printf("print between :\n"); + // printf("index %ld (%ld) pos %e %e %e\n", + // (intervals[idx_1].first+idx_p1), inout_index_particles[(intervals[idx_1].first+idx_p1)], + // particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDX_X], + // particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDX_Y], + // particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDX_Z]); + // printf("index %ld (%ld) pos %e %e %e\n", + // (intervals[idx_2].first+idx_p2), + // inout_index_particles[(intervals[idx_2].first+idx_p2)], + // particles_positions[(intervals[idx_2].first+idx_p2)*size_particle_positions + IDX_X], + // particles_positions[(intervals[idx_2].first+idx_p2)*size_particle_positions + IDX_Y], + // particles_positions[(intervals[idx_2].first+idx_p2)*size_particle_positions + IDX_Z]); + // printf("Radius = %e (%e)\n", sqrt(dist_r2), dist_r2); + // } + } } } } - } + const std::vector<std::pair<partsize_t,partsize_t>>* neighbors[27]; + long int neighbors_indexes[27]; + std::array<real_number,3> shift[27]; + const int nbNeighbors = my_tree.getNeighbors(currenct_cell_idx, neighbors, neighbors_indexes, shift, false); + + // if(((currenct_cell_idx == 785))){// TODO + // printf("box %ld:\n", iter_cell.first); + // printf("intervals.size() %lu:\n", intervals.size()); + // printf("nbNeighbors %d\n",nbNeighbors); + // } + + for(size_t idx_1 = 0 ; idx_1 < intervals.size() ; ++idx_1){ + // with other interval + for(size_t idx_neighbor = 0 ; idx_neighbor < nbNeighbors ; ++idx_neighbor){ + if(currenct_cell_idx < neighbors_indexes[idx_neighbor]){ + cells_locker.lock(neighbors_indexes[idx_neighbor]); + + for(size_t idx_2 = 0 ; idx_2 < (*neighbors[idx_neighbor]).size() ; ++idx_2){ + for(partsize_t idx_p1 = 0 ; idx_p1 < intervals[idx_1].second ; ++idx_p1){ + for(partsize_t idx_p2 = 0 ; idx_p2 < (*neighbors[idx_neighbor])[idx_2].second ; ++idx_p2){ + const real_number dist_r2 = compute_distance_r2(particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDX_X], + particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDX_Y], + particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDX_Z], + particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions + IDX_X], + particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions + IDX_Y], + particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions + IDX_Z], + shift[idx_neighbor][IDX_X], shift[idx_neighbor][IDX_Y], shift[idx_neighbor][IDX_Z]); + if(dist_r2 < cutoff_radius_compute*cutoff_radius_compute){ + in_computer.template compute_interaction<size_particle_positions,size_particle_data,size_particle_rhs>( + &particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions], + &particles_data[(intervals[idx_1].first+idx_p1)*size_particle_data], + &particles_current_rhs[(intervals[idx_1].first+idx_p1)*size_particle_rhs], + &particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions], + &particles_data[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_data], + &particles_current_rhs[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_rhs], + dist_r2, shift[idx_neighbor][IDX_X], shift[idx_neighbor][IDX_Y], shift[idx_neighbor][IDX_Z]); + } - const long int currenct_cell_idx = iter_cell.first; - const std::vector<std::pair<partsize_t,partsize_t>>* neighbors[27]; - long int neighbors_indexes[27]; - std::array<real_number,3> shift[27]; - const int nbNeighbors = my_tree.getNeighbors(currenct_cell_idx, neighbors, neighbors_indexes, shift, false); - -// if(((currenct_cell_idx == 785))){// TODO -// printf("box %ld:\n", iter_cell.first); -// printf("intervals.size() %lu:\n", intervals.size()); -// printf("nbNeighbors %d\n",nbNeighbors); -// } - - for(size_t idx_1 = 0 ; idx_1 < intervals.size() ; ++idx_1){ - // with other interval - for(size_t idx_neighbor = 0 ; idx_neighbor < nbNeighbors ; ++idx_neighbor){ - if(currenct_cell_idx < neighbors_indexes[idx_neighbor]){ - for(size_t idx_2 = 0 ; idx_2 < (*neighbors[idx_neighbor]).size() ; ++idx_2){ - for(partsize_t idx_p1 = 0 ; idx_p1 < intervals[idx_1].second ; ++idx_p1){ - for(partsize_t idx_p2 = 0 ; idx_p2 < (*neighbors[idx_neighbor])[idx_2].second ; ++idx_p2){ - const real_number dist_r2 = compute_distance_r2(particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDX_X], - particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDX_Y], - particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDX_Z], - particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions + IDX_X], - particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions + IDX_Y], - particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions + IDX_Z], - shift[idx_neighbor][IDX_X], shift[idx_neighbor][IDX_Y], shift[idx_neighbor][IDX_Z]); - if(dist_r2 < cutoff_radius_compute*cutoff_radius_compute){ - in_computer.template compute_interaction<size_particle_positions,size_particle_data,size_particle_rhs>( - &particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions], - &particles_data[(intervals[idx_1].first+idx_p1)*size_particle_data], - &particles_current_rhs[(intervals[idx_1].first+idx_p1)*size_particle_rhs], - &particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions], - &particles_data[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_data], - &particles_current_rhs[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_rhs], - dist_r2, shift[idx_neighbor][IDX_X], shift[idx_neighbor][IDX_Y], shift[idx_neighbor][IDX_Z]); + // if(((inout_index_particles[(intervals[idx_1].first+idx_p1)] == 356) + // || inout_index_particles[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)] == 356)/* + // && (inout_index_particles[(intervals[idx_1].first+idx_p1)] == 547) + // || inout_index_particles[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)] == 547 + // && (inout_index_particles[(intervals[idx_1].first+idx_p1)] == 1832) + // || inout_index_particles[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)] == 1832*/){// TODO + // printf("print between :\n"); + // printf("index %ld (%ld) pos %e %e %e\n", + // (intervals[idx_1].first+idx_p1), inout_index_particles[(intervals[idx_1].first+idx_p1)], + // particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDX_X], + // particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDX_Y], + // particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDX_Z]); + // printf("index %ld (%ld) pos %e %e %e\n", + // ((*neighbors[idx_neighbor])[idx_2].first+idx_p2), + // inout_index_particles[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)], + // particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions + IDX_X], + // particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions + IDX_Y], + // particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions + IDX_Z]); + // printf("Radius = %e (%e)\n", sqrt(dist_r2), dist_r2); + // } } - -// if(((inout_index_particles[(intervals[idx_1].first+idx_p1)] == 356) -// || inout_index_particles[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)] == 356)/* -// && (inout_index_particles[(intervals[idx_1].first+idx_p1)] == 547) -// || inout_index_particles[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)] == 547 -// && (inout_index_particles[(intervals[idx_1].first+idx_p1)] == 1832) -// || inout_index_particles[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)] == 1832*/){// TODO -// printf("print between :\n"); -// printf("index %ld (%ld) pos %e %e %e\n", -// (intervals[idx_1].first+idx_p1), inout_index_particles[(intervals[idx_1].first+idx_p1)], -// particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDX_X], -// particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDX_Y], -// particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDX_Z]); -// printf("index %ld (%ld) pos %e %e %e\n", -// ((*neighbors[idx_neighbor])[idx_2].first+idx_p2), -// inout_index_particles[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)], -// particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions + IDX_X], -// particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions + IDX_Y], -// particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions + IDX_Z]); -// printf("Radius = %e (%e)\n", sqrt(dist_r2), dist_r2); -// } } } + cells_locker.unlock(neighbors_indexes[idx_neighbor]); } } } + + cells_locker.unlock(currenct_cell_idx); } } } diff --git a/bfps/cpp/particles/p2p_tree.hpp b/bfps/cpp/particles/p2p_tree.hpp index 52cb2f2d967ebc3533c8e3c98455734d26c450cb..3d92c4e5666a1b1d94e71a6ed920a5bc063cfcba 100644 --- a/bfps/cpp/particles/p2p_tree.hpp +++ b/bfps/cpp/particles/p2p_tree.hpp @@ -15,7 +15,7 @@ class p2p_tree{ } long int get_cell_coord_y_from_index(const long int index) const{ - return (index - get_cell_coord_z_from_index(index)*(nb_cell_levels[IDX_X]*nb_cell_levels[IDX_Y])) + return (index % (nb_cell_levels[IDX_X]*nb_cell_levels[IDX_Y])) / nb_cell_levels[IDX_X]; }