Skip to content
Snippets Groups Projects
Commit e38f1267 authored by Berenger Bramas's avatar Berenger Bramas
Browse files

Add PBC rounding to the positions and add lock free array to protect the cells from race conditions

parent cba0515b
Branches
Tags
1 merge request!23WIP: Feature/use cmake
#ifndef LOCK_FREE_BOOL_ARRAY_HPP
#define LOCK_FREE_BOOL_ARRAY_HPP
#include <vector>
#include <memory>
class lock_free_bool_array{
std::vector<std::unique_ptr<long int>> keys;
public:
explicit lock_free_bool_array(const long int inNbKeys = 512){
keys.resize(inNbKeys);
for(std::unique_ptr<long int>& k : keys){
k.reset(new long int(0));
}
}
void lock(const int inKey){
volatile long int* k = keys[inKey%keys.size()].get();
long int res = 1;
int cpt = 0;
while(res == 1){
res = __sync_val_compare_and_swap(k, 0, res);
cpt++;
}
}
void unlock(const int inKey){
volatile long int* k = keys[inKey%keys.size()].get();
assert(k && *k);
(*k) = 0;
}
};
#endif
...@@ -14,6 +14,7 @@ ...@@ -14,6 +14,7 @@
#include "scope_timer.hpp" #include "scope_timer.hpp"
#include "particles_utils.hpp" #include "particles_utils.hpp"
#include "p2p_tree.hpp" #include "p2p_tree.hpp"
#include "lock_free_bool_array.hpp"
template <class partsize_t, class real_number> template <class partsize_t, class real_number>
class p2p_distr_mpi { class p2p_distr_mpi {
...@@ -204,11 +205,21 @@ public: ...@@ -204,11 +205,21 @@ public:
- std::numeric_limits<real_number>::epsilon())/cutoff_radius); - std::numeric_limits<real_number>::epsilon())/cutoff_radius);
} }
real_number apply_pbc(real_number pos, IDXS_3D dim) const{
while( pos < spatial_box_offset[dim] ){
pos += spatial_box_width[dim];
}
while( spatial_box_width[dim]+spatial_box_offset[dim] <= pos){
pos -= spatial_box_width[dim];
}
return pos;
}
std::array<long int,3> get_cell_coordinate(const real_number pos_x, const real_number pos_y, std::array<long int,3> get_cell_coordinate(const real_number pos_x, const real_number pos_y,
const real_number pos_z) const { const real_number pos_z) const {
const real_number diff_x = pos_x - spatial_box_offset[IDX_X]; const real_number diff_x = apply_pbc(pos_x,IDX_X) - spatial_box_offset[IDX_X];
const real_number diff_y = pos_y - spatial_box_offset[IDX_Y]; const real_number diff_y = apply_pbc(pos_y,IDX_Y) - spatial_box_offset[IDX_Y];
const real_number diff_z = pos_z - spatial_box_offset[IDX_Z]; const real_number diff_z = apply_pbc(pos_z,IDX_Z) - spatial_box_offset[IDX_Z];
std::array<long int,3> coord; std::array<long int,3> coord;
coord[IDX_X] = static_cast<long int>(diff_x/cutoff_radius); coord[IDX_X] = static_cast<long int>(diff_x/cutoff_radius);
coord[IDX_Y] = static_cast<long int>(diff_y/cutoff_radius); coord[IDX_Y] = static_cast<long int>(diff_y/cutoff_radius);
...@@ -225,13 +236,13 @@ public: ...@@ -225,13 +236,13 @@ public:
real_number compute_distance_r2(const real_number x1, const real_number y1, const real_number z1, real_number compute_distance_r2(const real_number x1, const real_number y1, const real_number z1,
const real_number x2, const real_number y2, const real_number z2, const real_number x2, const real_number y2, const real_number z2,
const real_number xshift_coef, const real_number yshift_coef, const real_number zshift_coef) const { const real_number xshift_coef, const real_number yshift_coef, const real_number zshift_coef) const {
real_number diff_x = std::abs(x1-x2+xshift_coef*spatial_box_width[IDX_X]); real_number diff_x = std::abs(apply_pbc(x1,IDX_X)-apply_pbc(x2,IDX_X)+xshift_coef*spatial_box_width[IDX_X]);
assert(diff_x <= 2*cutoff_radius); assert(diff_x <= 2*cutoff_radius);
real_number diff_y = std::abs(y1-y2+yshift_coef*spatial_box_width[IDX_Y]); real_number diff_y = std::abs(apply_pbc(y1,IDX_X)-apply_pbc(y2,IDX_X)+yshift_coef*spatial_box_width[IDX_Y]);
assert(diff_y <= 2*cutoff_radius); assert(diff_y <= 2*cutoff_radius);
real_number diff_z = std::abs(z1-z2+zshift_coef*spatial_box_width[IDX_Z]); real_number diff_z = std::abs(apply_pbc(z1,IDX_X)-apply_pbc(z2,IDX_X)+zshift_coef*spatial_box_width[IDX_Z]);
assert(diff_z <= 2*cutoff_radius); assert(diff_z <= 2*cutoff_radius);
return (diff_x*diff_x) + (diff_y*diff_y) + (diff_z*diff_z); return (diff_x*diff_x) + (diff_y*diff_y) + (diff_z*diff_z);
...@@ -561,6 +572,8 @@ public: ...@@ -561,6 +572,8 @@ public:
} }
} }
lock_free_bool_array cells_locker(512);
TIMEZONE_OMP_INIT_PREPARALLEL(omp_get_max_threads()) TIMEZONE_OMP_INIT_PREPARALLEL(omp_get_max_threads())
#pragma omp parallel default(shared) #pragma omp parallel default(shared)
{ {
...@@ -648,10 +661,12 @@ public: ...@@ -648,10 +661,12 @@ public:
nb_parts_in_cell += 1; nb_parts_in_cell += 1;
} }
const std::vector<std::pair<partsize_t,partsize_t>>* neighbors[27]; #pragma omp task default(shared) firstprivate(idxPart, nb_parts_in_cell, current_cell_idx)
long int neighbors_indexes[27]; {
std::array<real_number,3> shift[27]; const std::vector<std::pair<partsize_t,partsize_t>>* neighbors[27];
const int nbNeighbors = my_tree.getNeighbors(current_cell_idx, neighbors, neighbors_indexes, shift, true); long int neighbors_indexes[27];
std::array<real_number,3> shift[27];
const int nbNeighbors = my_tree.getNeighbors(current_cell_idx, neighbors, neighbors_indexes, shift, true);
// for(int idx_test = 0 ; idx_test < nb_parts_in_cell ; ++idx_test){ // TODO // for(int idx_test = 0 ; idx_test < nb_parts_in_cell ; ++idx_test){ // TODO
// real_number totest[3] = {8.570442e-01, 7.173084e-02, 8.279754e-03}; // real_number totest[3] = {8.570442e-01, 7.173084e-02, 8.279754e-03};
...@@ -666,52 +681,59 @@ public: ...@@ -666,52 +681,59 @@ public:
// } // }
// } // }
// with other interval // with other interval
for(size_t idx_neighbor = 0 ; idx_neighbor < nbNeighbors ; ++idx_neighbor){ for(size_t idx_neighbor = 0 ; idx_neighbor < nbNeighbors ; ++idx_neighbor){
for(size_t idx_2 = 0 ; idx_2 < (*neighbors[idx_neighbor]).size() ; ++idx_2){ cells_locker.lock(neighbors_indexes[idx_neighbor]);
for(partsize_t idx_p1 = 0 ; idx_p1 < nb_parts_in_cell ; ++idx_p1){
for(partsize_t idx_p2 = 0 ; idx_p2 < (*neighbors[idx_neighbor])[idx_2].second ; ++idx_p2){ for(size_t idx_2 = 0 ; idx_2 < (*neighbors[idx_neighbor]).size() ; ++idx_2){
const real_number dist_r2 = compute_distance_r2(descriptor.toCompute[(idxPart+idx_p1)*size_particle_positions + IDX_X], for(partsize_t idx_p1 = 0 ; idx_p1 < nb_parts_in_cell ; ++idx_p1){
descriptor.toCompute[(idxPart+idx_p1)*size_particle_positions + IDX_Y], for(partsize_t idx_p2 = 0 ; idx_p2 < (*neighbors[idx_neighbor])[idx_2].second ; ++idx_p2){
descriptor.toCompute[(idxPart+idx_p1)*size_particle_positions + IDX_Z], const real_number dist_r2 = compute_distance_r2(descriptor.toCompute[(idxPart+idx_p1)*size_particle_positions + IDX_X],
particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions + IDX_X], descriptor.toCompute[(idxPart+idx_p1)*size_particle_positions + IDX_Y],
particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions + IDX_Y], descriptor.toCompute[(idxPart+idx_p1)*size_particle_positions + IDX_Z],
particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions + IDX_Z], particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions + IDX_X],
shift[idx_neighbor][IDX_X], shift[idx_neighbor][IDX_Y], shift[idx_neighbor][IDX_Z]); particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions + IDX_Y],
if(dist_r2 < cutoff_radius_compute*cutoff_radius_compute){ particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions + IDX_Z],
in_computer.template compute_interaction<size_particle_positions,size_particle_data, size_particle_rhs>( shift[idx_neighbor][IDX_X], shift[idx_neighbor][IDX_Y], shift[idx_neighbor][IDX_Z]);
&descriptor.toCompute[(idxPart+idx_p1)*size_particle_positions], if(dist_r2 < cutoff_radius_compute*cutoff_radius_compute){
&descriptor.toData[(idxPart+idx_p1)*size_particle_data], in_computer.template compute_interaction<size_particle_positions,size_particle_data, size_particle_rhs>(
&descriptor.results[(idxPart+idx_p1)*size_particle_rhs], &descriptor.toCompute[(idxPart+idx_p1)*size_particle_positions],
&particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions], &descriptor.toData[(idxPart+idx_p1)*size_particle_data],
&particles_data[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_data], &descriptor.results[(idxPart+idx_p1)*size_particle_rhs],
&particles_current_rhs[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_rhs], &particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions],
dist_r2, shift[idx_neighbor][IDX_X], shift[idx_neighbor][IDX_Y], shift[idx_neighbor][IDX_Z]); &particles_data[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_data],
&particles_current_rhs[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_rhs],
dist_r2, shift[idx_neighbor][IDX_X], shift[idx_neighbor][IDX_Y], shift[idx_neighbor][IDX_Z]);
}
// if(inout_index_particles[(*neighbors[idx_neighbor])[idx_2].first+idx_p2] == 356){// TODO
// printf("test interaction between :\n");
// printf("index %ld (%ld) pos %e %e %e\n",
// (idxPart+idx_p1), -1L,
// descriptor.toCompute[(idxPart+idx_p1)*size_particle_positions + IDX_X],
// descriptor.toCompute[(idxPart+idx_p1)*size_particle_positions + IDX_Y],
// descriptor.toCompute[(idxPart+idx_p1)*size_particle_positions + IDX_Z]);
// printf("index %ld (%ld) pos %e %e %e\n",
// ((*neighbors[idx_neighbor])[idx_2].first+idx_p2),
// inout_index_particles[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)],
// particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions + IDX_X],
// particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions + IDX_Y],
// particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions + IDX_Z]);
// printf("Radius = %e (%e)\n", sqrt(dist_r2), dist_r2);
// }
} }
// if(inout_index_particles[(*neighbors[idx_neighbor])[idx_2].first+idx_p2] == 356){// TODO
// printf("test interaction between :\n");
// printf("index %ld (%ld) pos %e %e %e\n",
// (idxPart+idx_p1), -1L,
// descriptor.toCompute[(idxPart+idx_p1)*size_particle_positions + IDX_X],
// descriptor.toCompute[(idxPart+idx_p1)*size_particle_positions + IDX_Y],
// descriptor.toCompute[(idxPart+idx_p1)*size_particle_positions + IDX_Z]);
// printf("index %ld (%ld) pos %e %e %e\n",
// ((*neighbors[idx_neighbor])[idx_2].first+idx_p2),
// inout_index_particles[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)],
// particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions + IDX_X],
// particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions + IDX_Y],
// particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions + IDX_Z]);
// printf("Radius = %e (%e)\n", sqrt(dist_r2), dist_r2);
// }
} }
} }
cells_locker.unlock(neighbors_indexes[idx_neighbor]);
} }
} }
idxPart += nb_parts_in_cell; idxPart += nb_parts_in_cell;
} }
#pragma omp taskwait
// Send back // Send back
const int destProc = descriptor.destProc; const int destProc = descriptor.destProc;
whatNext.emplace_back(std::pair<Action,int>{RELEASE_BUFFER_PARTICLES, releasedAction.second}); whatNext.emplace_back(std::pair<Action,int>{RELEASE_BUFFER_PARTICLES, releasedAction.second});
...@@ -761,185 +783,196 @@ public: ...@@ -761,185 +783,196 @@ public:
// Compute self data // Compute self data
for(const auto& iter_cell : my_tree){ for(const auto& iter_cell : my_tree){
TIMEZONE("proceed-leaf"); TIMEZONE("proceed-leaf");
const std::vector<std::pair<partsize_t,partsize_t>>& intervals = iter_cell.second; const long int currenct_cell_idx = iter_cell.first;
const std::vector<std::pair<partsize_t,partsize_t>>* intervals_ptr = &iter_cell.second;
for(size_t idx_1 = 0 ; idx_1 < intervals.size() ; ++idx_1){
// self interval
for(partsize_t idx_p1 = 0 ; idx_p1 < intervals[idx_1].second ; ++idx_p1){
// if(((inout_index_particles[(intervals[idx_1].first+idx_p1)] == 356))){// TODO
// printf("box %ld:\n", iter_cell.first);
// printf("intervals.size() %lu:\n", intervals.size());
// printf("intervals[idx_1].second %ld:\n", intervals[idx_1].second);
// printf("index %ld (%ld) pos %e %e %e\n",
// (intervals[idx_1].first+idx_p1), inout_index_particles[(intervals[idx_1].first+idx_p1)],
// particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDX_X],
// particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDX_Y],
// particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDX_Z]);
// }
// if(((inout_index_particles[(intervals[idx_1].first+idx_p1)] == 547))){// TODO
// printf("box %ld:\n", iter_cell.first);
// printf("intervals.size() %lu:\n", intervals.size());
// printf("intervals[idx_1].second %ld:\n", intervals[idx_1].second);
// printf("index %ld (%ld) pos %e %e %e\n",
// (intervals[idx_1].first+idx_p1), inout_index_particles[(intervals[idx_1].first+idx_p1)],
// particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDX_X],
// particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDX_Y],
// particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDX_Z]);
// }
for(partsize_t idx_p2 = idx_p1+1 ; idx_p2 < intervals[idx_1].second ; ++idx_p2){ #pragma omp task default(shared) firstprivate(currenct_cell_idx, intervals_ptr)
const real_number dist_r2 = compute_distance_r2(particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDX_X], {
particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDX_Y], const std::vector<std::pair<partsize_t,partsize_t>>& intervals = (*intervals_ptr);
particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDX_Z],
particles_positions[(intervals[idx_1].first+idx_p2)*size_particle_positions + IDX_X],
particles_positions[(intervals[idx_1].first+idx_p2)*size_particle_positions + IDX_Y],
particles_positions[(intervals[idx_1].first+idx_p2)*size_particle_positions + IDX_Z],
0, 0, 0);
if(dist_r2 < cutoff_radius_compute*cutoff_radius_compute){
in_computer.template compute_interaction<size_particle_positions,size_particle_data,size_particle_rhs>(
&particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions],
&particles_data[(intervals[idx_1].first+idx_p1)*size_particle_data],
&particles_current_rhs[(intervals[idx_1].first+idx_p1)*size_particle_rhs],
&particles_positions[(intervals[idx_1].first+idx_p2)*size_particle_positions],
&particles_data[(intervals[idx_1].first+idx_p2)*size_particle_data],
&particles_current_rhs[(intervals[idx_1].first+idx_p2)*size_particle_rhs],
dist_r2, 0, 0, 0);
}
// if(((inout_index_particles[(intervals[idx_1].first+idx_p1)] == 356) cells_locker.lock(currenct_cell_idx);
// || inout_index_particles[(intervals[idx_1].first+idx_p2)] == 356)/*
// && ((inout_index_particles[(intervals[idx_1].first+idx_p1)] == 1832)
// || inout_index_particles[(intervals[idx_1].first+idx_p2)] == 1832)
// && ((inout_index_particles[(intervals[idx_1].first+idx_p1)] == 547)
// || inout_index_particles[(intervals[idx_1].first+idx_p2)] == 547)*/){// TODO
// printf("print between :\n");
// printf("index %ld (%ld) pos %e %e %e\n",
// (intervals[idx_1].first+idx_p1), inout_index_particles[(intervals[idx_1].first+idx_p1)],
// particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDX_X],
// particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDX_Y],
// particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDX_Z]);
// printf("index %ld (%ld) pos %e %e %e\n",
// (intervals[idx_1].first+idx_p2),
// inout_index_particles[(intervals[idx_1].first+idx_p2)],
// particles_positions[(intervals[idx_1].first+idx_p2)*size_particle_positions + IDX_X],
// particles_positions[(intervals[idx_1].first+idx_p2)*size_particle_positions + IDX_Y],
// particles_positions[(intervals[idx_1].first+idx_p2)*size_particle_positions + IDX_Z]);
// printf("Radius = %e (%e)\n", sqrt(dist_r2), dist_r2);
// }
}
}
// with other interval for(size_t idx_1 = 0 ; idx_1 < intervals.size() ; ++idx_1){
for(size_t idx_2 = idx_1+1 ; idx_2 < intervals.size() ; ++idx_2){ // self interval
for(partsize_t idx_p1 = 0 ; idx_p1 < intervals[idx_1].second ; ++idx_p1){ for(partsize_t idx_p1 = 0 ; idx_p1 < intervals[idx_1].second ; ++idx_p1){
for(partsize_t idx_p2 = 0 ; idx_p2 < intervals[idx_2].second ; ++idx_p2){ // if(((inout_index_particles[(intervals[idx_1].first+idx_p1)] == 356))){// TODO
// printf("box %ld:\n", iter_cell.first);
// printf("intervals.size() %lu:\n", intervals.size());
// printf("intervals[idx_1].second %ld:\n", intervals[idx_1].second);
// printf("index %ld (%ld) pos %e %e %e\n",
// (intervals[idx_1].first+idx_p1), inout_index_particles[(intervals[idx_1].first+idx_p1)],
// particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDX_X],
// particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDX_Y],
// particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDX_Z]);
// }
// if(((inout_index_particles[(intervals[idx_1].first+idx_p1)] == 547))){// TODO
// printf("box %ld:\n", iter_cell.first);
// printf("intervals.size() %lu:\n", intervals.size());
// printf("intervals[idx_1].second %ld:\n", intervals[idx_1].second);
// printf("index %ld (%ld) pos %e %e %e\n",
// (intervals[idx_1].first+idx_p1), inout_index_particles[(intervals[idx_1].first+idx_p1)],
// particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDX_X],
// particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDX_Y],
// particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDX_Z]);
// }
for(partsize_t idx_p2 = idx_p1+1 ; idx_p2 < intervals[idx_1].second ; ++idx_p2){
const real_number dist_r2 = compute_distance_r2(particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDX_X], const real_number dist_r2 = compute_distance_r2(particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDX_X],
particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDX_Y], particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDX_Y],
particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDX_Z], particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDX_Z],
particles_positions[(intervals[idx_2].first+idx_p2)*size_particle_positions + IDX_X], particles_positions[(intervals[idx_1].first+idx_p2)*size_particle_positions + IDX_X],
particles_positions[(intervals[idx_2].first+idx_p2)*size_particle_positions + IDX_Y], particles_positions[(intervals[idx_1].first+idx_p2)*size_particle_positions + IDX_Y],
particles_positions[(intervals[idx_2].first+idx_p2)*size_particle_positions + IDX_Z], particles_positions[(intervals[idx_1].first+idx_p2)*size_particle_positions + IDX_Z],
0, 0, 0); 0, 0, 0);
if(dist_r2 < cutoff_radius_compute*cutoff_radius_compute){ if(dist_r2 < cutoff_radius_compute*cutoff_radius_compute){
in_computer.template compute_interaction<size_particle_positions,size_particle_data,size_particle_rhs>( in_computer.template compute_interaction<size_particle_positions,size_particle_data,size_particle_rhs>(
&particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions], &particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions],
&particles_data[(intervals[idx_1].first+idx_p1)*size_particle_data], &particles_data[(intervals[idx_1].first+idx_p1)*size_particle_data],
&particles_current_rhs[(intervals[idx_1].first+idx_p1)*size_particle_rhs], &particles_current_rhs[(intervals[idx_1].first+idx_p1)*size_particle_rhs],
&particles_positions[(intervals[idx_2].first+idx_p2)*size_particle_positions], &particles_positions[(intervals[idx_1].first+idx_p2)*size_particle_positions],
&particles_data[(intervals[idx_2].first+idx_p2)*size_particle_data], &particles_data[(intervals[idx_1].first+idx_p2)*size_particle_data],
&particles_current_rhs[(intervals[idx_2].first+idx_p2)*size_particle_rhs], &particles_current_rhs[(intervals[idx_1].first+idx_p2)*size_particle_rhs],
dist_r2, 0, 0, 0); dist_r2, 0, 0, 0);
} }
// if(((inout_index_particles[(intervals[idx_1].first+idx_p1)] == 356) // if(((inout_index_particles[(intervals[idx_1].first+idx_p1)] == 356)
// || inout_index_particles[(intervals[idx_2].first+idx_p2)] == 356)/* // || inout_index_particles[(intervals[idx_1].first+idx_p2)] == 356)/*
// && ((inout_index_particles[(intervals[idx_1].first+idx_p1)] == 547) // && ((inout_index_particles[(intervals[idx_1].first+idx_p1)] == 1832)
// || inout_index_particles[(intervals[idx_2].first+idx_p2)] == 547) // || inout_index_particles[(intervals[idx_1].first+idx_p2)] == 1832)
// && ((inout_index_particles[(intervals[idx_1].first+idx_p1)] == 1832) // && ((inout_index_particles[(intervals[idx_1].first+idx_p1)] == 547)
// || inout_index_particles[(intervals[idx_2].first+idx_p2)] == 1832)*/){// TODO // || inout_index_particles[(intervals[idx_1].first+idx_p2)] == 547)*/){// TODO
// printf("print between :\n"); // printf("print between :\n");
// printf("index %ld (%ld) pos %e %e %e\n", // printf("index %ld (%ld) pos %e %e %e\n",
// (intervals[idx_1].first+idx_p1), inout_index_particles[(intervals[idx_1].first+idx_p1)], // (intervals[idx_1].first+idx_p1), inout_index_particles[(intervals[idx_1].first+idx_p1)],
// particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDX_X], // particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDX_X],
// particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDX_Y], // particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDX_Y],
// particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDX_Z]); // particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDX_Z]);
// printf("index %ld (%ld) pos %e %e %e\n", // printf("index %ld (%ld) pos %e %e %e\n",
// (intervals[idx_2].first+idx_p2), // (intervals[idx_1].first+idx_p2),
// inout_index_particles[(intervals[idx_2].first+idx_p2)], // inout_index_particles[(intervals[idx_1].first+idx_p2)],
// particles_positions[(intervals[idx_2].first+idx_p2)*size_particle_positions + IDX_X], // particles_positions[(intervals[idx_1].first+idx_p2)*size_particle_positions + IDX_X],
// particles_positions[(intervals[idx_2].first+idx_p2)*size_particle_positions + IDX_Y], // particles_positions[(intervals[idx_1].first+idx_p2)*size_particle_positions + IDX_Y],
// particles_positions[(intervals[idx_2].first+idx_p2)*size_particle_positions + IDX_Z]); // particles_positions[(intervals[idx_1].first+idx_p2)*size_particle_positions + IDX_Z]);
// printf("Radius = %e (%e)\n", sqrt(dist_r2), dist_r2); // printf("Radius = %e (%e)\n", sqrt(dist_r2), dist_r2);
// } // }
}
}
// with other interval
for(size_t idx_2 = idx_1+1 ; idx_2 < intervals.size() ; ++idx_2){
for(partsize_t idx_p1 = 0 ; idx_p1 < intervals[idx_1].second ; ++idx_p1){
for(partsize_t idx_p2 = 0 ; idx_p2 < intervals[idx_2].second ; ++idx_p2){
const real_number dist_r2 = compute_distance_r2(particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDX_X],
particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDX_Y],
particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDX_Z],
particles_positions[(intervals[idx_2].first+idx_p2)*size_particle_positions + IDX_X],
particles_positions[(intervals[idx_2].first+idx_p2)*size_particle_positions + IDX_Y],
particles_positions[(intervals[idx_2].first+idx_p2)*size_particle_positions + IDX_Z],
0, 0, 0);
if(dist_r2 < cutoff_radius_compute*cutoff_radius_compute){
in_computer.template compute_interaction<size_particle_positions,size_particle_data,size_particle_rhs>(
&particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions],
&particles_data[(intervals[idx_1].first+idx_p1)*size_particle_data],
&particles_current_rhs[(intervals[idx_1].first+idx_p1)*size_particle_rhs],
&particles_positions[(intervals[idx_2].first+idx_p2)*size_particle_positions],
&particles_data[(intervals[idx_2].first+idx_p2)*size_particle_data],
&particles_current_rhs[(intervals[idx_2].first+idx_p2)*size_particle_rhs],
dist_r2, 0, 0, 0);
}
// if(((inout_index_particles[(intervals[idx_1].first+idx_p1)] == 356)
// || inout_index_particles[(intervals[idx_2].first+idx_p2)] == 356)/*
// && ((inout_index_particles[(intervals[idx_1].first+idx_p1)] == 547)
// || inout_index_particles[(intervals[idx_2].first+idx_p2)] == 547)
// && ((inout_index_particles[(intervals[idx_1].first+idx_p1)] == 1832)
// || inout_index_particles[(intervals[idx_2].first+idx_p2)] == 1832)*/){// TODO
// printf("print between :\n");
// printf("index %ld (%ld) pos %e %e %e\n",
// (intervals[idx_1].first+idx_p1), inout_index_particles[(intervals[idx_1].first+idx_p1)],
// particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDX_X],
// particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDX_Y],
// particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDX_Z]);
// printf("index %ld (%ld) pos %e %e %e\n",
// (intervals[idx_2].first+idx_p2),
// inout_index_particles[(intervals[idx_2].first+idx_p2)],
// particles_positions[(intervals[idx_2].first+idx_p2)*size_particle_positions + IDX_X],
// particles_positions[(intervals[idx_2].first+idx_p2)*size_particle_positions + IDX_Y],
// particles_positions[(intervals[idx_2].first+idx_p2)*size_particle_positions + IDX_Z]);
// printf("Radius = %e (%e)\n", sqrt(dist_r2), dist_r2);
// }
}
} }
} }
} }
}
const std::vector<std::pair<partsize_t,partsize_t>>* neighbors[27];
long int neighbors_indexes[27];
std::array<real_number,3> shift[27];
const int nbNeighbors = my_tree.getNeighbors(currenct_cell_idx, neighbors, neighbors_indexes, shift, false);
// if(((currenct_cell_idx == 785))){// TODO
// printf("box %ld:\n", iter_cell.first);
// printf("intervals.size() %lu:\n", intervals.size());
// printf("nbNeighbors %d\n",nbNeighbors);
// }
for(size_t idx_1 = 0 ; idx_1 < intervals.size() ; ++idx_1){
// with other interval
for(size_t idx_neighbor = 0 ; idx_neighbor < nbNeighbors ; ++idx_neighbor){
if(currenct_cell_idx < neighbors_indexes[idx_neighbor]){
cells_locker.lock(neighbors_indexes[idx_neighbor]);
for(size_t idx_2 = 0 ; idx_2 < (*neighbors[idx_neighbor]).size() ; ++idx_2){
for(partsize_t idx_p1 = 0 ; idx_p1 < intervals[idx_1].second ; ++idx_p1){
for(partsize_t idx_p2 = 0 ; idx_p2 < (*neighbors[idx_neighbor])[idx_2].second ; ++idx_p2){
const real_number dist_r2 = compute_distance_r2(particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDX_X],
particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDX_Y],
particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDX_Z],
particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions + IDX_X],
particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions + IDX_Y],
particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions + IDX_Z],
shift[idx_neighbor][IDX_X], shift[idx_neighbor][IDX_Y], shift[idx_neighbor][IDX_Z]);
if(dist_r2 < cutoff_radius_compute*cutoff_radius_compute){
in_computer.template compute_interaction<size_particle_positions,size_particle_data,size_particle_rhs>(
&particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions],
&particles_data[(intervals[idx_1].first+idx_p1)*size_particle_data],
&particles_current_rhs[(intervals[idx_1].first+idx_p1)*size_particle_rhs],
&particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions],
&particles_data[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_data],
&particles_current_rhs[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_rhs],
dist_r2, shift[idx_neighbor][IDX_X], shift[idx_neighbor][IDX_Y], shift[idx_neighbor][IDX_Z]);
}
const long int currenct_cell_idx = iter_cell.first; // if(((inout_index_particles[(intervals[idx_1].first+idx_p1)] == 356)
const std::vector<std::pair<partsize_t,partsize_t>>* neighbors[27]; // || inout_index_particles[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)] == 356)/*
long int neighbors_indexes[27]; // && (inout_index_particles[(intervals[idx_1].first+idx_p1)] == 547)
std::array<real_number,3> shift[27]; // || inout_index_particles[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)] == 547
const int nbNeighbors = my_tree.getNeighbors(currenct_cell_idx, neighbors, neighbors_indexes, shift, false); // && (inout_index_particles[(intervals[idx_1].first+idx_p1)] == 1832)
// || inout_index_particles[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)] == 1832*/){// TODO
// if(((currenct_cell_idx == 785))){// TODO // printf("print between :\n");
// printf("box %ld:\n", iter_cell.first); // printf("index %ld (%ld) pos %e %e %e\n",
// printf("intervals.size() %lu:\n", intervals.size()); // (intervals[idx_1].first+idx_p1), inout_index_particles[(intervals[idx_1].first+idx_p1)],
// printf("nbNeighbors %d\n",nbNeighbors); // particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDX_X],
// } // particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDX_Y],
// particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDX_Z]);
for(size_t idx_1 = 0 ; idx_1 < intervals.size() ; ++idx_1){ // printf("index %ld (%ld) pos %e %e %e\n",
// with other interval // ((*neighbors[idx_neighbor])[idx_2].first+idx_p2),
for(size_t idx_neighbor = 0 ; idx_neighbor < nbNeighbors ; ++idx_neighbor){ // inout_index_particles[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)],
if(currenct_cell_idx < neighbors_indexes[idx_neighbor]){ // particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions + IDX_X],
for(size_t idx_2 = 0 ; idx_2 < (*neighbors[idx_neighbor]).size() ; ++idx_2){ // particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions + IDX_Y],
for(partsize_t idx_p1 = 0 ; idx_p1 < intervals[idx_1].second ; ++idx_p1){ // particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions + IDX_Z]);
for(partsize_t idx_p2 = 0 ; idx_p2 < (*neighbors[idx_neighbor])[idx_2].second ; ++idx_p2){ // printf("Radius = %e (%e)\n", sqrt(dist_r2), dist_r2);
const real_number dist_r2 = compute_distance_r2(particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDX_X], // }
particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDX_Y],
particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDX_Z],
particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions + IDX_X],
particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions + IDX_Y],
particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions + IDX_Z],
shift[idx_neighbor][IDX_X], shift[idx_neighbor][IDX_Y], shift[idx_neighbor][IDX_Z]);
if(dist_r2 < cutoff_radius_compute*cutoff_radius_compute){
in_computer.template compute_interaction<size_particle_positions,size_particle_data,size_particle_rhs>(
&particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions],
&particles_data[(intervals[idx_1].first+idx_p1)*size_particle_data],
&particles_current_rhs[(intervals[idx_1].first+idx_p1)*size_particle_rhs],
&particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions],
&particles_data[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_data],
&particles_current_rhs[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_rhs],
dist_r2, shift[idx_neighbor][IDX_X], shift[idx_neighbor][IDX_Y], shift[idx_neighbor][IDX_Z]);
} }
// if(((inout_index_particles[(intervals[idx_1].first+idx_p1)] == 356)
// || inout_index_particles[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)] == 356)/*
// && (inout_index_particles[(intervals[idx_1].first+idx_p1)] == 547)
// || inout_index_particles[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)] == 547
// && (inout_index_particles[(intervals[idx_1].first+idx_p1)] == 1832)
// || inout_index_particles[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)] == 1832*/){// TODO
// printf("print between :\n");
// printf("index %ld (%ld) pos %e %e %e\n",
// (intervals[idx_1].first+idx_p1), inout_index_particles[(intervals[idx_1].first+idx_p1)],
// particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDX_X],
// particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDX_Y],
// particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDX_Z]);
// printf("index %ld (%ld) pos %e %e %e\n",
// ((*neighbors[idx_neighbor])[idx_2].first+idx_p2),
// inout_index_particles[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)],
// particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions + IDX_X],
// particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions + IDX_Y],
// particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions + IDX_Z]);
// printf("Radius = %e (%e)\n", sqrt(dist_r2), dist_r2);
// }
} }
} }
cells_locker.unlock(neighbors_indexes[idx_neighbor]);
} }
} }
} }
cells_locker.unlock(currenct_cell_idx);
} }
} }
} }
......
...@@ -15,7 +15,7 @@ class p2p_tree{ ...@@ -15,7 +15,7 @@ class p2p_tree{
} }
long int get_cell_coord_y_from_index(const long int index) const{ long int get_cell_coord_y_from_index(const long int index) const{
return (index - get_cell_coord_z_from_index(index)*(nb_cell_levels[IDX_X]*nb_cell_levels[IDX_Y])) return (index % (nb_cell_levels[IDX_X]*nb_cell_levels[IDX_Y]))
/ nb_cell_levels[IDX_X]; / nb_cell_levels[IDX_X];
} }
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment