Commit cac11466 authored by Cristian Lalescu's avatar Cristian Lalescu
Browse files

passes full pair separation information to p2p

parent 2be5e2fe
Pipeline #87197 passed with stages
in 15 minutes and 39 seconds
......@@ -260,15 +260,16 @@ public:
real_number compute_distance_r2(const real_number x1, const real_number y1, const real_number z1,
const real_number x2, const real_number y2, const real_number z2,
const real_number xshift_coef, const real_number yshift_coef, const real_number zshift_coef) const {
real_number diff_x = std::abs(apply_pbc(x1,IDXC_X)-apply_pbc(x2,IDXC_X)+xshift_coef*spatial_box_width[IDXC_X]);
assert(diff_x <= 2*cutoff_radius);
const real_number xshift_coef, const real_number yshift_coef, const real_number zshift_coef,
real_number &diff_x, real_number &diff_y, real_number &diff_z) const {
diff_x = apply_pbc(x1,IDXC_X)-apply_pbc(x2,IDXC_X)+xshift_coef*spatial_box_width[IDXC_X];
assert(std::abs(diff_x) <= 2*cutoff_radius);
real_number diff_y = std::abs(apply_pbc(y1,IDXC_X)-apply_pbc(y2,IDXC_X)+yshift_coef*spatial_box_width[IDXC_Y]);
assert(diff_y <= 2*cutoff_radius);
diff_y = apply_pbc(y1,IDXC_X)-apply_pbc(y2,IDXC_X)+yshift_coef*spatial_box_width[IDXC_Y];
assert(std::abs(diff_y) <= 2*cutoff_radius);
real_number diff_z = std::abs(apply_pbc(z1,IDXC_X)-apply_pbc(z2,IDXC_X)+zshift_coef*spatial_box_width[IDXC_Z]);
assert(diff_z <= 2*cutoff_radius);
diff_z = apply_pbc(z1,IDXC_X)-apply_pbc(z2,IDXC_X)+zshift_coef*spatial_box_width[IDXC_Z];
assert(std::abs(diff_z) <= 2*cutoff_radius);
return (diff_x*diff_x) + (diff_y*diff_y) + (diff_z*diff_z);
}
......@@ -333,14 +334,14 @@ public:
});
// Permute array using buffer
// permute 4th function parameter using buffer, based on information in first 3 parameters
// permute 4th function parameter using buffer, based on information in first 3 parameters
std::vector<unsigned char> buffer;
permute_copy<real_number, size_particle_positions>(
current_offset_particles_for_partition[idxPartition],
current_offset_particles_for_partition[idxPartition],
current_my_nb_particles_per_partition[idxPartition],
part_to_sort.data(),
particles_positions,
&buffer);
particles_positions,
&buffer);
for(int idx_rhs = 0 ; idx_rhs < nb_rhs ; ++idx_rhs){
permute_copy<real_number, size_particle_rhs>(
current_offset_particles_for_partition[idxPartition],
......@@ -350,17 +351,17 @@ public:
&buffer);
}
permute_copy<partsize_t, 1>(
current_offset_particles_for_partition[idxPartition],
current_offset_particles_for_partition[idxPartition],
current_my_nb_particles_per_partition[idxPartition],
part_to_sort.data(),
inout_index_particles,
&buffer);
inout_index_particles,
&buffer);
permute_copy<long int, 1>(
current_offset_particles_for_partition[idxPartition],
current_offset_particles_for_partition[idxPartition],
current_my_nb_particles_per_partition[idxPartition],
part_to_sort.data(),
particles_coord.get(),
&buffer);
particles_coord.get(),
&buffer);
}
}
......@@ -654,12 +655,13 @@ public:
const std::vector<std::pair<partsize_t,partsize_t>>* neighbors[27];
long int neighbors_indexes[27];
std::array<real_number,3> shift[27];
real_number diff_x, diff_y, diff_z;
const int nbNeighbors = my_tree.getNeighbors(
current_cell_idx,
neighbors,
neighbors_indexes,
shift,
true);
current_cell_idx,
neighbors,
neighbors_indexes,
shift,
true);
// with other interval
for(int idx_neighbor = 0 ; idx_neighbor < nbNeighbors ; ++idx_neighbor){
......@@ -669,13 +671,18 @@ public:
for(partsize_t idx_p1 = 0 ; idx_p1 < nb_parts_in_cell ; ++idx_p1){
for(partsize_t idx_p2 = 0 ; idx_p2 < (*neighbors[idx_neighbor])[idx_2].second ; ++idx_p2){
const real_number dist_r2 = compute_distance_r2(
descriptor.toCompute[(idxPart+idx_p1)*size_particle_positions + IDXC_X],
descriptor.toCompute[(idxPart+idx_p1)*size_particle_positions + IDXC_X],
descriptor.toCompute[(idxPart+idx_p1)*size_particle_positions + IDXC_Y],
descriptor.toCompute[(idxPart+idx_p1)*size_particle_positions + IDXC_Z],
particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions + IDXC_X],
particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions + IDXC_Y],
particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions + IDXC_Z],
shift[idx_neighbor][IDXC_X], shift[idx_neighbor][IDXC_Y], shift[idx_neighbor][IDXC_Z]);
shift[idx_neighbor][IDXC_X],
shift[idx_neighbor][IDXC_Y],
shift[idx_neighbor][IDXC_Z],
diff_x,
diff_y,
diff_z);
if(dist_r2 < cutoff_radius_compute*cutoff_radius_compute){
computer_thread.template compute_interaction<size_particle_positions, size_particle_rhs>(
descriptor.indexes[(idxPart+idx_p1)],
......@@ -685,10 +692,10 @@ public:
&particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions],
&particles_current_rhs[0][((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_rhs],
dist_r2,
cutoff_radius_compute,
shift[idx_neighbor][IDXC_X],
shift[idx_neighbor][IDXC_Y],
shift[idx_neighbor][IDXC_Z]);
cutoff_radius_compute,
diff_x,
diff_y,
diff_z);
}
}
}
......@@ -761,13 +768,15 @@ public:
// self interval
for(partsize_t idx_p1 = 0 ; idx_p1 < intervals[idx_1].second ; ++idx_p1){
for(partsize_t idx_p2 = idx_p1+1 ; idx_p2 < intervals[idx_1].second ; ++idx_p2){
real_number diff_x, diff_y, diff_z;
const real_number dist_r2 = compute_distance_r2(particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDXC_X],
particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDXC_Y],
particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDXC_Z],
particles_positions[(intervals[idx_1].first+idx_p2)*size_particle_positions + IDXC_X],
particles_positions[(intervals[idx_1].first+idx_p2)*size_particle_positions + IDXC_Y],
particles_positions[(intervals[idx_1].first+idx_p2)*size_particle_positions + IDXC_Z],
0, 0, 0);
0, 0, 0,
diff_x, diff_y, diff_z);
if(dist_r2 < cutoff_radius_compute*cutoff_radius_compute){
computer_thread.template compute_interaction<size_particle_positions,size_particle_rhs>(
inout_index_particles[(intervals[idx_1].first+idx_p1)],
......@@ -776,7 +785,7 @@ public:
inout_index_particles[(intervals[idx_1].first+idx_p2)],
&particles_positions[(intervals[idx_1].first+idx_p2)*size_particle_positions],
&particles_current_rhs[0][(intervals[idx_1].first+idx_p2)*size_particle_rhs],
dist_r2, cutoff_radius_compute, 0, 0, 0);
dist_r2, cutoff_radius_compute, diff_x, diff_y, diff_z);
}
}
}
......@@ -785,13 +794,15 @@ public:
for(size_t idx_2 = idx_1+1 ; idx_2 < intervals.size() ; ++idx_2){
for(partsize_t idx_p1 = 0 ; idx_p1 < intervals[idx_1].second ; ++idx_p1){
for(partsize_t idx_p2 = 0 ; idx_p2 < intervals[idx_2].second ; ++idx_p2){
real_number diff_x, diff_y, diff_z;
const real_number dist_r2 = compute_distance_r2(particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDXC_X],
particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDXC_Y],
particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDXC_Z],
particles_positions[(intervals[idx_2].first+idx_p2)*size_particle_positions + IDXC_X],
particles_positions[(intervals[idx_2].first+idx_p2)*size_particle_positions + IDXC_Y],
particles_positions[(intervals[idx_2].first+idx_p2)*size_particle_positions + IDXC_Z],
0, 0, 0);
0, 0, 0,
diff_x, diff_y, diff_z);
if(dist_r2 < cutoff_radius_compute*cutoff_radius_compute){
computer_thread.template compute_interaction<size_particle_positions,size_particle_rhs>(
inout_index_particles[(intervals[idx_1].first+idx_p1)],
......@@ -800,7 +811,7 @@ public:
inout_index_particles[(intervals[idx_2].first+idx_p2)],
&particles_positions[(intervals[idx_2].first+idx_p2)*size_particle_positions],
&particles_current_rhs[0][(intervals[idx_2].first+idx_p2)*size_particle_rhs],
dist_r2, cutoff_radius_compute, 0, 0, 0);
dist_r2, cutoff_radius_compute, diff_x, diff_y, diff_z);
}
}
}
......@@ -821,13 +832,15 @@ public:
for(size_t idx_2 = 0 ; idx_2 < (*neighbors[idx_neighbor]).size() ; ++idx_2){
for(partsize_t idx_p1 = 0 ; idx_p1 < intervals[idx_1].second ; ++idx_p1){
for(partsize_t idx_p2 = 0 ; idx_p2 < (*neighbors[idx_neighbor])[idx_2].second ; ++idx_p2){
real_number diff_x, diff_y, diff_z;
const real_number dist_r2 = compute_distance_r2(particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDXC_X],
particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDXC_Y],
particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDXC_Z],
particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions + IDXC_X],
particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions + IDXC_Y],
particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions + IDXC_Z],
shift[idx_neighbor][IDXC_X], shift[idx_neighbor][IDXC_Y], shift[idx_neighbor][IDXC_Z]);
shift[idx_neighbor][IDXC_X], shift[idx_neighbor][IDXC_Y], shift[idx_neighbor][IDXC_Z],
diff_x, diff_y, diff_z);
if(dist_r2 < cutoff_radius_compute*cutoff_radius_compute){
computer_thread.template compute_interaction<size_particle_positions,size_particle_rhs>(
inout_index_particles[(intervals[idx_1].first+idx_p1)],
......@@ -836,7 +849,8 @@ public:
inout_index_particles[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)],
&particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions],
&particles_current_rhs[0][((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_rhs],
dist_r2, cutoff_radius_compute, shift[idx_neighbor][IDXC_X], shift[idx_neighbor][IDXC_Y], shift[idx_neighbor][IDXC_Z]);
dist_r2, cutoff_radius_compute,
diff_x, diff_y, diff_z);
}
}
}
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment