diff --git a/bfps/cpp/particles/p2p_computer.hpp b/bfps/cpp/particles/p2p_computer.hpp index efe0e5e321f190cf26a66d68c35e828de6ffddb1..eb77729fd0b9d6cdb0ad2586df3bf78696c1ef59 100644 --- a/bfps/cpp/particles/p2p_computer.hpp +++ b/bfps/cpp/particles/p2p_computer.hpp @@ -8,7 +8,7 @@ class p2p_computer{ public: template <int size_particle_rhs> void init_result_array(real_number rhs[], const partsize_t nbParticles) const{ - memset(rhs, 0, sizeof(real_number)*nbParticles); + memset(rhs, 0, sizeof(real_number)*nbParticles*size_particle_rhs); } template <int size_particle_rhs> @@ -23,7 +23,8 @@ public: template <int size_particle_positions, int size_particle_rhs> void compute_interaction(const real_number pos_part1[], real_number rhs_part1[], const real_number pos_part2[], real_number rhs_part2[], - const real_number dist_pow2) const{ + const real_number dist_pow2, + const real_number xshift_coef, const real_number yshift_coef, const real_number zshift_coef) const{ rhs_part1[0] += 1; rhs_part2[0] += 1; } diff --git a/bfps/cpp/particles/p2p_distr_mpi.hpp b/bfps/cpp/particles/p2p_distr_mpi.hpp index 1709dc7284742616eb66d029c5512cc2ec53f9da..aacd5a2f79920502613ab394063a2af226a8c254 100644 --- a/bfps/cpp/particles/p2p_distr_mpi.hpp +++ b/bfps/cpp/particles/p2p_distr_mpi.hpp @@ -210,21 +210,16 @@ public: } real_number compute_distance_r2(const real_number x1, const real_number y1, const real_number z1, - const real_number x2, const real_number y2, const real_number z2) const { - real_number diff_x = std::abs(x1-x2); - while(diff_x > spatial_box_width[IDX_X]/2){ - diff_x = std::abs(diff_x - spatial_box_width[IDX_X]); - } + const real_number x2, const real_number y2, const real_number z2, + const real_number xshift_coef, const real_number yshift_coef, const real_number zshift_coef) const { + real_number diff_x = std::abs(x1-x2+xshift_coef*spatial_box_width[IDX_X]); + assert(diff_x <= 2*cutoff_radius); - real_number diff_y = std::abs(y1-y2); - while(diff_y > spatial_box_width[IDX_Y]/2){ - diff_y = std::abs(diff_y - spatial_box_width[IDX_Y]); - } + real_number diff_y = std::abs(y1-y2+yshift_coef*spatial_box_width[IDX_Y]); + assert(diff_y <= 2*cutoff_radius); - real_number diff_z = std::abs(z1-z2); - while(diff_z > spatial_box_width[IDX_Z]/2){ - diff_z = std::abs(diff_z - spatial_box_width[IDX_Z]); - } + real_number diff_z = std::abs(z1-z2+zshift_coef*spatial_box_width[IDX_Z]); + assert(diff_z <= 2*cutoff_radius); return (diff_x*diff_x) + (diff_y*diff_y) + (diff_z*diff_z); } @@ -401,8 +396,8 @@ public: // Find process with at least one neighbor { -// std::cout << my_rank << " my_top_z_cell_level " << my_top_z_cell_level << std::endl; -// std::cout << my_rank << " my_down_z_cell_level " << my_down_z_cell_level << std::endl; +// std::cout << my_rank << ">> my_top_z_cell_level " << my_top_z_cell_level << std::endl; +// std::cout << my_rank << ">> my_down_z_cell_level " << my_down_z_cell_level << std::endl; // std::cout.flush();// TODO int dest_proc = (my_rank+1)%nb_processes_involved; @@ -417,8 +412,8 @@ public: } // std::cout << my_rank << " dest_proc " << dest_proc << std::endl; -// std::cout << my_rank << " first_cell_level_proc(dest_proc) " << first_cell_level_proc(dest_proc) << std::endl; -// std::cout << my_rank << " last_cell_level_proc(dest_proc) " << last_cell_level_proc(dest_proc) << std::endl; +// std::cout << my_rank << ">> first_cell_level_proc(dest_proc) " << first_cell_level_proc(dest_proc) << std::endl; +// std::cout << my_rank << ">> last_cell_level_proc(dest_proc) " << last_cell_level_proc(dest_proc) << std::endl; // std::cout.flush();// TODO NeighborDescriptor descriptor; @@ -427,12 +422,12 @@ public: descriptor.nbParticlesToExchange = particles_offset_layers[my_nb_cell_levels] - particles_offset_layers[my_nb_cell_levels-nb_levels_to_send]; descriptor.isRecv = false; -// std::cout << my_rank << "SEND" << std::endl; -// std::cout << "descriptor.destProc " << descriptor.destProc << std::endl; -// std::cout << "descriptor.nbLevelsToExchange " << descriptor.nbLevelsToExchange << std::endl; -// std::cout << "descriptor.nbParticlesToExchange " << descriptor.nbParticlesToExchange << std::endl; -// std::cout << "descriptor.isRecv " << descriptor.isRecv << std::endl; -// std::cout << "neigDescriptors.size() " << neigDescriptors.size() << std::endl; +// std::cout << my_rank << " SEND" << std::endl; +// std::cout << ">> descriptor.destProc " << descriptor.destProc << std::endl; +// std::cout << ">> descriptor.nbLevelsToExchange " << descriptor.nbLevelsToExchange << std::endl; +// std::cout << ">> descriptor.nbParticlesToExchange " << descriptor.nbParticlesToExchange << std::endl; +// std::cout << ">> descriptor.isRecv " << descriptor.isRecv << std::endl; +// std::cout << ">> neigDescriptors.size() " << neigDescriptors.size() << std::endl; // std::cout.flush();// TODO neigDescriptors.emplace_back(std::move(descriptor)); @@ -467,12 +462,12 @@ public: neigDescriptors.emplace_back(std::move(descriptor)); // std::cout << my_rank << "] RECV" << std::endl; -// std::cout << "descriptor.destProc " << descriptor.destProc << std::endl; -// std::cout << "descriptor.nbLevelsToExchange " << descriptor.nbLevelsToExchange << std::endl; -// std::cout << "descriptor.nbParticlesToExchange " << descriptor.nbParticlesToExchange << std::endl; -// std::cout << "descriptor.nbParticlesToExchange " << descriptor.nbParticlesToExchange << std::endl; -// std::cout << "descriptor.isRecv " << descriptor.isRecv << std::endl; -// std::cout << "neigDescriptors.size() " << neigDescriptors.size() << std::endl; +// std::cout << ">> descriptor.destProc " << descriptor.destProc << std::endl; +// std::cout << ">> descriptor.nbLevelsToExchange " << descriptor.nbLevelsToExchange << std::endl; +// std::cout << ">> descriptor.nbParticlesToExchange " << descriptor.nbParticlesToExchange << std::endl; +// std::cout << ">> descriptor.nbParticlesToExchange " << descriptor.nbParticlesToExchange << std::endl; +// std::cout << ">> descriptor.isRecv " << descriptor.isRecv << std::endl; +// std::cout << ">> neigDescriptors.size() " << neigDescriptors.size() << std::endl; // std::cout.flush();// TODO src_proc = (src_proc-1+nb_processes_involved)%nb_processes_involved; @@ -544,6 +539,7 @@ public: #pragma omp master { while(mpiRequests.size()){ + TIMEZONE("wait-loop"); assert(mpiRequests.size() == whatNext.size()); int idxDone = int(mpiRequests.size()); @@ -561,6 +557,7 @@ public: /// Data to exchange particles ////////////////////////////////////////////////////////////////////// if(releasedAction.first == RECV_PARTICLES){ + TIMEZONE("post-recv-particles"); NeighborDescriptor& descriptor = neigDescriptors[releasedAction.second]; assert(descriptor.isRecv); const int destProc = descriptor.destProc; @@ -589,6 +586,7 @@ public: /// Computation ////////////////////////////////////////////////////////////////////// if(releasedAction.first == COMPUTE_PARTICLES){ + TIMEZONE("compute-particles"); NeighborDescriptor& descriptor = neigDescriptors[releasedAction.second]; assert(descriptor.isRecv); const partsize_t NbParticlesToReceive = descriptor.nbParticlesToExchange; @@ -613,7 +611,8 @@ public: const std::vector<std::pair<partsize_t,partsize_t>>* neighbors[27]; long int neighbors_indexes[27]; - const int nbNeighbors = my_tree.getNeighbors(current_cell_idx, neighbors, neighbors_indexes, true); + std::array<real_number,3> shift[27]; + const int nbNeighbors = my_tree.getNeighbors(current_cell_idx, neighbors, neighbors_indexes, shift, true); // for(int idx_test = 0 ; idx_test < nb_parts_in_cell ; ++idx_test){ // TODO // real_number totest[3] = {8.570442e-01, 7.173084e-02, 8.279754e-03}; @@ -638,14 +637,15 @@ public: descriptor.toCompute[(idxPart+idx_p1)*size_particle_positions + IDX_Z], particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions + IDX_X], particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions + IDX_Y], - particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions + IDX_Z]); + particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions + IDX_Z], + shift[idx_neighbor][IDX_X], shift[idx_neighbor][IDX_Y], shift[idx_neighbor][IDX_Z]); if(dist_r2 < cutoff_radius_compute*cutoff_radius_compute){ in_computer.template compute_interaction<size_particle_positions,size_particle_rhs>( &descriptor.toCompute[(idxPart+idx_p1)*size_particle_positions], &descriptor.results[(idxPart+idx_p1)*size_particle_rhs], &particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions], &particles_current_rhs[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_rhs], - dist_r2); + dist_r2, shift[idx_neighbor][IDX_X], shift[idx_neighbor][IDX_Y], shift[idx_neighbor][IDX_Z]); } // if(inout_index_particles[(*neighbors[idx_neighbor])[idx_2].first+idx_p2] == 356){// TODO @@ -693,6 +693,7 @@ public: /// Merge ////////////////////////////////////////////////////////////////////// if(releasedAction.first == MERGE_PARTICLES){ + TIMEZONE("merge"); NeighborDescriptor& descriptor = neigDescriptors[releasedAction.second]; assert(descriptor.isRecv == false); assert(descriptor.toRecvAndMerge != nullptr); @@ -709,6 +710,7 @@ public: // Compute self data for(const auto& iter_cell : my_tree){ + TIMEZONE("proceed-leaf"); const std::vector<std::pair<partsize_t,partsize_t>>& intervals = iter_cell.second; for(size_t idx_1 = 0 ; idx_1 < intervals.size() ; ++idx_1){ @@ -742,14 +744,15 @@ public: particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDX_Z], particles_positions[(intervals[idx_1].first+idx_p2)*size_particle_positions + IDX_X], particles_positions[(intervals[idx_1].first+idx_p2)*size_particle_positions + IDX_Y], - particles_positions[(intervals[idx_1].first+idx_p2)*size_particle_positions + IDX_Z]); + particles_positions[(intervals[idx_1].first+idx_p2)*size_particle_positions + IDX_Z], + 0, 0, 0); if(dist_r2 < cutoff_radius_compute*cutoff_radius_compute){ in_computer.template compute_interaction<size_particle_positions,size_particle_rhs>( &particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions], &particles_current_rhs[(intervals[idx_1].first+idx_p1)*size_particle_rhs], &particles_positions[(intervals[idx_1].first+idx_p2)*size_particle_positions], &particles_current_rhs[(intervals[idx_1].first+idx_p2)*size_particle_rhs], - dist_r2); + dist_r2, 0, 0, 0); } // if(((inout_index_particles[(intervals[idx_1].first+idx_p1)] == 356) @@ -784,14 +787,15 @@ public: particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDX_Z], particles_positions[(intervals[idx_2].first+idx_p2)*size_particle_positions + IDX_X], particles_positions[(intervals[idx_2].first+idx_p2)*size_particle_positions + IDX_Y], - particles_positions[(intervals[idx_2].first+idx_p2)*size_particle_positions + IDX_Z]); + particles_positions[(intervals[idx_2].first+idx_p2)*size_particle_positions + IDX_Z], + 0, 0, 0); if(dist_r2 < cutoff_radius_compute*cutoff_radius_compute){ in_computer.template compute_interaction<size_particle_positions,size_particle_rhs>( &particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions], &particles_current_rhs[(intervals[idx_1].first+idx_p1)*size_particle_rhs], &particles_positions[(intervals[idx_2].first+idx_p2)*size_particle_positions], &particles_current_rhs[(intervals[idx_2].first+idx_p2)*size_particle_rhs], - dist_r2); + dist_r2, 0, 0, 0); } // if(((inout_index_particles[(intervals[idx_1].first+idx_p1)] == 356) @@ -823,7 +827,8 @@ public: const long int currenct_cell_idx = iter_cell.first; const std::vector<std::pair<partsize_t,partsize_t>>* neighbors[27]; long int neighbors_indexes[27]; - const int nbNeighbors = my_tree.getNeighbors(currenct_cell_idx, neighbors, neighbors_indexes, false); + std::array<real_number,3> shift[27]; + const int nbNeighbors = my_tree.getNeighbors(currenct_cell_idx, neighbors, neighbors_indexes, shift, false); // if(((currenct_cell_idx == 785))){// TODO // printf("box %ld:\n", iter_cell.first); @@ -843,14 +848,15 @@ public: particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDX_Z], particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions + IDX_X], particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions + IDX_Y], - particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions + IDX_Z]); + particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions + IDX_Z], + shift[idx_neighbor][IDX_X], shift[idx_neighbor][IDX_Y], shift[idx_neighbor][IDX_Z]); if(dist_r2 < cutoff_radius_compute*cutoff_radius_compute){ in_computer.template compute_interaction<size_particle_positions,size_particle_rhs>( &particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions], &particles_current_rhs[(intervals[idx_1].first+idx_p1)*size_particle_rhs], &particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions], &particles_current_rhs[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_rhs], - dist_r2); + dist_r2, shift[idx_neighbor][IDX_X], shift[idx_neighbor][IDX_Y], shift[idx_neighbor][IDX_Z]); } // if(((inout_index_particles[(intervals[idx_1].first+idx_p1)] == 356) diff --git a/bfps/cpp/particles/p2p_tree.hpp b/bfps/cpp/particles/p2p_tree.hpp index 9f146644120af7c38b65e24dc110545c311f516a..52cb2f2d967ebc3533c8e3c98455734d26c450cb 100644 --- a/bfps/cpp/particles/p2p_tree.hpp +++ b/bfps/cpp/particles/p2p_tree.hpp @@ -46,7 +46,9 @@ public: return emptyCell; } - int getNeighbors(const long int idx, const CellClass* output[27], long int output_indexes[27], const bool include_target) const{ + template <class ShiftType> + int getNeighbors(const long int idx, const CellClass* output[27], long int output_indexes[27], + std::array<ShiftType,3> shift[27], const bool include_target) const{ int nbNeighbors = 0; std::fill_n(output, 27, nullptr); @@ -57,29 +59,38 @@ public: for(long int neigh_x = -1 ; neigh_x <= 1 ; ++neigh_x){ long int neigh_x_pbc = neigh_x+idx_x; + ShiftType shift_x = 0; if(neigh_x_pbc < 0){ neigh_x_pbc += nb_cell_levels[IDX_X]; + shift_x = 1; } else if(nb_cell_levels[IDX_X] <= neigh_x_pbc){ neigh_x_pbc -= nb_cell_levels[IDX_X]; + shift_x = -1; } for(long int neigh_y = -1 ; neigh_y <= 1 ; ++neigh_y){ long int neigh_y_pbc = neigh_y+idx_y; + ShiftType shift_y = 0; if(neigh_y_pbc < 0){ neigh_y_pbc += nb_cell_levels[IDX_Y]; + shift_y = 1; } else if(nb_cell_levels[IDX_Y] <= neigh_y_pbc){ neigh_y_pbc -= nb_cell_levels[IDX_Y]; + shift_y = -1; } for(long int neigh_z = -1 ; neigh_z <= 1 ; ++neigh_z){ long int neigh_z_pbc = neigh_z+idx_z; + ShiftType shift_z = 0; if(neigh_z_pbc < 0){ neigh_z_pbc += nb_cell_levels[IDX_Z]; + shift_z = 1; } else if(nb_cell_levels[IDX_Z] <= neigh_z_pbc){ neigh_z_pbc -= nb_cell_levels[IDX_Z]; + shift_z = -1; } if(include_target || neigh_x_pbc != idx_x || neigh_y_pbc != idx_y || neigh_z_pbc != idx_z){ @@ -90,6 +101,11 @@ public: if(iter != data.end()){ output[nbNeighbors] = &(iter->second); output_indexes[nbNeighbors] = idx_neigh; + + shift[nbNeighbors][IDX_X] = shift_x; + shift[nbNeighbors][IDX_Y] = shift_y; + shift[nbNeighbors][IDX_Z] = shift_z; + nbNeighbors += 1; } }