Skip to content
Snippets Groups Projects
Commit eb1b7d36 authored by Berenger Bramas's avatar Berenger Bramas
Browse files

Add first version of particle particle interactions

parent 0ce7d47f
No related branches found
No related tags found
1 merge request!23WIP: Feature/use cmake
mpicxx -g main_tocompile.cpp -o /tmp/main_test_part.exe -I/home/bbramas/Projects/bfps/bfps/cpp/ -I/home/bbramas/Downloads/hdf5install/include -L/home/bbramas/Downloads/hdf5install/lib -lhdf5 -lsz -lz
mpicxx -fPIC -rdynamic -g NSVE-v2.0.1-single.cpp -o /tmp/NSVE-v2.0.1-single.exe -I/home/bbramas/Projects/bfps/bfps/cpp/ -I/home/bbramas/Downloads/hdf5install/include -I/home/bbramas/Downloads/fftw-3.3.4/install/include/ -L/home/bbramas/Downloads/hdf5install/lib -lhdf5 -lsz -lz -L/home/bbramas/.local/lib/python2.7/site-packages/bfps-2.0.1.post31+g12693ea-py2.7.egg/bfps/ -lbfps -fopenmp -lgomp -L/home/bbramas/Downloads/fftw-3.3.4/install/lib/ -lfftw3_mpi -lfftw3f_mpi -lfftw3_omp -lfftw3f_omp -lfftw3 -lfftw3f
#ifndef P2P_COMPUTER_HPP
#define P2P_COMPUTER_HPP
#include <cstring>
template <class real_number, class partsize_t>
class p2p_computer{
public:
template <int size_particle_rhs>
void init_result_array(real_number rhs[], const partsize_t nbParticles) const{
memset(rhs, 0, sizeof(real_number)*nbParticles);
}
template <int size_particle_rhs>
void reduce_particles_rhs(real_number rhs_dst[], const real_number rhs_src[], const partsize_t nbParticles) const{
for(int idx_part = 0 ; idx_part < nbParticles ; ++idx_part){
for(int idx_rhs = 0 ; idx_rhs < size_particle_rhs ; ++idx_rhs){
rhs_dst[idx_part*size_particle_rhs+idx_rhs] += rhs_src[idx_part*size_particle_rhs+idx_rhs];
}
}
}
template <int size_particle_positions, int size_particle_rhs>
void compute_interaction(const real_number pos_part1[], real_number rhs_part1[],
const real_number pos_part2[], real_number rhs_part2[],
const real_number dist_pow2) const{
rhs_part1[0] += 1;
rhs_part2[0] += 1;
}
};
#endif
#ifndef P2P_DISTR_MPI_HPP
#define P2P_DISTR_MPI_HPP
#include <mpi.h>
#include <vector>
#include <memory>
#include <cassert>
#include <type_traits>
#include <omp.h>
#include <algorithm>
#include "scope_timer.hpp"
#include "particles_utils.hpp"
#include "p2p_tree.hpp"
/*
- method to reorder each particle section following the cutoff grid (will permite index too)
- exchange particles (with upper only) and receive particle (from lower only)
- 1 message at a time! so need the offset of each cell of the cutoff grid
- iterate on what has been received with my own particles, fill both rhs
- send back the rhs
- merge rhs
- update particles property
*/
template <class partsize_t, class real_number>
class p2p_distr_mpi {
protected:
static const int MaxNbRhs = 100;
enum MpiTag{
TAG_NB_PARTICLES,
TAG_POSITION_PARTICLES,
TAG_RESULT_PARTICLES,
};
struct NeighborDescriptor{
partsize_t nbParticlesToExchange;
int destProc;
int nbLevelsToExchange;
bool isRecv;
std::unique_ptr<real_number[]> toRecvAndMerge;
std::unique_ptr<real_number[]> toCompute;
std::unique_ptr<real_number[]> results;
};
enum Action{
NOTHING_TODO,
RECV_PARTICLES,
COMPUTE_PARTICLES,
RELEASE_BUFFER_PARTICLES,
MERGE_PARTICLES,
RECV_MOVE_NB_LOW,
RECV_MOVE_NB_UP,
RECV_MOVE_LOW,
RECV_MOVE_UP
};
MPI_Comm current_com;
int my_rank;
int nb_processes;
int nb_processes_involved;
const std::pair<int,int> current_partition_interval;
const int current_partition_size;
const std::array<size_t,3> field_grid_dim;
std::unique_ptr<int[]> partition_interval_size_per_proc;
std::unique_ptr<int[]> partition_interval_offset_per_proc;
std::unique_ptr<partsize_t[]> current_offset_particles_for_partition;
std::vector<std::pair<Action,int>> whatNext;
std::vector<MPI_Request> mpiRequests;
std::vector<NeighborDescriptor> neigDescriptors;
std::array<real_number,3> spatial_box_width;
std::array<real_number,3> spatial_box_offset;
const real_number cutoff_radius;
std::array<long int,3> nb_cell_levels;
public:
////////////////////////////////////////////////////////////////////////////
p2p_distr_mpi(MPI_Comm in_current_com,
const std::pair<int,int>& in_current_partitions,
const std::array<size_t,3>& in_field_grid_dim,
const std::array<real_number,3>& in_spatial_box_width,
const std::array<real_number,3>& in_spatial_box_offset,
const real_number in_cutoff_radius)
: current_com(in_current_com),
my_rank(-1), nb_processes(-1),nb_processes_involved(-1),
current_partition_interval(in_current_partitions),
current_partition_size(current_partition_interval.second-current_partition_interval.first),
field_grid_dim(in_field_grid_dim),
spatial_box_width(in_spatial_box_width), spatial_box_offset(in_spatial_box_offset),
cutoff_radius(in_cutoff_radius){
AssertMpi(MPI_Comm_rank(current_com, &my_rank));
AssertMpi(MPI_Comm_size(current_com, &nb_processes));
partition_interval_size_per_proc.reset(new int[nb_processes]);
AssertMpi( MPI_Allgather( const_cast<int*>(&current_partition_size), 1, MPI_INT,
partition_interval_size_per_proc.get(), 1, MPI_INT,
current_com) );
assert(partition_interval_size_per_proc[my_rank] == current_partition_size);
partition_interval_offset_per_proc.reset(new int[nb_processes+1]);
partition_interval_offset_per_proc[0] = 0;
for(int idxProc = 0 ; idxProc < nb_processes ; ++idxProc){
partition_interval_offset_per_proc[idxProc+1] = partition_interval_offset_per_proc[idxProc] + partition_interval_size_per_proc[idxProc];
}
current_offset_particles_for_partition.reset(new partsize_t[current_partition_size+1]);
nb_processes_involved = nb_processes;
while(nb_processes_involved != 0 && partition_interval_size_per_proc[nb_processes_involved-1] == 0){
nb_processes_involved -= 1;
}
assert(nb_processes_involved != 0);
for(int idx_proc_involved = 0 ; idx_proc_involved < nb_processes_involved ; ++idx_proc_involved){
assert(partition_interval_size_per_proc[idx_proc_involved] != 0);
}
assert(int(field_grid_dim[IDX_Z]) == partition_interval_offset_per_proc[nb_processes_involved]);
nb_cell_levels[IDX_X] = spatial_box_width[IDX_X]/cutoff_radius;
nb_cell_levels[IDX_Y] = spatial_box_width[IDX_Y]/cutoff_radius;
nb_cell_levels[IDX_Z] = spatial_box_width[IDX_Z]/cutoff_radius;
}
virtual ~p2p_distr_mpi(){}
////////////////////////////////////////////////////////////////////////////
long int get_cell_coord_x_from_index(const long int index) const{
return index % nb_cell_levels[IDX_X];
}
long int get_cell_coord_y_from_index(const long int index) const{
return (index - get_cell_coord_z_from_index(index)*(nb_cell_levels[IDX_X]*nb_cell_levels[IDX_Y]))
/ nb_cell_levels[IDX_X];
}
long int get_cell_coord_z_from_index(const long int index) const{
return index / (nb_cell_levels[IDX_X]*nb_cell_levels[IDX_Y]);
}
long int first_cell_level_proc(const int dest_proc) const{
const real_number field_section_width_z = spatial_box_width[IDX_Z]/real_number(field_grid_dim[IDX_Z]);
return static_cast<long int>((field_section_width_z*real_number(partition_interval_offset_per_proc[dest_proc]))/cutoff_radius);
}
long int last_cell_level_proc(const int dest_proc) const{
const real_number field_section_width_z = spatial_box_width[IDX_Z]/real_number(field_grid_dim[IDX_Z]);
return static_cast<long int>((field_section_width_z*real_number(partition_interval_offset_per_proc[dest_proc+1])
- std::numeric_limits<real_number>::epsilon())/cutoff_radius);
}
std::array<long int,3> get_cell_coordinate(const real_number pos_x, const real_number pos_y,
const real_number pos_z) const {
const real_number diff_x = pos_x - spatial_box_offset[IDX_X];
const real_number diff_y = pos_y - spatial_box_offset[IDX_Y];
const real_number diff_z = pos_z - spatial_box_offset[IDX_Z];
std::array<long int,3> coord;
coord[IDX_X] = static_cast<long int>(diff_x/cutoff_radius);
coord[IDX_Y] = static_cast<long int>(diff_y/cutoff_radius);
coord[IDX_Z] = static_cast<long int>(diff_z/cutoff_radius);
return coord;
}
long int get_cell_idx(const real_number pos_x, const real_number pos_y,
const real_number pos_z) const {
std::array<long int,3> coord = get_cell_coordinate(pos_x, pos_y, pos_z);
return ((coord[IDX_Z]*nb_cell_levels[IDX_Y])+coord[IDX_Y])*nb_cell_levels[IDX_X]+coord[IDX_X];
}
real_number compute_distance_r2(const real_number x1, const real_number y1, const real_number z1,
const real_number x2, const real_number y2, const real_number z2) const {
return ((x1-x2)*(x1-x2)) + ((y1-y2)*(y1-y2)) + ((z1-z2)*(z1-z2));
}
template <int size_particle_positions, int size_particle_rhs>
struct ParticleView{
partsize_t p_index;
real_number* ptr_particles_positions;
real_number* ptr_particles_current_rhs;
partsize_t* ptr_global_idx;
long int* ptr_cell_idx;
void swap(ParticleView& p1, ParticleView& p2){
for(int idx_pos = 0 ; idx_pos < size_particle_positions ; ++idx_pos){
std::swap(p1.ptr_particles_positions[p1.p_index*size_particle_positions+idx_pos],
p2.ptr_particles_positions[p2.p_index*size_particle_positions+idx_pos]);
}
for(int idx_rhs = 0 ; idx_rhs < size_particle_rhs ; ++idx_rhs){
std::swap(p1.ptr_particles_current_rhs[p1.p_index*size_particle_rhs+idx_rhs],
p2.ptr_particles_current_rhs[p2.p_index*size_particle_rhs+idx_rhs]);
}
std::swap(p1.ptr_cell_idx[p1.p_index],p2.ptr_cell_idx[p2.p_index]);
std::swap(p1.ptr_global_idx[p1.p_index],p2.ptr_global_idx[p2.p_index]);
std::swap(p1.p_index,p2.p_index);
}
};
template <class computer_class, int size_particle_positions, int size_particle_rhs>
void compute_distr(computer_class& in_computer,
const partsize_t current_my_nb_particles_per_partition[],
real_number particles_positions[],
real_number particles_current_rhs[],
partsize_t inout_index_particles[]){
TIMEZONE("compute_distr");
// Some processes might not be involved
if(nb_processes_involved <= my_rank){
return;
}
const long int my_top_z_cell_level = last_cell_level_proc(my_rank);
const long int my_down_z_cell_level = first_cell_level_proc(my_rank);
const long int my_nb_cell_levels = 1+my_top_z_cell_level-my_down_z_cell_level;
current_offset_particles_for_partition[0] = 0;
partsize_t myTotalNbParticles = 0;
for(int idxPartition = 0 ; idxPartition < current_partition_size ; ++idxPartition){
myTotalNbParticles += current_my_nb_particles_per_partition[idxPartition];
current_offset_particles_for_partition[idxPartition+1] = current_offset_particles_for_partition[idxPartition] + current_my_nb_particles_per_partition[idxPartition];
}
// Compute box idx for each particle
std::unique_ptr<long int[]> particles_coord(new long int[current_offset_particles_for_partition[current_partition_size]]);
{
for(int idxPartition = 0 ; idxPartition < current_partition_size ; ++idxPartition){
#pragma omp parallel for schedule(static)
for(partsize_t idxPart = current_offset_particles_for_partition[idxPartition] ; idxPart < current_offset_particles_for_partition[idxPartition+1] ; ++idxPart ){
particles_coord[idxPart] = get_cell_idx(particles_positions[(idxPart)*size_particle_positions + IDX_X],
particles_positions[(idxPart)*size_particle_positions + IDX_Y],
particles_positions[(idxPart)*size_particle_positions + IDX_Z]);
assert(my_down_z_cell_level <= get_cell_coord_z_from_index(particles_coord[idxPart]));
assert(get_cell_coord_z_from_index(particles_coord[idxPart]) <= my_top_z_cell_level);
}
}
std::vector<ParticleView<size_particle_positions,size_particle_rhs>> part_to_sort;
// Sort each partition in cells
for(int idxPartition = 0 ; idxPartition < current_partition_size ; ++idxPartition){
part_to_sort.clear();
for(partsize_t idxPart = current_offset_particles_for_partition[idxPartition] ; idxPart < current_offset_particles_for_partition[idxPartition+1] ; ++idxPart ){
part_to_sort.emplace_back();
part_to_sort.back().p_index = idxPart;
part_to_sort.back().ptr_particles_positions = particles_positions;
part_to_sort.back().ptr_particles_current_rhs = particles_current_rhs;
part_to_sort.back().ptr_global_idx = inout_index_particles;
part_to_sort.back().ptr_cell_idx = particles_coord.get();
}
assert(part_to_sort.size() == (current_offset_particles_for_partition[idxPartition+1]-current_offset_particles_for_partition[idxPartition]));
std::sort(part_to_sort.begin(), part_to_sort.end(),
[](const ParticleView<size_particle_positions,size_particle_rhs>& p1,
const ParticleView<size_particle_positions,size_particle_rhs>& p2){
return p1.ptr_cell_idx[p1.p_index] < p2.ptr_cell_idx[p2.p_index];
});
}
}
// Build the tree
p2p_tree<std::vector<std::pair<partsize_t,partsize_t>>> my_tree(nb_cell_levels);
for(int idxPartition = 0 ; idxPartition < current_partition_size ; ++idxPartition){
long int current_cell_idx = -1;
partsize_t current_nb_particles_in_cell = 0;
partsize_t current_cell_offset = 0;
for(partsize_t idx_part = current_offset_particles_for_partition[idxPartition] ;
idx_part != current_offset_particles_for_partition[idxPartition+1]; ++idx_part){
if(particles_coord[idx_part] != current_cell_idx){
if(current_nb_particles_in_cell){
my_tree.getCell(current_cell_idx).emplace_back(current_cell_offset,current_nb_particles_in_cell);
}
current_cell_idx = particles_coord[idx_part];
current_nb_particles_in_cell = 1;
current_cell_offset = idx_part;
}
}
if(current_nb_particles_in_cell){
my_tree.getCell(current_cell_idx).emplace_back(current_cell_offset,current_nb_particles_in_cell);
}
}
printf("[%d] go from cutoff level %ld to %ld\n",
my_rank, my_down_z_cell_level, my_top_z_cell_level); // TODO remove
fflush(stdout); // TODO
// Offset per cell layers
std::unique_ptr<partsize_t[]> particles_offset_layers(new partsize_t[my_nb_cell_levels+1]());
for(int idxPartition = 0 ; idxPartition < current_partition_size ; ++idxPartition){
for(partsize_t idx_part = current_offset_particles_for_partition[idxPartition] ;
idx_part != current_offset_particles_for_partition[idxPartition+1]; ++idx_part){
assert(my_down_z_cell_level <= get_cell_coord_z_from_index(particles_coord[idx_part]));
assert(get_cell_coord_z_from_index(particles_coord[idx_part]) <= my_top_z_cell_level);
particles_offset_layers[get_cell_coord_z_from_index(particles_coord[idx_part])+1-my_down_z_cell_level] += 1;
}
}
for(size_t idx_layer = 0 ; idx_layer < my_nb_cell_levels ; ++idx_layer){
printf("[%d] nb particles in cutoff level %llu are %ld\n",
my_rank, idx_layer, particles_offset_layers[idx_layer+1]); // TODO remove
fflush(stdout); // TODO
particles_offset_layers[idx_layer+1] += particles_offset_layers[idx_layer];
}
// Reset vectors
assert(whatNext.size() == 0);
assert(mpiRequests.size() == 0);
neigDescriptors.clear();
// Find process with at least one neighbor
{
std::cout << my_rank << " my_top_z_cell_level " << my_top_z_cell_level << std::endl;
std::cout << my_rank << " my_down_z_cell_level " << my_down_z_cell_level << std::endl;
std::cout.flush();// TODO
int dest_proc = (my_rank+1)%nb_processes_involved;
while(dest_proc != my_rank
&& (my_top_z_cell_level == first_cell_level_proc(dest_proc)
|| (my_top_z_cell_level+1)%nb_cell_levels[IDX_Z] == first_cell_level_proc(dest_proc))){
// Find if we have to send 1 or 2 cell levels
int nb_levels_to_send = 1;
if(my_nb_cell_levels > 1 // I have more than one level
&& (my_top_z_cell_level-1+2)%nb_cell_levels[IDX_Z] <= last_cell_level_proc(dest_proc)){
nb_levels_to_send += 1;
}
std::cout << my_rank << " dest_proc " << dest_proc << std::endl;
std::cout << my_rank << " first_cell_level_proc(dest_proc) " << first_cell_level_proc(dest_proc) << std::endl;
std::cout << my_rank << " last_cell_level_proc(dest_proc) " << last_cell_level_proc(dest_proc) << std::endl;
std::cout.flush();// TODO
NeighborDescriptor descriptor;
descriptor.destProc = dest_proc;
descriptor.nbLevelsToExchange = nb_levels_to_send;
descriptor.nbParticlesToExchange = particles_offset_layers[my_nb_cell_levels] - particles_offset_layers[my_nb_cell_levels-nb_levels_to_send];
descriptor.isRecv = false;
std::cout << my_rank << "SEND" << std::endl;
std::cout << "descriptor.destProc " << descriptor.destProc << std::endl;
std::cout << "descriptor.nbLevelsToExchange " << descriptor.nbLevelsToExchange << std::endl;
std::cout << "descriptor.nbParticlesToExchange " << descriptor.nbParticlesToExchange << std::endl;
std::cout << "descriptor.nbParticlesToExchange " << descriptor.nbParticlesToExchange << std::endl;
std::cout << "descriptor.isRecv " << descriptor.isRecv << std::endl;
std::cout << "neigDescriptors.size() " << neigDescriptors.size() << std::endl;
std::cout.flush();// TODO
neigDescriptors.emplace_back(std::move(descriptor));
dest_proc = (dest_proc+1)%nb_processes_involved;
}
std::cout << my_rank << " NO dest_proc " << dest_proc << std::endl;
std::cout << my_rank << " NO first_cell_level_proc(dest_proc) " << first_cell_level_proc(dest_proc) << std::endl;
std::cout.flush();// TODO
int src_proc = (my_rank-1+nb_processes_involved)%nb_processes_involved;
while(src_proc != my_rank
&& (last_cell_level_proc(src_proc) == my_down_z_cell_level
|| (last_cell_level_proc(src_proc)+1)%nb_cell_levels[IDX_Z] == my_down_z_cell_level)){
// Find if we have to send 1 or 2 cell levels
int nb_levels_to_recv = 1;
if(my_nb_cell_levels > 1 // I have more than one level
&& first_cell_level_proc(src_proc) <= (my_down_z_cell_level-1+2)%nb_cell_levels[IDX_Z]){
nb_levels_to_recv += 1;
}
std::cout << my_rank << " src_proc " << src_proc << std::endl;
std::cout << my_rank << " first_cell_level_proc(src_proc) " << first_cell_level_proc(src_proc) << std::endl;
std::cout.flush();// TODO
NeighborDescriptor descriptor;
descriptor.destProc = src_proc;
descriptor.nbLevelsToExchange = nb_levels_to_recv;
descriptor.nbParticlesToExchange = -1;
descriptor.isRecv = true;
neigDescriptors.emplace_back(std::move(descriptor));
std::cout << my_rank << "] RECV" << std::endl;
std::cout << "descriptor.destProc " << descriptor.destProc << std::endl;
std::cout << "descriptor.nbLevelsToExchange " << descriptor.nbLevelsToExchange << std::endl;
std::cout << "descriptor.nbParticlesToExchange " << descriptor.nbParticlesToExchange << std::endl;
std::cout << "descriptor.nbParticlesToExchange " << descriptor.nbParticlesToExchange << std::endl;
std::cout << "descriptor.isRecv " << descriptor.isRecv << std::endl;
std::cout << "neigDescriptors.size() " << neigDescriptors.size() << std::endl;
std::cout.flush();// TODO
src_proc = (src_proc-1+nb_processes_involved)%nb_processes_involved;
}
std::cout << my_rank << " NO src_proc " << src_proc << std::endl;
std::cout << my_rank << " NO first_cell_level_proc(src_proc) " << first_cell_level_proc(src_proc) << std::endl;
std::cout.flush();// TODO
}
//////////////////////////////////////////////////////////////////////
/// Exchange the number of particles in each partition
/// Could involve only here but I do not think it will be a problem
//////////////////////////////////////////////////////////////////////
assert(whatNext.size() == 0);
assert(mpiRequests.size() == 0);
for(int idxDescr = 0 ; idxDescr < int(neigDescriptors.size()) ; ++idxDescr){
NeighborDescriptor& descriptor = neigDescriptors[idxDescr];
if(descriptor.isRecv == false){
whatNext.emplace_back(std::pair<Action,int>{NOTHING_TODO, -1});
mpiRequests.emplace_back();
AssertMpi(MPI_Isend(const_cast<partsize_t*>(&descriptor.nbParticlesToExchange),
1, particles_utils::GetMpiType(partsize_t()),
descriptor.destProc, TAG_NB_PARTICLES,
current_com, &mpiRequests.back()));
if(descriptor.nbParticlesToExchange){
std::cout << my_rank << "] SEND_PARTICLES" << std::endl;
std::cout << "descriptor.nbParticlesToExchange " << descriptor.nbParticlesToExchange << std::endl;
std::cout << "descriptor.destProc " << descriptor.destProc << std::endl;
std::cout << "idxDescr " << idxDescr << std::endl;
whatNext.emplace_back(std::pair<Action,int>{NOTHING_TODO, -1});
mpiRequests.emplace_back();
assert(descriptor.nbParticlesToExchange*size_particle_positions < std::numeric_limits<int>::max());
AssertMpi(MPI_Isend(const_cast<real_number*>(&particles_positions[particles_offset_layers[my_nb_cell_levels-descriptor.nbLevelsToExchange]]),
int(descriptor.nbParticlesToExchange*size_particle_positions), particles_utils::GetMpiType(real_number()),
descriptor.destProc, TAG_POSITION_PARTICLES,
current_com, &mpiRequests.back()));
assert(descriptor.toRecvAndMerge == nullptr);
descriptor.toRecvAndMerge.reset(new real_number[descriptor.nbParticlesToExchange*size_particle_rhs]);
whatNext.emplace_back(std::pair<Action,int>{MERGE_PARTICLES, int(neigDescriptors.size())});
mpiRequests.emplace_back();
assert(descriptor.nbParticlesToExchange*size_particle_rhs < std::numeric_limits<int>::max());
AssertMpi(MPI_Irecv(descriptor.toRecvAndMerge.get(), int(descriptor.nbParticlesToExchange*size_particle_rhs),
particles_utils::GetMpiType(real_number()), descriptor.destProc, TAG_RESULT_PARTICLES,
current_com, &mpiRequests.back()));
}
}
else{
std::cout << "RECV_PARTICLES " << RECV_PARTICLES << std::endl;
std::cout << "idxDescr " << idxDescr << std::endl;
whatNext.emplace_back(std::pair<Action,int>{RECV_PARTICLES, idxDescr});
mpiRequests.emplace_back();
AssertMpi(MPI_Irecv(&descriptor.nbParticlesToExchange,
1, particles_utils::GetMpiType(partsize_t()), descriptor.destProc, TAG_NB_PARTICLES,
current_com, &mpiRequests.back()));
}
}
const bool more_than_one_thread = (omp_get_max_threads() > 1);
TIMEZONE_OMP_INIT_PREPARALLEL(omp_get_max_threads())
#pragma omp parallel default(shared)
{
#pragma omp master
{
while(mpiRequests.size()){
assert(mpiRequests.size() == whatNext.size());
int idxDone = int(mpiRequests.size());
{
TIMEZONE("wait");
AssertMpi(MPI_Waitany(int(mpiRequests.size()), mpiRequests.data(), &idxDone, MPI_STATUSES_IGNORE));
}
const std::pair<Action, int> releasedAction = whatNext[idxDone];
std::swap(mpiRequests[idxDone], mpiRequests[mpiRequests.size()-1]);
std::swap(whatNext[idxDone], whatNext[mpiRequests.size()-1]);
mpiRequests.pop_back();
whatNext.pop_back();
//////////////////////////////////////////////////////////////////////
/// Data to exchange particles
//////////////////////////////////////////////////////////////////////
if(releasedAction.first == RECV_PARTICLES){
NeighborDescriptor& descriptor = neigDescriptors[releasedAction.second];
assert(descriptor.isRecv == true);
const int destProc = descriptor.destProc;
const partsize_t NbParticlesToReceive = descriptor.nbParticlesToExchange;
assert(NbParticlesToReceive != -1);
assert(descriptor.toCompute == nullptr);
std::cout << my_rank << "] RECV_PARTICLES" << std::endl;
std::cout << "descriptor.nbParticlesToExchange " << descriptor.nbParticlesToExchange << std::endl;
std::cout << "descriptor.destProc " << descriptor.destProc << std::endl;
std::cout << "releasedAction.second " << releasedAction.second << std::endl;
if(NbParticlesToReceive){
std::cout << "MPI_Irecv " << std::endl;
descriptor.toCompute.reset(new real_number[NbParticlesToReceive*size_particle_positions]);
whatNext.emplace_back(std::pair<Action,int>{COMPUTE_PARTICLES, releasedAction.second});
mpiRequests.emplace_back();
assert(NbParticlesToReceive*size_particle_positions < std::numeric_limits<int>::max());
AssertMpi(MPI_Irecv(descriptor.toCompute.get(), int(NbParticlesToReceive*size_particle_positions),
particles_utils::GetMpiType(real_number()), destProc, TAG_POSITION_PARTICLES,
current_com, &mpiRequests.back()));
}
}
//////////////////////////////////////////////////////////////////////
/// Computation
//////////////////////////////////////////////////////////////////////
if(releasedAction.first == COMPUTE_PARTICLES){
NeighborDescriptor& descriptor = neigDescriptors[releasedAction.second];
const partsize_t NbParticlesToReceive = descriptor.nbParticlesToExchange;
assert(descriptor.toCompute != nullptr);
descriptor.results.reset(new real_number[NbParticlesToReceive*size_particle_rhs]);
// TODO in_computer.template init_result_array<size_particle_rhs>(descriptor.results.get(), NbParticlesToReceive);
// Compute
partsize_t idxPart = 0;
while(idxPart != NbParticlesToReceive){
const long int current_cell_idx = get_cell_idx(descriptor.toCompute[idxPart*size_particle_positions + IDX_X],
descriptor.toCompute[idxPart*size_particle_positions + IDX_Y],
descriptor.toCompute[idxPart*size_particle_positions + IDX_Z]);
partsize_t nb_parts_in_cell = 0;
while(idxPart+nb_parts_in_cell != NbParticlesToReceive
&& current_cell_idx == get_cell_idx(descriptor.toCompute[(idxPart+nb_parts_in_cell)*size_particle_positions + IDX_X],
descriptor.toCompute[(idxPart+nb_parts_in_cell)*size_particle_positions + IDX_Y],
descriptor.toCompute[(idxPart+nb_parts_in_cell)*size_particle_positions + IDX_Z])){
nb_parts_in_cell += 1;
}
const std::vector<std::pair<partsize_t,partsize_t>>* neighbors[27];
const int nbNeighbors = my_tree.getNeighbors(current_cell_idx, neighbors);
// with other interval
for(size_t idx_neighbor = 0 ; idx_neighbor < nbNeighbors ; ++idx_neighbor){
for(size_t idx_2 = 0 ; idx_2 < (*neighbors[idx_neighbor]).size() ; ++idx_2){
for(partsize_t idx_p1 = 0 ; idx_p1 < nb_parts_in_cell ; ++idx_p1){
for(partsize_t idx_p2 = 0 ; idx_p2 < (*neighbors[idx_neighbor])[idx_2].second ; ++idx_p2){
const real_number dist_r2 = compute_distance_r2(descriptor.toCompute[(idxPart+idx_p1)*size_particle_positions + IDX_X],
descriptor.toCompute[(idxPart+idx_p1)*size_particle_positions + IDX_Y],
descriptor.toCompute[(idxPart+idx_p1)*size_particle_positions + IDX_Z],
particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions + IDX_X],
particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions + IDX_Y],
particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions + IDX_Z]);
if(dist_r2 < cutoff_radius*cutoff_radius){
// TODO in_computer.template compute_interaction<size_particle_positions,size_particle_rhs>(
// &descriptor.toCompute[(idxPart+idx_p1)*size_particle_positions],
// &descriptor.results[(idxPart+idx_p1)*size_particle_rhs],
// &particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions],
// &particles_current_rhs[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_rhs],
// dist_r2);
}
}
}
}
}
idxPart += nb_parts_in_cell;
}
// Send back
const int destProc = descriptor.destProc;
whatNext.emplace_back(std::pair<Action,int>{RELEASE_BUFFER_PARTICLES, releasedAction.second});
mpiRequests.emplace_back();
assert(NbParticlesToReceive*size_particle_rhs < std::numeric_limits<int>::max());
AssertMpi(MPI_Isend(descriptor.results.get(), int(NbParticlesToReceive*size_particle_rhs),
particles_utils::GetMpiType(real_number()), destProc, TAG_RESULT_PARTICLES,
current_com, &mpiRequests.back()));
}
//////////////////////////////////////////////////////////////////////
/// Computation
//////////////////////////////////////////////////////////////////////
if(releasedAction.first == RELEASE_BUFFER_PARTICLES){
NeighborDescriptor& descriptor = neigDescriptors[releasedAction.second];
assert(descriptor.toCompute != nullptr);
descriptor.toCompute.release();
}
//////////////////////////////////////////////////////////////////////
/// Merge
//////////////////////////////////////////////////////////////////////
if(releasedAction.first == MERGE_PARTICLES && more_than_one_thread == false){
NeighborDescriptor& descriptor = neigDescriptors[releasedAction.second];
assert(descriptor.isRecv);
assert(descriptor.toRecvAndMerge != nullptr);
// TODO in_computer.template reduce_particles_rhs<size_particle_rhs>(&particles_current_rhs[0], descriptor.toRecvAndMerge.get(), descriptor.nbParticlesToExchange);
descriptor.toRecvAndMerge.release();
}
}
}
}
assert(whatNext.size() == 0);
assert(mpiRequests.size() == 0);
// Compute self data
for(const auto& iter_cell : my_tree){
const std::vector<std::pair<partsize_t,partsize_t>>& intervals = iter_cell.second;
for(size_t idx_1 = 0 ; idx_1 < intervals.size() ; ++idx_1){
// self interval
for(partsize_t idx_p1 = 0 ; idx_p1 < intervals[idx_1].second ; ++idx_p1){
for(partsize_t idx_p2 = idx_p1+1 ; idx_p2 < intervals[idx_1].second ; ++idx_p2){
const real_number dist_r2 = compute_distance_r2(particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDX_X],
particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDX_Y],
particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDX_Z],
particles_positions[(intervals[idx_1].first+idx_p2)*size_particle_positions + IDX_X],
particles_positions[(intervals[idx_1].first+idx_p2)*size_particle_positions + IDX_Y],
particles_positions[(intervals[idx_1].first+idx_p2)*size_particle_positions + IDX_Z]);
if(dist_r2 < cutoff_radius*cutoff_radius){
in_computer.template compute_interaction<size_particle_positions,size_particle_rhs>(
&particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions],
&particles_current_rhs[(intervals[idx_1].first+idx_p1)*size_particle_rhs],
&particles_positions[(intervals[idx_1].first+idx_p2)*size_particle_positions],
&particles_current_rhs[(intervals[idx_1].first+idx_p2)*size_particle_rhs],
dist_r2);
}
}
}
// with other interval
for(size_t idx_2 = idx_1+1 ; idx_2 < intervals.size() ; ++idx_2){
for(partsize_t idx_p1 = 0 ; idx_p1 < intervals[idx_1].second ; ++idx_p1){
for(partsize_t idx_p2 = 0 ; idx_p2 < intervals[idx_2].second ; ++idx_p2){
const real_number dist_r2 = compute_distance_r2(particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDX_X],
particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDX_Y],
particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDX_Z],
particles_positions[(intervals[idx_2].first+idx_p2)*size_particle_positions + IDX_X],
particles_positions[(intervals[idx_2].first+idx_p2)*size_particle_positions + IDX_Y],
particles_positions[(intervals[idx_2].first+idx_p2)*size_particle_positions + IDX_Z]);
if(dist_r2 < cutoff_radius*cutoff_radius){
in_computer.template compute_interaction<size_particle_positions,size_particle_rhs>(
&particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions],
&particles_current_rhs[(intervals[idx_1].first+idx_p1)*size_particle_rhs],
&particles_positions[(intervals[idx_2].first+idx_p2)*size_particle_positions],
&particles_current_rhs[(intervals[idx_2].first+idx_p2)*size_particle_rhs],
dist_r2);
}
}
}
}
}
const long int currenct_cell_idx = iter_cell.first;
const std::vector<std::pair<partsize_t,partsize_t>>* neighbors[27];
const int nbNeighbors = my_tree.getNeighbors(currenct_cell_idx, neighbors);
for(size_t idx_1 = 0 ; idx_1 < intervals.size() ; ++idx_1){
// with other interval
for(size_t idx_neighbor = 0 ; idx_neighbor < nbNeighbors ; ++idx_neighbor){
for(size_t idx_2 = 0 ; idx_2 < (*neighbors[idx_neighbor]).size() ; ++idx_2){
for(partsize_t idx_p1 = 0 ; idx_p1 < intervals[idx_1].second ; ++idx_p1){
for(partsize_t idx_p2 = 0 ; idx_p2 < (*neighbors[idx_neighbor])[idx_2].second ; ++idx_p2){
const real_number dist_r2 = compute_distance_r2(particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDX_X],
particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDX_Y],
particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDX_Z],
particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions + IDX_X],
particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions + IDX_Y],
particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions + IDX_Z]);
if(dist_r2 < cutoff_radius*cutoff_radius){
in_computer.template compute_interaction<size_particle_positions,size_particle_rhs>(
&particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions],
&particles_current_rhs[(intervals[idx_1].first+idx_p1)*size_particle_rhs],
&particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions],
&particles_current_rhs[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_rhs],
dist_r2);
}
}
}
}
}
}
}
}
};
#endif
#ifndef P2P_TREE_HPP
#define P2P_TREE_HPP
#include <unordered_map>
#include <vector>
template <class CellClass>
class p2p_tree{
std::unordered_map<long int, CellClass> data;
CellClass emptyCell;
std::array<long int,3> nb_cell_levels;
long int get_cell_coord_x_from_index(const long int index) const{
return index % nb_cell_levels[IDX_X];
}
long int get_cell_coord_y_from_index(const long int index) const{
return (index - get_cell_coord_z_from_index(index)*(nb_cell_levels[IDX_X]*nb_cell_levels[IDX_Y]))
/ nb_cell_levels[IDX_X];
}
long int get_cell_coord_z_from_index(const long int index) const{
return index / (nb_cell_levels[IDX_X]*nb_cell_levels[IDX_Y]);
}
long int get_cell_idx(const long int idx_x, const long int idx_y,
const long int idx_z) const {
return (((idx_z*nb_cell_levels[IDX_Y])+idx_y)*nb_cell_levels[IDX_X])+idx_x;
}
public:
explicit p2p_tree(std::array<long int,3> in_nb_cell_levels)
: nb_cell_levels(in_nb_cell_levels){
}
CellClass& getCell(const long int idx){
return data[idx];
}
const CellClass& getCell(const long int idx) const {
const auto& iter = data.find(idx);
if(iter != data.end()){
return iter->second;
}
return emptyCell;
}
int getNeighbors(const long int idx, const CellClass* output[27]) const{
int nbNeighbors = 0;
const long int idx_x = get_cell_coord_x_from_index(idx);
const long int idx_y = get_cell_coord_y_from_index(idx);
const long int idx_z = get_cell_coord_z_from_index(idx);
for(long int neigh_x = - 1 ; neigh_x <= 1 ; ++neigh_x){
long int neigh_x_pbc = neigh_x+idx_x;
if(neigh_x_pbc < 0){
neigh_x_pbc += nb_cell_levels[IDX_X];
}
else if(nb_cell_levels[IDX_X] <= neigh_x_pbc){
neigh_x_pbc -= nb_cell_levels[IDX_X];
}
for(long int neigh_y = - 1 ; neigh_y <= 1 ; ++neigh_y){
long int neigh_y_pbc = neigh_y+idx_y;
if(neigh_y_pbc < 0){
neigh_y_pbc += nb_cell_levels[IDX_Y];
}
else if(nb_cell_levels[IDX_Y] <= neigh_y_pbc){
neigh_y_pbc -= nb_cell_levels[IDX_Y];
}
for(long int neigh_z = - 1 ; neigh_z <= 1 ; ++neigh_z){
long int neigh_z_pbc = neigh_z+idx_z;
if(neigh_z_pbc < 0){
neigh_z_pbc += nb_cell_levels[IDX_Z];
}
else if(nb_cell_levels[IDX_Z] <= neigh_z_pbc){
neigh_z_pbc -= nb_cell_levels[IDX_Z];
}
// Not the current cell
if(neigh_x_pbc != idx_x || neigh_y_pbc != idx_y || neigh_z_pbc != idx_z){
const long int idx_neigh = get_cell_idx(neigh_x_pbc,
neigh_y_pbc,
neigh_z_pbc);
const auto& iter = data.find(idx_neigh);
if(iter != data.end()){
output[nbNeighbors] = &(iter->second);
}
}
}
}
}
return nbNeighbors;
}
typename std::unordered_map<long int, CellClass>::iterator begin(){
return data.begin();
}
typename std::unordered_map<long int, CellClass>::iterator end(){
return data.end();
}
};
#endif
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment