Commit ce1a1fdd authored by Volker Springel
Browse files

fix hang on restart when multiple nodes, gas, and MaxFilesWithConcurrentIO != 0 are used

parent 072b067e
...@@ -298,6 +298,9 @@ void restart::work_files(int modus) ...@@ -298,6 +298,9 @@ void restart::work_files(int modus)
MPI_Gather(&seq_loc, sizeof(seq_data), MPI_BYTE, seq, sizeof(seq_data), MPI_BYTE, 0, Communicator); MPI_Gather(&seq_loc, sizeof(seq_data), MPI_BYTE, seq, sizeof(seq_data), MPI_BYTE, 0, Communicator);
if(modus == MODUS_READ)
MPI_Comm_split(Communicator, Shmem.Island_Smallest_WorldTask, 0, &Sim->NgbTree.TreeSharedMemComm);
if(ThisTask == 0) if(ThisTask == 0)
{ {
std::sort(seq, seq + NTask); std::sort(seq, seq + NTask);
...@@ -357,6 +360,9 @@ void restart::work_files(int modus) ...@@ -357,6 +360,9 @@ void restart::work_files(int modus)
/* send back completion notice */ /* send back completion notice */
MPI_Ssend(&ThisTask, 1, MPI_INT, 0, TAG_KEY, Communicator); MPI_Ssend(&ThisTask, 1, MPI_INT, 0, TAG_KEY, Communicator);
} }
if(modus == MODUS_READ)
Sim->NgbTree.treeallocate_share_topnode_addresses();
} }
void restart::contents_restart_file(int modus) void restart::contents_restart_file(int modus)
...@@ -523,12 +529,16 @@ void restart::contents_restart_file(int modus) ...@@ -523,12 +529,16 @@ void restart::contents_restart_file(int modus)
{ {
byten(Sim->NgbTree.Nodes + Sim->NgbTree.MaxPart + Sim->Domain.NTopnodes, byten(Sim->NgbTree.Nodes + Sim->NgbTree.MaxPart + Sim->Domain.NTopnodes,
(Sim->NgbTree.NumNodes - Sim->Domain.NTopnodes) * sizeof(ngbnode), modus); (Sim->NgbTree.NumNodes - Sim->Domain.NTopnodes) * sizeof(ngbnode), modus);
byten(Sim->NgbTree.TopNodes + Sim->NgbTree.MaxPart, Sim->Domain.NTopnodes * sizeof(ngbnode), modus);
byten(Sim->NgbTree.NodeIndex, Sim->Domain.NTopleaves * sizeof(int), modus);
byten(Sim->NgbTree.NodeSibling, Sim->Domain.NTopleaves * sizeof(int), modus);
byten(Sim->NgbTree.NodeLevel, Sim->Domain.NTopleaves * sizeof(unsigned char), modus);
byten(Sim->NgbTree.Nextnode, (Sim->NgbTree.MaxPart + Sim->Domain.NTopleaves) * sizeof(int), modus); byten(Sim->NgbTree.Nextnode, (Sim->NgbTree.MaxPart + Sim->Domain.NTopleaves) * sizeof(int), modus);
byten(Sim->NgbTree.Father, Sim->NgbTree.MaxPart * sizeof(int), modus); byten(Sim->NgbTree.Father, Sim->NgbTree.MaxPart * sizeof(int), modus);
if(Sim->NgbTree.TreeSharedMem_ThisTask == 0)
{
byten(Sim->NgbTree.TopNodes + Sim->NgbTree.MaxPart, Sim->Domain.NTopnodes * sizeof(ngbnode), modus);
byten(Sim->NgbTree.NodeIndex, Sim->Domain.NTopleaves * sizeof(int), modus);
byten(Sim->NgbTree.NodeSibling, Sim->Domain.NTopleaves * sizeof(int), modus);
byten(Sim->NgbTree.NodeLevel, Sim->Domain.NTopleaves * sizeof(unsigned char), modus);
}
} }
byten(Sim->Domain.TopNodes, Sim->Domain.NTopnodes * Sim->Domain.domain_sizeof_topnode_data(), modus); byten(Sim->Domain.TopNodes, Sim->Domain.NTopnodes * Sim->Domain.domain_sizeof_topnode_data(), modus);
......
...@@ -778,10 +778,12 @@ void tree<node, partset, point_data, foreign_point_data>::treeallocate(int max_p ...@@ -778,10 +778,12 @@ void tree<node, partset, point_data, foreign_point_data>::treeallocate(int max_p
D = Dptr; D = Dptr;
Tp = Tp_ptr; Tp = Tp_ptr;
/* split up the communicator into pieces that overlap with different shared memory regions */
if(max_partindex != -1) if(max_partindex != -1)
{ MPI_Comm_split(D->Communicator, Shmem.Island_Smallest_WorldTask, 0, &TreeSharedMemComm);
MPI_Allreduce(&max_partindex, &MaxPart, 1, MPI_INT, MPI_MAX, D->Communicator);
} if(max_partindex != -1)
MPI_Allreduce(&max_partindex, &MaxPart, 1, MPI_INT, MPI_MAX, D->Communicator);
if(MaxPart == 0) if(MaxPart == 0)
return; // nothing to be done return; // nothing to be done
...@@ -816,9 +818,6 @@ void tree<node, partset, point_data, foreign_point_data>::treeallocate(int max_p ...@@ -816,9 +818,6 @@ void tree<node, partset, point_data, foreign_point_data>::treeallocate(int max_p
max_partindex = MaxPart; max_partindex = MaxPart;
} }
/* now split up the communicator into pieces that overlap with different shared memory regions */
MPI_Comm_split(D->Communicator, Shmem.Island_Smallest_WorldTask, 0, &TreeSharedMemComm);
MPI_Comm_rank(TreeSharedMemComm, &TreeSharedMem_ThisTask); MPI_Comm_rank(TreeSharedMemComm, &TreeSharedMem_ThisTask);
MPI_Comm_size(TreeSharedMemComm, &TreeSharedMem_NTask); MPI_Comm_size(TreeSharedMemComm, &TreeSharedMem_NTask);
...@@ -880,6 +879,16 @@ void tree<node, partset, point_data, foreign_point_data>::treeallocate(int max_p ...@@ -880,6 +879,16 @@ void tree<node, partset, point_data, foreign_point_data>::treeallocate(int max_p
} }
} }
Nodes = (node *)Mem.mymalloc_movable(&Nodes, "Nodes", (MaxNodes - D->NTopnodes + 1) * sizeof(node));
Nodes -= (MaxPart + D->NTopnodes);
if(max_partindex != -1)
treeallocate_share_topnode_addresses();
}
template <typename node, typename partset, typename point_data, typename foreign_point_data>
void tree<node, partset, point_data, foreign_point_data>::treeallocate_share_topnode_addresses(void)
{
MPI_Bcast(&TreeInfoHandle, 1, MPI_INT, 0, TreeSharedMemComm); MPI_Bcast(&TreeInfoHandle, 1, MPI_INT, 0, TreeSharedMemComm);
ptrdiff_t off[4] = {((char *)NodeLevel - Mem.Base), ((char *)NodeSibling - Mem.Base), ((char *)NodeIndex - Mem.Base), ptrdiff_t off[4] = {((char *)NodeLevel - Mem.Base), ((char *)NodeSibling - Mem.Base), ((char *)NodeIndex - Mem.Base),
...@@ -894,9 +903,6 @@ void tree<node, partset, point_data, foreign_point_data>::treeallocate(int max_p ...@@ -894,9 +903,6 @@ void tree<node, partset, point_data, foreign_point_data>::treeallocate(int max_p
NodeSibling = (int *)((char *)Shmem.SharedMemBaseAddr[shmrank] + off[1]); NodeSibling = (int *)((char *)Shmem.SharedMemBaseAddr[shmrank] + off[1]);
NodeIndex = (int *)((char *)Shmem.SharedMemBaseAddr[shmrank] + off[2]); NodeIndex = (int *)((char *)Shmem.SharedMemBaseAddr[shmrank] + off[2]);
TopNodes = (node *)((char *)Shmem.SharedMemBaseAddr[shmrank] + off[3]); TopNodes = (node *)((char *)Shmem.SharedMemBaseAddr[shmrank] + off[3]);
Nodes = (node *)Mem.mymalloc_movable(&Nodes, "Nodes", (MaxNodes - D->NTopnodes + 1) * sizeof(node));
Nodes -= (MaxPart + D->NTopnodes);
} }
template <typename node, typename partset, typename point_data, typename foreign_point_data> template <typename node, typename partset, typename point_data, typename foreign_point_data>
...@@ -1221,13 +1227,13 @@ void tree<node, partset, point_data, foreign_point_data>::tree_fetch_foreign_nod ...@@ -1221,13 +1227,13 @@ void tree<node, partset, point_data, foreign_point_data>::tree_fetch_foreign_nod
template <typename node, typename partset, typename point_data, typename foreign_point_data> template <typename node, typename partset, typename point_data, typename foreign_point_data>
void tree<node, partset, point_data, foreign_point_data>::treefree(void) void tree<node, partset, point_data, foreign_point_data>::treefree(void)
{ {
MPI_Comm_free(&TreeSharedMemComm);
if(MaxPart == 0) if(MaxPart == 0)
return; // nothing to be done return; // nothing to be done
if(Nodes) if(Nodes)
{ {
MPI_Comm_free(&TreeSharedMemComm);
if(Father) if(Father)
{ {
Mem.myfree_movable(Father); Mem.myfree_movable(Father);
......
...@@ -321,6 +321,7 @@ class tree ...@@ -321,6 +321,7 @@ class tree
int treebuild(int ninsert, int *indexlist); int treebuild(int ninsert, int *indexlist);
void treefree(void); void treefree(void);
void treeallocate(int max_partindex, partset *Pptr, domain<partset> *Dptr); void treeallocate(int max_partindex, partset *Pptr, domain<partset> *Dptr);
void treeallocate_share_topnode_addresses(void);
void tree_export_node_threads(int no, int i, thread_data *thread, offset_tuple off = 0); void tree_export_node_threads(int no, int i, thread_data *thread, offset_tuple off = 0);
void tree_export_node_threads_by_task_and_node(int task, int nodeindex, int i, thread_data *thread, offset_tuple off = 0); void tree_export_node_threads_by_task_and_node(int task, int nodeindex, int i, thread_data *thread, offset_tuple off = 0);
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment