Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Sten Delos
Gadget4
Commits
ce1a1fdd
Commit
ce1a1fdd
authored
Oct 21, 2020
by
Volker Springel
Browse files
fix hang on restart when multiple nodes, gas, and MaxFilesWithConcurrentIO != 0 is used
parent
072b067e
Changes
3
Hide whitespace changes
Inline
Side-by-side
src/io/restart.cc
View file @
ce1a1fdd
...
@@ -298,6 +298,9 @@ void restart::work_files(int modus)
...
@@ -298,6 +298,9 @@ void restart::work_files(int modus)
MPI_Gather
(
&
seq_loc
,
sizeof
(
seq_data
),
MPI_BYTE
,
seq
,
sizeof
(
seq_data
),
MPI_BYTE
,
0
,
Communicator
);
MPI_Gather
(
&
seq_loc
,
sizeof
(
seq_data
),
MPI_BYTE
,
seq
,
sizeof
(
seq_data
),
MPI_BYTE
,
0
,
Communicator
);
if
(
modus
==
MODUS_READ
)
MPI_Comm_split
(
Communicator
,
Shmem
.
Island_Smallest_WorldTask
,
0
,
&
Sim
->
NgbTree
.
TreeSharedMemComm
);
if
(
ThisTask
==
0
)
if
(
ThisTask
==
0
)
{
{
std
::
sort
(
seq
,
seq
+
NTask
);
std
::
sort
(
seq
,
seq
+
NTask
);
...
@@ -357,6 +360,9 @@ void restart::work_files(int modus)
...
@@ -357,6 +360,9 @@ void restart::work_files(int modus)
/* send back completion notice */
/* send back completion notice */
MPI_Ssend
(
&
ThisTask
,
1
,
MPI_INT
,
0
,
TAG_KEY
,
Communicator
);
MPI_Ssend
(
&
ThisTask
,
1
,
MPI_INT
,
0
,
TAG_KEY
,
Communicator
);
}
}
if
(
modus
==
MODUS_READ
)
Sim
->
NgbTree
.
treeallocate_share_topnode_addresses
();
}
}
void
restart
::
contents_restart_file
(
int
modus
)
void
restart
::
contents_restart_file
(
int
modus
)
...
@@ -523,12 +529,16 @@ void restart::contents_restart_file(int modus)
...
@@ -523,12 +529,16 @@ void restart::contents_restart_file(int modus)
{
{
byten
(
Sim
->
NgbTree
.
Nodes
+
Sim
->
NgbTree
.
MaxPart
+
Sim
->
Domain
.
NTopnodes
,
byten
(
Sim
->
NgbTree
.
Nodes
+
Sim
->
NgbTree
.
MaxPart
+
Sim
->
Domain
.
NTopnodes
,
(
Sim
->
NgbTree
.
NumNodes
-
Sim
->
Domain
.
NTopnodes
)
*
sizeof
(
ngbnode
),
modus
);
(
Sim
->
NgbTree
.
NumNodes
-
Sim
->
Domain
.
NTopnodes
)
*
sizeof
(
ngbnode
),
modus
);
byten
(
Sim
->
NgbTree
.
TopNodes
+
Sim
->
NgbTree
.
MaxPart
,
Sim
->
Domain
.
NTopnodes
*
sizeof
(
ngbnode
),
modus
);
byten
(
Sim
->
NgbTree
.
NodeIndex
,
Sim
->
Domain
.
NTopleaves
*
sizeof
(
int
),
modus
);
byten
(
Sim
->
NgbTree
.
NodeSibling
,
Sim
->
Domain
.
NTopleaves
*
sizeof
(
int
),
modus
);
byten
(
Sim
->
NgbTree
.
NodeLevel
,
Sim
->
Domain
.
NTopleaves
*
sizeof
(
unsigned
char
),
modus
);
byten
(
Sim
->
NgbTree
.
Nextnode
,
(
Sim
->
NgbTree
.
MaxPart
+
Sim
->
Domain
.
NTopleaves
)
*
sizeof
(
int
),
modus
);
byten
(
Sim
->
NgbTree
.
Nextnode
,
(
Sim
->
NgbTree
.
MaxPart
+
Sim
->
Domain
.
NTopleaves
)
*
sizeof
(
int
),
modus
);
byten
(
Sim
->
NgbTree
.
Father
,
Sim
->
NgbTree
.
MaxPart
*
sizeof
(
int
),
modus
);
byten
(
Sim
->
NgbTree
.
Father
,
Sim
->
NgbTree
.
MaxPart
*
sizeof
(
int
),
modus
);
if
(
Sim
->
NgbTree
.
TreeSharedMem_ThisTask
==
0
)
{
byten
(
Sim
->
NgbTree
.
TopNodes
+
Sim
->
NgbTree
.
MaxPart
,
Sim
->
Domain
.
NTopnodes
*
sizeof
(
ngbnode
),
modus
);
byten
(
Sim
->
NgbTree
.
NodeIndex
,
Sim
->
Domain
.
NTopleaves
*
sizeof
(
int
),
modus
);
byten
(
Sim
->
NgbTree
.
NodeSibling
,
Sim
->
Domain
.
NTopleaves
*
sizeof
(
int
),
modus
);
byten
(
Sim
->
NgbTree
.
NodeLevel
,
Sim
->
Domain
.
NTopleaves
*
sizeof
(
unsigned
char
),
modus
);
}
}
}
byten
(
Sim
->
Domain
.
TopNodes
,
Sim
->
Domain
.
NTopnodes
*
Sim
->
Domain
.
domain_sizeof_topnode_data
(),
modus
);
byten
(
Sim
->
Domain
.
TopNodes
,
Sim
->
Domain
.
NTopnodes
*
Sim
->
Domain
.
domain_sizeof_topnode_data
(),
modus
);
...
...
src/tree/tree.cc
View file @
ce1a1fdd
...
@@ -778,10 +778,12 @@ void tree<node, partset, point_data, foreign_point_data>::treeallocate(int max_p
...
@@ -778,10 +778,12 @@ void tree<node, partset, point_data, foreign_point_data>::treeallocate(int max_p
D
=
Dptr
;
D
=
Dptr
;
Tp
=
Tp_ptr
;
Tp
=
Tp_ptr
;
/* split up the communicator into pieces overlap with different shared memory regions */
if
(
max_partindex
!=
-
1
)
if
(
max_partindex
!=
-
1
)
{
MPI_Comm_split
(
D
->
Communicator
,
Shmem
.
Island_Smallest_WorldTask
,
0
,
&
TreeSharedMemComm
);
MPI_Allreduce
(
&
max_partindex
,
&
MaxPart
,
1
,
MPI_INT
,
MPI_MAX
,
D
->
Communicator
);
}
if
(
max_partindex
!=
-
1
)
MPI_Allreduce
(
&
max_partindex
,
&
MaxPart
,
1
,
MPI_INT
,
MPI_MAX
,
D
->
Communicator
);
if
(
MaxPart
==
0
)
if
(
MaxPart
==
0
)
return
;
// nothing to be done
return
;
// nothing to be done
...
@@ -816,9 +818,6 @@ void tree<node, partset, point_data, foreign_point_data>::treeallocate(int max_p
...
@@ -816,9 +818,6 @@ void tree<node, partset, point_data, foreign_point_data>::treeallocate(int max_p
max_partindex
=
MaxPart
;
max_partindex
=
MaxPart
;
}
}
/* now split up the communicator into pieces overlap with different shared memory regions */
MPI_Comm_split
(
D
->
Communicator
,
Shmem
.
Island_Smallest_WorldTask
,
0
,
&
TreeSharedMemComm
);
MPI_Comm_rank
(
TreeSharedMemComm
,
&
TreeSharedMem_ThisTask
);
MPI_Comm_rank
(
TreeSharedMemComm
,
&
TreeSharedMem_ThisTask
);
MPI_Comm_size
(
TreeSharedMemComm
,
&
TreeSharedMem_NTask
);
MPI_Comm_size
(
TreeSharedMemComm
,
&
TreeSharedMem_NTask
);
...
@@ -880,6 +879,16 @@ void tree<node, partset, point_data, foreign_point_data>::treeallocate(int max_p
...
@@ -880,6 +879,16 @@ void tree<node, partset, point_data, foreign_point_data>::treeallocate(int max_p
}
}
}
}
Nodes
=
(
node
*
)
Mem
.
mymalloc_movable
(
&
Nodes
,
"Nodes"
,
(
MaxNodes
-
D
->
NTopnodes
+
1
)
*
sizeof
(
node
));
Nodes
-=
(
MaxPart
+
D
->
NTopnodes
);
if
(
max_partindex
!=
-
1
)
treeallocate_share_topnode_addresses
();
}
template
<
typename
node
,
typename
partset
,
typename
point_data
,
typename
foreign_point_data
>
void
tree
<
node
,
partset
,
point_data
,
foreign_point_data
>::
treeallocate_share_topnode_addresses
(
void
)
{
MPI_Bcast
(
&
TreeInfoHandle
,
1
,
MPI_INT
,
0
,
TreeSharedMemComm
);
MPI_Bcast
(
&
TreeInfoHandle
,
1
,
MPI_INT
,
0
,
TreeSharedMemComm
);
ptrdiff_t
off
[
4
]
=
{((
char
*
)
NodeLevel
-
Mem
.
Base
),
((
char
*
)
NodeSibling
-
Mem
.
Base
),
((
char
*
)
NodeIndex
-
Mem
.
Base
),
ptrdiff_t
off
[
4
]
=
{((
char
*
)
NodeLevel
-
Mem
.
Base
),
((
char
*
)
NodeSibling
-
Mem
.
Base
),
((
char
*
)
NodeIndex
-
Mem
.
Base
),
...
@@ -894,9 +903,6 @@ void tree<node, partset, point_data, foreign_point_data>::treeallocate(int max_p
...
@@ -894,9 +903,6 @@ void tree<node, partset, point_data, foreign_point_data>::treeallocate(int max_p
NodeSibling
=
(
int
*
)((
char
*
)
Shmem
.
SharedMemBaseAddr
[
shmrank
]
+
off
[
1
]);
NodeSibling
=
(
int
*
)((
char
*
)
Shmem
.
SharedMemBaseAddr
[
shmrank
]
+
off
[
1
]);
NodeIndex
=
(
int
*
)((
char
*
)
Shmem
.
SharedMemBaseAddr
[
shmrank
]
+
off
[
2
]);
NodeIndex
=
(
int
*
)((
char
*
)
Shmem
.
SharedMemBaseAddr
[
shmrank
]
+
off
[
2
]);
TopNodes
=
(
node
*
)((
char
*
)
Shmem
.
SharedMemBaseAddr
[
shmrank
]
+
off
[
3
]);
TopNodes
=
(
node
*
)((
char
*
)
Shmem
.
SharedMemBaseAddr
[
shmrank
]
+
off
[
3
]);
Nodes
=
(
node
*
)
Mem
.
mymalloc_movable
(
&
Nodes
,
"Nodes"
,
(
MaxNodes
-
D
->
NTopnodes
+
1
)
*
sizeof
(
node
));
Nodes
-=
(
MaxPart
+
D
->
NTopnodes
);
}
}
template
<
typename
node
,
typename
partset
,
typename
point_data
,
typename
foreign_point_data
>
template
<
typename
node
,
typename
partset
,
typename
point_data
,
typename
foreign_point_data
>
...
@@ -1221,13 +1227,13 @@ void tree<node, partset, point_data, foreign_point_data>::tree_fetch_foreign_nod
...
@@ -1221,13 +1227,13 @@ void tree<node, partset, point_data, foreign_point_data>::tree_fetch_foreign_nod
template
<
typename
node
,
typename
partset
,
typename
point_data
,
typename
foreign_point_data
>
template
<
typename
node
,
typename
partset
,
typename
point_data
,
typename
foreign_point_data
>
void
tree
<
node
,
partset
,
point_data
,
foreign_point_data
>::
treefree
(
void
)
void
tree
<
node
,
partset
,
point_data
,
foreign_point_data
>::
treefree
(
void
)
{
{
MPI_Comm_free
(
&
TreeSharedMemComm
);
if
(
MaxPart
==
0
)
if
(
MaxPart
==
0
)
return
;
// nothing to be done
return
;
// nothing to be done
if
(
Nodes
)
if
(
Nodes
)
{
{
MPI_Comm_free
(
&
TreeSharedMemComm
);
if
(
Father
)
if
(
Father
)
{
{
Mem
.
myfree_movable
(
Father
);
Mem
.
myfree_movable
(
Father
);
...
...
src/tree/tree.h
View file @
ce1a1fdd
...
@@ -321,6 +321,7 @@ class tree
...
@@ -321,6 +321,7 @@ class tree
int
treebuild
(
int
ninsert
,
int
*
indexlist
);
int
treebuild
(
int
ninsert
,
int
*
indexlist
);
void
treefree
(
void
);
void
treefree
(
void
);
void
treeallocate
(
int
max_partindex
,
partset
*
Pptr
,
domain
<
partset
>
*
Dptr
);
void
treeallocate
(
int
max_partindex
,
partset
*
Pptr
,
domain
<
partset
>
*
Dptr
);
void
treeallocate_share_topnode_addresses
(
void
);
void
tree_export_node_threads
(
int
no
,
int
i
,
thread_data
*
thread
,
offset_tuple
off
=
0
);
void
tree_export_node_threads
(
int
no
,
int
i
,
thread_data
*
thread
,
offset_tuple
off
=
0
);
void
tree_export_node_threads_by_task_and_node
(
int
task
,
int
nodeindex
,
int
i
,
thread_data
*
thread
,
offset_tuple
off
=
0
);
void
tree_export_node_threads_by_task_and_node
(
int
task
,
int
nodeindex
,
int
i
,
thread_data
*
thread
,
offset_tuple
off
=
0
);
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment