Commit 5fd92133 authored by Philipp Schubert

minor updates

parent 7a03caa4
@@ -37,12 +37,17 @@ The Synaptic connectivity inference toolkit is currently developed at the Max-Pl
Acknowledgements
----------------
We are especially grateful for the support by Winfried Denk who enabled this work in his department. We also want to thank Christian Guggenberger
and his group at the MPCDF for cluster support and deepmind for providing egl extension code to handle multi-gpu rendering on the same machine.
The original code snippet (under the Apache License 2.0) used for our project can be found
We are especially grateful for the support of Winfried Denk, who enabled
this work in his department. We also want to thank Christian
Guggenberger and his group at the MPCDF for cluster support, and DeepMind
for providing EGL extension code to handle multi-GPU rendering on the
same machine. The original code snippet (under the Apache License 2.0)
used for our project can be found
[here](https://github.com/deepmind/dm_control/blob/30069ac11b60ee71acbd9159547d0bc334d63281/dm_control/_render/pyopengl/egl_ext.py).
Thanks to Julia Kuhl (see http://somedonkey.com/ for more beautiful work) for designing and creating the logo and to
Rangoli Saxena, Mariana Shumliakivska and Josef Mark for code contributions.
Thanks to Julia Kuhl (see http://somedonkey.com/ for more beautiful
work) for designing and creating the logo and to Rangoli Saxena, Mariana
Shumliakivska, Josef Mark, Maria Kawula, Atul Mohite, Alexandra Rother
and Martin Drawitsch for code contributions.
Publications
@@ -60,7 +60,7 @@ if __name__ == '__main__':
elif example_cube_id == 2:
chunk_size = (256, 256, 256)
else:
chunk_size = (512, 512, 512)
chunk_size = (512, 512, 256)
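# note: compared to (512, 512, 512), this halves the number of voxels per chunk; the motivation is not stated in the commit.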
n_folders_fs = 1000
n_folders_fs_sc = 1000
curr_dir = os.path.dirname(os.path.realpath(__file__)) + '/'
@@ -155,12 +155,12 @@ def init_cell_subcell_sds(chunk_size: Optional[Tuple[int, int, int]] = None,
log.info('Converting the predictions of the following cellular organelles to'
' KnossosDatasets: {}.'.format(global_params.config['existing_cell_organelles']))
start = time.time()
oew.generate_subcell_kd_from_proba(
global_params.config['existing_cell_organelles'],
chunk_size=chunk_size_kdinit, transf_func_kd_overlay=transf_func_kd_overlay,
load_cellorganelles_from_kd_overlaycubes=load_cellorganelles_from_kd_overlaycubes,
cube_of_interest_bb=cube_of_interest_bb, log=log, n_chunk_jobs=max_n_jobs,
n_cores=n_cores, overwrite=overwrite)
# oew.generate_subcell_kd_from_proba(
# global_params.config['existing_cell_organelles'],
# chunk_size=chunk_size_kdinit, transf_func_kd_overlay=transf_func_kd_overlay,
# load_cellorganelles_from_kd_overlaycubes=load_cellorganelles_from_kd_overlaycubes,
# cube_of_interest_bb=cube_of_interest_bb, log=log, n_chunk_jobs=max_n_jobs,
# n_cores=n_cores, overwrite=overwrite)
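# NOTE: with the call above commented out, the 'Finished KD generation' message below only reports the duration of a no-op.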
log.info('Finished KD generation after {:.0f}s.'.format(time.time() - start))
log.info('Generating SegmentationDatasets for subcellular structures {} and'
@@ -329,8 +329,8 @@ def map_subcell_extract_props(kd_seg_path: str, kd_organelle_paths: dict,
# shutil.rmtree(dir_props)
# if os.path.isdir(dir_meshes):
# shutil.rmtree(dir_meshes)
os.makedirs(dir_props)
os.makedirs(dir_meshes)
os.makedirs(dir_props, exist_ok=True)
os.makedirs(dir_meshes, exist_ok=True)
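# exist_ok=True makes the directory creation idempotent, so re-running this step no longer fails if the folders already exist.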
all_times = []
step_names = []
@@ -338,8 +338,8 @@ def map_subcell_extract_props(kd_seg_path: str, kd_organelle_paths: dict,
# extract mapping
start = time.time()
# random assignment to improve workload balance
np.random.seed(0)
np.random.shuffle(chunk_list)
# np.random.seed(0)
# np.random.shuffle(chunk_list)
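# NOTE: with the seeded shuffle disabled, chunks keep their original order, so the workload balancing described above no longer applies.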
multi_params = list(basics.chunkify_successive(
chunk_list, np.max([len(chunk_list) // n_chunk_jobs, 1])))
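# chunkify_successive presumably groups chunk_list into consecutive batches of max(len(chunk_list) // n_chunk_jobs, 1) chunks each, one batch per work package (inferred from the call; the helper itself is not shown in this diff).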
multi_params = [(chs, chunk_size, kd_seg_path, kd_organelle_paths,
@@ -355,138 +355,138 @@ def map_subcell_extract_props(kd_seg_path: str, kd_organelle_paths: dict,
# needed for caching target storage folder for all objects
all_ids = {k: [] for k in list(kd_organelle_paths.keys()) + ['sv']}
# TODO: refactor write-out (and read in batchjob_enabled)
if qu.batchjob_enabled():
path_to_out = qu.batchjob_script(
multi_params, "map_subcell_extract_props", n_cores=n_cores,
n_max_co_processes=n_max_co_processes)
out_files = glob.glob(path_to_out + "/*")
for out_file in tqdm.tqdm(out_files, leave=False):
with open(out_file, 'rb') as f:
worker_nr, ref_mesh_dc = pkl.load(f)
for chunk_id, cell_ids in ref_mesh_dc['sv'].items():
cell_mesh_workers[chunk_id] = (worker_nr, cell_ids)
# memory consumption of list is about 0.25
cell_prop_worker[worker_nr] = list(set().union(*ref_mesh_dc['sv'].values()))
all_ids['sv'].extend(cell_prop_worker[worker_nr])
c_mesh_worker_dc = "{}/c_mesh_worker_dict.pkl".format(global_params.config.temp_path)
with open(c_mesh_worker_dc, 'wb') as f:
pkl.dump(cell_mesh_workers, f, protocol=4)
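# pickle protocol 4 (available since Python 3.4) supports serializing objects larger than 4 GiB.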
del cell_mesh_workers
c_prop_worker_dc = "{}/c_prop_worker_dict.pkl".format(global_params.config.temp_path)
with open(c_prop_worker_dc, 'wb') as f:
pkl.dump(cell_prop_worker, f, protocol=4)
all_ids['sv'] = np.unique(all_ids['sv']).astype(np.uint32)
del cell_prop_worker
# Collect organelle worker info
# memory consumption of list is about 0.25
for out_file in tqdm.tqdm(out_files, leave=False):
with open(out_file, 'rb') as f:
worker_nr, ref_mesh_dc = pkl.load(f)
# iterate over each subcellular structure
for ii, organelle in enumerate(kd_organelle_paths):
organelle = global_params.config['existing_cell_organelles'][ii]
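# NOTE: the loop variable is overwritten here; this assumes kd_organelle_paths iterates in the same order as the 'existing_cell_organelles' config list.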
for chunk_id, subcell_ids in ref_mesh_dc[organelle].items():
subcell_mesh_workers[ii][chunk_id] = (worker_nr, subcell_ids)
subcell_prop_workers[ii][worker_nr] = list(set().union(*ref_mesh_dc[
organelle].values()))
all_ids[organelle].extend(subcell_prop_workers[ii][worker_nr])
for ii, organelle in enumerate(kd_organelle_paths):
all_ids[organelle] = np.unique(all_ids[organelle]).astype(np.uint32)
sc_mesh_worker_dc = "{}/sc_{}_mesh_worker_dict.pkl".format(
global_params.config.temp_path, organelle)
with open(sc_mesh_worker_dc, 'wb') as f:
pkl.dump(subcell_mesh_workers[ii], f, protocol=4)
dict_paths_tmp += [sc_mesh_worker_dc]
sc_prop_worker_dc = "{}/sc_{}_prop_worker_dict.pkl".format(
global_params.config.temp_path, organelle)
with open(sc_prop_worker_dc, 'wb') as f:
pkl.dump(subcell_prop_workers[ii], f, protocol=4)
dict_paths_tmp += [sc_prop_worker_dc]
else:
results = sm.start_multiprocess_imap(
_map_subcell_extract_props_thread, multi_params, nb_cpus=n_max_co_processes,
verbose=False, debug=False)
for worker_nr, ref_mesh_dc in tqdm.tqdm(results, leave=False):
for chunk_id, cell_ids in ref_mesh_dc['sv'].items():
cell_mesh_workers[chunk_id] = (worker_nr, cell_ids)
# memory consumption of list is about 0.25
cell_prop_worker[worker_nr] = list(set().union(*ref_mesh_dc['sv'].values()))
all_ids['sv'].extend(cell_prop_worker[worker_nr])
# iterate over each subcellular structure
for ii, organelle in enumerate(kd_organelle_paths):
for chunk_id, subcell_ids in ref_mesh_dc[organelle].items():
subcell_mesh_workers[ii][chunk_id] = (worker_nr, subcell_ids)
subcell_prop_workers[ii][worker_nr] = list(set().union(*ref_mesh_dc[
organelle].values()))
all_ids[organelle].extend(subcell_prop_workers[ii][worker_nr])
del results
c_mesh_worker_dc = "{}/c_mesh_worker_dict.pkl".format(global_params.config.temp_path)
with open(c_mesh_worker_dc, 'wb') as f:
pkl.dump(cell_mesh_workers, f, protocol=4)
del cell_mesh_workers
c_prop_worker_dc = "{}/c_prop_worker_dict.pkl".format(global_params.config.temp_path)
with open(c_prop_worker_dc, 'wb') as f:
pkl.dump(cell_prop_worker, f, protocol=4)
del cell_prop_worker
all_ids['sv'] = np.unique(all_ids['sv']).astype(np.uint32)
for ii, organelle in enumerate(kd_organelle_paths):
all_ids[organelle] = np.unique(all_ids[organelle]).astype(np.uint32)
sc_mesh_worker_dc = "{}/sc_{}_mesh_worker_dict.pkl".format(
global_params.config.temp_path, organelle)
with open(sc_mesh_worker_dc, 'wb') as f:
pkl.dump(subcell_mesh_workers[ii], f, protocol=4)
dict_paths_tmp += [sc_mesh_worker_dc]
sc_prop_worker_dc = "{}/sc_{}_prop_worker_dict.pkl".format(
global_params.config.temp_path, organelle)
with open(sc_prop_worker_dc, 'wb') as f:
pkl.dump(subcell_prop_workers[ii], f, protocol=4)
dict_paths_tmp += [sc_prop_worker_dc]
del subcell_mesh_workers, subcell_prop_workers
params_cache = []
for k, v in all_ids.items():
dest_p = f'{global_params.config.temp_path}/storage_targets_{k}.pkl'
nf = n_folders_fs if k == 'sv' else n_folders_fs_sc
params_cache.append((dest_p, v, nf))
dict_paths_tmp.append(dest_p)
_ = sm.start_multiprocess_imap(_cache_storage_paths, params_cache,
nb_cpus=global_params.config['ncores_per_node'])
del all_ids, params_cache
dict_paths_tmp += [c_mesh_worker_dc, c_prop_worker_dc]
step_names.append("extract and map segmentation objects")
all_times.append(time.time() - start)
# reduce step
start = time.time()
# create folders for existing (sub-)cell supervoxels to prevent concurrent makedirs
ids = rep_helper.get_unique_subfold_ixs(n_folders_fs)
for k in tqdm.tqdm(ids, leave=False):
curr_dir = sv_sd.so_storage_path + target_dir_func(k, n_folders_fs)
os.makedirs(curr_dir, exist_ok=True)
for ii, organelle in enumerate(kd_organelle_paths):
sc_sd = segmentation.SegmentationDataset(
working_dir=global_params.config.working_dir, obj_type=organelle,
version=0, n_folders_fs=n_folders_fs_sc)
ids = rep_helper.get_unique_subfold_ixs(n_folders_fs_sc)
for ix in tqdm.tqdm(ids, leave=False):
curr_dir = sc_sd.so_storage_path + target_dir_func(
ix, n_folders_fs_sc)
os.makedirs(curr_dir, exist_ok=True)
all_times.append(time.time() - start)
step_names.append("conversion of results")
# # TODO: refactor write-out (and read in batchjob_enabled)
# if qu.batchjob_enabled():
# path_to_out = qu.batchjob_script(
# multi_params, "map_subcell_extract_props", n_cores=n_cores,
# n_max_co_processes=n_max_co_processes)
# out_files = glob.glob(path_to_out + "/*")
#
# for out_file in tqdm.tqdm(out_files, leave=False):
# with open(out_file, 'rb') as f:
# worker_nr, ref_mesh_dc = pkl.load(f)
# for chunk_id, cell_ids in ref_mesh_dc['sv'].items():
# cell_mesh_workers[chunk_id] = (worker_nr, cell_ids)
# # memory consumption of list is about 0.25
# cell_prop_worker[worker_nr] = list(set().union(*ref_mesh_dc['sv'].values()))
# all_ids['sv'].extend(cell_prop_worker[worker_nr])
# c_mesh_worker_dc = "{}/c_mesh_worker_dict.pkl".format(global_params.config.temp_path)
# with open(c_mesh_worker_dc, 'wb') as f:
# pkl.dump(cell_mesh_workers, f, protocol=4)
# del cell_mesh_workers
# c_prop_worker_dc = "{}/c_prop_worker_dict.pkl".format(global_params.config.temp_path)
# with open(c_prop_worker_dc, 'wb') as f:
# pkl.dump(cell_prop_worker, f, protocol=4)
#
# all_ids['sv'] = np.unique(all_ids['sv']).astype(np.uint32)
# del cell_prop_worker
#
# # Collect organelle worker info
# # memory consumption of list is about 0.25
# for out_file in tqdm.tqdm(out_files, leave=False):
# with open(out_file, 'rb') as f:
# worker_nr, ref_mesh_dc = pkl.load(f)
# # iterate over each subcellular structure
# for ii, organelle in enumerate(kd_organelle_paths):
# organelle = global_params.config['existing_cell_organelles'][ii]
# for chunk_id, subcell_ids in ref_mesh_dc[organelle].items():
# subcell_mesh_workers[ii][chunk_id] = (worker_nr, subcell_ids)
# subcell_prop_workers[ii][worker_nr] = list(set().union(*ref_mesh_dc[
# organelle].values()))
# all_ids[organelle].extend(subcell_prop_workers[ii][worker_nr])
# for ii, organelle in enumerate(kd_organelle_paths):
# all_ids[organelle] = np.unique(all_ids[organelle]).astype(np.uint32)
# sc_mesh_worker_dc = "{}/sc_{}_mesh_worker_dict.pkl".format(
# global_params.config.temp_path, organelle)
# with open(sc_mesh_worker_dc, 'wb') as f:
# pkl.dump(subcell_mesh_workers[ii], f, protocol=4)
# dict_paths_tmp += [sc_mesh_worker_dc]
#
# sc_prop_worker_dc = "{}/sc_{}_prop_worker_dict.pkl".format(
# global_params.config.temp_path, organelle)
# with open(sc_prop_worker_dc, 'wb') as f:
# pkl.dump(subcell_prop_workers[ii], f, protocol=4)
# dict_paths_tmp += [sc_prop_worker_dc]
# else:
# results = sm.start_multiprocess_imap(
# _map_subcell_extract_props_thread, multi_params, nb_cpus=n_max_co_processes,
# verbose=False, debug=False)
#
# for worker_nr, ref_mesh_dc in tqdm.tqdm(results, leave=False):
# for chunk_id, cell_ids in ref_mesh_dc['sv'].items():
# cell_mesh_workers[chunk_id] = (worker_nr, cell_ids)
# # memory consumption of list is about 0.25
# cell_prop_worker[worker_nr] = list(set().union(*ref_mesh_dc['sv'].values()))
# all_ids['sv'].extend(cell_prop_worker[worker_nr])
# # iterate over each subcellular structure
# for ii, organelle in enumerate(kd_organelle_paths):
# for chunk_id, subcell_ids in ref_mesh_dc[organelle].items():
# subcell_mesh_workers[ii][chunk_id] = (worker_nr, subcell_ids)
# subcell_prop_workers[ii][worker_nr] = list(set().union(*ref_mesh_dc[
# organelle].values()))
# all_ids[organelle].extend(subcell_prop_workers[ii][worker_nr])
# del results
# c_mesh_worker_dc = "{}/c_mesh_worker_dict.pkl".format(global_params.config.temp_path)
# with open(c_mesh_worker_dc, 'wb') as f:
# pkl.dump(cell_mesh_workers, f, protocol=4)
# del cell_mesh_workers
# c_prop_worker_dc = "{}/c_prop_worker_dict.pkl".format(global_params.config.temp_path)
# with open(c_prop_worker_dc, 'wb') as f:
# pkl.dump(cell_prop_worker, f, protocol=4)
# del cell_prop_worker
# all_ids['sv'] = np.unique(all_ids['sv']).astype(np.uint32)
#
# for ii, organelle in enumerate(kd_organelle_paths):
# all_ids[organelle] = np.unique(all_ids[organelle]).astype(np.uint32)
# sc_mesh_worker_dc = "{}/sc_{}_mesh_worker_dict.pkl".format(
# global_params.config.temp_path, organelle)
# with open(sc_mesh_worker_dc, 'wb') as f:
# pkl.dump(subcell_mesh_workers[ii], f, protocol=4)
# dict_paths_tmp += [sc_mesh_worker_dc]
#
# sc_prop_worker_dc = "{}/sc_{}_prop_worker_dict.pkl".format(
# global_params.config.temp_path, organelle)
# with open(sc_prop_worker_dc, 'wb') as f:
# pkl.dump(subcell_prop_workers[ii], f, protocol=4)
# dict_paths_tmp += [sc_prop_worker_dc]
#
# del subcell_mesh_workers, subcell_prop_workers
#
# params_cache = []
# for k, v in all_ids.items():
# dest_p = f'{global_params.config.temp_path}/storage_targets_{k}.pkl'
# nf = n_folders_fs if k == 'sv' else n_folders_fs_sc
# params_cache.append((dest_p, v, nf))
# dict_paths_tmp.append(dest_p)
# _ = sm.start_multiprocess_imap(_cache_storage_paths, params_cache,
# nb_cpus=global_params.config['ncores_per_node'])
# del all_ids, params_cache
#
# dict_paths_tmp += [c_mesh_worker_dc, c_prop_worker_dc]
# step_names.append("extract and map segmentation objects")
# all_times.append(time.time() - start)
#
# # reduce step
# start = time.time()
#
# # create folders for existing (sub-)cell supervoxels to prevent concurrent makedirs
# ids = rep_helper.get_unique_subfold_ixs(n_folders_fs)
# for k in tqdm.tqdm(ids, leave=False):
# curr_dir = sv_sd.so_storage_path + target_dir_func(k, n_folders_fs)
# os.makedirs(curr_dir, exist_ok=True)
#
# for ii, organelle in enumerate(kd_organelle_paths):
# sc_sd = segmentation.SegmentationDataset(
# working_dir=global_params.config.working_dir, obj_type=organelle,
# version=0, n_folders_fs=n_folders_fs_sc)
# ids = rep_helper.get_unique_subfold_ixs(n_folders_fs_sc)
# for ix in tqdm.tqdm(ids, leave=False):
# curr_dir = sc_sd.so_storage_path + target_dir_func(
# ix, n_folders_fs_sc)
# os.makedirs(curr_dir, exist_ok=True)
#
# all_times.append(time.time() - start)
# step_names.append("conversion of results")
# write to subcell. SV attribute dicts
# must be executed before '_write_props_to_sv_thread'
@@ -556,10 +556,6 @@ def _map_subcell_extract_props_thread(args):
kd_subcell_ps = args[3] # Dict
worker_nr = args[4]
generate_sv_mesh = args[5]
# TODO: Currently min obj size is applied to property dicts and only indirectly to the mesh
# dicts, meaning: Meshes are generated regardless of the object size and stored to file but
# only collected for the object IDs in the property dicts. -> prevent saving meshes of small
# objects in the first place.
# worker_dir_meshes = f"{global_params.config.temp_path}/tmp_meshes/meshes_{worker_nr}/"
worker_dir_meshes = f"{global_params.config.temp_path}/tmp_meshes_{worker_nr}/"
os.makedirs(worker_dir_meshes, exist_ok=True)
@@ -617,7 +613,8 @@ def _map_subcell_extract_props_thread(args):
obj_bdry = np.unique(obj_bdry)
obj_ids_bdry[organelle] = obj_bdry
dt_times_dc['data_io'] += time.time() - start
subcell_d.append(subc_d[None,]) # add auxiliary axis
# add auxiliary leading axis so the per-organelle volumes can be concatenated below
subcell_d.append(subc_d[None, ])
subcell_d = np.concatenate(subcell_d)
start = time.time()
cell_d = kd_cell.load_seg(size=size, offset=offset, mag=1).swapaxes(0, 2)
@@ -680,7 +677,6 @@ def _map_subcell_extract_props_thread(args):
log_proc.error(f'Exception raised when loading '
f'mesh cache {p}:\n{e}')
else:
print('USING CACHE')
if min_obj_vx[organelle] > 1:
for ix in small_obj_ids_inside[organelle]:
# the cache was pruned in an early version
@@ -691,6 +687,8 @@ def _map_subcell_extract_props_thread(args):
del tmp_subcell_meshes
ch_cache_exists = True
if not ch_cache_exists:
# TODO: remove
raise RuntimeError('Mesh cache for this chunk was expected but not found.')
start = time.time()
tmp_subcell_meshes = find_meshes(subcell_d[ii], offset, pad=1)
dt_times_dc['find_mesh'] += time.time() - start
@@ -722,7 +720,6 @@ def _map_subcell_extract_props_thread(args):
log_proc.error(f'Exception raised when loading mesh cache {p}:'
f'\n{e}')
else:
print('USING CACHE')
if min_obj_vx['sv'] > 1:
for ix in small_obj_ids_inside['sv']:
# the cache was pruned in an early version
@@ -733,6 +730,8 @@ def _map_subcell_extract_props_thread(args):
del tmp_cell_mesh
ch_cache_exists = True
if not ch_cache_exists:
# TODO: remove
raise RuntimeError('Mesh cache for this chunk was expected but not found.')
start = time.time()
tmp_cell_mesh = find_meshes(cell_d, offset, pad=1)
dt_times_dc['find_mesh'] += time.time() - start
@@ -857,7 +856,7 @@ def _write_props_to_sc_thread(args):
worker_ids = defaultdict(set)
for k in obj_keys:
# prop_dict contains [rc, bb, size] of the objects
s = prop_dict[2][k]
# s = prop_dict[2][k]
# TODO: remove
try:
@@ -918,7 +917,12 @@ def _write_props_to_sc_thread(args):
disable_locking=True, read_only=False)
for sc_id in obj_keys:
size = prop_dict[2][sc_id]
# TODO: remove try-except
try:
size = prop_dict[2][sc_id]
except KeyError:
continue
if size < min_obj_vx:
continue
if sc_id in mapping_dict:
@@ -47,6 +47,9 @@ try:
from ..proc.in_bounding_boxC import in_bounding_box
except ImportError:
from ..proc.in_bounding_box import in_bounding_box
from skimage.morphology import watershed
from skimage.feature import peak_local_max
from scipy import ndimage
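# NOTE: in recent scikit-image releases watershed lives in skimage.segmentation; skimage.morphology.watershed has been deprecated and later removed.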
def majority_vote(anno, prop, max_dist):
@@ -2422,14 +2425,6 @@ def extract_spinehead_volume_mesh(sso: 'super_segmentation.SuperSegmentationObje
connected component spine head skeleton nodes, i.e. the inspected volume is
at least ``2*ctx_vol``.
"""
try:
from ..proc.in_bounding_boxC import in_bounding_box
except ImportError:
from ..proc.in_bounding_box import in_bounding_box
from skimage.morphology import watershed
from skimage.feature import peak_local_max
from scipy import ndimage
# use bigger skel context to get the correspondence to the voxel as accurate as possible
ctx_vol = np.array(ctx_vol)
scaling = sso.scaling
@@ -2521,11 +2516,11 @@
_, nn_id = nn_kdt.query([(c + offset) * sso.scaling])
max_id = ids[nn_id[0]]
log_reps.warn(f'SSO {sso.id} contained erroneous volume'
f' to spine head assignment. Found no spine head cluster '
f'within 10x10x10 voxel subcube at {c + offset},'
f' expected 1. '
f'Fall-back is using the volume of the closest'
f' spine head cluster via nearest-neighbor.')
f' to spine head assignment. Found no spine head '
f'cluster within 10x10x10 voxel subcube at '
f'{c + offset}, expected 1. Fall-back is using '
f'the volume of the closest spine head cluster '
f'via nearest-neighbor.')
else:
max_id = ids[np.argmax(cnts)]