diff --git a/scripts/dataset_specific/start_j0251_gce.py b/scripts/dataset_specific/start_j0251_gce.py
index 6e2ac80673723cbe6a90b47a6b60eb646e4f4d08..4ca7afa9bf240783cff2b699812dc5903dd4348e 100755
--- a/scripts/dataset_specific/start_j0251_gce.py
+++ b/scripts/dataset_specific/start_j0251_gce.py
@@ -27,11 +27,18 @@ if __name__ == '__main__':
     assert test_point_models or test_view_models
     experiment_name = 'j0251'
     scale = np.array([10, 10, 25])
-    node_state = next(iter(nodestates_slurm().values()))
+    number_of_nodes = 16
+    node_states = nodestates_slurm()
+    node_state = next(iter(node_states.values()))
+    exclude_nodes = []
+    for nk in list(node_states.keys())[number_of_nodes:]:
+        exclude_nodes.append(nk)
+        del node_states[nk]
+    # check cluster state
+    assert number_of_nodes == np.sum([v['state'] == 'idle' for v in node_states.values()])
     ncores_per_node = node_state['cpus']
     mem_per_node = node_state['memory']
     ngpus_per_node = 2  # node_state currently does not contain the number of gpus for 'gres' resource
-    number_of_nodes = 20
     shape_j0251 = np.array([27119, 27350, 15494])
     # *9 for ~3 TVx, *11 for 5.7, *7 for 1.4, *5.5 for 0.7, *4.5 for 0.4
     cube_size = (np.array([2048, 2048, 1024]) * 4.5).astype(np.int)
@@ -39,8 +46,6 @@ if __name__ == '__main__':
     cube_offset = ((shape_j0251 - cube_size) // 2).astype(np.int)
     cube_of_interest_bb = np.array([cube_offset, cube_offset + cube_size], dtype=np.int)
     # cube_of_interest_bb = None  # process the entire cube!
-    # check that cluster is configured accordingly
-    assert number_of_nodes == np.sum([v['state'] == 'idle' for v in nodestates_slurm().values()])
     prior_glia_removal = True
     use_point_models = True
     key_val_pairs_conf = [
@@ -67,7 +72,8 @@ if __name__ == '__main__':
                   'vc': ['binary_opening', 'binary_closing', 'binary_erosion']}
              }
          ),
-        ('cube_of_interest_bb', cube_of_interest_bb.tolist())
+        ('cube_of_interest_bb', cube_of_interest_bb.tolist()),
+        ('slurm', {'exclude_nodes': exclude_nodes})
     ]
     chunk_size = (512, 512, 256)
     if cube_size[0] <= 2048:
@@ -125,7 +131,6 @@ if __name__ == '__main__':
 
     global_params.wd = working_dir
     os.makedirs(global_params.config.temp_path, exist_ok=True)
-
     # create symlink to myelin predictions
     if not os.path.exists(f'{working_dir}/knossosdatasets/myelin'):
        assert os.path.exists('/mnt/j0251_data/myelin')
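The hunk above caps the run at the first 16 nodes reported by `nodestates_slurm()` and forwards every remaining node name to the generated config via `('slurm', {'exclude_nodes': exclude_nodes})`. A minimal sketch of that selection logic, with the node-state mapping passed in explicitly so it runs standalone (the helper name and example input are illustrative, not part of the patch):

    def select_nodes(node_states: dict, number_of_nodes: int = 16):
        # node_states mimics the nodestates_slurm() return value,
        # e.g. {'node01': {'state': 'idle', 'cpus': 20, 'memory': 249500}, ...}
        names = list(node_states.keys())
        keep, exclude_nodes = names[:number_of_nodes], names[number_of_nodes:]
        # same check as in the script: all requested nodes must be idle
        assert sum(node_states[nk]['state'] == 'idle' for nk in keep) == number_of_nodes
        return keep, exclude_nodes
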
diff --git a/syconn/exec/exec_syns.py b/syconn/exec/exec_syns.py
index e56240f018526d7bf8c862656854c45e63534f73..46459f8a8c1a5c083e7998935fcc261a7b3a60d3 100755
--- a/syconn/exec/exec_syns.py
+++ b/syconn/exec/exec_syns.py
@@ -31,6 +31,7 @@ def run_matrix_export():
           en-passant bouton: 3, terminal bouton: 4) of the partner neurons.
         * 'partner_spiness': Spine compartment predictions (0: dendritic shaft,
           1: spine head, 2: spine neck, 3: other) of both neurons.
+        * 'partner_spineheadvol': Spine head volume in µm^3 of the pre- and post-synaptic partners.
         * 'partner_celltypes': Celltype of the both neurons.
         * 'latent_morph': Local morphology embeddings of the pre- and post-
           synaptic partners.
@@ -139,6 +140,13 @@ def run_spinehead_volume_calc():
     """
     Calculate spine head volumes based on a watershed segmentation which is run on 3D spine label masks propagated
     from cell surface predictions.
+    Spine head volumes are stored in the SSV attribute dictionary under the key ``spinehead_vol`` (in µm^3).
+
+    A subsequent call to :func:`~syconn.extraction.cs_processing_steps.collect_properties_from_ssv_partners`
+    adds this property as ``partner_spineheadvol`` to the attribute dict of all `syn_ssv`. Calling
+    :func:`syconn.proc.sd_proc.dataset_analysis` then collects all `syn_ssv` properties and makes them
+    available as numpy arrays. These two steps are performed in :func:`~run_matrix_export`.
+
     """
     log = initialize_logging('compartment_prediction', global_params.config.working_dir + '/logs/',
                              overwrite=False)
diff --git a/syconn/extraction/cs_processing_steps.py b/syconn/extraction/cs_processing_steps.py
index 3e124efe119483a5f845d8b83c115927062b24a5..e8304f5450a1adb1b7625a5b056dc9f512bdb547 100755
--- a/syconn/extraction/cs_processing_steps.py
+++ b/syconn/extraction/cs_processing_steps.py
@@ -44,10 +44,12 @@ def collect_properties_from_ssv_partners(wd, obj_version=None, ssd_version=None,
 
     The following keys will be available in the ``attr_dict`` of ``syn_ssv``
     typed :class:`~syconn.reps.segmentation.SegmentationObject`:
+
         * 'partner_axoness': Cell compartment type (axon: 1, dendrite: 0, soma: 2,
           en-passant bouton: 3, terminal bouton: 4) of the partner neurons.
         * 'partner_spiness': Spine compartment predictions (0: dendritic shaft,
           1: spine head, 2: spine neck, 3: other) of both neurons.
+        * 'partner_spineheadvol': Spine head volume in µm^3 of the pre- and post-synaptic partners.
         * 'partner_celltypes': Celltype of the both neurons.
         * 'latent_morph': Local morphology embeddings of the pre- and post-
           synaptic partners.
diff --git a/syconn/handler/config.yml b/syconn/handler/config.yml
index 5b3913aea9ffa926f6c87a9799ae2a66d34168da..2a021d4814530b4aa7b681128b1341060974ef52 100755
--- a/syconn/handler/config.yml
+++ b/syconn/handler/config.yml
@@ -14,17 +14,17 @@ version:
 existing_cell_organelles: ['mi', 'sj', 'vc']
 syntype_avail: False
 
-# Compute backend: 'QSUB', 'SLURM', None
+# Compute backend: 'SLURM', None
 batch_proc_system: 'SLURM'  # If None, fall-back is single node multiprocessing
 
-# the here defined parameters
-batch_pe: 'default'
-batch_queue: 'all.q'
-
+# generic parameters for distributed processing
 mem_per_node: 249500  # in MB
 ncores_per_node: 20
 ngpus_per_node: 2
 nnodes_total: 17
+# SLURM-specific
+slurm:
+  exclude_nodes:
 
 # --------- LOGGING
 # 'None' disables logging of SyConn modules (e.g. proc, handler, ...) to files.
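With the new `slurm` block in config.yml, node exclusion can be configured globally instead of per call. A sketch of how the entry is consumed, mirroring the batchjob_utils change below (the node names are placeholders):

    from syconn import global_params

    # in config.yml the block would read, e.g.:
    # slurm:
    #   exclude_nodes: ['node17', 'node18']
    excl = global_params.config['slurm']['exclude_nodes']
    if excl is not None:
        # appended to the sbatch flags by batchjob_script
        additional_flags = f' --exclude={",".join(excl)}'
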
""" starttime = datetime.datetime.today().strftime("%m.%d") # Parameter handling @@ -162,16 +163,22 @@ def batchjob_script(params: list, name: str, if global_params.config['batch_proc_system'] != 'SLURM': msg = ('"batchjob_script" currently does not support any other batch processing ' 'system than SLURM.') - log_mp.error(msg) + log_batchjob.error(msg) raise NotImplementedError(msg) cpus_per_node = global_params.config['ncores_per_node'] - mem_lim = int(global_params.config['mem_per_node'] / - cpus_per_node) - if '--mem' in additional_flags: - raise ValueError('"--mem" must not be set via the "additional_flags"' - ' kwarg.') + # mem_lim = int(global_params.config['mem_per_node'] / + # cpus_per_node) + # if '--mem' in additional_flags: + # raise ValueError('"--mem" must not be set via the "additional_flags"' + # ' kwarg.') # additional_flags += ' --mem-per-cpu={}M'.format(mem_lim) + if exclude_nodes is None: + exclude_nodes = global_params.config['slurm']['exclude_nodes'] + if exclude_nodes is not None: + additional_flags += f' --exclude={",".join(exclude_nodes)}' + log_batchjob.debug(f'Excluding slurm nodes: {",".join(exclude_nodes)}') + # Start SLURM job if len(job_name) > 8: msg = "job_name is longer than 8 characters. This is untested." @@ -284,7 +291,7 @@ def batchjob_script(params: list, name: str, nb_failed += 1 continue # restart job - if requeue_dc[j] == 20: # TODO: use global_params NCORES_PER_NODE + if requeue_dc[j] == cpus_per_node: log_batchjob.warning(f'About to re-submit job {j} ({job2slurm_dc[j]}) ' f'which already was assigned the maximum number ' f'of available CPUs.') @@ -359,13 +366,11 @@ def _delete_folder_daemon(dirname, log, job_name, timeout=60): def _delete_folder(dn, lg, to=60): start = time.time() - e = '' while to > time.time() - start: try: shutil.rmtree(dn) break except OSError as e: - e = str(e) time.sleep(5) if time.time() - start > to: shutil.rmtree(dn, ignore_errors=True) diff --git a/syconn/reps/connectivity_helper.py b/syconn/reps/connectivity_helper.py index b32d481450659ed86b5e832e55fa272153aee850..9fecc38b50f3388a31f4afc3e5ad1e530f2ac3cf 100755 --- a/syconn/reps/connectivity_helper.py +++ b/syconn/reps/connectivity_helper.py @@ -426,14 +426,15 @@ def diverge_map(high=(239 / 255., 65 / 255., 50 / 255.), def connectivity_hists_j0251(proba_thresh_syn: float = 0.8, proba_thresh_celltype: float = None, - r=(0.05, 2)): + r=(0.05, 2), use_spinehead_vol: bool = False): """ + Experimental. + Args: proba_thresh_syn: Synapse probability. Filters synapses below threshold. proba_thresh_celltype: Cell type probability. Filters cells below threshold. r: Range of synapse mesh area (um^2). - - Returns: + use_spinehead_vol: Use spinehead volume instead of ``mesh_area / 2``. 
""" from syconn.handler.prediction import int2str_converter, certainty_estimate @@ -462,6 +463,8 @@ def connectivity_hists_j0251(proba_thresh_syn: float = 0.8, proba_thresh_celltyp ax = sd_syn_ssv.load_numpy_data('partner_axoness') ct = sd_syn_ssv.load_numpy_data('partner_celltypes') area = sd_syn_ssv.load_numpy_data('mesh_area') + sh_vol = sd_syn_ssv.load_numpy_data('partner_spineheadvol') + # size = sd_syn_ssv.load_numpy_data('size') # syn_sign = sd_syn_ssv.load_numpy_data('syn_sign') # area *= syn_sign @@ -479,9 +482,11 @@ def connectivity_hists_j0251(proba_thresh_syn: float = 0.8, proba_thresh_celltyp ax = ax[m] area = area[m] # size = size[m] + sh_vol = sh_vol[m] partners = partners[m] if log_scale: area = np.log10(area) + sh_vol = np.log10(sh_vol) r = np.log(r) ct_receiving = {ctclass_converter(k): {ctclass_converter(kk): [] for kk in range(nclass)} for k in range(nclass)} ct_targets = {ctclass_converter(k): {ctclass_converter(kk): [] for kk in range(nclass)} for k in range(nclass)} @@ -495,29 +500,35 @@ def connectivity_hists_j0251(proba_thresh_syn: float = 0.8, proba_thresh_celltyp syn_ct = ct[ix] pre_ct = ctclass_converter(syn_ct[pre_ix]) post_ct = ctclass_converter(syn_ct[post_ix]) - ct_receiving[post_ct][pre_ct].append(area[ix]) - ct_targets[pre_ct][post_ct].append(area[ix]) + if use_spinehead_vol: + size_quantity = sh_vol[ix][post_ix] + else: + size_quantity = area[ix] + ct_receiving[post_ct][pre_ct].append(size_quantity) + ct_targets[pre_ct][post_ct].append(size_quantity) + size_quantity_label = 'spinehead_vol' if use_spinehead_vol else 'mesh_area' + print('Area/volume is in µm^2 or µm^3 respectively.') for ct_label in tqdm.tqdm(map(ctclass_converter, range(nclass)), total=nclass): data_rec = ct_receiving[ct_label] sizes = np.argsort([len(v) for v in data_rec.values()])[::-1] highest_cts = np.array(list(data_rec.keys()))[sizes][:plot_n_celltypes] - df = pd.DataFrame(data={'mesh_area': np.concatenate([data_rec[k] for k in highest_cts]), + df = pd.DataFrame(data={size_quantity_label: np.concatenate([data_rec[k] for k in highest_cts]), 'cell_type': np.concatenate([[k]*len(data_rec[k]) for k in highest_cts])}) - create_kde(f'{target_dir}/incoming{ct_label}.png', df, palette=palette, r=r) - df = pd.DataFrame(data={'mesh_area[um^2]': [np.sum(10**np.array(data_rec[k])) for k in data_rec], + create_kde(f'{target_dir}/incoming{ct_label}_{size_quantity_label}.png', df, palette=palette, r=r) + df = pd.DataFrame(data={size_quantity_label: [np.sum(10**np.array(data_rec[k])) for k in data_rec], 'n_synapses': [len(data_rec[k]) for k in data_rec], 'cell_type': [k for k in data_rec]}) - df.to_csv(f'{target_dir}/incoming{ct_label}_sum.csv') + df.to_csv(f'{target_dir}/incoming{ct_label}_{size_quantity_label}_sum.csv') data_out = ct_targets[ct_label] sizes = np.argsort([len(v) for v in data_out.values()])[::-1] highest_cts = np.array(list(data_out.keys()))[sizes][:plot_n_celltypes] - df = pd.DataFrame(data={'mesh_area': np.concatenate([data_out[k] for k in highest_cts]), + df = pd.DataFrame(data={size_quantity_label: np.concatenate([data_out[k] for k in highest_cts]), 'cell_type': np.concatenate([[k]*len(data_out[k]) for k in highest_cts])}) - create_kde(f'{target_dir}/outgoing{ct_label}.png', df, palette=palette, r=r) - df = pd.DataFrame(data={'mesh_area[um^2]': [np.sum(10**np.array(data_out[k])) for k in data_out], + create_kde(f'{target_dir}/outgoing{ct_label}_{size_quantity_label}.png', df, palette=palette, r=r) + df = pd.DataFrame(data={size_quantity_label: [np.sum(10**np.array(data_out[k])) 
diff --git a/syconn/reps/super_segmentation_helper.py b/syconn/reps/super_segmentation_helper.py
index a5f1e19579f6431ff76a14f59d0e1dba8505d4ca..c1e5ba62c04af5f489b43dc05d378cc1a78f7147 100755
--- a/syconn/reps/super_segmentation_helper.py
+++ b/syconn/reps/super_segmentation_helper.py
@@ -2175,6 +2175,7 @@ def extract_spinehead_volume_mesh(sso: 'super_segmentation.SuperSegmentationObje
         the key ``spinehead_vol``.
 
     Notes:
+        * 'spinehead_vol' is given in µm^3.
         * Requires a predicted cell mesh, i.e. 'spiness' must be present in
           ``label_dict('vertex')['spiness']``.
         * If the results have to be stored, call ``sso.save_attr_dict()``
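After ``run_spinehead_volume_calc`` and the matrix export have run, the per-synapse values can be loaded as flat arrays in the same way ``connectivity_hists_j0251`` does above (the working directory is a placeholder):

    from syconn.reps.segmentation import SegmentationDataset

    sd_syn_ssv = SegmentationDataset('syn_ssv', working_dir='/path/to/wd')  # placeholder path
    sh_vol = sd_syn_ssv.load_numpy_data('partner_spineheadvol')  # µm^3, one value per synaptic partner
    area = sd_syn_ssv.load_numpy_data('mesh_area')               # µm^2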