Commit 5571d20a authored by Alexandra Rother's avatar Alexandra Rother

update prediction, changes to cs_processing for debugging

parent 9484a2ab
Pipeline #139624 failed with stage in 4 minutes
File mode changed from 100644 to 100755
elektronn3-dev @ 41fcd363
-Subproject commit 4fe6a1f0c21f49750dffc2cac505d8efbcd40696
+Subproject commit 41fcd3631bd585bc426d6759b7d3d4d058b2f6c8
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
@@ -279,7 +279,7 @@ if elektronn3_avail:
         super().__init__(ssd_kwargs=ssd_kwargs, cv_val=cv_val, **kwargs)
         # load GT
-        assert self.train, "Other mode than 'train' is not implemented."
+        #assert self.train, "Other mode than 'train' is not implemented."
         self.csv_p = "/wholebrain/songbird/j0251/groundtruth/celltypes/j0251_celltype_gt_v6_j0251_72_seg_20210127_agglo2_IDs.csv"
         df = pandas.io.parsers.read_csv(self.csv_p, header=None, names=['ID', 'type']).values
         ssv_ids = df[:, 0].astype(np.uint64)
@@ -298,7 +298,10 @@ if elektronn3_avail:
             self.splitting_dict = {'train': ssv_ids, 'valid': ssv_ids}  # use all data
             log_cnn.critical(f'Using all GT data for training!')
         self.label_dc = {k: v for k, v in zip(ssv_ids, ssv_labels)}
-        self.sso_ids = self.splitting_dict['train']
+        if self.train:
+            self.sso_ids = self.splitting_dict['train']
+        else:
+            self.sso_ids = self.splitting_dict['valid']
         for k, v in self.splitting_dict.items():
             classes, c_cnts = np.unique([self.label_dc[ix] for ix in
                                          self.splitting_dict[k]], return_counts=True)
@@ -306,12 +309,13 @@ if elektronn3_avail:
         log_cnn.debug(f'{len(self.sso_ids)} SSV IDs in training set: {self.sso_ids}')

     def __len__(self):
-        if self.train:
-            return len(self.sso_ids) * 3
-        else:
-            return max(len(self.sso_ids) // 5, 1)
+        #if self.train:
+        # make use of the underlying LRU cache with high epoch size,
+        # worker instances of the pytorch loader will reset after each epoch
+        return len(self.sso_ids) * 60
+        #return len(self.sso_ids) * 3 #* 60
+        #else:
+        #return max(len(self.sso_ids) // 5, 1)


 class CellCloudGlia(Dataset):
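Note on the `__len__` change: PyTorch DataLoader worker processes are torn down and recreated at every epoch boundary, so any per-worker LRU cache dies with them; reporting an inflated epoch length (`* 60`) keeps the workers, and their caches, alive longer. A minimal sketch of the pattern, with a hypothetical `load_ssv` standing in for the cache-backed cell loading:

```python
from functools import lru_cache
from torch.utils.data import Dataset

@lru_cache(maxsize=128)
def load_ssv(ssv_id):
    # hypothetical stand-in for the expensive per-cell I/O;
    # the cache lives inside each DataLoader worker process
    return {'id': ssv_id}

class InflatedDataset(Dataset):
    def __init__(self, ssv_ids, epoch_mult=60):
        self.ssv_ids = list(ssv_ids)
        self.epoch_mult = epoch_mult  # plays the role of the "* 60" above

    def __len__(self):
        # inflate the reported epoch size so workers (and their caches)
        # are reset less often
        return len(self.ssv_ids) * self.epoch_mult

    def __getitem__(self, idx):
        # wrap around so every index maps back to a real cell ID
        return load_ssv(self.ssv_ids[idx % len(self.ssv_ids)])
```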
@@ -78,7 +78,7 @@ eval_nr = random_seed # number of repetition
 dr = 0.3
 track_running_stats = False
 use_norm = 'gn'
-num_classes = 11
+num_classes = 15
 onehot = True
 act = 'relu'
 use_myelin = False
@@ -167,11 +167,11 @@ valid_transform = clouds.Compose([clouds.Center(), clouds.Normalization(scale_no
 train_ds = CellCloudDataJ0251(npoints=npoints, transform=train_transform, cv_val=cval,
                               cellshape_only=cellshape_only, use_syntype=use_syntype,
                               onehot=onehot, batch_size=batch_size, ctx_size=ctx, map_myelin=use_myelin)
-# valid_ds = CellCloudDataJ0251(npoints=npoints, transform=valid_transform, train=False,
-#                               cv_val=cval, cellshape_only=cellshape_only,
-#                               use_syntype=use_syntype, onehot=onehot, batch_size=batch_size,
-#                               ctx_size=ctx, map_myelin=use_myelin)
-valid_ds = None
+valid_ds = CellCloudDataJ0251(npoints=npoints, transform=valid_transform, train=False,
+                              cv_val=cval, cellshape_only=cellshape_only,
+                              use_syntype=use_syntype, onehot=onehot, batch_size=batch_size,
+                              ctx_size=ctx, map_myelin=use_myelin)
+#valid_ds = None
# PREPARE AND START TRAINING #
@@ -212,16 +212,17 @@ trainer = Trainer3d(
     train_dataset=train_ds,
     valid_dataset=valid_ds,
     batchsize=1,
-    num_workers=20,
+    num_workers=10,
     valid_metrics=valid_metrics,
     save_root=save_root,
     enable_save_trace=enable_save_trace,
     exp_name=name,
     schedulers={"lr": lr_sched},
     num_classes=num_classes,
-    # example_input=example_input,
+    example_input=example_input,
     dataloader_kwargs=dict(collate_fn=lambda x: x[0]),
+    nbatch_avg=10,
     tqdm_kwargs={"disable": False}
 )

 # Archiving training script, src folder, env info
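One detail of the `Trainer3d` call is worth spelling out: `batchsize=1` together with `collate_fn=lambda x: x[0]` works because the dataset assembles whole batches itself (it receives `batch_size` in its constructor), so each DataLoader "batch" is a one-element list that the collate function simply unwraps. A sketch of that pattern, with a made-up pre-batching dataset:

```python
import torch
from torch.utils.data import Dataset, DataLoader

class PreBatchedDataset(Dataset):
    """Hypothetical dataset that returns ready-made batches per item."""
    def __init__(self, batch_size=8, npoints=1024):
        self.batch_size, self.npoints = batch_size, npoints

    def __len__(self):
        return 100

    def __getitem__(self, idx):
        # one item == one full batch of point clouds
        return torch.randn(self.batch_size, self.npoints, 3)

loader = DataLoader(PreBatchedDataset(), batch_size=1,
                    collate_fn=lambda x: x[0])  # unwrap the 1-element list
batch = next(iter(loader))
assert batch.shape == (8, 1024, 3)
```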
File mode changed from 100644 to 100755
@@ -631,7 +631,7 @@ def combine_and_split_cs(wd, ssd_version=None, cs_version=None, nb_cpus=None, n_
     ssd = super_segmentation.SuperSegmentationDataset(wd, version=ssd_version)
     cs_sd = segmentation.SegmentationDataset("cs", working_dir=wd, version=cs_version)
     cs_version = cs_sd.version
-    if rel_ssv_with_cs_ids is None
+    if rel_ssv_with_cs_ids is None:
         rel_ssv_with_cs_ids = filter_relevant_syn(cs_sd, ssd, log=log)
     del ssd, cs_sd
     storage_location_ids = get_unique_subfold_ixs(n_folders_fs)
@@ -648,6 +648,7 @@ def combine_and_split_cs(wd, ssd_version=None, cs_version=None, nb_cpus=None, n_
         raise FileExistsError(f'"{sd_cs_ssv.so_storage_path}" already exists, but overwrite was set to False.')
     shutil.rmtree(sd_cs_ssv.so_storage_path)
+    # prepare folder structure
     voxel_rel_paths_2stage = np.unique([subfold_from_ix(ix, n_folders_fs)[:-2]
                                         for ix in storage_location_ids])
@@ -664,7 +665,7 @@ def combine_and_split_cs(wd, ssd_version=None, cs_version=None, nb_cpus=None, n_
     if not qu.batchjob_enabled():
         _ = sm.start_multiprocess_imap(_combine_and_split_cs_thread, multi_params, nb_cpus=nb_cpus, debug=False)
     else:
-        _ = qu.batchjob_script(multi_params, "combine_and_split_cs", remove_jobfolder=True, log=log)
+        _ = qu.batchjob_script(multi_params, "combine_and_split_cs", remove_jobfolder=True, log=log, exclude_nodes=['wb02', 'wb03', 'wb04', 'wb05', 'wb06', 'wb07', 'wb08', 'wb09'])


 def _combine_and_split_cs_thread(args):
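The new `exclude_nodes` argument presumably ends up as a scheduler node-exclusion list; for SLURM that would be the `--exclude` flag (an assumption, since the plumbing inside `qu.batchjob_script` is not part of this diff). A sketch of how such a flag could be built:

```python
def build_exclude_flag(exclude_nodes):
    # hypothetical helper: render a node list as a SLURM --exclude flag
    if not exclude_nodes:
        return ''
    return '--exclude=' + ','.join(exclude_nodes)

# the node list passed in the call above
print(build_exclude_flag(['wb02', 'wb03', 'wb04', 'wb05',
                          'wb06', 'wb07', 'wb08', 'wb09']))
# --exclude=wb02,wb03,wb04,wb05,wb06,wb07,wb08,wb09
```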
@@ -691,6 +692,7 @@ def _combine_and_split_cs_thread(args):
     base_dir = sd_cs_ssv.so_storage_path + voxel_rel_paths[cur_path_id]
     os.makedirs(base_dir, exist_ok=True)
+    # get ID/path to storage to save intermediate results
     base_id = ix_from_subfold(voxel_rel_paths[cur_path_id], sd_cs.n_folders_fs)
     cs_ssv_id = base_id
@@ -699,7 +701,7 @@ def _combine_and_split_cs_thread(args):
     # iterate over cell partners and their contact site IDs (each contact site is between two supervoxels
     # of the partner cells)
     test_cs_id = 275781054487918910
     for ssvpartners_enc, cs_ids in rel_ssv_with_cs_ids_items:
         n_items_for_path += 1
@@ -722,12 +724,10 @@ def _combine_and_split_cs_thread(args):
         for mesh_cc in ccs:
-            cs_ssv = sd_cs_ssv.get_segmentation_object(cs_ssv_id)
+            cs_ssv = sd_cs_ssv.get_segmentation_object(cs_ssv_id, create=True)
             if (os.path.abspath(cs_ssv.attr_dict_path)
                     != os.path.abspath(base_dir + "/attr_dict.pkl")):
                 raise ValueError(f'Path mis-match!')
             csssv_attr_dc = dict(neuron_partners=ssv_ids)
             # don't store normals
             cs_ssv._mesh = [mesh_cc[0], mesh_cc[1], np.zeros((0,), dtype=np.float32)]
@@ -749,10 +749,6 @@ def _combine_and_split_cs_thread(args):
             # add cs_ssv dict to AttributeStorage
             attr_dc[cs_ssv_id] = csssv_attr_dc
-            if cs_ids[0] == test_cs_id:
-                print(cs_ssv_id)
-                print(attr_dc[cs_ssv_id])
-                print(base_dir)
             if use_new_subfold:
                 cs_ssv_id += np.uint(1)
                 if cs_ssv_id - base_id >= div_base:
@@ -767,8 +763,8 @@ def _combine_and_split_cs_thread(args):
                 cs_ssv_id += np.uint(sd_cs.n_folders_fs)
             if n_items_for_path > n_per_voxel_path:
-                #attr_dc.push()
-                #mesh_dc.push()
+                attr_dc.push()
+                mesh_dc.push()
                 cur_path_id += 1
                 if len(voxel_rel_paths) == cur_path_id:
                     raise ValueError(f'Worker ran out of possible storage paths for storing {sd_cs_ssv.type}.')
@@ -783,9 +779,9 @@ def _combine_and_split_cs_thread(args):
-    #if n_items_for_path > 0:
-        #attr_dc.push()
-        #mesh_dc.push()
+    if n_items_for_path > 0:
+        attr_dc.push()
+        mesh_dc.push()


 def cc_large_voxel_lists(voxel_list, cs_gap_nm, max_concurrent_nodes=5000, verbose=False):
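Re-enabling `push()` restores this worker's buffered-write pattern: results accumulate in per-path attribute/mesh dictionaries and are flushed once a storage path is full, with one final flush for the remainder. A generic sketch of the pattern, with a hypothetical `BufferedStorage` in place of the real attribute/mesh containers:

```python
class BufferedStorage(dict):
    """Hypothetical stand-in for the attr_dc/mesh_dc containers."""
    def push(self):
        # the real containers serialize to their storage path here
        print(f'flushing {len(self)} entries')
        self.clear()

attr_dc = BufferedStorage()
n_per_voxel_path, n_items_for_path = 2, 0
for cs_ssv_id in range(5):
    attr_dc[cs_ssv_id] = {'neuron_partners': (cs_ssv_id, cs_ssv_id + 1)}
    n_items_for_path += 1
    if n_items_for_path > n_per_voxel_path:
        attr_dc.push()      # flush a full storage path
        n_items_for_path = 0
if n_items_for_path > 0:
    attr_dc.push()          # final flush, as re-enabled above
```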
@@ -1260,6 +1260,10 @@ def str2int_converter(comment: str, gt_type: str) -> int:
         str2int_label = dict(STN=0, DA=1, MSN=2, LMAN=3, HVC=4, TAN=5, GPe=6, GPi=7,
                              FS=8, LTS=9, NGF=10)
         return str2int_label[comment]
+    elif gt_type == 'ctgt_j0251_v3':
+        str2int_label = dict(STN=0, DA=1, MSN=2, LMAN=3, HVC=4, TAN=5, GPe=6, GPi=7,
+                             FS=8, LTS=9, NGF=10, ASTRO=11, OLIGO=12, MICRO=13, FRAG=14)
+        return str2int_label[comment]
     else:
         raise ValueError("Given groundtruth type is not valid.")
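With the new `ctgt_j0251_v3` branch the converter covers 15 classes: the 11 neuron types keep their indices and four non-neuron labels are appended, matching the `num_classes = 15` changes elsewhere in this commit. A quick usage check against the mapping added above:

```python
assert str2int_converter('STN', 'ctgt_j0251_v3') == 0     # unchanged index
assert str2int_converter('ASTRO', 'ctgt_j0251_v3') == 11  # new glia label
assert str2int_converter('FRAG', 'ctgt_j0251_v3') == 14   # new fragment label
```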
@@ -1692,6 +1692,12 @@ def get_pt_kwargs(mdir: str) -> Tuple[dict, dict]:
     scale_fact = int(re.findall(r'_scale(\d+)_', mdir)[0])
     mkwargs = dict(use_norm=use_norm, track_running_stats=track_running_stats, act=activation, use_bias=use_bias)
     loader_kwargs = dict(ctx_size=ctx, scale_fact=scale_fact, npoints=npoints)
+    '''
+    mkwargs = dict(use_norm='gn',
+                   track_running_stats=False, act='relu', use_bias=True)
+    loader_kwargs = dict(ctx_size=20000, scale_fact=2000,
+                         npoints=50000)  # TODO: manually set by best guesses from training script, lookup
+    '''
     return mkwargs, loader_kwargs
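`get_pt_kwargs` recovers training hyperparameters by pattern-matching the model directory name, and the newly added triple-quoted block keeps a hard-coded fallback around for reference. A small demonstration of the naming convention (the directory name and the `_ctx` pattern are guesses in the same style as the `_scale` pattern shown above):

```python
import re

# hypothetical model directory following the parsed naming convention
mdir = '/models/celltype_pts_scale2000_nb50000_ctx20000_relu_gn/'
scale_fact = int(re.findall(r'_scale(\d+)_', mdir)[0])  # regex from above
ctx = int(re.findall(r'_ctx(\d+)_', mdir)[0])           # assumed analogue
print(scale_fact, ctx)  # 2000 20000
```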
@@ -1727,10 +1733,10 @@ def get_celltype_model_pts(mpath: Optional[str] = None, device='cuda') -> 'Infer
         mpath = global_params.config.mpath_celltype_pts
     from elektronn3.models.convpoint import ModelNet40
     mkwargs, loader_kwargs = get_pt_kwargs(mpath)
-    n_classes = 8
+    n_classes = 15
     n_inputs = 5
     if 'j0251' in mpath:
-        n_classes = 11
+        n_classes = 15
     if '_myelin' in mpath:
         n_inputs += 1
     if '_noSyntype' in mpath:
@@ -232,8 +232,10 @@ def batchjob_script(params: list, name: str,
             pkl.dump(param, f)
         os.chmod(this_sh_path, 0o744)
-        cmd_exec = "{0} --output={1} --error={2} --time=4-0 --job-name={3} {4}".format(
-            additional_flags, job_log_path, job_err_path, job_name, this_sh_path)
+        #cmd_exec = "{0} --output={1} --error={2} --time=4-0 --job-name={3} {4}".format(
+        #    additional_flags, job_log_path, job_err_path, job_name, this_sh_path)
+        cmd_exec = "{0} --output={1} --error={2} --job-name={3} {4}".format(
+            additional_flags, job_log_path, job_err_path, job_name, this_sh_path)
         if job_id == 0:
             log_batchjob.debug(f'Starting jobs with command "{cmd_exec}".')
         job_exec_dc[job_id] = cmd_exec
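The replacement submit command drops the hard `--time=4-0` (four-day) wall-clock limit, leaving the cluster's partition default in effect. Filling the format string with placeholder values shows the resulting call:

```python
# placeholder values; the real ones come from the surrounding function
additional_flags = 'sbatch --partition=example'
job_log_path, job_err_path = '/tmp/job.log', '/tmp/job.err'
job_name, this_sh_path = 'combine_and_split_cs_0', '/tmp/job_0.sh'

cmd_exec = "{0} --output={1} --error={2} --job-name={3} {4}".format(
    additional_flags, job_log_path, job_err_path, job_name, this_sh_path)
print(cmd_exec)
# sbatch --partition=example --output=/tmp/job.log --error=/tmp/job.err --job-name=combine_and_split_cs_0 /tmp/job_0.sh
```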
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755