Commit e2803341 authored by Philipp Schubert

Merge branch 'ptconv' into 'master'

Ptconv

See merge request pschuber/SyConn!36
parents 37182c51 dd7c8ab2
File mode changed from 100644 to 100755
@@ -13,7 +13,7 @@ import numpy as np
from syconn import global_params
from syconn.handler.config import generate_default_conf, initialize_logging
from syconn.handler.basics import FileTimer
from syconn.proc.stats import FileTimer
from knossos_utils import knossosdataset
@@ -42,9 +42,9 @@ if __name__ == '__main__':
prior_glia_removal = True
key_val_pairs_conf = [
('glia', {'prior_glia_removal': prior_glia_removal}),
('use_point_models', False),
('use_point_models', True),
('pyopengl_platform', 'egl'), # 'osmesa' or 'egl'
('batch_proc_system', None), # None, 'SLURM' or 'QSUB'
('batch_proc_system', 'SLURM'), # None, 'SLURM' or 'QSUB'
('ncores_per_node', 20),
('mem_per_node', 250000),
('ngpus_per_node', 2),
@@ -245,7 +245,7 @@ if __name__ == '__main__':
exec_syns.run_matrix_export()
ftimer.stop()
time_summary_str = ftimer.prepare_report(experiment_name)
time_summary_str = ftimer.prepare_report()
log.info(time_summary_str)
log.info('Setting up flask server for inspection. Annotated cell reconstructions and wiring '
'can be analyzed via the KNOSSOS-SyConn plugin at '
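# A minimal sketch of the FileTimer pattern used throughout this merge request
# (interface inferred from the calls shown in these diffs; the persistence
# format of the .pkl file is an assumption):
from syconn.proc.stats import FileTimer  # moved here from syconn.handler.basics

ftimer = FileTimer('/path/to/working_dir/.timing.pkl')  # timings are persisted to this file
ftimer.start('Preparation')     # open a named timing interval
# ... pipeline step runs here ...
ftimer.stop()                   # close the most recently started interval
print(ftimer.prepare_report())  # the experiment-name argument was dropped in this MR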
@@ -12,13 +12,13 @@ import networkx as nx
from syconn.handler.config import generate_default_conf, initialize_logging
from syconn import global_params
from syconn.handler.basics import FileTimer
from syconn.proc.stats import FileTimer
from syconn.exec import exec_init, exec_syns, exec_render, exec_dense_prediction, exec_inference, exec_skeleton
if __name__ == '__main__':
# ----------------- DEFAULT WORKING DIRECTORY ---------------------
working_dir = "/ssdscratch/pschuber/songbird/j0251/rag_flat_Jan2019_v2/"
working_dir = "/ssdscratch/pschuber/songbird/j0251/rag_flat_Jan2019_v3/"
experiment_name = 'j0251'
scale = np.array([10, 10, 25])
prior_glia_removal = True
@@ -77,9 +77,6 @@ if __name__ == '__main__':
ftimer = FileTimer(working_dir + '/.timing.pkl')
ftimer.start('Preparation')
bb = None
bd = None
# Preparing config
# currently this is where SyConn looks for the neuron RAG
if global_params.wd is not None:
@@ -104,14 +101,13 @@ if __name__ == '__main__':
raise ValueError('Could not find model "{}". Make sure to copy the'
' "models" folder into the current working '
'directory "{}".'.format(mpath, working_dir))
ftimer.stop()
# # Start SyConn
# # --------------------------------------------------------------------------
# log.info('Finished example cube initialization (shape: {}). Starting'
# ' SyConn pipeline.'.format(bd))
# log.info('Example data will be processed in "{}".'.format(working_dir))
# ftimer.stop()
#
log.info('Starting SyConn pipeline for data cube (shape: {}).'.format(ftimer.dataset_shape))
log.critical('Working directory is set to "{}".'.format(working_dir))
# log.info('Step 1/9 - Predicting sub-cellular structures')
# ftimer.start('Dense predictions')
# # myelin is not needed before `run_create_neuron_ssd`
@@ -126,7 +122,7 @@ if __name__ == '__main__':
# transf_func_kd_overlay=cellorganelle_transf_funcs,
# max_n_jobs=global_params.config.ncore_total * 4)
#
# # generate flattened RAG
# generate flattened RAG
# from syconn.reps.segmentation import SegmentationDataset
# sd = SegmentationDataset(obj_type="sv", working_dir=global_params.config.working_dir)
# rag_sub_g = nx.Graph()
@@ -135,62 +131,61 @@ if __name__ == '__main__':
# mesh_bb = np.linalg.norm(mesh_bb[:, 1] - mesh_bb[:, 0], axis=1)
# filtered_ids = sd.ids[mesh_bb > global_params.config['glia']['min_cc_size_ssv']]
# rag_sub_g.add_edges_from([[el, el] for el in sd.ids])
# log.info('{} SVs were added to the RAG after application of the size '
# 'filter.'.format(len(filtered_ids)))
# log.info('{} SVs were added to the RAG after applying size filter with bounding box '
# 'diagonal > {} nm.'.format(len(filtered_ids), global_params.config['glia']['min_cc_size_ssv']))
# nx.write_edgelist(rag_sub_g, global_params.config.init_rag_path)
#
# exec_init.run_create_rag()
# ftimer.stop()
#
# log.info('Step 3/9 - Glia separation')
# if global_params.config.prior_glia_removal:
# ftimer.start('Glia separation')
# if not global_params.config.use_point_models:
# exec_render.run_glia_rendering()
# exec_inference.run_glia_prediction()
# else:
# exec_inference.run_glia_prediction_pts()
# exec_inference.run_glia_splitting()
# ftimer.stop()
# else:
# log.info('Glia separation disabled. Skipping.')
#
# log.info('Step 4/9 - Creating SuperSegmentationDataset')
# ftimer.start('SSD generation')
# exec_init.run_create_neuron_ssd()
# ftimer.stop()
#
# log.info('Step 5/10 - Skeleton generation')
# ftimer.start('Skeleton generation')
# exec_skeleton.run_skeleton_generation(cube_of_interest_bb=cube_of_interest_bb)
# ftimer.stop()
#
#
# if not (global_params.config.use_onthefly_views or global_params.config.use_point_models):
# log.info('Step 4.5/9 - Neuron rendering')
# ftimer.start('Neuron rendering')
# exec_render.run_neuron_rendering()
# ftimer.stop()
#
# log.info('Step 5/9 - Synapse detection')
# ftimer.start('Synapse detection')
# exec_syns.run_syn_generation(chunk_size=chunk_size, n_folders_fs=n_folders_fs_sc)
# ftimer.stop()
log.info('Step 3/9 - Glia separation')
if global_params.config.prior_glia_removal:
ftimer.start('Glia separation')
if not global_params.config.use_point_models:
exec_render.run_glia_rendering()
exec_inference.run_glia_prediction()
else:
exec_inference.run_glia_prediction_pts()
exec_inference.run_glia_splitting()
ftimer.stop()
else:
log.info('Glia separation disabled. Skipping.')
log.info('Step 4/9 - Creating SuperSegmentationDataset')
ftimer.start('SSD generation')
exec_init.run_create_neuron_ssd()
ftimer.stop()
log.info('Step 5/10 - Skeleton generation')
ftimer.start('Skeleton generation')
exec_skeleton.run_skeleton_generation()
ftimer.stop()
# TODO: remove
exec_skeleton.run_kimimaro_skelgen()
if not (global_params.config.use_onthefly_views or global_params.config.use_point_models):
log.info('Step 4.5/9 - Neuron rendering')
ftimer.start('Neuron rendering')
exec_render.run_neuron_rendering()
ftimer.stop()
log.info('Step 5/9 - Synapse detection')
ftimer.start('Synapse detection')
exec_syns.run_syn_generation(chunk_size=chunk_size, n_folders_fs=n_folders_fs_sc)
ftimer.stop()
log.info('Step 6/9 - Compartment prediction')
ftimer.start('Compartment predictions')
exec_inference.run_semsegaxoness_prediction()
if not global_params.config.use_point_models:
exec_inference.run_semsegspiness_prediction()
ftimer.stop()
#
# TODO: this step can be launched in parallel with the morphology extraction!
ftimer.start('Spine head volume estimation')
exec_syns.run_spinehead_volume_calc()
ftimer.stop()
# Multi-view models were used until here; point models are used from now on.
raise RuntimeError('Manual stop before switching to point models.')
global_params.config['use_point_models'] = True
global_params.config.write_config()
time.sleep(10) # wait for changes to apply
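# The write-config-then-wait pattern above recurs before every model-backend
# switch; a small helper (hypothetical, not part of SyConn) could factor it out:
def set_point_models(enabled: bool, settle_s: int = 10):
    global_params.config['use_point_models'] = enabled
    global_params.config.write_config()  # persist the change to the working directory
    time.sleep(settle_s)                 # give follow-up jobs time to read the new config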
log.info('Step 7/9 - Morphology extraction')
ftimer.start('Morphology extraction')
exec_inference.run_morphology_embedding()
@@ -206,9 +201,9 @@ if __name__ == '__main__':
exec_syns.run_matrix_export()
ftimer.stop()
time_summary_str = ftimer.prepare_report(experiment_name)
time_summary_str = ftimer.prepare_report()
log.info(time_summary_str)
log.info('Setting up flask server for inspection. Annotated cell reconstructions and wiring '
'can be analyzed via the KNOSSOS-SyConn plugin at '
'`SyConn/scripts/kplugin/syconn_knossos_viewer.py`.')
os.system(f'syconn.server --working_dir={example_wd} --port=10001')
# log.info('Setting up flask server for inspection. Annotated cell reconstructions and wiring '
# 'can be analyzed via the KNOSSOS-SyConn plugin at '
# '`SyConn/scripts/kplugin/syconn_knossos_viewer.py`.')
# os.system(f'syconn.server --working_dir={example_wd} --port=10001')
@@ -7,36 +7,53 @@
import os
import time
import re
import numpy as np
import networkx as nx
import shutil
from multiprocessing import Process
from syconn.handler.basics import FileTimer
from syconn.proc.stats import FileTimer
from syconn.handler.config import generate_default_conf, initialize_logging
from syconn import global_params
from syconn.mp.batchjob_utils import batchjob_enabled
from syconn.mp.batchjob_utils import batchjob_enabled, nodestates_slurm
from syconn.exec import exec_init, exec_syns, exec_render, exec_dense_prediction, exec_inference, exec_skeleton
if __name__ == '__main__':
# ----------------- DEFAULT WORKING DIRECTORY ---------------------
test_point_models = True
test_view_models = True
assert test_point_models or test_view_models
experiment_name = 'j0251'
scale = np.array([10, 10, 25])
node_state = next(iter(nodestates_slurm().values()))
ncores_per_node = node_state['cpus']
mem_per_node = node_state['memory']
ngpus_per_node = 2  # node_state currently does not expose the GPU count ('gres' resource)
number_of_nodes = 24
shape_j0251 = np.array([27119, 27350, 15494])
cube_size = (np.array([2048, 2048, 1024]) * 7).astype(int)  # use *9 for ~3 TVx, *11 for ~5.7 TVx
# and the full data set for ~10 TVx
cube_offset = ((shape_j0251 - cube_size) // 2).astype(int)
cube_of_interest_bb = np.array([cube_offset, cube_offset + cube_size], dtype=int)
# cube_of_interest_bb = None # process the entire cube!
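# With the *7 factor above: cube_size = [14336, 14336, 7168] (~1.47 TVx) and
# cube_offset = [6391, 6507, 4163], i.e. the cube of interest is centered in the data set.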
# check that cluster is configured accordingly
assert number_of_nodes == np.sum([v['state'] == 'idle' for v in nodestates_slurm().values()])
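# nodestates_slurm() presumably parses `sinfo`/`scontrol` output into a dict keyed
# by node name; the schema sketched here is an assumption based on the keys used above:
# {'node01': {'state': 'idle', 'cpus': 32, 'memory': 208990}, ...}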
prior_glia_removal = True
use_point_models = True
key_val_pairs_conf = [
('glia', {'prior_glia_removal': prior_glia_removal, 'min_cc_size_ssv': 5000}), # in nm
('pyopengl_platform', 'egl'),
('batch_proc_system', 'SLURM'),
('ncores_per_node', 32),
('ncores_per_node', ncores_per_node),
('ngpus_per_node', 2),
('nnodes_total', 4),
('mem_per_node', 208990),
('use_point_models', False),
('nnodes_total', number_of_nodes),
('mem_per_node', mem_per_node),
('use_point_models', use_point_models),
('skeleton', {'use_kimimaro': True}),
('meshes', {'use_new_meshing': True}),
('views', {'use_onthefly_views': True,
'use_new_renderings_locs': True,
('views', {'use_new_renderings_locs': True,
'view_properties': {'nb_views': 3}
}),
('cell_objects',
@@ -48,9 +65,12 @@ if __name__ == '__main__':
'sj': ['binary_opening', 'binary_closing', 'binary_erosion'],
'vc': ['binary_opening', 'binary_closing', 'binary_erosion']}
}
)
),
('cube_of_interest_bb', cube_of_interest_bb.tolist())
]
chunk_size = (384, 384, 192)
chunk_size = (512, 512, 256)
if cube_size[0] <= 2048:
chunk_size = (256, 256, 256)
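# chunk_size presumably sets the subvolume processed per worker job; the smaller
# chunks for cubes <= 2048 px keep the job count high enough to utilize all nodes
# (rationale assumed, not stated in this commit).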
n_folders_fs = 10000
n_folders_fs_sc = 10000
@@ -77,14 +97,11 @@ if __name__ == '__main__':
# Prepare data
# --------------------------------------------------------------------------
# Setup working directory and logging
shape_j0251 = np.array([27119, 27350, 15494])
cube_size = np.array([2048, 2048, 1024]) * 4
cube_offset = (shape_j0251 - cube_size) // 2
cube_of_interest_bb = (cube_offset, cube_offset + cube_size)
# cube_of_interest_bb = None # process the entire cube!
working_dir = f"/mnt/example_runs/j0251_off{'_'.join(map(str, cube_offset))}_size{'_'.join(map(str, cube_size))}"
working_dir = f"/glusterfs/example_runs/j0251_off{'_'.join(map(str, cube_offset))}_size" \
f"{'_'.join(map(str, cube_size))}_{number_of_nodes}nodes"
log = initialize_logging(experiment_name, log_dir=working_dir + '/logs/')
ftimer = FileTimer(working_dir + '/.timing.pkl')
shutil.copy(os.path.abspath(__file__), f'{working_dir}/logs/')
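# the copy above archives the executed script next to its logs for provenance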
log.info('Step 0/10 - Preparation')
ftimer.start('Preparation')
@@ -101,7 +118,6 @@ if __name__ == '__main__':
if global_params.wd is not None:
log.critical('Example run started. Original working directory defined in `global_params.py` '
'is overwritten and set to "{}".'.format(working_dir))
generate_default_conf(working_dir, scale, syntype_avail=syntype_avail, kd_seg=seg_kd_path, kd_mi=mi_kd_path,
kd_vc=vc_kd_path, kd_sj=sj_kd_path, kd_sym=kd_sym_path, kd_asym=kd_asym_path,
key_value_pairs=key_val_pairs_conf, force_overwrite=True)
@@ -124,50 +140,63 @@ if __name__ == '__main__':
'working directory "{}".'.format(mpath, working_dir))
ftimer.stop()
# Start SyConn
# --------------------------------------------------------------------------
log.info('Finished example cube initialization (shape: {}). Starting'
' SyConn pipeline.'.format(cube_size))
log.info('Example data will be processed in "{}".'.format(working_dir))
log.info('Step 1/10 - Predicting sub-cellular structures')
# ftimer.start('Myelin prediction')
# # myelin is not needed before `run_create_neuron_ssd`
# exec_dense_prediction.predict_myelin(raw_kd_path, cube_of_interest=cube_of_interest_bb)
# # Start SyConn
# # --------------------------------------------------------------------------
# log.info('Finished example cube initialization (shape: {}). Starting'
# ' SyConn pipeline.'.format(cube_size))
# log.info('Example data will be processed in "{}".'.format(working_dir))
# #
# # log.info('Step 1/10 - Predicting sub-cellular structures')
# # ftimer.start('Myelin prediction')
# # # myelin is not needed before `run_create_neuron_ssd`
# # exec_dense_prediction.predict_myelin(raw_kd_path, cube_of_interest=cube_of_interest_bb)
# # ftimer.stop()
#
# log.info('Step 2/10 - Creating SegmentationDatasets (incl. SV meshes)')
# ftimer.start('SD generation')
# exec_init.init_cell_subcell_sds(chunk_size=chunk_size, n_folders_fs_sc=n_folders_fs_sc,
# n_folders_fs=n_folders_fs, cube_of_interest_bb=cube_of_interest_bb,
# load_cellorganelles_from_kd_overlaycubes=True,
# transf_func_kd_overlay=cellorganelle_transf_funcs)
#
# # generate flattened RAG
# from syconn.reps.segmentation import SegmentationDataset
# sd = SegmentationDataset(obj_type="sv", working_dir=global_params.config.working_dir)
# rag_sub_g = nx.Graph()
# # add SV IDs to graph via self-edges
# mesh_bb = sd.load_cached_data('mesh_bb') # N, 2, 3
# mesh_bb = np.linalg.norm(mesh_bb[:, 1] - mesh_bb[:, 0], axis=1)
# filtered_ids = sd.ids[mesh_bb > global_params.config['glia']['min_cc_size_ssv']]
# rag_sub_g.add_edges_from([[el, el] for el in sd.ids])
# log.info('{} SVs were added to the RAG after applying the size '
# 'filter.'.format(len(filtered_ids)))
# nx.write_edgelist(rag_sub_g, global_params.config.init_rag_path)
#
# exec_init.run_create_rag()
# ftimer.stop()
log.info('Step 2/10 - Creating SegmentationDatasets (incl. SV meshes)')
ftimer.start('SD generation')
exec_init.init_cell_subcell_sds(chunk_size=chunk_size, n_folders_fs_sc=n_folders_fs_sc,
n_folders_fs=n_folders_fs, cube_of_interest_bb=cube_of_interest_bb,
load_cellorganelles_from_kd_overlaycubes=True,
transf_func_kd_overlay=cellorganelle_transf_funcs,
max_n_jobs=global_params.config.ncore_total * 4)
# generate flattened RAG
from syconn.reps.segmentation import SegmentationDataset
sd = SegmentationDataset(obj_type="sv", working_dir=global_params.config.working_dir)
rag_sub_g = nx.Graph()
# add SV IDs to graph via self-edges
mesh_bb = sd.load_cached_data('mesh_bb') # N, 2, 3
mesh_bb = np.linalg.norm(mesh_bb[:, 1] - mesh_bb[:, 0], axis=1)
filtered_ids = sd.ids[mesh_bb > global_params.config['glia']['min_cc_size_ssv']]
rag_sub_g.add_edges_from([[el, el] for el in sd.ids])
log.info('{} SVs were added to the RAG after application of the size '
'filter.'.format(len(filtered_ids)))
nx.write_edgelist(rag_sub_g, global_params.config.init_rag_path)
exec_init.run_create_rag()
ftimer.stop()
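# Note: `filtered_ids` is only computed and logged above; every SV ID gets a
# self-edge. A variant that would actually apply the size filter (a sketch, not
# what this commit does):
# rag_sub_g.add_edges_from([[el, el] for el in filtered_ids])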
log.info('Step 3/10 - Glia separation')
if global_params.config.prior_glia_removal:
ftimer.start('Glia separation')
if not global_params.config.use_point_models:
if test_view_models:
global_params.config['use_point_models'] = False
global_params.config.write_config()
time.sleep(10) # wait for changes to apply
ftimer.start('Glia prediction (multi-views)')
# if not global_params.config.use_point_models:
exec_render.run_glia_rendering()
exec_inference.run_glia_prediction()
else:
ftimer.stop()
# else:
if test_point_models:
global_params.config['use_point_models'] = True
global_params.config.write_config()
time.sleep(10) # wait for changes to apply
ftimer.start('Glia prediction (points)')
exec_inference.run_glia_prediction_pts()
ftimer.stop()
ftimer.start('Glia splitting')
exec_inference.run_glia_splitting()
ftimer.stop()
@@ -197,9 +226,11 @@ if __name__ == '__main__':
ftimer.stop()
# skeleton generation, synapse detection, and rendering have independent dependencies and target storage
assert global_params.config.use_onthefly_views, ('"use_onthefly_views" must be True to enable parallel '
'execution of skel and syn generation')
procs = []
for func in [start_syn_gen, start_skel_gen, start_neuron_rendering]:
if not batchjob_enabled():
if True:  # not batchjob_enabled(): do not use parallel processing for timings
func()
continue
p = Process(target=func)
@@ -214,34 +245,75 @@ if __name__ == '__main__':
p.close()
log.info('Step 7/10 - Compartment prediction')
ftimer.start('Compartment predictions')
exec_inference.run_semsegaxoness_prediction()
if not global_params.config.use_point_models:
if test_view_models:
global_params.config['use_point_models'] = False
global_params.config.write_config()
time.sleep(10) # wait for changes to apply
ftimer.start('Compartment predictions (multi-views)')
exec_inference.run_semsegaxoness_prediction()
exec_inference.run_semsegspiness_prediction()
ftimer.stop()
ftimer.stop()
# if not global_params.config.use_point_models:
if test_point_models:
ftimer.start('Compartment predictions (points)')
global_params.config['use_point_models'] = True
global_params.config.write_config()
exec_inference.run_semsegaxoness_prediction()
ftimer.stop()
# TODO: this step can be launched in parallel with the morphology extraction!
ftimer.start('Spine head calculation')
exec_syns.run_spinehead_volume_calc()
ftimer.stop()
log.info('Step 8/10 - Morphology extraction')
ftimer.start('Morphology extraction')
exec_inference.run_morphology_embedding()
ftimer.stop()
if test_view_models:
global_params.config['use_point_models'] = False
global_params.config.write_config()
time.sleep(10) # wait for changes to apply
ftimer.start('Morphology extraction (multi-views)')
exec_inference.run_morphology_embedding()
ftimer.stop()
if test_point_models:
global_params.config['use_point_models'] = True
global_params.config.write_config()
time.sleep(10) # wait for changes to apply
ftimer.start('Morphology extraction (points)')
exec_inference.run_morphology_embedding()
ftimer.stop()
log.info('Step 9/10 - Celltype analysis')
ftimer.start('Celltype analysis')
exec_inference.run_celltype_prediction()
ftimer.stop()
if test_view_models:
global_params.config['use_point_models'] = False
global_params.config.write_config()
time.sleep(10) # wait for changes to apply
ftimer.start('Celltype analysis (multi-views)')
exec_inference.run_celltype_prediction()
ftimer.stop()
if test_point_models:
global_params.config['use_point_models'] = True
global_params.config.write_config()
time.sleep(10) # wait for changes to apply
ftimer.start('Celltype analysis (points)')
exec_inference.run_celltype_prediction()
ftimer.stop()
log.info('Step 10/10 - Matrix export')
ftimer.start('Matrix export')
exec_syns.run_matrix_export()
ftimer.stop()
time_summary_str = ftimer.prepare_report(experiment_name)
time_summary_str = ftimer.prepare_report()
log.info(time_summary_str)
log.info('Setting up flask server for inspection. Annotated cell reconstructions and wiring can be analyzed via '
'the KNOSSOS-SyConn plugin at `SyConn/scripts/kplugin/syconn_knossos_viewer.py`.')
os.system(f'syconn.server --working_dir={working_dir} --port=10001')
# remove unimportant stuff for timings
import glob
import tqdm
if test_view_models:
for fname in tqdm.tqdm(glob.glob(working_dir + '/sv_0/so_storage*/*'), desc='SVs'):
shutil.rmtree(fname)
tmp_del_dir = f'{working_dir}/DEL_{cube_size[0]}_cube/'
os.makedirs(tmp_del_dir)
for d in tqdm.tqdm(['models', 'vc_0', 'sj_0', 'syn_ssv_0', 'syn_0', 'ssv_0', 'mi_0', 'cs_0',
'knossosdatasets', 'SLURM', 'tmp', 'chunkdatasets', 'ssv_gliaremoval'], desc='Folders'):
shutil.move(f'{working_dir}/{d}', tmp_del_dir)
shutil.move(tmp_del_dir, f'{working_dir}/../')
from syconn.proc.stats import FileTimer
from syconn.handler.config import initialize_logging
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
from collections import defaultdict
import os
import glob
# https://www.statsmodels.org/stable/generated/statsmodels.regression.linear_model.OLS.html
import statsmodels.api as sm
palette_ident = 'colorblind'
def get_speed_plots():
sns.set_style("ticks", {"xtick.major.size": 20, "ytick.major.size": 20})
wds = glob.glob('/mnt/example_runs/j0251_*')
base_dir = '/mnt/example_runs/timings/'
os.makedirs(base_dir, exist_ok=True)
log = initialize_logging('speed_plots', log_dir=base_dir)
res_dc = {'time': [], 'step': [], 'datasize[mm3]': [], 'datasize[GVx]': [],
'speed[mm3]': [], 'speed[GVx]': []}
for wd in sorted(wds, key=lambda x: FileTimer(x).dataset_mm3):
ft = FileTimer(wd, add_detail_vols=True)
log.info(f'\n-----------------------------------\nLoading time data of "{ft.working_dir}"')
log.info(f'{ft.prepare_report()}')
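# `add_detail_vols=True` presumably populates per-compartment volumes; the keys
# accessed below ('cube', 'neuron', 'glia') are taken from this script itself.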
# no reasonable volume information for these steps:
for name in ['Preparation', 'Matrix export', 'Spine head calculation', 'Glia splitting']:
del ft.timings[name]
for name, dt in ft.timings.items():
dt = dt / 3600
res_dc['time'].append(dt)
res_dc['step'].append(name)
res_dc['datasize[mm3]'].append(ft.dataset_mm3['cube'])
res_dc['datasize[GVx]'].append(ft.dataset_nvoxels['cube'])
# use the actually processed volumes (e.g. the full cube for synapses, the glia-free RAG for cell-type inference)
if 'glia' in name.lower():
vol_mm3 = ft.dataset_mm3['neuron'] + ft.dataset_mm3['glia']
vol_nvox = ft.dataset_nvoxels['neuron'] + ft.dataset_nvoxels['glia']
elif name in ['SD generation', 'Synapse detection', 'Skeleton generation']:
vol_mm3 = ft.dataset_mm3['cube']
vol_nvox = ft.dataset_nvoxels['cube']
else:
vol_mm3 = ft.dataset_mm3['neuron']
vol_nvox = ft.dataset_nvoxels['neuron']
res_dc['speed[mm3]'].append(vol_mm3 / dt)
res_dc['speed[GVx]'].append(vol_nvox / dt)
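# timings are presumably stored in seconds: dt / 3600 converts to hours, so speeds are in mm^3/h and GVx/h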
assert len(wds) > 0
palette = sns.color_palette(n_colors=len(np.unique(res_dc['step'])), palette=palette_ident)
palette = {k: v for k, v in zip(np.unique(res_dc['step']), palette)}
df = pd.DataFrame(data=res_dc)
df.to_csv(f'{base_dir}/speed_data.csv')
fmt = '{:0.2f}'
# Speed bar plot
plt.figure()
axes = sns.barplot(data=df, x="datasize[GVx]", y="speed[GVx]", hue="step", palette=palette)
axes.legend(*axes.get_legend_handles_labels(), bbox_to_anchor=(1.05, 1),
loc='upper left', borderaxespad=0.)
axes.set_ylabel('speed [GVx / h]')
axes.set_xlabel('size [GVx]')
xticklabels = []
for item in axes.get_xticklabels():
item.set_text(fmt.format(float(item.get_text())))
xticklabels += [item]
axes.set_xticklabels(xticklabels)
plt.subplots_adjust(right=0.5)
plt.savefig(base_dir + '/speed_barplot.png')
plt.close()
# Speed scatter plot regression
log_reg = initialize_logging('speed_pointplot_reg', log_dir=base_dir)
plt.figure()
axes = sns.scatterplot(data=df, x="datasize[GVx]", y="speed[GVx]", hue="step", palette=palette)
for step in np.unique(res_dc['step']):
x = np.array([df['datasize[GVx]'][jj] for jj in range(len(df['datasize[GVx]'])) if df['step'][jj] == step])
y = [df['speed[GVx]'][jj] for jj in range(len(df['datasize[GVx]'])) if df['step'][jj] == step]
# mod = sm.OLS(np.log(y), sm.add_constant(x), weights=np.sqrt(y))  # weight large y so that they are
# # weighted equally to small values; the discrepancy is due to the log() transform.
# res = mod.fit()
# log.info(res.summary())
# x_fit = np.linspace(np.min(x), np.max(x), 1000)
# y_fit = np.exp(res.params[1] * x_fit + res.params[0])
mod = sm.OLS(y, sm.add_constant(x))
res = mod.fit()
log_reg.info(f'Fit summary for step "{step}"')
log_reg.info(res.summary())
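# With sm.add_constant(x), res.params holds [intercept, slope], so the fitted
# line could be drawn as (sketch, matching the commented-out log-fit above):
# x_fit = np.linspace(x.min(), x.max(), 100)
# y_fit = res.params[0] + res.params[1] * x_fit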