From 2cfee989c6e5db99c6fefad7acf3c05f794a2f17 Mon Sep 17 00:00:00 2001
From: Philipp Schubert <p.schubert@stud.uni-heidelberg.de>
Date: Thu, 11 Oct 2018 15:21:14 +0200
Subject: [PATCH] updated docs II

---
 README.md                                    | 10 ++--
 docs/analysis_parts.md                       |  3 --
 docs/contact_site_classification.md          |  4 ++
 docs/doc.md                                  | 33 ++++++++-----
 docs/from_probmaps_to_objects.md             | 17 +++++--
 docs/glia_removal.md                         | 23 +++++++++
 docs/neuron_analysis.md                      | 15 ++++++
 docs/super_segmentation_datasets.md          | 13 +++--
 docs/views.md                                | 30 -----------
 syconn/config/global_params.py               |  7 ++-
 syconn/extraction/object_extraction_steps.py | 52 ++++++++++----------
 syconn/ui/__init__.py                        |  3 --
 12 files changed, 115 insertions(+), 95 deletions(-)
 delete mode 100644 docs/analysis_parts.md
 create mode 100644 docs/glia_removal.md
 create mode 100644 docs/neuron_analysis.md
 delete mode 100644 docs/views.md

diff --git a/README.md b/README.md
index 2b2770eb..84b32af1 100644
--- a/README.md
+++ b/README.md
@@ -9,10 +9,10 @@ Version 2 currently features:
 - [glia identification and splitting](https://www.biorxiv.org/content/early/2018/07/06/364034)
 - generation of connectivity matrix
 
-## System Requirements & Installation
+## System requirements & installation
 * Python 3.5
 * The whole pipeline was designed and tested on Linux systems (CentOS, Arch)
-* SyConn is based on the packages [elektronn](http://elektronn.org)_, [knossos-utils](https://github.com/knossos-project/knossos_utils)
+* SyConn is based on the packages [elektronn](http://elektronn.org), [knossos-utils](https://github.com/knossos-project/knossos_utils)
  is used for visualization and annotation of 3D EM data sets.
 * [VIGRA](https://ukoethe.github.io/vigra/), e.g. ``conda install -c ukoethe vigra``
 * osmesa, e.g.: ``conda install -c menpo osmesa``
@@ -24,11 +24,11 @@ You can install SyConn using  ``git`` and  ``pip``:
     pip install -r requirements.txt
     pip install .
 
-## Tutorials & Documentation
+## Tutorials & documentation
 
-For tutorials see [here](docs/doc.md).
+For the SyConn documentation see [here](docs/doc.md).
 
-To build the documentation run `make html` in the `docs` folder.
+To build the API documentation run `make html` in the `docs` folder.
 
 # The Team
 The Synaptic connectivity inference toolkit developed is developed at Max-Planck-Institute of Neurobiology, Munich.
diff --git a/docs/analysis_parts.md b/docs/analysis_parts.md
deleted file mode 100644
index f3f86b3e..00000000
--- a/docs/analysis_parts.md
+++ /dev/null
@@ -1,3 +0,0 @@
-# Analysis steps
-
-_in progress_
\ No newline at end of file
diff --git a/docs/contact_site_classification.md b/docs/contact_site_classification.md
index a0145917..f7efafe9 100644
--- a/docs/contact_site_classification.md
+++ b/docs/contact_site_classification.md
@@ -3,6 +3,9 @@
 Contact sites are the basis for synaptic classification. Therefore, contact sites need to be combined with the synapse `SegmentationObjects` and then classified as synaptic or not-synaptic using an Random Forest Classifier (RFC).
 The code is in `syconn.extraction.cs_processing_steps`, `syconn.proc.sd_proc` and `syconn.proc.ssd_proc`.
 
+## Synapse type [TODO: check where this information is actually needed!]
+Information about the [synapse type](synapse_type.md) can be inferred from a trained CNN model and be used in the wiring diagram at a later stage.
+
 ## Overlap mapping 
 
 Synapse `SegmentationObjects` are mapped to contact sites by volume overlap the same way `SegmentationObjects` are mapped to supervoxels. First, the aggreagted contact sites (see `contact_site_extraction`) need to be exported to a `knossosdataset`:
@@ -41,6 +44,7 @@ creates the ground truth for the RFC and also trains and stores the classifier.
 cps.classify_conn_objects(working_dir, qsub_pe=my_qsub_pe, n_max_co_processes=100)
 ```
 
+
 ## Collecting directionality information (axoness)
 
 `conn` `SegmentationObjects` can acquire information about the "axoness" of both partners around the synapse. This allows
diff --git a/docs/doc.md b/docs/doc.md
index 188bca84..71144a68 100644
--- a/docs/doc.md
+++ b/docs/doc.md
@@ -1,29 +1,36 @@
-# Tutorials
+# Documentation
 
-* [Working directory and config setup](config.md)
+## Package structure and data classes
+The basic data structures and initialization procedures are explained in the following sections:
 
-* [Data classes](segmentation_datasets.md) for handling super voxel data
+* SyConn operates with pre-defined [working directory and config files](config.md)
 
-* Data class to store and handle a set of agglomerated super voxels (SSVs) called SSD ([SuperSegmentation](super_segmentation_datasets.md))
+* Super voxels (and cellular organelles) are stored in the SegmentationObject data class ([SO](segmentation_datasets.md)), which are
+organized in [SegmentationDatasets](segmentation_datasets.md).
 
-* [Mapping cellular organelles](object_mapping.md) to SSVs
+* SyConn principally supports different [backends](backend.md) for data storage
 
-* Data class to store agglomerated super voxels [SSO](super_segmentation_objects.md)
+* Agglomerated super voxels (SVs) are implemented as SuperSegmentationObjects ([SSO](super_segmentation_objects.md)). The collection
+ of super-SVs are usually defined in a region adjacency graph (RAG) which is used to initialize the SuperSegmentationDataset
+  ([SSD](super_segmentation_datasets.md)).
 
-* [Skeletons](skeletons.md) of (super) super voxel
+* [Skeletons](skeletons.md) of (super) super voxels
 
-* [Mesh](meshes.md) generation
+* [Mesh](meshes.md) generation and representation of SOs
 
-* [Multi-view](views.md) approaches
+* [Multi-view](views.md) representation of SSOs
 
-* [Contact site extraction](contact_site_extraction.md)
 
-* [Identification of synapses](contact_site_classification.md)
+## Analysis steps
+After initialization of the SDs (SVs and cellular organelles) and SSD (the segmentation defined by agglomerated SVs) SyConn allows
+the application of several analysis procedures:
 
-* [Synapse type](synapse_type.md) prediction
+* [Optional] [Glia removal](glia_removal.md)
 
+* [Neuron analysis](neuron_analysis.md) such as cellular compartment, spine and cell type classification
 
-For more detailed descriptions of parts of the analysis pipeline see [here](analysis_parts.md).
+* [Contact site extraction](contact_site_extraction.md)
 
+* [Identification of synapses and extraction of a wiring diagram](contact_site_classification.md)
 
 
diff --git a/docs/from_probmaps_to_objects.md b/docs/from_probmaps_to_objects.md
index 6e518a35..8f812cf8 100644
--- a/docs/from_probmaps_to_objects.md
+++ b/docs/from_probmaps_to_objects.md
@@ -1,4 +1,4 @@
-# Creation of segmentation objects
+# Generation of SegmentationDataset
 
 Probability maps and segmentations are stored in `ChunkDatasets` (see `chunky.py` in `knossos_utils`) 
 and are transformed to `SegmentationDatasets` (see `segmentationdataset` in `syconn.reps`) in multiple steps. 
@@ -20,11 +20,18 @@ specific prediction within the `ChunkDataset` and the `hdf5names` in the file th
 The wrappers sequentially call specific functions from `object_extraction_steps.py`. Parallelism is only 
 possible within these steps. `from_ids_to_objects` starts at step 4.
 
-1. **Connected components** within each chunk are created for each chunk by applying a Gaussian smoothing  (optional) and threshold first (`gauss_threshold_connected_components(...)`).
+1. **Connected components** within each chunk (chunk-wise segmentation) are created
+by applying a Gaussian smoothing (optional) and thresholding first (see method `object_segmentation(...)`).
+Note that the default procedure with smoothing and a subsequent thresholding can be replaced by
+ passing a custom method to the method via the kwargs `transform_func=None, transform_func_kwargs=None`. The provided method has to
+ obey the function signature of `_gauss_threshold_connected_components_thread`.
 2. `make_unique_labels` reassignes globally **unique labels** to all segments
-3. `make_stitch_list` collects information of which segments in different chunks are in fact the same and `make_merge_list` resolves this to a global **mergelist** that is then applied by `apply_merge_list`.
-4. `extract_voxels` writes the voxels of each object to a **temporary voxel storage** (similar to the voxel store of a `SegmentationDataset`) and guarantees no write conflicts.
-5. In `combine_voxels` each worker then reads the voxels belonging to each object from the temporary voxel storage and writes them to their final location, essentially **creating a `SegmentationDataset`**.
+3. `make_stitch_list` collects information of which segments in different
+chunks are in fact the same and `make_merge_list` resolves this to a global **mergelist** that is then applied by `apply_merge_list`.
+4. `extract_voxels` writes the voxels of each object to a **temporary voxel storage** (
+similar to the voxel store of a `SegmentationDataset`) and guarantees no write conflicts.
+5. In `combine_voxels` each worker then reads the voxels belonging to each object from the
+temporary voxel storage and writes them to their final location, essentially **creating a `SegmentationDataset`**.
 
 Steps 4 and 5 are necessary to prevent two workers to write to the same `VoxelDict` (hence, to avoid having locks) . This would happen because an object extends 
 over multiple chunks or because the ids of two different objects are assigned to the same `VoxelDict`. it also allows to balancing the 
diff --git a/docs/glia_removal.md b/docs/glia_removal.md
new file mode 100644
index 00000000..b18a17dc
--- /dev/null
+++ b/docs/glia_removal.md
@@ -0,0 +1,23 @@
+# Glia removal
+All scripts used for glia removal are located in `SyConn/scripts/multiviews_glia/`.
+
+## Prerequisites
+* KNOSSOS- and SegmentationDataset of the super voxel segmentation
+* Initial RAG/SV-mapping
+
+## Steps
+For generating the multi-views prior to glia removal run:
+`start_sso_rendering_glia_removal.py`
+It is necessary to provide the script with an initial RAG/SV-mapping.
+The definitions of the agglomerated SVs are used as context for the glia prediction.
+In order to start the glia prediction run:
+`glia_prediction.py`
+
+SVs with predicted glia labels will be removed via a splitting heuristic.
+For splitting and generating the glia-free region adjacency graph (RAG) run:
+`glia_splitting.py`
+
+## Creating new SuperSegmentationDataset
+Now create a new SSD, the post-glia-removal SSD, and run the analysis to
+ assign cell objects (mitochondria, vesicle clouds and synaptic junctions)
+ to all its SSVs (see [SSD section](super_segmentation_datasets.md))
diff --git a/docs/neuron_analysis.md b/docs/neuron_analysis.md
new file mode 100644
index 00000000..21b4a0af
--- /dev/null
+++ b/docs/neuron_analysis.md
@@ -0,0 +1,15 @@
+# Neuron analysis
+All scripts used for the analysis of the neuron segmentation are located in `SyConn/scripts/multiviews_neuron/`.
+
+## Prerequisites
+* [Optional] [Glia removal](glia_removal.md)
+* KNOSSOS- and SegmentationDataset of the super voxel segmentation
+* SegmentationDatasets for all cellular organelles (currently mitochondria, vesicle clouds and synaptic junctions)
+* Initial RAG/SV-mapping
+* [Mapped cellular organelles](object_mapping.md) to SSVs
+
+## Steps
+The multi-views which contain channels for cell objects and SSV outline
+ are the basis for predicting cell compartments, cell type and spines.
+To generate these views run:
+`start_sso_rendering.py`
diff --git a/docs/super_segmentation_datasets.md b/docs/super_segmentation_datasets.md
index 2d72b867..5145cccf 100644
--- a/docs/super_segmentation_datasets.md
+++ b/docs/super_segmentation_datasets.md
@@ -1,10 +1,11 @@
 # SuperSegmentation datasets
 
-`SuperSegmentationDatasets` (SSD) and `SuperSegmentationObjects` (SSO; see corresponding section) are implemented in `super_segmentation_object.py` and `super_segmentation_object` (`syconn.reps`). 
-It is accompanied by helper functions in `super_segmentation_helper.py` for basic functionality such as loading and storing and 
-`ssd_proc.py` and `ssd_proc.assembly` (`syconn.proc`) which contain processing methods. 
+`SuperSegmentationDatasets` (SSD) and `SuperSegmentationObjects` (SSO; see corresponding section)
+ are implemented in `super_segmentation_object.py` and `super_segmentation_object` (`syconn.reps`).
+It is accompanied by helper functions in `super_segmentation_helper.py` for basic functionality such as
+ loading and storing and `ssd_proc.py` and `ssd_proc.assembly` (`syconn.proc`) which contain processing methods.
 
-Typically, initializing the SSD happens after glia removal. 
+The first initialization of an SSD usually happens after glia removal.
 Please check the corresponding documentation to learn more about that.
 
 
@@ -12,11 +13,9 @@ Please check the corresponding documentation to learn more about that.
 
 In order to create a SuperSegmentationDataset from scratch one has to provide
 the agglomerated super voxel (SSV) defined as a dict (coming soon!; agglomeration_source; keys: SSV IDs and values: list of SVs) or stored as a
-KNOSSOS mergelist (text file; variable holding the path string: agglomeration_source) and parse it
+KNOSSOS mergelist (text file; variable holding the path string: agglomeration_source) and pass it
 to the constructor (kwarg: 'sv_mapping').
 
-
-
     ssd = ss.SuperSegmentationDataset(working_dir="/wholebrain/scratch/areaxfs3/",
                                       version="spgt", ssd_type="ssv",
                                       sv_mapping=agglomeration_source)
diff --git a/docs/views.md b/docs/views.md
deleted file mode 100644
index 128bb8a1..00000000
--- a/docs/views.md
+++ /dev/null
@@ -1,30 +0,0 @@
-# (Multi-)View models
-View-related QSUB scripts are located at `SyConn/scripts/glia/` and `SyConn/scripts/multi_views/`.
-
-## Glia removal
-_in progress_
-
-For generating the multi-views prior to glia removal run:
-`start_sso_rendering_glia_removal.py`
-
-In order to start the glia prediction run:
-`glia_prediction.py`
-
-For splitting and generating the glia-free region adjacency graph (RAG) run:
-`glia_splitting.py`
-
-## Creating new SuperSegmentationDataset
-Now create a new SSD, the post-glia-removal SSD, and run the analysis to
- assign cell objects (mitochondria, vesicle clouds and synaptic junctions)
- to all its SSVs #TODO: what exactly has to be called for that?
-
-## Cellular morphology learning neural networks
-Now we can extract the multi-views which contain channels for cell objects and
- are the basis for predicting cell compartments, cell type and spines (coming soon).
-
-Run:
-`start_sso_rendering.py`
-
-
-# Groundtruth generation
-TBD
\ No newline at end of file
diff --git a/syconn/config/global_params.py b/syconn/config/global_params.py
index b50bef0c..d8b46b3c 100644
--- a/syconn/config/global_params.py
+++ b/syconn/config/global_params.py
@@ -18,17 +18,16 @@ backend = "FS"
 min_cc_size_glia = 8e3  # in nm; L1-norm on vertex bounding box
 # min. connected component size of neuron nodes/SV after thresholding glia proba
 min_cc_size_neuron = 8e3  # in nm; L1-norm on vertex bounding box
-
 min_single_sv_size = 30000  # in number of voxels
+glia_thresh = 0.161489  # Threshold for glia classification
 
-# Threshold for glia classification
-glia_thresh = 0.161489  #
+# --------------------------------------------------------------- MESH PARAMETER
 
 MESH_DOWNSAMPLING = {"sv": (8, 8, 4), "sj": (2, 2, 1), "vc": (4, 4, 2),
                      "mi": (8, 8, 4), "cs": (2, 2, 1), "conn": (2, 2, 1)}
 MESH_CLOSING = {"sv": 0, "sj": 0, "vc": 0, "mi": 0, "cs": 0, "conn": 4}
 
-SKEL_FEATURE_CONTEXT = {"axoness": 8000, "spiness": 1000} # in nm
+SKEL_FEATURE_CONTEXT = {"axoness": 8000, "spiness": 1000}  # in nm
 
 DISABLE_FILE_LOGGING = True
 
diff --git a/syconn/extraction/object_extraction_steps.py b/syconn/extraction/object_extraction_steps.py
index 5b76794a..d1a0b614 100644
--- a/syconn/extraction/object_extraction_steps.py
+++ b/syconn/extraction/object_extraction_steps.py
@@ -33,24 +33,14 @@ def gauss_threshold_connected_components(*args, **kwargs):
     return object_segmentation(*args, **kwargs)
 
 
-def object_segmentation(cset, filename, hdf5names,
-                                         overlap="auto", sigmas=None,
-                                         thresholds=None,
-                                         chunk_list=None,
-                                         debug=False,
-                                         swapdata=False,
-                                         prob_kd_path_dict=None,
-                                         membrane_filename=None,
-                                         membrane_kd_path=None,
-                                         hdf5_name_membrane=None,
-                                         fast_load=False,
-                                         suffix="",
-                                         qsub_pe=None,
-                                         qsub_queue=None,
-                                         nb_cpus=1,
-                                         n_max_co_processes=100,
-                                         transform_func=None,
-                                         func_kwargs=None):
+def object_segmentation(cset, filename, hdf5names, overlap="auto", sigmas=None,
+                        thresholds=None, chunk_list=None, debug=False,
+                        swapdata=False, prob_kd_path_dict=None,
+                        membrane_filename=None, membrane_kd_path=None,
+                        hdf5_name_membrane=None, fast_load=False, suffix="",
+                        qsub_pe=None, qsub_queue=None, nb_cpus=1,
+                        n_max_co_processes=100, transform_func=None,
+                        transform_func_kwargs=None):
     """
     Extracts connected component from probability maps
     1. Gaussian filter (defined by sigma)
@@ -112,7 +102,7 @@ def object_segmentation(cset, filename, hdf5names,
         qsub queue
     transform_func: callable
         Segmentation method which is applied
-    func_kwargs : dict
+    transform_func_kwargs : dict
         key word arguments for transform_func
 
     Returns
@@ -151,10 +141,6 @@ def object_segmentation(cset, filename, hdf5names,
 
         overlap = np.ceil(max_sigma * 4) + stitch_overlap
 
-    # print("overlap:", overlap)
-
-    # print("thresholds:", thresholds)
-
     multi_params = []
     for nb_chunk in chunk_list:
         multi_params.append(
@@ -162,7 +148,7 @@ def object_segmentation(cset, filename, hdf5names,
              hdf5names, overlap,
              sigmas, thresholds, swapdata, prob_kd_path_dict,
              membrane_filename, membrane_kd_path,
-             hdf5_name_membrane, fast_load, suffix, func_kwargs])
+             hdf5_name_membrane, fast_load, suffix, transform_func_kwargs])
 
     if qsub_pe is None and qsub_queue is None:
         results = sm.start_multiprocess_imap(transform_func,
@@ -196,8 +182,24 @@ def object_segmentation(cset, filename, hdf5names,
 
 
 def _gauss_threshold_connected_components_thread(args):
-    """ Default worker of object_segmentation """
+    """
+    Default worker of object_segmentation. Performs a gaussian blur with
+     subsequent thresholding to extract connected components of a probability
+     map. Result summaries are returned and connected components are stored as
+     .h5 files.
+     TODO: Add generic '_segmentation_thread' to enable a clean support of
+     custom-made segmentation functions passed to 'object_segmentation' via
+     'transform_func'-kwargs
 
+    Parameters
+    ----------
+    args : list
+
+    Returns
+    -------
+    list of lists
+        Results of connected component analysis
+    """
     chunk = args[0]
     path_head_folder = args[1]
     filename = args[2]
diff --git a/syconn/ui/__init__.py b/syconn/ui/__init__.py
index 9c8739bc..e69de29b 100644
--- a/syconn/ui/__init__.py
+++ b/syconn/ui/__init__.py
@@ -1,3 +0,0 @@
-# SyConn
-# Copyright (c) 2016 Philipp J. Schubert
-# All rights reserved
\ No newline at end of file
-- 
GitLab