From 759e8d2caa2e614cc40ac041ae4e34f6cc814e3d Mon Sep 17 00:00:00 2001 From: Markus Scheidgen <markus.scheidgen@gmail.com> Date: Sat, 5 Oct 2019 14:46:42 +0200 Subject: [PATCH] Allow to document definition properties in class doc strings. --- nomad/metainfo/__init__.py | 12 +++ nomad/metainfo/example.py | 9 +- nomad/metainfo/metainfo.py | 212 +++++++++++-------------------------- requirements.txt | 1 + 4 files changed, 84 insertions(+), 150 deletions(-) diff --git a/nomad/metainfo/__init__.py b/nomad/metainfo/__init__.py index 452341ebde..680b1a8f25 100644 --- a/nomad/metainfo/__init__.py +++ b/nomad/metainfo/__init__.py @@ -177,6 +177,18 @@ Categories .. autoclass:: Quantity +In the old meta-info this was known as `abstract types`. + +Categories are defined with Python classes that have :class:`MCategory` as base class. +Their name and description is taken from the class's name and docstring. An example +category looks like this: + +.. code-block:: python + + class CategoryName(MCategory): + ''' Category description ''' + m_def = Category(links=['http://further.explanation.eu'], categories=[ParentCategory]) + Packages -------- diff --git a/nomad/metainfo/example.py b/nomad/metainfo/example.py index 7f527be5e6..009557b621 100644 --- a/nomad/metainfo/example.py +++ b/nomad/metainfo/example.py @@ -13,7 +13,14 @@ class SystemHash(MCategory): class Parsing(MSection): - """ All data that describes the NOMAD parsing of this run. """ + """ All data that describes the NOMAD parsing of this run. + + Quantities can also be documented like this: + + Args: + parser_name: 'Name of the used parser' + parser_version: 'Version of the used parser' + """ parser_name = Quantity(type=str) parser_version = Quantity(type=str) diff --git a/nomad/metainfo/metainfo.py b/nomad/metainfo/metainfo.py index ae20771c7d..c9575ac9c2 100644 --- a/nomad/metainfo/metainfo.py +++ b/nomad/metainfo/metainfo.py @@ -20,13 +20,13 @@ import inspect import re import json import itertools - import numpy as np from pint.unit import _Unit from pint import UnitRegistry import aniso8601 from datetime import datetime import pytz +import docstring_parser m_package: 'Package' = None @@ -561,10 +561,8 @@ class MSection(metaclass=MObjectMeta): m_def = Section() setattr(cls, 'm_def', m_def) - # transfer name and description to m_def + # transfer name m_def m_def.name = cls.__name__ - if cls.__doc__ is not None: - m_def.description = inspect.cleandoc(cls.__doc__).strip() m_def.section_cls = cls # add base sections @@ -649,6 +647,37 @@ class MSection(metaclass=MObjectMeta): pkg = Package.from_module(module_name) pkg.m_add_sub_section(Package.section_definitions, cls.m_def) + # apply_google_docstrings + # Parses the google doc string of the given class and properly updates the + # definition descriptions. + + # This allows to document quantities and sub-sections with 'Args:' in the section + # class. It will remove the 'Args' section from the section definition and will + # set the respective pieces to the quantity and sub-section descriptions. + docstring = cls.__doc__ + if docstring is not None: + parsed_docstring = docstring_parser.parse(docstring) + short = parsed_docstring.short_description + dsc = parsed_docstring.long_description + + if short and dsc: + description = '%s %s' % (short.strip(), dsc.strip()) + elif short: + description = short.strip() + elif dsc: + description = dsc.strip() + else: + description = None + + if m_def.description is None: + m_def.description = description + + for param in parsed_docstring.params: + prop = m_def.all_properties.get(param.arg_name) + if prop is not None: + if prop.description is None: + prop.description = param.description + def __check_np(self, quantity_ref: 'Quantity', value: np.ndarray) -> np.ndarray: # TODO return value @@ -1159,7 +1188,7 @@ class Definition(MSection): some common attributes. These are defined in a common base: all metainfo items extend this common base and inherit from ``Definition``. - Attributes: + Args: name: Each `definition` has a name. Names have to be valid Python identifier. They can contain letters, numbers and _, but must not start with a number. This also qualifies them as identifier in most storage formats, databases, @@ -1240,7 +1269,7 @@ class Quantity(Property): Beyond basic :class:`Definition` attributes, Quantities are defined with the following attributes. - Attributes: + Args: type: Defines the datatype of quantity values. This is the type of individual elements in a potentially complex shape. If you define a list of integers for example, @@ -1394,7 +1423,7 @@ class SubSection(Property): and sub-section the property. This allows to use on child `section definition` as sub-section of many different parent `section definitions`. - Attributes: + Args: sub_section: A :class:`Section` or Python class object for a `section class`. This will be the child `section definition`. The defining section the child `section definition`. @@ -1431,7 +1460,7 @@ class Section(Definition): Section definitions determine what quantities and sub-sections can appear in a following section instance. - Attributes: + Args: quantities: The quantities definitions of this section definition as list of :class:`Quantity`. Will be automatically set from the `section class`. @@ -1557,7 +1586,7 @@ class Package(Definition): Besides the regular :class:`Defintion` attributes, packages can have the following attributes: - Attributes: + Args: section_definitions: All `section definitions` in this package as :class:`Section` objects. @@ -1591,21 +1620,9 @@ class Category(Definition): Categories therefore form a hierarchy of concepts that definitions can belong to, i.e. they form a `is a` relationship. - In the old meta-info this was known as `abstract types`. - - Categories are defined with Python classes that have :class:`MCategory` as base class. - Their name and description is taken from the class's name and docstring. An example - category looks like this: - - .. codeblock:: python - - class CategoryName(MCategory): - ''' Category description ''' - m_def = Category(links=['http://further.explanation.eu'], categories=[ParentCategory]) - - Attributes: + Args: definitions: A helper property that gives all definitions that are directly or - indirectly in this category. + indirectly in this category. """ def __init__(self, *args, **kwargs): @@ -1625,145 +1642,42 @@ SubSection.m_def = Section(name='SubSection') Category.m_def = Section(name='Category') Package.m_def = Section(name='Package') -Definition.name = DirectQuantity( - type=str, name='name', description=''' - The name of the quantity. Must be unique within a section. - ''') -Definition.description = Quantity( - type=str, name='description', description=''' - An optional human readable description. - ''') -Definition.links = Quantity( - type=str, shape=['0..*'], name='links', description=''' - A list of URLs to external resource that describe this definition. - ''') +Definition.name = DirectQuantity(type=str, name='name') +Definition.description = Quantity(type=str, name='description') +Definition.links = Quantity(type=str, shape=['0..*'], name='links') Definition.categories = Quantity( - type=Reference(Category.m_def), shape=['0..*'], default=[], name='categories', - description=''' - The categories that this definition belongs to. See :class:`Category`. - ''') + type=Reference(Category.m_def), shape=['0..*'], default=[], name='categories') Section.quantities = SubSection( - sub_section=Quantity.m_def, name='quantities', repeats=True, - description='''The quantities of this section.''') + sub_section=Quantity.m_def, name='quantities', repeats=True) Section.sub_sections = SubSection( - sub_section=SubSection.m_def, name='sub_sections', repeats=True, - description='''The sub sections of this section.''') + sub_section=SubSection.m_def, name='sub_sections', repeats=True) Section.base_sections = Quantity( - type=Reference(Section.m_def), shape=['0..*'], default=[], name='base_sections', - description=''' - Inherit all quantity and sub section definitions from the given sections. - Will be derived from Python base classes. - ''') -Section.extends_base_section = Quantity( - type=bool, default=False, name='extends_base_section', - description=''' - If True, the quantity definitions of this section will be added to the base section. - Only one base section is allowed. - ''') -Section.constraints = Quantity( - type=str, shape=['0..*'], name='constraints', description=''' - Constraints are rules that a section must fulfil to be valid. This allows to implement - semantic checks that goes behind mere type or shape checks. This quantity takes - the names of constraints. Constraints have to be implemented as methods of the - section definition class. These constraints functions must be named ``c_<constraint name>``` - and have no additional parameters. They can raise :class:`ConstraintVialated` or - an AssertionError to indicate that the constraint is not fulfilled for the ``self`` - section. This quantity will be set automatically from all ``c_`` methods in the - respective section class. - ''') + type=Reference(Section.m_def), shape=['0..*'], default=[], name='base_sections') +Section.extends_base_section = Quantity(type=bool, default=False, name='extends_base_section') +Section.constraints = Quantity(type=str, shape=['0..*'], name='constraints') Section.event_handlers = Quantity( - type=Callable, shape=['0..*'], name='event_handlers', virtual=True, default=[], description=''' - Event handler are functions that get called when the section data is changed. - There are two types of events: ``set`` and ``add_sub_section``. The handler type - is determined by the handler (i.e. function) name: ``on_set`` and ``on_add_sub_section``. - The handler arguments correspond to ``m_set`` (section, quantity_def, value) and - ``m_add_sub_section``(section, sub_section_def, sub_section). Handler are called after - the respective action was performed. This quantity is automatically populated with - handler from the section classes methods. If there is a method ``on_set`` or - ``on_add_sub_section``, it will be added as handler. - ''') - -SubSection.repeats = Quantity( - type=bool, name='repeats', default=False, - description='''Wether this sub section can appear only once or multiple times. ''') - -SubSection.sub_section = Quantity( - type=Reference(Section.m_def), name='sub_section', description=''' - The section definition for the sub section. Only section instances of this definition - can be contained as sub sections. - ''') + type=Callable, shape=['0..*'], name='event_handlers', virtual=True, default=[]) + +SubSection.repeats = Quantity(type=bool, name='repeats', default=False) + +SubSection.sub_section = Quantity(type=Reference(Section.m_def), name='sub_section') Quantity.m_def.section_cls = Quantity -Quantity.type = DirectQuantity( - type=QuantityType, name='type', description=''' - The type of the quantity. - - Can be one of the following: - - - none to support any value - - a build-in primitive Python type, e.g. ``int``, ``str`` - - an instance of :class:`Enum`, e.g. ``Enum(['one', 'two', 'three']) - - a instance of Section, i.e. a section definition. This will define a reference - - a custom meta-info DataType - - a numpy dtype, - - If set to a dtype, this quantity will use a numpy array to store values. It will use - the given dtype. If not set, this quantity will use (nested) Python lists to store values. - If values are set to the property, they will be converted to the respective - representation. - - In the NOMAD CoE meta-info this was basically the ``dTypeStr``. - ''') -Quantity.shape = DirectQuantity( - type=Dimension, shape=['0..*'], name='shape', default=[], description=''' - The shape of the quantity that defines its dimensionality. - - A shape is a list, where each item defines a dimension. Each dimension can be: - - - an integer that defines the exact size of the dimension, e.g. ``[3]`` is the - shape of a spacial vector - - the name of an int typed quantity in the same section - - a range specification as string build from a lower bound (i.e. int number), - and an upper bound (int or ``*`` denoting arbitrary large), e.g. ``'0..*'``, ``'1..3'`` - ''') -Quantity.unit = Quantity( - type=Unit, name='unit', description=''' - The optional physics unit for this quantity. - - Units are given in `pint` units. Pint is a Python package that defines units and - their algebra. There is a default registry :data:`units` that you can use. - Example units are: ``units.m``, ``units.m / units.s ** 2``. - ''') -Quantity.default = DirectQuantity( - type=Any, default=None, name='default', description=''' - The default value for this quantity. - ''') -Quantity.synonym_for = DirectQuantity( - type=str, name='synonym_for', description=''' - With this set, the quantity will become a virtual quantity and its data is not stored - directly. Setting and getting quantity, will change the *synonym* quantity instead. Use - the name of the quantity as value. - ''') -Quantity.derived = DirectQuantity( - type=Callable, default=None, name='derived', virtual=True, description=''' - Derived quantities are computed from other quantities of the same section. The value - of derived needs to be a callable that takes the section and returns a value. - ''') -Quantity.virtual = DirectQuantity( - type=bool, default=False, name='virtual', description=''' - Virtual quantities exist in memory, but are not serialized. This is useful for - purely derived quantities, or in situations where serialization is not required. - ''') +Quantity.type = DirectQuantity(type=QuantityType, name='type') +Quantity.shape = DirectQuantity(type=Dimension, shape=['0..*'], name='shape', default=[]) +Quantity.unit = Quantity(type=Unit, name='unit') +Quantity.default = DirectQuantity(type=Any, default=None, name='default') +Quantity.synonym_for = DirectQuantity(type=str, name='synonym_for') +Quantity.derived = DirectQuantity(type=Callable, default=None, name='derived', virtual=True) +Quantity.virtual = DirectQuantity(type=bool, default=False, name='virtual') Package.section_definitions = SubSection( - sub_section=Section.m_def, name='section_definitions', repeats=True, - description=''' The sections defined in this package. ''') + sub_section=Section.m_def, name='section_definitions', repeats=True) Package.category_definitions = SubSection( - sub_section=Category.m_def, name='category_definitions', repeats=True, - description=''' The categories defined in this package. ''') + sub_section=Category.m_def, name='category_definitions', repeats=True) is_bootstrapping = False diff --git a/requirements.txt b/requirements.txt index ead4bcf0c3..f0b06f5460 100644 --- a/requirements.txt +++ b/requirements.txt @@ -49,6 +49,7 @@ tabulate cachetools zipfile37 inflection +docstring-parser # dev/ops related setuptools -- GitLab