From 84c73643523209b62e6bd27018fa64cf4e7581c2 Mon Sep 17 00:00:00 2001 From: Markus Scheidgen <markus.scheidgen@gmail.com> Date: Fri, 16 Dec 2022 17:05:43 +0100 Subject: [PATCH] Added formal models for most annotations and updated the schema docs. --- docs/schema/basics.md | 19 +- docs/schema/elns.md | 23 +- docs/schema/suggestions.yaml | 127 ------- gui/src/metainfo.json | 9 +- nomad/cli/admin/run.py | 2 +- nomad/cli/dev.py | 315 ---------------- nomad/datamodel/metainfo/__init__.py | 2 + nomad/datamodel/metainfo/annotations.py | 387 ++++++++++++++++++++ nomad/datamodel/metainfo/eln/annotations.py | 84 ----- nomad/metainfo/metainfo.py | 96 +++-- nomad/metainfo/util.py | 39 +- nomad/mkdocs.py | 52 ++- nomad/parsing/tabular.py | 69 ++-- tests/datamodel/test_schema.py | 18 +- tests/metainfo/test_metainfo.py | 16 +- tests/metainfo/test_yaml_schema.py | 2 +- tests/parsing/test_tabular.py | 3 - 17 files changed, 570 insertions(+), 693 deletions(-) delete mode 100644 docs/schema/suggestions.yaml create mode 100644 nomad/datamodel/metainfo/annotations.py delete mode 100644 nomad/datamodel/metainfo/eln/annotations.py diff --git a/docs/schema/basics.md b/docs/schema/basics.md index 776866f075..3a5565808e 100644 --- a/docs/schema/basics.md +++ b/docs/schema/basics.md @@ -70,7 +70,24 @@ The *names* of *quantity definitions* serve as the *key*, used in respective *se ### Type This is a list of supported quantity types. 
-{{ get_schema_doc('type') }} +|type|description| +|-|-| +|`string`|| +|`str`|| +|`float`|| +|`integer`|| +|`int`|| +|`boolean`|| +|`bool`|| +|`np.int32`|Numpy based integer with 32 bits.| +|`np.int64`|Numpy based integer with 64 bits.| +|`np.float32`|Numpy based float with 32 bits.| +|`np.float64`|Numpy based float with 64 bits.| +|`Datetime`|| +|`User`|A type for NOMAD users as values.| +|`Author`|A complex type for author information.| +|`{type_kind: Enum, type_data: []}`|Use `type_data` to specify enum values as list of strings.| +|*<section name>*|To define a quantity that is a reference to a specific section.| ### Shape diff --git a/docs/schema/elns.md b/docs/schema/elns.md index f72abe92fb..d8e498fa19 100644 --- a/docs/schema/elns.md +++ b/docs/schema/elns.md @@ -38,17 +38,9 @@ NOMAD's upload page: --8<-- "examples/data/eln/schema.archive.yaml" ``` -## ELN Annotations -The `eln` annotations can contain the following keys: +{{ pydantic_model('nomad.datamodel.metainfo.annotations.ELNAnnotation', heading='## ELN Annotation') }} -{{ get_schema_doc('eln') }} - -The `eln` `component` can be one of the following components: - -{{ get_schema_doc('component') }} - -As part of the GUI, you'll find an overview about all -ELN edit annotations and components [here]({{ nomad_url() }}/../gui/dev/editquantity). +{{ pydantic_model('nomad.datamodel.metainfo.annotations.BrowserAnnotation', heading='## Browser Annotation') }} ## Tabular Annotations @@ -115,16 +107,13 @@ data are assumed to exist in the first sheet ```yaml --8<-- "examples/data/docs/tabular-parser-entry-mode.archive.yaml" ``` -Tabular annotation accepts the following keys: - -{{ get_schema_doc('tabular') }} -## Plot Annotations -Plot annotation is a wrapper for [plotly](https://plotly.com) library. One can use the following keys for plot annotation: +Here are all parameters for the two annotations `tabular_parser` and `tabular`. 
-{{ get_schema_doc('plot') }} +{{ pydantic_model('nomad.datamodel.metainfo.annotations.TabularParserAnnotation', heading='### tabular_parser') }} +{{ pydantic_model('nomad.datamodel.metainfo.annotations.TabularAnnotation', heading='### tabular') }} -which can be customized by using plotly commands. See [plot examples]({{ nomad_url() }}/../gui/dev/plot). +{{ pydantic_model('nomad.datamodel.metainfo.annotations.PlotAnnotation', heading='## Plot Annotation') }} ## Built-in base sections for ELNs diff --git a/docs/schema/suggestions.yaml b/docs/schema/suggestions.yaml deleted file mode 100644 index aa25e364b8..0000000000 --- a/docs/schema/suggestions.yaml +++ /dev/null @@ -1,127 +0,0 @@ -m_annotations: - eln: "ELN annotations" - tabular: "tabular annotations" - plot: "plot annotations" - -tabular: - name: "Either < column name > in `.csv` and `excel` or in the format of < sheet name >/< column name > only for `excel` files" - unit: "The unit to display the data" - comment: "A character denoting the commented lines in `excel` or `.csv` files" - sep: "In case of reading data from a `.csv` file, the separator annotation (e.g. `','` for comma or `'\\t'` for tab) can be specified here." - separator: "Aliases for `sep`." - mode: "Either `column` or `row`. Use only when setting `TableData` as a base-section. Defaults to `column`." - target_sub_section: "List of paths to the targeted repeating subsection < section >/< sub-sections >/ ... 
/< subsections >" - -eln: - component: "The name of ELN edit component" - hide: "list of hidden items" - overview: "with or without overview" - default: "The default value fo the component" - defaultDisplayUnit: "default display unit for numbers" - minValue: "minimum value of the numbers" - maxValue: "minimum value of the numbers" - dict(): "python dict" - -plot: - label: "plotly: label of the plot" - x: "plotly: x axis" - y: "plotly: y axis" - lines: "plotly: style of lines" - config: "plotly: plot config" - layout: "plotly: plot layout" - -component: - NumberEditQuantity: "a component: to edit quantity of type number" - StringEditQuantity: "a component to edit quantity of type string" - URLEditQuantity: "a component to edit quantity of type url" - EnumEditQuantity: "a component to edit quantity of type enumerate as a dropdown list. This component may be used for short enumerates." - RadioEnumEditQuantity: "a component to edit quantity of type enumerate as a list of radio buttons." - AutocompleteEditQuantity: "a component to edit quantity of type enumerate as an autocomplete dropdown list. This component may be used for longer enumerates." 
- BoolEditQuantity: "a component to edit quantity of type boolean" - FileEditQuantity: "a component to edit quantity of type file path" - DateTimeEditQuantity: "a component to edit quantity of type data and time" - RichTextEditQuantity: "a component to edit quantity of type rich text" - ReferenceEditQuantity: "a component to edit quantity of type reference" - UserEditQuantity: "a component to edit quantity of type nomad user" - AuthorEditQuantity: "a component to edit quantity of type author" - -type: - string: "Quantity Type: python string" - str: "Quantity Type: python string" - float: "Quantity Type: python float" - integer: "Quantity Type: python integer" - int: "Quantity Type: python integer" - boolean: "Quantity Type: python boolean" - bool: "Quantity Type: python boolean" - np.int32: "Quantity Type: numpy 32 bits integer" - np.int64: "Quantity Type: numpy 64 bits integer" - np.float32: "Quantity Type: numpy 32 bits float" - np.float64: "Quantity Type: numpy 64 bits float" - Datetime: "Quantity Type: Date and time" - User: "Quantity Type: Nomad User" - Author: "Quantity Type: Author" - '{type_kind: Enum, type_data: []}': "Quantity Type: Enumerate" - '< section name >': "Quantity Type: reference" - type_kind: "Quantity Type: Nomad Type kind" - type_data: "Quantity Type: Nomad Type data" - log: "Plotly Type: logarithmic plot" - scatter: "Plotly Type: scatter plot" - -type_kind: - Enum: "Type kind: Enumerate" - -type_data: - '[]': "Type data: Array of data" - -repeats: - true: "The section is an array" - false: "The section is not an array" - -overview: - true: "with overview" - false: "with no overview" - -lines: - mode: "plotly: line mode" - line: "plotly: line settings" - marker: "plotly: marker settings" - -mode: - markers: "plotly: plot by markers" - lines: "plotly: plot by line" - -line: - color: "plotly: color of line" - -marker: - color: "plotly: color of marker" - -layout: - xaxis: "plotly: x-axis layout settings" - yaxis: "plotly: y-axis layout 
settings" - -xaxis: - title: "plotly: x-axis title" - type: "plotly: x-axis type" - autorange: "plotly: auto range x-axis" - -yaxis: - title: "plotly: y-axis title" - type: "plotly: y-axis type" - autorange: "plotly: auto range y-axis" - -config: - editable: "plotly config: editable plot" - scrollZoom: "plotly config: zoom scrollable plot" - -editable: - true: "plotly: editable plot" - false: "plotly: non-editable plot" - -scrollZoom: - true: "plotly: zoom by scroll button" - false: "plotly: do not zoom by scroll button" - -autorange: - true: "plotly: autorange axis" - false: "plotly: not autorange axis" diff --git a/gui/src/metainfo.json b/gui/src/metainfo.json index 2d5a0236cf..553d65890e 100644 --- a/gui/src/metainfo.json +++ b/gui/src/metainfo.json @@ -51044,8 +51044,8 @@ "m_annotations": { "eln": [ { - "label": "Process atmosphere", - "component": "RadioEnumEditQuantity" + "component": "RadioEnumEditQuantity", + "label": "Process atmosphere" } ] }, @@ -51906,8 +51906,8 @@ "m_annotations": { "eln": [ { - "label": "Creation date and time of the layer.", - "component": "DateTimeEditQuantity" + "component": "DateTimeEditQuantity", + "label": "Creation date and time of the layer." 
} ] }, @@ -336547,7 +336547,6 @@ "/packages/57/section_definitions/2" ], "constraints": [ - "annotations_are_valid", "dimensions", "has_type", "higher_shapes_require_dtype" diff --git a/nomad/cli/admin/run.py b/nomad/cli/admin/run.py index 1f0b3bc551..1768b0d3c3 100644 --- a/nomad/cli/admin/run.py +++ b/nomad/cli/admin/run.py @@ -39,7 +39,7 @@ def worker(): @run.command(help='Run the nomad development app with all apis.') @click.option('--debug', help='Does run app in debug.', is_flag=True) -def app(debug: bool, with_chaos: int): +def app(debug: bool): run_app(debug=debug) diff --git a/nomad/cli/dev.py b/nomad/cli/dev.py index 91cb2c8845..9595267821 100644 --- a/nomad/cli/dev.py +++ b/nomad/cli/dev.py @@ -621,318 +621,3 @@ def _generate_units(all_metainfo): @dev.command(help='Creates a Javascript source file containing the required unit conversion factors.') def units(): print(_generate_units(_all_metainfo_packages())) - - -@dev.command(help='Generate vscode extension for nomad schema.') -@click.option('--output', '-o', type=str, help='Output path for extension.') -def vscode_extension(output: str): - import shutil - import yaml - - extension_path = os.path.normpath(output) + "/nomad-vscode" if output else "./nomad-vscode" - snippets_path = os.path.join(extension_path, "snippets") - syntaxes_path = os.path.join(extension_path, "syntaxes") - if os.path.exists(extension_path): - shutil.rmtree(extension_path) - os.makedirs(extension_path) - os.makedirs(snippets_path) - os.makedirs(syntaxes_path) - - schemaYaml = os.path.abspath(os.path.join(os.path.dirname(__file__), '../../docs/schema/suggestions.yaml')) - metainfoJson = os.path.abspath(os.path.join(os.path.dirname(__file__), '../../gui/src/metainfo.json')) - outputJson = os.path.join(snippets_path, "./nomad.code-snippets") - outputLanguage = os.path.join(syntaxes_path, "./nomad.tmlanguage.json") - output_config = os.path.join(extension_path, "./language-configuration.json") - output_package = 
os.path.join(extension_path, "./package.json") - - shutil.copyfile(os.path.join(os.path.dirname(__file__), '../../LICENSE'), os.path.join(extension_path, "./LICENSE")) - - with open(schemaYaml, "r") as yaml_file: - try: - schema = yaml.safe_load(yaml_file) - except yaml.YAMLError as exc: - print(exc) - - with open(metainfoJson, "r") as json_file: - try: - metainfo = json.load(json_file) - except json.JSONDecodeError as exc: - print(exc) - - nomadLanguage = { - "$schema": "https://raw.githubusercontent.com/martinring/tmlanguage/master/tmlanguage.json", - "name": "Nomad", - "patterns": [{"include": "#comments"}, - {"include": "#strings"}, - {"include": "#keywords"}, - {"include": "#metainfos"}, - {"include": "#suggestions"}, - {"include": "#keys"} - ], - "repository": { - "comments": { - "patterns": [ - { - "name": "comment.line.number-sign.nomad", - "match": "#.*" - } - ] - }, - "strings": { - "patterns": [ - { - "name": "string.quoted.double.nomad", - "match": "\".*\"" - }, - { - "name": "string.quoted.double.nomad", - "match": "'.*'" - } - ] - }, - "metainfos": { - "patterns": [ - { - "name": "keyword.control.nomad", - "match": "nomad..*" - } - ] - }, - "suggestions": { - "patterns": [ - { - "name": "invalid.illegal.nomad", - "match": "<.*>" - } - ] - }, - "keys": { - "patterns": [ - { - "name": "entity.name.type.nomad", - "match": "(?!.*[#<>].*).*:" - }, - { - "include": "#strings" - } - ] - }, - "keywords": {} # it would be added automatically - }, - "scopeName": "source.schema.archive.yaml" - } - - snippets = { - "Nomad definitions": { - "scope": "nomad", - "prefix": "definitions:", - "body": [ - "definitions:", - " name: '${1:schema name}'", - " sections:" - ], - "description": "Nomad schema definitions" - }, - "Nomad sections": { - "scope": "nomad", - "prefix": "sections:", - "body": [ - "sections:", - " ${1:<section name>}:", - " base_section: ${2:nomad.datamodel.data.EntryData}", - " quantities:" - ], - "description": "Nomad schema sections" - }, - "Nomad 
quantities": { - "scope": "nomad", - "prefix": "quantities:", - "body": [ - "quantities:", - " ${1:<quantity name>}:", - " type:$2", - " shape:$3", - " description:$4", - " m_annotations:" - ], - "description": "Nomad schema quantities" - }, - "Nomad m_annotations": { - "scope": "nomad", - "prefix": "m_annotations:", - "body": [ - "m_annotations:", - " template:", - " eln:$1" - ], - "description": "Nomad schema m_annotations" - }, - "Nomad eln": { - "scope": "nomad", - "prefix": "eln:", - "body": [ - "eln:", - " component:$1", - " hide: ${2:[]}" - ], - "description": "Nomad schema eln" - }, - "Nomad base_section": { - "scope": "nomad", - "prefix": "base_section:", - "body": [ - "base_section:", - " - '${1:nomad.datamodel.data.EntryData}'", - "quantities:", - "sub_sections:" - ], - "description": "Nomad schema base_section" - }, - "Nomad sub_sections": { - "scope": "nomad", - "prefix": "sub_sections:", - "body": [ - "sub_sections:", - " ${1:<section name>}:", - " section:" - ], - "description": "Nomad schema sub_sections" - }, - "Nomad section": { - "scope": "nomad", - "prefix": "section:", - "body": [ - "section:", - " base_sections:", - " m_annotations:" - ], - "description": "Nomad schema section" - }, - "Nomad components": { - "scope": "nomad", - "prefix": "components:", - "body": [ - "components:", - " repeats: {1:false}", - " m_annotations:", - " section:" - ], - "description": "Nomad schema components" - } - } - - keywords = [] - for key, value in schema.items(): - if key in ['m_annotations', 'tabular', 'eln', 'plot', 'lines', - 'line', 'marker', 'layout', 'xaxis', 'yaxis', 'config']: - snippets["Nomad {}".format(key)] = { - 'scope': "nomad", - 'prefix': "{}:".format(key), - 'body': ["{}:".format(key)] + [(" {}" if key == 'eln' and k == 'dict()' else " {}:").format(k) for k - in schema[key]], - 'description': value - } - else: - for item, description in schema[key].items(): - if key == 'type' and (item == 'type_kind' or item == 'type_data'): - snippets["Nomad 
{} {}: ".format(key, item)] = { - 'scope': "nomad", - 'prefix': "{}:".format(key), - 'body': ["{}:".format(key)] + [" {}:".format(item)], - 'description': description - } - else: - snippets["Nomad {} {}".format(key, item)] = { - 'scope': "nomad", - 'prefix': "{}: {}".format(key, item), - 'body': [ - "{}: {}".format(key, item) - ], - 'description': description - } - if not ('{' in str(item) or '[' in str(item) or '}' in str(item) or ']' in str(item)): - keywords.append(str(item)) - - allPackages = [] - for package in metainfo['packages']: - name = package['name'] - allPackages.append(name) - if 'section_definitions' in package: - for section in package['section_definitions']: - section_name = '{}.{}'.format(name, section['name']) - allPackages.append(section_name) - - for package in list(set(allPackages)): - snippets["Section {}".format(package)] = { - 'scope': "nomad", - 'prefix': package, - 'body': [ - package - ], - 'description': 'Nomad package {}'.format(package) - } - keywords.append(package) - - keywords = list(set(keywords)) - keywords.sort(reverse=True) - patterns = [] - for keyword in keywords: - pattern = { - "name": "constant.language.nomad", - "match": "\\b{}\\b".format(keyword) - } - patterns.append(pattern) - nomadLanguage['repository']['keywords']['patterns'] = patterns # type: ignore - - with open(outputJson, 'w') as f: - json.dump(snippets, f, indent=4) - - with open(outputLanguage, 'w') as f: - json.dump(nomadLanguage, f, indent=4) - - package_contents = { - "name": "nomad", - "displayName": "Nomad schema", - "description": "Nomad schema language support for vscode.", - "version": "0.0.1", - "engines": { - "vscode": "^1.63.0" - }, - "repository": { - "private": "true" - }, - "categories": [ - "Programming Languages" - ], - "contributes": { - "languages": [{ - "id": "nomad", - "aliases": ["Nomad", "nomad"], - "extensions": [".schema.archive.yaml"], - "configuration": "./language-configuration.json" - }], - "grammars": [{ - "language": "nomad", - 
"scopeName": "source.schema.archive.yaml", - "path": "./syntaxes/nomad.tmlanguage.json" - }], - "snippets": [{ - "language": "nomad", - "path": "./snippets/nomad.code-snippets" - }] - } - } - - config_contents = { - "brackets": [["{", "}"], ["[", "]"]], - "autoClosingPairs": [["{", "}"], ["[", "]"], ["'", "'"], ["\"", "\""]], - "surroundingPairs": [["{", "}"], ["[", "]"]] - } - - with open(output_package, 'w') as f: - json.dump(package_contents, f, indent=4) - - with open(output_config, 'w') as f: - json.dump(config_contents, f, indent=4) - - return 0 diff --git a/nomad/datamodel/metainfo/__init__.py b/nomad/datamodel/metainfo/__init__.py index d961a64e7b..aee7616465 100644 --- a/nomad/datamodel/metainfo/__init__.py +++ b/nomad/datamodel/metainfo/__init__.py @@ -15,6 +15,8 @@ # See the License for the specific language governing permissions and # limitations under the License. # + +from . import annotations # Should be imported first to register the annotations before they are used from .simulation import m_env from .eln.material_library import m_package from .eln.perovskite_solar_cell_database import m_package diff --git a/nomad/datamodel/metainfo/annotations.py b/nomad/datamodel/metainfo/annotations.py new file mode 100644 index 0000000000..6c46720342 --- /dev/null +++ b/nomad/datamodel/metainfo/annotations.py @@ -0,0 +1,387 @@ +# +# Copyright The NOMAD Authors. +# +# This file is part of NOMAD. See https://nomad-lab.eu for further info. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# + +from typing import List, Any, Union, Dict +from enum import Enum +from pydantic import Field, validator + +from nomad.metainfo import AnnotationModel, MEnum, MTypes, Datetime, Reference, Quantity + + +class ELNComponentEnum(str, Enum): + StringEditQuantity = 'StringEditQuantity' + URLEditQuantity = 'URLEditQuantity' + EnumEditQuantity = 'EnumEditQuantity' + RadioEnumEditQuantity = 'RadioEnumEditQuantity' + AutocompleteEditQuantity = 'AutocompleteEditQuantity' + FileEditQuantity = 'FileEditQuantity' + BoolEditQuantity = 'BoolEditQuantity' + NumberEditQuantity = 'NumberEditQuantity' + SliderEditQuantity = 'SliderEditQuantity' + DateTimeEditQuantity = 'DateTimeEditQuantity' + RichTextEditQuantity = 'RichTextEditQuantity' + ReferenceEditQuantity = 'ReferenceEditQuantity' + UserEditQuantity = 'UserEditQuantity' + AuthorEditQuantity = 'AuthorEditQuantity' + + +valid_eln_types = { + 'str': ['str'], + 'bool': ['bool'], + 'number': [x.__name__ for x in MTypes.num_python] + [f'np.{x.__name__}' for x in MTypes.num_numpy], # type: ignore + 'datetime': ['Datetime'], + 'enum': ['{type_kind: Enum, type_data: [Operator, Responsible_person]}'], + 'user': ['User'], + 'author': ['Author'], + 'reference': [''] +} + + +valid_eln_components = { + 'str': [ + ELNComponentEnum.StringEditQuantity, + ELNComponentEnum.FileEditQuantity, + ELNComponentEnum.RichTextEditQuantity, + ELNComponentEnum.EnumEditQuantity], + 'bool': [ + ELNComponentEnum.BoolEditQuantity], + 'number': [ + ELNComponentEnum.NumberEditQuantity, + ELNComponentEnum.SliderEditQuantity], + 'datetime': [ + ELNComponentEnum.DateTimeEditQuantity], + 'enum': [ + ELNComponentEnum.EnumEditQuantity, + ELNComponentEnum.AutocompleteEditQuantity, + ELNComponentEnum.RadioEnumEditQuantity], + 'user': [ + ELNComponentEnum.AuthorEditQuantity], + 'author': [ + ELNComponentEnum.AuthorEditQuantity], + 'reference': [ + 
ELNComponentEnum.ReferenceEditQuantity] +} + + +class ELNAnnotation(AnnotationModel): + ''' + These annotations control how data can be entered and edited. + Use the key `eln` to add this annotation. For example: + + ```python + class Sample(EntryData): + sample_id = Quantity(type=str, a_eln=dict(component='StringEditQuantity')) + ``` + + or in YAML schemas: + ```yaml + Sample: + quantities: + sample_id: + type: str + m_annotations: + eln: + component: StringEditQuantity + ``` + + An `eln` annotation can be added to *section* and *quantity* definitions to different + effects. In both cases, it controls how sections and quantities are represented in the GUI + with different parameters; see below. + + The UI gives an overview about all ELN edit annotations and components + [here]({{ nomad_url() }}/../gui/dev/editquantity). + ''' + + component: ELNComponentEnum = Field(None, description=''' + The form field component that is used to make the annotated quantity editable. + If no component is given, the quantity won't be editable. This can be used on quantities only. + + The supported values are: + + `StringEditQuantity`: For editing simple short string values.<br/> + `URLEditQuantity`: For editing strings that are validated to be URLs.<br/> + `EnumEditQuantity`: For editing enum values. Uses a dropdown list with enum values. This component may be used for short enumerates.<br/> + `RadioEnumEditQuantity`: For editing enum values. Uses radio buttons.<br/> + `AutocompleteEditQuantity`: For editing enum values. Uses an autocomplete form with dropdown list. This component may be used for longer enumerates.<br/> + `FileEditQuantity`: For editing a reference to a file. 
Will allow to choose a file or upload a file.<br/> + `BoolEditQuantity`: For editing boolean choices.<br/> + `NumberEditQuantity`: For editing numbers with or without unit.<br/> + `SliderEditQuantity`: For editing numbers with a horizontal slider widget.<br/> + `DateTimeEditQuantity`: For editing datetimes.<br/> + `RichTextEditQuantity`: For editing long styled text with a rich text editor.<br/> + `ReferenceEditQuantity`: For editing references to other sections.<br/> + `UserEditQuantity`: For entering user information. Lets you choose a nomad user or enter information manually.<br/> + `AuthorEditQuantity`: For entering author information manually. + ''') + + label: str = Field(None, description='Custom label for the quantity shown on the form field.') + + props: Dict[str, Any] = Field(None, description=''' + A dictionary with additional props that are passed to the edit component. + ''') + + default: Any = Field(None, description=''' + Prefills any set form field component with the given value. This is different + from the quantity's `default` property. The quantity's default is not stored + in the data; the default value is assumed if no other value is given. The + ELN form field default value will be stored, even if not changed. + ''') + defaultDisplayUnit: str = Field(None, description=''' + Allows to define a default unit to initialize a `NumberEditQuantity` with. The + unit has to be compatible with the unit of the annotation quantity and the annotated + quantity must have a unit. Only applies to quantities and with + `component=NumberEditQuantity`. + ''') + + minValue: Union[int, float] = Field(None, description=''' + Allows to specify a minimum value for quantity annotations with number type. + Will show an error, if outside numbers are entered. Only works on quantities and + in conjunction with `component=NumberEditQuantity`. 
+ ''') + maxValue: Union[int, float] = Field(None, description=''' + Allows to specify a maximum value for quantity annotations with number type. + Will show an error, if outside numbers are entered. Only works on quantities and + in conjunction with `component=NumberEditQuantity`. + ''') + + hide: List[str] = Field(None, description=''' + Allows you to hide certain quantities from a section editor. Give a list + of quantity names. Quantities must exist in the section that this annotation + is added to. Can only be used in section annotations. + ''') + + overview: bool = Field(None, description=''' + Shows the annotation section on the entry's overview page. Can only be used on + section annotations.''') + + lane_width: Union[str, int] = Field(None, description=''' + Value to overwrite the css width of the lane used to render the annotation + section and its editor. + ''') + + class Config: + validate_assignment = True + + @validator('m_definition') + def validate_component(cls, definition, values): # pylint: disable=no-self-argument + if not definition: + return definition + + def assert_component(component, quantity_name, quantity_type, accepted_components): + assert component in accepted_components, ( + f'The component {component} is not compatible with the quantity ' + f'{quantity_name} of the type {quantity_type}. ' + f'Accepted components: {", ".join(accepted_components)}.') + + component = values.get('component') + if not component: + return definition + + assert isinstance(definition, Quantity), 'Only quantities can be eln annotated with a component.' 
+ quantity = definition + type_ = quantity.type + name = quantity.name + + if isinstance(type_, type): + if type_.__name__ == 'str': + assert_component(component, name, type_.__name__, valid_eln_components['str']) + elif type_.__name__ == 'bool': + assert_component(component, name, type_.__name__, valid_eln_components['bool']) + elif type_ in MTypes.num_python: + assert_component(component, name, type_.__name__, valid_eln_components['number']) + elif type_ in MTypes.num_numpy: + assert_component(component, name, f'np.{type_.__name__}', valid_eln_components['number']) + elif type_.__name__ == 'User': + assert_component(component, name, type_.__name__, valid_eln_components['user']) + elif type_.__name__ == 'Author': + assert_component(component, name, type_.__name__, valid_eln_components['author']) + + elif type_ == Datetime: + assert_component(component, name, type(type_).__name__, valid_eln_components['datetime']) + + elif isinstance(type_, MEnum): + assert_component(component, name, type(type_).__name__, valid_eln_components['enum']) + + elif isinstance(type_, Reference): + target_class = type_.target_section_def.section_cls + if target_class.__name__ == 'User': + assert_component(component, name, target_class.__name__, valid_eln_components['user']) + elif target_class.__name__ == 'Author': + assert_component(component, name, target_class.__name__, valid_eln_components['author']) + else: + assert_component(component, name, type(type_).__name__, valid_eln_components['reference']) + + return definition + + +class BrowserAdaptors(str, Enum): + RawFileAdaptor = 'RawFileAdaptor' + + +class BrowserRenderValues(str, Enum): + JsonValue = 'JsonValue' + HtmlValue = 'HtmlValue' + + +class BrowserAnnotation(AnnotationModel): + ''' + The `browser` annotation allows to specify if the processed data browser needs to + display a quantity differently. It can be applied to quantities. 
For example + + ```python + class Experiment(EntryData): + description = Quantity(type=str, a_browser=dict(render_value='HtmlValue')) + ``` + + or in yaml + + ```yaml + Experiment: + quantities: + description: + type: str + m_annotations: + browser: + render_value: HtmlValue + ``` + ''' + + adaptor: BrowserAdaptors = Field(None, description=''' + Allows to change the *Adaptor* implementation that is used to render the + lane for this quantity. Possible values are: + + `RawFileAdaptor`: An adaptor that is used to show files, including all file + actions, like file preview. + ''') + render_value: BrowserRenderValues = Field(None, description=''' + Allows to change the *Component* used to render the value of the quantity. + Possible values are: + + `HtmlValue`: Renders a string as HTML.<br/> + `JsonValue`: Renders a dict or list in a collapsible tree. + ''') + + +class TabularMode(str, Enum): + row = 'row' + column = 'column' + + +class TabularParserAnnotation(AnnotationModel): + ''' + Instructs NOMAD to treat a string valued scalar quantity as a file path and + interpret the contents of this file as tabular data. Supports both + `.csv` and Excel files. + ''' + + comment: str = Field(None, description=''' + The character denoting the commented lines in `.csv` files. This is passed to + pandas to parse the file. Has to be used to annotate the quantity that + holds the path to the `.csv` or excel file. + ''') + sep: str = Field(None, description=''' + The character used to separate cells in a `.csv` file. This is passed to + pandas to parse the file. Has to be used to annotate the quantity that + holds the path to the `.csv` or excel file. + ''') + skiprows: int = Field(None, description=''' + Number of `.csv` file rows that are skipped. This is passed to + pandas to parse the file. Has to be used to annotate the quantity that + holds the path to the `.csv` or excel file. 
+ ''') + separator: str = Field(None, description='An alias for `sep`') + mode: TabularMode = Field(TabularMode.column, description=''' + Either `column` or `row`. With `column` the whole column is mapped into a quantity + (needs to be a list). + With `row` each row (and its cells) is mapped into instances of a repeating + sub section, where each section represents a row (quantities need to be scalars). + Has to be used to annotate the quantity that + holds the path to the `.csv` or excel file. + ''') + target_sub_section: List[str] = Field([], description=''' + A list of paths to sub-sections of the annotated quantity's section. Each path is a + `/` separated list of nested sub-sections. The targeted sub-sections will be + considered when mapping table columns to quantities. + Has to be used to annotate the quantity that + holds the path to the `.csv` or excel file. + ''') + + +class TabularAnnotation(AnnotationModel): + ''' + Allows to map a quantity to a row of a tabular data-file. Should only be used + in conjunction with `tabular_parser`. + ''' + + name: str = Field(None, description=''' + The column name that should be mapped to the annotated quantity. Has to be + the same string that is used in the header, i.e. first `.csv` line or first excel file `row`. + For excel files with multiple sheets, the name can have the form `<sheet name>/<column name>`. + Otherwise, only the first sheet is used. Has to be applied to the + quantity that a column should be mapped to. + ''') + unit: str = Field(None, description=''' + The unit of the value in the file. Has to be compatible with the annotated quantity's + unit. Will be used to automatically convert the value. If this is not defined, + the values will not be converted. Has to be applied to the + quantity that a column should be mapped to. + ''') + + +class PlotAnnotation(AnnotationModel): + ''' + This annotation can be used to add a plot to a section or quantity. 
Example: + ```python + class Evaporation(MSection): + m_def = Section(a_plot={ + 'label': 'Temperature and Pressure', + 'x': 'process_time', + 'y': ['./substrate_temperature', './chamber_pressure'], + 'config': { + 'editable': True, + 'scrollZoom': False + } + }) + process_time = Quantity(type=float, shape=['*'], unit='s') + substrate_temperature = Quantity(type=float, shape=['*'], unit='K') + chamber_pressure = Quantity(type=float, shape=['*'], unit='Pa') + ``` + ''' + + label: str = Field(None, description='Is passed to plotly to define the label of the plot.') + x: str = Field(None, description=''' + A path to the quantity that holds the x-axis value. The path is a `/` separated + list of sub-section and quantity names that leads from the annotation section + to the quantity. + ''') + y: Union[List[str], str] = Field(None, description=''' + A path or list of paths to the y-axes values. Multiple paths will lead to a + plot with multiple lines. Each path is a `/` separated + list of sub-section and quantity names that leads from the annotation section + to the quantity. + ''') + lines: List[dict] = Field(None, description='Is passed to plotly to configure the lines of the plot.') + layout: dict = Field(None, description='Is passed to plotly as `layout`.') + config: dict = Field(None, description='Is passed to plotly as `config`.') + + +AnnotationModel.m_registry['eln'] = ELNAnnotation +AnnotationModel.m_registry['browser'] = BrowserAnnotation +AnnotationModel.m_registry['tabular_parser'] = TabularParserAnnotation +AnnotationModel.m_registry['tabular'] = TabularAnnotation +AnnotationModel.m_registry['plot'] = PlotAnnotation diff --git a/nomad/datamodel/metainfo/eln/annotations.py b/nomad/datamodel/metainfo/eln/annotations.py deleted file mode 100644 index 08b0bc1b07..0000000000 --- a/nomad/datamodel/metainfo/eln/annotations.py +++ /dev/null @@ -1,84 +0,0 @@ -# -# Copyright The NOMAD Authors. -# -# This file is part of NOMAD. See https://nomad-lab.eu for further info. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -from nomad.metainfo import MTypes, Datetime, MEnum, Reference - - -validElnTypes = { - 'str': ['str'], - 'bool': ['bool'], - 'number': [x.__name__ for x in MTypes.num_python] + [f'np.{x.__name__}' for x in MTypes.num_numpy], # type: ignore - 'datetime': ['Datetime'], - 'enum': ['{type_kind: Enum, type_data: [Operator, Responsible_person]}'], - 'user': ['User'], - 'author': ['Author'], - 'reference': [''] -} - -validElnComponents = { - 'str': ['StringEditQuantity', 'FileEditQuantity', 'RichTextEditQuantity', 'EnumEditQuantity'], - 'bool': ['BoolEditQuantity'], - 'number': ['NumberEditQuantity', 'SliderEditQuantity'], - 'datetime': ['DateTimeEditQuantity'], - 'enum': ['EnumEditQuantity', 'AutocompleteEditQuantity', 'RadioEnumEditQuantity'], - 'user': ['AuthorEditQuantity'], - 'author': ['AuthorEditQuantity'], - 'reference': ['ReferenceEditQuantity'] -} - - -def validate_eln_quantity_annotations(quantity): - def assert_component(component_name, quantity_name, quantity_type, accepted_components): - assert component_name in accepted_components, ( - f'The component {component_name} is not compatible with the quantity ' - f'{quantity_name} of the type {quantity_type}. 
' - f'Accepted components: {", ".join(accepted_components)}.') - - if 'eln' not in quantity.m_annotations: - return - - component = quantity.m_annotations['eln'].get('component', False) - assert component, 'Quantity ELN annotation need to define a component' - - mtype = quantity.type - name = quantity.name - if isinstance(mtype, type): - if mtype.__name__ == 'str': - assert_component(component, name, mtype.__name__, validElnComponents['str']) - elif mtype.__name__ == 'bool': - assert_component(component, name, mtype.__name__, validElnComponents['bool']) - elif mtype in MTypes.num_python: - assert_component(component, name, mtype.__name__, validElnComponents['number']) - elif mtype in MTypes.num_numpy: - assert_component(component, name, f'np.{mtype.__name__}', validElnComponents['number']) - elif mtype.__name__ == 'User': - assert_component(component, name, mtype.__name__, validElnComponents['user']) - elif mtype.__name__ == 'Author': - assert_component(component, name, mtype.__name__, validElnComponents['author']) - elif mtype == Datetime: - assert_component(component, name, type(mtype).__name__, validElnComponents['datetime']) - elif isinstance(mtype, MEnum): - assert_component(component, name, type(mtype).__name__, validElnComponents['enum']) - elif isinstance(mtype, Reference): - target_class = mtype.target_section_def.section_cls - if target_class.__name__ == 'User': - assert_component(component, name, target_class.__name__, validElnComponents['user']) - elif target_class.__name__ == 'Author': - assert_component(component, name, target_class.__name__, validElnComponents['author']) - else: - assert_component(component, name, type(mtype).__name__, validElnComponents['reference']) diff --git a/nomad/metainfo/metainfo.py b/nomad/metainfo/metainfo.py index 930891e5fc..2d0c1ab6b4 100644 --- a/nomad/metainfo/metainfo.py +++ b/nomad/metainfo/metainfo.py @@ -25,10 +25,9 @@ import re import sys from collections.abc import Iterable as IterableABC from functools import 
reduce -from pydantic import parse_obj_as +from pydantic import parse_obj_as, ValidationError, BaseModel, Field from typing import ( - Any, Callable as TypingCallable, Dict, Iterable, List, Optional, Set, Tuple, Type, TypeVar, Union, cast) - + Any, Callable as TypingCallable, Dict, Iterable, List, Optional, Set, Tuple, Type, TypeVar, Union, cast, ClassVar) import docstring_parser import jmespath import numpy as np @@ -40,8 +39,7 @@ from nomad.metainfo.util import ( Annotation, DefinitionAnnotation, MEnum, MQuantity, MRegEx, MSubSectionList, MTypes, ReferenceURL, SectionAnnotation, _delta_symbols, check_dimensionality, check_unit, convert_to, default_hash, dict_to_named_list, normalize_complex, normalize_datetime, resolve_variadic_name, retrieve_attribute, serialize_complex, - split_python_definition, to_dict, to_numpy, to_section_def, validate_shape, validate_url, - AnnotationModel) + split_python_definition, to_dict, to_numpy, to_section_def, validate_shape, validate_url) from nomad.units import ureg as units # todo: remove magic comment after upgrading pylint @@ -1093,7 +1091,7 @@ class MSection(metaclass=MObjectMeta): # TODO find a way to make this a subclas for name, annotation in section_annotation.new(self).items(): self.m_annotations[name] = annotation - self.m_validate_annotations() + self.m_parse_annotations() # set remaining kwargs if is_bootstrapping: @@ -1324,17 +1322,36 @@ class MSection(metaclass=MObjectMeta): # TODO find a way to make this a subclas return value - def m_validate_annotations(self): + def m_parse_annotations(self): for annotation_name, annotation in self.m_annotations.items(): annotation_model = AnnotationModel.m_registry.get(annotation_name) if not annotation_model: continue - if isinstance(annotation, list): - for index, item in enumerate(annotation): - annotation[index] = parse_obj_as(annotation_model, item) - else: - annotation = parse_obj_as(annotation_model, annotation) + def to_model(annotation_model, annotation_data): + if 
annotation_data is None: + return None + + if isinstance(annotation_data, AnnotationModel): + annotation = annotation_data + else: + annotation = parse_obj_as(annotation_model, annotation_data) + if isinstance(self, Definition): + annotation.m_definition = self + return annotation + + try: + if isinstance(annotation, list): + for index, item in enumerate(annotation): + annotation[index] = to_model(annotation_model, item) + else: + annotation = to_model(annotation_model, annotation) + except ValidationError as e: + # TODO use the error/warning system that constraints are using at least for schemas + from nomad.utils import get_logger + get_logger(__name__).error( + 'could not validate an annotation', annotation=annotation_name, exc_info=e) + raise e self.m_annotations[annotation_name] = annotation @@ -2291,7 +2308,7 @@ class MSection(metaclass=MObjectMeta): # TODO find a way to make this a subclas raise MetainfoError( f'The provided m_annotations is of a wrong type. {type(m_annotations).__name__} was provided.') section.m_annotations.update(m_annotations) - section.m_validate_annotations() + section.m_parse_annotations() section.m_update_from_dict(dct) return section @@ -2579,6 +2596,21 @@ class MSection(metaclass=MObjectMeta): # TODO find a way to make this a subclas if not validate_shape(self, quantity, self.m_get(quantity)): errors.append(f'The shape of quantity {quantity} does not match its value.') + for annotation in self.m_annotations.values(): + def validate_annotation(annotation): + if isinstance(annotation, AnnotationModel): + try: + # Trigger model validation by re-assigning the definition + annotation.m_definition = self + except ValidationError as e: + errors.append(f'Annotation validation error for {self}: {str(e)}') + + if isinstance(annotation, list): + for item in annotation: + validate_annotation(item) + else: + validate_annotation(annotation) + return errors, warnings def m_copy( @@ -3268,14 +3300,6 @@ class Quantity(Property): assert self.type 
in MTypes.numpy, \ f'Higher dimensional quantities ({self}) need a dtype and will be treated as numpy arrays.' - @constraint - def annotations_are_valid(self): - # TODO this should be replaced with a proper mechanism for defining and - # validating annotation types - if 'eln' in self.m_annotations: - from nomad.datamodel.metainfo.eln.annotations import validate_eln_quantity_annotations - validate_eln_quantity_annotations(self) - def _hash_seed(self) -> str: ''' Generate a unique representation for this quantity. @@ -4374,3 +4398,33 @@ class Environment(MSection): raise KeyError(f'Could not uniquely identify {name}, candidates are {defs}') raise KeyError(f'Could not resolve {name}') + + +class AnnotationModel(Annotation, BaseModel): + ''' + Base class for defining annotation models. Annotations used with simple dict-based + values, can be validated by defining and registering a formal pydantic-based + model. + ''' + + m_definition: Definition = Field( + None, description='The definition that this annotation is annotating.') + + m_registry: ClassVar[Dict[str, Type['AnnotationModel']]] = {} + ''' A static member that holds all currently known annotations with pydantic model. 
''' + + def m_to_dict(self, *args, **kwargs): + return self.dict(exclude_unset=True) + + class Config: + fields = { + 'm_definition': { + 'exclude': True, + } + } + + validate_assignment = True + arbitrary_types_allowed = True + + +AnnotationModel.update_forward_refs() diff --git a/nomad/metainfo/util.py b/nomad/metainfo/util.py index cb5ad0b070..237bb758f3 100644 --- a/nomad/metainfo/util.py +++ b/nomad/metainfo/util.py @@ -23,9 +23,8 @@ from dataclasses import dataclass from datetime import date, datetime from difflib import SequenceMatcher from functools import reduce -from typing import Any, Dict, Optional, Sequence, Tuple, Union, Type, ClassVar +from typing import Any, Dict, Optional, Sequence, Tuple, Union from urllib.parse import SplitResult, urlsplit, urlunsplit -from pydantic import BaseModel import aniso8601 import numpy as np @@ -193,28 +192,6 @@ class MTypes: bool_numpy = {np.bool_} numpy = num_numpy | str_numpy | bool_numpy - eln = { - 'str': ['str', 'string'], - 'bool': ['bool', 'boolean'], - 'number': [x.__name__ for x in num_python] + [f'np.{x.__name__}' for x in num_numpy], # type: ignore - 'datetime': ['Datetime'], - 'enum': ['{type_kind: Enum, type_data: [Operator, Responsible_person]}'], - 'user': ['User'], - 'author': ['Author'], - 'reference': [''] - } - - eln_component = { - 'str': ['StringEditQuantity', 'FileEditQuantity', 'RichTextEditQuantity', 'EnumEditQuantity'], - 'bool': ['BoolEditQuantity'], - 'number': ['NumberEditQuantity', 'SliderEditQuantity'], - 'datetime': ['DateTimeEditQuantity'], - 'enum': ['EnumEditQuantity', 'AutocompleteEditQuantity', 'RadioEnumEditQuantity'], - 'user': ['UserEditQuantity'], - 'author': ['AuthorEditQuantity'], - 'reference': ['ReferenceEditQuantity'] - } - class MEnum(Sequence): ''' @@ -440,20 +417,6 @@ class Annotation: return str(self.__class__.__name__) -class AnnotationModel(Annotation, BaseModel): - ''' - Base class for defining annotation models. 
Annotations used with simple dict-based - values, can be validated by defining and registering a formal pydantic-based - model. - ''' - - m_registry: ClassVar[Dict[str, Type['AnnotationModel']]] = {} - ''' A static member that holds all currently known annotations with pydantic model. ''' - - def m_to_dict(self, *args, **kwargs): - return self.dict(exclude_unset=True) - - class DefinitionAnnotation(Annotation): ''' Base class for annotations for definitions. ''' diff --git a/nomad/mkdocs.py b/nomad/mkdocs.py index 0bda72c671..3fb4af956f 100644 --- a/nomad/mkdocs.py +++ b/nomad/mkdocs.py @@ -71,23 +71,6 @@ def define_env(env): or the archive of each entry (e.g. [a VASP example](../gui/search/entries/entry/id/d5OYC0SJTDevHMPk7YHd4A/-7j8ojKkna2NLXdytv_OjV4zsBXw/archive)) in the web-interface.''') - @env.macro - def get_schema_doc(key): # pylint: disable=unused-variable - schema_yaml = './docs/schema/suggestions.yaml' - with open(schema_yaml, "r") as yaml_file: - try: - schema = yaml.safe_load(yaml_file) - except yaml.YAMLError as exc: - print(exc) - - items = schema[key] - md_table = ['|Key|Description|', '|---|---|'] - for item, description in items.items(): - if key == 'type' and (item == 'log' or item == 'scatter'): - continue - md_table.append('|{}|{}|'.format(str(item), str(description))) - return utils.strip('\n'.join(md_table)) - @env.macro def yaml_snippet(path, indent, filter=None): # pylint: disable=unused-variable ''' @@ -157,7 +140,7 @@ def define_env(env): return results - def pydantic_model_from_model(model, name=None): + def pydantic_model_from_model(model, name=None, heading=None): fields = model.__fields__ required_models = set() if not name: @@ -166,13 +149,6 @@ def define_env(env): exported_config_models.add(name) - def default_value(field): - value = field.default - if isinstance(value, dict): - return '<complex dict>' - else: - return f'`{value}`' - def description(field): value = field.field_info.description @@ -188,7 +164,14 @@ def 
define_env(env): if field.field_info.description: result += f'{description(field)}<br/> ' - result += f'*default:* {default_value(field)}' + default_value = field.default + if default_value is not None: + if isinstance(default_value, dict): + default_value = '<complex dict>' + else: + default_value = f'`{default_value}`' + + result += f'*default:* {default_value}' if field.field_info.extra.get('deprecated', False): result += '<br/>**deprecated**' @@ -196,12 +179,21 @@ def define_env(env): return result def field_row(field): + if field.name.startswith('m_'): + return '' type_ = field.type_ if isclass(type_) and issubclass(type_, config.NomadSettings): required_models.add(type_) - return f'|{field.name}|{type_.__name__}|{content(field)}|\n' + try: + type_name = type_.__name__ + except Exception: + type_name = str(type_) + return f'|{field.name}|{type_name}|{content(field)}|\n' - result = f'### {name}\n' + if heading is None: + result = f'### {name}\n' + else: + result = heading + '\n' if model.__doc__ and model.__doc__ != '': result += utils.strip(model.__doc__) + '\n\n' @@ -218,7 +210,7 @@ def define_env(env): return result @env.macro - def pydantic_model(path): # pylint: disable=unused-variable + def pydantic_model(path, heading=None): # pylint: disable=unused-variable ''' Produces markdown code for the given pydantic model. 
@@ -231,4 +223,4 @@ def define_env(env): module = importlib.import_module(module_name) model = getattr(module, name) - return pydantic_model_from_model(model) + return pydantic_model_from_model(model, heading=heading) diff --git a/nomad/parsing/tabular.py b/nomad/parsing/tabular.py index 4f22b1afa2..0e2b1b12ff 100644 --- a/nomad/parsing/tabular.py +++ b/nomad/parsing/tabular.py @@ -29,6 +29,7 @@ from nomad.datamodel.context import Context from nomad.metainfo import Section, Quantity, Package, Reference, SectionProxy, MSection, Property from nomad.metainfo.metainfo import MetainfoError, SubSection from nomad.parsing.parser import MatchingParser +from nomad.datamodel.metainfo.annotations import TabularMode, TabularAnnotation, TabularParserAnnotation # We define a simple base schema for tabular data. The parser will then generate more # specialized sections based on the table headers. These specialized definitions will use @@ -63,12 +64,12 @@ class TableData(ArchiveSection): def normalize(self, archive, logger): super(TableData, self).normalize(archive, logger) - for quantity in self.m_def.all_quantities.values(): - tabular_parser_annotation = quantity.m_annotations.get('tabular_parser', {}) - if tabular_parser_annotation: - self.tabular_parser(quantity, archive, logger, **tabular_parser_annotation) + for quantity_def in self.m_def.all_quantities.values(): + annotation = quantity_def.m_get_annotations('tabular_parser') + if annotation: + self.tabular_parser(quantity_def, archive, logger, annotation) - def tabular_parser(self, quantity_def: Quantity, archive, logger, **kwargs): + def tabular_parser(self, quantity_def: Quantity, archive, logger, annotation: TabularParserAnnotation): if logger is None: logger = utils.get_logger(__name__) @@ -80,16 +81,16 @@ class TableData(ArchiveSection): return with archive.m_context.raw_file(self.data_file) as f: - data = read_table_data(self.data_file, f, **kwargs) + data = read_table_data(self.data_file, f, 
**annotation.dict(include={'sep', 'comment', 'skiprows'})) - tabular_parser_mode = 'column' if kwargs.get('mode') is None else kwargs.get('mode') - if tabular_parser_mode == 'column': + tabular_parser_mode = annotation.mode + if tabular_parser_mode == TabularMode.column: parse_columns(data, self) - elif tabular_parser_mode == 'row': + elif tabular_parser_mode == TabularMode.row: # Getting list of all repeating sections where new instances are going to be read from excel/csv file # and appended. - section_names: List[str] = kwargs.get('target_sub_section') + section_names: List[str] = annotation.target_sub_section # A list to track if the top level section has ever been read. top_level_section_list: List[str] = [] for section_name in section_names: @@ -118,7 +119,7 @@ class TableData(ArchiveSection): self_updated.append(section_updated[0]) else: - raise MetainfoError(f'The provided mode {tabular_parser_mode} should be either "column" or "row".') + raise MetainfoError(f'The provided mode {tabular_parser_mode.value} should be either "column" or "row".') m_package.__init_metainfo__() @@ -136,9 +137,9 @@ def _create_column_to_quantity_mapping(section_def: Section): continue properties.add(quantity) - tabular_annotation = quantity.m_annotations.get('tabular', {}) - if tabular_annotation and 'name' in tabular_annotation: - col_name = tabular_annotation['name'] + annotation = quantity.m_get_annotations('tabular') + if annotation and annotation.name: + col_name = annotation.name else: col_name = quantity.name if len(path) > 0: @@ -149,7 +150,10 @@ def _create_column_to_quantity_mapping(section_def: Section): f'The schema has non unique column names. {col_name} exists twice. 
' f'Column names must be unique, to be used for tabular parsing.') - def set_value(section: MSection, value, path=path, quantity=quantity, tabular_annotation=tabular_annotation): + def set_value( + section: MSection, value, path=path, quantity=quantity, + annotation: TabularAnnotation = annotation): + import numpy as np for sub_section, section_def in path: next_section = None @@ -162,8 +166,8 @@ def _create_column_to_quantity_mapping(section_def: Section): section.m_add_sub_section(sub_section, next_section, -1) section = next_section - if tabular_annotation and 'unit' in tabular_annotation: - value *= ureg(tabular_annotation['unit']) + if annotation and annotation.unit: + value *= ureg(annotation.unit) # NaN values are not supported in the metainfo. Set as None # which means that they are not stored. @@ -273,7 +277,9 @@ def parse_table(pd_dataframe, section_def: Section, logger): return sections -def read_table_data(path, file_or_path=None, **kwargs): +def read_table_data( + path, file_or_path=None, + comment: str = None, sep: str = None, skiprows: int = None): import pandas as pd df = pd.DataFrame() @@ -285,22 +291,16 @@ def read_table_data(path, file_or_path=None, **kwargs): file_or_path if isinstance(file_or_path, str) else file_or_path.name) for sheet_name in excel_file.sheet_names: df.loc[0, sheet_name] = [ - pd.read_excel(excel_file, sheet_name=sheet_name, - comment=kwargs.get('comment'), - skiprows=kwargs.get('skiprows')).to_dict()] + pd.read_excel(excel_file, sheet_name=sheet_name, comment=comment, skiprows=skiprows).to_dict() + ] else: - if kwargs.get('sep') is not None: - sep_keyword = kwargs.get('sep') - elif kwargs.get('separator') is not None: - sep_keyword = kwargs.get('sep') - else: - sep_keyword = None df.loc[0, 0] = [ - pd.read_csv(file_or_path, engine='python', - comment=kwargs.get('comment'), - sep=sep_keyword, - skipinitialspace=True - ).to_dict() + pd.read_csv( + file_or_path, engine='python', + comment=comment, + sep=sep, + skipinitialspace=True + ).to_dict() 
] return df @@ -380,8 +380,9 @@ class TabularDataParser(MatchingParser): logger.error('Schema for tabular data must inherit from TableRow.') return - tabular_parser_annotation = section_def.m_annotations.get('tabular-parser', {}) - data = read_table_data(mainfile, **tabular_parser_annotation) + annotation: TabularParserAnnotation = section_def.m_get_annotations('tabular_parser') + kwargs = annotation.dict(include={'comment', 'sep', 'skiprows'}) if annotation else {} + data = read_table_data(mainfile, **kwargs) child_sections = parse_table(data, section_def, logger=logger) assert len(child_archives) == len(child_sections) diff --git a/tests/datamodel/test_schema.py b/tests/datamodel/test_schema.py index 0f559809a4..a6c0855749 100644 --- a/tests/datamodel/test_schema.py +++ b/tests/datamodel/test_schema.py @@ -23,7 +23,7 @@ from nomad.metainfo import MetainfoError from nomad.datamodel.context import ServerContext from nomad.datamodel.datamodel import EntryArchive, EntryMetadata from nomad.datamodel.data import UserReference, AuthorReference -from nomad.datamodel.metainfo.eln.annotations import validElnTypes, validElnComponents +from nomad.datamodel.metainfo.annotations import valid_eln_types, valid_eln_components from nomad.parsing.parser import ArchiveParser from nomad.processing.data import Upload from nomad.utils import get_logger, strip @@ -72,8 +72,8 @@ def test_eln_annotation_validation_parsing(raw_files, caplog): assert has_error -@pytest.mark.parametrize("eln_type", validElnTypes.keys()) -@pytest.mark.parametrize("eln_component", sum(validElnComponents.values(), [])) +@pytest.mark.parametrize("eln_type", valid_eln_types.keys()) +@pytest.mark.parametrize("eln_component", sum(valid_eln_components.values(), [])) def test_eln_annotation_validation(eln_type, eln_component): base_schema = strip(''' m_def: 'nomad.metainfo.metainfo.Package' @@ -96,13 +96,13 @@ def test_eln_annotation_validation(eln_type, eln_component): component: eln_component ''') - for 
quantity_type in validElnTypes[eln_type]: + for quantity_type in valid_eln_types[eln_type]: if eln_type == 'reference': yaml_schema = base_schema.replace("quantity_type", "'#/Sample'").replace("eln_component", eln_component) else: yaml_schema = base_schema.replace("quantity_type", quantity_type).replace("eln_component", eln_component) - if eln_component not in validElnComponents[eln_type]: + if eln_component not in valid_eln_components[eln_type]: package = yaml_to_package(yaml_schema) type_name = quantity_type if eln_type in ['number', 'datetime', 'enum', 'reference']: @@ -113,11 +113,11 @@ def test_eln_annotation_validation(eln_type, eln_component): package.__init_metainfo__() assert isinstance(exception.value, MetainfoError) - assert exception.value.args[0] == ( - f'One constraint was violated: The component {eln_component} ' + error_str = ( + f'The component {eln_component} ' f'is not compatible with the quantity quantity_name of the type {type_name}. ' - f'Accepted components: {", ".join(validElnComponents[eln_type])} ' - f'(there are 0 more violations)') + f'Accepted components: {", ".join(valid_eln_components[eln_type])}') + assert error_str in exception.value.args[0] def test_user_author_yaml_deserialization(): diff --git a/tests/metainfo/test_metainfo.py b/tests/metainfo/test_metainfo.py index 8e678743e8..04eed26ac5 100644 --- a/tests/metainfo/test_metainfo.py +++ b/tests/metainfo/test_metainfo.py @@ -375,7 +375,7 @@ class TestM2: pytest.param(dict(integer=1), True, id='passes-int'), pytest.param(dict(integer='string'), False, id='fails') ]) - def test_formal_annotations(self, annotation, passes): + def test_annotation_models(self, annotation, passes): class TestAnnotation(AnnotationModel): string: str = 'default' integer: int = 0 @@ -384,12 +384,14 @@ class TestM2: AnnotationModel.m_registry['test'] = TestAnnotation if passes: - run = Run(a_test=annotation) - assert isinstance(run.a_test, TestAnnotation) - - as_dict = run.m_to_dict(with_meta=True) - run = 
Run.m_from_dict(as_dict) - assert isinstance(run.a_test[0], TestAnnotation) + section_def = Section(name='test', a_test=annotation) + assert isinstance(section_def.a_test, TestAnnotation) + assert section_def.a_test.m_definition is not None + as_dict = section_def.m_to_dict(with_meta=True) + assert 'm_definition' not in as_dict + section_def = Section.m_from_dict(as_dict) + assert isinstance(section_def.a_test[0], TestAnnotation) + assert section_def.a_test[0].m_definition is not None else: with pytest.raises(Exception): Run(a_test=annotation) diff --git a/tests/metainfo/test_yaml_schema.py b/tests/metainfo/test_yaml_schema.py index 8bab2b6bd6..807a77a11e 100644 --- a/tests/metainfo/test_yaml_schema.py +++ b/tests/metainfo/test_yaml_schema.py @@ -120,7 +120,7 @@ def test_yaml_deserialization(): assert sample_id.name == des_sample_id.name == 'sample_id' assert sample_id.type == des_sample_id.type == str assert sample_id.shape == des_sample_id.shape - assert sample_id.m_annotations["eln"]["component"] == des_sample_id.m_annotations["eln"]["component"] + assert sample_id.m_annotations["eln"].component == des_sample_id.m_annotations["eln"].component assert sample_id.description == des_sample_id.description.rstrip('\n') assert process.name == des_process.name == "Process" diff --git a/tests/parsing/test_tabular.py b/tests/parsing/test_tabular.py index d6cc557575..8be7005373 100644 --- a/tests/parsing/test_tabular.py +++ b/tests/parsing/test_tabular.py @@ -323,9 +323,6 @@ data: base_section: Substance2''', 'header_0,header_1,header_2\n0_0,0_1,0_2\n1_0,1_1,1_2', id='simple_2_sections'), pytest.param('test_3', '', '- subsection_1/my_substance1', f'''subsection_1: section: - m_annotations: - eln: - dict() sub_sections: my_substance1: repeats: true -- GitLab