diff --git a/dependencies/normalizers/simulation/band_structure b/dependencies/normalizers/simulation/band_structure index 95f42793d8c8babb5b3503ef2b63e3d403a24e4e..0c3b290ebcf636b022eea484083159e8fee3f013 160000 --- a/dependencies/normalizers/simulation/band_structure +++ b/dependencies/normalizers/simulation/band_structure @@ -1 +1 @@ -Subproject commit 95f42793d8c8babb5b3503ef2b63e3d403a24e4e +Subproject commit 0c3b290ebcf636b022eea484083159e8fee3f013 diff --git a/dependencies/normalizers/simulation/dos b/dependencies/normalizers/simulation/dos index e47742260dc747b3a8ad00d3a3d5dbe223c125ba..b81e76ab448223301c208d9f0a8b12058de9eceb 160000 --- a/dependencies/normalizers/simulation/dos +++ b/dependencies/normalizers/simulation/dos @@ -1 +1 @@ -Subproject commit e47742260dc747b3a8ad00d3a3d5dbe223c125ba +Subproject commit b81e76ab448223301c208d9f0a8b12058de9eceb diff --git a/dependencies/normalizers/simulation/soap b/dependencies/normalizers/simulation/soap index 06800da2a4c6634dd843d1b66ae5cd5eae9ac9ad..f594bd6bbdcb275020a5109885bf6b613a76d9d1 160000 --- a/dependencies/normalizers/simulation/soap +++ b/dependencies/normalizers/simulation/soap @@ -1 +1 @@ -Subproject commit 06800da2a4c6634dd843d1b66ae5cd5eae9ac9ad +Subproject commit f594bd6bbdcb275020a5109885bf6b613a76d9d1 diff --git a/dependencies/normalizers/simulation/spectra b/dependencies/normalizers/simulation/spectra index 6e05a3b042c44c86b01cb6b635856c6d49b740f5..d98d97b8fae90cd1c3f61860e9eb92ecffd58fe8 160000 --- a/dependencies/normalizers/simulation/spectra +++ b/dependencies/normalizers/simulation/spectra @@ -1 +1 @@ -Subproject commit 6e05a3b042c44c86b01cb6b635856c6d49b740f5 +Subproject commit d98d97b8fae90cd1c3f61860e9eb92ecffd58fe8 diff --git a/dependencies/normalizers/simulation/system b/dependencies/normalizers/simulation/system index 7d078267f4a644e7c4ea87740669ea362c60775d..6160799d99d888b4fbab80caffc0647185286b96 160000 --- a/dependencies/normalizers/simulation/system +++ 
b/dependencies/normalizers/simulation/system @@ -1 +1 @@ -Subproject commit 7d078267f4a644e7c4ea87740669ea362c60775d +Subproject commit 6160799d99d888b4fbab80caffc0647185286b96 diff --git a/dependencies/normalizers/simulation/workflow b/dependencies/normalizers/simulation/workflow index 5ac17eba4a206112753a1fab7c7d548759492d59..78eb29c91306d51c54ff1af1c28072a6efe41440 160000 --- a/dependencies/normalizers/simulation/workflow +++ b/dependencies/normalizers/simulation/workflow @@ -1 +1 @@ -Subproject commit 5ac17eba4a206112753a1fab7c7d548759492d59 +Subproject commit 78eb29c91306d51c54ff1af1c28072a6efe41440 diff --git a/docs/examples/computational_data/parser_plugins.md b/docs/examples/computational_data/parser_plugins.md index d62b70b0a942ed3342f3bff1d26470bff74af1f5..2c4dae531b6675cd17d0d61d83cf5815439cee7a 100644 --- a/docs/examples/computational_data/parser_plugins.md +++ b/docs/examples/computational_data/parser_plugins.md @@ -18,7 +18,7 @@ The NOMAD computational parsers can be found within your local NOMAD distributio * [nexus](https://github.com/nomad-coe/nomad-parser-nexus) - Parsers for combining various instrument output formats and electronic lab notebooks. * [workflow](https://github.com/nomad-coe/workflow-parsers) - Parsers for output from task managers and workflow schedulers. -You can also examine the source code of the parsers by following the above links to the corresponding GitHub repository for each project. Within each project folder you will find a `test/` directory, containing the [parser tests](../../howto/customization/parsers.md#testing-a-parser), and also a directory containing the parsers' source code, +You can also examine the source code of the parsers by following the above links to the corresponding GitHub repository for each project. 
Within each project folder you will find a `test/` directory, containing the [parser tests](../../howto/plugins/plugins.md#testing), and also a directory containing the parsers' source code, `<parserproject>parser` or `<parserproject>parsers`, depending on if one or more parsers are contained within the project, respectively. In the case of multiple parsers, the files for individual parsers are contained within a corresponding subdirectory: `<parserproject>parsers/<parsername>` @@ -31,9 +31,9 @@ For example, the Quantum Espresso parser files are found in `dependencies/parser The general docs contain information about the nuts and bolts of developing a plugin: -- [How to install plugins](../../howto/customization/plugins.md): Some basic information about different types of plugins, plugin anatomy, and creating a plugin project. +- [How to write a plugin](../../howto/plugins/plugins.md): Some basic information about different types of plugins, plugin anatomy, and creating a plugin project. -- [How to write a parser](../../howto/customization/parsers.md): The basics of how NOMAD parsers work, how to utlize existing parser classes, and how to customize your parser metadata. +- [How to write a parser](../../howto/plugins/parsers.md): The basics of how NOMAD parsers work, how to configure the files that your parser will match, and how to utilize existing parser classes. !!! Attention This page is under construction as we convert NOMAD's standard computational parsers to parser plugins. Along the way, we will add content below to guide you in the development of your own computational parser plugins. diff --git a/docs/explanation/basics.md b/docs/explanation/basics.md index d5655e1c694158782159929b8a8e5c485df65801..d8f169d0cdcbabe82964c332e7f52381c1a63655 100644 --- a/docs/explanation/basics.md +++ b/docs/explanation/basics.md @@ -75,8 +75,8 @@ the entry corresponding to the mainfile, will be processed. 
Processing consist o Parsers are small programs that transform data from a recognized *mainfile* into a structured machine processable tree of data that we call the *archive* or [**processed data**](data.md) of the entry. Only one parser is used for each entry. The used parser is determined -during matching and depends on the file format. [A dedicated guide](../howto/customization/parsers.md#match-your-raw-file) shows how to match a specific file from your parser. Parsers can be added to NOMAD as -[plugins](../howto/customization/parsers.md#developing-a-parser-plugin); this is a list of [all built-in parsers](../reference/parsers.md). +during matching and depends on the file format. [A dedicated guide](../howto/plugins/parsers.md#match-your-raw-file) shows how to match a specific file from your parser. Parsers can be added to NOMAD as +[plugins](../howto/plugins/parsers.md); this is a list of [all built-in parsers](../reference/parsers.md). !!! note A special case is the parsing of NOMAD archive files. Usually a parser converts a file @@ -98,7 +98,7 @@ processed data. Learn more about why to normalize in the documentation on [struc There are two principle ways to implement normalization in NOMAD: **normalizers** and **normalize** functions. -[Normalizers](../howto/customization/normalizers.md) are small programs that take processed data as input. +[Normalizers](../howto/plugins/normalizers.md) are small programs that take processed data as input. There is a list of normalizers registered in the [NOMAD configuration](../reference/config.md#normalize). In the future, normalizers might be added as plugins as well. They run in the configured order. Every normalizer is run @@ -106,10 +106,10 @@ on all entries and the normalizer might decide to do something or not, depending it sees in the processed data. 
Normalize functions are special functions implemented as part of section definitions -in [Python schemas](../howto/customization/schemas.md#writing-schemas-in-python-compared-to-yaml-schemas). +in [Python schemas](../howto/plugins/schema_packages.md#schema-packages-python-vs-yaml). There is a special normalizer that will go through all processed data and execute these function if they are defined. Normalize functions get the respective section instance as -input. This allows [schema plugin](../howto/customization/schemas.md#how-to-write-a-schema-plugin) developers to add normalizing to their sections. +input. This allows [schema plugin](../howto/plugins/schema_packages.md) developers to add normalizing to their sections. Read about our [structured data](./data.md) to learn more about the different sections. ### Storing and indexing diff --git a/docs/explanation/data.md b/docs/explanation/data.md index 55bb3155eacead270d2ae663a45bb76dfac9d85c..b6bd38ffa52203ddad248992daf9d07a5e04ab38 100644 --- a/docs/explanation/data.md +++ b/docs/explanation/data.md @@ -134,7 +134,7 @@ Contributions here are only possible through merge requests. Base sections can be contributed via plugins. Here they can be explored in the Metainfo browser, your plugin can provide more tools, and you can make use of normalize functions. -See also our [how-to on writing schema plugins](../howto/customization/schemas.md#how-to-write-a-schema-plugin). You could +See also our [how-to on writing schema packages](../howto/plugins/schema_packages.md). You could also provide base sections via uploaded schemas, but those are harder to explore and distribute to other NOMAD installations. @@ -143,7 +143,7 @@ schemas, you most likely also upload data in archive files (or use ELNs to edit Here you can also provide schemas and data in the same file. In many case specific schemas will be small and only re-combine existing base sections. See also our -[how-to on writing schemas](../howto/customization/basics.md). 
+[how-to on writing YAML schemas](../howto/customization/basics.md). ## Data @@ -156,7 +156,7 @@ The Metainfo has many serialized forms. You can write `.archive.json` or `.archi files yourself. NOMAD internally stores all processed data in [message pack](https://msgpack.org/){:target="_blank"}. Some of the data is stored in mongodb or elasticsearch. When you request processed data via API, you receive it in JSON. When you use the [ArchiveQuery](../howto/programmatic/archive_query.md), all data is represented -as Python objects (see also [a starting example](../howto/customization/schemas.md#starting-example)). +as Python objects (see also [example in schema package documentation](../howto/plugins/schema_packages.md#schemapackage-class)). No matter what the representation is, you can rely on the structure, names, types, shapes, and units defined in the schema to interpret the data. diff --git a/docs/explanation/processing.md b/docs/explanation/processing.md index 430f3102dd09b34d1a54a6a9d4d2e55ac4457afa..5acf566f9b42dfb6a6d804b1d33534bc5e16decd 100644 --- a/docs/explanation/processing.md +++ b/docs/explanation/processing.md @@ -62,7 +62,7 @@ In most scenarios, entry processing is not triggered individually, but as part o processing. Many entries of one upload might be processed at the same time. Some order can be enforced through *processing levels*. Levels are part of the parser metadata and entries paired to parsers with a higher level are processed after entries with a -parser of lower level. See also [how to write parser plugins](../howto/customization/parsers.md#developing-a-parser-plugin). +parser of lower level. See also [how to write parsers](../howto/plugins/parsers.md). ## Customize processing @@ -90,7 +90,7 @@ section, might indirectly use custom processing functionality. A parser plugin can define a new parser and therefore add to the *matching*, *parsing*, (and *normalizing*). 
A schema plugin defines new sections that can contain `normalize` functions that add to the *normalizing*. -See also the how-tos on [plugins installation](../howto/oasis/plugins_install.md), and development of [parsers and schemas](../howto/customization/plugins.md). +See also the how-tos on [plugins installation](../howto/oasis/plugins_install.md), and development of [parsers and schemas](../howto/plugins/plugins.md). #### Matching @@ -115,7 +115,7 @@ uses certain criteria, for example: - regular expressions on mimetypes - regular expressions on header content -See [How to write a parser](../howto/customization/parsers.md#developing-a-parser-plugin) for more details. +See [How to write a parser](../howto/plugins/parsers.md) for more details. The matching step of an upload's processing, will call this function for every file and on all parsers. There are some hidden optimizations and additional parameters, but diff --git a/docs/howto/customization/basics.md b/docs/howto/customization/basics.md index 28bf7c488e76c0bf879f7e0e0233ad1d12be635a..b8348c8d09bd7bd7d7d15caef33ef0f0f260ab72 100644 --- a/docs/howto/customization/basics.md +++ b/docs/howto/customization/basics.md @@ -1,8 +1,6 @@ -# How to write a schema +# How to write a YAML schema package -<!-- # Write NOMAD Schemas in YAML --> - -This guide explains how to write and upload NOMAD schemas in our `.archive.yaml` format. For more information on how an archive file is composed, visit [Explanation > Data structure](../../explanation/data.md). +This guide explains how to write and upload NOMAD schema packages in the YAML format that can be uploaded as part of your data. This is a good way to start out experimenting with custom data structures in NOMAD, but for more advanced use cases you may need to use [Python schema packages](../plugins/schema_packages.md). For more information on how an archive file is composed, visit [Explanation > Data structure](../../explanation/data.md). 
## Example data @@ -17,23 +15,23 @@ of *primitive values* (e.g. `'H2O'`, `1.141`), *objects* (a set of *keys* and *v ## Sections -In a schema, we want to describe the structure of data, i.e. what are the allowed combinations of *objects*, *lists*, and *primitive values*. +In a schema package, we want to describe the structure of data, i.e. what are the allowed combinations of *objects*, *lists*, and *primitive values*. The crucial task here is to define what *keys* certain *types of objects* can have and what possible *values* might exist for each of these keys. In NOMAD, we call *objects* **sections** and we define *types of objects* with **section definitions**. Since *objects* can be nested, **sections** become like the sections and subsections of a book or paper. Sections are a representation of data and they are -the building blocks for [**archives**](../../reference/glossary.md#archive). Section definitions form a schema and they are +the building blocks for [**archives**](../../reference/glossary.md#archive). Section definitions form a schema package and they are the building blocks for the [**metainfo**](../../reference/glossary.md#metainfo). In the above example, we have two *types* of *objects*: an overaching object for the entire structure (with *keys* for `composition` and `elements`), and an additional object which describes the internal structure of `elements` (with *keys* for `label`, `density`, and `isotopes`). Let's start with -the *definition* for elements. This is what the *section definition* looks like in NOMAD's yaml-based schema format: +the *definition* for elements. 
This is what the *section definition* looks like in NOMAD's yaml-based schema package format: ```yaml Element: -{{ yaml_snippet('examples/docs/basic_schema/schema.archive.yaml:definitions/sections/Element', ' ') }} +{{ yaml_snippet('examples/docs/basic_schema/package.archive.yaml:definitions/sections/Element', ' ') }} ``` A *section definition* provides all the available *keys* for a *section* that instantiates @@ -44,7 +42,7 @@ Let's have a look at the overall definition for our chemical composition: ```yaml Composition: -{{ yaml_snippet('examples/docs/basic_schema/schema.archive.yaml:definitions/sections/Composition', ' ') }} +{{ yaml_snippet('examples/docs/basic_schema/package.archive.yaml:definitions/sections/Composition', ' ') }} ``` Again, all possible *keys* (`composition` and `elements`) are defined. But now we see @@ -103,7 +101,7 @@ NOMAD manages units and data with units via the [Pint](https://pint.readthedocs. be simple units (or their aliases) or complex expressions. Here are a few examples: `m`, `meter`, `mm`, `millimeter`, `m/s`, `m/s**2`. -While you can use all kinds of units in your uploaded schemas, the built-in NOMAD schema (Metainfo) uses only SI units. +While you can use all kinds of units in your uploaded schema packages, the built-in NOMAD schema (Metainfo) uses only SI units. ## Subsections @@ -114,17 +112,17 @@ one `Composition` can contain many `Elements`. The *names* of *subsection definitions* serve as the *key*, used in respective *section objects*. -## Uploading schemas +## Uploading schema packages NOMAD archive files allow you to upload data in NOMAD's native file format. An archive file can be a .yaml or .json file. It ends with `.archive.json` or `.archive.yaml`. -Archive files are mainly used to convey data. Since schemas are also "just" data, archive -files can also be used to convey a schema. +Archive files are mainly used to convey data. 
Since YAML schema packages are also "just" data, archive +files can also be used to convey a schema package. -You can upload schemas and data in separate files. -`schema.archive.yaml` +You can upload schema packages and data in separate files. +`schema_package.archive.yaml` ```yaml ---8<-- "examples/docs/basic_schema/schema.archive.yaml" +--8<-- "examples/docs/basic_schema/package.archive.yaml" ``` and `data.archive.yaml` @@ -132,9 +130,9 @@ and `data.archive.yaml` --8<-- "examples/docs/basic_schema/data.archive.yaml" ``` -Or, you can upload schemas and data in the same file: +Or, you can upload the schema package and data in the same file: ```yaml ---8<-- "examples/docs/basic_schema/schema.archive.yaml" +--8<-- "examples/docs/basic_schema/package.archive.yaml" data: m_def: Composition {{ yaml_snippet('examples/docs/basic_schema/data.archive.yaml:data', ' ', 'm_def') }} @@ -148,7 +146,7 @@ want to represent highly inter-linked data, this is often insufficient. *Referen allow us to create a more lose relationship between sections. A reference is a uni-directional link between a *source* section and a *target* section. -References can be defined in a schema as a quantity in the *source* section definition +References can be defined in a schema package as a quantity in the *source* section definition that uses the *target* section definition as a type. Instead of connecting the elements in a composition with subsections, we can also @@ -183,10 +181,10 @@ data: If you follow the *keys* `data`, `periodic_table`, `elements`, `0`, you reach the section that represent hydrogen. Keep in mind that *lists* use index-numbers as *keys*. -### Schema references +### Schema package references References can look different depending on the context. Above we saw simple references that point from one data section to another. But, you also already a saw a different -type of reference. Schema's themselves contain references: when we +type of reference. 
Schema packages themselves contain references: when we used `type: Element` or `section: Element` to refer to a *section definition*, we were writing down references that point to a *section definition*. Here we can use a convenience representation: `Element` simply replaces the otherwise cryptic `#/definitions/sections/0`. @@ -227,9 +225,9 @@ between two NOMAD entries. These inter-entry references have two parts: `<entry>#<section>`, where *entry* is a path or URL denoting the *target* entry and *section* a path within the *target* entry's subsection containment hierarchy. -Please note that also schemas can be spread over multiple files. In the above example, -one file contained the schema and data for a periodic table and another file contained -schema and data for the composition of water (using the periodic table). +Please note that also schema packages can be spread over multiple files. In the above example, +one file contained the schema package and data for a periodic table and another file contained +schema package and data for the composition of water (using the periodic table). ## Base sections and inheritance @@ -240,7 +238,7 @@ definitions* ### Base sections -Here is a simple schema with two *specialization* of the same *abstract* section +Here is a simple schema package with two *specialization* of the same *abstract* section definition: ```yaml definitions: @@ -305,13 +303,7 @@ EntryData: ``` Compare this to the previous examples: we used the top-level *keys* `definitions` -and `data` without really explaining why. Here you can see why. The `EntryArchive` *property* `definitions` allows -us to put a *metainfo package* (i.e. a NOMAD *schema*) into our archives. And -the `EntryArchive` *property* `data` allows us to put *data* into archives that is a -*specialization* of `EntryData`. The `EntryData` definition is empty. It is merely an *abstract* placeholder that allows you to add *specialized* data sections to your archive. 
-Therefore, all *section definitions* that define a top-level data section, should -correctly use `nomad.datamodel.EntryData` as a base section. This would be the first "correct" -example: +and `data` without really explaining why. Here you can see why. The `EntryArchive` *property* `definitions` allows us to put a *schema package* into our archives. And the `EntryArchive` *property* `data` allows us to put *data* into archives that is a *specialization* of `Schema`. The `Schema` definition is empty. It is merely an *abstract* placeholder that allows you to add *specialized* data sections to your archive. Therefore, all *section definitions* that define a top-level data section, should correctly use `nomad.datamodel.Schema` as a base section. This would be the first "correct" example: ```yaml --8<-- "examples/docs/inheritance/hello.archive.yaml" @@ -331,35 +323,33 @@ Here are a few other built-in section definitions and packages of definitions: |nomad.metainfo.*|A package that contains all *definitions* of *definitions*, e.g. NOMAD's "schema language". Here you find *definitions* for what a sections, quantity, subsections, etc. is.| -## Separating data and schema +## Separating data and schema package -As we saw above, a NOMAD entry can contain schema `definitions` and `data` at the -same time. To organize your schemas and data efficiently, it is often necessary to re-use -schemas and certain data in other entries. You can use *references* to spread your -schemas and data over multiple entries and connect the pieces via *references*. +As we saw above, a NOMAD entry can contain schema package `definitions` and `data` at the +same time. To organize your schema package and data efficiently, it is often necessary to re-use +schema packages and certain data in other entries. You can use *references* to spread your +schema packages and data over multiple entries and connect the pieces via *references*. 
-Here is a simple schema, stored in a NOMAD entry with mainfile name `schema.archive.yaml`: +Here is a simple schema package, stored in a NOMAD entry with mainfile name `package.archive.yaml`: ```yaml ---8<-- "examples/docs/references/multiple_files/schema.archive.yaml" +--8<-- "examples/docs/references/multiple_files/package.archive.yaml" ``` -Now, we can re-use this schema in many entries via *references*. Here, we extend -the schema and instantiate definitions is a separate mainfile `data-and-schema.archive.yaml`: +Now, we can re-use this schema package in many entries via *references*. Here, we extend +a schema contained in the package and instantiate definitions is a separate mainfile `data-and-package.archive.yaml`: ```yaml ---8<-- "examples/docs/references/multiple_files/data-and-schema.archive.yaml" +--8<-- "examples/docs/references/multiple_files/data-and-package.archive.yaml" ``` Here is a last example that re-uses the schema and references data from the two entries above: ```yaml ---8<-- "examples/docs/references/multiple_files/data.archive.yaml" +--8<-- "examples/docs/references/multiple_files/package.archive.yaml" ``` !!! warning "Attention" You cannot create definitions that lead to circular loading of `*.archive.yaml` files. - Each `definitions` section in an NOMAD entry represents a schema *package*. Each *package* - needs to be fully loaded and analyzed before it can be used by other *packages* in other entries. - Therefore, two *packages* in two entries cannot reference each other. + Each `definitions` section in an NOMAD entry represents a *schema package*. Each *schema package* needs to be fully loaded and analyzed before it can be used by other *schema packages* in other entries. Therefore, two *schema packages* in two entries cannot reference each other. 
diff --git a/docs/howto/customization/normalizers.md b/docs/howto/customization/normalizers.md deleted file mode 100644 index 986eca6306bd60fd6e74eef906276e428fb0baf8..0000000000000000000000000000000000000000 --- a/docs/howto/customization/normalizers.md +++ /dev/null @@ -1,148 +0,0 @@ -# How to write a normalizer - -## Introduction - -A normalizer can be any Python algorithm that takes the archive of an entry as input -and manipulates (usually expands) the given archive. This way, a normalizer can add -additional sections and quantities based on the information already available in the -archive. - -All normalizers are executed after parsing. Normalizers are run for each entry (i.e. each -of the parsed mainfiles). Normalizers are run in a particular order, and -you can make assumptions about the availability of data created by other normalizers. -A normalizer is run in any case, but it might choose not to do anything. A normalizer -can perform any operation on the archive, but in general it should only add more -information, not alter existing information. - -## Getting started -A normalizer can be developed either as part of the NOMAD package or as a plugin. In the -case of the former, the code can be added to the `nomad.normalizing` module. Developing -it as a plugin is discussed [How to develop a normalizer plugin](#developing-a-normalizer-plugin). In the -following, we illustrate the structure of a normalizer and describe how to interface it -to NOMAD in each of the two cases. - -## Starting example - -This is an example for a very simple normalizer that computes the unit cell volume from -a given lattice and adds it to each of the member of the system in the `run` section of the archive. 
- -```python -from nomad.normalizing import Normalizer -from nomad.atomutils import get_volume - -class UnitCellVolumeNormalizer(Normalizer): - - normalizer_level = 1 - - def normalize(self): - for system in self.entry_archive.run[-1].system: - system.unit_cell_volume = get_volume(system.lattice_vectors.magnitude) - - self.logger.debug('computed unit cell volume', system_index=system.m_parent_index) -``` - -A normalizer simply inherits `Normalizer` and implements the `normalize` method. The -`entry_archive` is available as a field. There is also a logger on the object that can be used. -Be aware that the processing will already report the run of the normalizer, log its -execution time and any exceptions that might been thrown. - -## Implementing a system normalizer - -There is a special base class for normalizing systems that allows to run the normalization -on all (or only the resulting) `representative` systems: - -```python -from nomad.normalizing import SystemBasedNormalizer -from nomad.atomutils import get_volume - -class UnitCellVolumeNormalizer(SystemBasedNormalizer): - def _normalize_system(self, system, is_representative): - system.unit_cell_volume = get_volume(system.lattice_vectors.magnitude) -``` - -For `SystemBasedNormalizer`, we implement the `_normalize_system` method. -The parameter `is_representative` will be true for the `representative` systems. The -representative system refers to the system that corresponds to the calculation result. -It is determined by scanning the archive sections starting with `workflow2` until -the system fitting the criterion is found. For example, it refers to the final step in a -geometry optimization or other workflow. - -Of course, if you add new information to the archive, this also needs to be defined in the -schema (see [How-to extend the schema](schemas.md#extending-existing-sections)). 
For example you could extend the section system with a special system definition -that extends the existing section system definition: - -```python -import numpy as np -from nomad.datamodel.metainfo import runschema -from nomad.metainfo import Section, Quantity - -class UnitCellVolumeSystem(runschema.system.System): - m_def = Section(extends_base_section=True) - unit_cell_volume = Quantity(np.dtype(np.float64), unit='m^3') -``` - -Here, we used the schema definition for the `run` section defined in this [plugin](schemas.md#pre-defined-schemas-in-nomad). - -## Adding a normalizer to the processing - -For any new normalizer class to be recognized by the processing, the normalizer class -needs to be added to the list of normalizers in the config file `nomad/config/__init__.py`. -For [normalizer plugins](#developing-a-normalizer-plugin) one needs to include it in -the `nomad.yaml` file (see [Adding a plugin to NOMAD](plugins.md#add-a-plugin-to-your-nomad)). -By default, the execution order of the normalizers during processing is determined by the -order of the normalizers in the list. One can specify the order of a normalizer relative -to the other normalizers by specifying the `normalizer_level` field. 
The following lists -the normalizers used in NOMAD in order of execution: - -| Normalizer class | Path/Project url | -| ---------------------------- | --------------------------------------------------------------------------- | -| SimulationWorkflowNormalizer | <https://github.com/nomad-coe/nomad-schema-plugin-simulation-workflow.git> | -| SystemNormalizer | <https://github.com/nomad-coe/nomad-normalizer-plugin-system.git> | -| SoapNormalizer | <https://github.com/nomad-coe/nomad-normalizer-plugin-soap.git> | -| SpectraNormalizer | <https://github.com/nomad-coe/nomad-normalizer-plugin-spectra.git> | -| OptimadeNormaizer | nomad.normalizing.optimade.OptimadeNormalizer | -| MetainfoNormaizer | nomad.normalizing.optimade.MetainfoNormalizer | -| DosNormalizer | <https://github.com/nomad-coe/nomad-normalizer-plugin-dos.git> | -| BandStructureNormalizer | <https://github.com/nomad-coe/nomad-normalizer-plugin-bandstructure.git> | -| ResultsNormalizer | nomad.normalizing.results.ResultsNormalizer | - -In the future, all normalizers will be migrated to plugins therefore new normalizers should be developed as plugins. - -## Testing a normalizer - -To simply try out a normalizer, you could use the CLI and run the parse command: - -```shell -nomad --debug parse --show-archive <path-to-example-file> -``` - -But eventually you need to add a more formal test. Place your `pytest` tests in -`tests/normalizing/test_unitcellvolume.py` similar to the existing tests or in the plugin -tests for the case of plugins. Necessary test data can be added to `tests/data/normalizers`. - -## Developing a Normalizer plugin -Fork and clone the [normalizer example project](https://github.com/nomad-coe/nomad-normalizer-plugin-example) -as described in [How to develop and publish plugins](plugins.md). 
The normalizer class is -defined in `nomadnormalizerexample/normalizer.py` - -```python -from nomad.normalizing import Normalizer -from nomad.datamodel.metainfo.workflow import Workflow - - -class ExampleNormalizer(Normalizer): - - domain = None - - def normalize(self, logger): - super().normalize(logger) - logger.info('ExampleNormalizer called') - - self.entry_archive.workflow2 = Workflow(name='Example workflow') -``` - -In this simple example, we create a the workflow2 section in the archive. - -## Normalizer plugin metadata - -{{pydantic_model('nomad.config.models.plugins.Normalizer')}} \ No newline at end of file diff --git a/docs/howto/customization/plugins.md b/docs/howto/customization/plugins.md deleted file mode 100644 index 1b585934fca6b1309ed066570c99b26e31e34cbf..0000000000000000000000000000000000000000 --- a/docs/howto/customization/plugins.md +++ /dev/null @@ -1,168 +0,0 @@ -# How to write a plugin - -The following sections explain how to write a plugin and add it to a NOMAD installation. -Dedicated explanation sections provide more background information on the types of plugins: -[schema](../../explanation/data.md#schema), [parser](../../explanation/processing.md#schemas-parsers-plugins) -and [normalizer](../../explanation/processing.md#normalizing). - -## Plugin anatomy - -A plugin usually consists of the *plugin code* (a Python package) and -*plugin metadata*. The installation **independent** *plugin metadata* (e.g. name, description, python package, etc.) -can be defined in a `nomad_plugin.yaml` that is part of the *plugin code*. -The installation **dependent** *plugin metadata* (e.g. plugin key, order and priority, parser matching rules, etc.) -are added to the [`nomad.yaml` file](../develop/setup.md#nomadyaml) of the NOMAD installation. 
- -Here is the project layout of the schema example: -``` -nomad-schema-plugin-example -├── nomadschemaexample -│ ├── __init__.py -│ ├── nomad_plugin.yaml -│ └── schema.py -├── tests -│ ├── data -│ │ └── test.archive.yaml -│ └── test_schema.py -├── LICENSE -├── README.md -├── nomad.yaml -└── requirements.txt -``` -<!-- TODO pyproject.toml, MANIFEST.in, setup.py are missing. requirements.txt is no longer there Additionally, we could adopt following a src structure, src/nomadschemaexample. --> - -## Plugin code - -The directory `nomadschemaexample` is our Python package *plugin code*, and contains `schema.py`: - -```python -{{ file_contents('examples/plugins/schema/nomadschemaexample/schema.py') }} -``` - -Read the [schema plugin documentation](schemas.md#develop-a-schema-plugin) -for more details. - -### Code Quality and Linting - -While developing NOMAD plugins, we highly recommend using a Python linter, such as [Ruff](https://docs.astral.sh/ruff), to analyze and enforce coding standards in your plugin projects. This also ensures smoother integration and collaboration. Ruff is also included in the templates provided on Github. - -## Plugin metadata - -The file `nomad_plugin.yaml` contains the installation **independent** *plugin metadata*. The following is for the schema plugin example: - -```yaml -{{ file_contents('examples/plugins/schema/nomadschemaexample/nomad_plugin.yaml') }} -``` - -The metadata contains the `plugin_type` (e.g. `schema`, `parser` or `normalizer`). The rest -will depend on the type and the underlying metadata model. For schemas there are only -descriptive metadata like `name` or `description` as schemas do not contain any technical -metadata that are necessary to use them. Refer to the *plugin metadata* models for -[schema](schemas.md#schema-plugin-metadata), [parser](parsers.md#parser-plugin-metadata) -and [normalizer](normalizers.md#normalizer-plugin-metadata). 
- -One can specify which plugin to enable in a nomad installation in the `nomad.yaml` file: - -```yaml -{{ file_contents('examples/plugins/schema/nomad.yaml') }} -``` - -Plugins are defined under the `plugins` key. This consists of `include` (or `exclude`) to -select (or ignore) a subset of all plugins. In this example, we disable all [built-in plugins](#different-forms-of-plugin-distribution) by only including `schemas/example` under `plugins`. -The `options` field can be used to define the plugin metadata. -This allows one to overwrite the metadata in the package's `nomad_plugin.yaml`. - -Please note that `python_package` is the name of a Python package and not a path to the -code. This also means that the package has to be in your `PYTHONPATH` (see [Add a plugin to your NOMAD](#add-a-plugin-to-your-nomad)). - - -As a plugin developer you have [installed the NOMAD Python package](../programmatic/pythonlib.md) -and can run the `nomad parse <mainfile>` command to make sure installation is successful. -Now follow the instructions for [one of our examples](#develop-a-plugin) and try for yourself! - -# Publish a plugin - -!!! warning "Attention" - The standard processes for publishing plugins and using plugins from other developers are still being worked out. The "best" practices mentioned in the following are preliminary. - -## Create a (GitHub) project - -If you forked from our examples, you already have a GitHub project. Otherwise, you -should create one. This allows others to get your plugin sources or initiate communication -via issues or pull requests. - -!!! tip "Important" - If you create a project from scratch, you should still follow the layout of our example projects. - -We suggest the following naming convention for plugin projects: - -- `nomad-<projectname>-plugin` (a single plugin) -- `nomad-<projectname>-plugins` (multiple plugins) - -A project can contain multiple plugins if it has multiple modules with corresponding -`nomad-plugin.yaml` files. - -!!! 
note - If you develop a plugin in the context of [FAIRmat](https://github.com/fairmat-nfdi) or - the [NOMAD CoE](https://github.com/nomad-coe), put your plugin projects in the - corresponding GitHub organization. In these cases, the naming convention above is required. - -## Different forms of plugin distribution - -- **source code**: [Mounting plugin code into a NOMAD (Oasis) installation](../oasis/plugins_install.md#mount-plugin-into-a-nomad-oasis) -- only the plugin source code is needed. -- **built-in**: Plugins that are directly maintained by NOMAD as distributed as part of -the NOMAD docker images. The Python code for those plugins is already installed, you only need -to configure NOMAD to use the plugins (or not). -- **PyPI/pip package**: Plugin projects can be published as PyPI/pip packages. Those -packages can then be installed either during NOMAD start-up (not implemented yet) or -when building a customized docker images (see [PyPI/pip package](#pypipip-package) below). - -Independent of the form of distribution, you will still need to add the plugin to -your configuration as explained in [previously](#plugin-metadata). - -## PyPI/pip package - -Learn from the PyPI documentation how to [create a package for PyPI](https://packaging.python.org/en/latest/tutorials/packaging-projects/){:target="_blank"}. -We recommend to use the `pyproject.toml`-based approach. Here is an example `pyproject.toml` file: - -```toml ---8<-- "examples/plugins/schema/pyproject.toml" -``` - -The package can be built like this: -``` -pip install build -python -m build --sdist -``` - -The PyPI documentation provides further information about how to [publish a package to PyPI](https://packaging.python.org/en/latest/tutorials/packaging-projects/#uploading-the-distribution-archives){:target="_blank"}. 
-If you have access to the MPCDF GitLab and NOMAD's presence there, you can also -use the `nomad-FAIR` registry: - -``` -pip install twine -twine upload \ - -u <username> -p <password> \ - --repository-url https://gitlab.mpcdf.mpg.de/api/v4/projects/2187/packages/pypi \ - dist/nomad-example-schema-plugin-*.tar.gz -``` - -## Register your plugin - -!!! warning "Attention" - This is work in progress. We plan to provide a plugin registry that allows you to - publish your plugin's *metadata*. This will then be used to simplify plugin management - within a NOMAD installation. - - The built-in plugins can already be found in the [documentation reference](../../reference/plugins.md). - -# Add a plugin to your NOMAD - -Adding a plugin depends on the type of plugin distribution and how you run NOMAD. -However, in all cases you will need to add the *plugin metadata* to `nomad.yaml` (see [above](#plugin-metadata)) and include the *plugin code* to the `PYTHONPATH`. There are several ways to add *plugin code*. - -## Add to Python path - -When you run NOMAD as a developer, simply add the plugin directory to the `PYTHONPATH` environment variable. -When you [run NOMAD](../develop/setup.md#run-nomad) (e.g. `nomad admin run appworker`), Python will find your code when NOMAD imports the `python_package` given in the `plugins.options` of your `nomad.yaml`. - diff --git a/docs/howto/develop/contrib.md b/docs/howto/develop/contrib.md index 39fd1c53a672ab512700ffc5723cc6726d6aa13d..6f84988cbda76903b195e9199ed208b0e4e61a49 100644 --- a/docs/howto/develop/contrib.md +++ b/docs/howto/develop/contrib.md @@ -80,8 +80,7 @@ See also the [documentation part](./code.md#documentation) in our code navigatio ## Plugins -Also read the guide on -[how to develop, publish, and distribute plugins](../customization/plugins.md). +Also read the guide on [how to develop, publish, and distribute plugins](../plugins/plugins.md). 
### Built-in plugins (and submodules) diff --git a/docs/howto/oasis/apps.md b/docs/howto/oasis/apps.md deleted file mode 100644 index 91d18bdeadc84a0a723ff36f274b22ccde64529e..0000000000000000000000000000000000000000 --- a/docs/howto/oasis/apps.md +++ /dev/null @@ -1,204 +0,0 @@ -# How to configure custom apps - -Apps provide customized views of data for specific domains, making it easier for -the users to navigate and understand the data. This typically means that certain -domain-specific properties are highlighted, different units may be used for -physical properties, and specialized dashboards may be presented. This becomes -crucial for NOMAD installations to be able to scale with data that contains a -mixture of experiments and simulations, different techniques, and physical -properties spanning different time and length scales. - -Apps only affect the way data is *displayed* for the user: if you wish to affect -the underlying data structure, you will need to define a custom [Python schema](../customization/schemas.md#how-to-write-a-schema-plugin) -or [YAML schema](../customization/basics.md). It is common that a custom schema has -an app associated with it, but apps can also provide different views of the same -underlying data. - -Apps are defined with a static YAML configuration file, which means that no -special programming skills are needed and app definitions can be shared easily. - -## App example - -Here is an example of a simple app definition in YAML. A full breakdown of the -configuration options are given in the [reference below](#app-configuration-reference). - -```yaml -# Label of the App -label: 'My App' -# Path used in the URL, must be unique -path: 'myapp' -# Used to categorize apps in the explore menu -category: 'Simulations' -# Brief description used in the app menu -description: 'An app customized for me.' 
-# Longer description that can also use markdown -readme: 'Here is a much longer description of this app' -# Controls which columns are shown in the results table -columns: - selected: - - 'entry_type' - options: - entry_type: - label: 'Entry type' - align: 'left' - upload_create_time: - label: 'Upload time' - align: 'left' -# Dictionary of search filters that are always enabled for queries made within -# this app. This is especially important to narrow down the results to the -# wanted subset. Any available search filter can be targeted here. -filters_locked: - upload_create_time: - gte: 0 -# Controls the filter menus shown on the left -filter_menus: - options: - material: - label: 'Material' - level: 0 - elements: - label: 'Elements / Formula' - level: 1 - size: 'xl' -# Controls the default dashboard shown in the search interface -dashboard: - widgets: - - type: histogram - showinput: false - autorange: true - nbins: 30 - scale: linear - quantity: results.material.n_elements - layout: - lg: - minH: 3 - minW: 3 - h: 4 - w: 12 - y: 0 - x: 0 -``` - -## Customizing default apps in a NOMAD installation - -Each NOMAD installation has a set of built-in apps, which are controlled through -the [ui.apps](../../reference/config.md#ui) field in the `nomad.yaml` configuration file. These are -the apps that are defined by default in a NOMAD installation: - -{{ default_apps_list()}} - -In `nomad.yaml`, it is easy to to select which apps to include or exclude like -this: - -```yaml -ui: - apps: - include: ['entries', 'materials'] -``` - -It is also possible to customize specific parts of an existing app definition: - -```yaml -ui: - apps: - options: - entries: - columns: - exclude: ['upload_create_time'] -``` - -Completely new apps can also be defined by adding new entries to the -`ui.apps.options` dictionary: - -```yaml -ui: - apps: - options: - myapp: - label: 'My App' - ... 
-``` - -If no explicit rules are added in `ui.apps.include` or `ui.apps.exclude`, these -new options will be included by default. - -## Using schema quantities in an app - -Each app may define additional **filters** that should be enabled in it. Filters -have a special meaning in the app context: filters are pieces of (meta)info than -can be queried in the search interface of the app, but also targeted in the rest -of the app configuration as explained below in. By default, none of the -quantities from custom schemas are loaded as filters, and they need to be -explicitly added. - -!!! note - - Note that not all of the quantities from a custom schema can be exposed as - filters. At the moment we only support targeting **scalar** quantities from - custom schemas. - -Each schema has a unique name within the NOMAD ecosystem, which is needed to -target them in the configuration. The name depends on the resource in which the -schema is defined in: - -- Python schemas are identified by the python path for the class that inherits -from `EntryData`. For example, if you have a python package called `myschema`, -which has a module called `schema.py`, which contains the class `MySchema`, then -the schema name will be `myschema.schema.MySchema`. -- YAML schemas are identified by the entry id of the schema file together with -the name of the section defined in the YAML schema. For example -if you have uploaded a schema YAML file containing a section definition called -`MySchema`, and it has been assigned an `entry_id`, the schema name will be -`entry_id:<entry_id>.MySchema`. - -The quantities from schemas may be included or excluded as filter by using the -[`filters`](#filters) field in the app config. This option supports a -wildcard/glob syntax for including/excluding certain filters. 
For example, to -include all filters from the Python schema defined in the class -`myschema.schema.MySchema`, you could use: - -```yaml -myapp: - filters: - include: - - '*#myschema.schema.MySchema' -``` - -The same thing for a YAML schema could be achieved with: - -```yaml -myapp: - filters: - include: - - '*#entry_id:<entry_id>.MySchema' -``` - -Once quantities from a schema are included in an app as filters, they can be -targeted in the rest of the app. The app configuration often refers to specific -filters to configure parts of the user interface. For example, -one could configure the results table to show a new column using one of the -schema quantities with: - -```yaml -myapp: - columns: - include: - - 'data.mysection.myquantity#myschema.schema.MySchema' - - 'entry_id' - options: - data.mysection.myquantity#myschema.schema.MySchema: - ... -``` - -The syntax for targeting quantities depends on the resource: - -- For python schemas, you need to provide the path and the python schema name separated -by a hashtag (#), for example `data.mysection.myquantity#myschema.schema.MySchema`. -- For YAML schemas, you need to provide the path and the YAML schema name separated -by a hashtag (#), for example `data.mysection.myquantity#entry_id:<entry_id>.MySchema`. -- Quantities that are common for all NOMAD entries can be targeted by using only -the path without the need for specifying a schema, e.g. `results.material.symmetry.space_group`. - -## App configuration reference - -{{ pydantic_model('nomad.config.models.ui.App')}} diff --git a/docs/howto/oasis/customize.md b/docs/howto/oasis/customize.md index 8c087bbb4c6c97be057d632197e42c65bfcc867f..079c0ff4506a63256f0048bd1a744d3cf1aa11b2 100644 --- a/docs/howto/oasis/customize.md +++ b/docs/howto/oasis/customize.md @@ -9,9 +9,7 @@ guides to learn more. 
- Installation specific changes (domain, path-prefix): [How to install an Oasis](install.md) - [Restricting user access](admin.md#restricting-access-to-your-oasis) -- [Configure custom apps](apps.md) - Write .yaml based [schemas](../customization/basics.md) and [ELNs](../customization/elns.md) - Learn how to use the [tabular parser](../customization/tabular.md) to manage data from .xls or .csv -- Develop a [schema plugin](../customization/schemas.md#how-to-write-a-schema-plugin) for more powerful schemas and ELNs -- Use and develop [parser plugins](../customization/parsers.md#developing-a-parser-plugin) for specific file formats +- Learn [how to develop plugins](../plugins/plugins.md) that can be installed in an Oasis - Add specialized [NORTH tools](../manage/north.md) \ No newline at end of file diff --git a/docs/howto/oasis/plugins_install.md b/docs/howto/oasis/plugins_install.md index cc1b05dbec75d29ba8e571062f82f638d2c3a7c0..89d220308c765ec446dadfb97b0df18dc137ca2b 100644 --- a/docs/howto/oasis/plugins_install.md +++ b/docs/howto/oasis/plugins_install.md @@ -1,15 +1,12 @@ -# How to mount a plugin into a NOMAD Oasis -[Plugins](../customization/plugins.md#register-your-plugin) allow the customization of a -NOMAD deployment in terms of which parsers, schemas, and normalizers are included or excluded. -In the following we will show to how to mount specific plugins in a NOMAD Oasis. - -The NOMAD docker image adds the folder `/app/plugins` to the `PYTHONPATH`. You simply have -to add the *plugin metadata* to your Oasis' `nomad.yaml` and mount your code into the `/app/plugins` -directory via the volumes section of the `app` and `worker` services in your `docker-compose.yaml`. - -For example, you can do this by adding an extension to the `docker-compose.yaml`, e.g. a file called -`docker-compose.plugins.yaml`. 
Assuming you cloned the example plugins above into the Oasis folder as -`./nomad-schema-plugin-example`, `./nomad-parser-plugin-example` and `./nomad-normalizer-plugin-example`, +# How to install plugins into a NOMAD Oasis + +[Plugins](../plugins/plugins.md) allow the customization of a NOMAD deployment in terms of which apps, normalizers, parsers and schema packages are available. In the following we will show how to install plugins into a NOMAD Oasis. + +## Option 1: Mount the plugin code + +The NOMAD docker image adds the folder `/app/plugins` to the `PYTHONPATH`. This means that you can mount your code into the `/app/plugins` directory via the volumes section of the `app` and `worker` services in your `docker-compose.yaml`. + +For example, you can do this by adding an extension to the `docker-compose.yaml`, e.g. a file called `docker-compose.plugins.yaml`. Assuming you have cloned three plugins into the Oasis folder as `./nomad-schema-plugin-example`, `./nomad-parser-plugin-example` and `./nomad-normalizer-plugin-example`, your `docker-compose.plugins.yaml` should look like this: ```yaml @@ -26,16 +23,14 @@ services: - ./nomad-normalizer-plugin-example/nomadparserexample:/app/plugins/nomadparserexample ``` -You have to tell docker that there are now two compose files. This can be done via the -`COMPOSE_FILE` environment variable. This is how you can start the Oasis with the plugins: +You have to tell docker that there are now two compose files. This can be done via the `COMPOSE_FILE` environment variable. This is how you can start the Oasis with the plugins: ```sh export COMPOSE_FILE=docker-compose.yaml:docker-compose.plugins.yaml docker compose up -d ``` -Here is a complete Oasis setup [nomad-oasis-with-plugins.zip](../../assets/nomad-oasis-with-plugins.zip). -Simply download, extract, and start like any other Oasis: +Here is a complete Oasis setup [nomad-oasis-with-plugins.zip](../../assets/nomad-oasis-with-plugins.zip). 
Simply download, extract, and start like any other Oasis: ```sh unzip nomad-oasis-with-plugins.zip @@ -60,33 +55,54 @@ curl localhost/nomad-oasis/alive Read the [Oasis install guide](install.md) for more details. -### Install PyPI/pip package - -If the plugin is published on PyPI, you can simply install it with pip. If the -plugin was published to our MPCDF GitLab registry, you have to use the `--index-url` -parameter: - -``` -pip install nomad-example-schema-plugin --index-url https://gitlab.mpcdf.mpg.de/api/v4/projects/2187/packages/pypi/simple -``` - -Installing via pip works for NOMAD developers, but how to pip install into an Oasis? -The package could either be installed when NOMAD is started or via -a customized docker image. +## Option 2: Create a derived Docker image with plugin installed via `pip` -!!! warning "Attention" - We still need to implement that configured plugins, if not already installed, - get automatically installed during NOMAD start. - -You can build a custom NOMAD docker image that has your packages already installed. -Here is an example `Dockerfile`: +Instead of mounting the code into an existing image, you can also create a new, derived image which has your plugin installed as a `pip` package. For this you will need to create a new `Dockerfile`, which runs the installation step. The basic idea is that your Dockerfile looks something like this: ```Dockerfile --8<-- "examples/plugins/schema/Dockerfile" ``` -The image can be build like this: +The image can then be built like this: ``` docker build -t nomad-with-plugins . ``` + +Depending on how your plugin code is distributed, you have several options for the actual install steps: + +1. Plugin published in PyPI: + + ```sh + RUN pip install <package_name> + ``` + +2. Plugin code available in GitHub: + + ```sh + RUN pip install git+https://<repository_url> + ``` + +3. 
Plugin published in MPCDF GitLab registry: + + ```sh + RUN pip install nomad-example-schema-plugin --index-url https://gitlab.mpcdf.mpg.de/api/v4/projects/2187/packages/pypi/simple + ``` + +4. Copy plugin code from host machine: + + ```sh + RUN pip install build + + COPY \ + nomadschemaexample \ + tests \ + README.md \ + LICENSE \ + pyproject.toml \ + . + + RUN python -m build --sdist + + RUN pip install dist/nomad-schema-plugin-example-*.tar.gz + ``` diff --git a/docs/howto/overview.md b/docs/howto/overview.md index c52c902d3e0a4691f55243dc531b08d8c1719450..1ee25615e7bdbe5ad371726458e91213ef799a37 100644 --- a/docs/howto/overview.md +++ b/docs/howto/overview.md @@ -49,7 +49,6 @@ Host NOMAD for your lab or institution. - [Install an Oasis](oasis/install.md) - [Customize an Oasis](oasis/customize.md) - [Install plugins](oasis/plugins_install.md) -- [Configure custom apps](oasis/apps.md) - [Migrate Oasis versions](oasis/migrate.md) - [Administrate and maintain an Oasis](oasis/admin.md) @@ -65,10 +64,11 @@ Customize NOMAD, write plugins, and tailor NOMAD Oasis. 
- [Use base sections](customization/base_sections.md) - [Parse tabular data](customization/tabular.md) - [Define workflows](customization/workflows.md) -- [Write plugins](customization/plugins.md) -- [Write a python schema](customization/schemas.md) -- [Write a parser](customization/parsers.md) -- [Write a normalizer](customization/normalizers.md) +- [Write plugins](plugins/plugins.md) +- [Write an app](plugins/apps.md) +- [Write a normalizer](plugins/normalizers.md) +- [Write a parser](plugins/parsers.md) +- [Write a schema package](plugins/schema_packages.md) - [Work with units](customization/units.md) - [Use HDF5 to handle large quantities](customization/hdf5.md) diff --git a/docs/howto/plugins/apps.md b/docs/howto/plugins/apps.md new file mode 100644 index 0000000000000000000000000000000000000000..f244125cfa45f14d66bc238ce1d4ba2448a28546 --- /dev/null +++ b/docs/howto/plugins/apps.md @@ -0,0 +1,193 @@ +# How to write an app + +Apps provide customized views of data in the GUI, making it easier for the users to navigate and understand the data related to a specific domain. This typically means that certain domain-specific properties are highlighted, different units may be used for physical properties, and specialized dashboards may be presented. This becomes crucial for NOMAD installations to be able to scale with data that contains a mixture of experiments and simulations, different techniques, and physical properties spanning different time and length scales. + +Apps only affect the way data is *displayed* for the user: if you wish to affect the underlying data structure, you will need to write a [Python schema package](./schema_packages.md) or a [YAML schema package](../customization/basics.md). + +This documentation shows you how to write a plugin entry point for an app. You should read the [documentation on getting started with plugins](./plugins.md) to have a basic understanding of how plugins and plugin entry points work in the NOMAD ecosystem. 
+ +## Getting started + +You can use our [template repository](https://github.com/FAIRmat-NFDI/nomad-plugin-template) to create an initial structure for a plugin containing an app. The relevant part of the repository layout will look something like this: + +```txt +nomad-example + ├── src + │ ├── nomad_example + │ │ ├── apps + │ │ │ ├── __init__.py + ├── LICENSE.txt + ├── README.md + └── pyproject.toml +``` + +See the documentation on [plugin development guidelines](./plugins.md#plugin-development-guidelines) for more details on the best development practices for plugins, including linting, testing and documenting. + +## App entry point + +The entry point defines basic information about your app and is used to automatically load the app into a NOMAD distribution. It is an instance of an `AppEntryPoint` and unlike many other plugin entry points, it does not have a separate resource that needs to be lazy-loaded as the entire app is defined in the configuration as an instance of `nomad.config.models.ui.App`. You will learn more about the `App` class in the next sections. The entry point should be defined in `*/apps/__init__.py` like this: + +```python +from nomad.config.models.plugins import AppEntryPoint + +myapp = AppEntryPoint( + name = 'MyApp', + description = 'My custom app.', + app = App(...) +) +``` + +Here we have instantiated an object `myapp` in which you specify the default parameterization and other details about the app. In the reference you can see all of the available [configuration options for an `AppEntryPoint`](../../reference/plugins.md#appentrypoint). + +The entry point instance should then be added to the `[project.entry-points.'nomad.plugin']` table in `pyproject.toml` in order for the app to be automatically detected: + +```toml +[project.entry-points.'nomad.plugin'] +myapp = "nomad_example.apps:myapp" +``` + +## `App` class + +The definition of the actual app is given as an instance of the `App` class specified as part of the entry point. 
A full breakdown of the model is given below in the [app reference](#app-reference), but here is a small example: + +```python +from nomad.config.models.plugins import AppEntryPoint +from nomad.config.models.ui import App, Column, Columns, FilterMenu, FilterMenus + + +myapp = AppEntryPoint( + name='MyApp', + description='App defined using the new plugin mechanism.', + app = App( + # Label of the App + label='My App', + # Path used in the URL, must be unique + path='myapp', + # Used to categorize apps in the explore menu + category='Theory', + # Brief description used in the app menu + description='An app customized for me.', + # Longer description that can also use markdown + readme='Here is a much longer description of this app.', + # Controls which columns are shown in the results table + columns=Columns( + selected=['entry_id'], + options={ + 'entry_id': Column(), + 'upload_create_time': Column(), + } + ), + # Dictionary of search filters that are always enabled for queries made + # within this app. This is especially important to narrow down the + # results to the wanted subset. Any available search filter can be + # targeted here. + filters_locked={ + 'upload_create_time': { + 'gte': 0 + } + }, + # Controls the filter menus shown on the left + filter_menus=FilterMenus( + options={ + 'material': FilterMenu(label="Material"), + } + ), + # Controls the default dashboard shown in the search interface + dashboard={ + 'widgets': [ + { + 'type': 'histogram', + 'showinput': False, + 'autorange': True, + 'nbins': 30, + 'scale': 'linear', + 'quantity': 'results.material.n_elements', + 'layout': { + 'lg': { + 'minH': 3, + 'minW': 3, + 'h': 4, + 'w': 12, + 'y': 0, + 'x': 0 + } + } + } + ] + } + ) +) +``` + +### Loading custom quantity definitions into an app + +By default, none of the quantities from custom schemas are available in an app, and they need to be explicitly added. Each app may define additional **filters** that should be enabled in it. 
Filters have a special meaning in the app context: filters are pieces of (meta)info that can be queried in the search interface of the app, but also targeted in the rest of the app configuration as explained below. + +!!! note + + Note that not all of the quantities from a custom schema can be exposed as + filters. At the moment we only support targeting **scalar** quantities from + custom schemas. + +Each schema has a unique name within the NOMAD ecosystem, which is needed to +target them in the configuration. The name depends on the resource in which the +schema is defined: + +- Python schemas are identified by the python path for the class that inherits +from `Schema`. For example, if you have a python package called `nomad_example`, +which has a subpackage called `schema_packages`, containing a module called `myschema.py`, which contains the class `MySchema`, then +the schema name will be `nomad_example.schema_packages.myschema.MySchema`. +- YAML schemas are identified by the entry id of the schema file together with +the name of the section defined in the YAML schema. For example +if you have uploaded a schema YAML file containing a section definition called +`MySchema`, and it has been assigned an `entry_id`, the schema name will be +`entry_id:<entry_id>.MySchema`. + +The quantities from schemas may be included or excluded as filters by using the +[`filters`](#filters) field in the app config. This option supports a +wildcard/glob syntax for including/excluding certain filters. 
For example, to +include all filters from the Python schema defined in the class +`nomad_example.schema_packages.myschema.MySchema`, you could use: + +```yaml +myapp: + filters: + include: + - '*#nomad_example.schema_packages.myschema.MySchema' +``` + +The same thing for a YAML schema could be achieved with: + +```yaml +myapp: + filters: + include: + - '*#entry_id:<entry_id>.MySchema' +``` + +Once quantities from a schema are included in an app as filters, they can be targeted in the rest of the app. The app configuration often refers to specific filters to configure parts of the user interface. For example, one could configure the results table to show a new column using one of the schema quantities with: + +```yaml +myapp: + columns: + include: + - 'data.mysection.myquantity#nomad_example.schema_packages.myschema.MySchema' + - 'entry_id' + options: + data.mysection.myquantity#nomad_example.schema_packages.myschema.MySchema: + ... +``` + +The syntax for targeting quantities depends on the resource: + +- For python schemas, you need to provide the path and the python schema name separated +by a hashtag (#), for example `data.mysection.myquantity#nomad_example.schema_packages.myschema.MySchema`. +- For YAML schemas, you need to provide the path and the YAML schema name separated +by a hashtag (#), for example `data.mysection.myquantity#entry_id:<entry_id>.MySchema`. +- Quantities that are common for all NOMAD entries can be targeted by using only +the path without the need for specifying a schema, e.g. `results.material.symmetry.space_group`. 
+ + +## App reference + +{{ pydantic_model('nomad.config.models.ui.App')}} diff --git a/docs/howto/plugins/normalizers.md b/docs/howto/plugins/normalizers.md new file mode 100644 index 0000000000000000000000000000000000000000..890ea6a91902d9ae872e199e938ef92ecd121c12 --- /dev/null +++ b/docs/howto/plugins/normalizers.md @@ -0,0 +1,192 @@ +# How to write a normalizer + +A normalizer takes the archive of an entry as input and manipulates (usually expands) the given archive. This way, a normalizer can add additional sections and quantities based on the information already available in the archive. All normalizers are executed in the order [determined by their `level`](#control-normalizer-execution-order) after parsing, but the normalizer may decide to not do anything based on the entry contents. + +This documentation shows you how to write a plugin entry point for a normalizer. You should read the [documentation on getting started with plugins](./plugins.md) to have a basic understanding of how plugins and plugin entry points work in the NOMAD ecosystem. + +## Getting started + +You can use our [template repository](https://github.com/FAIRmat-NFDI/nomad-plugin-template) to create an initial structure for a plugin containing a normalizer. The relevant part of the repository layout will look something like this: + +```txt +nomad-example + ├── src + │ ├── nomad_example + │ │ ├── normalizers + │ │ │ ├── __init__.py + │ │ │ ├── mynormalizer.py + ├── LICENSE.txt + ├── README.md + └── pyproject.toml +``` + +See the documentation on [plugin development guidelines](./plugins.md#plugin-development-guidelines) for more details on the best development practices for plugins, including linting, testing and documenting. + +## Normalizer entry point + +The entry point defines basic information about your normalizer and is used to automatically load the normalizer code into a NOMAD distribution. 
It is an instance of a `NormalizerEntryPoint` or its subclass and it contains a `load` method which returns a `nomad.normalizing.Normalizer` instance that will perform the actual normalization. You will learn more about the `Normalizer` class in the next sections. The entry point should be defined in `*/normalizers/__init__.py` like this: + +```python +from pydantic import Field +from nomad.config.models.plugins import NormalizerEntryPoint + + +class MyNormalizerEntryPoint(NormalizerEntryPoint): + + def load(self): + from nomad_example.normalizers.mynormalizer import MyNormalizer + + return MyNormalizer(**self.dict()) + + +mynormalizer = MyNormalizerEntryPoint( + name = 'MyNormalizer', + description = 'My custom normalizer.', +) +``` + +Here you can see that a new subclass of `NormalizerEntryPoint` was defined. In this new class you can override the `load` method to determine how the `Normalizer` class is instantiated, but you can also extend the `NormalizerEntryPoint` model to add new configurable parameters for this normalizer as explained [here](./plugins.md#extending-and-using-the-entry-point). + +We also instantiate an object `mynormalizer` from the new subclass. This is the final entry point instance in which you specify the default parameterization and other details about the normalizer. In the reference you can see all of the available [configuration options for a `NormalizerEntryPoint`](../../reference/plugins.md#normalizerentrypoint). + + +The entry point instance should then be added to the `[project.entry-points.'nomad.plugin']` table in `pyproject.toml` in order for the normalizer to be automatically detected: + +```toml +[project.entry-points.'nomad.plugin'] +mynormalizer = "nomad_example.normalizers:mynormalizer" +``` + +## `Normalizer` class + +The resource returned by a normalizer entry point must be an instance of a `nomad.normalizing.Normalizer` class. This normalizer definition should be contained in a separate file (e.g. 
`*/normalizers/mynormalizer.py`) and could look like this: + +```python +from typing import Dict + +from nomad.datamodel import EntryArchive +from nomad.normalizing import Normalizer + + +class MyNormalizer(Normalizer): + def normalize( + self, + archive: EntryArchive, + logger=None, + ) -> None: + logger.info('MyNormalizer called') +``` + +The minimal requirement is that your class has a `normalize` function, which takes as input: + + - `archive`: The [`EntryArchive` object](../../reference/glossary.md#archive) in which the normalization results will be stored + - `logger`: Logger that you can use to log normalization events into + +## `SystemBasedNormalizer` class + +`SystemBasedNormalizer` is a special base class for normalizing systems that allows running the normalization on all (or only the resulting) `representative` systems: + +```python +from nomad.normalizing import SystemBasedNormalizer +from nomad.atomutils import get_volume + +class UnitCellVolumeNormalizer(SystemBasedNormalizer): + def _normalize_system(self, system, is_representative): + system.unit_cell_volume = get_volume(system.lattice_vectors.magnitude) +``` + +For `SystemBasedNormalizer`, we implement the `_normalize_system` method. The parameter `is_representative` will be true for the `representative` systems. The representative system refers to the system that corresponds to the calculation result. It is determined by scanning the archive sections starting with `workflow2` until the system fitting the criterion is found. For example, it refers to the final step in a geometry optimization or other workflow. + +Of course, if you add new information to the archive, this also needs to be defined in the schema (see [How-to extend the schema](schema_packages.md#extending-existing-sections)).
For example you could extend the section system with a special system definition that extends the existing section system definition: + +```python +import numpy as np +from nomad.datamodel.metainfo import runschema +from nomad.metainfo import Section, Quantity + +class UnitCellVolumeSystem(runschema.system.System): + m_def = Section(extends_base_section=True) + unit_cell_volume = Quantity(np.dtype(np.float64), unit='m^3') +``` + +Here, we used the schema definition for the `run` section defined in this [plugin](schema_packages.md#schema-packages-developed-by-fairmat). + +## Control normalizer execution order + +`NormalizerEntryPoints` have an attribute `level`, which you can use to control their execution order. Normalizers are executed in order from lowest level to highest level. The default level for normalizers is `0`, but this can be changed per installation using `nomad.yaml`: + +```yaml +plugins: + entry_points: + options: + "nomad_example.normalizers:mynormalizer1": + level: 1 + "nomad_example.normalizers:mynormalizer2": + level: 2 +``` + +## Running the normalizer + +If you have the plugin package and `nomad-lab` installed in your Python environment, you can run the normalization as a part of the parsing process using the NOMAD CLI: + +```shell +nomad parse <filepath> --show-archive +``` + +The output will return the final archive in JSON format. 
+ +Normalization can also be run within a python script (or Jupyter notebook), e.g., to facilate debugging, with the following code: + +```python +from nomad.datamodel import EntryArchive +from nomad_example.normalizers.mynormalizer import MyNormalizer +import logging + +p = MyNormalizer() +a = EntryArchive() +p.normalize(a, logger=logging.getLogger()) + +print(a.m_to_dict()) +``` + +## Normalizers developed by FAIRmat + +The following is a list of plugins containing normalizers developed by FAIRmat: + +| Normalizer class | Path/Project url | +| ---------------------------- | --------------------------------------------------------------------------- | +| SimulationWorkflowNormalizer | <https://github.com/nomad-coe/nomad-schema-plugin-simulation-workflow.git> | +| SystemNormalizer | <https://github.com/nomad-coe/nomad-normalizer-plugin-system.git> | +| SoapNormalizer | <https://github.com/nomad-coe/nomad-normalizer-plugin-soap.git> | +| SpectraNormalizer | <https://github.com/nomad-coe/nomad-normalizer-plugin-spectra.git> | +| DosNormalizer | <https://github.com/nomad-coe/nomad-normalizer-plugin-dos.git> | +| BandStructureNormalizer | <https://github.com/nomad-coe/nomad-normalizer-plugin-bandstructure.git> | + +To refine an existing normalizer, you should install it via the `nomad-lab` package: + +```shell +pip install nomad-lab +``` + +Clone the normalizer project: + +```shell +git clone <normalizer-project-url> +cd <normalizer-dir> +``` + +Either remove the installed normalizer and `pip install` the cloned version: + +```shell +rm -rf <path-to-your-python-env>/lib/python3.9/site-packages/<normalizer-module-name> +pip install -e . +``` + +Or set `PYTHONPATH` so that the cloned code takes precedence over the installed code: + +```shell +PYTHONPATH=. nomad parse <path-to-example-file> +``` + +Alternatively, you can also do a full [developer setup](../develop/setup.md) of the NOMAD infrastructure and +enhance the normalizer there. 
\ No newline at end of file diff --git a/docs/howto/customization/parsers.md b/docs/howto/plugins/parsers.md similarity index 62% rename from docs/howto/customization/parsers.md rename to docs/howto/plugins/parsers.md index 37422258c157689eea103dea62dbe73c64c2d1c9..aa2b3462e8ff2789e09261af30b64cd98a979668 100644 --- a/docs/howto/customization/parsers.md +++ b/docs/howto/plugins/parsers.md @@ -1,87 +1,133 @@ # How to write a parser -NOMAD uses parsers to convert raw code input and output files into NOMAD's common archive -format. This is the documentation on how to develop such a parser. +NOMAD uses parsers to automatically extract information from raw files and output that information into structured [archives](../../reference/glossary.md#archive). Parsers can decide which files act upon based on the filename, mime type or file contents and can also decide into which schema the information should be populated into. -## Getting started - -We have prepared an example parser in a github repository to learn how to write parsers. To explore the example, you can clone the project with the command: - -```shell -git clone https://github.com/nomad-coe/nomad-parser-example.git --branch hello-world -``` +This documentation shows you how to write a plugin entry point for a parser. You should read the [documentation on getting started with plugins](./plugins.md) to have a basic understanding of how plugins and plugin entry points work in the NOMAD ecosystem. -Alternatively, fork the example project on Github and create your own parser. +## Getting started -Once you clone the example project, the file structure is: +You can use our [template repository](https://github.com/FAIRmat-NFDI/nomad-plugin-template) to create an initial structure for a plugin containing a parser. 
The relevant part of the repository layout will look something like this: ```txt -example - ├── exampleparser - │ ├── __init__.py - │ ├── __main__.py - │ ├── metainfo.py - │ └── parser.py +nomad-example + ├── src + │ ├── nomad_example + │ │ ├── parsers + │ │ │ ├── __init__.py + │ │ │ ├── myparser.py ├── LICENSE.txt ├── README.md └── pyproject.toml ``` -Create a virtual environment (**make sure to use Python 3.9**) and install the new parser: +See the documentation on [plugin development guidelines](./plugins.md#plugin-development-guidelines) for more details on the best development practices for plugin, including linting, testing and documenting. -```shell -cd nomad-parser-example/ -python3.9 -m venv .pyenv -source .pyenv/bin/activate -pip install --upgrade pip -pip install -e . +## Parser entry point + +The entry point defines basic information about your parser and is used to automatically load the parser code into a NOMAD distribution. It is an instance of a `ParserEntryPoint` or its subclass and it contains a `load` method which returns a `nomad.parsing.Parser` instance that will perform the actual parsing. You will learn more about the `Parser` class in the next sections. The entry point should be defined in `*/parsers/__init__.py` like this: + +```python +from pydantic import Field +from nomad.config.models.plugins import ParserEntryPoint + + +class MyParserEntryPoint(ParserEntryPoint): + + def load(self): + from nomad_example.parsers.myparser import MyParser + + return MyParser(**self.dict()) + + +myparser = MyParserEntryPoint( + name = 'MyParser', + description = 'My custom parser.', + mainfile_name_re = '.*\.myparser', +) ``` -The last command will install both the `nomad-lab` Python package and the new parser. -The `-e` parameter installs the parser in *development* mode, which allows you to change the sources without having to reinstall. +Here you can see that a new subclass of `ParserEntryPoint` was defined. 
In this new class you can override the `load` method to determine how the `Parser` class is instantiated, but you can also extend the `ParserEntryPoint` model to add new configurable parameters for this parser as explained [here](./plugins.md#extending-and-using-the-entry-point). + +We also instantiate an object `myparser` from the new subclass. This is the final entry point instance in which you specify the default parameterization and other details about the parser. In the reference you can see all of the available [configuration options for a `ParserEntryPoint`](../../reference/plugins.md#parserentrypoint). + +The entry point instance should then be added to the `[project.entry-points.'nomad.plugin']` table in `pyproject.toml` in order for the parser to be automatically detected: + +```toml +[project.entry-points.'nomad.plugin'] +myparser = "nomad_example.parsers:myparser" +``` -The main parser class is found in `exampleparser/parser.py`: +## `Parser` class + +The resource returned by a parser entry point must be an instance of a `nomad.parsing.Parser` class. In many cases you will, however, want to use the already existing `nomad.parsing.MatchingParser` subclass that takes care of the file matching process for you. This parser definition should be contained in a separate file (e.g. `*/parsers/myparser.py`) and could look like this: ```python -class ExampleParser: - def parse(self, mainfile: str, archive: EntryArchive, logger): - # Log a hello world, just to get us started. TODO remove from an actual parser. - logger.info('Hello World') +from typing import Dict - archive.workflow2 = Workflow(name='EXAMPLE') +from nomad.datamodel import EntryArchive +from nomad.parsing import MatchingParser + + +class MyParser(MatchingParser): + def parse( + self, + mainfile: str, + archive: EntryArchive, + logger=None, + child_archives: Dict[str, EntryArchive] = None, + ) -> None: + logger.info('MyParser called') ``` -A parser is a simple Python module containing a single class. 
The convention -for the class name is `<Name>Parser` where `Name` is the file type or code name -e.g. `VASPParser`. It has a main function, `parse` which takes the path to the mainfile -and an empty [`EntryArchive` object](../../reference/glossary.md#archive) as input to be -populated with the parsed quantities. The development of parsers is up to each user, and -will heavily depend on what the user wants to parse. In the simple example above, we created -a logger info entry and populated the archive with a root section called [`Workflow`](../../explanation/data.md#archive-files-a-shared-entry-structure). We then set the workflow name to `EXAMPLE`. +If you are using the `MatchingParser` interface, the minimal requirement is +that your class has a `parse` function, which will take as input: -You can run the parser (see the included `__main__.py`) with the path to the file to be -parsed as an argument: + - `mainfile`: Filepath to a raw file that the parser should open and run on + - `archive`: The [`EntryArchive` object](../../reference/glossary.md#archive) in which the parsing results will be stored + - `logger`: Logger that you can use to log parsing events into -```shell -python -m exampleparser tests/data/example.out +Note here that if using `MatchingParser`, the process of identifying which files the `parse` method is run against is taken care of by passing in the required parameters to the instance in the `load` method. In the previous section, the `load` method looked something like this: + +```python + def load(self): + from nomad_example.parsers.myparser import MyParser + + return MyParser(**self.dict()) +``` + +There we are passing all of the entry configuration options to the parser instance, including things like `mainfile_name_re` and `mainfile_contents_re`. The `MatchingParser` constructor uses these parameters to set up the file matching appropriately.
If you wish to take full control of the file matching process, you can use the `nomad.parsing.Parser` class and override the `is_mainfile` function. + +## Match your raw file + +If you are using the `MatchingParser` interface you can configure which files +are matched directly in the `ParserEntryPoint`. For example to match only certain file extensions and file contents, you can use the `mainfile_name_re` and `mainfile_contents_re` fields: + +```python +myparser = MyParserEntryPoint( + name = 'MyParser', + description = 'My custom parser.', + mainfile_name_re = '.*\.myparser', + mainfile_contents_re = '\s*\n\s*HELLO WORLD', +) ``` -The output show the log entry and the minimal archive with a `workflow2` section with the -quantity `name` as in the following: +You can find all of the available matching criteria in the [`ParserEntryPoint` reference](../../reference/plugins.md#parserentrypoint). + +## Running the parser + +If you have the plugin package and `nomad-lab` installed in your Python environment, you can run the parser against a file using the NOMAD CLI: -```json -{ - "workflow2": { - "name": "EXAMPLE" - } -} +```shell +nomad parse <filepath> --show-archive ``` +The output will return the final archive in JSON format. + Parsing can also be run within a python script (or Jupyter notebook), e.g., to facilitate debugging, with the following code: ```python -from exampleparser import ExampleParser from nomad.datamodel import EntryArchive +from nomad_example.parsers.myparser import MyParser import logging -p = ExampleParser() +p = MyParser() @@ -89,9 +135,7 @@ a = EntryArchive() p.parse('tests/data/example.out', a, logger=logging.getLogger()) print(a.m_to_dict()) -{'workflow2': {'name': 'EXAMPLE'}} ``` -<!-- TODO Add some tips for working with archives in python somewhere --> ## Parsing text files @@ -227,8 +271,8 @@ mainfile_parser.calculation ``` The next step is to write the parsed data into the NOMAD archive.
We can use one of the -[predefined schemas plugins](schemas.md#pre-defined-schemas-in-nomad) in NOMAD. -However, to better illustrate the connection between a parser and a schema we will define our own schema in this example (See [How to write a schema in python](./schemas.md#writing-schemas-in-python-compared-to-yaml-schemas) for additional information on this topic). We define a root section called `Simulation` containing two subsections, `Model` and `Output`. The definitions are found in `exampleparser/metainfo/example.py`: +[predefined plugins containing schema packages](schema_packages.md#schema-packages-developed-by-fairmat) in NOMAD. +However, to better illustrate the connection between a parser and a schema we will define our own schema in this example (See [How to write a schema in python](./schema_packages.md#writing-schemas-in-python-compared-to-yaml-schemas) for additional information on this topic). We define a root section called `Simulation` containing two subsections, `Model` and `Output`. The definitions are found in `exampleparser/metainfo/example.py`: ```python class Model(ArchiveSection): @@ -281,7 +325,7 @@ class Simulation(ArchiveSection): output = SubSection(sub_section=Output, repeats=True) ``` -Each of the classes innherit from the base class `ArchiveSection`. This is the abstract class used in NOMAD to define sections and subsections in a schema. The `Model` section is used to store the `sites` and `lattice/cell` information, while the `Output` section is used to store the `energy` quantity. +Each of the classes inherit from the base class `ArchiveSection`. This is the abstract class used in NOMAD to define sections and subsections in a schema. The `Model` section is used to store the `sites` and `lattice/cell` information, while the `Output` section is used to store the `energy` quantity. Each of the classes that we defined is a sub-class of `ArchiveSection`. 
This is required in order to assign these sections to the `data` section of the NOMAD archive. @@ -341,35 +385,7 @@ class ExampleWorkflow(Workflow): ) ``` <!-- TODO remove x_ notation in the future --> -This is the approach for domain-specific schemas such as for [simulation workflows](https://github.com/nomad-coe/nomad-schema-plugin-simulation-workflow.git). Refer to [how to extend schemas](schemas.md#extending-existing-sections). - - -## Testing a parser - -Good software development practice involves adding tests during parser development in order to catch bugs and extend the maintainaibility of the parser in the future. For this purpose, we use the Python unit test framework `pytest`. A typical test would take one example file, -parse it, and check assertions about the output: - -<!-- TODO suggest a more compartmentalized testing framework --> -```python -def test_example(): - parser = ExampleParser() - archive = EntryArchive() - parser.parse('tests/data/example.out', archive, logging) - - sim = archive.data - assert len(sim.model) == 2 - assert len(sim.output) == 2 - assert archive.workflow2.x_example_magic_value == 42 -``` - -Run all the tests in the `tests/` directory with: - -```shell -python -m pytest -svx tests -``` - -You should follow good [python testing best practices](https://realpython.com/python-testing/). -<!-- TODO add some specific guidance here! --> +This is the approach for domain-specific schemas such as for [simulation workflows](https://github.com/nomad-coe/nomad-schema-plugin-simulation-workflow.git). Refer to [how to extend schemas](schema_packages.md#extending-existing-sections). ## Other FileParser classes Aside from `TextParser`, other `FileParser` classes are also defined. These include: @@ -381,59 +397,9 @@ the parser takes in an XPath-style key to access individual quantities. By defau automatic data type conversion is performed, which can be switched off by setting `convert=False`. 
-## Adding the parser to NOMAD -NOMAD has to manage multiple parsers and must decide during processing which parsers to run -on which files. To accomplish this, specific parser attributes are matched to a -file. These are specified by interfacing the parser with `MatchingParser`. This can be achieved -by either 1. adding it as a plugin (`nomad.config.__init__.py::plugins`) or 2. directly adding it to the list of parsers (`nomad.parsing.parsers.py::parsers`), -the former being the preferred route. See [How to add a plugin to your NOMAD](plugins.md#add-a-plugin-to-your-nomad) -to learn more. - -```python -MatchingParserInterface( - 'parsers/example', - mainfile_contents_re=(r'^\s*#\s*This is example output'), - mainfile_mime_re=r'(application/.*)|(text/.*)', - supported_compressions=["gz", "bz2", "xz"], - mainfile_alternative=False, - mainfile_contents_dict={'program': {'version': '1', 'name': 'EXAMPLE'}}) -``` - -- `mainfile_mime_re`: A regular expression on the MIME type of files. The parser is run - only on files with matching MIME type. The MIME type is *guessed* with libmagic. - -- `mainfile_contents_re`: A regular expression that is applied to the first 4k characters of a file. - The parser is run only on files where this matches. - -- `mainfile_name_re`: A regular expression that can be used to match against the name and - path of the file. - -- `supported compressions`: A list of [`gz`, `bz2`] if the parser supports compressed - files. - -- `mainfile_alternative`: If `True`, a file is `mainfile` unless another file in the same - directory matches `mainfile_name_re`. - -- `mainfile_contents_dict`: A dictionary to match the contents of the file. If provided, - it will load the file and match the value of the key(s) provided. One can also specify - the keys that should be present by using the tags `__has_key`, `__has_all_keys` - and `__has_only_keys`. 
For example, one can have - `{'program': {'__has_all_keys': ['version', 'name']}}` to specify that `version` and `name` - must be present in the file to be matched. - -The NOMAD infrastructure keeps a list of [Supported Parsers](../../reference/parsers.md#supported-parsers) in -`nomad/parsing/parsers.py::parsers`. These parsers are considered in the order they -appear in the list. The first matching parser is used to parse a given file. +## Parsers developed by FAIRmat -Once the parser is successfully installed and added, it will also become available through the NOMAD [Command Line Interface (CLI)](../../reference/cli.md): - -```shell -nomad parse tests/data/example.out -``` - -## Developing an existing parser - -A number of parsers are constantly being developed in NOMAD. +The following is a list of plugins containing parsers developed by FAIRmat: | Description | Project url | | ---------------------------- | ------------------------------------------------------ | @@ -470,11 +436,3 @@ PYTHONPATH=. nomad parse <path-to-example-file> Alternatively, you can also do a full [developer setup](../develop/setup.md) of the NOMAD infrastructure and enhance the parser there.
- -## Parser plugin metadata -{{pydantic_model('nomad.config.models.plugins.Parser', hide=['code_name','code_category','code_homepage','metadata'])}} - diff --git a/docs/howto/plugins/plugins.md b/docs/howto/plugins/plugins.md new file mode 100644 index 0000000000000000000000000000000000000000..e9bdb7190b2467e0b4a919c29a901035050de30b --- /dev/null +++ b/docs/howto/plugins/plugins.md @@ -0,0 +1,199 @@ +# Get started with plugins + +The main way to customize a NOMAD installation is through the use of **plugins**. A NOMAD Plugin is a Git repository that contains a Python package that an administrator can install into a NOMAD deployment to add custom features. This page contains the basics of how to create, develop and publish a NOMAD Plugin. + +## Plugin anatomy + +!!! tip + We provide a [template repository](https://github.com/FAIRmat-NFDI/nomad-plugin-template) which you can use to create the initial plugin layout for you. + +Plugin Git repositories should roughly follow this layout: + +```txt +├── nomad-example +│ ├── src +| │ ├── nomad_example +| | │ ├── apps +| | │ │ ├── __init__.py +| | │ ├── normalizers +| | │ │ ├── mynormalizer.py +| | │ │ ├── __init__.py +| | │ ├── schema_packages +| | │ │ ├── mypackage.py +| | │ │ ├── __init__.py +| | │ ├── parsers +| | │ │ ├── myparser.py +| | │ │ ├── __init__.py +│ ├── docs +│ ├── tests +│ ├── pyproject.toml +│ ├── LICENSE.txt +│ ├── README.md +``` + +We suggest using the following convention for naming the repository name and the plugin package: + + - repository name: `nomad-<plugin name>` + - package name: `nomad_<plugin name>` + +In the folder structure you can see that a single plugin can contain multiple types of customizations: apps, parsers, schema packages and normalizers. These are called a **plugin entry points** and you will learn more about them next. + +## Plugin entry points + +Plugin entry points represent different types of customizations that can be added to a NOMAD installation. 
The following plugin entry point types are currently supported: + + - [Apps](./apps.md) + - [Normalizers](./normalizers.md) + - [Parsers](./parsers.md) + - [Schema packages](./schema_packages.md) + +Entry points contain **configuration**, but also a separate **resource**, which should live in a separate Python module. This split enables lazy-loading: the configuration can be loaded immediately, while the resource is loaded later when/if it is required. This can significantly improve startup times, as long as all time-consuming initializations are performed only when loading the resource. This split also helps to avoid cyclical imports between the plugin code and the `nomad-lab` package. + +For example the entry point instance for a parser is contained in `.../parsers/__init__.py` and it contains e.g. the name, version and any additional entry point-specific parameters that control its behaviour. The entry point has a `load` method that can be called lazily to return the resource, which is a `Parser` instance defined in `.../parsers/myparser.py`. + +In `pyproject.toml` you can expose plugin entry points for automatic discovery. E.g. to expose an app and a package, you would add the following to `pyproject.toml`: + +```toml +[project.entry-points.'nomad.plugin'] +myapp = "nomad_example.apps:myapp" +mypackage = "nomad_example.schema_packages:mypackage" +``` + +Here it is important to use the `nomad.plugin` group name in the `project.entry-points` header. The plugin name used on the left side (`mypackage`) can be arbitrary; what matters is that the key (`"nomad_example.schema_packages:mypackage"`) is a path pointing to a plugin entry point instance inside the python code. This unique key will be used to identify the plugin entry point when e.g. accessing it to read some of its configuration values.
+ +You can read more about how to write different types of entry points in their dedicated documentation pages or learn more about the [Python entry point mechanism](https://setuptools.pypa.io/en/latest/userguide/entry_point.html). + +### Controlling loading of plugin entry points + +By default, plugin entry points are automatically loaded, and as an administrator you only need to install the Python package. You can, however, control which entry points to load by explicitly including/excluding them in your `nomad.yaml`. For example, if a plugin has the following `pyproject.toml`: + +```toml +[project.entry-points.'nomad.plugin'] +myparser = "nomad_example.parsers:myparser" +``` + +You could disable the parser entry point in your `nomad.yaml` with: + +```yaml +plugins: + entry_points: + exclude: ["nomad_example.parsers:myparser"] +``` + +### Extending and using the entry point + +The plugin entry point is an instance of a [`pydantic`](https://docs.pydantic.dev/1.10/) model. This base model may already contain entry point-specific fields (such as the file extensions that a parser plugin will match) but it is also possible to extend this model to define additional fields that control your plugin behaviour. + +To specify new configuration options, you can add new `pydantic` fields to the subclass. For example, if we wanted to add a new configuration option for a parser, we could do the following: + +```python +from pydantic import Field +from nomad.config.models.plugins import ParserEntryPoint + + +class MyParserEntryPoint(ParserEntryPoint): + parameter: int = Field(0, description='Config parameter for this parser.') +``` + +where we have defined a new subclass of `ParserEntryPoint` and added a new configuration field `parameter`.
The plugin users can then control these settings in their `nomad.yaml` using `plugins.entry_points.options`: + +```yaml +plugins: + entry_points: + options: + "nomad_example.parsers:myparser": + parameter: 47 +``` + +Note that the model will also validate the values coming from `nomad.yaml`, and you should utilize the validation mechanisms of `pydantic` to provide users with helpful messages about invalid configuration. + +In your code, you can then access the whole entry point by loading it with `config.get_plugin_entry_point`: + +```python +from nomad.config import config + +configuration = config.get_plugin_entry_point('nomad_example.parsers:myparser') +print(f'The parser parameter is: {configuration.parameter}') +``` + +## Plugin development guidelines + +### Linting and formatting + +While developing NOMAD plugins, we highly recommend using a Python linter, such as [Ruff](https://docs.astral.sh/ruff), to analyze and enforce coding standards in your plugin projects. This also ensures smoother integration and collaboration. If you have used our [template repository](https://github.com/FAIRmat-NFDI/nomad-plugin-template), you will automatically have `ruff` defined as a development dependency with suitable defaults set in `pyproject.toml` together with a GitHub actions that runs the linting and formatting checks on each push to the Git repository. + +### Testing + +For testing, you should use [pytest](https://docs.pytest.org/), and a folder structure that mimics the package layout with test modules named after the tested module. 
For example, if you are developing a parser in `myparser.py`, the test folder structure should look like this: + +```txt +├── nomad-example-plugin +│ ├── src +| │ ├── nomad_example +| | │ ├── parsers +| | │ │ ├── myparser.py +| | │ │ ├── __init__.py +│ ├── tests +| │ ├── parsers +| | │ ├── test_myparser.py +| | │ ├── conftest.py +| │ ├── conftest.py +``` + +Any shared test utilities (such as `pytest` fixtures) should live in `conftest.py` modules placed at the appropriate level in the folder hierarchy, i.e. utilities dealing with parsers would live in `tests/parsers/conftest.py`, while root level utilities would live in `tests/conftest.py`. If you have used our [template repository](https://github.com/FAIRmat-NFDI/nomad-plugin-template), you will automatically have an initial test folder structure, `pytest` defined as a development dependency in `pyproject.toml` and a GitHub action that runs the test suite on each push to the Git repository. + +In the `pytest` framework, test cases are created by defining functions with the `test_` prefix, which perform assertions. A typical test case could look like this: + +```python +def test_parse_file(): + parser = MyParser() + archive = EntryArchive() + parser.parse('tests/data/example.out', archive, logging) + + sim = archive.data + assert len(sim.model) == 2 + assert len(sim.output) == 2 + assert archive.workflow2.x_example_magic_value == 42 +``` + +You can run all the tests in the `tests/` directory with: + +```shell +python -m pytest -svx tests +``` + +### Documentation + +As your plugin matures, you should also think about documenting its usage. We recommend using [`mkdocs`](https://www.mkdocs.org/) to create your documentation as a set of markdown files. 
If you have used our [template repository](https://github.com/FAIRmat-NFDI/nomad-plugin-template), you will automatically have an initial documentation folder structure, `mkdocs` defined as a development dependency in `pyproject.toml` and a GitHub action that builds the docs to a separate `gh-pages` branch each push to the Git repository. Note that if you wish to host the documentation using [GitHub pages](https://pages.github.com/), you need to [enable](https://docs.github.com/en/pages/getting-started-with-github-pages/configuring-a-publishing-source-for-your-github-pages-site#publishing-from-a-branch) this in the repository settings. + +## Publishing a plugin + +!!! warning "Attention" + The standard processes for publishing plugins and using plugins from other developers are still being worked out. The "best" practices mentioned in the following are preliminary. We aim to set up a dedicated plugin registry that allows you to publish your plugin and find plugins from others. + +### GitHub repository + +The simplest way to publish a plugin is to have it live in a publicly shared Git +repository. The package can then be installed with: + +```sh +pip install git+https://<repository_url> +``` + +!!! note + If you develop a plugin in the context of [FAIRmat](https://github.com/fairmat-nfdi) or the [NOMAD CoE](https://github.com/nomad-coe), put your plugin repositories in the corresponding GitHub organization. + +### PyPI/pip package + +You may additionally publish the plugin package in PyPI. Learn from the PyPI documentation how to [create a package for PyPI](https://packaging.python.org/en/latest/tutorials/packaging-projects/){:target="_blank"}. We recommend to use the `pyproject.toml`-based approach. + +The PyPI documentation provides further information about how to [publish a package to PyPI](https://packaging.python.org/en/latest/tutorials/packaging-projects/#uploading-the-distribution-archives){:target="_blank"}. 
If you have access to the MPCDF GitLab and NOMAD's presence there, you can also +use the `nomad-FAIR` package registry: + +``` +pip install twine +twine upload \ + -u <username> -p <password> \ + --repository-url https://gitlab.mpcdf.mpg.de/api/v4/projects/2187/packages/pypi \ + dist/nomad-example-plugin-*.tar.gz +``` diff --git a/docs/howto/customization/schemas.md b/docs/howto/plugins/schema_packages.md similarity index 69% rename from docs/howto/customization/schemas.md rename to docs/howto/plugins/schema_packages.md index 56b5d27550eb72922901999ae0d4ae7750610787..98e198e660952eede76b988eb4600f6035815134 100644 --- a/docs/howto/customization/schemas.md +++ b/docs/howto/plugins/schema_packages.md @@ -1,19 +1,72 @@ -# How to write a schema plugin +# How to write a schema package + +Schema packages are used to define and distribute custom data definitions that can be used within NOMAD. These schema packages typically contain [schemas](../../reference/glossary.md#schema) that users can select to instantiate manually filled entries using our ELN functionality, or that parsers when organizing data they extract from files. Schema packages may also contain more abstract base classes that other schema packages use. + +This documentation shows you how to write a plugin entry point for a schema package. You should read the [documentation on getting started with plugins](./plugins.md) to have a basic understanding of how plugins and plugin entry points work in the NOMAD ecosystem. ## Getting started -Fork and clone the [schema example project](https://github.com/nomad-coe/nomad-schema-plugin-example){:target="_blank"} as described in [How-to mount a plugin](../oasis/plugins_install.md). +You can use our [template repository](https://github.com/FAIRmat-NFDI/nomad-plugin-template) to create an initial structure for a plugin containing a schema package. 
The relevant part of the repository layout will look something like this: + +```txt +nomad-example + ├── src + │ ├── nomad_example + │ │ ├── schema_packages + │ │ │ ├── __init__.py + │ │ │ ├── mypackage.py + ├── LICENSE.txt + ├── README.md + └── pyproject.toml +``` + +See the documentation on [plugin development guidelines](./plugins.md#plugin-development-guidelines) for more details on the best development practices for plugins, including linting, testing and documenting. + +## Schema package entry point + +The entry point defines basic information about your schema package and is used to automatically load it into a NOMAD distribution. It is an instance of a `SchemaPackageEntryPoint` or its subclass and it contains a `load` method which returns a `nomad.metainfo.SchemaPackage` instance that contains section and schema definitions. You will learn more about the `SchemaPackage` class in the next sections. The entry point should be defined in `*/schema_packages/__init__.py` like this: + +```python +from pydantic import Field +from nomad.config.models.plugins import SchemaPackageEntryPoint + + +class MySchemaPackageEntryPoint(SchemaPackageEntryPoint): + + def load(self): + from nomad_example.schema_packages.mypackage import m_package + + return m_package -## Writing schemas in Python compared to YAML schemas -In this [guide](basics.md), we explain how to write and upload schemas in the `.archive.yaml` format. Writing and uploading such YAML schemas is a good way for NOMAD users to add schemas. But it has limitations. As a NOMAD developer or Oasis administrator you can add Python schemas to NOMAD. All built-in NOMAD schemas (e.g. for electronic structure code data) are written in Python and are part of the NOMAD sources (`nomad.datamodel.metainfo.*`). +mypackage = MySchemaPackageEntryPoint( + name = 'MyPackage', + description = 'My custom schema package.', +) +``` + +Here you can see that a new subclass of `SchemaPackageEntryPoint` was defined. 
In this new class you can override the `load` method to determine how the `SchemaPackage` class is loaded, but you can also extend the `SchemaPackageEntryPoint` model to add new configurable parameters for this schema package as explained [here](./plugins.md#extending-and-using-the-entry-point). + +We also instantiate an object `mypackage` from the new subclass. This is the final entry point instance in which you specify the default parameterization and other details about the schema package. In the reference you can see all of the available [configuration options for a `SchemaPackageEntryPoint`](../../reference/plugins.md#schemapackageentrypoint). + +The entry point instance should then be added to the `[project.entry-points.'nomad.plugin']` table in `pyproject.toml` in order for it to be automatically detected: + +```toml +[project.entry-points.'nomad.plugin'] +mypackage = "nomad_example.schema_packages:mypackage" +``` -There is a 1-1 translation between Python schemas (written in classes) and YAML (or JSON) schemas (written in objects). Both use the same fundamental concepts, like *section*, *quantity*, or *subsection*, introduced in [YAML schemas](basics.md). +## `SchemaPackage` class -## Starting example +The `load`-method of a schema package entry point returns an instance of a `nomad.metainfo.SchemaPackage` class. This definition should be contained in a separate file (e.g. `*/schema_packages/mypackage.py`) and could look like this: ```python -from nomad.metainfo import MSection, Quantity, SubSection, Units +from nomad.datamodel.data import Schema +from nomad.datamodel.metainfo.annotations import ELNAnnotation, ELNComponentEnum +from nomad.metainfo import SchemaPackage, Quantity, MSection + +m_package = SchemaPackage() + class System(MSection): ''' @@ -23,30 +76,29 @@ class System(MSection): n_atoms = Quantity( type=int, description=''' - A Defines the number of atoms in the system. + Defines the number of atoms in the system. 
''') atom_labels = Quantity( type=MEnum(ase.data.chemical_symbols), shape['n_atoms']) - atom_positions = Quantity(type=float, shape=['n_atoms', 3], unit=Units.m) - simulation_cell = Quantity(type=float, shape=[3, 3], unit=Units.m) + atom_positions = Quantity(type=float, shape=['n_atoms', 3], unit='angstrom') + simulation_cell = Quantity(type=float, shape=[3, 3], unit='angstrom') pbc = Quantity(type=bool, shape=[3]) -class Run(MSection): - section_system = SubSection(sub_section=System, repeats=True) + +class Simulation(Schema): + system = SubSection(sub_section=System, repeats=True) + +m_package.__init_metainfo__() ``` -We define a simple metainfo schema with two *sections* called `System` and `Run`. -Each section can have two types of properties: *quantities* and *subsections*. Sections and their properties are defined with -Python classes and their attributes. Each *quantity* defines a piece of data. Basic quantity attributes are `type`, `shape`, -`unit`, and `description`. +Schema packages typically contain one or several [schema](../../reference/glossary.md#schema) definitions, that can the be used to manually create new entries through the ELN functionality, or also by parsers to create instances of this schema fully automatically. All of the definitions contained in the package should be placed between the contructor call (`m_package = SchemaPackage()`) and the initialization (`m_package.__init_metainfo__()`). + +In this basic example we defined two *sections*: `System` and `Simulation`. `System` inherits from most primitive type of section - `MSection` - whereas `Simulation` is defined as a subclass of `Schema` which makes it possible to use this as the root section of an entry. Each section can have two types of properties: *quantities* and *subsections*. Sections and their properties are defined with Python classes and their attributes. Each *quantity* defines a piece of data. Basic quantity attributes are `type`, `shape`, `unit`, and `description`. 
-*Subsections* allow the placement of sections within each other, forming containment -hierarchies. Basic subsection attributes are -`sub_section`—a reference to the section definition of the subsection—and `repeats`—determines whether a subsection can be included once or multiple times. +*Subsections* allow the placement of sections within each other, forming containment hierarchies. Basic subsection attributes are `sub_section`—a reference to the section definition of the subsection—and `repeats`—determines whether a subsection can be included once or multiple times. -To use the above-defined schema and create actual data, we have to -instantiate the classes: +To use the above-defined schema and create actual data, we have to instantiate the classes: ```python run = Run() @@ -88,6 +140,44 @@ This will convert the data into JSON: } ``` +## Schema packages: Python vs. YAML + +In this [guide](../customization/basics.md), we explain how to write and upload schema packages in the `.archive.yaml` format. Writing and uploading such YAML schema packages is a good way for NOMAD users to start exploring schemas, but it has limitations. As a NOMAD developer or Oasis administrator you can add Python schema packages to NOMAD. All built-in NOMAD schemas (e.g. for electronic structure code data) are written in Python and are part of the NOMAD sources (`nomad.datamodel.metainfo.*`). + +There is a 1-1 translation between the structure in Python schema packages (written in classes) and YAML (or JSON) schema packages (written in objects). Both use the same fundamental concepts, like *section*, *quantity*, or *subsection*, introduced in [YAML schemas](../customization/basics.md). The main benefit of Python schema packages is the ability to define custom `normalize`-functions. + +`normalize`-functions are attached to sections and are are called when instances of these sections are processed. All files are processed when they are uploaded or changed. 
To add a `normalize` function, your section has to inherit from `Schema` or `ArchiveSection` which provides the base for this functionality. Here is an example: + +```python +--8<-- "examples/archive/custom_schema.py" +``` + +Make sure to call the `super` implementation properly to support multiple inheritance. In order to control the order by which the `normalize` calls are executed, one can define `normalizer_level` which is set to 0 by default. The normalize functions are always called for any sub section before the parent section. However, the order for any sections on the same level will be from low values of `normalizer_level` to high. + +If we parse an archive like this: + +```yaml +--8<-- "examples/archive/custom_data.archive.yaml" +``` + +we will get a final normalized archive that contains our data like this: + +```json +{ + "data": { + "m_def": "examples.archive.custom_schema.SampleDatabase", + "samples": [ + { + "added_date": "2022-06-18T00:00:00+00:00", + "formula": "NaCl", + "sample_id": "2022-06-18 00:00:00+00:00--NaCl" + } + ] + } +} +``` + + ## Definitions The following describes in detail the schema language for the NOMAD Metainfo and how it is expressed in Python. @@ -226,19 +316,6 @@ class CategoryName(MCategory): m_def = Category(links=['http://further.explanation.eu'], categories=[ParentCategory]) ``` -### Packages - -Metainfo packages correspond to Python packages. Typically your metainfo Python files should follow this pattern: -```python -from nomad.metainfo import Package - -m_package = Package() - -# Your section classes and categories - -m_package.__init_metainfo__() -``` - ## Adding Python schemas to NOMAD The following describes how to integrate new schema modules into the existing code according @@ -304,7 +381,7 @@ short handle of a code name or other special method prefix. ### Access structured data via API The [API section](../programmatic/api.md#access-archives) demonstrates how to access an Archive, i.e. 
-retrieve the processed data from a NOAMD entry. This API will give you JSON data likes this: +retrieve the processed data from a NOMAD entry. This API will give you JSON data likes this: ```json title="https://nomad-lab.eu/prod/v1/api/v1/entries/--dLZstNvL_x05wDg2djQmlU_oKn/archive" { @@ -353,7 +430,6 @@ To learn what each key means, you need to look up its definition in the Metainfo {{ metainfo_data() }} - ### Wrap data with Python schema classes In Python, JSON data is typically represented as nested combinations of dictionaries @@ -392,55 +468,9 @@ The NOMAD Python package provides utilities to [query large amounts of archive data](../programmatic/archive_query.md). This uses the built-in Python schema classes as an interface to the data. -## Custom normalizers - -For custom schemas, you might want to add custom normalizers. All files are parsed -and normalized when they are uploaded or changed. The NOMAD metainfo Python interface -allows you to add functions that are called when your data is normalized. - -Here is an example: - -```python ---8<-- "examples/archive/custom_schema.py" -``` - -To add a `normalize` function, your section has to inherit from `ArchiveSection` which -provides the base for this functionality. Now you can overwrite the `normalize` function -and add you own behavior. Make sure to call the `super` implementation properly to -support schemas with multiple inheritance. In order to control the order by which the -normalizations are executed, one can define `normalizer_level` which is set to 0 by -default. The normalize functions are always called for any sub section before the parent section. -However, the order for any sections on the same level will be from low values of `normalizer_level` -to high. 
- -If we parse an archive like this: - -```yaml ---8<-- "examples/archive/custom_data.archive.yaml" -``` - -we will get a final normalized archive that contains our data like this: - -```json -{ - "data": { - "m_def": "examples.archive.custom_schema.SampleDatabase", - "samples": [ - { - "added_date": "2022-06-18T00:00:00+00:00", - "formula": "NaCl", - "sample_id": "2022-06-18 00:00:00+00:00--NaCl" - } - ] - } -} -``` -## Schema plugin metadata -{{pydantic_model('nomad.config.models.plugins.Schema')}} +## Schema packages developed by FAIRmat -## Pre-defined schemas in NOMAD -Several schemas are currently being developed in NOMAD for various metadata. -The following lists these projects: +The following is a list of plugins containing schema packages developed by FAIRmat: | Description | Project url | | ---------------------------- | -------------------------------------------------------------------------- | diff --git a/docs/howto/programmatic/api.md b/docs/howto/programmatic/api.md index cce6824aa633ef2a950f1dafbf65c5b593e64f59..0bccf3682894eae782f64c7b4341db8f78de4e45 100644 --- a/docs/howto/programmatic/api.md +++ b/docs/howto/programmatic/api.md @@ -225,7 +225,7 @@ The result will look like this: You can work with the results in the given JSON (or respective Python dict/list) data already. If you have [NOMAD's Python library](./pythonlib.md) installed , you can take the archive data and use the Python interface. -The [Python interface](../customization/schemas.md#wrap-data-with-python-schema-classes) will help with code-completion (e.g. in notebook environments), +The [Python interface](../plugins/schema_packages.md#wrap-data-with-python-schema-classes) will help with code-completion (e.g. in notebook environments), resolve archive references (e.g. 
from workflow to calculation to system), and allow unit conversion: ```py from nomad.datamodel import EntryArchive diff --git a/docs/howto/programmatic/archive_query.md b/docs/howto/programmatic/archive_query.md index 7327b84a706074bb1294e764dc39c442880bd67e..407a1fb66e23c97d60fe5544c6eb3aff03a43a46 100644 --- a/docs/howto/programmatic/archive_query.md +++ b/docs/howto/programmatic/archive_query.md @@ -3,7 +3,7 @@ The `ArchiveQuery` allows you to search for entries and access their parsed and processed *archive* data at the same time. Furthermore, all data is accessible through a convenient Python interface based on the schema rather than plain JSON. See also this guide on using -[NOMAD's Python schemas](../customization/schemas.md#use-python-schemas-to-work-with-data) +[NOMAD's Python schemas](../plugins/schema_packages.md#use-python-schemas-to-work-with-data) to work with processed data. As a requirement, you have to install the `nomad-lab` Python package. Follow the diff --git a/docs/reference/config.md b/docs/reference/config.md index fcf72f178551acd2116e9f20d53f6fea54021e93..a8f109c15495ead5a57566e00904723a70ba149f 100644 --- a/docs/reference/config.md +++ b/docs/reference/config.md @@ -100,7 +100,7 @@ The following is a reference of all configuration sections and attributes. ## User Interface -These settings affect the behaviour of the user interface. Note that the configuration of apps is documented in more detail in the guide on [how to define apps](../howto/oasis/apps.md). +These settings affect the behaviour of the user interface. Note that the preferred way for creating custom apps is by using [app plugin entry points](../howto/plugins/apps.md). 
{{ config_models(['ui'])}} diff --git a/docs/reference/glossary.md b/docs/reference/glossary.md index 14676befbb2301be04d19a7fa413d79cbc671a23..1448c98b11d5ae0780d5fd15035565a8de19a895 100644 --- a/docs/reference/glossary.md +++ b/docs/reference/glossary.md @@ -20,6 +20,10 @@ out the application and this documentation. directly defining the type or shape of data. They often allow to alter how certain data is managed, represented, or edited. See [annotations in the schema documentation](../howto/customization/elns.md#annotations). +### App + +Apps allow you to build customized user interfaces for specific research domains, making it easier to navigate and understand the data. This typically means that certain domain-specific properties are highlighted, different units may be used for physical properties, and specialized dashboards may be presented. This becomes crucial for NOMAD installations to be able to scale with data that contains a mixture of experiments and simulations, different techniques, and physical properties spanning different time and length scales. + ### Archive NOMAD processes (parses and normalizes) all data. @@ -104,6 +108,14 @@ can open and read data from other files (e.g. those referenced in the mainfile). a parser is associated with a certain file-format and is only applied to files of that format. +### Plugin + +NOMAD installations can be customized through plugins, which are Git repositories containing an installable python package that will add new features upon being installed. Plugins can contain one or many plugin entry points, which represent individual customizations. + +### Plugin entry point + +Plugin entry points are used to configure and load different types of NOMAD customizations. There are several entry point types, including entry points for parsers, schema packages and apps. A single plugin may contain multiple entry points. + ### Processed data NOMAD processes (parses and normalizes) all data. 
The *processed data* is the outcome of this process. @@ -158,8 +170,11 @@ are similar to *ontologies* as they define possible relationships between data o within them. A schema is a collection of [section](#section-and-subsection) and [quantity](#quantity) -definitions. Schemas are organized in *packages*, i.e. collections of definitions. -All schemas combined form the [metainfo](#metainfo). +definitions. Schemas are organized in [schema packages](#schema-package), i.e. collections of definitions. All schemas combined form the [metainfo](#metainfo). + +### Schema package + +*Schema packages* contain a collection of [schema](#schema) definitions. Schema packages may be defined as [YAML files](../howto/customization/basics.md) or in Python as [plugin entry points](../howto/plugins/schema_packages.md). ### Section and Subsection diff --git a/docs/reference/parsers.md b/docs/reference/parsers.md index 9bbfe7bedddb5822f480e68c6b6e4ba91b5e2fb9..05a9dec37531251f2b51c2b124c8afe8479aed3e 100644 --- a/docs/reference/parsers.md +++ b/docs/reference/parsers.md @@ -5,7 +5,7 @@ You might also want to read: - [How to run parsers locally](../howto/programmatic/local_parsers.md) - - [How to develop a parser plugin](../howto/customization/plugins.md#develop-a-parser-plugin) + - [How to write a parser](../howto/plugins/plugins.md) This is a list of all available parsers and supported file formats: diff --git a/docs/reference/plugins.md b/docs/reference/plugins.md index 614e42d13162df9207acc977560326a1a548210b..3b5ad54f4a349e9ee2d885e34e431156caa7616b 100644 --- a/docs/reference/plugins.md +++ b/docs/reference/plugins.md @@ -1,33 +1,22 @@ # Plugins -Plugins allow one to add Python-based functionality to NOMAD without a custom build -NOMAD image or release. Plugins can be installed at NOMAD start-up time. Therefore, a NOMAD -installation or [Oasis](../howto/oasis/install.md) can be configured with a different -custom set of plugins or disable unnecessary plugins. 
- -NOMAD support different kinds of plugins: - -- Python **schema** -- **parser** -- **normalizer** -- additional custom **APIs** (coming soon...) +Plugins allow one to add Python-based functionality to NOMAD without a custom NOMAD image or release. Plugins can be installed at NOMAD start-up time. Therefore, a NOMAD installation or [Oasis](../howto/oasis/install.md) can be configured with a different custom set of plugins or disable unnecessary plugins. !!! note - You might also want to read [the plugin how-tos](../howto/customization/plugins.md) + You might also want to read [the how-to guide on plugins](../howto/plugins/plugins.md) -## Types of plugins +## Plugin entry point reference -We provide template projects on GitHub for each kind plugin in NOMAD. +This is a list of the available plugin entry point configuration models. -- [schema plugin](https://github.com/nomad-coe/nomad-schema-plugin-example){:target="_blank"} -- [parser plugin](https://github.com/nomad-coe/nomad-parser-plugin-example){:target="_blank"} -- [normalizer plugin](https://github.com/nomad-coe/nomad-normalizer-plugin-example.git){:target="_blank"} +{{ pydantic_model('nomad.config.models.plugins.AppEntryPoint') }} +{{ pydantic_model('nomad.config.models.plugins.NormalizerEntryPoint') }} +{{ pydantic_model('nomad.config.models.plugins.ParserEntryPoint') }} +{{ pydantic_model('nomad.config.models.plugins.SchemaPackageEntryPoint') }} -You can fork these projects and follow the instructions in their `README.md`. These -instructions will give you everything you need to run and test your plugin. 
+## Default plugin entry points -## Built-in plugins -This is a list of all built-in plugins: +This is a list of the plugin entry points that are activated by default: -{{ plugin_list() }} +{{ plugin_entry_point_list() }} diff --git a/examples/archive/custom_schema.py b/examples/archive/custom_schema.py index 351ce1bf8968bc3a07b8f2dbf600ed61d353376d..10b15337622ef1be69b23dd3928a2a7350f1c44c 100644 --- a/examples/archive/custom_schema.py +++ b/examples/archive/custom_schema.py @@ -1,4 +1,4 @@ -from nomad.datamodel import EntryData, ArchiveSection +from nomad.datamodel import Schema, ArchiveSection from nomad.metainfo.metainfo import Quantity, Datetime, SubSection @@ -15,5 +15,5 @@ class Sample(ArchiveSection): self.sample_id = f'{self.added_date}--{self.formula}' -class SampleDatabase(EntryData): +class SampleDatabase(Schema): samples = SubSection(section=Sample, repeats=True) diff --git a/examples/docs/basic_schema/data.archive.yaml b/examples/docs/basic_schema/data.archive.yaml index 72e26ca3912a70368282420a160e6293e20ae77a..95bb4e77bec72dccb0971f9065cc6af611c4b77e 100644 --- a/examples/docs/basic_schema/data.archive.yaml +++ b/examples/docs/basic_schema/data.archive.yaml @@ -1,5 +1,5 @@ data: - m_def: '../upload/raw/schema.archive.yaml#Composition' + m_def: '../upload/raw/package.archive.yaml#Composition' composition: 'H2O' elements: - label: H diff --git a/examples/docs/basic_schema/schema.archive.yaml b/examples/docs/basic_schema/package.archive.yaml similarity index 100% rename from examples/docs/basic_schema/schema.archive.yaml rename to examples/docs/basic_schema/package.archive.yaml diff --git a/examples/docs/basic_schema/schema.py b/examples/docs/basic_schema/package.py similarity index 100% rename from examples/docs/basic_schema/schema.py rename to examples/docs/basic_schema/package.py diff --git a/examples/docs/references/multiple_files/data-and-schema.archive.yaml b/examples/docs/references/multiple_files/data-and-package.archive.yaml similarity index 
80% rename from examples/docs/references/multiple_files/data-and-schema.archive.yaml rename to examples/docs/references/multiple_files/data-and-package.archive.yaml index ec5e85b9370d3dc9f9315e91eea9a88e389b29a8..4848d84628c2f66defaec31c1a5637aea3cb51f6 100644 --- a/examples/docs/references/multiple_files/data-and-schema.archive.yaml +++ b/examples/docs/references/multiple_files/data-and-package.archive.yaml @@ -2,14 +2,14 @@ definitions: sections: SpecialElement: # Extending the definition from another entry - base_section: '../upload/raw/schema.archive.yaml#Element' + base_section: '../upload/raw/package.archive.yaml#Element' quantities: atomic_weight: type: float unit: 'g/mol' data: # Instantiating the definition from another entry - m_def: '../upload/raw/schema.archive.yaml#Composition' + m_def: '../upload/raw/package.archive.yaml#Composition' composition: 'H2O' elements: # Implicitly instantiate Element as defined for Composition.elements diff --git a/examples/docs/references/multiple_files/data.archive.yaml b/examples/docs/references/multiple_files/data.archive.yaml index 07cee497daa215ec66763880fca5fd22f45a4cf6..c261e4507e0ae24f5052af7e467ceb39b0303c84 100644 --- a/examples/docs/references/multiple_files/data.archive.yaml +++ b/examples/docs/references/multiple_files/data.archive.yaml @@ -1,9 +1,9 @@ data: # Instantiating the definition from another entry - m_def: '../upload/raw/schema.archive.yaml#Solution' + m_def: '../upload/raw/package.archive.yaml#Solution' composition: 'H2O' # Referencing data in another entry - solvent: '../upload/raw/data-and-schema.archive.yaml#data' + solvent: '../upload/raw/data-and-package.archive.yaml#data' solute: elements: - label: Na diff --git a/examples/docs/references/multiple_files/schema.archive.yaml b/examples/docs/references/multiple_files/package.archive.yaml similarity index 100% rename from examples/docs/references/multiple_files/schema.archive.yaml rename to 
examples/docs/references/multiple_files/package.archive.yaml diff --git a/mkdocs.yml b/mkdocs.yml index f98185c367fe9877150355a627a7155d72958e0e..87a82f7cd5359aac5957bf44e0f1a71faa7093f3 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -29,20 +29,21 @@ nav: - NOMAD Oasis: - Install an Oasis: howto/oasis/install.md - Customize an Oasis: howto/oasis/customize.md - - Mount plugins: howto/oasis/plugins_install.md - - Configure custom apps: howto/oasis/apps.md + - Install plugins: howto/oasis/plugins_install.md - Migrate Oasis versions: howto/oasis/migrate.md - Perform admin tasks: howto/oasis/admin.md + - Plugins: + - Get started with plugins: howto/plugins/plugins.md + - Write an app: howto/plugins/apps.md + - Write a normalizer: howto/plugins/normalizers.md + - Write a parser: howto/plugins/parsers.md + - Write a schema package: howto/plugins/schema_packages.md - Customization: - - Write a schema: howto/customization/basics.md + - Write a YAML schema package: howto/customization/basics.md - Define ELNs: howto/customization/elns.md - Use base sections: howto/customization/base_sections.md - Parse tabular data: howto/customization/tabular.md - Define workflows: howto/customization/workflows.md - - Write plugins: howto/customization/plugins.md - - Write a schema plugin: howto/customization/schemas.md - - Write a parser: howto/customization/parsers.md - - Write a normalizer: howto/customization/normalizers.md - Work with units: howto/customization/units.md - Use HDF5 to handle large quantities: howto/customization/hdf5.md - Development: diff --git a/nomad/config/models/plugins.py b/nomad/config/models/plugins.py index d81126bde307d25ddb7927f14786760fb9f46b87..af199bddb751d73225b4ea8863dc075aa2eb4e0e 100644 --- a/nomad/config/models/plugins.py +++ b/nomad/config/models/plugins.py @@ -54,6 +54,8 @@ class EntryPoint(BaseModel): class AppEntryPoint(EntryPoint): + """Base model for a app plugin entry points.""" + entry_point_type: Literal['app'] = Field( 'app', 
description='Determines the entry point type.' ) @@ -64,6 +66,8 @@ class AppEntryPoint(EntryPoint): class SchemaPackageEntryPoint(EntryPoint, metaclass=ABCMeta): + """Base model for schema package plugin entry points.""" + entry_point_type: Literal['schema_package'] = Field( 'schema_package', description='Specifies the entry point type.' ) @@ -77,9 +81,18 @@ class SchemaPackageEntryPoint(EntryPoint, metaclass=ABCMeta): class NormalizerEntryPoint(EntryPoint, metaclass=ABCMeta): + """Base model for normalizer plugin entry points.""" + entry_point_type: Literal['normalizer'] = Field( 'normalizer', description='Determines the entry point type.' ) + level: int = Field( + 0, + description=""" + Integer that determines the execution order of this normalizer. Normalizers are + run in order from lowest level to highest level. + """, + ) @abstractmethod def load(self) -> 'NormalizerBaseClass': @@ -90,13 +103,16 @@ class NormalizerEntryPoint(EntryPoint, metaclass=ABCMeta): class ParserEntryPoint(EntryPoint, metaclass=ABCMeta): + """Base model for parser plugin entry points.""" + entry_point_type: Literal['parser'] = Field( 'parser', description='Determines the entry point type.' ) level: int = Field( 0, description=""" - The order by which the parser is executed with respect to other parsers. + Integer that determines the execution order of this parser. Parser with lowest + level will attempt to match raw files first. """, ) diff --git a/nomad/config/models/ui.py b/nomad/config/models/ui.py index e468150171ce677e1b9b8c735cdf88e511bd0b5f..41ac56c7dad1dc6b2258d87621c88ed8a56c7e93 100644 --- a/nomad/config/models/ui.py +++ b/nomad/config/models/ui.py @@ -378,7 +378,7 @@ class SearchSyntaxes(ConfigBaseModel): - `equality`: Used to query for a specific value with exact match. - `range_bounded`: Queries values that are between two numerical limits, inclusive or exclusive. - `range_half_bounded`: Queries values that are above/below a numerical limit, inclusive or exclusive. 
- - `free_text`: For inexact, queries. Requires that a set of keywords has been filled in the entry. + - `free_text`: For inexact, free-text queries. Requires that a set of keywords has been filled in the entry. """ exclude: Optional[List[str]] = Field( diff --git a/nomad/mkdocs.py b/nomad/mkdocs.py index 52941dfdc7387f53fccb510a2edbe0f1b4540b9e..c344e5ca148005a4987fb15d47a447272aabf7c3 100644 --- a/nomad/mkdocs.py +++ b/nomad/mkdocs.py @@ -445,7 +445,7 @@ def define_env(env): ) @env.macro - def plugin_list(): # pylint: disable=unused-variable + def plugin_entry_point_list(): # pylint: disable=unused-variable plugins = [plugin for plugin in config.plugins.entry_points.options.values()] def render_plugin(plugin: EntryPointType) -> str: @@ -459,7 +459,7 @@ def define_env(env): ]: value = getattr(plugin, field, None) if value: - dosc_or_code_url = value + docs_or_code_url = value break if docs_or_code_url: result = f'[{plugin.name}]({docs_or_code_url})' @@ -473,6 +473,8 @@ def define_env(env): category = getattr( plugin, 'plugin_type', getattr(plugin, 'entry_point_type', None) ) + if category == 'schema': + category = 'schema package' categories.setdefault(category, []).append(plugin) return '\n\n'.join( diff --git a/nomad/normalizing/__init__.py b/nomad/normalizing/__init__.py index 43da668aa0bd223abfcb9d78531ae78a1c4b5e3d..dd10ef39c926f41c7112f0e79beb45eccd1472b1 100644 --- a/nomad/normalizing/__init__.py +++ b/nomad/normalizing/__init__.py @@ -88,9 +88,10 @@ class NormalizerInterface: class NormalizerInterfaceNew: - def __init__(self, normalizer: Normalizer) -> None: + def __init__(self, normalizer: Normalizer, level: int) -> None: self.normalizer = normalizer self.archive = None + self.level = level def normalize(self, logger=None): self.normalizer.normalize(self.archive, logger) @@ -102,6 +103,8 @@ class NormalizerInterfaceNew: def __getattr__(self, name: str): if name == '__name__': return self.normalizer.__class__.__name__ + if name == 'normalizer_level': + 
return self.level return getattr(self.normalizer, name, None) @@ -121,4 +124,6 @@ for normalizer in config.normalize.normalizers.filtered_values(): # Load normalizers using new plugin mechanism for entry_point in enabled_entry_points: if isinstance(entry_point, NormalizerEntryPoint): - normalizers.append(NormalizerInterfaceNew(entry_point.load())) + normalizers.append( + NormalizerInterfaceNew(entry_point.load(), entry_point.level) + ) diff --git a/nomad/normalizing/normalizer.py b/nomad/normalizing/normalizer.py index bf121f513d9547bfcde602a625a6d095aa3155e8..53967ed92c41eb86109ef0b964d8de2310d85ec3 100644 --- a/nomad/normalizing/normalizer.py +++ b/nomad/normalizing/normalizer.py @@ -33,9 +33,9 @@ class Normalizer(metaclass=ABCMeta): """ domain: Optional[str] = 'dft' - """ The domain this normalizer should be used in. Default for all normalizer is 'DFT'. """ + """Deprecated: The domain this normalizer should be used in. Default for all normalizers is 'DFT'.""" normalizer_level = 0 - """ Specifies the order of normalization with respect to other normalizers. Lower level + """Deprecated: Specifies the order of normalization with respect to other normalizers. 
Lower level is executed first.""" def __init__(self, **kwargs) -> None: diff --git a/tests/examples/test_docs.py b/tests/examples/test_docs.py index cc8db40511a6665d4e826b5b2e35b50f0cdf1f3a..46c43ed8ed9eed3c273294472191dfc6030bef1b 100644 --- a/tests/examples/test_docs.py +++ b/tests/examples/test_docs.py @@ -28,14 +28,14 @@ def test_python_schema(): yaml_data = _load_yaml('basic_schema/data.archive.yaml')['data'] del yaml_data['m_def'] - from examples.docs.basic_schema.schema import Sample + from examples.docs.basic_schema.package import Sample sample = Sample.m_from_dict(yaml_data) assert json.dumps(sample.m_to_dict()) == json.dumps(yaml_data) def test_yaml_schema(): - yaml_package = _load_yaml('basic_schema/schema.archive.yaml')['definitions'] + yaml_package = _load_yaml('basic_schema/package.archive.yaml')['definitions'] yaml_data = _load_yaml('basic_schema/data.archive.yaml')['data'] del yaml_data['m_def'] @@ -73,10 +73,10 @@ def test_inheritance(): def test_multiple_files(): - archive = _parse_archive('references/multiple_files/schema.archive.yaml') + archive = _parse_archive('references/multiple_files/package.archive.yaml') assert len(archive.definitions.sections) == 3 - archive = _parse_archive('references/multiple_files/data-and-schema.archive.yaml') + archive = _parse_archive('references/multiple_files/data-and-package.archive.yaml') assert archive.data.elements[0].label == 'H' assert archive.data.elements[1].label == 'O'