diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 7da3dfc7396c74d55120ee14c7eac41e05675d63..3e5448d5b29d1b77c6460aefc0533ce30aae7e72 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -1,27 +1,33 @@ -# default installed image for docker executor is: python:3.6 -# using an image that can do git, docker, docker-compose -image: gitlab-registry.mpcdf.mpg.de/nomad-lab/nomad-fair/ci-runner:latest +# Syntax reference: https://docs.gitlab.com/ci/yaml/ + +# Overview: +# Pipelines run only on merge requests, schedules, tags, or the develop branch (default). +# - A schedule pipeline (e.g. nightly build) runs all the jobs. +# - A tag pipeline runs most of the jobs but skips some (also, v#.#.# tags are special). +# - Merge requests and pushes to develop will run +# - few jobs if only `docs/` files changed, +# - all jobs if any other files changed. -# build directory inside -# https://gitlab.mpcdf.mpg.de/help/ci/runners/configure_runners.md#custom-build-directories +.anchors: + .non-docs-changes: &non-docs-changes + changes: + - examples + - gui + - nomad + - ops + - scripts + - tests + - "*" # all files in root + .manual-allow_failure: &manual-allow_failure + when: manual + allow_failure: true -# https://docs.gitlab.com/ee/ci/yaml/workflow.html -# https://docs.gitlab.com/ee/ci/variables/predefined_variables.html -# if: CI_COMMIT_BRANCH && CI_COMMIT_BEFORE_SHA == "0000000000000000000000000000000000000000" -# A branch pipeline, but it is the first commit for that branch -# if: $CI_COMMIT_BRANCH && $CI_OPEN_MERGE_REQUESTS && $CI_PIPELINE_SOURCE == "push" -# For an existing workflow section to switch from branch pipelines to merge request pipelines when a merge request is created. -# if: $CI_COMMIT_BRANCH && $CI_OPEN_MERGE_REQUESTS -# A branch pipeline, but a merge request is open for that branch, do not run the branch pipeline. -# if: $CI_PIPELINE_SOURCE == "merge_request_event" -# A merge request pipeline, start the pipeline. -# if: $CI_COMMIT_BRANCH -# A branch pipeline, but there is no merge request open for the branch, run the branch pipeline. 
+# using an image that can do git, docker, docker-compose +image: gitlab-registry.mpcdf.mpg.de/nomad-lab/nomad-fair/ci-runner:latest default: tags: - # Necessary to select the right CI runner - - cloud + - cloud # Necessary to select the right CI runner variables: DOCKER_TAG: ${CI_COMMIT_REF_SLUG} @@ -38,7 +44,6 @@ workflow: - if: $CI_COMMIT_TAG variables: DOCKER_TAG: ${CI_COMMIT_REF_NAME} - - when: never stages: - build @@ -46,6 +51,8 @@ stages: - deploy - release +# JOBS + update changelog: stage: build script: @@ -76,6 +83,9 @@ build gui: variables: TARGET: dev_node DESTINATION: "${CI_REGISTRY_IMAGE}/dev_node:${DOCKER_TAG}" + rules: + - if: $CI_PIPELINE_SOURCE == "schedule" || $CI_COMMIT_TAG + - <<: *non-docs-changes # kaniko image doesn't contain pip, so we have to save the scm_pretend_version in an earlier job and reuse it later update_scm_pretend_version: @@ -93,6 +103,9 @@ build python: variables: TARGET: dev_python DESTINATION: "${CI_REGISTRY_IMAGE}/dev_python:${DOCKER_TAG}" + rules: + - if: $CI_PIPELINE_SOURCE == "schedule" || $CI_COMMIT_TAG + - <<: *non-docs-changes python linting: stage: test @@ -105,7 +118,7 @@ python linting: rules: - if: $CI_COMMIT_TAG when: never - - when: on_success + - <<: *non-docs-changes artifacts: name: "nomad_code_quality" when: always @@ -119,8 +132,9 @@ python package clean up: script: - python scripts/cleanup_packages.py rules: - - when: manual - allow_failure: true + - if: $CI_PIPELINE_SOURCE == "schedule" || $CI_COMMIT_TAG + <<: *manual-allow_failure + - <<: [*non-docs-changes, *manual-allow_failure] check python dependencies: stage: test @@ -130,8 +144,9 @@ check python dependencies: rules: - if: $CI_COMMIT_TAG when: never - - when: manual - allow_failure: true + - if: $CI_PIPELINE_SOURCE == "schedule" + <<: *manual-allow_failure + - <<: [*non-docs-changes, *manual-allow_failure] .base_test: image: ${CI_REGISTRY_IMAGE}/dev_python:${DOCKER_TAG} @@ -168,15 +183,14 @@ generate pytest timings: - python -m pytest --store-durations artifacts: expire_in: 1 days - when: on_success paths: - .test_durations - rules: - if: $CI_COMMIT_TAG when: never - - when: manual - allow_failure: true + - if: $CI_PIPELINE_SOURCE == "schedule" + <<: *manual-allow_failure + - <<: [*non-docs-changes, *manual-allow_failure] python tests: parallel: 3 @@ -187,13 +201,13 @@ python tests: - cp .coverage .coverage_${CI_NODE_INDEX} artifacts: expire_in: 1 days - when: on_success paths: - .coverage_${CI_NODE_INDEX} rules: - if: $CI_COMMIT_TAG when: never - - when: on_success + - if: $CI_PIPELINE_SOURCE == "schedule" + - <<: *non-docs-changes python coverage report: stage: test @@ -213,7 +227,8 @@ python coverage report: rules: - if: $CI_COMMIT_TAG when: never - - when: on_success + - if: $CI_PIPELINE_SOURCE == "schedule" + - <<: *non-docs-changes gui linting: stage: test @@ -227,7 +242,8 @@ gui linting: rules: - if: $CI_COMMIT_TAG when: never - - when: on_success + - if: $CI_PIPELINE_SOURCE == "schedule" + - <<: *non-docs-changes generate gui artifacts: stage: test @@ -239,7 +255,8 @@ generate gui artifacts: rules: - if: $CI_COMMIT_TAG when: never - - when: on_success + - if: $CI_PIPELINE_SOURCE == "schedule" + - <<: *non-docs-changes artifacts: paths: - gui/tests/env.js @@ -278,7 +295,8 @@ gui tests: rules: - if: $CI_COMMIT_TAG when: never - - when: on_success + - if: $CI_PIPELINE_SOURCE == "schedule" + - <<: *non-docs-changes build python package: stage: test @@ -301,13 +319,12 @@ build python package: - cp /app/tests/data/examples/example.out $CI_PROJECT_DIR/ artifacts: 
expire_in: 1 days - when: on_success paths: - dist/ - archive.json - example.out -install tests: +python package install tests: stage: test parallel: matrix: @@ -327,8 +344,11 @@ install tests: - python -m nomad.cli parse --skip-normalizers archive.json - uv pip install git+https://github.com/nomad-coe/nomad-parser-example.git@ba6027fdd4cda0cf9e0b32546bd809c8fdda79e6 - python -m exampleparser example.out + rules: + - if: $CI_PIPELINE_SOURCE == "schedule" || $CI_COMMIT_TAG + - <<: *non-docs-changes -python package: +release python package: stage: release image: ghcr.io/astral-sh/uv:$UV_VERSION-python$PYTHON_VERSION-bookworm variables: @@ -336,12 +356,11 @@ python package: script: uv publish -u gitlab-ci-token -p ${CI_JOB_TOKEN} --publish-url https://gitlab.mpcdf.mpg.de/api/v4/projects/${CI_PROJECT_ID}/packages/pypi dist/nomad-lab-*.tar.gz rules: - if: $CI_COMMIT_BRANCH == "develop" && $NIGHTLY - when: on_success - - when: manual - allow_failure: true - if: $CI_COMMIT_TAG + <<: *manual-allow_failure + - <<: [*non-docs-changes, *manual-allow_failure] -pypi package: +release pypi package: stage: release variables: GIT_STRATEGY: none @@ -350,9 +369,8 @@ pypi package: script: twine upload -u $CI_TWINE_USER -p $CI_TWINE_PASSWORD dist/nomad-lab-*.tar.gz rules: - if: $CI_COMMIT_TAG - when: manual - allow_failure: true - - when: never + <<: *manual-allow_failure + - <<: [*non-docs-changes, *manual-allow_failure] push to github: stage: release diff --git a/CHANGELOG.md b/CHANGELOG.md index 17a1426c35f24b747611387520ec6e9e4ada59b7..a87a02a1da68401882a62b7e66ce4a09dbb374d7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,53 @@ +## 1.3.15 (2025-03-05) + +### Fixed (1 change) + +- [Fixed user group collection name in MongoDB](https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR/-/commit/e8daac5fba118c3f2d013f97bc68753cb916754a) ([merge request](https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR/-/merge_requests/2350)) + +## 1.3.14 (2025-02-28) + +### Added (11 changes) + +- [Add nomad distro commit info](https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR/-/commit/1a03bfa35ecc17b5040b2f1e5ee20b24d544d17e) ([merge request](https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR/-/merge_requests/2329)) +- [Resolve "Generic xml parser"](https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR/-/commit/3f2535f2443892ec71cc2d5881aeb50452052b30) ([merge request](https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR/-/merge_requests/1655)) +- [Resolve "create edges in the workflow"](https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR/-/commit/fb04f4f163e61b94c0d088863a8ac8afb468cf53) ([merge request](https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR/-/merge_requests/2318)) +- [Added implementation and documentation for DataFrames.](https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR/-/commit/0696502401fc74a0b460ed3a37acf9f2b378cff9) ([merge request](https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR/-/merge_requests/2211)) +- [Implement h5 ref for axes, aux signal](https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR/-/commit/7f162a3be13ba454abfd69e2b364739b1b8166d0) ([merge request](https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR/-/merge_requests/2276)) +- [Added support for targeting any NeXus search quantity in aggregations by...](https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR/-/commit/bef33475c794afe9328b0e38e9986ae05be78aaf) ([merge request](https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR/-/merge_requests/2288)) +- [Added support for custom atomic labels, radii and colors in the NGL-based structure 
viewer.](https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR/-/commit/d26703add00be4c7b57764646aa4bfcf1f272e61) ([merge request](https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR/-/merge_requests/2302)) +- [Added documentation about schema versioning.](https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR/-/commit/dc5fe24bd18a4564ab0c9c3d73b948152377ef7e) ([merge request](https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR/-/merge_requests/2297)) +- [Added annotation for controlling schema availability and naming in the GUI.](https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR/-/commit/27f4b87bd937cab1163eb2498020c7aef097b805) ([merge request](https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR/-/merge_requests/2298)) +- [Resolve "JSON to JSON model transformer"](https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR/-/commit/573f53dcbb87be87134a26028cdf181d87dd1303) ([merge request](https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR/-/merge_requests/1806)) +- [Implement wildcard in paths](https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR/-/commit/fa64155d93bfb2b39bc016a579d5af65a3f8b6f9) ([merge request](https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR/-/merge_requests/2240)) + +### Fixed (11 changes) + +- [Fixed issue with setting and reading boolean values in queries.](https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR/-/commit/b37db79e23d15470981cfdd6ffdfe701269b5693) ([merge request](https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR/-/merge_requests/2347)) +- [Fixed issue with resolving units when using full storage](https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR/-/commit/b767f85f45377fd979950970234dc1448d1b4dd7) ([merge request](https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR/-/merge_requests/2339)) +- [Fix properties normalization](https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR/-/commit/f011e10cc5ad46d8c5d0276d33e429fff9c98f0c) ([merge request](https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR/-/merge_requests/2337)) +- [Fixed issues with north config optional values](https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR/-/commit/7ecbbb66ad6cb8b44f045a51deb93e7a77c4defa) ([merge request](https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR/-/merge_requests/2336)) +- [Fixed issue with suggestions not being shown by default for terms menu items,...](https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR/-/commit/3446265bf31e085b7659bf2446e88052d90a08b2) ([merge request](https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR/-/merge_requests/2330)) +- [Updated nomad install documentation and fixed broken links.](https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR/-/commit/c47bacc6754f7713247c17d1e90a99dd75be6ed0) ([merge request](https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR/-/merge_requests/2297)) +- [Migrated to datacite kernel-4. 
Fixes #2245.](https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR/-/commit/5529523b0af7f81004e3501d430f1d441f753590) ([merge request](https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR/-/merge_requests/2285)) +- [Merge branch 'fix-gui-archive-browser-for-readonly-archives' into 'develop'](https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR/-/commit/9cba8ca3e63082e275242c2dac62f22f1f9477bf) ([merge request](https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR/-/merge_requests/2280)) +- [bug fix if no tasks](https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR/-/commit/561b12a50c27ae3b203ded11466ee17d5351b39c) ([merge request](https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR/-/merge_requests/2270)) +- [Fix early return in PubChemPureSubstanceSection to prevent unnecessary processing](https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR/-/commit/6ab989cef19ad1a5da810c6a7050339dd38b8b23) ([merge request](https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR/-/merge_requests/2258)) +- [Fixes and improvements](https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR/-/commit/6b6cbc54920fa58e5e31b200ca65d824d380bcfc) ([merge request](https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR/-/merge_requests/2234)) + +### Changed (11 changes) + +- [Workflow visualizer](https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR/-/commit/2db3bee03270dbdd4f1222353925279c385c5208) ([merge request](https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR/-/merge_requests/2340)) +- [Enable ruff rule F401, remove unused import](https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR/-/commit/73fb0bb0298b414ec2557cee1de4933593932443) ([merge request](https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR/-/merge_requests/2325)) +- [Patch h5web in postinstall](https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR/-/commit/e98b6000cb49556c1c1f74cb593c699219604f57) ([merge request](https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR/-/merge_requests/2319)) +- [Merge branch 'pydantic-v2' into 'develop'](https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR/-/commit/5b5acfba8fa96aa746d411ecc0464a556a9cc935) ([merge request](https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR/-/merge_requests/2304)) +- [Migrate from Pydantic v1 to v2](https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR/-/commit/07b51f2ea57dbcc6c380fea962e4b7e4e5488950) ([merge request](https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR/-/merge_requests/2304)) +- [Resolve "Simplification of System/CompositeSystem/PureSubstance base sections"](https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR/-/commit/4c4a35aa494983b662e41e030416c6b334dbd903) ([merge request](https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR/-/merge_requests/2300)) +- [Resolve "Simplification of System/CompositeSystem/PureSubstance base sections"](https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR/-/commit/237115a8963f01fe783eb0f3f1f97b5ac6213eda) ([merge request](https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR/-/merge_requests/2300)) +- [Reduced RichTextEdit height for BaseSection description](https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR/-/commit/ef14f78325b12bd992f9ad40db323908ed7b1303) ([merge request](https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR/-/merge_requests/2264)) +- [Update jupyter image](https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR/-/commit/2a367112e219581d2faea063a064a763df1ffb63) ([merge request](https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR/-/merge_requests/2252)) +- [Respect host and port in nomad config](https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR/-/commit/1302e4efbfe877bbb0107d260e217271938d9fa9) ([merge 
request](https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR/-/merge_requests/2243)) +- [Restrict httpx version in dependencies](https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR/-/commit/543756f04922dd877ad6ccf82c9729019d54ace2) ([merge request](https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR/-/merge_requests/2237)) + ## 1.3.13 (2024-11-29) No changes. diff --git a/Dockerfile b/Dockerfile index 0309d0f739e6a5c753af747907a455756ffb2cb9..8b6add58bec779926c277feb1b6c81859d1711ce 100644 --- a/Dockerfile +++ b/Dockerfile @@ -20,8 +20,9 @@ # If you need more help, visit the Dockerfile reference guide at # https://docs.docker.com/engine/reference/builder/ -FROM node:20 AS base_node -FROM python:3.12-slim AS base_python +# node20 image local copy +FROM gitlab-registry.mpcdf.mpg.de/nomad-lab/nomad-fair:node AS base_node +FROM ghcr.io/astral-sh/uv:0.5-python3.12-bookworm-slim AS base_python # Keeps Python from buffering stdout and stderr to avoid situations where # the application crashes without emitting any logs due to buffering. ENV PYTHONUNBUFFERED=1 @@ -61,8 +62,6 @@ RUN apt-get update \ WORKDIR /app -# Install UV -COPY --from=ghcr.io/astral-sh/uv:0.4 /uv /bin/uv # Python environment COPY requirements.txt . @@ -92,9 +91,6 @@ RUN apt-get update \ git \ && rm -rf /var/lib/apt/lists/* -# Install UV -COPY --from=ghcr.io/astral-sh/uv:0.4 /uv /bin/uv - # Python environment COPY requirements-dev.txt . diff --git a/docs/howto/customization/mapping_parser.md b/docs/howto/customization/mapping_parser.md new file mode 100644 index 0000000000000000000000000000000000000000..35d1d0c1f2f2e00e9a401b94fb1224a8fc7cd812 --- /dev/null +++ b/docs/howto/customization/mapping_parser.md @@ -0,0 +1,217 @@ +# How to write data to archive with MappingParser + +`MappingParser` is a generic parser class implemented in +`nomad.parsing.file_parser/mapping_parser.py` to handle the conversion between a +data object and a Python dictionary. We refer to an instance of +this class as 'mapping parser' throughout this section. In the following, the abstract +properties and methods of the mapping parser are explained. The various implementations of +the mapping parser are also described, and the `Mapper` class, which is required to convert one +mapping parser into another, is explained as well. + +## MappingParser + +The mapping parser has several abstract properties and methods and the most important +ones are listed in the following: + +- `filepath`: path to the input file to be parsed +- `data_object`: object resulting from loading the file in memory with `load_file` +- `data`: dictionary representation of `data_object` +- `mapper`: instance of `Mapper` required by `convert` +- `load_file`: method to load the file given by `filepath` +- `to_dict`: method to convert `data_object` into `data` +- `from_dict`: method to convert `data` into `data_object` +- `convert`: method to convert to another mapping parser + +`data_object` can be an `XML` element tree or a `metainfo` section, for example, depending on +the inheriting class. In order to convert a mapping parser to another parser, +the target parser must provide a [`Mapper`](#mapper) object. We refer to this simply as +mapper throughout. + +In the following, we describe the currently implemented mapping parsers. + +### XMLParser + +This is the mapping parser for XML files. It uses [`lxml`](https://lxml.de/) to +load the file as an element tree. The dictionary is generated by iteratively parsing the +elements of the tree in `to_dict`.
The values parsed from element `text` are automatically +converted to a corresponding data type. If attributes are present, the value is wrapped in +a dictionary with the key given by `value_key` ('__value' by default) while the attribute keys +are prefixed by `attribute_prefix` ('@' by default). The following XML: + +```xml +<a> + <b name='item1'>name</b> + <b name='item2'>name2</b> +</a> +``` + +will be converted to: + +```python + data = { + 'a' : { + 'b': [ + {'@name': 'item1', '__value': 'name'}, + {'@name': 'item2', '__value': 'name2'} + ] + } + } +``` + +The conversion can be reversed using the `from_dict` method. + +### HDF5Parser + +This is the mapping parser for HDF5 files. It uses [`h5py`](https://www.h5py.org/) to load +the file as an HDF5 group. Similar to [XMLParser](#xmlparser), the HDF5 datasets are +iteratively parsed from the underlying groups and, if attributes are present, these are +also parsed. The `from_dict` method is also implemented to convert a dictionary into an +HDF5 group. + +### MetainfoParser + +This is the mapping parser for NOMAD archive files or metainfo sections. +It accepts a schema root node annotated with `MappingAnnotation` as `data_object`. +`create_mapper` generates the actual mapper matching the `annotation_key`. +If a `filepath` is specified, it instead falls back on the [`ArchiveParser`](--ref--). <!-- TODO: add reference --> + +The annotation should always point to a parsed value via a `path` (JMesPath format). +It may optionally specify a multi-argument `operator` for data mangling. <!-- most operators are binary, would change the name --> +In this case, specify a tuple consisting of: + +- the operator name, defined within the same scope. +- a list of paths with the corresponding values for the operator arguments. <!-- @Alvin: can you verify? --> + +Similar to `MSection`, it can be converted to (`to_dict`) or from (`from_dict`) a Python `dict`. +Other attributes are currently accessible. + +```python +from nomad.datamodel.metainfo.annotations import Mapper as MappingAnnotation + +class BSection(ArchiveSection): + v = Quantity(type=np.float64, shape=[2, 2]) + v.m_annotations['mapping'] = dict( + xml=MappingAnnotation(mapper='.v'), + hdf5=MappingAnnotation(mapper=('get_v', ['.v[0].d'])), + ) + + v2 = Quantity(type=str) + v2.m_annotations['mapping'] = dict( + xml=MappingAnnotation(mapper='.c[0].d[1]'), + hdf5=MappingAnnotation(mapper='g.v[-2]'), + ) + +class ExampleSection(ArchiveSection): + b = SubSection(sub_section=BSection, repeats=True) + b.m_annotations['mapping'] = dict( + xml=MappingAnnotation(mapper='a.b1'), hdf5=MappingAnnotation(mapper='.g1') + ) + +ExampleSection.m_def.m_annotations['mapping'] = dict( + xml=MappingAnnotation(mapper='a'), hdf5=MappingAnnotation(mapper='g') +) + +parser = MetainfoParser() +parser.data_object = ExampleSection(b=[BSection()]) +parser.annotation_key = 'xml' +parser.mapper +# Mapper(source=Path(path='a'.... +``` + +### Converting mapping parsers + +The following is sample Python code to illustrate the mapping of the contents of an +HDF5 file to an archive. First, we create a `MetainfoParser` object for the archive. The +annotation key is set to `hdf5` which will generate a +[mapper](#mapper) from the `hdf5` annotations defined in the definitions. Essentially, +only metainfo sections and quantities with the `hdf5` annotation will be mapped. The mapper +will contain paths for the source (HDF5) and the target (archive). The archive is then +set as the archive parser's `data_object`.
Here, the archive already contains some data +which should be merged with the data that will be parsed. Next, a parser for HDF5 data is +created. We use a custom subclass of `HDF5Parser` which implements the `get_v` method +referenced in the annotation of `BSection.v`. In this example, we do not read the data from the HDF5 file but +instead generate it from a dictionary by using the `from_dict` method. By invoking the +`convert` method, the archive parser data object is populated with the corresponding +HDF5 data. + +```python + class ExampleHDF5Parser(HDF5Parser): + @staticmethod + def get_v(value): + return np.array(value)[1:, :2] + + archive_parser = MetainfoParser() + archive_parser.annotation_key = 'hdf5' + archive_parser.data_object = ExampleSection(b=[BSection(v=np.eye(2))]) + + hdf5_parser = ExampleHDF5Parser() + d = dict( + g=dict( + g1=dict(v=[dict(d=np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]))]), + v=['x', 'y', 'z'], + g=dict( + c1=dict( + i=[4, 6], + f=[ + {'@index': 0, '__value': 1}, + {'@index': 2, '__value': 2}, + {'@index': 1, '__value': 1}, + ], + d=[dict(e=[3, 0, 4, 8, 1, 6]), dict(e=[1, 7, 8, 3, 9, 1])], + ), + c=dict(v=[dict(d=np.eye(3), e=np.zeros(3)), dict(d=np.ones((3, 3)))]), + ), + ) + ) + hdf5_parser.from_dict(d) + + hdf5_parser.convert(archive_parser) + + # >>> archive_parser.data_object + # ExampleSection(b, b2) + # >>> archive_parser.data_object.b[1].v + # array([[4., 5.], + # [7., 8.]]) +``` + +## Mapper + +A mapper is necessary in order to convert a mapping parser to a target mapping parser +by mapping data from the source to the target. There are three kinds of mapper: `Map`, +`Evaluate` and `Mapper`, each inheriting from `BaseMapper`. A mapper has the attributes +`source` and `target` which define the paths to the source data and target, respectively. +`Map` is intended for mapping data directly from source to target. The path to the data is +given by the attribute `path`. `Evaluate` will execute a function defined by +`function_name` with the arguments given by the mapped values of the paths in +`function_args`. Lastly, `Mapper` allows the nesting of mappers by providing a list of +mappers to its attribute `mapper`. All the paths are instances of `Path` with the string +value of the path to the data given by the attribute `path`. The value of `path` should +follow the [jmespath specifications](https://jmespath.org/specification.html) but could be +prefixed by `.` which indicates that this is a path relative to the parent. This tells the +mapper from which source to get the data. + +```python + Mapper( + source=Path(path='a.b2'), target=Path(path='b2'), mapper=[ + Mapper( + source=Path(path='.c', parent=Path(path='a.b2')), + target=Path(path='.c', parent=Path(path='b2')), mapper=[ + Map( + target=Path( + path='.i', parent=Path(path='.c', parent=Path(path='b2')) + ), + path=Path( + path='.d', parent=Path(path='.c', parent=Path(path='a.b2')) + ) + ), + Evaluate( + target=Path( + path='.g', parent=Path(path='.c', parent=Path(path='b2')) + ), + function_name='slice', function_args=[Path(path='a.b2.c.f.g.i')] + ) + ] + ) + ], + ) +``` diff --git a/docs/howto/develop/code.md b/docs/howto/develop/code.md index 51e82c94db6cfcd7855d7d6a06f1aeeec7e49ce7..f74869a9feb7a510b1ffb7ed7c0b4d90a3d46d5f 100644 --- a/docs/howto/develop/code.md +++ b/docs/howto/develop/code.md @@ -5,25 +5,32 @@ about the codebase and ideas about what to look at first.
## Git Projects -There is one [main NOMAD project](https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR){:target="_blank"} -(and its [fork on GitHub](https://github.com/nomad-coe/nomad){:target="_blank"}). This project contains -all the framework and infrastructure code. It instigates all checks, builds, and -deployments for the public NOMAD service, the NOMAD Oasis, and the `nomad-lab` Python -package. All contributions to NOMAD have to go through this project eventually. +There is one +[main NOMAD project](https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR){:target="_blank"} +(and its [fork on GitHub](https://github.com/nomad-coe/nomad){:target="_blank"}). +This project contains all the framework and infrastructure code. It instigates all checks, +builds, and deployments for the public NOMAD service, the NOMAD Oasis, and the `nomad-lab` +Python package. All contributions to NOMAD have to go through this project eventually. All (Git) projects that NOMAD depends on are either a Git submodule (you find them all in the `dependencies` directory or its subdirectories) or they are listed as PyPI packages in the `pyproject.toml` of the main project (or one of its submodules). -You can also have a look at the [built-in plugins](../../reference/plugins.md) that constitute the majority of these projects. The only other projects are [MatID](https://github.com/nomad-coe/matid){:target="_blank"}, [DOS fingerprints](https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-dos-fingerprints){:target="_blank"}, and the [NOMAD Remote Tools Hub](https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-remote-tools-hub){:target="_blank"}. +You can also have a look at the [built-in plugins](../../reference/plugins.md) that +constitute the majority of these projects. The only other projects are +[MatID](https://github.com/nomad-coe/matid){:target="_blank"}, +[DOS fingerprints](https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-dos-fingerprints){:target="_blank"}, +and the +[NOMAD Remote Tools Hub](https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-remote-tools-hub){:target="_blank"}. !!! note - The GitLab organization [nomad-lab](https://gitlab.mpcdf.mpg.de/nomad-lab){:target="_blank"} and the - GitHub organizations for [FAIRmat](https://github.com/fairmat-nfdi){:target="_blank"} and the - [NOMAD CoE](https://github.com/nomad-coe){:target="_blank"} all represent larger infrastructure and - research projects, and they include many other Git projects that are not related. - When navigating the codebase, only follow the submodules. + The GitLab organization + [nomad-lab](https://gitlab.mpcdf.mpg.de/nomad-lab){:target="_blank"} and the GitHub + organizations for [FAIRmat](https://github.com/fairmat-nfdi){:target="_blank"} and the + [NOMAD CoE](https://github.com/nomad-coe){:target="_blank"} all represent larger + infrastructure and research projects, and they include many other Git projects that + are not related. When navigating the codebase, only follow the submodules. ## Python code @@ -39,22 +46,26 @@ There are three main directories with Python code: The `nomad` directory contains the following "main" modules. 
This list is not extensive but should help you to navigate the codebase: -- `app`: The [FastAPI](https://fastapi.tiangolo.com/){:target="_blank"} APIs: v1 and v1.2 NOMAD APIs, - [OPTIMADE](https://www.optimade.org/){:target="_blank"}, [DCAT](https://www.w3.org/TR/vocab-dcat-2/){:target="_blank"}, +- `app`: The [FastAPI](https://fastapi.tiangolo.com/){:target="_blank"} APIs: v1 and v1.2 + NOMAD APIs, + [OPTIMADE](https://www.optimade.org/){:target="_blank"}, + [DCAT](https://www.w3.org/TR/vocab-dcat-2/){:target="_blank"}, [h5grove](https://github.com/silx-kit/h5grove){:target="_blank"}, and more. - `archive`: Functionality to store and access archive files. This is the storage format for all processed data in NOMAD. See also the docs on [structured data](../../explanation/data.md). -- `cli`: The command line interface (based on [Click](https://click.palletsprojects.com){:target="_blank"}). - Subcommands are structured into submodules. +- `cli`: The command line interface (based on + [Click](https://click.palletsprojects.com){:target="_blank"}). Subcommands are + structured into submodules. - `config`: NOMAD is configured through the `nomad.yaml` file. This contains all the - ([Pydantic](https://docs.pydantic.dev/){:target="_blank"}) models and default config parameters. + ([Pydantic](https://docs.pydantic.dev/){:target="_blank"}) models and default config + parameters. -- `datamodel`: The built-in schemas (e.g. `nomad.datamodel.metainfo.workflow` used to construct - workflows). The base sections and section for the shared entry structure. +- `datamodel`: The built-in schemas (e.g. `nomad.datamodel.metainfo.workflow` used to + construct workflows). The base sections and section for the shared entry structure. See also the docs on the [datamodel](../../explanation/data.md) and [processing](../../explanation/basics.md). @@ -68,7 +79,8 @@ but should help you to navigate the codebase: [processing](../../explanation/basics.md#parsing). - `processing`: It's all about processing uploads and entries. The interface to - [Celery](https://docs.celeryq.dev/en/stable/){:target="_blank"} and [MongoDB](https://www.mongodb.com). + [Celery](https://docs.celeryq.dev/en/stable/){:target="_blank"} and + [MongoDB](https://www.mongodb.com). - `units`: The unit and unit conversion system based on [Pint](https://pint.readthedocs.io){:target="_blank"}. @@ -84,16 +96,18 @@ but should help you to navigate the codebase: ## GUI code -The NOMAD UI is written as a [React](https://react.dev/){:target="_blank"} single-page application (SPA). It -uses (among many other libraries) [MUI](https://mui.com/){:target="_blank"}, -[Plotly](https://plotly.com/python/){:target="_blank"}, and [D3](https://d3js.org/){:target="_blank"}. The GUI code is -maintained in the `gui` directory. Most relevant code can be found in -`gui/src/components`. The application entry point is `gui/src/index.js`. +The NOMAD UI is written as a [React](https://react.dev/){:target="_blank"} single-page +application (SPA). It uses (among many other libraries) +[MUI](https://mui.com/){:target="_blank"}, +[Plotly](https://plotly.com/python/){:target="_blank"}, and +[D3](https://d3js.org/){:target="_blank"}. The GUI code is maintained in the `gui` +directory. Most relevant code can be found in `gui/src/components`. The application entry +point is `gui/src/index.js`. ## Documentation -The documentation is based on [MkDocs](https://www.mkdocs.org/){:target="_blank"}. 
The important files -and directories are: +The documentation is based on [MkDocs](https://www.mkdocs.org/){:target="_blank"}. The +important files and directories are: - `docs`: Contains all the Markdown files that contribute to the documentation system. @@ -101,7 +115,8 @@ and directories are: added here as well. - `nomad/mkdocs.py`: Python code that defines - [macros](https://mkdocs-macros-plugin.readthedocs.io/){:target="_blank"} which can be used in Markdown. + [macros](https://mkdocs-macros-plugin.readthedocs.io/){:target="_blank"} which can be + used in Markdown. ## Other top-level directories diff --git a/docs/howto/overview.md b/docs/howto/overview.md index c8968052d9fe6452abfefdced09bff2676da5558..7a1bcb634ff11325d9e33084526d3e98a10c4cc6 100644 --- a/docs/howto/overview.md +++ b/docs/howto/overview.md @@ -6,7 +6,8 @@ hide: toc ## Users -These how-to guides target NOMAD users and cover data management, exploration, analysis with NOMAD graphical web-interface and APIs. +These how-to guides target NOMAD users and cover data management, exploration, analysis +with NOMAD graphical web-interface and APIs. <div markdown="block" class="home-grid"> <div markdown="block"> @@ -38,7 +39,9 @@ Use NOMAD's functions programmatically and via its APIs. ## Data stewards, administrators, and developers -These how-to guides allow advanced users, NOMAD administrators, data stewards, and developers to customize and operate NOMAD and NOMAD Oasis or contribute to NOMAD's development. +These how-to guides allow advanced users, NOMAD administrators, data stewards, and +developers to customize and operate NOMAD and NOMAD Oasis or contribute to NOMAD's +development. <div markdown="block" class="home-grid"> <div markdown="block"> @@ -71,6 +74,7 @@ Customize NOMAD, write plugins, and tailor NOMAD Oasis. - [Write a schema packages](plugins/schema_packages.md) - [Work with units](customization/units.md) - [Use HDF5 to handle large quantities](customization/hdf5.md) +- [Use Mapping parser to write data on archive](customization/mapping_parser.md) </div> <div markdown="block"> @@ -89,4 +93,6 @@ Become a NOMAD developer and contribute to the source code. <h2>One last thing</h2> -If you can't find what you're looking for in our guides, [contact our team](mailto:support@nomad-lab.eu) for personalized help and assistance. Don't worry, we're here to help and learn what we're doing wrong! +If you can't find what you're looking for in our guides, +[contact our team](mailto:support@nomad-lab.eu) for personalized help and assistance. +Don't worry, we're here to help and learn what we're doing wrong! diff --git a/docs/howto/plugins/schema_packages.md b/docs/howto/plugins/schema_packages.md index c64af853e86c03d7aa8e01ffbd74db99215a1f9b..fbe1ea0863ae1e66542ea37ef2f459fef6580ded 100644 --- a/docs/howto/plugins/schema_packages.md +++ b/docs/howto/plugins/schema_packages.md @@ -178,6 +178,18 @@ we will get a final normalized archive that contains our data like this: } ``` +## Migration guide + +By default, schema packages are identified by the full qualified path to the Python module that contains the definitions. An example of a full qualified path could be `nomad_example.schema_packages.mypackage`, where the first part is the Python package name, second part is a subpackage, and the last part is a Python module containing the definitions. 
This is the easiest way to prevent conflicts between different schema packages: Python package names are unique (prevents clashes between packages) and paths inside a package must point to a single Python module (prevents clashes within a package). This does, however, mean that _if you move your schema definition in the plugin source code, any references to the old definition will break_. This becomes problematic in installations that have a lot of old data processed with the old definition location, as those entries will still refer to the old location and will not work correctly. + +As it might not be possible, or even wise, to prevent changes in the source code layout, and reprocessing all old entries might be impractical, we do provide an alias mechanism to help with migration tasks. Imagine your schema package was contained in `nomad_example.schema_packages.mypackage`, and in a newer version of your plugin you want to move it to `nomad_example.schema_packages.mynewpackage`. The way to do this without completely breaking the old entries is to add an alias in the schema package definition: + +```python +m_package = SchemaPackage(aliases=['nomad_example.schema_packages.mypackage']) +``` + +Note that this will only help in scenarios where you have moved the definitions and not removed or modified any of them. + ## Definitions The following describes in detail the schema language for the NOMAD Metainfo and how it is expressed in Python. @@ -305,6 +317,12 @@ The above example works, if `System` is eventually defined in the same package. ### Categories +!!! Warning + + Categories are now deprecated. + Their previous occurrences should be replaced + with respective annotations. + In the old metainfo this was known as _abstract types_. Categories are defined with Python classes that have `:class:MCategory` as base class. @@ -317,6 +335,122 @@ class CategoryName(MCategory): m_def = Category(links=['http://further.explanation.eu'], categories=[ParentCategory]) ``` +## Data frames + +On top of the core Metainfo concepts like `Sections`, `Quantities`, and `SubSection`, we provide a mechanism for modeling _data frames_. + +A NOMAD data frame is a multi-index table with named indices (variables) and columns (fields). +All columns should match in length, as they are all parametrized by the same indices. +Both variables and fields are defined standalone using `Values`. +A DataFrame may contain any number of Values, though a bare minimum can be defined via `mandatory_variables` and `mandatory_fields`, respectively. + +The mechanism is based on a concept called `Values` for storing arrays of numeric data to +represent a _field_ or _variable_ (or axis, dimension, etc.) and a concept called +`DataFrame` that combines _fields_ and _variables_ with matching dimensions into a data frame. +Our `DataFrame` is conceptually close to xarray datasets, pandas data frames, or the NeXus NXData group. + +`Values` and `DataFrame` are usually not used directly; instead, you will create +reusable templates that allow you to use the same type of `Values` (e.g. describing +physical properties like energies, temperatures, pressures, ...) and the same type of `DataFrame` (e.g. +describing material properties at different variables like density of states or band gap).
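To give a first impression of how these templates are meant to be used, here is a condensed sketch distilled from the `examples/metainfo/data_frames.py` example that the next section includes in full; the import paths and `create` calls follow that example and should be read as an illustration, not a stable API reference.

```python
import numpy as np

from nomad.metainfo.data_frames import DataFrameTemplate, ValuesTemplate
from nomad.metainfo.metainfo import MSection

# Reusable values templates for physical properties (type, shape, unit, IRI).
Energy = ValuesTemplate(
    name='Energy',
    type=np.float64,
    shape=[],
    unit='J',
    iri='https://www.wikidata.org/wiki/Q11379',
)
Temperature = ValuesTemplate(
    name='Temperature',
    type=np.float64,
    shape=[],
    unit='K',
    iri='https://www.wikidata.org/wiki/Q11466',
)

# A reusable data frame template that requires an Energy field.
BandGap = DataFrameTemplate(name='BandGap', mandatory_fields=[Energy])


class MySection(MSection):
    band_gap = Energy()  # template used as a plain quantity
    band_gaps = BandGap()  # template used as a data frame sub-section


my_section = MySection()
my_section.band_gaps = BandGap.create()
my_section.band_gaps.fields = [Energy.create(1.0, 1.1)]
my_section.band_gaps.variables = [Temperature.create(200, 220)]
```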
+ +### Illustrating example + +```py +--8<-- "examples/metainfo/data_frames.py:9:31" + +--8<-- "examples/metainfo/data_frames.py:41:44" + + +--8<-- "examples/metainfo/data_frames.py:55:63" + +``` + +### Fields vs variables (and dimensions) + +Both _fields_ and _variables_ hold values (i.e. columns) in your data frame. +While _fields_ hold the actual data, _variables_ span the data space and its dimensions (i.e. column indices). + +_Variables_ and _dimensions_ are conceptually slightly different. First, _variables_ provide +the values on a certain dimension (via shared indices). Second, the number of _variables_ often, but not necessarily, +is equal to the number of dimensions. If some _variables_ depend on each other, they might +span shared dimensions. _Fields_, on the other hand, always provide values for all dimensions. + +Let's compare two datasets: one that you could plot in a heatmap and one that +you would plot in a scatter plot. In both cases, we have two _variables_ `Temperature` and `Pressure`, +as well as one _field_ `Energy`. + +In the heatmap scenario, we vary `Temperature` and `Pressure` independently and have an +`Energy` value (i.e. heatmap color/intensity) for each `Temperature` reading at every `Pressure` reading. +For two values on each _variable_, we respectively have 4 (2x2) _field_ values: + +```py +--8<-- "examples/metainfo/data_frames.py:89:97" +``` + +In the scatter plot scenario, we vary `Temperature` and `Pressure` together. +We only have one _field_ value (y-axis) for each pair of temperature and pressure (two x-axes) +values. +With two combined temperature and pressure readings, we respectively only have two field values: + +```py +--8<-- "examples/metainfo/data_frames.py:100:106" +``` + +We can use the `ValuesTemplate` kwarg `spanned_dimensions` to define how `Temperature` and +`Pressure` are related. The given indices refer to the indices of the field values and +represent the logical dimension of the data space. + +The first example without the `spanned_dimensions` is equivalent to this example +with `spanned_dimensions`. Here we span two independent dimensions: + +```py +--8<-- "examples/metainfo/data_frames.py:109:117" +``` + +### Fields and variables in the schema vs parsing + +The templates allow you to define _mandatory_ _fields_ and _variables_ in the schema. +These _fields_ and _variables_ have to be provided by the parser when instantiating the +respective dataset. However, parsers can provide additional _fields_ and _variables_. +This allows extending what is defined in the template without requiring new definitions. + +### Data representation + +Each call to `ValuesTemplate` and `DataFrameTemplate` produces a section definition +inheriting from `Values` and `DataFrame`, respectively. + +`Values` sections define a single quantity `values`. The `values` quantity always holds a numpy array +based on the type and shape given in the template. The shape of the `values` quantity +is the shape given in the template plus one dimension of arbitrary length. +_Variable_ values are always a flat list of values anyway (the values themselves can have a higher shape). +_Field_ values are always flattened. You might provide them in a higher dimensional array +according to the dimensionality of the _variables_, but they are always flattened as the +`values` quantity only provides one additional dimension, because the real number of dimensions +is only available at runtime.
The original (runtime) shape of _fields_ is stored in the `original_shape` `Values` quantity. + +`DataFrame` sections define repeating sub-sections for `fields` and `variables`. +The specific `DataFrame` section defined by the template will also hold an annotation `DatasetAnnotation` that keeps the `mandatory_fields` and `mandatory_variables` for runtime validation. +The `fields` and `variables` sub-sections provide a `Values` +instance for each _field_ in `mandatory_fields` and each _variable_ in `mandatory_variables`, +but they can also hold additional _fields_ and _variables_ to accommodate more +_fields_ and _variables_ determined during parsing. + +When a `ValuesTemplate` is used (e.g. `some_property = Energy()`), a quantity is created. +This quantity is a copy of the `values` quantity created by the template. +This allows reusing templated value quantities. When a `DataFrameTemplate` is used +(e.g. `some_property = BandGap()`), a sub-section is created. This sub-section targets +the `DataFrame` section defined by the template. + +#### Working with xarrays and pandas + +We provide utility functions on `DataFrame` that you can use to translate into the +respective xarray datasets and pandas data frames. + +!!! Warning + + The documentation on this is still pending. + ## Adding Python schemas to NOMAD The following describes how to integrate new schema modules into the existing code according diff --git a/docs/howto/programmatic/api.md b/docs/howto/programmatic/api.md index b26b0a5b39c635005330d0e1cd170b41d6bf386d..3a20fa79418d6a2693cd7e116a655d97502a98ac 100644 --- a/docs/howto/programmatic/api.md +++ b/docs/howto/programmatic/api.md @@ -1,6 +1,6 @@ # How to use the API -This guide is about using NOMAD's REST APIs directly, e.g. via Python's *request*. +This guide is about using NOMAD's REST APIs directly, e.g. via Python's `requests` library. To access the processed data with our client library `nomad-lab` follow [How to access processed data](archive_query.md). You can also watch our @@ -8,14 +8,17 @@ To access the processed data with our client library `nomad-lab` follow ## Different options to use the API -NOMAD offers all its functionality through application -programming interfaces (APIs). More specifically [RESTful HTTP APIs](https://en.wikipedia.org/wiki/Representational_state_transfer){:target="_blank"} that allows you -to use NOMAD as a set of resources (think data) that can be uploaded, accessed, downloaded, -searched for, etc. via [HTTP requests](https://en.wikipedia.org/wiki/Hypertext_Transfer_Protocol){:target="_blank"}. +NOMAD offers all its functionality through application programming interfaces (APIs). More +specifically +[RESTful HTTP APIs](https://en.wikipedia.org/wiki/Representational_state_transfer){:target="_blank"} +that allow you to use NOMAD as a set of resources (think data) that can be uploaded, +accessed, downloaded, searched for, etc. via +[HTTP requests](https://en.wikipedia.org/wiki/Hypertext_Transfer_Protocol){:target="_blank"}. -You can get an overview on all NOMAD APIs on the [API page]({{ nomad_url() }}../../gui/analyze/apis). -We will focus here on NOMAD's main API (v1). In fact, this API is also used by -the web interface and should provide everything you need. +You can get an overview on all NOMAD APIs on the +[API page]({{ nomad_url() }}../../gui/analyze/apis). We will focus here on NOMAD's main +API (v1). In fact, this API is also used by the web interface and should provide +everything you need.
There are different tools and libraries to use the NOMAD API that come with different trade-offs between expressiveness, learning curve, and convenience. @@ -25,24 +28,25 @@ trade-offs between expressiveness, learning curve, and convenience. For example to see the metadata for all entries with elements *Ti* and *O* go here: [{{ nomad_url() }}/v1/entries?elements=Ti&elements=O]({{ nomad_url() }}/v1/entries?elements=Ti&elements=O) -#### Use *curl* or *wget* +#### Use `curl` or `wget` -REST API's use resources located via URLs. You access URLs with *curl* or *wget*. Same +REST API's use resources located via URLs. You access URLs with `curl` or `wget`. Same *Ti*, *O* example as before: + ```sh curl "{{ nomad_url() }}/v1/entries?results.material.elements=Ti&results.material.elements=O" | python -m json.tool ``` -#### Use Python and requests +#### Use Python and `requests` -Requests is a popular Python library to use the internets HTTP protocol that is used to +`Requests` is a popular Python library to use the internet's HTTP protocol that is used to communicate with REST APIs. Install with `pip install requests`. See [the initial example](#using-request). #### Use our dashboard -The NOMAD API has an [OpenAPI dashboard]({{ nomad_url() }}/v1). This is an interactive documentation of all -API functions that allows you to try these functions in the browser. +The NOMAD API has an [OpenAPI dashboard]({{ nomad_url() }}/v1). This is an interactive +documentation of all API functions that allows you to try these functions in the browser. #### Use NOMAD's Python package @@ -50,7 +54,7 @@ Install the [NOMAD Python client library](./pythonlib.md) and use it's `ArchiveQ functionality for a more convenient query based access of archive data following the [How-to access the processed data](archive_query.md) guide. -## Using request +## Using `requests` If you are comfortable with REST APIs and using Pythons `requests` library, this example demonstrates the basic concepts of NOMAD's main API. You can get more documentation and @@ -118,10 +122,10 @@ This will give you something like this: } ``` -The `entry_id` is a unique identifier for, well, entries. You can use it to access -other entry data. For example, you want to access the entry's archive. More -precisely, you want to gather the formula and energies from the main workflow result. -The following requests the archive based on the `entry_id` and only requires some archive sections. +The `entry_id` is a unique identifier for, well, entries. You can use it to access other +entry data. For example, you want to access the entry's archive. More precisely, you want +to gather the formula and energies from the main workflow result. The following requests +the archive based on the `entry_id` and only requires some archive sections. ```py first_entry_id = response_json['data'][0]['entry_id'] @@ -222,11 +226,13 @@ The result will look like this: } ``` -You can work with the results in the given JSON (or respective Python dict/list) data already. -If you have [NOMAD's Python library](./pythonlib.md) installed , -you can take the archive data and use the Python interface. -The [Python interface](../plugins/schema_packages.md#wrap-data-with-python-schema-classes) will help with code-completion (e.g. in notebook environments), -resolve archive references (e.g. from workflow to calculation to system), and allow unit conversion: +You can work with the results in the given JSON (or respective Python dict/list) data +already. 
If you have [NOMAD's Python library](./pythonlib.md) installed, you can take the +archive data and use the Python interface. The +[Python interface](../plugins/schema_packages.md#wrap-data-with-python-schema-classes) +will help with code-completion (e.g. in notebook environments), resolve archive references +(e.g. from workflow to calculation to system), and allow unit conversion: + ```py from nomad.datamodel import EntryArchive from nomad.metainfo import units @@ -238,6 +244,7 @@ print(result.energy.total.value.to(units('eV'))) ``` This will give you an output like this: + ``` OOSrTiOOOSrTiOOOSrTiOFF -355626.93095025205 electron_volt @@ -252,7 +259,8 @@ the API: - Raw files, the files as they were uploaded to NOMAD. - Archive data, all of the extracted data for an entry. -There are also different entities (see also [Datamodel](../../explanation/basics.md)) with different functions in the API: +There are also different entities (see also [Datamodel](../../explanation/basics.md)) +with different functions in the API: - Entries - Uploads @@ -275,17 +283,20 @@ Let's discuss some of the common concepts. ### Response layout -Functions that have a JSON response, will have a common layout. First, the response will contain all keys and values of the request. The request is not repeated verbatim, but -in a normalized form. Abbreviations in search queries might be expanded, default values for optional parameters are added, or additional response specific information -is included. Second, the response will contain the results under the key `data`. +Functions that have a JSON response, will have a common layout. First, the response will +contain all keys and values of the request. The request is not repeated verbatim, but +in a normalized form. Abbreviations in search queries might be expanded, default values +for optional parameters are added, or additional response specific information is +included. Second, the response will contain the results under the key `data`. ### Owner -All functions that allow a query will also allow to specify the `owner`. Depending on -the API function, its default value will be mostly `visible`. Some values are only -available if you are [logged in](#authentication). +All functions that allow a query will also allow to specify the `owner`. Depending on the +API function, its default value will be mostly `visible`. Some values are only available +if you are [logged in](#authentication). {{ doc_snippet('owner')}} + ### Queries {{ doc_snippet('query') }} @@ -293,10 +304,11 @@ available if you are [logged in](#authentication). ### Pagination When you issue a query, usually not all results can be returned. Instead, an API returns -only one *page*. This behavior is controlled through pagination parameters, -like `page_site`, `page`, `page_offset`, or `page_after_value`. +only one *page*. This behavior is controlled through pagination parameters, like +`page_site`, `page`, `page_offset`, or `page_after_value`. Let's consider a search for entries as an example. + ```py response = requests.post( f'{base_url}/entries/query', @@ -313,8 +325,9 @@ response = requests.post( ) ``` -This will only result in a response with a maximum of 10 entries. The response will contain a -`pagination` object like this: +This will only result in a response with a maximum of 10 entries. The response will +contain a `pagination` object like this: + ```json { "page_size": 10, @@ -345,10 +358,11 @@ response = requests.post( } ) ``` + You will get the next 10 results. 
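Before the full example, here is a minimal sketch of such a pagination loop. It reuses the `/entries/query` request from above and assumes, as in the earlier examples, that `requests` is imported and `base_url` is set; it also assumes that `next_page_after_value` is simply absent on the last page.

```py
collected = []
page_after_value = None
while True:
    pagination = {'page_size': 10}
    if page_after_value is not None:
        pagination['page_after_value'] = page_after_value
    response = requests.post(
        f'{base_url}/entries/query',
        json={
            'query': {'results.material.elements': {'all': ['Ti', 'O']}},
            'pagination': pagination,
        },
    )
    response_json = response.json()
    collected.extend(entry['entry_id'] for entry in response_json['data'])
    # Stop when the server does not report a further page (or we have enough).
    page_after_value = response_json['pagination'].get('next_page_after_value')
    if page_after_value is None or len(collected) >= 100:
        break
```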
-Here is a full example that collects the first 100 formulas from entries that match -a certain query by paginating. +Here is a full example that collects the first 100 formulas from entries that match a +certain query by paginating. ```python --8<-- "examples/docs/api/pagination.py" @@ -357,16 +371,21 @@ a certain query by paginating. ### Authentication Most of the API operations do not require any authorization and can be freely used -without a user or credentials. However, to upload, edit, or view your own and potentially unpublished data, the API needs to authenticate you. +without a user or credentials. However, to upload, edit, or view your own and potentially +unpublished data, the API needs to authenticate you. The NOMAD API uses OAuth and tokens to authenticate users. We provide simple operations that allow you to acquire an *access token* via username and password: ```py +import os + import requests -response = requests.get( - '{{ nomad_url() }}/v1/auth/token', params=dict(username='myname', password='mypassword')) +response = requests.post( + '{{ nomad_url() }}/v1/auth/token', + data={'username': os.getenv('NOMAD_USERNAME'), 'password': os.getenv('NOMAD_PASSWORD')}, +) token = response.json()['access_token'] response = requests.get( @@ -379,12 +398,14 @@ If you have the [NOMAD Python package](./pythonlib.md) installed. You can use it implementation: ```py +import os + import requests from nomad.client import Auth response = requests.get( '{{ nomad_url() }}/v1/uploads', - auth=Auth(user='myname or email', password='mypassword')) + auth=Auth(user=os.getenv('NOMAD_USERNAME'), password=os.getenv('NOMAD_PASSWORD'))) uploads = response.json()['data'] ``` @@ -426,9 +447,9 @@ curl "{{ nomad_url() }}/v1/entries/raw?results.material.elements=Ti&results.mate ``` ## Access processed data (archives) -Above under [using requests](#using-request), you've already learned how to access -archive data. A special feature of the archive API functions is that you can define what is `required` -from the archives. +Above under [using requests](#using-request), you've already learned how to access archive +data. A special feature of the archive API functions is that you can define what is +`required` from the archives. ```py response = requests.post( @@ -476,13 +497,14 @@ or 10 concurrent requests. Consider to use endpoints that allow you to retrieve full pages of resources, instead of endpoints that force you to access resources one at a time. -See also the sections on [types of data](#different-kinds-of-data) and [pagination](#pagination). +See also the sections on [types of data](#different-kinds-of-data) and +[pagination](#pagination). However, pagination also has its limits and you might ask for pages that are too large. -If you get responses in the 400 range, e.g. **422 Unprocessable Content** or **400 Bad request**, -you might hit an api limit. Those responses are typically accompanied by an error message -in the response body that will inform you about the limit, e.g. the maximum allowed -page size. +If you get responses in the 400 range, e.g. **422 Unprocessable Content** or +**400 Bad request**, you might hit an api limit. Those responses are typically accompanied +by an error message in the response body that will inform you about the limit, e.g. the +maximum allowed page size. 
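One defensive pattern is to pace your requests and back off when such a response arrives. A minimal sketch, assuming the `requests` setup and `base_url` from the examples above; the concrete delays are illustrative, derived from the 30 requests per second figure, and not an official recommendation:

```py
import time

def get_with_backoff(url, params=None, max_retries=5):
    """GET a resource, backing off when the API reports a limit."""
    for attempt in range(max_retries):
        response = requests.get(url, params=params)
        if response.status_code < 400:
            return response
        # The response body typically names the limit that was hit.
        print(response.status_code, response.text)
        time.sleep(2 ** attempt)  # simple exponential backoff
    raise RuntimeError(f'giving up on {url} after {max_retries} attempts')

response = get_with_backoff(
    f'{base_url}/entries',
    params={'results.material.elements': ['Ti', 'O']},
)
time.sleep(0.1)  # stay well below 30 requests per second when looping
```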
## User Groups diff --git a/docs/reference/basesections.md b/docs/reference/basesections.md new file mode 100644 index 0000000000000000000000000000000000000000..3dbf454aedb90c5085b367aa6ccabf8e3924f69d --- /dev/null +++ b/docs/reference/basesections.md @@ -0,0 +1,7 @@ +# Base Sections + +The `nomad.datamodel.metainfo.basesections` Metainfo package contains a set of +_base sections_. They provides shared definitions across materials science domains and schemas. Some functionality, e.g. the workflow visualisation, depend on these definitions. Inherit from these base sections when you create your own schemas to +align your definitions with those of other schemas and to make use of respective functionality. + +{{ metainfo_package('nomad.datamodel.metainfo.basesections') }} diff --git a/examples/metainfo/data_frames.py b/examples/metainfo/data_frames.py new file mode 100644 index 0000000000000000000000000000000000000000..1760f38b5f30edc14bca3d71bd80b3acb6c32264 --- /dev/null +++ b/examples/metainfo/data_frames.py @@ -0,0 +1,146 @@ +import json +import numpy as np +from nomad.metainfo.data_frames import DataFrameTemplate, ValuesTemplate +from nomad.metainfo.metainfo import MSection, Package, Quantity, SubSection + + +m_package = Package() + +Energy = ValuesTemplate( + name='Energy', + type=np.float64, + shape=[], + unit='J', + iri='https://www.wikidata.org/wiki/Q11379', +) + +Temperature = ValuesTemplate( + name='Temperature', + type=np.float64, + shape=[], + unit='K', + iri='https://www.wikidata.org/wiki/Q11466', +) + +Pressure = ValuesTemplate( + name='Pressure', + type=np.float64, + shape=[], + unit='Pa', + iri='https://www.wikidata.org/wiki/Q39552', +) + +Count = ValuesTemplate( + name='Count', + type=np.int64, + shape=[], + unit='1', + iri='https://www.wikidata.org/wiki/Q1520033', +) + +BandGap = DataFrameTemplate( + name='BandGap', + mandatory_fields=[Energy], +) + +Dos = DataFrameTemplate( + name='Dos', + mandatory_fields=[Count], + mandatory_variables=[Energy], +) + +m_package.__init_metainfo__() + + +class MySection(MSection): + band_gaps = BandGap() + + +my_section = MySection() +my_section.band_gaps = BandGap.create() +my_section.band_gaps.fields = [Energy.create(1.0, 1.1)] +my_section.band_gaps.variables = [Temperature.create(200, 220)] + + +# If really necessary, you can specialize the template generated section class, +# but generally we would like to incentivise that users use the containing section +# to do this. 
+class MyBandGap(BandGap.section_cls): # type: ignore + type = Quantity(type=str) + + def normalize(self, archive, logger): + pass + + +class MySection(MSection): + band_gap = Energy() # Instantiate the Energy values template, creates a quantity + band_gaps = ( + BandGap() + ) # Instantiate the BandGap data frame template, creates a sub section + my_band_gaps = SubSection(section=MyBandGap) + dos = Dos() + + +# Value template instances (quantities) are used like quantities +my_section = MySection() +my_section.band_gap = 1.0 + +# Example of a "heatmap" scenario +my_section = MySection() +my_section.band_gaps = BandGap.create() +my_section.band_gaps.fields = [ + Energy.create(np.array([[1.0, 1.1], [1.3, 1.4], [1.6, 1.7]])) +] +my_section.band_gaps.variables = [ + Temperature.create(200, 220), + Pressure.create(1e5, 1.2e5, 1.4e5), +] + +# Example of a "scatter plot" scenario +my_section = MySection() +my_section.band_gaps = BandGap.create() +my_section.band_gaps.fields = [Energy.create(1.0, 1.1, 1.2)] +my_section.band_gaps.variables = [ + Temperature.create(200, 220, 240, spanned_dimensions=[0]), + Pressure.create(1e5, 1.2e5, 1.4e5, spanned_dimensions=[0]), +] + +# Explicitly spanned dimensions +my_section = MySection() +my_section.band_gaps = BandGap.create() +my_section.band_gaps.fields = [ + Energy.create(np.array([[1.0, 1.1], [1.3, 1.4], [1.6, 1.7]])) +] +my_section.band_gaps.variables = [ + Temperature.create(200, 220, spanned_dimensions=[0]), + Pressure.create(1e5, 1.2e5, 1.4e5, spanned_dimensions=[1]), +] + +# You can also reference values instead of setting them directly +my_section.dos = Dos.create( + fields=[Count.create(1, 2, 2, 4)], + variables=[ + Energy.create(1.0, 1.1), + Temperature.create(my_section.band_gaps.get_variable(Temperature)), + ], +) + +# If you have a specialized template section section its a normal sub section +# and the interface is a bit different +my_band_gaps = MyBandGap(type='foo') +my_section.band_gaps.fields = [Energy.create(1.0, 1.1)] +my_section.band_gaps.variables = [ + Temperature.create(200, 220, spanned_dimensions=[0]), + Pressure.create(1e5, 1.2e5, spanned_dimensions=[0]), +] + +# Access references values +print('###', my_section.dos.get_variable(Temperature).get_values()) + +# Run the constraints to validate field, variables, and dimensions +my_section.m_all_validate() + +print('---- schema ----') +print(json.dumps(m_package.m_to_dict(), indent=2)) +print('---- data ----') +print(json.dumps(my_section.m_to_dict(), indent=2)) diff --git a/gui/src/components/archive/ArchiveBrowser.js b/gui/src/components/archive/ArchiveBrowser.js index 6df3ebce8c7911c870a1769a7d963ec04a2575d6..97f5f9e3e74310db8d4fbb9b15af6aedeef994a0 100644 --- a/gui/src/components/archive/ArchiveBrowser.js +++ b/gui/src/components/archive/ArchiveBrowser.js @@ -33,7 +33,7 @@ import SaveIcon from '@material-ui/icons/Save' import { Alert } from '@material-ui/lab' import classNames from 'classnames' import DOMPurify from 'dompurify' -import { isArray, isNaN, partition, range } from 'lodash' +import { isArray, isNaN, isPlainObject, partition, range } from 'lodash' import { complex, format } from 'mathjs' import PropTypes from 'prop-types' import React, { useCallback, useContext, useEffect, useMemo, useRef, useState } from 'react' @@ -60,6 +60,7 @@ import { useErrors } from '../errors' import Markdown from '../Markdown' import { EntryButton } from '../nav/Routes' import { Quantity as Q } from '../units/Quantity' +import { Unit } from '../units/Unit' import { useDisplayUnit } from 
'../units/useDisplayUnit' import H5Web from '../visualization/H5Web' import Pagination from '../visualization/Pagination' @@ -673,11 +674,13 @@ class QuantityAdaptor extends ArchiveAdaptor { } render() { - if (quantityUsesFullStorage(this.def)) { - return <FullStorageQuantity value={this.obj} def={this.def}/> - } else { - return <Quantity value={this.obj} def={this.def}/> - } + return <Quantity value={this.obj} def={this.def}> + {this.obj?.m_attributes?.length > 0 && <Compartment title="attributes"> + {Object.keys(this.obj?.m_attributes).map(key => ( + <Item key={key} itemKey={key}>{key}</Item> + ))} + </Compartment>} + </Quantity> } } @@ -694,7 +697,7 @@ const convertComplexArray = (real, imag) => { } export function QuantityItemPreview({value, def}) { - const displayUnit = useDisplayUnit(def) + let {finalValue, displayUnit, storageUnit} = useQuantityData(value, def) if (isReference(def)) { return <Box component="span" fontStyle="italic"> @@ -720,14 +723,14 @@ export function QuantityItemPreview({value, def}) { const dimensions = [] let typeLabel = 'unknown' try { - let current = value.re || value.im || value + let current = finalValue.re || finalValue.im || finalValue for (let i = 0; i < def.shape.length; i++) { dimensions.push(current.length) current = current[0] } if (def.type.type_kind === 'python') { typeLabel = 'list' - } else if (typeof value === 'string') { + } else if (typeof finalValue === 'string') { typeLabel = 'HDF5 array' dimensions.length = 0 } else { @@ -752,17 +755,13 @@ export function QuantityItemPreview({value, def}) { </Typography> </Box> } else { - let finalValue if (def.type.type_data === 'nomad.metainfo.metainfo._Datetime' || def.type.type_data === 'nomad.metainfo.data_type.Datetime') { - finalValue = formatTimestamp(value) + finalValue = formatTimestamp(finalValue) } else if (def.type.type_data.startsWith?.('complex')) { - finalValue = convertComplexArray(value.re, value.im) - } else { - finalValue = value + finalValue = convertComplexArray(finalValue.re, finalValue.im) } - if (displayUnit) { - finalValue = new Q(finalValue, def.unit).to(displayUnit).value() + finalValue = new Q(finalValue, storageUnit).to(displayUnit).value() } return <Box component="span" whiteSpace="nowarp"> <Number component="span" variant="body1" value={finalValue} exp={8}/> @@ -776,12 +775,32 @@ QuantityItemPreview.propTypes = ({ def: PropTypes.object.isRequired }) +/** + * Hook for getting the final value and units for a quantity. Also supports + * quantities using full storage. + * + * @param {*} data Value of the quantity + * @param {*} def Defintion of the quantity + * @returns Object containing the final value, storage unit and display unit. 
+ */ +function useQuantityData(data, def) { + let storageUnit = def.unit + let displayUnit = useDisplayUnit(def) + let finalValue = data + if (quantityUsesFullStorage(def) && isPlainObject(data)) { + displayUnit = data?.m_unit && new Unit(data.m_unit) + storageUnit = data?.m_original_unit + finalValue = data?.m_value + } + return {finalValue, displayUnit, storageUnit} +} + export const QuantityValue = React.memo(function QuantityValue({value, def}) { const {uploadId} = useEntryStore() || {} - const displayUnit = useDisplayUnit(def) + let {finalValue, displayUnit, storageUnit} = useQuantityData(value, def) - const getRenderValue = useCallback(value => { - let finalValue + const getRenderValue = useCallback((value) => { + let finalValue, finalUnit if (def.type.type_data === 'nomad.metainfo.metainfo._Datetime' || def.type.type_data === 'nomad.metainfo.data_type.Datetime') { finalValue = formatTimestamp(value) } else if (def.type.type_data.startsWith?.('complex')) { @@ -789,22 +808,24 @@ export const QuantityValue = React.memo(function QuantityValue({value, def}) { } else { finalValue = value } - let finalUnit - if (def.unit && typeof finalValue !== 'string') { - const systemUnitQ = new Q(finalValue, def.unit).to(displayUnit) + + if (typeof finalValue !== 'string' && storageUnit && displayUnit) { + const systemUnitQ = new Q(finalValue, storageUnit).to(displayUnit) finalValue = systemUnitQ.value() finalUnit = systemUnitQ.label() } + return [finalValue, finalUnit] - }, [def, displayUnit]) + }, [def, storageUnit, displayUnit]) const isMathValue = (def.type.type_kind === 'numpy' || def.type.type_kind === 'python') && typeof value !== 'string' if (isMathValue) { - const [finalValue, finalUnit] = getRenderValue(value) + const [renderValue, finalUnit] = getRenderValue(finalValue) if (def.shape.length > 0) { + console.log(renderValue) return <Box textAlign="center"> <Matrix - values={finalValue} + values={renderValue} shape={def.shape} invert={def.shape.length === 1} type={def.type.type_data} @@ -818,54 +839,53 @@ export const QuantityValue = React.memo(function QuantityValue({value, def}) { {finalUnit && <Typography noWrap>{finalUnit}</Typography>} </Box> } else { - return <Number value={finalValue} exp={16} variant="body1" unit={finalUnit}/> + return <Number value={renderValue} exp={16} variant="body1" unit={finalUnit}/> } } else if (def.m_annotations?.browser?.[0]?.render_value === 'HtmlValue' || def.m_annotations?.eln?.[0]?.component === 'RichTextEditQuantity') { - const html = DOMPurify.sanitize(value) + const html = DOMPurify.sanitize(finalValue) return <div dangerouslySetInnerHTML={{__html: html}}/> } else if (def.type?.type_data === 'nomad.metainfo.metainfo._JSON' || def.type?.type_data === 'nomad.metainfo.data_type.JSON') { return <ReactJson name="value" - src={value} + src={finalValue} enableClipboard={false} collapsed={2} displayObjectSize={false} /> } else { if (def.type.type_data.startsWith?.('complex')) { - value = convertComplexArray(value.re, value.im) + finalValue = convertComplexArray(finalValue.re, finalValue.im) - return Array.isArray(value) + return Array.isArray(finalValue) ? 
<ul style={{margin: 0}}> - {value.map((value, index) => <li key={index}><Typography>{value}</Typography></li>)} + {finalValue.map((value, index) => <li key={index}><Typography>{value}</Typography></li>)} </ul> - : <Typography>{value}</Typography> - } else if (Array.isArray(value)) { + : <Typography>{finalValue}</Typography> + } else if (Array.isArray(finalValue)) { return <ul style={{margin: 0}}> - {value.map((value, index) => { - const [finalValue] = getRenderValue(value) + {finalValue.map((value, index) => { + const [renderValue] = getRenderValue(value) return <li key={index}> - <Typography>{typeof finalValue === 'object' ? JSON.stringify(finalValue) : finalValue?.toString()}</Typography> + <Typography>{typeof renderValue === 'object' ? JSON.stringify(renderValue) : renderValue?.toString()}</Typography> </li> })} </ul> } else if (def.type?.type_data === 'nomad.datamodel.hdf5.HDF5Dataset' || def.type?.type_data === 'nomad.datamodel.hdf5.HDF5Reference') { - const {h5UploadId, h5File, h5Source, h5Path} = matchH5Path(value) + const {h5UploadId, h5File, h5Source, h5Path} = matchH5Path(finalValue) return <Compartment title='hdf5'> <H5Web upload_id={h5UploadId || uploadId} filename={h5File} initialPath={h5Path} source={h5Source} sidebarOpen={false}></H5Web> </Compartment> } else if (def?.type?.type_kind === 'custom' && def?.type?.type_data === 'nomad.datamodel.data.Query') { - return <Query value={value} def={def}/> + return <Query value={finalValue} def={def}/> } else { - const [finalValue] = getRenderValue(value) - return <Typography>{typeof finalValue === 'object' ? JSON.stringify(finalValue) : finalValue?.toString()}</Typography> + const [renderValue] = getRenderValue(finalValue) + return <Typography>{typeof renderValue === 'object' ? JSON.stringify(renderValue) : renderValue?.toString()}</Typography> } } }) QuantityValue.propTypes = ({ value: PropTypes.any, - def: PropTypes.object.isRequired, - unit: PropTypes.string + def: PropTypes.object.isRequired }) const InheritingSections = React.memo(function InheritingSections({def, section, lane}) { @@ -1072,7 +1092,7 @@ export function Section({section, def, property, parentRelation, sectionIsEditab const storage = section[quantityDef.name] || {} return <React.Fragment key={key}> {Object.keys(storage).map(quantityName => - renderQuantityItem(key, quantityName, quantityDef, storage[quantityName]?.m_value, disabled) + renderQuantityItem(key, quantityName, quantityDef, storage[quantityName], disabled) )} </React.Fragment> } else { @@ -1623,23 +1643,7 @@ SectionPlots.propTypes = { entryId: PropTypes.string } -function FullStorageQuantity({value, def}) { - const attributes = value.m_attributes || {} - return <Quantity value={value.m_value} def={def} unit={value.m_unit}> - {Object.keys(attributes).length > 0 && <Compartment title="attributes"> - {Object.keys(attributes).map(key => ( - <Item key={key} itemKey={key}>{key}</Item> - ))} - </Compartment>} - </Quantity> -} - -FullStorageQuantity.propTypes = ({ - value: PropTypes.any, - def: PropTypes.object.isRequired -}) - -function Quantity({value, def, unit, children}) { +function Quantity({value, def, children}) { const {prev} = useLane() return <Content> <ArchiveTitle def={def} data={value} kindLabel="value"/> @@ -1657,7 +1661,6 @@ function Quantity({value, def, unit, children}) { <QuantityValue value={value} def={def} - unit={unit} /> </Compartment> {children} diff --git a/gui/src/components/archive/Quantity.spec.js b/gui/src/components/archive/Quantity.spec.js index 
0e269c998b643e6f17c9c64fd58951a7aa357f9f..f1ba5a24c2676b06e8b6b047e2089da57c278bdf 100644 --- a/gui/src/components/archive/Quantity.spec.js +++ b/gui/src/components/archive/Quantity.spec.js @@ -67,6 +67,18 @@ test.each([ undefined, 'mm', '3500Â mm' + ], + [ + 'full storage', + { + m_value: 3.5, + m_unit: 'm', + m_original_unit: 'm' + }, + undefined, + undefined, + 'm', + '3.50000Â m' ] ])('Test QuantityItemPreview %s', async (name, value, unit, displayUnit, elnUnit, expected) => { const def = { @@ -85,7 +97,7 @@ test.each([ render( <QuantityItemPreview - def={{name: 'value1', shape: [], type: {type_kind: 'python', type_data: 'float'}, ...def}} + def={{name: 'value1', shape: [], type: {type_kind: 'python', type_data: 'float'}, variable: !!value?.m_value, ...def}} value={value} /> ) @@ -182,6 +194,22 @@ describe("Test QuantityValue", () => { false, '(1)', 'mm' + ], + [ + 'full storage', + { + m_value: [3.5], + m_unit: 'm', + m_original_unit: 'm' + }, + [1], + undefined, + undefined, + undefined, + '3.50000', + false, + '(1)', + 'm' ] ])('%s', async (name, value, shape, unit, displayUnit, elnUnit, expectedValue, scientific, expectedDim, expectedUnit) => { const def = { @@ -201,7 +229,7 @@ describe("Test QuantityValue", () => { const screen = render( <QuantityValue - def={{name: 'value1', type: {type_kind: 'python', type_data: 'float'}, ...def}} + def={{name: 'value1', type: {type_kind: 'python', type_data: 'float'}, variable: !!value?.m_value, ...def}} value={value} /> ) diff --git a/gui/src/components/entry/properties/WorkflowCard.js b/gui/src/components/entry/properties/WorkflowCard.js index 2e8eb39577051491cb924da00216ea305d582fdf..0ca4c6a8275f954106a376e36536ae14d076fd69 100644 --- a/gui/src/components/entry/properties/WorkflowCard.js +++ b/gui/src/components/entry/properties/WorkflowCard.js @@ -22,7 +22,7 @@ import { makeStyles, Tooltip, IconButton, TextField, FormControl } from '@materi import { Replay, Undo, Label, LabelOff, PlayArrowSharp, StopSharp, Clear } from '@material-ui/icons' import { useHistory } from 'react-router-dom' import { isPlainObject } from 'lodash' -import { PropertyCard, PropertyGrid } from './PropertyCard' +import { PropertyCard } from './PropertyCard' import { resolveNomadUrl, resolveInternalRef, createEntryUrl } from '../../../utils' import { useApi } from '../../api' import { getUrl } from '../../nav/Routes' @@ -203,6 +203,15 @@ const getLinks = async (source, query) => { const isLinked = (source, target) => { if (source.url === target.url) return false + const inputs = [] + if (target.type === 'tasks' && target.nodes) { + inputs.push(...target.nodes.filter(node => node.type && node.type.startsWith('inputs')).map(node => node.url)) + } else { + inputs.push(target.url) + } + + if (inputs.includes(source.url)) return true + const outputs = [] if (source.type === 'tasks' && source.nodes) { outputs.push(...source.nodes.filter(node => node.type === 'outputs').map(node => node.url)) @@ -210,12 +219,7 @@ const getLinks = async (source, query) => { outputs.push(source.url) } - const inputs = [] - if (target.type === 'tasks' && target.nodes) { - inputs.push(...target.nodes.filter(node => node.type && node.type.startsWith('inputs')).map(node => node.url)) - } else { - inputs.push(target.url) - } + if (outputs.includes(target.url)) return true let linked = false for (const output of outputs) { @@ -287,9 +291,10 @@ const Graph = React.memo(({ const svgRef = useRef() const history = useHistory() const asyncError = useAsyncError() + const width = 
document.getElementById('container')?.clientWidth || 675 const finalLayout = useMemo(() => { const defaultLayout = { - width: 700, + width: width, margin: {top: 60, bottom: 60, left: 40, right: 40}, circleRadius: 30, markerWidth: 4, @@ -313,7 +318,7 @@ const Graph = React.memo(({ } } return {...defaultLayout, ...layout} - }, [layout]) + }, [layout, width]) const [tooltipContent, setTooltipContent] = useState('') const [tooltipPosition, setTooltipPosition] = useState({x: undefined, y: undefined}) const [showTooltip, setShowTooltip] = useState(false) @@ -328,6 +333,7 @@ const Graph = React.memo(({ const svg = d3.select(svgRef.current) svg.selectAll('g').remove() + svg.selectAll('defs').remove() const inOutColor = d3.interpolateRgb(color.input, color.output)(0.5) @@ -351,6 +357,11 @@ const Graph = React.memo(({ return tasks.length > 0 } + const isRoot = (d) => { + const rootSections = ['data'] + return rootSections.includes(d.sectionType) + } + const nodeColor = (d) => { if (d.color) return d.color if (d.type === 'link') return '#ffffff' @@ -434,29 +445,21 @@ const Graph = React.memo(({ .style('alignment-baseline', 'middle') gLegend - .on('mouseover', () => { + .on('mouseenter', () => { let tooltip = '' if (label === 'input') { - tooltip = <p> - Input to a task or workflow. - </p> + tooltip = 'Input to a task or workflow.' } else if (label === 'output') { - tooltip = <p> - Output from a task or workflow. - </p> + tooltip = 'Output from a task or workflow.' } else if (label === 'workflow') { - tooltip = <p> - Task containing further sub-tasks. - </p> + tooltip = 'Task containing further sub-tasks.' } else if (label === 'task') { - tooltip = <p> - Elementary task with inputs and outputs. - </p> + tooltip = 'Elementary task with inputs and outputs.' } setShowTooltip(true && legend.attr('visibility') === 'visible') setTooltipContent(tooltip) }) - .on('mouseout', () => { + .on('mouseleave', () => { setShowTooltip(false) }) } @@ -887,15 +890,15 @@ const Graph = React.memo(({ d3.select(`#icon-${d.id}`).style('stroke-opacity', 1).style('stroke', color.outlineHigh) if (d.id === source.id) { if (!previousNode || previousNode === 'root') return - // setShowTooltip(true) - setTooltipContent(<p>Click to go back up</p>) + setShowTooltip(true) + setTooltipContent('Click to go back up') } else if (['inputs', 'outputs'].includes(d.type)) { setShowTooltip(true) - setTooltipContent(<p>Click to switch {d.type} filter</p>) + setTooltipContent(`Click to switch ${d.type} filter`) } else if (d.type === 'tasks') { const sectionType = d.sectionType === 'tasks' ? 'task' : 'workflow' setShowTooltip(true) - setTooltipContent(<p>Click to expand {sectionType}</p>) + setTooltipContent(`Click to expand ${sectionType}`) } } @@ -961,8 +964,8 @@ const Graph = React.memo(({ if (d.id === source.id) return 0.2 return 1 }) - .on('mouseover', handleMouseOverIcon) - .on('mouseout', handleMouseOutIcon) + .on('mouseenter', handleMouseOverIcon) + .on('mouseleave', handleMouseOutIcon) .on('click', handleClickIcon) node @@ -972,8 +975,8 @@ const Graph = React.memo(({ .attr('id', d => `icon-${d.id}`) .attr('stroke', color.outline) .attr('fill', d => nodeColor(d)) - .on('mouseover', handleMouseOverIcon) - .on('mouseout', handleMouseOutIcon) + .on('mouseenter', handleMouseOverIcon) + .on('mouseleave', handleMouseOutIcon) .on('click', handleClickIcon) node.append('text') @@ -988,22 +991,22 @@ const Graph = React.memo(({ if (!d.entryId || !d.parent) return let path = `entry/id/${d.entryId}` const sectionPath = d.path ? 
d.path.replace(/\/(?=\d)/g, ':') : null - path = isWorkflow(d) ? path : sectionPath ? `${path}/data${sectionPath}` : path + path = isWorkflow(d) || isRoot(d) ? path : sectionPath ? `${path}/data${sectionPath}` : path const url = getUrl(path) history.push(url) }) - .on('mouseover', d => { + .on('mouseenter', d => { if (!d.type || !d.parent) return if (!d.sectionType) return d3.select(`#text-${d.id}`).style('font-weight', 'bold') .text(d.name) - const text = isWorkflow(d) ? 'overview page' : 'archive section' + const text = isWorkflow(d) || isRoot(d) ? 'overview page' : 'archive section' if (d.entryId) { setShowTooltip(true) - setTooltipContent(<p>Click to go to {text}</p>) + setTooltipContent(`Click to go to ${text}`) } }) - .on('mouseout', d => { + .on('mouseleave', d => { setShowTooltip(false) d3.select(`#text-${d.id}`).style('font-weight', null) .text(d => trimName(d.name)) @@ -1142,15 +1145,15 @@ const Graph = React.memo(({ setNodesPosition(linkNode) }) }) - .on('mouseover', d => { + .on('mouseenter', d => { d3.select(`#link-${d.id}`).style('stroke-opacity', 1.0).style('stroke', color.linkHigh) svg.select(`.marker-${d.id}`).attr('fill-opacity', 1.0).style('stroke', color.linkHigh).style('fill', color.linkHigh) d3.select(`#icon-${d.source.id}`).style('stroke', color.linkHigh).style('stroke-opacity', 1.0) d3.select(`#icon-${d.target.id}`).style('stroke', color.linkHigh).style('stroke-opacity', 1.0) setShowTooltip(d.label) - setTooltipContent(<p>{d.label}</p>) + setTooltipContent(d.label) }) - .on('mouseout', d => { + .on('mouseleave', d => { d3.select(`#link-${d.id}`).style('stroke-opacity', 0.5).style('stroke', color.link) svg.select(`.marker-${d.id}`).attr('fill-opacity', 0.5).style('stroke', color.link).style('fill', color.link) d3.select(`#icon-${d.source.id}`).style('stroke', color.outline).style('stroke-opacity', 0.5) @@ -1194,8 +1197,6 @@ const Graph = React.memo(({ <Tooltip title={tooltipContent} open={showTooltip} - enterDelay={1000} - enterNextDelay={0} onMouseMove={event => setTooltipPosition({x: event.pageX, y: event.pageY})} PopperProps={ {anchorEl: { @@ -1212,7 +1213,7 @@ const Graph = React.memo(({ }} } > - <div id='tooltip'> + <div> <svg className={classes.root} ref={svgRef}></svg> </div> </Tooltip> @@ -1316,9 +1317,9 @@ const WorkflowCard = React.memo(({archive}) => { </div> return graph && <PropertyCard title='Workflow Graph' action={actions}> - <PropertyGrid> + <div id='container'> {graph} - </PropertyGrid> + </div> </PropertyCard> }) diff --git a/gui/src/components/search/Filter.js b/gui/src/components/search/Filter.js index dd4d4782e05af72ae5b0b76f271b03d0c6f2ef3e..a7f863ea7c689a5c808bbf9c83907c4b6737fa0f 100644 --- a/gui/src/components/search/Filter.js +++ b/gui/src/components/search/Filter.js @@ -144,9 +144,7 @@ export class Filter { this.parent = parent this.group = params.group this.placeholder = params?.placeholder - this.multiple = params?.multiple === undefined - ? multiTypes.has(this.dtype) - : params?.multiple + this.multiple = params?.multiple ?? multiTypes.has(this.dtype) this.exclusive = params?.exclusive === undefined ? true : params?.exclusive this.queryMode = params?.queryMode || (this.multiple ? 
'any' : undefined) this.options = params?.options || getEnumOptions(this.quantity) diff --git a/gui/src/components/search/FilterRegistry.js b/gui/src/components/search/FilterRegistry.js index 4d91c63e73f5a763f4b272f9d82bb300b4d34e4f..cfe9f46e9a459985458cdb846340574e8a890db0 100644 --- a/gui/src/components/search/FilterRegistry.js +++ b/gui/src/components/search/FilterRegistry.js @@ -143,7 +143,7 @@ const termQuantityAll = {aggs: {terms: {size: 5}}, exclusive: false, multiple: t const termQuantityAllNonExclusive = {...termQuantityNonExclusive, queryMode: 'all'} const noAggQuantity = {} const nestedQuantity = {} -const noQueryQuantity = {multiple: false, global: true} +const noQueryQuantity = {global: true} const numberHistogramQuantity = {multiple: false, exclusive: false} // Filters that directly correspond to a metainfo value @@ -587,6 +587,7 @@ registerFilter( 'combine', { ...noQueryQuantity, + dtype: DType.Boolean, default: true, description: 'If selected, your filters may be matched from several entries that contain the same material. When unchecked, the material has to have a single entry that matches all your filters.' } diff --git a/gui/src/components/search/input/InputTerms.js b/gui/src/components/search/input/InputTerms.js index 690e04920be11881fb12e014e0342df7c5e404be..e71af1169764c90f0f993f44a9ed4684627c24cc 100644 --- a/gui/src/components/search/input/InputTerms.js +++ b/gui/src/components/search/input/InputTerms.js @@ -28,10 +28,24 @@ import InputUnavailable from './InputUnavailable' import Placeholder from '../../visualization/Placeholder' import { useSearchContext } from '../SearchContext' import { isNil, isNumber } from 'lodash' +import { DType } from '../../../utils' import Pagination from '../../visualization/Pagination' import { guiState } from '../../GUIMenu' import { InputTextQuantity } from './InputText' +/** + * Converts a string value to its appropriate type based on the provided dtype. + * Needed for converting booleans used as object keys (object keys are always + * strings). + * + * @param {string} value - The value to be converted. + * @param {DType} dtype - The data type to convert the value to. + * @returns {boolean|string} - The converted value. + */ +function getFinalKey(value, dtype) { + return dtype === DType.Boolean ? value === 'true' : value +} + /** * Generic input component that can be configured to fit several use cases. The * most typical configufations are: @@ -152,6 +166,7 @@ const InputTerms = React.memo(({ // results or change in the available options. 
useEffect(() => { let options = Object.entries(finalOptions).reduce((opt, [key, value]) => { + key = getFinalKey(key, filterData[searchQuantity]?.dtype) const selected = filter?.has(key) || false opt[key] = { checked: selected, @@ -184,7 +199,7 @@ const InputTerms = React.memo(({ } setVisibleOptions(options) - }, [agg?.data, filter, finalOptions, fixedOptions, isStatisticsEnabled, showStatistics, sortStatic]) + }, [agg?.data, filter, filterData, finalOptions, fixedOptions, isStatisticsEnabled, searchQuantity, showStatistics, sortStatic]) // Show more values const handleShowMore = useCallback(() => { @@ -214,9 +229,9 @@ const InputTerms = React.memo(({ newOptions[key].checked = selected const checked = Object.entries(newOptions) .filter(([key, value]) => value.checked) - .map(([key, value]) => key) + .map(([key, value]) => getFinalKey(key, filterData[searchQuantity]?.dtype)) setFilter(new Set(checked)) - }, [setFilter, visibleOptions]) + }, [setFilter, visibleOptions, filterData, searchQuantity]) // Create the search component const searchComponent = useMemo(() => { diff --git a/gui/src/components/units/useDisplayUnit.js b/gui/src/components/units/useDisplayUnit.js index 8036688340a99776072b817e7fa198ce3600327b..52d5937f15e7a4b1a768df694e3d16477bc74ed8 100644 --- a/gui/src/components/units/useDisplayUnit.js +++ b/gui/src/components/units/useDisplayUnit.js @@ -4,6 +4,12 @@ import {Unit} from "./Unit" import {useUnitContext} from "./UnitContext" import {getFieldProps} from "../editQuantity/StringEditQuantity" +/** + * Used to retrieve the unit to use for displaying a quantity. + * + * @param {*} quantityDef Definition for the quantity + * @returns {Unit} The unit to use for displaying the quantity. + */ export function useDisplayUnit(quantityDef) { const {units} = useUnitContext() const {raiseError} = useErrors() diff --git a/gui/src/components/uploads/UploadPage.spec.js b/gui/src/components/uploads/UploadPage.spec.js index dcba24b1698988e8d38b65e5600b49a39e64c7be..8cb86d5573293210bd7bed710a3e2a6b1106f0bf 100644 --- a/gui/src/components/uploads/UploadPage.spec.js +++ b/gui/src/components/uploads/UploadPage.spec.js @@ -516,7 +516,7 @@ test('Toggle visible for all checkbox; check embargo, icon', async () => { await user.click(await screen.findByRole('option', { name: '36' })) expect(embargoButton).toHaveTextContent('36') expect(embargoHelper).toHaveTextContent('months before the data becomes public') - expect(screen.getByTooltip('Unpublished, accessible by you, coauthors and reviewers')).toBeInTheDocument() + expect(screen.getByTooltip('Unpublished, accessible only by you')).toBeInTheDocument() await testAndToggleCheckbox(false) expect(embargoButton).toHaveAttribute('aria-disabled', 'true') @@ -528,7 +528,7 @@ test('Toggle visible for all checkbox; check embargo, icon', async () => { expect(embargoButton).not.toHaveAttribute('aria-disabled', 'true') expect(embargoButton).toHaveTextContent('No embargo') expect(embargoHelper).toHaveTextContent('publish without embargo') - expect(screen.getByTooltip('Unpublished, accessible by you, coauthors and reviewers')).toBeInTheDocument() + expect(screen.getByTooltip('Unpublished, accessible only by you')).toBeInTheDocument() await testAndToggleCheckbox(false, { skipToggle: true }) }) diff --git a/gui/src/components/uploads/UploadStatusIcon.js b/gui/src/components/uploads/UploadStatusIcon.js index 88f106402ddee6caafa42db076d0ec0b489af260..b03db43d641ff3e528ad0e8160f96035245992f4 100644 --- a/gui/src/components/uploads/UploadStatusIcon.js +++ 
b/gui/src/components/uploads/UploadStatusIcon.js @@ -70,7 +70,7 @@ const UploadStatusIcon = React.memo(({data, user, ...props}) => { if (isVisibleForAll) { tooltip = "Unpublished but accessible by everyone" } else if (isMainAuthor) { - tooltip = "Unpublished, accessible by you, coauthors and reviewers" + tooltip = "Unpublished, accessible only by you" } else if (isCoauthor) { tooltip = "Unpublished, accessible by you as a coauthor" } else if (isReviewer) { diff --git a/gui/src/components/uploads/UploadStatusIcon.spec.js b/gui/src/components/uploads/UploadStatusIcon.spec.js index 4220c96dd1fe9084ed8657e593d794104d862d2c..6ee021c2ae2a4b23a517d0af85f145a34047534e 100644 --- a/gui/src/components/uploads/UploadStatusIcon.spec.js +++ b/gui/src/components/uploads/UploadStatusIcon.spec.js @@ -30,7 +30,7 @@ describe('test different states', function() { ['published, embargo, viewer', 'Published with embargo and accessible by you as a reviewer', {published: true, with_embargo: true, main_author: 'a', viewers: [{user_id: 'b'}]}, {sub: 'b'}], ['published, embargo, external', 'Published with embargo and not accessible by you', {published: true, with_embargo: true, main_author: 'a', viewers: [{user_id: 'b'}]}, {sub: 'c'}], ['published, embargo, no user data', 'Published with embargo and might become accessible after login', {published: true, with_embargo: true, main_author: 'a', viewers: [{user_id: 'b'}]}, undefined], - ['unpublished, main author', 'Unpublished, accessible by you, coauthors and reviewers', {published: false, main_author: 'a'}, {sub: 'a'}], + ['unpublished, main author', 'Unpublished, accessible only by you', {published: false, main_author: 'a'}, {sub: 'a'}], ['unpublished, coauthor', 'Unpublished, accessible by you as a coauthor', {published: false, main_author: 'a', coauthors: ['b']}, {sub: 'b'}], ['unpublished, author', 'Unpublished, accessible by you as a coauthor', {published: false, main_author: 'a', authors: [{user_id: 'b'}]}, {sub: 'b'}], ['unpublished, reviewer', 'Unpublished, accessible by you as a reviewer', {published: false, main_author: 'a', reviewers: ['b']}, {sub: 'b'}], diff --git a/gui/src/components/uploads/UploadsPage.spec.js b/gui/src/components/uploads/UploadsPage.spec.js index b774b88a0a1f504394484cca960f75c50e63d945..7f26706834cf8468505380ae9501f4f3f1ac82fa 100644 --- a/gui/src/components/uploads/UploadsPage.spec.js +++ b/gui/src/components/uploads/UploadsPage.spec.js @@ -51,7 +51,7 @@ test('Render uploads page: sort by upload create time', async () => { expect(within(rows[i]).queryByText(`dft_upload_${11 - i}`)).toBeInTheDocument() expect(within(rows[i]).queryByTitle(((i + 1) % 2 === 0 ? 'Published and accessible by everyone' - : 'Unpublished, accessible by you, coauthors and reviewers' + : 'Unpublished, accessible only by you' ))).toBeInTheDocument() } @@ -77,7 +77,7 @@ test('Render uploads page: sort by upload create time', async () => { expect(within(rows[i]).queryByText(`dft_upload_${i + 1}`)).toBeInTheDocument() expect(within(rows[i]).queryByTitle(((i + 1) % 2 === 0 ? 
'Published and accessible by everyone' - : 'Unpublished, accessible by you, coauthors and reviewers' + : 'Unpublished, accessible only by you' ))).toBeInTheDocument() } diff --git a/mkdocs.yml b/mkdocs.yml index 4fa2bc73058f0316339b91ae8a986de7f7aa2428..bbb8a6363b34e159fe26e9f29244577404279f8b 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -87,6 +87,7 @@ nav: - Reference: - reference/config.md - reference/annotations.md + - reference/basesections.md - reference/cli.md - reference/plugins.md - reference/parsers.md @@ -140,7 +141,7 @@ use_directory_urls: false plugins: - search - macros: - module_name: nomad/mkdocs + module_name: nomad/mkdocs/__init__ - redirects: redirect_maps: "pythonlib.md": "howto/programmatic/pythonlib.md" diff --git a/nomad/app/dcat/common.py b/nomad/app/dcat/common.py index 9a91b70c9feb5bc6c6314f217c015c26852d4337..90e6fc4876220a103f264a83c77806d0d34e9391 100644 --- a/nomad/app/dcat/common.py +++ b/nomad/app/dcat/common.py @@ -16,14 +16,13 @@ # limitations under the License. # -from typing import Optional -from fastapi import Response, Query, Header import urllib.parse -from rdflib import Graph from enum import Enum -from nomad.config import config +from fastapi import Header, Query, Response +from rdflib import Graph +from nomad.config import config root_path = f'{config.services.api_base_path}/dcat' base_url = config.api_url(api='dcat') diff --git a/nomad/app/dcat/main.py b/nomad/app/dcat/main.py index 49d4d68d50fcfb197fb6d2afb15718fcf9414ce4..0fbfa0d4f6d193a255273a904c3afdca47f25769 100644 --- a/nomad/app/dcat/main.py +++ b/nomad/app/dcat/main.py @@ -16,10 +16,11 @@ # limitations under the License. # -from fastapi import FastAPI, status, Request +import traceback + +from fastapi import FastAPI, Request, status from fastapi.middleware.cors import CORSMiddleware from fastapi.responses import JSONResponse, RedirectResponse -import traceback from nomad import utils from nomad.config import config @@ -27,7 +28,6 @@ from nomad.config import config from .common import root_path from .routers import dcat - logger = utils.get_logger(__name__) diff --git a/nomad/app/dcat/mapping.py b/nomad/app/dcat/mapping.py index d22399a7628fba69adf3d0309b1b1cf1f63072dc..c15ec0a87693b66ae4be9c291f64ff839517dc42 100644 --- a/nomad/app/dcat/mapping.py +++ b/nomad/app/dcat/mapping.py @@ -16,15 +16,15 @@ # limitations under the License. # -from rdflib import Graph, Literal, RDF, URIRef, BNode -from rdflib.namespace import Namespace, DCAT, DCTERMS as DCT, FOAF, RDF +from rdflib import RDF, BNode, Graph, Literal, URIRef +from rdflib.namespace import DCAT, FOAF, RDF, Namespace +from rdflib.namespace import DCTERMS as DCT from nomad.config import config from nomad.datamodel import User from .common import url - VCARD = Namespace('http://www.w3.org/2006/vcard/ns#') HYDRA = Namespace('http://www.w3.org/ns/hydra/core#') diff --git a/nomad/app/dcat/routers/dcat.py b/nomad/app/dcat/routers/dcat.py index 7dc52a01be1fa3dea138428aebff2653e6552b61..0aa5b6b6ece36af188994b1a6ac63f91d0b5ffb6 100644 --- a/nomad/app/dcat/routers/dcat.py +++ b/nomad/app/dcat/routers/dcat.py @@ -16,22 +16,27 @@ # limitations under the License. 
# -from typing import Union -from fastapi import APIRouter, Query, Path, HTTPException, status, Depends -from datetime import datetime, date +from datetime import date, datetime +from enum import Enum + from elasticsearch_dsl import Q +from fastapi import APIRouter, Depends, HTTPException, Path, Query, status from nomad import utils -from nomad.utils import strip -from nomad.search import search -from nomad.app.v1.models import MetadataPagination, HTTPExceptionModel +from nomad.app.v1.models import HTTPExceptionModel, MetadataPagination from nomad.app.v1.utils import create_responses +from nomad.search import search +from nomad.utils import strip from ..common import rdf_response from ..mapping import Mapping router = APIRouter() -default_tag = 'dcat' + + +class APITag(str, Enum): + DEFAULT = 'dcat' + logger = utils.get_logger(__name__) @@ -58,7 +63,7 @@ _raw_response = ( @router.get( '/datasets/{entry_id}', - tags=[default_tag], + tags=[APITag.DEFAULT], summary='Returns a DCAT dataset for a given NOMAD entry id.', responses=create_responses(_bad_id_response, _raw_response), ) @@ -83,7 +88,7 @@ async def get_dataset( @router.get( '/catalog/', - tags=[default_tag], + tags=[APITag.DEFAULT], summary='Returns a DCAT dataset for a given NOMAD entry id.', responses=create_responses(_raw_response), ) diff --git a/nomad/app/h5grove_app.py b/nomad/app/h5grove_app.py index 110eb589a7cdac8d0c436fb84d5b4ddbae3c9896..8e01225ff41914e31848e83f5e3d250ef4300f13 100644 --- a/nomad/app/h5grove_app.py +++ b/nomad/app/h5grove_app.py @@ -17,23 +17,24 @@ # from __future__ import annotations -from fastapi import FastAPI, status, Request, Depends -from fastapi.middleware.cors import CORSMiddleware -from fastapi.responses import JSONResponse -import traceback import re +import traceback import urllib.parse -import h5py -from typing import Dict, Any, IO from collections.abc import Callable +from typing import IO, Any -from h5grove import fastapi_utils as h5grove_router, utils as h5grove_utils +import h5py +from fastapi import Depends, FastAPI, Request, status +from fastapi.middleware.cors import CORSMiddleware +from fastapi.responses import JSONResponse +from h5grove import fastapi_utils as h5grove_router +from h5grove import utils as h5grove_utils from nomad import utils -from nomad.files import UploadFiles, PublicUploadFiles from nomad.app.v1.models import User from nomad.app.v1.routers.auth import create_user_dependency from nomad.app.v1.routers.uploads import get_upload_with_read_access +from nomad.files import PublicUploadFiles, UploadFiles logger = utils.get_logger(__name__) @@ -43,8 +44,9 @@ def open_zipped_h5_file( create_error: Callable[[int, str], Exception], h5py_options: dict[str, Any] = {}, ) -> h5py.File: - import re import io + import re + from nomad import files """ diff --git a/nomad/app/main.py b/nomad/app/main.py index 2168e5fd965f7e7bd939d5779e7f707dd2cf7c6f..29829f997ace12d293728e89518a0cb1c668850b 100644 --- a/nomad/app/main.py +++ b/nomad/app/main.py @@ -23,16 +23,17 @@ from fastapi import FastAPI, Response, status from fastapi.exception_handlers import ( http_exception_handler as default_http_exception_handler, ) -from starlette.exceptions import HTTPException as StarletteHTTPException from fastapi.responses import HTMLResponse, JSONResponse +from starlette.exceptions import HTTPException as StarletteHTTPException from starlette.middleware.base import BaseHTTPMiddleware from nomad import infrastructure from nomad.config import config from nomad.config.models.plugins import APIEntryPoint +from 
.static import GuiFiles +from .static import app as static_files_app from .v1.main import app as v1_app -from .static import app as static_files_app, GuiFiles class OasisAuthenticationMiddleware(BaseHTTPMiddleware): @@ -164,11 +165,10 @@ async def http_exception_handler(request, exc): @app.on_event('startup') async def startup_event(): - from nomad.cli.dev import get_gui_artifacts_js - from nomad.cli.dev import get_gui_config - from nomad.parsing.parsers import import_all_parsers from nomad import infrastructure + from nomad.cli.dev import get_gui_artifacts_js, get_gui_config from nomad.metainfo.elasticsearch_extension import entry_type + from nomad.parsing.parsers import import_all_parsers import_all_parsers() diff --git a/nomad/app/optimade/common.py b/nomad/app/optimade/common.py index 623cd3847cdf3cb2ebe36363bef0ba18ed62841b..45824ef06bce9789286da9887d30f52151adbc1d 100644 --- a/nomad/app/optimade/common.py +++ b/nomad/app/optimade/common.py @@ -16,12 +16,11 @@ # limitations under the License. # -from typing import Dict, cast +from typing import cast from nomad.metainfo.data_type import Datatype, to_optimade_type -from nomad.metainfo.metainfo import Quantity, Reference from nomad.metainfo.elasticsearch_extension import SearchQuantity, entry_type - +from nomad.metainfo.metainfo import Quantity, Reference _provider_specific_fields: dict[str, SearchQuantity] = None diff --git a/nomad/app/optimade/elasticsearch.py b/nomad/app/optimade/elasticsearch.py index 910306b4199e4808c0f3d4268f8ab3deb4721c42..d966f7d7ccbba75f9e8c7b8c47d14695b43494a0 100644 --- a/nomad/app/optimade/elasticsearch.py +++ b/nomad/app/optimade/elasticsearch.py @@ -1,23 +1,23 @@ -from typing import List, Dict, Set, Any -from elasticsearch_dsl import Q +from typing import Any +from elasticsearch_dsl import Q from optimade.filterparser import LarkParser +from optimade.models import StructureResource from optimade.server.entry_collections import EntryCollection from optimade.server.exceptions import BadRequest from optimade.server.mappers import StructureMapper from optimade.server.mappers.entries import classproperty -from optimade.models import StructureResource -from nomad.units import ureg -from nomad.atomutils import Formula -from nomad.search import search +from nomad import datamodel, files, utils from nomad.app.v1.models import MetadataPagination, MetadataRequired +from nomad.atomutils import Formula from nomad.config import config -from nomad import datamodel, files, utils +from nomad.search import search +from nomad.units import ureg -from .filterparser import _get_transformer as get_transformer -from .common import provider_specific_fields from ...archive import to_json +from .common import provider_specific_fields +from .filterparser import _get_transformer as get_transformer logger = utils.get_logger(__name__) diff --git a/nomad/app/optimade/filterparser.py b/nomad/app/optimade/filterparser.py index c24b6c540071f11b06f95e60e5aa484a0e5807c5..11caebaf9e9625d820b365292fc1b8ddaeb1a448 100644 --- a/nomad/app/optimade/filterparser.py +++ b/nomad/app/optimade/filterparser.py @@ -16,19 +16,16 @@ # limitations under the License. 
# -from typing import Dict -from elasticsearch_dsl import Q from cachetools import cached - +from elasticsearch_dsl import Q from optimade.filterparser import LarkParser +from optimade.filtertransformers.elasticsearch import ElasticsearchQuantity as Quantity from optimade.filtertransformers.elasticsearch import ( - ElasticsearchQuantity as Quantity, ElasticTransformer as OPTElasticTransformer, ) from .common import provider_specific_fields - _parser = LarkParser(version=(1, 0, 1)) diff --git a/nomad/app/resources/common.py b/nomad/app/resources/common.py index f0cedfb72be356e1a06565929ef2d59554392ec2..96a0e9c977d4e6b25fb515c2b45d029f5563b760 100644 --- a/nomad/app/resources/common.py +++ b/nomad/app/resources/common.py @@ -18,6 +18,5 @@ from nomad.config import config - root_path = f'{config.services.api_base_path}/resources' base_url = config.api_url(api='resources') diff --git a/nomad/app/resources/main.py b/nomad/app/resources/main.py index e5c4b6992de2ffdd2ea96610a917f511023567f9..ab6e17d11d972638ef073fb55525c9156328d475 100644 --- a/nomad/app/resources/main.py +++ b/nomad/app/resources/main.py @@ -16,18 +16,18 @@ # limitations under the License. # -from fastapi import FastAPI, status, Request -from fastapi.middleware.cors import CORSMiddleware -from fastapi.responses import JSONResponse import traceback + from celery.signals import worker_process_init +from fastapi import FastAPI, Request, status +from fastapi.middleware.cors import CORSMiddleware +from fastapi.responses import JSONResponse from nomad import utils from nomad.config import config from .routers import resources - logger = utils.get_logger(__name__) mongo_client_resources = None diff --git a/nomad/app/resources/routers/resources.py b/nomad/app/resources/routers/resources.py index 878d3a3ea335a78e2c7d92bcab4566c9a0d7aaf6..45f4f9559bd3e8a20bc373aca798d13b4d6eb6d4 100644 --- a/nomad/app/resources/routers/resources.py +++ b/nomad/app/resources/routers/resources.py @@ -16,39 +16,44 @@ # limitations under the License. 
# -import re -import os -import io -import bs4 import asyncio -import httpx -from fastapi import APIRouter, Query as FastApiQuery -from pydantic import BaseModel, Field -from typing import List, Any, Dict, Optional +import io +import os +import re from datetime import datetime +from enum import Enum +from typing import Any + import ase.io +import bs4 +import httpx +from asgiref.sync import async_to_sync +from fastapi import APIRouter +from fastapi import Query as FastApiQuery from mongoengine import ( - Document, - StringField, + BooleanField, DateTimeField, + Document, IntField, ListField, - BooleanField, + StringField, ) from mongoengine.queryset.visitor import Q -from asgiref.sync import async_to_sync +from pydantic import BaseModel, Field from nomad import utils -from nomad.config import config from nomad.atomutils import Formula +from nomad.config import config from nomad.processing.base import app - logger = utils.get_logger(__name__) router = APIRouter() -default_tag = 'resources' + +class APITag(str, Enum): + DEFAULT = 'resources' + # TODO generate list from optimade api optimade_providers = { @@ -664,7 +669,7 @@ def retrieve_resources( @router.get( '/', - tags=[default_tag], + tags=[APITag.DEFAULT], summary='Get a list of external resources.', response_model=ResourcesModel, response_model_exclude_unset=True, diff --git a/nomad/app/v1/main.py b/nomad/app/v1/main.py index 1d1e00590cc633cd8bb0a5c9611dfc4c100764e3..af4def92478e5e50ecd01f38c3e3fc652838063c 100644 --- a/nomad/app/v1/main.py +++ b/nomad/app/v1/main.py @@ -18,9 +18,9 @@ import traceback -from fastapi import FastAPI, status, Request +from fastapi import FastAPI, Request, status from fastapi.middleware.cors import CORSMiddleware -from fastapi.responses import JSONResponse, RedirectResponse, ORJSONResponse +from fastapi.responses import JSONResponse, ORJSONResponse, RedirectResponse from pyinstrument import Profiler from starlette.middleware import Middleware from starlette.middleware.base import BaseHTTPMiddleware @@ -29,22 +29,23 @@ from starlette.types import ASGIApp, Receive, Scope, Send from nomad import utils from nomad.config import config + from .common import root_path from .routers import ( - users, - entries, - materials, auth, - info, datasets, - uploads, - suggestions, - metainfo, - north, - systems, + entries, federation, graph, groups, + info, + materials, + metainfo, + north, + suggestions, + systems, + uploads, + users, ) logger = utils.get_logger(__name__) @@ -123,18 +124,18 @@ async def unicorn_exception_handler(request: Request, e: Exception): ) -app.include_router(info.router, prefix='/info') app.include_router(auth.router, prefix='/auth') +app.include_router(datasets.router, prefix='/datasets') +app.include_router(entries.router, prefix='/entries') app.include_router(federation.router, prefix='/federation') +app.include_router(graph.router, prefix='/graph') +app.include_router(groups.router, prefix='/groups') +app.include_router(info.router, prefix='/info') app.include_router(materials.router, prefix='/materials') -app.include_router(entries.router, prefix='/entries') -app.include_router(datasets.router, prefix='/datasets') -app.include_router(uploads.router, prefix='/uploads') app.include_router(metainfo.router, prefix='/metainfo') -app.include_router(users.router, prefix='/users') -app.include_router(suggestions.router, prefix='/suggestions') if config.north.enabled: app.include_router(north.router, prefix='/north') +app.include_router(suggestions.router, prefix='/suggestions') 
app.include_router(systems.router, prefix='/systems') -app.include_router(graph.router, prefix='/graph') -app.include_router(groups.router, prefix='/groups') +app.include_router(uploads.router, prefix='/uploads') +app.include_router(users.router, prefix='/users') diff --git a/nomad/app/v1/models/graph/graph_models.py b/nomad/app/v1/models/graph/graph_models.py index acb7b59ffc1fb17c93a989a2ef918fa21a714f10..dff9d7d42bdf3fc109d4d23e1a32c92613bccf00 100644 --- a/nomad/app/v1/models/graph/graph_models.py +++ b/nomad/app/v1/models/graph/graph_models.py @@ -17,35 +17,37 @@ # from __future__ import annotations -from typing import Optional, List, Union, Any, Literal -from pydantic import BaseModel, ConfigDict, Field, Extra -from ..groups import UserGroup, UserGroupPagination, UserGroupQuery +from typing import Any, Literal -from nomad.graph.model import ( - RequestConfig, - DatasetQuery, - MetainfoQuery, - MetainfoPagination, +from pydantic import BaseModel, ConfigDict, Extra, Field + +from nomad.app.v1.models.graph.utils import ( + generate_request_model, + generate_response_model, + mapped, ) -from nomad.metainfo.pydantic_extension import PydanticModel -from nomad.datamodel.data import User as UserModel from nomad.app.v1.models.models import Metadata, MetadataResponse -from nomad.app.v1.routers.datasets import Dataset as DatasetV1, DatasetPagination +from nomad.app.v1.routers.datasets import Dataset as DatasetV1 +from nomad.app.v1.routers.datasets import DatasetPagination from nomad.app.v1.routers.uploads import ( + EntryProcData, + EntryProcDataPagination, + PaginationResponse, UploadProcData, UploadProcDataPagination, UploadProcDataQuery, - PaginationResponse, - EntryProcData, - EntryProcDataPagination, ) - -from nomad.app.v1.models.graph.utils import ( - generate_request_model, - generate_response_model, - mapped, +from nomad.datamodel.data import User as UserModel +from nomad.graph.model import ( + DatasetQuery, + MetainfoPagination, + MetainfoQuery, + RequestConfig, ) +from nomad.metainfo.pydantic_extension import PydanticModel + +from ..groups import UserGroup, UserGroupPagination, UserGroupQuery class Error(BaseModel): diff --git a/nomad/app/v1/models/graph/utils.py b/nomad/app/v1/models/graph/utils.py index 6d420a6e1002c51bca54bd55ccbb815ddac4c34d..6c871c55478ef09754f5998ac8aa97686b1e645f 100644 --- a/nomad/app/v1/models/graph/utils.py +++ b/nomad/app/v1/models/graph/utils.py @@ -17,32 +17,33 @@ # from __future__ import annotations + +import sys +from collections.abc import Callable +from datetime import datetime +from types import UnionType from typing import ( - Optional, - Literal, - Union, Any, ForwardRef, - get_type_hints, - get_origin, - get_args, + Literal, + Optional, + Union, cast, + get_args, + get_origin, + get_type_hints, ) -from collections.abc import Callable -from types import UnionType -from datetime import datetime + from pydantic import ( BaseModel, ConfigDict, + Field, TypeAdapter, + ValidationError, create_model, - Field, model_validator, - ValidationError, ) from pydantic.config import ConfigDict as BaseConfigDict -import sys - ref_prefix = '#/components/schemas' request_suffix = 'Request' diff --git a/nomad/app/v1/models/groups.py b/nomad/app/v1/models/groups.py index 1e7c34bc4d6f6af56489a2ff73ba7deb4a80728e..dbdae164fba850f19314e7cb855e8312bf9d10d3 100644 --- a/nomad/app/v1/models/groups.py +++ b/nomad/app/v1/models/groups.py @@ -1,13 +1,4 @@ -from typing import List, Optional, Set - -from pydantic import ( - BaseModel, - ConfigDict, - Field, - field_validator, - 
root_validator, - validator, -) +from pydantic import BaseModel, ConfigDict, Field, field_validator from pydantic_core import PydanticCustomError from .pagination import Direction, Pagination, PaginationResponse diff --git a/nomad/app/v1/models/models.py b/nomad/app/v1/models/models.py index 4513d7b03e1f3d136364f408db12446779825e7e..7dab17d0f8e71bbe934b4646085151a71b885b2e 100644 --- a/nomad/app/v1/models/models.py +++ b/nomad/app/v1/models/models.py @@ -16,31 +16,30 @@ # limitations under the License. # import datetime -import enum import fnmatch import json import re -from typing import Any from collections.abc import Mapping +from enum import Enum +from typing import Annotated, Any -import pydantic from fastapi import Body, HTTPException, Request from fastapi import Query as FastApiQuery -from pydantic import ( # pylint: disable=unused-import - field_validator, - model_validator, - StringConstraints, - ConfigDict, +from pydantic import ( # noqa: F401 BaseModel, + ConfigDict, Field, StrictBool, StrictFloat, StrictInt, + StringConstraints, + field_validator, + model_validator, ) from pydantic.main import create_model from pydantic_core import PydanticCustomError -from nomad import datamodel, metainfo # pylint: disable=unused-import +from nomad import datamodel, metainfo # noqa: F401 from nomad.app.v1.utils import parameter_dependency_from_model from nomad.metainfo.elasticsearch_extension import ( DocumentType, @@ -50,8 +49,6 @@ from nomad.metainfo.elasticsearch_extension import ( from nomad.utils import strip from .pagination import Pagination, PaginationResponse -from typing import Annotated - User: Any = datamodel.User.m_def.a_pydantic.model # It is important that datetime.datetime comes last. Otherwise, number valued strings @@ -81,7 +78,7 @@ owner_documentation = strip( ) -class Owner(str, enum.Enum): +class Owner(str, Enum): __doc__ = owner_documentation # There seems to be a slight bug in fast API. When it creates the example in OpenAPI @@ -917,7 +914,7 @@ class HistogramAggregation(BucketAggregation): """ ), ) - offset: float | None = Field(None, gte=0) + offset: float | None = Field(None) extended_bounds: Bounds | None = None @model_validator(mode='before') diff --git a/nomad/app/v1/models/pagination.py b/nomad/app/v1/models/pagination.py index 2a127218625f3a7a4a552e72e1e351406cbbe592..c3fba17369172950363d5af640d5c8390cadb97c 100644 --- a/nomad/app/v1/models/pagination.py +++ b/nomad/app/v1/models/pagination.py @@ -1,21 +1,14 @@ -import enum -from typing import Optional +from enum import Enum + from fastapi import HTTPException, Request -from pydantic import ( - BaseModel, - ConfigDict, - Field, - field_validator, - model_validator, - validator, -) +from pydantic import BaseModel, ConfigDict, Field, field_validator, model_validator from pydantic_core import PydanticCustomError from nomad.app.v1.utils import update_url_query_arguments from nomad.utils import strip -class Direction(str, enum.Enum): +class Direction(str, Enum): """ Order direction, either ascending (`asc`) or descending (`desc`) """ diff --git a/nomad/app/v1/routers/auth.py b/nomad/app/v1/routers/auth.py index 8cf79a9df7e5f22536959550c6dca67ff4a011f5..01740cda39d9ec2026a2d50fd7694631716c9052 100644 --- a/nomad/app/v1/routers/auth.py +++ b/nomad/app/v1/routers/auth.py @@ -16,39 +16,38 @@ # limitations under the License. 
# -import hmac +import datetime import hashlib +import hmac import uuid -import requests -from typing import cast, Union from collections.abc import Callable -from inspect import Parameter, signature +from enum import Enum from functools import wraps -from fastapi import ( - APIRouter, - Depends, - Query as FastApiQuery, - Request, - HTTPException, - status, -) +from inspect import Parameter, signature +from typing import cast + +import jwt +import requests +from fastapi import APIRouter, Depends, HTTPException, Request, status +from fastapi import Query as FastApiQuery from fastapi.security import OAuth2PasswordBearer, OAuth2PasswordRequestForm from pydantic import BaseModel -import jwt -import datetime -from nomad import utils, infrastructure, datamodel +from nomad import datamodel, infrastructure, utils from nomad.config import config from nomad.utils import get_logger, strip from ..common import root_path -from ..models import User, HTTPExceptionModel +from ..models import HTTPExceptionModel, User from ..utils import create_responses logger = get_logger(__name__) router = APIRouter() -default_tag = 'auth' + + +class APITag(str, Enum): + DEFAULT = 'auth' class Token(BaseModel): @@ -126,7 +125,7 @@ def create_user_dependency( except Exception as e: logger = utils.get_logger(__name__) logger.error( - 'Api usage by unknown user. Possible missconfiguration', exc_info=e + 'API usage by unknown user. Possible misconfiguration', exc_info=e ) raise HTTPException( status_code=status.HTTP_401_UNAUTHORIZED, @@ -284,8 +283,8 @@ def _get_user_signature_token_auth(signature_token: str, request: Request) -> Us corresponding user object, or None, if no upload_token provided. """ if signature_token: - user = _get_user_from_simple_token(signature_token) - return user + return _get_user_from_simple_token(signature_token) + elif request: auth_cookie = request.cookies.get('Authorization') if auth_cookie: @@ -293,11 +292,11 @@ def _get_user_signature_token_auth(signature_token: str, request: Request) -> Us auth_cookie = requests.utils.unquote(auth_cookie) # type: ignore if auth_cookie.startswith('Bearer '): cookie_bearer_token = auth_cookie[7:] - user = cast( + return cast( datamodel.User, infrastructure.keycloak.tokenauth(cookie_bearer_token), ) - return user + except infrastructure.KeycloakError as e: raise HTTPException( status_code=status.HTTP_401_UNAUTHORIZED, @@ -347,7 +346,7 @@ _bad_credentials_response = ( @router.post( '/token', - tags=[default_tag], + tags=[APITag.DEFAULT], summary='Get an access token', responses=create_responses(_bad_credentials_response), response_model=Token, @@ -382,15 +381,22 @@ async def get_token(form_data: OAuth2PasswordRequestForm = Depends()): @router.get( '/token', - tags=[default_tag], + tags=[APITag.DEFAULT], summary='Get an access token', responses=create_responses(_bad_credentials_response), response_model=Token, + deprecated=True, ) async def get_token_via_query(username: str, password: str): """ - This is an convenience alternative to the **POST** version of this operation. - It allows you to retrieve an *access token* by providing username and password. + **[DEPRECATED]** This endpoint is **no longer recommended**. + Please use the **POST** endpoint instead. + + This was a convenience alternative to the **POST** version, allowing retrieval of + an *access token* by providing a username and password via query parameters. + + **Why is this deprecated?** + Query parameters expose credentials in URLs, which can be logged or cached. 
""" try: access_token = infrastructure.keycloak.basicauth(username, password) @@ -406,7 +412,7 @@ async def get_token_via_query(username: str, password: str): @router.get( '/signature_token', - tags=[default_tag], + tags=[APITag.DEFAULT], summary='Get a signature token', response_model=SignatureToken, ) @@ -423,7 +429,7 @@ async def get_signature_token( @router.get( '/app_token', - tags=[default_tag], + tags=[APITag.DEFAULT], summary='Get an app token', response_model=AppToken, ) @@ -452,8 +458,7 @@ def generate_simple_token(user_id, expires_in: int): """ expires_at = datetime.datetime.utcnow() + datetime.timedelta(seconds=expires_in) payload = dict(user=user_id, exp=expires_at) - token = jwt.encode(payload, config.services.api_secret, 'HS256') - return token + return jwt.encode(payload, config.services.api_secret, 'HS256') def generate_upload_token(user): diff --git a/nomad/app/v1/routers/datasets.py b/nomad/app/v1/routers/datasets.py index c4850b0d29b59b4b23576a740281691fe63e7488..6038f44f98ccd1af3a4174e7022a3161e0b72d2c 100644 --- a/nomad/app/v1/routers/datasets.py +++ b/nomad/app/v1/routers/datasets.py @@ -17,46 +17,44 @@ # import re -from typing import cast, Optional, List -from fastapi import ( - APIRouter, - Request, - Depends, - Query as FastApiQuery, - Path, - HTTPException, - status, -) -from pydantic import field_validator, BaseModel, Field, validator from datetime import datetime -import enum +from enum import Enum +from typing import cast + +from fastapi import APIRouter, Depends, HTTPException, Path, Request, status +from fastapi import Query as FastApiQuery +from pydantic import BaseModel, Field, field_validator -from nomad import utils, datamodel, processing +from nomad import datamodel, processing, utils from nomad.config import config -from nomad.metainfo.elasticsearch_extension import entry_type -from nomad.utils import strip, create_uuid from nomad.datamodel import Dataset as DatasetDefinitionCls from nomad.doi import DOI, DOIException +from nomad.metainfo.elasticsearch_extension import entry_type from nomad.search import search, update_by_query +from nomad.utils import create_uuid, strip -from .auth import create_user_dependency -from .entries import _do_exhaustive_search -from ..utils import create_responses, parameter_dependency_from_model from ..models import ( + Any_, + Direction, + HTTPExceptionModel, + MetadataPagination, + MetadataRequired, + Owner, Pagination, PaginationResponse, - MetadataPagination, Query, - HTTPExceptionModel, User, - Direction, - Owner, - Any_, ) - +from ..utils import create_responses, parameter_dependency_from_model +from .auth import create_user_dependency +from .entries import _do_exhaustive_search router = APIRouter() -default_tag = 'datasets' + + +class APITag(str, Enum): + DEFAULT = 'datasets' + logger = utils.get_logger(__name__) @@ -178,7 +176,10 @@ Dataset = datamodel.Dataset.m_def.a_pydantic.model def _delete_dataset(user: User, dataset_id, dataset): es_query = cast(Query, {'datasets.dataset_id': dataset_id}) entries = _do_exhaustive_search( - owner=Owner.user, query=es_query, user=user, include=['entry_id'] + owner=Owner.user, + query=es_query, + user=user, + required=MetadataRequired(include=['entry_id']), ) entry_ids = [entry['entry_id'] for entry in entries] mongo_query = {'_id': {'$in': entry_ids}} @@ -257,7 +258,7 @@ class DatasetResponse(BaseModel): data: Dataset = Field() # type: ignore -class DatasetType(str, enum.Enum): +class DatasetType(str, Enum): owned = 'owned' foreign = 'foreign' @@ -271,7 +272,7 @@ class 
DatasetCreate(BaseModel): # type: ignore @router.get( '/', - tags=[default_tag], + tags=[APITag.DEFAULT], summary='Get a list of datasets', response_model=DatasetsResponse, response_model_exclude_unset=True, @@ -318,7 +319,7 @@ async def get_datasets( @router.get( '/{dataset_id}', - tags=[default_tag], + tags=[APITag.DEFAULT], summary='Get a list of datasets', response_model=DatasetResponse, responses=create_responses(_bad_id_response), @@ -347,7 +348,7 @@ async def get_dataset( @router.post( '/', - tags=[default_tag], + tags=[APITag.DEFAULT], summary='Create a new dataset', response_model=DatasetResponse, responses=create_responses(_existing_name_response), @@ -408,7 +409,10 @@ async def post_datasets( empty = True else: entries = _do_exhaustive_search( - owner=Owner.user, query=es_query, user=user, include=['entry_id'] + owner=Owner.user, + query=es_query, + user=user, + required=MetadataRequired(include=['entry_id']), ) entry_ids = [entry['entry_id'] for entry in entries] mongo_query = {'_id': {'$in': entry_ids}} @@ -436,7 +440,7 @@ async def post_datasets( @router.delete( '/{dataset_id}', - tags=[default_tag], + tags=[APITag.DEFAULT], summary='Delete a dataset', response_model=DatasetResponse, responses=create_responses( @@ -482,7 +486,7 @@ async def delete_dataset( @router.post( '/{dataset_id}/action/doi', - tags=[default_tag], + tags=[APITag.DEFAULT], summary='Assign a DOI to a dataset', response_model=DatasetResponse, responses=create_responses( diff --git a/nomad/app/v1/routers/entries.py b/nomad/app/v1/routers/entries.py index 5e1836894cffe5c628a433c56d36bb0f15c172df..f974e154214bba6607ac9a7d8f79ff0a7267ab12 100644 --- a/nomad/app/v1/routers/entries.py +++ b/nomad/app/v1/routers/entries.py @@ -15,95 +15,85 @@ # See the License for the specific language governing permissions and # limitations under the License. 
 #
+import io
+import json
+import os.path
+from collections.abc import Iterator
 from datetime import datetime
-
 from enum import Enum
 from typing import Any
-from collections.abc import Iterator
-from fastapi import (
-    APIRouter,
-    Depends,
-    Path,
-    status,
-    HTTPException,
-    Request,
-    Query as QueryParameter,
-    Body,
-)
-from fastapi.responses import StreamingResponse, ORJSONResponse
-from fastapi.exceptions import RequestValidationError
-from pydantic import (
-    ConfigDict,
-    field_validator,
-    BaseModel,
-    Field,
-)
-import os.path
-import io
-import json
+
 import orjson
+import yaml
+from fastapi import APIRouter, Body, Depends, HTTPException, Path, Request, status
+from fastapi import Query as QueryParameter
+from fastapi.exceptions import RequestValidationError
+from fastapi.responses import ORJSONResponse, StreamingResponse
+from pydantic import BaseModel, ConfigDict, Field, field_validator
 from pydantic.main import create_model
 from starlette.responses import Response
-import yaml

-from nomad import files, utils, metainfo, processing as proc
-from nomad import datamodel
+from nomad import datamodel, files, metainfo, utils
+from nomad import processing as proc
+from nomad.archive import ArchiveQueryError, RequiredReader, RequiredValidationError
 from nomad.config import config
 from nomad.config.models.config import Reprocess
 from nomad.datamodel import EditableUserMetadata
 from nomad.datamodel.context import ServerContext
 from nomad.files import StreamedFile, create_zipstream_async
-from nomad.processing.data import Upload
-from nomad.utils import strip
-from nomad.archive import RequiredReader, RequiredValidationError, ArchiveQueryError
 from nomad.groups import get_group_ids
+from nomad.metainfo.elasticsearch_extension import entry_type
+from nomad.processing.data import Upload
 from nomad.search import (
     AuthenticationRequiredError,
     QueryValidationError,
     SearchError,
     search,
-    update_metadata as es_update_metadata,
 )
-from nomad.metainfo.elasticsearch_extension import entry_type
+from nomad.search import update_metadata as es_update_metadata
+from nomad.utils import strip

-from .auth import create_user_dependency
-from ..utils import (
-    create_download_stream_zipped,
-    create_download_stream_raw_file,
-    browser_download_headers,
-    DownloadItem,
-    create_responses,
-    log_query,
-)
 from ..models import (
     Aggregation,
-    Pagination,
-    PaginationResponse,
+    Files,
+    HTTPExceptionModel,
+    Metadata,
+    MetadataEditRequest,
     MetadataPagination,
-    TermsAggregation,
-    WithQuery,
-    WithQueryAndPagination,
     MetadataRequired,
     MetadataResponse,
-    Metadata,
-    MetadataEditRequest,
-    Files,
-    Query,
-    User,
     Owner,
+    Pagination,
+    PaginationResponse,
+    Query,
     QueryParameters,
-    metadata_required_parameters,
+    TermsAggregation,
+    User,
+    WithQuery,
+    WithQueryAndPagination,
     files_parameters,
     metadata_pagination_parameters,
-    HTTPExceptionModel,
+    metadata_required_parameters,
 )
-
+from ..utils import (
+    DownloadItem,
+    browser_download_headers,
+    create_download_stream_raw_file,
+    create_download_stream_zipped,
+    create_responses,
+    log_query,
+)
+from .auth import create_user_dependency

 router = APIRouter()
-default_tag = 'entries'
-metadata_tag = 'entries/metadata'
-raw_tag = 'entries/raw'
-archive_tag = 'entries/archive'
+
+
+class APITag(str, Enum):
+    DEFAULT = 'entries'
+    METADATA = 'entries/metadata'
+    RAW = 'entries/raw'
+    ARCHIVE = 'entries/archive'
+

 logger = utils.get_logger(__name__)
@@ -277,19 +267,21 @@ class EntryMetadataResponse(BaseModel):

 class EntryMetadataEditActionField(BaseModel):
-    value: str = Field(None, description='The value/values that is set as a string.')
+    value: str | None = Field(
+        None, description='The value/values that is set as a string.'
+    )
     success: bool | None = Field(
         None, description='If this can/could be done. Only in API response.'
     )
     message: str | None = Field(
         None,
-        descriptin='A message that details the action result. Only in API response.',
+        description='A message that details the action result. Only in API response.',
     )


-EntryMetadataEditActions = create_model(
-    'EntryMetadataEditActions',
-    **{  # type: ignore
+EntryMetadataEditActions: Any = create_model(
+    'EntryMetadataEditActions',  # type: ignore
+    **{
         quantity.name: (
             EntryMetadataEditActionField | None
             if quantity.is_scalar
@@ -307,7 +299,7 @@ class EntryMetadataEdit(WithQuery):
     actions: EntryMetadataEditActions = Field(  # type: ignore
         None,
         description='Each action specifies a single value (even for multi valued quantities).',
-    )
+    )  # type: ignore

     @field_validator('owner')
     @classmethod
@@ -495,7 +487,7 @@ def perform_search(*args, **kwargs):

 @router.post(
     '/query',
-    tags=[metadata_tag],
+    tags=[APITag.METADATA],
     summary='Search entries and retrieve their metadata',
     response_model=MetadataResponse,
     responses=create_responses(_bad_owner_response),
@@ -536,7 +528,7 @@ async def post_entries_metadata_query(

 @router.get(
     '',
-    tags=[metadata_tag],
+    tags=[APITag.METADATA],
     summary='Search entries and retrieve their metadata',
     response_model=MetadataResponse,
     responses=create_responses(_bad_owner_response),
@@ -574,17 +566,32 @@ async def get_entries_metadata(

 def _do_exhaustive_search(
-    owner: Owner, query: Query, include: list[str], user: User
+    owner: Owner,
+    query: Query,
+    required: MetadataRequired,
+    user: User,
+    page_size: int = 100,
 ) -> Iterator[dict[str, Any]]:
-    page_after_value = None
+    """Perform a paginated search.
+
+    Args:
+        owner (Owner): The owner defining the search scope.
+        query (Query): The query specifying search filters and conditions.
+        required (MetadataRequired): Includes and excludes for the response.
+        user (User): The user performing the search, used for authorization.
+        page_size (int): The number of results per page.
+ """ + page_after_value: str | None = None while True: response = perform_search( owner=owner, query=query, pagination=MetadataPagination( - page_size=100, page_after_value=page_after_value, order_by='upload_id' + page_size=page_size, + page_after_value=page_after_value, + order_by='upload_id', ), - required=MetadataRequired(include=include), + required=required, user_id=user.user_id if user is not None else None, ) @@ -719,7 +726,10 @@ def _answer_entries_raw_request(owner: Owner, query: Query, files: Files, user: def download_items_generator(): # go through all entries that match the query for entry_metadata in _do_exhaustive_search( - owner, query, include=search_includes, user=user + owner, + query, + required=MetadataRequired(include=search_includes), + user=user, ): upload_id = entry_metadata['upload_id'] mainfile = entry_metadata['mainfile'] @@ -770,7 +780,7 @@ _entries_rawdir_query_docstring = strip( @router.post( '/rawdir/query', - tags=[raw_tag], + tags=[APITag.RAW], summary='Search entries and get their raw files metadata', description=_entries_rawdir_query_docstring, response_model=EntriesRawDirResponse, @@ -790,7 +800,7 @@ async def post_entries_rawdir_query( @router.get( '/rawdir', - tags=[raw_tag], + tags=[APITag.RAW], summary='Search entries and get their raw files metadata', description=_entries_rawdir_query_docstring, response_model=EntriesRawDirResponse, @@ -834,7 +844,7 @@ _entries_raw_query_docstring = strip( @router.post( '/raw/query', - tags=[raw_tag], + tags=[APITag.RAW], summary='Search entries and download their raw files', description=_entries_raw_query_docstring, response_class=StreamingResponse, @@ -850,7 +860,7 @@ async def post_entries_raw_query( @router.get( '/raw', - tags=[raw_tag], + tags=[APITag.RAW], summary='Search entries and download their raw files', description=_entries_raw_query_docstring, response_class=StreamingResponse, @@ -991,7 +1001,7 @@ _entries_archive_docstring = strip( @router.post( '/archive/query', - tags=[archive_tag], + tags=[APITag.ARCHIVE], summary='Search entries and access their archives', description=_entries_archive_docstring, response_model=EntriesArchiveResponse, @@ -1025,7 +1035,7 @@ async def post_entries_archive_query( @router.get( '/archive', - tags=[archive_tag], + tags=[APITag.ARCHIVE], summary='Search entries and access their archives', description=_entries_archive_docstring, response_model=EntriesArchiveResponse, @@ -1090,7 +1100,7 @@ def _answer_entries_archive_download_request( def streamed_files(): # go through all entries that match the query for entry_metadata in _do_exhaustive_search( - owner, query, include=search_includes, user=user + owner, query, required=MetadataRequired(include=search_includes), user=user ): path = os.path.join( entry_metadata['upload_id'], f'{entry_metadata["entry_id"]}.json' @@ -1142,7 +1152,7 @@ _entries_archive_download_docstring = strip( @router.post( '/archive/download/query', - tags=[archive_tag], + tags=[APITag.ARCHIVE], summary='Search entries and download their archives', description=_entries_archive_download_docstring, response_class=StreamingResponse, @@ -1164,7 +1174,7 @@ async def post_entries_archive_download_query( @router.get( '/archive/download', - tags=[archive_tag], + tags=[APITag.ARCHIVE], summary='Search entries and download their archives', description=_entries_archive_download_docstring, response_class=StreamingResponse, @@ -1188,7 +1198,7 @@ async def get_entries_archive_download( @router.get( '/{entry_id}', - tags=[metadata_tag], + tags=[APITag.METADATA], 
     summary='Get the metadata of an entry by its id',
     response_model=EntryMetadataResponse,
     responses=create_responses(_bad_id_response),
@@ -1225,7 +1235,7 @@ async def get_entry_metadata(

 @router.get(
     '/{entry_id}/rawdir',
-    tags=[raw_tag],
+    tags=[APITag.RAW],
     summary='Get the raw files metadata for an entry by its id',
     response_model=EntryRawDirResponse,
     responses=create_responses(_bad_id_response),
@@ -1264,7 +1274,7 @@ async def get_entry_rawdir(

 @router.get(
     '/{entry_id}/raw',
-    tags=[raw_tag],
+    tags=[APITag.RAW],
     summary='Get the raw data of an entry by its id',
     response_class=StreamingResponse,
     responses=create_responses(_bad_id_response, _raw_response),
@@ -1300,7 +1310,7 @@ async def get_entry_raw(

 @router.get(
     '/{entry_id}/raw/{path}',
-    tags=[raw_tag],
+    tags=[APITag.RAW],
     summary='Get the raw data of an entry by its id',
     response_class=StreamingResponse,
     responses=create_responses(
@@ -1430,7 +1440,7 @@ def answer_entry_archive_request(

 @router.post(
     '/{entry_id}/edit',
-    tags=[raw_tag],
+    tags=[APITag.RAW],
     summary='Edit a raw mainfile in archive format.',
     response_model=EntryEditResponse,
     response_model_exclude_unset=True,
@@ -1561,7 +1571,7 @@ async def post_entry_edit(

 @router.get(
     '/{entry_id}/archive',
-    tags=[archive_tag],
+    tags=[APITag.ARCHIVE],
     summary='Get the archive for an entry by its id',
     response_model=EntryArchiveResponse,
     response_model_exclude_unset=True,
@@ -1585,7 +1595,7 @@ async def get_entry_archive(

 @router.get(
     '/{entry_id}/archive/download',
-    tags=[archive_tag],
+    tags=[APITag.ARCHIVE],
     summary='Get the archive for an entry by its id as plain archive json',
     responses=create_responses(_bad_id_response, _archive_download_response),
 )
@@ -1607,7 +1617,7 @@ async def get_entry_archive_download(

 @router.post(
     '/{entry_id}/archive/query',
-    tags=[archive_tag],
+    tags=[APITag.ARCHIVE],
     summary='Get the archive for an entry by its id',
     response_model=EntryArchiveResponse,
     response_model_exclude_unset=True,
@@ -1641,7 +1651,10 @@ def edit(
     upload_ids: set[str] = set()
     with utils.timer(logger, 'edit query executed'):
         all_entries = _do_exhaustive_search(
-            owner=Owner.user, query=query, include=['entry_id', 'upload_id'], user=user
+            owner=Owner.user,
+            query=query,
+            required=MetadataRequired(include=['entry_id', 'upload_id']),
+            user=user,
         )

         for entry_dict in all_entries:
@@ -1705,7 +1718,7 @@ _editable_quantities = {

 @router.post(
     '/edit_v0',
-    tags=[metadata_tag],
+    tags=[APITag.METADATA],
     summary='Edit the user metadata of a set of entries',
     response_model=EntryMetadataEditResponse,
     response_model_exclude_unset=True,
@@ -1896,7 +1909,7 @@ async def post_entry_metadata_edit(

 @router.post(
     '/edit',
-    tags=[metadata_tag],
+    tags=[APITag.METADATA],
     summary='Edit the user metadata of a set of entries',
     response_model=MetadataEditRequest,
     response_model_exclude_unset=True,
diff --git a/nomad/app/v1/routers/federation.py b/nomad/app/v1/routers/federation.py
index 29cf1b2e08dbaf6a67916b8ecdeecf8c5d6d2736..c554228d0bffcc7b92666bed3a3f82dbcc7a4329 100644
--- a/nomad/app/v1/routers/federation.py
+++ b/nomad/app/v1/routers/federation.py
@@ -22,8 +22,9 @@ API endpoint to receive telemetry data (in logstash format) from local installat

 import socket
 import zlib
+from enum import Enum

-from fastapi import Request, HTTPException
+from fastapi import HTTPException, Request
 from fastapi.routing import APIRouter

 from nomad import utils
@@ -32,12 +33,15 @@ from nomad.config import config
 logger = utils.get_logger(__name__)

 router = APIRouter()
-default_tag = 'federation'
+
+
+class APITag(str, Enum):
+    DEFAULT = 'federation'


 @router.post(
     '/logs/',
-    tags=[default_tag],
+    tags=[APITag.DEFAULT],
     summary='Receive logs in logstash format from other Nomad installations and store into central logstash '
     'for further analysis.',
 )
diff --git a/nomad/app/v1/routers/graph.py b/nomad/app/v1/routers/graph.py
index 907e4c866521dc5252c5655fd9e329d1bafaef78..1d1006c53c11f2f8198d23cda71807b6c1003ee8 100644
--- a/nomad/app/v1/routers/graph.py
+++ b/nomad/app/v1/routers/graph.py
@@ -16,24 +16,30 @@
 # limitations under the License.
 #

-from fastapi import Depends, APIRouter, Body, HTTPException
+from enum import Enum
+
+from fastapi import APIRouter, Body, Depends, HTTPException
 from fastapi.responses import ORJSONResponse

+from nomad.app.v1.models.graph import GraphRequest, GraphResponse
 from nomad.graph.graph_reader import (
-    MongoReader,
     ConfigError,
     GeneralReader,
-    UserReader,
+    MongoReader,
     Token,
+    UserReader,
 )
 from nomad.graph.lazy_wrapper import LazyWrapper
+
+from ..models import User
 from .auth import create_user_dependency
 from .entries import EntriesArchive
-from ..models import User
-from nomad.app.v1.models.graph import GraphRequest, GraphResponse

 router = APIRouter()
-default_tag = 'graph'
+
+
+class APITag(str, Enum):
+    DEFAULT = 'graph'


 def unwrap_response(result):
@@ -69,7 +75,7 @@ def relocate_children(request):

 @router.post(
     '/raw_query',
-    tags=[default_tag],
+    tags=[APITag.DEFAULT],
     summary='Query the database with a graph style without verification.',
     description='Query the database with a graph style without verification.',
     response_class=GraphJSONResponse,
@@ -84,7 +90,7 @@ async def raw_query(

 @router.post(
     '/query',
-    tags=[default_tag],
+    tags=[APITag.DEFAULT],
     summary='Query the database with a graph style.',
     description='Query the database with a graph style.',
     response_model=GraphResponse,
@@ -112,7 +118,7 @@ async def basic_query(

 @router.post(
     '/archive/query',
-    tags=[default_tag],
+    tags=[APITag.DEFAULT],
     summary='Search entries and access their archives',
     response_class=GraphJSONResponse,
 )
diff --git a/nomad/app/v1/routers/groups.py b/nomad/app/v1/routers/groups.py
index 26697b9bf23aee9a19f65a0461bcd94f9155b31f..945b95f75706b66025c04ac8970047ae2b48e5bd 100644
--- a/nomad/app/v1/routers/groups.py
+++ b/nomad/app/v1/routers/groups.py
@@ -16,6 +16,8 @@
 # limitations under the License.
 #

+from enum import Enum
+
 from fastapi import APIRouter, Depends, HTTPException, Request, status

 from nomad.app.v1.models.groups import (
@@ -27,11 +29,6 @@ from nomad.app.v1.models.groups import (
 )
 from nomad.app.v1.models.pagination import PaginationResponse
 from nomad.app.v1.utils import parameter_dependency_from_model
-from typing import List, Optional, Set
-
-from fastapi import APIRouter, Depends, HTTPException, Query, status
-from pydantic import ConfigDict, BaseModel, Field
-
 from nomad.datamodel import User as UserDataModel
 from nomad.groups import MongoUserGroup
 from nomad.groups import create_user_group as create_mongo_user_group
@@ -41,7 +38,10 @@ from ..models import User
 from .auth import create_user_dependency

 router = APIRouter()
-default_tag = 'groups'
+
+
+class APITag(str, Enum):
+    DEFAULT = 'groups'


 user_group_query_parameters = parameter_dependency_from_model(
@@ -93,7 +93,7 @@ def check_user_may_edit_user_group(user: User, user_group: MongoUserGroup):

 @router.get(
     '',
-    tags=[default_tag],
+    tags=[APITag.DEFAULT],
     summary='List user groups. Use at most one filter.',
     response_model=UserGroupResponse,
 )
@@ -118,7 +118,7 @@ async def get_user_groups(

 @router.get(
     '/{group_id}',
-    tags=[default_tag],
+    tags=[APITag.DEFAULT],
     summary='Get data about user group.',
     response_model=UserGroup,
 )
@@ -131,7 +131,7 @@ async def get_user_group(group_id: str):

 @router.post(
     '',
-    tags=[default_tag],
+    tags=[APITag.DEFAULT],
     status_code=status.HTTP_201_CREATED,
     summary='Create user group.',
     response_model=UserGroup,
@@ -153,7 +153,7 @@ async def create_user_group(

 @router.post(
     '/{group_id}/edit',
-    tags=[default_tag],
+    tags=[APITag.DEFAULT],
     summary='Update user group.',
     response_model=UserGroup,
 )
@@ -179,7 +179,7 @@ async def update_user_group(

 @router.delete(
     '/{group_id}',
-    tags=[default_tag],
+    tags=[APITag.DEFAULT],
     status_code=status.HTTP_204_NO_CONTENT,
     summary='Delete user group.',
 )
diff --git a/nomad/app/v1/routers/info.py b/nomad/app/v1/routers/info.py
index 8f123c6ec80955219fa4d1e30eee11b1e4b15158..0e63c5778822a8f4cca30a14645ac58435b09d2a 100644
--- a/nomad/app/v1/routers/info.py
+++ b/nomad/app/v1/routers/info.py
@@ -20,24 +20,28 @@ API endpoint that deliver backend configuration details.
 """

-from typing import Dict, Any, List, Optional
 from datetime import datetime
+from enum import Enum
+from typing import Any
+
 from fastapi.routing import APIRouter
 from pydantic.fields import Field
 from pydantic.main import BaseModel

 from nomad import normalizing
+from nomad.app.v1.models import Aggregation, StatisticsAggregation
 from nomad.config import config
-from nomad.utils import strip
-from nomad.search import search
+from nomad.metainfo.elasticsearch_extension import entry_type
 from nomad.parsing import parsers
 from nomad.parsing.parsers import code_metadata
-from nomad.app.v1.models import Aggregation, StatisticsAggregation
-from nomad.metainfo.elasticsearch_extension import entry_type
-
+from nomad.search import search
+from nomad.utils import strip

 router = APIRouter()
-default_tag = 'info'
+
+
+class APITag(str, Enum):
+    DEFAULT = 'info'


 class MetainfoModel(BaseModel):
@@ -89,11 +93,11 @@ class InfoModel(BaseModel):
     normalizers: list[str]
     plugin_entry_points: list[dict] = Field(
         None,
-        desciption='List of plugin entry points that are activated in this deployment.',
+        description='List of plugin entry points that are activated in this deployment.',
     )
     plugin_packages: list[dict] = Field(
         None,
-        desciption='List of plugin packages that are installed in this deployment.',
+        description='List of plugin packages that are installed in this deployment.',
     )
     statistics: StatisticsModel = Field(None, description='General NOMAD statistics')
     search_quantities: dict
@@ -145,7 +149,7 @@ def statistics():

 @router.get(
     '',
-    tags=[default_tag],
+    tags=[APITag.DEFAULT],
     summary='Get information about the nomad backend and its configuration',
     response_model_exclude_unset=True,
     response_model_exclude_none=True,
diff --git a/nomad/app/v1/routers/materials.py b/nomad/app/v1/routers/materials.py
index 579342c5fb1e8e0ec7a6fd89f6642fc8c562300c..70c6ef56b6b52531df4ac31d5a9090e82abcfeb3 100644
--- a/nomad/app/v1/routers/materials.py
+++ b/nomad/app/v1/routers/materials.py
@@ -17,32 +17,36 @@
 #

 from typing import Any
-from fastapi import APIRouter, Depends, Path, status, HTTPException, Request
+
+from fastapi import APIRouter, Depends, HTTPException, Path, Request, status
 from fastapi.exception_handlers import RequestValidationError
 from pydantic import BaseModel, Field

 from nomad import utils
+from nomad.metainfo.elasticsearch_extension import material_index, material_type
+from nomad.search import (
+    AuthenticationRequiredError,
+    QueryValidationError,
+    SearchError,
+    search,
+)
 from nomad.utils import strip
-from nomad.search import AuthenticationRequiredError, SearchError
-from nomad.search import search, QueryValidationError
-from nomad.metainfo.elasticsearch_extension import material_type, material_index

-from .auth import create_user_dependency
-from ..utils import create_responses
 from ..models import (
-    User,
-    Owner,
-    WithQuery,
-    MetadataResponse,
+    HTTPExceptionModel,
     Metadata,
     MetadataPagination,
     MetadataRequired,
+    MetadataResponse,
+    Owner,
+    QueryParameters,
+    User,
+    WithQuery,
     metadata_pagination_parameters,
     metadata_required_parameters,
-    QueryParameters,
-    HTTPExceptionModel,
 )
-
+from ..utils import create_responses
+from .auth import create_user_dependency

 router = APIRouter()
diff --git a/nomad/app/v1/routers/metainfo.py b/nomad/app/v1/routers/metainfo.py
index fe3477ce5b1452b1d45904d97342101d0b937e39..a92fccbb529875004a86ca8dc889a7ab047dea92 100644
--- a/nomad/app/v1/routers/metainfo.py
+++ b/nomad/app/v1/routers/metainfo.py
@@ -17,18 +17,18 @@
 #
 import copy
 import datetime
-from typing import Any, Dict
+from typing import Any

-from fastapi import APIRouter, Path, status, HTTPException
+from fastapi import APIRouter, HTTPException, Path, status
 from pydantic import BaseModel, Field

 from nomad.app.v1.models import HTTPExceptionModel
 from nomad.app.v1.utils import create_responses
-from nomad.metainfo import Package
-from nomad.metainfo.metainfo import MSection, Section, Quantity, Datetime, JSON
-from nomad.metainfo.mongoengine_extension import MongoDocument, Mongo
-from nomad.utils import strip, get_logger
 from nomad.config import config
+from nomad.metainfo import Package
+from nomad.metainfo.metainfo import JSON, Datetime, MSection, Quantity, Section
+from nomad.metainfo.mongoengine_extension import Mongo, MongoDocument
+from nomad.utils import get_logger, strip

 logger = get_logger(__name__)
diff --git a/nomad/app/v1/routers/north.py b/nomad/app/v1/routers/north.py
index 0ad4cb4105f3952479a09455a42f50636e4247e2..73c795bcf98baf1a2e78a37a02c94c2ccfe14312 100644
--- a/nomad/app/v1/routers/north.py
+++ b/nomad/app/v1/routers/north.py
@@ -17,30 +17,33 @@
 #

 import os
-import requests
-
-from typing import List, Dict, Optional
 from enum import Enum
-from nomad.groups import get_group_ids
-from pydantic import BaseModel
-from fastapi import APIRouter, Depends, status, HTTPException
+
+import requests
+from fastapi import APIRouter, Depends, HTTPException, status
 from mongoengine.queryset.visitor import Q
+from pydantic import BaseModel

+from nomad.app.v1.routers.auth import generate_simple_token
 from nomad.config import config
 from nomad.config.models.north import NORTHTool
-from nomad.utils import strip, get_logger, slugify
+from nomad.groups import get_group_ids
 from nomad.processing import Upload
-from nomad.app.v1.routers.auth import generate_simple_token
-from .auth import create_user_dependency, oauth2_scheme
-from ..models import User, HTTPExceptionModel
-from ..utils import create_responses
+from nomad.utils import get_logger, slugify, strip

+from ..models import HTTPExceptionModel, User
+from ..utils import create_responses
+from .auth import create_user_dependency

 TOOLS = {k: v for k, v in config.north.tools.filtered_items()}
-default_tag = 'north'

 router = APIRouter()

+
+class APITag(str, Enum):
+    DEFAULT = 'north'
+
+
 hub_api_headers = {'Authorization': f'Bearer {config.north.hub_service_api_token}'}

 logger = get_logger(__name__)
@@ -108,7 +111,7 @@ def _get_status(tool: ToolModel, user: User) -> ToolModel:

 @router.get(
     '/',
-    tags=[default_tag],
+    tags=[APITag.DEFAULT],
     response_model=ToolsResponseModel,
     summary='Get a list of all configured tools and their current state.',
     response_model_exclude_unset=True,
@@ -135,7 +138,7 @@ async def tool(name: str) -> ToolModel:

 @router.get(
     '/{name}',
-    tags=[default_tag],
+    tags=[APITag.DEFAULT],
     summary='Get information for a specific tool.',
     response_model=ToolResponseModel,
     responses=create_responses(_bad_tool_response),
@@ -153,7 +156,7 @@ async def get_tool(

 @router.post(
     '/{name}',
-    tags=[default_tag],
+    tags=[APITag.DEFAULT],
     response_model=ToolResponseModel,
     summary='Start a tool.',
     response_model_exclude_unset=True,
@@ -301,7 +304,7 @@ async def start_tool(

 @router.delete(
     '/{name}',
-    tags=[default_tag],
+    tags=[APITag.DEFAULT],
     response_model=ToolResponseModel,
     summary='Stop a tool.',
     response_model_exclude_unset=True,
diff --git a/nomad/app/v1/routers/suggestions.py b/nomad/app/v1/routers/suggestions.py
index a528fde90bf9788b78131c7acde5948207713367..2bd6e515e3a2d446c29cde595e9bd5bb126f8195 100644
--- a/nomad/app/v1/routers/suggestions.py
+++ b/nomad/app/v1/routers/suggestions.py
@@ -16,19 +16,18 @@
 # limitations under the License.
 #

-from typing import List, Dict, Optional, Set
 from collections import defaultdict
-from pydantic import BaseModel, Field
-from fastapi import APIRouter, Depends, Request, HTTPException, status
+
+from elasticsearch.exceptions import RequestError
 from elasticsearch_dsl import Search
 from elasticsearch_dsl.utils import AttrList
-from elasticsearch.exceptions import RequestError
+from fastapi import APIRouter, Depends, HTTPException, Request, status
+from pydantic import BaseModel, Field

 from nomad.metainfo.elasticsearch_extension import entry_index, entry_type

-from .auth import create_user_dependency
 from ..models import User
-
+from .auth import create_user_dependency

 router = APIRouter()
diff --git a/nomad/app/v1/routers/systems.py b/nomad/app/v1/routers/systems.py
index d392014a7fb30e1d0665c71b38140a0b0a731076..c92e1c44f4520b1198a395817a214afec987ccc9 100644
--- a/nomad/app/v1/routers/systems.py
+++ b/nomad/app/v1/routers/systems.py
@@ -15,33 +15,33 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
-from typing import Dict, List, Union
-from io import StringIO, BytesIO
 import sys
 from collections import OrderedDict
 from enum import Enum
+from io import BytesIO, StringIO

+import ase.build
+import ase.io
 import numpy as np
-from fastapi import APIRouter, Depends, Path, Query, status, HTTPException
+from fastapi import APIRouter, Depends, HTTPException, Path, Query, status
 from fastapi.responses import Response
-import ase.io
-import ase.build

-from nomad.units import ureg
-from nomad.utils import strip, deep_get, query_list_to_dict
-from nomad.atomutils import Formula, wrap_positions, unwrap_positions
-from nomad.normalizing.common import (
-    ase_atoms_from_nomad_atoms,
-)
+from nomad.atomutils import Formula, unwrap_positions, wrap_positions
 from nomad.datamodel.metainfo.system import Atoms as NOMADAtoms
-from .entries import answer_entry_archive_request
+from nomad.normalizing.common import ase_atoms_from_nomad_atoms
+from nomad.units import ureg
+from nomad.utils import deep_get, query_list_to_dict, strip

-from .auth import create_user_dependency
+from ..models import HTTPExceptionModel, User
 from ..utils import create_responses
-from ..models import User, HTTPExceptionModel
+from .auth import create_user_dependency
+from .entries import answer_entry_archive_request

 router = APIRouter()
-default_tag = 'systems'
+
+
+class APITag(str, Enum):
+    DEFAULT = 'systems'


 def write_pdb(atoms: NOMADAtoms, entry_id: str = None, formula: str = None) -> str:
@@ -284,7 +284,7 @@ _serialization_error_response = (

 @router.get(
     '/{entry_id}',
-    tags=[default_tag],
+    tags=[APITag.DEFAULT],
     summary=strip(
         """
         Build and retrieve an atomistic structure file from data within an entry.
diff --git a/nomad/app/v1/routers/uploads.py b/nomad/app/v1/routers/uploads.py
index 3043cc64b41cdf0d5c4b2918d80876bb77ad3635..fa6ff235ca0c7454634a8c64fe93f062507048f1 100644
--- a/nomad/app/v1/routers/uploads.py
+++ b/nomad/app/v1/routers/uploads.py
@@ -15,98 +15,91 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
# -import os import io +import os import shutil -from enum import Enum +import tarfile +import zipfile from datetime import datetime -from typing import Tuple, List, Set, Dict, Any, Optional, Union, cast -from pydantic import ( - field_validator, - ConfigDict, - BaseModel, - Field, - model_validator, -) -from mongoengine.queryset.visitor import Q +from enum import Enum +from typing import Any, cast from urllib.parse import unquote + from fastapi import ( APIRouter, - Request, + Body, + Depends, File, + HTTPException, + Path, + Request, UploadFile, status, - Depends, - Body, - Path, - Query as FastApiQuery, - HTTPException, ) -from fastapi.responses import StreamingResponse, FileResponse +from fastapi import Query as FastApiQuery from fastapi.exceptions import RequestValidationError +from fastapi.responses import FileResponse, StreamingResponse +from mongoengine.queryset.visitor import Q +from pydantic import BaseModel, ConfigDict, Field, field_validator, model_validator from pydantic_core import PydanticCustomError -from nomad import utils, files -from nomad.common import is_safe_relative_path, is_safe_basename -from nomad.config import config -from nomad.config.models.plugins import ExampleUploadEntryPoint -from nomad.files import ( - StagingUploadFiles, - PublicUploadFiles, -) +from nomad import files, utils from nomad.bundles import BundleExporter, BundleImporter +from nomad.common import get_compression_format, is_safe_basename, is_safe_relative_path +from nomad.config import config from nomad.config.models.config import Reprocess +from nomad.config.models.plugins import ExampleUploadEntryPoint +from nomad.files import PublicUploadFiles, StagingUploadFiles from nomad.groups import get_group_ids from nomad.processing import ( - Upload, Entry, + MetadataEditRequestHandler, ProcessAlreadyRunning, ProcessStatus, - MetadataEditRequestHandler, + Upload, ) -from nomad.common import get_compression_format +from nomad.search import QueryValidationError, search, search_iterator +from nomad.search import refresh as search_refresh from nomad.utils import strip -from nomad.search import ( - search, - search_iterator, - refresh as search_refresh, - QueryValidationError, -) -from .auth import create_user_dependency, generate_upload_token from ..models import ( - MetadataPagination, - User, Direction, - Pagination, - PaginationResponse, - HTTPExceptionModel, Files, - files_parameters, + HTTPExceptionModel, + MetadataEditRequest, + MetadataPagination, + MetadataRequired, Owner, + Pagination, + PaginationResponse, + User, WithQuery, - MetadataRequired, - MetadataEditRequest, + files_parameters, restrict_query_to_upload, ) -from .entries import EntryArchiveResponse, answer_entry_archive_request from ..utils import ( - parameter_dependency_from_model, - create_responses, DownloadItem, browser_download_headers, - create_download_stream_zipped, create_download_stream_raw_file, + create_download_stream_zipped, + create_responses, create_stream_from_string, + parameter_dependency_from_model, ) +from .auth import create_user_dependency, generate_upload_token +from .entries import EntryArchiveResponse, answer_entry_archive_request router = APIRouter() -default_tag = 'uploads' -metadata_tag = 'uploads/metadata' -raw_tag = 'uploads/raw' -archive_tag = 'uploads/archive' -action_tag = 'uploads/action' -bundle_tag = 'uploads/bundle' + + +class APITag(str, Enum): + DEFAULT = 'uploads' + METADATA = 'uploads/metadata' + RAW = 'uploads/raw' + ARCHIVE = 'uploads/archive' + ACTION = 'uploads/action' + BUNDLE = 
'uploads/bundle' + logger = utils.get_logger(__name__) @@ -139,7 +132,7 @@ class ProcData(BaseModel): 'process is currently running.', ) errors: list[str] = Field( - descriptions='A list of error messages that occurred during the last processing' + description='A list of error messages that occurred during the last processing' ) warnings: list[str] = Field( description='A list of warning messages that occurred during the last processing' @@ -737,7 +730,7 @@ and publish your data.""" @router.get( '/command-examples', - tags=[default_tag], + tags=[APITag.DEFAULT], summary='Get example commands for shell based uploads.', response_model=UploadCommandExamplesResponse, responses=create_responses(_not_authorized), @@ -768,7 +761,7 @@ async def get_command_examples( @router.get( '', - tags=[metadata_tag], + tags=[APITag.METADATA], summary='List uploads of authenticated user.', response_model=UploadProcDataQueryResponse, responses=create_responses(_not_authorized, _bad_pagination), @@ -835,7 +828,7 @@ async def get_uploads( @router.get( '/{upload_id}', - tags=[metadata_tag], + tags=[APITag.METADATA], summary='Get a specific upload', response_model=UploadProcDataResponse, responses=create_responses(_upload_not_found, _not_authorized_to_upload), @@ -857,7 +850,7 @@ async def get_upload( @router.get( '/{upload_id}/entries', - tags=[metadata_tag], + tags=[APITag.METADATA], summary='Get the entries of the specific upload as a list', response_model=EntryProcDataQueryResponse, responses=create_responses( @@ -934,7 +927,7 @@ async def get_upload_entries( @router.get( '/{upload_id}/entries/{entry_id}', - tags=[metadata_tag], + tags=[APITag.METADATA], summary='Get a specific entry for a specific upload', response_model=EntryProcDataResponse, responses=create_responses(_entry_not_found, _not_authorized_to_entry), @@ -970,7 +963,7 @@ async def get_upload_entry( @router.get( '/{upload_id}/rawdir/{path:path}', - tags=[raw_tag], + tags=[APITag.RAW], summary='Get the metadata for the raw file or folder located at the specified path in the specified upload.', response_model=RawDirResponse, responses=create_responses( @@ -1085,7 +1078,7 @@ async def get_upload_rawdir_path( @router.get( '/{upload_id}/raw', - tags=[raw_tag], + tags=[APITag.RAW], summary='Downloads the published upload .zip file with all the raw files of the upload.', response_class=StreamingResponse, responses=create_responses( @@ -1133,7 +1126,7 @@ async def get_upload_raw( @router.get( '/{upload_id}/raw/{path:path}', - tags=[raw_tag], + tags=[APITag.RAW], summary='Download the raw file or folder located at the specified path in the specified upload.', response_class=StreamingResponse, responses=create_responses( @@ -1308,7 +1301,7 @@ async def get_upload_raw_path( @router.put( '/{upload_id}/raw/{path:path}', - tags=[raw_tag], + tags=[APITag.RAW], summary='Upload a raw file to the specified path (directory) in the specified upload.', response_class=StreamingResponse, responses=create_responses( @@ -1625,7 +1618,7 @@ async def put_upload_raw_path( @router.delete( '/{upload_id}/raw/{path:path}', - tags=[raw_tag], + tags=[APITag.RAW], summary='Delete the raw file or folder located at the specified path in the specified upload.', response_model=UploadProcDataResponse, responses=create_responses( @@ -1674,7 +1667,7 @@ async def delete_upload_raw_path( @router.post( '/{upload_id}/raw-create-dir/{path:path}', - tags=[raw_tag], + tags=[APITag.RAW], summary='Create a new empty directory with the specified path in the specified upload.', 
     response_model=UploadProcDataResponse,
     responses=create_responses(
@@ -1721,7 +1714,7 @@ async def post_upload_raw_create_dir_path(

 @router.get(
     '/{upload_id}/archive/mainfile/{mainfile:path}',
-    tags=[archive_tag],
+    tags=[APITag.ARCHIVE],
     summary='Get the full archive for the given upload and mainfile path.',
     response_model=EntryArchiveResponse,
     response_model_exclude_unset=True,
@@ -1751,7 +1744,7 @@ async def get_upload_entry_archive_mainfile(

 @router.get(
     '/{upload_id}/archive/{entry_id}',
-    tags=[archive_tag],
+    tags=[APITag.ARCHIVE],
     summary='Get the full archive for the given upload and entry.',
     response_model=EntryArchiveResponse,
     response_model_exclude_unset=True,
@@ -1775,7 +1768,7 @@ async def get_upload_entry_archive(

 @router.post(
     '',
-    tags=[default_tag],
+    tags=[APITag.DEFAULT],
     summary='Submit a new upload',
     response_class=StreamingResponse,
     responses=create_responses(_post_upload_response, _not_authorized, _bad_request),
@@ -1961,7 +1954,7 @@ async def post_upload(

 @router.post(
     '/{upload_id}/edit',
-    tags=[metadata_tag],
+    tags=[APITag.METADATA],
     summary='Updates the metadata of the specified upload.',
     response_model=UploadProcDataResponse,
     responses=create_responses(
@@ -2008,7 +2001,7 @@ async def post_upload_edit(

 @router.delete(
     '/{upload_id}',
-    tags=[default_tag],
+    tags=[APITag.DEFAULT],
     summary='Delete an upload',
     response_model=UploadProcDataResponse,
     responses=create_responses(
@@ -2054,7 +2047,7 @@ async def delete_upload(

 @router.post(
     '/{upload_id}/action/publish',
-    tags=[action_tag],
+    tags=[APITag.ACTION],
     summary='Publish an upload',
     response_model=UploadProcDataResponse,
     responses=create_responses(
@@ -2162,7 +2155,7 @@ async def post_upload_action_publish(

 @router.post(
     '/{upload_id}/action/process',
-    tags=[action_tag],
+    tags=[APITag.ACTION],
     summary='Manually triggers processing of an upload.',
     response_model=UploadProcDataResponse,
     responses=create_responses(
@@ -2191,7 +2184,7 @@ async def post_upload_action_process(

 @router.post(
     '/{upload_id}/action/delete-entry-files',
-    tags=[action_tag],
+    tags=[APITag.ACTION],
     summary='Deletes the files of the entries specified by a query.',
     response_model=UploadProcDataResponse,
     responses=create_responses(
@@ -2262,7 +2255,7 @@ async def post_upload_action_delete_entry_files(

 @router.post(
     '/{upload_id}/action/lift-embargo',
-    tags=[action_tag],
+    tags=[APITag.ACTION],
     summary='Lifts the embargo of an upload.',
     response_model=UploadProcDataResponse,
     responses=create_responses(
@@ -2314,7 +2307,7 @@ async def post_upload_action_lift_embargo(

 @router.get(
     '/{upload_id}/bundle',
-    tags=[bundle_tag],
+    tags=[APITag.BUNDLE],
     summary='Gets an *upload bundle* for the specified upload.',
     response_class=StreamingResponse,
     responses=create_responses(
@@ -2388,7 +2381,7 @@ async def get_upload_bundle(

 @router.post(
     '/bundle',
-    tags=[bundle_tag],
+    tags=[APITag.BUNDLE],
     summary='Posts an *upload bundle* to this NOMAD deployment.',
     response_model=UploadProcDataResponse,
     responses=create_responses(_not_authorized, _bad_request),
@@ -2691,9 +2684,9 @@ async def _get_files_if_provided(
         # Only ok if uploaded file is a zip or a tar archive.
         ext = (
             '.zip'
-            if files.zipfile.is_zipfile(upload_path)
+            if zipfile.is_zipfile(upload_path)
             else '.tar'
-            if files.tarfile.is_tarfile(upload_path)
+            if tarfile.is_tarfile(upload_path)
             else None
         )
         if not ext:
diff --git a/nomad/app/v1/routers/users.py b/nomad/app/v1/routers/users.py
index 3b47845604a862381f4434284b4f7d06a485b952..3e48ee899559efa081d41d6c16481e6ea2200028 100644
--- a/nomad/app/v1/routers/users.py
+++ b/nomad/app/v1/routers/users.py
@@ -16,20 +16,24 @@
 # limitations under the License.
 #

-from typing import List, Union, Optional
-from fastapi import Depends, APIRouter, status, HTTPException, Query
+from enum import Enum
+
+from fastapi import APIRouter, Depends, HTTPException, Query, status
 from pydantic.main import BaseModel

-from nomad import infrastructure, datamodel
+from nomad import datamodel, infrastructure
 from nomad.config import config
 from nomad.utils import strip

-from .auth import create_user_dependency
-from ..models import User, HTTPExceptionModel
+from ..models import HTTPExceptionModel, User
 from ..utils import create_responses
+from .auth import create_user_dependency

 router = APIRouter()
-default_tag = 'users'
+
+
+class APITag(str, Enum):
+    DEFAULT = 'users'


 _authentication_required_response = (
@@ -62,7 +66,7 @@ class Users(BaseModel):

 @router.get(
     '/me',
-    tags=[default_tag],
+    tags=[APITag.DEFAULT],
     summary='Get your account data',
     description='Returns the account data of the authenticated user.',
     responses=create_responses(_authentication_required_response),
@@ -83,7 +87,7 @@ async def read_users_me(

 @router.get(
     '',
-    tags=[default_tag],
+    tags=[APITag.DEFAULT],
     summary='Get existing users',
     description='Get existing users for given criteria',
     response_model_exclude_unset=True,
@@ -163,7 +167,7 @@ class PublicUserInfo(BaseModel):

 @router.get(
     '/{user_id}',
-    tags=[default_tag],
+    tags=[APITag.DEFAULT],
     summary='Get existing users',
     description='Get the user using the given user_id',
     response_model_exclude_unset=True,
@@ -178,7 +182,7 @@ async def get_user(user_id: str):

 @router.put(
     '/invite',
-    tags=[default_tag],
+    tags=[APITag.DEFAULT],
     summary='Invite a new user',
     responses=create_responses(_authentication_required_response, _bad_invite_response),
     response_model=User,
diff --git a/nomad/app/v1/utils.py b/nomad/app/v1/utils.py
index 315c2de571d2d29a3e2fba35c29166efb564b8dd..c62316fe4e8a02263d5a69014c25d6ea913939d7 100644
--- a/nomad/app/v1/utils.py
+++ b/nomad/app/v1/utils.py
@@ -16,19 +16,21 @@
 # limitations under the License.
 #

-from typing import List, Dict, Tuple, Set, Iterator, Any, Optional, Union
-from collections.abc import Iterator
-from types import FunctionType
-import urllib
+import gzip
+import inspect
 import io
 import json
-import os
-import inspect
-from fastapi import Request, Query, HTTPException, status  # pylint: disable=unused-import
-from pydantic import ValidationError, BaseModel  # pylint: disable=unused-import
-import gzip
 import lzma
-from nomad.files import UploadFiles, StreamedFile, create_zipstream
+import os
+import urllib
+from collections.abc import Iterator
+from types import FunctionType
+from typing import Any
+
+from fastapi import HTTPException, Query, Request, status  # noqa: F401
+from pydantic import BaseModel, ValidationError  # noqa: F401
+
+from nomad.files import StreamedFile, UploadFiles, create_zipstream


 def parameter_dependency_from_model(
diff --git a/nomad/archive/converter.py b/nomad/archive/converter.py
index b72233f3ffdf3053471751b669d3cd11ca181780..96b6e494fd78e1b69613958571f556b81c4273ef 100644
--- a/nomad/archive/converter.py
+++ b/nomad/archive/converter.py
@@ -21,15 +21,14 @@ import functools
 import hashlib
 import os.path
 import signal
+from collections.abc import Callable, Iterable
 from concurrent.futures import ProcessPoolExecutor
 from multiprocessing import Manager
-from collections.abc import Callable
-from collections.abc import Iterable

-from nomad.config import config
-from nomad.archive import to_json, read_archive
+from nomad.archive import read_archive, to_json
 from nomad.archive.storage_v2 import ArchiveWriter as ArchiveWriterNew
-from nomad.files import StagingUploadFiles, PublicUploadFiles
+from nomad.config import config
+from nomad.files import PublicUploadFiles, StagingUploadFiles
 from nomad.infrastructure import setup
 from nomad.processing import Upload
diff --git a/nomad/archive/partial.py b/nomad/archive/partial.py
index a1b0e44a244bbb432380916cf742b128d9fc1194..6d0f4012f0096975be51eb4ab53d6596a5b4436b 100644
--- a/nomad/archive/partial.py
+++ b/nomad/archive/partial.py
@@ -16,20 +16,20 @@
 # limitations under the License.
 #

-from typing import Any, Tuple, Dict, Union, List
+from typing import Any

 from nomad import infrastructure
 from nomad.config import config
+from nomad.datamodel import EntryArchive
+from nomad.datamodel.metainfo.common import FastAccess
 from nomad.metainfo import (
-    MSection,
     Definition,
+    MSection,
     Quantity,
     Reference,
-    SubSection,
     Section,
+    SubSection,
 )
-from nomad.datamodel import EntryArchive
-from nomad.datamodel.metainfo.common import FastAccess


 def create_partial_archive(archive: EntryArchive) -> dict:
diff --git a/nomad/archive/query.py b/nomad/archive/query.py
index 05bbc2700eade759945cfa1312fc3c1917332784..09696fcdbcb95ce81219b57fe8511f4e984ce78a 100644
--- a/nomad/archive/query.py
+++ b/nomad/archive/query.py
@@ -18,13 +18,13 @@

 import functools
 import re
-from typing import Any, Dict, Union, Tuple
 from collections.abc import Callable
 from io import BytesIO
+from typing import Any

 from nomad import utils

-from .storage import ArchiveReader, ArchiveList, ArchiveDict, to_json, read_archive
+from .storage import ArchiveDict, ArchiveList, ArchiveReader, read_archive, to_json

 _query_archive_key_pattern = re.compile(r'^([\s\w\-]+)(\[([-?0-9]*)(:([-?0-9]*))?])?$')
diff --git a/nomad/archive/required.py b/nomad/archive/required.py
index 6a08652900df662c92b9eb958cf3c29d149e938b..402e116ad6fdaea53b8cfb08ae25137145984c31 100644
--- a/nomad/archive/required.py
+++ b/nomad/archive/required.py
@@ -21,31 +21,32 @@ import copy
 import dataclasses
 import functools
 import re
-from typing import cast, Union, Dict, Tuple
+from typing import cast

 from fastapi import HTTPException

 from nomad import utils
 from nomad.metainfo import (
     Definition,
-    Section,
+    Package,
     Quantity,
-    SubSection,
-    Reference,
     QuantityReference,
+    Reference,
+    Section,
     SectionReference,
-    Package,
+    SubSection,
 )
+
+from ..datamodel.context import ServerContext, parse_path
 from .query import (
     ArchiveQueryError,
-    to_json,
-    _query_archive_key_pattern,
-    _extract_key_and_index,
     _extract_child,
+    _extract_key_and_index,
+    _query_archive_key_pattern,
+    to_json,
 )
-from .storage import ArchiveReader, ArchiveList, ArchiveError, ArchiveDict
+from .storage import ArchiveDict, ArchiveError, ArchiveList, ArchiveReader
 from .storage_v2 import ArchiveDict as NewArchiveDict
-from ..datamodel.context import parse_path, ServerContext


 class RequiredValidationError(Exception):
diff --git a/nomad/archive/storage.py b/nomad/archive/storage.py
index 6d33ecfdd378ec709decfa3d6d476bbe26f5b7ed..29102e1eae6ddd68cf10542a797954c4804d41f2 100644
--- a/nomad/archive/storage.py
+++ b/nomad/archive/storage.py
@@ -17,12 +17,10 @@
 #
 from __future__ import annotations

-from typing import Any, Tuple, Dict, Union, cast
-from collections.abc import Generator
-from io import BytesIO, BufferedReader
-from collections.abc import Mapping, Sequence
-
 import struct
+from collections.abc import Generator, Mapping, Sequence
+from io import BufferedReader, BytesIO
+from typing import Any, cast

 import msgspec

@@ -328,10 +326,8 @@ def read_archive(file_or_path: str | BytesIO, **kwargs) -> ArchiveReader:
     will lazily load data as it is used. The mapping needs to be closed or used within
     a 'with' statement to free the underlying file resource after use.
""" - from .storage_v2 import ( - ArchiveWriter as ArchiveWriterNew, - ArchiveReader as ArchiveReaderNew, - ) + from .storage_v2 import ArchiveReader as ArchiveReaderNew + from .storage_v2 import ArchiveWriter as ArchiveWriterNew # todo: replace implementation to enable automatic conversion # if isinstance(file_or_path, str): diff --git a/nomad/archive/storage_v2.py b/nomad/archive/storage_v2.py index 6a5d1e59af5aa7ad6dd82a3b87d647c44927df1c..ffb8fdaf6fdfdc0eca76797f591756a71ea0934a 100644 --- a/nomad/archive/storage_v2.py +++ b/nomad/archive/storage_v2.py @@ -27,8 +27,8 @@ from bitarray import bitarray from msgpack import Unpacker from nomad import utils -from nomad.config import config from nomad.archive import ArchiveError +from nomad.config import config _packer = msgpack.Packer(autoreset=True, use_bin_type=True) diff --git a/nomad/atomutils.py b/nomad/atomutils.py index 0e142213d4d1c50728d9e681366be3102764b97d..d0deb7ded1bd8e66ea46a6b4bb693d7ad44391b1 100644 --- a/nomad/atomutils.py +++ b/nomad/atomutils.py @@ -23,18 +23,10 @@ import itertools import logging import math import re +from collections.abc import Iterable from functools import reduce from string import ascii_uppercase -from typing import ( - TYPE_CHECKING, - Any, - Dict, - List, - Tuple, - Union, - cast, -) -from collections.abc import Iterable +from typing import TYPE_CHECKING, Any, cast import ase.data import ase.geometry diff --git a/nomad/bundles.py b/nomad/bundles.py index 126f0a2c71aa003a72ee277beab12c621f05efff..93c45612e1fa0d47e75ed604e9ae4406517a5794 100644 --- a/nomad/bundles.py +++ b/nomad/bundles.py @@ -8,35 +8,36 @@ # See the License for the specific language governing permissions and # limitations under the License. # -from typing import cast, Any, Tuple, List, Set, Dict -from collections.abc import Iterable -import os import json +import os +from collections.abc import Iterable from datetime import datetime, timedelta +from typing import Any, cast + +from fastapi import HTTPException, status from packaging import version -from nomad import utils, datamodel, search +from nomad import datamodel, search, utils from nomad.config import config -from nomad.config.models.config import BundleImportSettings, BundleExportSettings +from nomad.config.models.config import BundleExportSettings, BundleImportSettings from nomad.files import ( - zipfile, + BrowsableFileSource, + CombinedFileSource, + DiskFileSource, + FileSource, PathObject, - UploadFiles, PublicUploadFiles, StagingUploadFiles, - FileSource, - BrowsableFileSource, - CombinedFileSource, + StandardJSONDecoder, StreamedFileSource, - DiskFileSource, + UploadFiles, ZipFileSource, - json_to_streamed_file, bundle_info_filename, - StandardJSONDecoder, + json_to_streamed_file, + zipfile, ) from nomad.processing.base import ProcessStatus -from nomad.processing.data import Upload, Entry, mongo_entry_metadata -from fastapi import HTTPException, status +from nomad.processing.data import Entry, Upload, mongo_entry_metadata class BundleExporter: diff --git a/nomad/cli/admin/admin.py b/nomad/cli/admin/admin.py index 7c02a16cdd51457e9b241def679e6d26b8a492ba..75f5cc88179cf5d44b972ddc2b668d05fa898419 100644 --- a/nomad/cli/admin/admin.py +++ b/nomad/cli/admin/admin.py @@ -18,8 +18,8 @@ import click -from nomad.config import config from nomad.cli.cli import cli +from nomad.config import config @cli.group( @@ -68,7 +68,8 @@ def reset(remove, i_am_really_sure): def reset_processing(zero_complete_time): from datetime import datetime - from nomad import infrastructure, processing 
as proc + from nomad import infrastructure + from nomad import processing as proc infrastructure.setup_mongo() @@ -109,9 +110,11 @@ def reset_processing(zero_complete_time): ) def lift_embargo(dry, parallel): from datetime import datetime + from dateutil.relativedelta import relativedelta - from nomad import infrastructure, processing as proc + from nomad import infrastructure + from nomad import processing as proc from nomad.search import quantity_values infrastructure.setup_mongo() @@ -415,6 +418,7 @@ def migrate_mongo( import sys from pymongo.database import Database + from nomad import infrastructure from nomad.cli.admin import migrate @@ -490,9 +494,10 @@ def migrate_mongo( ) def rewrite_doi_urls(dois, dry, save_existing_records): import json + import requests - from nomad.doi import edit_doi_url, _create_dataset_url + from nomad.doi import _create_dataset_url, edit_doi_url existing_records = [] diff --git a/nomad/cli/admin/clean.py b/nomad/cli/admin/clean.py index 6ca3d6c7ca5773b59bf79b7a716508f890d9be98..2522466c06d18e88161157447efe52e7d6264092 100644 --- a/nomad/cli/admin/clean.py +++ b/nomad/cli/admin/clean.py @@ -39,13 +39,13 @@ from .admin import admin def clean(dry, skip_entries, skip_fs, skip_es, staging_too, force): import os import shutil - import tabulate + import elasticsearch_dsl + import tabulate from nomad import infrastructure, processing from nomad.config import config as nomad_config - from nomad.search import delete_by_query - from nomad.search import quantity_values + from nomad.search import delete_by_query, quantity_values mongo_client = infrastructure.setup_mongo() infrastructure.setup_elastic() diff --git a/nomad/cli/admin/entries.py b/nomad/cli/admin/entries.py index 54ac693eed7dcd6510287497f0733e912432729a..91728ae86d3c413947cacffb9e98d7d3782efe7c 100644 --- a/nomad/cli/admin/entries.py +++ b/nomad/cli/admin/entries.py @@ -33,7 +33,8 @@ def entries(): ) @click.option('--skip-mongo', help='Keep uploads and entries in mongo.', is_flag=True) def rm(entries, skip_es, skip_mongo): - from nomad import processing as proc, infrastructure, search + from nomad import infrastructure, search + from nomad import processing as proc infrastructure.setup_mongo() infrastructure.setup_elastic() diff --git a/nomad/cli/admin/migrate.py b/nomad/cli/admin/migrate.py index e857c013e657121fb330afa213aee002ea31e4f1..5f28df89b19ae28ec8b7378d3a8545489e355b8c 100644 --- a/nomad/cli/admin/migrate.py +++ b/nomad/cli/admin/migrate.py @@ -18,16 +18,16 @@ import time from datetime import datetime -from typing import List, Dict, Set, Any, Optional -from pydantic import BaseModel +from typing import Any +from pydantic import BaseModel from pymongo import ReplaceOne -from pymongo.database import Database, Collection +from pymongo.database import Collection, Database + from nomad import utils -from nomad.processing import ProcessStatus, Upload, Entry from nomad.datamodel import Dataset from nomad.parsing.parsers import parser_dict - +from nomad.processing import Entry, ProcessStatus, Upload _upload_keys_to_remove_v0 = ( 'published', @@ -69,10 +69,10 @@ class _CollectionStatistics(BaseModel): class _UpgradeStatistics(BaseModel): - uploads = _CollectionStatistics(collection_name='Uploads') - entries = _CollectionStatistics(collection_name='Entries') - datasets = _CollectionStatistics(collection_name='Datasets') - dois = _CollectionStatistics(collection_name='DOIs') + uploads: _CollectionStatistics = _CollectionStatistics(collection_name='Uploads') + entries: _CollectionStatistics = 
_CollectionStatistics(collection_name='Entries') + datasets: _CollectionStatistics = _CollectionStatistics(collection_name='Datasets') + dois: _CollectionStatistics = _CollectionStatistics(collection_name='DOIs') class _DatasetCacheItem(BaseModel): diff --git a/nomad/cli/admin/run.py b/nomad/cli/admin/run.py index ab846f3e0e203bbe039094be8d0393eac403915d..1739b89c1e2dc22d887f9172f5dec9c5e68002ed 100644 --- a/nomad/cli/admin/run.py +++ b/nomad/cli/admin/run.py @@ -75,9 +75,9 @@ def run_app( # port = port or config.services.api_port if with_gui: + import glob import os import os.path - import glob import shutil gui_folder = os.path.abspath( @@ -122,9 +122,10 @@ def run_app( from nomad.utils import get_logger if gunicorn: - from gunicorn.app.wsgiapp import WSGIApplication import logging.config + from gunicorn.app.wsgiapp import WSGIApplication + if log_config: logging.config.fileConfig(log_config) @@ -152,7 +153,7 @@ def run_app( get_logger(__name__).info('created gunicorn server', data=str(gunicorn_app.cfg)) gunicorn_app.run() else: - from uvicorn import Server, Config + from uvicorn import Config, Server kwargs['log_config'] = log_config @@ -182,10 +183,11 @@ def run_worker(*, workers=None): def run_hub(): - from jupyterhub.app import main - import sys import os import subprocess + import sys + + from jupyterhub.app import main if 'JUPYTERHUB_CRYPT_KEY' not in os.environ: crypt_key = config.north.jupyterhub_crypt_key diff --git a/nomad/cli/admin/springer.py b/nomad/cli/admin/springer.py index 7c468d179682ee84ea3c5c333073a538a40b2444..dec3b88fdc0c4f71670124933d6757971dceebcc 100644 --- a/nomad/cli/admin/springer.py +++ b/nomad/cli/admin/springer.py @@ -23,17 +23,18 @@ http://materials.springer.com. The database is stuctured as space_group_number : normalized_formula : springer_id : entry """ -from typing import Dict, List, Any -import requests +import os.path import re -import bs4 import time -import os.path +from typing import Any + +import bs4 +import requests import nomad.archive.storage_v2 from nomad import archive -from nomad.config import config from nomad.archive import read_archive +from nomad.config import config required_items = { 'Alphabetic Formula:': 'alphabetic_formula', diff --git a/nomad/cli/admin/uploads.py b/nomad/cli/admin/uploads.py index 258e8affdbe03cb23a239b250ac28f59d5de3921..c55c64948f43df3568160bffa7438f1f9bc04783 100644 --- a/nomad/cli/admin/uploads.py +++ b/nomad/cli/admin/uploads.py @@ -20,12 +20,12 @@ import json import os import os.path import traceback -import typing import click from orjson import dumps from nomad.config import config + from .admin import admin @@ -35,7 +35,8 @@ def _run_parallel( import threading import time - from nomad import utils, processing as proc + from nomad import processing as proc + from nomad import utils if isinstance(uploads, tuple | list): uploads_count = len(uploads) @@ -238,11 +239,13 @@ def _query_uploads( list of upoad ids and further filter parameters. 
""" - from typing import Set, cast import json + from typing import cast + from mongoengine import Q - from nomad import infrastructure, processing as proc, search + from nomad import infrastructure, search + from nomad import processing as proc from nomad.app.v1 import models infrastructure.setup_mongo() @@ -352,13 +355,14 @@ def _query_uploads( @click.pass_context def export(ctx, uploads, required, output: str): import sys - from nomad.processing import Entry - from nomad.utils import get_logger - from nomad.files import UploadFiles - from nomad.archive import ArchiveQueryError, RequiredReader import time import zipfile + from nomad.archive import ArchiveQueryError, RequiredReader + from nomad.files import UploadFiles + from nomad.processing import Entry + from nomad.utils import get_logger + logger = get_logger(__name__) if not output: @@ -630,7 +634,8 @@ def index(ctx, uploads, parallel, transformer, skip_materials, print_progress): def delete_upload( upload, skip_es: bool = False, skip_files: bool = False, skip_mongo: bool = False ): - from nomad import search, files, utils, processing as proc + from nomad import files, search, utils + from nomad import processing as proc # delete elastic if not skip_es: @@ -783,7 +788,8 @@ def re_pack(ctx, uploads): def stop(ctx, uploads, entries: bool, kill: bool, no_celery: bool): import mongoengine - from nomad import utils, processing as proc + from nomad import processing as proc + from nomad import utils query, _ = _query_uploads(uploads, **ctx.obj.uploads_kwargs) @@ -912,9 +918,8 @@ def integrity( ): from nomad.app.v1.models import MetadataPagination, MetadataRequired from nomad.archive.storage_v2 import ArchiveWriter - from nomad.files import StagingUploadFiles, PublicUploadFiles - from nomad.processing import Entry - from nomad.processing import Upload + from nomad.files import PublicUploadFiles, StagingUploadFiles + from nomad.processing import Entry, Upload from nomad.search import search def search_params(upload_id: str): @@ -1301,8 +1306,8 @@ def export_bundle( def import_bundle( ctx, input_path, multi, settings, embargo_length, use_celery, ignore_errors ): - from nomad.bundles import BundleImporter from nomad import infrastructure + from nomad.bundles import BundleImporter for key, value in ctx.obj.uploads_kwargs.items(): if value: diff --git a/nomad/cli/admin/users.py b/nomad/cli/admin/users.py index 21253fb313395efbbc99604d35c864f22b40e3ed..7752903f2977a4f8dc0431260abfd215c72aa258 100644 --- a/nomad/cli/admin/users.py +++ b/nomad/cli/admin/users.py @@ -29,10 +29,10 @@ def users(): @users.command(help='Import users to keycloak from a JSON file.', name='import') @click.argument('PATH_TO_USERS_FILE', type=str, nargs=1) def import_command(path_to_users_file): - import json import datetime + import json - from nomad import infrastructure, datamodel, utils + from nomad import datamodel, infrastructure, utils with open(path_to_users_file) as f: users = json.load(f) diff --git a/nomad/cli/aflow.py b/nomad/cli/aflow.py index 6912ec39dde56d95bd5106ead7305b83d0efadcf..eec1729ad7319cf9e3cce68591a3270b9ee55de3 100644 --- a/nomad/cli/aflow.py +++ b/nomad/cli/aflow.py @@ -20,28 +20,27 @@ # code will fail. # TODO The metadata should not be set via API, but added to the uploads as nomad.json. 
-from typing import List -import requests +import io +import json +import os import re import subprocess -from urllib import parse as urllib_parse -import os import tarfile import threading import time -import typing -import io -import re import uuid -import json -import numpy as np +from urllib import parse as urllib_parse + import ase import bs4 import matid # pylint: disable=import-error +import numpy as np +import requests -from nomad import atomutils, client, processing as proc -from nomad.config import config +from nomad import atomutils, client +from nomad import processing as proc from nomad.client import api, upload_file +from nomad.config import config class DbUpdater: diff --git a/nomad/cli/cli.py b/nomad/cli/cli.py index 4120b883cc02d3b73789554071ff3c2fa4af150d..316293d1cbfc0fe6fd86009c60130b078289a95a 100644 --- a/nomad/cli/cli.py +++ b/nomad/cli/cli.py @@ -16,10 +16,11 @@ # limitations under the License. # -import click import logging import os +import click + from nomad import utils from nomad.config import config diff --git a/nomad/cli/client/integrationtests.py b/nomad/cli/client/integrationtests.py index 744cba59c894a99e755cf379fc887877e2ba088d..0f99ca2327b36d8faf2d049f1de3a4e23eed4393 100644 --- a/nomad/cli/client/integrationtests.py +++ b/nomad/cli/client/integrationtests.py @@ -21,9 +21,9 @@ A command that runs some example operations on a working nomad@FAIRDI installati as a final integration test. """ -import time -import os import json +import os +import time from nomad.client import api diff --git a/nomad/cli/dev.py b/nomad/cli/dev.py index e7ff602405fd1c7f177d26d301180a72ba10afdc..4491c228e9c25be42b5474982c0e95a14e4f861c 100644 --- a/nomad/cli/dev.py +++ b/nomad/cli/dev.py @@ -15,18 +15,18 @@ # limitations under the License. # -from typing import Tuple, Any -import sys import json import os -import click +import sys +from typing import Any +import click from pint import Unit from pint.errors import UndefinedUnitError from nomad.config import config -from nomad.config.models.plugins import ExampleUploadEntryPoint from nomad.metainfo.elasticsearch_extension import schema_separator + from .cli import cli @@ -81,8 +81,8 @@ def gui_qa(skip_tests: bool): @dev.command(help='Export an API model in JSON schema.') @click.argument('model') def api_model(model): - import json import importlib + import json def remove_null_types(data): """ @@ -124,11 +124,11 @@ def api_model(model): 'nomad.app.v1.models.graph.GraphRequest', 'nomad.app.v1.models.graph.GraphResponse', ]: + from nomad.app.v1.models.graph.graph_models import Graph from nomad.app.v1.models.graph.utils import ( generate_request_model, generate_response_model, ) - from nomad.app.v1.models.graph.graph_models import Graph sys.modules['nomad.app.v1.models.graph.utils'].ref_prefix = '#/definitions' sys.modules['nomad.app.v1.models.graph.utils'].graph_model_export = True @@ -189,8 +189,8 @@ def metainfo(): def _generate_search_quantities(): # Currently only quantities with "entry_type" are included. - from nomad.metainfo.elasticsearch_extension import entry_type, Elasticsearch from nomad.datamodel import EntryArchive + from nomad.metainfo.elasticsearch_extension import Elasticsearch, entry_type def to_dict(search_quantity, section=False, repeats=False): if section: @@ -350,8 +350,9 @@ def gui_config(): '--parser', help='Only updated the README of the given parsers subdirctory.' 
) def update_parser_readmes(parser): - from glob import glob import re + from glob import glob + import yaml os.chdir(os.path.join(os.path.dirname(__file__), '../..')) @@ -490,8 +491,10 @@ def example_data(username: str): def _generate_units_json() -> tuple[Any, Any]: - from pint.converters import ScaleConverter from collections import defaultdict + + from pint.converters import ScaleConverter + from nomad.units import ureg # TODO: Check that all units are unambiguously defined, and that there are diff --git a/nomad/cli/parse.py b/nomad/cli/parse.py index 723796dfbac151b3372d61ca5592c47c59dcde89..275cdf80ce8c1dbd42f50c8589b2545148d48212 100644 --- a/nomad/cli/parse.py +++ b/nomad/cli/parse.py @@ -84,12 +84,12 @@ def _parse( save_plot_dir, ): import json - import sys import os + import sys + from nomad import utils from nomad.client import normalize_all, parse from nomad.datamodel.metainfo.plot import resolve_plot_references - from nomad import utils kwargs = dict( strict=not not_strict, diff --git a/nomad/client/api.py b/nomad/client/api.py index 414eb02a03ad0eb6c0cdf0aed8843cf3519d9c9c..fe85171217a799fc0f6a4d52e5959e0262580a26 100644 --- a/nomad/client/api.py +++ b/nomad/client/api.py @@ -16,9 +16,10 @@ # limitations under the License. # +import time + import requests from keycloak import KeycloakOpenID -import time from nomad.config import config diff --git a/nomad/client/archive.py b/nomad/client/archive.py index f68d6f88757dc1614259829dbee625f140542bc6..1c0f496e09df39b92e5ba41ec22620eda40df145 100644 --- a/nomad/client/archive.py +++ b/nomad/client/archive.py @@ -18,19 +18,19 @@ from __future__ import annotations import asyncio +import threading from asyncio import Semaphore from itertools import islice -from typing import Any, Union from time import monotonic -import threading +from typing import Any from click import progressbar -from httpx import Timeout, AsyncClient +from httpx import AsyncClient, Timeout from keycloak import KeycloakOpenID from nomad import metainfo as mi from nomad.config import config -from nomad.datamodel import EntryArchive, ClientContext +from nomad.datamodel import ClientContext, EntryArchive from nomad.utils import dict_to_dataframe @@ -413,7 +413,7 @@ class ArchiveQuery: ] results = await asyncio.gather(*tasks) - return [archive for result in results if result for archive in result] + return [archive for result in results if result for archive in result] # type: ignore async def _acquire( self, diff --git a/nomad/client/processing.py b/nomad/client/processing.py index 3445fc88711a48284f432cd15507ac0c93955adb..03d54ba78cd8bd8ec47892cd42d83a7ea02c711c 100644 --- a/nomad/client/processing.py +++ b/nomad/client/processing.py @@ -16,15 +16,14 @@ # limitations under the License. # -import os import io -import typing +import os import sys +import typing -from nomad import utils, datamodel +from nomad import datamodel, utils from nomad.config import config - from .api import Auth diff --git a/nomad/client/upload.py b/nomad/client/upload.py index e800b8da04781ad004150d26b8cc47042d5917e6..8b2c6f4c38d1e974cd1b72637338df1c1854cf6f 100644 --- a/nomad/client/upload.py +++ b/nomad/client/upload.py @@ -41,8 +41,8 @@ def upload_file( Returns: The upload_id if successful or None if not. 
""" - from nomad.processing import ProcessStatus from nomad.client import api + from nomad.processing import ProcessStatus if local_path: response = api.post( diff --git a/nomad/common.py b/nomad/common.py index ce0d536ff66afc9088d9e245b69b1567f6010d30..181c232cbe600ea4f0914ace29c5a710f7cb4825 100644 --- a/nomad/common.py +++ b/nomad/common.py @@ -24,11 +24,11 @@ source code without circular imports. import os import pkgutil import shutil -import zipfile import tarfile -from typing import Optional -from typing import Literal +import zipfile from tempfile import TemporaryDirectory +from typing import Literal + import httpx diff --git a/nomad/config/__init__.py b/nomad/config/__init__.py index 303d2c0d4c63ba26481c65728d8d667be1dd2e2f..b898ed97ee7ca4eba0232640f8ba57c041251b7f 100644 --- a/nomad/config/__init__.py +++ b/nomad/config/__init__.py @@ -37,7 +37,7 @@ import sys import yaml import logging import os.path -from typing import Dict, Any +from typing import Any from nomad.config.models.config import Config # use std python logger, since logging is not configured while loading configuration @@ -94,7 +94,7 @@ def _load_config_env() -> dict[str, Any]: # Some environment variables starting with NOMAD_ are unavoidable # in docker/kubernetes environments. We should ignore them here, # before they cause a warning later when the config is validated. - if all([not key.startswith(field) for field in Config.__fields__.keys()]): + if all([not key.startswith(field) for field in Config.model_fields.keys()]): continue add_deep(config_data, key, value) @@ -156,6 +156,6 @@ config = load_config() # Expose config fields under this module for backwards compatibility _module = sys.modules[__name__] -_fields = Config.__fields__ +_fields = Config.model_fields for field_name in _fields.keys(): setattr(_module, field_name, getattr(config, field_name)) diff --git a/nomad/config/models/common.py b/nomad/config/models/common.py index f091f34cf60fb03b5dde1fe12447935c70dd42bb..02f68c3814f991baaaf0f26089e97ef342817919 100644 --- a/nomad/config/models/common.py +++ b/nomad/config/models/common.py @@ -16,9 +16,9 @@ # limitations under the License. 
# -import logging -from typing import List, Dict, Tuple, Any, Optional, Union, cast, TypeVar -from pydantic import ConfigDict, model_validator, BaseModel, Field # pylint: disable=unused-import +from typing import Any, TypeVar, cast + +from pydantic import BaseModel, ConfigDict, Field, model_validator # noqa: F401 ConfigBaseModelBound = TypeVar('ConfigBaseModelBound', bound='ConfigBaseModel') diff --git a/nomad/config/models/config.py b/nomad/config/models/config.py index 2ec981ae5b53d69f9af968692f5499e2e23b53f3..385f0d8ba62b059589d96448b1e237d1b804b0c7 100644 --- a/nomad/config/models/config.py +++ b/nomad/config/models/config.py @@ -18,20 +18,12 @@ import logging import os -import sys import warnings from importlib.metadata import version -from typing import Any, Dict, List, Optional, Tuple, Union +from typing import Any import yaml -from pydantic import ( - BaseModel, - field_validator, - model_validator, - Field, - validator, - ConfigDict, -) +from pydantic import BaseModel, ConfigDict, Field, field_validator, model_validator try: __version__ = version('nomad-lab') @@ -41,15 +33,12 @@ except Exception: # noqa from importlib.metadata import entry_points +from nomad.common import get_package_path -from .common import ( - ConfigBaseModel, - Options, -) +from .common import ConfigBaseModel, Options from .north import NORTH from .plugins import EntryPointType, PluginPackage, Plugins from .ui import UI -from nomad.common import get_package_path warnings.filterwarnings('ignore', message='numpy.dtype size changed') warnings.filterwarnings('ignore', message='numpy.ufunc size changed') @@ -510,7 +499,7 @@ class Logtransfer(ConfigBaseModel): ) # Validators - _level = validator('level', allow_reuse=True)(normalize_loglevel) + _level = field_validator('level', mode='before')(normalize_loglevel) class Tests(ConfigBaseModel): diff --git a/nomad/config/models/north.py b/nomad/config/models/north.py index c99bf6c9fbe7841b20d4457fff7d0fb4d91b03e9..dced965c79bed434fd47dcecd6adca71631d2533 100644 --- a/nomad/config/models/north.py +++ b/nomad/config/models/north.py @@ -17,7 +17,6 @@ # from enum import Enum -from typing import Dict, List, Optional, Union from pydantic import BaseModel, Field diff --git a/nomad/config/models/plugins.py b/nomad/config/models/plugins.py index eeade496bca2cea5bfd59018510aa28a2d85d805..6ab41803294bc867da0327930d7c622ef6859fa4 100644 --- a/nomad/config/models/plugins.py +++ b/nomad/config/models/plugins.py @@ -16,15 +16,16 @@ # limitations under the License. 
# +import importlib import os -import sys import shutil +import sys from abc import ABCMeta, abstractmethod -import importlib -from typing import Optional, Dict, Union, List, Literal, cast, TYPE_CHECKING -from pydantic import model_validator, BaseModel, Field +from typing import TYPE_CHECKING, Literal, Union, cast + +from pydantic import BaseModel, Field, model_validator -from nomad.common import get_package_path, download_file, is_url, is_safe_relative_path +from nomad.common import download_file, get_package_path, is_safe_relative_path, is_url from .common import Options from .ui import App @@ -32,10 +33,11 @@ from .ui import App example_prefix = '__examples__' if TYPE_CHECKING: + from fastapi import FastAPI + from nomad.metainfo import SchemaPackage from nomad.normalizing import Normalizer as NormalizerBaseClass from nomad.parsing import Parser as ParserBaseClass - from fastapi import FastAPI class EntryPoint(BaseModel): @@ -762,8 +764,8 @@ def add_plugin(plugin: Schema) -> None: def remove_plugin(plugin) -> None: """Function for removing a plugin.""" from nomad.config import config - from nomad.metainfo.elasticsearch_extension import entry_type from nomad.metainfo import Package + from nomad.metainfo.elasticsearch_extension import entry_type # Remove from path try: diff --git a/nomad/config/models/ui.py b/nomad/config/models/ui.py index 88845aba8eaff855fdb7e5af268f9532a0637597..ed171ebd81925128e2e02dcf960a5d2483f900f3 100644 --- a/nomad/config/models/ui.py +++ b/nomad/config/models/ui.py @@ -17,18 +17,17 @@ # from enum import Enum -from typing import List, Dict, Union, Optional -from typing import Literal -from typing import Annotated -from pydantic import BaseModel, ConfigDict, model_validator, Field +from typing import Annotated, Literal, Union + +from pydantic import BaseModel, ConfigDict, Field, model_validator from .common import ( ConfigBaseModel, Options, - OptionsSingle, - OptionsMulti, - OptionsGlob, OptionsBase, + OptionsGlob, + OptionsMulti, + OptionsSingle, ) @@ -116,9 +115,10 @@ class UnitSystem(ConfigBaseModel): values = values.model_dump(exclude_none=True) """Adds SI defaults for dimensions that are missing a unit.""" units = values.get('units', {}) - from nomad.units import ureg from pint import UndefinedUnitError + from nomad.units import ureg + # Check that only supported dimensions and units are used for key in units.keys(): if key not in dimensions: diff --git a/nomad/datamodel/context.py b/nomad/datamodel/context.py index dca4c701c1bd5bd8615b059889505c7021f4682f..3c4529c903c1b6107159863d0976416c76390676 100644 --- a/nomad/datamodel/context.py +++ b/nomad/datamodel/context.py @@ -16,26 +16,19 @@ # limitations under the License. 
# -from typing import Dict, Any -from urllib.parse import urlsplit, urlunsplit -import re import os.path +import re +from urllib.parse import urlsplit, urlunsplit -import h5py import requests from nomad import utils from nomad.config import config -from nomad.datamodel.util import parse_path -from nomad.datamodel.datamodel import EntryMetadata -from nomad.metainfo import ( - Context as MetainfoContext, - MSection, - Quantity, - MetainfoReferenceError, - Package, -) from nomad.datamodel import EntryArchive +from nomad.datamodel.datamodel import EntryMetadata +from nomad.datamodel.util import parse_path +from nomad.metainfo import Context as MetainfoContext +from nomad.metainfo import MetainfoReferenceError, MSection, Package, Quantity class Context(MetainfoContext): diff --git a/nomad/datamodel/data.py b/nomad/datamodel/data.py index 36b55b2d65bd0802c0921e2c1292e4a238e15e02..ca029f896327fc8748978d8c9380f74d5ab4a41e 100644 --- a/nomad/datamodel/data.py +++ b/nomad/datamodel/data.py @@ -17,24 +17,23 @@ # import os.path +from typing import Any from cachetools import TTLCache, cached - -from typing import Dict, Any, Optional from pydantic import Field from nomad.config import config from nomad.metainfo.elasticsearch_extension import Elasticsearch, material_entry_type from nomad.metainfo.metainfo import ( + JSON, + Capitalized, Category, + Datetime, MCategory, MSection, Quantity, - Capitalized, - Section, - Datetime, Reference, - JSON, + Section, ) from nomad.metainfo.pydantic_extension import PydanticModel @@ -96,8 +95,8 @@ class EntryData(ArchiveSection): def normalize(self, archive, logger): super().normalize(archive, logger) - from nomad.datamodel.results import Results from nomad.datamodel import EntryArchive + from nomad.datamodel.results import Results # TODO entry_type should only be assigned if not already defined (done to pass eln test) if archive.metadata: diff --git a/nomad/datamodel/datamodel.py b/nomad/datamodel/datamodel.py index 2bf32ebd83c6091725c8048090d19d636020f335..3b048eec5182b6980d3b40a9dabe92e5194a4d80 100644 --- a/nomad/datamodel/datamodel.py +++ b/nomad/datamodel/datamodel.py @@ -18,39 +18,40 @@ """All generic entry metadata and related classes.""" -from typing import List, Any -from enum import Enum import os.path +from enum import Enum +from typing import Any import rfc3161ng from elasticsearch_dsl import analyzer, tokenizer from nomad import utils from nomad.datamodel.metainfo.common import FastAccess -from nomad.metainfo.mongoengine_extension import Mongo, MongoDocument -from nomad.metainfo.pydantic_extension import PydanticModel from nomad.metainfo.elasticsearch_extension import ( Elasticsearch, - material_entry_type, - entry_type as es_entry_type, create_searchable_quantity, + material_entry_type, ) -from .util import parse_path +from nomad.metainfo.elasticsearch_extension import entry_type as es_entry_type +from nomad.metainfo.mongoengine_extension import Mongo, MongoDocument +from nomad.metainfo.pydantic_extension import PydanticModel + from ..metainfo import ( + JSON, Bytes, - Package, + Datetime, Definition, - MSection, MCategory, + MEnum, + MSection, + Package, + Quantity, Section, SubSection, - Quantity, - MEnum, - Datetime, - JSON, ) from ..metainfo.data_type import m_str from ..metainfo.metainfo import Reference +from .util import parse_path # This is usually defined automatically when the first metainfo definition is evaluated, but # due to the next imports requiring the m_package already, this would be too late. 
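Reviewer note: several of the hunks above (nomad/cli/admin/migrate.py, nomad/config/__init__.py, nomad/config/models/config.py) apply the same pydantic v1 -> v2 migration: class attributes used as fields gain explicit type annotations, `Model.__fields__` becomes `Model.model_fields`, and `validator(..., allow_reuse=True)` becomes `field_validator(..., mode='before')`. A minimal sketch of the v2 idiom, using a hypothetical `Logging` model rather than the real NOMAD config classes:

    from pydantic import BaseModel, field_validator

    class Logging(BaseModel):
        # pydantic v2 only treats annotated attributes as fields
        # (cf. the _UpgradeStatistics change in nomad/cli/admin/migrate.py)
        level: str = 'INFO'

        # v1: _level = validator('level', allow_reuse=True)(normalize_loglevel)
        @field_validator('level', mode='before')
        @classmethod
        def _normalize(cls, value):
            return value.upper() if isinstance(value, str) else value

    # v1: Logging.__fields__  ->  v2: Logging.model_fields
    print(list(Logging.model_fields))  # ['level']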
@@ -723,6 +724,12 @@ class EntryMetadata(MSection): a_elasticsearch=Elasticsearch(), ) + nomad_distro_commit_url = Quantity( + type=str, + description='The NOMAD distro commit url used for the last processing', + categories=[MongoEntryMetadata], + a_elasticsearch=Elasticsearch(), + ) comment = Quantity( type=str, categories=[MongoEntryMetadata, EditableUserMetadata], diff --git a/nomad/datamodel/hdf5.py b/nomad/datamodel/hdf5.py index fa04a76f9465b26cfa6eb6f0271cd40e7f0cd396..daf56c68ad8fdd8de9750f7de76a07d8abff82f5 100644 --- a/nomad/datamodel/hdf5.py +++ b/nomad/datamodel/hdf5.py @@ -17,16 +17,16 @@ # from __future__ import annotations -from typing import Any -import h5py import re +from typing import Any +import h5py import numpy as np import pint from h5py import File -from nomad.metainfo.data_type import NonPrimitive from nomad.datamodel.metainfo.annotations import H5WebAnnotation +from nomad.metainfo.data_type import NonPrimitive from nomad.utils import get_logger LOGGER = get_logger(__name__) diff --git a/nomad/datamodel/metainfo/action.py b/nomad/datamodel/metainfo/action.py index a4c73d3de524f16e174415d663477c1bbde37261..8f219878de54194a6c1eb9040efbd3a7a4f8e184 100644 --- a/nomad/datamodel/metainfo/action.py +++ b/nomad/datamodel/metainfo/action.py @@ -16,7 +16,7 @@ # limitations under the License. # from nomad.datamodel.data import ArchiveSection -from nomad.metainfo import Quantity, Package +from nomad.metainfo import Package, Quantity m_package = Package() diff --git a/nomad/datamodel/metainfo/annotations.py b/nomad/datamodel/metainfo/annotations.py index 719b5e7c886f65e41e22d8f45f99852118b11aef..ed177b2541d635769a755fa10fc544f220765c29 100644 --- a/nomad/datamodel/metainfo/annotations.py +++ b/nomad/datamodel/metainfo/annotations.py @@ -16,18 +16,19 @@ # limitations under the License. # -from typing import List, Any, Union, Dict, Optional -from enum import Enum -from pydantic import field_validator, ConfigDict, Field, validator import re +from enum import Enum +from typing import Any +from pydantic import ConfigDict, Field, field_validator, model_validator from pydantic.main import BaseModel +from nomad.metainfo import AnnotationModel, Datetime, MEnum, Quantity, Reference from nomad.utils import strip -from nomad.metainfo import AnnotationModel, MEnum, Datetime, Reference, Quantity -from .plot import PlotlyError -from ..data import Query + from ...metainfo.data_type import Datatype +from ..data import Query +from .plot import PlotlyError class ELNComponentEnum(str, Enum): @@ -1140,6 +1141,69 @@ class SchemaAnnotation(AnnotationModel): ) +class Mapper(BaseModel): + """ + Specifications to map the contents from a source specified by mapper. If string, + will be a path to the data following the jmespath grammar + (see https://jmespath.org/specification.html) eg: + + 'length(.array.set.set)' + + If additional transformation is required to the data before assignment, one can + provide a tuple of function name and list of paths to the source data. The data are + resolved then passed to the function which should be implemented in the parser + class method. 
+ + For example: + + ('get_eigenvalues_energies', + [ + '.array.set.set[].set[].r', + 'length(.array.set.set)', + 'length(.array.set.set[0].set)' + ] + ) + """ + + mapper: str | tuple[str, list[str]] | tuple[str, list[str], dict[str, Any]] = Field( + '', + description="""Mapper from dictionary to archive property either as path""" + """ or Tuple of name of transformer function and list of paths to be resolved""" + """ as argument to the function.""", + ) + remove: bool = Field(None, description="""Removes data from source.""") + cache: bool = Field(None, description="""Store value.""") + path_parser: str = Field( + 'jmespath', description="""Name of the parser for paths.""" + ) + unit: str = Field(None, description="""Pint unit to be applied to value.""") + indices: str = Field( + None, description="""Name of function to evaluate indices to include in data""" + ) + search: str = Field(None, description="""Path to search on value.""") + + +class MappingAnnotation(AnnotationModel): + """ + Annotation model used with mapping parser. + + class MySection(MSection): + + m_def = Section(a_mapping={'hdf5': {mapper: 'data'}}) + """ + + @model_validator(mode='before') + def validate_mapper(cls, values): + for name, value in values.items(): + if name in cls.model_fields: + continue + values[name] = Mapper.model_validate(value) + return values + + class Config: + extra = 'allow' + + AnnotationModel.m_registry['eln'] = ELNAnnotation AnnotationModel.m_registry['browser'] = BrowserAnnotation AnnotationModel.m_registry['tabular_parser'] = TabularParserAnnotation @@ -1148,3 +1212,4 @@ AnnotationModel.m_registry['hdf5'] = HDF5Annotation AnnotationModel.m_registry['plot'] = PlotAnnotation AnnotationModel.m_registry['h5web'] = H5WebAnnotation AnnotationModel.m_registry['schema'] = SchemaAnnotation +AnnotationModel.m_registry['mapping'] = MappingAnnotation diff --git a/nomad/datamodel/metainfo/basesections/v1.py b/nomad/datamodel/metainfo/basesections/v1.py index 5604e971d4830f0ca9911bf7a3d27c01c8e3a6ec..eacde4ab5eab8fb1ba55d743ad57606d1ef5b009 100644 --- a/nomad/datamodel/metainfo/basesections/v1.py +++ b/nomad/datamodel/metainfo/basesections/v1.py @@ -20,8 +20,8 @@ import os import random import re import time -from typing import TYPE_CHECKING, Dict, List from collections.abc import Iterable +from typing import TYPE_CHECKING import h5py import numpy as np @@ -29,22 +29,17 @@ import requests from ase.data import atomic_masses, atomic_numbers, chemical_symbols from unidecode import unidecode -from nomad.metainfo import SchemaPackage from nomad.datamodel.metainfo.workflow import Link, Task, TaskReference, Workflow +from nomad.metainfo import SchemaPackage from nomad.metainfo.data_type import m_str if TYPE_CHECKING: - from structlog.stdlib import ( - BoundLogger, - ) + from structlog.stdlib import BoundLogger from nomad import utils from nomad.atomutils import Formula from nomad.datamodel.data import ArchiveSection, EntryData -from nomad.datamodel.metainfo.annotations import ( - ELNAnnotation, - HDF5Annotation, -) +from nomad.datamodel.metainfo.annotations import ELNAnnotation, HDF5Annotation from nomad.datamodel.results import ELN, Material, Results from nomad.datamodel.results import ElementalComposition as ResultsElementalComposition from nomad.datamodel.util import create_custom_mapping @@ -233,12 +228,12 @@ class BaseSection(ArchiveSection): def normalize(self, archive, logger: 'BoundLogger') -> None: """ - The normalizer for the `BaseSection` class. 
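Reviewer note: the `Mapper` and `MappingAnnotation` classes added to nomad/datamodel/metainfo/annotations.py above describe how a section can declare where its values come from, either as a jmespath path or as a (transformer name, list of source paths) tuple that a mapping-aware parser resolves. A minimal declaration-side sketch, reusing the example paths from the docstring above; the 'hdf5'/'xml' keys and the way a parser consumes the annotation are assumptions, not part of this diff:

    from nomad.datamodel.metainfo.annotations import Mapper
    from nomad.metainfo import MSection, Quantity, Section

    class MySection(MSection):
        m_def = Section(
            a_mapping={
                # plain jmespath path into the source data
                'hdf5': {'mapper': 'length(.array.set.set)'},
                # transformer function (implemented on the parser class) plus source paths
                'xml': {'mapper': ('get_eigenvalues_energies',
                                   ['.array.set.set[].set[].r',
                                    'length(.array.set.set)'])},
            }
        )
        value = Quantity(type=float)

    # the payload of a single entry validates against the Mapper model
    Mapper.model_validate({'mapper': 'length(.array.set.set)', 'unit': 'eV'})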
- - Args: - archive (EntryArchive): The archive containing the section that is being - normalized. - logger ('BoundLogger'): A structlog logger. + - If the instance is of type `EntryData`, it sets the archive's entry name based on the instance's name. + - Sets the `datetime` field to the current time if it is not already set. + - Manages the `lab_id` field and updates the archive's `results.eln.lab_ids` list. + - Adds the instance's `name` and `description` to the archive's `results.eln.names` and `results.eln.descriptions` lists, respectively. + - Handles the `tags` attribute, if present, and updates the archive's `results.eln.tags` list. + - Appends the section's name to the archive's `results.eln.sections` list. """ super().normalize(archive, logger) @@ -385,12 +380,8 @@ class Activity(BaseSection): def normalize(self, archive, logger: 'BoundLogger') -> None: """ - The normalizer for the `Activity` class. - - Args: - archive (EntryArchive): The archive containing the section that is being - normalized. - logger ('BoundLogger'): A structlog logger. + - Ensures the `results.eln.methods` list is initialized and appends the method or section name. + - Converts each step in `self.steps` to a task, using the steps `to_task()` method, and assigns it to `archive.workflow2.tasks`. """ super().normalize(archive, logger) @@ -450,13 +441,7 @@ class EntityReference(SectionReference): def normalize(self, archive, logger: 'BoundLogger') -> None: """ - The normalizer for the `EntityReference` class. Will attempt to fill the `reference` from the `lab_id` or vice versa. - - Args: - archive (EntryArchive): The archive containing the section that is being - normalized. - logger ('BoundLogger'): A structlog logger. """ super().normalize(archive, logger) if self.reference is None and self.lab_id is not None: @@ -513,15 +498,9 @@ class ExperimentStep(ActivityStep): def normalize(self, archive, logger: 'BoundLogger') -> None: """ - The normalizer for the `ExperimentStep` class. Will attempt to fill the `activity` from the `lab_id` or vice versa. If the activity reference is filled but the start time is not the time will be taken from the `datetime` property of the referenced activity. - - Args: - archive (EntryArchive): The archive containing the section that is being - normalized. - logger ('BoundLogger'): A structlog logger. """ super().normalize(archive, logger) if self.activity is None and self.lab_id is not None: @@ -618,17 +597,12 @@ class ElementalComposition(ArchiveSection): def normalize(self, archive, logger: 'BoundLogger') -> None: """ - The normalizer for the `ElementalComposition` class. Will add a + Will add a results.material subsection if none exists. Will append the element to the elements property of that subsection and a nomad.datamodel.results.ElementalComposition instances to the elemental_composition property using the element and atomic fraction from this section. - - Args: - archive (EntryArchive): The archive containing the section that is being - normalized. - logger ('BoundLogger'): A structlog logger. """ super().normalize(archive, logger) @@ -740,13 +714,8 @@ class System(Entity): def normalize(self, archive, logger: 'BoundLogger') -> None: """ - The normalizer for the `System` class. Will attempt to fill mass fractions or + Will attempt to fill mass fractions or atomic fractions if left blank. - - Args: - archive (EntryArchive): The archive containing the section that is being - normalized. - logger ('BoundLogger'): A structlog logger. 
""" super().normalize(archive, logger) @@ -761,12 +730,7 @@ class Instrument(Entity): def normalize(self, archive, logger: 'BoundLogger') -> None: """ - The normalizer for the `Instrument` class. - - Args: - archive (EntryArchive): The archive containing the section that is being - normalized. - logger ('BoundLogger'): A structlog logger. + Adds the name of the instrument to the `results.eln.instruments` list. """ super().normalize(archive, logger) @@ -827,14 +791,9 @@ class SystemComponent(Component): def normalize(self, archive, logger: 'BoundLogger') -> None: """ - The normalizer for the `SystemComponent` class. If none is set, the normalizer + If none is set, the normalizer will set the name of the component to be that of the referenced system if it has one. - - Args: - archive (EntryArchive): The archive containing the section that is being - normalized. - logger ('BoundLogger'): A structlog logger. """ super().normalize(archive, logger) if self.name is None and self.system is not None: @@ -950,14 +909,9 @@ class PureSubstanceComponent(Component): def normalize(self, archive, logger: 'BoundLogger') -> None: """ - The normalizer for the `PureSubstanceComponent` class. If none is set, the + If none is set, the normalizer will set the name of the component to be the molecular formula of the substance. - - Args: - archive (EntryArchive): The archive containing the section that is being - normalized. - logger ('BoundLogger'): A structlog logger. """ super().normalize(archive, logger) if self.substance_name and self.pure_substance is None: @@ -1076,18 +1030,13 @@ class CompositeSystem(System): def normalize(self, archive, logger: 'BoundLogger') -> None: """ - The normalizer for the `CompositeSystem` class. If the elemental composition list is + If the elemental composition list is empty, the normalizer will iterate over the components and extract all the elements for populating the elemental composition list. If masses are provided for all components and the elemental composition of all components contain atomic fractions the normalizer will also calculate the atomic fractions for the composite system. The populated elemental composition list is added to the results by the normalizer in the `System` super class. - - Args: - archive (EntryArchive): The archive containing the section that is being - normalized. - logger ('BoundLogger'): A structlog logger. """ if logger is None: logger = utils.get_logger(__name__) @@ -1229,12 +1178,9 @@ class Process(Activity): def normalize(self, archive, logger: 'BoundLogger') -> None: """ - The normalizer for the `Process` class. - - Args: - archive (EntryArchive): The archive containing the section that is being - normalized. - logger ('BoundLogger'): A structlog logger. + - Sets the start time for each step in `self.steps` if not already set, based on the `datetime` and `duration` fields. + - Sets the `end_time` field to the calculated end time if it is not already set. + - Updates the `archive.workflow2.outputs` list with links to the samples processed. """ super().normalize(archive, logger) if ( @@ -1297,12 +1243,8 @@ class Analysis(Activity): def normalize(self, archive, logger: 'BoundLogger') -> None: """ - The normalizer for the `Analysis` section. - - Args: - archive (EntryArchive): The archive containing the section that is being - normalized. - logger ('BoundLogger'): A structlog logger. + - Updates the `archive.workflow2.inputs` list with links to the input data. + - Updates the `archive.workflow2.outputs` list with links to the output data. 
""" super().normalize(archive, logger) archive.workflow2.inputs = [ @@ -1364,12 +1306,8 @@ class Measurement(Activity): def normalize(self, archive, logger: 'BoundLogger') -> None: """ - The normalizer for the `Measurement` section. - - Args: - archive (EntryArchive): The archive containing the section that is being - normalized. - logger ('BoundLogger'): A structlog logger. + - Updates the `archive.workflow2.inputs` list with links to the input samples. + - Updates the `archive.workflow2.outputs` list with links to the measurement results. """ super().normalize(archive, logger) archive.workflow2.inputs = [ @@ -1420,13 +1358,8 @@ class PureSubstance(System): def normalize(self, archive, logger: 'BoundLogger') -> None: """ - The normalizer method for the `Substance` class. This method will populate the results.material section and the elemental composition sub section using the molecular formula. - - Args: - archive (EntryArchive): The archive that is being normalized. - logger ('BoundLogger'): A structlog logger. """ super().normalize(archive, logger) if logger is None: @@ -1625,11 +1558,11 @@ class PubChemPureSubstanceSection(PureSubstanceSection): def normalize(self, archive, logger: 'BoundLogger') -> None: """ - The normalizer method for the `PubChemSubstanceSection` class. This method will attempt to get data on the substance instance from the PubChem PUG REST API: https://pubchem.ncbi.nlm.nih.gov/docs/pug-rest If a PubChem CID is specified the details are retrieved directly. Otherwise a search query is made for the filled attributes in the following order: + 1. `smile` 2. `canonical_smile` 3. `inchi_key` @@ -1637,10 +1570,6 @@ class PubChemPureSubstanceSection(PureSubstanceSection): 5. `name` 6. `molecular_formula` 7. `cas_number` - - Args: - archive (EntryArchive): The archive that is being normalized. - logger ('BoundLogger'): A structlog logger. """ if logger is None: logger = utils.get_logger(__name__) @@ -1881,7 +1810,6 @@ class CASPureSubstanceSection(PureSubstanceSection): def normalize(self, archive, logger: 'BoundLogger') -> None: """ - The normalizer method for the `CASPureSubstanceSection` class. This method will attempt to get data on the pure substance instance from the CAS API: https://commonchemistry.cas.org/api-overview If a CAS number is specified the details are retrieved directly. @@ -1893,10 +1821,6 @@ class CASPureSubstanceSection(PureSubstanceSection): 4. `smile` 5. `canonical_smile` 6. `name` - - Args: - archive (EntryArchive): The archive that is being normalized. - logger ('BoundLogger'): A structlog logger. """ if logger is None: logger = utils.get_logger(__name__) @@ -1968,23 +1892,19 @@ class ReadableIdentifiers(ArchiveSection): def normalize(self, archive, logger: 'BoundLogger') -> None: """ - The normalizer for the `ReadableIdentifiers` class. If owner is not filled the field will be filled by the first two letters of the first name joined with the first two letters of the last name of the author. If the institute is not filled a institute abreviations will be constructed from the author's affiliation. + If no datetime is filled, the datetime will be taken from the `datetime` property of the parent, if it exists, otherwise the current date and time will be used. + If no short name is filled, the name will be taken from the parent name, if it exists, otherwise it will be taken from the archive metadata entry name, if it exists, and finally if no other options are available it will use the name of the mainfile. 
- - Args: - archive (EntryArchive): The archive containing the section that is being - normalized. - logger ('BoundLogger'): A structlog logger. """ super().normalize(archive, logger) @@ -2106,16 +2026,15 @@ class PublicationReference(ArchiveSection): def normalize(self, archive, logger: 'BoundLogger') -> None: """ - The normalizer for the `PublicationReference` class. - - Args: - archive (EntryArchive): The archive containing the section that is being - normalized. - logger ('BoundLogger'): A structlog logger. + - If a DOI number is provided, retrieves publication details from the CrossRef API. + - Populates the `publication_authors`, `journal`, `publication_title`, and `publication_date` fields based on the CrossRef response. + - Ensures the DOI number has the prefix `https://doi.org/`. + - Updates the archive's metadata references with the DOI number if it is not already present. """ super().normalize(archive, logger) import dateutil.parser import requests + from nomad.datamodel.datamodel import EntryMetadata # Parse journal name, lead author and publication date from crossref diff --git a/nomad/datamodel/metainfo/basesections/v2.py b/nomad/datamodel/metainfo/basesections/v2.py index b2b442bbb7fcc6fe4d48dd55e61ad9a30eaf7222..1ad7e401f79d68d49a8ba972424a624f9e427b15 100644 --- a/nomad/datamodel/metainfo/basesections/v2.py +++ b/nomad/datamodel/metainfo/basesections/v2.py @@ -15,72 +15,39 @@ # See the License for the specific language governing permissions and # limitations under the License. # +import datetime import os -from typing import TYPE_CHECKING -from collections.abc import Iterable import random -import time -import datetime import re -from typing import ( - Dict, - List, -) +import time +from collections.abc import Iterable +from typing import TYPE_CHECKING -from unidecode import unidecode -import numpy as np import h5py -from ase.data import ( - chemical_symbols, - atomic_numbers, - atomic_masses, -) +import numpy as np import requests +from ase.data import atomic_masses, atomic_numbers, chemical_symbols +from unidecode import unidecode -from nomad.datamodel.metainfo.workflow import Link, Task, TaskReference, Workflow +from nomad.datamodel.metainfo.workflow import Link, Task, Workflow from nomad.metainfo.data_type import m_str if TYPE_CHECKING: - from structlog.stdlib import ( - BoundLogger, - ) -from nomad.atomutils import ( - Formula, -) -from nomad import ( - utils, -) -from nomad.units import ( - ureg, -) -from nomad.metainfo import ( - Quantity, - Datetime, - Reference, - Section, - SectionProxy, - SubSection, -) -from nomad.metainfo.util import MEnum -from nomad.datamodel.util import create_custom_mapping -from nomad.datamodel.data import ( - ArchiveSection, - EntryData, -) -from nomad.datamodel.results import ( - Results, - ELN, - ElementalComposition as ResultsElementalComposition, - Material, -) + from structlog.stdlib import BoundLogger +from nomad import utils +from nomad.datamodel.data import ArchiveSection from nomad.datamodel.metainfo.annotations import ( ELNAnnotation, - ELNComponentEnum, Filter, - SectionProperties, HDF5Annotation, + SectionProperties, ) - +from nomad.datamodel.results import ELN, Material, Results +from nomad.datamodel.results import ElementalComposition as ResultsElementalComposition +from nomad.datamodel.util import create_custom_mapping +from nomad.metainfo import Datetime, Quantity, Section, SectionProxy, SubSection +from nomad.metainfo.util import MEnum +from nomad.units import ureg PUB_CHEM_PUG_PATH = 
'https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound' CAS_API_PATH = 'https://commonchemistry.cas.org/api' @@ -291,7 +258,7 @@ class ActivityStep(ArchiveSection): """, a_eln=ELNAnnotation(component='DateTimeEditQuantity', label='starting time'), ) - comment = Quantity( + description = Quantity( type=str, description=""" Any additional information about the step not captured by the other fields. @@ -418,7 +385,7 @@ class EntityReference(SectionReference): """ super().normalize(archive, logger) if self.reference is None and self.lab_id is not None: - from nomad.search import search, MetadataPagination + from nomad.search import MetadataPagination, search query = {'results.eln.lab_ids': self.lab_id} search_result = search( @@ -449,6 +416,16 @@ class ExperimentStep(ActivityStep): Any dependant step of an `Experiment`. """ + lab_id = Quantity( + type=str, + description=""" + The readable identifier for the activity. + """, + a_eln=ELNAnnotation( + component='StringEditQuantity', + label='activity ID', + ), + ) activity = Quantity( type=Activity, description=""" @@ -458,66 +435,48 @@ class ExperimentStep(ActivityStep): component='ReferenceEditQuantity', ), ) - lab_id = Quantity( - type=str, + + +class NestedExperimentStep(ExperimentStep): + """ + A step of an Experiment. + + This class is a wrapper for the `Activity` class and is used to describe + the metadata of an activity when it is a step of another, larger, experiment. + + The `Activity` class instance can be instantiated in the `activity` property + as a nested subsection. + + A normalizer will create a link in the activity property inherited from + the ExperimentStep class. + + """ + + m_def = Section( + a_eln=ELNAnnotation( + properties=SectionProperties( + visible=Filter( + exclude=[ + 'activity', + ], + ), + ) + ) + ) + + nested_activity = SubSection( + section_def=Activity, description=""" - The readable identifier for the activity. + Section describing the activity that is the step on an experiment. """, - a_eln=ELNAnnotation( - component='StringEditQuantity', - label='activity ID', - ), + label='activity', ) def normalize(self, archive, logger: 'BoundLogger') -> None: - """ - The normalizer for the `ExperimentStep` class. - Will attempt to fill the `activity` from the `lab_id` or vice versa. - If the activity reference is filled but the start time is not the time will be - taken from the `datetime` property of the referenced activity. - - Args: - archive (EntryArchive): The archive containing the section that is being - normalized. - logger ('BoundLogger'): A structlog logger. - """ super().normalize(archive, logger) - if self.activity is None and self.lab_id is not None: - from nomad.search import search, MetadataPagination - - query = {'results.eln.lab_ids': self.lab_id} - search_result = search( - owner='all', - query=query, - pagination=MetadataPagination(page_size=1), - user_id=archive.metadata.main_author.user_id, - ) - if search_result.pagination.total > 0: - entry_id = search_result.data[0]['entry_id'] - upload_id = search_result.data[0]['upload_id'] - self.activity = f'../uploads/{upload_id}/archive/{entry_id}#data' - if search_result.pagination.total > 1: - logger.warn( - f'Found {search_result.pagination.total} entries with lab_id: ' - f'"{self.lab_id}". Will use the first one found.' 
- ) - else: - logger.warn(f'Found no entries with lab_id: "{self.lab_id}".') - elif self.lab_id is None and self.activity is not None: - self.lab_id = self.activity.lab_id - if self.name is None and self.lab_id is not None: - self.name = self.lab_id - if ( - self.activity is not None - and self.start_time is None - and self.activity.datetime - ): - self.start_time = self.activity.datetime - def to_task(self) -> Task: - if self.activity is None: - return Task(name=self.name) - return TaskReference(task=self.activity.m_parent.workflow2) + if self.nested_activity: + self.activity = self.nested_activity class Experiment(Activity): @@ -525,8 +484,13 @@ class Experiment(Activity): A section for grouping activities together into an experiment. """ - steps = Activity.steps.m_copy() - steps.section_def = ExperimentStep + steps = SubSection( + section_def=ExperimentStep, + description=""" + An ordered list of all the dependant steps that make up this experiment. + """, + repeats=True, + ) class Collection(Entity): @@ -1608,10 +1572,11 @@ class PublicationReference(ArchiveSection): logger ('BoundLogger'): A structlog logger. """ super().normalize(archive, logger) - from nomad.datamodel.datamodel import EntryMetadata import dateutil.parser import requests + from nomad.datamodel.datamodel import EntryMetadata + # Parse journal name, lead author and publication date from crossref if self.DOI_number: try: diff --git a/nomad/datamodel/metainfo/common.py b/nomad/datamodel/metainfo/common.py index 14bc1024ec90ec024b8bb863729254b268c6dbfa..beeb755d6d910afe3250dbe525b763c6ed54a4b2 100644 --- a/nomad/datamodel/metainfo/common.py +++ b/nomad/datamodel/metainfo/common.py @@ -16,8 +16,7 @@ # limitations under the License. # -from nomad.metainfo import MCategory, Category -from nomad.metainfo import MSection, Section, SubSection, Quantity +from nomad.metainfo import Category, MCategory, MSection, Quantity, Section, SubSection from nomad.metainfo.elasticsearch_extension import Elasticsearch, material_entry_type diff --git a/nomad/datamodel/metainfo/downloads.py b/nomad/datamodel/metainfo/downloads.py index 9ff64389d79966909ddf158a38a2cc249d61f781..8ada69e13bc3735549b3955517b4a5ccd762af1e 100644 --- a/nomad/datamodel/metainfo/downloads.py +++ b/nomad/datamodel/metainfo/downloads.py @@ -18,10 +18,8 @@ import os.path -from nomad.metainfo import MSection, Package, Quantity, SubSection - from nomad.datamodel.data import ArchiveSection - +from nomad.metainfo import MSection, Package, Quantity, SubSection m_package = Package(name='downloads') @@ -137,7 +135,8 @@ class Downloads(ArchiveSection): import pathlib import urllib.request - from nomad.common import get_compression_format, extract_file + + from nomad.common import extract_file, get_compression_format # download and extract files skip_download = True diff --git a/nomad/datamodel/metainfo/eln/__init__.py b/nomad/datamodel/metainfo/eln/__init__.py index 319780330ac46b02638b985932d49f33e2b26f12..e9faca60422ce4a611331e40ec44327e5dccefbd 100644 --- a/nomad/datamodel/metainfo/eln/__init__.py +++ b/nomad/datamodel/metainfo/eln/__init__.py @@ -18,7 +18,7 @@ import datetime import re -from typing import TYPE_CHECKING, Any, Dict, List +from typing import TYPE_CHECKING, Any import numpy as np from unidecode import unidecode diff --git a/nomad/datamodel/metainfo/measurements.py b/nomad/datamodel/metainfo/measurements.py index c10787470caf5c12a1fde04e29fd44a6daa80698..a6bb1fca5cf666c4c9699d8b15d3ec1228248011 100644 --- a/nomad/datamodel/metainfo/measurements.py +++ 
b/nomad/datamodel/metainfo/measurements.py @@ -18,10 +18,9 @@ import numpy as np -from nomad.metainfo import MSection, Package, Quantity, SubSection, Datetime -from nomad.metainfo.metainfo import Reference, SectionProxy from nomad.datamodel import Author - +from nomad.metainfo import Datetime, MSection, Package, Quantity, SubSection +from nomad.metainfo.metainfo import Reference, SectionProxy m_package = Package(name='measurements') diff --git a/nomad/datamodel/metainfo/plot.py b/nomad/datamodel/metainfo/plot.py index 3618263f8e78b5f19f434f6af17791de836de6a4..caaf30037f341d1d9038009d2d75094d12a85b2a 100644 --- a/nomad/datamodel/metainfo/plot.py +++ b/nomad/datamodel/metainfo/plot.py @@ -15,14 +15,16 @@ # See the License for the specific language governing permissions and # limitations under the License. # -from nomad.datamodel.data import ArchiveSection -from nomad.metainfo import Quantity, SubSection, Package, MSection, JSON, Section +from copy import deepcopy +from datetime import datetime + +import numpy as np import plotly.express as px import plotly.graph_objs as go from plotly.subplots import make_subplots -import numpy as np -from copy import deepcopy -from datetime import datetime + +from nomad.datamodel.data import ArchiveSection +from nomad.metainfo import JSON, MSection, Package, Quantity, Section, SubSection class PlotlyError(Exception): diff --git a/nomad/datamodel/metainfo/simulation/calculation.py b/nomad/datamodel/metainfo/simulation/calculation.py index 5f017bf063cd11eed29826612a37d24838c4019a..a4ecc9095459db3682c480b3cfcd6b9693620f14 100644 --- a/nomad/datamodel/metainfo/simulation/calculation.py +++ b/nomad/datamodel/metainfo/simulation/calculation.py @@ -20,28 +20,28 @@ # Only for purpose of compatibility. Use run schema plugin. 
# https://github.com/nomad-coe/nomad-schema-plugin-run.git -import numpy as np # pylint: disable=unused-import -from nomad.metainfo import ( # pylint: disable=unused-import - MSection, - MCategory, +import numpy as np # noqa: F401 + +from nomad.datamodel.data import ArchiveSection +from nomad.datamodel.metainfo.common import PropertySection, ProvenanceTracker +from nomad.datamodel.metainfo.simulation.method import HoppingMatrix, Method +from nomad.datamodel.metainfo.simulation.system import AtomsGroup, System +from nomad.metainfo import ( # noqa: F401 Category, + MCategory, + MEnum, + MSection, Package, Quantity, + Reference, Section, - SubSection, SectionProxy, - Reference, - MEnum, + SubSection, derived, ) -from nomad.datamodel.metainfo.common import ProvenanceTracker, PropertySection -from nomad.datamodel.metainfo.simulation.system import System, AtomsGroup -from nomad.datamodel.metainfo.simulation.method import Method, HoppingMatrix -from nomad.datamodel.data import ArchiveSection from ..common import FastAccess - m_package = Package() diff --git a/nomad/datamodel/metainfo/simulation/legacy_workflows.py b/nomad/datamodel/metainfo/simulation/legacy_workflows.py index 52a62e4a1f4833c630a4ef2202e7a739863088ac..7a4eef3e5d887f275b911570155e4d03d2b81b4c 100644 --- a/nomad/datamodel/metainfo/simulation/legacy_workflows.py +++ b/nomad/datamodel/metainfo/simulation/legacy_workflows.py @@ -22,26 +22,26 @@ import numpy as np from nptyping import NDArray + +from nomad.datamodel.metainfo.common import FastAccess +from nomad.datamodel.metainfo.simulation.calculation import ( + BandStructure, + Calculation, + Dos, +) +from nomad.datamodel.metainfo.simulation.run import Run +from nomad.datamodel.metainfo.simulation.system import Atoms, AtomsGroup, System from nomad.metainfo import ( - MSection, MEnum, + MSection, + Package, Quantity, + Reference, Section, - SubSection, SectionProxy, - Reference, - Package, + SubSection, derived, ) -from nomad.datamodel.metainfo.simulation.calculation import ( - Calculation, - Dos, - BandStructure, -) -from nomad.datamodel.metainfo.simulation.run import Run -from nomad.datamodel.metainfo.simulation.system import System, Atoms, AtomsGroup -from nomad.datamodel.metainfo.common import FastAccess - m_package = Package() diff --git a/nomad/datamodel/metainfo/simulation/method.py b/nomad/datamodel/metainfo/simulation/method.py index 3cb23695ca1cfda7c8d102f097550f9e697ffe5f..0d88668563fd6c5dcbd90c76a15f92d0e65381d9 100644 --- a/nomad/datamodel/metainfo/simulation/method.py +++ b/nomad/datamodel/metainfo/simulation/method.py @@ -20,28 +20,28 @@ # Only for purpose of compatibility. Use run schema plugin. 
# https://github.com/nomad-coe/nomad-schema-plugin-run.git -from logging import Logger -import numpy as np # pylint: disable=unused-import import typing +from logging import Logger + +import numpy as np # noqa: F401 +from pint.util import SharedRegistryObject # noqa: F401 -from pint.util import SharedRegistryObject # pylint: disable=unused-import from nomad.datamodel.data import ArchiveSection -from nomad.metainfo import ( # pylint: disable=unused-import - MSection, - MCategory, +from nomad.metainfo import ( # noqa: F401 Category, + MCategory, + MEnum, + MSection, Package, Quantity, + Reference, Section, - SubSection, SectionProxy, - Reference, - MEnum, + SubSection, ) -from nomad.metainfo.metainfo import derived from nomad.quantum_states import RussellSaundersState -from ..common import FastAccess +from ..common import FastAccess m_package = Package() diff --git a/nomad/datamodel/metainfo/simulation/run.py b/nomad/datamodel/metainfo/simulation/run.py index 99dab5d713c0d0f2740b665e81c4d2fab70c14f6..2dd64bf26696ef6d4b7450071f435851a39a8eed 100644 --- a/nomad/datamodel/metainfo/simulation/run.py +++ b/nomad/datamodel/metainfo/simulation/run.py @@ -20,24 +20,24 @@ # Only for purpose of compatibility. Use run schema plugin. # https://github.com/nomad-coe/nomad-schema-plugin-run.git -import numpy as np # pylint: disable=unused-import +import numpy as np # noqa: F401 -from nomad.metainfo import ( # pylint: disable=unused-import - MSection, - MCategory, +from nomad.datamodel.data import ArchiveSection +from nomad.datamodel.metainfo.common import FastAccess +from nomad.datamodel.metainfo.simulation.calculation import Calculation +from nomad.datamodel.metainfo.simulation.method import Method +from nomad.datamodel.metainfo.simulation.system import System +from nomad.metainfo import ( # noqa: F401 Category, + MCategory, + MSection, Package, Quantity, + Reference, Section, - SubSection, SectionProxy, - Reference, + SubSection, ) -from nomad.datamodel.metainfo.simulation.method import Method -from nomad.datamodel.metainfo.simulation.system import System -from nomad.datamodel.metainfo.simulation.calculation import Calculation -from nomad.datamodel.metainfo.common import FastAccess -from nomad.datamodel.data import ArchiveSection m_package = Package() diff --git a/nomad/datamodel/metainfo/simulation/system.py b/nomad/datamodel/metainfo/simulation/system.py index e3bcc8d7862d1d51efe89a2957048467a34f53a6..c5645aebbf0f6aa5dd19826b6213f13a52f88019 100644 --- a/nomad/datamodel/metainfo/simulation/system.py +++ b/nomad/datamodel/metainfo/simulation/system.py @@ -20,26 +20,28 @@ # Only for purpose of compatibility. Use run schema plugin. 
# https://github.com/nomad-coe/nomad-schema-plugin-run.git -import numpy as np # pylint: disable=unused-import -import typing # pylint: disable=unused-import -from nomad.metainfo import ( # pylint: disable=unused-import - MSection, - MCategory, +import typing # noqa: F401 + +import numpy as np # noqa: F401 + +from nomad.datamodel.data import ArchiveSection +from nomad.metainfo import ( # noqa: F401 Category, + MCategory, + MEnum, + MSection, Package, Quantity, + Reference, Section, - SubSection, SectionProxy, - Reference, - MEnum, + SubSection, derived, ) -from nomad.datamodel.data import ArchiveSection from nomad.metainfo.data_type import m_float64 +from nomad.units import ureg from ..common import FastAccess -from nomad.units import ureg m_package = Package() diff --git a/nomad/datamodel/metainfo/simulation/workflow.py b/nomad/datamodel/metainfo/simulation/workflow.py index 36374441b67d139e489cc44509c4381fa312271f..aa77ddb2d01bccb48645324e4d9986c897058ba3 100644 --- a/nomad/datamodel/metainfo/simulation/workflow.py +++ b/nomad/datamodel/metainfo/simulation/workflow.py @@ -20,7 +20,6 @@ # Only for purpose of compatibility. Use simulation workflow schema plugin. # https://github.com/nomad-coe/nomad-schema-plugin-simulation-workflow.git -from typing import List import numpy as np from ase import Atoms from ase.eos import EquationOfState as aseEOS @@ -28,45 +27,48 @@ from nptyping import NDArray from nomad.atomutils import get_volume from nomad.datamodel.data import ArchiveSection -from nomad.units import ureg -from nomad.metainfo import ( - MSection, - SubSection, - Section, - Quantity, - MEnum, - Reference, - Package, - derived, -) from nomad.datamodel.metainfo.common import FastAccess -from nomad.datamodel.metainfo.workflow import Workflow, Link, Task -from nomad.datamodel.metainfo.simulation.system import System, AtomsGroup -from nomad.datamodel.metainfo.simulation.method import ( - Method, - XCFunctional, - BasisSetContainer, - GW as GWMethodology, - TB as TBMethodology, - DMFT as DMFTMethodology, - BSE as BSEMethodology, -) from nomad.datamodel.metainfo.simulation.calculation import ( - Calculation, + BandEnergies, BandGap, - Dos, BandStructure, - BandEnergies, + Calculation, Density, - Potential, - Spectra, + Dos, ElectronicStructureProvenance, + EnergyEntry, GreensFunctions, + Potential, + Spectra, +) +from nomad.datamodel.metainfo.simulation.calculation import ( RadiusOfGyration as RadiusOfGyrationCalculation, +) +from nomad.datamodel.metainfo.simulation.calculation import ( RadiusOfGyrationValues as RadiusOfGyrationValuesCalculation, - EnergyEntry, ) - +from nomad.datamodel.metainfo.simulation.method import BSE as BSEMethodology +from nomad.datamodel.metainfo.simulation.method import DMFT as DMFTMethodology +from nomad.datamodel.metainfo.simulation.method import GW as GWMethodology +from nomad.datamodel.metainfo.simulation.method import TB as TBMethodology +from nomad.datamodel.metainfo.simulation.method import ( + BasisSetContainer, + Method, + XCFunctional, +) +from nomad.datamodel.metainfo.simulation.system import AtomsGroup, System +from nomad.datamodel.metainfo.workflow import Link, Task, Workflow +from nomad.metainfo import ( + MEnum, + MSection, + Package, + Quantity, + Reference, + Section, + SubSection, + derived, +) +from nomad.units import ureg # TODO remove this after reprocessing with the new schema defined in # simulationworkflowschema plug in https://github.com/nomad-coe/nomad-schema-plugin-simulation-workflow.git @@ -2203,11 +2205,11 @@ class 
MolecularDynamicsResults(ThermodynamicsResults): super().normalize(archive, logger) try: - from simulationworkflowschema.molecular_dynamics import archive_to_universe from simulationworkflowschema.molecular_dynamics import ( - calc_molecular_rdf, + archive_to_universe, calc_molecular_mean_squared_displacements, calc_molecular_radius_of_gyration, + calc_molecular_rdf, ) universe = archive_to_universe(archive) diff --git a/nomad/datamodel/metainfo/system.py b/nomad/datamodel/metainfo/system.py index bcff5a8172b802abbdf02991f982c954c32b47b2..05fce8c21c63a8dc2c831dde8a8647fef4c31ca0 100644 --- a/nomad/datamodel/metainfo/system.py +++ b/nomad/datamodel/metainfo/system.py @@ -15,11 +15,11 @@ # See the License for the specific language governing permissions and # limitations under the License. # -import numpy as np import ase +import numpy as np -from nomad.metainfo import Package, Quantity, Section, SubSection, SectionProxy from nomad.datamodel.data import ArchiveSection +from nomad.metainfo import Package, Quantity, Section, SectionProxy, SubSection from nomad.units import ureg # TODO System should be redefined from base section diff --git a/nomad/datamodel/metainfo/tabulartree.py b/nomad/datamodel/metainfo/tabulartree.py index 806be1c2c6651d346cf82af29a3006dd3b66984f..496090623503842216ee0543243152407caa7f84 100644 --- a/nomad/datamodel/metainfo/tabulartree.py +++ b/nomad/datamodel/metainfo/tabulartree.py @@ -20,7 +20,6 @@ import typing from nomad.metainfo import MSection, Package, Quantity, SubSection - m_package = Package(name='tabulartree') diff --git a/nomad/datamodel/metainfo/workflow.py b/nomad/datamodel/metainfo/workflow.py index a77f0af398ff271e2fae073b16230995a311bfd1..a1c3fe137a10327b3ec04aeb1ad514d736e9c8b1 100644 --- a/nomad/datamodel/metainfo/workflow.py +++ b/nomad/datamodel/metainfo/workflow.py @@ -16,9 +16,8 @@ # limitations under the License. # -from nomad.metainfo import Quantity, SubSection, Section - from nomad.datamodel.data import ArchiveSection, EntryData, WorkflowsElnCategory +from nomad.metainfo import Quantity, Section, SubSection class Link(ArchiveSection): @@ -98,9 +97,18 @@ class TaskReference(Task): def normalize(self, archive, logger): super().normalize(archive, logger) - if not self.name and self.task: + if self.task is None: + return + + if not self.name: self.name = self.task.name + # add task inputs/outputs to inputs/outputs + self.inputs.extend([inp for inp in self.task.inputs if inp not in self.inputs]) + self.outputs.extend( + [out for out in self.task.outputs if out not in self.outputs] + ) + class Workflow(Task, EntryData): """ diff --git a/nomad/datamodel/optimade.py b/nomad/datamodel/optimade.py index f72fa69cbf3a836fc54d7b3ab0af320716d91868..5dfea496884e1083320992269c16d073e97cdb3e 100644 --- a/nomad/datamodel/optimade.py +++ b/nomad/datamodel/optimade.py @@ -16,19 +16,19 @@ # limitations under the License. 
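Editorial note, not part of the patch: the `TaskReference.normalize` hunk in `nomad/datamodel/metainfo/workflow.py` above now returns early when no task is referenced, copies the task name, and appends the referenced task's inputs/outputs to the reference. A rough sketch of that behaviour follows; the names (`relaxation`, `input structure`, …) are invented for illustration, and passing `None` for archive/logger assumes the base `normalize` hook does not need them.

```python
# Sketch only: exercises the TaskReference.normalize change shown above.
from nomad.datamodel.metainfo.workflow import Link, Task, TaskReference

task = Task(
    name='relaxation',
    inputs=[Link(name='input structure')],
    outputs=[Link(name='relaxed structure')],
)
ref = TaskReference(task=task)

# normalize() now (a) returns early if ref.task is None, (b) fills in the name
# from the referenced task, and (c) appends any task inputs/outputs that are
# not already present on the reference.
ref.normalize(archive=None, logger=None)  # None stands in for archive/logger here

assert ref.name == 'relaxation'
assert [link.name for link in ref.inputs] == ['input structure']
assert [link.name for link in ref.outputs] == ['relaxed structure']
```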
# -from ase.data import chemical_symbols import numpy as np +from ase.data import chemical_symbols -from nomad.units import ureg from nomad.metainfo import ( + DefinitionAnnotation, + MEnum, MSection, - Section, Quantity, + Section, SubSection, - MEnum, - DefinitionAnnotation, ) from nomad.metainfo.elasticsearch_extension import Elasticsearch +from nomad.units import ureg def optimade_links(section: str): diff --git a/nomad/datamodel/results.py b/nomad/datamodel/results.py index 75c1e962798ff5f3357a217bb18d405d4b995f3e..2942b4bd44cfe49ccaeae5cae4dfd147b145d576 100644 --- a/nomad/datamodel/results.py +++ b/nomad/datamodel/results.py @@ -17,45 +17,41 @@ # from logging import Logger -from typing import List, Optional, TYPE_CHECKING -import numpy as np -from elasticsearch_dsl import Text +from typing import TYPE_CHECKING +import numpy as np from ase.data import chemical_symbols +from elasticsearch_dsl import Text -from nomad import utils from nomad.config import config -from nomad.datamodel.metainfo.common import ProvenanceTracker, PropertySection +from nomad.datamodel.metainfo.annotations import H5WebAnnotation +from nomad.datamodel.metainfo.common import PropertySection, ProvenanceTracker from nomad.datamodel.metainfo.simulation.method import CoreHole as CoreHoleRun -from nomad.metainfo.elasticsearch_extension import ( - Elasticsearch, - material_type, - material_entry_type, - get_tokenizer, -) - +from nomad.datamodel.optimade import Species as OptimadeSpecies # noqa from nomad.metainfo import ( - MSection, - Section, - SubSection, - Quantity, + Datetime, MEnum, + MSection, Package, - Datetime, + Quantity, Reference, + Section, + SubSection, +) +from nomad.metainfo.elasticsearch_extension import ( + Elasticsearch, + get_tokenizer, + material_entry_type, + material_type, ) -from nomad.datamodel.metainfo.common import ProvenanceTracker, PropertySection -from nomad.datamodel.optimade import Species as OptimadeSpecies # noqa -from nomad.datamodel.metainfo.annotations import H5WebAnnotation try: import runschema runschema.run_schema_entry_point.load() - import runschema.method import runschema.calculation + import runschema.method import runschema.system - import simulationworkflowschema simulationworkflowschema.simulationworkflow_schema_entry_point.load() diff --git a/nomad/datamodel/util.py b/nomad/datamodel/util.py index 63ab9e0c481a61278f3dbee0373d5beb7626780e..60780338245abd42e1bfc44f3715804742c4a8f9 100644 --- a/nomad/datamodel/util.py +++ b/nomad/datamodel/util.py @@ -17,19 +17,19 @@ # import math import re -from typing import Any from collections.abc import Callable +from typing import Any import numpy as np from nomad import utils from nomad.metainfo import ( - Section, AnnotationModel, + MetainfoError, MSection, - SubSection, Property, - MetainfoError, + Section, + SubSection, ) from nomad.units import ureg diff --git a/nomad/doi.py b/nomad/doi.py index a755e19c31e6059503dbef60b6d59340b954bbda..fe1df5f0492c2500fee31111c48572c19ab58f76 100644 --- a/nomad/doi.py +++ b/nomad/doi.py @@ -21,17 +21,18 @@ This module contains all functions necessary to manage DOI via datacite.org and MDS API (https://support.datacite.org/docs/mds-api-guide). 
""" -import xml.etree.ElementTree as ET import datetime +import xml.etree.ElementTree as ET + import requests -from requests.auth import HTTPBasicAuth -from mongoengine import Document, StringField, DateTimeField +from fastapi import HTTPException +from mongoengine import DateTimeField, Document, StringField from mongoengine.errors import NotUniqueError +from requests.auth import HTTPBasicAuth -from nomad.datamodel import User -from nomad.config import config from nomad import utils -from fastapi import HTTPException +from nomad.config import config +from nomad.datamodel import User class DOIException(Exception): diff --git a/nomad/files.py b/nomad/files.py index f9c98ecb86b644bf68119ba866f8b07806222848..d40bf0e1024af97eaaf1339d03d6c8021c39d542 100644 --- a/nomad/files.py +++ b/nomad/files.py @@ -45,43 +45,35 @@ original mainfile, and vice versa. from __future__ import annotations -from abc import ABCMeta -from typing import ( - IO, - Set, - Dict, - List, - Tuple, - Any, - NamedTuple, -) -from collections.abc import Callable -from collections.abc import Iterable, Iterator -from pydantic import BaseModel -from datetime import datetime -import os.path -import os -import shutil -import zipstream import hashlib import io import json -import yaml -import magic +import os +import os.path +import shutil +import tarfile # noqa: F401 import zipfile -import tarfile +from abc import ABCMeta +from collections.abc import Callable, Iterable, Iterator +from datetime import datetime +from typing import IO, Any, NamedTuple + +import magic +import yaml +import zipstream +from pydantic import BaseModel -from nomad import utils, datamodel +from nomad import datamodel, utils +from nomad.archive import ArchiveReader, read_archive, to_json, write_archive +from nomad.archive.storage_v2 import combine_archive from nomad.common import ( - get_compression_format, extract_file, + get_compression_format, is_safe_basename, is_safe_relative_path, ) from nomad.config import config -from nomad.archive.storage_v2 import combine_archive -from nomad.config.models.config import BundleImportSettings, BundleExportSettings -from nomad.archive import write_archive, read_archive, ArchiveReader, to_json +from nomad.config.models.config import BundleExportSettings, BundleImportSettings bundle_info_filename = 'bundle_info.json' diff --git a/nomad/graph/graph_reader.py b/nomad/graph/graph_reader.py index 63473f36cda60f050161a2ea53e53649ca6e10b1..24139f771409b93ca2c6d3ee3cd064f12790534d 100644 --- a/nomad/graph/graph_reader.py +++ b/nomad/graph/graph_reader.py @@ -24,11 +24,10 @@ import functools import itertools import os import re -from collections.abc import AsyncIterator, Iterator +from collections.abc import AsyncIterator, Callable, Iterator from contextlib import contextmanager from threading import Lock -from typing import Any, Type, Union -from collections.abc import Callable +from typing import Any import orjson from cachetools import TTLCache @@ -55,23 +54,10 @@ from nomad.app.v1.routers.uploads import ( get_upload_with_read_access, upload_to_pydantic, ) -from nomad.archive import ( - ArchiveDict, - ArchiveList, - to_json, -) -from nomad.archive.storage_v2 import ( - ArchiveDict as ArchiveDictNew, -) -from nomad.archive.storage_v2 import ( - ArchiveList as ArchiveListNew, -) -from nomad.datamodel import ( - Dataset, - EntryArchive, - ServerContext, - User, -) +from nomad.archive import ArchiveDict, ArchiveList, to_json +from nomad.archive.storage_v2 import ArchiveDict as ArchiveDictNew +from nomad.archive.storage_v2 import 
ArchiveList as ArchiveListNew +from nomad.datamodel import Dataset, EntryArchive, ServerContext, User from nomad.datamodel.util import parse_path from nomad.files import RawPathInfo, UploadFiles from nomad.graph.lazy_wrapper import ( @@ -102,16 +88,14 @@ from nomad.metainfo import ( SectionReference, SubSection, ) -from nomad.metainfo.data_type import Any as AnyType from nomad.metainfo.data_type import JSON, Datatype +from nomad.metainfo.data_type import Any as AnyType from nomad.metainfo.util import MSubSectionList, split_python_definition from nomad.processing import Entry, ProcessStatus, Upload from nomad.utils import timer logger = utils.get_logger(__name__) -# bug when used in isinstance() with mypy -# see https://github.com/python/mypy/issues/11673 GenericList = list | ArchiveList | ArchiveListNew GenericDict = dict | ArchiveDict | ArchiveDictNew diff --git a/nomad/graph/lazy_wrapper.py b/nomad/graph/lazy_wrapper.py index 87b777b4833de13f0c7344454a94a0315cb25fa9..148a6c079de5944615cf1edbfd162360050392d4 100644 --- a/nomad/graph/lazy_wrapper.py +++ b/nomad/graph/lazy_wrapper.py @@ -26,6 +26,7 @@ Different wrappers are catered for different types of objects/operations. """ from __future__ import annotations + from functools import cached_property from nomad.datamodel import User diff --git a/nomad/graph/model.py b/nomad/graph/model.py index 0358df9756165728654fbdacb0a01ef764718a28..669d7f1276e4a8386e1aadea61583917473cc7ed 100644 --- a/nomad/graph/model.py +++ b/nomad/graph/model.py @@ -21,18 +21,19 @@ import functools import re from enum import Enum from hashlib import sha1 -from typing import Annotated, Optional, Union +from typing import Annotated, Union from pydantic import ( AfterValidator, - field_validator, - ConfigDict, BaseModel, + ConfigDict, Field, ValidationError, + field_validator, ) from nomad.app.v1.models import Direction, Metadata, MetadataPagination, Pagination +from nomad.app.v1.models.groups import UserGroupPagination, UserGroupQuery from nomad.app.v1.routers.datasets import DatasetPagination from nomad.app.v1.routers.uploads import ( EntryProcDataPagination, @@ -40,7 +41,6 @@ from nomad.app.v1.routers.uploads import ( UploadProcDataPagination, UploadProcDataQuery, ) -from nomad.app.v1.models.groups import UserGroupQuery, UserGroupPagination class DatasetQuery(BaseModel): diff --git a/nomad/groups.py b/nomad/groups.py index c964fcc560877c56d050eeee6b05d9b1b86c938a..ee792271d02ddaa82ef8bef3226e8a9aae1c08a9 100644 --- a/nomad/groups.py +++ b/nomad/groups.py @@ -18,7 +18,6 @@ from __future__ import annotations -from typing import Optional, Union from collections.abc import Iterable from mongoengine import Document, ListField, Q, QuerySet, StringField diff --git a/nomad/infrastructure.py b/nomad/infrastructure.py index e5f458d0070e5408163b76f4aa2d5ee4856180bd..0d98646a160f725c7ce489d61a6d16ce8187ac95 100644 --- a/nomad/infrastructure.py +++ b/nomad/infrastructure.py @@ -23,33 +23,34 @@ is run once for each *api* and *worker* process. Individual functions for partia exist to facilitate testing, aspects of :py:mod:`nomad.cli`, etc. 
""" -import os.path +import json import os +import os.path +import re import shutil -from elasticsearch_dsl import connections -from mongoengine import connect, disconnect -from mongoengine.connection import ConnectionFailure import smtplib + +# TODO put somemore thought into warnings +import warnings +from datetime import datetime from email.mime.text import MIMEText -from keycloak import KeycloakOpenID, KeycloakAdmin -from keycloak.exceptions import KeycloakAuthenticationError, KeycloakGetError -import json + import jwt -from datetime import datetime -import re import unidecode +from elasticsearch_dsl import connections +from keycloak import KeycloakAdmin, KeycloakOpenID +from keycloak.exceptions import KeycloakAuthenticationError, KeycloakGetError +from mongoengine import connect, disconnect +from mongoengine.connection import ConnectionFailure from nomad import utils from nomad.config import config -from nomad.utils.structlogging import get_logger # The metainfo is defined and used during imports. This is problematic. # We import all parsers very early in the infrastructure setup. This will populate # the metainfo with parser specific definitions, before the metainfo might be used. -from nomad.parsing import parsers # pylint: disable=unused-import - -# TODO put somemore thought into warnings -import warnings +from nomad.parsing import parsers # noqa: F401 +from nomad.utils.structlogging import get_logger warnings.filterwarnings('ignore') @@ -102,12 +103,13 @@ def setup_mongo(client=False): def check_mongo(): db = mongo_client.get_database(config.mongo.db_name) - names = db.list_collection_names() + names = set(db.list_collection_names()) expected_names = {'upload', 'user_group', 'entry', 'dataset', 'archive'} - if names != expected_names: + if not expected_names.issuperset(names): logger.warning( - f'Expected MongoDB collections: {expected_names} but found: {names}' + f'Expected MongoDB collections: {sorted(expected_names)}; ' + f'but found: {sorted(names)}' ) # regression https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR/-/issues/2281 diff --git a/nomad/logtransfer.py b/nomad/logtransfer.py index 6c884540c21a379be6f8eb4bbf005ef5ec207a7b..f85170d8bd3d43d93212bc83b3c9e6609bf7937b 100644 --- a/nomad/logtransfer.py +++ b/nomad/logtransfer.py @@ -16,15 +16,15 @@ # limitations under the License. 
# -import requests -import zlib -import os.path import os +import os.path import time +import zlib +import requests -from nomad.config import config from nomad import utils +from nomad.config import config logger = utils.get_logger(__name__) diff --git a/nomad/metainfo/__init__.py b/nomad/metainfo/__init__.py index 357685a971207f10ba108d3f5528d14d6e5d6540..048d0d6622dba7ba5f21c8fda627db34c12b8e81 100644 --- a/nomad/metainfo/__init__.py +++ b/nomad/metainfo/__init__.py @@ -72,3 +72,22 @@ from .annotation import ( SectionAnnotation, AnnotationModel, ) +from .data_type import ( + Datatype, + Primitive, + Number, + ExactNumber, + InexactNumber, + NonPrimitive, + URL, + File, + Any, + Capitalized, + Bytes, + JSON, + Dimension, + Unit, + Callable, + Datetime, + Enum, +) diff --git a/nomad/metainfo/annotation.py b/nomad/metainfo/annotation.py index 8e2a7c5abd1311aa4fa9cbd9d6abc77003435505..6ee540abe928d152d28b324e2ca686c93f708f75 100644 --- a/nomad/metainfo/annotation.py +++ b/nomad/metainfo/annotation.py @@ -18,9 +18,9 @@ from __future__ import annotations -from typing import Annotated, Any, ClassVar, ForwardRef, Optional +from typing import Any, ClassVar, ForwardRef -from pydantic import ConfigDict, BaseModel, Field +from pydantic import BaseModel, ConfigDict, Field class Annotation: diff --git a/nomad/metainfo/data_frames.py b/nomad/metainfo/data_frames.py new file mode 100644 index 0000000000000000000000000000000000000000..4f2f1fc7d1079f32328e601cc83a30c5d7138d14 --- /dev/null +++ b/nomad/metainfo/data_frames.py @@ -0,0 +1,301 @@ +import inspect +import types +from collections.abc import Iterable +from typing import Union, cast + +import numpy as np +import xarray as xr +from pydantic import BaseModel + +from nomad.metainfo import MSection, Package, Quantity, Section, SubSection, constraint +from nomad.metainfo.metainfo import _placeholder_quantity +from nomad.units import ureg + +m_package = Package() + + +class Values(MSection): + name = Quantity(type=str) + values = _placeholder_quantity + values_ref = Quantity(type='Values', shape=[]) + spanned_dimensions = Quantity(type=int, shape=['*']) + original_shape = Quantity(type=int, shape=['*']) + + def get_values(self, reshape: bool = True) -> np.ndarray: + if self.values_ref: + return self.values_ref.m_resolved().get_values() + values = self.values + if not isinstance(self.values, np.ndarray | ureg.Quantity): + values = np.array(self.values) + if reshape: + return cast(np.ndarray, values).reshape(self.original_shape) + return values + + def __init__(self, *args, **kwargs): + values_ref = None + values: list = [] + if len(args) == 0: + pass + elif len(args) == 1 and isinstance(args[0], list | np.ndarray | ureg.Quantity): + values = args[0] + elif len(args) == 1 and isinstance(args[0], (Values)): + values_ref = args[0] + values = None + else: + values = args + + original_shape = kwargs.pop('original_shape', None) + if isinstance(values, np.ndarray): + values_shape = values.shape + quantity_shape = self.m_def.all_quantities['values'].shape[:-1] + if len(values_shape) < len(quantity_shape): + raise ValueError( + f'The quantity shape, {quantity_shape}, does not meet the ' + f'lower-bound set by the values shape, {values_shape}' + ) + flat_shape = values_shape[: len(quantity_shape)] + (-1,) + values = values.reshape(flat_shape) + if original_shape is None: + original_shape = values_shape + elif isinstance(values, Iterable): + original_shape = [len(values)] + + super().__init__( + values=values, + values_ref=values_ref, + 
original_shape=original_shape, + **kwargs, + ) + + def xarray_attrs(self) -> dict[str, str]: + return dict( + units=self.m_def.all_quantities['values'].unit, + long_name=self.m_def.all_quantities['values'].label, + description=self.m_def.all_quantities['values'].description, + iri=self.m_def.all_quantities['values'].iri, + ) + + +def _get_default_names(iterable: Iterable[Values]) -> list[str]: + names = [] + for values in iterable: + counter = 0 + while True: + counter += 1 + unique_name = f'{values.m_def.name}_{counter}' + if unique_name not in names: + names.append(unique_name) + break + return names + + +def _get_names(iterable: Iterable[Values]) -> list[str]: + default_names = _get_default_names(iterable) + return [ + values.name if values.name else default + for values, default in zip(iterable, default_names) + ] + + +def _get_values( + iterable: Iterable[Values], values: Union[str, 'ValuesTemplate'] +) -> Values: + return_values = None + if isinstance(values, str): + default_names = _get_default_names(iterable) + for v, default in zip(iterable, default_names): + if v.name == values or default == values: + if return_values is not None: + raise ValueError(f'Multiple values matching {values}') + return_values = v + return return_values + for v in iterable: + if v.m_def == values.section_def: + if return_values is not None: + raise ValueError(f'Multiple values matching {values}') + return_values = v + return return_values + + +class DataFrame(MSection): + fields = SubSection(section='Values', repeats=True) + variables = SubSection(section='Values', repeats=True) + + def get_field(self, field: Union[str, 'ValuesTemplate']) -> Values: + return _get_values(self.fields, field) + + def get_variable(self, variable: Union[str, 'ValuesTemplate']) -> Values: + return _get_values(self.variables, variable) + + @constraint(warning=False) + def check_dimensions(self): + # TODO constrains that validate the soundness of field and variable dimensions + pass + + @constraint(warning=False) + def check_mandatory_fields_and_variables(self): + data_frame_annotation = self.m_def.m_get_annotation(DataFrameAnnotation) + if data_frame_annotation is not None: + for index, field in enumerate(data_frame_annotation.mandatory_fields): + assert index < len(self.fields), f'Mandatory field {field} missing' + assert self.fields[index].m_def == field.section_def, ( + f'Field {field} missing' + ) + + for index, variable in enumerate(data_frame_annotation.mandatory_variables): + assert index < len(self.variables), ( + f'Mandatory field {variable} missing' + ) + assert self.variables[index].m_def == variable.section_def, ( + f'Field {variable} missing' + ) + + def to_xarray(self) -> xr.Dataset: + shape = [] + dims = [] + coords = {} + var: Values + for var, name in zip(self.variables, _get_names(self.variables)): + if var.spanned_dimensions is None or len(var.spanned_dimensions) == 0: + coord_dims = [name] + shape.append(len(var.values)) + dims.append(name) + elif len(var.spanned_dimensions) == 1: + dim = var.spanned_dimensions[0] + if dim >= len(shape): + shape.append(len(var.values)) + dims.append(f'm_dim_{dim}') + coord_dims = [f'm_dim_{dim}'] + else: + raise NotImplementedError('Only one spanned dimension supported') + coords[name] = ( + coord_dims, + var.values, + var.xarray_attrs(), + ) + data_vars = {} + field: Values + for field, name in zip(self.fields, _get_names(self.fields)): + data_vars[name] = ( + dims, + cast(np.ndarray, field.values).reshape(shape), + field.xarray_attrs(), + ) + return xr.Dataset( + 
data_vars=data_vars, + coords=coords, + attrs=dict( + description=self.m_def.description, + long_name=self.m_def.label, + ), + ) + + def to_pandas(self): + return self.to_xarray().to_dataframe() + + +def _get_package(): + package = inspect.currentframe().f_back.f_back.f_globals.get('m_package', None) + assert package is not None, ( + 'PhysicalQuantities have to be defined within a python package with global ' + 'Package m_package variable' + ) + assert isinstance(m_package, Package), 'm_package has to be a Package instance' + return package + + +class ValuesTemplate: + """ + A generator for quantities of a certain template with type, shape, unit, name, description, iri, etc. + """ + + def __init__(self, **kwargs): + self.quantity = Quantity(**kwargs) + assert self.quantity.name is not None, ( + 'Values templates must be explicitly named' + ) + + class ValuesTemplate(Values): + m_def = Section(name=self.quantity.name) + values = self(name='values', shape=self.quantity.shape + ['*']) + + _get_package().section_definitions.append(ValuesTemplate.m_def) + self.section_def = ValuesTemplate.m_def + self.create = ValuesTemplate + self.section_cls = ValuesTemplate + + def __call__(self, **kwargs): + # Make a deep copy of the quantity via m_from_dict(m_to_dict) + quantity = Quantity.m_from_dict(self.quantity.m_to_dict()) + quantity.m_update(**kwargs) + return quantity + + +class DataFrameAnnotation(BaseModel): + class Config: + arbitrary_types_allowed = True + + mandatory_fields: list[ValuesTemplate] + mandatory_variables: list[ValuesTemplate] + + def dict(self, *args, **kwargs): + return dict( + mandatory_fields=[ + field.section_def.qualified_name() for field in self.mandatory_fields + ], + mandatory_variables=[ + variable.section_def.qualified_name() + for variable in self.mandatory_variables + ], + ) + + +class DataFrameTemplate: + """ + A generator for data frames with specific mandatory fields and default variables. + """ + + def __init__( + self, + mandatory_fields: list[ValuesTemplate], + mandatory_variables: list[ValuesTemplate] = [], + **kwargs, + ): + self.sub_section = SubSection(**kwargs) + self.fields = mandatory_fields + self.variables = mandatory_variables + + assert self.sub_section.name is not None, ( + 'DataFrame templates must be explicitly named' + ) + + class DataFrameTemplate(DataFrame): + m_def = Section(name=self.sub_section.name) + + # TODO validation that default fields and variables are actually present + + DataFrameTemplate.m_def.m_annotations['data_frame'] = DataFrameAnnotation( + mandatory_fields=mandatory_fields, + mandatory_variables=mandatory_variables, + ) + + _get_package().section_definitions.append(DataFrameTemplate.m_def) + self.create = DataFrameTemplate + self.section_cls = DataFrameTemplate + self.section_def = DataFrameTemplate.m_def + self.sub_section.section = self.section_def + + def __call__(self, **kwargs): + sub_section = self.sub_section.m_copy() + sub_section.m_update(**kwargs) + + def __init_metainfo__(self): + # TODO here we can add a more specialised section def to the caller + # definition (e.g. 
MySection) as an inner_section_definition + pass + + sub_section.__init_metainfo__ = types.MethodType(__init_metainfo__, sub_section) + return sub_section + + +m_package.__init_metainfo__() diff --git a/nomad/metainfo/data_type.py b/nomad/metainfo/data_type.py index 04b4c724651ac2a2307d9d3c55b8aba26799605b..6f9b89f798ae35a02ff7bcb57afcb6cb714d36dd 100644 --- a/nomad/metainfo/data_type.py +++ b/nomad/metainfo/data_type.py @@ -22,7 +22,7 @@ import importlib import re import typing from base64 import b64decode, b64encode -from datetime import datetime, date +from datetime import date, datetime from functools import reduce from inspect import isclass from typing import Any as TypingAny @@ -1248,12 +1248,12 @@ def to_optimade_type(in_type: Datatype): def to_mongo_type(in_type: Datatype): from mongoengine import ( - IntField, - FloatField, BooleanField, - StringField, DateTimeField, DictField, + FloatField, + IntField, + StringField, ) standard_type = in_type.standard_type() diff --git a/nomad/metainfo/elasticsearch_extension.py b/nomad/metainfo/elasticsearch_extension.py index 605b0bb779cc0f2f453ac3f2d63557ea42912b49..9d386d432962409b16d2d4f8cc34019d87cf0eb3 100644 --- a/nomad/metainfo/elasticsearch_extension.py +++ b/nomad/metainfo/elasticsearch_extension.py @@ -159,25 +159,15 @@ sub-sections as if they were direct sub-sections. import math import re from collections import defaultdict -from typing import ( - TYPE_CHECKING, - Any, - DefaultDict, - Dict, - List, - Optional, - Set, - Tuple, - Union, - cast, -) from collections.abc import Callable +from typing import TYPE_CHECKING, Any, Optional, cast from elasticsearch_dsl import Q +from pint import Quantity as PintQuantity + from nomad import utils from nomad.config import config from nomad.config.models.plugins import Parser, Schema, SchemaPackageEntryPoint -from pint import Quantity as PintQuantity from . 
import DefinitionAnnotation from .data_type import Datatype, to_elastic_type @@ -193,7 +183,7 @@ from .metainfo import ( ) if TYPE_CHECKING: - from nomad.datamodel.datamodel import EntryArchive, SearchableQuantity + from nomad.datamodel.datamodel import SearchableQuantity schema_separator = '#' dtype_separator = '#' diff --git a/nomad/metainfo/example.py b/nomad/metainfo/example.py index 3e7f7443b41013722c1058de5bf70e0491629b7d..d7acce8ee9bae8c5514cce7113692b3e3663a5de 100644 --- a/nomad/metainfo/example.py +++ b/nomad/metainfo/example.py @@ -18,21 +18,22 @@ """An example metainfo package.""" -import numpy as np from datetime import datetime -from nomad.units import ureg +import numpy as np + from nomad.metainfo import ( - MSection, + Datetime, MCategory, - Section, - Quantity, + MEnum, + MSection, Package, + Quantity, + Section, SubSection, - MEnum, - Datetime, constraint, ) +from nomad.units import ureg m_package = Package(links=['https://nomad-lab.eu/prod/rae/docs/metainfo.html']) diff --git a/nomad/metainfo/metainfo.py b/nomad/metainfo/metainfo.py index c5e99bac8c8f4c16709beda576e15beb8a7b1c43..67072e835dee58c21f0007d82df38f6dc6ae3e1f 100644 --- a/nomad/metainfo/metainfo.py +++ b/nomad/metainfo/metainfo.py @@ -24,19 +24,17 @@ import json import re import sys import warnings +from collections.abc import Callable as TypingCallable from collections.abc import Iterable from copy import deepcopy from functools import wraps -from typing import Any -from typing import Callable as TypingCallable -from typing import Literal, TypeVar, cast +from typing import Any, Literal, TypeVar, cast from urllib.parse import urlsplit, urlunsplit import docstring_parser import jmespath import pint -from pydantic import TypeAdapter, ValidationError -from typing_extensions import deprecated # type: ignore +from pydantic import BaseModel, TypeAdapter, ValidationError from nomad.config import config from nomad.metainfo.data_type import JSON as JSONType @@ -45,14 +43,20 @@ from nomad.metainfo.data_type import Any as AnyType from nomad.metainfo.data_type import Bytes as BytesType from nomad.metainfo.data_type import Callable as CallableType from nomad.metainfo.data_type import Capitalized as CapitalizedType -from nomad.metainfo.data_type import Datatype +from nomad.metainfo.data_type import ( + Datatype, + Enum, + ExactNumber, + InexactNumber, + Number, + check_dimensionality, + m_str, + normalize_type, +) from nomad.metainfo.data_type import Datetime as DatetimeType from nomad.metainfo.data_type import Dimension as DimensionType -from nomad.metainfo.data_type import Enum, ExactNumber from nomad.metainfo.data_type import File as FileType -from nomad.metainfo.data_type import InexactNumber, Number from nomad.metainfo.data_type import Unit as UnitType -from nomad.metainfo.data_type import check_dimensionality, m_str, normalize_type from nomad.metainfo.util import ( MQuantity, MSubSectionList, @@ -64,8 +68,6 @@ from nomad.metainfo.util import ( to_dict, ) from nomad.units import ureg as units -from pydantic import ValidationError, parse_obj_as -from typing_extensions import deprecated # type: ignore from .annotation import ( Annotation, @@ -1348,9 +1350,6 @@ class MSection(metaclass=MObjectMeta): if not definition.repeats or target is None: return _wrap(target) - # this practically does nothing only to make mypy happy - # it is guaranteed to be a MSubSectionList - target = cast(MSubSectionList, target) if isinstance(index, str) and target.has_duplicated_key(): raise MetainfoError(f'Multiple sections with key {index} 
exist.') @@ -2017,6 +2016,9 @@ class MSection(metaclass=MObjectMeta): if isinstance(annotation, Annotation): return annotation.m_to_dict() + if isinstance(annotation, BaseModel): + return annotation.dict() + if not isinstance(annotation, dict): return str(annotation) @@ -2758,7 +2760,7 @@ class Definition(MSection): Python references, e.g. in `m_def`. variable: - A boolean that indicates this property as variable parts in its name. + A boolean that indicates this property has variable parts in its name. If this is set to true, all capital letters in the name can be replaced with arbitrary strings. However, variable names work similar to aliases and can be considered on-demand aliases. Other aliases and the diff --git a/nomad/metainfo/mongoengine_extension.py b/nomad/metainfo/mongoengine_extension.py index 3d98ea4a8b63bb61f36cfbdf3832982c4bb74d51..c7dd4baa97feed7ab5921ed8e3f926708c902558 100644 --- a/nomad/metainfo/mongoengine_extension.py +++ b/nomad/metainfo/mongoengine_extension.py @@ -33,14 +33,11 @@ Adds mongoengine supports to the metainfo. Allows to create, save, and get metai sections from mongoengine. The annotation key is 'mongo'. """ -from typing import Any, Dict, List +from typing import Any -from .data_type import Datatype, to_mongo_type -from .metainfo import ( - MSection, - Quantity, -) from . import Annotation, DefinitionAnnotation, SectionAnnotation +from .data_type import Datatype, to_mongo_type +from .metainfo import MSection, Quantity class Mongo(DefinitionAnnotation): diff --git a/nomad/metainfo/pydantic_extension.py b/nomad/metainfo/pydantic_extension.py index 8aee87b63121022518bb76e467e030017e8f8dee..155140406df838c76120e987d18527833f04861f 100644 --- a/nomad/metainfo/pydantic_extension.py +++ b/nomad/metainfo/pydantic_extension.py @@ -32,16 +32,13 @@ Allows to create pydantic models from section definitions. """ -from typing import Optional, cast, Type -from pydantic import create_model, Field, BaseModel +from typing import cast + +from pydantic import BaseModel, Field, create_model -from .data_type import to_pydantic_type -from .metainfo import ( - Definition, - Section, - Quantity, -) from . 
import DefinitionAnnotation +from .data_type import to_pydantic_type +from .metainfo import Definition, Quantity, Section class PydanticModel(DefinitionAnnotation): diff --git a/nomad/metainfo/util.py b/nomad/metainfo/util.py index 53e59e2e3fb8bebecde44edc416a05e12a5821d6..a46e70d43c38bbb9eda3fb6807a3bd37af570357 100644 --- a/nomad/metainfo/util.py +++ b/nomad/metainfo/util.py @@ -19,7 +19,7 @@ from __future__ import annotations import hashlib import re -from typing import Any, Optional +from typing import Any import pint @@ -361,13 +361,15 @@ def resolve_variadic_name(definitions: dict, name: str, hint: str | None = None) candidates = {} hint_candidates = {} - for definition in definitions: - match_score = get_namefit(name, definition) + for dname, definition in definitions.items(): + if not definition.variable: # TODO: also if type does not match + continue + match_score = get_namefit(name, dname) if match_score >= 0: - candidates[definition] = match_score + candidates[dname] = match_score # Check if the hint exists in the definition if hint and hint in definition.all_attributes: - hint_candidates[definition] = match_score + hint_candidates[dname] = match_score if len(candidates) == 0: raise ValueError(f'Cannot find a proper definition for name "{name}".') diff --git a/nomad/mkdocs.py b/nomad/mkdocs/__init__.py similarity index 69% rename from nomad/mkdocs.py rename to nomad/mkdocs/__init__.py index d033a51bd83b0c9b0563adc22bf91527e960a338..e0fbf44fac3c772faa9a1a8aff5545b0e29e720a 100644 --- a/nomad/mkdocs.py +++ b/nomad/mkdocs/__init__.py @@ -24,186 +24,36 @@ from types import UnionType from pydantic.fields import FieldInfo import yaml import json -from enum import Enum -from pydantic import BaseModel import os.path -from typing import Annotated, Any, Union, get_args, get_origin -from typing import Literal -from inspect import isclass -from markdown.extensions.toc import slugify - -from nomad.utils import strip -from nomad.config import config -from nomad.config.models.plugins import ParserEntryPoint, EntryPointType -from nomad.app.v1.models import query_documentation, owner_documentation -from nomad.app.v1.routers.entries import archive_required_documentation -from nomad import utils - - -exported_config_models = set() # type: ignore - - -doc_snippets = { - 'query': query_documentation, - 'owner': owner_documentation, - 'archive-required': archive_required_documentation, -} - - -def get_field_type_info(field: FieldInfo) -> tuple[str, set[Any]]: - """Used to recursively walk through a type definition, building up a cleaned - up type name and returning all of the classes that were used. - - Args: - type_: The type to inspect. Can be any valid type definition. - - Returns: - Tuple containing the cleaned up type name and a set of classes - found inside. 
- """ - classes = set() - annotation = field.annotation - - def get_class_name(ann: Any) -> str: - if hasattr(ann, '__name__'): - name = ann.__name__ - return 'None' if name == 'NoneType' else name - return str(ann) - - def _recursive_extract(ann: Any, type_str: str = '') -> str: - nonlocal classes - - origin = get_origin(ann) - args = get_args(ann) - - if origin is None and issubclass(ann, Enum): - classes.add(ann) - # Determine base type for Enums - if issubclass(ann, str): - return get_class_name(str) - elif issubclass(ann, int): - return get_class_name(int) - else: - return get_class_name(ann) - elif origin is None: - classes.add(ann) - return get_class_name(ann) - if origin is list: - classes.add(origin) - if type_str: - type_str += '[' + _recursive_extract(args[0]) + ']' - else: - type_str = 'list[' + _recursive_extract(args[0]) + ']' - elif origin is dict: - classes.add(origin) - if type_str: - type_str += ( - '[' - + _recursive_extract(args[0]) - + ', ' - + _recursive_extract(args[1]) - + ']' - ) - else: - type_str = ( - 'dict[' - + _recursive_extract(args[0]) - + ', ' - + _recursive_extract(args[1]) - + ']' - ) - - elif origin is UnionType or origin is Union: - # Handle Union types (e.g., Optional[str] is equivalent to Union[str, None]) - union_types = [] - for arg in args: - union_types.append(_recursive_extract(arg)) - type_str = ' | '.join(union_types) - elif origin is Literal: - classes.add(origin) - return get_class_name( - type(args[0]) - ) # Add name of the literal value (e.g., str) - elif origin is Annotated: - # Extract the underlying type from Annotated - return _recursive_extract(args[0]) - else: - # Handle generic types - classes.add(origin) - return get_class_name(ann) - - return type_str - - type_name = _recursive_extract(annotation) - return type_name, classes - - -def get_field_description(field: FieldInfo) -> str | None: - """Retrieves the description for a pydantic field as a markdown string. - - Args: - field: The pydantic field to inspect. - - Returns: - Markdown string for the description. - """ - value = field.description - if value: - value = utils.strip(value) - value = value.replace('\n\n', '<br/>').replace('\n', ' ') - return value +from typing import get_args +from inspect import isclass -def get_field_default(field: FieldInfo) -> str | None: - """Retrieves the default value from a pydantic field as a markdown string. - - Args: - field: The pydantic field to inspect. - - Returns: - Markdown string for the default value. - """ - default_value = field.default - if default_value is not None: - if isinstance(default_value, dict | BaseModel): - default_value = 'Complex object, default value not displayed.' - elif default_value == '': - default_value = '""' - else: - default_value = f'`{default_value}`' - return default_value - - -def get_field_options(field: FieldInfo) -> dict[str, str | None]: - """Retrieves a dictionary of value-description pairs from a pydantic field. - - Args: - field: The pydantic field to inspect. +from pydantic.fields import FieldInfo - Returns: - Dictionary containing the possible options and their description for - this field. The description may be None indicating that it does not exist. 
- """ - options: dict[str, str | None] = {} - if isclass(field.annotation) and issubclass(field.annotation, Enum): - for x in field.annotation: - options[str(x.value)] = None - return options +from pydantic import BaseModel +from markdown.extensions.toc import slugify -def get_field_deprecated(field: FieldInfo) -> bool: - """Returns whether the given pydantic field is deprecated or not. +from nomad.utils import strip +from nomad.config import config +from nomad import utils - Args: - field: The pydantic field to inspect. +from nomad.mkdocs.pydantic import ( + exported_config_models, + get_field_default, + get_field_deprecated, + get_field_description, + get_field_options, + get_field_type_info, +) +from nomad.mkdocs.metainfo import ( + section_markdown_from_section_cls, + package_markdown_from_package, +) - Returns: - Whether the field is deprecated. - """ - if field.deprecated: - return True - return False +from nomad.config.models.plugins import ParserEntryPoint, EntryPointType class MyYamlDumper(yaml.Dumper): @@ -225,6 +75,14 @@ def define_env(env): @env.macro def doc_snippet(key): # pylint: disable=unused-variable + from nomad.app.v1.models import query_documentation, owner_documentation + from nomad.app.v1.routers.entries import archive_required_documentation + + doc_snippets = { + 'query': query_documentation, + 'owner': owner_documentation, + 'archive-required': archive_required_documentation, + } return doc_snippets[key] @env.macro @@ -258,7 +116,7 @@ def define_env(env): path = f'{path}:' file_path, json_path = path.split(':') - file_path = os.path.join(os.path.dirname(__file__), '..', file_path) + file_path = os.path.join(os.path.dirname(__file__), '../..', file_path) with open(file_path) as f: if file_path.endswith('.yaml'): @@ -517,3 +375,19 @@ def define_env(env): for category, plugins in categories.items() ] ) + + @env.macro + def metainfo_package(path, heading=None, hide=[]): # pylint: disable=unused-variable + """ + Produces markdown code for the given metainfo package. + + Arguments: + path: The python qualified name of the package. + """ + import importlib + + module_name, name = path.rsplit('.', 1) + module = importlib.import_module(path) + pkg = getattr(module, 'm_package') + + return package_markdown_from_package(pkg) diff --git a/nomad/mkdocs/metainfo.py b/nomad/mkdocs/metainfo.py new file mode 100644 index 0000000000000000000000000000000000000000..710a6383dc7cb8ab8ae30b4a667077a7a45e49b3 --- /dev/null +++ b/nomad/mkdocs/metainfo.py @@ -0,0 +1,158 @@ +# +# This file is part of NOMAD. See https://nomad-lab.eu for further info. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +from nomad import utils +from nomad.datamodel.data import ArchiveSection +from nomad.metainfo import Datatype, Property, Quantity, Reference, SubSection + + +def get_reference(section_def, pkg) -> str: + if section_def.m_parent == pkg: + return f'[`{section_def.name}`](#{section_def.name.lower()})' + + return f'`{section_def.qualified_name()}`' + + +def get_property_type_info(property: Property, pkg=None) -> str: + if isinstance(property, Quantity): + type = property.type + if isinstance(type, Reference): + return get_reference(type.target_section_def, pkg) + if isinstance(type, Datatype): + try: + return f'`{type.serialize_self()["type_data"]}`' + except NotImplementedError: + pass + + if isinstance(property, SubSection): + return get_reference(property.section_def, pkg) + + return '*unknown type*' + + +def get_property_description(property: Property) -> str | None: + value = property.description + if value: + value = utils.strip(value) + value = value.replace('\n\n', '<br/>').replace('\n', ' ') + + return value + + +def get_quantity_default(quantity: Quantity) -> str: + default = quantity.default + if isinstance(default, dict): + return 'Complex object, default value not displayed.' + return f'`{str(quantity.default)}`' if quantity.default is not None else '' + + +def get_property_options(property: Property) -> str: + options: list[str] = [] + if isinstance(property, Quantity): + if property.shape != []: + options.append(f'**shape**=`{property.shape}`') + if property.unit: + options.append(f'**unit**=`{property.unit}`') + + default = get_quantity_default(property) + if default != '': + options.append(f'**default**=`{default}`') + + if isinstance(property, SubSection): + options.append('**sub-section**') + if property.repeats: + options.append('**repeats**') + + return ', '.join(options) + + +def section_markdown_from_section_cls( + section_cls, name=None, heading=None, hide=[], pkg=None +): + section_def = section_cls.m_def + properties = section_def.quantities + section_def.sub_sections + + if not name: + name = section_cls.__name__ + + def content(property): + result = [] + description = get_property_description(property) + if description: + result.append(description) + options = get_property_options(property) + if options != '': + result.append(options) + + return '</br>'.join(result) + + def property_row(property): + if property.name.startswith('m_'): + return '' + type_name = get_property_type_info(property, pkg) + return f'|{property.name}|{type_name}|{content(property)}|\n' + + if heading is None: + result = f'### {name}\n' + else: + result = heading + '\n' + + if section_def.description and section_def.description != '': + result += f'**description**: {utils.strip(section_def.description)}\n\n' + + if len(section_def.base_sections) > 0: + base_sections = [ + get_reference(base_section, pkg) + for base_section in section_def.base_sections + ] + result += f'**inherits from**: {", ".join(base_sections)}\n\n' + + if section_def.links: + links = [f'[{link}]({link})' for link in section_def.links] + result += f'**links**: {", ".join(links)}\n\n' + + if len(properties) > 0: + result += '**properties**:\n\n' + result += '|name|type| |\n' + result += '|----|----|-|\n' + result += ''.join( + [ + property_row(property) + for property in properties + if property.name not in hide + ] + ) + result += '\n\n' + + if ( + section_cls.normalize + and section_cls.normalize.__doc__ != ArchiveSection.normalize.__doc__ + ): + if section_cls.normalize.__doc__: + result += f'**normalization**: 
\n\n{utils.strip(section_cls.normalize.__doc__)}\n\n' + else: + result += f'**normalization** without further documentation\n\n' + + return result + + +def package_markdown_from_package(pkg): + return ''.join( + [ + section_markdown_from_section_cls(section_def.section_cls, pkg=pkg) + for section_def in pkg.section_definitions + ] + ) diff --git a/nomad/mkdocs/pydantic.py b/nomad/mkdocs/pydantic.py new file mode 100644 index 0000000000000000000000000000000000000000..774843aa3d48ee149cdda56f32517b3e9543fd54 --- /dev/null +++ b/nomad/mkdocs/pydantic.py @@ -0,0 +1,189 @@ +# +# Copyright The NOMAD Authors. +# +# This file is part of NOMAD. See https://nomad-lab.eu for further info. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +""" +Definitions that are used in the documentation via mkdocs-macro-plugin. +""" + +from enum import Enum +from inspect import isclass +from types import UnionType +from typing import Annotated, Any, Literal, Union, get_args, get_origin + +from pydantic import BaseModel +from pydantic.fields import FieldInfo + +from nomad import utils + +exported_config_models = set() # type: ignore + + +def get_field_type_info(field: FieldInfo) -> tuple[str, set[Any]]: + """Used to recursively walk through a type definition, building up a cleaned + up type name and returning all of the classes that were used. + + Args: + type_: The type to inspect. Can be any valid type definition. + + Returns: + Tuple containing the cleaned up type name and a set of classes + found inside. 
+ """ + classes = set() + annotation = field.annotation + + def get_class_name(ann: Any) -> str: + if hasattr(ann, '__name__'): + name = ann.__name__ + return 'None' if name == 'NoneType' else name + return str(ann) + + def _recursive_extract(ann: Any, type_str: str = '') -> str: + nonlocal classes + + origin = get_origin(ann) + args = get_args(ann) + + if origin is None and issubclass(ann, Enum): + classes.add(ann) + # Determine base type for Enums + if issubclass(ann, str): + return get_class_name(str) + elif issubclass(ann, int): + return get_class_name(int) + else: + return get_class_name(ann) + elif origin is None: + classes.add(ann) + return get_class_name(ann) + if origin is list: + classes.add(origin) + if type_str: + type_str += '[' + _recursive_extract(args[0]) + ']' + else: + type_str = 'list[' + _recursive_extract(args[0]) + ']' + elif origin is dict: + classes.add(origin) + if type_str: + type_str += ( + '[' + + _recursive_extract(args[0]) + + ', ' + + _recursive_extract(args[1]) + + ']' + ) + else: + type_str = ( + 'dict[' + + _recursive_extract(args[0]) + + ', ' + + _recursive_extract(args[1]) + + ']' + ) + + elif origin is UnionType or origin is Union: + # Handle Union types (e.g., Optional[str] is equivalent to Union[str, None]) + union_types = [] + for arg in args: + union_types.append(_recursive_extract(arg)) + type_str = ' | '.join(union_types) + elif origin is Literal: + classes.add(origin) + return get_class_name( + type(args[0]) + ) # Add name of the literal value (e.g., str) + elif origin is Annotated: + # Extract the underlying type from Annotated + return _recursive_extract(args[0]) + else: + # Handle generic types + classes.add(origin) + return get_class_name(ann) + + return type_str + + type_name = _recursive_extract(annotation) + return type_name, classes + + +def get_field_description(field: FieldInfo) -> str | None: + """Retrieves the description for a pydantic field as a markdown string. + + Args: + field: The pydantic field to inspect. + + Returns: + Markdown string for the description. + """ + value = field.description + if value: + value = utils.strip(value) + value = value.replace('\n\n', '<br/>').replace('\n', ' ') + + return value + + +def get_field_default(field: FieldInfo) -> str | None: + """Retrieves the default value from a pydantic field as a markdown string. + + Args: + field: The pydantic field to inspect. + + Returns: + Markdown string for the default value. + """ + default_value = field.default + if default_value is not None: + if isinstance(default_value, dict | BaseModel): + default_value = 'Complex object, default value not displayed.' + elif default_value == '': + default_value = '""' + else: + default_value = f'`{default_value}`' + return default_value + + +def get_field_options(field: FieldInfo) -> dict[str, str | None]: + """Retrieves a dictionary of value-description pairs from a pydantic field. + + Args: + field: The pydantic field to inspect. + + Returns: + Dictionary containing the possible options and their description for + this field. The description may be None indicating that it does not exist. + """ + options: dict[str, str | None] = {} + if isclass(field.annotation) and issubclass(field.annotation, Enum): + for x in field.annotation: + options[str(x.value)] = None + return options + + +def get_field_deprecated(field: FieldInfo) -> bool: + """Returns whether the given pydantic field is deprecated or not. + + Args: + field: The pydantic field to inspect. + + Returns: + Whether the field is deprecated. 
+ """ + if field.deprecated: + return True + return False diff --git a/nomad/normalizing/common.py b/nomad/normalizing/common.py index 75d5c2bdf7935570c29893928482bfa0252baa7c..f03185579dc8dbdbec5a0075841fa9f115be9ade 100644 --- a/nomad/normalizing/common.py +++ b/nomad/normalizing/common.py @@ -15,27 +15,24 @@ # See the License for the specific language governing permissions and # limitations under the License. # +from typing import Any + +import matid.geometry # pylint: disable=import-error import numpy as np -from math import isnan from ase import Atoms -from typing import List, Set, Any, Optional, Dict, Union -from nptyping import NDArray from matid import SymmetryAnalyzer # pylint: disable=import-error -from matid.symmetry.wyckoffset import WyckoffSet as WyckoffSetMatID # pylint: disable=import-error -import matid.geometry # pylint: disable=import-error +from matid.symmetry.wyckoffset import ( + WyckoffSet as WyckoffSetMatID, # pylint: disable=import-error +) +from nptyping import NDArray from nomad import atomutils from nomad.config import config -from nomad.utils import hash -from nomad.units import ureg from nomad.datamodel.metainfo.system import Atoms as NOMADAtoms from nomad.datamodel.optimade import Species -from nomad.datamodel.results import ( - Cell, - Structure, - LatticeParameters, - WyckoffSet, -) +from nomad.datamodel.results import Cell, LatticeParameters, Structure, WyckoffSet +from nomad.units import ureg +from nomad.utils import hash def wyckoff_sets_from_matid(wyckoff_sets: list[WyckoffSetMatID]) -> list[WyckoffSet]: diff --git a/nomad/normalizing/material.py b/nomad/normalizing/material.py index 078e4f0452e60c6d3dbe1bcb3ee98352f4c9b8b6..9192162c68cabba23ff671779e183f158afb39b6 100644 --- a/nomad/normalizing/material.py +++ b/nomad/normalizing/material.py @@ -16,25 +16,24 @@ # import re -from typing import Union, Dict, List -from nptyping import NDArray import ase.data from matid.classification.classifications import ( - Class0D, Atom, + Class0D, Class1D, Class2D, + Class3D, Material2D, Surface, - Class3D, ) +from nptyping import NDArray from nomad import atomutils from nomad.atomutils import Formula -from nomad.normalizing.common import material_id_bulk, material_id_2d, material_id_1d +from nomad.datamodel.results import Material, Symmetry, structure_name_map +from nomad.normalizing.common import material_id_1d, material_id_2d, material_id_bulk from nomad.normalizing.topology import TopologyNormalizer -from nomad.datamodel.results import Symmetry, Material, structure_name_map class MaterialNormalizer: diff --git a/nomad/normalizing/metainfo.py b/nomad/normalizing/metainfo.py index b76df84ca7d766f35c6556a831a857fec93980bc..73e3ddd01829687edab628a8065c73107b60fe26 100644 --- a/nomad/normalizing/metainfo.py +++ b/nomad/normalizing/metainfo.py @@ -18,8 +18,7 @@ from nomad.datamodel import EntryArchive from nomad.datamodel.data import ArchiveSection -from nomad.datamodel import EntryArchive -from typing import Optional + from . import Normalizer diff --git a/nomad/normalizing/method.py b/nomad/normalizing/method.py index 0fc961ec59be8499fe209547351a58d40abe2f8b..2a3355f8b13bf6b9c87d5be54b4f0c5aa54011e1 100644 --- a/nomad/normalizing/method.py +++ b/nomad/normalizing/method.py @@ -15,33 +15,33 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# +import re from abc import ABC, abstractmethod -from ase.dft.kpoints import monkhorst_pack, get_monkhorst_pack_size_and_offset from collections import OrderedDict -import re + import numpy as np +from ase.dft.kpoints import get_monkhorst_pack_size_and_offset, monkhorst_pack -from nomad.datamodel import EntryArchive, ArchiveSection -from nomad.metainfo import MSection -from nomad.metainfo.data_type import Number -from nomad.units import ureg -from nomad.metainfo import Section -from nomad.utils import RestrictedDict from nomad.config import config +from nomad.datamodel import ArchiveSection, EntryArchive from nomad.datamodel.results import ( - Method, - Simulation, - HubbardKanamoriModel, - DFT, - TB, - GW, BSE, + DFT, DMFT, - Precision, + GW, + TB, + HubbardKanamoriModel, Material, + Method, + Precision, + Simulation, xc_treatments, xc_treatments_extended, ) +from nomad.metainfo import MSection, Section +from nomad.metainfo.data_type import Number +from nomad.units import ureg +from nomad.utils import RestrictedDict class MethodNormalizer: # TODO: add normalizer for atom_parameters.label diff --git a/nomad/normalizing/normalizer.py b/nomad/normalizing/normalizer.py index 82eb23aa250800f302c03f9d2a228438cced1cea..0c3976f29788071f990c8c94349cacf36134cc81 100644 --- a/nomad/normalizing/normalizer.py +++ b/nomad/normalizing/normalizer.py @@ -17,11 +17,10 @@ # from abc import ABCMeta, abstractmethod -from typing import List, Optional -from nomad.utils import get_logger -from nomad.metainfo import MSection from nomad.datamodel import EntryArchive +from nomad.metainfo import MSection +from nomad.utils import get_logger class Normalizer(metaclass=ABCMeta): diff --git a/nomad/normalizing/optimade.py b/nomad/normalizing/optimade.py index d19b0e09853d7e0cfaa1cadd486058de90b53ac8..3c78ab4ca58561576318bd6507cce93685270a8e 100644 --- a/nomad/normalizing/optimade.py +++ b/nomad/normalizing/optimade.py @@ -16,19 +16,18 @@ # limitations under the License. 
# -from typing import Any, Dict -import numpy as np import re +from typing import Any + import ase.data import ase.formula +import numpy as np import pint.quantity -from nomad.datamodel import EntryArchive from nomad.atomutils import Formula +from nomad.datamodel import EntryArchive, EntryMetadata, OptimadeEntry, Species from nomad.normalizing.normalizer import SystemBasedNormalizer from nomad.units import ureg -from nomad.datamodel import OptimadeEntry, Species, EntryMetadata - species_re = re.compile(r'^([A-Z][a-z]?)(\d*)$') atom_label_re = re.compile( diff --git a/nomad/normalizing/results.py b/nomad/normalizing/results.py index 6f5f4ebde1e15ef57b0668b85d49efb63c332d82..2316cb4510002da124d5deb5408224a3354218ea 100644 --- a/nomad/normalizing/results.py +++ b/nomad/normalizing/results.py @@ -17,77 +17,78 @@ # import re -import numpy as np -from typing import Union, Any, Optional +from typing import Any + import ase.data -from matid import SymmetryAnalyzer # pylint: disable=import-error import matid.geometry # pylint: disable=import-error +import numpy as np +from matid import SymmetryAnalyzer # pylint: disable=import-error from nomad import atomutils -from nomad.config import config -from nomad.utils import traverse_reversed, extract_section from nomad.atomutils import Formula -from nomad.normalizing.normalizer import Normalizer -from nomad.normalizing.method import MethodNormalizer -from nomad.normalizing.material import MaterialNormalizer +from nomad.config import config from nomad.datamodel import EntryArchive -from nomad.datamodel.metainfo.workflow import Workflow from nomad.datamodel.data import ArchiveSection -from nomad.normalizing.common import structures_2d +from nomad.datamodel.metainfo.workflow import Workflow from nomad.datamodel.results import ( BandGap, BandGapDeprecated, - RadialDistributionFunction, - RadiusOfGyration, - MeanSquaredDisplacement, - Results, - Material, - Method, - GeometryOptimization, - Trajectory, - MolecularDynamics, - MDProvenance, - TemperatureDynamic, - VolumeDynamic, - PressureDynamic, - EnergyDynamic, - Properties, - StructuralProperties, - DynamicalProperties, - EnergyVolumeCurve, - BulkModulus, - ShearModulus, - MechanicalProperties, - ElectronicProperties, - VibrationalProperties, - ThermodynamicProperties, BandStructureElectronic, BandStructurePhonon, + BulkModulus, + DensityCharge, DOSElectronic, - DOSNew, DOSElectronicNew, + DOSNew, DOSPhonon, - GreensFunctionsElectronic, + DynamicalProperties, + EELSMethodology, + ElectricFieldGradient, + ElectronicProperties, + EnergyDynamic, EnergyFreeHelmholtz, + EnergyVolumeCurve, + GeometryOptimization, + GreensFunctionsElectronic, HeatCapacityConstantVolume, - SpectroscopicProperties, - EELSMethodology, - SpectraProvenance, - Spectra, MagneticProperties, MagneticShielding, MagneticSusceptibility, - ElectricFieldGradient, + Material, + MDProvenance, + MeanSquaredDisplacement, + MechanicalProperties, + Method, + MolecularDynamics, + PressureDynamic, + Properties, + RadialDistributionFunction, + RadiusOfGyration, + Results, + ShearModulus, + Spectra, + SpectraProvenance, + SpectroscopicProperties, SpinSpinCoupling, - DensityCharge, + StructuralProperties, + TemperatureDynamic, + ThermodynamicProperties, + Trajectory, + VibrationalProperties, + VolumeDynamic, ) +from nomad.normalizing.common import structures_2d +from nomad.normalizing.material import MaterialNormalizer +from nomad.normalizing.method import MethodNormalizer +from nomad.normalizing.normalizer import Normalizer +from nomad.utils import 
extract_section, traverse_reversed try: import runschema runschema.run_schema_entry_point.load() - import runschema.method import runschema.calculation + import runschema.method import runschema.system except Exception as e: runschema, simulationworkflowschema = None, None @@ -288,11 +289,12 @@ class ResultsNormalizer(Normalizer): else: self.entry_archive.metadata.entry_name = f'{type_tag}' - def resolve_band_gap(self) -> list[BandGap]: + def resolve_band_gap( + self, path: list[str] = ['run', 'calculation', 'band_gap'] + ) -> list[BandGap]: """Extract all band gaps from the given `path` and return them in a list along with their provenance. """ - path = ['run', 'calculation', 'band_gap'] bg_root: list[BandGap] = [] if band_gaps := traverse_reversed(self.entry_archive, path): for bg in band_gaps: @@ -306,7 +308,9 @@ class ResultsNormalizer(Normalizer): bg_root.insert(0, bg_results) return bg_root - def resolve_band_structure(self) -> list[BandStructureElectronic]: + def resolve_band_structure( + self, path: list[str] = ['run', 'calculation', 'band_structure_electronic'] + ) -> list[BandStructureElectronic]: """Returns a new section containing an electronic band structure. In the case of multiple valid band structures, only the latest one is considered. @@ -315,7 +319,6 @@ class ResultsNormalizer(Normalizer): - There is a non-empty array of kpoints. - There is a non-empty array of energies. """ - path = ['run', 'calculation', 'band_structure_electronic'] bs_root: list[BandStructureElectronic] = [] if band_structures := traverse_reversed(self.entry_archive, path): for bs in band_structures: @@ -346,7 +349,9 @@ class ResultsNormalizer(Normalizer): bs_root.insert(0, bs_results) return bs_root - def resolve_dos_deprecated(self) -> list[DOSElectronic]: + def resolve_dos_deprecated( + self, path: list[str] = ['run', 'calculation', 'dos_electronic'] + ) -> list[DOSElectronic]: """Returns a reference to the section containing an electronic dos. In the case of multiple valid DOSes, only the latest one is reported. @@ -358,7 +363,6 @@ class ResultsNormalizer(Normalizer): to an old schema which will be deleted. The new function `resolve_dos` should be the one which persists over time. """ - path = ['run', 'calculation', 'dos_electronic'] dos_sections = extract_section(self.entry_archive, path, full_list=True) # The old mapping does not work for the new spin-polarized schema if ( @@ -376,7 +380,9 @@ class ResultsNormalizer(Normalizer): dos_results.energy_fermi = dos.energy_fermi return [dos_results] if dos_results else [] - def resolve_dos(self) -> list[DOSElectronicNew]: + def resolve_dos( + self, path: list[str] = ['run', 'calculation', 'dos_electronic'] + ) -> list[DOSElectronicNew]: """Returns a section containing the references for an electronic DOS. This section is then stored under `archive.results.properties.electronic.dos_electronic_new`. @@ -393,7 +399,6 @@ class ResultsNormalizer(Normalizer): Returns: List[DOSElectronicNew]: the mapped DOS. 
""" - path = ['run', 'calculation', 'dos_electronic'] dos_result = None # only instantiate `dos_results` if the tests below pass if dos_sections := extract_section(self.entry_archive, path, full_list=True): for dos_section in dos_sections: @@ -434,7 +439,7 @@ class ResultsNormalizer(Normalizer): return [dos_result] if dos_result else [] def resolve_greens_functions( - self, path: list[str] + self, path: list[str] = ['run', 'calculation', 'greens_functions'] ) -> list[GreensFunctionsElectronic]: """Returns a section containing the references of the electronic Greens functions. This section is then stored under `archive.results.properties.electronic`. @@ -496,8 +501,9 @@ class ResultsNormalizer(Normalizer): gfs_root.append(gfs_results) return gfs_root - def fetch_charge_density(self) -> list[DensityCharge]: - path = ['run', 'calculation', 'density_charge', 'value_hdf5'] + def fetch_charge_density( + self, path: list[str] = ['run', 'calculation', 'density_charge', 'value_hdf5'] + ) -> list[DensityCharge]: return_list: list[DensityCharge] = [] if runschema and ( hdf5_wrappers := list(traverse_reversed(self.entry_archive, path)) @@ -508,7 +514,9 @@ class ResultsNormalizer(Normalizer): return_list.append(d) return return_list - def resolve_electric_field_gradient(self) -> list[ElectricFieldGradient]: + def resolve_electric_field_gradient( + self, path: list[str] = ['run', 'calculation', 'electric_field_gradient'] + ) -> list[ElectricFieldGradient]: """Returns a section containing the references for the Electric Field Gradient. This section is then stored under `archive.results.properties.electronic`. @@ -522,7 +530,6 @@ class ResultsNormalizer(Normalizer): Returns: list[ElectricFieldGradient]: the mapped Electric Field Gradient. """ - path = ['run', 'calculation', 'electric_field_gradient'] mapped_data: list[ElectricFieldGradient] = [] if stored_data := traverse_reversed(self.entry_archive, path): for data in stored_data: @@ -671,6 +678,10 @@ class ResultsNormalizer(Normalizer): methods (list[str]): the list of methods from which the properties are resolved. properties (list[str]): the list of properties to be resolved from `workflow2.results`. """ + properties_map = { + 'dos': 'dos_electronic_new', + 'band_structure': 'band_structure_electronic', + } for method in methods: name = ( 'MaxEnt' @@ -680,7 +691,9 @@ class ResultsNormalizer(Normalizer): else method.upper() ) for prop in properties: - property_list = self.electronic_properties.get(prop) + property_list = self.electronic_properties.get( + properties_map.get(prop, prop) + ) method_property_resolved = getattr(self, f'resolve_{prop}')( ['workflow2', 'results', f'{method}_outputs', prop] ) @@ -1147,9 +1160,7 @@ class ResultsNormalizer(Normalizer): 'dos_electronic': self.resolve_dos_deprecated(), 'dos_electronic_new': self.resolve_dos(), 'band_structure_electronic': self.resolve_band_structure(), - 'greens_functions_electronic': self.resolve_greens_functions( - ['run', 'calculation', 'greens_functions'] - ), + 'greens_functions_electronic': self.resolve_greens_functions(), 'density_charge': self.fetch_charge_density(), 'electric_field_gradient': self.resolve_electric_field_gradient(), } diff --git a/nomad/normalizing/topology.py b/nomad/normalizing/topology.py index da0cf2ee5ae501af6ad9a80bc5106bd8ea900b9e..2477f91ad67dbb7dd591002a481001838c511919 100644 --- a/nomad/normalizing/topology.py +++ b/nomad/normalizing/topology.py @@ -16,48 +16,45 @@ # limitations under the License. 
# -from typing import Dict, List, Optional, Union -from collections import defaultdict -import pathlib import json -from math import isnan +import pathlib +from collections import defaultdict +import numpy as np from ase import Atoms from ase.data import chemical_symbols -import numpy as np -from matid.clustering import SBC, Cluster -from matid.symmetry.symmetryanalyzer import SymmetryAnalyzer from matid.classification.classifications import ( - Class0D, Atom, + Class0D, Class1D, Class2D, + Class3D, Material2D, Surface, - Class3D, ) +from matid.clustering import SBC, Cluster +from matid.symmetry.symmetryanalyzer import SymmetryAnalyzer -from nomad import utils +from nomad import atomutils, utils from nomad.config import config -from nomad import atomutils +from nomad.datamodel.datamodel import EntryArchive from nomad.datamodel.results import ( CoreHole, - SymmetryNew as Symmetry, Material, - System, Relation, + System, structure_name_map, ) -from nomad.datamodel.datamodel import EntryArchive +from nomad.datamodel.results import SymmetryNew as Symmetry from nomad.normalizing.common import ( - cell_from_ase_atoms, ase_atoms_from_nomad_atoms, + cell_from_ase_atoms, + material_id_1d, + material_id_2d, + material_id_bulk, nomad_atoms_from_ase_atoms, - wyckoff_sets_from_matid, structures_2d, - material_id_bulk, - material_id_2d, - material_id_1d, + wyckoff_sets_from_matid, ) conventional_description = 'The conventional cell of the material from which the subsystem is constructed from.' diff --git a/nomad/parsing/artificial.py b/nomad/parsing/artificial.py index 6302b72dde31231c95de7924a889b9d38d20423e..ae4f1b74b182c8bda27412689866722071b53fbd 100644 --- a/nomad/parsing/artificial.py +++ b/nomad/parsing/artificial.py @@ -21,19 +21,20 @@ Parser for creating artificial test, brenchmark, and demonstration data. """ import json +import os import os.path import random -from ase.data import chemical_symbols -import numpy +import signal import sys import time -import os -import signal + +import numpy +from ase.data import chemical_symbols from nomad.datamodel import EntryArchive from nomad.datamodel.metainfo import runschema -from .parser import Parser, MatchingParser +from .parser import MatchingParser, Parser class EmptyParser(MatchingParser): diff --git a/nomad/parsing/file_parser/__init__.py b/nomad/parsing/file_parser/__init__.py index e6a541c281a03db0a00a926a97293e57e8e9693c..6c50c195c5b9899c62fdedf5bf025212b8bf83d4 100644 --- a/nomad/parsing/file_parser/__init__.py +++ b/nomad/parsing/file_parser/__init__.py @@ -1,6 +1,7 @@ -from .file_parser import FileParser, Parser +from .file_parser import FileParser, ArchiveWriter from .text_parser import TextParser, DataTextParser, Quantity, ParsePattern from .xml_parser import XMLParser from .tar_parser import TarParser UnstructuredTextFileParser = TextParser +Parser = ArchiveWriter diff --git a/nomad/parsing/file_parser/file_parser.py b/nomad/parsing/file_parser/file_parser.py index 79b4f9086a8fc43858b11dbd5fe5ee17c26c72af..b9bef402991694ccb5fa5b64d5d25f5eedecbeb4 100644 --- a/nomad/parsing/file_parser/file_parser.py +++ b/nomad/parsing/file_parser/file_parser.py @@ -12,20 +12,21 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from abc import ABC, abstractmethod -import os -import pint -from typing import Any, Dict, IO, Union, List -from collections.abc import Callable -import gzip import bz2 +import gzip import lzma +import os import tarfile +from abc import ABC, abstractmethod +from collections.abc import Callable from contextlib import contextmanager +from typing import IO, Any +import pint + +from nomad.datamodel import EntryArchive from nomad.metainfo import MSection, SubSection from nomad.utils import get_logger -from nomad.datamodel import EntryArchive class FileParser(ABC): @@ -221,12 +222,20 @@ class FileParser(ABC): def parse(self, quantity_key: str = None, **kwargs): pass + def pop(self, key, default=None): + return self._results.pop(key, default) + def __getitem__(self, key): if isinstance(key, str): return self.get(key) elif isinstance(key, int): return self[int] + def __setitem__(self, key, val): + if self._results is None: + self._results = {} + self._results[key] = val + def __getattr__(self, key): if self._results is None: self._results = {} @@ -258,11 +267,11 @@ class FileParser(ABC): pass -class Parser(ABC): +class ArchiveWriter(ABC): mainfile: str = None archive: EntryArchive = None logger = None - child_archives = None + child_archives: dict[str, EntryArchive] = None def get_mainfile_keys(self, filename: str, decoded_buffer: str) -> bool | list[str]: """ @@ -307,11 +316,11 @@ class Parser(ABC): self.archive.m_update_from_dict(self.to_dict()) - def parse( + def write( self, mainfile: str, archive: EntryArchive, logger=None, child_archives=None ) -> None: """ - Main interface to the nomad parsing infrastructure. + Wrapper to write_to_archive method. """ self.mainfile = mainfile self.archive = archive @@ -319,3 +328,11 @@ class Parser(ABC): self.child_archives = child_archives self.write_to_archive() + + def parse( + self, mainfile: str, archive: EntryArchive, logger=None, child_archives=None + ) -> None: + """ + Wraps write method for backwards compatibility. 
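The `ArchiveWriter` rename above keeps the old `parse()` entry point as a thin wrapper around the new `write()`, while `Parser = ArchiveWriter` in `file_parser/__init__.py` preserves the old import name. A standalone sketch of that delegation pattern (class and file names are illustrative):

class Writer:
    def write(self, mainfile: str) -> None:
        print(f'writing archive for {mainfile}')

    def parse(self, mainfile: str) -> None:
        # kept only for backwards compatibility; forwards to write()
        self.write(mainfile)

Writer().parse('calc.out')   # old call style still works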
+ """ + self.write(mainfile, archive, logger, child_archives) diff --git a/nomad/parsing/file_parser/mapping_parser.py b/nomad/parsing/file_parser/mapping_parser.py new file mode 100644 index 0000000000000000000000000000000000000000..5d0be33bcab33d614518f8d1528a47c37a635750 --- /dev/null +++ b/nomad/parsing/file_parser/mapping_parser.py @@ -0,0 +1,1498 @@ +import json +import os +import re +from abc import ABC, abstractmethod +from io import BytesIO +from typing import Any, Optional + +import h5py +import jmespath +import jmespath.visitor +import numpy as np +from jsonpath_ng.parser import JsonPathParser +from lxml import etree +from pydantic import BaseModel, Field, model_validator + +from nomad.datamodel import EntryArchive +from nomad.datamodel.metainfo.annotations import Mapper as MapperAnnotation +from nomad.metainfo import MSection, SubSection +from nomad.parsing.file_parser import TextParser as TextFileParser +from nomad.parsing.parser import ArchiveParser +from nomad.units import ureg +from nomad.utils import get_logger + +MAPPING_ANNOTATION_KEY = 'mapping' + + +class JmespathOptions(jmespath.visitor.Options): + def __init__(self, **kwargs): + self.pop = False + self.search = True + + for key in list(kwargs.keys()): + if not hasattr(super(), key): + setattr(self, key, kwargs[key]) + del kwargs[key] + super().__init__(**kwargs) + + +LOGGER = get_logger(__name__) + + +class TreeInterpreter(jmespath.visitor.TreeInterpreter): + def __init__(self, options=None): + self.stack = [] + self._current_node = None + self.current_stack = None + self._parent = None + self.nodes = [] + self.indices = [] + self.keys = [] + self._cache = [] + self._parent_key = '__parent' + super().__init__(options) + + def visit(self, node, *args, **kwargs): + node_type = node.get('type') + for child in node.get('children'): + if hasattr(child, 'get'): + child[self._parent_key] = node_type + + value = super().visit(node, *args, **kwargs) + node.pop(self._parent_key, None) + return value + + def visit_field(self, node, value): + parent = node.get(self._parent_key, None) + if isinstance(value, list): + if not value and not self._options.search: + value.append({}) + if not value: + return None + value = value[-1] + if not hasattr(value, 'get'): + return None + + if not self._options.search: + if parent == 'index_expression' and not isinstance( + value.get(node['value']), list + ): + value[node['value']] = [] + + value.setdefault(node['value'], [] if parent == 'index_expression' else {}) + + if self.stack and not self.indices[-1]: + parent_stack = self.stack[-1].get(self.keys[-1], {}) + if value == parent_stack or ( + isinstance(parent_stack, list) and value in parent_stack + ): + self.indices[-1] = [0] + + if parent != 'comparator': + self.indices.append([]) + self.stack.append(value) + self.keys.append(node['value']) + + try: + return value.get(node['value']) + except AttributeError: + return None + + def visit_index_expression(self, node, value): + value = super().visit_index_expression(node, value) + if node.get(self._parent_key) == 'pipe' and self.indices: + self.indices[-1] = [] + return value + + def visit_index(self, node, value): + if not isinstance(value, list): + return None + + index = node['value'] + n_value = len(value) + if self._options.search and index >= n_value: + return None + + n_target = abs(index) - n_value + (0 if index < 0 else 1) + value.extend([{} for _ in range(n_target)]) + + if self.indices: + self.indices[-1] = [index] + return value[index] + + def visit_slice(self, node, value): + if 
not isinstance(value, list): + return None + + s = slice(*node['children']) + n_value = len(value) + indices = list(range(s.start or 0, s.stop or n_value or 1, s.step or 1)) + if indices: + max_index = max(np.abs(indices)) + min_index = min(indices) + n_target = ( + max_index + - n_value + + (0 if min_index < 0 and max_index == -min_index else 1) + ) + + if max_index >= n_value and self._options.search: + return None + + value.extend([{} for _ in range(n_target)]) + # if isinstance(value, h5py.Group): + # return [g for g in value.values()][s] + self.indices[-1] = indices + return value[s] + + +class ParsedResult(jmespath.parser.ParsedResult): + def _set_value(self, value, options, data): + self._interpreter = TreeInterpreter(options=options) + result = self._interpreter.visit(self.parsed, value) + + values = [] + if not options.pop and data is None: + return result, values + + stack, stack_indices, stack_keys = [], [], [] + for n, s in enumerate(self._interpreter.stack): + add = s == self._interpreter.stack[-1] + if not add: + val = s[self._interpreter.keys[n]] + add = val and not hasattr( + val[0] if isinstance(val, list) else val, 'get' + ) + if add: + stack.append(s) + stack_indices.append(self._interpreter.indices[n]) + stack_keys.append(self._interpreter.keys[n]) + + for n, indices in enumerate(stack_indices): + d = ( + data[n] + if isinstance(data, list) + and len(data) > 1 + and len(data) == len(stack_indices) + else data + ) + if not indices: + stack[n][stack_keys[n]] = d + v = ( + stack[n][stack_keys[n]] + if not options.pop + else stack[n].pop(stack_keys[n]) + ) + values.append(v) + continue + map_data = isinstance(d, list) and len(d) == len(indices) + for nd in range(len(indices) - 1, -1, -1): + index = indices[nd] + stack[n][stack_keys[n]][index] = d[nd] if map_data else d + v = ( + stack[n][stack_keys[n]][index] + if not options.pop + else stack[n][stack_keys[n]].pop(index) + ) + values.append(v) + + return result, values[0] if len(values) == 1 else values + + def search(self, value, **kwargs): + options = JmespathOptions(search=True, **kwargs) + return self._set_value(value, options, None)[0] + + def set(self, value, data, **kwargs): + options = JmespathOptions(search=False, **kwargs) + return self._set_value(value, options, data)[1] + + +class JmespathParser(jmespath.parser.Parser): + """ + JmespathParser extension implementing search with pop and set functionalities. 
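The `ParsedResult`/`JmespathParser` extension above adds write (`set`) and removal (`pop`) support on top of plain jmespath search. For orientation, the stock `jmespath` behaviour it builds on (runnable as-is):

import jmespath

data = {'a': {'b': [{'c': 1}, {'c': 2}]}}
print(jmespath.search('a.b[-1].c', data))   # 2 -- stock jmespath only reads
# the subclass above lets the same kind of expression also write into or pop from `data`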
+ """ + + def parse(self, expression): + parsed_result = super().parse(expression) + return ParsedResult(parsed_result.expression, parsed_result.parsed) + + +class PathParser(BaseModel): + parser_name: str = Field( + 'jmespath', description="""Name of the parser to perform parsing.""" + ) + + def get_data(self, path, source, **kwargs) -> Any: + if self.parser_name == 'jmespath': + + def _get(path, source, **kwargs): + return JmespathParser().parse(path).search(source, **kwargs) + + return _get(path, source, **kwargs) + elif self.parser_name == 'jsonpath_ng': + + def _get(path, source, **kwargs): + parser = JsonPathParser().parse(path) + results = [match.value for match in parser.find(source)] + if kwargs.get('pop'): + # TODO is find and filter somehow can be performed simulatenously + parser.filter(lambda v: True, source) + return results[0] if len(results) == 1 else results + + return _get(path, source, **kwargs) + + return None + + def set_data(self, path, target, data, **kwargs) -> Any: + if self.parser_name == 'jmespath': + + def _set(path, target, data, **kwargs): + return JmespathParser().parse(path).set(target, data, **kwargs) + + return _set(path, target, data, **kwargs) + + elif self.parser_name == 'jsonpath_ng': + + def _set(path, target, data, **kwargs): + return JsonPathParser().parse(path).update(target, data) + + return _set(path, target, data) + + return None + + +class Path(BaseModel, validate_assignment=True): + """ + Wrapper for jmespath parser to get/set data from/to an input dictionary. + """ + + path: str = Field('', description="""User-defined path to the data.""") + parent: Optional['Path'] = Field(None, description="""Parent path.""") + relative_path: str = Field('', description="""Relative path to the data.""") + absolute_path: str = Field('', description="""Absolute path to the data.""") + reduced_path: str = Field('', description="""Reduced absolute path.""") + parser: PathParser = Field( + PathParser(), description="""The parser to use to search and set data.""" + ) + + @model_validator(mode='before') + def get_relative_path(cls, values: dict[str, Any]) -> dict[str, Any]: + relative_path = values.get('path', '') + parent = values.get('parent') + match = re.match(r'^\.(.+)|(.+\()\.(.+)', relative_path) + if match: + relative_path = ''.join([g for g in match.groups() if g]) + values['relative_path'] = relative_path + + absolute_path = relative_path + if parent: + segments = [parent.absolute_path, absolute_path] + absolute_path = '.'.join([s for s in segments if s != '@' and s]) + values['absolute_path'] = absolute_path + + values['reduced_path'] = re.sub(r'\[.+?\]|\|', '', absolute_path) + + return values + + def is_relative_path(self): + return self.relative_path != self.path or self.parent is not None + + def get_data(self, source: dict[str, Any], **kwargs) -> Any: + try: + return self.parser.get_data(self.relative_path, source, **kwargs) + except Exception: + return kwargs.get('default') + + def set_data(self, data: Any, target: dict[str, Any], **kwargs) -> Any: + cur_data = self.get_data(target, **kwargs) + update_mode = kwargs.get('update_mode') + path = self.relative_path + + def update(source: Any, target: Any): + if not isinstance(source, type(target)): + return ( + target if update_mode == 'append' and target is not None else source + ) + + if isinstance(source, dict): + if update_mode != 'replace': + for key in list(source.keys()): + target[f'.{key}'] = update( + source.get(key), target.get(f'.{key}') + ) + return target + + if isinstance(source, list): 
+ merge = re.match(r'merge(?:@(.+))*', update_mode or '') + if merge: + merge_at = merge.groups()[0] + if not merge_at or merge_at == 'start': + start = 0 + elif merge_at == 'last': + start = len(source) - len(target) + else: + start = int(merge_at) + if start < 0: + start += len(source) + for n, d in enumerate(source): + if n >= start and n < start + len(target): + update(d, target[n - start]) + else: + target.insert(n, d) + elif update_mode == 'append': + for n, d in enumerate(source): + target.insert(n, update(d, {})) + return target + + return target if update_mode == 'append' and target is not None else source + + res = self.parser.set_data(path, target, data, **kwargs) + + update(cur_data, res) + + return res + + +Path.model_rebuild() + + +class Data(BaseModel, validate_assignment=True): + """ + Wrapper for the path to the data or a transformer to extract the data. + """ + + path: Path = Field(None, description="""Path to the data.""") + transformer: 'Transformer' = Field( + None, description="""Transformer to extract data.""" + ) + parent: Path = Field(None, description="""Parent path.""") + path_parser: PathParser = Field( + None, description="""Parser used to search and set data.""" + ) + + @model_validator(mode='before') + def set_attributes(cls, values: dict[str, Any]) -> dict[str, Any]: + if values.get('path') is None and values.get('transformer'): + transformer = values['transformer'] + if len(transformer.function_args) == 1: + values['path'] = transformer.function_args[0] + else: + values['path'] = Path(path='@') + + if values.get('parent'): + if values.get('transformer'): + for arg in values['transformer'].function_args: + if arg.is_relative_path(): + arg.parent = values['parent'] + if values.get('path') and values['path'].is_relative_path(): + values['path'].parent = values['parent'] + + if values.get('path_parser'): + if values.get('path'): + values['path'].parser = values['path_parser'] + if values.get('transformer'): + for arg in values['transformer'].function_args: + arg.parser = values['path_parser'] + + return values + + def get_data( + self, source_data: dict[str, Any], parser: 'MappingParser' = None, **kwargs + ) -> Any: + if self.transformer: + value = self.transformer.get_data(source_data, parser, **kwargs) + return self.transformer.normalize_data(value) + elif self.path: + return self.path.get_data( + source_data if self.path.is_relative_path() else parser.data, **kwargs + ) + + +class BaseMapper(BaseModel): + """ + Base class for a mapper. + """ + + source: 'Data' = Field(None, description="""Source data.""") + target: 'Data' = Field(None, description="""Target data.""") + indices: list[int] | str | None = Field( + None, description="""List of indices of data to include.""" + ) + order: int = Field(None, description="""Execution order.""") + remove: bool | None = Field(None, description="""Remove data from source.""") + cache: bool | None = Field(None, description="""Store the result of the mapper.""") + all_paths: list[str] = Field( + [], description="""List of all unindexed abs. paths.""" + ) + + def get_data(self, source_data: Any, parser: 'MappingParser', **kwargs) -> Any: + return None + + def normalize_data(self, data: Any) -> Any: + return data + + @staticmethod + def from_dict(dct: dict[str, Any], parent: 'BaseMapper' = None) -> 'BaseMapper': + """ + Convert dictionary to a BaseMapper object. 
Dictionary may contain the following + source: str or Path or tuple or Transformer to extract source data + target: str or Path object of target data + mapper: + str or Path object returns Transfomer with identity function + Tuple[str, List[str]] returns Transformer + List[Dict] returns Mapper + path: str or Path object returns Map object + function_name: str name of transformation function + function_args: List[str] of paths of data as arguments to function + indices: str or List of indices of data to include + str is function name to evaluate indices + remove: Remove data from source + """ + paths: dict[str, Data] = {} + path_parser = dct.get('path_parser') + + for ptype in ['source', 'target']: + path = dct.get(ptype) + if isinstance(path, str): + path_obj = Data(path=Path(path=path)) + elif isinstance(path, tuple): + args = [Path(path=p) for p in path[1]] + path_obj = Data( + transformer=Transformer(function_name=path[0], function_args=args) + ) + if len(path) == 3: + path_obj.transformer.function_kwargs = path[2] + path_obj.transformer.cache = dct.get('cache') + elif isinstance(path, Data): + path_obj = path + else: + path_obj = None + + if path_obj: + parent_path = getattr(parent, ptype, None) + if parent_path is not None: + path_obj.parent = parent_path.path + if path_parser: + path_obj.path_parser = PathParser(parser_name=path_parser) + paths[ptype] = path_obj + + mapper = ( + dct.get('mapper') + or dct.get('path') + or (dct.get('function_name'), dct.get('function_args')) + ) + obj: BaseMapper = BaseMapper() + if isinstance(mapper, tuple) and None in mapper: + return obj + + def add_path_attrs(path: Path): + if path.is_relative_path(): + source_path = paths.get('source', parent.source if parent else None) + if source_path: + path.parent = source_path.path + if path_parser: + path.parser = PathParser(parser_name=path_parser) + + if isinstance(mapper, str | Path): + path = Path(path=mapper) if isinstance(mapper, str) else mapper + obj = Transformer() + add_path_attrs(path) + obj.function_args.append(path) + + elif ( + isinstance(mapper, tuple | list) + and len(mapper) in [2, 3] + and isinstance(mapper[0], str) + and isinstance(mapper[1], list) + ): + function_args = [] + for v in mapper[1]: + arg = v + if isinstance(v, str): + arg = Path(path=v) + add_path_attrs(arg) + function_args.append(arg) + obj = Transformer(function_name=mapper[0], function_args=function_args) + if len(mapper) == 3: + obj.function_kwargs = mapper[2] + + elif isinstance(mapper, list) and isinstance(mapper[0], dict): + obj = Mapper() + else: + LOGGER.error('Unknown mapper type.') + + for key in ['indices', 'remove', 'cache']: + if dct.get(key) is not None: + setattr(obj, key, dct.get(key)) + if paths.get('source'): + obj.source = paths.get('source') + if paths.get('target'): + obj.target = paths.get('target') + + if isinstance(obj, Mapper): + mappers = [] + for v in mapper: + m = BaseMapper.from_dict(v, obj) + mappers.append(m) + obj.mappers = mappers + + return obj + + def get_required_paths(self) -> list[str]: + def get_path_segments(parsed: dict[str, Any]) -> list[str]: + segments: list[str] = [] + value = parsed.get('value') + ptype = parsed.get('type') + + if ptype == 'comparator': + return segments + + if value and ptype == 'field': + segments.append(value) + + for children in parsed.get('children', []): + if not isinstance(children, dict): + continue + segments.extend(get_path_segments(children)) + + return segments + + def filter_path(path: str) -> list[str]: + parsed = 
JmespathParser().parse(path).parsed + segments = get_path_segments(parsed) + return ['.'.join(segments[:n]) for n in range(1, len(segments) + 1)] + + def get_paths(mapper: BaseMapper) -> list[str]: + paths = [] + if mapper.source and mapper.source.transformer: + for path in mapper.source.transformer.function_args: + paths.extend(filter_path(path.absolute_path)) + + if isinstance(mapper, Mapper): + for sub_mapper in mapper.mappers: + paths.extend(get_paths(sub_mapper)) + + elif isinstance(mapper, Transformer): + for path in mapper.function_args: + paths.extend(filter_path(path.absolute_path)) + + return paths + + return list(set(get_paths(self))) + + +class Transformer(BaseMapper): + """ + Mapper to perform a transformation of the data. + + A static method with function_name should be implemented in the parser class. + + class Parser(MappingParser): + @staticmethod + def get_eigenvalues_energies(array: np.ndarray, n_spin: int, n_kpoints: int): + array = np.transpose(array)[0].T + return np.reshape(array, (n_spin, n_kpoints, len(array[0]))) + + If function is not defined, identity transformation is applied. + """ + + function_name: str = Field( + '', description="""Name of the function defined in the parser.""" + ) + function_args: list[Path] = Field( + [], description="""Paths to the data as arguments to the function.""" + ) + function_kwargs: dict[str, Any] = Field( + {}, description="""Keyword args to pass to function.""" + ) + order: int = 1 + + def get_data( + self, source_data: dict[str, Any], parser: 'MappingParser', **kwargs + ) -> Any: + remove: bool = kwargs.get('remove', self.remove) + func = ( + getattr(parser, self.function_name, None) + if self.function_name + else lambda x: x + ) + args = [ + m.get_data( + source_data if m.is_relative_path() else parser.data, + pop=remove and self.all_paths.count(m.reduced_path) <= 1, + ) + for m in self.function_args + ] + try: + return ( + func(*args) + if not self.function_kwargs + else func(*args, **self.function_kwargs) + ) + except Exception: + # if self.function_name == 'get_positions': + # raise + return None + + +Data.model_rebuild() + + +class Mapper(BaseMapper, validate_assignment=True): + """ + Mapper for nested mappers. 
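A sketch of the dictionary DSL accepted by `BaseMapper.from_dict` above (all paths and the function name are hypothetical): a plain string mapper is a path, a leading dot marks a path relative to the parent `source`, and a `(function_name, [arg_paths])` tuple produces a `Transformer`:

from nomad.parsing.file_parser.mapping_parser import BaseMapper

mapper = BaseMapper.from_dict({
    'source': 'atoms',                                   # parent path into the source data
    'mapper': [
        {'target': '.labels', 'mapper': '.species'},     # identity transformer, relative path
        {'target': '.positions',
         'mapper': ('to_nomad_units', ['.positions'])},  # hypothetical parser static method
    ],
})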
+ """ + + mappers: list[BaseMapper] = Field([], description="""List of sub mappers.""") + order: int = 0 + __cache: dict[str, Any] = {} + + @model_validator(mode='before') + def set_attributes(cls, values: dict[str, Any]) -> dict[str, Any]: + def get_paths(mapper: BaseMapper) -> list[str]: + paths = [] + if isinstance(mapper, Transformer): + paths.extend([p.reduced_path for p in mapper.function_args]) + elif isinstance(mapper, Mapper): + for m in mapper.mappers: + paths.extend(get_paths(m)) + return paths + + def set_paths(mapper: BaseMapper, paths: list[str]): + mapper.all_paths = paths + if isinstance(mapper, Mapper): + for m in mapper.mappers: + set_paths(m, paths) + + def set_remove(mapper: BaseMapper, remove: bool): + mapper.remove = remove + if isinstance(mapper, Mapper): + for m in mapper.mappers: + set_remove(m, remove) + + paths = [] + for mapper in values.get('mappers', []): + paths.extend(get_paths(mapper)) + + # propagate all properties to all mappers + for mapper in values.get('mappers', []): + if not values.get('all_paths'): + set_paths(mapper, paths) + set_remove(mapper, values.get('remove')) + + if not values.get('all_paths'): + values['all_paths'] = paths + + return values + + def get_data( + self, source_data: dict[str, Any], parser: 'MappingParser', **kwargs + ) -> Any: + dct = {} + for mapper in self.mappers: + data = source_data + if mapper.source: + data = None + if mapper.source.transformer and mapper.source.transformer.cache: + data = self.__cache.get(mapper.source.transformer.function_name) + if data is None: + data = mapper.source.get_data(source_data, parser, **kwargs) + if mapper.source.transformer and mapper.source.transformer.cache: + self.__cache.setdefault( + mapper.source.transformer.function_name, data + ) + + def is_not_value(value: Any) -> bool: + if isinstance(value, np.ndarray): + return value.size == 0 + if hasattr(value, 'magnitude'): + return is_not_value(value.magnitude) + + not_value: Any + for not_value in [None, [], {}]: + test = value == not_value + result = test.any() if isinstance(test, np.ndarray) else test + if result: + return bool(result) + + return False + + indices = mapper.indices + if isinstance(indices, str): + indices = getattr(parser, indices, []) + if callable(indices): + indices = indices() + + value: list[Any] = [] + if isinstance(mapper, Transformer) and mapper.cache: + value = self.__cache.get(mapper.function_name, value) + + if not value: + for n, d in enumerate(data if isinstance(data, list) else [data]): + v = mapper.get_data(d, parser, **kwargs) + if indices and n not in indices: + continue + if not is_not_value(v): + value.append(v) + if value and mapper.cache and isinstance(mapper, Transformer): + self.__cache.setdefault(mapper.function_name, value) + if value: + normalized_value = [mapper.normalize_data(v) for v in value] + dct[mapper.target.path.path] = ( + normalized_value[0] if mapper.indices is None else normalized_value + ) + return dct + + def sort(self, recursive=True): + self.mappers.sort(key=lambda m: m.order) + if recursive: + for mapper in self.mappers: + if isinstance(mapper, Mapper): + mapper.sort() + + +Mapper.model_rebuild() + + +class MappingParser(ABC): + """ + A generic parser class to convert the contents of a file specified by filepath to a + dictionary. The data object is the abstract interface to the data which can defined + by implementing the load_file method. 
+ + If attributes are parsed, the data is wrapped in a dictionary with the attribute keys + prefixed by attribute_prefix while the value can be accesed by value_key. + + data = { + 'a' : { + 'b': [ + {'@name': 'item1', '__value': 'name'}, + {'@name': 'item2', '__value': 'name2'} + ] + } + } + a.b[?"@name"==\'item2\'].__value + >> name2 + + A mapping parser can be converted to another mapping parser using the convert method + by providing a mapper object. + + Attributes: + parse_only_required + Parse only data required by target parser. + attribute_prefix + Added to start of key to denote it is a data attribute. + value_key + Key to the value of the data. + """ + + parse_only_required: bool = False + attribute_prefix: str = '@' + value_key: str = '__value' + logger = get_logger(__name__) + + def __init__(self, **kwargs): + for key, val in kwargs.items(): + if hasattr(self, key): + setattr(self, key, val) + self._mapper: BaseMapper = kwargs.get('mapper') + self._filepath: str = kwargs.get('filepath') + self._data: dict[str, Any] = kwargs.get('data', {}) + self._data_object: Any = kwargs.get('data_object') + self._required_paths: list[str] = kwargs.get('required_paths', []) + + @abstractmethod + def load_file(self) -> Any: + return {} + + @abstractmethod + def to_dict(self, **kwargs) -> dict[str, Any]: + return {} + + @abstractmethod + def from_dict(self, dct: dict[str, Any]): + pass + + def build_mapper(self) -> BaseMapper: + return Mapper() + + @property + def filepath(self) -> str: + return self._filepath + + @filepath.setter + def filepath(self, value: str): + self._filepath = value + self._data_object = None + self._data = None + + @property + def data(self): + if not self._data: + self._data = self.to_dict() + return self._data + + @property + def data_object(self): + if self._data_object is None: + self._data_object = self.load_file() + return self._data_object + + @data_object.setter + def data_object(self, value: Any): + self._data_object = value + self._data = None + self._filepath = None + + @property + def mapper(self) -> BaseMapper: + if self._mapper is None: + self._mapper = self.build_mapper() + return self._mapper + + @mapper.setter + def mapper(self, value: BaseMapper): + self._mapper = value + + def set_data(self, data: Any, target: dict[str, Any], **kwargs) -> None: + if isinstance(data, dict): + for key in list(data.keys()): + path = Path(path=key) + new_data = path.set_data( + data.pop(key) if kwargs.get('remove') else data[key], + data if path.is_relative_path() else target, + update_mode=kwargs.get('update_mode', 'merge'), + ) + self.set_data(new_data, target, remove=True) + + elif isinstance(data, list): + for val in data: + self.set_data(val, target, **kwargs) + + def get_data( + self, + mapper: BaseMapper, + source_data: dict[str, Any], + ) -> Any: + return mapper.get_data(source_data, self) + + def convert( + self, + target: 'MappingParser', + mapper: 'BaseMapper' = None, + update_mode: str = 'merge', + remove: bool = False, + ): + if mapper is None: + mapper = target.mapper + if self.parse_only_required and mapper and not self._required_paths: + self._required_paths = mapper.get_required_paths() + source_data = self.data + if mapper.source: + source_data = mapper.source.get_data(self.data, self) + result = mapper.get_data(source_data, self, remove=remove) + target.set_data(result, target.data, update_mode=update_mode) + target.from_dict(target.data) + + def close(self): + if hasattr(self._data_object, 'close'): + self._data_object.close() + self._data_object = None 
+ self._data = {} + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + self.close() + + def __repr__(self) -> str: + string = f'{self.__class__.__name__}' + if self.filepath: + string += f'({os.path.basename(self.filepath)})' + if self._data_object: + string += f': {type(self._data_object).__name__}' + if self._data: + keys = list(self._data.keys()) + keys = keys[: min(len(keys), 5)] + string += f' -> data.keys: {", ".join([key for key in keys])}' + if len(self._data.keys()) > 5: + string += '...' + return string + + +class MetainfoBaseMapper(BaseMapper): + @staticmethod + def from_dict(dct: dict[str, Any], parent: BaseMapper = None) -> 'BaseMapper': + parent = BaseMapper.from_dict(dct) if parent is None else parent + + if isinstance(parent, Transformer): + transformer = MetainfoTransformer() + for key in parent.model_fields.keys(): + val = getattr(parent, key) + if val is not None: + setattr(transformer, key, val) + for key in ['unit', 'search']: + if dct.get(key): + setattr(transformer, key, dct.get(key)) + return transformer + elif isinstance(parent, Mapper): + mdct = dct.get('mapper') + mapper = MetainfoMapper() + for key in parent.model_fields.keys(): + val = getattr(parent, key) + if val is not None: + setattr(mapper, key, val) + if dct.get('m_def'): + mapper.m_def = dct.get('m_def') + for n, obj in enumerate(parent.mappers): + parent.mappers[n] = MetainfoBaseMapper.from_dict(mdct[n], obj) + mapper.mappers = parent.mappers + return mapper + return parent + + +class MetainfoMapper(MetainfoBaseMapper, Mapper): + m_def: str = Field(None, description="""Section definition.""") + + def get_data( + self, source_data: dict[str, Any], parser: MappingParser, **kwargs + ) -> Any: + dct = super().get_data(source_data, parser, **kwargs) + if self.m_def: + dct['.m_def'] = self.m_def + return dct + + +class MetainfoTransformer(MetainfoBaseMapper, Transformer): + unit: str = Field(None, description="""Pint unit to be applied to value.""") + search: str = Field(None, description="""Path to search value.""") + + def normalize_data(self, value: Any): + if self.search: + path = Path(path=self.search) + value = path.get_data(value) + if self.unit is not None and value is not None and not hasattr(value, 'units'): + value = value * ureg(self.unit) + return value + + +class MetainfoParser(MappingParser): + """ + A parser for metainfo sections. 
+ """ + + def __init__(self, **kwargs): + self._annotation_key: str = kwargs.get('annotation_key', 'mapping') + self.max_nested_level: int = 1 + super().__init__(**kwargs) + + @property + def annotation_key(self) -> str: + return self._annotation_key + + @annotation_key.setter + def annotation_key(self, value): + self._annotation_key = value + self._mapper = None + + def load_file(self) -> MSection: + if self._data_object is not None: + with open(self.filepath) as f: + return self._data_object.m_from_dict(json.load(f)) + elif self.filepath: + try: + archive = EntryArchive() + ArchiveParser().parse(self.filepath, archive) + return archive + except Exception: + self.logger.errror('Error loading archive file.') + return None + + def to_dict(self, **kwargs) -> dict[str, Any]: + if self.data_object is not None: + return self.data_object.m_to_dict() + return {} + + def from_dict(self, dct: dict[str, Any], root: MSection = None) -> None: + # if self.data_object is not None: + # self.data_object = self.data_object.m_from_dict(dct) + # return + + # TODO this is a temporary fix for nomad_simulations PhysicalProperty + # error with m_from_dict + if self.data_object is None: + return + + if root is None: + root = self.data_object + + for key, val in dct.items(): + if not hasattr(root, key): + continue + + section = getattr(root.m_def.section_cls, key) + if isinstance(section, SubSection): + val_list = [val] if isinstance(val, dict) else val + m_def = val_list[-1].get('m_def') + section_def = section.sub_section + if m_def is not None and m_def != section.qualified_name(): + for isection in section.sub_section.all_inheriting_sections: + if isection.qualified_name() == m_def: + section_def = isection + break + + for n, val_n in enumerate(val_list): + quantities = section_def.all_quantities + try: + sub_section = root.m_get_sub_section(section, n) + except Exception: + sub_section = None + if sub_section is None: + sub_section = section_def.section_cls( + **{ + n: val_n.get(n) + for n, q in quantities.items() + if not q.derived and n in val_n and n != 'value' + } + ) + root.m_add_sub_section(section, sub_section) + self.from_dict(val_n, sub_section) + value = val_n.get('value') + if value is not None: + sub_section.value = value + continue + + if key == 'm_def' or key == 'value': + continue + + try: + root.m_set(root.m_get_quantity_definition(key), val) + except Exception: + pass + + def build_mapper(self, max_level: int = None) -> BaseMapper: + """ + Builds a mapper for source data from the another parser with path or operator + specified in metainfo annotation with key annotation_key. The target path is + given by the sub section key. 
+ """ + + def fill_mapper( + mapper: dict[str, Any], + annotation: MapperAnnotation, + attributes: list[str], + ) -> None: + for key in attributes: + value = getattr(annotation, key, None) + if value is not None: + mapper.setdefault(key, value) + + def build_section_mapper( + section: SubSection | MSection, level: int = 0 + ) -> dict[str, Any]: + mapper: dict[str, Any] = {} + if level >= (max_level or self.max_nested_level): + return mapper + + section_def = ( + section.sub_section + if isinstance(section, SubSection) + else section.m_def + ) + + if not section_def: + return mapper + + # try to get annotation from sub-section + annotation: MapperAnnotation = ( + (section if isinstance(section, SubSection) else section_def) + .m_get_annotations(MAPPING_ANNOTATION_KEY, {}) + .get(self.annotation_key) + ) + + if not annotation: + # get it from def + annotation = section_def.m_get_annotations( + MAPPING_ANNOTATION_KEY, {} + ).get(self.annotation_key) + + if isinstance(section, SubSection) and not annotation: + # search also all inheriting sections + for inheriting_section in section_def.all_inheriting_sections: + annotation = inheriting_section.m_get_annotations( + MAPPING_ANNOTATION_KEY, {} + ).get(self.annotation_key) + if annotation: + # TODO this does not work as it will applies to base class + # section.sub_section = inheriting_section + # TODO this is a hacky patch, metainfo should have an alternative + # way to resolve the sub-section def + mapper['m_def'] = inheriting_section.qualified_name() + section_def = inheriting_section + break + + if not annotation: + return mapper + + fill_mapper(mapper, annotation, ['remove', 'cache', 'path_parser']) + mapper['source'] = annotation.mapper + + mapper['mapper'] = [] + for name, quantity_def in section_def.all_quantities.items(): + qannotation = quantity_def.m_get_annotations( + MAPPING_ANNOTATION_KEY, {} + ).get(self.annotation_key) + if qannotation: + quantity_mapper = { + 'mapper': qannotation.mapper, + 'target': f'{"" if section == self.data_object else "."}{name}', + } + fill_mapper( + quantity_mapper, + qannotation, + ['remove', 'cache', 'path_parser', 'unit', 'search'], + ) + mapper['mapper'].append(quantity_mapper) + + all_ids = [section_def.definition_id] + all_ids.extend([s.definition_id for s in section_def.all_base_sections]) + for name, sub_section in section_def.all_sub_sections.items(): + # avoid recursion + # if sub_section.sub_section.definition_id in all_ids: + # continue + # allow recursion up to max_level + nested = sub_section.sub_section.definition_id in all_ids + sub_section_mapper = build_section_mapper( + sub_section, level + (1 if nested else 0) + ) + if sub_section_mapper and sub_section_mapper.get('mapper'): + sub_section_mapper['target'] = ( + f'{"" if section == self.data_object else "."}{name}' + ) + sub_section_mapper['indices'] = [] if sub_section.repeats else None + sannotation = sub_section.m_get_annotations( + MAPPING_ANNOTATION_KEY, {} + ).get(self.annotation_key) + if sannotation: + sub_section_mapper['source'] = sannotation.mapper + fill_mapper( + sub_section_mapper, + sannotation, + ['remove', 'cache', 'path_parser', 'indices'], + ) + mapper['mapper'].append(sub_section_mapper) + + return mapper + + dct = build_section_mapper(self.data_object) + return MetainfoMapper.from_dict(dct) + + +class HDF5Parser(MappingParser): + """ + Mapping parser for HDF5. 
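`HDF5Parser.to_dict` flattens an `h5py` file into the nested-dict form described on `MappingParser`: groups become nested dicts, datasets become scalars or lists, and HDF5 attributes end up under `@`-prefixed keys next to a `__value` entry. A small self-contained illustration of the kind of input it walks; the dict shown in the comment is approximate:

import io

import h5py
import numpy as np

with h5py.File(io.BytesIO(), 'w') as f:
    dset = f.require_group('g').require_dataset('v', (3,), float)
    dset[...] = np.arange(3)
    dset.attrs['units'] = 'angstrom'
# roughly -> {'g': {'v': {'@units': 'angstrom', '__value': [0.0, 1.0, 2.0]}}}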
+ """ + + def load_file(self, **kwargs) -> h5py.Group: + try: + filepath = kwargs.get('file', self.filepath) + mode = ( + 'w' + if isinstance(filepath, str) and not os.path.isfile(filepath) + else 'r' + ) + return h5py.File(filepath, kwargs.get('mode', mode)) + except Exception: + self.logger.error('Cannot read HDF5 file.') + + def to_dict(self, **kwargs) -> dict[str, Any]: + if self.data_object is None: + return {} + + def set_attributes(val: h5py.Dataset | h5py.Group, dct: dict[str, Any]): + for name, attr in val.attrs.items(): + dct[f'{self.attribute_prefix}{name}'] = ( + attr.tolist() if hasattr(attr, 'tolist') else attr + ) + + def group_to_dict( + group: h5py.Group, root: dict[str, Any] | list[dict[str, Any]] + ): + for key, val in group.items(): + key = int(key) if key.isdecimal() else key + path = '.'.join( + [p for p in val.name.split('/') if not p.isdecimal() and p] + ) + if self._required_paths and path not in self._required_paths: + continue + if isinstance(root, list) and isinstance(val, h5py.Group): + group_to_dict(val, root[key]) + set_attributes(val, root[key]) + elif isinstance(root, dict) and isinstance(val, h5py.Group): + default: list[dict[str, Any]] = [ + {} if k.isdecimal() else None for k in val.keys() + ] + group_to_dict( + val, root.setdefault(key, {} if None in default else default) + ) + if not root[key]: + root[key] = {} + set_attributes(val, root[key]) + elif isinstance(val, h5py.Dataset): + data = val[()] + v = ( + data.astype(str if data.dtype == np.object_ else data.dtype) + if isinstance(data, np.ndarray) + else data.decode() + if isinstance(data, bytes) + else data + ) + v = v.tolist() if hasattr(v, 'tolist') else v + attrs = list(val.attrs.keys()) + if attrs: + root[key] = {self.value_key: v} + set_attributes(val, root[key]) + else: + root[key] = v # type: ignore + return root + + dct: dict[str, Any] = {} + group_to_dict(self.data_object, dct) + return dct + + def from_dict(self, dct: dict[str, Any]) -> None: + if self._data_object is not None: + self._data_object.close() + + root = self.load_file(mode='a', file=self.filepath or BytesIO()) + + def dict_to_hdf5(dct: dict[str, Any], root: h5py.Group) -> h5py.Group: + for key, val in dct.items(): + if key.startswith(self.attribute_prefix): + root.attrs[key.lstrip(self.attribute_prefix)] = val + elif isinstance(val, dict) and self.value_key not in val: + group = root.require_group(key) + dict_to_hdf5(val, group) + elif isinstance(val, list) and val and isinstance(val[0], dict): + data = {} + for n, v in enumerate(val): + if self.value_key not in v: + group = root.require_group(f'{key}/{n}') + dict_to_hdf5(v, group) + else: + data[f'{key}/{n}'] = v + dict_to_hdf5(data, root) + else: + attrs = val if isinstance(val, dict) else {} + v = attrs.get(self.value_key, None) if attrs else val + if v is None: + continue + + if isinstance(v, list): + v = np.array(v) + + shape = v.shape if hasattr(v, 'shape') else () + dtype = v.dtype.type if hasattr(v, 'dtype') else type(v) + if dtype in [np.str_, str]: + dtype = h5py.string_dtype() + dataset = root.require_dataset(key, shape, dtype) + dataset[...] = v.tolist() if hasattr(v, 'tolist') else v + for name, attr in attrs.items(): + if name == self.value_key: + continue + dataset.attrs[name.lstrip(self.attribute_prefix)] = attr + + return root + + self._data_object = dict_to_hdf5(dct, root) + + +class XMLParser(MappingParser): + """ + A mapping parser for XML files. The contents of the xml file are converted into + a dictionary using the lxml module (see https://lxml.de/). 
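`XMLParser.to_dict` walks `lxml` elements with the same conventions: XML attributes become `@`-prefixed keys and element text becomes the `__value` entry (numeric text is converted). A tiny runnable illustration of the underlying `lxml` objects; the dict shown in the comment is the approximate result of the conversion:

from lxml import etree

root = etree.fromstring('<a><b name="item2">5</b></a>')
b = root.find('b')
print(b.attrib['name'], b.text)   # item2 5
# roughly -> {'a': {'b': {'@name': 'item2', '__value': 5}}}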
+ """ + + def from_dict(self, dct: dict[str, Any]) -> None: + def to_string(val: Any) -> str | None: + val = val.tolist() if hasattr(val, 'tolist') else val + if not isinstance(val, list): + return str(val) + string = '' + for v in val: + if not isinstance(v, str | float | int): + return None + string += f' {v}' + return string.strip() + + def data_to_element( + tag: str, data: Any, root: etree._Element = None + ) -> etree._Element: + if tag.startswith(self.attribute_prefix) and root is not None: + root.set(tag.lstrip(self.attribute_prefix), data) + elif tag.startswith(self.value_key) and root is not None: + root.text = to_string(data) + elif isinstance(data, dict): + root = ( + etree.Element(tag) if root is None else etree.SubElement(root, tag) + ) + for key, val in data.items(): + data_to_element(key, val, root) + elif isinstance(data, list): + string = to_string(data) + if string is not None: + element = etree.SubElement(root, tag) + element.text = string + else: + for val in data: + data_to_element(tag, val, root) + elif hasattr(data, 'tolist'): + data_to_element(tag, data.tolist(), root) + else: + element = etree.SubElement(root, tag) + element.text = to_string(data) + return root + + self._data_object = data_to_element('root', dct).getchildren()[0] + + def to_dict(self, **kwargs) -> dict[str, Any]: + def convert(text: str) -> Any: + val = text.strip() + try: + val_array = np.array(val.split(), dtype=float) + if np.all(np.mod(val_array, 1) == 0): + val_array = np.array(val_array, dtype=int) + val_array = val_array.tolist() + return val_array[0] if len(val_array) == 1 else val_array + except Exception: + return val + + stack: list[dict[str, Any]] = [] + results: dict[str, Any] = {} + if self.filepath is None: + return results + + current_path = '' + # TODO determine if iterparse is better than iterwalk + with open(self.filepath, 'rb') as f: + for event, element in etree.iterparse(f, events=('start', 'end')): + tag = element.tag + if event == 'start': + current_path = tag if not current_path else f'{current_path}.{tag}' + if ( + self._required_paths + and current_path not in self._required_paths + ): + continue + stack.append({tag: {}}) + else: + path = current_path + current_path = current_path.rsplit('.', 1)[0] + if self._required_paths and path not in self._required_paths: + continue + data = stack.pop(-1) + text = element.text.strip() if element.text else None + attrib = element.attrib + if attrib: + data.setdefault(tag, {}) + data[tag].update( + (f'{self.attribute_prefix}{k}', v) + for k, v in attrib.items() + ) + if text: + value = convert(text) + if attrib or data[tag]: + data[tag][self.value_key] = value + else: + data[tag] = value + if stack and data: + parent = stack[-1][list(stack[-1].keys())[0]] + if tag in parent: + if ( + isinstance(data[tag], list) + and isinstance(parent[tag], list) + and parent[tag] + and not isinstance(parent[tag][0], list) + ): + parent[tag] = [parent[tag]] + if isinstance(parent[tag], list): + parent[tag].append(data[tag]) + else: + parent[tag] = [ + parent[tag], + data[tag], + ] + else: + # parent[tag] = [data[tag]] if attrib else data[tag] + parent[tag] = data[tag] + else: + results = data + return results + + def load_file(self) -> etree._Element: + try: + return etree.parse(self.filepath) + except Exception: + self.logger.error('Cannot read XML file') + + +class TextParser(MappingParser): + """ + Interface to text file parser. 
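The `TextParser` wrapper delegates to the quantity/regex driven `TextFileParser` (`nomad.parsing.file_parser.TextParser`). A rough usage sketch, assuming the conventional `Quantity(name, pattern, ...)` construction; the file name and regex pattern are made up:

from nomad.parsing.file_parser import Quantity, TextParser as TextFileParser

text_parser = TextFileParser(
    quantities=[Quantity('energy', r'Total energy:\s*(\S+)', repeats=True)]
)
text_parser.mainfile = 'run.out'   # parsing of the quantities happens on demand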
+ """ + + text_parser: TextFileParser = None + + def to_dict(self, **kwargs) -> dict[str, Any]: + if self.data_object: + self.data_object.parse() + return self.data_object._results + return {} + + def from_dict(self, dct: dict[str, Any]): + raise NotImplementedError + + def load_file(self) -> Any: + if self.filepath: + self.text_parser.findlazy = True + self.text_parser.mainfile = self.filepath + return self.text_parser + + +if __name__ == '__main__': + from nomad.parsing.file_parser.mapping_parser import MetainfoParser + from tests.parsing.test_mapping_parser import ( + BSection, + ExampleHDF5Parser, + ExampleSection, + ) + + with MetainfoParser() as archive_parser, ExampleHDF5Parser() as hdf5_parser: + archive_parser.annotation_key = 'hdf5' + archive_parser.data_object = ExampleSection(b=[BSection(v=np.eye(2))]) + + d = dict( + g=dict( + g1=dict(v=[dict(d=np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]))]), + v=['x', 'y', 'z'], + g=dict( + c1=dict( + i=[4, 6], + f=[ + {'@index': 0, '__value': 1}, + {'@index': 2, '__value': 2}, + {'@index': 1, '__value': 1}, + ], + d=[dict(e=[3, 0, 4, 8, 1, 6]), dict(e=[1, 7, 8, 3, 9, 1])], + ), + c=dict( + v=[dict(d=np.eye(3), e=np.zeros(3)), dict(d=np.ones((3, 3)))] + ), + ), + ) + ) + + hdf5_parser.from_dict(d) + + hdf5_parser.convert(archive_parser) diff --git a/nomad/parsing/file_parser/text_parser.py b/nomad/parsing/file_parser/text_parser.py index 10802db4aa74c8fee7928a14df1aaedac06f97eb..a742e171ebae728ad626fd92ec69f010acd41f39 100644 --- a/nomad/parsing/file_parser/text_parser.py +++ b/nomad/parsing/file_parser/text_parser.py @@ -13,16 +13,17 @@ # limitations under the License. -import mmap import io +import mmap import re +from collections.abc import Callable +from typing import Any + import numpy as np import pint -from typing import List, Union, Type, Any -from collections.abc import Callable -from nomad.parsing.file_parser import FileParser from nomad.metainfo import Quantity as mQuantity +from nomad.parsing.file_parser import FileParser from nomad.utils import get_logger @@ -114,7 +115,7 @@ class Quantity: **kwargs, ): self.name: str - self.dtype: str + self.dtype: str | Any self.unit: str self.shape: list[int] if isinstance(quantity, str): @@ -273,6 +274,7 @@ class TextParser(FileParser): super().__init__(mainfile, logger=logger, open=kwargs.get('open', None)) self._quantities: list[Quantity] = quantities self.findall: bool = kwargs.get('findall', True) + self.findlazy: bool = kwargs.get('findlazy', None) self._kwargs = kwargs self._file_length: int = kwargs.get('file_length', 0) self._file_offset: int = kwargs.get('file_offset', 0) @@ -488,8 +490,10 @@ class TextParser(FileParser): sub_parser = quantity.sub_parser.copy() sub_parser.mainfile = self.mainfile sub_parser.logger = self.logger + if sub_parser.findlazy is None: + sub_parser.findlazy = self.findlazy sub_parser._file_handler = b' '.join([g for g in res.groups() if g]) - value.append(sub_parser.parse()) + value.append(sub_parser if sub_parser.findlazy else sub_parser.parse()) else: try: @@ -562,8 +566,9 @@ class TextParser(FileParser): self._parse_quantity(quantity) # free up memory - if isinstance(self._file_handler, mmap.mmap) and self.findall: - self._file_handler.close() + if self.findall: + if isinstance(self._file_handler, mmap.mmap): + self._file_handler.close() self._file_handler = b' ' return self diff --git a/nomad/parsing/file_parser/xml_parser.py b/nomad/parsing/file_parser/xml_parser.py index e1fecc6b3f32a359ad4c3ca96806cd56731ab4d8..5739ec9bf394c42c376a4a2fcd82de38437494dc 
100644 --- a/nomad/parsing/file_parser/xml_parser.py +++ b/nomad/parsing/file_parser/xml_parser.py @@ -15,8 +15,9 @@ import os import re -import numpy as np from xml.etree import ElementTree + +import numpy as np from lxml import etree from nomad.parsing.file_parser import FileParser diff --git a/nomad/parsing/parser.py b/nomad/parsing/parser.py index cc74a0297f8c294050b16edce966fc45ad600fe4..9dd04e06fd0d6f699571370befde56b6fecb26dd 100644 --- a/nomad/parsing/parser.py +++ b/nomad/parsing/parser.py @@ -16,19 +16,20 @@ # limitations under the License. # -from typing import List, Dict, Union, Any, IO -from collections.abc import Iterable -from abc import ABCMeta, abstractmethod -import re +import importlib +import json import os import os.path +import re +from abc import ABCMeta, abstractmethod +from collections.abc import Iterable from functools import lru_cache -import importlib -from pydantic import BaseModel, Extra # pylint: disable=unused-import -import yaml +from typing import IO, Any + import h5py import numpy as np -import json +import yaml +from pydantic import BaseModel, Extra # noqa: F401 from nomad import utils from nomad.config import config @@ -427,7 +428,11 @@ class MatchingParserInterface(MatchingParser): An interface to the NOMAD parsers. Arguments: - parser_class_name: concatenation of module path and parser class name + parser_class_name: + path specification in python style up to the parser class + in case of a plugin, the path starts from `src/`. + E.g. `nomad_parser.parsers.parser.Parser` + for a `Parser` under `<plugin_root>/src/nomad_parser/parsers/parser.py`. """ def __init__(self, parser_class_name: str, *args, **kwargs): diff --git a/nomad/parsing/parsers.py b/nomad/parsing/parsers.py index 3375e235c7803ac90e53969a9a02a685f62d4c4d..47b3653da34dc7836ffe9823189a033f536d71ce 100644 --- a/nomad/parsing/parsers.py +++ b/nomad/parsing/parsers.py @@ -17,34 +17,33 @@ # import os.path -from typing import Optional, Tuple, List, Dict from collections.abc import Iterable from nomad.config import config -from nomad.config.models.plugins import Parser as ParserPlugin, ParserEntryPoint +from nomad.config.models.plugins import Parser as ParserPlugin +from nomad.config.models.plugins import ParserEntryPoint from nomad.datamodel import EntryArchive, EntryMetadata, results -from nomad.datamodel.context import Context, ClientContext +from nomad.datamodel.context import ClientContext, Context +from .artificial import ChaosParser, EmptyParser, GenerateRandomParser, TemplateParser from .parser import ( - MissingParser, - BrokenParser, - Parser, ArchiveParser, + BrokenParser, MatchingParserInterface, + MissingParser, + Parser, ) -from .artificial import EmptyParser, GenerateRandomParser, TemplateParser, ChaosParser from .tabular import TabularDataParser try: # these packages are not available without parsing extra, which is ok, if the # parsers are only initialized to load their metainfo definitions - import platform - - import magic - import gzip import bz2 + import gzip import lzma + import magic + _compressions = { b'\x1f\x8b\x08': ('gz', gzip.open), b'\x42\x5a\x68': ('bz2', bz2.open), diff --git a/nomad/parsing/tabular.py b/nomad/parsing/tabular.py index dcd4eb25946187e48f13fa36f57ccc7955b5f56a..38082a6d814ba919501bdb945d90823910d586a1 100644 --- a/nomad/parsing/tabular.py +++ b/nomad/parsing/tabular.py @@ -15,32 +15,31 @@ # See the License for the specific language governing permissions and # limitations under the License. 
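A hedged sketch of the `parser_class_name` convention documented above for `MatchingParserInterface`; the plugin package, module and class names are hypothetical, and any further matching keyword arguments are omitted:

    from nomad.parsing.parser import MatchingParserInterface

    # For a plugin parser class defined in
    #   <plugin_root>/src/nomad_parser_example/parsers/parser.py
    # the dotted path starts below `src/`:
    interface = MatchingParserInterface('nomad_parser_example.parsers.parser.ExampleParser')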
# +import json +import math import os -from typing import List, Dict, Set, Any, Tuple, Union -from collections.abc import Callable -from collections.abc import Iterator, Iterable - -import pandas as pd import re -import math +from collections.abc import Callable, Iterable, Iterator +from typing import Any + import numpy as np -import json +import pandas as pd import yaml -from cachetools import cached, LRUCache +from cachetools import LRUCache, cached from nomad import utils -from nomad.parsing import MatchingParser -from nomad.units import ureg from nomad.datamodel.data import ArchiveSection -from nomad.metainfo import Section, Quantity, Package, Reference, MSection, Property -from nomad.metainfo.metainfo import MetainfoError, SubSection, MProxy from nomad.datamodel.metainfo.annotations import ( TabularAnnotation, - TabularParserAnnotation, TabularFileModeEnum, TabularMode, + TabularParserAnnotation, ) +from nomad.metainfo import MSection, Package, Property, Quantity, Reference, Section +from nomad.metainfo.metainfo import MetainfoError, MProxy, SubSection from nomad.metainfo.util import MSubSectionList +from nomad.parsing import MatchingParser +from nomad.units import ureg from nomad.utils import generate_entry_id # We define a simple base schema for tabular data. The parser will then generate more diff --git a/nomad/processing/base.py b/nomad/processing/base.py index c9a3a52c7197f1a45e0c37fdb481b8acbf1d9f39..40a93ce18ee9eae21f113746ca5307e6322684db 100644 --- a/nomad/processing/base.py +++ b/nomad/processing/base.py @@ -16,43 +16,42 @@ # limitations under the License. # -from typing import Any, Tuple, List, Dict, NamedTuple +import functools import logging -import time import os +import time from collections import defaultdict +from datetime import datetime +from typing import Any, NamedTuple + +import billiard +from billiard.exceptions import WorkerLostError from celery import Celery, Task -from celery.worker.request import Request -from celery.bootsteps import StartStopStep +from celery.exceptions import SoftTimeLimitExceeded from celery.signals import ( - after_setup_task_logger, after_setup_logger, - worker_process_init, + after_setup_task_logger, celeryd_after_setup, + worker_process_init, worker_process_shutdown, ) from celery.utils import worker_direct -from celery.exceptions import SoftTimeLimitExceeded -import billiard -from billiard.exceptions import WorkerLostError +from celery.worker.request import Request from mongoengine import ( - Document, - StringField, - ListField, DateTimeField, + Document, IntField, + ListField, + StringField, ValidationError, ) from mongoengine.connection import ConnectionFailure -from datetime import datetime -import functools -from nomad import utils, infrastructure +import nomad.patch # noqa: F401 +from nomad import infrastructure, utils +from nomad.app.v1.routers.info import statistics from nomad.config import config from nomad.config.models.config import CELERY_WORKER_ROUTING -from nomad.app.v1.routers.info import statistics -import nomad.patch # pylint: disable=unused-import - if config.logstash.enabled: from nomad.utils import structlogging diff --git a/nomad/processing/data.py b/nomad/processing/data.py index bc91723d7cb4a88885298b247d430e2a05425135..706ebd80ebe3c3ff45b89cce651efac314d19bb0 100644 --- a/nomad/processing/data.py +++ b/nomad/processing/data.py @@ -29,103 +29,84 @@ entries, and files """ import base64 -from typing import ( - Optional, - cast, - Any, - List, - Tuple, - Set, - Dict, - Union, -) -from collections.abc import 
Iterator, Iterable, Sequence -from pydantic import ValidationError -from pydantic_core import InitErrorDetails, PydanticCustomError +import copy +import hashlib +import os.path +from collections.abc import Iterable, Iterator, Sequence +from contextlib import contextmanager +from datetime import datetime +from typing import Any, Union, cast + +import requests import rfc3161ng +import validators +from fastapi.exceptions import RequestValidationError from mongoengine import ( - StringField, - DateTimeField, BooleanField, - IntField, - ListField, + DateTimeField, DictField, EmbeddedDocument, EmbeddedDocumentField, + IntField, + ListField, + StringField, ) from pymongo import UpdateOne from structlog import wrap_logger -from contextlib import contextmanager -import copy -import os.path -from datetime import datetime -import hashlib -from structlog.processors import StackInfoRenderer, format_exc_info, TimeStamper -import requests -from fastapi.exceptions import RequestValidationError -import validators +from structlog.processors import StackInfoRenderer, TimeStamper, format_exc_info -from nomad import ( - utils, - infrastructure, - search, - datamodel, - metainfo, - parsing, - client, +from nomad import client, datamodel, infrastructure, metainfo, parsing, search, utils +from nomad.app.v1.models import ( + Aggregation, + MetadataEditRequest, + MetadataPagination, + MetadataRequired, + TermsAggregation, + restrict_query_to_upload, +) +from nomad.app.v1.routers.metainfo import store_package_definition +from nomad.archive import ( + delete_partial_archives_from_mongo, + to_json, + write_partial_archive_to_mongo, ) -from nomad.config import config from nomad.common import is_safe_relative_path +from nomad.config import config +from nomad.config.models.config import Reprocess from nomad.config.models.plugins import ExampleUploadEntryPoint - +from nomad.datamodel import ( + AuthLevel, + EditableUserMetadata, + EntryArchive, + EntryMetadata, + MongoEntryMetadata, + MongoSystemMetadata, + MongoUploadMetadata, + ServerContext, +) from nomad.datamodel.datamodel import RFC3161Timestamp from nomad.files import ( - RawPathInfo, PathObject, - UploadFiles, PublicUploadFiles, + RawPathInfo, StagingUploadFiles, + UploadFiles, create_tmp_dir, ) -from nomad.groups import user_group_exists, get_group_ids +from nomad.groups import get_group_ids, user_group_exists from nomad.metainfo.data_type import Datatype, Datetime +from nomad.normalizing import normalizers +from nomad.parsing import Parser +from nomad.parsing.parsers import match_parser, parser_dict from nomad.processing.base import ( Proc, + ProcessAlreadyRunning, + ProcessFailure, + ProcessStatus, process, process_local, - ProcessStatus, - ProcessFailure, - ProcessAlreadyRunning, -) -from nomad.parsing import Parser -from nomad.parsing.parsers import parser_dict, match_parser -from nomad.normalizing import normalizers -from nomad.datamodel import ( - EntryArchive, - EntryMetadata, - MongoUploadMetadata, - MongoEntryMetadata, - MongoSystemMetadata, - EditableUserMetadata, - AuthLevel, - ServerContext, -) -from nomad.archive import ( - write_partial_archive_to_mongo, - delete_partial_archives_from_mongo, - to_json, ) -from nomad.app.v1.models import ( - MetadataEditRequest, - Aggregation, - TermsAggregation, - MetadataPagination, - MetadataRequired, - restrict_query_to_upload, -) -from nomad.app.v1.routers.metainfo import store_package_definition from nomad.search import update_metadata as es_update_metadata -from nomad.config.models.config import Reprocess from 
nomad.utils.pydantic import CustomErrorWrapper section_metadata = datamodel.EntryArchive.metadata.name @@ -916,6 +897,7 @@ class Entry(Proc): external database where the data was imported from nomad_version: the NOMAD version used for the last processing nomad_commit: the NOMAD commit used for the last processing + nomad_distro_commit_url: the NOMAD distro commit url used for the last processing comment: a user provided comment for this entry references: user provided references (URLs) for this entry entry_coauthors: a user provided list of co-authors specific for this entry. Note @@ -936,6 +918,7 @@ class Entry(Proc): external_id = StringField() nomad_version = StringField() nomad_commit = StringField() + nomad_distro_commit_url = StringField() comment = StringField() references = ListField(StringField()) entry_coauthors = ListField() @@ -1019,8 +1002,11 @@ class Entry(Proc): In this case, the timestamp stored in the archive is used. If no previous timestamp is available, a new timestamp is generated. """ + distro_commit_url = utils.nomad_distro_metadata() + entry_metadata.nomad_version = config.meta.version entry_metadata.nomad_version = config.meta.version entry_metadata.nomad_commit = '' + entry_metadata.nomad_distro_commit_url = distro_commit_url or '' entry_metadata.entry_hash = self.upload_files.entry_hash( self.mainfile, self.mainfile_key ) diff --git a/nomad/search.py b/nomad/search.py index 1dc31f8abd05cd74a95393253f7fc35b75d2d15d..0a3785622d88504bdd5733db27ae3134972d54ae 100644 --- a/nomad/search.py +++ b/nomad/search.py @@ -34,18 +34,9 @@ partially implemented. import json import math +from collections.abc import Callable, Generator, Iterable, Iterator from enum import Enum -from typing import ( - Any, - Dict, - List, - Optional, - Tuple, - Union, - cast, -) -from collections.abc import Callable -from collections.abc import Generator, Iterable, Iterator +from typing import Any, cast import elasticsearch.helpers from elasticsearch.exceptions import RequestError, TransportError @@ -85,12 +76,7 @@ from nomad.app.v1.models.models import ( Value, ) from nomad.config import config -from nomad.datamodel import ( - EntryArchive, - EntryMetadata, - AuthorReference, - UserReference, -) +from nomad.datamodel import AuthorReference, EntryArchive, EntryMetadata, UserReference from nomad.groups import MongoUserGroup from nomad.metainfo import Datetime, Package, Quantity from nomad.metainfo.elasticsearch_extension import ( diff --git a/nomad/utils/__init__.py b/nomad/utils/__init__.py index 3400910cfb9218c8f0e03ee201b07177556b5543..f124328c29c232a547f4bc6f50992492617f3886 100644 --- a/nomad/utils/__init__.py +++ b/nomad/utils/__init__.py @@ -38,7 +38,7 @@ Depending on the configuration all logs will also be send to a central logstash. .. 
autofunc::nomad.utils.strip """ -from typing import List, Union, Any, Dict, Optional +from typing import Any from collections.abc import Iterable from collections import OrderedDict from functools import reduce @@ -54,6 +54,7 @@ from datetime import timedelta import collections import logging import inspect +from importlib.metadata import PackageNotFoundError, metadata, version import orjson import os @@ -1147,3 +1148,46 @@ def dict_to_dataframe( filtered_df = filter_df_columns_by_prefix(df, keys_to_filter) filtered_dict = dataframe_to_dict(filtered_df) return pd.json_normalize(filtered_dict, errors='ignore') + + +def nomad_distro_metadata() -> str | None: + """ + Retrieves metadata for the 'nomad-distribution' package, including the + repository URL with latest commit hash. + + Returns: + The repo url with commit hash or None if unavailable. + """ + try: + distro_metadata = metadata('nomad-distribution') + + # Extract repository URL from Project-URL metadata + project_urls: list[str] = distro_metadata.get_all('Project-URL', []) + repo_url = next( + ( + url.split(', ', 1)[1] + for url in project_urls + if url.startswith('repository, ') + ), + None, + ) + + distro_version = version('nomad-distribution') + if '+g' in distro_version: + # Split on '+g' to extract the commit hash from the version string, as 'g' is a Git-specific prefix. + commit = distro_version.split('+g')[ + -1 + ] # Extract commit hash if present (setuptools_scm format) + else: + commit = ( + f'v{distro_version}' # Otherwise, assume it's a tag and prefix with 'v' + ) + + if not repo_url or not commit: + return None + + commit_url = f'{repo_url}/tree/{commit}' + + return commit_url + except (PackageNotFoundError, IndexError, StopIteration, KeyError): + return None diff --git a/nomad/utils/exampledata.py b/nomad/utils/exampledata.py index 6fec3a33a862a355d12967ccdf3b98d339b43a09..4603a39b65b32d6c9e5afa016c34e1ed1275981d 100644 --- a/nomad/utils/exampledata.py +++ b/nomad/utils/exampledata.py @@ -16,16 +16,16 @@ # limitations under the License. 
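To make the behaviour of `nomad_distro_metadata` above concrete, a hedged example (the repository URL and version strings are hypothetical):

    from nomad.utils import nomad_distro_metadata

    # If the installed 'nomad-distribution' package advertises
    #   Project-URL: repository, https://github.com/example/nomad-distribution
    # then a setuptools_scm version like '1.3.1.dev42+g1a2b3c4' yields
    #   'https://github.com/example/nomad-distribution/tree/1a2b3c4'
    # and a plain release version like '1.3.1' is treated as a tag:
    #   'https://github.com/example/nomad-distribution/tree/v1.3.1'
    commit_url = nomad_distro_metadata()  # None if the package is not installed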
# -from typing import List, Optional, Union, Dict, Any -from datetime import datetime, timedelta import os +from datetime import datetime, timedelta +from typing import Any -from nomad import search, files -from nomad.datamodel import EntryMetadata, EntryArchive, Results +from nomad import files, search +from nomad.datamodel import EntryArchive, EntryMetadata, Results +from nomad.datamodel.metainfo import runschema from nomad.datamodel.metainfo.workflow import Workflow -from nomad.processing.data import mongo_upload_metadata from nomad.normalizing import normalizers -from nomad.datamodel.metainfo import runschema +from nomad.processing.data import mongo_upload_metadata class ExampleData: @@ -61,8 +61,8 @@ class ExampleData: es_nomad_version: str = None, archive_nomad_version: str = None, ): - from tests.test_files import create_test_upload_files from nomad import processing as proc + from tests.test_files import create_test_upload_files errors = None @@ -145,8 +145,8 @@ class ExampleData: parser_name: str | None = None, ): """Creates an entry from a mainfile which then gets parsed and normalized.""" - from nomad.parsing import parsers from nomad import parsing + from nomad.parsing import parsers assert upload_id in self.uploads, 'Must create the upload first' diff --git a/nomad/utils/json_transformer.py b/nomad/utils/json_transformer.py index 9310a8894fae2a9a46ad97781187d84c8aea839c..1a59ace7925db142aa3ab2849de82dd2272890b4 100644 --- a/nomad/utils/json_transformer.py +++ b/nomad/utils/json_transformer.py @@ -16,11 +16,11 @@ # limitations under the License. # import re -from typing import Union, Any, Optional +from typing import Any import jmespath -from nomad.datamodel.metainfo.annotations import Rules, Rule, Condition +from nomad.datamodel.metainfo.annotations import Condition, Rule, Rules class Transformer: diff --git a/nomad/utils/structlogging.py b/nomad/utils/structlogging.py index 0a97f0a9546af4c65106a12d06abcc3c20a6e4a0..3cd4d85ba4e90b837d2be650e1cb8c6d9420d017 100644 --- a/nomad/utils/structlogging.py +++ b/nomad/utils/structlogging.py @@ -25,22 +25,22 @@ take keyword arguments for structured data. Otherwise `get_logger` can be used similar to the standard `logging.getLogger`. 
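To illustrate the structured-logging call style described above, a small sketch (the event name and keyword fields are arbitrary examples):

    from nomad import utils

    logger = utils.get_logger(__name__)
    # extra keyword arguments become structured fields of the log event
    logger.info('entry processed', upload_id='some_upload_id', parser='parsers/archive')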
""" -from typing import cast, Any +import json import logging +import os.path +import re from logging.handlers import WatchedFileHandler +from typing import Any, cast + +import logstash import structlog from structlog.processors import ( + JSONRenderer, StackInfoRenderer, - format_exc_info, TimeStamper, - JSONRenderer, + format_exc_info, ) from structlog.stdlib import LoggerFactory -import logstash -from contextlib import contextmanager -import json -import re -import os.path from nomad import utils from nomad.config import config diff --git a/pyproject.toml b/pyproject.toml index d6781f5710d3eb8b8c68d7ae88fd4d30821e89a7..81e4c3f87c02b97108b3336af9b1a4a86c30cdaf 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -26,6 +26,7 @@ dependencies = [ 'httpx>=0.23.3,<0.28', 'importlib_metadata~=7.1.0', 'jmespath>=0.10.0', + 'jsonpath-ng', 'kaleido==0.2.1', 'lxml-html-clean>=0.1.0', 'lxml>=5.2', @@ -78,7 +79,7 @@ infrastructure = [ 'celery>=5.0', 'dockerspawner==13.0.0', 'elasticsearch>=7.0,<8', - 'fastapi>0.100', # later versions pending pydantic v2 upgrade + 'fastapi>0.100', 'filelock==3.3.1', 'gitpython>=3.0', 'gunicorn>=21.2.0,<22.0.0', @@ -121,7 +122,7 @@ dev = [ 'mkdocs-material>=9.0', 'mkdocs-redirects>=1.0', 'mkdocs>=1.0', - 'mypy==1.0.1', # bug: incompatible with derived models of pydantic v1 + 'mypy>=1.15', 'names==0.3.0', 'uv>=0.2.35', 'pytest-asyncio>=0.23', @@ -151,10 +152,12 @@ indent-width = 4 [tool.ruff.lint] select = [ - "E", # pycodestyle - "W", # pycodestyle - "PL", # pylint - "UP", # pyupgrade + "E", # pycodestyle + "F401", # remove unused import + "I001", # sort imports + "PL", # pylint + "UP", # pyupgrade + "W", # pycodestyle ] ignore = [ "E501", # Line too long ({width} > {limit} characters) @@ -170,11 +173,12 @@ ignore = [ "PLW2901", # redefined-loop-name "PLR1714", # consider-using-in "PLR5501", # else-if-used - "UP035", # deprecated-import ] fixable = ["ALL"] +isort.split-on-trailing-comma = false [tool.ruff.lint.extend-per-file-ignores] +"__init__.py" = ["F401", "I001"] "nomad/app/v1/models/graph/utils.py" = [ "UP007", ] # causes pydantic model building errors @@ -203,7 +207,7 @@ ignore_missing_imports = true follow_imports = "silent" no_strict_optional = true disable_error_code = "import, annotation-unchecked" - +plugins = ["pydantic.mypy"] [tool.setuptools_scm] diff --git a/requirements-dev.txt b/requirements-dev.txt index 386a6f469eca3518d30c97ed5ba809057afec60b..e1b75bf9ff40359d6ee77f3ce10d105ac3fb64a2 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -120,7 +120,7 @@ monty==2025.1.9 # via pymatgen, -r requirements.txt mpmath==1.3.0 # via sympy, -r requirements.txt msgpack==1.1.0 # via -r requirements.txt, nomad-lab (pyproject.toml) msgspec==0.19.0 # via -r requirements.txt, nomad-lab (pyproject.toml) -mypy==1.0.1 # via nomad-lab (pyproject.toml) +mypy==1.15.0 # via nomad-lab (pyproject.toml) mypy-extensions==1.0.0 # via mypy names==0.3.0 # via nomad-lab (pyproject.toml) netcdf4==1.6.5 # via -r requirements.txt, nomad-lab (pyproject.toml) diff --git a/tests/app/test_app.py b/tests/app/test_app.py index 12a36607c0894240027972d8c5fabd4664d73e00..6140f085b980fbecd8dbbd1f1ce20ebff0759611 100644 --- a/tests/app/test_app.py +++ b/tests/app/test_app.py @@ -16,9 +16,10 @@ # limitations under the License. 
# -import pytest import os +import pytest + from nomad.config import config diff --git a/tests/app/test_dcat.py b/tests/app/test_dcat.py index 4f08b698f2a66cf80bed6f094aaaa07989a98cb2..1f0ecd711152a3b32edbf0e60731fe4e04016f3e 100644 --- a/tests/app/test_dcat.py +++ b/tests/app/test_dcat.py @@ -16,14 +16,15 @@ # limitations under the License. # -import pytest from datetime import datetime + +import pytest from fastapi.testclient import TestClient from nomad.app.dcat.main import app from nomad.app.dcat.mapping import Mapping -from nomad.datamodel.results import Material, Results from nomad.datamodel import Dataset +from nomad.datamodel.results import Material, Results from nomad.utils.exampledata import ExampleData diff --git a/tests/app/test_h5grove.py b/tests/app/test_h5grove.py index 592cb623ec0faca0212bb4fe56f70f17a8620f2c..a5b7446b381109205ee1799ba17acbb10e3aaf30 100644 --- a/tests/app/test_h5grove.py +++ b/tests/app/test_h5grove.py @@ -15,15 +15,16 @@ # See the License for the specific language governing permissions and # limitations under the License. # -import pytest import os + import h5py +import pytest from fastapi.testclient import TestClient from nomad.app import h5grove_app -from nomad.utils.exampledata import ExampleData -from nomad.files import StagingUploadFiles from nomad.config import config +from nomad.files import StagingUploadFiles +from nomad.utils.exampledata import ExampleData @pytest.fixture diff --git a/tests/app/test_optimade.py b/tests/app/test_optimade.py index a7841ff89a7a45e7ca0c8977f932047c5dab4623..3b95f9173ba0f0cfad64ecda1cad549457118c8e 100644 --- a/tests/app/test_optimade.py +++ b/tests/app/test_optimade.py @@ -17,15 +17,15 @@ # import json + import pytest -from nomad.processing import Upload from nomad import utils -from nomad.search import search from nomad.app.optimade import parse_filter from nomad.app.optimade.common import provider_specific_fields +from nomad.processing import Upload +from nomad.search import search from nomad.utils.exampledata import ExampleData - from tests.fixtures.infrastructure import clear_elastic, clear_raw_files diff --git a/tests/app/test_resources.py b/tests/app/test_resources.py index e76d787079aabe24883b2fa234e448f52ee16363..4f1b3929fa0e0bcd263241dd16877d77cc56cfcc 100644 --- a/tests/app/test_resources.py +++ b/tests/app/test_resources.py @@ -15,21 +15,22 @@ # See the License for the specific language governing permissions and # limitations under the License. # -import pytest import json -from fastapi.testclient import TestClient -import httpx -from urllib.parse import urlencode import time +from urllib.parse import urlencode + import dateutil.parser +import httpx +import pytest +from fastapi.testclient import TestClient -from nomad.config import config from nomad.app.resources.main import app, remove_mongo from nomad.app.resources.routers.resources import ( aflow_prototypes_db, - springer_materials_db, optimade_providers, + springer_materials_db, ) +from nomad.config import config def _to_datetime(datetime_str): diff --git a/tests/app/v1/routers/common.py b/tests/app/v1/routers/common.py index 19acc77e630f4f28250149dcc648a772472b60e9..20f17289ddb49fa9e1b8a23161ce85ada5f1fafe 100644 --- a/tests/app/v1/routers/common.py +++ b/tests/app/v1/routers/common.py @@ -16,17 +16,16 @@ # limitations under the License. 
# -import pytest -from typing import Set, Literal, Optional, List, Any import json import re -from devtools import debug +from typing import Any, Literal from urllib.parse import urlencode -from nomad.utils import deep_get +import pytest +from devtools import debug from nomad.datamodel import results - +from nomad.utils import deep_get from tests.utils import assert_at_least, assert_url_query_args, build_url n_code_names = results.Simulation.program_name.a_elasticsearch[ diff --git a/tests/app/v1/routers/test_auth.py b/tests/app/v1/routers/test_auth.py index c814260a3c4b502c8c841b09a2cb1fd2eb1302fe..2c5e3e67c8f42da033bc0c1bc716025638f89d63 100644 --- a/tests/app/v1/routers/test_auth.py +++ b/tests/app/v1/routers/test_auth.py @@ -16,9 +16,10 @@ # limitations under the License. # -import pytest from urllib.parse import urlencode +import pytest + def perform_get_token_test(client, http_method, status_code, username, password): if http_method == 'post': diff --git a/tests/app/v1/routers/test_datasets.py b/tests/app/v1/routers/test_datasets.py index eecbdf50251be6853bbadb898d844a11726bf255..7153ee770a9b3a076f6f7072354924a2d0f7deff 100644 --- a/tests/app/v1/routers/test_datasets.py +++ b/tests/app/v1/routers/test_datasets.py @@ -16,17 +16,16 @@ # limitations under the License. # -from typing import List -import pytest -from urllib.parse import urlencode from datetime import datetime +from urllib.parse import urlencode + +import pytest -from nomad.datamodel import Dataset from nomad import processing +from nomad.app.v1.models import Any_, Query +from nomad.datamodel import Dataset from nomad.search import search -from nomad.app.v1.models import Query, Any_ from nomad.utils.exampledata import ExampleData - from tests.fixtures.users import admin_user_id from .common import assert_response diff --git a/tests/app/v1/routers/test_entries.py b/tests/app/v1/routers/test_entries.py index 8ec76fef0bfa476fb44668b678ee03be7fa34bc2..36985eb08f55098d5930560ca4707427e14f710a 100644 --- a/tests/app/v1/routers/test_entries.py +++ b/tests/app/v1/routers/test_entries.py @@ -16,37 +16,37 @@ # limitations under the License. 
# -import pytest -from urllib.parse import urlencode -import zipfile import io import json +import zipfile +from urllib.parse import urlencode + +import pytest from nomad.metainfo.elasticsearch_extension import entry_type, schema_separator from nomad.utils.exampledata import ExampleData - -from tests.test_files import example_mainfile_contents, append_raw_files # pylint: disable=unused-import +from tests.test_files import append_raw_files, example_mainfile_contents # noqa: F401 from tests.variables import python_schema_name from .common import ( aggregation_exclude_from_search_test_parameters, - assert_response, + aggregation_test_parameters, + aggregation_test_parameters_default, + assert_aggregation_response, + assert_aggregations, assert_base_metadata_response, - assert_query_response, + assert_browser_download_headers, assert_metadata_response, - assert_required, - assert_aggregations, assert_pagination, - assert_browser_download_headers, - post_query_test_parameters, + assert_query_response, + assert_required, + assert_response, get_query_test_parameters, - perform_owner_test, owner_test_parameters, pagination_test_parameters, - aggregation_test_parameters, - aggregation_test_parameters_default, - assert_aggregation_response, perform_entries_metadata_test, + perform_owner_test, + post_query_test_parameters, ) """ diff --git a/tests/app/v1/routers/test_entries_archive_edit.py b/tests/app/v1/routers/test_entries_archive_edit.py index 8a2e90107dbffd9b62c3d1d9677d5627e4ed7559..ba9903ebde1018599934eccc59a914e1243c6f9b 100644 --- a/tests/app/v1/routers/test_entries_archive_edit.py +++ b/tests/app/v1/routers/test_entries_archive_edit.py @@ -17,6 +17,7 @@ # import json + import pytest from nomad.datamodel.datamodel import EntryArchive, EntryMetadata diff --git a/tests/app/v1/routers/test_entries_edit.py b/tests/app/v1/routers/test_entries_edit.py index cd793d3fb4c43a5696bf9dc7800296de9b254b88..1d3db4a4df67d2a0e8bd2479212a3c2708f84692 100644 --- a/tests/app/v1/routers/test_entries_edit.py +++ b/tests/app/v1/routers/test_entries_edit.py @@ -16,22 +16,22 @@ # limitations under the License. 
# -import pytest from datetime import datetime -from nomad.search import search +import pytest + +from nomad import processing as proc +from nomad import utils from nomad.datamodel import Dataset -from nomad import processing as proc, utils +from nomad.search import search from nomad.utils.exampledata import ExampleData - from tests.app.v1.routers.common import assert_response from tests.processing.test_edit_metadata import ( - assert_metadata_edited, - all_coauthor_entry_metadata, all_admin_entry_metadata, + all_coauthor_entry_metadata, + assert_metadata_edited, ) - logger = utils.get_logger(__name__) diff --git a/tests/app/v1/routers/test_federation.py b/tests/app/v1/routers/test_federation.py index cb990557fa7cc14b526542074f65c298a93002cb..3b8d8284b51d58ccdd43b54bd5b1db150f616b88 100644 --- a/tests/app/v1/routers/test_federation.py +++ b/tests/app/v1/routers/test_federation.py @@ -1,9 +1,10 @@ -import requests import json -import pytest -from copy import deepcopy import logging import zlib +from copy import deepcopy + +import pytest +import requests from nomad.config import config from nomad.utils.structlogging import LogstashFormatter diff --git a/tests/app/v1/routers/test_graph.py b/tests/app/v1/routers/test_graph.py index 4e1beee2d52e29c09da29061caedfdd2d1ef48fa..5a12924f66a284f8d3af88036857ed051cf8b171 100644 --- a/tests/app/v1/routers/test_graph.py +++ b/tests/app/v1/routers/test_graph.py @@ -18,12 +18,11 @@ import pytest -from nomad.graph.graph_reader import EntryReader, Token from nomad.datamodel import EntryArchive -from nomad.metainfo import Section, Quantity +from nomad.datamodel.metainfo import SCHEMA_IMPORT_ERROR, simulationworkflowschema +from nomad.graph.graph_reader import EntryReader, Token from nomad.utils.exampledata import ExampleData from tests.archive.test_archive import assert_dict -from nomad.datamodel.metainfo import simulationworkflowschema, SCHEMA_IMPORT_ERROR # try: # from rich.pretty import pprint diff --git a/tests/app/v1/routers/test_materials.py b/tests/app/v1/routers/test_materials.py index 3adbb143e28db88adf816bda0b6fb246d073b7c3..a1e998720b957664a3542fda24169942467dabae 100644 --- a/tests/app/v1/routers/test_materials.py +++ b/tests/app/v1/routers/test_materials.py @@ -16,27 +16,27 @@ # limitations under the License. 
# -import pytest from urllib.parse import urlencode -from nomad.metainfo.elasticsearch_extension import material_entry_type -from nomad.datamodel import results +import pytest -from tests.test_files import example_mainfile_contents # pylint: disable=unused-import +from nomad.datamodel import results +from nomad.metainfo.elasticsearch_extension import material_entry_type +from tests.test_files import example_mainfile_contents # noqa: F401 from .common import ( aggregation_exclude_from_search_test_parameters, - assert_pagination, + aggregation_test_parameters_default, + assert_aggregation_response, assert_metadata_response, + assert_pagination, assert_required, + get_query_test_parameters, + owner_test_parameters, + pagination_test_parameters, perform_metadata_test, perform_owner_test, - owner_test_parameters, post_query_test_parameters, - get_query_test_parameters, - pagination_test_parameters, - aggregation_test_parameters_default, - assert_aggregation_response, ) """ diff --git a/tests/app/v1/routers/test_metainfo.py b/tests/app/v1/routers/test_metainfo.py index 8d4e2e78a44cde6573f411aac933f685601869d8..a34cb5be582d0e6cd8327987cfd4953f36ff45e2 100644 --- a/tests/app/v1/routers/test_metainfo.py +++ b/tests/app/v1/routers/test_metainfo.py @@ -21,11 +21,11 @@ from zipfile import ZipFile import pytest -from nomad.config import config from nomad.app.v1.routers.metainfo import store_package_definition -from nomad.datamodel import EntryArchive, ClientContext -from nomad.metainfo import MSection, MetainfoReferenceError -from nomad.utils import generate_entry_id, create_uuid +from nomad.config import config +from nomad.datamodel import ClientContext, EntryArchive +from nomad.metainfo import MetainfoReferenceError, MSection +from nomad.utils import create_uuid, generate_entry_id from tests.processing.test_data import run_processing diff --git a/tests/app/v1/routers/test_suggestions.py b/tests/app/v1/routers/test_suggestions.py index 937aad115af73b98413d9863cb7a857940bd35f2..093c20ed7eb23f288efbd91dfe87bb89e1cdd657 100644 --- a/tests/app/v1/routers/test_suggestions.py +++ b/tests/app/v1/routers/test_suggestions.py @@ -27,8 +27,10 @@ to assert for certain aspects in the responses. 
""" import pytest + from nomad.metainfo.elasticsearch_extension import entry_type from nomad.utils.exampledata import ExampleData + from .common import assert_response diff --git a/tests/app/v1/routers/test_systems.py b/tests/app/v1/routers/test_systems.py index debb8bdef085c2f6a3950386e9cbb53c7a161309..6b76eb777c317dfd6b6bf06550c4258914f08c3e 100644 --- a/tests/app/v1/routers/test_systems.py +++ b/tests/app/v1/routers/test_systems.py @@ -19,21 +19,21 @@ import re from io import BytesIO, StringIO -import pytest -import numpy as np import ase.io +import numpy as np +import pytest from ase import Atoms as ASEAtoms -from nomad.units import ureg -from nomad.normalizing.common import ase_atoms_from_nomad_atoms +from nomad.app.v1.routers.systems import FormatFeature, WrapModeEnum, format_map from nomad.datamodel.datamodel import EntryArchive -from nomad.datamodel.results import Results, Material, System from nomad.datamodel.metainfo import runschema from nomad.datamodel.metainfo.system import Atoms +from nomad.datamodel.results import Material, Results, System +from nomad.normalizing.common import ase_atoms_from_nomad_atoms +from nomad.units import ureg from nomad.utils.exampledata import ExampleData -from nomad.app.v1.routers.systems import format_map, FormatFeature, WrapModeEnum -from .common import assert_response, assert_browser_download_headers +from .common import assert_browser_download_headers, assert_response def ase_atoms(content, format): diff --git a/tests/app/v1/routers/uploads/test_basic_uploads.py b/tests/app/v1/routers/uploads/test_basic_uploads.py index f1e6d9bbed79041c3260d0a6d0be12c43bf38f88..a31f7ca2c20bdeadacf48f723536ef0954810e6f 100644 --- a/tests/app/v1/routers/uploads/test_basic_uploads.py +++ b/tests/app/v1/routers/uploads/test_basic_uploads.py @@ -18,12 +18,12 @@ import io import os +import tempfile import time import zipfile -from datetime import datetime -import tempfile -from typing import Any, Dict, List from collections.abc import Iterable +from datetime import datetime +from typing import Any import pytest import requests @@ -41,19 +41,18 @@ from tests.app.v1.routers.common import ( assert_response, perform_get, ) +from tests.config.models.test_plugins import ( + mock_example_upload_entry_point, + mock_plugin_package, +) from tests.processing.test_edit_metadata import ( all_admin_metadata, all_coauthor_metadata, assert_metadata_edited, ) -from tests.config.models.test_plugins import ( - mock_plugin_package, - mock_example_upload_entry_point, -) from tests.test_files import ( assert_upload_files, empty_file, - example_directory, example_file_aux, example_file_corrupt_zip, example_file_mainfile_different_atoms, diff --git a/tests/app/v1/test_models.py b/tests/app/v1/test_models.py index 821fea8dc341e26b531b3b046b979c4492159faa..9210f75d212792c627643cf23845572c02f11926 100644 --- a/tests/app/v1/test_models.py +++ b/tests/app/v1/test_models.py @@ -17,15 +17,16 @@ # from __future__ import annotations -from typing import List + +import sys + import pytest -from pydantic import BaseModel, Field, ValidationError import yaml -import sys +from pydantic import BaseModel, Field, ValidationError -from nomad.utils import strip from nomad.app.v1.models.graph import GraphRequest from nomad.app.v1.models.graph.utils import generate_request_model, mapped +from nomad.utils import strip @pytest.fixture() diff --git a/tests/app/v1/test_utils.py b/tests/app/v1/test_utils.py index ba6fd19fcfec71d55476496d73df32b2bd351f87..5ed2c55176c8fc573420fafce25fa53937d0f9d3 100644 --- 
a/tests/app/v1/test_utils.py +++ b/tests/app/v1/test_utils.py @@ -1,4 +1,5 @@ import pytest + from nomad.app.v1.utils import get_query_keys diff --git a/tests/archive/test_archive.py b/tests/archive/test_archive.py index 0d972d4ff9a887c25045db690f50a68cb3a92a47..0c35c9b5b0b8b7d3e89a829d6e5c1c1b2efd2768 100644 --- a/tests/archive/test_archive.py +++ b/tests/archive/test_archive.py @@ -15,42 +15,42 @@ # See the License for the specific language governing permissions and # limitations under the License. # +import json +import os.path from datetime import datetime -from typing import Dict, Any, Union -import pytest -import msgpack from io import BytesIO -import os.path -import json +from typing import Any +import msgpack +import pytest import yaml from nomad import utils -from nomad.config import config +from nomad.archive import ( + ArchiveQueryError, + RequiredReader, + compute_required_with_referenced, + create_partial_archive, + query_archive, + read_archive, + read_partial_archive_from_mongo, + read_partial_archives_from_mongo, + write_archive, + write_partial_archive_to_mongo, +) from nomad.archive.converter import convert_archive +from nomad.archive.storage import _decode, _entries_per_block, to_json +from nomad.config import config +from nomad.datamodel import ClientContext, EntryArchive from nomad.metainfo import ( + Context, + MetainfoError, + MProxy, MSection, Quantity, + QuantityReference, Reference, SubSection, - QuantityReference, - MetainfoError, - Context, - MProxy, -) -from nomad.datamodel import EntryArchive, ClientContext -from nomad.archive.storage import _decode, _entries_per_block, to_json -from nomad.archive import ( - write_archive, - read_archive, - ArchiveQueryError, - query_archive, - write_partial_archive_to_mongo, - read_partial_archive_from_mongo, - read_partial_archives_from_mongo, - create_partial_archive, - compute_required_with_referenced, - RequiredReader, ) from nomad.utils.exampledata import ExampleData diff --git a/tests/archive/test_storage.py b/tests/archive/test_storage.py index 4a2de3dbf6bce3c2e3b9ab8bfd8553060b55f3f7..6ce614b53e82aac86d9a5570a4b2801352312cee 100644 --- a/tests/archive/test_storage.py +++ b/tests/archive/test_storage.py @@ -5,10 +5,7 @@ import msgpack import msgspec.msgpack import pytest -from nomad.archive.storage_v2 import ( - ArchiveList, - ArchiveDict, -) +from nomad.archive.storage_v2 import ArchiveDict, ArchiveList def generate_random_json(depth=10, width=4, simple=False): diff --git a/tests/config/models/test_plugins.py b/tests/config/models/test_plugins.py index a7d2f42ebb06ad971cd2aa00d806058927a4b68c..7d702f33e7f539100f52c98b90559b31bac11af3 100644 --- a/tests/config/models/test_plugins.py +++ b/tests/config/models/test_plugins.py @@ -18,12 +18,13 @@ import os import tempfile + import pytest -from nomad.config import Config +from nomad.config import Config from nomad.config.models.plugins import ( - ExampleUploadEntryPoint, APIEntryPoint, + ExampleUploadEntryPoint, UploadResource, ) diff --git a/tests/config/models/test_ui.py b/tests/config/models/test_ui.py index ce0626c7c885db5ad1184e7f0cddef0da262e6a5..606e9af0fb35890d54785ef54811806a8b562e34 100644 --- a/tests/config/models/test_ui.py +++ b/tests/config/models/test_ui.py @@ -22,13 +22,13 @@ from nomad.config.models.ui import ( App, Axis, AxisQuantity, - Columns, Column, - Rows, + Columns, RowActions, + RowActionURL, RowDetails, + Rows, RowSelection, - RowActionURL, ) diff --git a/tests/conftest.py b/tests/conftest.py index 
bea0af18be93252b6f5715d635e09546e75e1e83..a72d3e2687faca5ff45a9fbf00d1cf1e27a3da6a 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -16,19 +16,17 @@ # limitations under the License. # import builtins -from pathlib import Path -from io import StringIO -import pytest -import time -import os -import socket -import json import logging -import warnings +import os +import socketserver import tempfile +import time +import warnings +from io import StringIO +from pathlib import Path +import pytest from fastapi.testclient import TestClient -import socketserver from nomad.config import config from nomad.config.models.plugins import Schema, add_plugin, remove_plugin @@ -37,8 +35,8 @@ from nomad.config.models.plugins import Schema, add_plugin, remove_plugin config.logstash.enabled = False # noqa: E402 # this must be set *before* the other modules are imported from nomad import utils -from nomad.utils import structlogging from nomad.app.main import app +from nomad.utils import structlogging # Set up pytest to pass control to the debugger on an exception. if os.getenv('_PYTEST_RAISE', '0') != '0': @@ -70,8 +68,8 @@ pytest_plugins = ( ) -from structlog.testing import LogCapture import structlog +from structlog.testing import LogCapture @pytest.fixture(scope='function') diff --git a/tests/data/parsing/file_parser/test.h5 b/tests/data/parsing/file_parser/test.h5 new file mode 100644 index 0000000000000000000000000000000000000000..10da06db996148fbcffec8cf8e31d19c466c3bd0 Binary files /dev/null and b/tests/data/parsing/file_parser/test.h5 differ diff --git a/tests/data/parsing/file_parser/test.xml b/tests/data/parsing/file_parser/test.xml new file mode 100644 index 0000000000000000000000000000000000000000..c2d785a189908ae09bce684ba5a4d6f15f0d1073 --- /dev/null +++ b/tests/data/parsing/file_parser/test.xml @@ -0,0 +1,51 @@ +<a> + <b> + <v>0.0 1.0</v> + <v>2.0 0.0</v> + <c> + <d>x</d> + <d>y</d> + </c> + <c> + <d>l</d> + <d>m</d> + </c> + </b> + <b1> + <v>1.0 2.0</v> + <v>3.0 4.0</v> + </b1> + <b1> + <v>5.0 6.0</v> + <v>7.0 8.0</v> + <c> + <d>a</d> + <d>b</d> + </c> + <c> + <d>c</d> + <d>d</d> + </c> + </b1> + <b2> + <c> + <d>1</d> + <e name='item1'> + <k n='k1'>f1</k> + <k n='k2'>f2</k> + </e> + <e name='item2'> + <k>f3</k> + <k>f4</k> + </e> + <f> + <g> + <i>1 2 3 4 5</i> + <i>6 7 8 9 1</i> + <i>2 2 2 1 3</i> + <i>0 2 8 4 5</i> + </g> + </f> + </c> + </b2> +</a> \ No newline at end of file diff --git a/tests/data/schemas/nomadschemaexample/schema.py b/tests/data/schemas/nomadschemaexample/schema.py index 2bf501f0067643332f9dd9d0f98312a11d090b1e..2f076b6d5734b77ccdad44b834aa11142d8beba3 100644 --- a/tests/data/schemas/nomadschemaexample/schema.py +++ b/tests/data/schemas/nomadschemaexample/schema.py @@ -1,16 +1,17 @@ +import numpy as np + +from nomad.datamodel.data import EntryData +from nomad.datamodel.metainfo.annotations import ELNAnnotation, ELNComponentEnum from nomad.metainfo import ( - Quantity, - Package, - Section, - MEnum, Datetime, + MEnum, MSection, - SubSection, + Package, + Quantity, + Section, SectionProxy, + SubSection, ) -from nomad.datamodel.data import EntryData -from nomad.datamodel.metainfo.annotations import ELNAnnotation, ELNComponentEnum -import numpy as np m_package = Package() diff --git a/tests/datamodel/metainfo/eln/test_structure_file.py b/tests/datamodel/metainfo/eln/test_structure_file.py index 10dad593cb4ff8a36b2b828b6ccf250eda970775..fcdde3dc7dce3ce45777fbf3df5cca38a2026d9c 100644 --- a/tests/datamodel/metainfo/eln/test_structure_file.py +++ 
b/tests/datamodel/metainfo/eln/test_structure_file.py @@ -16,7 +16,6 @@ # limitations under the License. # -from tests.normalizing.conftest import run_normalize from tests.normalizing.conftest import run_processing diff --git a/tests/datamodel/metainfo/test_annotations.py b/tests/datamodel/metainfo/test_annotations.py index 2d0f472f42072296571776a88c3d9d122e7e51b0..fb1208394b8348cc54a73facb400d8b366df3a71 100644 --- a/tests/datamodel/metainfo/test_annotations.py +++ b/tests/datamodel/metainfo/test_annotations.py @@ -19,13 +19,13 @@ import pytest from pydantic import ValidationError -from nomad.metainfo import Quantity from nomad.datamodel.metainfo.annotations import ( - PlotAnnotation, ELNAnnotation, + PlotAnnotation, PlotlyGraphObjectAnnotation, ) from nomad.datamodel.metainfo.plot import PlotlyError +from nomad.metainfo import Quantity @pytest.mark.parametrize( diff --git a/tests/datamodel/metainfo/test_plotly.py b/tests/datamodel/metainfo/test_plotly.py index 63d6daf2176827b663d4130701745b7d1af9c27c..038cffbd0e329d4233c8d64c0dcab992dbffbcff 100644 --- a/tests/datamodel/metainfo/test_plotly.py +++ b/tests/datamodel/metainfo/test_plotly.py @@ -1,4 +1,5 @@ import json + from tests.normalizing.conftest import run_processing diff --git a/tests/datamodel/metainfo/test_substance.py b/tests/datamodel/metainfo/test_substance.py index 78c63e39a54c7ba8257f95167652b4698bb1b8a3..6b30e54d32b7e67e459cbe769f27e29082fb3128 100644 --- a/tests/datamodel/metainfo/test_substance.py +++ b/tests/datamodel/metainfo/test_substance.py @@ -16,12 +16,13 @@ # limitations under the License. # +import json from collections.abc import Iterable + import pytest -import json -from tests.normalizing.conftest import run_processing from nomad.datamodel.metainfo.basesections import v1 as basesections +from tests.normalizing.conftest import run_processing class MockResponse: diff --git a/tests/datamodel/test_context.py b/tests/datamodel/test_context.py index 410c385b5d0fea5d2679321c74e5dafaaf2556d4..f8250df5fbd4d35c3590721e94b72b204d4bb80d 100644 --- a/tests/datamodel/test_context.py +++ b/tests/datamodel/test_context.py @@ -16,21 +16,20 @@ # limitations under the License. # +import json import os +import re import pytest -import json -import re -import numpy as np -from nomad import utils, files, processing -from nomad.metainfo.metainfo import MSection -from nomad.parsing.parser import ArchiveParser +from nomad import files, processing, utils from nomad.datamodel import Context -from nomad.datamodel.context import ServerContext, ClientContext, parse_path +from nomad.datamodel.context import ClientContext, ServerContext, parse_path from nomad.datamodel.datamodel import EntryArchive, EntryMetadata -from nomad.processing import Upload, Entry, ProcessStatus from nomad.datamodel.metainfo import runschema +from nomad.metainfo.metainfo import MSection +from nomad.parsing.parser import ArchiveParser +from nomad.processing import Entry, ProcessStatus, Upload @pytest.fixture(scope='module') diff --git a/tests/datamodel/test_datamodel.py b/tests/datamodel/test_datamodel.py index ed8916ef3400a5df198af0c9d27800290da9b6ca..e0b263d41e6b0ed67e9ba8fb4b1c38b8a1587c26 100644 --- a/tests/datamodel/test_datamodel.py +++ b/tests/datamodel/test_datamodel.py @@ -21,13 +21,12 @@ A generator for random test calculations. 
""" import random -from essential_generators import DocumentGenerator + import pytest +from essential_generators import DocumentGenerator -from nomad.datamodel import EntryArchive, EntryMetadata -from nomad.metainfo import MSection, Quantity, SubSection +from nomad.datamodel.metainfo import SCHEMA_IMPORT_ERROR, runschema from nomad.parsing.parsers import parser_dict -from nomad.datamodel.metainfo import runschema, SCHEMA_IMPORT_ERROR number_of = 20 @@ -74,7 +73,7 @@ low_numbers_for_geometries = [1, 2, 2, 3, 3, 4, 4] @pytest.mark.skipif(runschema is None, reason=SCHEMA_IMPORT_ERROR) def test_common_metainfo(): from runschema.run import Run - from runschema.system import System, Atoms + from runschema.system import Atoms, System run = Run() system = run.m_create(System) @@ -85,8 +84,8 @@ def test_common_metainfo(): @pytest.mark.skipif(runschema is None, reason=SCHEMA_IMPORT_ERROR) def test_vasp_metainfo(): + from electronicparsers.vasp.metainfo import vasp # noqa: F401 from runschema.run import Run - from electronicparsers.vasp.metainfo import vasp # pylint: disable=unused-import run = Run() assert 'vasp_src_date' in run.m_def.all_quantities diff --git a/tests/datamodel/test_hdf5.py b/tests/datamodel/test_hdf5.py index bbdb88e143b4ca15d350410e413cf282231e0417..427b20ccd2b33db0263e020afc0601fb431500f3 100644 --- a/tests/datamodel/test_hdf5.py +++ b/tests/datamodel/test_hdf5.py @@ -16,19 +16,18 @@ # limitations under the License. # -import pytest -import numpy as np import os + import h5py +import numpy as np +import pytest from nomad import files, processing -from nomad.datamodel import EntryData, EntryArchive, EntryMetadata +from nomad.datamodel import EntryArchive, EntryData, EntryMetadata from nomad.datamodel.context import ServerContext +from nomad.datamodel.hdf5 import HDF5Dataset, HDF5Reference from nomad.metainfo import Quantity -from nomad.datamodel.hdf5 import HDF5Reference, HDF5Dataset - - external_file = 'tests/data/datamodel/context.h5' diff --git a/tests/datamodel/test_metadata.py b/tests/datamodel/test_metadata.py index 09cc397ba545f6498c6ba1dea1a59487955a9e5d..0d6c40fd9fb6bd13f6e3890956dd397d042ac2b8 100644 --- a/tests/datamodel/test_metadata.py +++ b/tests/datamodel/test_metadata.py @@ -16,15 +16,16 @@ # limitations under the License. 
# -import pytest -import numpy as np from datetime import datetime + +import numpy as np +import pytest import pytz -from nomad.metainfo import Quantity, MSection, SubSection, Datetime, MEnum -from nomad.datamodel.datamodel import EntryMetadata, SearchableQuantity, EntryArchive -from nomad.metainfo.elasticsearch_extension import schema_separator from nomad.datamodel import EntryData +from nomad.datamodel.datamodel import EntryArchive, EntryMetadata, SearchableQuantity +from nomad.metainfo import Datetime, MEnum, MSection, Quantity, SubSection +from nomad.metainfo.elasticsearch_extension import schema_separator from tests.variables import python_schema_name diff --git a/tests/datamodel/test_schema.py b/tests/datamodel/test_schema.py index 17ccdea3ab84a0a3eb396270e7183376ecd0c421..c06f260122cab7664f40ab2adc3940c1425e60fa 100644 --- a/tests/datamodel/test_schema.py +++ b/tests/datamodel/test_schema.py @@ -17,21 +17,21 @@ # import os.path + import pytest -from nomad.metainfo import MetainfoError from nomad.datamodel.context import ServerContext +from nomad.datamodel.data import AuthorReference, Query, UserReference from nomad.datamodel.datamodel import EntryArchive, EntryMetadata -from nomad.datamodel.data import UserReference, AuthorReference, Query -from nomad.datamodel.metainfo.annotations import valid_eln_types, valid_eln_components +from nomad.datamodel.metainfo.annotations import valid_eln_components, valid_eln_types +from nomad.metainfo import MetainfoError from nomad.metainfo.data_type import Datatype from nomad.parsing.parser import ArchiveParser from nomad.processing.data import Upload from nomad.utils import get_logger, strip - +from tests.metainfo.test_yaml_schema import yaml_to_package from tests.normalizing.conftest import run_normalize from tests.test_files import create_test_upload_files -from tests.metainfo.test_yaml_schema import yaml_to_package def test_schema_processing(raw_files_function, no_warn): diff --git a/tests/examples/test_archive_query.py b/tests/examples/test_archive_query.py index c31de1b4e984c0c34fdae96ca289123cbc10286a..5cad68b2d008bcc4dabad27cddd706c87f2f1e40 100644 --- a/tests/examples/test_archive_query.py +++ b/tests/examples/test_archive_query.py @@ -16,15 +16,14 @@ # limitations under the License. 
# -import os.path import importlib +import os.path import sys from nomad.utils.exampledata import ExampleData - -from tests.parsing.test_parsing import run_singular_parser from tests.normalizing.conftest import run_normalize -from tests.test_client import async_api_v1 +from tests.parsing.test_parsing import run_singular_parser +from tests.test_client import async_api_v1 # noqa: F401 def test_archive_query( diff --git a/tests/examples/test_docs.py b/tests/examples/test_docs.py index e8b737a9fbc0a21a85cd8e5d7893d0b0a5080697..ef1a43720d217ff262683120194593a606e4bd26 100644 --- a/tests/examples/test_docs.py +++ b/tests/examples/test_docs.py @@ -1,12 +1,12 @@ -import yaml import json import os.path +import yaml + from nomad.metainfo import Package from nomad.units import ureg - -from tests.parsing.test_parsing import run_singular_parser from tests.normalizing.conftest import run_normalize +from tests.parsing.test_parsing import run_singular_parser def _file(path): diff --git a/tests/examples/test_metainfo.py b/tests/examples/test_metainfo.py new file mode 100644 index 0000000000000000000000000000000000000000..739280867837e9d5cf4ef9ccf9d6fb1844f29468 --- /dev/null +++ b/tests/examples/test_metainfo.py @@ -0,0 +1,16 @@ +import os +import runpy + +import pytest + +prefix = os.path.join(__file__, '../../../examples/metainfo') + + +@pytest.mark.parametrize( + 'file', + [ + f'{prefix}/data_frames.py', + ], +) +def test_metainfo(file): + runpy.run_path(file) diff --git a/tests/fixtures/data.py b/tests/fixtures/data.py index d974026e243d981642b9701349b7d5257dda528a..d5000b0c381251ea99c04fd1fbf1908c08d5f788 100644 --- a/tests/fixtures/data.py +++ b/tests/fixtures/data.py @@ -1,7 +1,6 @@ import math import os from datetime import datetime, timezone -from typing import List, Tuple import pytest @@ -18,7 +17,6 @@ from nomad.datamodel.datamodel import SearchableQuantity from nomad.metainfo.elasticsearch_extension import schema_separator from nomad.processing import ProcessStatus from nomad.utils.exampledata import ExampleData -from tests.variables import python_schema_name, yaml_schema_name, yaml_schema_root from tests.normalizing.conftest import run_normalize from tests.parsing import test_parsing from tests.processing import test_data as test_processing @@ -28,6 +26,7 @@ from tests.utils import ( create_template_upload_file, set_upload_entry_metadata, ) +from tests.variables import python_schema_name, yaml_schema_name, yaml_schema_root @pytest.fixture(scope='session') diff --git a/tests/fixtures/group_uploads.py b/tests/fixtures/group_uploads.py index b0d8109c89dfc6a64e179ae0e36233d505ff6331..290b2b6d0b44eb83eb8533be38b48528687f7d6d 100644 --- a/tests/fixtures/group_uploads.py +++ b/tests/fixtures/group_uploads.py @@ -11,6 +11,7 @@ Values: """ from collections.abc import Sequence + import pytest from nomad.utils.exampledata import ExampleData diff --git a/tests/graph/test_definition_reader.py b/tests/graph/test_definition_reader.py index f67385897f1c1994fd2602742007eb02aff75e4d..354f9fc00e38e17b3f63649fa45476bf35c942da 100644 --- a/tests/graph/test_definition_reader.py +++ b/tests/graph/test_definition_reader.py @@ -21,8 +21,8 @@ import pytest from nomad.graph.graph_reader import DefinitionReader from nomad.metainfo import ( - Package, MSection, + Package, Quantity, Reference, SectionProxy, diff --git a/tests/graph/test_graph_reader.py b/tests/graph/test_graph_reader.py index ebcdc887caf49afd4a80c6b9011198caed2622ba..d6f9d75cd2a38a1a978894c08620a68dad10b005 100644 --- a/tests/graph/test_graph_reader.py 
+++ b/tests/graph/test_graph_reader.py @@ -22,17 +22,17 @@ from datetime import datetime import pytest import yaml +from nomad.datamodel import EntryArchive from nomad.graph.graph_reader import ( EntryReader, - UploadReader, - UserReader, FileSystemReader, - MongoReader, GeneralReader, + MongoReader, Token, + UploadReader, + UserReader, ) from nomad.graph.lazy_wrapper import LazyWrapper -from nomad.datamodel import EntryArchive from nomad.utils.exampledata import ExampleData from tests.normalizing.conftest import simulationworkflowschema diff --git a/tests/metainfo/test_attributes.py b/tests/metainfo/test_attributes.py index 13edf2b9d098a417b6ad9508697a78415bcff20c..32db3438decb3c5868d99cf098d2d3bc88ea25ed 100644 --- a/tests/metainfo/test_attributes.py +++ b/tests/metainfo/test_attributes.py @@ -18,20 +18,20 @@ import datetime -import pytest import numpy as np +import pytest import pytz from nomad.metainfo import ( - MSection, - Quantity, Attribute, + Datetime, MEnum, + MSection, + Quantity, Reference, - Datetime, Section, ) -from nomad.metainfo.metainfo import MQuantity, Definition +from nomad.metainfo.metainfo import Definition, MQuantity from nomad.metainfo.util import validate_allowable_unit from nomad.units import ureg diff --git a/tests/metainfo/test_data_frames.py b/tests/metainfo/test_data_frames.py new file mode 100644 index 0000000000000000000000000000000000000000..9f1ee888517fef141fb712af31c1f3be6a728c50 --- /dev/null +++ b/tests/metainfo/test_data_frames.py @@ -0,0 +1,469 @@ +import datetime + +import numpy as np +import pandas as pd +import pytest +import xarray as xr + +from nomad.metainfo.data_frames import DataFrameTemplate, ValuesTemplate +from nomad.metainfo.metainfo import Datetime, MEnum, MSection, Package +from nomad.units import ureg + +m_package = Package() + +# Values +ScalarValue = ValuesTemplate( + name='Scalar', + description='Scalar', + shape=[], + type=np.float64, + unit='J', + iri='', +) + +DatetimeValue = ValuesTemplate( + name='Datetime', + description='Datetime', + shape=[], + type=Datetime, + unit='', + iri='', +) + +StringValue = ValuesTemplate( + name='String', + description='String', + shape=[], + type=str, + unit='', + iri='', +) + +BooleanValue = ValuesTemplate( + name='Boolean', + description='Boolean', + shape=[], + type=bool, + unit='', + iri='', +) + +EnumValue = ValuesTemplate( + name='Enum', + description='Enum', + shape=[], + type=MEnum(['A', 'B', 'C']), + unit='', + iri='', +) + +# Data frames +GeneralDataFrame = DataFrameTemplate( + name='GeneralDataFrame', + mandatory_fields=[], +) + +# Examples +Time = ValuesTemplate( + name='Time', + type=np.float64, + shape=[], + unit='s', + iri='https://www.wikidata.org/wiki/Q11471', +) + +Temperature = ValuesTemplate( + name='Temperature', + type=np.float64, + shape=[], + unit='K', + iri='https://www.wikidata.org/wiki/Q11466', +) + +Pressure = ValuesTemplate( + name='Pressure', + type=np.float64, + shape=[], + unit='Pa', + iri='https://www.wikidata.org/wiki/Q39552', +) + +Latitude = ValuesTemplate( + name='Latitude', + description='Latitude', + shape=[], + type=np.float64, + unit='deg', + iri='', +) + +Longitude = ValuesTemplate( + name='Longitude', + description='Longitude', + shape=[], + type=np.float64, + unit='deg', + iri='', +) + +CauchyStressTensor = ValuesTemplate( + name='CauchyStressTensor', + type=np.float64, + shape=[3, 3], + unit='Pa', + iri='https://en.wikipedia.org/wiki/Cauchy_stress_tensor', +) + +Stress = DataFrameTemplate( + name='Stress', + mandatory_fields=[CauchyStressTensor], +) + 
+ProcessConditions = DataFrameTemplate( + name='ProcessConditions', + mandatory_fields=[Temperature, Pressure], + mandatory_variables=[Time], +) + +TemperatureDataFrame = DataFrameTemplate( + name='Temperature', + mandatory_fields=[Temperature], + mandatory_variables=[Longitude, Latitude, Time, StringValue], +) + + +class MySection(MSection): + # Values + datetime_value = DatetimeValue() + string_value = StringValue() + boolean_value = BooleanValue() + enum_value = EnumValue() + scalar_value = ScalarValue() + # Data frames + general_data_frame = GeneralDataFrame() + # Examples + process_conditions = ProcessConditions() + temperature_measurement = TemperatureDataFrame() + stress = Stress() + + +m_package.__init_metainfo__() + + +@pytest.mark.parametrize( + 'values_quantity, input_value, output_value', + [ + pytest.param( + 'scalar_value', + 1.6e-19, + ureg.Quantity(1.6e-19, 'J'), + id='scalar-no-unit', + ), + pytest.param( + 'scalar_value', + ureg.Quantity(1.6e-19, 'J'), + ureg.Quantity(1.6e-19, 'J'), + id='scalar-same-unit', + ), + pytest.param( + 'scalar_value', + ureg.Quantity(1.5, 'kcal'), + ureg.Quantity(6276, 'J'), + id='scalar-different-unit', + ), + pytest.param( + 'string_value', + 'Hello world', + 'Hello world', + id='string', + ), + pytest.param( + 'boolean_value', + True, + True, + id='boolean', + ), + pytest.param( + 'enum_value', + 'A', + 'A', + id='enum', + ), + pytest.param( + 'datetime_value', + datetime.datetime(2021, 1, 1, 12, 0, 0, tzinfo=datetime.timezone.utc), + datetime.datetime(2021, 1, 1, 12, 0, 0, tzinfo=datetime.timezone.utc), + id='datetime', + ), + ], +) +def test_set_values(values_quantity, input_value, output_value): + my_section = MySection() + setattr(my_section, values_quantity, input_value) + assert getattr(my_section, values_quantity) == output_value + + +def test_override_values_template(): + unit = 'degree_Celsius' + description = 'My temperature' + + class Test(MSection): + temperature = Temperature(unit=unit, description=description) + + assert Test.temperature.unit == unit + assert Test.temperature.description == description + + +def test_override_data_frame_template(): + description = 'My DOS' + + class Test(MSection): + my_data_frame = GeneralDataFrame(description=description) + + assert Test.my_data_frame.description == description + + +@pytest.mark.parametrize( + 'values_template, create_args, second_value', + [ + pytest.param( + ScalarValue, + (1.6e-19, 1.7e-19), + ureg.Quantity(1.7e-19, 'J'), + id='multiple_args', + ), + pytest.param( + ScalarValue, ([1.6e-19, 1.7e-19],), ureg.Quantity(1.7e-19, 'J'), id='list' + ), + pytest.param( + ScalarValue, + (np.array([1.6e-19, 1.7e-19]),), + ureg.Quantity(1.7e-19, 'J'), + id='numpy_array', + ), + pytest.param( + ScalarValue, + (ureg.Quantity(1.6e-19, 'J'), ureg.Quantity(1.7e-19, 'J')), + ureg.Quantity(1.7e-19, 'J'), + id='pint_quantity_args', + ), + pytest.param( + ScalarValue, + (ureg.Quantity([1.6e-19, 1.7e-19], 'J'),), + ureg.Quantity(1.7e-19, 'J'), + id='pint_quantity_array', + ), + pytest.param( + StringValue, + ('Hello', 'World'), + 'World', + id='string', + ), + pytest.param( + BooleanValue, + (True, False), + False, + id='boolean', + ), + # pytest.param( + # EnumValue, + # ('A', 'B'), + # 'B', + # id='enum', + # ), + # pytest.param( + # DatetimeValue, + # ( + # datetime.datetime(2021, 1, 1, 12, 0, 0, tzinfo=datetime.timezone.utc), + # datetime.datetime(2021, 1, 2, 12, 0, 0, tzinfo=datetime.timezone.utc), + # ), + # datetime.datetime(2021, 1, 2, 12, 0, 0, tzinfo=datetime.timezone.utc), + # 
id='datetime', + # ) + ], +) +def test_set_data_frame_values( + values_template: ValuesTemplate, create_args, second_value +): + my_section = MySection( + general_data_frame=GeneralDataFrame.create( + fields=[values_template.create(*create_args)] + ), + ) + assert my_section.general_data_frame.fields[0].get_values()[1] == second_value + + +def test_get_data_frame_values(): + my_section = MySection( + general_data_frame=GeneralDataFrame.create( + fields=[ScalarValue.create(1.6e-19, 1.7e-19)] + ), + ) + assert my_section.general_data_frame.get_field(ScalarValue).values[ + 0 + ] == ureg.Quantity(1.6e-19, 'J') + assert my_section.general_data_frame.get_field('Scalar_1').values[ + 0 + ] == ureg.Quantity(1.6e-19, 'J') + + +def test_original_shape(): + my_section = MySection( + stress=Stress.create( + fields=[CauchyStressTensor.create(np.random.rand(3, 3, 4, 5))], + variables=[ + Temperature.create(np.random.rand(4)), + Pressure.create(np.random.rand(5)), + ], + ), + ) + + assert my_section.stress.fields[0].original_shape == [3, 3, 4, 5] + assert my_section.stress.variables[0].original_shape == [4] + assert my_section.stress.fields[0].values.shape == (3, 3, 20) + assert my_section.stress.fields[0].get_values().shape == (3, 3, 4, 5) + + my_section_2 = MySection( + stress=Stress.create( + fields=[ + CauchyStressTensor.create( + np.random.rand(3, 3, 20), original_shape=[3, 3, 4, 5] + ) + ], + variables=[ + Temperature.create(np.random.rand(4)), + Pressure.create(np.random.rand(5)), + ], + ), + ) + + assert my_section_2.stress.fields[0].original_shape == [3, 3, 4, 5] + + +temperature = ureg.Quantity([300.0, 320.0, 340.0], 'K') +pressure = ureg.Quantity([1e5, 1.2e5, 1.4e5], 'Pa') +time = ureg.Quantity([1.0, 2.0, 3.0], 's') + + +@pytest.mark.parametrize( + 'data_frame, ds', + [ + pytest.param( + ProcessConditions.create( + fields=[ + Temperature.create(*temperature, name='my_temp'), + Pressure.create(*pressure), + ], + variables=[Time.create(*time, name='time')], + ), + xr.Dataset( + data_vars=dict( + my_temp=( + ['time'], + temperature, + dict( + units='kelvin', + long_name=None, + description=None, + iri='https://www.wikidata.org/wiki/Q11466', + ), + ), + Pressure_1=( + ['time'], + pressure, + dict( + units='pascal', + long_name=None, + description=None, + iri='https://www.wikidata.org/wiki/Q39552', + ), + ), + ), + coords=dict( + time=( + ['time'], + time, + dict( + units='second', + long_name=None, + description=None, + iri='https://www.wikidata.org/wiki/Q11471', + ), + ), + ), + attrs=dict( + description=None, + long_name=None, + ), + ), + id='single-variable, multiple-fields', + ), + ], +) +def test_to_xarray(data_frame, ds): + my_section = MySection(process_conditions=data_frame) + assert my_section.process_conditions.to_xarray().equals(ds) + + +@pytest.mark.parametrize( + 'data_frame, df', + [ + pytest.param( + ProcessConditions.create( + fields=[ + Temperature.create(*temperature, name='my_temp'), + Pressure.create(*pressure), + ], + variables=[Time.create(*time, name='time')], + ), + pd.DataFrame( + dict(my_temp=temperature.magnitude, Pressure_1=pressure.magnitude), + index=pd.Index(time.magnitude, name='time'), + ), + id='single-variable, multiple-fields', + ), + ], +) +def test_to_pandas(data_frame, df): + my_section = MySection(process_conditions=data_frame) + assert my_section.process_conditions.to_pandas().equals(df) + + +def test_multiple_spanned_dimensions(): + np.random.seed(0) + temperature = 15 + 8 * np.random.randn(2, 2, 3) + lon = np.array([[-99.83, -99.32], [-99.79, -99.23]]) + 
lat = np.array([[42.25, 42.21], [42.63, 42.59]]) + # time = pd.date_range('2014-09-06', periods=3) + # reference_time = pd.Timestamp('2014-09-05') + time = np.arange(3) + reference_time = '2014-09-05' + + ds = xr.DataArray( + data=temperature, + dims=['x', 'y', 'time'], + coords=dict( + lon=(['x', 'y'], lon), + lat=(['x', 'y'], lat), + time=time, + reference_time=reference_time, + ), + attrs=dict( + description='Ambient temperature.', + units='degC', + ), + ).to_dataset(name='temperature') + + my_section = MySection() + my_section.temperature_measurement = TemperatureDataFrame.create( + fields=[Temperature.create(temperature)], + variables=[ + Longitude.create(lon, spanned_dimensions=[0, 1], name='lon'), + Latitude.create(lat, spanned_dimensions=[0, 1], name='lat'), + Time.create(time, spanned_dimensions=[2], name='time'), + StringValue.create(reference_time, name='reference_time'), + ], + ) + + with pytest.raises(NotImplementedError): + my_section.temperature_measurement.to_xarray() diff --git a/tests/metainfo/test_elasticsearch_extension.py b/tests/metainfo/test_elasticsearch_extension.py index d1e4b04da5ada696fbd9a5a10531dfa417c21b65..e9aafb1575baa292c688786fd5d8429d380f1a99 100644 --- a/tests/metainfo/test_elasticsearch_extension.py +++ b/tests/metainfo/test_elasticsearch_extension.py @@ -17,29 +17,28 @@ # from datetime import date -from typing import List -import pytest + import numpy as np +import pytest from elasticsearch_dsl import Keyword from nomad.config import config -from nomad.utils.exampledata import ExampleData from nomad.datamodel.datamodel import SearchableQuantity -from nomad.metainfo import MSection, Quantity, SubSection, Datetime, Unit, MEnum +from nomad.metainfo import Datetime, MEnum, MSection, Quantity, SubSection, Unit from nomad.metainfo.elasticsearch_extension import ( Elasticsearch, create_indices, - index_entries_with_materials, + create_searchable_quantity, + entry_index, entry_type, - material_type, + index_entries_with_materials, material_entry_type, - entry_index, material_index, - create_searchable_quantity, + material_type, ) - -from tests.fixtures.infrastructure import clear_elastic_infra +from nomad.utils.exampledata import ExampleData from tests.app.v1.routers.common import perform_quantity_search_test +from tests.fixtures.infrastructure import clear_elastic_infra @pytest.fixture(scope='module') diff --git a/tests/metainfo/test_full_storage_quantity.py b/tests/metainfo/test_full_storage_quantity.py index 9acb42dcf198b1c1b404dda216cd464160a31381..e08520aad6f4c8e7d27574b015acba2424922218 100644 --- a/tests/metainfo/test_full_storage_quantity.py +++ b/tests/metainfo/test_full_storage_quantity.py @@ -18,12 +18,12 @@ import pytest from nomad.metainfo import ( - MSection, - Quantity, Attribute, - SubSection, MetainfoError, + MSection, + Quantity, Section, + SubSection, ) from nomad.metainfo.util import MQuantity from nomad.units import ureg diff --git a/tests/metainfo/test_hash_id.py b/tests/metainfo/test_hash_id.py index 3c3e44bb2e08569e30e48a0236f148d6b0109e2b..6e2b678431a1432dd43dce2d6475071a190e8e90 100644 --- a/tests/metainfo/test_hash_id.py +++ b/tests/metainfo/test_hash_id.py @@ -1,4 +1,4 @@ -from nomad.metainfo import Quantity, MSection, MEnum +from nomad.metainfo import MEnum, MSection, Quantity def simple_quantity(): diff --git a/tests/metainfo/test_metainfo.py b/tests/metainfo/test_metainfo.py index ea08bba6ca3b6af8324e339dd3db219f178e9207..394d44b588d6d341075f5eefde1857f2bfa15e26 100644 --- a/tests/metainfo/test_metainfo.py +++ 
b/tests/metainfo/test_metainfo.py @@ -20,40 +20,33 @@ # in-depth tests in test_* files of the same module. from math import isnan -import pytest + import numpy as np import pandas as pd import pint.quantity +import pytest -from nomad.metainfo.metainfo import ( - MSection, - MCategory, - Section, - Quantity, - SubSection, - Definition, - Package, - DeriveError, - MetainfoError, - derived, -) from nomad.metainfo import ( Annotation, + AnnotationModel, DefinitionAnnotation, SectionAnnotation, - AnnotationModel, ) -from nomad.metainfo.example import ( - Run, - VaspRun, - System, - SystemHash, - Parsing, - SCC, - m_package as example_package, +from nomad.metainfo.example import SCC, Parsing, Run, System, SystemHash, VaspRun +from nomad.metainfo.example import m_package as example_package +from nomad.metainfo.metainfo import ( + Definition, + DeriveError, + MCategory, + MetainfoError, + MSection, + Package, + Quantity, + Section, + SubSection, + derived, ) from nomad.units import ureg - from tests.metainfo import MTypes diff --git a/tests/metainfo/test_mongodb_extension.py b/tests/metainfo/test_mongodb_extension.py index 3f03c65a6dc97bb47f92e9532971d2e31d38b5f3..ff1687b7660e10954fc98b88fc9283ddd9160b32 100644 --- a/tests/metainfo/test_mongodb_extension.py +++ b/tests/metainfo/test_mongodb_extension.py @@ -17,9 +17,11 @@ # import json + import numpy as np -from nomad.metainfo import MSection, Section, Quantity, SubSection -from nomad.metainfo.mongoengine_extension import MongoDocument, Mongo + +from nomad.metainfo import MSection, Quantity, Section, SubSection +from nomad.metainfo.mongoengine_extension import Mongo, MongoDocument class B(MSection): diff --git a/tests/metainfo/test_package.py b/tests/metainfo/test_package.py index 75d70c3d2c77a30e94bb0c38c65f83585e52b350..583ccb35e6996ba10de5dc7825cb4eaa82795ed4 100644 --- a/tests/metainfo/test_package.py +++ b/tests/metainfo/test_package.py @@ -16,7 +16,7 @@ # limitations under the License. # -from nomad.metainfo import Package, MSection +from nomad.metainfo import MSection, Package m_package = Package(aliases=['nomad.datamodel.test_package']) diff --git a/tests/metainfo/test_quantities.py b/tests/metainfo/test_quantities.py index ad39b0e06313fce8e2dfffb4b6845d420cd79adb..0e6842b2d32fd941bbf2525a576b4d2c217c342f 100644 --- a/tests/metainfo/test_quantities.py +++ b/tests/metainfo/test_quantities.py @@ -24,14 +24,14 @@ import pytest import pytz from nomad.metainfo.metainfo import ( + JSON, + URL, Bytes, Capitalized, Datetime, Dimension, - JSON, MSection, Quantity, - URL, Unit, units, ) diff --git a/tests/metainfo/test_references.py b/tests/metainfo/test_references.py index c9f20fcc985f1ecdc725f3abbe401a6e92fc1e53..8a7f14a00a67c4cac18a049e50c84fa4b6ab52c2 100644 --- a/tests/metainfo/test_references.py +++ b/tests/metainfo/test_references.py @@ -16,25 +16,26 @@ # limitations under the License. 
# +import os.path from typing import cast + import pytest -import os.path -from nomad.datamodel import UserReference, AuthorReference +from nomad.datamodel import AuthorReference, UserReference from nomad.metainfo import ( + Context, + File, + MetainfoReferenceError, + MProxy, MSection, + Package, Quantity, + QuantityReference, + Reference, Section, SubSection, - MProxy, - Reference, - QuantityReference, - File, - MetainfoReferenceError, - Package as MetainfoPackage, - Context, - Package, ) +from nomad.metainfo import Package as MetainfoPackage class Referenced(MSection): @@ -383,7 +384,7 @@ def test_def_reference(): @pytest.mark.parametrize('mainfile', ['intra-entry', 'inter-entry']) def test_parse_with_references(mainfile): - from nomad.client import parse, normalize_all + from nomad.client import normalize_all, parse entry_archive = parse( os.path.join( diff --git a/tests/metainfo/test_sections.py b/tests/metainfo/test_sections.py index 04de671f982ab274ef5ac1f6c363b12a04cad2ab..7ed0276321249910bf0acb51232b3b50504d17b6 100644 --- a/tests/metainfo/test_sections.py +++ b/tests/metainfo/test_sections.py @@ -22,7 +22,7 @@ import pytest from nomad.metainfo import MSection -from nomad.metainfo.metainfo import Package, Quantity, SubSection, Section +from nomad.metainfo.metainfo import Package, Quantity, Section, SubSection def test_base_section(): @@ -251,7 +251,7 @@ def test_path(): assert SubSection.used_sections[ChildSection.m_def] == [EntryArchive.child] assert ChildSection.m_def.path == 'child' - from nomad.datamodel.metainfo.workflow import Workflow, Task + from nomad.datamodel.metainfo.workflow import Task, Workflow assert Workflow.m_def.path == 'workflow2' assert Task.m_def.path == '__no_archive_path__' diff --git a/tests/metainfo/test_to_dict.py b/tests/metainfo/test_to_dict.py index 1069001b29f14a2d5395dcd80cc2a4b2a559f687..8c296a8868d559c7dea801d4bfebb38cfcb75dfa 100644 --- a/tests/metainfo/test_to_dict.py +++ b/tests/metainfo/test_to_dict.py @@ -16,16 +16,16 @@ # limitations under the License. 
# -import pytest import numpy as np +import pytest import yaml from nomad.app.v1.routers.metainfo import ( get_package_by_section_definition_id, store_package_definition, ) -from nomad.metainfo import MSection, MCategory, Quantity, SubSection -from nomad.metainfo.metainfo import Datetime, Package, MEnum, Reference, Definition +from nomad.metainfo import MCategory, MSection, Quantity, SubSection +from nomad.metainfo.metainfo import Definition, MEnum, Package, Reference # resolve_references are tested in .test_references # type specific serialization is tested in .test_quantities @@ -66,7 +66,7 @@ expected_child = dict(**values) expected_root = dict( child=expected_child, children=[expected_child, expected_child], - abstract=dict(m_def='tests.metainfo.test_to_dict.Child', **expected_child), + abstract=dict(m_def='tests.metainfo.test_to_dict.Child', **expected_child), # type: ignore **values, ) diff --git a/tests/metainfo/test_yaml_schema.py b/tests/metainfo/test_yaml_schema.py index 67df9908508c25b5a6885ebe8279250379346687..950d8927c61b1ef84704caeb952c150034381599 100644 --- a/tests/metainfo/test_yaml_schema.py +++ b/tests/metainfo/test_yaml_schema.py @@ -20,18 +20,18 @@ import numpy as np import pytest import yaml -from nomad.utils import strip from nomad.metainfo import ( - Package, + Context, + MetainfoError, + MProxy, MSection, + Package, Quantity, Reference, - SubSection, Section, - MProxy, - MetainfoError, - Context, + SubSection, ) +from nomad.utils import strip m_package = Package() diff --git a/tests/mkdocs/test_mkdocs_metainfo.py b/tests/mkdocs/test_mkdocs_metainfo.py new file mode 100644 index 0000000000000000000000000000000000000000..368288ff1cc8caf825772a45b86aeaf8981ef3d8 --- /dev/null +++ b/tests/mkdocs/test_mkdocs_metainfo.py @@ -0,0 +1,93 @@ +# +# Copyright The NOMAD Authors. +# +# This file is part of NOMAD. See https://nomad-lab.eu for further info. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + + +import pytest + +from nomad.metainfo import Datetime, MSection, Package, Quantity, Reference +from nomad.mkdocs.metainfo import ( + get_property_description, + get_property_type_info, + get_quantity_default, +) + +m_package = Package() + + +class Test(MSection): + pass + + +m_package.__init_metainfo__() + + +@pytest.mark.parametrize( + 'type_, name', + [ + pytest.param(str, '`str`', id='str'), + pytest.param(int, '`int`', id='int'), + pytest.param(float, '`float`', id='float'), + pytest.param(Datetime, '`nomad.metainfo.data_type.Datetime`', id='Datetime'), + pytest.param(Reference(Test), '[`Test`](#test)', id='internal-ref'), + pytest.param( + Reference(Quantity), '`nomad.metainfo.metainfo.Quantity`', id='external-ref' + ), + ], +) +def test_property_type_info(type_, name): + class Test(MSection): + a = Quantity(type=type_) + + name_found = get_property_type_info(Test.m_def.all_properties['a'], pkg=m_package) + assert name_found == name + + +@pytest.mark.parametrize( + 'description', + [ + pytest.param(None, id='no-description'), + pytest.param('This is a test description.', id='string-description'), + ], +) +def test_property_description(description): + class Test(MSection): + a: str = Quantity(description=description) + + description_found = get_property_description(Test.m_def.all_properties['a']) + assert description_found == description + + +@pytest.mark.parametrize( + 'default, default_str', + [ + pytest.param(None, '', id='no-default'), + pytest.param('test', '`test`', id='str-default'), + pytest.param(1, '`1`', id='int-default'), + pytest.param( + {'test': 'test'}, + 'Complex object, default value not displayed.', + id='complex-default', + ), + ], +) +def test_property_default(default, default_str): + class Test(MSection): + a = Quantity(default=default) + + default_found = get_quantity_default(Test.m_def.all_properties['a']) + assert default_found == default_str diff --git a/tests/test_mkdocs.py b/tests/mkdocs/test_mkdocs_pydantic.py similarity index 97% rename from tests/test_mkdocs.py rename to tests/mkdocs/test_mkdocs_pydantic.py index baa0c3300b63a7b49772e33bedc3a6664f5635dc..15a9bda32ebfc639342c8f1f0ce92092d2a4736f 100644 --- a/tests/test_mkdocs.py +++ b/tests/mkdocs/test_mkdocs_pydantic.py @@ -17,17 +17,18 @@ # from enum import Enum +from typing import Annotated, Literal, Optional, Union # type: ignore + import pytest -from typing import Union, List, Dict, Optional -from typing import Literal, Annotated # type: ignore from pydantic import BaseModel, Field + from nomad.config.models.ui import WidgetHistogram, WidgetTerms -from nomad.mkdocs import ( - get_field_type_info, - get_field_description, +from nomad.mkdocs.pydantic import ( get_field_default, - get_field_options, get_field_deprecated, + get_field_description, + get_field_options, + get_field_type_info, ) diff --git a/tests/normalizing/conftest.py b/tests/normalizing/conftest.py index 2ac967401754e234cab41f6c1d59001ad845bbb7..c9c926b861409de40f92b0f30362de8e13718788 100644 --- a/tests/normalizing/conftest.py +++ b/tests/normalizing/conftest.py @@ -15,58 +15,47 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# +import re from collections import defaultdict +from typing import Any +from warnings import warn + +import ase.build import numpy as np -from typing import Any, List, Union -from collections.abc import Iterable import pytest from ase import Atoms -import ase.build -import re -from warnings import warn -import importlib -from nomad.config import config -from nomad.units import ureg -from nomad.utils import get_logger -from nomad.normalizing import normalizers -from nomad.metainfo import SubSection, Quantity -from nomad.datamodel import EntryArchive, ArchiveSection -from nomad.datamodel.results import ( - Relation, - SymmetryNew as Symmetry, - Cell, - WyckoffSet, - System as ResultSystem, -) -from nomad.datamodel.optimade import Species -from nomad.normalizing.common import cell_from_ase_atoms, nomad_atoms_from_ase_atoms -from nomad.datamodel.metainfo.simulation.method import CoreHole -from nomad.datamodel.metainfo.workflow import Workflow -from nomad.datamodel.metainfo.workflow import Link, TaskReference +from nomad.datamodel import ArchiveSection, EntryArchive +from nomad.datamodel.context import ServerContext +from nomad.datamodel.datamodel import EntryArchive, EntryMetadata +from nomad.datamodel.metainfo import runschema, simulationworkflowschema from nomad.datamodel.metainfo.measurements import ( + EELSMeasurement, + Instrument, Measurement, Sample, - EELSMeasurement, Spectrum, - Instrument, ) -from nomad.datamodel.results import EELSInstrument - -from nomad.datamodel.context import ServerContext -from nomad.datamodel.datamodel import EntryArchive, EntryMetadata +from nomad.datamodel.metainfo.simulation.method import CoreHole +from nomad.datamodel.metainfo.workflow import Workflow +from nomad.datamodel.optimade import Species +from nomad.datamodel.results import Cell, EELSInstrument, Relation, WyckoffSet +from nomad.datamodel.results import SymmetryNew as Symmetry +from nomad.datamodel.results import System as ResultSystem +from nomad.metainfo import Quantity, SubSection +from nomad.normalizing import normalizers +from nomad.normalizing.common import cell_from_ase_atoms, nomad_atoms_from_ase_atoms from nomad.parsing.parser import ArchiveParser from nomad.processing.data import Upload -from tests.parsing.test_parsing import parsed_vasp_example # pylint: disable=unused-import -from tests.parsing.test_parsing import parsed_template_example # pylint: disable=unused-import -from tests.parsing.test_parsing import parsed_example # pylint: disable=unused-import -from tests.parsing.test_parsing import parse_file -from tests.test_files import create_test_upload_files -from nomad.datamodel.metainfo import ( - simulationworkflowschema, - runschema, - SCHEMA_IMPORT_ERROR, +from nomad.units import ureg +from nomad.utils import get_logger +from tests.parsing.test_parsing import ( + parse_file, + parsed_example, # noqa: F401 + parsed_template_example, # noqa: F401 + parsed_vasp_example, # noqa: F401 ) +from tests.test_files import create_test_upload_files def run_normalize(entry_archive: EntryArchive) -> EntryArchive: diff --git a/tests/normalizing/test_entry_type_and_name.py b/tests/normalizing/test_entry_type_and_name.py index c11d002e54fe1bde1eeb938e95689d7639f7086f..dffa84450dda6ebb4a292a09a2e706e4744ae2ef 100644 --- a/tests/normalizing/test_entry_type_and_name.py +++ b/tests/normalizing/test_entry_type_and_name.py @@ -16,8 +16,10 @@ # limitations under the License. 
# import pytest + import tests -from .conftest import run_normalize, run_processing + +from .conftest import run_normalize @pytest.fixture(scope='session') diff --git a/tests/normalizing/test_material.py b/tests/normalizing/test_material.py index c2b9289da2f68eb3c767fcfd0c2ea6bbf058dcb1..ab422266ef3aa1d335eae0318ad627944779e1bd 100644 --- a/tests/normalizing/test_material.py +++ b/tests/normalizing/test_material.py @@ -16,18 +16,18 @@ # limitations under the License. # +import ase.build import numpy as np import pytest from ase import Atoms -import ase.build from matid.symmetry.wyckoffset import WyckoffSet # pylint: disable=import-error -from nomad.units import ureg from nomad import atomutils from nomad.config import config -from nomad.utils import hash -from nomad.normalizing.common import ase_atoms_from_nomad_atoms from nomad.datamodel.results import ElementalComposition +from nomad.normalizing.common import ase_atoms_from_nomad_atoms +from nomad.units import ureg +from nomad.utils import hash from tests.normalizing.conftest import get_template_for_structure diff --git a/tests/normalizing/test_metainfo.py b/tests/normalizing/test_metainfo.py index 6fc097c0538354edfb6e2e99f2e87386acbfb943..815ad7df3a7882c324e4bf9b79655b3b0405bc93 100644 --- a/tests/normalizing/test_metainfo.py +++ b/tests/normalizing/test_metainfo.py @@ -17,9 +17,9 @@ # import numpy as np -from nomad.datamodel import EntryData, EntryArchive -from nomad.metainfo import Quantity, SubSection from nomad.client import normalize_all +from nomad.datamodel import EntryArchive, EntryData +from nomad.metainfo import Quantity, SubSection def test_normalizer_level(): diff --git a/tests/normalizing/test_method.py b/tests/normalizing/test_method.py index 483753d8e65c234b4ab841cadf39ad6c3632eed5..1f9992a15b47f9d19a6f8b11eb990aa0027e6866 100644 --- a/tests/normalizing/test_method.py +++ b/tests/normalizing/test_method.py @@ -17,9 +17,10 @@ # import numpy as np -from nomad.units import ureg import pytest +from nomad.units import ureg + def approx(value, abs=0, rel=1e-6): return pytest.approx(value, abs=abs, rel=rel) diff --git a/tests/normalizing/test_properties.py b/tests/normalizing/test_properties.py index ad82111b390280fb02528becf149458c473bcca5..9ad5ae24b2daf9179cab3d23ca33be0686e844fc 100644 --- a/tests/normalizing/test_properties.py +++ b/tests/normalizing/test_properties.py @@ -19,20 +19,20 @@ import numpy as np import pytest +from nomad.datamodel.metainfo import SCHEMA_IMPORT_ERROR, simulationworkflowschema from nomad.units import ureg from .conftest import ( - get_template_dft, - add_template_dos, - get_template_dos, add_template_band_structure, - get_template_band_structure, + add_template_dos, add_template_magnetic_shielding, - add_template_spin_spin_coupling, add_template_magnetic_susceptibility, + add_template_spin_spin_coupling, + get_template_band_structure, + get_template_dft, + get_template_dos, run_normalize, ) -from nomad.datamodel.metainfo import simulationworkflowschema, SCHEMA_IMPORT_ERROR def test_eels(eels): diff --git a/tests/normalizing/test_topology.py b/tests/normalizing/test_topology.py index 7dc058c0834204f2ffc892647a13aacf3c91596f..5940d8e8e9e8c1cbce921ca7adee123c90175eb2 100644 --- a/tests/normalizing/test_topology.py +++ b/tests/normalizing/test_topology.py @@ -16,35 +16,35 @@ # limitations under the License. 
# -import numpy as np from collections import defaultdict + +import numpy as np import pytest -from nomad.client.processing import normalize -from nomad.datamodel.metainfo import runschema +from nomad.datamodel.metainfo import runschema from nomad.units import ureg -from tests.normalizing.conftest import ( # pylint: disable=unused-import - get_template_for_structure, - get_template_topology, +from tests.normalizing.conftest import ( # noqa: F401 + boron_nitride, + boron_nitride_topology, + check_template_active_orbitals, conv_bcc, conv_fcc, - rattle, - run_normalize, - stack, - surf, - single_cu_surface_topology, - single_cr_surface_topology, - stacked_cu_ni_surface_topology, + get_template_active_orbitals, + get_template_computation, + get_template_for_structure, + get_template_topology, graphene, graphene_topology, - boron_nitride, - boron_nitride_topology, mos2, mos2_topology, + rattle, + run_normalize, + single_cr_surface_topology, + single_cu_surface_topology, + stack, + stacked_cu_ni_surface_topology, stacked_graphene_boron_nitride_topology, - get_template_active_orbitals, - check_template_active_orbitals, - get_template_computation, + surf, ) diff --git a/tests/parsing/test_archive_parser.py b/tests/parsing/test_archive_parser.py index b9666aa564265de69aeb7d5bb64f9dcada4f8245..e577bbb0681973c580f493dcbc1e74ca96cc5782 100644 --- a/tests/parsing/test_archive_parser.py +++ b/tests/parsing/test_archive_parser.py @@ -16,14 +16,15 @@ # limitations under the License. # -import pytest import json import os import os.path +import pytest + from nomad.config import config +from nomad.datamodel import Context, EntryArchive from nomad.parsing.parser import ArchiveParser -from nomad.datamodel import EntryArchive, Context def test_archive_parser(raw_files_function): diff --git a/tests/parsing/test_file_parser.py b/tests/parsing/test_file_parser.py index 6f8fb70bb5053104517dda9e920b0cf1c53b7eef..d3a6e0a32c56d6c7f0443a8b54be2f3688dce1fa 100644 --- a/tests/parsing/test_file_parser.py +++ b/tests/parsing/test_file_parser.py @@ -1,15 +1,15 @@ -import pytest import numpy as np import pint -from nomad.units import ureg +import pytest + +from nomad.datamodel.metainfo.system import Atoms from nomad.parsing.file_parser import ( - TextParser, - Quantity, + FileParser, ParsePattern, + Quantity, + TextParser, XMLParser, - FileParser, ) -from nomad.datamodel.metainfo.system import Atoms class TestFileParser: diff --git a/tests/parsing/test_mapping_parser.py b/tests/parsing/test_mapping_parser.py new file mode 100644 index 0000000000000000000000000000000000000000..a772a8b08cc1e8d9d00c5d9a99b281c1754d6563 --- /dev/null +++ b/tests/parsing/test_mapping_parser.py @@ -0,0 +1,595 @@ +from copy import deepcopy +from typing import Any + +import numpy as np +import pytest + +from nomad.datamodel import ArchiveSection +from nomad.datamodel.metainfo.annotations import Mapper as MapperAnnotation +from nomad.metainfo import Quantity, SubSection +from nomad.parsing.file_parser.mapping_parser import ( + MAPPING_ANNOTATION_KEY, + Data, + HDF5Parser, + Mapper, + MappingParser, + MetainfoParser, + Path, + PathParser, + TextParser, + Transformer, + XMLParser, +) +from nomad.parsing.file_parser.text_parser import Quantity as TextQuantity +from nomad.parsing.file_parser.text_parser import TextParser as TextFileParser + + +class BSection(ArchiveSection): + v = Quantity(type=np.float64, shape=[2, 2]) + v.m_annotations[MAPPING_ANNOTATION_KEY] = dict( + xml=MapperAnnotation(mapper='.v'), + hdf5=MapperAnnotation(mapper=('get_v', 
['.v[0].d'])), + ) + + v2 = Quantity(type=str) + v2.m_annotations[MAPPING_ANNOTATION_KEY] = dict( + xml=MapperAnnotation(mapper='.c[0].d[1]'), + hdf5=MapperAnnotation(mapper='g.v[-2]'), + text=MapperAnnotation(mapper='version'), + ) + + v3 = Quantity(type=float) + v3.m_annotations[MAPPING_ANNOTATION_KEY] = dict( + text=MapperAnnotation(mapper='.energy') + ) + + +class CSection(ArchiveSection): + i = Quantity(type=int) + i.m_annotations[MAPPING_ANNOTATION_KEY] = dict( + xml=MapperAnnotation(mapper='.d'), + hdf5=MapperAnnotation(mapper='.i | [1]'), + ) + + e = Quantity(type=str) + e.m_annotations[MAPPING_ANNOTATION_KEY] = dict( + xml=MapperAnnotation(mapper='a.b2.c.e[?"@name"==\'item2\'].k[1] | [0]'), + hdf5=MapperAnnotation(mapper=('to_string', ['.f[?"@index">=`1`].__value'])), + ) + + g = Quantity(type=np.float64, shape=[2, 5]) + g.m_annotations[MAPPING_ANNOTATION_KEY] = dict( + xml=MapperAnnotation(mapper=('slice', ['a.b2.c.f.g.i'])), + hdf5=MapperAnnotation(mapper='g.g.c1.d[:2].e[-5:]'), + ) + + +class B2Section(ArchiveSection): + c = SubSection(sub_section=CSection) + c.m_annotations[MAPPING_ANNOTATION_KEY] = dict( + xml=MapperAnnotation(mapper='.c'), + hdf5=MapperAnnotation(mapper='g.g.c1'), + ) + + b = SubSection(sub_section=BSection) + b.m_annotations[MAPPING_ANNOTATION_KEY] = dict( + xml=MapperAnnotation(mapper='a.b'), + hdf5=MapperAnnotation(mapper='.c'), + ) + + +class ExampleSection(ArchiveSection): + b = SubSection(sub_section=BSection, repeats=True) + b.m_annotations[MAPPING_ANNOTATION_KEY] = dict( + xml=MapperAnnotation(mapper='a.b1'), + hdf5=MapperAnnotation(mapper='.g1'), + text=MapperAnnotation(mapper='calculation'), + ) + b2 = SubSection(sub_section=B2Section) + b2.m_annotations[MAPPING_ANNOTATION_KEY] = dict( + xml=MapperAnnotation(mapper='a.b2'), + hdf5=MapperAnnotation(mapper='g.g'), + ) + + +ExampleSection.m_def.m_annotations[MAPPING_ANNOTATION_KEY] = dict( + xml=MapperAnnotation(mapper='a'), + hdf5=MapperAnnotation(mapper='g'), + text=MapperAnnotation(), +) + + +class ExampleXMLParser(XMLParser): + @staticmethod + def get_eigenvalues_energies(value, n_spin, n_kpoints): + array = np.transpose(value)[0].T + return np.reshape(array, (n_spin, n_kpoints, len(array[0]))) + + @staticmethod + def get_version(version, sub_version, platform): + return ' '.join([' '.join(s.split()) for s in [version, sub_version, platform]]) + + @staticmethod + def slice(value): + return np.array(value)[2:] + + +class ExampleHDF5Parser(HDF5Parser): + @staticmethod + def get_v(value): + return np.array(value)[1:, :2] + + @staticmethod + def to_string(value): + return '-'.join([str(n) for n in value]) + + +class ExampleParser(MappingParser): + def from_dict(self, dct: dict[str, Any]): + return super().from_dict(dct) # type: ignore + + def load_file(self) -> Any: + return super().load_file() + + def to_dict(self, **kwargs) -> dict[str, Any]: + return super().to_dict(**kwargs) + + def slice(self, value): + return value[1] + + +@pytest.fixture(scope='module') +def text_parser() -> TextParser: + outcar_parser = TextFileParser( + quantities=[ + TextQuantity('version', r'vasp\.([\S]+)'), + TextQuantity( + 'calculation', + r'(FREE ENERGIE OF THE ION\-ELECTRON SYSTEM[\s\S]+?entropy.+)', + repeats=True, + sub_parser=TextFileParser( + quantities=[ + TextQuantity( + 'energy', + r'free\s*energy\s*TOTEN\s*=\s*([\-\d\.]+)', + dtype=float, + ) + ] + ), + ), + ] + ) + return TextParser( + text_parser=outcar_parser, filepath='tests/data/parsers/vasp_outcar/OUTCAR' + ) + + +@pytest.fixture(scope='module') +def 
xml_parser() -> ExampleXMLParser: + return ExampleXMLParser(filepath='tests/data/parsing/file_parser/test.xml') + + +@pytest.fixture(scope='module') +def hdf5_parser() -> ExampleHDF5Parser: + return ExampleHDF5Parser(filepath='tests/data/parsing/file_parser/test.h5') + + +@pytest.fixture(scope='module') +def archive_parser() -> MetainfoParser: + return MetainfoParser() + + +@pytest.fixture(scope='module') +def data(): + return { + 'a': { + 'b': [ + {'c': {'d': 1, 'e': 'x'}, 'f': [1.0, 2.0]}, + {'c': 2, 'd': [{'e': 'y', 'f': np.eye(2)}]}, + ], + 'c': [ + {'n': 'x', 'v': 1}, + {'n': 'y', 'v': 2}, + ], + }, + 'b': [ + { + 'c': [ + {'d': 3, 'e': [[1, 2], [3, 4]]}, + {'d': 4, 'e': [[1, 0], [2, 0]]}, + ] + }, + {'c': {'d': 1, 'e': 'z'}}, + ], + 'c': [ + {'n': 1, 'v': 'a'}, + {'n': 2, 'v': 'b'}, + ], + } + + +def assert_equal(v1, v2): + if isinstance(v1, dict): + for key, val in v1.items(): + assert key in v2 + assert_equal(val, v2[key]) + elif isinstance(v1, list): + assert isinstance(v2, list) + for n, v in enumerate(v1): + assert_equal(v, v2[n]) + else: + equal = v1 == v2 + assert equal.all() if isinstance(equal, np.ndarray) else equal + + +class TestPath: + @pytest.mark.parametrize( + 'path, result', + [ + pytest.param('a.b[1].c', 2), + pytest.param('b[0].c[0:2].d', [3, 4]), + pytest.param('b[0].c[0:2].e', [[[1, 2], [3, 4]], [[1, 0], [2, 0]]]), + pytest.param('a.b[1].d[0].f', np.eye(2)), + pytest.param('b[1].c.e', 'z'), + pytest.param('a[1].b.c.d', None), + pytest.param("a.c[?n=='x'].v | [0]", 1), + pytest.param('c[?n>=`1`].v | [1]', 'b'), + ], + ) + def test_get_data(self, path, result, data): + path = Path(path=path) + value = path.get_data(data) + assert_equal(value, result) + + @pytest.mark.parametrize( + 'path, data, target, result', + [ + pytest.param('a.b', 'x', {}, 'x'), + pytest.param('a[1:3].b.c[0].d[0:3]', 'y', {}, [['y'] * 3] * 2), + pytest.param('a.b[0:3].c[1:2]', [[1], [2], [3]], {}, [[1], [2], [3]]), + pytest.param('a[1:4].b', ['a', 'b', 'c'], {}, ['a', 'b', 'c']), + pytest.param('a[0:2].b.c', [1, 2, 3], {}, [[1, 2, 3]] * 2), + pytest.param( + 'a[0].b[0:2].c[0]', + [['x'], ['y']], + {'a': [{'b': [{'c': ['a']}, {'c': ['b']}]}]}, + ['x', 'y'], + ), + pytest.param( + 'a[0].b[0:2]', + [['x'], ['y']], + {'a': [{'b': ['a', 'b']}]}, + [['x'], ['y']], + ), + ], + ) + def test_set_data(self, path, data, target, result): + path = Path(path=path) + path.set_data(data, target) + value = path.get_data(target) + assert_equal(value, result) + + +class TestMapper: + @pytest.mark.parametrize( + 'dct, expected', + [ + pytest.param( + dict(source='a', target='b', mapper='v'), + Transformer( + source=Data(path=Path(path='a')), + target=Data(path=Path(path='b')), + function_args=[Path(path='v')], + ), + ), + pytest.param( + dict(source='a', path='.v', path_parser='jsonpath_ng'), + Transformer( + source=Data( + path=Path( + path='a', parser=PathParser(parser_name='jsonpath_ng') + ), + path_parser=PathParser(parser_name='jsonpath_ng'), + ), + function_args=[ + Path( + parser=PathParser(parser_name='jsonpath_ng'), + path='.v', + parent=Path( + path='a', parser=PathParser(parser_name='jsonpath_ng') + ), + ) + ], + ), + ), + pytest.param( + dict(target='b', source='a', mapper=('eval', ['.a', 'b'])), + Transformer( + source=Data(path=Path(path='a')), + target=Data(path=Path(path='b')), + function_name='eval', + function_args=[ + Path(path='.a', parent=Path(path='a')), + Path(path='b'), + ], + ), + ), + pytest.param( + dict( + source=Data( + transformer=Transformer(function_args=[Path(path='a')]) + ), + 
function_name='eval', + function_args=[Path(path='.b')], + ), + Transformer( + source=Data( + transformer=Transformer(function_args=[Path(path='a')]) + ), + function_name='eval', + function_args=[Path(path='.b', parent=Path(path='a'))], + ), + ), + pytest.param( + dict( + source='a', + remove=True, + mapper=[ + dict(path='.c', source='.b'), + dict( + path_parser='jsonpath_ng', + mapper=['eval', ['.x']], + remove=False, + ), + dict( + path_parser='jsonpath_ng', + mapper=[dict(mapper='.d', remove=True)], + source='.b', + remove=False, + ), + ], + ), + Mapper( + source=Data(path=Path(path='a')), + remove=True, + mappers=[ + Transformer( + function_args=[ + Path( + path='.c', + parent=Path(path='.b', parent=Path(path='a')), + ) + ], + source=Data( + path=Path(path='.b', parent=Path(path='a')), + parent=Path(path='a'), + ), + remove=True, + ), + Transformer( + function_name='eval', + function_args=[ + Path( + path='.x', + parser=PathParser(parser_name='jsonpath_ng'), + parent=Path(path='a'), + ) + ], + remove=False, + ), + Mapper( + mappers=[ + Transformer( + function_args=[ + Path( + path='.d', + parent=Path( + path='.b', + parser=PathParser( + parser_name='jsonpath_ng' + ), + parent=Path(path='a'), + ), + ) + ], + remove=True, + ) + ], + source=Data( + path=Path( + path='.b', + parser=PathParser(parser_name='jsonpath_ng'), + parent=Path(path='a'), + ), + path_parser=PathParser(parser_name='jsonpath_ng'), + parent=Path(path='a'), + ), + remove=False, + ), + ], + ), + ), + ], + ) + def test_from_dict(self, dct, expected): + def assert_mappers_equal(m1, m2): + assert isinstance(m1, type(m2)) + assert m1.source == m2.source + assert m1.target == m2.target + assert m1.remove == m2.remove + assert m1.indices == m2.indices + if isinstance(m1, Mapper): + for n, sm1 in enumerate(m1.mappers): + assert_mappers_equal(sm1, m2.mappers[n]) + elif isinstance(m1, Transformer): + assert m1.function_name == m2.function_name + for n, arg in enumerate(m1.function_args): + assert arg == m2.function_args[n] + + mapper = Mapper.from_dict(dct) + assert_mappers_equal(mapper, expected) + + @pytest.mark.parametrize('remove', [True, False]) + @pytest.mark.parametrize( + 'mapper, expected', + [ + pytest.param( + Mapper( + mappers=[ + Transformer( + source=Data( + path=Path( + path='a.b', + parser=PathParser(parser_name='jsonpath_ng'), + ) + ), + function_args=[ + Path( + path='.f', + parser=PathParser(parser_name='jsonpath_ng'), + ) + ], + target=Data(path=Path(path='x')), + ) + ] + ), + dict(x=[1.0, 2.0]), + ), + pytest.param( + Mapper( + mappers=[ + Transformer( + source=Data( + transformer=Transformer( + function_args=[Path(path='a.b')] + ) + ), + function_args=[Path(path='.f')], + target=Data(path=Path(path='x')), + ) + ], + ), + dict(x=[1.0, 2.0]), + ), + pytest.param( + Mapper( + mappers=[ + Transformer( + source=Data( + transformer=Transformer( + function_args=[Path(path='b[0].c')] + ) + ), + function_args=[Path(path='.e')], + function_name='slice', + target=Data(path=Path(path='x')), + ) + ] + ), + dict(x=[3.0, 4.0]), + ), + pytest.param( + Mapper( + mappers=[ + Mapper( + source=Data(path=Path(path='a')), + mappers=[ + Mapper( + mappers=[ + Transformer( + function_args=[Path(path='.d[0].e')], + target=Data(path=Path(path='z')), + ) + ], + source=Data(path=Path(path='.b')), + indices=None, + target=Data(path=Path(path='y')), + ) + ], + target=Data(path=Path(path='x')), + ), + Transformer( + function_args=[Path(path='c[?n==`2`].v | [0]')], + target=Data(path=Path(path='x2')), + ), + ] + ), + 
dict(x=dict(y=dict(z='y')), x2='b'), + ), + ], + ) + def test_get_data(self, data, remove, mapper, expected): + source = deepcopy(data) + parser = ExampleParser(data=source) + mapper.remove = remove + result = mapper.get_data(source, parser) + assert_equal(expected, result) + if remove: + assert not mapper.get_data(source, parser) + + +class TestMappingParser: + def test_convert_xml_to_archive(self, xml_parser, archive_parser): + archive_parser.annotation_key = 'xml' + archive_parser.data_object = ExampleSection(b=[BSection(v=np.eye(2))]) + + xml_parser.convert(archive_parser, update_mode='append') + archive = archive_parser.data_object + assert len(archive.b) == 3 + assert archive.b[0].v[0][0] == 1.0 + assert archive.b[1].v[1][0] == 3.0 + assert archive.b[2].v[1][1] == 8.0 + assert archive.b[2].v2 == 'b' + assert archive.b2.c.i == 1 + assert archive.b2.c.e == 'f4' + assert archive.b2.c.g[1][2] == 8 + xml_parser.close() + + def test_convert_archive_to_xml(self, xml_parser, archive_parser): + archive_parser.data_object = ExampleSection(b=[BSection(v=np.eye(2))]) + xml_parser.mapper = Mapper( + mappers=[ + Mapper( + target=Data(path=Path(path='a')), + mappers=[ + Mapper( + target=Data(path=Path(path='.b1')), + mappers=[ + Transformer( + function_args=[Path(path='.v')], + target=Data(path=Path(path='.v')), + ) + ], + source=Data(path=Path(path='b')), + ) + ], + ) + ], + ) + xml_parser.filepath = None + archive_parser.convert(xml_parser) + assert xml_parser.data_object.findall('b1')[0].findall('v')[1].text == '0.0 1.0' + xml_parser.close() + + def test_convert_hdf5_to_archive(self, hdf5_parser, archive_parser): + archive_parser.annotation_key = 'hdf5' + archive_parser.data_object = ExampleSection(b=[BSection(v=np.eye(2))]) + hdf5_parser.convert(archive_parser, update_mode='merge') + archive = archive_parser.data_object + assert archive.b[0].v[1][1] == 1.0 + assert archive.b[0].v2 == 'y' + assert archive.b2.c.i == 6 + assert archive.b2.c.e == '2-1' + assert archive.b2.c.g[1][3] == 9 + assert archive.b2.b.v[0][1] == 1 + hdf5_parser.close() + + def test_convert_text_to_archive(self, text_parser, archive_parser): + archive_parser.annotation_key = 'text' + archive_parser.data_object = ExampleSection(b=[BSection(v=np.eye(2))]) + text_parser.convert(archive_parser, update_mode='replace') + archive = archive_parser.data_object + assert len(archive.b) == 3 + assert archive.b[0].v2 == '5.3.2' + assert archive.b[2].v3 == -7.14173545 + text_parser.close() diff --git a/tests/parsing/test_parsing.py b/tests/parsing/test_parsing.py index adfbece7378ccc90039d204d6b9a42ca1e502ba5..176679775b011990a7309df94b7dea15d2bc2e1f 100644 --- a/tests/parsing/test_parsing.py +++ b/tests/parsing/test_parsing.py @@ -19,13 +19,13 @@ import json import os from shutil import copyfile -from unittest.mock import patch, MagicMock +from unittest.mock import MagicMock, patch import pytest from nomad import files, utils from nomad.datamodel import EntryArchive -from nomad.parsing import BrokenParser, MatchingParserInterface, MatchingParser +from nomad.parsing import BrokenParser, MatchingParser, MatchingParserInterface from nomad.parsing.parsers import match_parser, parser_dict, parsers, run_parser from nomad.utils import dump_json diff --git a/tests/parsing/test_tabular.py b/tests/parsing/test_tabular.py index c1554851b48f49e518c3955794a164ecaca2fe12..7acf5017b8cbd3856ac4b7bb958a30f752878888 100644 --- a/tests/parsing/test_tabular.py +++ b/tests/parsing/test_tabular.py @@ -15,27 +15,24 @@ # See the License for the specific language 
governing permissions and # limitations under the License. # -from io import StringIO -from unittest.mock import MagicMock, patch -import pandas as pd -import pytest +import datetime import os import os.path import re -import datetime + +import pytest import yaml +from nomad import files from nomad.config import config -from nomad.datamodel.datamodel import EntryArchive, EntryMetadata from nomad.datamodel.context import ClientContext +from nomad.datamodel.datamodel import EntryArchive, EntryMetadata +from nomad.parsing.parser import ArchiveParser from nomad.parsing.tabular import read_table_data +from nomad.processing import Entry, ProcessStatus, Upload from nomad.utils import generate_entry_id, strip -from nomad.parsing.parser import ArchiveParser from tests.normalizing.conftest import run_normalize -from nomad.processing import Upload, Entry -from nomad.processing import ProcessStatus -from nomad import files def quantity_generator(quantity_name, header_name, shape, to_dict=False): diff --git a/tests/processing/test_base.py b/tests/processing/test_base.py index 3a3384ddc1f2c450ac1f5664c89f1ff566febc34..45fbe4560311a106976ca52e1cd65509564333f2 100644 --- a/tests/processing/test_base.py +++ b/tests/processing/test_base.py @@ -15,21 +15,20 @@ # See the License for the specific language governing permissions and # limitations under the License. # -import pytest -import json import random -import time import threading -from typing import List, Any, Union +import time +from typing import Any -from mongoengine import StringField, IntField, ListField +import pytest +from mongoengine import IntField, ListField, StringField from nomad.processing.base import ( Proc, ProcessAlreadyRunning, + ProcessStatus, process, process_local, - ProcessStatus, ) random.seed(0) diff --git a/tests/processing/test_data.py b/tests/processing/test_data.py index 121dc06fbfd1613b5cc187bd56a258617973d009..d538f39a91a3f52cfd8128a1bbbeda7da4edf1fd 100644 --- a/tests/processing/test_data.py +++ b/tests/processing/test_data.py @@ -16,40 +16,39 @@ # limitations under the License. 
# -from typing import Tuple, Dict -from collections.abc import Generator -import pytest +import json import os.path import re import shutil import zipfile -import json +from collections.abc import Generator + +import pytest import yaml -from nomad import utils, infrastructure +from nomad import infrastructure, utils +from nomad.archive import read_partial_archive_from_mongo, to_json from nomad.config import config from nomad.config.models.config import BundleImportSettings -from nomad.archive import read_partial_archive_from_mongo, to_json -from nomad.files import UploadFiles, StagingUploadFiles, PublicUploadFiles -from nomad.parsing.parser import Parser -from nomad.parsing import parsers from nomad.datamodel import ServerContext from nomad.datamodel.data import EntryData +from nomad.datamodel.datamodel import ArchiveSection, EntryArchive, EntryData +from nomad.files import PublicUploadFiles, StagingUploadFiles, UploadFiles from nomad.metainfo import Package, Quantity, Reference, SubSection -from nomad.processing import Upload, Entry, ProcessStatus -from nomad.search import search, refresh as search_refresh +from nomad.parsing import parsers +from nomad.parsing.parser import Parser +from nomad.processing import Entry, ProcessStatus, Upload +from nomad.search import refresh as search_refresh +from nomad.search import search from nomad.utils.exampledata import ExampleData -from nomad.datamodel.datamodel import EntryArchive, EntryData, ArchiveSection - -from tests.test_search import assert_search_upload from tests.test_files import ( assert_upload_files, - example_file_mainfile, example_file_aux, + example_file_mainfile, ) +from tests.test_search import assert_search_upload from tests.utils import create_template_upload_file, set_upload_entry_metadata - # Package with some metainfo schemas used only for testing. m_package = Package(name='test_schemas') diff --git a/tests/processing/test_edit_metadata.py b/tests/processing/test_edit_metadata.py index 6d53da2b890cc7743bfbb0f527e40ff97713dbc0..96963944b85296fcf2e4c2418964f14ca579a514 100644 --- a/tests/processing/test_edit_metadata.py +++ b/tests/processing/test_edit_metadata.py @@ -15,18 +15,17 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# -import pytest from datetime import datetime +import pytest from fastapi.exceptions import RequestValidationError from nomad import datamodel, metainfo from nomad.metainfo.data_type import Datatype -from nomad.processing import Upload, MetadataEditRequestHandler +from nomad.processing import MetadataEditRequestHandler, Upload from nomad.processing.data import editable_metadata, mongo_upload_metadata from nomad.search import search - all_coauthor_metadata = dict( # All attributes which a coauthor+ can edit upload_name='a humble upload name', diff --git a/tests/processing/test_rfc3161.py b/tests/processing/test_rfc3161.py index e559a53246882e5154175d2a570a6bc61bd8ca48..28f1bcc80ba94bbadbdc0bd0192b2e84786cd96a 100644 --- a/tests/processing/test_rfc3161.py +++ b/tests/processing/test_rfc3161.py @@ -23,9 +23,9 @@ import httpx import pytest import rfc3161ng -from nomad.archive import write_archive, read_archive, to_json +from nomad.archive import read_archive, to_json, write_archive from nomad.datamodel.datamodel import RFC3161Timestamp -from nomad.processing.data import get_rfc3161_token, Entry +from nomad.processing.data import Entry, get_rfc3161_token @pytest.mark.parametrize( diff --git a/tests/states/archives/create_archives.py b/tests/states/archives/create_archives.py index bcee46b0a307349c098720aed9eb54741e85b34a..0b53682f6dbcf3b0d580c005c09522d197edcf6c 100644 --- a/tests/states/archives/create_archives.py +++ b/tests/states/archives/create_archives.py @@ -17,6 +17,7 @@ # import math + from nomad.utils.exampledata import create_entry_archive diff --git a/tests/states/entry.py b/tests/states/entry.py index 767a77b37087849e30d2dfa81b850bf6edb45490..bbd97182e0c9389beb0731d255391002118eb053 100644 --- a/tests/states/entry.py +++ b/tests/states/entry.py @@ -17,10 +17,12 @@ # import json -from nomad import infrastructure, files + +from nomad import files, infrastructure +from nomad.processing import Upload from nomad.utils.exampledata import ExampleData, create_entry_archive + from .archives.create_archives import archive_dft_bulk -from nomad.processing import Upload def dft(): diff --git a/tests/states/search.py b/tests/states/search.py index e3ef1fc19c585b0ceb5f4cea7d443f897b0047af..6a53a211cc259d5579aa653b461a6c9ab2ff5864 100644 --- a/tests/states/search.py +++ b/tests/states/search.py @@ -16,10 +16,11 @@ # limitations under the License. # from datetime import datetime, timedelta + from nomad import infrastructure -from nomad.utils import create_uuid -from nomad.units import ureg from nomad.atomutils import chemical_symbols +from nomad.units import ureg +from nomad.utils import create_uuid from nomad.utils.exampledata import ExampleData material_h2o = { diff --git a/tests/states/uploads.py b/tests/states/uploads.py index 52ae1ed4e99129aef629545183df7d84693ee127..a79aa893b8bcb3aac07d135887748803d01aca5b 100644 --- a/tests/states/uploads.py +++ b/tests/states/uploads.py @@ -16,11 +16,12 @@ # limitations under the License. 
# -from nomad import infrastructure, files +from nomad import files, infrastructure from nomad.processing import Upload from nomad.utils.exampledata import ExampleData + from .archives.create_archives import archive_dft_bulk -from .groups import init_gui_test_groups, delete_group +from .groups import delete_group, init_gui_test_groups default_access = {'coauthors': ['scooper'], 'reviewers': ['ttester']} twin_access = { diff --git a/tests/test_atomutils.py b/tests/test_atomutils.py index 97655c0eded200dafd7bea8bf237a0bf35a3dd85..e9dc5f5561a5d24803bcc0369b5a081cc73b5ecc 100644 --- a/tests/test_atomutils.py +++ b/tests/test_atomutils.py @@ -16,8 +16,9 @@ # limitations under the License. # import pytest + from nomad.atomutils import Formula -from nomad.datamodel.results import Material, ElementalComposition +from nomad.datamodel.results import ElementalComposition, Material @pytest.mark.parametrize( diff --git a/tests/test_cli.py b/tests/test_cli.py index 3ee028698b5e5098b2761c0f25f3066d413e869e..1350894136449b375906f4937adf8020ef4cfaf0 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -16,20 +16,22 @@ # limitations under the License. # -import pytest -import click.testing +import datetime import json import os -import datetime -import time import tempfile +import time -from nomad import processing as proc, files -from nomad.config import config -from nomad.search import search +import click.testing +import pytest + +from nomad import files +from nomad import processing as proc from nomad.cli import cli from nomad.cli.cli import POPO -from nomad.processing import Upload, Entry, ProcessStatus +from nomad.config import config +from nomad.processing import Entry, ProcessStatus, Upload +from nomad.search import search from nomad.utils.exampledata import ExampleData # TODO there is much more to test diff --git a/tests/test_client.py b/tests/test_client.py index 75956e8a604f67efc7dbdb7fdb0e54e2725a3037..35211bf701cd3aeed9391f3b6dc89707a19b5e68 100644 --- a/tests/test_client.py +++ b/tests/test_client.py @@ -15,29 +15,23 @@ # See the License for the specific language governing permissions and # limitations under the License. 
 #
+import json
 import os
-from typing import List, Tuple
-from httpx import AsyncClient

 import pytest
-import json
-
+from httpx import AsyncClient
 from pydantic import ValidationError

 from nomad.app.main import app
 from nomad.client.archive import ArchiveQuery
 from nomad.datamodel import EntryArchive, User
-from nomad.datamodel.metainfo import runschema, SCHEMA_IMPORT_ERROR
-from nomad.datamodel.metainfo.annotations import (
-    Rule,
-    Rules,
-)
+from nomad.datamodel.metainfo import SCHEMA_IMPORT_ERROR, runschema
+from nomad.datamodel.metainfo.annotations import Rule, Rules
 from nomad.metainfo import MSection, SubSection
 from nomad.utils.json_transformer import Transformer
 from tests.fixtures.users import users
 from tests.processing import test_data as test_processing

-
 # TODO: more tests
diff --git a/tests/test_common.py b/tests/test_common.py
index 919538e69ef708a3e7d1fcec9b9ac4a090f83316..432be865b7eb05c5376cb65f6fda6915865fbda0 100644
--- a/tests/test_common.py
+++ b/tests/test_common.py
@@ -17,7 +17,8 @@
 #
 import pytest
-from nomad.common import is_safe_relative_path, is_safe_path
+
+from nomad.common import is_safe_path, is_safe_relative_path


 @pytest.mark.parametrize(
diff --git a/tests/test_config.py b/tests/test_config.py
index 7cc0f5070576d2c59b4fed4bd1acf8cebadb24e2..20fbe5d18f437c9b80ab850331410861264a4da8 100644
--- a/tests/test_config.py
+++ b/tests/test_config.py
@@ -18,12 +18,13 @@

 import os
 import re
+
 import pytest
 import yaml
 from pydantic import ValidationError

 from nomad.config import load_config
-from nomad.config.models.plugins import Parser, Schema, ParserEntryPoint
+from nomad.config.models.plugins import Parser, ParserEntryPoint, Schema
 from nomad.utils import flatten_dict

 from .utils import assert_log
diff --git a/tests/test_doi.py b/tests/test_doi.py
index 60498040f7d707584a64bc152bcaf5729b268f54..bcb4e61df01b8f202029b884856f0fb0976f3f22 100644
--- a/tests/test_doi.py
+++ b/tests/test_doi.py
@@ -15,10 +15,12 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
+from unittest.mock import MagicMock
+
+import pytest
+
 from nomad.config import config
 from nomad.doi import DOI, DOIException
-import pytest
-from unittest.mock import MagicMock


 def test_create(mongo_function, user1, no_warn):
diff --git a/tests/test_files.py b/tests/test_files.py
index 77fc88fcf47e91ed4585cc9cc6df94a769059e7f..e2ca1d432ffd9f22b104eb4356b5d3b2a0d6a01b 100644
--- a/tests/test_files.py
+++ b/tests/test_files.py
@@ -16,31 +16,33 @@
 # limitations under the License.
 #
-from typing import Any, Dict, Tuple, List, Union
-from collections.abc import Generator, Iterable
-from datetime import datetime
+import itertools
 import os
 import os.path
+import pathlib
+import re
 import shutil
-import pytest
-import itertools
 import zipfile
-import re
-import pathlib
+from collections.abc import Generator, Iterable
+from datetime import datetime
+from typing import Any
+
+import pytest

 from nomad import datamodel, utils
-from nomad.config import config
 from nomad.archive import to_json
+from nomad.config import config
 from nomad.files import (
     DirectoryObject,
     PathObject,
-    empty_zip_file_size,
+    PublicUploadFiles,
+    StagingUploadFiles,
+    UploadFiles,
     empty_archive_file_size,
+    empty_zip_file_size,
 )
-from nomad.files import StagingUploadFiles, PublicUploadFiles, UploadFiles
 from nomad.processing import Upload

-
 EntryWithFiles = tuple[datamodel.EntryMetadata, str]
 UploadWithFiles = tuple[str, list[datamodel.EntryMetadata], UploadFiles]
 StagingUploadWithFiles = tuple[str, list[datamodel.EntryMetadata], StagingUploadFiles]
diff --git a/tests/test_logtransfer.py b/tests/test_logtransfer.py
index b67262fbeedf50b7db7a447f8e0e58da2374c5fb..122c727b07a823fff4cd0d3293b405c4f687c394 100644
--- a/tests/test_logtransfer.py
+++ b/tests/test_logtransfer.py
@@ -1,11 +1,12 @@
-import logging
 import json
-import pytest
+import logging
 import os.path

+import pytest
+
 from nomad import config, utils
-from nomad.utils import structlogging
 from nomad.logtransfer import transfer_logs
+from nomad.utils import structlogging


 @pytest.fixture(scope='function')
diff --git a/tests/test_search.py b/tests/test_search.py
index 06dd47db9e4621ff0bb0354be80d0c38ad692607..0e6eeb1f709b52d9fff347b04f1f0a243ffe8edf 100644
--- a/tests/test_search.py
+++ b/tests/test_search.py
@@ -17,9 +17,9 @@
 #

 import json
+from collections.abc import Iterable
 from datetime import datetime
-from typing import Any, Dict, List, Union
-from collections.abc import Iterable, Sequence
+from typing import Any

 import pytest

@@ -41,15 +41,8 @@ from nomad.metainfo.elasticsearch_extension import (
 )
 from nomad.metainfo.metainfo import Datetime, Quantity
 from nomad.metainfo.util import MEnum
-from nomad.search import (
-    AuthenticationRequiredError as ARE,
-)
-from nomad.search import (
-    quantity_values,
-    refresh,
-    search,
-    update_by_query,
-)
+from nomad.search import AuthenticationRequiredError as ARE
+from nomad.search import quantity_values, refresh, search, update_by_query
 from nomad.utils import deep_get
 from nomad.utils.exampledata import ExampleData
 from tests.variables import python_schema_name, yaml_schema_name
diff --git a/tests/test_test.py b/tests/test_test.py
index bbca3fb46962648c5bad3662940c5f5101f761d5..ef2353b7ac48692678fcf6afcc0de6017e366d3d 100644
--- a/tests/test_test.py
+++ b/tests/test_test.py
@@ -16,9 +16,10 @@
 # limitations under the License.
 #
-import pytest
 import logging
+
+import pytest


 @pytest.fixture()
 def my_caplog(caplog):
diff --git a/tests/test_utils.py b/tests/test_utils.py
index 1853751a0b4e5c4d6cea5de11ae5e5ed1fe5580f..11f38f65f18387fd3e9db8633888d4009e239b69 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -17,23 +17,23 @@
 #

 import time
-import json
-import pytest
+from importlib.metadata import PackageNotFoundError
+
 import pandas as pd
-import logging
+import pytest

-from nomad import utils, config
+from nomad import files, utils
 from nomad.metainfo.metainfo import MSection, Quantity, SubSection
-from nomad import files
 from nomad.processing import Upload
 from nomad.utils import (
-    structlogging,
-    flatten_dict,
-    rebuild_dict,
-    prune_dict,
+    dataframe_to_dict,
     deep_get,
     dict_to_dataframe,
-    dataframe_to_dict,
+    flatten_dict,
+    nomad_distro_metadata,
+    prune_dict,
+    rebuild_dict,
+    structlogging,
 )


@@ -315,3 +315,55 @@ class TestDictDataFrameConverter:
     def test_invalid_input_type(self, invalid_input):
         with pytest.raises(ValueError, match='Input must be a dictionary'):
             dict_to_dataframe(invalid_input)
+
+
+@pytest.mark.parametrize(
+    'project_urls, version_str, expected_url',
+    [
+        (
+            ['repository, https://github.com/example/repo'],
+            '1.2.3+gabcdef',
+            'https://github.com/example/repo/tree/abcdef',
+        ),
+        (['notrepository, https://github.com/example/repo'], '1.2.3+gabcdef', None),
+        (
+            ['repository, https://github.com/example/repo'],
+            '1.2.3',
+            'https://github.com/example/repo/tree/v1.2.3',
+        ),
+        ([], '1.2.3+gabcdef', None),
+        (['repository, '], '1.2.3+gabcdef', None),
+    ],
+)
+def test_nomad_distro_metadata(monkeypatch, project_urls, version_str, expected_url):
+    def mock_metadata(package_name):
+        class MockMetadata:
+            def get_all(self, key, default=[]):
+                if key == 'Project-URL':
+                    return project_urls
+                return default
+
+        return MockMetadata()
+
+    def mock_version(package_name):
+        return version_str
+
+    monkeypatch.setattr('nomad.utils.metadata', lambda x: mock_metadata(x))
+    monkeypatch.setattr('nomad.utils.version', mock_version)
+
+    actual_url = nomad_distro_metadata()
+    assert actual_url == expected_url
+
+
+def test_nomad_distro_package_not_found(monkeypatch):
+    def mock_metadata(package_name):
+        raise PackageNotFoundError
+
+    def mock_version(package_name):
+        return '1.2.3'
+
+    monkeypatch.setattr('nomad.utils.metadata', lambda x: mock_metadata(x))
+    monkeypatch.setattr('nomad.utils.version', mock_version)
+
+    actual_url = nomad_distro_metadata()
+    assert actual_url is None
diff --git a/tests/utils.py b/tests/utils.py
index cfd95f936b141015af038242dcb117dbab630fd3..c24fe475952acda078d98d74bebfc980be751012 100644
--- a/tests/utils.py
+++ b/tests/utils.py
@@ -18,13 +18,12 @@

 """Methods to help with testing of nomad@FAIRDI."""

-import json
 import os.path
 import urllib.parse
 import zipfile
 from logging import LogRecord
-from typing import Any, Dict, List, Union
-from structlog import get_logger
+from typing import Any
+
 import pytest