diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 7da3dfc7396c74d55120ee14c7eac41e05675d63..3e5448d5b29d1b77c6460aefc0533ce30aae7e72 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -1,27 +1,33 @@ -# default installed image for docker executor is: python:3.6 -# using an image that can do git, docker, docker-compose -image: gitlab-registry.mpcdf.mpg.de/nomad-lab/nomad-fair/ci-runner:latest +# Syntax reference: https://docs.gitlab.com/ci/yaml/ + +# Overview: +# Pipelines run only on merge requests, schedules, tags, or the develop branch (default). +# - A schedule pipeline (e.g. nightly build) runs all the jobs. +# - A tag pipeline runs most of the jobs but skips some (also, v#.#.# tags are special). +# - Merge requests and pushes to develop will run +# - few jobs if only `docs/` files changed, +# - all jobs if any other files changed. -# build directory inside -# https://gitlab.mpcdf.mpg.de/help/ci/runners/configure_runners.md#custom-build-directories +.anchors: + .non-docs-changes: &non-docs-changes + changes: + - examples + - gui + - nomad + - ops + - scripts + - tests + - "*" # all files in root + .manual-allow_failure: &manual-allow_failure + when: manual + allow_failure: true -# https://docs.gitlab.com/ee/ci/yaml/workflow.html -# https://docs.gitlab.com/ee/ci/variables/predefined_variables.html -# if: CI_COMMIT_BRANCH && CI_COMMIT_BEFORE_SHA == "0000000000000000000000000000000000000000" -# A branch pipeline, but it is the first commit for that branch -# if: $CI_COMMIT_BRANCH && $CI_OPEN_MERGE_REQUESTS && $CI_PIPELINE_SOURCE == "push" -# For an existing workflow section to switch from branch pipelines to merge request pipelines when a merge request is created. -# if: $CI_COMMIT_BRANCH && $CI_OPEN_MERGE_REQUESTS -# A branch pipeline, but a merge request is open for that branch, do not run the branch pipeline. -# if: $CI_PIPELINE_SOURCE == "merge_request_event" -# A merge request pipeline, start the pipeline. -# if: $CI_COMMIT_BRANCH -# A branch pipeline, but there is no merge request open for the branch, run the branch pipeline. 
+# using an image that can do git, docker, docker-compose +image: gitlab-registry.mpcdf.mpg.de/nomad-lab/nomad-fair/ci-runner:latest default: tags: - # Necessary to select the right CI runner - - cloud + - cloud # Necessary to select the right CI runner variables: DOCKER_TAG: ${CI_COMMIT_REF_SLUG} @@ -38,7 +44,6 @@ workflow: - if: $CI_COMMIT_TAG variables: DOCKER_TAG: ${CI_COMMIT_REF_NAME} - - when: never stages: - build @@ -46,6 +51,8 @@ stages: - deploy - release +# JOBS + update changelog: stage: build script: @@ -76,6 +83,9 @@ build gui: variables: TARGET: dev_node DESTINATION: "${CI_REGISTRY_IMAGE}/dev_node:${DOCKER_TAG}" + rules: + - if: $CI_PIPELINE_SOURCE == "schedule" || $CI_COMMIT_TAG + - <<: *non-docs-changes # kaniko image doesn't contain pip, so we have to save the scm_pretend_version in an earlier job and reuse it later update_scm_pretend_version: @@ -93,6 +103,9 @@ build python: variables: TARGET: dev_python DESTINATION: "${CI_REGISTRY_IMAGE}/dev_python:${DOCKER_TAG}" + rules: + - if: $CI_PIPELINE_SOURCE == "schedule" || $CI_COMMIT_TAG + - <<: *non-docs-changes python linting: stage: test @@ -105,7 +118,7 @@ python linting: rules: - if: $CI_COMMIT_TAG when: never - - when: on_success + - <<: *non-docs-changes artifacts: name: "nomad_code_quality" when: always @@ -119,8 +132,9 @@ python package clean up: script: - python scripts/cleanup_packages.py rules: - - when: manual - allow_failure: true + - if: $CI_PIPELINE_SOURCE == "schedule" || $CI_COMMIT_TAG + <<: *manual-allow_failure + - <<: [*non-docs-changes, *manual-allow_failure] check python dependencies: stage: test @@ -130,8 +144,9 @@ check python dependencies: rules: - if: $CI_COMMIT_TAG when: never - - when: manual - allow_failure: true + - if: $CI_PIPELINE_SOURCE == "schedule" + <<: *manual-allow_failure + - <<: [*non-docs-changes, *manual-allow_failure] .base_test: image: ${CI_REGISTRY_IMAGE}/dev_python:${DOCKER_TAG} @@ -168,15 +183,14 @@ generate pytest timings: - python -m pytest --store-durations artifacts: expire_in: 1 days - when: on_success paths: - .test_durations - rules: - if: $CI_COMMIT_TAG when: never - - when: manual - allow_failure: true + - if: $CI_PIPELINE_SOURCE == "schedule" + <<: *manual-allow_failure + - <<: [*non-docs-changes, *manual-allow_failure] python tests: parallel: 3 @@ -187,13 +201,13 @@ python tests: - cp .coverage .coverage_${CI_NODE_INDEX} artifacts: expire_in: 1 days - when: on_success paths: - .coverage_${CI_NODE_INDEX} rules: - if: $CI_COMMIT_TAG when: never - - when: on_success + - if: $CI_PIPELINE_SOURCE == "schedule" + - <<: *non-docs-changes python coverage report: stage: test @@ -213,7 +227,8 @@ python coverage report: rules: - if: $CI_COMMIT_TAG when: never - - when: on_success + - if: $CI_PIPELINE_SOURCE == "schedule" + - <<: *non-docs-changes gui linting: stage: test @@ -227,7 +242,8 @@ gui linting: rules: - if: $CI_COMMIT_TAG when: never - - when: on_success + - if: $CI_PIPELINE_SOURCE == "schedule" + - <<: *non-docs-changes generate gui artifacts: stage: test @@ -239,7 +255,8 @@ generate gui artifacts: rules: - if: $CI_COMMIT_TAG when: never - - when: on_success + - if: $CI_PIPELINE_SOURCE == "schedule" + - <<: *non-docs-changes artifacts: paths: - gui/tests/env.js @@ -278,7 +295,8 @@ gui tests: rules: - if: $CI_COMMIT_TAG when: never - - when: on_success + - if: $CI_PIPELINE_SOURCE == "schedule" + - <<: *non-docs-changes build python package: stage: test @@ -301,13 +319,12 @@ build python package: - cp /app/tests/data/examples/example.out $CI_PROJECT_DIR/ artifacts: 
expire_in: 1 days - when: on_success paths: - dist/ - archive.json - example.out -install tests: +python package install tests: stage: test parallel: matrix: @@ -327,8 +344,11 @@ install tests: - python -m nomad.cli parse --skip-normalizers archive.json - uv pip install git+https://github.com/nomad-coe/nomad-parser-example.git@ba6027fdd4cda0cf9e0b32546bd809c8fdda79e6 - python -m exampleparser example.out + rules: + - if: $CI_PIPELINE_SOURCE == "schedule" || $CI_COMMIT_TAG + - <<: *non-docs-changes -python package: +release python package: stage: release image: ghcr.io/astral-sh/uv:$UV_VERSION-python$PYTHON_VERSION-bookworm variables: @@ -336,12 +356,11 @@ python package: script: uv publish -u gitlab-ci-token -p ${CI_JOB_TOKEN} --publish-url https://gitlab.mpcdf.mpg.de/api/v4/projects/${CI_PROJECT_ID}/packages/pypi dist/nomad-lab-*.tar.gz rules: - if: $CI_COMMIT_BRANCH == "develop" && $NIGHTLY - when: on_success - - when: manual - allow_failure: true - if: $CI_COMMIT_TAG + <<: *manual-allow_failure + - <<: [*non-docs-changes, *manual-allow_failure] -pypi package: +release pypi package: stage: release variables: GIT_STRATEGY: none @@ -350,9 +369,8 @@ pypi package: script: twine upload -u $CI_TWINE_USER -p $CI_TWINE_PASSWORD dist/nomad-lab-*.tar.gz rules: - if: $CI_COMMIT_TAG - when: manual - allow_failure: true - - when: never + <<: *manual-allow_failure + - <<: [*non-docs-changes, *manual-allow_failure] push to github: stage: release diff --git a/CHANGELOG.md b/CHANGELOG.md index 17a1426c35f24b747611387520ec6e9e4ada59b7..a87a02a1da68401882a62b7e66ce4a09dbb374d7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,53 @@ +## 1.3.15 (2025-03-05) + +### Fixed (1 change) + +- [Fixed user group collection name in MongoDB](https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR/-/commit/e8daac5fba118c3f2d013f97bc68753cb916754a) ([merge request](https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR/-/merge_requests/2350)) + +## 1.3.14 (2025-02-28) + +### Added (11 changes) + +- [Add nomad distro commit info](https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR/-/commit/1a03bfa35ecc17b5040b2f1e5ee20b24d544d17e) ([merge request](https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR/-/merge_requests/2329)) +- [Resolve "Generic xml parser"](https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR/-/commit/3f2535f2443892ec71cc2d5881aeb50452052b30) ([merge request](https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR/-/merge_requests/1655)) +- [Resolve "create edges in the workflow"](https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR/-/commit/fb04f4f163e61b94c0d088863a8ac8afb468cf53) ([merge request](https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR/-/merge_requests/2318)) +- [Added implementation and documentation for DataFrames.](https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR/-/commit/0696502401fc74a0b460ed3a37acf9f2b378cff9) ([merge request](https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR/-/merge_requests/2211)) +- [Implement h5 ref for axes, aux signal](https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR/-/commit/7f162a3be13ba454abfd69e2b364739b1b8166d0) ([merge request](https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR/-/merge_requests/2276)) +- [Added support for targeting any NeXus search quantity in aggregations by...](https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR/-/commit/bef33475c794afe9328b0e38e9986ae05be78aaf) ([merge request](https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR/-/merge_requests/2288)) +- [Added support for custom atomic labels, radii and colors in the NGL-based structure 
viewer.](https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR/-/commit/d26703add00be4c7b57764646aa4bfcf1f272e61) ([merge request](https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR/-/merge_requests/2302)) +- [Added documentation about schema versioning.](https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR/-/commit/dc5fe24bd18a4564ab0c9c3d73b948152377ef7e) ([merge request](https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR/-/merge_requests/2297)) +- [Added annotation for controlling schema availability and naming in the GUI.](https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR/-/commit/27f4b87bd937cab1163eb2498020c7aef097b805) ([merge request](https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR/-/merge_requests/2298)) +- [Resolve "JSON to JSON model transformer"](https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR/-/commit/573f53dcbb87be87134a26028cdf181d87dd1303) ([merge request](https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR/-/merge_requests/1806)) +- [Implement wildcard in paths](https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR/-/commit/fa64155d93bfb2b39bc016a579d5af65a3f8b6f9) ([merge request](https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR/-/merge_requests/2240)) + +### Fixed (11 changes) + +- [Fixed issue with setting and reading boolean values in queries.](https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR/-/commit/b37db79e23d15470981cfdd6ffdfe701269b5693) ([merge request](https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR/-/merge_requests/2347)) +- [Fixed issue with resolving units when using full storage](https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR/-/commit/b767f85f45377fd979950970234dc1448d1b4dd7) ([merge request](https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR/-/merge_requests/2339)) +- [Fix properties normalization](https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR/-/commit/f011e10cc5ad46d8c5d0276d33e429fff9c98f0c) ([merge request](https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR/-/merge_requests/2337)) +- [Fixed issues with north config optional values](https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR/-/commit/7ecbbb66ad6cb8b44f045a51deb93e7a77c4defa) ([merge request](https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR/-/merge_requests/2336)) +- [Fixed issue with suggestions not being shown by default for terms menu items,...](https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR/-/commit/3446265bf31e085b7659bf2446e88052d90a08b2) ([merge request](https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR/-/merge_requests/2330)) +- [Updated nomad install documentation and fixed broken links.](https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR/-/commit/c47bacc6754f7713247c17d1e90a99dd75be6ed0) ([merge request](https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR/-/merge_requests/2297)) +- [Migrated to datacite kernel-4. 
Fixes #2245.](https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR/-/commit/5529523b0af7f81004e3501d430f1d441f753590) ([merge request](https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR/-/merge_requests/2285)) +- [Merge branch 'fix-gui-archive-browser-for-readonly-archives' into 'develop'](https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR/-/commit/9cba8ca3e63082e275242c2dac62f22f1f9477bf) ([merge request](https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR/-/merge_requests/2280)) +- [bug fix if no tasks](https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR/-/commit/561b12a50c27ae3b203ded11466ee17d5351b39c) ([merge request](https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR/-/merge_requests/2270)) +- [Fix early return in PubChemPureSubstanceSection to prevent unnecessary processing](https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR/-/commit/6ab989cef19ad1a5da810c6a7050339dd38b8b23) ([merge request](https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR/-/merge_requests/2258)) +- [Fixes and improvements](https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR/-/commit/6b6cbc54920fa58e5e31b200ca65d824d380bcfc) ([merge request](https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR/-/merge_requests/2234)) + +### Changed (11 changes) + +- [Workflow visualizer](https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR/-/commit/2db3bee03270dbdd4f1222353925279c385c5208) ([merge request](https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR/-/merge_requests/2340)) +- [Enable ruff rule F401, remove unused import](https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR/-/commit/73fb0bb0298b414ec2557cee1de4933593932443) ([merge request](https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR/-/merge_requests/2325)) +- [Patch h5web in postinstall](https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR/-/commit/e98b6000cb49556c1c1f74cb593c699219604f57) ([merge request](https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR/-/merge_requests/2319)) +- [Merge branch 'pydantic-v2' into 'develop'](https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR/-/commit/5b5acfba8fa96aa746d411ecc0464a556a9cc935) ([merge request](https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR/-/merge_requests/2304)) +- [Migrate from Pydantic v1 to v2](https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR/-/commit/07b51f2ea57dbcc6c380fea962e4b7e4e5488950) ([merge request](https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR/-/merge_requests/2304)) +- [Resolve "Simplification of System/CompositeSystem/PureSubstance base sections"](https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR/-/commit/4c4a35aa494983b662e41e030416c6b334dbd903) ([merge request](https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR/-/merge_requests/2300)) +- [Resolve "Simplification of System/CompositeSystem/PureSubstance base sections"](https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR/-/commit/237115a8963f01fe783eb0f3f1f97b5ac6213eda) ([merge request](https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR/-/merge_requests/2300)) +- [Reduced RichTextEdit height for BaseSection description](https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR/-/commit/ef14f78325b12bd992f9ad40db323908ed7b1303) ([merge request](https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR/-/merge_requests/2264)) +- [Update jupyter image](https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR/-/commit/2a367112e219581d2faea063a064a763df1ffb63) ([merge request](https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR/-/merge_requests/2252)) +- [Respect host and port in nomad config](https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR/-/commit/1302e4efbfe877bbb0107d260e217271938d9fa9) ([merge 
request](https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR/-/merge_requests/2243)) +- [Restrict httpx version in dependencies](https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR/-/commit/543756f04922dd877ad6ccf82c9729019d54ace2) ([merge request](https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR/-/merge_requests/2237)) + ## 1.3.13 (2024-11-29) No changes. diff --git a/Dockerfile b/Dockerfile index 0309d0f739e6a5c753af747907a455756ffb2cb9..8b6add58bec779926c277feb1b6c81859d1711ce 100644 --- a/Dockerfile +++ b/Dockerfile @@ -20,8 +20,9 @@ # If you need more help, visit the Dockerfile reference guide at # https://docs.docker.com/engine/reference/builder/ -FROM node:20 AS base_node -FROM python:3.12-slim AS base_python +# node20 image local copy +FROM gitlab-registry.mpcdf.mpg.de/nomad-lab/nomad-fair:node AS base_node +FROM ghcr.io/astral-sh/uv:0.5-python3.12-bookworm-slim AS base_python # Keeps Python from buffering stdout and stderr to avoid situations where # the application crashes without emitting any logs due to buffering. ENV PYTHONUNBUFFERED=1 @@ -61,8 +62,6 @@ RUN apt-get update \ WORKDIR /app -# Install UV -COPY --from=ghcr.io/astral-sh/uv:0.4 /uv /bin/uv # Python environment COPY requirements.txt . @@ -92,9 +91,6 @@ RUN apt-get update \ git \ && rm -rf /var/lib/apt/lists/* -# Install UV -COPY --from=ghcr.io/astral-sh/uv:0.4 /uv /bin/uv - # Python environment COPY requirements-dev.txt . diff --git a/docs/howto/customization/mapping_parser.md b/docs/howto/customization/mapping_parser.md new file mode 100644 index 0000000000000000000000000000000000000000..35d1d0c1f2f2e00e9a401b94fb1224a8fc7cd812 --- /dev/null +++ b/docs/howto/customization/mapping_parser.md @@ -0,0 +1,217 @@ +# How to write data to archive with MappingParser + +`MappingParser` is a generic parser class implemented in +`nomad.parsing.file_parser/mapping_parser.py` to handle the conversion between a +data object and a Python dictionary. We refer to an instance of +this class as 'mapping parser' throughout this section. In the following, the abstract +properties and methods of the mapping parser are explained. The various implementations of +the mapping parser are also described, and the `Mapper` class, which is required to convert one +mapping parser into another, is explained as well. + +## MappingParser + +The mapping parser has several abstract properties and methods and the most important +ones are listed in the following: + +- `filepath`: path to the input file to be parsed +- `data_object`: object resulting from loading the file in memory with `load_file` +- `data`: dictionary representation of `data_object` +- `mapper`: instance of `Mapper` required by `convert` +- `load_file`: method to load the file given by `filepath` +- `to_dict`: method to convert `data_object` into `data` +- `from_dict`: method to convert `data` into `data_object` +- `convert`: method to convert to another mapping parser + +`data_object` can be an `XML` element tree or a `metainfo` section, for example, depending on +the inheriting class. In order to convert a mapping parser to another parser, +the target parser must provide a [`Mapper`](#mapper) object. We refer to this simply as +mapper throughout. + +In the following, we describe the currently implemented mapping parsers. + +### XMLParser + +This is the mapping parser for XML files. It uses [`lxml`](https://lxml.de/) to +load the file as an element tree. The dictionary is generated by iteratively parsing the +elements of the tree in `to_dict`.
The values parsed from element `text` are automatically +converted to a corresponding data type. If attributes are present, the value is wrapped in +a dictionary with the key given by `value_key` ('__value' by default) while the attribute keys +are prefixed by `attribute_prefix` ('@' by default). The following XML: + +```xml +<a> + <b name='item1'>name</b> + <b name='item2'>name2</b> +</a> +``` + +will be converted to: + +```python + data = { + 'a' : { + 'b': [ + {'@name': 'item1', '__value': 'name'}, + {'@name': 'item2', '__value': 'name2'} + ] + } + } +``` + +The conversion can be reversed using the `from_dict` method. + +### HDF5Parser + +This is the mapping parser for HDF5 files. It uses [`h5py`](https://www.h5py.org/) to load +the file as an HDF5 group. Similar to [XMLParser](#xmlparser), the HDF5 datasets are +iteratively parsed from the underlying groups and, if attributes are present, these are +also parsed. The `from_dict` method is also implemented to convert a dictionary into an +HDF5 group. + +### MetainfoParser + +This is the mapping parser for NOMAD archive files or metainfo sections. +It accepts a schema root node annotated with `MappingAnnotation` as `data_object`. +`create_mapper` generates the actual mapper matching the `annotation_key`. +If a `filepath` is specified, it instead falls back on the [`ArchiveParser`](--ref--). <!-- TODO: add reference --> + +The annotation should always point to a parsed value via a `path` (JMesPath format). +It may optionally specify a multi-argument `operator` for data mangling. <!-- most operators are binary, would change the name --> +In this case, specify a tuple consisting of: + +- the operator name, defined within the same scope. +- a list of paths with the corresponding values for the operator arguments. <!-- @Alvin: can you verify? --> + +Similar to `MSection`, it can be converted to (`to_dict`) or from (`from_dict`) a Python `dict`. +Other attributes are currently accessible. + +```python +from nomad.datamodel.metainfo.annotations import Mapper as MappingAnnotation + +class BSection(ArchiveSection): + v = Quantity(type=np.float64, shape=[2, 2]) + v.m_annotations['mapping'] = dict( + xml=MappingAnnotation(mapper='.v'), + hdf5=MappingAnnotation(mapper=('get_v', ['.v[0].d'])), + ) + + v2 = Quantity(type=str) + v2.m_annotations['mapping'] = dict( + xml=MappingAnnotation(mapper='.c[0].d[1]'), + hdf5=MappingAnnotation(mapper='g.v[-2]'), + ) + +class ExampleSection(ArchiveSection): + b = SubSection(sub_section=BSection, repeats=True) + b.m_annotations['mapping'] = dict( + xml=MappingAnnotation(mapper='a.b1'), hdf5=MappingAnnotation(mapper='.g1') + ) + +ExampleSection.m_def.m_annotations['mapping'] = dict( + xml=MappingAnnotation(mapper='a'), hdf5=MappingAnnotation(mapper='g') +) + +parser = MetainfoParser() +parser.data_object = ExampleSection(b=[BSection()]) +parser.annotation_key = 'xml' +parser.mapper +# Mapper(source=Path(path='a'.... +``` + +### Converting mapping parsers + +The following is sample Python code to illustrate the mapping of the contents of an +HDF5 file to an archive. First, we create a `MetainfoParser` object for the archive. The +annotation key is set to `hdf5` which will generate a +[mapper](#mapper) from the `hdf5` annotations defined in the definitions. Essentially, +only metainfo sections and quantities with the `hdf5` annotation will be mapped. The mapper +will contain paths for the source (HDF5) and the target (archive). The archive is then +set as the archive parser's `data_object`.
Here, the archive already contains some data +which should be merged with the data that will be parsed. Next, a parser for HDF5 data is +created. We use a custom subclass of `HDF5Parser` which implements the `get_v` method +referenced in the annotation of `BSection.v`. In this example, we do not read the data from the HDF5 file but +instead generate it from a dictionary by using the `from_dict` method. By invoking the +`convert` method, the archive parser data object is populated with the corresponding +HDF5 data. + +```python + class ExampleHDF5Parser(HDF5Parser): + @staticmethod + def get_v(value): + return np.array(value)[1:, :2] + + archive_parser = MetainfoParser() + archive_parser.annotation_key = 'hdf5' + archive_parser.data_object = ExampleSection(b=[BSection(v=np.eye(2))]) + + hdf5_parser = ExampleHDF5Parser() + d = dict( + g=dict( + g1=dict(v=[dict(d=np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]))]), + v=['x', 'y', 'z'], + g=dict( + c1=dict( + i=[4, 6], + f=[ + {'@index': 0, '__value': 1}, + {'@index': 2, '__value': 2}, + {'@index': 1, '__value': 1}, + ], + d=[dict(e=[3, 0, 4, 8, 1, 6]), dict(e=[1, 7, 8, 3, 9, 1])], + ), + c=dict(v=[dict(d=np.eye(3), e=np.zeros(3)), dict(d=np.ones((3, 3)))]), + ), + ) + ) + hdf5_parser.from_dict(d) + + hdf5_parser.convert(archive_parser) + + # >>> archive_parser.data_object + # ExampleSection(b, b2) + # >>> archive_parser.data_object.b[1].v + # array([[4., 5.], + # [7., 8.]]) +``` + +## Mapper + +A mapper is necessary in order to convert a mapping parser to a target mapping parser +by mapping data from the source to the target. There are three kinds of mapper: `Map`, +`Evaluate` and `Mapper`, each inheriting from `BaseMapper`. A mapper has the attributes +`source` and `target` which define the paths to the source data and target, respectively. +`Map` is intended for mapping data directly from source to target. The path to the data is +given by the attribute `path`. `Evaluate` will execute a function defined by +`function_name` with the arguments given by the mapped values of the paths in +`function_args`. Lastly, `Mapper` allows the nesting of mappers by providing a list of +mappers to its attribute `mapper`. All the paths are instances of `Path` with the string +value of the path to the data given by the attribute `path`. The value of `path` should +follow the [jmespath specifications](https://jmespath.org/specification.html) but could be +prefixed by `.` which indicates that this is a path relative to the parent. This tells the +mapper from which source to get the data. + +```python + Mapper( + source=Path(path='a.b2'), target=Path(path='b2'), mapper=[ + Mapper( + source=Path(path='.c', parent=Path(path='a.b2')), + target=Path(path='.c', parent=Path(path='b2')), mapper=[ + Map( + target=Path( + path='.i', parent=Path(path='.c', parent=Path(path='b2')) + ), + path=Path( + path='.d', parent=Path(path='.c', parent=Path(path='a.b2')) + ) + ), + Evaluate( + target=Path( + path='.g', parent=Path(path='.c', parent=Path(path='b2')) + ), + function_name='slice', function_args=[Path(path='a.b2.c.f.g.i')] + ) + ] + ) + ], + ) +``` diff --git a/docs/howto/develop/code.md b/docs/howto/develop/code.md index 51e82c94db6cfcd7855d7d6a06f1aeeec7e49ce7..f74869a9feb7a510b1ffb7ed7c0b4d90a3d46d5f 100644 --- a/docs/howto/develop/code.md +++ b/docs/howto/develop/code.md @@ -5,25 +5,32 @@ about the codebase and ideas about what to look at first.
## Git Projects -There is one [main NOMAD project](https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR){:target="_blank"} -(and its [fork on GitHub](https://github.com/nomad-coe/nomad){:target="_blank"}). This project contains -all the framework and infrastructure code. It instigates all checks, builds, and -deployments for the public NOMAD service, the NOMAD Oasis, and the `nomad-lab` Python -package. All contributions to NOMAD have to go through this project eventually. +There is one +[main NOMAD project](https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR){:target="_blank"} +(and its [fork on GitHub](https://github.com/nomad-coe/nomad){:target="_blank"}). +This project contains all the framework and infrastructure code. It instigates all checks, +builds, and deployments for the public NOMAD service, the NOMAD Oasis, and the `nomad-lab` +Python package. All contributions to NOMAD have to go through this project eventually. All (Git) projects that NOMAD depends on are either a Git submodule (you find them all in the `dependencies` directory or its subdirectories) or they are listed as PyPI packages in the `pyproject.toml` of the main project (or one of its submodules). -You can also have a look at the [built-in plugins](../../reference/plugins.md) that constitute the majority of these projects. The only other projects are [MatID](https://github.com/nomad-coe/matid){:target="_blank"}, [DOS fingerprints](https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-dos-fingerprints){:target="_blank"}, and the [NOMAD Remote Tools Hub](https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-remote-tools-hub){:target="_blank"}. +You can also have a look at the [built-in plugins](../../reference/plugins.md) that +constitute the majority of these projects. The only other projects are +[MatID](https://github.com/nomad-coe/matid){:target="_blank"}, +[DOS fingerprints](https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-dos-fingerprints){:target="_blank"}, +and the +[NOMAD Remote Tools Hub](https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-remote-tools-hub){:target="_blank"}. !!! note - The GitLab organization [nomad-lab](https://gitlab.mpcdf.mpg.de/nomad-lab){:target="_blank"} and the - GitHub organizations for [FAIRmat](https://github.com/fairmat-nfdi){:target="_blank"} and the - [NOMAD CoE](https://github.com/nomad-coe){:target="_blank"} all represent larger infrastructure and - research projects, and they include many other Git projects that are not related. - When navigating the codebase, only follow the submodules. + The GitLab organization + [nomad-lab](https://gitlab.mpcdf.mpg.de/nomad-lab){:target="_blank"} and the GitHub + organizations for [FAIRmat](https://github.com/fairmat-nfdi){:target="_blank"} and the + [NOMAD CoE](https://github.com/nomad-coe){:target="_blank"} all represent larger + infrastructure and research projects, and they include many other Git projects that + are not related. When navigating the codebase, only follow the submodules. ## Python code @@ -39,22 +46,26 @@ There are three main directories with Python code: The `nomad` directory contains the following "main" modules. 
This list is not extensive but should help you to navigate the codebase: -- `app`: The [FastAPI](https://fastapi.tiangolo.com/){:target="_blank"} APIs: v1 and v1.2 NOMAD APIs, - [OPTIMADE](https://www.optimade.org/){:target="_blank"}, [DCAT](https://www.w3.org/TR/vocab-dcat-2/){:target="_blank"}, +- `app`: The [FastAPI](https://fastapi.tiangolo.com/){:target="_blank"} APIs: v1 and v1.2 + NOMAD APIs, + [OPTIMADE](https://www.optimade.org/){:target="_blank"}, + [DCAT](https://www.w3.org/TR/vocab-dcat-2/){:target="_blank"}, [h5grove](https://github.com/silx-kit/h5grove){:target="_blank"}, and more. - `archive`: Functionality to store and access archive files. This is the storage format for all processed data in NOMAD. See also the docs on [structured data](../../explanation/data.md). -- `cli`: The command line interface (based on [Click](https://click.palletsprojects.com){:target="_blank"}). - Subcommands are structured into submodules. +- `cli`: The command line interface (based on + [Click](https://click.palletsprojects.com){:target="_blank"}). Subcommands are + structured into submodules. - `config`: NOMAD is configured through the `nomad.yaml` file. This contains all the - ([Pydantic](https://docs.pydantic.dev/){:target="_blank"}) models and default config parameters. + ([Pydantic](https://docs.pydantic.dev/){:target="_blank"}) models and default config + parameters. -- `datamodel`: The built-in schemas (e.g. `nomad.datamodel.metainfo.workflow` used to construct - workflows). The base sections and section for the shared entry structure. +- `datamodel`: The built-in schemas (e.g. `nomad.datamodel.metainfo.workflow` used to + construct workflows). The base sections and section for the shared entry structure. See also the docs on the [datamodel](../../explanation/data.md) and [processing](../../explanation/basics.md). @@ -68,7 +79,8 @@ but should help you to navigate the codebase: [processing](../../explanation/basics.md#parsing). - `processing`: It's all about processing uploads and entries. The interface to - [Celery](https://docs.celeryq.dev/en/stable/){:target="_blank"} and [MongoDB](https://www.mongodb.com). + [Celery](https://docs.celeryq.dev/en/stable/){:target="_blank"} and + [MongoDB](https://www.mongodb.com). - `units`: The unit and unit conversion system based on [Pint](https://pint.readthedocs.io){:target="_blank"}. @@ -84,16 +96,18 @@ but should help you to navigate the codebase: ## GUI code -The NOMAD UI is written as a [React](https://react.dev/){:target="_blank"} single-page application (SPA). It -uses (among many other libraries) [MUI](https://mui.com/){:target="_blank"}, -[Plotly](https://plotly.com/python/){:target="_blank"}, and [D3](https://d3js.org/){:target="_blank"}. The GUI code is -maintained in the `gui` directory. Most relevant code can be found in -`gui/src/components`. The application entry point is `gui/src/index.js`. +The NOMAD UI is written as a [React](https://react.dev/){:target="_blank"} single-page +application (SPA). It uses (among many other libraries) +[MUI](https://mui.com/){:target="_blank"}, +[Plotly](https://plotly.com/python/){:target="_blank"}, and +[D3](https://d3js.org/){:target="_blank"}. The GUI code is maintained in the `gui` +directory. Most relevant code can be found in `gui/src/components`. The application entry +point is `gui/src/index.js`. ## Documentation -The documentation is based on [MkDocs](https://www.mkdocs.org/){:target="_blank"}. 
The important files -and directories are: +The documentation is based on [MkDocs](https://www.mkdocs.org/){:target="_blank"}. The +important files and directories are: - `docs`: Contains all the Markdown files that contribute to the documentation system. @@ -101,7 +115,8 @@ and directories are: added here as well. - `nomad/mkdocs.py`: Python code that defines - [macros](https://mkdocs-macros-plugin.readthedocs.io/){:target="_blank"} which can be used in Markdown. + [macros](https://mkdocs-macros-plugin.readthedocs.io/){:target="_blank"} which can be + used in Markdown. ## Other top-level directories diff --git a/docs/howto/overview.md b/docs/howto/overview.md index c8968052d9fe6452abfefdced09bff2676da5558..7a1bcb634ff11325d9e33084526d3e98a10c4cc6 100644 --- a/docs/howto/overview.md +++ b/docs/howto/overview.md @@ -6,7 +6,8 @@ hide: toc ## Users -These how-to guides target NOMAD users and cover data management, exploration, analysis with NOMAD graphical web-interface and APIs. +These how-to guides target NOMAD users and cover data management, exploration, analysis +with NOMAD graphical web-interface and APIs. <div markdown="block" class="home-grid"> <div markdown="block"> @@ -38,7 +39,9 @@ Use NOMAD's functions programmatically and via its APIs. ## Data stewards, administrators, and developers -These how-to guides allow advanced users, NOMAD administrators, data stewards, and developers to customize and operate NOMAD and NOMAD Oasis or contribute to NOMAD's development. +These how-to guides allow advanced users, NOMAD administrators, data stewards, and +developers to customize and operate NOMAD and NOMAD Oasis or contribute to NOMAD's +development. <div markdown="block" class="home-grid"> <div markdown="block"> @@ -71,6 +74,7 @@ Customize NOMAD, write plugins, and tailor NOMAD Oasis. - [Write a schema packages](plugins/schema_packages.md) - [Work with units](customization/units.md) - [Use HDF5 to handle large quantities](customization/hdf5.md) +- [Use Mapping parser to write data on archive](customization/mapping_parser.md) </div> <div markdown="block"> @@ -89,4 +93,6 @@ Become a NOMAD developer and contribute to the source code. <h2>One last thing</h2> -If you can't find what you're looking for in our guides, [contact our team](mailto:support@nomad-lab.eu) for personalized help and assistance. Don't worry, we're here to help and learn what we're doing wrong! +If you can't find what you're looking for in our guides, +[contact our team](mailto:support@nomad-lab.eu) for personalized help and assistance. +Don't worry, we're here to help and learn what we're doing wrong! diff --git a/docs/howto/plugins/schema_packages.md b/docs/howto/plugins/schema_packages.md index c64af853e86c03d7aa8e01ffbd74db99215a1f9b..fbe1ea0863ae1e66542ea37ef2f459fef6580ded 100644 --- a/docs/howto/plugins/schema_packages.md +++ b/docs/howto/plugins/schema_packages.md @@ -178,6 +178,18 @@ we will get a final normalized archive that contains our data like this: } ``` +## Migration guide + +By default, schema packages are identified by the full qualified path to the Python module that contains the definitions. An example of a full qualified path could be `nomad_example.schema_packages.mypackage`, where the first part is the Python package name, second part is a subpackage, and the last part is a Python module containing the definitions. 
This is the easiest way to prevent conflicts between different schema packages: Python package names are unique (prevents clashes between packages) and paths inside a package must point to a single Python module (prevents clashes within a package). This does, however, mean that _if you move your schema definition in the plugin source code, any references to the old definition will break_. This becomes problematic in installations that have a lot of old data processed with the old definition location, as those entries will still refer to the old location and will not work correctly. + +As it might not be possible, or even wise, to prevent changes in the source code layout, and reprocessing all old entries might be impractical, we do provide an alias mechanism to help with migration tasks. Imagine your schema package was contained in `nomad_example.schema_packages.mypackage`, and in a newer version of your plugin you want to move it to `nomad_example.schema_packages.mynewpackage`. The way to do this without completely breaking the old entries is to add an alias in the schema package definition: + +```python +m_package = SchemaPackage(aliases=['nomad_example.schema_packages.mypackage']) +``` + +Note that this will only help in scenarios where you have moved the definitions and not removed or modified any of them. + ## Definitions The following describes in detail the schema language for the NOMAD Metainfo and how it is expressed in Python. @@ -305,6 +317,12 @@ The above example works, if `System` is eventually defined in the same package. ### Categories +!!! Warning + + Categories are now deprecated. + Their previous occurrences should be replaced + with respective annotations. + In the old metainfo this was known as _abstract types_. Categories are defined with Python classes that have `:class:MCategory` as base class. @@ -317,6 +335,122 @@ class CategoryName(MCategory): m_def = Category(links=['http://further.explanation.eu'], categories=[ParentCategory]) ``` +## Data frames + +On top of the core Metainfo concepts like `Sections`, `Quantities`, and `SubSection`, we provide a mechanism for modeling _data frames_. + +A NOMAD data frame is a multi-index table with named indices (variables) and columns (fields). +All columns should match in length, as they are all parametrized by the same indices. +Both variables and fields are defined standalone using `Values`. +A DataFrame may contain any number of Values, though a bare minimum can be defined via `mandatory_variables` and `mandatory_fields`, respectively. + +The mechanism is based on a concept called `Values` for storing arrays of numeric data to +represent a _field_ or _variable_ (or axis, dimension, etc.) and a concept called +`DataFrame` that combines _fields_ and _variables_ with matching dimensions into a data frame. +Our `DataFrame` is conceptually close to xarray datasets, pandas data frames, or the NeXus NXData group. + +`Values` and `DataFrame` are usually not used directly; instead, you will create +reusable templates that allow you to use the same type of `Values` (e.g. describing +physical properties like energies, temperatures, pressures, ...) and the same type of `DataFrame` (e.g. +describing material properties at different variables like density of states or band gap).
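To give a first impression of how these templates are meant to be used, here is a condensed sketch distilled from the `examples/metainfo/data_frames.py` example that the next section includes in full; the import paths and `create` calls follow that example and should be read as an illustration, not a stable API reference.

```python
import numpy as np

from nomad.metainfo.data_frames import DataFrameTemplate, ValuesTemplate
from nomad.metainfo.metainfo import MSection

# Reusable values templates for physical properties (type, shape, unit, IRI).
Energy = ValuesTemplate(
    name='Energy',
    type=np.float64,
    shape=[],
    unit='J',
    iri='https://www.wikidata.org/wiki/Q11379',
)
Temperature = ValuesTemplate(
    name='Temperature',
    type=np.float64,
    shape=[],
    unit='K',
    iri='https://www.wikidata.org/wiki/Q11466',
)

# A reusable data frame template that requires an Energy field.
BandGap = DataFrameTemplate(name='BandGap', mandatory_fields=[Energy])


class MySection(MSection):
    band_gap = Energy()  # template used as a plain quantity
    band_gaps = BandGap()  # template used as a data frame sub-section


my_section = MySection()
my_section.band_gaps = BandGap.create()
my_section.band_gaps.fields = [Energy.create(1.0, 1.1)]
my_section.band_gaps.variables = [Temperature.create(200, 220)]
```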
+ +### Illustrating example + +```py +--8<-- "examples/metainfo/data_frames.py:9:31" + +--8<-- "examples/metainfo/data_frames.py:41:44" + + +--8<-- "examples/metainfo/data_frames.py:55:63" + +``` + +### Fields vs variables (and dimensions) + +Both _fields_ and _variables_ hold values (i.e. columns) in your data frame. +While _fields_ hold the actual data, _variables_ span the data space and its dimensions (i.e. column indices). + +_Variables_ and _dimensions_ are conceptually slightly different. First, _variables_ provide +the values on a certain dimension (via shared indices). Second, the number of _variables_ often, but not necessarily, +is equal to the number of dimensions. If some _variables_ depend on each other, they might +span shared dimensions. _Fields_, on the other hand, always provide values for all dimensions. + +Let's compare two datasets: one that you could plot in a heatmap and one that +you would plot in a scatter plot. In both cases, we have two _variables_ `Temperature` and `Pressure`, +as well as one _field_ `Energy`. + +In the heatmap scenario, we vary `Temperature` and `Pressure` independently and have an +`Energy` value (i.e. heatmap color/intensity) for each `Temperature` reading at every `Pressure` reading. +For two values on each _variable_, we respectively have 4 (2x2) _field_ values: + +```py +--8<-- "examples/metainfo/data_frames.py:89:97" +``` + +In the scatter plot scenario, we vary `Temperature` and `Pressure` together. +We only have one _field_ value (y-axis) for each pair of temperature and pressure (two x-axes) +values. +With two combined temperature and pressure readings, we respectively only have two field values: + +```py +--8<-- "examples/metainfo/data_frames.py:100:106" +``` + +We can use the `ValuesTemplate` kwarg `spanned_dimensions` to define how `Temperature` and +`Pressure` are related. The given indices refer to the indices of the field values and +represent the logical dimension of the data space. + +The first example without the `spanned_dimensions` is equivalent to this example +with `spanned_dimensions`. Here we span two independent dimensions: + +```py +--8<-- "examples/metainfo/data_frames.py:109:117" +``` + +### Fields and variables in the schema vs parsing + +The templates allow you to define _mandatory_ _fields_ and _variables_ in the schema. +These _fields_ and _variables_ have to be provided by the parser when instantiating the +respective dataset. However, parsers can provide additional _fields_ and _variables_. +This allows extending what is defined in the template without requiring new definitions. + +### Data representation + +Each call to `ValuesTemplate` and `DataFrameTemplate` produces a section definition +inheriting from `Values` and `DataFrame`, respectively. + +`Values` sections define a single quantity `values`. The `values` quantity always holds a numpy array +based on the type and shape given in the template. The shape of the `values` quantity +is the shape given in the template plus one dimension of arbitrary length. +_Variable_ values are always a flat list of values anyway (the values themselves can have a higher shape). +_Field_ values are always flattened. You might provide them in a higher dimensional array +according to the dimensionality of the _variables_, but they are always flattened as the +`values` quantity only provides one additional dimension, because the real number of dimensions +is only available at runtime.
The original (runtime) shape of _fields_ is stored in the `original_shape` `Values` quantity. + +`DataFrame` sections define repeating sub-sections for `fields` and `variables`. +The specific `DataFrame` section defined by the template will also hold an annotation `DatasetAnnotation` that keeps the `mandatory_fields` and `mandatory_variables` for runtime validation. +The `fields` and `variables` sub-sections provide a `Values` +instance for each _field_ in `mandatory_fields` and each _variable_ in `mandatory_variables`, +but they can also hold additional _fields_ and _variables_ to accommodate more +_fields_ and _variables_ determined during parsing. + +When a `ValuesTemplate` is used (e.g. `some_property = Energy()`), a quantity is created. +This quantity is a copy of the `values` quantity created by the template. +This allows reusing templated value quantities. When a `DataFrameTemplate` is used +(e.g. `some_property = BandGap()`), a sub-section is created. This sub-section targets +the `DataFrame` section defined by the template. + +#### Working with xarrays and pandas + +We provide utility functions on `DataFrame` that you can use to translate into the +respective xarray datasets and pandas data frames. + +!!! Warning + + The documentation on this is still pending. + ## Adding Python schemas to NOMAD The following describes how to integrate new schema modules into the existing code according diff --git a/docs/howto/programmatic/api.md b/docs/howto/programmatic/api.md index b26b0a5b39c635005330d0e1cd170b41d6bf386d..3a20fa79418d6a2693cd7e116a655d97502a98ac 100644 --- a/docs/howto/programmatic/api.md +++ b/docs/howto/programmatic/api.md @@ -1,6 +1,6 @@ # How to use the API -This guide is about using NOMAD's REST APIs directly, e.g. via Python's *request*. +This guide is about using NOMAD's REST APIs directly, e.g. via Python's `requests` library. To access the processed data with our client library `nomad-lab` follow [How to access processed data](archive_query.md). You can also watch our @@ -8,14 +8,17 @@ To access the processed data with our client library `nomad-lab` follow ## Different options to use the API -NOMAD offers all its functionality through application -programming interfaces (APIs). More specifically [RESTful HTTP APIs](https://en.wikipedia.org/wiki/Representational_state_transfer){:target="_blank"} that allows you -to use NOMAD as a set of resources (think data) that can be uploaded, accessed, downloaded, -searched for, etc. via [HTTP requests](https://en.wikipedia.org/wiki/Hypertext_Transfer_Protocol){:target="_blank"}. +NOMAD offers all its functionality through application programming interfaces (APIs). More +specifically +[RESTful HTTP APIs](https://en.wikipedia.org/wiki/Representational_state_transfer){:target="_blank"} +that allow you to use NOMAD as a set of resources (think data) that can be uploaded, +accessed, downloaded, searched for, etc. via +[HTTP requests](https://en.wikipedia.org/wiki/Hypertext_Transfer_Protocol){:target="_blank"}. -You can get an overview on all NOMAD APIs on the [API page]({{ nomad_url() }}../../gui/analyze/apis). -We will focus here on NOMAD's main API (v1). In fact, this API is also used by -the web interface and should provide everything you need. +You can get an overview on all NOMAD APIs on the +[API page]({{ nomad_url() }}../../gui/analyze/apis). We will focus here on NOMAD's main +API (v1). In fact, this API is also used by the web interface and should provide +everything you need.
There are different tools and libraries to use the NOMAD API that come with different trade-offs between expressiveness, learning curve, and convenience. @@ -25,24 +28,25 @@ trade-offs between expressiveness, learning curve, and convenience. For example to see the metadata for all entries with elements *Ti* and *O* go here: [{{ nomad_url() }}/v1/entries?elements=Ti&elements=O]({{ nomad_url() }}/v1/entries?elements=Ti&elements=O) -#### Use *curl* or *wget* +#### Use `curl` or `wget` -REST API's use resources located via URLs. You access URLs with *curl* or *wget*. Same +REST API's use resources located via URLs. You access URLs with `curl` or `wget`. Same *Ti*, *O* example as before: + ```sh curl "{{ nomad_url() }}/v1/entries?results.material.elements=Ti&results.material.elements=O" | python -m json.tool ``` -#### Use Python and requests +#### Use Python and `requests` -Requests is a popular Python library to use the internets HTTP protocol that is used to +`Requests` is a popular Python library to use the internet's HTTP protocol that is used to communicate with REST APIs. Install with `pip install requests`. See [the initial example](#using-request). #### Use our dashboard -The NOMAD API has an [OpenAPI dashboard]({{ nomad_url() }}/v1). This is an interactive documentation of all -API functions that allows you to try these functions in the browser. +The NOMAD API has an [OpenAPI dashboard]({{ nomad_url() }}/v1). This is an interactive +documentation of all API functions that allows you to try these functions in the browser. #### Use NOMAD's Python package @@ -50,7 +54,7 @@ Install the [NOMAD Python client library](./pythonlib.md) and use it's `ArchiveQ functionality for a more convenient query based access of archive data following the [How-to access the processed data](archive_query.md) guide. -## Using request +## Using `requests` If you are comfortable with REST APIs and using Pythons `requests` library, this example demonstrates the basic concepts of NOMAD's main API. You can get more documentation and @@ -118,10 +122,10 @@ This will give you something like this: } ``` -The `entry_id` is a unique identifier for, well, entries. You can use it to access -other entry data. For example, you want to access the entry's archive. More -precisely, you want to gather the formula and energies from the main workflow result. -The following requests the archive based on the `entry_id` and only requires some archive sections. +The `entry_id` is a unique identifier for, well, entries. You can use it to access other +entry data. For example, you want to access the entry's archive. More precisely, you want +to gather the formula and energies from the main workflow result. The following requests +the archive based on the `entry_id` and only requires some archive sections. ```py first_entry_id = response_json['data'][0]['entry_id'] @@ -222,11 +226,13 @@ The result will look like this: } ``` -You can work with the results in the given JSON (or respective Python dict/list) data already. -If you have [NOMAD's Python library](./pythonlib.md) installed , -you can take the archive data and use the Python interface. -The [Python interface](../plugins/schema_packages.md#wrap-data-with-python-schema-classes) will help with code-completion (e.g. in notebook environments), -resolve archive references (e.g. from workflow to calculation to system), and allow unit conversion: +You can work with the results in the given JSON (or respective Python dict/list) data +already. 
If you have [NOMAD's Python library](./pythonlib.md) installed, you can take the +archive data and use the Python interface. The +[Python interface](../plugins/schema_packages.md#wrap-data-with-python-schema-classes) +will help with code-completion (e.g. in notebook environments), resolve archive references +(e.g. from workflow to calculation to system), and allow unit conversion: + ```py from nomad.datamodel import EntryArchive from nomad.metainfo import units @@ -238,6 +244,7 @@ print(result.energy.total.value.to(units('eV'))) ``` This will give you an output like this: + ``` OOSrTiOOOSrTiOOOSrTiOFF -355626.93095025205 electron_volt @@ -252,7 +259,8 @@ the API: - Raw files, the files as they were uploaded to NOMAD. - Archive data, all of the extracted data for an entry. -There are also different entities (see also [Datamodel](../../explanation/basics.md)) with different functions in the API: +There are also different entities (see also [Datamodel](../../explanation/basics.md)) +with different functions in the API: - Entries - Uploads @@ -275,17 +283,20 @@ Let's discuss some of the common concepts. ### Response layout -Functions that have a JSON response, will have a common layout. First, the response will contain all keys and values of the request. The request is not repeated verbatim, but -in a normalized form. Abbreviations in search queries might be expanded, default values for optional parameters are added, or additional response specific information -is included. Second, the response will contain the results under the key `data`. +Functions that have a JSON response, will have a common layout. First, the response will +contain all keys and values of the request. The request is not repeated verbatim, but +in a normalized form. Abbreviations in search queries might be expanded, default values +for optional parameters are added, or additional response specific information is +included. Second, the response will contain the results under the key `data`. ### Owner -All functions that allow a query will also allow to specify the `owner`. Depending on -the API function, its default value will be mostly `visible`. Some values are only -available if you are [logged in](#authentication). +All functions that allow a query will also allow to specify the `owner`. Depending on the +API function, its default value will be mostly `visible`. Some values are only available +if you are [logged in](#authentication). {{ doc_snippet('owner')}} + ### Queries {{ doc_snippet('query') }} @@ -293,10 +304,11 @@ available if you are [logged in](#authentication). ### Pagination When you issue a query, usually not all results can be returned. Instead, an API returns -only one *page*. This behavior is controlled through pagination parameters, -like `page_site`, `page`, `page_offset`, or `page_after_value`. +only one *page*. This behavior is controlled through pagination parameters, like +`page_site`, `page`, `page_offset`, or `page_after_value`. Let's consider a search for entries as an example. + ```py response = requests.post( f'{base_url}/entries/query', @@ -313,8 +325,9 @@ response = requests.post( ) ``` -This will only result in a response with a maximum of 10 entries. The response will contain a -`pagination` object like this: +This will only result in a response with a maximum of 10 entries. The response will +contain a `pagination` object like this: + ```json { "page_size": 10, @@ -345,10 +358,11 @@ response = requests.post( } ) ``` + You will get the next 10 results. 
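Before the full example, here is a minimal sketch of such a pagination loop. It reuses the `/entries/query` request from above and assumes, as in the earlier examples, that `requests` is imported and `base_url` is set; it also assumes that `next_page_after_value` is simply absent on the last page.

```py
collected = []
page_after_value = None
while True:
    pagination = {'page_size': 10}
    if page_after_value is not None:
        pagination['page_after_value'] = page_after_value
    response = requests.post(
        f'{base_url}/entries/query',
        json={
            'query': {'results.material.elements': {'all': ['Ti', 'O']}},
            'pagination': pagination,
        },
    )
    response_json = response.json()
    collected.extend(entry['entry_id'] for entry in response_json['data'])
    # Stop when the server does not report a further page (or we have enough).
    page_after_value = response_json['pagination'].get('next_page_after_value')
    if page_after_value is None or len(collected) >= 100:
        break
```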
-Here is a full example that collects the first 100 formulas from entries that match -a certain query by paginating. +Here is a full example that collects the first 100 formulas from entries that match a +certain query by paginating. ```python --8<-- "examples/docs/api/pagination.py" @@ -357,16 +371,21 @@ a certain query by paginating. ### Authentication Most of the API operations do not require any authorization and can be freely used -without a user or credentials. However, to upload, edit, or view your own and potentially unpublished data, the API needs to authenticate you. +without a user or credentials. However, to upload, edit, or view your own and potentially +unpublished data, the API needs to authenticate you. The NOMAD API uses OAuth and tokens to authenticate users. We provide simple operations that allow you to acquire an *access token* via username and password: ```py +import os + import requests -response = requests.get( - '{{ nomad_url() }}/v1/auth/token', params=dict(username='myname', password='mypassword')) +response = requests.post( + '{{ nomad_url() }}/v1/auth/token', + data={'username': os.getenv('NOMAD_USERNAME'), 'password': os.getenv('NOMAD_PASSWORD')}, +) token = response.json()['access_token'] response = requests.get( @@ -379,12 +398,14 @@ If you have the [NOMAD Python package](./pythonlib.md) installed. You can use it implementation: ```py +import os + import requests from nomad.client import Auth response = requests.get( '{{ nomad_url() }}/v1/uploads', - auth=Auth(user='myname or email', password='mypassword')) + auth=Auth(user=os.getenv('NOMAD_USERNAME'), password=os.getenv('NOMAD_PASSWORD'))) uploads = response.json()['data'] ``` @@ -426,9 +447,9 @@ curl "{{ nomad_url() }}/v1/entries/raw?results.material.elements=Ti&results.mate ``` ## Access processed data (archives) -Above under [using requests](#using-request), you've already learned how to access -archive data. A special feature of the archive API functions is that you can define what is `required` -from the archives. +Above under [using requests](#using-request), you've already learned how to access archive +data. A special feature of the archive API functions is that you can define what is +`required` from the archives. ```py response = requests.post( @@ -476,13 +497,14 @@ or 10 concurrent requests. Consider to use endpoints that allow you to retrieve full pages of resources, instead of endpoints that force you to access resources one at a time. -See also the sections on [types of data](#different-kinds-of-data) and [pagination](#pagination). +See also the sections on [types of data](#different-kinds-of-data) and +[pagination](#pagination). However, pagination also has its limits and you might ask for pages that are too large. -If you get responses in the 400 range, e.g. **422 Unprocessable Content** or **400 Bad request**, -you might hit an api limit. Those responses are typically accompanied by an error message -in the response body that will inform you about the limit, e.g. the maximum allowed -page size. +If you get responses in the 400 range, e.g. **422 Unprocessable Content** or +**400 Bad request**, you might hit an api limit. Those responses are typically accompanied +by an error message in the response body that will inform you about the limit, e.g. the +maximum allowed page size. 
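One defensive pattern is to pace your requests and back off when such a response arrives. A minimal sketch, assuming the `requests` setup and `base_url` from the examples above; the concrete delays are illustrative, derived from the 30 requests per second figure, and not an official recommendation:

```py
import time

def get_with_backoff(url, params=None, max_retries=5):
    """GET a resource, backing off when the API reports a limit."""
    for attempt in range(max_retries):
        response = requests.get(url, params=params)
        if response.status_code < 400:
            return response
        # The response body typically names the limit that was hit.
        print(response.status_code, response.text)
        time.sleep(2 ** attempt)  # simple exponential backoff
    raise RuntimeError(f'giving up on {url} after {max_retries} attempts')

response = get_with_backoff(
    f'{base_url}/entries',
    params={'results.material.elements': ['Ti', 'O']},
)
time.sleep(0.1)  # stay well below 30 requests per second when looping
```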
## User Groups diff --git a/docs/reference/basesections.md b/docs/reference/basesections.md new file mode 100644 index 0000000000000000000000000000000000000000..3dbf454aedb90c5085b367aa6ccabf8e3924f69d --- /dev/null +++ b/docs/reference/basesections.md @@ -0,0 +1,7 @@ +# Base Sections + +The `nomad.datamodel.metainfo.basesections` Metainfo package contains a set of +_base sections_. They provides shared definitions across materials science domains and schemas. Some functionality, e.g. the workflow visualisation, depend on these definitions. Inherit from these base sections when you create your own schemas to +align your definitions with those of other schemas and to make use of respective functionality. + +{{ metainfo_package('nomad.datamodel.metainfo.basesections') }} diff --git a/examples/metainfo/data_frames.py b/examples/metainfo/data_frames.py new file mode 100644 index 0000000000000000000000000000000000000000..1760f38b5f30edc14bca3d71bd80b3acb6c32264 --- /dev/null +++ b/examples/metainfo/data_frames.py @@ -0,0 +1,146 @@ +import json +import numpy as np +from nomad.metainfo.data_frames import DataFrameTemplate, ValuesTemplate +from nomad.metainfo.metainfo import MSection, Package, Quantity, SubSection + + +m_package = Package() + +Energy = ValuesTemplate( + name='Energy', + type=np.float64, + shape=[], + unit='J', + iri='https://www.wikidata.org/wiki/Q11379', +) + +Temperature = ValuesTemplate( + name='Temperature', + type=np.float64, + shape=[], + unit='K', + iri='https://www.wikidata.org/wiki/Q11466', +) + +Pressure = ValuesTemplate( + name='Pressure', + type=np.float64, + shape=[], + unit='Pa', + iri='https://www.wikidata.org/wiki/Q39552', +) + +Count = ValuesTemplate( + name='Count', + type=np.int64, + shape=[], + unit='1', + iri='https://www.wikidata.org/wiki/Q1520033', +) + +BandGap = DataFrameTemplate( + name='BandGap', + mandatory_fields=[Energy], +) + +Dos = DataFrameTemplate( + name='Dos', + mandatory_fields=[Count], + mandatory_variables=[Energy], +) + +m_package.__init_metainfo__() + + +class MySection(MSection): + band_gaps = BandGap() + + +my_section = MySection() +my_section.band_gaps = BandGap.create() +my_section.band_gaps.fields = [Energy.create(1.0, 1.1)] +my_section.band_gaps.variables = [Temperature.create(200, 220)] + + +# If really necessary, you can specialize the template generated section class, +# but generally we would like to incentivise that users use the containing section +# to do this. 
+class MyBandGap(BandGap.section_cls): # type: ignore + type = Quantity(type=str) + + def normalize(self, archive, logger): + pass + + +class MySection(MSection): + band_gap = Energy() # Instantiate the Energy values template, creates a quantity + band_gaps = ( + BandGap() + ) # Instantiate the BandGap data frame template, creates a sub section + my_band_gaps = SubSection(section=MyBandGap) + dos = Dos() + + +# Value template instances (quantities) are used like quantities +my_section = MySection() +my_section.band_gap = 1.0 + +# Example of a "heatmap" scenario +my_section = MySection() +my_section.band_gaps = BandGap.create() +my_section.band_gaps.fields = [ + Energy.create(np.array([[1.0, 1.1], [1.3, 1.4], [1.6, 1.7]])) +] +my_section.band_gaps.variables = [ + Temperature.create(200, 220), + Pressure.create(1e5, 1.2e5, 1.4e5), +] + +# Example of a "scatter plot" scenario +my_section = MySection() +my_section.band_gaps = BandGap.create() +my_section.band_gaps.fields = [Energy.create(1.0, 1.1, 1.2)] +my_section.band_gaps.variables = [ + Temperature.create(200, 220, 240, spanned_dimensions=[0]), + Pressure.create(1e5, 1.2e5, 1.4e5, spanned_dimensions=[0]), +] + +# Explicitly spanned dimensions +my_section = MySection() +my_section.band_gaps = BandGap.create() +my_section.band_gaps.fields = [ + Energy.create(np.array([[1.0, 1.1], [1.3, 1.4], [1.6, 1.7]])) +] +my_section.band_gaps.variables = [ + Temperature.create(200, 220, spanned_dimensions=[0]), + Pressure.create(1e5, 1.2e5, 1.4e5, spanned_dimensions=[1]), +] + +# You can also reference values instead of setting them directly +my_section.dos = Dos.create( + fields=[Count.create(1, 2, 2, 4)], + variables=[ + Energy.create(1.0, 1.1), + Temperature.create(my_section.band_gaps.get_variable(Temperature)), + ], +) + +# If you have a specialized template section section its a normal sub section +# and the interface is a bit different +my_band_gaps = MyBandGap(type='foo') +my_section.band_gaps.fields = [Energy.create(1.0, 1.1)] +my_section.band_gaps.variables = [ + Temperature.create(200, 220, spanned_dimensions=[0]), + Pressure.create(1e5, 1.2e5, spanned_dimensions=[0]), +] + +# Access references values +print('###', my_section.dos.get_variable(Temperature).get_values()) + +# Run the constraints to validate field, variables, and dimensions +my_section.m_all_validate() + +print('---- schema ----') +print(json.dumps(m_package.m_to_dict(), indent=2)) +print('---- data ----') +print(json.dumps(my_section.m_to_dict(), indent=2)) diff --git a/gui/src/components/archive/ArchiveBrowser.js b/gui/src/components/archive/ArchiveBrowser.js index 6df3ebce8c7911c870a1769a7d963ec04a2575d6..97f5f9e3e74310db8d4fbb9b15af6aedeef994a0 100644 --- a/gui/src/components/archive/ArchiveBrowser.js +++ b/gui/src/components/archive/ArchiveBrowser.js @@ -33,7 +33,7 @@ import SaveIcon from '@material-ui/icons/Save' import { Alert } from '@material-ui/lab' import classNames from 'classnames' import DOMPurify from 'dompurify' -import { isArray, isNaN, partition, range } from 'lodash' +import { isArray, isNaN, isPlainObject, partition, range } from 'lodash' import { complex, format } from 'mathjs' import PropTypes from 'prop-types' import React, { useCallback, useContext, useEffect, useMemo, useRef, useState } from 'react' @@ -60,6 +60,7 @@ import { useErrors } from '../errors' import Markdown from '../Markdown' import { EntryButton } from '../nav/Routes' import { Quantity as Q } from '../units/Quantity' +import { Unit } from '../units/Unit' import { useDisplayUnit } from 
'../units/useDisplayUnit' import H5Web from '../visualization/H5Web' import Pagination from '../visualization/Pagination' @@ -673,11 +674,13 @@ class QuantityAdaptor extends ArchiveAdaptor { } render() { - if (quantityUsesFullStorage(this.def)) { - return <FullStorageQuantity value={this.obj} def={this.def}/> - } else { - return <Quantity value={this.obj} def={this.def}/> - } + return <Quantity value={this.obj} def={this.def}> + {this.obj?.m_attributes?.length > 0 && <Compartment title="attributes"> + {Object.keys(this.obj?.m_attributes).map(key => ( + <Item key={key} itemKey={key}>{key}</Item> + ))} + </Compartment>} + </Quantity> } } @@ -694,7 +697,7 @@ const convertComplexArray = (real, imag) => { } export function QuantityItemPreview({value, def}) { - const displayUnit = useDisplayUnit(def) + let {finalValue, displayUnit, storageUnit} = useQuantityData(value, def) if (isReference(def)) { return <Box component="span" fontStyle="italic"> @@ -720,14 +723,14 @@ export function QuantityItemPreview({value, def}) { const dimensions = [] let typeLabel = 'unknown' try { - let current = value.re || value.im || value + let current = finalValue.re || finalValue.im || finalValue for (let i = 0; i < def.shape.length; i++) { dimensions.push(current.length) current = current[0] } if (def.type.type_kind === 'python') { typeLabel = 'list' - } else if (typeof value === 'string') { + } else if (typeof finalValue === 'string') { typeLabel = 'HDF5 array' dimensions.length = 0 } else { @@ -752,17 +755,13 @@ export function QuantityItemPreview({value, def}) { </Typography> </Box> } else { - let finalValue if (def.type.type_data === 'nomad.metainfo.metainfo._Datetime' || def.type.type_data === 'nomad.metainfo.data_type.Datetime') { - finalValue = formatTimestamp(value) + finalValue = formatTimestamp(finalValue) } else if (def.type.type_data.startsWith?.('complex')) { - finalValue = convertComplexArray(value.re, value.im) - } else { - finalValue = value + finalValue = convertComplexArray(finalValue.re, finalValue.im) } - if (displayUnit) { - finalValue = new Q(finalValue, def.unit).to(displayUnit).value() + finalValue = new Q(finalValue, storageUnit).to(displayUnit).value() } return <Box component="span" whiteSpace="nowarp"> <Number component="span" variant="body1" value={finalValue} exp={8}/> @@ -776,12 +775,32 @@ QuantityItemPreview.propTypes = ({ def: PropTypes.object.isRequired }) +/** + * Hook for getting the final value and units for a quantity. Also supports + * quantities using full storage. + * + * @param {*} data Value of the quantity + * @param {*} def Defintion of the quantity + * @returns Object containing the final value, storage unit and display unit. 
+ */ +function useQuantityData(data, def) { + let storageUnit = def.unit + let displayUnit = useDisplayUnit(def) + let finalValue = data + if (quantityUsesFullStorage(def) && isPlainObject(data)) { + displayUnit = data?.m_unit && new Unit(data.m_unit) + storageUnit = data?.m_original_unit + finalValue = data?.m_value + } + return {finalValue, displayUnit, storageUnit} +} + export const QuantityValue = React.memo(function QuantityValue({value, def}) { const {uploadId} = useEntryStore() || {} - const displayUnit = useDisplayUnit(def) + let {finalValue, displayUnit, storageUnit} = useQuantityData(value, def) - const getRenderValue = useCallback(value => { - let finalValue + const getRenderValue = useCallback((value) => { + let finalValue, finalUnit if (def.type.type_data === 'nomad.metainfo.metainfo._Datetime' || def.type.type_data === 'nomad.metainfo.data_type.Datetime') { finalValue = formatTimestamp(value) } else if (def.type.type_data.startsWith?.('complex')) { @@ -789,22 +808,24 @@ export const QuantityValue = React.memo(function QuantityValue({value, def}) { } else { finalValue = value } - let finalUnit - if (def.unit && typeof finalValue !== 'string') { - const systemUnitQ = new Q(finalValue, def.unit).to(displayUnit) + + if (typeof finalValue !== 'string' && storageUnit && displayUnit) { + const systemUnitQ = new Q(finalValue, storageUnit).to(displayUnit) finalValue = systemUnitQ.value() finalUnit = systemUnitQ.label() } + return [finalValue, finalUnit] - }, [def, displayUnit]) + }, [def, storageUnit, displayUnit]) const isMathValue = (def.type.type_kind === 'numpy' || def.type.type_kind === 'python') && typeof value !== 'string' if (isMathValue) { - const [finalValue, finalUnit] = getRenderValue(value) + const [renderValue, finalUnit] = getRenderValue(finalValue) if (def.shape.length > 0) { + console.log(renderValue) return <Box textAlign="center"> <Matrix - values={finalValue} + values={renderValue} shape={def.shape} invert={def.shape.length === 1} type={def.type.type_data} @@ -818,54 +839,53 @@ export const QuantityValue = React.memo(function QuantityValue({value, def}) { {finalUnit && <Typography noWrap>{finalUnit}</Typography>} </Box> } else { - return <Number value={finalValue} exp={16} variant="body1" unit={finalUnit}/> + return <Number value={renderValue} exp={16} variant="body1" unit={finalUnit}/> } } else if (def.m_annotations?.browser?.[0]?.render_value === 'HtmlValue' || def.m_annotations?.eln?.[0]?.component === 'RichTextEditQuantity') { - const html = DOMPurify.sanitize(value) + const html = DOMPurify.sanitize(finalValue) return <div dangerouslySetInnerHTML={{__html: html}}/> } else if (def.type?.type_data === 'nomad.metainfo.metainfo._JSON' || def.type?.type_data === 'nomad.metainfo.data_type.JSON') { return <ReactJson name="value" - src={value} + src={finalValue} enableClipboard={false} collapsed={2} displayObjectSize={false} /> } else { if (def.type.type_data.startsWith?.('complex')) { - value = convertComplexArray(value.re, value.im) + finalValue = convertComplexArray(finalValue.re, finalValue.im) - return Array.isArray(value) + return Array.isArray(finalValue) ? 
<ul style={{margin: 0}}> - {value.map((value, index) => <li key={index}><Typography>{value}</Typography></li>)} + {finalValue.map((value, index) => <li key={index}><Typography>{value}</Typography></li>)} </ul> - : <Typography>{value}</Typography> - } else if (Array.isArray(value)) { + : <Typography>{finalValue}</Typography> + } else if (Array.isArray(finalValue)) { return <ul style={{margin: 0}}> - {value.map((value, index) => { - const [finalValue] = getRenderValue(value) + {finalValue.map((value, index) => { + const [renderValue] = getRenderValue(value) return <li key={index}> - <Typography>{typeof finalValue === 'object' ? JSON.stringify(finalValue) : finalValue?.toString()}</Typography> + <Typography>{typeof renderValue === 'object' ? JSON.stringify(renderValue) : renderValue?.toString()}</Typography> </li> })} </ul> } else if (def.type?.type_data === 'nomad.datamodel.hdf5.HDF5Dataset' || def.type?.type_data === 'nomad.datamodel.hdf5.HDF5Reference') { - const {h5UploadId, h5File, h5Source, h5Path} = matchH5Path(value) + const {h5UploadId, h5File, h5Source, h5Path} = matchH5Path(finalValue) return <Compartment title='hdf5'> <H5Web upload_id={h5UploadId || uploadId} filename={h5File} initialPath={h5Path} source={h5Source} sidebarOpen={false}></H5Web> </Compartment> } else if (def?.type?.type_kind === 'custom' && def?.type?.type_data === 'nomad.datamodel.data.Query') { - return <Query value={value} def={def}/> + return <Query value={finalValue} def={def}/> } else { - const [finalValue] = getRenderValue(value) - return <Typography>{typeof finalValue === 'object' ? JSON.stringify(finalValue) : finalValue?.toString()}</Typography> + const [renderValue] = getRenderValue(finalValue) + return <Typography>{typeof renderValue === 'object' ? JSON.stringify(renderValue) : renderValue?.toString()}</Typography> } } }) QuantityValue.propTypes = ({ value: PropTypes.any, - def: PropTypes.object.isRequired, - unit: PropTypes.string + def: PropTypes.object.isRequired }) const InheritingSections = React.memo(function InheritingSections({def, section, lane}) { @@ -1072,7 +1092,7 @@ export function Section({section, def, property, parentRelation, sectionIsEditab const storage = section[quantityDef.name] || {} return <React.Fragment key={key}> {Object.keys(storage).map(quantityName => - renderQuantityItem(key, quantityName, quantityDef, storage[quantityName]?.m_value, disabled) + renderQuantityItem(key, quantityName, quantityDef, storage[quantityName], disabled) )} </React.Fragment> } else { @@ -1623,23 +1643,7 @@ SectionPlots.propTypes = { entryId: PropTypes.string } -function FullStorageQuantity({value, def}) { - const attributes = value.m_attributes || {} - return <Quantity value={value.m_value} def={def} unit={value.m_unit}> - {Object.keys(attributes).length > 0 && <Compartment title="attributes"> - {Object.keys(attributes).map(key => ( - <Item key={key} itemKey={key}>{key}</Item> - ))} - </Compartment>} - </Quantity> -} - -FullStorageQuantity.propTypes = ({ - value: PropTypes.any, - def: PropTypes.object.isRequired -}) - -function Quantity({value, def, unit, children}) { +function Quantity({value, def, children}) { const {prev} = useLane() return <Content> <ArchiveTitle def={def} data={value} kindLabel="value"/> @@ -1657,7 +1661,6 @@ function Quantity({value, def, unit, children}) { <QuantityValue value={value} def={def} - unit={unit} /> </Compartment> {children} diff --git a/gui/src/components/archive/Quantity.spec.js b/gui/src/components/archive/Quantity.spec.js index 
0e269c998b643e6f17c9c64fd58951a7aa357f9f..f1ba5a24c2676b06e8b6b047e2089da57c278bdf 100644 --- a/gui/src/components/archive/Quantity.spec.js +++ b/gui/src/components/archive/Quantity.spec.js @@ -67,6 +67,18 @@ test.each([ undefined, 'mm', '3500Â mm' + ], + [ + 'full storage', + { + m_value: 3.5, + m_unit: 'm', + m_original_unit: 'm' + }, + undefined, + undefined, + 'm', + '3.50000Â m' ] ])('Test QuantityItemPreview %s', async (name, value, unit, displayUnit, elnUnit, expected) => { const def = { @@ -85,7 +97,7 @@ test.each([ render( <QuantityItemPreview - def={{name: 'value1', shape: [], type: {type_kind: 'python', type_data: 'float'}, ...def}} + def={{name: 'value1', shape: [], type: {type_kind: 'python', type_data: 'float'}, variable: !!value?.m_value, ...def}} value={value} /> ) @@ -182,6 +194,22 @@ describe("Test QuantityValue", () => { false, '(1)', 'mm' + ], + [ + 'full storage', + { + m_value: [3.5], + m_unit: 'm', + m_original_unit: 'm' + }, + [1], + undefined, + undefined, + undefined, + '3.50000', + false, + '(1)', + 'm' ] ])('%s', async (name, value, shape, unit, displayUnit, elnUnit, expectedValue, scientific, expectedDim, expectedUnit) => { const def = { @@ -201,7 +229,7 @@ describe("Test QuantityValue", () => { const screen = render( <QuantityValue - def={{name: 'value1', type: {type_kind: 'python', type_data: 'float'}, ...def}} + def={{name: 'value1', type: {type_kind: 'python', type_data: 'float'}, variable: !!value?.m_value, ...def}} value={value} /> ) diff --git a/gui/src/components/entry/properties/WorkflowCard.js b/gui/src/components/entry/properties/WorkflowCard.js index 2e8eb39577051491cb924da00216ea305d582fdf..0ca4c6a8275f954106a376e36536ae14d076fd69 100644 --- a/gui/src/components/entry/properties/WorkflowCard.js +++ b/gui/src/components/entry/properties/WorkflowCard.js @@ -22,7 +22,7 @@ import { makeStyles, Tooltip, IconButton, TextField, FormControl } from '@materi import { Replay, Undo, Label, LabelOff, PlayArrowSharp, StopSharp, Clear } from '@material-ui/icons' import { useHistory } from 'react-router-dom' import { isPlainObject } from 'lodash' -import { PropertyCard, PropertyGrid } from './PropertyCard' +import { PropertyCard } from './PropertyCard' import { resolveNomadUrl, resolveInternalRef, createEntryUrl } from '../../../utils' import { useApi } from '../../api' import { getUrl } from '../../nav/Routes' @@ -203,6 +203,15 @@ const getLinks = async (source, query) => { const isLinked = (source, target) => { if (source.url === target.url) return false + const inputs = [] + if (target.type === 'tasks' && target.nodes) { + inputs.push(...target.nodes.filter(node => node.type && node.type.startsWith('inputs')).map(node => node.url)) + } else { + inputs.push(target.url) + } + + if (inputs.includes(source.url)) return true + const outputs = [] if (source.type === 'tasks' && source.nodes) { outputs.push(...source.nodes.filter(node => node.type === 'outputs').map(node => node.url)) @@ -210,12 +219,7 @@ const getLinks = async (source, query) => { outputs.push(source.url) } - const inputs = [] - if (target.type === 'tasks' && target.nodes) { - inputs.push(...target.nodes.filter(node => node.type && node.type.startsWith('inputs')).map(node => node.url)) - } else { - inputs.push(target.url) - } + if (outputs.includes(target.url)) return true let linked = false for (const output of outputs) { @@ -287,9 +291,10 @@ const Graph = React.memo(({ const svgRef = useRef() const history = useHistory() const asyncError = useAsyncError() + const width = 
document.getElementById('container')?.clientWidth || 675 const finalLayout = useMemo(() => { const defaultLayout = { - width: 700, + width: width, margin: {top: 60, bottom: 60, left: 40, right: 40}, circleRadius: 30, markerWidth: 4, @@ -313,7 +318,7 @@ const Graph = React.memo(({ } } return {...defaultLayout, ...layout} - }, [layout]) + }, [layout, width]) const [tooltipContent, setTooltipContent] = useState('') const [tooltipPosition, setTooltipPosition] = useState({x: undefined, y: undefined}) const [showTooltip, setShowTooltip] = useState(false) @@ -328,6 +333,7 @@ const Graph = React.memo(({ const svg = d3.select(svgRef.current) svg.selectAll('g').remove() + svg.selectAll('defs').remove() const inOutColor = d3.interpolateRgb(color.input, color.output)(0.5) @@ -351,6 +357,11 @@ const Graph = React.memo(({ return tasks.length > 0 } + const isRoot = (d) => { + const rootSections = ['data'] + return rootSections.includes(d.sectionType) + } + const nodeColor = (d) => { if (d.color) return d.color if (d.type === 'link') return '#ffffff' @@ -434,29 +445,21 @@ const Graph = React.memo(({ .style('alignment-baseline', 'middle') gLegend - .on('mouseover', () => { + .on('mouseenter', () => { let tooltip = '' if (label === 'input') { - tooltip = <p> - Input to a task or workflow. - </p> + tooltip = 'Input to a task or workflow.' } else if (label === 'output') { - tooltip = <p> - Output from a task or workflow. - </p> + tooltip = 'Output from a task or workflow.' } else if (label === 'workflow') { - tooltip = <p> - Task containing further sub-tasks. - </p> + tooltip = 'Task containing further sub-tasks.' } else if (label === 'task') { - tooltip = <p> - Elementary task with inputs and outputs. - </p> + tooltip = 'Elementary task with inputs and outputs.' } setShowTooltip(true && legend.attr('visibility') === 'visible') setTooltipContent(tooltip) }) - .on('mouseout', () => { + .on('mouseleave', () => { setShowTooltip(false) }) } @@ -887,15 +890,15 @@ const Graph = React.memo(({ d3.select(`#icon-${d.id}`).style('stroke-opacity', 1).style('stroke', color.outlineHigh) if (d.id === source.id) { if (!previousNode || previousNode === 'root') return - // setShowTooltip(true) - setTooltipContent(<p>Click to go back up</p>) + setShowTooltip(true) + setTooltipContent('Click to go back up') } else if (['inputs', 'outputs'].includes(d.type)) { setShowTooltip(true) - setTooltipContent(<p>Click to switch {d.type} filter</p>) + setTooltipContent(`Click to switch ${d.type} filter`) } else if (d.type === 'tasks') { const sectionType = d.sectionType === 'tasks' ? 'task' : 'workflow' setShowTooltip(true) - setTooltipContent(<p>Click to expand {sectionType}</p>) + setTooltipContent(`Click to expand ${sectionType}`) } } @@ -961,8 +964,8 @@ const Graph = React.memo(({ if (d.id === source.id) return 0.2 return 1 }) - .on('mouseover', handleMouseOverIcon) - .on('mouseout', handleMouseOutIcon) + .on('mouseenter', handleMouseOverIcon) + .on('mouseleave', handleMouseOutIcon) .on('click', handleClickIcon) node @@ -972,8 +975,8 @@ const Graph = React.memo(({ .attr('id', d => `icon-${d.id}`) .attr('stroke', color.outline) .attr('fill', d => nodeColor(d)) - .on('mouseover', handleMouseOverIcon) - .on('mouseout', handleMouseOutIcon) + .on('mouseenter', handleMouseOverIcon) + .on('mouseleave', handleMouseOutIcon) .on('click', handleClickIcon) node.append('text') @@ -988,22 +991,22 @@ const Graph = React.memo(({ if (!d.entryId || !d.parent) return let path = `entry/id/${d.entryId}` const sectionPath = d.path ? 
d.path.replace(/\/(?=\d)/g, ':') : null - path = isWorkflow(d) ? path : sectionPath ? `${path}/data${sectionPath}` : path + path = isWorkflow(d) || isRoot(d) ? path : sectionPath ? `${path}/data${sectionPath}` : path const url = getUrl(path) history.push(url) }) - .on('mouseover', d => { + .on('mouseenter', d => { if (!d.type || !d.parent) return if (!d.sectionType) return d3.select(`#text-${d.id}`).style('font-weight', 'bold') .text(d.name) - const text = isWorkflow(d) ? 'overview page' : 'archive section' + const text = isWorkflow(d) || isRoot(d) ? 'overview page' : 'archive section' if (d.entryId) { setShowTooltip(true) - setTooltipContent(<p>Click to go to {text}</p>) + setTooltipContent(`Click to go to ${text}`) } }) - .on('mouseout', d => { + .on('mouseleave', d => { setShowTooltip(false) d3.select(`#text-${d.id}`).style('font-weight', null) .text(d => trimName(d.name)) @@ -1142,15 +1145,15 @@ const Graph = React.memo(({ setNodesPosition(linkNode) }) }) - .on('mouseover', d => { + .on('mouseenter', d => { d3.select(`#link-${d.id}`).style('stroke-opacity', 1.0).style('stroke', color.linkHigh) svg.select(`.marker-${d.id}`).attr('fill-opacity', 1.0).style('stroke', color.linkHigh).style('fill', color.linkHigh) d3.select(`#icon-${d.source.id}`).style('stroke', color.linkHigh).style('stroke-opacity', 1.0) d3.select(`#icon-${d.target.id}`).style('stroke', color.linkHigh).style('stroke-opacity', 1.0) setShowTooltip(d.label) - setTooltipContent(<p>{d.label}</p>) + setTooltipContent(d.label) }) - .on('mouseout', d => { + .on('mouseleave', d => { d3.select(`#link-${d.id}`).style('stroke-opacity', 0.5).style('stroke', color.link) svg.select(`.marker-${d.id}`).attr('fill-opacity', 0.5).style('stroke', color.link).style('fill', color.link) d3.select(`#icon-${d.source.id}`).style('stroke', color.outline).style('stroke-opacity', 0.5) @@ -1194,8 +1197,6 @@ const Graph = React.memo(({ <Tooltip title={tooltipContent} open={showTooltip} - enterDelay={1000} - enterNextDelay={0} onMouseMove={event => setTooltipPosition({x: event.pageX, y: event.pageY})} PopperProps={ {anchorEl: { @@ -1212,7 +1213,7 @@ const Graph = React.memo(({ }} } > - <div id='tooltip'> + <div> <svg className={classes.root} ref={svgRef}></svg> </div> </Tooltip> @@ -1316,9 +1317,9 @@ const WorkflowCard = React.memo(({archive}) => { </div> return graph && <PropertyCard title='Workflow Graph' action={actions}> - <PropertyGrid> + <div id='container'> {graph} - </PropertyGrid> + </div> </PropertyCard> }) diff --git a/gui/src/components/search/Filter.js b/gui/src/components/search/Filter.js index dd4d4782e05af72ae5b0b76f271b03d0c6f2ef3e..a7f863ea7c689a5c808bbf9c83907c4b6737fa0f 100644 --- a/gui/src/components/search/Filter.js +++ b/gui/src/components/search/Filter.js @@ -144,9 +144,7 @@ export class Filter { this.parent = parent this.group = params.group this.placeholder = params?.placeholder - this.multiple = params?.multiple === undefined - ? multiTypes.has(this.dtype) - : params?.multiple + this.multiple = params?.multiple ?? multiTypes.has(this.dtype) this.exclusive = params?.exclusive === undefined ? true : params?.exclusive this.queryMode = params?.queryMode || (this.multiple ? 
'any' : undefined) this.options = params?.options || getEnumOptions(this.quantity) diff --git a/gui/src/components/search/FilterRegistry.js b/gui/src/components/search/FilterRegistry.js index 4d91c63e73f5a763f4b272f9d82bb300b4d34e4f..cfe9f46e9a459985458cdb846340574e8a890db0 100644 --- a/gui/src/components/search/FilterRegistry.js +++ b/gui/src/components/search/FilterRegistry.js @@ -143,7 +143,7 @@ const termQuantityAll = {aggs: {terms: {size: 5}}, exclusive: false, multiple: t const termQuantityAllNonExclusive = {...termQuantityNonExclusive, queryMode: 'all'} const noAggQuantity = {} const nestedQuantity = {} -const noQueryQuantity = {multiple: false, global: true} +const noQueryQuantity = {global: true} const numberHistogramQuantity = {multiple: false, exclusive: false} // Filters that directly correspond to a metainfo value @@ -587,6 +587,7 @@ registerFilter( 'combine', { ...noQueryQuantity, + dtype: DType.Boolean, default: true, description: 'If selected, your filters may be matched from several entries that contain the same material. When unchecked, the material has to have a single entry that matches all your filters.' } diff --git a/gui/src/components/search/input/InputTerms.js b/gui/src/components/search/input/InputTerms.js index 690e04920be11881fb12e014e0342df7c5e404be..e71af1169764c90f0f993f44a9ed4684627c24cc 100644 --- a/gui/src/components/search/input/InputTerms.js +++ b/gui/src/components/search/input/InputTerms.js @@ -28,10 +28,24 @@ import InputUnavailable from './InputUnavailable' import Placeholder from '../../visualization/Placeholder' import { useSearchContext } from '../SearchContext' import { isNil, isNumber } from 'lodash' +import { DType } from '../../../utils' import Pagination from '../../visualization/Pagination' import { guiState } from '../../GUIMenu' import { InputTextQuantity } from './InputText' +/** + * Converts a string value to its appropriate type based on the provided dtype. + * Needed for converting booleans used as object keys (object keys are always + * strings). + * + * @param {string} value - The value to be converted. + * @param {DType} dtype - The data type to convert the value to. + * @returns {boolean|string} - The converted value. + */ +function getFinalKey(value, dtype) { + return dtype === DType.Boolean ? value === 'true' : value +} + /** * Generic input component that can be configured to fit several use cases. The * most typical configufations are: @@ -152,6 +166,7 @@ const InputTerms = React.memo(({ // results or change in the available options. 
useEffect(() => { let options = Object.entries(finalOptions).reduce((opt, [key, value]) => { + key = getFinalKey(key, filterData[searchQuantity]?.dtype) const selected = filter?.has(key) || false opt[key] = { checked: selected, @@ -184,7 +199,7 @@ const InputTerms = React.memo(({ } setVisibleOptions(options) - }, [agg?.data, filter, finalOptions, fixedOptions, isStatisticsEnabled, showStatistics, sortStatic]) + }, [agg?.data, filter, filterData, finalOptions, fixedOptions, isStatisticsEnabled, searchQuantity, showStatistics, sortStatic]) // Show more values const handleShowMore = useCallback(() => { @@ -214,9 +229,9 @@ const InputTerms = React.memo(({ newOptions[key].checked = selected const checked = Object.entries(newOptions) .filter(([key, value]) => value.checked) - .map(([key, value]) => key) + .map(([key, value]) => getFinalKey(key, filterData[searchQuantity]?.dtype)) setFilter(new Set(checked)) - }, [setFilter, visibleOptions]) + }, [setFilter, visibleOptions, filterData, searchQuantity]) // Create the search component const searchComponent = useMemo(() => { diff --git a/gui/src/components/units/useDisplayUnit.js b/gui/src/components/units/useDisplayUnit.js index 8036688340a99776072b817e7fa198ce3600327b..52d5937f15e7a4b1a768df694e3d16477bc74ed8 100644 --- a/gui/src/components/units/useDisplayUnit.js +++ b/gui/src/components/units/useDisplayUnit.js @@ -4,6 +4,12 @@ import {Unit} from "./Unit" import {useUnitContext} from "./UnitContext" import {getFieldProps} from "../editQuantity/StringEditQuantity" +/** + * Used to retrieve the unit to use for displaying a quantity. + * + * @param {*} quantityDef Definition for the quantity + * @returns {Unit} The unit to use for displaying the quantity. + */ export function useDisplayUnit(quantityDef) { const {units} = useUnitContext() const {raiseError} = useErrors() diff --git a/gui/src/components/uploads/UploadPage.spec.js b/gui/src/components/uploads/UploadPage.spec.js index dcba24b1698988e8d38b65e5600b49a39e64c7be..8cb86d5573293210bd7bed710a3e2a6b1106f0bf 100644 --- a/gui/src/components/uploads/UploadPage.spec.js +++ b/gui/src/components/uploads/UploadPage.spec.js @@ -516,7 +516,7 @@ test('Toggle visible for all checkbox; check embargo, icon', async () => { await user.click(await screen.findByRole('option', { name: '36' })) expect(embargoButton).toHaveTextContent('36') expect(embargoHelper).toHaveTextContent('months before the data becomes public') - expect(screen.getByTooltip('Unpublished, accessible by you, coauthors and reviewers')).toBeInTheDocument() + expect(screen.getByTooltip('Unpublished, accessible only by you')).toBeInTheDocument() await testAndToggleCheckbox(false) expect(embargoButton).toHaveAttribute('aria-disabled', 'true') @@ -528,7 +528,7 @@ test('Toggle visible for all checkbox; check embargo, icon', async () => { expect(embargoButton).not.toHaveAttribute('aria-disabled', 'true') expect(embargoButton).toHaveTextContent('No embargo') expect(embargoHelper).toHaveTextContent('publish without embargo') - expect(screen.getByTooltip('Unpublished, accessible by you, coauthors and reviewers')).toBeInTheDocument() + expect(screen.getByTooltip('Unpublished, accessible only by you')).toBeInTheDocument() await testAndToggleCheckbox(false, { skipToggle: true }) }) diff --git a/gui/src/components/uploads/UploadStatusIcon.js b/gui/src/components/uploads/UploadStatusIcon.js index 88f106402ddee6caafa42db076d0ec0b489af260..b03db43d641ff3e528ad0e8160f96035245992f4 100644 --- a/gui/src/components/uploads/UploadStatusIcon.js +++ 
b/gui/src/components/uploads/UploadStatusIcon.js @@ -70,7 +70,7 @@ const UploadStatusIcon = React.memo(({data, user, ...props}) => { if (isVisibleForAll) { tooltip = "Unpublished but accessible by everyone" } else if (isMainAuthor) { - tooltip = "Unpublished, accessible by you, coauthors and reviewers" + tooltip = "Unpublished, accessible only by you" } else if (isCoauthor) { tooltip = "Unpublished, accessible by you as a coauthor" } else if (isReviewer) { diff --git a/gui/src/components/uploads/UploadStatusIcon.spec.js b/gui/src/components/uploads/UploadStatusIcon.spec.js index 4220c96dd1fe9084ed8657e593d794104d862d2c..6ee021c2ae2a4b23a517d0af85f145a34047534e 100644 --- a/gui/src/components/uploads/UploadStatusIcon.spec.js +++ b/gui/src/components/uploads/UploadStatusIcon.spec.js @@ -30,7 +30,7 @@ describe('test different states', function() { ['published, embargo, viewer', 'Published with embargo and accessible by you as a reviewer', {published: true, with_embargo: true, main_author: 'a', viewers: [{user_id: 'b'}]}, {sub: 'b'}], ['published, embargo, external', 'Published with embargo and not accessible by you', {published: true, with_embargo: true, main_author: 'a', viewers: [{user_id: 'b'}]}, {sub: 'c'}], ['published, embargo, no user data', 'Published with embargo and might become accessible after login', {published: true, with_embargo: true, main_author: 'a', viewers: [{user_id: 'b'}]}, undefined], - ['unpublished, main author', 'Unpublished, accessible by you, coauthors and reviewers', {published: false, main_author: 'a'}, {sub: 'a'}], + ['unpublished, main author', 'Unpublished, accessible only by you', {published: false, main_author: 'a'}, {sub: 'a'}], ['unpublished, coauthor', 'Unpublished, accessible by you as a coauthor', {published: false, main_author: 'a', coauthors: ['b']}, {sub: 'b'}], ['unpublished, author', 'Unpublished, accessible by you as a coauthor', {published: false, main_author: 'a', authors: [{user_id: 'b'}]}, {sub: 'b'}], ['unpublished, reviewer', 'Unpublished, accessible by you as a reviewer', {published: false, main_author: 'a', reviewers: ['b']}, {sub: 'b'}], diff --git a/gui/src/components/uploads/UploadsPage.spec.js b/gui/src/components/uploads/UploadsPage.spec.js index b774b88a0a1f504394484cca960f75c50e63d945..7f26706834cf8468505380ae9501f4f3f1ac82fa 100644 --- a/gui/src/components/uploads/UploadsPage.spec.js +++ b/gui/src/components/uploads/UploadsPage.spec.js @@ -51,7 +51,7 @@ test('Render uploads page: sort by upload create time', async () => { expect(within(rows[i]).queryByText(`dft_upload_${11 - i}`)).toBeInTheDocument() expect(within(rows[i]).queryByTitle(((i + 1) % 2 === 0 ? 'Published and accessible by everyone' - : 'Unpublished, accessible by you, coauthors and reviewers' + : 'Unpublished, accessible only by you' ))).toBeInTheDocument() } @@ -77,7 +77,7 @@ test('Render uploads page: sort by upload create time', async () => { expect(within(rows[i]).queryByText(`dft_upload_${i + 1}`)).toBeInTheDocument() expect(within(rows[i]).queryByTitle(((i + 1) % 2 === 0 ? 
'Published and accessible by everyone' - : 'Unpublished, accessible by you, coauthors and reviewers' + : 'Unpublished, accessible only by you' ))).toBeInTheDocument() } diff --git a/mkdocs.yml b/mkdocs.yml index 4fa2bc73058f0316339b91ae8a986de7f7aa2428..bbb8a6363b34e159fe26e9f29244577404279f8b 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -87,6 +87,7 @@ nav: - Reference: - reference/config.md - reference/annotations.md + - reference/basesections.md - reference/cli.md - reference/plugins.md - reference/parsers.md @@ -140,7 +141,7 @@ use_directory_urls: false plugins: - search - macros: - module_name: nomad/mkdocs + module_name: nomad/mkdocs/__init__ - redirects: redirect_maps: "pythonlib.md": "howto/programmatic/pythonlib.md" diff --git a/nomad/app/dcat/common.py b/nomad/app/dcat/common.py index 9a91b70c9feb5bc6c6314f217c015c26852d4337..90e6fc4876220a103f264a83c77806d0d34e9391 100644 --- a/nomad/app/dcat/common.py +++ b/nomad/app/dcat/common.py @@ -16,14 +16,13 @@ # limitations under the License. # -from typing import Optional -from fastapi import Response, Query, Header import urllib.parse -from rdflib import Graph from enum import Enum -from nomad.config import config +from fastapi import Header, Query, Response +from rdflib import Graph +from nomad.config import config root_path = f'{config.services.api_base_path}/dcat' base_url = config.api_url(api='dcat') diff --git a/nomad/app/dcat/main.py b/nomad/app/dcat/main.py index 49d4d68d50fcfb197fb6d2afb15718fcf9414ce4..0fbfa0d4f6d193a255273a904c3afdca47f25769 100644 --- a/nomad/app/dcat/main.py +++ b/nomad/app/dcat/main.py @@ -16,10 +16,11 @@ # limitations under the License. # -from fastapi import FastAPI, status, Request +import traceback + +from fastapi import FastAPI, Request, status from fastapi.middleware.cors import CORSMiddleware from fastapi.responses import JSONResponse, RedirectResponse -import traceback from nomad import utils from nomad.config import config @@ -27,7 +28,6 @@ from nomad.config import config from .common import root_path from .routers import dcat - logger = utils.get_logger(__name__) diff --git a/nomad/app/dcat/mapping.py b/nomad/app/dcat/mapping.py index d22399a7628fba69adf3d0309b1b1cf1f63072dc..c15ec0a87693b66ae4be9c291f64ff839517dc42 100644 --- a/nomad/app/dcat/mapping.py +++ b/nomad/app/dcat/mapping.py @@ -16,15 +16,15 @@ # limitations under the License. # -from rdflib import Graph, Literal, RDF, URIRef, BNode -from rdflib.namespace import Namespace, DCAT, DCTERMS as DCT, FOAF, RDF +from rdflib import RDF, BNode, Graph, Literal, URIRef +from rdflib.namespace import DCAT, FOAF, RDF, Namespace +from rdflib.namespace import DCTERMS as DCT from nomad.config import config from nomad.datamodel import User from .common import url - VCARD = Namespace('http://www.w3.org/2006/vcard/ns#') HYDRA = Namespace('http://www.w3.org/ns/hydra/core#') diff --git a/nomad/app/dcat/routers/dcat.py b/nomad/app/dcat/routers/dcat.py index 7dc52a01be1fa3dea138428aebff2653e6552b61..0aa5b6b6ece36af188994b1a6ac63f91d0b5ffb6 100644 --- a/nomad/app/dcat/routers/dcat.py +++ b/nomad/app/dcat/routers/dcat.py @@ -16,22 +16,27 @@ # limitations under the License. 
# -from typing import Union -from fastapi import APIRouter, Query, Path, HTTPException, status, Depends -from datetime import datetime, date +from datetime import date, datetime +from enum import Enum + from elasticsearch_dsl import Q +from fastapi import APIRouter, Depends, HTTPException, Path, Query, status from nomad import utils -from nomad.utils import strip -from nomad.search import search -from nomad.app.v1.models import MetadataPagination, HTTPExceptionModel +from nomad.app.v1.models import HTTPExceptionModel, MetadataPagination from nomad.app.v1.utils import create_responses +from nomad.search import search +from nomad.utils import strip from ..common import rdf_response from ..mapping import Mapping router = APIRouter() -default_tag = 'dcat' + + +class APITag(str, Enum): + DEFAULT = 'dcat' + logger = utils.get_logger(__name__) @@ -58,7 +63,7 @@ _raw_response = ( @router.get( '/datasets/{entry_id}', - tags=[default_tag], + tags=[APITag.DEFAULT], summary='Returns a DCAT dataset for a given NOMAD entry id.', responses=create_responses(_bad_id_response, _raw_response), ) @@ -83,7 +88,7 @@ async def get_dataset( @router.get( '/catalog/', - tags=[default_tag], + tags=[APITag.DEFAULT], summary='Returns a DCAT dataset for a given NOMAD entry id.', responses=create_responses(_raw_response), ) diff --git a/nomad/app/h5grove_app.py b/nomad/app/h5grove_app.py index 110eb589a7cdac8d0c436fb84d5b4ddbae3c9896..8e01225ff41914e31848e83f5e3d250ef4300f13 100644 --- a/nomad/app/h5grove_app.py +++ b/nomad/app/h5grove_app.py @@ -17,23 +17,24 @@ # from __future__ import annotations -from fastapi import FastAPI, status, Request, Depends -from fastapi.middleware.cors import CORSMiddleware -from fastapi.responses import JSONResponse -import traceback import re +import traceback import urllib.parse -import h5py -from typing import Dict, Any, IO from collections.abc import Callable +from typing import IO, Any -from h5grove import fastapi_utils as h5grove_router, utils as h5grove_utils +import h5py +from fastapi import Depends, FastAPI, Request, status +from fastapi.middleware.cors import CORSMiddleware +from fastapi.responses import JSONResponse +from h5grove import fastapi_utils as h5grove_router +from h5grove import utils as h5grove_utils from nomad import utils -from nomad.files import UploadFiles, PublicUploadFiles from nomad.app.v1.models import User from nomad.app.v1.routers.auth import create_user_dependency from nomad.app.v1.routers.uploads import get_upload_with_read_access +from nomad.files import PublicUploadFiles, UploadFiles logger = utils.get_logger(__name__) @@ -43,8 +44,9 @@ def open_zipped_h5_file( create_error: Callable[[int, str], Exception], h5py_options: dict[str, Any] = {}, ) -> h5py.File: - import re import io + import re + from nomad import files """ diff --git a/nomad/app/main.py b/nomad/app/main.py index 2168e5fd965f7e7bd939d5779e7f707dd2cf7c6f..29829f997ace12d293728e89518a0cb1c668850b 100644 --- a/nomad/app/main.py +++ b/nomad/app/main.py @@ -23,16 +23,17 @@ from fastapi import FastAPI, Response, status from fastapi.exception_handlers import ( http_exception_handler as default_http_exception_handler, ) -from starlette.exceptions import HTTPException as StarletteHTTPException from fastapi.responses import HTMLResponse, JSONResponse +from starlette.exceptions import HTTPException as StarletteHTTPException from starlette.middleware.base import BaseHTTPMiddleware from nomad import infrastructure from nomad.config import config from nomad.config.models.plugins import APIEntryPoint +from 
.static import GuiFiles +from .static import app as static_files_app from .v1.main import app as v1_app -from .static import app as static_files_app, GuiFiles class OasisAuthenticationMiddleware(BaseHTTPMiddleware): @@ -164,11 +165,10 @@ async def http_exception_handler(request, exc): @app.on_event('startup') async def startup_event(): - from nomad.cli.dev import get_gui_artifacts_js - from nomad.cli.dev import get_gui_config - from nomad.parsing.parsers import import_all_parsers from nomad import infrastructure + from nomad.cli.dev import get_gui_artifacts_js, get_gui_config from nomad.metainfo.elasticsearch_extension import entry_type + from nomad.parsing.parsers import import_all_parsers import_all_parsers() diff --git a/nomad/app/optimade/common.py b/nomad/app/optimade/common.py index 623cd3847cdf3cb2ebe36363bef0ba18ed62841b..45824ef06bce9789286da9887d30f52151adbc1d 100644 --- a/nomad/app/optimade/common.py +++ b/nomad/app/optimade/common.py @@ -16,12 +16,11 @@ # limitations under the License. # -from typing import Dict, cast +from typing import cast from nomad.metainfo.data_type import Datatype, to_optimade_type -from nomad.metainfo.metainfo import Quantity, Reference from nomad.metainfo.elasticsearch_extension import SearchQuantity, entry_type - +from nomad.metainfo.metainfo import Quantity, Reference _provider_specific_fields: dict[str, SearchQuantity] = None diff --git a/nomad/app/optimade/elasticsearch.py b/nomad/app/optimade/elasticsearch.py index 910306b4199e4808c0f3d4268f8ab3deb4721c42..d966f7d7ccbba75f9e8c7b8c47d14695b43494a0 100644 --- a/nomad/app/optimade/elasticsearch.py +++ b/nomad/app/optimade/elasticsearch.py @@ -1,23 +1,23 @@ -from typing import List, Dict, Set, Any -from elasticsearch_dsl import Q +from typing import Any +from elasticsearch_dsl import Q from optimade.filterparser import LarkParser +from optimade.models import StructureResource from optimade.server.entry_collections import EntryCollection from optimade.server.exceptions import BadRequest from optimade.server.mappers import StructureMapper from optimade.server.mappers.entries import classproperty -from optimade.models import StructureResource -from nomad.units import ureg -from nomad.atomutils import Formula -from nomad.search import search +from nomad import datamodel, files, utils from nomad.app.v1.models import MetadataPagination, MetadataRequired +from nomad.atomutils import Formula from nomad.config import config -from nomad import datamodel, files, utils +from nomad.search import search +from nomad.units import ureg -from .filterparser import _get_transformer as get_transformer -from .common import provider_specific_fields from ...archive import to_json +from .common import provider_specific_fields +from .filterparser import _get_transformer as get_transformer logger = utils.get_logger(__name__) diff --git a/nomad/app/optimade/filterparser.py b/nomad/app/optimade/filterparser.py index c24b6c540071f11b06f95e60e5aa484a0e5807c5..11caebaf9e9625d820b365292fc1b8ddaeb1a448 100644 --- a/nomad/app/optimade/filterparser.py +++ b/nomad/app/optimade/filterparser.py @@ -16,19 +16,16 @@ # limitations under the License. 
# -from typing import Dict -from elasticsearch_dsl import Q from cachetools import cached - +from elasticsearch_dsl import Q from optimade.filterparser import LarkParser +from optimade.filtertransformers.elasticsearch import ElasticsearchQuantity as Quantity from optimade.filtertransformers.elasticsearch import ( - ElasticsearchQuantity as Quantity, ElasticTransformer as OPTElasticTransformer, ) from .common import provider_specific_fields - _parser = LarkParser(version=(1, 0, 1)) diff --git a/nomad/app/resources/common.py b/nomad/app/resources/common.py index f0cedfb72be356e1a06565929ef2d59554392ec2..96a0e9c977d4e6b25fb515c2b45d029f5563b760 100644 --- a/nomad/app/resources/common.py +++ b/nomad/app/resources/common.py @@ -18,6 +18,5 @@ from nomad.config import config - root_path = f'{config.services.api_base_path}/resources' base_url = config.api_url(api='resources') diff --git a/nomad/app/resources/main.py b/nomad/app/resources/main.py index e5c4b6992de2ffdd2ea96610a917f511023567f9..ab6e17d11d972638ef073fb55525c9156328d475 100644 --- a/nomad/app/resources/main.py +++ b/nomad/app/resources/main.py @@ -16,18 +16,18 @@ # limitations under the License. # -from fastapi import FastAPI, status, Request -from fastapi.middleware.cors import CORSMiddleware -from fastapi.responses import JSONResponse import traceback + from celery.signals import worker_process_init +from fastapi import FastAPI, Request, status +from fastapi.middleware.cors import CORSMiddleware +from fastapi.responses import JSONResponse from nomad import utils from nomad.config import config from .routers import resources - logger = utils.get_logger(__name__) mongo_client_resources = None diff --git a/nomad/app/resources/routers/resources.py b/nomad/app/resources/routers/resources.py index 878d3a3ea335a78e2c7d92bcab4566c9a0d7aaf6..45f4f9559bd3e8a20bc373aca798d13b4d6eb6d4 100644 --- a/nomad/app/resources/routers/resources.py +++ b/nomad/app/resources/routers/resources.py @@ -16,39 +16,44 @@ # limitations under the License. 
# -import re -import os -import io -import bs4 import asyncio -import httpx -from fastapi import APIRouter, Query as FastApiQuery -from pydantic import BaseModel, Field -from typing import List, Any, Dict, Optional +import io +import os +import re from datetime import datetime +from enum import Enum +from typing import Any + import ase.io +import bs4 +import httpx +from asgiref.sync import async_to_sync +from fastapi import APIRouter +from fastapi import Query as FastApiQuery from mongoengine import ( - Document, - StringField, + BooleanField, DateTimeField, + Document, IntField, ListField, - BooleanField, + StringField, ) from mongoengine.queryset.visitor import Q -from asgiref.sync import async_to_sync +from pydantic import BaseModel, Field from nomad import utils -from nomad.config import config from nomad.atomutils import Formula +from nomad.config import config from nomad.processing.base import app - logger = utils.get_logger(__name__) router = APIRouter() -default_tag = 'resources' + +class APITag(str, Enum): + DEFAULT = 'resources' + # TODO generate list from optimade api optimade_providers = { @@ -664,7 +669,7 @@ def retrieve_resources( @router.get( '/', - tags=[default_tag], + tags=[APITag.DEFAULT], summary='Get a list of external resources.', response_model=ResourcesModel, response_model_exclude_unset=True, diff --git a/nomad/app/v1/main.py b/nomad/app/v1/main.py index 1d1e00590cc633cd8bb0a5c9611dfc4c100764e3..af4def92478e5e50ecd01f38c3e3fc652838063c 100644 --- a/nomad/app/v1/main.py +++ b/nomad/app/v1/main.py @@ -18,9 +18,9 @@ import traceback -from fastapi import FastAPI, status, Request +from fastapi import FastAPI, Request, status from fastapi.middleware.cors import CORSMiddleware -from fastapi.responses import JSONResponse, RedirectResponse, ORJSONResponse +from fastapi.responses import JSONResponse, ORJSONResponse, RedirectResponse from pyinstrument import Profiler from starlette.middleware import Middleware from starlette.middleware.base import BaseHTTPMiddleware @@ -29,22 +29,23 @@ from starlette.types import ASGIApp, Receive, Scope, Send from nomad import utils from nomad.config import config + from .common import root_path from .routers import ( - users, - entries, - materials, auth, - info, datasets, - uploads, - suggestions, - metainfo, - north, - systems, + entries, federation, graph, groups, + info, + materials, + metainfo, + north, + suggestions, + systems, + uploads, + users, ) logger = utils.get_logger(__name__) @@ -123,18 +124,18 @@ async def unicorn_exception_handler(request: Request, e: Exception): ) -app.include_router(info.router, prefix='/info') app.include_router(auth.router, prefix='/auth') +app.include_router(datasets.router, prefix='/datasets') +app.include_router(entries.router, prefix='/entries') app.include_router(federation.router, prefix='/federation') +app.include_router(graph.router, prefix='/graph') +app.include_router(groups.router, prefix='/groups') +app.include_router(info.router, prefix='/info') app.include_router(materials.router, prefix='/materials') -app.include_router(entries.router, prefix='/entries') -app.include_router(datasets.router, prefix='/datasets') -app.include_router(uploads.router, prefix='/uploads') app.include_router(metainfo.router, prefix='/metainfo') -app.include_router(users.router, prefix='/users') -app.include_router(suggestions.router, prefix='/suggestions') if config.north.enabled: app.include_router(north.router, prefix='/north') +app.include_router(suggestions.router, prefix='/suggestions') 
app.include_router(systems.router, prefix='/systems') -app.include_router(graph.router, prefix='/graph') -app.include_router(groups.router, prefix='/groups') +app.include_router(uploads.router, prefix='/uploads') +app.include_router(users.router, prefix='/users') diff --git a/nomad/app/v1/models/graph/graph_models.py b/nomad/app/v1/models/graph/graph_models.py index acb7b59ffc1fb17c93a989a2ef918fa21a714f10..dff9d7d42bdf3fc109d4d23e1a32c92613bccf00 100644 --- a/nomad/app/v1/models/graph/graph_models.py +++ b/nomad/app/v1/models/graph/graph_models.py @@ -17,35 +17,37 @@ # from __future__ import annotations -from typing import Optional, List, Union, Any, Literal -from pydantic import BaseModel, ConfigDict, Field, Extra -from ..groups import UserGroup, UserGroupPagination, UserGroupQuery +from typing import Any, Literal -from nomad.graph.model import ( - RequestConfig, - DatasetQuery, - MetainfoQuery, - MetainfoPagination, +from pydantic import BaseModel, ConfigDict, Extra, Field + +from nomad.app.v1.models.graph.utils import ( + generate_request_model, + generate_response_model, + mapped, ) -from nomad.metainfo.pydantic_extension import PydanticModel -from nomad.datamodel.data import User as UserModel from nomad.app.v1.models.models import Metadata, MetadataResponse -from nomad.app.v1.routers.datasets import Dataset as DatasetV1, DatasetPagination +from nomad.app.v1.routers.datasets import Dataset as DatasetV1 +from nomad.app.v1.routers.datasets import DatasetPagination from nomad.app.v1.routers.uploads import ( + EntryProcData, + EntryProcDataPagination, + PaginationResponse, UploadProcData, UploadProcDataPagination, UploadProcDataQuery, - PaginationResponse, - EntryProcData, - EntryProcDataPagination, ) - -from nomad.app.v1.models.graph.utils import ( - generate_request_model, - generate_response_model, - mapped, +from nomad.datamodel.data import User as UserModel +from nomad.graph.model import ( + DatasetQuery, + MetainfoPagination, + MetainfoQuery, + RequestConfig, ) +from nomad.metainfo.pydantic_extension import PydanticModel + +from ..groups import UserGroup, UserGroupPagination, UserGroupQuery class Error(BaseModel): diff --git a/nomad/app/v1/models/graph/utils.py b/nomad/app/v1/models/graph/utils.py index 6d420a6e1002c51bca54bd55ccbb815ddac4c34d..6c871c55478ef09754f5998ac8aa97686b1e645f 100644 --- a/nomad/app/v1/models/graph/utils.py +++ b/nomad/app/v1/models/graph/utils.py @@ -17,32 +17,33 @@ # from __future__ import annotations + +import sys +from collections.abc import Callable +from datetime import datetime +from types import UnionType from typing import ( - Optional, - Literal, - Union, Any, ForwardRef, - get_type_hints, - get_origin, - get_args, + Literal, + Optional, + Union, cast, + get_args, + get_origin, + get_type_hints, ) -from collections.abc import Callable -from types import UnionType -from datetime import datetime + from pydantic import ( BaseModel, ConfigDict, + Field, TypeAdapter, + ValidationError, create_model, - Field, model_validator, - ValidationError, ) from pydantic.config import ConfigDict as BaseConfigDict -import sys - ref_prefix = '#/components/schemas' request_suffix = 'Request' diff --git a/nomad/app/v1/models/groups.py b/nomad/app/v1/models/groups.py index 1e7c34bc4d6f6af56489a2ff73ba7deb4a80728e..dbdae164fba850f19314e7cb855e8312bf9d10d3 100644 --- a/nomad/app/v1/models/groups.py +++ b/nomad/app/v1/models/groups.py @@ -1,13 +1,4 @@ -from typing import List, Optional, Set - -from pydantic import ( - BaseModel, - ConfigDict, - Field, - field_validator, - 
root_validator, - validator, -) +from pydantic import BaseModel, ConfigDict, Field, field_validator from pydantic_core import PydanticCustomError from .pagination import Direction, Pagination, PaginationResponse diff --git a/nomad/app/v1/models/models.py b/nomad/app/v1/models/models.py index 4513d7b03e1f3d136364f408db12446779825e7e..7dab17d0f8e71bbe934b4646085151a71b885b2e 100644 --- a/nomad/app/v1/models/models.py +++ b/nomad/app/v1/models/models.py @@ -16,31 +16,30 @@ # limitations under the License. # import datetime -import enum import fnmatch import json import re -from typing import Any from collections.abc import Mapping +from enum import Enum +from typing import Annotated, Any -import pydantic from fastapi import Body, HTTPException, Request from fastapi import Query as FastApiQuery -from pydantic import ( # pylint: disable=unused-import - field_validator, - model_validator, - StringConstraints, - ConfigDict, +from pydantic import ( # noqa: F401 BaseModel, + ConfigDict, Field, StrictBool, StrictFloat, StrictInt, + StringConstraints, + field_validator, + model_validator, ) from pydantic.main import create_model from pydantic_core import PydanticCustomError -from nomad import datamodel, metainfo # pylint: disable=unused-import +from nomad import datamodel, metainfo # noqa: F401 from nomad.app.v1.utils import parameter_dependency_from_model from nomad.metainfo.elasticsearch_extension import ( DocumentType, @@ -50,8 +49,6 @@ from nomad.metainfo.elasticsearch_extension import ( from nomad.utils import strip from .pagination import Pagination, PaginationResponse -from typing import Annotated - User: Any = datamodel.User.m_def.a_pydantic.model # It is important that datetime.datetime comes last. Otherwise, number valued strings @@ -81,7 +78,7 @@ owner_documentation = strip( ) -class Owner(str, enum.Enum): +class Owner(str, Enum): __doc__ = owner_documentation # There seems to be a slight bug in fast API. When it creates the example in OpenAPI @@ -917,7 +914,7 @@ class HistogramAggregation(BucketAggregation): """ ), ) - offset: float | None = Field(None, gte=0) + offset: float | None = Field(None) extended_bounds: Bounds | None = None @model_validator(mode='before') diff --git a/nomad/app/v1/models/pagination.py b/nomad/app/v1/models/pagination.py index 2a127218625f3a7a4a552e72e1e351406cbbe592..c3fba17369172950363d5af640d5c8390cadb97c 100644 --- a/nomad/app/v1/models/pagination.py +++ b/nomad/app/v1/models/pagination.py @@ -1,21 +1,14 @@ -import enum -from typing import Optional +from enum import Enum + from fastapi import HTTPException, Request -from pydantic import ( - BaseModel, - ConfigDict, - Field, - field_validator, - model_validator, - validator, -) +from pydantic import BaseModel, ConfigDict, Field, field_validator, model_validator from pydantic_core import PydanticCustomError from nomad.app.v1.utils import update_url_query_arguments from nomad.utils import strip -class Direction(str, enum.Enum): +class Direction(str, Enum): """ Order direction, either ascending (`asc`) or descending (`desc`) """ diff --git a/nomad/app/v1/routers/auth.py b/nomad/app/v1/routers/auth.py index 8cf79a9df7e5f22536959550c6dca67ff4a011f5..01740cda39d9ec2026a2d50fd7694631716c9052 100644 --- a/nomad/app/v1/routers/auth.py +++ b/nomad/app/v1/routers/auth.py @@ -16,39 +16,38 @@ # limitations under the License. 
# -import hmac +import datetime import hashlib +import hmac import uuid -import requests -from typing import cast, Union from collections.abc import Callable -from inspect import Parameter, signature +from enum import Enum from functools import wraps -from fastapi import ( - APIRouter, - Depends, - Query as FastApiQuery, - Request, - HTTPException, - status, -) +from inspect import Parameter, signature +from typing import cast + +import jwt +import requests +from fastapi import APIRouter, Depends, HTTPException, Request, status +from fastapi import Query as FastApiQuery from fastapi.security import OAuth2PasswordBearer, OAuth2PasswordRequestForm from pydantic import BaseModel -import jwt -import datetime -from nomad import utils, infrastructure, datamodel +from nomad import datamodel, infrastructure, utils from nomad.config import config from nomad.utils import get_logger, strip from ..common import root_path -from ..models import User, HTTPExceptionModel +from ..models import HTTPExceptionModel, User from ..utils import create_responses logger = get_logger(__name__) router = APIRouter() -default_tag = 'auth' + + +class APITag(str, Enum): + DEFAULT = 'auth' class Token(BaseModel): @@ -126,7 +125,7 @@ def create_user_dependency( except Exception as e: logger = utils.get_logger(__name__) logger.error( - 'Api usage by unknown user. Possible missconfiguration', exc_info=e + 'API usage by unknown user. Possible misconfiguration', exc_info=e ) raise HTTPException( status_code=status.HTTP_401_UNAUTHORIZED, @@ -284,8 +283,8 @@ def _get_user_signature_token_auth(signature_token: str, request: Request) -> Us corresponding user object, or None, if no upload_token provided. """ if signature_token: - user = _get_user_from_simple_token(signature_token) - return user + return _get_user_from_simple_token(signature_token) + elif request: auth_cookie = request.cookies.get('Authorization') if auth_cookie: @@ -293,11 +292,11 @@ def _get_user_signature_token_auth(signature_token: str, request: Request) -> Us auth_cookie = requests.utils.unquote(auth_cookie) # type: ignore if auth_cookie.startswith('Bearer '): cookie_bearer_token = auth_cookie[7:] - user = cast( + return cast( datamodel.User, infrastructure.keycloak.tokenauth(cookie_bearer_token), ) - return user + except infrastructure.KeycloakError as e: raise HTTPException( status_code=status.HTTP_401_UNAUTHORIZED, @@ -347,7 +346,7 @@ _bad_credentials_response = ( @router.post( '/token', - tags=[default_tag], + tags=[APITag.DEFAULT], summary='Get an access token', responses=create_responses(_bad_credentials_response), response_model=Token, @@ -382,15 +381,22 @@ async def get_token(form_data: OAuth2PasswordRequestForm = Depends()): @router.get( '/token', - tags=[default_tag], + tags=[APITag.DEFAULT], summary='Get an access token', responses=create_responses(_bad_credentials_response), response_model=Token, + deprecated=True, ) async def get_token_via_query(username: str, password: str): """ - This is an convenience alternative to the **POST** version of this operation. - It allows you to retrieve an *access token* by providing username and password. + **[DEPRECATED]** This endpoint is **no longer recommended**. + Please use the **POST** endpoint instead. + + This was a convenience alternative to the **POST** version, allowing retrieval of + an *access token* by providing a username and password via query parameters. + + **Why is this deprecated?** + Query parameters expose credentials in URLs, which can be logged or cached. 
""" try: access_token = infrastructure.keycloak.basicauth(username, password) @@ -406,7 +412,7 @@ async def get_token_via_query(username: str, password: str): @router.get( '/signature_token', - tags=[default_tag], + tags=[APITag.DEFAULT], summary='Get a signature token', response_model=SignatureToken, ) @@ -423,7 +429,7 @@ async def get_signature_token( @router.get( '/app_token', - tags=[default_tag], + tags=[APITag.DEFAULT], summary='Get an app token', response_model=AppToken, ) @@ -452,8 +458,7 @@ def generate_simple_token(user_id, expires_in: int): """ expires_at = datetime.datetime.utcnow() + datetime.timedelta(seconds=expires_in) payload = dict(user=user_id, exp=expires_at) - token = jwt.encode(payload, config.services.api_secret, 'HS256') - return token + return jwt.encode(payload, config.services.api_secret, 'HS256') def generate_upload_token(user): diff --git a/nomad/app/v1/routers/datasets.py b/nomad/app/v1/routers/datasets.py index c4850b0d29b59b4b23576a740281691fe63e7488..6038f44f98ccd1af3a4174e7022a3161e0b72d2c 100644 --- a/nomad/app/v1/routers/datasets.py +++ b/nomad/app/v1/routers/datasets.py @@ -17,46 +17,44 @@ # import re -from typing import cast, Optional, List -from fastapi import ( - APIRouter, - Request, - Depends, - Query as FastApiQuery, - Path, - HTTPException, - status, -) -from pydantic import field_validator, BaseModel, Field, validator from datetime import datetime -import enum +from enum import Enum +from typing import cast + +from fastapi import APIRouter, Depends, HTTPException, Path, Request, status +from fastapi import Query as FastApiQuery +from pydantic import BaseModel, Field, field_validator -from nomad import utils, datamodel, processing +from nomad import datamodel, processing, utils from nomad.config import config -from nomad.metainfo.elasticsearch_extension import entry_type -from nomad.utils import strip, create_uuid from nomad.datamodel import Dataset as DatasetDefinitionCls from nomad.doi import DOI, DOIException +from nomad.metainfo.elasticsearch_extension import entry_type from nomad.search import search, update_by_query +from nomad.utils import create_uuid, strip -from .auth import create_user_dependency -from .entries import _do_exhaustive_search -from ..utils import create_responses, parameter_dependency_from_model from ..models import ( + Any_, + Direction, + HTTPExceptionModel, + MetadataPagination, + MetadataRequired, + Owner, Pagination, PaginationResponse, - MetadataPagination, Query, - HTTPExceptionModel, User, - Direction, - Owner, - Any_, ) - +from ..utils import create_responses, parameter_dependency_from_model +from .auth import create_user_dependency +from .entries import _do_exhaustive_search router = APIRouter() -default_tag = 'datasets' + + +class APITag(str, Enum): + DEFAULT = 'datasets' + logger = utils.get_logger(__name__) @@ -178,7 +176,10 @@ Dataset = datamodel.Dataset.m_def.a_pydantic.model def _delete_dataset(user: User, dataset_id, dataset): es_query = cast(Query, {'datasets.dataset_id': dataset_id}) entries = _do_exhaustive_search( - owner=Owner.user, query=es_query, user=user, include=['entry_id'] + owner=Owner.user, + query=es_query, + user=user, + required=MetadataRequired(include=['entry_id']), ) entry_ids = [entry['entry_id'] for entry in entries] mongo_query = {'_id': {'$in': entry_ids}} @@ -257,7 +258,7 @@ class DatasetResponse(BaseModel): data: Dataset = Field() # type: ignore -class DatasetType(str, enum.Enum): +class DatasetType(str, Enum): owned = 'owned' foreign = 'foreign' @@ -271,7 +272,7 @@ class 
DatasetCreate(BaseModel): # type: ignore @router.get( '/', - tags=[default_tag], + tags=[APITag.DEFAULT], summary='Get a list of datasets', response_model=DatasetsResponse, response_model_exclude_unset=True, @@ -318,7 +319,7 @@ async def get_datasets( @router.get( '/{dataset_id}', - tags=[default_tag], + tags=[APITag.DEFAULT], summary='Get a list of datasets', response_model=DatasetResponse, responses=create_responses(_bad_id_response), @@ -347,7 +348,7 @@ async def get_dataset( @router.post( '/', - tags=[default_tag], + tags=[APITag.DEFAULT], summary='Create a new dataset', response_model=DatasetResponse, responses=create_responses(_existing_name_response), @@ -408,7 +409,10 @@ async def post_datasets( empty = True else: entries = _do_exhaustive_search( - owner=Owner.user, query=es_query, user=user, include=['entry_id'] + owner=Owner.user, + query=es_query, + user=user, + required=MetadataRequired(include=['entry_id']), ) entry_ids = [entry['entry_id'] for entry in entries] mongo_query = {'_id': {'$in': entry_ids}} @@ -436,7 +440,7 @@ async def post_datasets( @router.delete( '/{dataset_id}', - tags=[default_tag], + tags=[APITag.DEFAULT], summary='Delete a dataset', response_model=DatasetResponse, responses=create_responses( @@ -482,7 +486,7 @@ async def delete_dataset( @router.post( '/{dataset_id}/action/doi', - tags=[default_tag], + tags=[APITag.DEFAULT], summary='Assign a DOI to a dataset', response_model=DatasetResponse, responses=create_responses( diff --git a/nomad/app/v1/routers/entries.py b/nomad/app/v1/routers/entries.py index 5e1836894cffe5c628a433c56d36bb0f15c172df..f974e154214bba6607ac9a7d8f79ff0a7267ab12 100644 --- a/nomad/app/v1/routers/entries.py +++ b/nomad/app/v1/routers/entries.py @@ -15,95 +15,85 @@ # See the License for the specific language governing permissions and # limitations under the License. 
 #
+import io
+import json
+import os.path
+from collections.abc import Iterator
 from datetime import datetime
-
 from enum import Enum
 from typing import Any
-from collections.abc import Iterator
-from fastapi import (
-    APIRouter,
-    Depends,
-    Path,
-    status,
-    HTTPException,
-    Request,
-    Query as QueryParameter,
-    Body,
-)
-from fastapi.responses import StreamingResponse, ORJSONResponse
-from fastapi.exceptions import RequestValidationError
-from pydantic import (
-    ConfigDict,
-    field_validator,
-    BaseModel,
-    Field,
-)
-import os.path
-import io
-import json
+
 import orjson
+import yaml
+from fastapi import APIRouter, Body, Depends, HTTPException, Path, Request, status
+from fastapi import Query as QueryParameter
+from fastapi.exceptions import RequestValidationError
+from fastapi.responses import ORJSONResponse, StreamingResponse
+from pydantic import BaseModel, ConfigDict, Field, field_validator
 from pydantic.main import create_model
 from starlette.responses import Response
-import yaml

-from nomad import files, utils, metainfo, processing as proc
-from nomad import datamodel
+from nomad import datamodel, files, metainfo, utils
+from nomad import processing as proc
+from nomad.archive import ArchiveQueryError, RequiredReader, RequiredValidationError
 from nomad.config import config
 from nomad.config.models.config import Reprocess
 from nomad.datamodel import EditableUserMetadata
 from nomad.datamodel.context import ServerContext
 from nomad.files import StreamedFile, create_zipstream_async
-from nomad.processing.data import Upload
-from nomad.utils import strip
-from nomad.archive import RequiredReader, RequiredValidationError, ArchiveQueryError
 from nomad.groups import get_group_ids
+from nomad.metainfo.elasticsearch_extension import entry_type
+from nomad.processing.data import Upload
 from nomad.search import (
     AuthenticationRequiredError,
     QueryValidationError,
     SearchError,
     search,
-    update_metadata as es_update_metadata,
 )
-from nomad.metainfo.elasticsearch_extension import entry_type
+from nomad.search import update_metadata as es_update_metadata
+from nomad.utils import strip

-from .auth import create_user_dependency
-from ..utils import (
-    create_download_stream_zipped,
-    create_download_stream_raw_file,
-    browser_download_headers,
-    DownloadItem,
-    create_responses,
-    log_query,
-)
 from ..models import (
     Aggregation,
-    Pagination,
-    PaginationResponse,
+    Files,
+    HTTPExceptionModel,
+    Metadata,
+    MetadataEditRequest,
     MetadataPagination,
-    TermsAggregation,
-    WithQuery,
-    WithQueryAndPagination,
     MetadataRequired,
     MetadataResponse,
-    Metadata,
-    MetadataEditRequest,
-    Files,
-    Query,
-    User,
     Owner,
+    Pagination,
+    PaginationResponse,
+    Query,
     QueryParameters,
-    metadata_required_parameters,
+    TermsAggregation,
+    User,
+    WithQuery,
+    WithQueryAndPagination,
     files_parameters,
     metadata_pagination_parameters,
-    HTTPExceptionModel,
+    metadata_required_parameters,
 )
-
+from ..utils import (
+    DownloadItem,
+    browser_download_headers,
+    create_download_stream_raw_file,
+    create_download_stream_zipped,
+    create_responses,
+    log_query,
+)
+from .auth import create_user_dependency

 router = APIRouter()
-default_tag = 'entries'
-metadata_tag = 'entries/metadata'
-raw_tag = 'entries/raw'
-archive_tag = 'entries/archive'
+
+
+class APITag(str, Enum):
+    DEFAULT = 'entries'
+    METADATA = 'entries/metadata'
+    RAW = 'entries/raw'
+    ARCHIVE = 'entries/archive'
+

 logger = utils.get_logger(__name__)
@@ -277,19 +267,21 @@ class EntryMetadataResponse(BaseModel):

 class EntryMetadataEditActionField(BaseModel):
-    value: str = Field(None, description='The value/values that is set as a string.')
+    value: str | None = Field(
+        None, description='The value/values that is set as a string.'
+    )
     success: bool | None = Field(
         None, description='If this can/could be done. Only in API response.'
     )
     message: str | None = Field(
         None,
-        descriptin='A message that details the action result. Only in API response.',
+        description='A message that details the action result. Only in API response.',
     )


-EntryMetadataEditActions = create_model(
-    'EntryMetadataEditActions',
-    **{  # type: ignore
+EntryMetadataEditActions: Any = create_model(
+    'EntryMetadataEditActions',  # type: ignore
+    **{
         quantity.name: (
             EntryMetadataEditActionField | None
             if quantity.is_scalar
@@ -307,7 +299,7 @@ class EntryMetadataEdit(WithQuery):
     actions: EntryMetadataEditActions = Field(  # type: ignore
         None,
         description='Each action specifies a single value (even for multi valued quantities).',
-    )
+    )  # type: ignore

     @field_validator('owner')
     @classmethod
@@ -495,7 +487,7 @@ def perform_search(*args, **kwargs):

 @router.post(
     '/query',
-    tags=[metadata_tag],
+    tags=[APITag.METADATA],
     summary='Search entries and retrieve their metadata',
     response_model=MetadataResponse,
     responses=create_responses(_bad_owner_response),
@@ -536,7 +528,7 @@ async def post_entries_metadata_query(

 @router.get(
     '',
-    tags=[metadata_tag],
+    tags=[APITag.METADATA],
     summary='Search entries and retrieve their metadata',
     response_model=MetadataResponse,
     responses=create_responses(_bad_owner_response),
@@ -574,17 +566,32 @@ async def get_entries_metadata(

 def _do_exhaustive_search(
-    owner: Owner, query: Query, include: list[str], user: User
+    owner: Owner,
+    query: Query,
+    required: MetadataRequired,
+    user: User,
+    page_size: int = 100,
 ) -> Iterator[dict[str, Any]]:
-    page_after_value = None
+    """Perform a paginated search.
+
+    Args:
+        owner (Owner): The owner defining the search scope.
+        query (Query): The query specifying search filters and conditions.
+        required (MetadataRequired): Includes and excludes for the response.
+        user (User): The user performing the search, used for authorization.
+        page_size (int): The number of results per page.
+ """ + page_after_value: str | None = None while True: response = perform_search( owner=owner, query=query, pagination=MetadataPagination( - page_size=100, page_after_value=page_after_value, order_by='upload_id' + page_size=page_size, + page_after_value=page_after_value, + order_by='upload_id', ), - required=MetadataRequired(include=include), + required=required, user_id=user.user_id if user is not None else None, ) @@ -719,7 +726,10 @@ def _answer_entries_raw_request(owner: Owner, query: Query, files: Files, user: def download_items_generator(): # go through all entries that match the query for entry_metadata in _do_exhaustive_search( - owner, query, include=search_includes, user=user + owner, + query, + required=MetadataRequired(include=search_includes), + user=user, ): upload_id = entry_metadata['upload_id'] mainfile = entry_metadata['mainfile'] @@ -770,7 +780,7 @@ _entries_rawdir_query_docstring = strip( @router.post( '/rawdir/query', - tags=[raw_tag], + tags=[APITag.RAW], summary='Search entries and get their raw files metadata', description=_entries_rawdir_query_docstring, response_model=EntriesRawDirResponse, @@ -790,7 +800,7 @@ async def post_entries_rawdir_query( @router.get( '/rawdir', - tags=[raw_tag], + tags=[APITag.RAW], summary='Search entries and get their raw files metadata', description=_entries_rawdir_query_docstring, response_model=EntriesRawDirResponse, @@ -834,7 +844,7 @@ _entries_raw_query_docstring = strip( @router.post( '/raw/query', - tags=[raw_tag], + tags=[APITag.RAW], summary='Search entries and download their raw files', description=_entries_raw_query_docstring, response_class=StreamingResponse, @@ -850,7 +860,7 @@ async def post_entries_raw_query( @router.get( '/raw', - tags=[raw_tag], + tags=[APITag.RAW], summary='Search entries and download their raw files', description=_entries_raw_query_docstring, response_class=StreamingResponse, @@ -991,7 +1001,7 @@ _entries_archive_docstring = strip( @router.post( '/archive/query', - tags=[archive_tag], + tags=[APITag.ARCHIVE], summary='Search entries and access their archives', description=_entries_archive_docstring, response_model=EntriesArchiveResponse, @@ -1025,7 +1035,7 @@ async def post_entries_archive_query( @router.get( '/archive', - tags=[archive_tag], + tags=[APITag.ARCHIVE], summary='Search entries and access their archives', description=_entries_archive_docstring, response_model=EntriesArchiveResponse, @@ -1090,7 +1100,7 @@ def _answer_entries_archive_download_request( def streamed_files(): # go through all entries that match the query for entry_metadata in _do_exhaustive_search( - owner, query, include=search_includes, user=user + owner, query, required=MetadataRequired(include=search_includes), user=user ): path = os.path.join( entry_metadata['upload_id'], f'{entry_metadata["entry_id"]}.json' @@ -1142,7 +1152,7 @@ _entries_archive_download_docstring = strip( @router.post( '/archive/download/query', - tags=[archive_tag], + tags=[APITag.ARCHIVE], summary='Search entries and download their archives', description=_entries_archive_download_docstring, response_class=StreamingResponse, @@ -1164,7 +1174,7 @@ async def post_entries_archive_download_query( @router.get( '/archive/download', - tags=[archive_tag], + tags=[APITag.ARCHIVE], summary='Search entries and download their archives', description=_entries_archive_download_docstring, response_class=StreamingResponse, @@ -1188,7 +1198,7 @@ async def get_entries_archive_download( @router.get( '/{entry_id}', - tags=[metadata_tag], + tags=[APITag.METADATA], 
     summary='Get the metadata of an entry by its id',
     response_model=EntryMetadataResponse,
     responses=create_responses(_bad_id_response),
@@ -1225,7 +1235,7 @@ async def get_entry_metadata(

 @router.get(
     '/{entry_id}/rawdir',
-    tags=[raw_tag],
+    tags=[APITag.RAW],
     summary='Get the raw files metadata for an entry by its id',
     response_model=EntryRawDirResponse,
     responses=create_responses(_bad_id_response),
@@ -1264,7 +1274,7 @@ async def get_entry_rawdir(

 @router.get(
     '/{entry_id}/raw',
-    tags=[raw_tag],
+    tags=[APITag.RAW],
     summary='Get the raw data of an entry by its id',
     response_class=StreamingResponse,
     responses=create_responses(_bad_id_response, _raw_response),
@@ -1300,7 +1310,7 @@ async def get_entry_raw(

 @router.get(
     '/{entry_id}/raw/{path}',
-    tags=[raw_tag],
+    tags=[APITag.RAW],
     summary='Get the raw data of an entry by its id',
     response_class=StreamingResponse,
     responses=create_responses(
@@ -1430,7 +1440,7 @@ def answer_entry_archive_request(

 @router.post(
     '/{entry_id}/edit',
-    tags=[raw_tag],
+    tags=[APITag.RAW],
     summary='Edit a raw mainfile in archive format.',
     response_model=EntryEditResponse,
     response_model_exclude_unset=True,
@@ -1561,7 +1571,7 @@ async def post_entry_edit(

 @router.get(
     '/{entry_id}/archive',
-    tags=[archive_tag],
+    tags=[APITag.ARCHIVE],
     summary='Get the archive for an entry by its id',
     response_model=EntryArchiveResponse,
     response_model_exclude_unset=True,
@@ -1585,7 +1595,7 @@ async def get_entry_archive(

 @router.get(
     '/{entry_id}/archive/download',
-    tags=[archive_tag],
+    tags=[APITag.ARCHIVE],
     summary='Get the archive for an entry by its id as plain archive json',
     responses=create_responses(_bad_id_response, _archive_download_response),
 )
@@ -1607,7 +1617,7 @@ async def get_entry_archive_download(

 @router.post(
     '/{entry_id}/archive/query',
-    tags=[archive_tag],
+    tags=[APITag.ARCHIVE],
     summary='Get the archive for an entry by its id',
     response_model=EntryArchiveResponse,
     response_model_exclude_unset=True,
@@ -1641,7 +1651,10 @@ def edit(
     upload_ids: set[str] = set()
     with utils.timer(logger, 'edit query executed'):
         all_entries = _do_exhaustive_search(
-            owner=Owner.user, query=query, include=['entry_id', 'upload_id'], user=user
+            owner=Owner.user,
+            query=query,
+            required=MetadataRequired(include=['entry_id', 'upload_id']),
+            user=user,
         )

         for entry_dict in all_entries:
@@ -1705,7 +1718,7 @@ _editable_quantities = {

 @router.post(
     '/edit_v0',
-    tags=[metadata_tag],
+    tags=[APITag.METADATA],
     summary='Edit the user metadata of a set of entries',
     response_model=EntryMetadataEditResponse,
     response_model_exclude_unset=True,
@@ -1896,7 +1909,7 @@ async def post_entry_metadata_edit(

 @router.post(
     '/edit',
-    tags=[metadata_tag],
+    tags=[APITag.METADATA],
     summary='Edit the user metadata of a set of entries',
     response_model=MetadataEditRequest,
     response_model_exclude_unset=True,
diff --git a/nomad/app/v1/routers/federation.py b/nomad/app/v1/routers/federation.py
index 29cf1b2e08dbaf6a67916b8ecdeecf8c5d6d2736..c554228d0bffcc7b92666bed3a3f82dbcc7a4329 100644
--- a/nomad/app/v1/routers/federation.py
+++ b/nomad/app/v1/routers/federation.py
@@ -22,8 +22,9 @@ API endpoint to receive telemetry data (in logstash format) from local installat

 import socket
 import zlib
+from enum import Enum

-from fastapi import Request, HTTPException
+from fastapi import HTTPException, Request
 from fastapi.routing import APIRouter

 from nomad import utils
@@ -32,12 +33,15 @@ from nomad.config import config
 logger = utils.get_logger(__name__)

 router = APIRouter()
-default_tag = 'federation'
+
+
+class APITag(str, Enum):
+    DEFAULT = 'federation'


 @router.post(
     '/logs/',
-    tags=[default_tag],
+    tags=[APITag.DEFAULT],
     summary='Receive logs in logstash format from other Nomad installations and store into central logstash '
     'for further analysis.',
 )
diff --git a/nomad/app/v1/routers/graph.py b/nomad/app/v1/routers/graph.py
index 907e4c866521dc5252c5655fd9e329d1bafaef78..1d1006c53c11f2f8198d23cda71807b6c1003ee8 100644
--- a/nomad/app/v1/routers/graph.py
+++ b/nomad/app/v1/routers/graph.py
@@ -16,24 +16,30 @@
 # limitations under the License.
 #

-from fastapi import Depends, APIRouter, Body, HTTPException
+from enum import Enum
+
+from fastapi import APIRouter, Body, Depends, HTTPException
 from fastapi.responses import ORJSONResponse

+from nomad.app.v1.models.graph import GraphRequest, GraphResponse
 from nomad.graph.graph_reader import (
-    MongoReader,
     ConfigError,
     GeneralReader,
-    UserReader,
+    MongoReader,
     Token,
+    UserReader,
 )
 from nomad.graph.lazy_wrapper import LazyWrapper
+
+from ..models import User
 from .auth import create_user_dependency
 from .entries import EntriesArchive
-from ..models import User
-from nomad.app.v1.models.graph import GraphRequest, GraphResponse

 router = APIRouter()
-default_tag = 'graph'
+
+
+class APITag(str, Enum):
+    DEFAULT = 'graph'


 def unwrap_response(result):
@@ -69,7 +75,7 @@ def relocate_children(request):

 @router.post(
     '/raw_query',
-    tags=[default_tag],
+    tags=[APITag.DEFAULT],
     summary='Query the database with a graph style without verification.',
     description='Query the database with a graph style without verification.',
     response_class=GraphJSONResponse,
@@ -84,7 +90,7 @@ async def raw_query(

 @router.post(
     '/query',
-    tags=[default_tag],
+    tags=[APITag.DEFAULT],
     summary='Query the database with a graph style.',
     description='Query the database with a graph style.',
     response_model=GraphResponse,
@@ -112,7 +118,7 @@ async def basic_query(

 @router.post(
     '/archive/query',
-    tags=[default_tag],
+    tags=[APITag.DEFAULT],
     summary='Search entries and access their archives',
     response_class=GraphJSONResponse,
 )
diff --git a/nomad/app/v1/routers/groups.py b/nomad/app/v1/routers/groups.py
index 26697b9bf23aee9a19f65a0461bcd94f9155b31f..945b95f75706b66025c04ac8970047ae2b48e5bd 100644
--- a/nomad/app/v1/routers/groups.py
+++ b/nomad/app/v1/routers/groups.py
@@ -16,6 +16,8 @@
 # limitations under the License.
 #

+from enum import Enum
+
 from fastapi import APIRouter, Depends, HTTPException, Request, status

 from nomad.app.v1.models.groups import (
@@ -27,11 +29,6 @@ from nomad.app.v1.models.groups import (
 )
 from nomad.app.v1.models.pagination import PaginationResponse
 from nomad.app.v1.utils import parameter_dependency_from_model
-from typing import List, Optional, Set
-
-from fastapi import APIRouter, Depends, HTTPException, Query, status
-from pydantic import ConfigDict, BaseModel, Field
-
 from nomad.datamodel import User as UserDataModel
 from nomad.groups import MongoUserGroup
 from nomad.groups import create_user_group as create_mongo_user_group
@@ -41,7 +38,10 @@ from ..models import User
 from .auth import create_user_dependency

 router = APIRouter()
-default_tag = 'groups'
+
+
+class APITag(str, Enum):
+    DEFAULT = 'groups'


 user_group_query_parameters = parameter_dependency_from_model(
@@ -93,7 +93,7 @@ def check_user_may_edit_user_group(user: User, user_group: MongoUserGroup):

 @router.get(
     '',
-    tags=[default_tag],
+    tags=[APITag.DEFAULT],
     summary='List user groups. Use at most one filter.',
     response_model=UserGroupResponse,
 )
@@ -118,7 +118,7 @@ async def get_user_groups(

 @router.get(
     '/{group_id}',
-    tags=[default_tag],
+    tags=[APITag.DEFAULT],
     summary='Get data about user group.',
     response_model=UserGroup,
 )
@@ -131,7 +131,7 @@ async def get_user_group(group_id: str):

 @router.post(
     '',
-    tags=[default_tag],
+    tags=[APITag.DEFAULT],
     status_code=status.HTTP_201_CREATED,
     summary='Create user group.',
     response_model=UserGroup,
@@ -153,7 +153,7 @@ async def create_user_group(

 @router.post(
     '/{group_id}/edit',
-    tags=[default_tag],
+    tags=[APITag.DEFAULT],
     summary='Update user group.',
     response_model=UserGroup,
 )
@@ -179,7 +179,7 @@ async def update_user_group(

 @router.delete(
     '/{group_id}',
-    tags=[default_tag],
+    tags=[APITag.DEFAULT],
     status_code=status.HTTP_204_NO_CONTENT,
     summary='Delete user group.',
 )
diff --git a/nomad/app/v1/routers/info.py b/nomad/app/v1/routers/info.py
index 8f123c6ec80955219fa4d1e30eee11b1e4b15158..0e63c5778822a8f4cca30a14645ac58435b09d2a 100644
--- a/nomad/app/v1/routers/info.py
+++ b/nomad/app/v1/routers/info.py
@@ -20,24 +20,28 @@ API endpoint that deliver backend configuration details.
 """

-from typing import Dict, Any, List, Optional
 from datetime import datetime
+from enum import Enum
+from typing import Any
+
 from fastapi.routing import APIRouter
 from pydantic.fields import Field
 from pydantic.main import BaseModel

 from nomad import normalizing
+from nomad.app.v1.models import Aggregation, StatisticsAggregation
 from nomad.config import config
-from nomad.utils import strip
-from nomad.search import search
+from nomad.metainfo.elasticsearch_extension import entry_type
 from nomad.parsing import parsers
 from nomad.parsing.parsers import code_metadata
-from nomad.app.v1.models import Aggregation, StatisticsAggregation
-from nomad.metainfo.elasticsearch_extension import entry_type
-
+from nomad.search import search
+from nomad.utils import strip

 router = APIRouter()
-default_tag = 'info'
+
+
+class APITag(str, Enum):
+    DEFAULT = 'info'


 class MetainfoModel(BaseModel):
@@ -89,11 +93,11 @@ class InfoModel(BaseModel):
     normalizers: list[str]
     plugin_entry_points: list[dict] = Field(
         None,
-        desciption='List of plugin entry points that are activated in this deployment.',
+        description='List of plugin entry points that are activated in this deployment.',
     )
     plugin_packages: list[dict] = Field(
         None,
-        desciption='List of plugin packages that are installed in this deployment.',
+        description='List of plugin packages that are installed in this deployment.',
     )
     statistics: StatisticsModel = Field(None, description='General NOMAD statistics')
     search_quantities: dict
@@ -145,7 +149,7 @@ def statistics():

 @router.get(
     '',
-    tags=[default_tag],
+    tags=[APITag.DEFAULT],
     summary='Get information about the nomad backend and its configuration',
     response_model_exclude_unset=True,
     response_model_exclude_none=True,
diff --git a/nomad/app/v1/routers/materials.py b/nomad/app/v1/routers/materials.py
index 579342c5fb1e8e0ec7a6fd89f6642fc8c562300c..70c6ef56b6b52531df4ac31d5a9090e82abcfeb3 100644
--- a/nomad/app/v1/routers/materials.py
+++ b/nomad/app/v1/routers/materials.py
@@ -17,32 +17,36 @@
 #

 from typing import Any
-from fastapi import APIRouter, Depends, Path, status, HTTPException, Request
+
+from fastapi import APIRouter, Depends, HTTPException, Path, Request, status
 from fastapi.exception_handlers import RequestValidationError
 from pydantic import BaseModel, Field

 from nomad import utils
+from nomad.metainfo.elasticsearch_extension import material_index, material_type
+from nomad.search import (
+    AuthenticationRequiredError,
+    QueryValidationError,
+    SearchError,
+    search,
+)
 from nomad.utils import strip
-from nomad.search import AuthenticationRequiredError, SearchError
-from nomad.search import search, QueryValidationError
-from nomad.metainfo.elasticsearch_extension import material_type, material_index

-from .auth import create_user_dependency
-from ..utils import create_responses
 from ..models import (
-    User,
-    Owner,
-    WithQuery,
-    MetadataResponse,
+    HTTPExceptionModel,
     Metadata,
     MetadataPagination,
     MetadataRequired,
+    MetadataResponse,
+    Owner,
+    QueryParameters,
+    User,
+    WithQuery,
     metadata_pagination_parameters,
     metadata_required_parameters,
-    QueryParameters,
-    HTTPExceptionModel,
 )
-
+from ..utils import create_responses
+from .auth import create_user_dependency

 router = APIRouter()
diff --git a/nomad/app/v1/routers/metainfo.py b/nomad/app/v1/routers/metainfo.py
index fe3477ce5b1452b1d45904d97342101d0b937e39..a92fccbb529875004a86ca8dc889a7ab047dea92 100644
--- a/nomad/app/v1/routers/metainfo.py
+++ b/nomad/app/v1/routers/metainfo.py
@@ -17,18 +17,18 @@
 #
 import copy
 import datetime
-from typing import Any, Dict
+from typing import Any

-from fastapi import APIRouter, Path, status, HTTPException
+from fastapi import APIRouter, HTTPException, Path, status
 from pydantic import BaseModel, Field

 from nomad.app.v1.models import HTTPExceptionModel
 from nomad.app.v1.utils import create_responses
-from nomad.metainfo import Package
-from nomad.metainfo.metainfo import MSection, Section, Quantity, Datetime, JSON
-from nomad.metainfo.mongoengine_extension import MongoDocument, Mongo
-from nomad.utils import strip, get_logger
 from nomad.config import config
+from nomad.metainfo import Package
+from nomad.metainfo.metainfo import JSON, Datetime, MSection, Quantity, Section
+from nomad.metainfo.mongoengine_extension import Mongo, MongoDocument
+from nomad.utils import get_logger, strip

 logger = get_logger(__name__)
diff --git a/nomad/app/v1/routers/north.py b/nomad/app/v1/routers/north.py
index 0ad4cb4105f3952479a09455a42f50636e4247e2..73c795bcf98baf1a2e78a37a02c94c2ccfe14312 100644
--- a/nomad/app/v1/routers/north.py
+++ b/nomad/app/v1/routers/north.py
@@ -17,30 +17,33 @@
 #

 import os
-import requests
-
-from typing import List, Dict, Optional
 from enum import Enum
-from nomad.groups import get_group_ids
-from pydantic import BaseModel
-from fastapi import APIRouter, Depends, status, HTTPException
+
+import requests
+from fastapi import APIRouter, Depends, HTTPException, status
 from mongoengine.queryset.visitor import Q
+from pydantic import BaseModel

+from nomad.app.v1.routers.auth import generate_simple_token
 from nomad.config import config
 from nomad.config.models.north import NORTHTool
-from nomad.utils import strip, get_logger, slugify
+from nomad.groups import get_group_ids
 from nomad.processing import Upload
-from nomad.app.v1.routers.auth import generate_simple_token
-from .auth import create_user_dependency, oauth2_scheme
-from ..models import User, HTTPExceptionModel
-from ..utils import create_responses
+from nomad.utils import get_logger, slugify, strip

+from ..models import HTTPExceptionModel, User
+from ..utils import create_responses
+from .auth import create_user_dependency

 TOOLS = {k: v for k, v in config.north.tools.filtered_items()}
-default_tag = 'north'

 router = APIRouter()

+
+class APITag(str, Enum):
+    DEFAULT = 'north'
+
+
 hub_api_headers = {'Authorization': f'Bearer {config.north.hub_service_api_token}'}

 logger = get_logger(__name__)
@@ -108,7 +111,7 @@ def _get_status(tool: ToolModel, user: User) -> ToolModel:

 @router.get(
     '/',
-    tags=[default_tag],
+    tags=[APITag.DEFAULT],
     response_model=ToolsResponseModel,
     summary='Get a list of all configured tools and their current state.',
     response_model_exclude_unset=True,
@@ -135,7 +138,7 @@ async def tool(name: str) -> ToolModel:

 @router.get(
     '/{name}',
-    tags=[default_tag],
+    tags=[APITag.DEFAULT],
     summary='Get information for a specific tool.',
     response_model=ToolResponseModel,
     responses=create_responses(_bad_tool_response),
@@ -153,7 +156,7 @@ async def get_tool(

 @router.post(
     '/{name}',
-    tags=[default_tag],
+    tags=[APITag.DEFAULT],
     response_model=ToolResponseModel,
     summary='Start a tool.',
     response_model_exclude_unset=True,
@@ -301,7 +304,7 @@ async def start_tool(

 @router.delete(
     '/{name}',
-    tags=[default_tag],
+    tags=[APITag.DEFAULT],
     response_model=ToolResponseModel,
     summary='Stop a tool.',
     response_model_exclude_unset=True,
diff --git a/nomad/app/v1/routers/suggestions.py b/nomad/app/v1/routers/suggestions.py
index a528fde90bf9788b78131c7acde5948207713367..2bd6e515e3a2d446c29cde595e9bd5bb126f8195 100644
--- a/nomad/app/v1/routers/suggestions.py
+++ b/nomad/app/v1/routers/suggestions.py
@@ -16,19 +16,18 @@
 # limitations under the License.
 #

-from typing import List, Dict, Optional, Set
 from collections import defaultdict
-from pydantic import BaseModel, Field
-from fastapi import APIRouter, Depends, Request, HTTPException, status
+
+from elasticsearch.exceptions import RequestError
 from elasticsearch_dsl import Search
 from elasticsearch_dsl.utils import AttrList
-from elasticsearch.exceptions import RequestError
+from fastapi import APIRouter, Depends, HTTPException, Request, status
+from pydantic import BaseModel, Field

 from nomad.metainfo.elasticsearch_extension import entry_index, entry_type

-from .auth import create_user_dependency
 from ..models import User
-
+from .auth import create_user_dependency

 router = APIRouter()
diff --git a/nomad/app/v1/routers/systems.py b/nomad/app/v1/routers/systems.py
index d392014a7fb30e1d0665c71b38140a0b0a731076..c92e1c44f4520b1198a395817a214afec987ccc9 100644
--- a/nomad/app/v1/routers/systems.py
+++ b/nomad/app/v1/routers/systems.py
@@ -15,33 +15,33 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
-from typing import Dict, List, Union
-from io import StringIO, BytesIO
 import sys
 from collections import OrderedDict
 from enum import Enum
+from io import BytesIO, StringIO

+import ase.build
+import ase.io
 import numpy as np
-from fastapi import APIRouter, Depends, Path, Query, status, HTTPException
+from fastapi import APIRouter, Depends, HTTPException, Path, Query, status
 from fastapi.responses import Response
-import ase.io
-import ase.build

-from nomad.units import ureg
-from nomad.utils import strip, deep_get, query_list_to_dict
-from nomad.atomutils import Formula, wrap_positions, unwrap_positions
-from nomad.normalizing.common import (
-    ase_atoms_from_nomad_atoms,
-)
+from nomad.atomutils import Formula, unwrap_positions, wrap_positions
 from nomad.datamodel.metainfo.system import Atoms as NOMADAtoms
-from .entries import answer_entry_archive_request
+from nomad.normalizing.common import ase_atoms_from_nomad_atoms
+from nomad.units import ureg
+from nomad.utils import deep_get, query_list_to_dict, strip

-from .auth import create_user_dependency
+from ..models import HTTPExceptionModel, User
 from ..utils import create_responses
-from ..models import User, HTTPExceptionModel
+from .auth import create_user_dependency
+from .entries import answer_entry_archive_request

 router = APIRouter()
-default_tag = 'systems'
+
+
+class APITag(str, Enum):
+    DEFAULT = 'systems'


 def write_pdb(atoms: NOMADAtoms, entry_id: str = None, formula: str = None) -> str:
@@ -284,7 +284,7 @@ _serialization_error_response = (

 @router.get(
     '/{entry_id}',
-    tags=[default_tag],
+    tags=[APITag.DEFAULT],
     summary=strip(
         """
         Build and retrieve an atomistic structure file from data within an entry.
diff --git a/nomad/app/v1/routers/uploads.py b/nomad/app/v1/routers/uploads.py
index 3043cc64b41cdf0d5c4b2918d80876bb77ad3635..fa6ff235ca0c7454634a8c64fe93f062507048f1 100644
--- a/nomad/app/v1/routers/uploads.py
+++ b/nomad/app/v1/routers/uploads.py
@@ -15,98 +15,91 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
# -import os import io +import os import shutil -from enum import Enum +import tarfile +import zipfile from datetime import datetime -from typing import Tuple, List, Set, Dict, Any, Optional, Union, cast -from pydantic import ( - field_validator, - ConfigDict, - BaseModel, - Field, - model_validator, -) -from mongoengine.queryset.visitor import Q +from enum import Enum +from typing import Any, cast from urllib.parse import unquote + from fastapi import ( APIRouter, - Request, + Body, + Depends, File, + HTTPException, + Path, + Request, UploadFile, status, - Depends, - Body, - Path, - Query as FastApiQuery, - HTTPException, ) -from fastapi.responses import StreamingResponse, FileResponse +from fastapi import Query as FastApiQuery from fastapi.exceptions import RequestValidationError +from fastapi.responses import FileResponse, StreamingResponse +from mongoengine.queryset.visitor import Q +from pydantic import BaseModel, ConfigDict, Field, field_validator, model_validator from pydantic_core import PydanticCustomError -from nomad import utils, files -from nomad.common import is_safe_relative_path, is_safe_basename -from nomad.config import config -from nomad.config.models.plugins import ExampleUploadEntryPoint -from nomad.files import ( - StagingUploadFiles, - PublicUploadFiles, -) +from nomad import files, utils from nomad.bundles import BundleExporter, BundleImporter +from nomad.common import get_compression_format, is_safe_basename, is_safe_relative_path +from nomad.config import config from nomad.config.models.config import Reprocess +from nomad.config.models.plugins import ExampleUploadEntryPoint +from nomad.files import PublicUploadFiles, StagingUploadFiles from nomad.groups import get_group_ids from nomad.processing import ( - Upload, Entry, + MetadataEditRequestHandler, ProcessAlreadyRunning, ProcessStatus, - MetadataEditRequestHandler, + Upload, ) -from nomad.common import get_compression_format +from nomad.search import QueryValidationError, search, search_iterator +from nomad.search import refresh as search_refresh from nomad.utils import strip -from nomad.search import ( - search, - search_iterator, - refresh as search_refresh, - QueryValidationError, -) -from .auth import create_user_dependency, generate_upload_token from ..models import ( - MetadataPagination, - User, Direction, - Pagination, - PaginationResponse, - HTTPExceptionModel, Files, - files_parameters, + HTTPExceptionModel, + MetadataEditRequest, + MetadataPagination, + MetadataRequired, Owner, + Pagination, + PaginationResponse, + User, WithQuery, - MetadataRequired, - MetadataEditRequest, + files_parameters, restrict_query_to_upload, ) -from .entries import EntryArchiveResponse, answer_entry_archive_request from ..utils import ( - parameter_dependency_from_model, - create_responses, DownloadItem, browser_download_headers, - create_download_stream_zipped, create_download_stream_raw_file, + create_download_stream_zipped, + create_responses, create_stream_from_string, + parameter_dependency_from_model, ) +from .auth import create_user_dependency, generate_upload_token +from .entries import EntryArchiveResponse, answer_entry_archive_request router = APIRouter() -default_tag = 'uploads' -metadata_tag = 'uploads/metadata' -raw_tag = 'uploads/raw' -archive_tag = 'uploads/archive' -action_tag = 'uploads/action' -bundle_tag = 'uploads/bundle' + + +class APITag(str, Enum): + DEFAULT = 'uploads' + METADATA = 'uploads/metadata' + RAW = 'uploads/raw' + ARCHIVE = 'uploads/archive' + ACTION = 'uploads/action' + BUNDLE = 
'uploads/bundle' + logger = utils.get_logger(__name__) @@ -139,7 +132,7 @@ class ProcData(BaseModel): 'process is currently running.', ) errors: list[str] = Field( - descriptions='A list of error messages that occurred during the last processing' + description='A list of error messages that occurred during the last processing' ) warnings: list[str] = Field( description='A list of warning messages that occurred during the last processing' @@ -737,7 +730,7 @@ and publish your data.""" @router.get( '/command-examples', - tags=[default_tag], + tags=[APITag.DEFAULT], summary='Get example commands for shell based uploads.', response_model=UploadCommandExamplesResponse, responses=create_responses(_not_authorized), @@ -768,7 +761,7 @@ async def get_command_examples( @router.get( '', - tags=[metadata_tag], + tags=[APITag.METADATA], summary='List uploads of authenticated user.', response_model=UploadProcDataQueryResponse, responses=create_responses(_not_authorized, _bad_pagination), @@ -835,7 +828,7 @@ async def get_uploads( @router.get( '/{upload_id}', - tags=[metadata_tag], + tags=[APITag.METADATA], summary='Get a specific upload', response_model=UploadProcDataResponse, responses=create_responses(_upload_not_found, _not_authorized_to_upload), @@ -857,7 +850,7 @@ async def get_upload( @router.get( '/{upload_id}/entries', - tags=[metadata_tag], + tags=[APITag.METADATA], summary='Get the entries of the specific upload as a list', response_model=EntryProcDataQueryResponse, responses=create_responses( @@ -934,7 +927,7 @@ async def get_upload_entries( @router.get( '/{upload_id}/entries/{entry_id}', - tags=[metadata_tag], + tags=[APITag.METADATA], summary='Get a specific entry for a specific upload', response_model=EntryProcDataResponse, responses=create_responses(_entry_not_found, _not_authorized_to_entry), @@ -970,7 +963,7 @@ async def get_upload_entry( @router.get( '/{upload_id}/rawdir/{path:path}', - tags=[raw_tag], + tags=[APITag.RAW], summary='Get the metadata for the raw file or folder located at the specified path in the specified upload.', response_model=RawDirResponse, responses=create_responses( @@ -1085,7 +1078,7 @@ async def get_upload_rawdir_path( @router.get( '/{upload_id}/raw', - tags=[raw_tag], + tags=[APITag.RAW], summary='Downloads the published upload .zip file with all the raw files of the upload.', response_class=StreamingResponse, responses=create_responses( @@ -1133,7 +1126,7 @@ async def get_upload_raw( @router.get( '/{upload_id}/raw/{path:path}', - tags=[raw_tag], + tags=[APITag.RAW], summary='Download the raw file or folder located at the specified path in the specified upload.', response_class=StreamingResponse, responses=create_responses( @@ -1308,7 +1301,7 @@ async def get_upload_raw_path( @router.put( '/{upload_id}/raw/{path:path}', - tags=[raw_tag], + tags=[APITag.RAW], summary='Upload a raw file to the specified path (directory) in the specified upload.', response_class=StreamingResponse, responses=create_responses( @@ -1625,7 +1618,7 @@ async def put_upload_raw_path( @router.delete( '/{upload_id}/raw/{path:path}', - tags=[raw_tag], + tags=[APITag.RAW], summary='Delete the raw file or folder located at the specified path in the specified upload.', response_model=UploadProcDataResponse, responses=create_responses( @@ -1674,7 +1667,7 @@ async def delete_upload_raw_path( @router.post( '/{upload_id}/raw-create-dir/{path:path}', - tags=[raw_tag], + tags=[APITag.RAW], summary='Create a new empty directory with the specified path in the specified upload.', 
     response_model=UploadProcDataResponse,
     responses=create_responses(
@@ -1721,7 +1714,7 @@ async def post_upload_raw_create_dir_path(

 @router.get(
     '/{upload_id}/archive/mainfile/{mainfile:path}',
-    tags=[archive_tag],
+    tags=[APITag.ARCHIVE],
     summary='Get the full archive for the given upload and mainfile path.',
     response_model=EntryArchiveResponse,
     response_model_exclude_unset=True,
@@ -1751,7 +1744,7 @@ async def get_upload_entry_archive_mainfile(

 @router.get(
     '/{upload_id}/archive/{entry_id}',
-    tags=[archive_tag],
+    tags=[APITag.ARCHIVE],
     summary='Get the full archive for the given upload and entry.',
     response_model=EntryArchiveResponse,
     response_model_exclude_unset=True,
@@ -1775,7 +1768,7 @@ async def get_upload_entry_archive(

 @router.post(
     '',
-    tags=[default_tag],
+    tags=[APITag.DEFAULT],
     summary='Submit a new upload',
     response_class=StreamingResponse,
     responses=create_responses(_post_upload_response, _not_authorized, _bad_request),
@@ -1961,7 +1954,7 @@ async def post_upload(

 @router.post(
     '/{upload_id}/edit',
-    tags=[metadata_tag],
+    tags=[APITag.METADATA],
     summary='Updates the metadata of the specified upload.',
     response_model=UploadProcDataResponse,
     responses=create_responses(
@@ -2008,7 +2001,7 @@ async def post_upload_edit(

 @router.delete(
     '/{upload_id}',
-    tags=[default_tag],
+    tags=[APITag.DEFAULT],
     summary='Delete an upload',
     response_model=UploadProcDataResponse,
     responses=create_responses(
@@ -2054,7 +2047,7 @@ async def delete_upload(

 @router.post(
     '/{upload_id}/action/publish',
-    tags=[action_tag],
+    tags=[APITag.ACTION],
     summary='Publish an upload',
     response_model=UploadProcDataResponse,
     responses=create_responses(
@@ -2162,7 +2155,7 @@ async def post_upload_action_publish(

 @router.post(
     '/{upload_id}/action/process',
-    tags=[action_tag],
+    tags=[APITag.ACTION],
     summary='Manually triggers processing of an upload.',
     response_model=UploadProcDataResponse,
     responses=create_responses(
@@ -2191,7 +2184,7 @@ async def post_upload_action_process(

 @router.post(
     '/{upload_id}/action/delete-entry-files',
-    tags=[action_tag],
+    tags=[APITag.ACTION],
     summary='Deletes the files of the entries specified by a query.',
     response_model=UploadProcDataResponse,
     responses=create_responses(
@@ -2262,7 +2255,7 @@ async def post_upload_action_delete_entry_files(

 @router.post(
     '/{upload_id}/action/lift-embargo',
-    tags=[action_tag],
+    tags=[APITag.ACTION],
     summary='Lifts the embargo of an upload.',
     response_model=UploadProcDataResponse,
     responses=create_responses(
@@ -2314,7 +2307,7 @@ async def post_upload_action_lift_embargo(

 @router.get(
     '/{upload_id}/bundle',
-    tags=[bundle_tag],
+    tags=[APITag.BUNDLE],
     summary='Gets an *upload bundle* for the specified upload.',
     response_class=StreamingResponse,
     responses=create_responses(
@@ -2388,7 +2381,7 @@ async def get_upload_bundle(

 @router.post(
     '/bundle',
-    tags=[bundle_tag],
+    tags=[APITag.BUNDLE],
     summary='Posts an *upload bundle* to this NOMAD deployment.',
     response_model=UploadProcDataResponse,
     responses=create_responses(_not_authorized, _bad_request),
@@ -2691,9 +2684,9 @@ async def _get_files_if_provided(
         # Only ok if uploaded file is a zip or a tar archive.
         ext = (
             '.zip'
-            if files.zipfile.is_zipfile(upload_path)
+            if zipfile.is_zipfile(upload_path)
             else '.tar'
-            if files.tarfile.is_tarfile(upload_path)
+            if tarfile.is_tarfile(upload_path)
             else None
         )
         if not ext:
diff --git a/nomad/app/v1/routers/users.py b/nomad/app/v1/routers/users.py
index 3b47845604a862381f4434284b4f7d06a485b952..3e48ee899559efa081d41d6c16481e6ea2200028 100644
--- a/nomad/app/v1/routers/users.py
+++ b/nomad/app/v1/routers/users.py
@@ -16,20 +16,24 @@
 # limitations under the License.
 #

-from typing import List, Union, Optional
-from fastapi import Depends, APIRouter, status, HTTPException, Query
+from enum import Enum
+
+from fastapi import APIRouter, Depends, HTTPException, Query, status
 from pydantic.main import BaseModel

-from nomad import infrastructure, datamodel
+from nomad import datamodel, infrastructure
 from nomad.config import config
 from nomad.utils import strip

-from .auth import create_user_dependency
-from ..models import User, HTTPExceptionModel
+from ..models import HTTPExceptionModel, User
 from ..utils import create_responses
+from .auth import create_user_dependency

 router = APIRouter()
-default_tag = 'users'
+
+
+class APITag(str, Enum):
+    DEFAULT = 'users'


 _authentication_required_response = (
@@ -62,7 +66,7 @@ class Users(BaseModel):

 @router.get(
     '/me',
-    tags=[default_tag],
+    tags=[APITag.DEFAULT],
     summary='Get your account data',
     description='Returns the account data of the authenticated user.',
     responses=create_responses(_authentication_required_response),
@@ -83,7 +87,7 @@ async def read_users_me(

 @router.get(
     '',
-    tags=[default_tag],
+    tags=[APITag.DEFAULT],
     summary='Get existing users',
     description='Get existing users for given criteria',
     response_model_exclude_unset=True,
@@ -163,7 +167,7 @@ class PublicUserInfo(BaseModel):

 @router.get(
     '/{user_id}',
-    tags=[default_tag],
+    tags=[APITag.DEFAULT],
     summary='Get existing users',
     description='Get the user using the given user_id',
     response_model_exclude_unset=True,
@@ -178,7 +182,7 @@ async def get_user(user_id: str):

 @router.put(
     '/invite',
-    tags=[default_tag],
+    tags=[APITag.DEFAULT],
     summary='Invite a new user',
     responses=create_responses(_authentication_required_response, _bad_invite_response),
     response_model=User,
diff --git a/nomad/app/v1/utils.py b/nomad/app/v1/utils.py
index 315c2de571d2d29a3e2fba35c29166efb564b8dd..c62316fe4e8a02263d5a69014c25d6ea913939d7 100644
--- a/nomad/app/v1/utils.py
+++ b/nomad/app/v1/utils.py
@@ -16,19 +16,21 @@
 # limitations under the License.
 #

-from typing import List, Dict, Tuple, Set, Iterator, Any, Optional, Union
-from collections.abc import Iterator
-from types import FunctionType
-import urllib
+import gzip
+import inspect
 import io
 import json
-import os
-import inspect
-from fastapi import Request, Query, HTTPException, status  # pylint: disable=unused-import
-from pydantic import ValidationError, BaseModel  # pylint: disable=unused-import
-import gzip
 import lzma
-from nomad.files import UploadFiles, StreamedFile, create_zipstream
+import os
+import urllib
+from collections.abc import Iterator
+from types import FunctionType
+from typing import Any
+
+from fastapi import HTTPException, Query, Request, status  # noqa: F401
+from pydantic import BaseModel, ValidationError  # noqa: F401
+
+from nomad.files import StreamedFile, UploadFiles, create_zipstream


 def parameter_dependency_from_model(
diff --git a/nomad/archive/converter.py b/nomad/archive/converter.py
index b72233f3ffdf3053471751b669d3cd11ca181780..96b6e494fd78e1b69613958571f556b81c4273ef 100644
--- a/nomad/archive/converter.py
+++ b/nomad/archive/converter.py
@@ -21,15 +21,14 @@ import functools
 import hashlib
 import os.path
 import signal
+from collections.abc import Callable, Iterable
 from concurrent.futures import ProcessPoolExecutor
 from multiprocessing import Manager
-from collections.abc import Callable
-from collections.abc import Iterable

-from nomad.config import config
-from nomad.archive import to_json, read_archive
+from nomad.archive import read_archive, to_json
 from nomad.archive.storage_v2 import ArchiveWriter as ArchiveWriterNew
-from nomad.files import StagingUploadFiles, PublicUploadFiles
+from nomad.config import config
+from nomad.files import PublicUploadFiles, StagingUploadFiles
 from nomad.infrastructure import setup
 from nomad.processing import Upload
diff --git a/nomad/archive/partial.py b/nomad/archive/partial.py
index a1b0e44a244bbb432380916cf742b128d9fc1194..6d0f4012f0096975be51eb4ab53d6596a5b4436b 100644
--- a/nomad/archive/partial.py
+++ b/nomad/archive/partial.py
@@ -16,20 +16,20 @@
 # limitations under the License.
 #

-from typing import Any, Tuple, Dict, Union, List
+from typing import Any

 from nomad import infrastructure
 from nomad.config import config
+from nomad.datamodel import EntryArchive
+from nomad.datamodel.metainfo.common import FastAccess
 from nomad.metainfo import (
-    MSection,
     Definition,
+    MSection,
     Quantity,
     Reference,
-    SubSection,
     Section,
+    SubSection,
 )
-from nomad.datamodel import EntryArchive
-from nomad.datamodel.metainfo.common import FastAccess


 def create_partial_archive(archive: EntryArchive) -> dict:
diff --git a/nomad/archive/query.py b/nomad/archive/query.py
index 05bbc2700eade759945cfa1312fc3c1917332784..09696fcdbcb95ce81219b57fe8511f4e984ce78a 100644
--- a/nomad/archive/query.py
+++ b/nomad/archive/query.py
@@ -18,13 +18,13 @@

 import functools
 import re
-from typing import Any, Dict, Union, Tuple
 from collections.abc import Callable
 from io import BytesIO
+from typing import Any

 from nomad import utils

-from .storage import ArchiveReader, ArchiveList, ArchiveDict, to_json, read_archive
+from .storage import ArchiveDict, ArchiveList, ArchiveReader, read_archive, to_json

 _query_archive_key_pattern = re.compile(r'^([\s\w\-]+)(\[([-?0-9]*)(:([-?0-9]*))?])?$')
diff --git a/nomad/archive/required.py b/nomad/archive/required.py
index 6a08652900df662c92b9eb958cf3c29d149e938b..402e116ad6fdaea53b8cfb08ae25137145984c31 100644
--- a/nomad/archive/required.py
+++ b/nomad/archive/required.py
@@ -21,31 +21,32 @@ import copy
 import dataclasses
 import functools
 import re
-from typing import cast, Union, Dict, Tuple
+from typing import cast

 from fastapi import HTTPException

 from nomad import utils
 from nomad.metainfo import (
     Definition,
-    Section,
+    Package,
     Quantity,
-    SubSection,
-    Reference,
     QuantityReference,
+    Reference,
+    Section,
     SectionReference,
-    Package,
+    SubSection,
 )
+
+from ..datamodel.context import ServerContext, parse_path
 from .query import (
     ArchiveQueryError,
-    to_json,
-    _query_archive_key_pattern,
-    _extract_key_and_index,
     _extract_child,
+    _extract_key_and_index,
+    _query_archive_key_pattern,
+    to_json,
 )
-from .storage import ArchiveReader, ArchiveList, ArchiveError, ArchiveDict
+from .storage import ArchiveDict, ArchiveError, ArchiveList, ArchiveReader
 from .storage_v2 import ArchiveDict as NewArchiveDict
-from ..datamodel.context import parse_path, ServerContext


 class RequiredValidationError(Exception):
diff --git a/nomad/archive/storage.py b/nomad/archive/storage.py
index 6d33ecfdd378ec709decfa3d6d476bbe26f5b7ed..29102e1eae6ddd68cf10542a797954c4804d41f2 100644
--- a/nomad/archive/storage.py
+++ b/nomad/archive/storage.py
@@ -17,12 +17,10 @@
 #
 from __future__ import annotations

-from typing import Any, Tuple, Dict, Union, cast
-from collections.abc import Generator
-from io import BytesIO, BufferedReader
-from collections.abc import Mapping, Sequence
-
 import struct
+from collections.abc import Generator, Mapping, Sequence
+from io import BufferedReader, BytesIO
+from typing import Any, cast

 import msgspec

@@ -328,10 +326,8 @@ def read_archive(file_or_path: str | BytesIO, **kwargs) -> ArchiveReader:
     will lazily load data as it is used. The mapping needs to be closed or used within
     a 'with' statement to free the underlying file resource after use.
""" - from .storage_v2 import ( - ArchiveWriter as ArchiveWriterNew, - ArchiveReader as ArchiveReaderNew, - ) + from .storage_v2 import ArchiveReader as ArchiveReaderNew + from .storage_v2 import ArchiveWriter as ArchiveWriterNew # todo: replace implementation to enable automatic conversion # if isinstance(file_or_path, str): diff --git a/nomad/archive/storage_v2.py b/nomad/archive/storage_v2.py index 6a5d1e59af5aa7ad6dd82a3b87d647c44927df1c..ffb8fdaf6fdfdc0eca76797f591756a71ea0934a 100644 --- a/nomad/archive/storage_v2.py +++ b/nomad/archive/storage_v2.py @@ -27,8 +27,8 @@ from bitarray import bitarray from msgpack import Unpacker from nomad import utils -from nomad.config import config from nomad.archive import ArchiveError +from nomad.config import config _packer = msgpack.Packer(autoreset=True, use_bin_type=True) diff --git a/nomad/atomutils.py b/nomad/atomutils.py index 0e142213d4d1c50728d9e681366be3102764b97d..d0deb7ded1bd8e66ea46a6b4bb693d7ad44391b1 100644 --- a/nomad/atomutils.py +++ b/nomad/atomutils.py @@ -23,18 +23,10 @@ import itertools import logging import math import re +from collections.abc import Iterable from functools import reduce from string import ascii_uppercase -from typing import ( - TYPE_CHECKING, - Any, - Dict, - List, - Tuple, - Union, - cast, -) -from collections.abc import Iterable +from typing import TYPE_CHECKING, Any, cast import ase.data import ase.geometry diff --git a/nomad/bundles.py b/nomad/bundles.py index 126f0a2c71aa003a72ee277beab12c621f05efff..93c45612e1fa0d47e75ed604e9ae4406517a5794 100644 --- a/nomad/bundles.py +++ b/nomad/bundles.py @@ -8,35 +8,36 @@ # See the License for the specific language governing permissions and # limitations under the License. # -from typing import cast, Any, Tuple, List, Set, Dict -from collections.abc import Iterable -import os import json +import os +from collections.abc import Iterable from datetime import datetime, timedelta +from typing import Any, cast + +from fastapi import HTTPException, status from packaging import version -from nomad import utils, datamodel, search +from nomad import datamodel, search, utils from nomad.config import config -from nomad.config.models.config import BundleImportSettings, BundleExportSettings +from nomad.config.models.config import BundleExportSettings, BundleImportSettings from nomad.files import ( - zipfile, + BrowsableFileSource, + CombinedFileSource, + DiskFileSource, + FileSource, PathObject, - UploadFiles, PublicUploadFiles, StagingUploadFiles, - FileSource, - BrowsableFileSource, - CombinedFileSource, + StandardJSONDecoder, StreamedFileSource, - DiskFileSource, + UploadFiles, ZipFileSource, - json_to_streamed_file, bundle_info_filename, - StandardJSONDecoder, + json_to_streamed_file, + zipfile, ) from nomad.processing.base import ProcessStatus -from nomad.processing.data import Upload, Entry, mongo_entry_metadata -from fastapi import HTTPException, status +from nomad.processing.data import Entry, Upload, mongo_entry_metadata class BundleExporter: diff --git a/nomad/cli/admin/admin.py b/nomad/cli/admin/admin.py index 7c02a16cdd51457e9b241def679e6d26b8a492ba..75f5cc88179cf5d44b972ddc2b668d05fa898419 100644 --- a/nomad/cli/admin/admin.py +++ b/nomad/cli/admin/admin.py @@ -18,8 +18,8 @@ import click -from nomad.config import config from nomad.cli.cli import cli +from nomad.config import config @cli.group( @@ -68,7 +68,8 @@ def reset(remove, i_am_really_sure): def reset_processing(zero_complete_time): from datetime import datetime - from nomad import infrastructure, processing 
as proc + from nomad import infrastructure + from nomad import processing as proc infrastructure.setup_mongo() @@ -109,9 +110,11 @@ def reset_processing(zero_complete_time): ) def lift_embargo(dry, parallel): from datetime import datetime + from dateutil.relativedelta import relativedelta - from nomad import infrastructure, processing as proc + from nomad import infrastructure + from nomad import processing as proc from nomad.search import quantity_values infrastructure.setup_mongo() @@ -415,6 +418,7 @@ def migrate_mongo( import sys from pymongo.database import Database + from nomad import infrastructure from nomad.cli.admin import migrate @@ -490,9 +494,10 @@ def migrate_mongo( ) def rewrite_doi_urls(dois, dry, save_existing_records): import json + import requests - from nomad.doi import edit_doi_url, _create_dataset_url + from nomad.doi import _create_dataset_url, edit_doi_url existing_records = [] diff --git a/nomad/cli/admin/clean.py b/nomad/cli/admin/clean.py index 6ca3d6c7ca5773b59bf79b7a716508f890d9be98..2522466c06d18e88161157447efe52e7d6264092 100644 --- a/nomad/cli/admin/clean.py +++ b/nomad/cli/admin/clean.py @@ -39,13 +39,13 @@ from .admin import admin def clean(dry, skip_entries, skip_fs, skip_es, staging_too, force): import os import shutil - import tabulate + import elasticsearch_dsl + import tabulate from nomad import infrastructure, processing from nomad.config import config as nomad_config - from nomad.search import delete_by_query - from nomad.search import quantity_values + from nomad.search import delete_by_query, quantity_values mongo_client = infrastructure.setup_mongo() infrastructure.setup_elastic() diff --git a/nomad/cli/admin/entries.py b/nomad/cli/admin/entries.py index 54ac693eed7dcd6510287497f0733e912432729a..91728ae86d3c413947cacffb9e98d7d3782efe7c 100644 --- a/nomad/cli/admin/entries.py +++ b/nomad/cli/admin/entries.py @@ -33,7 +33,8 @@ def entries(): ) @click.option('--skip-mongo', help='Keep uploads and entries in mongo.', is_flag=True) def rm(entries, skip_es, skip_mongo): - from nomad import processing as proc, infrastructure, search + from nomad import infrastructure, search + from nomad import processing as proc infrastructure.setup_mongo() infrastructure.setup_elastic() diff --git a/nomad/cli/admin/migrate.py b/nomad/cli/admin/migrate.py index e857c013e657121fb330afa213aee002ea31e4f1..5f28df89b19ae28ec8b7378d3a8545489e355b8c 100644 --- a/nomad/cli/admin/migrate.py +++ b/nomad/cli/admin/migrate.py @@ -18,16 +18,16 @@ import time from datetime import datetime -from typing import List, Dict, Set, Any, Optional -from pydantic import BaseModel +from typing import Any +from pydantic import BaseModel from pymongo import ReplaceOne -from pymongo.database import Database, Collection +from pymongo.database import Collection, Database + from nomad import utils -from nomad.processing import ProcessStatus, Upload, Entry from nomad.datamodel import Dataset from nomad.parsing.parsers import parser_dict - +from nomad.processing import Entry, ProcessStatus, Upload _upload_keys_to_remove_v0 = ( 'published', @@ -69,10 +69,10 @@ class _CollectionStatistics(BaseModel): class _UpgradeStatistics(BaseModel): - uploads = _CollectionStatistics(collection_name='Uploads') - entries = _CollectionStatistics(collection_name='Entries') - datasets = _CollectionStatistics(collection_name='Datasets') - dois = _CollectionStatistics(collection_name='DOIs') + uploads: _CollectionStatistics = _CollectionStatistics(collection_name='Uploads') + entries: _CollectionStatistics = 
_CollectionStatistics(collection_name='Entries') + datasets: _CollectionStatistics = _CollectionStatistics(collection_name='Datasets') + dois: _CollectionStatistics = _CollectionStatistics(collection_name='DOIs') class _DatasetCacheItem(BaseModel): diff --git a/nomad/cli/admin/run.py b/nomad/cli/admin/run.py index ab846f3e0e203bbe039094be8d0393eac403915d..1739b89c1e2dc22d887f9172f5dec9c5e68002ed 100644 --- a/nomad/cli/admin/run.py +++ b/nomad/cli/admin/run.py @@ -75,9 +75,9 @@ def run_app( # port = port or config.services.api_port if with_gui: + import glob import os import os.path - import glob import shutil gui_folder = os.path.abspath( @@ -122,9 +122,10 @@ def run_app( from nomad.utils import get_logger if gunicorn: - from gunicorn.app.wsgiapp import WSGIApplication import logging.config + from gunicorn.app.wsgiapp import WSGIApplication + if log_config: logging.config.fileConfig(log_config) @@ -152,7 +153,7 @@ def run_app( get_logger(__name__).info('created gunicorn server', data=str(gunicorn_app.cfg)) gunicorn_app.run() else: - from uvicorn import Server, Config + from uvicorn import Config, Server kwargs['log_config'] = log_config @@ -182,10 +183,11 @@ def run_worker(*, workers=None): def run_hub(): - from jupyterhub.app import main - import sys import os import subprocess + import sys + + from jupyterhub.app import main if 'JUPYTERHUB_CRYPT_KEY' not in os.environ: crypt_key = config.north.jupyterhub_crypt_key diff --git a/nomad/cli/admin/springer.py b/nomad/cli/admin/springer.py index 7c468d179682ee84ea3c5c333073a538a40b2444..dec3b88fdc0c4f71670124933d6757971dceebcc 100644 --- a/nomad/cli/admin/springer.py +++ b/nomad/cli/admin/springer.py @@ -23,17 +23,18 @@ http://materials.springer.com. The database is stuctured as space_group_number : normalized_formula : springer_id : entry """ -from typing import Dict, List, Any -import requests +import os.path import re -import bs4 import time -import os.path +from typing import Any + +import bs4 +import requests import nomad.archive.storage_v2 from nomad import archive -from nomad.config import config from nomad.archive import read_archive +from nomad.config import config required_items = { 'Alphabetic Formula:': 'alphabetic_formula', diff --git a/nomad/cli/admin/uploads.py b/nomad/cli/admin/uploads.py index 258e8affdbe03cb23a239b250ac28f59d5de3921..c55c64948f43df3568160bffa7438f1f9bc04783 100644 --- a/nomad/cli/admin/uploads.py +++ b/nomad/cli/admin/uploads.py @@ -20,12 +20,12 @@ import json import os import os.path import traceback -import typing import click from orjson import dumps from nomad.config import config + from .admin import admin @@ -35,7 +35,8 @@ def _run_parallel( import threading import time - from nomad import utils, processing as proc + from nomad import processing as proc + from nomad import utils if isinstance(uploads, tuple | list): uploads_count = len(uploads) @@ -238,11 +239,13 @@ def _query_uploads( list of upoad ids and further filter parameters. 
""" - from typing import Set, cast import json + from typing import cast + from mongoengine import Q - from nomad import infrastructure, processing as proc, search + from nomad import infrastructure, search + from nomad import processing as proc from nomad.app.v1 import models infrastructure.setup_mongo() @@ -352,13 +355,14 @@ def _query_uploads( @click.pass_context def export(ctx, uploads, required, output: str): import sys - from nomad.processing import Entry - from nomad.utils import get_logger - from nomad.files import UploadFiles - from nomad.archive import ArchiveQueryError, RequiredReader import time import zipfile + from nomad.archive import ArchiveQueryError, RequiredReader + from nomad.files import UploadFiles + from nomad.processing import Entry + from nomad.utils import get_logger + logger = get_logger(__name__) if not output: @@ -630,7 +634,8 @@ def index(ctx, uploads, parallel, transformer, skip_materials, print_progress): def delete_upload( upload, skip_es: bool = False, skip_files: bool = False, skip_mongo: bool = False ): - from nomad import search, files, utils, processing as proc + from nomad import files, search, utils + from nomad import processing as proc # delete elastic if not skip_es: @@ -783,7 +788,8 @@ def re_pack(ctx, uploads): def stop(ctx, uploads, entries: bool, kill: bool, no_celery: bool): import mongoengine - from nomad import utils, processing as proc + from nomad import processing as proc + from nomad import utils query, _ = _query_uploads(uploads, **ctx.obj.uploads_kwargs) @@ -912,9 +918,8 @@ def integrity( ): from nomad.app.v1.models import MetadataPagination, MetadataRequired from nomad.archive.storage_v2 import ArchiveWriter - from nomad.files import StagingUploadFiles, PublicUploadFiles - from nomad.processing import Entry - from nomad.processing import Upload + from nomad.files import PublicUploadFiles, StagingUploadFiles + from nomad.processing import Entry, Upload from nomad.search import search def search_params(upload_id: str): @@ -1301,8 +1306,8 @@ def export_bundle( def import_bundle( ctx, input_path, multi, settings, embargo_length, use_celery, ignore_errors ): - from nomad.bundles import BundleImporter from nomad import infrastructure + from nomad.bundles import BundleImporter for key, value in ctx.obj.uploads_kwargs.items(): if value: diff --git a/nomad/cli/admin/users.py b/nomad/cli/admin/users.py index 21253fb313395efbbc99604d35c864f22b40e3ed..7752903f2977a4f8dc0431260abfd215c72aa258 100644 --- a/nomad/cli/admin/users.py +++ b/nomad/cli/admin/users.py @@ -29,10 +29,10 @@ def users(): @users.command(help='Import users to keycloak from a JSON file.', name='import') @click.argument('PATH_TO_USERS_FILE', type=str, nargs=1) def import_command(path_to_users_file): - import json import datetime + import json - from nomad import infrastructure, datamodel, utils + from nomad import datamodel, infrastructure, utils with open(path_to_users_file) as f: users = json.load(f) diff --git a/nomad/cli/aflow.py b/nomad/cli/aflow.py index 6912ec39dde56d95bd5106ead7305b83d0efadcf..eec1729ad7319cf9e3cce68591a3270b9ee55de3 100644 --- a/nomad/cli/aflow.py +++ b/nomad/cli/aflow.py @@ -20,28 +20,27 @@ # code will fail. # TODO The metadata should not be set via API, but added to the uploads as nomad.json. 
-from typing import List -import requests +import io +import json +import os import re import subprocess -from urllib import parse as urllib_parse -import os import tarfile import threading import time -import typing -import io -import re import uuid -import json -import numpy as np +from urllib import parse as urllib_parse + import ase import bs4 import matid # pylint: disable=import-error +import numpy as np +import requests -from nomad import atomutils, client, processing as proc -from nomad.config import config +from nomad import atomutils, client +from nomad import processing as proc from nomad.client import api, upload_file +from nomad.config import config class DbUpdater: diff --git a/nomad/cli/cli.py b/nomad/cli/cli.py index 4120b883cc02d3b73789554071ff3c2fa4af150d..316293d1cbfc0fe6fd86009c60130b078289a95a 100644 --- a/nomad/cli/cli.py +++ b/nomad/cli/cli.py @@ -16,10 +16,11 @@ # limitations under the License. # -import click import logging import os +import click + from nomad import utils from nomad.config import config diff --git a/nomad/cli/client/integrationtests.py b/nomad/cli/client/integrationtests.py index 744cba59c894a99e755cf379fc887877e2ba088d..0f99ca2327b36d8faf2d049f1de3a4e23eed4393 100644 --- a/nomad/cli/client/integrationtests.py +++ b/nomad/cli/client/integrationtests.py @@ -21,9 +21,9 @@ A command that runs some example operations on a working nomad@FAIRDI installati as a final integration test. """ -import time -import os import json +import os +import time from nomad.client import api diff --git a/nomad/cli/dev.py b/nomad/cli/dev.py index e7ff602405fd1c7f177d26d301180a72ba10afdc..4491c228e9c25be42b5474982c0e95a14e4f861c 100644 --- a/nomad/cli/dev.py +++ b/nomad/cli/dev.py @@ -15,18 +15,18 @@ # limitations under the License. # -from typing import Tuple, Any -import sys import json import os -import click +import sys +from typing import Any +import click from pint import Unit from pint.errors import UndefinedUnitError from nomad.config import config -from nomad.config.models.plugins import ExampleUploadEntryPoint from nomad.metainfo.elasticsearch_extension import schema_separator + from .cli import cli @@ -81,8 +81,8 @@ def gui_qa(skip_tests: bool): @dev.command(help='Export an API model in JSON schema.') @click.argument('model') def api_model(model): - import json import importlib + import json def remove_null_types(data): """ @@ -124,11 +124,11 @@ def api_model(model): 'nomad.app.v1.models.graph.GraphRequest', 'nomad.app.v1.models.graph.GraphResponse', ]: + from nomad.app.v1.models.graph.graph_models import Graph from nomad.app.v1.models.graph.utils import ( generate_request_model, generate_response_model, ) - from nomad.app.v1.models.graph.graph_models import Graph sys.modules['nomad.app.v1.models.graph.utils'].ref_prefix = '#/definitions' sys.modules['nomad.app.v1.models.graph.utils'].graph_model_export = True @@ -189,8 +189,8 @@ def metainfo(): def _generate_search_quantities(): # Currently only quantities with "entry_type" are included. - from nomad.metainfo.elasticsearch_extension import entry_type, Elasticsearch from nomad.datamodel import EntryArchive + from nomad.metainfo.elasticsearch_extension import Elasticsearch, entry_type def to_dict(search_quantity, section=False, repeats=False): if section: @@ -350,8 +350,9 @@ def gui_config(): '--parser', help='Only updated the README of the given parsers subdirctory.' 
) def update_parser_readmes(parser): - from glob import glob import re + from glob import glob + import yaml os.chdir(os.path.join(os.path.dirname(__file__), '../..')) @@ -490,8 +491,10 @@ def example_data(username: str): def _generate_units_json() -> tuple[Any, Any]: - from pint.converters import ScaleConverter from collections import defaultdict + + from pint.converters import ScaleConverter + from nomad.units import ureg # TODO: Check that all units are unambiguously defined, and that there are diff --git a/nomad/cli/parse.py b/nomad/cli/parse.py index 723796dfbac151b3372d61ca5592c47c59dcde89..275cdf80ce8c1dbd42f50c8589b2545148d48212 100644 --- a/nomad/cli/parse.py +++ b/nomad/cli/parse.py @@ -84,12 +84,12 @@ def _parse( save_plot_dir, ): import json - import sys import os + import sys + from nomad import utils from nomad.client import normalize_all, parse from nomad.datamodel.metainfo.plot import resolve_plot_references - from nomad import utils kwargs = dict( strict=not not_strict, diff --git a/nomad/client/api.py b/nomad/client/api.py index 414eb02a03ad0eb6c0cdf0aed8843cf3519d9c9c..fe85171217a799fc0f6a4d52e5959e0262580a26 100644 --- a/nomad/client/api.py +++ b/nomad/client/api.py @@ -16,9 +16,10 @@ # limitations under the License. # +import time + import requests from keycloak import KeycloakOpenID -import time from nomad.config import config diff --git a/nomad/client/archive.py b/nomad/client/archive.py index f68d6f88757dc1614259829dbee625f140542bc6..1c0f496e09df39b92e5ba41ec22620eda40df145 100644 --- a/nomad/client/archive.py +++ b/nomad/client/archive.py @@ -18,19 +18,19 @@ from __future__ import annotations import asyncio +import threading from asyncio import Semaphore from itertools import islice -from typing import Any, Union from time import monotonic -import threading +from typing import Any from click import progressbar -from httpx import Timeout, AsyncClient +from httpx import AsyncClient, Timeout from keycloak import KeycloakOpenID from nomad import metainfo as mi from nomad.config import config -from nomad.datamodel import EntryArchive, ClientContext +from nomad.datamodel import ClientContext, EntryArchive from nomad.utils import dict_to_dataframe @@ -413,7 +413,7 @@ class ArchiveQuery: ] results = await asyncio.gather(*tasks) - return [archive for result in results if result for archive in result] + return [archive for result in results if result for archive in result] # type: ignore async def _acquire( self, diff --git a/nomad/client/processing.py b/nomad/client/processing.py index 3445fc88711a48284f432cd15507ac0c93955adb..03d54ba78cd8bd8ec47892cd42d83a7ea02c711c 100644 --- a/nomad/client/processing.py +++ b/nomad/client/processing.py @@ -16,15 +16,14 @@ # limitations under the License. # -import os import io -import typing +import os import sys +import typing -from nomad import utils, datamodel +from nomad import datamodel, utils from nomad.config import config - from .api import Auth diff --git a/nomad/client/upload.py b/nomad/client/upload.py index e800b8da04781ad004150d26b8cc47042d5917e6..8b2c6f4c38d1e974cd1b72637338df1c1854cf6f 100644 --- a/nomad/client/upload.py +++ b/nomad/client/upload.py @@ -41,8 +41,8 @@ def upload_file( Returns: The upload_id if successful or None if not. 
""" - from nomad.processing import ProcessStatus from nomad.client import api + from nomad.processing import ProcessStatus if local_path: response = api.post( diff --git a/nomad/common.py b/nomad/common.py index ce0d536ff66afc9088d9e245b69b1567f6010d30..181c232cbe600ea4f0914ace29c5a710f7cb4825 100644 --- a/nomad/common.py +++ b/nomad/common.py @@ -24,11 +24,11 @@ source code without circular imports. import os import pkgutil import shutil -import zipfile import tarfile -from typing import Optional -from typing import Literal +import zipfile from tempfile import TemporaryDirectory +from typing import Literal + import httpx diff --git a/nomad/config/__init__.py b/nomad/config/__init__.py index 303d2c0d4c63ba26481c65728d8d667be1dd2e2f..b898ed97ee7ca4eba0232640f8ba57c041251b7f 100644 --- a/nomad/config/__init__.py +++ b/nomad/config/__init__.py @@ -37,7 +37,7 @@ import sys import yaml import logging import os.path -from typing import Dict, Any +from typing import Any from nomad.config.models.config import Config # use std python logger, since logging is not configured while loading configuration @@ -94,7 +94,7 @@ def _load_config_env() -> dict[str, Any]: # Some environment variables starting with NOMAD_ are unavoidable # in docker/kubernetes environments. We should ignore them here, # before they cause a warning later when the config is validated. - if all([not key.startswith(field) for field in Config.__fields__.keys()]): + if all([not key.startswith(field) for field in Config.model_fields.keys()]): continue add_deep(config_data, key, value) @@ -156,6 +156,6 @@ config = load_config() # Expose config fields under this module for backwards compatibility _module = sys.modules[__name__] -_fields = Config.__fields__ +_fields = Config.model_fields for field_name in _fields.keys(): setattr(_module, field_name, getattr(config, field_name)) diff --git a/nomad/config/models/common.py b/nomad/config/models/common.py index f091f34cf60fb03b5dde1fe12447935c70dd42bb..02f68c3814f991baaaf0f26089e97ef342817919 100644 --- a/nomad/config/models/common.py +++ b/nomad/config/models/common.py @@ -16,9 +16,9 @@ # limitations under the License. 
# -import logging -from typing import List, Dict, Tuple, Any, Optional, Union, cast, TypeVar -from pydantic import ConfigDict, model_validator, BaseModel, Field # pylint: disable=unused-import +from typing import Any, TypeVar, cast + +from pydantic import BaseModel, ConfigDict, Field, model_validator # noqa: F401 ConfigBaseModelBound = TypeVar('ConfigBaseModelBound', bound='ConfigBaseModel') diff --git a/nomad/config/models/config.py b/nomad/config/models/config.py index 2ec981ae5b53d69f9af968692f5499e2e23b53f3..385f0d8ba62b059589d96448b1e237d1b804b0c7 100644 --- a/nomad/config/models/config.py +++ b/nomad/config/models/config.py @@ -18,20 +18,12 @@ import logging import os -import sys import warnings from importlib.metadata import version -from typing import Any, Dict, List, Optional, Tuple, Union +from typing import Any import yaml -from pydantic import ( - BaseModel, - field_validator, - model_validator, - Field, - validator, - ConfigDict, -) +from pydantic import BaseModel, ConfigDict, Field, field_validator, model_validator try: __version__ = version('nomad-lab') @@ -41,15 +33,12 @@ except Exception: # noqa from importlib.metadata import entry_points +from nomad.common import get_package_path -from .common import ( - ConfigBaseModel, - Options, -) +from .common import ConfigBaseModel, Options from .north import NORTH from .plugins import EntryPointType, PluginPackage, Plugins from .ui import UI -from nomad.common import get_package_path warnings.filterwarnings('ignore', message='numpy.dtype size changed') warnings.filterwarnings('ignore', message='numpy.ufunc size changed') @@ -510,7 +499,7 @@ class Logtransfer(ConfigBaseModel): ) # Validators - _level = validator('level', allow_reuse=True)(normalize_loglevel) + _level = field_validator('level', mode='before')(normalize_loglevel) class Tests(ConfigBaseModel): diff --git a/nomad/config/models/north.py b/nomad/config/models/north.py index c99bf6c9fbe7841b20d4457fff7d0fb4d91b03e9..dced965c79bed434fd47dcecd6adca71631d2533 100644 --- a/nomad/config/models/north.py +++ b/nomad/config/models/north.py @@ -17,7 +17,6 @@ # from enum import Enum -from typing import Dict, List, Optional, Union from pydantic import BaseModel, Field diff --git a/nomad/config/models/plugins.py b/nomad/config/models/plugins.py index eeade496bca2cea5bfd59018510aa28a2d85d805..6ab41803294bc867da0327930d7c622ef6859fa4 100644 --- a/nomad/config/models/plugins.py +++ b/nomad/config/models/plugins.py @@ -16,15 +16,16 @@ # limitations under the License. 
# +import importlib import os -import sys import shutil +import sys from abc import ABCMeta, abstractmethod -import importlib -from typing import Optional, Dict, Union, List, Literal, cast, TYPE_CHECKING -from pydantic import model_validator, BaseModel, Field +from typing import TYPE_CHECKING, Literal, Union, cast + +from pydantic import BaseModel, Field, model_validator -from nomad.common import get_package_path, download_file, is_url, is_safe_relative_path +from nomad.common import download_file, get_package_path, is_safe_relative_path, is_url from .common import Options from .ui import App @@ -32,10 +33,11 @@ from .ui import App example_prefix = '__examples__' if TYPE_CHECKING: + from fastapi import FastAPI + from nomad.metainfo import SchemaPackage from nomad.normalizing import Normalizer as NormalizerBaseClass from nomad.parsing import Parser as ParserBaseClass - from fastapi import FastAPI class EntryPoint(BaseModel): @@ -762,8 +764,8 @@ def add_plugin(plugin: Schema) -> None: def remove_plugin(plugin) -> None: """Function for removing a plugin.""" from nomad.config import config - from nomad.metainfo.elasticsearch_extension import entry_type from nomad.metainfo import Package + from nomad.metainfo.elasticsearch_extension import entry_type # Remove from path try: diff --git a/nomad/config/models/ui.py b/nomad/config/models/ui.py index 88845aba8eaff855fdb7e5af268f9532a0637597..ed171ebd81925128e2e02dcf960a5d2483f900f3 100644 --- a/nomad/config/models/ui.py +++ b/nomad/config/models/ui.py @@ -17,18 +17,17 @@ # from enum import Enum -from typing import List, Dict, Union, Optional -from typing import Literal -from typing import Annotated -from pydantic import BaseModel, ConfigDict, model_validator, Field +from typing import Annotated, Literal, Union + +from pydantic import BaseModel, ConfigDict, Field, model_validator from .common import ( ConfigBaseModel, Options, - OptionsSingle, - OptionsMulti, - OptionsGlob, OptionsBase, + OptionsGlob, + OptionsMulti, + OptionsSingle, ) @@ -116,9 +115,10 @@ class UnitSystem(ConfigBaseModel): values = values.model_dump(exclude_none=True) """Adds SI defaults for dimensions that are missing a unit.""" units = values.get('units', {}) - from nomad.units import ureg from pint import UndefinedUnitError + from nomad.units import ureg + # Check that only supported dimensions and units are used for key in units.keys(): if key not in dimensions: diff --git a/nomad/datamodel/context.py b/nomad/datamodel/context.py index dca4c701c1bd5bd8615b059889505c7021f4682f..3c4529c903c1b6107159863d0976416c76390676 100644 --- a/nomad/datamodel/context.py +++ b/nomad/datamodel/context.py @@ -16,26 +16,19 @@ # limitations under the License. 
# -from typing import Dict, Any -from urllib.parse import urlsplit, urlunsplit -import re import os.path +import re +from urllib.parse import urlsplit, urlunsplit -import h5py import requests from nomad import utils from nomad.config import config -from nomad.datamodel.util import parse_path -from nomad.datamodel.datamodel import EntryMetadata -from nomad.metainfo import ( - Context as MetainfoContext, - MSection, - Quantity, - MetainfoReferenceError, - Package, -) from nomad.datamodel import EntryArchive +from nomad.datamodel.datamodel import EntryMetadata +from nomad.datamodel.util import parse_path +from nomad.metainfo import Context as MetainfoContext +from nomad.metainfo import MetainfoReferenceError, MSection, Package, Quantity class Context(MetainfoContext): diff --git a/nomad/datamodel/data.py b/nomad/datamodel/data.py index 36b55b2d65bd0802c0921e2c1292e4a238e15e02..ca029f896327fc8748978d8c9380f74d5ab4a41e 100644 --- a/nomad/datamodel/data.py +++ b/nomad/datamodel/data.py @@ -17,24 +17,23 @@ # import os.path +from typing import Any from cachetools import TTLCache, cached - -from typing import Dict, Any, Optional from pydantic import Field from nomad.config import config from nomad.metainfo.elasticsearch_extension import Elasticsearch, material_entry_type from nomad.metainfo.metainfo import ( + JSON, + Capitalized, Category, + Datetime, MCategory, MSection, Quantity, - Capitalized, - Section, - Datetime, Reference, - JSON, + Section, ) from nomad.metainfo.pydantic_extension import PydanticModel @@ -96,8 +95,8 @@ class EntryData(ArchiveSection): def normalize(self, archive, logger): super().normalize(archive, logger) - from nomad.datamodel.results import Results from nomad.datamodel import EntryArchive + from nomad.datamodel.results import Results # TODO entry_type should only be assigned if not already defined (done to pass eln test) if archive.metadata: diff --git a/nomad/datamodel/datamodel.py b/nomad/datamodel/datamodel.py index 2bf32ebd83c6091725c8048090d19d636020f335..3b048eec5182b6980d3b40a9dabe92e5194a4d80 100644 --- a/nomad/datamodel/datamodel.py +++ b/nomad/datamodel/datamodel.py @@ -18,39 +18,40 @@ """All generic entry metadata and related classes.""" -from typing import List, Any -from enum import Enum import os.path +from enum import Enum +from typing import Any import rfc3161ng from elasticsearch_dsl import analyzer, tokenizer from nomad import utils from nomad.datamodel.metainfo.common import FastAccess -from nomad.metainfo.mongoengine_extension import Mongo, MongoDocument -from nomad.metainfo.pydantic_extension import PydanticModel from nomad.metainfo.elasticsearch_extension import ( Elasticsearch, - material_entry_type, - entry_type as es_entry_type, create_searchable_quantity, + material_entry_type, ) -from .util import parse_path +from nomad.metainfo.elasticsearch_extension import entry_type as es_entry_type +from nomad.metainfo.mongoengine_extension import Mongo, MongoDocument +from nomad.metainfo.pydantic_extension import PydanticModel + from ..metainfo import ( + JSON, Bytes, - Package, + Datetime, Definition, - MSection, MCategory, + MEnum, + MSection, + Package, + Quantity, Section, SubSection, - Quantity, - MEnum, - Datetime, - JSON, ) from ..metainfo.data_type import m_str from ..metainfo.metainfo import Reference +from .util import parse_path # This is usually defined automatically when the first metainfo definition is evaluated, but # due to the next imports requiring the m_package already, this would be too late. 
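Reviewer note: several of the hunks above (nomad/cli/admin/migrate.py, nomad/config/__init__.py, nomad/config/models/config.py) apply the same pydantic v1 -> v2 migration: class attributes used as fields gain explicit type annotations, `Model.__fields__` becomes `Model.model_fields`, and `validator(..., allow_reuse=True)` becomes `field_validator(..., mode='before')`. A minimal sketch of the v2 idiom, using a hypothetical `Logging` model rather than the real NOMAD config classes:

    from pydantic import BaseModel, field_validator

    class Logging(BaseModel):
        # pydantic v2 only treats annotated attributes as fields
        # (cf. the _UpgradeStatistics change in nomad/cli/admin/migrate.py)
        level: str = 'INFO'

        # v1: _level = validator('level', allow_reuse=True)(normalize_loglevel)
        @field_validator('level', mode='before')
        @classmethod
        def _normalize(cls, value):
            return value.upper() if isinstance(value, str) else value

    # v1: Logging.__fields__  ->  v2: Logging.model_fields
    print(list(Logging.model_fields))  # ['level']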
@@ -723,6 +724,12 @@ class EntryMetadata(MSection): a_elasticsearch=Elasticsearch(), ) + nomad_distro_commit_url = Quantity( + type=str, + description='The NOMAD distro commit url used for the last processing', + categories=[MongoEntryMetadata], + a_elasticsearch=Elasticsearch(), + ) comment = Quantity( type=str, categories=[MongoEntryMetadata, EditableUserMetadata], diff --git a/nomad/datamodel/hdf5.py b/nomad/datamodel/hdf5.py index fa04a76f9465b26cfa6eb6f0271cd40e7f0cd396..daf56c68ad8fdd8de9750f7de76a07d8abff82f5 100644 --- a/nomad/datamodel/hdf5.py +++ b/nomad/datamodel/hdf5.py @@ -17,16 +17,16 @@ # from __future__ import annotations -from typing import Any -import h5py import re +from typing import Any +import h5py import numpy as np import pint from h5py import File -from nomad.metainfo.data_type import NonPrimitive from nomad.datamodel.metainfo.annotations import H5WebAnnotation +from nomad.metainfo.data_type import NonPrimitive from nomad.utils import get_logger LOGGER = get_logger(__name__) diff --git a/nomad/datamodel/metainfo/action.py b/nomad/datamodel/metainfo/action.py index a4c73d3de524f16e174415d663477c1bbde37261..8f219878de54194a6c1eb9040efbd3a7a4f8e184 100644 --- a/nomad/datamodel/metainfo/action.py +++ b/nomad/datamodel/metainfo/action.py @@ -16,7 +16,7 @@ # limitations under the License. # from nomad.datamodel.data import ArchiveSection -from nomad.metainfo import Quantity, Package +from nomad.metainfo import Package, Quantity m_package = Package() diff --git a/nomad/datamodel/metainfo/annotations.py b/nomad/datamodel/metainfo/annotations.py index 719b5e7c886f65e41e22d8f45f99852118b11aef..ed177b2541d635769a755fa10fc544f220765c29 100644 --- a/nomad/datamodel/metainfo/annotations.py +++ b/nomad/datamodel/metainfo/annotations.py @@ -16,18 +16,19 @@ # limitations under the License. # -from typing import List, Any, Union, Dict, Optional -from enum import Enum -from pydantic import field_validator, ConfigDict, Field, validator import re +from enum import Enum +from typing import Any +from pydantic import ConfigDict, Field, field_validator, model_validator from pydantic.main import BaseModel +from nomad.metainfo import AnnotationModel, Datetime, MEnum, Quantity, Reference from nomad.utils import strip -from nomad.metainfo import AnnotationModel, MEnum, Datetime, Reference, Quantity -from .plot import PlotlyError -from ..data import Query + from ...metainfo.data_type import Datatype +from ..data import Query +from .plot import PlotlyError class ELNComponentEnum(str, Enum): @@ -1140,6 +1141,69 @@ class SchemaAnnotation(AnnotationModel): ) +class Mapper(BaseModel): + """ + Specifications to map the contents from a source specified by mapper. If string, + will be a path to the data following the jmespath grammar + (see https://jmespath.org/specification.html) eg: + + 'length(.array.set.set)' + + If additional transformation is required to the data before assignment, one can + provide a tuple of function name and list of paths to the source data. The data are + resolved then passed to the function which should be implemented in the parser + class method. 
+ + For example: + + ('get_eigenvalues_energies', + [ + '.array.set.set[].set[].r', + 'length(.array.set.set)', + 'length(.array.set.set[0].set)' + ] + ) + """ + + mapper: str | tuple[str, list[str]] | tuple[str, list[str], dict[str, Any]] = Field( + '', + description="""Mapper from dictionary to archive property either as path""" + """ or Tuple of name of transformer function and list of paths to be resolved""" + """ as argument to the function.""", + ) + remove: bool = Field(None, description="""Removes data from source.""") + cache: bool = Field(None, description="""Store value.""") + path_parser: str = Field( + 'jmespath', description="""Name of the parser for paths.""" + ) + unit: str = Field(None, description="""Pint unit to be applied to value.""") + indices: str = Field( + None, description="""Name of function to evaluate indices to include in data""" + ) + search: str = Field(None, description="""Path to search on value.""") + + +class MappingAnnotation(AnnotationModel): + """ + Annotation model used with mapping parser. + + class MySection(MSection): + + m_def = Section(a_mapping={'hdf5': {mapper: 'data'}}) + """ + + @model_validator(mode='before') + def validate_mapper(cls, values): + for name, value in values.items(): + if name in cls.model_fields: + continue + values[name] = Mapper.model_validate(value) + return values + + class Config: + extra = 'allow' + + AnnotationModel.m_registry['eln'] = ELNAnnotation AnnotationModel.m_registry['browser'] = BrowserAnnotation AnnotationModel.m_registry['tabular_parser'] = TabularParserAnnotation @@ -1148,3 +1212,4 @@ AnnotationModel.m_registry['hdf5'] = HDF5Annotation AnnotationModel.m_registry['plot'] = PlotAnnotation AnnotationModel.m_registry['h5web'] = H5WebAnnotation AnnotationModel.m_registry['schema'] = SchemaAnnotation +AnnotationModel.m_registry['mapping'] = MappingAnnotation diff --git a/nomad/datamodel/metainfo/basesections/v1.py b/nomad/datamodel/metainfo/basesections/v1.py index 5604e971d4830f0ca9911bf7a3d27c01c8e3a6ec..eacde4ab5eab8fb1ba55d743ad57606d1ef5b009 100644 --- a/nomad/datamodel/metainfo/basesections/v1.py +++ b/nomad/datamodel/metainfo/basesections/v1.py @@ -20,8 +20,8 @@ import os import random import re import time -from typing import TYPE_CHECKING, Dict, List from collections.abc import Iterable +from typing import TYPE_CHECKING import h5py import numpy as np @@ -29,22 +29,17 @@ import requests from ase.data import atomic_masses, atomic_numbers, chemical_symbols from unidecode import unidecode -from nomad.metainfo import SchemaPackage from nomad.datamodel.metainfo.workflow import Link, Task, TaskReference, Workflow +from nomad.metainfo import SchemaPackage from nomad.metainfo.data_type import m_str if TYPE_CHECKING: - from structlog.stdlib import ( - BoundLogger, - ) + from structlog.stdlib import BoundLogger from nomad import utils from nomad.atomutils import Formula from nomad.datamodel.data import ArchiveSection, EntryData -from nomad.datamodel.metainfo.annotations import ( - ELNAnnotation, - HDF5Annotation, -) +from nomad.datamodel.metainfo.annotations import ELNAnnotation, HDF5Annotation from nomad.datamodel.results import ELN, Material, Results from nomad.datamodel.results import ElementalComposition as ResultsElementalComposition from nomad.datamodel.util import create_custom_mapping @@ -233,12 +228,12 @@ class BaseSection(ArchiveSection): def normalize(self, archive, logger: 'BoundLogger') -> None: """ - The normalizer for the `BaseSection` class. 
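Reviewer note: the `Mapper` and `MappingAnnotation` classes added to nomad/datamodel/metainfo/annotations.py above describe how a section can declare where its values come from, either as a jmespath path or as a (transformer name, list of source paths) tuple that a mapping-aware parser resolves. A minimal declaration-side sketch, reusing the example paths from the docstring above; the 'hdf5'/'xml' keys and the way a parser consumes the annotation are assumptions, not part of this diff:

    from nomad.datamodel.metainfo.annotations import Mapper
    from nomad.metainfo import MSection, Quantity, Section

    class MySection(MSection):
        m_def = Section(
            a_mapping={
                # plain jmespath path into the source data
                'hdf5': {'mapper': 'length(.array.set.set)'},
                # transformer function (implemented on the parser class) plus source paths
                'xml': {'mapper': ('get_eigenvalues_energies',
                                   ['.array.set.set[].set[].r',
                                    'length(.array.set.set)'])},
            }
        )
        value = Quantity(type=float)

    # the payload of a single entry validates against the Mapper model
    Mapper.model_validate({'mapper': 'length(.array.set.set)', 'unit': 'eV'})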
- - Args: - archive (EntryArchive): The archive containing the section that is being - normalized. - logger ('BoundLogger'): A structlog logger. + - If the instance is of type `EntryData`, it sets the archive's entry name based on the instance's name. + - Sets the `datetime` field to the current time if it is not already set. + - Manages the `lab_id` field and updates the archive's `results.eln.lab_ids` list. + - Adds the instance's `name` and `description` to the archive's `results.eln.names` and `results.eln.descriptions` lists, respectively. + - Handles the `tags` attribute, if present, and updates the archive's `results.eln.tags` list. + - Appends the section's name to the archive's `results.eln.sections` list. """ super().normalize(archive, logger) @@ -385,12 +380,8 @@ class Activity(BaseSection): def normalize(self, archive, logger: 'BoundLogger') -> None: """ - The normalizer for the `Activity` class. - - Args: - archive (EntryArchive): The archive containing the section that is being - normalized. - logger ('BoundLogger'): A structlog logger. + - Ensures the `results.eln.methods` list is initialized and appends the method or section name. + - Converts each step in `self.steps` to a task, using the steps `to_task()` method, and assigns it to `archive.workflow2.tasks`. """ super().normalize(archive, logger) @@ -450,13 +441,7 @@ class EntityReference(SectionReference): def normalize(self, archive, logger: 'BoundLogger') -> None: """ - The normalizer for the `EntityReference` class. Will attempt to fill the `reference` from the `lab_id` or vice versa. - - Args: - archive (EntryArchive): The archive containing the section that is being - normalized. - logger ('BoundLogger'): A structlog logger. """ super().normalize(archive, logger) if self.reference is None and self.lab_id is not None: @@ -513,15 +498,9 @@ class ExperimentStep(ActivityStep): def normalize(self, archive, logger: 'BoundLogger') -> None: """ - The normalizer for the `ExperimentStep` class. Will attempt to fill the `activity` from the `lab_id` or vice versa. If the activity reference is filled but the start time is not the time will be taken from the `datetime` property of the referenced activity. - - Args: - archive (EntryArchive): The archive containing the section that is being - normalized. - logger ('BoundLogger'): A structlog logger. """ super().normalize(archive, logger) if self.activity is None and self.lab_id is not None: @@ -618,17 +597,12 @@ class ElementalComposition(ArchiveSection): def normalize(self, archive, logger: 'BoundLogger') -> None: """ - The normalizer for the `ElementalComposition` class. Will add a + Will add a results.material subsection if none exists. Will append the element to the elements property of that subsection and a nomad.datamodel.results.ElementalComposition instances to the elemental_composition property using the element and atomic fraction from this section. - - Args: - archive (EntryArchive): The archive containing the section that is being - normalized. - logger ('BoundLogger'): A structlog logger. """ super().normalize(archive, logger) @@ -740,13 +714,8 @@ class System(Entity): def normalize(self, archive, logger: 'BoundLogger') -> None: """ - The normalizer for the `System` class. Will attempt to fill mass fractions or + Will attempt to fill mass fractions or atomic fractions if left blank. - - Args: - archive (EntryArchive): The archive containing the section that is being - normalized. - logger ('BoundLogger'): A structlog logger. 
""" super().normalize(archive, logger) @@ -761,12 +730,7 @@ class Instrument(Entity): def normalize(self, archive, logger: 'BoundLogger') -> None: """ - The normalizer for the `Instrument` class. - - Args: - archive (EntryArchive): The archive containing the section that is being - normalized. - logger ('BoundLogger'): A structlog logger. + Adds the name of the instrument to the `results.eln.instruments` list. """ super().normalize(archive, logger) @@ -827,14 +791,9 @@ class SystemComponent(Component): def normalize(self, archive, logger: 'BoundLogger') -> None: """ - The normalizer for the `SystemComponent` class. If none is set, the normalizer + If none is set, the normalizer will set the name of the component to be that of the referenced system if it has one. - - Args: - archive (EntryArchive): The archive containing the section that is being - normalized. - logger ('BoundLogger'): A structlog logger. """ super().normalize(archive, logger) if self.name is None and self.system is not None: @@ -950,14 +909,9 @@ class PureSubstanceComponent(Component): def normalize(self, archive, logger: 'BoundLogger') -> None: """ - The normalizer for the `PureSubstanceComponent` class. If none is set, the + If none is set, the normalizer will set the name of the component to be the molecular formula of the substance. - - Args: - archive (EntryArchive): The archive containing the section that is being - normalized. - logger ('BoundLogger'): A structlog logger. """ super().normalize(archive, logger) if self.substance_name and self.pure_substance is None: @@ -1076,18 +1030,13 @@ class CompositeSystem(System): def normalize(self, archive, logger: 'BoundLogger') -> None: """ - The normalizer for the `CompositeSystem` class. If the elemental composition list is + If the elemental composition list is empty, the normalizer will iterate over the components and extract all the elements for populating the elemental composition list. If masses are provided for all components and the elemental composition of all components contain atomic fractions the normalizer will also calculate the atomic fractions for the composite system. The populated elemental composition list is added to the results by the normalizer in the `System` super class. - - Args: - archive (EntryArchive): The archive containing the section that is being - normalized. - logger ('BoundLogger'): A structlog logger. """ if logger is None: logger = utils.get_logger(__name__) @@ -1229,12 +1178,9 @@ class Process(Activity): def normalize(self, archive, logger: 'BoundLogger') -> None: """ - The normalizer for the `Process` class. - - Args: - archive (EntryArchive): The archive containing the section that is being - normalized. - logger ('BoundLogger'): A structlog logger. + - Sets the start time for each step in `self.steps` if not already set, based on the `datetime` and `duration` fields. + - Sets the `end_time` field to the calculated end time if it is not already set. + - Updates the `archive.workflow2.outputs` list with links to the samples processed. """ super().normalize(archive, logger) if ( @@ -1297,12 +1243,8 @@ class Analysis(Activity): def normalize(self, archive, logger: 'BoundLogger') -> None: """ - The normalizer for the `Analysis` section. - - Args: - archive (EntryArchive): The archive containing the section that is being - normalized. - logger ('BoundLogger'): A structlog logger. + - Updates the `archive.workflow2.inputs` list with links to the input data. + - Updates the `archive.workflow2.outputs` list with links to the output data. 
""" super().normalize(archive, logger) archive.workflow2.inputs = [ @@ -1364,12 +1306,8 @@ class Measurement(Activity): def normalize(self, archive, logger: 'BoundLogger') -> None: """ - The normalizer for the `Measurement` section. - - Args: - archive (EntryArchive): The archive containing the section that is being - normalized. - logger ('BoundLogger'): A structlog logger. + - Updates the `archive.workflow2.inputs` list with links to the input samples. + - Updates the `archive.workflow2.outputs` list with links to the measurement results. """ super().normalize(archive, logger) archive.workflow2.inputs = [ @@ -1420,13 +1358,8 @@ class PureSubstance(System): def normalize(self, archive, logger: 'BoundLogger') -> None: """ - The normalizer method for the `Substance` class. This method will populate the results.material section and the elemental composition sub section using the molecular formula. - - Args: - archive (EntryArchive): The archive that is being normalized. - logger ('BoundLogger'): A structlog logger. """ super().normalize(archive, logger) if logger is None: @@ -1625,11 +1558,11 @@ class PubChemPureSubstanceSection(PureSubstanceSection): def normalize(self, archive, logger: 'BoundLogger') -> None: """ - The normalizer method for the `PubChemSubstanceSection` class. This method will attempt to get data on the substance instance from the PubChem PUG REST API: https://pubchem.ncbi.nlm.nih.gov/docs/pug-rest If a PubChem CID is specified the details are retrieved directly. Otherwise a search query is made for the filled attributes in the following order: + 1. `smile` 2. `canonical_smile` 3. `inchi_key` @@ -1637,10 +1570,6 @@ class PubChemPureSubstanceSection(PureSubstanceSection): 5. `name` 6. `molecular_formula` 7. `cas_number` - - Args: - archive (EntryArchive): The archive that is being normalized. - logger ('BoundLogger'): A structlog logger. """ if logger is None: logger = utils.get_logger(__name__) @@ -1881,7 +1810,6 @@ class CASPureSubstanceSection(PureSubstanceSection): def normalize(self, archive, logger: 'BoundLogger') -> None: """ - The normalizer method for the `CASPureSubstanceSection` class. This method will attempt to get data on the pure substance instance from the CAS API: https://commonchemistry.cas.org/api-overview If a CAS number is specified the details are retrieved directly. @@ -1893,10 +1821,6 @@ class CASPureSubstanceSection(PureSubstanceSection): 4. `smile` 5. `canonical_smile` 6. `name` - - Args: - archive (EntryArchive): The archive that is being normalized. - logger ('BoundLogger'): A structlog logger. """ if logger is None: logger = utils.get_logger(__name__) @@ -1968,23 +1892,19 @@ class ReadableIdentifiers(ArchiveSection): def normalize(self, archive, logger: 'BoundLogger') -> None: """ - The normalizer for the `ReadableIdentifiers` class. If owner is not filled the field will be filled by the first two letters of the first name joined with the first two letters of the last name of the author. If the institute is not filled a institute abreviations will be constructed from the author's affiliation. + If no datetime is filled, the datetime will be taken from the `datetime` property of the parent, if it exists, otherwise the current date and time will be used. + If no short name is filled, the name will be taken from the parent name, if it exists, otherwise it will be taken from the archive metadata entry name, if it exists, and finally if no other options are available it will use the name of the mainfile. 
- - Args: - archive (EntryArchive): The archive containing the section that is being - normalized. - logger ('BoundLogger'): A structlog logger. """ super().normalize(archive, logger) @@ -2106,16 +2026,15 @@ class PublicationReference(ArchiveSection): def normalize(self, archive, logger: 'BoundLogger') -> None: """ - The normalizer for the `PublicationReference` class. - - Args: - archive (EntryArchive): The archive containing the section that is being - normalized. - logger ('BoundLogger'): A structlog logger. + - If a DOI number is provided, retrieves publication details from the CrossRef API. + - Populates the `publication_authors`, `journal`, `publication_title`, and `publication_date` fields based on the CrossRef response. + - Ensures the DOI number has the prefix `https://doi.org/`. + - Updates the archive's metadata references with the DOI number if it is not already present. """ super().normalize(archive, logger) import dateutil.parser import requests + from nomad.datamodel.datamodel import EntryMetadata # Parse journal name, lead author and publication date from crossref diff --git a/nomad/datamodel/metainfo/basesections/v2.py b/nomad/datamodel/metainfo/basesections/v2.py index b2b442bbb7fcc6fe4d48dd55e61ad9a30eaf7222..1ad7e401f79d68d49a8ba972424a624f9e427b15 100644 --- a/nomad/datamodel/metainfo/basesections/v2.py +++ b/nomad/datamodel/metainfo/basesections/v2.py @@ -15,72 +15,39 @@ # See the License for the specific language governing permissions and # limitations under the License. # +import datetime import os -from typing import TYPE_CHECKING -from collections.abc import Iterable import random -import time -import datetime import re -from typing import ( - Dict, - List, -) +import time +from collections.abc import Iterable +from typing import TYPE_CHECKING -from unidecode import unidecode -import numpy as np import h5py -from ase.data import ( - chemical_symbols, - atomic_numbers, - atomic_masses, -) +import numpy as np import requests +from ase.data import atomic_masses, atomic_numbers, chemical_symbols +from unidecode import unidecode -from nomad.datamodel.metainfo.workflow import Link, Task, TaskReference, Workflow +from nomad.datamodel.metainfo.workflow import Link, Task, Workflow from nomad.metainfo.data_type import m_str if TYPE_CHECKING: - from structlog.stdlib import ( - BoundLogger, - ) -from nomad.atomutils import ( - Formula, -) -from nomad import ( - utils, -) -from nomad.units import ( - ureg, -) -from nomad.metainfo import ( - Quantity, - Datetime, - Reference, - Section, - SectionProxy, - SubSection, -) -from nomad.metainfo.util import MEnum -from nomad.datamodel.util import create_custom_mapping -from nomad.datamodel.data import ( - ArchiveSection, - EntryData, -) -from nomad.datamodel.results import ( - Results, - ELN, - ElementalComposition as ResultsElementalComposition, - Material, -) + from structlog.stdlib import BoundLogger +from nomad import utils +from nomad.datamodel.data import ArchiveSection from nomad.datamodel.metainfo.annotations import ( ELNAnnotation, - ELNComponentEnum, Filter, - SectionProperties, HDF5Annotation, + SectionProperties, ) - +from nomad.datamodel.results import ELN, Material, Results +from nomad.datamodel.results import ElementalComposition as ResultsElementalComposition +from nomad.datamodel.util import create_custom_mapping +from nomad.metainfo import Datetime, Quantity, Section, SectionProxy, SubSection +from nomad.metainfo.util import MEnum +from nomad.units import ureg PUB_CHEM_PUG_PATH = 
'https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound' CAS_API_PATH = 'https://commonchemistry.cas.org/api' @@ -291,7 +258,7 @@ class ActivityStep(ArchiveSection): """, a_eln=ELNAnnotation(component='DateTimeEditQuantity', label='starting time'), ) - comment = Quantity( + description = Quantity( type=str, description=""" Any additional information about the step not captured by the other fields. @@ -418,7 +385,7 @@ class EntityReference(SectionReference): """ super().normalize(archive, logger) if self.reference is None and self.lab_id is not None: - from nomad.search import search, MetadataPagination + from nomad.search import MetadataPagination, search query = {'results.eln.lab_ids': self.lab_id} search_result = search( @@ -449,6 +416,16 @@ class ExperimentStep(ActivityStep): Any dependant step of an `Experiment`. """ + lab_id = Quantity( + type=str, + description=""" + The readable identifier for the activity. + """, + a_eln=ELNAnnotation( + component='StringEditQuantity', + label='activity ID', + ), + ) activity = Quantity( type=Activity, description=""" @@ -458,66 +435,48 @@ class ExperimentStep(ActivityStep): component='ReferenceEditQuantity', ), ) - lab_id = Quantity( - type=str, + + +class NestedExperimentStep(ExperimentStep): + """ + A step of an Experiment. + + This class is a wrapper for the `Activity` class and is used to describe + the metadata of an activity when it is a step of another, larger, experiment. + + The `Activity` class instance can be instantiated in the `activity` property + as a nested subsection. + + A normalizer will create a link in the activity property inherited from + the ExperimentStep class. + + """ + + m_def = Section( + a_eln=ELNAnnotation( + properties=SectionProperties( + visible=Filter( + exclude=[ + 'activity', + ], + ), + ) + ) + ) + + nested_activity = SubSection( + section_def=Activity, description=""" - The readable identifier for the activity. + Section describing the activity that is the step on an experiment. """, - a_eln=ELNAnnotation( - component='StringEditQuantity', - label='activity ID', - ), + label='activity', ) def normalize(self, archive, logger: 'BoundLogger') -> None: - """ - The normalizer for the `ExperimentStep` class. - Will attempt to fill the `activity` from the `lab_id` or vice versa. - If the activity reference is filled but the start time is not the time will be - taken from the `datetime` property of the referenced activity. - - Args: - archive (EntryArchive): The archive containing the section that is being - normalized. - logger ('BoundLogger'): A structlog logger. - """ super().normalize(archive, logger) - if self.activity is None and self.lab_id is not None: - from nomad.search import search, MetadataPagination - - query = {'results.eln.lab_ids': self.lab_id} - search_result = search( - owner='all', - query=query, - pagination=MetadataPagination(page_size=1), - user_id=archive.metadata.main_author.user_id, - ) - if search_result.pagination.total > 0: - entry_id = search_result.data[0]['entry_id'] - upload_id = search_result.data[0]['upload_id'] - self.activity = f'../uploads/{upload_id}/archive/{entry_id}#data' - if search_result.pagination.total > 1: - logger.warn( - f'Found {search_result.pagination.total} entries with lab_id: ' - f'"{self.lab_id}". Will use the first one found.' 
- ) - else: - logger.warn(f'Found no entries with lab_id: "{self.lab_id}".') - elif self.lab_id is None and self.activity is not None: - self.lab_id = self.activity.lab_id - if self.name is None and self.lab_id is not None: - self.name = self.lab_id - if ( - self.activity is not None - and self.start_time is None - and self.activity.datetime - ): - self.start_time = self.activity.datetime - def to_task(self) -> Task: - if self.activity is None: - return Task(name=self.name) - return TaskReference(task=self.activity.m_parent.workflow2) + if self.nested_activity: + self.activity = self.nested_activity class Experiment(Activity): @@ -525,8 +484,13 @@ class Experiment(Activity): A section for grouping activities together into an experiment. """ - steps = Activity.steps.m_copy() - steps.section_def = ExperimentStep + steps = SubSection( + section_def=ExperimentStep, + description=""" + An ordered list of all the dependant steps that make up this experiment. + """, + repeats=True, + ) class Collection(Entity): @@ -1608,10 +1572,11 @@ class PublicationReference(ArchiveSection): logger ('BoundLogger'): A structlog logger. """ super().normalize(archive, logger) - from nomad.datamodel.datamodel import EntryMetadata import dateutil.parser import requests + from nomad.datamodel.datamodel import EntryMetadata + # Parse journal name, lead author and publication date from crossref if self.DOI_number: try: diff --git a/nomad/datamodel/metainfo/common.py b/nomad/datamodel/metainfo/common.py index 14bc1024ec90ec024b8bb863729254b268c6dbfa..beeb755d6d910afe3250dbe525b763c6ed54a4b2 100644 --- a/nomad/datamodel/metainfo/common.py +++ b/nomad/datamodel/metainfo/common.py @@ -16,8 +16,7 @@ # limitations under the License. # -from nomad.metainfo import MCategory, Category -from nomad.metainfo import MSection, Section, SubSection, Quantity +from nomad.metainfo import Category, MCategory, MSection, Quantity, Section, SubSection from nomad.metainfo.elasticsearch_extension import Elasticsearch, material_entry_type diff --git a/nomad/datamodel/metainfo/downloads.py b/nomad/datamodel/metainfo/downloads.py index 9ff64389d79966909ddf158a38a2cc249d61f781..8ada69e13bc3735549b3955517b4a5ccd762af1e 100644 --- a/nomad/datamodel/metainfo/downloads.py +++ b/nomad/datamodel/metainfo/downloads.py @@ -18,10 +18,8 @@ import os.path -from nomad.metainfo import MSection, Package, Quantity, SubSection - from nomad.datamodel.data import ArchiveSection - +from nomad.metainfo import MSection, Package, Quantity, SubSection m_package = Package(name='downloads') @@ -137,7 +135,8 @@ class Downloads(ArchiveSection): import pathlib import urllib.request - from nomad.common import get_compression_format, extract_file + + from nomad.common import extract_file, get_compression_format # download and extract files skip_download = True diff --git a/nomad/datamodel/metainfo/eln/__init__.py b/nomad/datamodel/metainfo/eln/__init__.py index 319780330ac46b02638b985932d49f33e2b26f12..e9faca60422ce4a611331e40ec44327e5dccefbd 100644 --- a/nomad/datamodel/metainfo/eln/__init__.py +++ b/nomad/datamodel/metainfo/eln/__init__.py @@ -18,7 +18,7 @@ import datetime import re -from typing import TYPE_CHECKING, Any, Dict, List +from typing import TYPE_CHECKING, Any import numpy as np from unidecode import unidecode diff --git a/nomad/datamodel/metainfo/measurements.py b/nomad/datamodel/metainfo/measurements.py index c10787470caf5c12a1fde04e29fd44a6daa80698..a6bb1fca5cf666c4c9699d8b15d3ec1228248011 100644 --- a/nomad/datamodel/metainfo/measurements.py +++ 
b/nomad/datamodel/metainfo/measurements.py @@ -18,10 +18,9 @@ import numpy as np -from nomad.metainfo import MSection, Package, Quantity, SubSection, Datetime -from nomad.metainfo.metainfo import Reference, SectionProxy from nomad.datamodel import Author - +from nomad.metainfo import Datetime, MSection, Package, Quantity, SubSection +from nomad.metainfo.metainfo import Reference, SectionProxy m_package = Package(name='measurements') diff --git a/nomad/datamodel/metainfo/plot.py b/nomad/datamodel/metainfo/plot.py index 3618263f8e78b5f19f434f6af17791de836de6a4..caaf30037f341d1d9038009d2d75094d12a85b2a 100644 --- a/nomad/datamodel/metainfo/plot.py +++ b/nomad/datamodel/metainfo/plot.py @@ -15,14 +15,16 @@ # See the License for the specific language governing permissions and # limitations under the License. # -from nomad.datamodel.data import ArchiveSection -from nomad.metainfo import Quantity, SubSection, Package, MSection, JSON, Section +from copy import deepcopy +from datetime import datetime + +import numpy as np import plotly.express as px import plotly.graph_objs as go from plotly.subplots import make_subplots -import numpy as np -from copy import deepcopy -from datetime import datetime + +from nomad.datamodel.data import ArchiveSection +from nomad.metainfo import JSON, MSection, Package, Quantity, Section, SubSection class PlotlyError(Exception): diff --git a/nomad/datamodel/metainfo/simulation/calculation.py b/nomad/datamodel/metainfo/simulation/calculation.py index 5f017bf063cd11eed29826612a37d24838c4019a..a4ecc9095459db3682c480b3cfcd6b9693620f14 100644 --- a/nomad/datamodel/metainfo/simulation/calculation.py +++ b/nomad/datamodel/metainfo/simulation/calculation.py @@ -20,28 +20,28 @@ # Only for purpose of compatibility. Use run schema plugin. 
# https://github.com/nomad-coe/nomad-schema-plugin-run.git -import numpy as np # pylint: disable=unused-import -from nomad.metainfo import ( # pylint: disable=unused-import - MSection, - MCategory, +import numpy as np # noqa: F401 + +from nomad.datamodel.data import ArchiveSection +from nomad.datamodel.metainfo.common import PropertySection, ProvenanceTracker +from nomad.datamodel.metainfo.simulation.method import HoppingMatrix, Method +from nomad.datamodel.metainfo.simulation.system import AtomsGroup, System +from nomad.metainfo import ( # noqa: F401 Category, + MCategory, + MEnum, + MSection, Package, Quantity, + Reference, Section, - SubSection, SectionProxy, - Reference, - MEnum, + SubSection, derived, ) -from nomad.datamodel.metainfo.common import ProvenanceTracker, PropertySection -from nomad.datamodel.metainfo.simulation.system import System, AtomsGroup -from nomad.datamodel.metainfo.simulation.method import Method, HoppingMatrix -from nomad.datamodel.data import ArchiveSection from ..common import FastAccess - m_package = Package() diff --git a/nomad/datamodel/metainfo/simulation/legacy_workflows.py b/nomad/datamodel/metainfo/simulation/legacy_workflows.py index 52a62e4a1f4833c630a4ef2202e7a739863088ac..7a4eef3e5d887f275b911570155e4d03d2b81b4c 100644 --- a/nomad/datamodel/metainfo/simulation/legacy_workflows.py +++ b/nomad/datamodel/metainfo/simulation/legacy_workflows.py @@ -22,26 +22,26 @@ import numpy as np from nptyping import NDArray + +from nomad.datamodel.metainfo.common import FastAccess +from nomad.datamodel.metainfo.simulation.calculation import ( + BandStructure, + Calculation, + Dos, +) +from nomad.datamodel.metainfo.simulation.run import Run +from nomad.datamodel.metainfo.simulation.system import Atoms, AtomsGroup, System from nomad.metainfo import ( - MSection, MEnum, + MSection, + Package, Quantity, + Reference, Section, - SubSection, SectionProxy, - Reference, - Package, + SubSection, derived, ) -from nomad.datamodel.metainfo.simulation.calculation import ( - Calculation, - Dos, - BandStructure, -) -from nomad.datamodel.metainfo.simulation.run import Run -from nomad.datamodel.metainfo.simulation.system import System, Atoms, AtomsGroup -from nomad.datamodel.metainfo.common import FastAccess - m_package = Package() diff --git a/nomad/datamodel/metainfo/simulation/method.py b/nomad/datamodel/metainfo/simulation/method.py index 3cb23695ca1cfda7c8d102f097550f9e697ffe5f..0d88668563fd6c5dcbd90c76a15f92d0e65381d9 100644 --- a/nomad/datamodel/metainfo/simulation/method.py +++ b/nomad/datamodel/metainfo/simulation/method.py @@ -20,28 +20,28 @@ # Only for purpose of compatibility. Use run schema plugin. 
# https://github.com/nomad-coe/nomad-schema-plugin-run.git -from logging import Logger -import numpy as np # pylint: disable=unused-import import typing +from logging import Logger + +import numpy as np # noqa: F401 +from pint.util import SharedRegistryObject # noqa: F401 -from pint.util import SharedRegistryObject # pylint: disable=unused-import from nomad.datamodel.data import ArchiveSection -from nomad.metainfo import ( # pylint: disable=unused-import - MSection, - MCategory, +from nomad.metainfo import ( # noqa: F401 Category, + MCategory, + MEnum, + MSection, Package, Quantity, + Reference, Section, - SubSection, SectionProxy, - Reference, - MEnum, + SubSection, ) -from nomad.metainfo.metainfo import derived from nomad.quantum_states import RussellSaundersState -from ..common import FastAccess +from ..common import FastAccess m_package = Package() diff --git a/nomad/datamodel/metainfo/simulation/run.py b/nomad/datamodel/metainfo/simulation/run.py index 99dab5d713c0d0f2740b665e81c4d2fab70c14f6..2dd64bf26696ef6d4b7450071f435851a39a8eed 100644 --- a/nomad/datamodel/metainfo/simulation/run.py +++ b/nomad/datamodel/metainfo/simulation/run.py @@ -20,24 +20,24 @@ # Only for purpose of compatibility. Use run schema plugin. # https://github.com/nomad-coe/nomad-schema-plugin-run.git -import numpy as np # pylint: disable=unused-import +import numpy as np # noqa: F401 -from nomad.metainfo import ( # pylint: disable=unused-import - MSection, - MCategory, +from nomad.datamodel.data import ArchiveSection +from nomad.datamodel.metainfo.common import FastAccess +from nomad.datamodel.metainfo.simulation.calculation import Calculation +from nomad.datamodel.metainfo.simulation.method import Method +from nomad.datamodel.metainfo.simulation.system import System +from nomad.metainfo import ( # noqa: F401 Category, + MCategory, + MSection, Package, Quantity, + Reference, Section, - SubSection, SectionProxy, - Reference, + SubSection, ) -from nomad.datamodel.metainfo.simulation.method import Method -from nomad.datamodel.metainfo.simulation.system import System -from nomad.datamodel.metainfo.simulation.calculation import Calculation -from nomad.datamodel.metainfo.common import FastAccess -from nomad.datamodel.data import ArchiveSection m_package = Package() diff --git a/nomad/datamodel/metainfo/simulation/system.py b/nomad/datamodel/metainfo/simulation/system.py index e3bcc8d7862d1d51efe89a2957048467a34f53a6..c5645aebbf0f6aa5dd19826b6213f13a52f88019 100644 --- a/nomad/datamodel/metainfo/simulation/system.py +++ b/nomad/datamodel/metainfo/simulation/system.py @@ -20,26 +20,28 @@ # Only for purpose of compatibility. Use run schema plugin. 
# https://github.com/nomad-coe/nomad-schema-plugin-run.git -import numpy as np # pylint: disable=unused-import -import typing # pylint: disable=unused-import -from nomad.metainfo import ( # pylint: disable=unused-import - MSection, - MCategory, +import typing # noqa: F401 + +import numpy as np # noqa: F401 + +from nomad.datamodel.data import ArchiveSection +from nomad.metainfo import ( # noqa: F401 Category, + MCategory, + MEnum, + MSection, Package, Quantity, + Reference, Section, - SubSection, SectionProxy, - Reference, - MEnum, + SubSection, derived, ) -from nomad.datamodel.data import ArchiveSection from nomad.metainfo.data_type import m_float64 +from nomad.units import ureg from ..common import FastAccess -from nomad.units import ureg m_package = Package() diff --git a/nomad/datamodel/metainfo/simulation/workflow.py b/nomad/datamodel/metainfo/simulation/workflow.py index 36374441b67d139e489cc44509c4381fa312271f..aa77ddb2d01bccb48645324e4d9986c897058ba3 100644 --- a/nomad/datamodel/metainfo/simulation/workflow.py +++ b/nomad/datamodel/metainfo/simulation/workflow.py @@ -20,7 +20,6 @@ # Only for purpose of compatibility. Use simulation workflow schema plugin. # https://github.com/nomad-coe/nomad-schema-plugin-simulation-workflow.git -from typing import List import numpy as np from ase import Atoms from ase.eos import EquationOfState as aseEOS @@ -28,45 +27,48 @@ from nptyping import NDArray from nomad.atomutils import get_volume from nomad.datamodel.data import ArchiveSection -from nomad.units import ureg -from nomad.metainfo import ( - MSection, - SubSection, - Section, - Quantity, - MEnum, - Reference, - Package, - derived, -) from nomad.datamodel.metainfo.common import FastAccess -from nomad.datamodel.metainfo.workflow import Workflow, Link, Task -from nomad.datamodel.metainfo.simulation.system import System, AtomsGroup -from nomad.datamodel.metainfo.simulation.method import ( - Method, - XCFunctional, - BasisSetContainer, - GW as GWMethodology, - TB as TBMethodology, - DMFT as DMFTMethodology, - BSE as BSEMethodology, -) from nomad.datamodel.metainfo.simulation.calculation import ( - Calculation, + BandEnergies, BandGap, - Dos, BandStructure, - BandEnergies, + Calculation, Density, - Potential, - Spectra, + Dos, ElectronicStructureProvenance, + EnergyEntry, GreensFunctions, + Potential, + Spectra, +) +from nomad.datamodel.metainfo.simulation.calculation import ( RadiusOfGyration as RadiusOfGyrationCalculation, +) +from nomad.datamodel.metainfo.simulation.calculation import ( RadiusOfGyrationValues as RadiusOfGyrationValuesCalculation, - EnergyEntry, ) - +from nomad.datamodel.metainfo.simulation.method import BSE as BSEMethodology +from nomad.datamodel.metainfo.simulation.method import DMFT as DMFTMethodology +from nomad.datamodel.metainfo.simulation.method import GW as GWMethodology +from nomad.datamodel.metainfo.simulation.method import TB as TBMethodology +from nomad.datamodel.metainfo.simulation.method import ( + BasisSetContainer, + Method, + XCFunctional, +) +from nomad.datamodel.metainfo.simulation.system import AtomsGroup, System +from nomad.datamodel.metainfo.workflow import Link, Task, Workflow +from nomad.metainfo import ( + MEnum, + MSection, + Package, + Quantity, + Reference, + Section, + SubSection, + derived, +) +from nomad.units import ureg # TODO remove this after reprocessing with the new schema defined in # simulationworkflowschema plug in https://github.com/nomad-coe/nomad-schema-plugin-simulation-workflow.git @@ -2203,11 +2205,11 @@ class 
MolecularDynamicsResults(ThermodynamicsResults): super().normalize(archive, logger) try: - from simulationworkflowschema.molecular_dynamics import archive_to_universe from simulationworkflowschema.molecular_dynamics import ( - calc_molecular_rdf, + archive_to_universe, calc_molecular_mean_squared_displacements, calc_molecular_radius_of_gyration, + calc_molecular_rdf, ) universe = archive_to_universe(archive) diff --git a/nomad/datamodel/metainfo/system.py b/nomad/datamodel/metainfo/system.py index bcff5a8172b802abbdf02991f982c954c32b47b2..05fce8c21c63a8dc2c831dde8a8647fef4c31ca0 100644 --- a/nomad/datamodel/metainfo/system.py +++ b/nomad/datamodel/metainfo/system.py @@ -15,11 +15,11 @@ # See the License for the specific language governing permissions and # limitations under the License. # -import numpy as np import ase +import numpy as np -from nomad.metainfo import Package, Quantity, Section, SubSection, SectionProxy from nomad.datamodel.data import ArchiveSection +from nomad.metainfo import Package, Quantity, Section, SectionProxy, SubSection from nomad.units import ureg # TODO System should be redefined from base section diff --git a/nomad/datamodel/metainfo/tabulartree.py b/nomad/datamodel/metainfo/tabulartree.py index 806be1c2c6651d346cf82af29a3006dd3b66984f..496090623503842216ee0543243152407caa7f84 100644 --- a/nomad/datamodel/metainfo/tabulartree.py +++ b/nomad/datamodel/metainfo/tabulartree.py @@ -20,7 +20,6 @@ import typing from nomad.metainfo import MSection, Package, Quantity, SubSection - m_package = Package(name='tabulartree') diff --git a/nomad/datamodel/metainfo/workflow.py b/nomad/datamodel/metainfo/workflow.py index a77f0af398ff271e2fae073b16230995a311bfd1..a1c3fe137a10327b3ec04aeb1ad514d736e9c8b1 100644 --- a/nomad/datamodel/metainfo/workflow.py +++ b/nomad/datamodel/metainfo/workflow.py @@ -16,9 +16,8 @@ # limitations under the License. # -from nomad.metainfo import Quantity, SubSection, Section - from nomad.datamodel.data import ArchiveSection, EntryData, WorkflowsElnCategory +from nomad.metainfo import Quantity, Section, SubSection class Link(ArchiveSection): @@ -98,9 +97,18 @@ class TaskReference(Task): def normalize(self, archive, logger): super().normalize(archive, logger) - if not self.name and self.task: + if self.task is None: + return + + if not self.name: self.name = self.task.name + # add task inputs/outputs to inputs/outputs + self.inputs.extend([inp for inp in self.task.inputs if inp not in self.inputs]) + self.outputs.extend( + [out for out in self.task.outputs if out not in self.outputs] + ) + class Workflow(Task, EntryData): """ diff --git a/nomad/datamodel/optimade.py b/nomad/datamodel/optimade.py index f72fa69cbf3a836fc54d7b3ab0af320716d91868..5dfea496884e1083320992269c16d073e97cdb3e 100644 --- a/nomad/datamodel/optimade.py +++ b/nomad/datamodel/optimade.py @@ -16,19 +16,19 @@ # limitations under the License. 
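Editorial note, not part of the patch: the `TaskReference.normalize` hunk in `nomad/datamodel/metainfo/workflow.py` above now returns early when no task is referenced, copies the task name, and appends the referenced task's inputs/outputs to the reference. A rough sketch of that behaviour follows; the names (`relaxation`, `input structure`, …) are invented for illustration, and passing `None` for archive/logger assumes the base `normalize` hook does not need them.

```python
# Sketch only: exercises the TaskReference.normalize change shown above.
from nomad.datamodel.metainfo.workflow import Link, Task, TaskReference

task = Task(
    name='relaxation',
    inputs=[Link(name='input structure')],
    outputs=[Link(name='relaxed structure')],
)
ref = TaskReference(task=task)

# normalize() now (a) returns early if ref.task is None, (b) fills in the name
# from the referenced task, and (c) appends any task inputs/outputs that are
# not already present on the reference.
ref.normalize(archive=None, logger=None)  # None stands in for archive/logger here

assert ref.name == 'relaxation'
assert [link.name for link in ref.inputs] == ['input structure']
assert [link.name for link in ref.outputs] == ['relaxed structure']
```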
# -from ase.data import chemical_symbols import numpy as np +from ase.data import chemical_symbols -from nomad.units import ureg from nomad.metainfo import ( + DefinitionAnnotation, + MEnum, MSection, - Section, Quantity, + Section, SubSection, - MEnum, - DefinitionAnnotation, ) from nomad.metainfo.elasticsearch_extension import Elasticsearch +from nomad.units import ureg def optimade_links(section: str): diff --git a/nomad/datamodel/results.py b/nomad/datamodel/results.py index 75c1e962798ff5f3357a217bb18d405d4b995f3e..2942b4bd44cfe49ccaeae5cae4dfd147b145d576 100644 --- a/nomad/datamodel/results.py +++ b/nomad/datamodel/results.py @@ -17,45 +17,41 @@ # from logging import Logger -from typing import List, Optional, TYPE_CHECKING -import numpy as np -from elasticsearch_dsl import Text +from typing import TYPE_CHECKING +import numpy as np from ase.data import chemical_symbols +from elasticsearch_dsl import Text -from nomad import utils from nomad.config import config -from nomad.datamodel.metainfo.common import ProvenanceTracker, PropertySection +from nomad.datamodel.metainfo.annotations import H5WebAnnotation +from nomad.datamodel.metainfo.common import PropertySection, ProvenanceTracker from nomad.datamodel.metainfo.simulation.method import CoreHole as CoreHoleRun -from nomad.metainfo.elasticsearch_extension import ( - Elasticsearch, - material_type, - material_entry_type, - get_tokenizer, -) - +from nomad.datamodel.optimade import Species as OptimadeSpecies # noqa from nomad.metainfo import ( - MSection, - Section, - SubSection, - Quantity, + Datetime, MEnum, + MSection, Package, - Datetime, + Quantity, Reference, + Section, + SubSection, +) +from nomad.metainfo.elasticsearch_extension import ( + Elasticsearch, + get_tokenizer, + material_entry_type, + material_type, ) -from nomad.datamodel.metainfo.common import ProvenanceTracker, PropertySection -from nomad.datamodel.optimade import Species as OptimadeSpecies # noqa -from nomad.datamodel.metainfo.annotations import H5WebAnnotation try: import runschema runschema.run_schema_entry_point.load() - import runschema.method import runschema.calculation + import runschema.method import runschema.system - import simulationworkflowschema simulationworkflowschema.simulationworkflow_schema_entry_point.load() diff --git a/nomad/datamodel/util.py b/nomad/datamodel/util.py index 63ab9e0c481a61278f3dbee0373d5beb7626780e..60780338245abd42e1bfc44f3715804742c4a8f9 100644 --- a/nomad/datamodel/util.py +++ b/nomad/datamodel/util.py @@ -17,19 +17,19 @@ # import math import re -from typing import Any from collections.abc import Callable +from typing import Any import numpy as np from nomad import utils from nomad.metainfo import ( - Section, AnnotationModel, + MetainfoError, MSection, - SubSection, Property, - MetainfoError, + Section, + SubSection, ) from nomad.units import ureg diff --git a/nomad/doi.py b/nomad/doi.py index a755e19c31e6059503dbef60b6d59340b954bbda..fe1df5f0492c2500fee31111c48572c19ab58f76 100644 --- a/nomad/doi.py +++ b/nomad/doi.py @@ -21,17 +21,18 @@ This module contains all functions necessary to manage DOI via datacite.org and MDS API (https://support.datacite.org/docs/mds-api-guide). 
""" -import xml.etree.ElementTree as ET import datetime +import xml.etree.ElementTree as ET + import requests -from requests.auth import HTTPBasicAuth -from mongoengine import Document, StringField, DateTimeField +from fastapi import HTTPException +from mongoengine import DateTimeField, Document, StringField from mongoengine.errors import NotUniqueError +from requests.auth import HTTPBasicAuth -from nomad.datamodel import User -from nomad.config import config from nomad import utils -from fastapi import HTTPException +from nomad.config import config +from nomad.datamodel import User class DOIException(Exception): diff --git a/nomad/files.py b/nomad/files.py index f9c98ecb86b644bf68119ba866f8b07806222848..d40bf0e1024af97eaaf1339d03d6c8021c39d542 100644 --- a/nomad/files.py +++ b/nomad/files.py @@ -45,43 +45,35 @@ original mainfile, and vice versa. from __future__ import annotations -from abc import ABCMeta -from typing import ( - IO, - Set, - Dict, - List, - Tuple, - Any, - NamedTuple, -) -from collections.abc import Callable -from collections.abc import Iterable, Iterator -from pydantic import BaseModel -from datetime import datetime -import os.path -import os -import shutil -import zipstream import hashlib import io import json -import yaml -import magic +import os +import os.path +import shutil +import tarfile # noqa: F401 import zipfile -import tarfile +from abc import ABCMeta +from collections.abc import Callable, Iterable, Iterator +from datetime import datetime +from typing import IO, Any, NamedTuple + +import magic +import yaml +import zipstream +from pydantic import BaseModel -from nomad import utils, datamodel +from nomad import datamodel, utils +from nomad.archive import ArchiveReader, read_archive, to_json, write_archive +from nomad.archive.storage_v2 import combine_archive from nomad.common import ( - get_compression_format, extract_file, + get_compression_format, is_safe_basename, is_safe_relative_path, ) from nomad.config import config -from nomad.archive.storage_v2 import combine_archive -from nomad.config.models.config import BundleImportSettings, BundleExportSettings -from nomad.archive import write_archive, read_archive, ArchiveReader, to_json +from nomad.config.models.config import BundleExportSettings, BundleImportSettings bundle_info_filename = 'bundle_info.json' diff --git a/nomad/graph/graph_reader.py b/nomad/graph/graph_reader.py index 63473f36cda60f050161a2ea53e53649ca6e10b1..24139f771409b93ca2c6d3ee3cd064f12790534d 100644 --- a/nomad/graph/graph_reader.py +++ b/nomad/graph/graph_reader.py @@ -24,11 +24,10 @@ import functools import itertools import os import re -from collections.abc import AsyncIterator, Iterator +from collections.abc import AsyncIterator, Callable, Iterator from contextlib import contextmanager from threading import Lock -from typing import Any, Type, Union -from collections.abc import Callable +from typing import Any import orjson from cachetools import TTLCache @@ -55,23 +54,10 @@ from nomad.app.v1.routers.uploads import ( get_upload_with_read_access, upload_to_pydantic, ) -from nomad.archive import ( - ArchiveDict, - ArchiveList, - to_json, -) -from nomad.archive.storage_v2 import ( - ArchiveDict as ArchiveDictNew, -) -from nomad.archive.storage_v2 import ( - ArchiveList as ArchiveListNew, -) -from nomad.datamodel import ( - Dataset, - EntryArchive, - ServerContext, - User, -) +from nomad.archive import ArchiveDict, ArchiveList, to_json +from nomad.archive.storage_v2 import ArchiveDict as ArchiveDictNew +from nomad.archive.storage_v2 import 
ArchiveList as ArchiveListNew +from nomad.datamodel import Dataset, EntryArchive, ServerContext, User from nomad.datamodel.util import parse_path from nomad.files import RawPathInfo, UploadFiles from nomad.graph.lazy_wrapper import ( @@ -102,16 +88,14 @@ from nomad.metainfo import ( SectionReference, SubSection, ) -from nomad.metainfo.data_type import Any as AnyType from nomad.metainfo.data_type import JSON, Datatype +from nomad.metainfo.data_type import Any as AnyType from nomad.metainfo.util import MSubSectionList, split_python_definition from nomad.processing import Entry, ProcessStatus, Upload from nomad.utils import timer logger = utils.get_logger(__name__) -# bug when used in isinstance() with mypy -# see https://github.com/python/mypy/issues/11673 GenericList = list | ArchiveList | ArchiveListNew GenericDict = dict | ArchiveDict | ArchiveDictNew diff --git a/nomad/graph/lazy_wrapper.py b/nomad/graph/lazy_wrapper.py index 87b777b4833de13f0c7344454a94a0315cb25fa9..148a6c079de5944615cf1edbfd162360050392d4 100644 --- a/nomad/graph/lazy_wrapper.py +++ b/nomad/graph/lazy_wrapper.py @@ -26,6 +26,7 @@ Different wrappers are catered for different types of objects/operations. """ from __future__ import annotations + from functools import cached_property from nomad.datamodel import User diff --git a/nomad/graph/model.py b/nomad/graph/model.py index 0358df9756165728654fbdacb0a01ef764718a28..669d7f1276e4a8386e1aadea61583917473cc7ed 100644 --- a/nomad/graph/model.py +++ b/nomad/graph/model.py @@ -21,18 +21,19 @@ import functools import re from enum import Enum from hashlib import sha1 -from typing import Annotated, Optional, Union +from typing import Annotated, Union from pydantic import ( AfterValidator, - field_validator, - ConfigDict, BaseModel, + ConfigDict, Field, ValidationError, + field_validator, ) from nomad.app.v1.models import Direction, Metadata, MetadataPagination, Pagination +from nomad.app.v1.models.groups import UserGroupPagination, UserGroupQuery from nomad.app.v1.routers.datasets import DatasetPagination from nomad.app.v1.routers.uploads import ( EntryProcDataPagination, @@ -40,7 +41,6 @@ from nomad.app.v1.routers.uploads import ( UploadProcDataPagination, UploadProcDataQuery, ) -from nomad.app.v1.models.groups import UserGroupQuery, UserGroupPagination class DatasetQuery(BaseModel): diff --git a/nomad/groups.py b/nomad/groups.py index c964fcc560877c56d050eeee6b05d9b1b86c938a..ee792271d02ddaa82ef8bef3226e8a9aae1c08a9 100644 --- a/nomad/groups.py +++ b/nomad/groups.py @@ -18,7 +18,6 @@ from __future__ import annotations -from typing import Optional, Union from collections.abc import Iterable from mongoengine import Document, ListField, Q, QuerySet, StringField diff --git a/nomad/infrastructure.py b/nomad/infrastructure.py index e5f458d0070e5408163b76f4aa2d5ee4856180bd..0d98646a160f725c7ce489d61a6d16ce8187ac95 100644 --- a/nomad/infrastructure.py +++ b/nomad/infrastructure.py @@ -23,33 +23,34 @@ is run once for each *api* and *worker* process. Individual functions for partia exist to facilitate testing, aspects of :py:mod:`nomad.cli`, etc. 
""" -import os.path +import json import os +import os.path +import re import shutil -from elasticsearch_dsl import connections -from mongoengine import connect, disconnect -from mongoengine.connection import ConnectionFailure import smtplib + +# TODO put somemore thought into warnings +import warnings +from datetime import datetime from email.mime.text import MIMEText -from keycloak import KeycloakOpenID, KeycloakAdmin -from keycloak.exceptions import KeycloakAuthenticationError, KeycloakGetError -import json + import jwt -from datetime import datetime -import re import unidecode +from elasticsearch_dsl import connections +from keycloak import KeycloakAdmin, KeycloakOpenID +from keycloak.exceptions import KeycloakAuthenticationError, KeycloakGetError +from mongoengine import connect, disconnect +from mongoengine.connection import ConnectionFailure from nomad import utils from nomad.config import config -from nomad.utils.structlogging import get_logger # The metainfo is defined and used during imports. This is problematic. # We import all parsers very early in the infrastructure setup. This will populate # the metainfo with parser specific definitions, before the metainfo might be used. -from nomad.parsing import parsers # pylint: disable=unused-import - -# TODO put somemore thought into warnings -import warnings +from nomad.parsing import parsers # noqa: F401 +from nomad.utils.structlogging import get_logger warnings.filterwarnings('ignore') @@ -102,12 +103,13 @@ def setup_mongo(client=False): def check_mongo(): db = mongo_client.get_database(config.mongo.db_name) - names = db.list_collection_names() + names = set(db.list_collection_names()) expected_names = {'upload', 'user_group', 'entry', 'dataset', 'archive'} - if names != expected_names: + if not expected_names.issuperset(names): logger.warning( - f'Expected MongoDB collections: {expected_names} but found: {names}' + f'Expected MongoDB collections: {sorted(expected_names)}; ' + f'but found: {sorted(names)}' ) # regression https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR/-/issues/2281 diff --git a/nomad/logtransfer.py b/nomad/logtransfer.py index 6c884540c21a379be6f8eb4bbf005ef5ec207a7b..f85170d8bd3d43d93212bc83b3c9e6609bf7937b 100644 --- a/nomad/logtransfer.py +++ b/nomad/logtransfer.py @@ -16,15 +16,15 @@ # limitations under the License. 
# -import requests -import zlib -import os.path import os +import os.path import time +import zlib +import requests -from nomad.config import config from nomad import utils +from nomad.config import config logger = utils.get_logger(__name__) diff --git a/nomad/metainfo/__init__.py b/nomad/metainfo/__init__.py index 357685a971207f10ba108d3f5528d14d6e5d6540..048d0d6622dba7ba5f21c8fda627db34c12b8e81 100644 --- a/nomad/metainfo/__init__.py +++ b/nomad/metainfo/__init__.py @@ -72,3 +72,22 @@ from .annotation import ( SectionAnnotation, AnnotationModel, ) +from .data_type import ( + Datatype, + Primitive, + Number, + ExactNumber, + InexactNumber, + NonPrimitive, + URL, + File, + Any, + Capitalized, + Bytes, + JSON, + Dimension, + Unit, + Callable, + Datetime, + Enum, +) diff --git a/nomad/metainfo/annotation.py b/nomad/metainfo/annotation.py index 8e2a7c5abd1311aa4fa9cbd9d6abc77003435505..6ee540abe928d152d28b324e2ca686c93f708f75 100644 --- a/nomad/metainfo/annotation.py +++ b/nomad/metainfo/annotation.py @@ -18,9 +18,9 @@ from __future__ import annotations -from typing import Annotated, Any, ClassVar, ForwardRef, Optional +from typing import Any, ClassVar, ForwardRef -from pydantic import ConfigDict, BaseModel, Field +from pydantic import BaseModel, ConfigDict, Field class Annotation: diff --git a/nomad/metainfo/data_frames.py b/nomad/metainfo/data_frames.py new file mode 100644 index 0000000000000000000000000000000000000000..4f2f1fc7d1079f32328e601cc83a30c5d7138d14 --- /dev/null +++ b/nomad/metainfo/data_frames.py @@ -0,0 +1,301 @@ +import inspect +import types +from collections.abc import Iterable +from typing import Union, cast + +import numpy as np +import xarray as xr +from pydantic import BaseModel + +from nomad.metainfo import MSection, Package, Quantity, Section, SubSection, constraint +from nomad.metainfo.metainfo import _placeholder_quantity +from nomad.units import ureg + +m_package = Package() + + +class Values(MSection): + name = Quantity(type=str) + values = _placeholder_quantity + values_ref = Quantity(type='Values', shape=[]) + spanned_dimensions = Quantity(type=int, shape=['*']) + original_shape = Quantity(type=int, shape=['*']) + + def get_values(self, reshape: bool = True) -> np.ndarray: + if self.values_ref: + return self.values_ref.m_resolved().get_values() + values = self.values + if not isinstance(self.values, np.ndarray | ureg.Quantity): + values = np.array(self.values) + if reshape: + return cast(np.ndarray, values).reshape(self.original_shape) + return values + + def __init__(self, *args, **kwargs): + values_ref = None + values: list = [] + if len(args) == 0: + pass + elif len(args) == 1 and isinstance(args[0], list | np.ndarray | ureg.Quantity): + values = args[0] + elif len(args) == 1 and isinstance(args[0], (Values)): + values_ref = args[0] + values = None + else: + values = args + + original_shape = kwargs.pop('original_shape', None) + if isinstance(values, np.ndarray): + values_shape = values.shape + quantity_shape = self.m_def.all_quantities['values'].shape[:-1] + if len(values_shape) < len(quantity_shape): + raise ValueError( + f'The quantity shape, {quantity_shape}, does not meet the ' + f'lower-bound set by the values shape, {values_shape}' + ) + flat_shape = values_shape[: len(quantity_shape)] + (-1,) + values = values.reshape(flat_shape) + if original_shape is None: + original_shape = values_shape + elif isinstance(values, Iterable): + original_shape = [len(values)] + + super().__init__( + values=values, + values_ref=values_ref, + 
original_shape=original_shape, + **kwargs, + ) + + def xarray_attrs(self) -> dict[str, str]: + return dict( + units=self.m_def.all_quantities['values'].unit, + long_name=self.m_def.all_quantities['values'].label, + description=self.m_def.all_quantities['values'].description, + iri=self.m_def.all_quantities['values'].iri, + ) + + +def _get_default_names(iterable: Iterable[Values]) -> list[str]: + names = [] + for values in iterable: + counter = 0 + while True: + counter += 1 + unique_name = f'{values.m_def.name}_{counter}' + if unique_name not in names: + names.append(unique_name) + break + return names + + +def _get_names(iterable: Iterable[Values]) -> list[str]: + default_names = _get_default_names(iterable) + return [ + values.name if values.name else default + for values, default in zip(iterable, default_names) + ] + + +def _get_values( + iterable: Iterable[Values], values: Union[str, 'ValuesTemplate'] +) -> Values: + return_values = None + if isinstance(values, str): + default_names = _get_default_names(iterable) + for v, default in zip(iterable, default_names): + if v.name == values or default == values: + if return_values is not None: + raise ValueError(f'Multiple values matching {values}') + return_values = v + return return_values + for v in iterable: + if v.m_def == values.section_def: + if return_values is not None: + raise ValueError(f'Multiple values matching {values}') + return_values = v + return return_values + + +class DataFrame(MSection): + fields = SubSection(section='Values', repeats=True) + variables = SubSection(section='Values', repeats=True) + + def get_field(self, field: Union[str, 'ValuesTemplate']) -> Values: + return _get_values(self.fields, field) + + def get_variable(self, variable: Union[str, 'ValuesTemplate']) -> Values: + return _get_values(self.variables, variable) + + @constraint(warning=False) + def check_dimensions(self): + # TODO constrains that validate the soundness of field and variable dimensions + pass + + @constraint(warning=False) + def check_mandatory_fields_and_variables(self): + data_frame_annotation = self.m_def.m_get_annotation(DataFrameAnnotation) + if data_frame_annotation is not None: + for index, field in enumerate(data_frame_annotation.mandatory_fields): + assert index < len(self.fields), f'Mandatory field {field} missing' + assert self.fields[index].m_def == field.section_def, ( + f'Field {field} missing' + ) + + for index, variable in enumerate(data_frame_annotation.mandatory_variables): + assert index < len(self.variables), ( + f'Mandatory field {variable} missing' + ) + assert self.variables[index].m_def == variable.section_def, ( + f'Field {variable} missing' + ) + + def to_xarray(self) -> xr.Dataset: + shape = [] + dims = [] + coords = {} + var: Values + for var, name in zip(self.variables, _get_names(self.variables)): + if var.spanned_dimensions is None or len(var.spanned_dimensions) == 0: + coord_dims = [name] + shape.append(len(var.values)) + dims.append(name) + elif len(var.spanned_dimensions) == 1: + dim = var.spanned_dimensions[0] + if dim >= len(shape): + shape.append(len(var.values)) + dims.append(f'm_dim_{dim}') + coord_dims = [f'm_dim_{dim}'] + else: + raise NotImplementedError('Only one spanned dimension supported') + coords[name] = ( + coord_dims, + var.values, + var.xarray_attrs(), + ) + data_vars = {} + field: Values + for field, name in zip(self.fields, _get_names(self.fields)): + data_vars[name] = ( + dims, + cast(np.ndarray, field.values).reshape(shape), + field.xarray_attrs(), + ) + return xr.Dataset( + 
data_vars=data_vars, + coords=coords, + attrs=dict( + description=self.m_def.description, + long_name=self.m_def.label, + ), + ) + + def to_pandas(self): + return self.to_xarray().to_dataframe() + + +def _get_package(): + package = inspect.currentframe().f_back.f_back.f_globals.get('m_package', None) + assert package is not None, ( + 'PhysicalQuantities have to be defined within a python package with global ' + 'Package m_package variable' + ) + assert isinstance(m_package, Package), 'm_package has to be a Package instance' + return package + + +class ValuesTemplate: + """ + A generator for quantities of a certain template with type, shape, unit, name, description, iri, etc. + """ + + def __init__(self, **kwargs): + self.quantity = Quantity(**kwargs) + assert self.quantity.name is not None, ( + 'Values templates must be explicitly named' + ) + + class ValuesTemplate(Values): + m_def = Section(name=self.quantity.name) + values = self(name='values', shape=self.quantity.shape + ['*']) + + _get_package().section_definitions.append(ValuesTemplate.m_def) + self.section_def = ValuesTemplate.m_def + self.create = ValuesTemplate + self.section_cls = ValuesTemplate + + def __call__(self, **kwargs): + # Make a deep copy of the quantity via m_from_dict(m_to_dict) + quantity = Quantity.m_from_dict(self.quantity.m_to_dict()) + quantity.m_update(**kwargs) + return quantity + + +class DataFrameAnnotation(BaseModel): + class Config: + arbitrary_types_allowed = True + + mandatory_fields: list[ValuesTemplate] + mandatory_variables: list[ValuesTemplate] + + def dict(self, *args, **kwargs): + return dict( + mandatory_fields=[ + field.section_def.qualified_name() for field in self.mandatory_fields + ], + mandatory_variables=[ + variable.section_def.qualified_name() + for variable in self.mandatory_variables + ], + ) + + +class DataFrameTemplate: + """ + A generator for data frames with specific mandatory fields and default variables. + """ + + def __init__( + self, + mandatory_fields: list[ValuesTemplate], + mandatory_variables: list[ValuesTemplate] = [], + **kwargs, + ): + self.sub_section = SubSection(**kwargs) + self.fields = mandatory_fields + self.variables = mandatory_variables + + assert self.sub_section.name is not None, ( + 'DataFrame templates must be explicitly named' + ) + + class DataFrameTemplate(DataFrame): + m_def = Section(name=self.sub_section.name) + + # TODO validation that default fields and variables are actually present + + DataFrameTemplate.m_def.m_annotations['data_frame'] = DataFrameAnnotation( + mandatory_fields=mandatory_fields, + mandatory_variables=mandatory_variables, + ) + + _get_package().section_definitions.append(DataFrameTemplate.m_def) + self.create = DataFrameTemplate + self.section_cls = DataFrameTemplate + self.section_def = DataFrameTemplate.m_def + self.sub_section.section = self.section_def + + def __call__(self, **kwargs): + sub_section = self.sub_section.m_copy() + sub_section.m_update(**kwargs) + + def __init_metainfo__(self): + # TODO here we can add a more specialised section def to the caller + # definition (e.g. 
MySection) as an inner_section_definition + pass + + sub_section.__init_metainfo__ = types.MethodType(__init_metainfo__, sub_section) + return sub_section + + +m_package.__init_metainfo__() diff --git a/nomad/metainfo/data_type.py b/nomad/metainfo/data_type.py index 04b4c724651ac2a2307d9d3c55b8aba26799605b..6f9b89f798ae35a02ff7bcb57afcb6cb714d36dd 100644 --- a/nomad/metainfo/data_type.py +++ b/nomad/metainfo/data_type.py @@ -22,7 +22,7 @@ import importlib import re import typing from base64 import b64decode, b64encode -from datetime import datetime, date +from datetime import date, datetime from functools import reduce from inspect import isclass from typing import Any as TypingAny @@ -1248,12 +1248,12 @@ def to_optimade_type(in_type: Datatype): def to_mongo_type(in_type: Datatype): from mongoengine import ( - IntField, - FloatField, BooleanField, - StringField, DateTimeField, DictField, + FloatField, + IntField, + StringField, ) standard_type = in_type.standard_type() diff --git a/nomad/metainfo/elasticsearch_extension.py b/nomad/metainfo/elasticsearch_extension.py index 605b0bb779cc0f2f453ac3f2d63557ea42912b49..9d386d432962409b16d2d4f8cc34019d87cf0eb3 100644 --- a/nomad/metainfo/elasticsearch_extension.py +++ b/nomad/metainfo/elasticsearch_extension.py @@ -159,25 +159,15 @@ sub-sections as if they were direct sub-sections. import math import re from collections import defaultdict -from typing import ( - TYPE_CHECKING, - Any, - DefaultDict, - Dict, - List, - Optional, - Set, - Tuple, - Union, - cast, -) from collections.abc import Callable +from typing import TYPE_CHECKING, Any, Optional, cast from elasticsearch_dsl import Q +from pint import Quantity as PintQuantity + from nomad import utils from nomad.config import config from nomad.config.models.plugins import Parser, Schema, SchemaPackageEntryPoint -from pint import Quantity as PintQuantity from . 
import DefinitionAnnotation from .data_type import Datatype, to_elastic_type @@ -193,7 +183,7 @@ from .metainfo import ( ) if TYPE_CHECKING: - from nomad.datamodel.datamodel import EntryArchive, SearchableQuantity + from nomad.datamodel.datamodel import SearchableQuantity schema_separator = '#' dtype_separator = '#' diff --git a/nomad/metainfo/example.py b/nomad/metainfo/example.py index 3e7f7443b41013722c1058de5bf70e0491629b7d..d7acce8ee9bae8c5514cce7113692b3e3663a5de 100644 --- a/nomad/metainfo/example.py +++ b/nomad/metainfo/example.py @@ -18,21 +18,22 @@ """An example metainfo package.""" -import numpy as np from datetime import datetime -from nomad.units import ureg +import numpy as np + from nomad.metainfo import ( - MSection, + Datetime, MCategory, - Section, - Quantity, + MEnum, + MSection, Package, + Quantity, + Section, SubSection, - MEnum, - Datetime, constraint, ) +from nomad.units import ureg m_package = Package(links=['https://nomad-lab.eu/prod/rae/docs/metainfo.html']) diff --git a/nomad/metainfo/metainfo.py b/nomad/metainfo/metainfo.py index c5e99bac8c8f4c16709beda576e15beb8a7b1c43..67072e835dee58c21f0007d82df38f6dc6ae3e1f 100644 --- a/nomad/metainfo/metainfo.py +++ b/nomad/metainfo/metainfo.py @@ -24,19 +24,17 @@ import json import re import sys import warnings +from collections.abc import Callable as TypingCallable from collections.abc import Iterable from copy import deepcopy from functools import wraps -from typing import Any -from typing import Callable as TypingCallable -from typing import Literal, TypeVar, cast +from typing import Any, Literal, TypeVar, cast from urllib.parse import urlsplit, urlunsplit import docstring_parser import jmespath import pint -from pydantic import TypeAdapter, ValidationError -from typing_extensions import deprecated # type: ignore +from pydantic import BaseModel, TypeAdapter, ValidationError from nomad.config import config from nomad.metainfo.data_type import JSON as JSONType @@ -45,14 +43,20 @@ from nomad.metainfo.data_type import Any as AnyType from nomad.metainfo.data_type import Bytes as BytesType from nomad.metainfo.data_type import Callable as CallableType from nomad.metainfo.data_type import Capitalized as CapitalizedType -from nomad.metainfo.data_type import Datatype +from nomad.metainfo.data_type import ( + Datatype, + Enum, + ExactNumber, + InexactNumber, + Number, + check_dimensionality, + m_str, + normalize_type, +) from nomad.metainfo.data_type import Datetime as DatetimeType from nomad.metainfo.data_type import Dimension as DimensionType -from nomad.metainfo.data_type import Enum, ExactNumber from nomad.metainfo.data_type import File as FileType -from nomad.metainfo.data_type import InexactNumber, Number from nomad.metainfo.data_type import Unit as UnitType -from nomad.metainfo.data_type import check_dimensionality, m_str, normalize_type from nomad.metainfo.util import ( MQuantity, MSubSectionList, @@ -64,8 +68,6 @@ from nomad.metainfo.util import ( to_dict, ) from nomad.units import ureg as units -from pydantic import ValidationError, parse_obj_as -from typing_extensions import deprecated # type: ignore from .annotation import ( Annotation, @@ -1348,9 +1350,6 @@ class MSection(metaclass=MObjectMeta): if not definition.repeats or target is None: return _wrap(target) - # this practically does nothing only to make mypy happy - # it is guaranteed to be a MSubSectionList - target = cast(MSubSectionList, target) if isinstance(index, str) and target.has_duplicated_key(): raise MetainfoError(f'Multiple sections with key {index} 
exist.') @@ -2017,6 +2016,9 @@ class MSection(metaclass=MObjectMeta): if isinstance(annotation, Annotation): return annotation.m_to_dict() + if isinstance(annotation, BaseModel): + return annotation.dict() + if not isinstance(annotation, dict): return str(annotation) @@ -2758,7 +2760,7 @@ class Definition(MSection): Python references, e.g. in `m_def`. variable: - A boolean that indicates this property as variable parts in its name. + A boolean that indicates this property has variable parts in its name. If this is set to true, all capital letters in the name can be replaced with arbitrary strings. However, variable names work similar to aliases and can be considered on-demand aliases. Other aliases and the diff --git a/nomad/metainfo/mongoengine_extension.py b/nomad/metainfo/mongoengine_extension.py index 3d98ea4a8b63bb61f36cfbdf3832982c4bb74d51..c7dd4baa97feed7ab5921ed8e3f926708c902558 100644 --- a/nomad/metainfo/mongoengine_extension.py +++ b/nomad/metainfo/mongoengine_extension.py @@ -33,14 +33,11 @@ Adds mongoengine supports to the metainfo. Allows to create, save, and get metai sections from mongoengine. The annotation key is 'mongo'. """ -from typing import Any, Dict, List +from typing import Any -from .data_type import Datatype, to_mongo_type -from .metainfo import ( - MSection, - Quantity, -) from . import Annotation, DefinitionAnnotation, SectionAnnotation +from .data_type import Datatype, to_mongo_type +from .metainfo import MSection, Quantity class Mongo(DefinitionAnnotation): diff --git a/nomad/metainfo/pydantic_extension.py b/nomad/metainfo/pydantic_extension.py index 8aee87b63121022518bb76e467e030017e8f8dee..155140406df838c76120e987d18527833f04861f 100644 --- a/nomad/metainfo/pydantic_extension.py +++ b/nomad/metainfo/pydantic_extension.py @@ -32,16 +32,13 @@ Allows to create pydantic models from section definitions. """ -from typing import Optional, cast, Type -from pydantic import create_model, Field, BaseModel +from typing import cast + +from pydantic import BaseModel, Field, create_model -from .data_type import to_pydantic_type -from .metainfo import ( - Definition, - Section, - Quantity, -) from . 
import DefinitionAnnotation +from .data_type import to_pydantic_type +from .metainfo import Definition, Quantity, Section class PydanticModel(DefinitionAnnotation): diff --git a/nomad/metainfo/util.py b/nomad/metainfo/util.py index 53e59e2e3fb8bebecde44edc416a05e12a5821d6..a46e70d43c38bbb9eda3fb6807a3bd37af570357 100644 --- a/nomad/metainfo/util.py +++ b/nomad/metainfo/util.py @@ -19,7 +19,7 @@ from __future__ import annotations import hashlib import re -from typing import Any, Optional +from typing import Any import pint @@ -361,13 +361,15 @@ def resolve_variadic_name(definitions: dict, name: str, hint: str | None = None) candidates = {} hint_candidates = {} - for definition in definitions: - match_score = get_namefit(name, definition) + for dname, definition in definitions.items(): + if not definition.variable: # TODO: also if type does not match + continue + match_score = get_namefit(name, dname) if match_score >= 0: - candidates[definition] = match_score + candidates[dname] = match_score # Check if the hint exists in the definition if hint and hint in definition.all_attributes: - hint_candidates[definition] = match_score + hint_candidates[dname] = match_score if len(candidates) == 0: raise ValueError(f'Cannot find a proper definition for name "{name}".') diff --git a/nomad/mkdocs.py b/nomad/mkdocs/__init__.py similarity index 69% rename from nomad/mkdocs.py rename to nomad/mkdocs/__init__.py index d033a51bd83b0c9b0563adc22bf91527e960a338..e0fbf44fac3c772faa9a1a8aff5545b0e29e720a 100644 --- a/nomad/mkdocs.py +++ b/nomad/mkdocs/__init__.py @@ -24,186 +24,36 @@ from types import UnionType from pydantic.fields import FieldInfo import yaml import json -from enum import Enum -from pydantic import BaseModel import os.path -from typing import Annotated, Any, Union, get_args, get_origin -from typing import Literal -from inspect import isclass -from markdown.extensions.toc import slugify - -from nomad.utils import strip -from nomad.config import config -from nomad.config.models.plugins import ParserEntryPoint, EntryPointType -from nomad.app.v1.models import query_documentation, owner_documentation -from nomad.app.v1.routers.entries import archive_required_documentation -from nomad import utils - - -exported_config_models = set() # type: ignore - - -doc_snippets = { - 'query': query_documentation, - 'owner': owner_documentation, - 'archive-required': archive_required_documentation, -} - - -def get_field_type_info(field: FieldInfo) -> tuple[str, set[Any]]: - """Used to recursively walk through a type definition, building up a cleaned - up type name and returning all of the classes that were used. - - Args: - type_: The type to inspect. Can be any valid type definition. - - Returns: - Tuple containing the cleaned up type name and a set of classes - found inside. 
- """ - classes = set() - annotation = field.annotation - - def get_class_name(ann: Any) -> str: - if hasattr(ann, '__name__'): - name = ann.__name__ - return 'None' if name == 'NoneType' else name - return str(ann) - - def _recursive_extract(ann: Any, type_str: str = '') -> str: - nonlocal classes - - origin = get_origin(ann) - args = get_args(ann) - - if origin is None and issubclass(ann, Enum): - classes.add(ann) - # Determine base type for Enums - if issubclass(ann, str): - return get_class_name(str) - elif issubclass(ann, int): - return get_class_name(int) - else: - return get_class_name(ann) - elif origin is None: - classes.add(ann) - return get_class_name(ann) - if origin is list: - classes.add(origin) - if type_str: - type_str += '[' + _recursive_extract(args[0]) + ']' - else: - type_str = 'list[' + _recursive_extract(args[0]) + ']' - elif origin is dict: - classes.add(origin) - if type_str: - type_str += ( - '[' - + _recursive_extract(args[0]) - + ', ' - + _recursive_extract(args[1]) - + ']' - ) - else: - type_str = ( - 'dict[' - + _recursive_extract(args[0]) - + ', ' - + _recursive_extract(args[1]) - + ']' - ) - - elif origin is UnionType or origin is Union: - # Handle Union types (e.g., Optional[str] is equivalent to Union[str, None]) - union_types = [] - for arg in args: - union_types.append(_recursive_extract(arg)) - type_str = ' | '.join(union_types) - elif origin is Literal: - classes.add(origin) - return get_class_name( - type(args[0]) - ) # Add name of the literal value (e.g., str) - elif origin is Annotated: - # Extract the underlying type from Annotated - return _recursive_extract(args[0]) - else: - # Handle generic types - classes.add(origin) - return get_class_name(ann) - - return type_str - - type_name = _recursive_extract(annotation) - return type_name, classes - - -def get_field_description(field: FieldInfo) -> str | None: - """Retrieves the description for a pydantic field as a markdown string. - - Args: - field: The pydantic field to inspect. - - Returns: - Markdown string for the description. - """ - value = field.description - if value: - value = utils.strip(value) - value = value.replace('\n\n', '<br/>').replace('\n', ' ') - return value +from typing import get_args +from inspect import isclass -def get_field_default(field: FieldInfo) -> str | None: - """Retrieves the default value from a pydantic field as a markdown string. - - Args: - field: The pydantic field to inspect. - - Returns: - Markdown string for the default value. - """ - default_value = field.default - if default_value is not None: - if isinstance(default_value, dict | BaseModel): - default_value = 'Complex object, default value not displayed.' - elif default_value == '': - default_value = '""' - else: - default_value = f'`{default_value}`' - return default_value - - -def get_field_options(field: FieldInfo) -> dict[str, str | None]: - """Retrieves a dictionary of value-description pairs from a pydantic field. - - Args: - field: The pydantic field to inspect. +from pydantic.fields import FieldInfo - Returns: - Dictionary containing the possible options and their description for - this field. The description may be None indicating that it does not exist. 
- """ - options: dict[str, str | None] = {} - if isclass(field.annotation) and issubclass(field.annotation, Enum): - for x in field.annotation: - options[str(x.value)] = None - return options +from pydantic import BaseModel +from markdown.extensions.toc import slugify -def get_field_deprecated(field: FieldInfo) -> bool: - """Returns whether the given pydantic field is deprecated or not. +from nomad.utils import strip +from nomad.config import config +from nomad import utils - Args: - field: The pydantic field to inspect. +from nomad.mkdocs.pydantic import ( + exported_config_models, + get_field_default, + get_field_deprecated, + get_field_description, + get_field_options, + get_field_type_info, +) +from nomad.mkdocs.metainfo import ( + section_markdown_from_section_cls, + package_markdown_from_package, +) - Returns: - Whether the field is deprecated. - """ - if field.deprecated: - return True - return False +from nomad.config.models.plugins import ParserEntryPoint, EntryPointType class MyYamlDumper(yaml.Dumper): @@ -225,6 +75,14 @@ def define_env(env): @env.macro def doc_snippet(key): # pylint: disable=unused-variable + from nomad.app.v1.models import query_documentation, owner_documentation + from nomad.app.v1.routers.entries import archive_required_documentation + + doc_snippets = { + 'query': query_documentation, + 'owner': owner_documentation, + 'archive-required': archive_required_documentation, + } return doc_snippets[key] @env.macro @@ -258,7 +116,7 @@ def define_env(env): path = f'{path}:' file_path, json_path = path.split(':') - file_path = os.path.join(os.path.dirname(__file__), '..', file_path) + file_path = os.path.join(os.path.dirname(__file__), '../..', file_path) with open(file_path) as f: if file_path.endswith('.yaml'): @@ -517,3 +375,19 @@ def define_env(env): for category, plugins in categories.items() ] ) + + @env.macro + def metainfo_package(path, heading=None, hide=[]): # pylint: disable=unused-variable + """ + Produces markdown code for the given metainfo package. + + Arguments: + path: The python qualified name of the package. + """ + import importlib + + module_name, name = path.rsplit('.', 1) + module = importlib.import_module(path) + pkg = getattr(module, 'm_package') + + return package_markdown_from_package(pkg) diff --git a/nomad/mkdocs/metainfo.py b/nomad/mkdocs/metainfo.py new file mode 100644 index 0000000000000000000000000000000000000000..710a6383dc7cb8ab8ae30b4a667077a7a45e49b3 --- /dev/null +++ b/nomad/mkdocs/metainfo.py @@ -0,0 +1,158 @@ +# +# This file is part of NOMAD. See https://nomad-lab.eu for further info. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +from nomad import utils +from nomad.datamodel.data import ArchiveSection +from nomad.metainfo import Datatype, Property, Quantity, Reference, SubSection + + +def get_reference(section_def, pkg) -> str: + if section_def.m_parent == pkg: + return f'[`{section_def.name}`](#{section_def.name.lower()})' + + return f'`{section_def.qualified_name()}`' + + +def get_property_type_info(property: Property, pkg=None) -> str: + if isinstance(property, Quantity): + type = property.type + if isinstance(type, Reference): + return get_reference(type.target_section_def, pkg) + if isinstance(type, Datatype): + try: + return f'`{type.serialize_self()["type_data"]}`' + except NotImplementedError: + pass + + if isinstance(property, SubSection): + return get_reference(property.section_def, pkg) + + return '*unknown type*' + + +def get_property_description(property: Property) -> str | None: + value = property.description + if value: + value = utils.strip(value) + value = value.replace('\n\n', '<br/>').replace('\n', ' ') + + return value + + +def get_quantity_default(quantity: Quantity) -> str: + default = quantity.default + if isinstance(default, dict): + return 'Complex object, default value not displayed.' + return f'`{str(quantity.default)}`' if quantity.default is not None else '' + + +def get_property_options(property: Property) -> str: + options: list[str] = [] + if isinstance(property, Quantity): + if property.shape != []: + options.append(f'**shape**=`{property.shape}`') + if property.unit: + options.append(f'**unit**=`{property.unit}`') + + default = get_quantity_default(property) + if default != '': + options.append(f'**default**=`{default}`') + + if isinstance(property, SubSection): + options.append('**sub-section**') + if property.repeats: + options.append('**repeats**') + + return ', '.join(options) + + +def section_markdown_from_section_cls( + section_cls, name=None, heading=None, hide=[], pkg=None +): + section_def = section_cls.m_def + properties = section_def.quantities + section_def.sub_sections + + if not name: + name = section_cls.__name__ + + def content(property): + result = [] + description = get_property_description(property) + if description: + result.append(description) + options = get_property_options(property) + if options != '': + result.append(options) + + return '</br>'.join(result) + + def property_row(property): + if property.name.startswith('m_'): + return '' + type_name = get_property_type_info(property, pkg) + return f'|{property.name}|{type_name}|{content(property)}|\n' + + if heading is None: + result = f'### {name}\n' + else: + result = heading + '\n' + + if section_def.description and section_def.description != '': + result += f'**description**: {utils.strip(section_def.description)}\n\n' + + if len(section_def.base_sections) > 0: + base_sections = [ + get_reference(base_section, pkg) + for base_section in section_def.base_sections + ] + result += f'**inherits from**: {", ".join(base_sections)}\n\n' + + if section_def.links: + links = [f'[{link}]({link})' for link in section_def.links] + result += f'**links**: {", ".join(links)}\n\n' + + if len(properties) > 0: + result += '**properties**:\n\n' + result += '|name|type| |\n' + result += '|----|----|-|\n' + result += ''.join( + [ + property_row(property) + for property in properties + if property.name not in hide + ] + ) + result += '\n\n' + + if ( + section_cls.normalize + and section_cls.normalize.__doc__ != ArchiveSection.normalize.__doc__ + ): + if section_cls.normalize.__doc__: + result += f'**normalization**: 
\n\n{utils.strip(section_cls.normalize.__doc__)}\n\n' + else: + result += f'**normalization** without further documentation\n\n' + + return result + + +def package_markdown_from_package(pkg): + return ''.join( + [ + section_markdown_from_section_cls(section_def.section_cls, pkg=pkg) + for section_def in pkg.section_definitions + ] + ) diff --git a/nomad/mkdocs/pydantic.py b/nomad/mkdocs/pydantic.py new file mode 100644 index 0000000000000000000000000000000000000000..774843aa3d48ee149cdda56f32517b3e9543fd54 --- /dev/null +++ b/nomad/mkdocs/pydantic.py @@ -0,0 +1,189 @@ +# +# Copyright The NOMAD Authors. +# +# This file is part of NOMAD. See https://nomad-lab.eu for further info. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +""" +Definitions that are used in the documentation via mkdocs-macro-plugin. +""" + +from enum import Enum +from inspect import isclass +from types import UnionType +from typing import Annotated, Any, Literal, Union, get_args, get_origin + +from pydantic import BaseModel +from pydantic.fields import FieldInfo + +from nomad import utils + +exported_config_models = set() # type: ignore + + +def get_field_type_info(field: FieldInfo) -> tuple[str, set[Any]]: + """Used to recursively walk through a type definition, building up a cleaned + up type name and returning all of the classes that were used. + + Args: + type_: The type to inspect. Can be any valid type definition. + + Returns: + Tuple containing the cleaned up type name and a set of classes + found inside. 
+ """ + classes = set() + annotation = field.annotation + + def get_class_name(ann: Any) -> str: + if hasattr(ann, '__name__'): + name = ann.__name__ + return 'None' if name == 'NoneType' else name + return str(ann) + + def _recursive_extract(ann: Any, type_str: str = '') -> str: + nonlocal classes + + origin = get_origin(ann) + args = get_args(ann) + + if origin is None and issubclass(ann, Enum): + classes.add(ann) + # Determine base type for Enums + if issubclass(ann, str): + return get_class_name(str) + elif issubclass(ann, int): + return get_class_name(int) + else: + return get_class_name(ann) + elif origin is None: + classes.add(ann) + return get_class_name(ann) + if origin is list: + classes.add(origin) + if type_str: + type_str += '[' + _recursive_extract(args[0]) + ']' + else: + type_str = 'list[' + _recursive_extract(args[0]) + ']' + elif origin is dict: + classes.add(origin) + if type_str: + type_str += ( + '[' + + _recursive_extract(args[0]) + + ', ' + + _recursive_extract(args[1]) + + ']' + ) + else: + type_str = ( + 'dict[' + + _recursive_extract(args[0]) + + ', ' + + _recursive_extract(args[1]) + + ']' + ) + + elif origin is UnionType or origin is Union: + # Handle Union types (e.g., Optional[str] is equivalent to Union[str, None]) + union_types = [] + for arg in args: + union_types.append(_recursive_extract(arg)) + type_str = ' | '.join(union_types) + elif origin is Literal: + classes.add(origin) + return get_class_name( + type(args[0]) + ) # Add name of the literal value (e.g., str) + elif origin is Annotated: + # Extract the underlying type from Annotated + return _recursive_extract(args[0]) + else: + # Handle generic types + classes.add(origin) + return get_class_name(ann) + + return type_str + + type_name = _recursive_extract(annotation) + return type_name, classes + + +def get_field_description(field: FieldInfo) -> str | None: + """Retrieves the description for a pydantic field as a markdown string. + + Args: + field: The pydantic field to inspect. + + Returns: + Markdown string for the description. + """ + value = field.description + if value: + value = utils.strip(value) + value = value.replace('\n\n', '<br/>').replace('\n', ' ') + + return value + + +def get_field_default(field: FieldInfo) -> str | None: + """Retrieves the default value from a pydantic field as a markdown string. + + Args: + field: The pydantic field to inspect. + + Returns: + Markdown string for the default value. + """ + default_value = field.default + if default_value is not None: + if isinstance(default_value, dict | BaseModel): + default_value = 'Complex object, default value not displayed.' + elif default_value == '': + default_value = '""' + else: + default_value = f'`{default_value}`' + return default_value + + +def get_field_options(field: FieldInfo) -> dict[str, str | None]: + """Retrieves a dictionary of value-description pairs from a pydantic field. + + Args: + field: The pydantic field to inspect. + + Returns: + Dictionary containing the possible options and their description for + this field. The description may be None indicating that it does not exist. + """ + options: dict[str, str | None] = {} + if isclass(field.annotation) and issubclass(field.annotation, Enum): + for x in field.annotation: + options[str(x.value)] = None + return options + + +def get_field_deprecated(field: FieldInfo) -> bool: + """Returns whether the given pydantic field is deprecated or not. + + Args: + field: The pydantic field to inspect. + + Returns: + Whether the field is deprecated. 
+ """ + if field.deprecated: + return True + return False diff --git a/nomad/normalizing/common.py b/nomad/normalizing/common.py index 75d5c2bdf7935570c29893928482bfa0252baa7c..f03185579dc8dbdbec5a0075841fa9f115be9ade 100644 --- a/nomad/normalizing/common.py +++ b/nomad/normalizing/common.py @@ -15,27 +15,24 @@ # See the License for the specific language governing permissions and # limitations under the License. # +from typing import Any + +import matid.geometry # pylint: disable=import-error import numpy as np -from math import isnan from ase import Atoms -from typing import List, Set, Any, Optional, Dict, Union -from nptyping import NDArray from matid import SymmetryAnalyzer # pylint: disable=import-error -from matid.symmetry.wyckoffset import WyckoffSet as WyckoffSetMatID # pylint: disable=import-error -import matid.geometry # pylint: disable=import-error +from matid.symmetry.wyckoffset import ( + WyckoffSet as WyckoffSetMatID, # pylint: disable=import-error +) +from nptyping import NDArray from nomad import atomutils from nomad.config import config -from nomad.utils import hash -from nomad.units import ureg from nomad.datamodel.metainfo.system import Atoms as NOMADAtoms from nomad.datamodel.optimade import Species -from nomad.datamodel.results import ( - Cell, - Structure, - LatticeParameters, - WyckoffSet, -) +from nomad.datamodel.results import Cell, LatticeParameters, Structure, WyckoffSet +from nomad.units import ureg +from nomad.utils import hash def wyckoff_sets_from_matid(wyckoff_sets: list[WyckoffSetMatID]) -> list[WyckoffSet]: diff --git a/nomad/normalizing/material.py b/nomad/normalizing/material.py index 078e4f0452e60c6d3dbe1bcb3ee98352f4c9b8b6..9192162c68cabba23ff671779e183f158afb39b6 100644 --- a/nomad/normalizing/material.py +++ b/nomad/normalizing/material.py @@ -16,25 +16,24 @@ # import re -from typing import Union, Dict, List -from nptyping import NDArray import ase.data from matid.classification.classifications import ( - Class0D, Atom, + Class0D, Class1D, Class2D, + Class3D, Material2D, Surface, - Class3D, ) +from nptyping import NDArray from nomad import atomutils from nomad.atomutils import Formula -from nomad.normalizing.common import material_id_bulk, material_id_2d, material_id_1d +from nomad.datamodel.results import Material, Symmetry, structure_name_map +from nomad.normalizing.common import material_id_1d, material_id_2d, material_id_bulk from nomad.normalizing.topology import TopologyNormalizer -from nomad.datamodel.results import Symmetry, Material, structure_name_map class MaterialNormalizer: diff --git a/nomad/normalizing/metainfo.py b/nomad/normalizing/metainfo.py index b76df84ca7d766f35c6556a831a857fec93980bc..73e3ddd01829687edab628a8065c73107b60fe26 100644 --- a/nomad/normalizing/metainfo.py +++ b/nomad/normalizing/metainfo.py @@ -18,8 +18,7 @@ from nomad.datamodel import EntryArchive from nomad.datamodel.data import ArchiveSection -from nomad.datamodel import EntryArchive -from typing import Optional + from . import Normalizer diff --git a/nomad/normalizing/method.py b/nomad/normalizing/method.py index 0fc961ec59be8499fe209547351a58d40abe2f8b..2a3355f8b13bf6b9c87d5be54b4f0c5aa54011e1 100644 --- a/nomad/normalizing/method.py +++ b/nomad/normalizing/method.py @@ -15,33 +15,33 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# +import re from abc import ABC, abstractmethod -from ase.dft.kpoints import monkhorst_pack, get_monkhorst_pack_size_and_offset from collections import OrderedDict -import re + import numpy as np +from ase.dft.kpoints import get_monkhorst_pack_size_and_offset, monkhorst_pack -from nomad.datamodel import EntryArchive, ArchiveSection -from nomad.metainfo import MSection -from nomad.metainfo.data_type import Number -from nomad.units import ureg -from nomad.metainfo import Section -from nomad.utils import RestrictedDict from nomad.config import config +from nomad.datamodel import ArchiveSection, EntryArchive from nomad.datamodel.results import ( - Method, - Simulation, - HubbardKanamoriModel, - DFT, - TB, - GW, BSE, + DFT, DMFT, - Precision, + GW, + TB, + HubbardKanamoriModel, Material, + Method, + Precision, + Simulation, xc_treatments, xc_treatments_extended, ) +from nomad.metainfo import MSection, Section +from nomad.metainfo.data_type import Number +from nomad.units import ureg +from nomad.utils import RestrictedDict class MethodNormalizer: # TODO: add normalizer for atom_parameters.label diff --git a/nomad/normalizing/normalizer.py b/nomad/normalizing/normalizer.py index 82eb23aa250800f302c03f9d2a228438cced1cea..0c3976f29788071f990c8c94349cacf36134cc81 100644 --- a/nomad/normalizing/normalizer.py +++ b/nomad/normalizing/normalizer.py @@ -17,11 +17,10 @@ # from abc import ABCMeta, abstractmethod -from typing import List, Optional -from nomad.utils import get_logger -from nomad.metainfo import MSection from nomad.datamodel import EntryArchive +from nomad.metainfo import MSection +from nomad.utils import get_logger class Normalizer(metaclass=ABCMeta): diff --git a/nomad/normalizing/optimade.py b/nomad/normalizing/optimade.py index d19b0e09853d7e0cfaa1cadd486058de90b53ac8..3c78ab4ca58561576318bd6507cce93685270a8e 100644 --- a/nomad/normalizing/optimade.py +++ b/nomad/normalizing/optimade.py @@ -16,19 +16,18 @@ # limitations under the License. 
# -from typing import Any, Dict -import numpy as np import re +from typing import Any + import ase.data import ase.formula +import numpy as np import pint.quantity -from nomad.datamodel import EntryArchive from nomad.atomutils import Formula +from nomad.datamodel import EntryArchive, EntryMetadata, OptimadeEntry, Species from nomad.normalizing.normalizer import SystemBasedNormalizer from nomad.units import ureg -from nomad.datamodel import OptimadeEntry, Species, EntryMetadata - species_re = re.compile(r'^([A-Z][a-z]?)(\d*)$') atom_label_re = re.compile( diff --git a/nomad/normalizing/results.py b/nomad/normalizing/results.py index 6f5f4ebde1e15ef57b0668b85d49efb63c332d82..2316cb4510002da124d5deb5408224a3354218ea 100644 --- a/nomad/normalizing/results.py +++ b/nomad/normalizing/results.py @@ -17,77 +17,78 @@ # import re -import numpy as np -from typing import Union, Any, Optional +from typing import Any + import ase.data -from matid import SymmetryAnalyzer # pylint: disable=import-error import matid.geometry # pylint: disable=import-error +import numpy as np +from matid import SymmetryAnalyzer # pylint: disable=import-error from nomad import atomutils -from nomad.config import config -from nomad.utils import traverse_reversed, extract_section from nomad.atomutils import Formula -from nomad.normalizing.normalizer import Normalizer -from nomad.normalizing.method import MethodNormalizer -from nomad.normalizing.material import MaterialNormalizer +from nomad.config import config from nomad.datamodel import EntryArchive -from nomad.datamodel.metainfo.workflow import Workflow from nomad.datamodel.data import ArchiveSection -from nomad.normalizing.common import structures_2d +from nomad.datamodel.metainfo.workflow import Workflow from nomad.datamodel.results import ( BandGap, BandGapDeprecated, - RadialDistributionFunction, - RadiusOfGyration, - MeanSquaredDisplacement, - Results, - Material, - Method, - GeometryOptimization, - Trajectory, - MolecularDynamics, - MDProvenance, - TemperatureDynamic, - VolumeDynamic, - PressureDynamic, - EnergyDynamic, - Properties, - StructuralProperties, - DynamicalProperties, - EnergyVolumeCurve, - BulkModulus, - ShearModulus, - MechanicalProperties, - ElectronicProperties, - VibrationalProperties, - ThermodynamicProperties, BandStructureElectronic, BandStructurePhonon, + BulkModulus, + DensityCharge, DOSElectronic, - DOSNew, DOSElectronicNew, + DOSNew, DOSPhonon, - GreensFunctionsElectronic, + DynamicalProperties, + EELSMethodology, + ElectricFieldGradient, + ElectronicProperties, + EnergyDynamic, EnergyFreeHelmholtz, + EnergyVolumeCurve, + GeometryOptimization, + GreensFunctionsElectronic, HeatCapacityConstantVolume, - SpectroscopicProperties, - EELSMethodology, - SpectraProvenance, - Spectra, MagneticProperties, MagneticShielding, MagneticSusceptibility, - ElectricFieldGradient, + Material, + MDProvenance, + MeanSquaredDisplacement, + MechanicalProperties, + Method, + MolecularDynamics, + PressureDynamic, + Properties, + RadialDistributionFunction, + RadiusOfGyration, + Results, + ShearModulus, + Spectra, + SpectraProvenance, + SpectroscopicProperties, SpinSpinCoupling, - DensityCharge, + StructuralProperties, + TemperatureDynamic, + ThermodynamicProperties, + Trajectory, + VibrationalProperties, + VolumeDynamic, ) +from nomad.normalizing.common import structures_2d +from nomad.normalizing.material import MaterialNormalizer +from nomad.normalizing.method import MethodNormalizer +from nomad.normalizing.normalizer import Normalizer +from nomad.utils import 
extract_section, traverse_reversed try: import runschema runschema.run_schema_entry_point.load() - import runschema.method import runschema.calculation + import runschema.method import runschema.system except Exception as e: runschema, simulationworkflowschema = None, None @@ -288,11 +289,12 @@ class ResultsNormalizer(Normalizer): else: self.entry_archive.metadata.entry_name = f'{type_tag}' - def resolve_band_gap(self) -> list[BandGap]: + def resolve_band_gap( + self, path: list[str] = ['run', 'calculation', 'band_gap'] + ) -> list[BandGap]: """Extract all band gaps from the given `path` and return them in a list along with their provenance. """ - path = ['run', 'calculation', 'band_gap'] bg_root: list[BandGap] = [] if band_gaps := traverse_reversed(self.entry_archive, path): for bg in band_gaps: @@ -306,7 +308,9 @@ class ResultsNormalizer(Normalizer): bg_root.insert(0, bg_results) return bg_root - def resolve_band_structure(self) -> list[BandStructureElectronic]: + def resolve_band_structure( + self, path: list[str] = ['run', 'calculation', 'band_structure_electronic'] + ) -> list[BandStructureElectronic]: """Returns a new section containing an electronic band structure. In the case of multiple valid band structures, only the latest one is considered. @@ -315,7 +319,6 @@ class ResultsNormalizer(Normalizer): - There is a non-empty array of kpoints. - There is a non-empty array of energies. """ - path = ['run', 'calculation', 'band_structure_electronic'] bs_root: list[BandStructureElectronic] = [] if band_structures := traverse_reversed(self.entry_archive, path): for bs in band_structures: @@ -346,7 +349,9 @@ class ResultsNormalizer(Normalizer): bs_root.insert(0, bs_results) return bs_root - def resolve_dos_deprecated(self) -> list[DOSElectronic]: + def resolve_dos_deprecated( + self, path: list[str] = ['run', 'calculation', 'dos_electronic'] + ) -> list[DOSElectronic]: """Returns a reference to the section containing an electronic dos. In the case of multiple valid DOSes, only the latest one is reported. @@ -358,7 +363,6 @@ class ResultsNormalizer(Normalizer): to an old schema which will be deleted. The new function `resolve_dos` should be the one which persists over time. """ - path = ['run', 'calculation', 'dos_electronic'] dos_sections = extract_section(self.entry_archive, path, full_list=True) # The old mapping does not work for the new spin-polarized schema if ( @@ -376,7 +380,9 @@ class ResultsNormalizer(Normalizer): dos_results.energy_fermi = dos.energy_fermi return [dos_results] if dos_results else [] - def resolve_dos(self) -> list[DOSElectronicNew]: + def resolve_dos( + self, path: list[str] = ['run', 'calculation', 'dos_electronic'] + ) -> list[DOSElectronicNew]: """Returns a section containing the references for an electronic DOS. This section is then stored under `archive.results.properties.electronic.dos_electronic_new`. @@ -393,7 +399,6 @@ class ResultsNormalizer(Normalizer): Returns: List[DOSElectronicNew]: the mapped DOS. 
""" - path = ['run', 'calculation', 'dos_electronic'] dos_result = None # only instantiate `dos_results` if the tests below pass if dos_sections := extract_section(self.entry_archive, path, full_list=True): for dos_section in dos_sections: @@ -434,7 +439,7 @@ class ResultsNormalizer(Normalizer): return [dos_result] if dos_result else [] def resolve_greens_functions( - self, path: list[str] + self, path: list[str] = ['run', 'calculation', 'greens_functions'] ) -> list[GreensFunctionsElectronic]: """Returns a section containing the references of the electronic Greens functions. This section is then stored under `archive.results.properties.electronic`. @@ -496,8 +501,9 @@ class ResultsNormalizer(Normalizer): gfs_root.append(gfs_results) return gfs_root - def fetch_charge_density(self) -> list[DensityCharge]: - path = ['run', 'calculation', 'density_charge', 'value_hdf5'] + def fetch_charge_density( + self, path: list[str] = ['run', 'calculation', 'density_charge', 'value_hdf5'] + ) -> list[DensityCharge]: return_list: list[DensityCharge] = [] if runschema and ( hdf5_wrappers := list(traverse_reversed(self.entry_archive, path)) @@ -508,7 +514,9 @@ class ResultsNormalizer(Normalizer): return_list.append(d) return return_list - def resolve_electric_field_gradient(self) -> list[ElectricFieldGradient]: + def resolve_electric_field_gradient( + self, path: list[str] = ['run', 'calculation', 'electric_field_gradient'] + ) -> list[ElectricFieldGradient]: """Returns a section containing the references for the Electric Field Gradient. This section is then stored under `archive.results.properties.electronic`. @@ -522,7 +530,6 @@ class ResultsNormalizer(Normalizer): Returns: list[ElectricFieldGradient]: the mapped Electric Field Gradient. """ - path = ['run', 'calculation', 'electric_field_gradient'] mapped_data: list[ElectricFieldGradient] = [] if stored_data := traverse_reversed(self.entry_archive, path): for data in stored_data: @@ -671,6 +678,10 @@ class ResultsNormalizer(Normalizer): methods (list[str]): the list of methods from which the properties are resolved. properties (list[str]): the list of properties to be resolved from `workflow2.results`. """ + properties_map = { + 'dos': 'dos_electronic_new', + 'band_structure': 'band_structure_electronic', + } for method in methods: name = ( 'MaxEnt' @@ -680,7 +691,9 @@ class ResultsNormalizer(Normalizer): else method.upper() ) for prop in properties: - property_list = self.electronic_properties.get(prop) + property_list = self.electronic_properties.get( + properties_map.get(prop, prop) + ) method_property_resolved = getattr(self, f'resolve_{prop}')( ['workflow2', 'results', f'{method}_outputs', prop] ) @@ -1147,9 +1160,7 @@ class ResultsNormalizer(Normalizer): 'dos_electronic': self.resolve_dos_deprecated(), 'dos_electronic_new': self.resolve_dos(), 'band_structure_electronic': self.resolve_band_structure(), - 'greens_functions_electronic': self.resolve_greens_functions( - ['run', 'calculation', 'greens_functions'] - ), + 'greens_functions_electronic': self.resolve_greens_functions(), 'density_charge': self.fetch_charge_density(), 'electric_field_gradient': self.resolve_electric_field_gradient(), } diff --git a/nomad/normalizing/topology.py b/nomad/normalizing/topology.py index da0cf2ee5ae501af6ad9a80bc5106bd8ea900b9e..2477f91ad67dbb7dd591002a481001838c511919 100644 --- a/nomad/normalizing/topology.py +++ b/nomad/normalizing/topology.py @@ -16,48 +16,45 @@ # limitations under the License. 
# -from typing import Dict, List, Optional, Union -from collections import defaultdict -import pathlib import json -from math import isnan +import pathlib +from collections import defaultdict +import numpy as np from ase import Atoms from ase.data import chemical_symbols -import numpy as np -from matid.clustering import SBC, Cluster -from matid.symmetry.symmetryanalyzer import SymmetryAnalyzer from matid.classification.classifications import ( - Class0D, Atom, + Class0D, Class1D, Class2D, + Class3D, Material2D, Surface, - Class3D, ) +from matid.clustering import SBC, Cluster +from matid.symmetry.symmetryanalyzer import SymmetryAnalyzer -from nomad import utils +from nomad import atomutils, utils from nomad.config import config -from nomad import atomutils +from nomad.datamodel.datamodel import EntryArchive from nomad.datamodel.results import ( CoreHole, - SymmetryNew as Symmetry, Material, - System, Relation, + System, structure_name_map, ) -from nomad.datamodel.datamodel import EntryArchive +from nomad.datamodel.results import SymmetryNew as Symmetry from nomad.normalizing.common import ( - cell_from_ase_atoms, ase_atoms_from_nomad_atoms, + cell_from_ase_atoms, + material_id_1d, + material_id_2d, + material_id_bulk, nomad_atoms_from_ase_atoms, - wyckoff_sets_from_matid, structures_2d, - material_id_bulk, - material_id_2d, - material_id_1d, + wyckoff_sets_from_matid, ) conventional_description = 'The conventional cell of the material from which the subsystem is constructed from.' diff --git a/nomad/parsing/artificial.py b/nomad/parsing/artificial.py index 6302b72dde31231c95de7924a889b9d38d20423e..ae4f1b74b182c8bda27412689866722071b53fbd 100644 --- a/nomad/parsing/artificial.py +++ b/nomad/parsing/artificial.py @@ -21,19 +21,20 @@ Parser for creating artificial test, brenchmark, and demonstration data. """ import json +import os import os.path import random -from ase.data import chemical_symbols -import numpy +import signal import sys import time -import os -import signal + +import numpy +from ase.data import chemical_symbols from nomad.datamodel import EntryArchive from nomad.datamodel.metainfo import runschema -from .parser import Parser, MatchingParser +from .parser import MatchingParser, Parser class EmptyParser(MatchingParser): diff --git a/nomad/parsing/file_parser/__init__.py b/nomad/parsing/file_parser/__init__.py index e6a541c281a03db0a00a926a97293e57e8e9693c..6c50c195c5b9899c62fdedf5bf025212b8bf83d4 100644 --- a/nomad/parsing/file_parser/__init__.py +++ b/nomad/parsing/file_parser/__init__.py @@ -1,6 +1,7 @@ -from .file_parser import FileParser, Parser +from .file_parser import FileParser, ArchiveWriter from .text_parser import TextParser, DataTextParser, Quantity, ParsePattern from .xml_parser import XMLParser from .tar_parser import TarParser UnstructuredTextFileParser = TextParser +Parser = ArchiveWriter diff --git a/nomad/parsing/file_parser/file_parser.py b/nomad/parsing/file_parser/file_parser.py index 79b4f9086a8fc43858b11dbd5fe5ee17c26c72af..b9bef402991694ccb5fa5b64d5d25f5eedecbeb4 100644 --- a/nomad/parsing/file_parser/file_parser.py +++ b/nomad/parsing/file_parser/file_parser.py @@ -12,20 +12,21 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from abc import ABC, abstractmethod -import os -import pint -from typing import Any, Dict, IO, Union, List -from collections.abc import Callable -import gzip import bz2 +import gzip import lzma +import os import tarfile +from abc import ABC, abstractmethod +from collections.abc import Callable from contextlib import contextmanager +from typing import IO, Any +import pint + +from nomad.datamodel import EntryArchive from nomad.metainfo import MSection, SubSection from nomad.utils import get_logger -from nomad.datamodel import EntryArchive class FileParser(ABC): @@ -221,12 +222,20 @@ class FileParser(ABC): def parse(self, quantity_key: str = None, **kwargs): pass + def pop(self, key, default=None): + return self._results.pop(key, default) + def __getitem__(self, key): if isinstance(key, str): return self.get(key) elif isinstance(key, int): return self[int] + def __setitem__(self, key, val): + if self._results is None: + self._results = {} + self._results[key] = val + def __getattr__(self, key): if self._results is None: self._results = {} @@ -258,11 +267,11 @@ class FileParser(ABC): pass -class Parser(ABC): +class ArchiveWriter(ABC): mainfile: str = None archive: EntryArchive = None logger = None - child_archives = None + child_archives: dict[str, EntryArchive] = None def get_mainfile_keys(self, filename: str, decoded_buffer: str) -> bool | list[str]: """ @@ -307,11 +316,11 @@ class Parser(ABC): self.archive.m_update_from_dict(self.to_dict()) - def parse( + def write( self, mainfile: str, archive: EntryArchive, logger=None, child_archives=None ) -> None: """ - Main interface to the nomad parsing infrastructure. + Wrapper to write_to_archive method. """ self.mainfile = mainfile self.archive = archive @@ -319,3 +328,11 @@ class Parser(ABC): self.child_archives = child_archives self.write_to_archive() + + def parse( + self, mainfile: str, archive: EntryArchive, logger=None, child_archives=None + ) -> None: + """ + Wraps write method for backwards compatibility. 
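The `ArchiveWriter` rename above keeps the old `parse()` entry point as a thin wrapper around the new `write()`, while `Parser = ArchiveWriter` in `file_parser/__init__.py` preserves the old import name. A standalone sketch of that delegation pattern (class and file names are illustrative):

class Writer:
    def write(self, mainfile: str) -> None:
        print(f'writing archive for {mainfile}')

    def parse(self, mainfile: str) -> None:
        # kept only for backwards compatibility; forwards to write()
        self.write(mainfile)

Writer().parse('calc.out')   # old call style still works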
+ """ + self.write(mainfile, archive, logger, child_archives) diff --git a/nomad/parsing/file_parser/mapping_parser.py b/nomad/parsing/file_parser/mapping_parser.py new file mode 100644 index 0000000000000000000000000000000000000000..5d0be33bcab33d614518f8d1528a47c37a635750 --- /dev/null +++ b/nomad/parsing/file_parser/mapping_parser.py @@ -0,0 +1,1498 @@ +import json +import os +import re +from abc import ABC, abstractmethod +from io import BytesIO +from typing import Any, Optional + +import h5py +import jmespath +import jmespath.visitor +import numpy as np +from jsonpath_ng.parser import JsonPathParser +from lxml import etree +from pydantic import BaseModel, Field, model_validator + +from nomad.datamodel import EntryArchive +from nomad.datamodel.metainfo.annotations import Mapper as MapperAnnotation +from nomad.metainfo import MSection, SubSection +from nomad.parsing.file_parser import TextParser as TextFileParser +from nomad.parsing.parser import ArchiveParser +from nomad.units import ureg +from nomad.utils import get_logger + +MAPPING_ANNOTATION_KEY = 'mapping' + + +class JmespathOptions(jmespath.visitor.Options): + def __init__(self, **kwargs): + self.pop = False + self.search = True + + for key in list(kwargs.keys()): + if not hasattr(super(), key): + setattr(self, key, kwargs[key]) + del kwargs[key] + super().__init__(**kwargs) + + +LOGGER = get_logger(__name__) + + +class TreeInterpreter(jmespath.visitor.TreeInterpreter): + def __init__(self, options=None): + self.stack = [] + self._current_node = None + self.current_stack = None + self._parent = None + self.nodes = [] + self.indices = [] + self.keys = [] + self._cache = [] + self._parent_key = '__parent' + super().__init__(options) + + def visit(self, node, *args, **kwargs): + node_type = node.get('type') + for child in node.get('children'): + if hasattr(child, 'get'): + child[self._parent_key] = node_type + + value = super().visit(node, *args, **kwargs) + node.pop(self._parent_key, None) + return value + + def visit_field(self, node, value): + parent = node.get(self._parent_key, None) + if isinstance(value, list): + if not value and not self._options.search: + value.append({}) + if not value: + return None + value = value[-1] + if not hasattr(value, 'get'): + return None + + if not self._options.search: + if parent == 'index_expression' and not isinstance( + value.get(node['value']), list + ): + value[node['value']] = [] + + value.setdefault(node['value'], [] if parent == 'index_expression' else {}) + + if self.stack and not self.indices[-1]: + parent_stack = self.stack[-1].get(self.keys[-1], {}) + if value == parent_stack or ( + isinstance(parent_stack, list) and value in parent_stack + ): + self.indices[-1] = [0] + + if parent != 'comparator': + self.indices.append([]) + self.stack.append(value) + self.keys.append(node['value']) + + try: + return value.get(node['value']) + except AttributeError: + return None + + def visit_index_expression(self, node, value): + value = super().visit_index_expression(node, value) + if node.get(self._parent_key) == 'pipe' and self.indices: + self.indices[-1] = [] + return value + + def visit_index(self, node, value): + if not isinstance(value, list): + return None + + index = node['value'] + n_value = len(value) + if self._options.search and index >= n_value: + return None + + n_target = abs(index) - n_value + (0 if index < 0 else 1) + value.extend([{} for _ in range(n_target)]) + + if self.indices: + self.indices[-1] = [index] + return value[index] + + def visit_slice(self, node, value): + if 
not isinstance(value, list): + return None + + s = slice(*node['children']) + n_value = len(value) + indices = list(range(s.start or 0, s.stop or n_value or 1, s.step or 1)) + if indices: + max_index = max(np.abs(indices)) + min_index = min(indices) + n_target = ( + max_index + - n_value + + (0 if min_index < 0 and max_index == -min_index else 1) + ) + + if max_index >= n_value and self._options.search: + return None + + value.extend([{} for _ in range(n_target)]) + # if isinstance(value, h5py.Group): + # return [g for g in value.values()][s] + self.indices[-1] = indices + return value[s] + + +class ParsedResult(jmespath.parser.ParsedResult): + def _set_value(self, value, options, data): + self._interpreter = TreeInterpreter(options=options) + result = self._interpreter.visit(self.parsed, value) + + values = [] + if not options.pop and data is None: + return result, values + + stack, stack_indices, stack_keys = [], [], [] + for n, s in enumerate(self._interpreter.stack): + add = s == self._interpreter.stack[-1] + if not add: + val = s[self._interpreter.keys[n]] + add = val and not hasattr( + val[0] if isinstance(val, list) else val, 'get' + ) + if add: + stack.append(s) + stack_indices.append(self._interpreter.indices[n]) + stack_keys.append(self._interpreter.keys[n]) + + for n, indices in enumerate(stack_indices): + d = ( + data[n] + if isinstance(data, list) + and len(data) > 1 + and len(data) == len(stack_indices) + else data + ) + if not indices: + stack[n][stack_keys[n]] = d + v = ( + stack[n][stack_keys[n]] + if not options.pop + else stack[n].pop(stack_keys[n]) + ) + values.append(v) + continue + map_data = isinstance(d, list) and len(d) == len(indices) + for nd in range(len(indices) - 1, -1, -1): + index = indices[nd] + stack[n][stack_keys[n]][index] = d[nd] if map_data else d + v = ( + stack[n][stack_keys[n]][index] + if not options.pop + else stack[n][stack_keys[n]].pop(index) + ) + values.append(v) + + return result, values[0] if len(values) == 1 else values + + def search(self, value, **kwargs): + options = JmespathOptions(search=True, **kwargs) + return self._set_value(value, options, None)[0] + + def set(self, value, data, **kwargs): + options = JmespathOptions(search=False, **kwargs) + return self._set_value(value, options, data)[1] + + +class JmespathParser(jmespath.parser.Parser): + """ + JmespathParser extension implementing search with pop and set functionalities. 
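The `ParsedResult`/`JmespathParser` extension above adds write (`set`) and removal (`pop`) support on top of plain jmespath search. For orientation, the stock `jmespath` behaviour it builds on (runnable as-is):

import jmespath

data = {'a': {'b': [{'c': 1}, {'c': 2}]}}
print(jmespath.search('a.b[-1].c', data))   # 2 -- stock jmespath only reads
# the subclass above lets the same kind of expression also write into or pop from `data`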
+ """ + + def parse(self, expression): + parsed_result = super().parse(expression) + return ParsedResult(parsed_result.expression, parsed_result.parsed) + + +class PathParser(BaseModel): + parser_name: str = Field( + 'jmespath', description="""Name of the parser to perform parsing.""" + ) + + def get_data(self, path, source, **kwargs) -> Any: + if self.parser_name == 'jmespath': + + def _get(path, source, **kwargs): + return JmespathParser().parse(path).search(source, **kwargs) + + return _get(path, source, **kwargs) + elif self.parser_name == 'jsonpath_ng': + + def _get(path, source, **kwargs): + parser = JsonPathParser().parse(path) + results = [match.value for match in parser.find(source)] + if kwargs.get('pop'): + # TODO is find and filter somehow can be performed simulatenously + parser.filter(lambda v: True, source) + return results[0] if len(results) == 1 else results + + return _get(path, source, **kwargs) + + return None + + def set_data(self, path, target, data, **kwargs) -> Any: + if self.parser_name == 'jmespath': + + def _set(path, target, data, **kwargs): + return JmespathParser().parse(path).set(target, data, **kwargs) + + return _set(path, target, data, **kwargs) + + elif self.parser_name == 'jsonpath_ng': + + def _set(path, target, data, **kwargs): + return JsonPathParser().parse(path).update(target, data) + + return _set(path, target, data) + + return None + + +class Path(BaseModel, validate_assignment=True): + """ + Wrapper for jmespath parser to get/set data from/to an input dictionary. + """ + + path: str = Field('', description="""User-defined path to the data.""") + parent: Optional['Path'] = Field(None, description="""Parent path.""") + relative_path: str = Field('', description="""Relative path to the data.""") + absolute_path: str = Field('', description="""Absolute path to the data.""") + reduced_path: str = Field('', description="""Reduced absolute path.""") + parser: PathParser = Field( + PathParser(), description="""The parser to use to search and set data.""" + ) + + @model_validator(mode='before') + def get_relative_path(cls, values: dict[str, Any]) -> dict[str, Any]: + relative_path = values.get('path', '') + parent = values.get('parent') + match = re.match(r'^\.(.+)|(.+\()\.(.+)', relative_path) + if match: + relative_path = ''.join([g for g in match.groups() if g]) + values['relative_path'] = relative_path + + absolute_path = relative_path + if parent: + segments = [parent.absolute_path, absolute_path] + absolute_path = '.'.join([s for s in segments if s != '@' and s]) + values['absolute_path'] = absolute_path + + values['reduced_path'] = re.sub(r'\[.+?\]|\|', '', absolute_path) + + return values + + def is_relative_path(self): + return self.relative_path != self.path or self.parent is not None + + def get_data(self, source: dict[str, Any], **kwargs) -> Any: + try: + return self.parser.get_data(self.relative_path, source, **kwargs) + except Exception: + return kwargs.get('default') + + def set_data(self, data: Any, target: dict[str, Any], **kwargs) -> Any: + cur_data = self.get_data(target, **kwargs) + update_mode = kwargs.get('update_mode') + path = self.relative_path + + def update(source: Any, target: Any): + if not isinstance(source, type(target)): + return ( + target if update_mode == 'append' and target is not None else source + ) + + if isinstance(source, dict): + if update_mode != 'replace': + for key in list(source.keys()): + target[f'.{key}'] = update( + source.get(key), target.get(f'.{key}') + ) + return target + + if isinstance(source, list): 
+ merge = re.match(r'merge(?:@(.+))*', update_mode or '') + if merge: + merge_at = merge.groups()[0] + if not merge_at or merge_at == 'start': + start = 0 + elif merge_at == 'last': + start = len(source) - len(target) + else: + start = int(merge_at) + if start < 0: + start += len(source) + for n, d in enumerate(source): + if n >= start and n < start + len(target): + update(d, target[n - start]) + else: + target.insert(n, d) + elif update_mode == 'append': + for n, d in enumerate(source): + target.insert(n, update(d, {})) + return target + + return target if update_mode == 'append' and target is not None else source + + res = self.parser.set_data(path, target, data, **kwargs) + + update(cur_data, res) + + return res + + +Path.model_rebuild() + + +class Data(BaseModel, validate_assignment=True): + """ + Wrapper for the path to the data or a transformer to extract the data. + """ + + path: Path = Field(None, description="""Path to the data.""") + transformer: 'Transformer' = Field( + None, description="""Transformer to extract data.""" + ) + parent: Path = Field(None, description="""Parent path.""") + path_parser: PathParser = Field( + None, description="""Parser used to search and set data.""" + ) + + @model_validator(mode='before') + def set_attributes(cls, values: dict[str, Any]) -> dict[str, Any]: + if values.get('path') is None and values.get('transformer'): + transformer = values['transformer'] + if len(transformer.function_args) == 1: + values['path'] = transformer.function_args[0] + else: + values['path'] = Path(path='@') + + if values.get('parent'): + if values.get('transformer'): + for arg in values['transformer'].function_args: + if arg.is_relative_path(): + arg.parent = values['parent'] + if values.get('path') and values['path'].is_relative_path(): + values['path'].parent = values['parent'] + + if values.get('path_parser'): + if values.get('path'): + values['path'].parser = values['path_parser'] + if values.get('transformer'): + for arg in values['transformer'].function_args: + arg.parser = values['path_parser'] + + return values + + def get_data( + self, source_data: dict[str, Any], parser: 'MappingParser' = None, **kwargs + ) -> Any: + if self.transformer: + value = self.transformer.get_data(source_data, parser, **kwargs) + return self.transformer.normalize_data(value) + elif self.path: + return self.path.get_data( + source_data if self.path.is_relative_path() else parser.data, **kwargs + ) + + +class BaseMapper(BaseModel): + """ + Base class for a mapper. + """ + + source: 'Data' = Field(None, description="""Source data.""") + target: 'Data' = Field(None, description="""Target data.""") + indices: list[int] | str | None = Field( + None, description="""List of indices of data to include.""" + ) + order: int = Field(None, description="""Execution order.""") + remove: bool | None = Field(None, description="""Remove data from source.""") + cache: bool | None = Field(None, description="""Store the result of the mapper.""") + all_paths: list[str] = Field( + [], description="""List of all unindexed abs. paths.""" + ) + + def get_data(self, source_data: Any, parser: 'MappingParser', **kwargs) -> Any: + return None + + def normalize_data(self, data: Any) -> Any: + return data + + @staticmethod + def from_dict(dct: dict[str, Any], parent: 'BaseMapper' = None) -> 'BaseMapper': + """ + Convert dictionary to a BaseMapper object. 
Dictionary may contain the following + source: str or Path or tuple or Transformer to extract source data + target: str or Path object of target data + mapper: + str or Path object returns Transfomer with identity function + Tuple[str, List[str]] returns Transformer + List[Dict] returns Mapper + path: str or Path object returns Map object + function_name: str name of transformation function + function_args: List[str] of paths of data as arguments to function + indices: str or List of indices of data to include + str is function name to evaluate indices + remove: Remove data from source + """ + paths: dict[str, Data] = {} + path_parser = dct.get('path_parser') + + for ptype in ['source', 'target']: + path = dct.get(ptype) + if isinstance(path, str): + path_obj = Data(path=Path(path=path)) + elif isinstance(path, tuple): + args = [Path(path=p) for p in path[1]] + path_obj = Data( + transformer=Transformer(function_name=path[0], function_args=args) + ) + if len(path) == 3: + path_obj.transformer.function_kwargs = path[2] + path_obj.transformer.cache = dct.get('cache') + elif isinstance(path, Data): + path_obj = path + else: + path_obj = None + + if path_obj: + parent_path = getattr(parent, ptype, None) + if parent_path is not None: + path_obj.parent = parent_path.path + if path_parser: + path_obj.path_parser = PathParser(parser_name=path_parser) + paths[ptype] = path_obj + + mapper = ( + dct.get('mapper') + or dct.get('path') + or (dct.get('function_name'), dct.get('function_args')) + ) + obj: BaseMapper = BaseMapper() + if isinstance(mapper, tuple) and None in mapper: + return obj + + def add_path_attrs(path: Path): + if path.is_relative_path(): + source_path = paths.get('source', parent.source if parent else None) + if source_path: + path.parent = source_path.path + if path_parser: + path.parser = PathParser(parser_name=path_parser) + + if isinstance(mapper, str | Path): + path = Path(path=mapper) if isinstance(mapper, str) else mapper + obj = Transformer() + add_path_attrs(path) + obj.function_args.append(path) + + elif ( + isinstance(mapper, tuple | list) + and len(mapper) in [2, 3] + and isinstance(mapper[0], str) + and isinstance(mapper[1], list) + ): + function_args = [] + for v in mapper[1]: + arg = v + if isinstance(v, str): + arg = Path(path=v) + add_path_attrs(arg) + function_args.append(arg) + obj = Transformer(function_name=mapper[0], function_args=function_args) + if len(mapper) == 3: + obj.function_kwargs = mapper[2] + + elif isinstance(mapper, list) and isinstance(mapper[0], dict): + obj = Mapper() + else: + LOGGER.error('Unknown mapper type.') + + for key in ['indices', 'remove', 'cache']: + if dct.get(key) is not None: + setattr(obj, key, dct.get(key)) + if paths.get('source'): + obj.source = paths.get('source') + if paths.get('target'): + obj.target = paths.get('target') + + if isinstance(obj, Mapper): + mappers = [] + for v in mapper: + m = BaseMapper.from_dict(v, obj) + mappers.append(m) + obj.mappers = mappers + + return obj + + def get_required_paths(self) -> list[str]: + def get_path_segments(parsed: dict[str, Any]) -> list[str]: + segments: list[str] = [] + value = parsed.get('value') + ptype = parsed.get('type') + + if ptype == 'comparator': + return segments + + if value and ptype == 'field': + segments.append(value) + + for children in parsed.get('children', []): + if not isinstance(children, dict): + continue + segments.extend(get_path_segments(children)) + + return segments + + def filter_path(path: str) -> list[str]: + parsed = 
JmespathParser().parse(path).parsed + segments = get_path_segments(parsed) + return ['.'.join(segments[:n]) for n in range(1, len(segments) + 1)] + + def get_paths(mapper: BaseMapper) -> list[str]: + paths = [] + if mapper.source and mapper.source.transformer: + for path in mapper.source.transformer.function_args: + paths.extend(filter_path(path.absolute_path)) + + if isinstance(mapper, Mapper): + for sub_mapper in mapper.mappers: + paths.extend(get_paths(sub_mapper)) + + elif isinstance(mapper, Transformer): + for path in mapper.function_args: + paths.extend(filter_path(path.absolute_path)) + + return paths + + return list(set(get_paths(self))) + + +class Transformer(BaseMapper): + """ + Mapper to perform a transformation of the data. + + A static method with function_name should be implemented in the parser class. + + class Parser(MappingParser): + @staticmethod + def get_eigenvalues_energies(array: np.ndarray, n_spin: int, n_kpoints: int): + array = np.transpose(array)[0].T + return np.reshape(array, (n_spin, n_kpoints, len(array[0]))) + + If function is not defined, identity transformation is applied. + """ + + function_name: str = Field( + '', description="""Name of the function defined in the parser.""" + ) + function_args: list[Path] = Field( + [], description="""Paths to the data as arguments to the function.""" + ) + function_kwargs: dict[str, Any] = Field( + {}, description="""Keyword args to pass to function.""" + ) + order: int = 1 + + def get_data( + self, source_data: dict[str, Any], parser: 'MappingParser', **kwargs + ) -> Any: + remove: bool = kwargs.get('remove', self.remove) + func = ( + getattr(parser, self.function_name, None) + if self.function_name + else lambda x: x + ) + args = [ + m.get_data( + source_data if m.is_relative_path() else parser.data, + pop=remove and self.all_paths.count(m.reduced_path) <= 1, + ) + for m in self.function_args + ] + try: + return ( + func(*args) + if not self.function_kwargs + else func(*args, **self.function_kwargs) + ) + except Exception: + # if self.function_name == 'get_positions': + # raise + return None + + +Data.model_rebuild() + + +class Mapper(BaseMapper, validate_assignment=True): + """ + Mapper for nested mappers. 
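A sketch of the dictionary DSL accepted by `BaseMapper.from_dict` above (all paths and the function name are hypothetical): a plain string mapper is a path, a leading dot marks a path relative to the parent `source`, and a `(function_name, [arg_paths])` tuple produces a `Transformer`:

from nomad.parsing.file_parser.mapping_parser import BaseMapper

mapper = BaseMapper.from_dict({
    'source': 'atoms',                                   # parent path into the source data
    'mapper': [
        {'target': '.labels', 'mapper': '.species'},     # identity transformer, relative path
        {'target': '.positions',
         'mapper': ('to_nomad_units', ['.positions'])},  # hypothetical parser static method
    ],
})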
+ """ + + mappers: list[BaseMapper] = Field([], description="""List of sub mappers.""") + order: int = 0 + __cache: dict[str, Any] = {} + + @model_validator(mode='before') + def set_attributes(cls, values: dict[str, Any]) -> dict[str, Any]: + def get_paths(mapper: BaseMapper) -> list[str]: + paths = [] + if isinstance(mapper, Transformer): + paths.extend([p.reduced_path for p in mapper.function_args]) + elif isinstance(mapper, Mapper): + for m in mapper.mappers: + paths.extend(get_paths(m)) + return paths + + def set_paths(mapper: BaseMapper, paths: list[str]): + mapper.all_paths = paths + if isinstance(mapper, Mapper): + for m in mapper.mappers: + set_paths(m, paths) + + def set_remove(mapper: BaseMapper, remove: bool): + mapper.remove = remove + if isinstance(mapper, Mapper): + for m in mapper.mappers: + set_remove(m, remove) + + paths = [] + for mapper in values.get('mappers', []): + paths.extend(get_paths(mapper)) + + # propagate all properties to all mappers + for mapper in values.get('mappers', []): + if not values.get('all_paths'): + set_paths(mapper, paths) + set_remove(mapper, values.get('remove')) + + if not values.get('all_paths'): + values['all_paths'] = paths + + return values + + def get_data( + self, source_data: dict[str, Any], parser: 'MappingParser', **kwargs + ) -> Any: + dct = {} + for mapper in self.mappers: + data = source_data + if mapper.source: + data = None + if mapper.source.transformer and mapper.source.transformer.cache: + data = self.__cache.get(mapper.source.transformer.function_name) + if data is None: + data = mapper.source.get_data(source_data, parser, **kwargs) + if mapper.source.transformer and mapper.source.transformer.cache: + self.__cache.setdefault( + mapper.source.transformer.function_name, data + ) + + def is_not_value(value: Any) -> bool: + if isinstance(value, np.ndarray): + return value.size == 0 + if hasattr(value, 'magnitude'): + return is_not_value(value.magnitude) + + not_value: Any + for not_value in [None, [], {}]: + test = value == not_value + result = test.any() if isinstance(test, np.ndarray) else test + if result: + return bool(result) + + return False + + indices = mapper.indices + if isinstance(indices, str): + indices = getattr(parser, indices, []) + if callable(indices): + indices = indices() + + value: list[Any] = [] + if isinstance(mapper, Transformer) and mapper.cache: + value = self.__cache.get(mapper.function_name, value) + + if not value: + for n, d in enumerate(data if isinstance(data, list) else [data]): + v = mapper.get_data(d, parser, **kwargs) + if indices and n not in indices: + continue + if not is_not_value(v): + value.append(v) + if value and mapper.cache and isinstance(mapper, Transformer): + self.__cache.setdefault(mapper.function_name, value) + if value: + normalized_value = [mapper.normalize_data(v) for v in value] + dct[mapper.target.path.path] = ( + normalized_value[0] if mapper.indices is None else normalized_value + ) + return dct + + def sort(self, recursive=True): + self.mappers.sort(key=lambda m: m.order) + if recursive: + for mapper in self.mappers: + if isinstance(mapper, Mapper): + mapper.sort() + + +Mapper.model_rebuild() + + +class MappingParser(ABC): + """ + A generic parser class to convert the contents of a file specified by filepath to a + dictionary. The data object is the abstract interface to the data which can defined + by implementing the load_file method. 
+ + If attributes are parsed, the data is wrapped in a dictionary with the attribute keys + prefixed by attribute_prefix while the value can be accesed by value_key. + + data = { + 'a' : { + 'b': [ + {'@name': 'item1', '__value': 'name'}, + {'@name': 'item2', '__value': 'name2'} + ] + } + } + a.b[?"@name"==\'item2\'].__value + >> name2 + + A mapping parser can be converted to another mapping parser using the convert method + by providing a mapper object. + + Attributes: + parse_only_required + Parse only data required by target parser. + attribute_prefix + Added to start of key to denote it is a data attribute. + value_key + Key to the value of the data. + """ + + parse_only_required: bool = False + attribute_prefix: str = '@' + value_key: str = '__value' + logger = get_logger(__name__) + + def __init__(self, **kwargs): + for key, val in kwargs.items(): + if hasattr(self, key): + setattr(self, key, val) + self._mapper: BaseMapper = kwargs.get('mapper') + self._filepath: str = kwargs.get('filepath') + self._data: dict[str, Any] = kwargs.get('data', {}) + self._data_object: Any = kwargs.get('data_object') + self._required_paths: list[str] = kwargs.get('required_paths', []) + + @abstractmethod + def load_file(self) -> Any: + return {} + + @abstractmethod + def to_dict(self, **kwargs) -> dict[str, Any]: + return {} + + @abstractmethod + def from_dict(self, dct: dict[str, Any]): + pass + + def build_mapper(self) -> BaseMapper: + return Mapper() + + @property + def filepath(self) -> str: + return self._filepath + + @filepath.setter + def filepath(self, value: str): + self._filepath = value + self._data_object = None + self._data = None + + @property + def data(self): + if not self._data: + self._data = self.to_dict() + return self._data + + @property + def data_object(self): + if self._data_object is None: + self._data_object = self.load_file() + return self._data_object + + @data_object.setter + def data_object(self, value: Any): + self._data_object = value + self._data = None + self._filepath = None + + @property + def mapper(self) -> BaseMapper: + if self._mapper is None: + self._mapper = self.build_mapper() + return self._mapper + + @mapper.setter + def mapper(self, value: BaseMapper): + self._mapper = value + + def set_data(self, data: Any, target: dict[str, Any], **kwargs) -> None: + if isinstance(data, dict): + for key in list(data.keys()): + path = Path(path=key) + new_data = path.set_data( + data.pop(key) if kwargs.get('remove') else data[key], + data if path.is_relative_path() else target, + update_mode=kwargs.get('update_mode', 'merge'), + ) + self.set_data(new_data, target, remove=True) + + elif isinstance(data, list): + for val in data: + self.set_data(val, target, **kwargs) + + def get_data( + self, + mapper: BaseMapper, + source_data: dict[str, Any], + ) -> Any: + return mapper.get_data(source_data, self) + + def convert( + self, + target: 'MappingParser', + mapper: 'BaseMapper' = None, + update_mode: str = 'merge', + remove: bool = False, + ): + if mapper is None: + mapper = target.mapper + if self.parse_only_required and mapper and not self._required_paths: + self._required_paths = mapper.get_required_paths() + source_data = self.data + if mapper.source: + source_data = mapper.source.get_data(self.data, self) + result = mapper.get_data(source_data, self, remove=remove) + target.set_data(result, target.data, update_mode=update_mode) + target.from_dict(target.data) + + def close(self): + if hasattr(self._data_object, 'close'): + self._data_object.close() + self._data_object = None 
+ self._data = {} + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + self.close() + + def __repr__(self) -> str: + string = f'{self.__class__.__name__}' + if self.filepath: + string += f'({os.path.basename(self.filepath)})' + if self._data_object: + string += f': {type(self._data_object).__name__}' + if self._data: + keys = list(self._data.keys()) + keys = keys[: min(len(keys), 5)] + string += f' -> data.keys: {", ".join([key for key in keys])}' + if len(self._data.keys()) > 5: + string += '...' + return string + + +class MetainfoBaseMapper(BaseMapper): + @staticmethod + def from_dict(dct: dict[str, Any], parent: BaseMapper = None) -> 'BaseMapper': + parent = BaseMapper.from_dict(dct) if parent is None else parent + + if isinstance(parent, Transformer): + transformer = MetainfoTransformer() + for key in parent.model_fields.keys(): + val = getattr(parent, key) + if val is not None: + setattr(transformer, key, val) + for key in ['unit', 'search']: + if dct.get(key): + setattr(transformer, key, dct.get(key)) + return transformer + elif isinstance(parent, Mapper): + mdct = dct.get('mapper') + mapper = MetainfoMapper() + for key in parent.model_fields.keys(): + val = getattr(parent, key) + if val is not None: + setattr(mapper, key, val) + if dct.get('m_def'): + mapper.m_def = dct.get('m_def') + for n, obj in enumerate(parent.mappers): + parent.mappers[n] = MetainfoBaseMapper.from_dict(mdct[n], obj) + mapper.mappers = parent.mappers + return mapper + return parent + + +class MetainfoMapper(MetainfoBaseMapper, Mapper): + m_def: str = Field(None, description="""Section definition.""") + + def get_data( + self, source_data: dict[str, Any], parser: MappingParser, **kwargs + ) -> Any: + dct = super().get_data(source_data, parser, **kwargs) + if self.m_def: + dct['.m_def'] = self.m_def + return dct + + +class MetainfoTransformer(MetainfoBaseMapper, Transformer): + unit: str = Field(None, description="""Pint unit to be applied to value.""") + search: str = Field(None, description="""Path to search value.""") + + def normalize_data(self, value: Any): + if self.search: + path = Path(path=self.search) + value = path.get_data(value) + if self.unit is not None and value is not None and not hasattr(value, 'units'): + value = value * ureg(self.unit) + return value + + +class MetainfoParser(MappingParser): + """ + A parser for metainfo sections. 
+ """ + + def __init__(self, **kwargs): + self._annotation_key: str = kwargs.get('annotation_key', 'mapping') + self.max_nested_level: int = 1 + super().__init__(**kwargs) + + @property + def annotation_key(self) -> str: + return self._annotation_key + + @annotation_key.setter + def annotation_key(self, value): + self._annotation_key = value + self._mapper = None + + def load_file(self) -> MSection: + if self._data_object is not None: + with open(self.filepath) as f: + return self._data_object.m_from_dict(json.load(f)) + elif self.filepath: + try: + archive = EntryArchive() + ArchiveParser().parse(self.filepath, archive) + return archive + except Exception: + self.logger.errror('Error loading archive file.') + return None + + def to_dict(self, **kwargs) -> dict[str, Any]: + if self.data_object is not None: + return self.data_object.m_to_dict() + return {} + + def from_dict(self, dct: dict[str, Any], root: MSection = None) -> None: + # if self.data_object is not None: + # self.data_object = self.data_object.m_from_dict(dct) + # return + + # TODO this is a temporary fix for nomad_simulations PhysicalProperty + # error with m_from_dict + if self.data_object is None: + return + + if root is None: + root = self.data_object + + for key, val in dct.items(): + if not hasattr(root, key): + continue + + section = getattr(root.m_def.section_cls, key) + if isinstance(section, SubSection): + val_list = [val] if isinstance(val, dict) else val + m_def = val_list[-1].get('m_def') + section_def = section.sub_section + if m_def is not None and m_def != section.qualified_name(): + for isection in section.sub_section.all_inheriting_sections: + if isection.qualified_name() == m_def: + section_def = isection + break + + for n, val_n in enumerate(val_list): + quantities = section_def.all_quantities + try: + sub_section = root.m_get_sub_section(section, n) + except Exception: + sub_section = None + if sub_section is None: + sub_section = section_def.section_cls( + **{ + n: val_n.get(n) + for n, q in quantities.items() + if not q.derived and n in val_n and n != 'value' + } + ) + root.m_add_sub_section(section, sub_section) + self.from_dict(val_n, sub_section) + value = val_n.get('value') + if value is not None: + sub_section.value = value + continue + + if key == 'm_def' or key == 'value': + continue + + try: + root.m_set(root.m_get_quantity_definition(key), val) + except Exception: + pass + + def build_mapper(self, max_level: int = None) -> BaseMapper: + """ + Builds a mapper for source data from the another parser with path or operator + specified in metainfo annotation with key annotation_key. The target path is + given by the sub section key. 
+ """ + + def fill_mapper( + mapper: dict[str, Any], + annotation: MapperAnnotation, + attributes: list[str], + ) -> None: + for key in attributes: + value = getattr(annotation, key, None) + if value is not None: + mapper.setdefault(key, value) + + def build_section_mapper( + section: SubSection | MSection, level: int = 0 + ) -> dict[str, Any]: + mapper: dict[str, Any] = {} + if level >= (max_level or self.max_nested_level): + return mapper + + section_def = ( + section.sub_section + if isinstance(section, SubSection) + else section.m_def + ) + + if not section_def: + return mapper + + # try to get annotation from sub-section + annotation: MapperAnnotation = ( + (section if isinstance(section, SubSection) else section_def) + .m_get_annotations(MAPPING_ANNOTATION_KEY, {}) + .get(self.annotation_key) + ) + + if not annotation: + # get it from def + annotation = section_def.m_get_annotations( + MAPPING_ANNOTATION_KEY, {} + ).get(self.annotation_key) + + if isinstance(section, SubSection) and not annotation: + # search also all inheriting sections + for inheriting_section in section_def.all_inheriting_sections: + annotation = inheriting_section.m_get_annotations( + MAPPING_ANNOTATION_KEY, {} + ).get(self.annotation_key) + if annotation: + # TODO this does not work as it will applies to base class + # section.sub_section = inheriting_section + # TODO this is a hacky patch, metainfo should have an alternative + # way to resolve the sub-section def + mapper['m_def'] = inheriting_section.qualified_name() + section_def = inheriting_section + break + + if not annotation: + return mapper + + fill_mapper(mapper, annotation, ['remove', 'cache', 'path_parser']) + mapper['source'] = annotation.mapper + + mapper['mapper'] = [] + for name, quantity_def in section_def.all_quantities.items(): + qannotation = quantity_def.m_get_annotations( + MAPPING_ANNOTATION_KEY, {} + ).get(self.annotation_key) + if qannotation: + quantity_mapper = { + 'mapper': qannotation.mapper, + 'target': f'{"" if section == self.data_object else "."}{name}', + } + fill_mapper( + quantity_mapper, + qannotation, + ['remove', 'cache', 'path_parser', 'unit', 'search'], + ) + mapper['mapper'].append(quantity_mapper) + + all_ids = [section_def.definition_id] + all_ids.extend([s.definition_id for s in section_def.all_base_sections]) + for name, sub_section in section_def.all_sub_sections.items(): + # avoid recursion + # if sub_section.sub_section.definition_id in all_ids: + # continue + # allow recursion up to max_level + nested = sub_section.sub_section.definition_id in all_ids + sub_section_mapper = build_section_mapper( + sub_section, level + (1 if nested else 0) + ) + if sub_section_mapper and sub_section_mapper.get('mapper'): + sub_section_mapper['target'] = ( + f'{"" if section == self.data_object else "."}{name}' + ) + sub_section_mapper['indices'] = [] if sub_section.repeats else None + sannotation = sub_section.m_get_annotations( + MAPPING_ANNOTATION_KEY, {} + ).get(self.annotation_key) + if sannotation: + sub_section_mapper['source'] = sannotation.mapper + fill_mapper( + sub_section_mapper, + sannotation, + ['remove', 'cache', 'path_parser', 'indices'], + ) + mapper['mapper'].append(sub_section_mapper) + + return mapper + + dct = build_section_mapper(self.data_object) + return MetainfoMapper.from_dict(dct) + + +class HDF5Parser(MappingParser): + """ + Mapping parser for HDF5. 
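`HDF5Parser.to_dict` flattens an `h5py` file into the nested-dict form described on `MappingParser`: groups become nested dicts, datasets become scalars or lists, and HDF5 attributes end up under `@`-prefixed keys next to a `__value` entry. A small self-contained illustration of the kind of input it walks; the dict shown in the comment is approximate:

import io

import h5py
import numpy as np

with h5py.File(io.BytesIO(), 'w') as f:
    dset = f.require_group('g').require_dataset('v', (3,), float)
    dset[...] = np.arange(3)
    dset.attrs['units'] = 'angstrom'
# roughly -> {'g': {'v': {'@units': 'angstrom', '__value': [0.0, 1.0, 2.0]}}}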
+ """ + + def load_file(self, **kwargs) -> h5py.Group: + try: + filepath = kwargs.get('file', self.filepath) + mode = ( + 'w' + if isinstance(filepath, str) and not os.path.isfile(filepath) + else 'r' + ) + return h5py.File(filepath, kwargs.get('mode', mode)) + except Exception: + self.logger.error('Cannot read HDF5 file.') + + def to_dict(self, **kwargs) -> dict[str, Any]: + if self.data_object is None: + return {} + + def set_attributes(val: h5py.Dataset | h5py.Group, dct: dict[str, Any]): + for name, attr in val.attrs.items(): + dct[f'{self.attribute_prefix}{name}'] = ( + attr.tolist() if hasattr(attr, 'tolist') else attr + ) + + def group_to_dict( + group: h5py.Group, root: dict[str, Any] | list[dict[str, Any]] + ): + for key, val in group.items(): + key = int(key) if key.isdecimal() else key + path = '.'.join( + [p for p in val.name.split('/') if not p.isdecimal() and p] + ) + if self._required_paths and path not in self._required_paths: + continue + if isinstance(root, list) and isinstance(val, h5py.Group): + group_to_dict(val, root[key]) + set_attributes(val, root[key]) + elif isinstance(root, dict) and isinstance(val, h5py.Group): + default: list[dict[str, Any]] = [ + {} if k.isdecimal() else None for k in val.keys() + ] + group_to_dict( + val, root.setdefault(key, {} if None in default else default) + ) + if not root[key]: + root[key] = {} + set_attributes(val, root[key]) + elif isinstance(val, h5py.Dataset): + data = val[()] + v = ( + data.astype(str if data.dtype == np.object_ else data.dtype) + if isinstance(data, np.ndarray) + else data.decode() + if isinstance(data, bytes) + else data + ) + v = v.tolist() if hasattr(v, 'tolist') else v + attrs = list(val.attrs.keys()) + if attrs: + root[key] = {self.value_key: v} + set_attributes(val, root[key]) + else: + root[key] = v # type: ignore + return root + + dct: dict[str, Any] = {} + group_to_dict(self.data_object, dct) + return dct + + def from_dict(self, dct: dict[str, Any]) -> None: + if self._data_object is not None: + self._data_object.close() + + root = self.load_file(mode='a', file=self.filepath or BytesIO()) + + def dict_to_hdf5(dct: dict[str, Any], root: h5py.Group) -> h5py.Group: + for key, val in dct.items(): + if key.startswith(self.attribute_prefix): + root.attrs[key.lstrip(self.attribute_prefix)] = val + elif isinstance(val, dict) and self.value_key not in val: + group = root.require_group(key) + dict_to_hdf5(val, group) + elif isinstance(val, list) and val and isinstance(val[0], dict): + data = {} + for n, v in enumerate(val): + if self.value_key not in v: + group = root.require_group(f'{key}/{n}') + dict_to_hdf5(v, group) + else: + data[f'{key}/{n}'] = v + dict_to_hdf5(data, root) + else: + attrs = val if isinstance(val, dict) else {} + v = attrs.get(self.value_key, None) if attrs else val + if v is None: + continue + + if isinstance(v, list): + v = np.array(v) + + shape = v.shape if hasattr(v, 'shape') else () + dtype = v.dtype.type if hasattr(v, 'dtype') else type(v) + if dtype in [np.str_, str]: + dtype = h5py.string_dtype() + dataset = root.require_dataset(key, shape, dtype) + dataset[...] = v.tolist() if hasattr(v, 'tolist') else v + for name, attr in attrs.items(): + if name == self.value_key: + continue + dataset.attrs[name.lstrip(self.attribute_prefix)] = attr + + return root + + self._data_object = dict_to_hdf5(dct, root) + + +class XMLParser(MappingParser): + """ + A mapping parser for XML files. The contents of the xml file are converted into + a dictionary using the lxml module (see https://lxml.de/). 
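`XMLParser.to_dict` walks `lxml` elements with the same conventions: XML attributes become `@`-prefixed keys and element text becomes the `__value` entry (numeric text is converted). A tiny runnable illustration of the underlying `lxml` objects; the dict shown in the comment is the approximate result of the conversion:

from lxml import etree

root = etree.fromstring('<a><b name="item2">5</b></a>')
b = root.find('b')
print(b.attrib['name'], b.text)   # item2 5
# roughly -> {'a': {'b': {'@name': 'item2', '__value': 5}}}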
+ """ + + def from_dict(self, dct: dict[str, Any]) -> None: + def to_string(val: Any) -> str | None: + val = val.tolist() if hasattr(val, 'tolist') else val + if not isinstance(val, list): + return str(val) + string = '' + for v in val: + if not isinstance(v, str | float | int): + return None + string += f' {v}' + return string.strip() + + def data_to_element( + tag: str, data: Any, root: etree._Element = None + ) -> etree._Element: + if tag.startswith(self.attribute_prefix) and root is not None: + root.set(tag.lstrip(self.attribute_prefix), data) + elif tag.startswith(self.value_key) and root is not None: + root.text = to_string(data) + elif isinstance(data, dict): + root = ( + etree.Element(tag) if root is None else etree.SubElement(root, tag) + ) + for key, val in data.items(): + data_to_element(key, val, root) + elif isinstance(data, list): + string = to_string(data) + if string is not None: + element = etree.SubElement(root, tag) + element.text = string + else: + for val in data: + data_to_element(tag, val, root) + elif hasattr(data, 'tolist'): + data_to_element(tag, data.tolist(), root) + else: + element = etree.SubElement(root, tag) + element.text = to_string(data) + return root + + self._data_object = data_to_element('root', dct).getchildren()[0] + + def to_dict(self, **kwargs) -> dict[str, Any]: + def convert(text: str) -> Any: + val = text.strip() + try: + val_array = np.array(val.split(), dtype=float) + if np.all(np.mod(val_array, 1) == 0): + val_array = np.array(val_array, dtype=int) + val_array = val_array.tolist() + return val_array[0] if len(val_array) == 1 else val_array + except Exception: + return val + + stack: list[dict[str, Any]] = [] + results: dict[str, Any] = {} + if self.filepath is None: + return results + + current_path = '' + # TODO determine if iterparse is better than iterwalk + with open(self.filepath, 'rb') as f: + for event, element in etree.iterparse(f, events=('start', 'end')): + tag = element.tag + if event == 'start': + current_path = tag if not current_path else f'{current_path}.{tag}' + if ( + self._required_paths + and current_path not in self._required_paths + ): + continue + stack.append({tag: {}}) + else: + path = current_path + current_path = current_path.rsplit('.', 1)[0] + if self._required_paths and path not in self._required_paths: + continue + data = stack.pop(-1) + text = element.text.strip() if element.text else None + attrib = element.attrib + if attrib: + data.setdefault(tag, {}) + data[tag].update( + (f'{self.attribute_prefix}{k}', v) + for k, v in attrib.items() + ) + if text: + value = convert(text) + if attrib or data[tag]: + data[tag][self.value_key] = value + else: + data[tag] = value + if stack and data: + parent = stack[-1][list(stack[-1].keys())[0]] + if tag in parent: + if ( + isinstance(data[tag], list) + and isinstance(parent[tag], list) + and parent[tag] + and not isinstance(parent[tag][0], list) + ): + parent[tag] = [parent[tag]] + if isinstance(parent[tag], list): + parent[tag].append(data[tag]) + else: + parent[tag] = [ + parent[tag], + data[tag], + ] + else: + # parent[tag] = [data[tag]] if attrib else data[tag] + parent[tag] = data[tag] + else: + results = data + return results + + def load_file(self) -> etree._Element: + try: + return etree.parse(self.filepath) + except Exception: + self.logger.error('Cannot read XML file') + + +class TextParser(MappingParser): + """ + Interface to text file parser. 
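The `TextParser` wrapper delegates to the quantity/regex driven `TextFileParser` (`nomad.parsing.file_parser.TextParser`). A rough usage sketch, assuming the conventional `Quantity(name, pattern, ...)` construction; the file name and regex pattern are made up:

from nomad.parsing.file_parser import Quantity, TextParser as TextFileParser

text_parser = TextFileParser(
    quantities=[Quantity('energy', r'Total energy:\s*(\S+)', repeats=True)]
)
text_parser.mainfile = 'run.out'   # parsing of the quantities happens on demand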
+ """ + + text_parser: TextFileParser = None + + def to_dict(self, **kwargs) -> dict[str, Any]: + if self.data_object: + self.data_object.parse() + return self.data_object._results + return {} + + def from_dict(self, dct: dict[str, Any]): + raise NotImplementedError + + def load_file(self) -> Any: + if self.filepath: + self.text_parser.findlazy = True + self.text_parser.mainfile = self.filepath + return self.text_parser + + +if __name__ == '__main__': + from nomad.parsing.file_parser.mapping_parser import MetainfoParser + from tests.parsing.test_mapping_parser import ( + BSection, + ExampleHDF5Parser, + ExampleSection, + ) + + with MetainfoParser() as archive_parser, ExampleHDF5Parser() as hdf5_parser: + archive_parser.annotation_key = 'hdf5' + archive_parser.data_object = ExampleSection(b=[BSection(v=np.eye(2))]) + + d = dict( + g=dict( + g1=dict(v=[dict(d=np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]))]), + v=['x', 'y', 'z'], + g=dict( + c1=dict( + i=[4, 6], + f=[ + {'@index': 0, '__value': 1}, + {'@index': 2, '__value': 2}, + {'@index': 1, '__value': 1}, + ], + d=[dict(e=[3, 0, 4, 8, 1, 6]), dict(e=[1, 7, 8, 3, 9, 1])], + ), + c=dict( + v=[dict(d=np.eye(3), e=np.zeros(3)), dict(d=np.ones((3, 3)))] + ), + ), + ) + ) + + hdf5_parser.from_dict(d) + + hdf5_parser.convert(archive_parser) diff --git a/nomad/parsing/file_parser/text_parser.py b/nomad/parsing/file_parser/text_parser.py index 10802db4aa74c8fee7928a14df1aaedac06f97eb..a742e171ebae728ad626fd92ec69f010acd41f39 100644 --- a/nomad/parsing/file_parser/text_parser.py +++ b/nomad/parsing/file_parser/text_parser.py @@ -13,16 +13,17 @@ # limitations under the License. -import mmap import io +import mmap import re +from collections.abc import Callable +from typing import Any + import numpy as np import pint -from typing import List, Union, Type, Any -from collections.abc import Callable -from nomad.parsing.file_parser import FileParser from nomad.metainfo import Quantity as mQuantity +from nomad.parsing.file_parser import FileParser from nomad.utils import get_logger @@ -114,7 +115,7 @@ class Quantity: **kwargs, ): self.name: str - self.dtype: str + self.dtype: str | Any self.unit: str self.shape: list[int] if isinstance(quantity, str): @@ -273,6 +274,7 @@ class TextParser(FileParser): super().__init__(mainfile, logger=logger, open=kwargs.get('open', None)) self._quantities: list[Quantity] = quantities self.findall: bool = kwargs.get('findall', True) + self.findlazy: bool = kwargs.get('findlazy', None) self._kwargs = kwargs self._file_length: int = kwargs.get('file_length', 0) self._file_offset: int = kwargs.get('file_offset', 0) @@ -488,8 +490,10 @@ class TextParser(FileParser): sub_parser = quantity.sub_parser.copy() sub_parser.mainfile = self.mainfile sub_parser.logger = self.logger + if sub_parser.findlazy is None: + sub_parser.findlazy = self.findlazy sub_parser._file_handler = b' '.join([g for g in res.groups() if g]) - value.append(sub_parser.parse()) + value.append(sub_parser if sub_parser.findlazy else sub_parser.parse()) else: try: @@ -562,8 +566,9 @@ class TextParser(FileParser): self._parse_quantity(quantity) # free up memory - if isinstance(self._file_handler, mmap.mmap) and self.findall: - self._file_handler.close() + if self.findall: + if isinstance(self._file_handler, mmap.mmap): + self._file_handler.close() self._file_handler = b' ' return self diff --git a/nomad/parsing/file_parser/xml_parser.py b/nomad/parsing/file_parser/xml_parser.py index e1fecc6b3f32a359ad4c3ca96806cd56731ab4d8..5739ec9bf394c42c376a4a2fcd82de38437494dc 
100644 --- a/nomad/parsing/file_parser/xml_parser.py +++ b/nomad/parsing/file_parser/xml_parser.py @@ -15,8 +15,9 @@ import os import re -import numpy as np from xml.etree import ElementTree + +import numpy as np from lxml import etree from nomad.parsing.file_parser import FileParser diff --git a/nomad/parsing/parser.py b/nomad/parsing/parser.py index cc74a0297f8c294050b16edce966fc45ad600fe4..9dd04e06fd0d6f699571370befde56b6fecb26dd 100644 --- a/nomad/parsing/parser.py +++ b/nomad/parsing/parser.py @@ -16,19 +16,20 @@ # limitations under the License. # -from typing import List, Dict, Union, Any, IO -from collections.abc import Iterable -from abc import ABCMeta, abstractmethod -import re +import importlib +import json import os import os.path +import re +from abc import ABCMeta, abstractmethod +from collections.abc import Iterable from functools import lru_cache -import importlib -from pydantic import BaseModel, Extra # pylint: disable=unused-import -import yaml +from typing import IO, Any + import h5py import numpy as np -import json +import yaml +from pydantic import BaseModel, Extra # noqa: F401 from nomad import utils from nomad.config import config @@ -427,7 +428,11 @@ class MatchingParserInterface(MatchingParser): An interface to the NOMAD parsers. Arguments: - parser_class_name: concatenation of module path and parser class name + parser_class_name: + path specification in python style up to the parser class + in case of a plugin, the path starts from `src/`. + E.g. `nomad_parser.parsers.parser.Parser` + for a `Parser` under `<plugin_root>/src/nomad_parser/parsers/parser.py`. """ def __init__(self, parser_class_name: str, *args, **kwargs): diff --git a/nomad/parsing/parsers.py b/nomad/parsing/parsers.py index 3375e235c7803ac90e53969a9a02a685f62d4c4d..47b3653da34dc7836ffe9823189a033f536d71ce 100644 --- a/nomad/parsing/parsers.py +++ b/nomad/parsing/parsers.py @@ -17,34 +17,33 @@ # import os.path -from typing import Optional, Tuple, List, Dict from collections.abc import Iterable from nomad.config import config -from nomad.config.models.plugins import Parser as ParserPlugin, ParserEntryPoint +from nomad.config.models.plugins import Parser as ParserPlugin +from nomad.config.models.plugins import ParserEntryPoint from nomad.datamodel import EntryArchive, EntryMetadata, results -from nomad.datamodel.context import Context, ClientContext +from nomad.datamodel.context import ClientContext, Context +from .artificial import ChaosParser, EmptyParser, GenerateRandomParser, TemplateParser from .parser import ( - MissingParser, - BrokenParser, - Parser, ArchiveParser, + BrokenParser, MatchingParserInterface, + MissingParser, + Parser, ) -from .artificial import EmptyParser, GenerateRandomParser, TemplateParser, ChaosParser from .tabular import TabularDataParser try: # these packages are not available without parsing extra, which is ok, if the # parsers are only initialized to load their metainfo definitions - import platform - - import magic - import gzip import bz2 + import gzip import lzma + import magic + _compressions = { b'\x1f\x8b\x08': ('gz', gzip.open), b'\x42\x5a\x68': ('bz2', bz2.open), diff --git a/nomad/parsing/tabular.py b/nomad/parsing/tabular.py index dcd4eb25946187e48f13fa36f57ccc7955b5f56a..38082a6d814ba919501bdb945d90823910d586a1 100644 --- a/nomad/parsing/tabular.py +++ b/nomad/parsing/tabular.py @@ -15,32 +15,31 @@ # See the License for the specific language governing permissions and # limitations under the License. 
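A hedged sketch of the `parser_class_name` convention documented above for `MatchingParserInterface`; the plugin package, module and class names are hypothetical, and any further matching keyword arguments are omitted:

    from nomad.parsing.parser import MatchingParserInterface

    # For a plugin parser class defined in
    #   <plugin_root>/src/nomad_parser_example/parsers/parser.py
    # the dotted path starts below `src/`:
    interface = MatchingParserInterface('nomad_parser_example.parsers.parser.ExampleParser')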
# +import json +import math import os -from typing import List, Dict, Set, Any, Tuple, Union -from collections.abc import Callable -from collections.abc import Iterator, Iterable - -import pandas as pd import re -import math +from collections.abc import Callable, Iterable, Iterator +from typing import Any + import numpy as np -import json +import pandas as pd import yaml -from cachetools import cached, LRUCache +from cachetools import LRUCache, cached from nomad import utils -from nomad.parsing import MatchingParser -from nomad.units import ureg from nomad.datamodel.data import ArchiveSection -from nomad.metainfo import Section, Quantity, Package, Reference, MSection, Property -from nomad.metainfo.metainfo import MetainfoError, SubSection, MProxy from nomad.datamodel.metainfo.annotations import ( TabularAnnotation, - TabularParserAnnotation, TabularFileModeEnum, TabularMode, + TabularParserAnnotation, ) +from nomad.metainfo import MSection, Package, Property, Quantity, Reference, Section +from nomad.metainfo.metainfo import MetainfoError, MProxy, SubSection from nomad.metainfo.util import MSubSectionList +from nomad.parsing import MatchingParser +from nomad.units import ureg from nomad.utils import generate_entry_id # We define a simple base schema for tabular data. The parser will then generate more diff --git a/nomad/processing/base.py b/nomad/processing/base.py index c9a3a52c7197f1a45e0c37fdb481b8acbf1d9f39..40a93ce18ee9eae21f113746ca5307e6322684db 100644 --- a/nomad/processing/base.py +++ b/nomad/processing/base.py @@ -16,43 +16,42 @@ # limitations under the License. # -from typing import Any, Tuple, List, Dict, NamedTuple +import functools import logging -import time import os +import time from collections import defaultdict +from datetime import datetime +from typing import Any, NamedTuple + +import billiard +from billiard.exceptions import WorkerLostError from celery import Celery, Task -from celery.worker.request import Request -from celery.bootsteps import StartStopStep +from celery.exceptions import SoftTimeLimitExceeded from celery.signals import ( - after_setup_task_logger, after_setup_logger, - worker_process_init, + after_setup_task_logger, celeryd_after_setup, + worker_process_init, worker_process_shutdown, ) from celery.utils import worker_direct -from celery.exceptions import SoftTimeLimitExceeded -import billiard -from billiard.exceptions import WorkerLostError +from celery.worker.request import Request from mongoengine import ( - Document, - StringField, - ListField, DateTimeField, + Document, IntField, + ListField, + StringField, ValidationError, ) from mongoengine.connection import ConnectionFailure -from datetime import datetime -import functools -from nomad import utils, infrastructure +import nomad.patch # noqa: F401 +from nomad import infrastructure, utils +from nomad.app.v1.routers.info import statistics from nomad.config import config from nomad.config.models.config import CELERY_WORKER_ROUTING -from nomad.app.v1.routers.info import statistics -import nomad.patch # pylint: disable=unused-import - if config.logstash.enabled: from nomad.utils import structlogging diff --git a/nomad/processing/data.py b/nomad/processing/data.py index bc91723d7cb4a88885298b247d430e2a05425135..706ebd80ebe3c3ff45b89cce651efac314d19bb0 100644 --- a/nomad/processing/data.py +++ b/nomad/processing/data.py @@ -29,103 +29,84 @@ entries, and files """ import base64 -from typing import ( - Optional, - cast, - Any, - List, - Tuple, - Set, - Dict, - Union, -) -from collections.abc import 
Iterator, Iterable, Sequence -from pydantic import ValidationError -from pydantic_core import InitErrorDetails, PydanticCustomError +import copy +import hashlib +import os.path +from collections.abc import Iterable, Iterator, Sequence +from contextlib import contextmanager +from datetime import datetime +from typing import Any, Union, cast + +import requests import rfc3161ng +import validators +from fastapi.exceptions import RequestValidationError from mongoengine import ( - StringField, - DateTimeField, BooleanField, - IntField, - ListField, + DateTimeField, DictField, EmbeddedDocument, EmbeddedDocumentField, + IntField, + ListField, + StringField, ) from pymongo import UpdateOne from structlog import wrap_logger -from contextlib import contextmanager -import copy -import os.path -from datetime import datetime -import hashlib -from structlog.processors import StackInfoRenderer, format_exc_info, TimeStamper -import requests -from fastapi.exceptions import RequestValidationError -import validators +from structlog.processors import StackInfoRenderer, TimeStamper, format_exc_info -from nomad import ( - utils, - infrastructure, - search, - datamodel, - metainfo, - parsing, - client, +from nomad import client, datamodel, infrastructure, metainfo, parsing, search, utils +from nomad.app.v1.models import ( + Aggregation, + MetadataEditRequest, + MetadataPagination, + MetadataRequired, + TermsAggregation, + restrict_query_to_upload, +) +from nomad.app.v1.routers.metainfo import store_package_definition +from nomad.archive import ( + delete_partial_archives_from_mongo, + to_json, + write_partial_archive_to_mongo, ) -from nomad.config import config from nomad.common import is_safe_relative_path +from nomad.config import config +from nomad.config.models.config import Reprocess from nomad.config.models.plugins import ExampleUploadEntryPoint - +from nomad.datamodel import ( + AuthLevel, + EditableUserMetadata, + EntryArchive, + EntryMetadata, + MongoEntryMetadata, + MongoSystemMetadata, + MongoUploadMetadata, + ServerContext, +) from nomad.datamodel.datamodel import RFC3161Timestamp from nomad.files import ( - RawPathInfo, PathObject, - UploadFiles, PublicUploadFiles, + RawPathInfo, StagingUploadFiles, + UploadFiles, create_tmp_dir, ) -from nomad.groups import user_group_exists, get_group_ids +from nomad.groups import get_group_ids, user_group_exists from nomad.metainfo.data_type import Datatype, Datetime +from nomad.normalizing import normalizers +from nomad.parsing import Parser +from nomad.parsing.parsers import match_parser, parser_dict from nomad.processing.base import ( Proc, + ProcessAlreadyRunning, + ProcessFailure, + ProcessStatus, process, process_local, - ProcessStatus, - ProcessFailure, - ProcessAlreadyRunning, -) -from nomad.parsing import Parser -from nomad.parsing.parsers import parser_dict, match_parser -from nomad.normalizing import normalizers -from nomad.datamodel import ( - EntryArchive, - EntryMetadata, - MongoUploadMetadata, - MongoEntryMetadata, - MongoSystemMetadata, - EditableUserMetadata, - AuthLevel, - ServerContext, -) -from nomad.archive import ( - write_partial_archive_to_mongo, - delete_partial_archives_from_mongo, - to_json, ) -from nomad.app.v1.models import ( - MetadataEditRequest, - Aggregation, - TermsAggregation, - MetadataPagination, - MetadataRequired, - restrict_query_to_upload, -) -from nomad.app.v1.routers.metainfo import store_package_definition from nomad.search import update_metadata as es_update_metadata -from nomad.config.models.config import Reprocess from 
nomad.utils.pydantic import CustomErrorWrapper section_metadata = datamodel.EntryArchive.metadata.name @@ -916,6 +897,7 @@ class Entry(Proc): external database where the data was imported from nomad_version: the NOMAD version used for the last processing nomad_commit: the NOMAD commit used for the last processing + nomad_distro_commit_url: the NOMAD distro commit url used for the last processing comment: a user provided comment for this entry references: user provided references (URLs) for this entry entry_coauthors: a user provided list of co-authors specific for this entry. Note @@ -936,6 +918,7 @@ class Entry(Proc): external_id = StringField() nomad_version = StringField() nomad_commit = StringField() + nomad_distro_commit_url = StringField() comment = StringField() references = ListField(StringField()) entry_coauthors = ListField() @@ -1019,8 +1002,11 @@ class Entry(Proc): In this case, the timestamp stored in the archive is used. If no previous timestamp is available, a new timestamp is generated. """ + distro_commit_url = utils.nomad_distro_metadata() + entry_metadata.nomad_version = config.meta.version entry_metadata.nomad_version = config.meta.version entry_metadata.nomad_commit = '' + entry_metadata.nomad_distro_commit_url = distro_commit_url or '' entry_metadata.entry_hash = self.upload_files.entry_hash( self.mainfile, self.mainfile_key ) diff --git a/nomad/search.py b/nomad/search.py index 1dc31f8abd05cd74a95393253f7fc35b75d2d15d..0a3785622d88504bdd5733db27ae3134972d54ae 100644 --- a/nomad/search.py +++ b/nomad/search.py @@ -34,18 +34,9 @@ partially implemented. import json import math +from collections.abc import Callable, Generator, Iterable, Iterator from enum import Enum -from typing import ( - Any, - Dict, - List, - Optional, - Tuple, - Union, - cast, -) -from collections.abc import Callable -from collections.abc import Generator, Iterable, Iterator +from typing import Any, cast import elasticsearch.helpers from elasticsearch.exceptions import RequestError, TransportError @@ -85,12 +76,7 @@ from nomad.app.v1.models.models import ( Value, ) from nomad.config import config -from nomad.datamodel import ( - EntryArchive, - EntryMetadata, - AuthorReference, - UserReference, -) +from nomad.datamodel import AuthorReference, EntryArchive, EntryMetadata, UserReference from nomad.groups import MongoUserGroup from nomad.metainfo import Datetime, Package, Quantity from nomad.metainfo.elasticsearch_extension import ( diff --git a/nomad/utils/__init__.py b/nomad/utils/__init__.py index 3400910cfb9218c8f0e03ee201b07177556b5543..f124328c29c232a547f4bc6f50992492617f3886 100644 --- a/nomad/utils/__init__.py +++ b/nomad/utils/__init__.py @@ -38,7 +38,7 @@ Depending on the configuration all logs will also be send to a central logstash. .. 
autofunc::nomad.utils.strip """ -from typing import List, Union, Any, Dict, Optional +from typing import Any from collections.abc import Iterable from collections import OrderedDict from functools import reduce @@ -54,6 +54,7 @@ from datetime import timedelta import collections import logging import inspect +from importlib.metadata import PackageNotFoundError, metadata, version import orjson import os @@ -1147,3 +1148,46 @@ def dict_to_dataframe( filtered_df = filter_df_columns_by_prefix(df, keys_to_filter) filtered_dict = dataframe_to_dict(filtered_df) return pd.json_normalize(filtered_dict, errors='ignore') + + +def nomad_distro_metadata() -> str | None: + """ + Retrieves metadata for the 'nomad-distribution' package, including the + repository URL with latest commit hash. + + Returns: + The repo url with commit hash or None if unavailable. + """ + try: + distro_metadata = metadata('nomad-distribution') + + # Extract repository URL from Project-URL metadata + project_urls: list[str] = distro_metadata.get_all('Project-URL', []) + repo_url = next( + ( + url.split(', ', 1)[1] + for url in project_urls + if url.startswith('repository, ') + ), + None, + ) + + distro_version = version('nomad-distribution') + if '+g' in distro_version: + # Split on '+g' to extract the commit hash from the version string, as 'g' is a Git-specific prefix. + commit = distro_version.split('+g')[ + -1 + ] # Extract commit hash if present (setuptools_scm format) + else: + commit = ( + f'v{distro_version}' # Otherwise, assume it's a tag and prefix with 'v' + ) + + if not repo_url or not commit: + return None + + commit_url = f'{repo_url}/tree/{commit}' + + return commit_url + except (PackageNotFoundError, IndexError, StopIteration, KeyError): + return None diff --git a/nomad/utils/exampledata.py b/nomad/utils/exampledata.py index 6fec3a33a862a355d12967ccdf3b98d339b43a09..4603a39b65b32d6c9e5afa016c34e1ed1275981d 100644 --- a/nomad/utils/exampledata.py +++ b/nomad/utils/exampledata.py @@ -16,16 +16,16 @@ # limitations under the License. 
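To make the behaviour of `nomad_distro_metadata` above concrete, a hedged example (the repository URL and version strings are hypothetical):

    from nomad.utils import nomad_distro_metadata

    # If the installed 'nomad-distribution' package advertises
    #   Project-URL: repository, https://github.com/example/nomad-distribution
    # then a setuptools_scm version like '1.3.1.dev42+g1a2b3c4' yields
    #   'https://github.com/example/nomad-distribution/tree/1a2b3c4'
    # and a plain release version like '1.3.1' is treated as a tag:
    #   'https://github.com/example/nomad-distribution/tree/v1.3.1'
    commit_url = nomad_distro_metadata()  # None if the package is not installed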
# -from typing import List, Optional, Union, Dict, Any -from datetime import datetime, timedelta import os +from datetime import datetime, timedelta +from typing import Any -from nomad import search, files -from nomad.datamodel import EntryMetadata, EntryArchive, Results +from nomad import files, search +from nomad.datamodel import EntryArchive, EntryMetadata, Results +from nomad.datamodel.metainfo import runschema from nomad.datamodel.metainfo.workflow import Workflow -from nomad.processing.data import mongo_upload_metadata from nomad.normalizing import normalizers -from nomad.datamodel.metainfo import runschema +from nomad.processing.data import mongo_upload_metadata class ExampleData: @@ -61,8 +61,8 @@ class ExampleData: es_nomad_version: str = None, archive_nomad_version: str = None, ): - from tests.test_files import create_test_upload_files from nomad import processing as proc + from tests.test_files import create_test_upload_files errors = None @@ -145,8 +145,8 @@ class ExampleData: parser_name: str | None = None, ): """Creates an entry from a mainfile which then gets parsed and normalized.""" - from nomad.parsing import parsers from nomad import parsing + from nomad.parsing import parsers assert upload_id in self.uploads, 'Must create the upload first' diff --git a/nomad/utils/json_transformer.py b/nomad/utils/json_transformer.py index 9310a8894fae2a9a46ad97781187d84c8aea839c..1a59ace7925db142aa3ab2849de82dd2272890b4 100644 --- a/nomad/utils/json_transformer.py +++ b/nomad/utils/json_transformer.py @@ -16,11 +16,11 @@ # limitations under the License. # import re -from typing import Union, Any, Optional +from typing import Any import jmespath -from nomad.datamodel.metainfo.annotations import Rules, Rule, Condition +from nomad.datamodel.metainfo.annotations import Condition, Rule, Rules class Transformer: diff --git a/nomad/utils/structlogging.py b/nomad/utils/structlogging.py index 0a97f0a9546af4c65106a12d06abcc3c20a6e4a0..3cd4d85ba4e90b837d2be650e1cb8c6d9420d017 100644 --- a/nomad/utils/structlogging.py +++ b/nomad/utils/structlogging.py @@ -25,22 +25,22 @@ take keyword arguments for structured data. Otherwise `get_logger` can be used similar to the standard `logging.getLogger`. 
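To illustrate the structured-logging call style described above, a small sketch (the event name and keyword fields are arbitrary examples):

    from nomad import utils

    logger = utils.get_logger(__name__)
    # extra keyword arguments become structured fields of the log event
    logger.info('entry processed', upload_id='some_upload_id', parser='parsers/archive')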
""" -from typing import cast, Any +import json import logging +import os.path +import re from logging.handlers import WatchedFileHandler +from typing import Any, cast + +import logstash import structlog from structlog.processors import ( + JSONRenderer, StackInfoRenderer, - format_exc_info, TimeStamper, - JSONRenderer, + format_exc_info, ) from structlog.stdlib import LoggerFactory -import logstash -from contextlib import contextmanager -import json -import re -import os.path from nomad import utils from nomad.config import config diff --git a/pyproject.toml b/pyproject.toml index d6781f5710d3eb8b8c68d7ae88fd4d30821e89a7..81e4c3f87c02b97108b3336af9b1a4a86c30cdaf 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -26,6 +26,7 @@ dependencies = [ 'httpx>=0.23.3,<0.28', 'importlib_metadata~=7.1.0', 'jmespath>=0.10.0', + 'jsonpath-ng', 'kaleido==0.2.1', 'lxml-html-clean>=0.1.0', 'lxml>=5.2', @@ -78,7 +79,7 @@ infrastructure = [ 'celery>=5.0', 'dockerspawner==13.0.0', 'elasticsearch>=7.0,<8', - 'fastapi>0.100', # later versions pending pydantic v2 upgrade + 'fastapi>0.100', 'filelock==3.3.1', 'gitpython>=3.0', 'gunicorn>=21.2.0,<22.0.0', @@ -121,7 +122,7 @@ dev = [ 'mkdocs-material>=9.0', 'mkdocs-redirects>=1.0', 'mkdocs>=1.0', - 'mypy==1.0.1', # bug: incompatible with derived models of pydantic v1 + 'mypy>=1.15', 'names==0.3.0', 'uv>=0.2.35', 'pytest-asyncio>=0.23', @@ -151,10 +152,12 @@ indent-width = 4 [tool.ruff.lint] select = [ - "E", # pycodestyle - "W", # pycodestyle - "PL", # pylint - "UP", # pyupgrade + "E", # pycodestyle + "F401", # remove unused import + "I001", # sort imports + "PL", # pylint + "UP", # pyupgrade + "W", # pycodestyle ] ignore = [ "E501", # Line too long ({width} > {limit} characters) @@ -170,11 +173,12 @@ ignore = [ "PLW2901", # redefined-loop-name "PLR1714", # consider-using-in "PLR5501", # else-if-used - "UP035", # deprecated-import ] fixable = ["ALL"] +isort.split-on-trailing-comma = false [tool.ruff.lint.extend-per-file-ignores] +"__init__.py" = ["F401", "I001"] "nomad/app/v1/models/graph/utils.py" = [ "UP007", ] # causes pydantic model building errors @@ -203,7 +207,7 @@ ignore_missing_imports = true follow_imports = "silent" no_strict_optional = true disable_error_code = "import, annotation-unchecked" - +plugins = ["pydantic.mypy"] [tool.setuptools_scm] diff --git a/requirements-dev.txt b/requirements-dev.txt index 386a6f469eca3518d30c97ed5ba809057afec60b..e1b75bf9ff40359d6ee77f3ce10d105ac3fb64a2 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -120,7 +120,7 @@ monty==2025.1.9 # via pymatgen, -r requirements.txt mpmath==1.3.0 # via sympy, -r requirements.txt msgpack==1.1.0 # via -r requirements.txt, nomad-lab (pyproject.toml) msgspec==0.19.0 # via -r requirements.txt, nomad-lab (pyproject.toml) -mypy==1.0.1 # via nomad-lab (pyproject.toml) +mypy==1.15.0 # via nomad-lab (pyproject.toml) mypy-extensions==1.0.0 # via mypy names==0.3.0 # via nomad-lab (pyproject.toml) netcdf4==1.6.5 # via -r requirements.txt, nomad-lab (pyproject.toml) diff --git a/tests/app/test_app.py b/tests/app/test_app.py index 12a36607c0894240027972d8c5fabd4664d73e00..6140f085b980fbecd8dbbd1f1ce20ebff0759611 100644 --- a/tests/app/test_app.py +++ b/tests/app/test_app.py @@ -16,9 +16,10 @@ # limitations under the License. 
# -import pytest import os +import pytest + from nomad.config import config diff --git a/tests/app/test_dcat.py b/tests/app/test_dcat.py index 4f08b698f2a66cf80bed6f094aaaa07989a98cb2..1f0ecd711152a3b32edbf0e60731fe4e04016f3e 100644 --- a/tests/app/test_dcat.py +++ b/tests/app/test_dcat.py @@ -16,14 +16,15 @@ # limitations under the License. # -import pytest from datetime import datetime + +import pytest from fastapi.testclient import TestClient from nomad.app.dcat.main import app from nomad.app.dcat.mapping import Mapping -from nomad.datamodel.results import Material, Results from nomad.datamodel import Dataset +from nomad.datamodel.results import Material, Results from nomad.utils.exampledata import ExampleData diff --git a/tests/app/test_h5grove.py b/tests/app/test_h5grove.py index 592cb623ec0faca0212bb4fe56f70f17a8620f2c..a5b7446b381109205ee1799ba17acbb10e3aaf30 100644 --- a/tests/app/test_h5grove.py +++ b/tests/app/test_h5grove.py @@ -15,15 +15,16 @@ # See the License for the specific language governing permissions and # limitations under the License. # -import pytest import os + import h5py +import pytest from fastapi.testclient import TestClient from nomad.app import h5grove_app -from nomad.utils.exampledata import ExampleData -from nomad.files import StagingUploadFiles from nomad.config import config +from nomad.files import StagingUploadFiles +from nomad.utils.exampledata import ExampleData @pytest.fixture diff --git a/tests/app/test_optimade.py b/tests/app/test_optimade.py index a7841ff89a7a45e7ca0c8977f932047c5dab4623..3b95f9173ba0f0cfad64ecda1cad549457118c8e 100644 --- a/tests/app/test_optimade.py +++ b/tests/app/test_optimade.py @@ -17,15 +17,15 @@ # import json + import pytest -from nomad.processing import Upload from nomad import utils -from nomad.search import search from nomad.app.optimade import parse_filter from nomad.app.optimade.common import provider_specific_fields +from nomad.processing import Upload +from nomad.search import search from nomad.utils.exampledata import ExampleData - from tests.fixtures.infrastructure import clear_elastic, clear_raw_files diff --git a/tests/app/test_resources.py b/tests/app/test_resources.py index e76d787079aabe24883b2fa234e448f52ee16363..4f1b3929fa0e0bcd263241dd16877d77cc56cfcc 100644 --- a/tests/app/test_resources.py +++ b/tests/app/test_resources.py @@ -15,21 +15,22 @@ # See the License for the specific language governing permissions and # limitations under the License. # -import pytest import json -from fastapi.testclient import TestClient -import httpx -from urllib.parse import urlencode import time +from urllib.parse import urlencode + import dateutil.parser +import httpx +import pytest +from fastapi.testclient import TestClient -from nomad.config import config from nomad.app.resources.main import app, remove_mongo from nomad.app.resources.routers.resources import ( aflow_prototypes_db, - springer_materials_db, optimade_providers, + springer_materials_db, ) +from nomad.config import config def _to_datetime(datetime_str): diff --git a/tests/app/v1/routers/common.py b/tests/app/v1/routers/common.py index 19acc77e630f4f28250149dcc648a772472b60e9..20f17289ddb49fa9e1b8a23161ce85ada5f1fafe 100644 --- a/tests/app/v1/routers/common.py +++ b/tests/app/v1/routers/common.py @@ -16,17 +16,16 @@ # limitations under the License. 
# -import pytest -from typing import Set, Literal, Optional, List, Any import json import re -from devtools import debug +from typing import Any, Literal from urllib.parse import urlencode -from nomad.utils import deep_get +import pytest +from devtools import debug from nomad.datamodel import results - +from nomad.utils import deep_get from tests.utils import assert_at_least, assert_url_query_args, build_url n_code_names = results.Simulation.program_name.a_elasticsearch[ diff --git a/tests/app/v1/routers/test_auth.py b/tests/app/v1/routers/test_auth.py index c814260a3c4b502c8c841b09a2cb1fd2eb1302fe..2c5e3e67c8f42da033bc0c1bc716025638f89d63 100644 --- a/tests/app/v1/routers/test_auth.py +++ b/tests/app/v1/routers/test_auth.py @@ -16,9 +16,10 @@ # limitations under the License. # -import pytest from urllib.parse import urlencode +import pytest + def perform_get_token_test(client, http_method, status_code, username, password): if http_method == 'post': diff --git a/tests/app/v1/routers/test_datasets.py b/tests/app/v1/routers/test_datasets.py index eecbdf50251be6853bbadb898d844a11726bf255..7153ee770a9b3a076f6f7072354924a2d0f7deff 100644 --- a/tests/app/v1/routers/test_datasets.py +++ b/tests/app/v1/routers/test_datasets.py @@ -16,17 +16,16 @@ # limitations under the License. # -from typing import List -import pytest -from urllib.parse import urlencode from datetime import datetime +from urllib.parse import urlencode + +import pytest -from nomad.datamodel import Dataset from nomad import processing +from nomad.app.v1.models import Any_, Query +from nomad.datamodel import Dataset from nomad.search import search -from nomad.app.v1.models import Query, Any_ from nomad.utils.exampledata import ExampleData - from tests.fixtures.users import admin_user_id from .common import assert_response diff --git a/tests/app/v1/routers/test_entries.py b/tests/app/v1/routers/test_entries.py index 8ec76fef0bfa476fb44668b678ee03be7fa34bc2..36985eb08f55098d5930560ca4707427e14f710a 100644 --- a/tests/app/v1/routers/test_entries.py +++ b/tests/app/v1/routers/test_entries.py @@ -16,37 +16,37 @@ # limitations under the License. 
# -import pytest -from urllib.parse import urlencode -import zipfile import io import json +import zipfile +from urllib.parse import urlencode + +import pytest from nomad.metainfo.elasticsearch_extension import entry_type, schema_separator from nomad.utils.exampledata import ExampleData - -from tests.test_files import example_mainfile_contents, append_raw_files # pylint: disable=unused-import +from tests.test_files import append_raw_files, example_mainfile_contents # noqa: F401 from tests.variables import python_schema_name from .common import ( aggregation_exclude_from_search_test_parameters, - assert_response, + aggregation_test_parameters, + aggregation_test_parameters_default, + assert_aggregation_response, + assert_aggregations, assert_base_metadata_response, - assert_query_response, + assert_browser_download_headers, assert_metadata_response, - assert_required, - assert_aggregations, assert_pagination, - assert_browser_download_headers, - post_query_test_parameters, + assert_query_response, + assert_required, + assert_response, get_query_test_parameters, - perform_owner_test, owner_test_parameters, pagination_test_parameters, - aggregation_test_parameters, - aggregation_test_parameters_default, - assert_aggregation_response, perform_entries_metadata_test, + perform_owner_test, + post_query_test_parameters, ) """ diff --git a/tests/app/v1/routers/test_entries_archive_edit.py b/tests/app/v1/routers/test_entries_archive_edit.py index 8a2e90107dbffd9b62c3d1d9677d5627e4ed7559..ba9903ebde1018599934eccc59a914e1243c6f9b 100644 --- a/tests/app/v1/routers/test_entries_archive_edit.py +++ b/tests/app/v1/routers/test_entries_archive_edit.py @@ -17,6 +17,7 @@ # import json + import pytest from nomad.datamodel.datamodel import EntryArchive, EntryMetadata diff --git a/tests/app/v1/routers/test_entries_edit.py b/tests/app/v1/routers/test_entries_edit.py index cd793d3fb4c43a5696bf9dc7800296de9b254b88..1d3db4a4df67d2a0e8bd2479212a3c2708f84692 100644 --- a/tests/app/v1/routers/test_entries_edit.py +++ b/tests/app/v1/routers/test_entries_edit.py @@ -16,22 +16,22 @@ # limitations under the License. 
# -import pytest from datetime import datetime -from nomad.search import search +import pytest + +from nomad import processing as proc +from nomad import utils from nomad.datamodel import Dataset -from nomad import processing as proc, utils +from nomad.search import search from nomad.utils.exampledata import ExampleData - from tests.app.v1.routers.common import assert_response from tests.processing.test_edit_metadata import ( - assert_metadata_edited, - all_coauthor_entry_metadata, all_admin_entry_metadata, + all_coauthor_entry_metadata, + assert_metadata_edited, ) - logger = utils.get_logger(__name__) diff --git a/tests/app/v1/routers/test_federation.py b/tests/app/v1/routers/test_federation.py index cb990557fa7cc14b526542074f65c298a93002cb..3b8d8284b51d58ccdd43b54bd5b1db150f616b88 100644 --- a/tests/app/v1/routers/test_federation.py +++ b/tests/app/v1/routers/test_federation.py @@ -1,9 +1,10 @@ -import requests import json -import pytest -from copy import deepcopy import logging import zlib +from copy import deepcopy + +import pytest +import requests from nomad.config import config from nomad.utils.structlogging import LogstashFormatter diff --git a/tests/app/v1/routers/test_graph.py b/tests/app/v1/routers/test_graph.py index 4e1beee2d52e29c09da29061caedfdd2d1ef48fa..5a12924f66a284f8d3af88036857ed051cf8b171 100644 --- a/tests/app/v1/routers/test_graph.py +++ b/tests/app/v1/routers/test_graph.py @@ -18,12 +18,11 @@ import pytest -from nomad.graph.graph_reader import EntryReader, Token from nomad.datamodel import EntryArchive -from nomad.metainfo import Section, Quantity +from nomad.datamodel.metainfo import SCHEMA_IMPORT_ERROR, simulationworkflowschema +from nomad.graph.graph_reader import EntryReader, Token from nomad.utils.exampledata import ExampleData from tests.archive.test_archive import assert_dict -from nomad.datamodel.metainfo import simulationworkflowschema, SCHEMA_IMPORT_ERROR # try: # from rich.pretty import pprint diff --git a/tests/app/v1/routers/test_materials.py b/tests/app/v1/routers/test_materials.py index 3adbb143e28db88adf816bda0b6fb246d073b7c3..a1e998720b957664a3542fda24169942467dabae 100644 --- a/tests/app/v1/routers/test_materials.py +++ b/tests/app/v1/routers/test_materials.py @@ -16,27 +16,27 @@ # limitations under the License. 
# -import pytest from urllib.parse import urlencode -from nomad.metainfo.elasticsearch_extension import material_entry_type -from nomad.datamodel import results +import pytest -from tests.test_files import example_mainfile_contents # pylint: disable=unused-import +from nomad.datamodel import results +from nomad.metainfo.elasticsearch_extension import material_entry_type +from tests.test_files import example_mainfile_contents # noqa: F401 from .common import ( aggregation_exclude_from_search_test_parameters, - assert_pagination, + aggregation_test_parameters_default, + assert_aggregation_response, assert_metadata_response, + assert_pagination, assert_required, + get_query_test_parameters, + owner_test_parameters, + pagination_test_parameters, perform_metadata_test, perform_owner_test, - owner_test_parameters, post_query_test_parameters, - get_query_test_parameters, - pagination_test_parameters, - aggregation_test_parameters_default, - assert_aggregation_response, ) """ diff --git a/tests/app/v1/routers/test_metainfo.py b/tests/app/v1/routers/test_metainfo.py index 8d4e2e78a44cde6573f411aac933f685601869d8..a34cb5be582d0e6cd8327987cfd4953f36ff45e2 100644 --- a/tests/app/v1/routers/test_metainfo.py +++ b/tests/app/v1/routers/test_metainfo.py @@ -21,11 +21,11 @@ from zipfile import ZipFile import pytest -from nomad.config import config from nomad.app.v1.routers.metainfo import store_package_definition -from nomad.datamodel import EntryArchive, ClientContext -from nomad.metainfo import MSection, MetainfoReferenceError -from nomad.utils import generate_entry_id, create_uuid +from nomad.config import config +from nomad.datamodel import ClientContext, EntryArchive +from nomad.metainfo import MetainfoReferenceError, MSection +from nomad.utils import create_uuid, generate_entry_id from tests.processing.test_data import run_processing diff --git a/tests/app/v1/routers/test_suggestions.py b/tests/app/v1/routers/test_suggestions.py index 937aad115af73b98413d9863cb7a857940bd35f2..093c20ed7eb23f288efbd91dfe87bb89e1cdd657 100644 --- a/tests/app/v1/routers/test_suggestions.py +++ b/tests/app/v1/routers/test_suggestions.py @@ -27,8 +27,10 @@ to assert for certain aspects in the responses. 
""" import pytest + from nomad.metainfo.elasticsearch_extension import entry_type from nomad.utils.exampledata import ExampleData + from .common import assert_response diff --git a/tests/app/v1/routers/test_systems.py b/tests/app/v1/routers/test_systems.py index debb8bdef085c2f6a3950386e9cbb53c7a161309..6b76eb777c317dfd6b6bf06550c4258914f08c3e 100644 --- a/tests/app/v1/routers/test_systems.py +++ b/tests/app/v1/routers/test_systems.py @@ -19,21 +19,21 @@ import re from io import BytesIO, StringIO -import pytest -import numpy as np import ase.io +import numpy as np +import pytest from ase import Atoms as ASEAtoms -from nomad.units import ureg -from nomad.normalizing.common import ase_atoms_from_nomad_atoms +from nomad.app.v1.routers.systems import FormatFeature, WrapModeEnum, format_map from nomad.datamodel.datamodel import EntryArchive -from nomad.datamodel.results import Results, Material, System from nomad.datamodel.metainfo import runschema from nomad.datamodel.metainfo.system import Atoms +from nomad.datamodel.results import Material, Results, System +from nomad.normalizing.common import ase_atoms_from_nomad_atoms +from nomad.units import ureg from nomad.utils.exampledata import ExampleData -from nomad.app.v1.routers.systems import format_map, FormatFeature, WrapModeEnum -from .common import assert_response, assert_browser_download_headers +from .common import assert_browser_download_headers, assert_response def ase_atoms(content, format): diff --git a/tests/app/v1/routers/uploads/test_basic_uploads.py b/tests/app/v1/routers/uploads/test_basic_uploads.py index f1e6d9bbed79041c3260d0a6d0be12c43bf38f88..a31f7ca2c20bdeadacf48f723536ef0954810e6f 100644 --- a/tests/app/v1/routers/uploads/test_basic_uploads.py +++ b/tests/app/v1/routers/uploads/test_basic_uploads.py @@ -18,12 +18,12 @@ import io import os +import tempfile import time import zipfile -from datetime import datetime -import tempfile -from typing import Any, Dict, List from collections.abc import Iterable +from datetime import datetime +from typing import Any import pytest import requests @@ -41,19 +41,18 @@ from tests.app.v1.routers.common import ( assert_response, perform_get, ) +from tests.config.models.test_plugins import ( + mock_example_upload_entry_point, + mock_plugin_package, +) from tests.processing.test_edit_metadata import ( all_admin_metadata, all_coauthor_metadata, assert_metadata_edited, ) -from tests.config.models.test_plugins import ( - mock_plugin_package, - mock_example_upload_entry_point, -) from tests.test_files import ( assert_upload_files, empty_file, - example_directory, example_file_aux, example_file_corrupt_zip, example_file_mainfile_different_atoms, diff --git a/tests/app/v1/test_models.py b/tests/app/v1/test_models.py index 821fea8dc341e26b531b3b046b979c4492159faa..9210f75d212792c627643cf23845572c02f11926 100644 --- a/tests/app/v1/test_models.py +++ b/tests/app/v1/test_models.py @@ -17,15 +17,16 @@ # from __future__ import annotations -from typing import List + +import sys + import pytest -from pydantic import BaseModel, Field, ValidationError import yaml -import sys +from pydantic import BaseModel, Field, ValidationError -from nomad.utils import strip from nomad.app.v1.models.graph import GraphRequest from nomad.app.v1.models.graph.utils import generate_request_model, mapped +from nomad.utils import strip @pytest.fixture() diff --git a/tests/app/v1/test_utils.py b/tests/app/v1/test_utils.py index ba6fd19fcfec71d55476496d73df32b2bd351f87..5ed2c55176c8fc573420fafce25fa53937d0f9d3 100644 --- 
a/tests/app/v1/test_utils.py +++ b/tests/app/v1/test_utils.py @@ -1,4 +1,5 @@ import pytest + from nomad.app.v1.utils import get_query_keys diff --git a/tests/archive/test_archive.py b/tests/archive/test_archive.py index 0d972d4ff9a887c25045db690f50a68cb3a92a47..0c35c9b5b0b8b7d3e89a829d6e5c1c1b2efd2768 100644 --- a/tests/archive/test_archive.py +++ b/tests/archive/test_archive.py @@ -15,42 +15,42 @@ # See the License for the specific language governing permissions and # limitations under the License. # +import json +import os.path from datetime import datetime -from typing import Dict, Any, Union -import pytest -import msgpack from io import BytesIO -import os.path -import json +from typing import Any +import msgpack +import pytest import yaml from nomad import utils -from nomad.config import config +from nomad.archive import ( + ArchiveQueryError, + RequiredReader, + compute_required_with_referenced, + create_partial_archive, + query_archive, + read_archive, + read_partial_archive_from_mongo, + read_partial_archives_from_mongo, + write_archive, + write_partial_archive_to_mongo, +) from nomad.archive.converter import convert_archive +from nomad.archive.storage import _decode, _entries_per_block, to_json +from nomad.config import config +from nomad.datamodel import ClientContext, EntryArchive from nomad.metainfo import ( + Context, + MetainfoError, + MProxy, MSection, Quantity, + QuantityReference, Reference, SubSection, - QuantityReference, - MetainfoError, - Context, - MProxy, -) -from nomad.datamodel import EntryArchive, ClientContext -from nomad.archive.storage import _decode, _entries_per_block, to_json -from nomad.archive import ( - write_archive, - read_archive, - ArchiveQueryError, - query_archive, - write_partial_archive_to_mongo, - read_partial_archive_from_mongo, - read_partial_archives_from_mongo, - create_partial_archive, - compute_required_with_referenced, - RequiredReader, ) from nomad.utils.exampledata import ExampleData diff --git a/tests/archive/test_storage.py b/tests/archive/test_storage.py index 4a2de3dbf6bce3c2e3b9ab8bfd8553060b55f3f7..6ce614b53e82aac86d9a5570a4b2801352312cee 100644 --- a/tests/archive/test_storage.py +++ b/tests/archive/test_storage.py @@ -5,10 +5,7 @@ import msgpack import msgspec.msgpack import pytest -from nomad.archive.storage_v2 import ( - ArchiveList, - ArchiveDict, -) +from nomad.archive.storage_v2 import ArchiveDict, ArchiveList def generate_random_json(depth=10, width=4, simple=False): diff --git a/tests/config/models/test_plugins.py b/tests/config/models/test_plugins.py index a7d2f42ebb06ad971cd2aa00d806058927a4b68c..7d702f33e7f539100f52c98b90559b31bac11af3 100644 --- a/tests/config/models/test_plugins.py +++ b/tests/config/models/test_plugins.py @@ -18,12 +18,13 @@ import os import tempfile + import pytest -from nomad.config import Config +from nomad.config import Config from nomad.config.models.plugins import ( - ExampleUploadEntryPoint, APIEntryPoint, + ExampleUploadEntryPoint, UploadResource, ) diff --git a/tests/config/models/test_ui.py b/tests/config/models/test_ui.py index ce0626c7c885db5ad1184e7f0cddef0da262e6a5..606e9af0fb35890d54785ef54811806a8b562e34 100644 --- a/tests/config/models/test_ui.py +++ b/tests/config/models/test_ui.py @@ -22,13 +22,13 @@ from nomad.config.models.ui import ( App, Axis, AxisQuantity, - Columns, Column, - Rows, + Columns, RowActions, + RowActionURL, RowDetails, + Rows, RowSelection, - RowActionURL, ) diff --git a/tests/conftest.py b/tests/conftest.py index 
bea0af18be93252b6f5715d635e09546e75e1e83..a72d3e2687faca5ff45a9fbf00d1cf1e27a3da6a 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -16,19 +16,17 @@ # limitations under the License. # import builtins -from pathlib import Path -from io import StringIO -import pytest -import time -import os -import socket -import json import logging -import warnings +import os +import socketserver import tempfile +import time +import warnings +from io import StringIO +from pathlib import Path +import pytest from fastapi.testclient import TestClient -import socketserver from nomad.config import config from nomad.config.models.plugins import Schema, add_plugin, remove_plugin @@ -37,8 +35,8 @@ from nomad.config.models.plugins import Schema, add_plugin, remove_plugin config.logstash.enabled = False # noqa: E402 # this must be set *before* the other modules are imported from nomad import utils -from nomad.utils import structlogging from nomad.app.main import app +from nomad.utils import structlogging # Set up pytest to pass control to the debugger on an exception. if os.getenv('_PYTEST_RAISE', '0') != '0': @@ -70,8 +68,8 @@ pytest_plugins = ( ) -from structlog.testing import LogCapture import structlog +from structlog.testing import LogCapture @pytest.fixture(scope='function') diff --git a/tests/data/parsing/file_parser/test.h5 b/tests/data/parsing/file_parser/test.h5 new file mode 100644 index 0000000000000000000000000000000000000000..10da06db996148fbcffec8cf8e31d19c466c3bd0 Binary files /dev/null and b/tests/data/parsing/file_parser/test.h5 differ diff --git a/tests/data/parsing/file_parser/test.xml b/tests/data/parsing/file_parser/test.xml new file mode 100644 index 0000000000000000000000000000000000000000..c2d785a189908ae09bce684ba5a4d6f15f0d1073 --- /dev/null +++ b/tests/data/parsing/file_parser/test.xml @@ -0,0 +1,51 @@ +<a> + <b> + <v>0.0 1.0</v> + <v>2.0 0.0</v> + <c> + <d>x</d> + <d>y</d> + </c> + <c> + <d>l</d> + <d>m</d> + </c> + </b> + <b1> + <v>1.0 2.0</v> + <v>3.0 4.0</v> + </b1> + <b1> + <v>5.0 6.0</v> + <v>7.0 8.0</v> + <c> + <d>a</d> + <d>b</d> + </c> + <c> + <d>c</d> + <d>d</d> + </c> + </b1> + <b2> + <c> + <d>1</d> + <e name='item1'> + <k n='k1'>f1</k> + <k n='k2'>f2</k> + </e> + <e name='item2'> + <k>f3</k> + <k>f4</k> + </e> + <f> + <g> + <i>1 2 3 4 5</i> + <i>6 7 8 9 1</i> + <i>2 2 2 1 3</i> + <i>0 2 8 4 5</i> + </g> + </f> + </c> + </b2> +</a> \ No newline at end of file diff --git a/tests/data/schemas/nomadschemaexample/schema.py b/tests/data/schemas/nomadschemaexample/schema.py index 2bf501f0067643332f9dd9d0f98312a11d090b1e..2f076b6d5734b77ccdad44b834aa11142d8beba3 100644 --- a/tests/data/schemas/nomadschemaexample/schema.py +++ b/tests/data/schemas/nomadschemaexample/schema.py @@ -1,16 +1,17 @@ +import numpy as np + +from nomad.datamodel.data import EntryData +from nomad.datamodel.metainfo.annotations import ELNAnnotation, ELNComponentEnum from nomad.metainfo import ( - Quantity, - Package, - Section, - MEnum, Datetime, + MEnum, MSection, - SubSection, + Package, + Quantity, + Section, SectionProxy, + SubSection, ) -from nomad.datamodel.data import EntryData -from nomad.datamodel.metainfo.annotations import ELNAnnotation, ELNComponentEnum -import numpy as np m_package = Package() diff --git a/tests/datamodel/metainfo/eln/test_structure_file.py b/tests/datamodel/metainfo/eln/test_structure_file.py index 10dad593cb4ff8a36b2b828b6ccf250eda970775..fcdde3dc7dce3ce45777fbf3df5cca38a2026d9c 100644 --- a/tests/datamodel/metainfo/eln/test_structure_file.py +++ 
b/tests/datamodel/metainfo/eln/test_structure_file.py @@ -16,7 +16,6 @@ # limitations under the License. # -from tests.normalizing.conftest import run_normalize from tests.normalizing.conftest import run_processing diff --git a/tests/datamodel/metainfo/test_annotations.py b/tests/datamodel/metainfo/test_annotations.py index 2d0f472f42072296571776a88c3d9d122e7e51b0..fb1208394b8348cc54a73facb400d8b366df3a71 100644 --- a/tests/datamodel/metainfo/test_annotations.py +++ b/tests/datamodel/metainfo/test_annotations.py @@ -19,13 +19,13 @@ import pytest from pydantic import ValidationError -from nomad.metainfo import Quantity from nomad.datamodel.metainfo.annotations import ( - PlotAnnotation, ELNAnnotation, + PlotAnnotation, PlotlyGraphObjectAnnotation, ) from nomad.datamodel.metainfo.plot import PlotlyError +from nomad.metainfo import Quantity @pytest.mark.parametrize( diff --git a/tests/datamodel/metainfo/test_plotly.py b/tests/datamodel/metainfo/test_plotly.py index 63d6daf2176827b663d4130701745b7d1af9c27c..038cffbd0e329d4233c8d64c0dcab992dbffbcff 100644 --- a/tests/datamodel/metainfo/test_plotly.py +++ b/tests/datamodel/metainfo/test_plotly.py @@ -1,4 +1,5 @@ import json + from tests.normalizing.conftest import run_processing diff --git a/tests/datamodel/metainfo/test_substance.py b/tests/datamodel/metainfo/test_substance.py index 78c63e39a54c7ba8257f95167652b4698bb1b8a3..6b30e54d32b7e67e459cbe769f27e29082fb3128 100644 --- a/tests/datamodel/metainfo/test_substance.py +++ b/tests/datamodel/metainfo/test_substance.py @@ -16,12 +16,13 @@ # limitations under the License. # +import json from collections.abc import Iterable + import pytest -import json -from tests.normalizing.conftest import run_processing from nomad.datamodel.metainfo.basesections import v1 as basesections +from tests.normalizing.conftest import run_processing class MockResponse: diff --git a/tests/datamodel/test_context.py b/tests/datamodel/test_context.py index 410c385b5d0fea5d2679321c74e5dafaaf2556d4..f8250df5fbd4d35c3590721e94b72b204d4bb80d 100644 --- a/tests/datamodel/test_context.py +++ b/tests/datamodel/test_context.py @@ -16,21 +16,20 @@ # limitations under the License. # +import json import os +import re import pytest -import json -import re -import numpy as np -from nomad import utils, files, processing -from nomad.metainfo.metainfo import MSection -from nomad.parsing.parser import ArchiveParser +from nomad import files, processing, utils from nomad.datamodel import Context -from nomad.datamodel.context import ServerContext, ClientContext, parse_path +from nomad.datamodel.context import ClientContext, ServerContext, parse_path from nomad.datamodel.datamodel import EntryArchive, EntryMetadata -from nomad.processing import Upload, Entry, ProcessStatus from nomad.datamodel.metainfo import runschema +from nomad.metainfo.metainfo import MSection +from nomad.parsing.parser import ArchiveParser +from nomad.processing import Entry, ProcessStatus, Upload @pytest.fixture(scope='module') diff --git a/tests/datamodel/test_datamodel.py b/tests/datamodel/test_datamodel.py index ed8916ef3400a5df198af0c9d27800290da9b6ca..e0b263d41e6b0ed67e9ba8fb4b1c38b8a1587c26 100644 --- a/tests/datamodel/test_datamodel.py +++ b/tests/datamodel/test_datamodel.py @@ -21,13 +21,12 @@ A generator for random test calculations. 
""" import random -from essential_generators import DocumentGenerator + import pytest +from essential_generators import DocumentGenerator -from nomad.datamodel import EntryArchive, EntryMetadata -from nomad.metainfo import MSection, Quantity, SubSection +from nomad.datamodel.metainfo import SCHEMA_IMPORT_ERROR, runschema from nomad.parsing.parsers import parser_dict -from nomad.datamodel.metainfo import runschema, SCHEMA_IMPORT_ERROR number_of = 20 @@ -74,7 +73,7 @@ low_numbers_for_geometries = [1, 2, 2, 3, 3, 4, 4] @pytest.mark.skipif(runschema is None, reason=SCHEMA_IMPORT_ERROR) def test_common_metainfo(): from runschema.run import Run - from runschema.system import System, Atoms + from runschema.system import Atoms, System run = Run() system = run.m_create(System) @@ -85,8 +84,8 @@ def test_common_metainfo(): @pytest.mark.skipif(runschema is None, reason=SCHEMA_IMPORT_ERROR) def test_vasp_metainfo(): + from electronicparsers.vasp.metainfo import vasp # noqa: F401 from runschema.run import Run - from electronicparsers.vasp.metainfo import vasp # pylint: disable=unused-import run = Run() assert 'vasp_src_date' in run.m_def.all_quantities diff --git a/tests/datamodel/test_hdf5.py b/tests/datamodel/test_hdf5.py index bbdb88e143b4ca15d350410e413cf282231e0417..427b20ccd2b33db0263e020afc0601fb431500f3 100644 --- a/tests/datamodel/test_hdf5.py +++ b/tests/datamodel/test_hdf5.py @@ -16,19 +16,18 @@ # limitations under the License. # -import pytest -import numpy as np import os + import h5py +import numpy as np +import pytest from nomad import files, processing -from nomad.datamodel import EntryData, EntryArchive, EntryMetadata +from nomad.datamodel import EntryArchive, EntryData, EntryMetadata from nomad.datamodel.context import ServerContext +from nomad.datamodel.hdf5 import HDF5Dataset, HDF5Reference from nomad.metainfo import Quantity -from nomad.datamodel.hdf5 import HDF5Reference, HDF5Dataset - - external_file = 'tests/data/datamodel/context.h5' diff --git a/tests/datamodel/test_metadata.py b/tests/datamodel/test_metadata.py index 09cc397ba545f6498c6ba1dea1a59487955a9e5d..0d6c40fd9fb6bd13f6e3890956dd397d042ac2b8 100644 --- a/tests/datamodel/test_metadata.py +++ b/tests/datamodel/test_metadata.py @@ -16,15 +16,16 @@ # limitations under the License. 
# -import pytest -import numpy as np from datetime import datetime + +import numpy as np +import pytest import pytz -from nomad.metainfo import Quantity, MSection, SubSection, Datetime, MEnum -from nomad.datamodel.datamodel import EntryMetadata, SearchableQuantity, EntryArchive -from nomad.metainfo.elasticsearch_extension import schema_separator from nomad.datamodel import EntryData +from nomad.datamodel.datamodel import EntryArchive, EntryMetadata, SearchableQuantity +from nomad.metainfo import Datetime, MEnum, MSection, Quantity, SubSection +from nomad.metainfo.elasticsearch_extension import schema_separator from tests.variables import python_schema_name diff --git a/tests/datamodel/test_schema.py b/tests/datamodel/test_schema.py index 17ccdea3ab84a0a3eb396270e7183376ecd0c421..c06f260122cab7664f40ab2adc3940c1425e60fa 100644 --- a/tests/datamodel/test_schema.py +++ b/tests/datamodel/test_schema.py @@ -17,21 +17,21 @@ # import os.path + import pytest -from nomad.metainfo import MetainfoError from nomad.datamodel.context import ServerContext +from nomad.datamodel.data import AuthorReference, Query, UserReference from nomad.datamodel.datamodel import EntryArchive, EntryMetadata -from nomad.datamodel.data import UserReference, AuthorReference, Query -from nomad.datamodel.metainfo.annotations import valid_eln_types, valid_eln_components +from nomad.datamodel.metainfo.annotations import valid_eln_components, valid_eln_types +from nomad.metainfo import MetainfoError from nomad.metainfo.data_type import Datatype from nomad.parsing.parser import ArchiveParser from nomad.processing.data import Upload from nomad.utils import get_logger, strip - +from tests.metainfo.test_yaml_schema import yaml_to_package from tests.normalizing.conftest import run_normalize from tests.test_files import create_test_upload_files -from tests.metainfo.test_yaml_schema import yaml_to_package def test_schema_processing(raw_files_function, no_warn): diff --git a/tests/examples/test_archive_query.py b/tests/examples/test_archive_query.py index c31de1b4e984c0c34fdae96ca289123cbc10286a..5cad68b2d008bcc4dabad27cddd706c87f2f1e40 100644 --- a/tests/examples/test_archive_query.py +++ b/tests/examples/test_archive_query.py @@ -16,15 +16,14 @@ # limitations under the License. 
# -import os.path import importlib +import os.path import sys from nomad.utils.exampledata import ExampleData - -from tests.parsing.test_parsing import run_singular_parser from tests.normalizing.conftest import run_normalize -from tests.test_client import async_api_v1 +from tests.parsing.test_parsing import run_singular_parser +from tests.test_client import async_api_v1 # noqa: F401 def test_archive_query( diff --git a/tests/examples/test_docs.py b/tests/examples/test_docs.py index e8b737a9fbc0a21a85cd8e5d7893d0b0a5080697..ef1a43720d217ff262683120194593a606e4bd26 100644 --- a/tests/examples/test_docs.py +++ b/tests/examples/test_docs.py @@ -1,12 +1,12 @@ -import yaml import json import os.path +import yaml + from nomad.metainfo import Package from nomad.units import ureg - -from tests.parsing.test_parsing import run_singular_parser from tests.normalizing.conftest import run_normalize +from tests.parsing.test_parsing import run_singular_parser def _file(path): diff --git a/tests/examples/test_metainfo.py b/tests/examples/test_metainfo.py new file mode 100644 index 0000000000000000000000000000000000000000..739280867837e9d5cf4ef9ccf9d6fb1844f29468 --- /dev/null +++ b/tests/examples/test_metainfo.py @@ -0,0 +1,16 @@ +import os +import runpy + +import pytest + +prefix = os.path.join(__file__, '../../../examples/metainfo') + + +@pytest.mark.parametrize( + 'file', + [ + f'{prefix}/data_frames.py', + ], +) +def test_metainfo(file): + runpy.run_path(file) diff --git a/tests/fixtures/data.py b/tests/fixtures/data.py index d974026e243d981642b9701349b7d5257dda528a..d5000b0c381251ea99c04fd1fbf1908c08d5f788 100644 --- a/tests/fixtures/data.py +++ b/tests/fixtures/data.py @@ -1,7 +1,6 @@ import math import os from datetime import datetime, timezone -from typing import List, Tuple import pytest @@ -18,7 +17,6 @@ from nomad.datamodel.datamodel import SearchableQuantity from nomad.metainfo.elasticsearch_extension import schema_separator from nomad.processing import ProcessStatus from nomad.utils.exampledata import ExampleData -from tests.variables import python_schema_name, yaml_schema_name, yaml_schema_root from tests.normalizing.conftest import run_normalize from tests.parsing import test_parsing from tests.processing import test_data as test_processing @@ -28,6 +26,7 @@ from tests.utils import ( create_template_upload_file, set_upload_entry_metadata, ) +from tests.variables import python_schema_name, yaml_schema_name, yaml_schema_root @pytest.fixture(scope='session') diff --git a/tests/fixtures/group_uploads.py b/tests/fixtures/group_uploads.py index b0d8109c89dfc6a64e179ae0e36233d505ff6331..290b2b6d0b44eb83eb8533be38b48528687f7d6d 100644 --- a/tests/fixtures/group_uploads.py +++ b/tests/fixtures/group_uploads.py @@ -11,6 +11,7 @@ Values: """ from collections.abc import Sequence + import pytest from nomad.utils.exampledata import ExampleData diff --git a/tests/graph/test_definition_reader.py b/tests/graph/test_definition_reader.py index f67385897f1c1994fd2602742007eb02aff75e4d..354f9fc00e38e17b3f63649fa45476bf35c942da 100644 --- a/tests/graph/test_definition_reader.py +++ b/tests/graph/test_definition_reader.py @@ -21,8 +21,8 @@ import pytest from nomad.graph.graph_reader import DefinitionReader from nomad.metainfo import ( - Package, MSection, + Package, Quantity, Reference, SectionProxy, diff --git a/tests/graph/test_graph_reader.py b/tests/graph/test_graph_reader.py index ebcdc887caf49afd4a80c6b9011198caed2622ba..d6f9d75cd2a38a1a978894c08620a68dad10b005 100644 --- a/tests/graph/test_graph_reader.py 
+++ b/tests/graph/test_graph_reader.py @@ -22,17 +22,17 @@ from datetime import datetime import pytest import yaml +from nomad.datamodel import EntryArchive from nomad.graph.graph_reader import ( EntryReader, - UploadReader, - UserReader, FileSystemReader, - MongoReader, GeneralReader, + MongoReader, Token, + UploadReader, + UserReader, ) from nomad.graph.lazy_wrapper import LazyWrapper -from nomad.datamodel import EntryArchive from nomad.utils.exampledata import ExampleData from tests.normalizing.conftest import simulationworkflowschema diff --git a/tests/metainfo/test_attributes.py b/tests/metainfo/test_attributes.py index 13edf2b9d098a417b6ad9508697a78415bcff20c..32db3438decb3c5868d99cf098d2d3bc88ea25ed 100644 --- a/tests/metainfo/test_attributes.py +++ b/tests/metainfo/test_attributes.py @@ -18,20 +18,20 @@ import datetime -import pytest import numpy as np +import pytest import pytz from nomad.metainfo import ( - MSection, - Quantity, Attribute, + Datetime, MEnum, + MSection, + Quantity, Reference, - Datetime, Section, ) -from nomad.metainfo.metainfo import MQuantity, Definition +from nomad.metainfo.metainfo import Definition, MQuantity from nomad.metainfo.util import validate_allowable_unit from nomad.units import ureg diff --git a/tests/metainfo/test_data_frames.py b/tests/metainfo/test_data_frames.py new file mode 100644 index 0000000000000000000000000000000000000000..9f1ee888517fef141fb712af31c1f3be6a728c50 --- /dev/null +++ b/tests/metainfo/test_data_frames.py @@ -0,0 +1,469 @@ +import datetime + +import numpy as np +import pandas as pd +import pytest +import xarray as xr + +from nomad.metainfo.data_frames import DataFrameTemplate, ValuesTemplate +from nomad.metainfo.metainfo import Datetime, MEnum, MSection, Package +from nomad.units import ureg + +m_package = Package() + +# Values +ScalarValue = ValuesTemplate( + name='Scalar', + description='Scalar', + shape=[], + type=np.float64, + unit='J', + iri='', +) + +DatetimeValue = ValuesTemplate( + name='Datetime', + description='Datetime', + shape=[], + type=Datetime, + unit='', + iri='', +) + +StringValue = ValuesTemplate( + name='String', + description='String', + shape=[], + type=str, + unit='', + iri='', +) + +BooleanValue = ValuesTemplate( + name='Boolean', + description='Boolean', + shape=[], + type=bool, + unit='', + iri='', +) + +EnumValue = ValuesTemplate( + name='Enum', + description='Enum', + shape=[], + type=MEnum(['A', 'B', 'C']), + unit='', + iri='', +) + +# Data frames +GeneralDataFrame = DataFrameTemplate( + name='GeneralDataFrame', + mandatory_fields=[], +) + +# Examples +Time = ValuesTemplate( + name='Time', + type=np.float64, + shape=[], + unit='s', + iri='https://www.wikidata.org/wiki/Q11471', +) + +Temperature = ValuesTemplate( + name='Temperature', + type=np.float64, + shape=[], + unit='K', + iri='https://www.wikidata.org/wiki/Q11466', +) + +Pressure = ValuesTemplate( + name='Pressure', + type=np.float64, + shape=[], + unit='Pa', + iri='https://www.wikidata.org/wiki/Q39552', +) + +Latitude = ValuesTemplate( + name='Latitude', + description='Latitude', + shape=[], + type=np.float64, + unit='deg', + iri='', +) + +Longitude = ValuesTemplate( + name='Longitude', + description='Longitude', + shape=[], + type=np.float64, + unit='deg', + iri='', +) + +CauchyStressTensor = ValuesTemplate( + name='CauchyStressTensor', + type=np.float64, + shape=[3, 3], + unit='Pa', + iri='https://en.wikipedia.org/wiki/Cauchy_stress_tensor', +) + +Stress = DataFrameTemplate( + name='Stress', + mandatory_fields=[CauchyStressTensor], +) + 
+ProcessConditions = DataFrameTemplate( + name='ProcessConditions', + mandatory_fields=[Temperature, Pressure], + mandatory_variables=[Time], +) + +TemperatureDataFrame = DataFrameTemplate( + name='Temperature', + mandatory_fields=[Temperature], + mandatory_variables=[Longitude, Latitude, Time, StringValue], +) + + +class MySection(MSection): + # Values + datetime_value = DatetimeValue() + string_value = StringValue() + boolean_value = BooleanValue() + enum_value = EnumValue() + scalar_value = ScalarValue() + # Data frames + general_data_frame = GeneralDataFrame() + # Examples + process_conditions = ProcessConditions() + temperature_measurement = TemperatureDataFrame() + stress = Stress() + + +m_package.__init_metainfo__() + + +@pytest.mark.parametrize( + 'values_quantity, input_value, output_value', + [ + pytest.param( + 'scalar_value', + 1.6e-19, + ureg.Quantity(1.6e-19, 'J'), + id='scalar-no-unit', + ), + pytest.param( + 'scalar_value', + ureg.Quantity(1.6e-19, 'J'), + ureg.Quantity(1.6e-19, 'J'), + id='scalar-same-unit', + ), + pytest.param( + 'scalar_value', + ureg.Quantity(1.5, 'kcal'), + ureg.Quantity(6276, 'J'), + id='scalar-different-unit', + ), + pytest.param( + 'string_value', + 'Hello world', + 'Hello world', + id='string', + ), + pytest.param( + 'boolean_value', + True, + True, + id='boolean', + ), + pytest.param( + 'enum_value', + 'A', + 'A', + id='enum', + ), + pytest.param( + 'datetime_value', + datetime.datetime(2021, 1, 1, 12, 0, 0, tzinfo=datetime.timezone.utc), + datetime.datetime(2021, 1, 1, 12, 0, 0, tzinfo=datetime.timezone.utc), + id='datetime', + ), + ], +) +def test_set_values(values_quantity, input_value, output_value): + my_section = MySection() + setattr(my_section, values_quantity, input_value) + assert getattr(my_section, values_quantity) == output_value + + +def test_override_values_template(): + unit = 'degree_Celsius' + description = 'My temperature' + + class Test(MSection): + temperature = Temperature(unit=unit, description=description) + + assert Test.temperature.unit == unit + assert Test.temperature.description == description + + +def test_override_data_frame_template(): + description = 'My DOS' + + class Test(MSection): + my_data_frame = GeneralDataFrame(description=description) + + assert Test.my_data_frame.description == description + + +@pytest.mark.parametrize( + 'values_template, create_args, second_value', + [ + pytest.param( + ScalarValue, + (1.6e-19, 1.7e-19), + ureg.Quantity(1.7e-19, 'J'), + id='multiple_args', + ), + pytest.param( + ScalarValue, ([1.6e-19, 1.7e-19],), ureg.Quantity(1.7e-19, 'J'), id='list' + ), + pytest.param( + ScalarValue, + (np.array([1.6e-19, 1.7e-19]),), + ureg.Quantity(1.7e-19, 'J'), + id='numpy_array', + ), + pytest.param( + ScalarValue, + (ureg.Quantity(1.6e-19, 'J'), ureg.Quantity(1.7e-19, 'J')), + ureg.Quantity(1.7e-19, 'J'), + id='pint_quantity_args', + ), + pytest.param( + ScalarValue, + (ureg.Quantity([1.6e-19, 1.7e-19], 'J'),), + ureg.Quantity(1.7e-19, 'J'), + id='pint_quantity_array', + ), + pytest.param( + StringValue, + ('Hello', 'World'), + 'World', + id='string', + ), + pytest.param( + BooleanValue, + (True, False), + False, + id='boolean', + ), + # pytest.param( + # EnumValue, + # ('A', 'B'), + # 'B', + # id='enum', + # ), + # pytest.param( + # DatetimeValue, + # ( + # datetime.datetime(2021, 1, 1, 12, 0, 0, tzinfo=datetime.timezone.utc), + # datetime.datetime(2021, 1, 2, 12, 0, 0, tzinfo=datetime.timezone.utc), + # ), + # datetime.datetime(2021, 1, 2, 12, 0, 0, tzinfo=datetime.timezone.utc), + # 
id='datetime', + # ) + ], +) +def test_set_data_frame_values( + values_template: ValuesTemplate, create_args, second_value +): + my_section = MySection( + general_data_frame=GeneralDataFrame.create( + fields=[values_template.create(*create_args)] + ), + ) + assert my_section.general_data_frame.fields[0].get_values()[1] == second_value + + +def test_get_data_frame_values(): + my_section = MySection( + general_data_frame=GeneralDataFrame.create( + fields=[ScalarValue.create(1.6e-19, 1.7e-19)] + ), + ) + assert my_section.general_data_frame.get_field(ScalarValue).values[ + 0 + ] == ureg.Quantity(1.6e-19, 'J') + assert my_section.general_data_frame.get_field('Scalar_1').values[ + 0 + ] == ureg.Quantity(1.6e-19, 'J') + + +def test_original_shape(): + my_section = MySection( + stress=Stress.create( + fields=[CauchyStressTensor.create(np.random.rand(3, 3, 4, 5))], + variables=[ + Temperature.create(np.random.rand(4)), + Pressure.create(np.random.rand(5)), + ], + ), + ) + + assert my_section.stress.fields[0].original_shape == [3, 3, 4, 5] + assert my_section.stress.variables[0].original_shape == [4] + assert my_section.stress.fields[0].values.shape == (3, 3, 20) + assert my_section.stress.fields[0].get_values().shape == (3, 3, 4, 5) + + my_section_2 = MySection( + stress=Stress.create( + fields=[ + CauchyStressTensor.create( + np.random.rand(3, 3, 20), original_shape=[3, 3, 4, 5] + ) + ], + variables=[ + Temperature.create(np.random.rand(4)), + Pressure.create(np.random.rand(5)), + ], + ), + ) + + assert my_section_2.stress.fields[0].original_shape == [3, 3, 4, 5] + + +temperature = ureg.Quantity([300.0, 320.0, 340.0], 'K') +pressure = ureg.Quantity([1e5, 1.2e5, 1.4e5], 'Pa') +time = ureg.Quantity([1.0, 2.0, 3.0], 's') + + +@pytest.mark.parametrize( + 'data_frame, ds', + [ + pytest.param( + ProcessConditions.create( + fields=[ + Temperature.create(*temperature, name='my_temp'), + Pressure.create(*pressure), + ], + variables=[Time.create(*time, name='time')], + ), + xr.Dataset( + data_vars=dict( + my_temp=( + ['time'], + temperature, + dict( + units='kelvin', + long_name=None, + description=None, + iri='https://www.wikidata.org/wiki/Q11466', + ), + ), + Pressure_1=( + ['time'], + pressure, + dict( + units='pascal', + long_name=None, + description=None, + iri='https://www.wikidata.org/wiki/Q39552', + ), + ), + ), + coords=dict( + time=( + ['time'], + time, + dict( + units='second', + long_name=None, + description=None, + iri='https://www.wikidata.org/wiki/Q11471', + ), + ), + ), + attrs=dict( + description=None, + long_name=None, + ), + ), + id='single-variable, multiple-fields', + ), + ], +) +def test_to_xarray(data_frame, ds): + my_section = MySection(process_conditions=data_frame) + assert my_section.process_conditions.to_xarray().equals(ds) + + +@pytest.mark.parametrize( + 'data_frame, df', + [ + pytest.param( + ProcessConditions.create( + fields=[ + Temperature.create(*temperature, name='my_temp'), + Pressure.create(*pressure), + ], + variables=[Time.create(*time, name='time')], + ), + pd.DataFrame( + dict(my_temp=temperature.magnitude, Pressure_1=pressure.magnitude), + index=pd.Index(time.magnitude, name='time'), + ), + id='single-variable, multiple-fields', + ), + ], +) +def test_to_pandas(data_frame, df): + my_section = MySection(process_conditions=data_frame) + assert my_section.process_conditions.to_pandas().equals(df) + + +def test_multiple_spanned_dimensions(): + np.random.seed(0) + temperature = 15 + 8 * np.random.randn(2, 2, 3) + lon = np.array([[-99.83, -99.32], [-99.79, -99.23]]) + 
lat = np.array([[42.25, 42.21], [42.63, 42.59]]) + # time = pd.date_range('2014-09-06', periods=3) + # reference_time = pd.Timestamp('2014-09-05') + time = np.arange(3) + reference_time = '2014-09-05' + + ds = xr.DataArray( + data=temperature, + dims=['x', 'y', 'time'], + coords=dict( + lon=(['x', 'y'], lon), + lat=(['x', 'y'], lat), + time=time, + reference_time=reference_time, + ), + attrs=dict( + description='Ambient temperature.', + units='degC', + ), + ).to_dataset(name='temperature') + + my_section = MySection() + my_section.temperature_measurement = TemperatureDataFrame.create( + fields=[Temperature.create(temperature)], + variables=[ + Longitude.create(lon, spanned_dimensions=[0, 1], name='lon'), + Latitude.create(lat, spanned_dimensions=[0, 1], name='lat'), + Time.create(time, spanned_dimensions=[2], name='time'), + StringValue.create(reference_time, name='reference_time'), + ], + ) + + with pytest.raises(NotImplementedError): + my_section.temperature_measurement.to_xarray() diff --git a/tests/metainfo/test_elasticsearch_extension.py b/tests/metainfo/test_elasticsearch_extension.py index d1e4b04da5ada696fbd9a5a10531dfa417c21b65..e9aafb1575baa292c688786fd5d8429d380f1a99 100644 --- a/tests/metainfo/test_elasticsearch_extension.py +++ b/tests/metainfo/test_elasticsearch_extension.py @@ -17,29 +17,28 @@ # from datetime import date -from typing import List -import pytest + import numpy as np +import pytest from elasticsearch_dsl import Keyword from nomad.config import config -from nomad.utils.exampledata import ExampleData from nomad.datamodel.datamodel import SearchableQuantity -from nomad.metainfo import MSection, Quantity, SubSection, Datetime, Unit, MEnum +from nomad.metainfo import Datetime, MEnum, MSection, Quantity, SubSection, Unit from nomad.metainfo.elasticsearch_extension import ( Elasticsearch, create_indices, - index_entries_with_materials, + create_searchable_quantity, + entry_index, entry_type, - material_type, + index_entries_with_materials, material_entry_type, - entry_index, material_index, - create_searchable_quantity, + material_type, ) - -from tests.fixtures.infrastructure import clear_elastic_infra +from nomad.utils.exampledata import ExampleData from tests.app.v1.routers.common import perform_quantity_search_test +from tests.fixtures.infrastructure import clear_elastic_infra @pytest.fixture(scope='module') diff --git a/tests/metainfo/test_full_storage_quantity.py b/tests/metainfo/test_full_storage_quantity.py index 9acb42dcf198b1c1b404dda216cd464160a31381..e08520aad6f4c8e7d27574b015acba2424922218 100644 --- a/tests/metainfo/test_full_storage_quantity.py +++ b/tests/metainfo/test_full_storage_quantity.py @@ -18,12 +18,12 @@ import pytest from nomad.metainfo import ( - MSection, - Quantity, Attribute, - SubSection, MetainfoError, + MSection, + Quantity, Section, + SubSection, ) from nomad.metainfo.util import MQuantity from nomad.units import ureg diff --git a/tests/metainfo/test_hash_id.py b/tests/metainfo/test_hash_id.py index 3c3e44bb2e08569e30e48a0236f148d6b0109e2b..6e2b678431a1432dd43dce2d6475071a190e8e90 100644 --- a/tests/metainfo/test_hash_id.py +++ b/tests/metainfo/test_hash_id.py @@ -1,4 +1,4 @@ -from nomad.metainfo import Quantity, MSection, MEnum +from nomad.metainfo import MEnum, MSection, Quantity def simple_quantity(): diff --git a/tests/metainfo/test_metainfo.py b/tests/metainfo/test_metainfo.py index ea08bba6ca3b6af8324e339dd3db219f178e9207..394d44b588d6d341075f5eefde1857f2bfa15e26 100644 --- a/tests/metainfo/test_metainfo.py +++ 
b/tests/metainfo/test_metainfo.py @@ -20,40 +20,33 @@ # in-depth tests in test_* files of the same module. from math import isnan -import pytest + import numpy as np import pandas as pd import pint.quantity +import pytest -from nomad.metainfo.metainfo import ( - MSection, - MCategory, - Section, - Quantity, - SubSection, - Definition, - Package, - DeriveError, - MetainfoError, - derived, -) from nomad.metainfo import ( Annotation, + AnnotationModel, DefinitionAnnotation, SectionAnnotation, - AnnotationModel, ) -from nomad.metainfo.example import ( - Run, - VaspRun, - System, - SystemHash, - Parsing, - SCC, - m_package as example_package, +from nomad.metainfo.example import SCC, Parsing, Run, System, SystemHash, VaspRun +from nomad.metainfo.example import m_package as example_package +from nomad.metainfo.metainfo import ( + Definition, + DeriveError, + MCategory, + MetainfoError, + MSection, + Package, + Quantity, + Section, + SubSection, + derived, ) from nomad.units import ureg - from tests.metainfo import MTypes diff --git a/tests/metainfo/test_mongodb_extension.py b/tests/metainfo/test_mongodb_extension.py index 3f03c65a6dc97bb47f92e9532971d2e31d38b5f3..ff1687b7660e10954fc98b88fc9283ddd9160b32 100644 --- a/tests/metainfo/test_mongodb_extension.py +++ b/tests/metainfo/test_mongodb_extension.py @@ -17,9 +17,11 @@ # import json + import numpy as np -from nomad.metainfo import MSection, Section, Quantity, SubSection -from nomad.metainfo.mongoengine_extension import MongoDocument, Mongo + +from nomad.metainfo import MSection, Quantity, Section, SubSection +from nomad.metainfo.mongoengine_extension import Mongo, MongoDocument class B(MSection): diff --git a/tests/metainfo/test_package.py b/tests/metainfo/test_package.py index 75d70c3d2c77a30e94bb0c38c65f83585e52b350..583ccb35e6996ba10de5dc7825cb4eaa82795ed4 100644 --- a/tests/metainfo/test_package.py +++ b/tests/metainfo/test_package.py @@ -16,7 +16,7 @@ # limitations under the License. # -from nomad.metainfo import Package, MSection +from nomad.metainfo import MSection, Package m_package = Package(aliases=['nomad.datamodel.test_package']) diff --git a/tests/metainfo/test_quantities.py b/tests/metainfo/test_quantities.py index ad39b0e06313fce8e2dfffb4b6845d420cd79adb..0e6842b2d32fd941bbf2525a576b4d2c217c342f 100644 --- a/tests/metainfo/test_quantities.py +++ b/tests/metainfo/test_quantities.py @@ -24,14 +24,14 @@ import pytest import pytz from nomad.metainfo.metainfo import ( + JSON, + URL, Bytes, Capitalized, Datetime, Dimension, - JSON, MSection, Quantity, - URL, Unit, units, ) diff --git a/tests/metainfo/test_references.py b/tests/metainfo/test_references.py index c9f20fcc985f1ecdc725f3abbe401a6e92fc1e53..8a7f14a00a67c4cac18a049e50c84fa4b6ab52c2 100644 --- a/tests/metainfo/test_references.py +++ b/tests/metainfo/test_references.py @@ -16,25 +16,26 @@ # limitations under the License. 
# +import os.path from typing import cast + import pytest -import os.path -from nomad.datamodel import UserReference, AuthorReference +from nomad.datamodel import AuthorReference, UserReference from nomad.metainfo import ( + Context, + File, + MetainfoReferenceError, + MProxy, MSection, + Package, Quantity, + QuantityReference, + Reference, Section, SubSection, - MProxy, - Reference, - QuantityReference, - File, - MetainfoReferenceError, - Package as MetainfoPackage, - Context, - Package, ) +from nomad.metainfo import Package as MetainfoPackage class Referenced(MSection): @@ -383,7 +384,7 @@ def test_def_reference(): @pytest.mark.parametrize('mainfile', ['intra-entry', 'inter-entry']) def test_parse_with_references(mainfile): - from nomad.client import parse, normalize_all + from nomad.client import normalize_all, parse entry_archive = parse( os.path.join( diff --git a/tests/metainfo/test_sections.py b/tests/metainfo/test_sections.py index 04de671f982ab274ef5ac1f6c363b12a04cad2ab..7ed0276321249910bf0acb51232b3b50504d17b6 100644 --- a/tests/metainfo/test_sections.py +++ b/tests/metainfo/test_sections.py @@ -22,7 +22,7 @@ import pytest from nomad.metainfo import MSection -from nomad.metainfo.metainfo import Package, Quantity, SubSection, Section +from nomad.metainfo.metainfo import Package, Quantity, Section, SubSection def test_base_section(): @@ -251,7 +251,7 @@ def test_path(): assert SubSection.used_sections[ChildSection.m_def] == [EntryArchive.child] assert ChildSection.m_def.path == 'child' - from nomad.datamodel.metainfo.workflow import Workflow, Task + from nomad.datamodel.metainfo.workflow import Task, Workflow assert Workflow.m_def.path == 'workflow2' assert Task.m_def.path == '__no_archive_path__' diff --git a/tests/metainfo/test_to_dict.py b/tests/metainfo/test_to_dict.py index 1069001b29f14a2d5395dcd80cc2a4b2a559f687..8c296a8868d559c7dea801d4bfebb38cfcb75dfa 100644 --- a/tests/metainfo/test_to_dict.py +++ b/tests/metainfo/test_to_dict.py @@ -16,16 +16,16 @@ # limitations under the License. 
# -import pytest import numpy as np +import pytest import yaml from nomad.app.v1.routers.metainfo import ( get_package_by_section_definition_id, store_package_definition, ) -from nomad.metainfo import MSection, MCategory, Quantity, SubSection -from nomad.metainfo.metainfo import Datetime, Package, MEnum, Reference, Definition +from nomad.metainfo import MCategory, MSection, Quantity, SubSection +from nomad.metainfo.metainfo import Definition, MEnum, Package, Reference # resolve_references are tested in .test_references # type specific serialization is tested in .test_quantities @@ -66,7 +66,7 @@ expected_child = dict(**values) expected_root = dict( child=expected_child, children=[expected_child, expected_child], - abstract=dict(m_def='tests.metainfo.test_to_dict.Child', **expected_child), + abstract=dict(m_def='tests.metainfo.test_to_dict.Child', **expected_child), # type: ignore **values, ) diff --git a/tests/metainfo/test_yaml_schema.py b/tests/metainfo/test_yaml_schema.py index 67df9908508c25b5a6885ebe8279250379346687..950d8927c61b1ef84704caeb952c150034381599 100644 --- a/tests/metainfo/test_yaml_schema.py +++ b/tests/metainfo/test_yaml_schema.py @@ -20,18 +20,18 @@ import numpy as np import pytest import yaml -from nomad.utils import strip from nomad.metainfo import ( - Package, + Context, + MetainfoError, + MProxy, MSection, + Package, Quantity, Reference, - SubSection, Section, - MProxy, - MetainfoError, - Context, + SubSection, ) +from nomad.utils import strip m_package = Package() diff --git a/tests/mkdocs/test_mkdocs_metainfo.py b/tests/mkdocs/test_mkdocs_metainfo.py new file mode 100644 index 0000000000000000000000000000000000000000..368288ff1cc8caf825772a45b86aeaf8981ef3d8 --- /dev/null +++ b/tests/mkdocs/test_mkdocs_metainfo.py @@ -0,0 +1,93 @@ +# +# Copyright The NOMAD Authors. +# +# This file is part of NOMAD. See https://nomad-lab.eu for further info. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + + +import pytest + +from nomad.metainfo import Datetime, MSection, Package, Quantity, Reference +from nomad.mkdocs.metainfo import ( + get_property_description, + get_property_type_info, + get_quantity_default, +) + +m_package = Package() + + +class Test(MSection): + pass + + +m_package.__init_metainfo__() + + +@pytest.mark.parametrize( + 'type_, name', + [ + pytest.param(str, '`str`', id='str'), + pytest.param(int, '`int`', id='int'), + pytest.param(float, '`float`', id='float'), + pytest.param(Datetime, '`nomad.metainfo.data_type.Datetime`', id='Datetime'), + pytest.param(Reference(Test), '[`Test`](#test)', id='internal-ref'), + pytest.param( + Reference(Quantity), '`nomad.metainfo.metainfo.Quantity`', id='external-ref' + ), + ], +) +def test_property_type_info(type_, name): + class Test(MSection): + a = Quantity(type=type_) + + name_found = get_property_type_info(Test.m_def.all_properties['a'], pkg=m_package) + assert name_found == name + + +@pytest.mark.parametrize( + 'description', + [ + pytest.param(None, id='no-description'), + pytest.param('This is a test description.', id='string-description'), + ], +) +def test_property_description(description): + class Test(MSection): + a: str = Quantity(description=description) + + description_found = get_property_description(Test.m_def.all_properties['a']) + assert description_found == description + + +@pytest.mark.parametrize( + 'default, default_str', + [ + pytest.param(None, '', id='no-default'), + pytest.param('test', '`test`', id='str-default'), + pytest.param(1, '`1`', id='int-default'), + pytest.param( + {'test': 'test'}, + 'Complex object, default value not displayed.', + id='complex-default', + ), + ], +) +def test_property_default(default, default_str): + class Test(MSection): + a = Quantity(default=default) + + default_found = get_quantity_default(Test.m_def.all_properties['a']) + assert default_found == default_str diff --git a/tests/test_mkdocs.py b/tests/mkdocs/test_mkdocs_pydantic.py similarity index 97% rename from tests/test_mkdocs.py rename to tests/mkdocs/test_mkdocs_pydantic.py index baa0c3300b63a7b49772e33bedc3a6664f5635dc..15a9bda32ebfc639342c8f1f0ce92092d2a4736f 100644 --- a/tests/test_mkdocs.py +++ b/tests/mkdocs/test_mkdocs_pydantic.py @@ -17,17 +17,18 @@ # from enum import Enum +from typing import Annotated, Literal, Optional, Union # type: ignore + import pytest -from typing import Union, List, Dict, Optional -from typing import Literal, Annotated # type: ignore from pydantic import BaseModel, Field + from nomad.config.models.ui import WidgetHistogram, WidgetTerms -from nomad.mkdocs import ( - get_field_type_info, - get_field_description, +from nomad.mkdocs.pydantic import ( get_field_default, - get_field_options, get_field_deprecated, + get_field_description, + get_field_options, + get_field_type_info, ) diff --git a/tests/normalizing/conftest.py b/tests/normalizing/conftest.py index 2ac967401754e234cab41f6c1d59001ad845bbb7..c9c926b861409de40f92b0f30362de8e13718788 100644 --- a/tests/normalizing/conftest.py +++ b/tests/normalizing/conftest.py @@ -15,58 +15,47 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# +import re from collections import defaultdict +from typing import Any +from warnings import warn + +import ase.build import numpy as np -from typing import Any, List, Union -from collections.abc import Iterable import pytest from ase import Atoms -import ase.build -import re -from warnings import warn -import importlib -from nomad.config import config -from nomad.units import ureg -from nomad.utils import get_logger -from nomad.normalizing import normalizers -from nomad.metainfo import SubSection, Quantity -from nomad.datamodel import EntryArchive, ArchiveSection -from nomad.datamodel.results import ( - Relation, - SymmetryNew as Symmetry, - Cell, - WyckoffSet, - System as ResultSystem, -) -from nomad.datamodel.optimade import Species -from nomad.normalizing.common import cell_from_ase_atoms, nomad_atoms_from_ase_atoms -from nomad.datamodel.metainfo.simulation.method import CoreHole -from nomad.datamodel.metainfo.workflow import Workflow -from nomad.datamodel.metainfo.workflow import Link, TaskReference +from nomad.datamodel import ArchiveSection, EntryArchive +from nomad.datamodel.context import ServerContext +from nomad.datamodel.datamodel import EntryArchive, EntryMetadata +from nomad.datamodel.metainfo import runschema, simulationworkflowschema from nomad.datamodel.metainfo.measurements import ( + EELSMeasurement, + Instrument, Measurement, Sample, - EELSMeasurement, Spectrum, - Instrument, ) -from nomad.datamodel.results import EELSInstrument - -from nomad.datamodel.context import ServerContext -from nomad.datamodel.datamodel import EntryArchive, EntryMetadata +from nomad.datamodel.metainfo.simulation.method import CoreHole +from nomad.datamodel.metainfo.workflow import Workflow +from nomad.datamodel.optimade import Species +from nomad.datamodel.results import Cell, EELSInstrument, Relation, WyckoffSet +from nomad.datamodel.results import SymmetryNew as Symmetry +from nomad.datamodel.results import System as ResultSystem +from nomad.metainfo import Quantity, SubSection +from nomad.normalizing import normalizers +from nomad.normalizing.common import cell_from_ase_atoms, nomad_atoms_from_ase_atoms from nomad.parsing.parser import ArchiveParser from nomad.processing.data import Upload -from tests.parsing.test_parsing import parsed_vasp_example # pylint: disable=unused-import -from tests.parsing.test_parsing import parsed_template_example # pylint: disable=unused-import -from tests.parsing.test_parsing import parsed_example # pylint: disable=unused-import -from tests.parsing.test_parsing import parse_file -from tests.test_files import create_test_upload_files -from nomad.datamodel.metainfo import ( - simulationworkflowschema, - runschema, - SCHEMA_IMPORT_ERROR, +from nomad.units import ureg +from nomad.utils import get_logger +from tests.parsing.test_parsing import ( + parse_file, + parsed_example, # noqa: F401 + parsed_template_example, # noqa: F401 + parsed_vasp_example, # noqa: F401 ) +from tests.test_files import create_test_upload_files def run_normalize(entry_archive: EntryArchive) -> EntryArchive: diff --git a/tests/normalizing/test_entry_type_and_name.py b/tests/normalizing/test_entry_type_and_name.py index c11d002e54fe1bde1eeb938e95689d7639f7086f..dffa84450dda6ebb4a292a09a2e706e4744ae2ef 100644 --- a/tests/normalizing/test_entry_type_and_name.py +++ b/tests/normalizing/test_entry_type_and_name.py @@ -16,8 +16,10 @@ # limitations under the License. 
# import pytest + import tests -from .conftest import run_normalize, run_processing + +from .conftest import run_normalize @pytest.fixture(scope='session') diff --git a/tests/normalizing/test_material.py b/tests/normalizing/test_material.py index c2b9289da2f68eb3c767fcfd0c2ea6bbf058dcb1..ab422266ef3aa1d335eae0318ad627944779e1bd 100644 --- a/tests/normalizing/test_material.py +++ b/tests/normalizing/test_material.py @@ -16,18 +16,18 @@ # limitations under the License. # +import ase.build import numpy as np import pytest from ase import Atoms -import ase.build from matid.symmetry.wyckoffset import WyckoffSet # pylint: disable=import-error -from nomad.units import ureg from nomad import atomutils from nomad.config import config -from nomad.utils import hash -from nomad.normalizing.common import ase_atoms_from_nomad_atoms from nomad.datamodel.results import ElementalComposition +from nomad.normalizing.common import ase_atoms_from_nomad_atoms +from nomad.units import ureg +from nomad.utils import hash from tests.normalizing.conftest import get_template_for_structure diff --git a/tests/normalizing/test_metainfo.py b/tests/normalizing/test_metainfo.py index 6fc097c0538354edfb6e2e99f2e87386acbfb943..815ad7df3a7882c324e4bf9b79655b3b0405bc93 100644 --- a/tests/normalizing/test_metainfo.py +++ b/tests/normalizing/test_metainfo.py @@ -17,9 +17,9 @@ # import numpy as np -from nomad.datamodel import EntryData, EntryArchive -from nomad.metainfo import Quantity, SubSection from nomad.client import normalize_all +from nomad.datamodel import EntryArchive, EntryData +from nomad.metainfo import Quantity, SubSection def test_normalizer_level(): diff --git a/tests/normalizing/test_method.py b/tests/normalizing/test_method.py index 483753d8e65c234b4ab841cadf39ad6c3632eed5..1f9992a15b47f9d19a6f8b11eb990aa0027e6866 100644 --- a/tests/normalizing/test_method.py +++ b/tests/normalizing/test_method.py @@ -17,9 +17,10 @@ # import numpy as np -from nomad.units import ureg import pytest +from nomad.units import ureg + def approx(value, abs=0, rel=1e-6): return pytest.approx(value, abs=abs, rel=rel) diff --git a/tests/normalizing/test_properties.py b/tests/normalizing/test_properties.py index ad82111b390280fb02528becf149458c473bcca5..9ad5ae24b2daf9179cab3d23ca33be0686e844fc 100644 --- a/tests/normalizing/test_properties.py +++ b/tests/normalizing/test_properties.py @@ -19,20 +19,20 @@ import numpy as np import pytest +from nomad.datamodel.metainfo import SCHEMA_IMPORT_ERROR, simulationworkflowschema from nomad.units import ureg from .conftest import ( - get_template_dft, - add_template_dos, - get_template_dos, add_template_band_structure, - get_template_band_structure, + add_template_dos, add_template_magnetic_shielding, - add_template_spin_spin_coupling, add_template_magnetic_susceptibility, + add_template_spin_spin_coupling, + get_template_band_structure, + get_template_dft, + get_template_dos, run_normalize, ) -from nomad.datamodel.metainfo import simulationworkflowschema, SCHEMA_IMPORT_ERROR def test_eels(eels): diff --git a/tests/normalizing/test_topology.py b/tests/normalizing/test_topology.py index 7dc058c0834204f2ffc892647a13aacf3c91596f..5940d8e8e9e8c1cbce921ca7adee123c90175eb2 100644 --- a/tests/normalizing/test_topology.py +++ b/tests/normalizing/test_topology.py @@ -16,35 +16,35 @@ # limitations under the License. 
# -import numpy as np from collections import defaultdict + +import numpy as np import pytest -from nomad.client.processing import normalize -from nomad.datamodel.metainfo import runschema +from nomad.datamodel.metainfo import runschema from nomad.units import ureg -from tests.normalizing.conftest import ( # pylint: disable=unused-import - get_template_for_structure, - get_template_topology, +from tests.normalizing.conftest import ( # noqa: F401 + boron_nitride, + boron_nitride_topology, + check_template_active_orbitals, conv_bcc, conv_fcc, - rattle, - run_normalize, - stack, - surf, - single_cu_surface_topology, - single_cr_surface_topology, - stacked_cu_ni_surface_topology, + get_template_active_orbitals, + get_template_computation, + get_template_for_structure, + get_template_topology, graphene, graphene_topology, - boron_nitride, - boron_nitride_topology, mos2, mos2_topology, + rattle, + run_normalize, + single_cr_surface_topology, + single_cu_surface_topology, + stack, + stacked_cu_ni_surface_topology, stacked_graphene_boron_nitride_topology, - get_template_active_orbitals, - check_template_active_orbitals, - get_template_computation, + surf, ) diff --git a/tests/parsing/test_archive_parser.py b/tests/parsing/test_archive_parser.py index b9666aa564265de69aeb7d5bb64f9dcada4f8245..e577bbb0681973c580f493dcbc1e74ca96cc5782 100644 --- a/tests/parsing/test_archive_parser.py +++ b/tests/parsing/test_archive_parser.py @@ -16,14 +16,15 @@ # limitations under the License. # -import pytest import json import os import os.path +import pytest + from nomad.config import config +from nomad.datamodel import Context, EntryArchive from nomad.parsing.parser import ArchiveParser -from nomad.datamodel import EntryArchive, Context def test_archive_parser(raw_files_function): diff --git a/tests/parsing/test_file_parser.py b/tests/parsing/test_file_parser.py index 6f8fb70bb5053104517dda9e920b0cf1c53b7eef..d3a6e0a32c56d6c7f0443a8b54be2f3688dce1fa 100644 --- a/tests/parsing/test_file_parser.py +++ b/tests/parsing/test_file_parser.py @@ -1,15 +1,15 @@ -import pytest import numpy as np import pint -from nomad.units import ureg +import pytest + +from nomad.datamodel.metainfo.system import Atoms from nomad.parsing.file_parser import ( - TextParser, - Quantity, + FileParser, ParsePattern, + Quantity, + TextParser, XMLParser, - FileParser, ) -from nomad.datamodel.metainfo.system import Atoms class TestFileParser: diff --git a/tests/parsing/test_mapping_parser.py b/tests/parsing/test_mapping_parser.py new file mode 100644 index 0000000000000000000000000000000000000000..a772a8b08cc1e8d9d00c5d9a99b281c1754d6563 --- /dev/null +++ b/tests/parsing/test_mapping_parser.py @@ -0,0 +1,595 @@ +from copy import deepcopy +from typing import Any + +import numpy as np +import pytest + +from nomad.datamodel import ArchiveSection +from nomad.datamodel.metainfo.annotations import Mapper as MapperAnnotation +from nomad.metainfo import Quantity, SubSection +from nomad.parsing.file_parser.mapping_parser import ( + MAPPING_ANNOTATION_KEY, + Data, + HDF5Parser, + Mapper, + MappingParser, + MetainfoParser, + Path, + PathParser, + TextParser, + Transformer, + XMLParser, +) +from nomad.parsing.file_parser.text_parser import Quantity as TextQuantity +from nomad.parsing.file_parser.text_parser import TextParser as TextFileParser + + +class BSection(ArchiveSection): + v = Quantity(type=np.float64, shape=[2, 2]) + v.m_annotations[MAPPING_ANNOTATION_KEY] = dict( + xml=MapperAnnotation(mapper='.v'), + hdf5=MapperAnnotation(mapper=('get_v', 
['.v[0].d'])), + ) + + v2 = Quantity(type=str) + v2.m_annotations[MAPPING_ANNOTATION_KEY] = dict( + xml=MapperAnnotation(mapper='.c[0].d[1]'), + hdf5=MapperAnnotation(mapper='g.v[-2]'), + text=MapperAnnotation(mapper='version'), + ) + + v3 = Quantity(type=float) + v3.m_annotations[MAPPING_ANNOTATION_KEY] = dict( + text=MapperAnnotation(mapper='.energy') + ) + + +class CSection(ArchiveSection): + i = Quantity(type=int) + i.m_annotations[MAPPING_ANNOTATION_KEY] = dict( + xml=MapperAnnotation(mapper='.d'), + hdf5=MapperAnnotation(mapper='.i | [1]'), + ) + + e = Quantity(type=str) + e.m_annotations[MAPPING_ANNOTATION_KEY] = dict( + xml=MapperAnnotation(mapper='a.b2.c.e[?"@name"==\'item2\'].k[1] | [0]'), + hdf5=MapperAnnotation(mapper=('to_string', ['.f[?"@index">=`1`].__value'])), + ) + + g = Quantity(type=np.float64, shape=[2, 5]) + g.m_annotations[MAPPING_ANNOTATION_KEY] = dict( + xml=MapperAnnotation(mapper=('slice', ['a.b2.c.f.g.i'])), + hdf5=MapperAnnotation(mapper='g.g.c1.d[:2].e[-5:]'), + ) + + +class B2Section(ArchiveSection): + c = SubSection(sub_section=CSection) + c.m_annotations[MAPPING_ANNOTATION_KEY] = dict( + xml=MapperAnnotation(mapper='.c'), + hdf5=MapperAnnotation(mapper='g.g.c1'), + ) + + b = SubSection(sub_section=BSection) + b.m_annotations[MAPPING_ANNOTATION_KEY] = dict( + xml=MapperAnnotation(mapper='a.b'), + hdf5=MapperAnnotation(mapper='.c'), + ) + + +class ExampleSection(ArchiveSection): + b = SubSection(sub_section=BSection, repeats=True) + b.m_annotations[MAPPING_ANNOTATION_KEY] = dict( + xml=MapperAnnotation(mapper='a.b1'), + hdf5=MapperAnnotation(mapper='.g1'), + text=MapperAnnotation(mapper='calculation'), + ) + b2 = SubSection(sub_section=B2Section) + b2.m_annotations[MAPPING_ANNOTATION_KEY] = dict( + xml=MapperAnnotation(mapper='a.b2'), + hdf5=MapperAnnotation(mapper='g.g'), + ) + + +ExampleSection.m_def.m_annotations[MAPPING_ANNOTATION_KEY] = dict( + xml=MapperAnnotation(mapper='a'), + hdf5=MapperAnnotation(mapper='g'), + text=MapperAnnotation(), +) + + +class ExampleXMLParser(XMLParser): + @staticmethod + def get_eigenvalues_energies(value, n_spin, n_kpoints): + array = np.transpose(value)[0].T + return np.reshape(array, (n_spin, n_kpoints, len(array[0]))) + + @staticmethod + def get_version(version, sub_version, platform): + return ' '.join([' '.join(s.split()) for s in [version, sub_version, platform]]) + + @staticmethod + def slice(value): + return np.array(value)[2:] + + +class ExampleHDF5Parser(HDF5Parser): + @staticmethod + def get_v(value): + return np.array(value)[1:, :2] + + @staticmethod + def to_string(value): + return '-'.join([str(n) for n in value]) + + +class ExampleParser(MappingParser): + def from_dict(self, dct: dict[str, Any]): + return super().from_dict(dct) # type: ignore + + def load_file(self) -> Any: + return super().load_file() + + def to_dict(self, **kwargs) -> dict[str, Any]: + return super().to_dict(**kwargs) + + def slice(self, value): + return value[1] + + +@pytest.fixture(scope='module') +def text_parser() -> TextParser: + outcar_parser = TextFileParser( + quantities=[ + TextQuantity('version', r'vasp\.([\S]+)'), + TextQuantity( + 'calculation', + r'(FREE ENERGIE OF THE ION\-ELECTRON SYSTEM[\s\S]+?entropy.+)', + repeats=True, + sub_parser=TextFileParser( + quantities=[ + TextQuantity( + 'energy', + r'free\s*energy\s*TOTEN\s*=\s*([\-\d\.]+)', + dtype=float, + ) + ] + ), + ), + ] + ) + return TextParser( + text_parser=outcar_parser, filepath='tests/data/parsers/vasp_outcar/OUTCAR' + ) + + +@pytest.fixture(scope='module') +def 
xml_parser() -> ExampleXMLParser: + return ExampleXMLParser(filepath='tests/data/parsing/file_parser/test.xml') + + +@pytest.fixture(scope='module') +def hdf5_parser() -> ExampleHDF5Parser: + return ExampleHDF5Parser(filepath='tests/data/parsing/file_parser/test.h5') + + +@pytest.fixture(scope='module') +def archive_parser() -> MetainfoParser: + return MetainfoParser() + + +@pytest.fixture(scope='module') +def data(): + return { + 'a': { + 'b': [ + {'c': {'d': 1, 'e': 'x'}, 'f': [1.0, 2.0]}, + {'c': 2, 'd': [{'e': 'y', 'f': np.eye(2)}]}, + ], + 'c': [ + {'n': 'x', 'v': 1}, + {'n': 'y', 'v': 2}, + ], + }, + 'b': [ + { + 'c': [ + {'d': 3, 'e': [[1, 2], [3, 4]]}, + {'d': 4, 'e': [[1, 0], [2, 0]]}, + ] + }, + {'c': {'d': 1, 'e': 'z'}}, + ], + 'c': [ + {'n': 1, 'v': 'a'}, + {'n': 2, 'v': 'b'}, + ], + } + + +def assert_equal(v1, v2): + if isinstance(v1, dict): + for key, val in v1.items(): + assert key in v2 + assert_equal(val, v2[key]) + elif isinstance(v1, list): + assert isinstance(v2, list) + for n, v in enumerate(v1): + assert_equal(v, v2[n]) + else: + equal = v1 == v2 + assert equal.all() if isinstance(equal, np.ndarray) else equal + + +class TestPath: + @pytest.mark.parametrize( + 'path, result', + [ + pytest.param('a.b[1].c', 2), + pytest.param('b[0].c[0:2].d', [3, 4]), + pytest.param('b[0].c[0:2].e', [[[1, 2], [3, 4]], [[1, 0], [2, 0]]]), + pytest.param('a.b[1].d[0].f', np.eye(2)), + pytest.param('b[1].c.e', 'z'), + pytest.param('a[1].b.c.d', None), + pytest.param("a.c[?n=='x'].v | [0]", 1), + pytest.param('c[?n>=`1`].v | [1]', 'b'), + ], + ) + def test_get_data(self, path, result, data): + path = Path(path=path) + value = path.get_data(data) + assert_equal(value, result) + + @pytest.mark.parametrize( + 'path, data, target, result', + [ + pytest.param('a.b', 'x', {}, 'x'), + pytest.param('a[1:3].b.c[0].d[0:3]', 'y', {}, [['y'] * 3] * 2), + pytest.param('a.b[0:3].c[1:2]', [[1], [2], [3]], {}, [[1], [2], [3]]), + pytest.param('a[1:4].b', ['a', 'b', 'c'], {}, ['a', 'b', 'c']), + pytest.param('a[0:2].b.c', [1, 2, 3], {}, [[1, 2, 3]] * 2), + pytest.param( + 'a[0].b[0:2].c[0]', + [['x'], ['y']], + {'a': [{'b': [{'c': ['a']}, {'c': ['b']}]}]}, + ['x', 'y'], + ), + pytest.param( + 'a[0].b[0:2]', + [['x'], ['y']], + {'a': [{'b': ['a', 'b']}]}, + [['x'], ['y']], + ), + ], + ) + def test_set_data(self, path, data, target, result): + path = Path(path=path) + path.set_data(data, target) + value = path.get_data(target) + assert_equal(value, result) + + +class TestMapper: + @pytest.mark.parametrize( + 'dct, expected', + [ + pytest.param( + dict(source='a', target='b', mapper='v'), + Transformer( + source=Data(path=Path(path='a')), + target=Data(path=Path(path='b')), + function_args=[Path(path='v')], + ), + ), + pytest.param( + dict(source='a', path='.v', path_parser='jsonpath_ng'), + Transformer( + source=Data( + path=Path( + path='a', parser=PathParser(parser_name='jsonpath_ng') + ), + path_parser=PathParser(parser_name='jsonpath_ng'), + ), + function_args=[ + Path( + parser=PathParser(parser_name='jsonpath_ng'), + path='.v', + parent=Path( + path='a', parser=PathParser(parser_name='jsonpath_ng') + ), + ) + ], + ), + ), + pytest.param( + dict(target='b', source='a', mapper=('eval', ['.a', 'b'])), + Transformer( + source=Data(path=Path(path='a')), + target=Data(path=Path(path='b')), + function_name='eval', + function_args=[ + Path(path='.a', parent=Path(path='a')), + Path(path='b'), + ], + ), + ), + pytest.param( + dict( + source=Data( + transformer=Transformer(function_args=[Path(path='a')]) + ), + 
function_name='eval', + function_args=[Path(path='.b')], + ), + Transformer( + source=Data( + transformer=Transformer(function_args=[Path(path='a')]) + ), + function_name='eval', + function_args=[Path(path='.b', parent=Path(path='a'))], + ), + ), + pytest.param( + dict( + source='a', + remove=True, + mapper=[ + dict(path='.c', source='.b'), + dict( + path_parser='jsonpath_ng', + mapper=['eval', ['.x']], + remove=False, + ), + dict( + path_parser='jsonpath_ng', + mapper=[dict(mapper='.d', remove=True)], + source='.b', + remove=False, + ), + ], + ), + Mapper( + source=Data(path=Path(path='a')), + remove=True, + mappers=[ + Transformer( + function_args=[ + Path( + path='.c', + parent=Path(path='.b', parent=Path(path='a')), + ) + ], + source=Data( + path=Path(path='.b', parent=Path(path='a')), + parent=Path(path='a'), + ), + remove=True, + ), + Transformer( + function_name='eval', + function_args=[ + Path( + path='.x', + parser=PathParser(parser_name='jsonpath_ng'), + parent=Path(path='a'), + ) + ], + remove=False, + ), + Mapper( + mappers=[ + Transformer( + function_args=[ + Path( + path='.d', + parent=Path( + path='.b', + parser=PathParser( + parser_name='jsonpath_ng' + ), + parent=Path(path='a'), + ), + ) + ], + remove=True, + ) + ], + source=Data( + path=Path( + path='.b', + parser=PathParser(parser_name='jsonpath_ng'), + parent=Path(path='a'), + ), + path_parser=PathParser(parser_name='jsonpath_ng'), + parent=Path(path='a'), + ), + remove=False, + ), + ], + ), + ), + ], + ) + def test_from_dict(self, dct, expected): + def assert_mappers_equal(m1, m2): + assert isinstance(m1, type(m2)) + assert m1.source == m2.source + assert m1.target == m2.target + assert m1.remove == m2.remove + assert m1.indices == m2.indices + if isinstance(m1, Mapper): + for n, sm1 in enumerate(m1.mappers): + assert_mappers_equal(sm1, m2.mappers[n]) + elif isinstance(m1, Transformer): + assert m1.function_name == m2.function_name + for n, arg in enumerate(m1.function_args): + assert arg == m2.function_args[n] + + mapper = Mapper.from_dict(dct) + assert_mappers_equal(mapper, expected) + + @pytest.mark.parametrize('remove', [True, False]) + @pytest.mark.parametrize( + 'mapper, expected', + [ + pytest.param( + Mapper( + mappers=[ + Transformer( + source=Data( + path=Path( + path='a.b', + parser=PathParser(parser_name='jsonpath_ng'), + ) + ), + function_args=[ + Path( + path='.f', + parser=PathParser(parser_name='jsonpath_ng'), + ) + ], + target=Data(path=Path(path='x')), + ) + ] + ), + dict(x=[1.0, 2.0]), + ), + pytest.param( + Mapper( + mappers=[ + Transformer( + source=Data( + transformer=Transformer( + function_args=[Path(path='a.b')] + ) + ), + function_args=[Path(path='.f')], + target=Data(path=Path(path='x')), + ) + ], + ), + dict(x=[1.0, 2.0]), + ), + pytest.param( + Mapper( + mappers=[ + Transformer( + source=Data( + transformer=Transformer( + function_args=[Path(path='b[0].c')] + ) + ), + function_args=[Path(path='.e')], + function_name='slice', + target=Data(path=Path(path='x')), + ) + ] + ), + dict(x=[3.0, 4.0]), + ), + pytest.param( + Mapper( + mappers=[ + Mapper( + source=Data(path=Path(path='a')), + mappers=[ + Mapper( + mappers=[ + Transformer( + function_args=[Path(path='.d[0].e')], + target=Data(path=Path(path='z')), + ) + ], + source=Data(path=Path(path='.b')), + indices=None, + target=Data(path=Path(path='y')), + ) + ], + target=Data(path=Path(path='x')), + ), + Transformer( + function_args=[Path(path='c[?n==`2`].v | [0]')], + target=Data(path=Path(path='x2')), + ), + ] + ), + 
dict(x=dict(y=dict(z='y')), x2='b'), + ), + ], + ) + def test_get_data(self, data, remove, mapper, expected): + source = deepcopy(data) + parser = ExampleParser(data=source) + mapper.remove = remove + result = mapper.get_data(source, parser) + assert_equal(expected, result) + if remove: + assert not mapper.get_data(source, parser) + + +class TestMappingParser: + def test_convert_xml_to_archive(self, xml_parser, archive_parser): + archive_parser.annotation_key = 'xml' + archive_parser.data_object = ExampleSection(b=[BSection(v=np.eye(2))]) + + xml_parser.convert(archive_parser, update_mode='append') + archive = archive_parser.data_object + assert len(archive.b) == 3 + assert archive.b[0].v[0][0] == 1.0 + assert archive.b[1].v[1][0] == 3.0 + assert archive.b[2].v[1][1] == 8.0 + assert archive.b[2].v2 == 'b' + assert archive.b2.c.i == 1 + assert archive.b2.c.e == 'f4' + assert archive.b2.c.g[1][2] == 8 + xml_parser.close() + + def test_convert_archive_to_xml(self, xml_parser, archive_parser): + archive_parser.data_object = ExampleSection(b=[BSection(v=np.eye(2))]) + xml_parser.mapper = Mapper( + mappers=[ + Mapper( + target=Data(path=Path(path='a')), + mappers=[ + Mapper( + target=Data(path=Path(path='.b1')), + mappers=[ + Transformer( + function_args=[Path(path='.v')], + target=Data(path=Path(path='.v')), + ) + ], + source=Data(path=Path(path='b')), + ) + ], + ) + ], + ) + xml_parser.filepath = None + archive_parser.convert(xml_parser) + assert xml_parser.data_object.findall('b1')[0].findall('v')[1].text == '0.0 1.0' + xml_parser.close() + + def test_convert_hdf5_to_archive(self, hdf5_parser, archive_parser): + archive_parser.annotation_key = 'hdf5' + archive_parser.data_object = ExampleSection(b=[BSection(v=np.eye(2))]) + hdf5_parser.convert(archive_parser, update_mode='merge') + archive = archive_parser.data_object + assert archive.b[0].v[1][1] == 1.0 + assert archive.b[0].v2 == 'y' + assert archive.b2.c.i == 6 + assert archive.b2.c.e == '2-1' + assert archive.b2.c.g[1][3] == 9 + assert archive.b2.b.v[0][1] == 1 + hdf5_parser.close() + + def test_convert_text_to_archive(self, text_parser, archive_parser): + archive_parser.annotation_key = 'text' + archive_parser.data_object = ExampleSection(b=[BSection(v=np.eye(2))]) + text_parser.convert(archive_parser, update_mode='replace') + archive = archive_parser.data_object + assert len(archive.b) == 3 + assert archive.b[0].v2 == '5.3.2' + assert archive.b[2].v3 == -7.14173545 + text_parser.close() diff --git a/tests/parsing/test_parsing.py b/tests/parsing/test_parsing.py index adfbece7378ccc90039d204d6b9a42ca1e502ba5..176679775b011990a7309df94b7dea15d2bc2e1f 100644 --- a/tests/parsing/test_parsing.py +++ b/tests/parsing/test_parsing.py @@ -19,13 +19,13 @@ import json import os from shutil import copyfile -from unittest.mock import patch, MagicMock +from unittest.mock import MagicMock, patch import pytest from nomad import files, utils from nomad.datamodel import EntryArchive -from nomad.parsing import BrokenParser, MatchingParserInterface, MatchingParser +from nomad.parsing import BrokenParser, MatchingParser, MatchingParserInterface from nomad.parsing.parsers import match_parser, parser_dict, parsers, run_parser from nomad.utils import dump_json diff --git a/tests/parsing/test_tabular.py b/tests/parsing/test_tabular.py index c1554851b48f49e518c3955794a164ecaca2fe12..7acf5017b8cbd3856ac4b7bb958a30f752878888 100644 --- a/tests/parsing/test_tabular.py +++ b/tests/parsing/test_tabular.py @@ -15,27 +15,24 @@ # See the License for the specific language 
governing permissions and # limitations under the License. # -from io import StringIO -from unittest.mock import MagicMock, patch -import pandas as pd -import pytest +import datetime import os import os.path import re -import datetime + +import pytest import yaml +from nomad import files from nomad.config import config -from nomad.datamodel.datamodel import EntryArchive, EntryMetadata from nomad.datamodel.context import ClientContext +from nomad.datamodel.datamodel import EntryArchive, EntryMetadata +from nomad.parsing.parser import ArchiveParser from nomad.parsing.tabular import read_table_data +from nomad.processing import Entry, ProcessStatus, Upload from nomad.utils import generate_entry_id, strip -from nomad.parsing.parser import ArchiveParser from tests.normalizing.conftest import run_normalize -from nomad.processing import Upload, Entry -from nomad.processing import ProcessStatus -from nomad import files def quantity_generator(quantity_name, header_name, shape, to_dict=False): diff --git a/tests/processing/test_base.py b/tests/processing/test_base.py index 3a3384ddc1f2c450ac1f5664c89f1ff566febc34..45fbe4560311a106976ca52e1cd65509564333f2 100644 --- a/tests/processing/test_base.py +++ b/tests/processing/test_base.py @@ -15,21 +15,20 @@ # See the License for the specific language governing permissions and # limitations under the License. # -import pytest -import json import random -import time import threading -from typing import List, Any, Union +import time +from typing import Any -from mongoengine import StringField, IntField, ListField +import pytest +from mongoengine import IntField, ListField, StringField from nomad.processing.base import ( Proc, ProcessAlreadyRunning, + ProcessStatus, process, process_local, - ProcessStatus, ) random.seed(0) diff --git a/tests/processing/test_data.py b/tests/processing/test_data.py index 121dc06fbfd1613b5cc187bd56a258617973d009..d538f39a91a3f52cfd8128a1bbbeda7da4edf1fd 100644 --- a/tests/processing/test_data.py +++ b/tests/processing/test_data.py @@ -16,40 +16,39 @@ # limitations under the License. 
# -from typing import Tuple, Dict -from collections.abc import Generator -import pytest +import json import os.path import re import shutil import zipfile -import json +from collections.abc import Generator + +import pytest import yaml -from nomad import utils, infrastructure +from nomad import infrastructure, utils +from nomad.archive import read_partial_archive_from_mongo, to_json from nomad.config import config from nomad.config.models.config import BundleImportSettings -from nomad.archive import read_partial_archive_from_mongo, to_json -from nomad.files import UploadFiles, StagingUploadFiles, PublicUploadFiles -from nomad.parsing.parser import Parser -from nomad.parsing import parsers from nomad.datamodel import ServerContext from nomad.datamodel.data import EntryData +from nomad.datamodel.datamodel import ArchiveSection, EntryArchive, EntryData +from nomad.files import PublicUploadFiles, StagingUploadFiles, UploadFiles from nomad.metainfo import Package, Quantity, Reference, SubSection -from nomad.processing import Upload, Entry, ProcessStatus -from nomad.search import search, refresh as search_refresh +from nomad.parsing import parsers +from nomad.parsing.parser import Parser +from nomad.processing import Entry, ProcessStatus, Upload +from nomad.search import refresh as search_refresh +from nomad.search import search from nomad.utils.exampledata import ExampleData -from nomad.datamodel.datamodel import EntryArchive, EntryData, ArchiveSection - -from tests.test_search import assert_search_upload from tests.test_files import ( assert_upload_files, - example_file_mainfile, example_file_aux, + example_file_mainfile, ) +from tests.test_search import assert_search_upload from tests.utils import create_template_upload_file, set_upload_entry_metadata - # Package with some metainfo schemas used only for testing. m_package = Package(name='test_schemas') diff --git a/tests/processing/test_edit_metadata.py b/tests/processing/test_edit_metadata.py index 6d53da2b890cc7743bfbb0f527e40ff97713dbc0..96963944b85296fcf2e4c2418964f14ca579a514 100644 --- a/tests/processing/test_edit_metadata.py +++ b/tests/processing/test_edit_metadata.py @@ -15,18 +15,17 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# -import pytest from datetime import datetime +import pytest from fastapi.exceptions import RequestValidationError from nomad import datamodel, metainfo from nomad.metainfo.data_type import Datatype -from nomad.processing import Upload, MetadataEditRequestHandler +from nomad.processing import MetadataEditRequestHandler, Upload from nomad.processing.data import editable_metadata, mongo_upload_metadata from nomad.search import search - all_coauthor_metadata = dict( # All attributes which a coauthor+ can edit upload_name='a humble upload name', diff --git a/tests/processing/test_rfc3161.py b/tests/processing/test_rfc3161.py index e559a53246882e5154175d2a570a6bc61bd8ca48..28f1bcc80ba94bbadbdc0bd0192b2e84786cd96a 100644 --- a/tests/processing/test_rfc3161.py +++ b/tests/processing/test_rfc3161.py @@ -23,9 +23,9 @@ import httpx import pytest import rfc3161ng -from nomad.archive import write_archive, read_archive, to_json +from nomad.archive import read_archive, to_json, write_archive from nomad.datamodel.datamodel import RFC3161Timestamp -from nomad.processing.data import get_rfc3161_token, Entry +from nomad.processing.data import Entry, get_rfc3161_token @pytest.mark.parametrize( diff --git a/tests/states/archives/create_archives.py b/tests/states/archives/create_archives.py index bcee46b0a307349c098720aed9eb54741e85b34a..0b53682f6dbcf3b0d580c005c09522d197edcf6c 100644 --- a/tests/states/archives/create_archives.py +++ b/tests/states/archives/create_archives.py @@ -17,6 +17,7 @@ # import math + from nomad.utils.exampledata import create_entry_archive diff --git a/tests/states/entry.py b/tests/states/entry.py index 767a77b37087849e30d2dfa81b850bf6edb45490..bbd97182e0c9389beb0731d255391002118eb053 100644 --- a/tests/states/entry.py +++ b/tests/states/entry.py @@ -17,10 +17,12 @@ # import json -from nomad import infrastructure, files + +from nomad import files, infrastructure +from nomad.processing import Upload from nomad.utils.exampledata import ExampleData, create_entry_archive + from .archives.create_archives import archive_dft_bulk -from nomad.processing import Upload def dft(): diff --git a/tests/states/search.py b/tests/states/search.py index e3ef1fc19c585b0ceb5f4cea7d443f897b0047af..6a53a211cc259d5579aa653b461a6c9ab2ff5864 100644 --- a/tests/states/search.py +++ b/tests/states/search.py @@ -16,10 +16,11 @@ # limitations under the License. # from datetime import datetime, timedelta + from nomad import infrastructure -from nomad.utils import create_uuid -from nomad.units import ureg from nomad.atomutils import chemical_symbols +from nomad.units import ureg +from nomad.utils import create_uuid from nomad.utils.exampledata import ExampleData material_h2o = { diff --git a/tests/states/uploads.py b/tests/states/uploads.py index 52ae1ed4e99129aef629545183df7d84693ee127..a79aa893b8bcb3aac07d135887748803d01aca5b 100644 --- a/tests/states/uploads.py +++ b/tests/states/uploads.py @@ -16,11 +16,12 @@ # limitations under the License. 
# -from nomad import infrastructure, files +from nomad import files, infrastructure from nomad.processing import Upload from nomad.utils.exampledata import ExampleData + from .archives.create_archives import archive_dft_bulk -from .groups import init_gui_test_groups, delete_group +from .groups import delete_group, init_gui_test_groups default_access = {'coauthors': ['scooper'], 'reviewers': ['ttester']} twin_access = { diff --git a/tests/test_atomutils.py b/tests/test_atomutils.py index 97655c0eded200dafd7bea8bf237a0bf35a3dd85..e9dc5f5561a5d24803bcc0369b5a081cc73b5ecc 100644 --- a/tests/test_atomutils.py +++ b/tests/test_atomutils.py @@ -16,8 +16,9 @@ # limitations under the License. # import pytest + from nomad.atomutils import Formula -from nomad.datamodel.results import Material, ElementalComposition +from nomad.datamodel.results import ElementalComposition, Material @pytest.mark.parametrize( diff --git a/tests/test_cli.py b/tests/test_cli.py index 3ee028698b5e5098b2761c0f25f3066d413e869e..1350894136449b375906f4937adf8020ef4cfaf0 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -16,20 +16,22 @@ # limitations under the License. # -import pytest -import click.testing +import datetime import json import os -import datetime -import time import tempfile +import time -from nomad import processing as proc, files -from nomad.config import config -from nomad.search import search +import click.testing +import pytest + +from nomad import files +from nomad import processing as proc from nomad.cli import cli from nomad.cli.cli import POPO -from nomad.processing import Upload, Entry, ProcessStatus +from nomad.config import config +from nomad.processing import Entry, ProcessStatus, Upload +from nomad.search import search from nomad.utils.exampledata import ExampleData # TODO there is much more to test diff --git a/tests/test_client.py b/tests/test_client.py index 75956e8a604f67efc7dbdb7fdb0e54e2725a3037..35211bf701cd3aeed9391f3b6dc89707a19b5e68 100644 --- a/tests/test_client.py +++ b/tests/test_client.py @@ -15,29 +15,23 @@ # See the License for the specific language governing permissions and # limitations under the License. 
 #
+import json
 import os
-from typing import List, Tuple
-from httpx import AsyncClient

 import pytest
-import json
-
+from httpx import AsyncClient
 from pydantic import ValidationError

 from nomad.app.main import app
 from nomad.client.archive import ArchiveQuery
 from nomad.datamodel import EntryArchive, User
-from nomad.datamodel.metainfo import runschema, SCHEMA_IMPORT_ERROR
-from nomad.datamodel.metainfo.annotations import (
-    Rule,
-    Rules,
-)
+from nomad.datamodel.metainfo import SCHEMA_IMPORT_ERROR, runschema
+from nomad.datamodel.metainfo.annotations import Rule, Rules
 from nomad.metainfo import MSection, SubSection
 from nomad.utils.json_transformer import Transformer
 from tests.fixtures.users import users
 from tests.processing import test_data as test_processing

-
 # TODO: more tests
diff --git a/tests/test_common.py b/tests/test_common.py
index 919538e69ef708a3e7d1fcec9b9ac4a090f83316..432be865b7eb05c5376cb65f6fda6915865fbda0 100644
--- a/tests/test_common.py
+++ b/tests/test_common.py
@@ -17,7 +17,8 @@
 #
 import pytest
-from nomad.common import is_safe_relative_path, is_safe_path
+
+from nomad.common import is_safe_path, is_safe_relative_path


 @pytest.mark.parametrize(
diff --git a/tests/test_config.py b/tests/test_config.py
index 7cc0f5070576d2c59b4fed4bd1acf8cebadb24e2..20fbe5d18f437c9b80ab850331410861264a4da8 100644
--- a/tests/test_config.py
+++ b/tests/test_config.py
@@ -18,12 +18,13 @@

 import os
 import re
+
 import pytest
 import yaml
 from pydantic import ValidationError

 from nomad.config import load_config
-from nomad.config.models.plugins import Parser, Schema, ParserEntryPoint
+from nomad.config.models.plugins import Parser, ParserEntryPoint, Schema
 from nomad.utils import flatten_dict

 from .utils import assert_log
diff --git a/tests/test_doi.py b/tests/test_doi.py
index 60498040f7d707584a64bc152bcaf5729b268f54..bcb4e61df01b8f202029b884856f0fb0976f3f22 100644
--- a/tests/test_doi.py
+++ b/tests/test_doi.py
@@ -15,10 +15,12 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
+from unittest.mock import MagicMock
+
+import pytest
+
 from nomad.config import config
 from nomad.doi import DOI, DOIException
-import pytest
-from unittest.mock import MagicMock


 def test_create(mongo_function, user1, no_warn):
diff --git a/tests/test_files.py b/tests/test_files.py
index 77fc88fcf47e91ed4585cc9cc6df94a769059e7f..e2ca1d432ffd9f22b104eb4356b5d3b2a0d6a01b 100644
--- a/tests/test_files.py
+++ b/tests/test_files.py
@@ -16,31 +16,33 @@
 # limitations under the License.
 #
-from typing import Any, Dict, Tuple, List, Union
-from collections.abc import Generator, Iterable
-from datetime import datetime
+import itertools
 import os
 import os.path
+import pathlib
+import re
 import shutil
-import pytest
-import itertools
 import zipfile
-import re
-import pathlib
+from collections.abc import Generator, Iterable
+from datetime import datetime
+from typing import Any
+
+import pytest

 from nomad import datamodel, utils
-from nomad.config import config
 from nomad.archive import to_json
+from nomad.config import config
 from nomad.files import (
     DirectoryObject,
     PathObject,
-    empty_zip_file_size,
+    PublicUploadFiles,
+    StagingUploadFiles,
+    UploadFiles,
     empty_archive_file_size,
+    empty_zip_file_size,
 )
-from nomad.files import StagingUploadFiles, PublicUploadFiles, UploadFiles
 from nomad.processing import Upload

-
 EntryWithFiles = tuple[datamodel.EntryMetadata, str]
 UploadWithFiles = tuple[str, list[datamodel.EntryMetadata], UploadFiles]
 StagingUploadWithFiles = tuple[str, list[datamodel.EntryMetadata], StagingUploadFiles]
diff --git a/tests/test_logtransfer.py b/tests/test_logtransfer.py
index b67262fbeedf50b7db7a447f8e0e58da2374c5fb..122c727b07a823fff4cd0d3293b405c4f687c394 100644
--- a/tests/test_logtransfer.py
+++ b/tests/test_logtransfer.py
@@ -1,11 +1,12 @@
-import logging
 import json
-import pytest
+import logging
 import os.path

+import pytest
+
 from nomad import config, utils
-from nomad.utils import structlogging
 from nomad.logtransfer import transfer_logs
+from nomad.utils import structlogging


 @pytest.fixture(scope='function')
diff --git a/tests/test_search.py b/tests/test_search.py
index 06dd47db9e4621ff0bb0354be80d0c38ad692607..0e6eeb1f709b52d9fff347b04f1f0a243ffe8edf 100644
--- a/tests/test_search.py
+++ b/tests/test_search.py
@@ -17,9 +17,9 @@
 #

 import json
+from collections.abc import Iterable
 from datetime import datetime
-from typing import Any, Dict, List, Union
-from collections.abc import Iterable, Sequence
+from typing import Any

 import pytest

@@ -41,15 +41,8 @@ from nomad.metainfo.elasticsearch_extension import (
 )
 from nomad.metainfo.metainfo import Datetime, Quantity
 from nomad.metainfo.util import MEnum
-from nomad.search import (
-    AuthenticationRequiredError as ARE,
-)
-from nomad.search import (
-    quantity_values,
-    refresh,
-    search,
-    update_by_query,
-)
+from nomad.search import AuthenticationRequiredError as ARE
+from nomad.search import quantity_values, refresh, search, update_by_query
 from nomad.utils import deep_get
 from nomad.utils.exampledata import ExampleData
 from tests.variables import python_schema_name, yaml_schema_name
diff --git a/tests/test_test.py b/tests/test_test.py
index bbca3fb46962648c5bad3662940c5f5101f761d5..ef2353b7ac48692678fcf6afcc0de6017e366d3d 100644
--- a/tests/test_test.py
+++ b/tests/test_test.py
@@ -16,9 +16,10 @@
 # limitations under the License.
 #
-import pytest
 import logging
+
+import pytest


 @pytest.fixture()
 def my_caplog(caplog):
diff --git a/tests/test_utils.py b/tests/test_utils.py
index 1853751a0b4e5c4d6cea5de11ae5e5ed1fe5580f..11f38f65f18387fd3e9db8633888d4009e239b69 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -17,23 +17,23 @@
 #

 import time
-import json
-import pytest
+from importlib.metadata import PackageNotFoundError
+
 import pandas as pd
-import logging
+import pytest

-from nomad import utils, config
+from nomad import files, utils
 from nomad.metainfo.metainfo import MSection, Quantity, SubSection
-from nomad import files
 from nomad.processing import Upload
 from nomad.utils import (
-    structlogging,
-    flatten_dict,
-    rebuild_dict,
-    prune_dict,
+    dataframe_to_dict,
     deep_get,
     dict_to_dataframe,
-    dataframe_to_dict,
+    flatten_dict,
+    nomad_distro_metadata,
+    prune_dict,
+    rebuild_dict,
+    structlogging,
 )


@@ -315,3 +315,55 @@ class TestDictDataFrameConverter:
     def test_invalid_input_type(self, invalid_input):
         with pytest.raises(ValueError, match='Input must be a dictionary'):
             dict_to_dataframe(invalid_input)
+
+
+@pytest.mark.parametrize(
+    'project_urls, version_str, expected_url',
+    [
+        (
+            ['repository, https://github.com/example/repo'],
+            '1.2.3+gabcdef',
+            'https://github.com/example/repo/tree/abcdef',
+        ),
+        (['notrepository, https://github.com/example/repo'], '1.2.3+gabcdef', None),
+        (
+            ['repository, https://github.com/example/repo'],
+            '1.2.3',
+            'https://github.com/example/repo/tree/v1.2.3',
+        ),
+        ([], '1.2.3+gabcdef', None),
+        (['repository, '], '1.2.3+gabcdef', None),
+    ],
+)
+def test_nomad_distro_metadata(monkeypatch, project_urls, version_str, expected_url):
+    def mock_metadata(package_name):
+        class MockMetadata:
+            def get_all(self, key, default=[]):
+                if key == 'Project-URL':
+                    return project_urls
+                return default
+
+        return MockMetadata()
+
+    def mock_version(package_name):
+        return version_str
+
+    monkeypatch.setattr('nomad.utils.metadata', lambda x: mock_metadata(x))
+    monkeypatch.setattr('nomad.utils.version', mock_version)
+
+    actual_url = nomad_distro_metadata()
+    assert actual_url == expected_url
+
+
+def test_nomad_distro_package_not_found(monkeypatch):
+    def mock_metadata(package_name):
+        raise PackageNotFoundError
+
+    def mock_version(package_name):
+        return '1.2.3'
+
+    monkeypatch.setattr('nomad.utils.metadata', lambda x: mock_metadata(x))
+    monkeypatch.setattr('nomad.utils.version', mock_version)
+
+    actual_url = nomad_distro_metadata()
+    assert actual_url is None
diff --git a/tests/utils.py b/tests/utils.py
index cfd95f936b141015af038242dcb117dbab630fd3..c24fe475952acda078d98d74bebfc980be751012 100644
--- a/tests/utils.py
+++ b/tests/utils.py
@@ -18,13 +18,12 @@

 """Methods to help with testing of nomad@FAIRDI."""

-import json
 import os.path
 import urllib.parse
 import zipfile
 from logging import LogRecord
-from typing import Any, Dict, List, Union
-from structlog import get_logger
+from typing import Any
+
 import pytest