diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 7da3dfc7396c74d55120ee14c7eac41e05675d63..3e5448d5b29d1b77c6460aefc0533ce30aae7e72 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -1,27 +1,33 @@ -# default installed image for docker executor is: python:3.6 -# using an image that can do git, docker, docker-compose -image: gitlab-registry.mpcdf.mpg.de/nomad-lab/nomad-fair/ci-runner:latest +# Syntax reference: https://docs.gitlab.com/ci/yaml/ + +# Overview: +# Pipelines run only on merge requests, schedules, tags, or the develop branch (default). +# - A schedule pipeline (e.g. nightly build) runs all the jobs. +# - A tag pipeline runs most of the jobs but skips some (also, v#.#.# tags are special). +# - Merge requests and pushes to develop will run +# - few jobs if only `docs/` files changed, +# - all jobs if any other files changed. -# build directory inside -# https://gitlab.mpcdf.mpg.de/help/ci/runners/configure_runners.md#custom-build-directories +.anchors: + .non-docs-changes: &non-docs-changes + changes: + - examples + - gui + - nomad + - ops + - scripts + - tests + - "*" # all files in root + .manual-allow_failure: &manual-allow_failure + when: manual + allow_failure: true -# https://docs.gitlab.com/ee/ci/yaml/workflow.html -# https://docs.gitlab.com/ee/ci/variables/predefined_variables.html -# if: CI_COMMIT_BRANCH && CI_COMMIT_BEFORE_SHA == "0000000000000000000000000000000000000000" -# A branch pipeline, but it is the first commit for that branch -# if: $CI_COMMIT_BRANCH && $CI_OPEN_MERGE_REQUESTS && $CI_PIPELINE_SOURCE == "push" -# For an existing workflow section to switch from branch pipelines to merge request pipelines when a merge request is created. -# if: $CI_COMMIT_BRANCH && $CI_OPEN_MERGE_REQUESTS -# A branch pipeline, but a merge request is open for that branch, do not run the branch pipeline. -# if: $CI_PIPELINE_SOURCE == "merge_request_event" -# A merge request pipeline, start the pipeline. 
-# if: $CI_COMMIT_BRANCH -# A branch pipeline, but there is no merge request open for the branch, run the branch pipeline. +# using an image that can do git, docker, docker-compose +image: gitlab-registry.mpcdf.mpg.de/nomad-lab/nomad-fair/ci-runner:latest default: tags: - # Necessary to select the right CI runner - - cloud + - cloud # Necessary to select the right CI runner variables: DOCKER_TAG: ${CI_COMMIT_REF_SLUG} @@ -38,7 +44,6 @@ workflow: - if: $CI_COMMIT_TAG variables: DOCKER_TAG: ${CI_COMMIT_REF_NAME} - - when: never stages: - build @@ -46,6 +51,8 @@ stages: - deploy - release +# JOBS + update changelog: stage: build script: @@ -76,6 +83,9 @@ build gui: variables: TARGET: dev_node DESTINATION: "${CI_REGISTRY_IMAGE}/dev_node:${DOCKER_TAG}" + rules: + - if: $CI_PIPELINE_SOURCE == "schedule" || $CI_COMMIT_TAG + - <<: *non-docs-changes # kaniko image doesn't contain pip, so we have to save the scm_pretend_version in an earlier job and reuse it later update_scm_pretend_version: @@ -93,6 +103,9 @@ build python: variables: TARGET: dev_python DESTINATION: "${CI_REGISTRY_IMAGE}/dev_python:${DOCKER_TAG}" + rules: + - if: $CI_PIPELINE_SOURCE == "schedule" || $CI_COMMIT_TAG + - <<: *non-docs-changes python linting: stage: test @@ -105,7 +118,7 @@ python linting: rules: - if: $CI_COMMIT_TAG when: never - - when: on_success + - <<: *non-docs-changes artifacts: name: "nomad_code_quality" when: always @@ -119,8 +132,9 @@ python package clean up: script: - python scripts/cleanup_packages.py rules: - - when: manual - allow_failure: true + - if: $CI_PIPELINE_SOURCE == "schedule" || $CI_COMMIT_TAG + <<: *manual-allow_failure + - <<: [*non-docs-changes, *manual-allow_failure] check python dependencies: stage: test @@ -130,8 +144,9 @@ check python dependencies: rules: - if: $CI_COMMIT_TAG when: never - - when: manual - allow_failure: true + - if: $CI_PIPELINE_SOURCE == "schedule" + <<: *manual-allow_failure + - <<: [*non-docs-changes, *manual-allow_failure] .base_test: image: 
${CI_REGISTRY_IMAGE}/dev_python:${DOCKER_TAG} @@ -168,15 +183,14 @@ generate pytest timings: - python -m pytest --store-durations artifacts: expire_in: 1 days - when: on_success paths: - .test_durations - rules: - if: $CI_COMMIT_TAG when: never - - when: manual - allow_failure: true + - if: $CI_PIPELINE_SOURCE == "schedule" + <<: *manual-allow_failure + - <<: [*non-docs-changes, *manual-allow_failure] python tests: parallel: 3 @@ -187,13 +201,13 @@ python tests: - cp .coverage .coverage_${CI_NODE_INDEX} artifacts: expire_in: 1 days - when: on_success paths: - .coverage_${CI_NODE_INDEX} rules: - if: $CI_COMMIT_TAG when: never - - when: on_success + - if: $CI_PIPELINE_SOURCE == "schedule" + - <<: *non-docs-changes python coverage report: stage: test @@ -213,7 +227,8 @@ python coverage report: rules: - if: $CI_COMMIT_TAG when: never - - when: on_success + - if: $CI_PIPELINE_SOURCE == "schedule" + - <<: *non-docs-changes gui linting: stage: test @@ -227,7 +242,8 @@ gui linting: rules: - if: $CI_COMMIT_TAG when: never - - when: on_success + - if: $CI_PIPELINE_SOURCE == "schedule" + - <<: *non-docs-changes generate gui artifacts: stage: test @@ -239,7 +255,8 @@ generate gui artifacts: rules: - if: $CI_COMMIT_TAG when: never - - when: on_success + - if: $CI_PIPELINE_SOURCE == "schedule" + - <<: *non-docs-changes artifacts: paths: - gui/tests/env.js @@ -278,7 +295,8 @@ gui tests: rules: - if: $CI_COMMIT_TAG when: never - - when: on_success + - if: $CI_PIPELINE_SOURCE == "schedule" + - <<: *non-docs-changes build python package: stage: test @@ -301,13 +319,12 @@ build python package: - cp /app/tests/data/examples/example.out $CI_PROJECT_DIR/ artifacts: expire_in: 1 days - when: on_success paths: - dist/ - archive.json - example.out -install tests: +python package install tests: stage: test parallel: matrix: @@ -327,8 +344,11 @@ install tests: - python -m nomad.cli parse --skip-normalizers archive.json - uv pip install 
git+https://github.com/nomad-coe/nomad-parser-example.git@ba6027fdd4cda0cf9e0b32546bd809c8fdda79e6 - python -m exampleparser example.out + rules: + - if: $CI_PIPELINE_SOURCE == "schedule" || $CI_COMMIT_TAG + - <<: *non-docs-changes -python package: +release python package: stage: release image: ghcr.io/astral-sh/uv:$UV_VERSION-python$PYTHON_VERSION-bookworm variables: @@ -336,12 +356,11 @@ python package: script: uv publish -u gitlab-ci-token -p ${CI_JOB_TOKEN} --publish-url https://gitlab.mpcdf.mpg.de/api/v4/projects/${CI_PROJECT_ID}/packages/pypi dist/nomad-lab-*.tar.gz rules: - if: $CI_COMMIT_BRANCH == "develop" && $NIGHTLY - when: on_success - - when: manual - allow_failure: true - if: $CI_COMMIT_TAG + <<: *manual-allow_failure + - <<: [*non-docs-changes, *manual-allow_failure] -pypi package: +release pypi package: stage: release variables: GIT_STRATEGY: none @@ -350,9 +369,8 @@ pypi package: script: twine upload -u $CI_TWINE_USER -p $CI_TWINE_PASSWORD dist/nomad-lab-*.tar.gz rules: - if: $CI_COMMIT_TAG - when: manual - allow_failure: true - - when: never + <<: *manual-allow_failure + - <<: [*non-docs-changes, *manual-allow_failure] push to github: stage: release diff --git a/docs/howto/develop/code.md b/docs/howto/develop/code.md index 51e82c94db6cfcd7855d7d6a06f1aeeec7e49ce7..f74869a9feb7a510b1ffb7ed7c0b4d90a3d46d5f 100644 --- a/docs/howto/develop/code.md +++ b/docs/howto/develop/code.md @@ -5,25 +5,32 @@ about the codebase and ideas about what to look at first. ## Git Projects -There is one [main NOMAD project](https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR){:target="_blank"} -(and its [fork on GitHub](https://github.com/nomad-coe/nomad){:target="_blank"}). This project contains -all the framework and infrastructure code. It instigates all checks, builds, and -deployments for the public NOMAD service, the NOMAD Oasis, and the `nomad-lab` Python -package. All contributions to NOMAD have to go through this project eventually. 
+There is one +[main NOMAD project](https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR){:target="_blank"} +(and its [fork on GitHub](https://github.com/nomad-coe/nomad){:target="_blank"}). +This project contains all the framework and infrastructure code. It instigates all checks, +builds, and deployments for the public NOMAD service, the NOMAD Oasis, and the `nomad-lab` +Python package. All contributions to NOMAD have to go through this project eventually. All (Git) projects that NOMAD depends on are either a Git submodule (you find them all in the `dependencies` directory or its subdirectories) or they are listed as PyPI packages in the `pyproject.toml` of the main project (or one of its submodules). -You can also have a look at the [built-in plugins](../../reference/plugins.md) that constitute the majority of these projects. The only other projects are [MatID](https://github.com/nomad-coe/matid){:target="_blank"}, [DOS fingerprints](https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-dos-fingerprints){:target="_blank"}, and the [NOMAD Remote Tools Hub](https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-remote-tools-hub){:target="_blank"}. +You can also have a look at the [built-in plugins](../../reference/plugins.md) that +constitute the majority of these projects. The only other projects are +[MatID](https://github.com/nomad-coe/matid){:target="_blank"}, +[DOS fingerprints](https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-dos-fingerprints){:target="_blank"}, +and the +[NOMAD Remote Tools Hub](https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-remote-tools-hub){:target="_blank"}. !!! note - The GitLab organization [nomad-lab](https://gitlab.mpcdf.mpg.de/nomad-lab){:target="_blank"} and the - GitHub organizations for [FAIRmat](https://github.com/fairmat-nfdi){:target="_blank"} and the - [NOMAD CoE](https://github.com/nomad-coe){:target="_blank"} all represent larger infrastructure and - research projects, and they include many other Git projects that are not related. 
- When navigating the codebase, only follow the submodules. + The GitLab organization + [nomad-lab](https://gitlab.mpcdf.mpg.de/nomad-lab){:target="_blank"} and the GitHub + organizations for [FAIRmat](https://github.com/fairmat-nfdi){:target="_blank"} and the + [NOMAD CoE](https://github.com/nomad-coe){:target="_blank"} all represent larger + infrastructure and research projects, and they include many other Git projects that + are not related. When navigating the codebase, only follow the submodules. ## Python code @@ -39,22 +46,26 @@ There are three main directories with Python code: The `nomad` directory contains the following "main" modules. This list is not extensive but should help you to navigate the codebase: -- `app`: The [FastAPI](https://fastapi.tiangolo.com/){:target="_blank"} APIs: v1 and v1.2 NOMAD APIs, - [OPTIMADE](https://www.optimade.org/){:target="_blank"}, [DCAT](https://www.w3.org/TR/vocab-dcat-2/){:target="_blank"}, +- `app`: The [FastAPI](https://fastapi.tiangolo.com/){:target="_blank"} APIs: v1 and v1.2 + NOMAD APIs, + [OPTIMADE](https://www.optimade.org/){:target="_blank"}, + [DCAT](https://www.w3.org/TR/vocab-dcat-2/){:target="_blank"}, [h5grove](https://github.com/silx-kit/h5grove){:target="_blank"}, and more. - `archive`: Functionality to store and access archive files. This is the storage format for all processed data in NOMAD. See also the docs on [structured data](../../explanation/data.md). -- `cli`: The command line interface (based on [Click](https://click.palletsprojects.com){:target="_blank"}). - Subcommands are structured into submodules. +- `cli`: The command line interface (based on + [Click](https://click.palletsprojects.com){:target="_blank"}). Subcommands are + structured into submodules. - `config`: NOMAD is configured through the `nomad.yaml` file. This contains all the - ([Pydantic](https://docs.pydantic.dev/){:target="_blank"}) models and default config parameters. 
+ ([Pydantic](https://docs.pydantic.dev/){:target="_blank"}) models and default config + parameters. -- `datamodel`: The built-in schemas (e.g. `nomad.datamodel.metainfo.workflow` used to construct - workflows). The base sections and section for the shared entry structure. +- `datamodel`: The built-in schemas (e.g. `nomad.datamodel.metainfo.workflow` used to + construct workflows). The base sections and section for the shared entry structure. See also the docs on the [datamodel](../../explanation/data.md) and [processing](../../explanation/basics.md). @@ -68,7 +79,8 @@ but should help you to navigate the codebase: [processing](../../explanation/basics.md#parsing). - `processing`: It's all about processing uploads and entries. The interface to - [Celery](https://docs.celeryq.dev/en/stable/){:target="_blank"} and [MongoDB](https://www.mongodb.com). + [Celery](https://docs.celeryq.dev/en/stable/){:target="_blank"} and + [MongoDB](https://www.mongodb.com). - `units`: The unit and unit conversion system based on [Pint](https://pint.readthedocs.io){:target="_blank"}. @@ -84,16 +96,18 @@ but should help you to navigate the codebase: ## GUI code -The NOMAD UI is written as a [React](https://react.dev/){:target="_blank"} single-page application (SPA). It -uses (among many other libraries) [MUI](https://mui.com/){:target="_blank"}, -[Plotly](https://plotly.com/python/){:target="_blank"}, and [D3](https://d3js.org/){:target="_blank"}. The GUI code is -maintained in the `gui` directory. Most relevant code can be found in -`gui/src/components`. The application entry point is `gui/src/index.js`. +The NOMAD UI is written as a [React](https://react.dev/){:target="_blank"} single-page +application (SPA). It uses (among many other libraries) +[MUI](https://mui.com/){:target="_blank"}, +[Plotly](https://plotly.com/python/){:target="_blank"}, and +[D3](https://d3js.org/){:target="_blank"}. The GUI code is maintained in the `gui` +directory. 
Most relevant code can be found in `gui/src/components`. The application entry +point is `gui/src/index.js`. ## Documentation -The documentation is based on [MkDocs](https://www.mkdocs.org/){:target="_blank"}. The important files -and directories are: +The documentation is based on [MkDocs](https://www.mkdocs.org/){:target="_blank"}. The +important files and directories are: - `docs`: Contains all the Markdown files that contribute to the documentation system. @@ -101,7 +115,8 @@ and directories are: added here as well. - `nomad/mkdocs.py`: Python code that defines - [macros](https://mkdocs-macros-plugin.readthedocs.io/){:target="_blank"} which can be used in Markdown. + [macros](https://mkdocs-macros-plugin.readthedocs.io/){:target="_blank"} which can be + used in Markdown. ## Other top-level directories diff --git a/docs/howto/programmatic/api.md b/docs/howto/programmatic/api.md index a6789ec51cd3247570d25c7ed70ba103e928c3c4..3a20fa79418d6a2693cd7e116a655d97502a98ac 100644 --- a/docs/howto/programmatic/api.md +++ b/docs/howto/programmatic/api.md @@ -8,14 +8,17 @@ To access the processed data with our client library `nomad-lab` follow ## Different options to use the API -NOMAD offers all its functionality through application -programming interfaces (APIs). More specifically [RESTful HTTP APIs](https://en.wikipedia.org/wiki/Representational_state_transfer){:target="_blank"} that allows you -to use NOMAD as a set of resources (think data) that can be uploaded, accessed, downloaded, -searched for, etc. via [HTTP requests](https://en.wikipedia.org/wiki/Hypertext_Transfer_Protocol){:target="_blank"}. - -You can get an overview on all NOMAD APIs on the [API page]({{ nomad_url() }}../../gui/analyze/apis). -We will focus here on NOMAD's main API (v1). In fact, this API is also used by -the web interface and should provide everything you need. +NOMAD offers all its functionality through application programming interfaces (APIs). 
More +specifically +[RESTful HTTP APIs](https://en.wikipedia.org/wiki/Representational_state_transfer){:target="_blank"} +that allow you to use NOMAD as a set of resources (think data) that can be uploaded, +accessed, downloaded, searched for, etc. via +[HTTP requests](https://en.wikipedia.org/wiki/Hypertext_Transfer_Protocol){:target="_blank"}. + +You can get an overview of all NOMAD APIs on the +[API page]({{ nomad_url() }}../../gui/analyze/apis). We will focus here on NOMAD's main +API (v1). In fact, this API is also used by the web interface and should provide +everything you need. There are different tools and libraries to use the NOMAD API that come with different trade-offs between expressiveness, learning curve, and convenience. @@ -29,6 +32,7 @@ For example to see the metadata for all entries with elements *Ti* and *O* go he REST API's use resources located via URLs. You access URLs with `curl` or `wget`. Same *Ti*, *O* example as before: + ```sh curl "{{ nomad_url() }}/v1/entries?results.material.elements=Ti&results.material.elements=O" | python -m json.tool ``` @@ -41,8 +45,8 @@ See [the initial example](#using-request). #### Use our dashboard -The NOMAD API has an [OpenAPI dashboard]({{ nomad_url() }}/v1). This is an interactive documentation of all -API functions that allows you to try these functions in the browser. +The NOMAD API has an [OpenAPI dashboard]({{ nomad_url() }}/v1). This is an interactive +documentation of all API functions that allows you to try these functions in the browser. #### Use NOMAD's Python package @@ -118,10 +122,10 @@ This will give you something like this: } ``` -The `entry_id` is a unique identifier for, well, entries. You can use it to access -other entry data. For example, you want to access the entry's archive. More -precisely, you want to gather the formula and energies from the main workflow result. -The following requests the archive based on the `entry_id` and only requires some archive sections.
+The `entry_id` is a unique identifier for, well, entries. You can use it to access other +entry data. For example, you want to access the entry's archive. More precisely, you want +to gather the formula and energies from the main workflow result. The following requests +the archive based on the `entry_id` and only requires some archive sections. ```py first_entry_id = response_json['data'][0]['entry_id'] @@ -222,11 +226,13 @@ The result will look like this: } ``` -You can work with the results in the given JSON (or respective Python dict/list) data already. -If you have [NOMAD's Python library](./pythonlib.md) installed , -you can take the archive data and use the Python interface. -The [Python interface](../plugins/schema_packages.md#wrap-data-with-python-schema-classes) will help with code-completion (e.g. in notebook environments), -resolve archive references (e.g. from workflow to calculation to system), and allow unit conversion: +You can work with the results in the given JSON (or respective Python dict/list) data +already. If you have [NOMAD's Python library](./pythonlib.md) installed, you can take the +archive data and use the Python interface. The +[Python interface](../plugins/schema_packages.md#wrap-data-with-python-schema-classes) +will help with code-completion (e.g. in notebook environments), resolve archive references +(e.g. from workflow to calculation to system), and allow unit conversion: + ```py from nomad.datamodel import EntryArchive from nomad.metainfo import units @@ -238,6 +244,7 @@ print(result.energy.total.value.to(units('eV'))) ``` This will give you an output like this: + ``` OOSrTiOOOSrTiOOOSrTiOFF -355626.93095025205 electron_volt @@ -252,7 +259,8 @@ the API: - Raw files, the files as they were uploaded to NOMAD. - Archive data, all of the extracted data for an entry. 
-There are also different entities (see also [Datamodel](../../explanation/basics.md)) with different functions in the API: +There are also different entities (see also [Datamodel](../../explanation/basics.md)) +with different functions in the API: - Entries - Uploads @@ -275,17 +283,20 @@ Let's discuss some of the common concepts. ### Response layout -Functions that have a JSON response, will have a common layout. First, the response will contain all keys and values of the request. The request is not repeated verbatim, but -in a normalized form. Abbreviations in search queries might be expanded, default values for optional parameters are added, or additional response specific information -is included. Second, the response will contain the results under the key `data`. +Functions that have a JSON response will have a common layout. First, the response will +contain all keys and values of the request. The request is not repeated verbatim, but +in a normalized form. Abbreviations in search queries might be expanded, default values +for optional parameters are added, or additional response-specific information is +included. Second, the response will contain the results under the key `data`. ### Owner -All functions that allow a query will also allow to specify the `owner`. Depending on -the API function, its default value will be mostly `visible`. Some values are only -available if you are [logged in](#authentication). +All functions that allow a query also allow you to specify the `owner`. Depending on the +API function, its default value will be mostly `visible`. Some values are only available +if you are [logged in](#authentication). {{ doc_snippet('owner')}} + ### Queries {{ doc_snippet('query') }} @@ -293,10 +304,11 @@ available if you are [logged in](#authentication). ### Pagination When you issue a query, usually not all results can be returned. Instead, an API returns -only one *page*.
This behavior is controlled through pagination parameters, -like `page_site`, `page`, `page_offset`, or `page_after_value`. +only one *page*. This behavior is controlled through pagination parameters, like +`page_size`, `page`, `page_offset`, or `page_after_value`. Let's consider a search for entries as an example. + ```py response = requests.post( f'{base_url}/entries/query', @@ -313,8 +325,9 @@ response = requests.post( ) ``` -This will only result in a response with a maximum of 10 entries. The response will contain a -`pagination` object like this: +This will only result in a response with a maximum of 10 entries. The response will +contain a `pagination` object like this: + ```json { "page_size": 10, @@ -345,10 +358,11 @@ response = requests.post( } ) ``` + You will get the next 10 results. -Here is a full example that collects the first 100 formulas from entries that match -a certain query by paginating. +Here is a full example that collects the first 100 formulas from entries that match a +certain query by paginating. ```python --8<-- "examples/docs/api/pagination.py" @@ -357,7 +371,8 @@ a certain query by paginating. ### Authentication Most of the API operations do not require any authorization and can be freely used -without a user or credentials. However, to upload, edit, or view your own and potentially unpublished data, the API needs to authenticate you. +without a user or credentials. However, to upload, edit, or view your own and potentially +unpublished data, the API needs to authenticate you. The NOMAD API uses OAuth and tokens to authenticate users. We provide simple operations that allow you to acquire an *access token* via username and password: @@ -432,9 +447,9 @@ curl "{{ nomad_url() }}/v1/entries/raw?results.material.elements=Ti&results.mate ``` ## Access processed data (archives) -Above under [using requests](#using-request), you've already learned how to access -archive data.
A special feature of the archive API functions is that you can define what is `required` -from the archives. +Above under [using requests](#using-request), you've already learned how to access archive +data. A special feature of the archive API functions is that you can define what is +`required` from the archives. ```py response = requests.post( @@ -482,13 +497,14 @@ or 10 concurrent requests. Consider to use endpoints that allow you to retrieve full pages of resources, instead of endpoints that force you to access resources one at a time. -See also the sections on [types of data](#different-kinds-of-data) and [pagination](#pagination). +See also the sections on [types of data](#different-kinds-of-data) and +[pagination](#pagination). However, pagination also has its limits and you might ask for pages that are too large. -If you get responses in the 400 range, e.g. **422 Unprocessable Content** or **400 Bad request**, -you might hit an api limit. Those responses are typically accompanied by an error message -in the response body that will inform you about the limit, e.g. the maximum allowed -page size. +If you get responses in the 400 range, e.g. **422 Unprocessable Content** or +**400 Bad Request**, you might hit an API limit. Those responses are typically accompanied +by an error message in the response body that will inform you about the limit, e.g. the +maximum allowed page size. ## User Groups