diff --git a/docs/api.md b/docs/api.md index 6b55af62ae75dd0bcee56ffec58376ec7b7a9797..04c5f5d2774b00c709edbec37bf3d6117427b9cf 100644 --- a/docs/api.md +++ b/docs/api.md @@ -397,7 +397,7 @@ curl "{{ nomad_url() }}/v1/entries/raw/download?results.material.elements=Ti&res ## Access archives Above under [getting started](#getting started), you'll already learned how to access -archive data. A speciality of archive API functions is that you can define is `required` +archive data. A speciality of archive API functions is that you can define what is `required` from the archives. ``` @@ -419,4 +419,6 @@ response = requests.post( }) ``` -{{ doc_snippet('archive-required') }} \ No newline at end of file +{{ doc_snippet('archive-required') }} + +{{ metainfo_data() }} \ No newline at end of file diff --git a/docs/pythonlib.md b/docs/pythonlib.md index 514ba87cdb8d7829971e61fc987065ba1f11151e..9ce5ec085ad7d9b84820450fce4b933d20fa3221 100644 --- a/docs/pythonlib.md +++ b/docs/pythonlib.md @@ -1,3 +1,202 @@ -# The NOMAD Python package -- nomad-lab +# Using the Python library -comming soon ... \ No newline at end of file +NOMAD provides a Python package called `nomad-lab`. +## Install + +The package is hosted on [pypi](https://pypi.org/project/nomad-lab/) +and you can install it with *pip* (or conda). + +To install the latest stable pypi release, simply use pip: +```sh +pip install nomad-lab +``` + +Since NOMAD v1 is still in beta, this will still give you the Python library for +the NOMAD v0.10.x version. + +To install the latest developer release (e.g. v1) from our servers use: +```sh +pip install nomad-lab --extra-index-url https://gitlab.mpcdf.mpg.de/api/v4/projects/2187/packages/pypi/simple +``` + +There are different layers of dependencies that you have to install, in order to use certain functions of NOMAD. +The base install above will only install the packages necessary for +accessing the NOMAD Archive and using the NOMAD metainfo (see access the archive). 
+ +Other functions, e.g. using the NOMAD parsers to parse your code output, require additional dependencies. +You can use the [extra] notation to install these extra requirements: + +``` +pip install nomad-lab[parsing] +pip install nomad-lab[infrastructure] +pip install nomad-lab[dev] +pip install nomad-lab[all] +``` +The various extras have the following meaning: + +- *parsing*, everything necessary to run the parsers + +- *infrastructure*, everything to run NOMAD services + +- *dev*, additional tools that are necessary to develop NOMAD + +- *all*, all of the above + +## Access parsed NOMAD data with `ArchiveQuery` + +The `ArchiveQuery` allows you to search for entries and access their parsed *archive* data +at the same time. Furthermore, all data is accessible through a convenient Python interface +based on the [NOMAD metainfo](metainfo.html) rather than plain JSON. + +Here is an example: +```py +query = ArchiveQuery( + query={ + 'results.method.simulation.program_name': 'VASP', + 'results.material.elements': ['Ti', 'O'], + 'results.method.simulation.geometry_optimization': { + 'convergence_tolerance_energy_difference:lt': 1e-22 + } + }, + required={ + 'workflow': { + 'calculation_result_ref': { + 'energy': '*', + 'system_ref': { + 'chemical_composition_reduced': '*' + } + } + } + }, + parallel=10, + max=100) +``` + +This instantiates an `ArchiveQuery`. You can print some details about the query: + +```py +print(query) +``` + +This gives you a general overview about the query. For example what search is used on +the NOMAD API. How many entries were found. What was already downloaded, etc. 
+``` +Query: { + "and": [ + { + "results.method.simulation.program_name": "VASP", + "results.material.elements": [ + "Ti", + "O" + ], + "results.method.simulation.geometry_optimization": { + "convergence_tolerance_energy_difference:lt": 1e-22 + } + }, + { + "quantities": [ + "run.system.chemical_composition_reduced", + "run.calculation.system_ref", + "run.calculation.energy", + "workflow", + "workflow.calculation_result_ref" + ] + } + ] +} +Total number of entries that fulfil the query: 252 +Number queried entries: 252 +Number of entries loaded in the last api call: 70 +Bytes loaded in the last api call: 53388 +Bytes loaded from this query: 53388 +Number of downloaded entries: 70 +Number of made api calls: 1 +``` + +This `ArchiveQuery` does not download all archive data immediately. More and more data +will be downloaded as you iterate through the query: +```py +for result in query: + calc = result.workflow[0].calculation_result_ref + formula = calc.system_ref.chemical_composition_reduced + total_energy = calc.energy.total.value.to(units.eV) + print(f'{formula}: {total_energy}') +``` + +The resulting output can look like this: +``` +O10K2Ti3La2: -136.76387842 electron_volt +Li2O10Ti3La2: -139.15455203 electron_volt +O8Ti4: -107.30373862 electron_volt +O8Ca2Ti4: -116.52240913000001 electron_volt +... +``` + +Let's discuss the used `ArchiveQuery` parameters: + +- `query`, this is an arbitrary API query as discussed under [Queries in the API section](api.html#queries). +- `required`, this optional parameter allows you to specify what parts of an archive you need. This is also +described under [Access archives in the API section](api.html#access-archives). +- `per_page`, with this optional parameter you can determine how many results are downloaded at a time. For bulk downloading many results, we recommend ~100. If you are just interested in the first results a lower number might increase performance. 
+- `max`, with this optional parameter, we limit the maximum amount of entries that are downloaded, just to avoid accidentally iterating through a result set of unknown and potentially large size. +- `owner` and `auth`, allow you to access private data or specify you only want to +query your data. See also [owner](api.html#owner) and [auth](api.html#authentication) in the API section. Here is an example with authentication: +```py +from nomad.client import ArchiveQuery, Auth + +query = ArchiveQuery( + owner='user', + required={ + 'run': { + 'system[-1]': '*' + } + }, + authentication=Auth(user='yourusername', password='yourpassword')) +``` + +The archive query object can be treated as a Python list-like. You use indices and ranges to select results. Each result is a Python object. The attributes of these objects are +determined by NOMAD's schema, [the metainfo and its Python interface](metainfo). +The energy value in the earlier example is a number with an attached unit (Joule), which can be converted to something else (e.g. eV). {{ metainfo_data() }} + +The created query object keeps all results in memory. Keep this in mind when you are accessing a large amount of query results. + +## Use NOMAD parser locally + +If you install `nomad-lab[parsing]`, you can use the NOMAD parsers locally on your computer. +To use the NOMAD parsers from the command line, you can use the parse CLI command. The parse command will automatically match the right parser to your code output file and run the parser. There are two output formats, `--show-metadata` (a JSON representation of the basic metadata), `--show-archive` (a JSON representation of the full parse results). + +```sh +nomad parse --show-archive <path-to-your-mainfile-code-output-file> +``` + +You can also use the NOMAD parsers from within Python. This will give you the parse results as metainfo objects to conveniently analyse the results in Python. See metainfo for more details on how to use the metainfo in Python. 
+ +```python +import sys +from nomad.client import parse, normalize_all + +# match and run the parser +archive = parse(sys.argv[1]) +# run all normalizers +normalize_all(archive) + +# get the 'main section' section_run as a metainfo object +section_run = archive.run[0] + +# get the same data as JSON serializable Python dict +python_dict = section_run.m_to_dict() +``` + + +You can also clone a parser project and use this to debug or fix a parser: +```sh +git clone https://github.com/nomad-coe/nomad-parser-vasp.git +cd nomad-parser-vasp +git checkout metainfo-refactor +python -m nomad.cli nomad parser --show-archive <path-to-your-vasp-file> +``` + +Our parsers are hosted in github. They are in the [nomad-coe](https://github.com/nomad-coe) organization. They are typically named `nomad-parser-<code-name>`. The parser version +that fits the NOMAD v1 metainfo schema is typically in the `metainfo-refactor` branch. +Run the CLI with `python -m nomad.cli` to automatically include the current working directory +in the Python path. This will use the cloned parser code over the installed parser code. 
\ No newline at end of file diff --git a/examples/archive/archive_query.py b/examples/archive/archive_query.py index 6c651688489efffbd9c27a1a6de09b15e414dc05..dea765eed240d8ed2400d09576e01c5478def17a 100644 --- a/examples/archive/archive_query.py +++ b/examples/archive/archive_query.py @@ -3,24 +3,33 @@ A simple example used in the NOMAD webinar API tutorial ''' from nomad.client import ArchiveQuery +from nomad.metainfo import units query = ArchiveQuery( - url='http://nomad-lab.eu/prod/rae/api', query={ - 'dft.code_name': 'VASP', - 'atoms': ['Ti', 'O'] + 'results.method.simulation.program_name': 'VASP', + 'results.material.elements': ['Ti', 'O'], + 'results.method.simulation.geometry_optimization': { + 'convergence_tolerance_energy_difference:lt': 1e-22, + } }, required={ - 'section_run': { - 'section_single_configuration_calculation[-1]': { - 'energy_total': '*', - 'section_dos': '*' + 'workflow': { + 'calculation_result_ref': { + 'energy': '*', + 'system_ref': { + 'chemical_composition_reduced': '*' + } } } }, - parallel=1, - max=10) + parallel=10, + max=100) print(query) -result = query[0] -print(result.section_run[0].section_single_configuration_calculation[-1].section_dos[0].dos_energies) + +for result in query: + calc = result.workflow[0].calculation_result_ref + formula = calc.system_ref.chemical_composition_reduced + total_energy = calc.energy.total.value.to(units.eV) + print(f'{formula}: {total_energy}') diff --git a/examples/archive/authenticated.py b/examples/archive/authenticated.py new file mode 100644 index 0000000000000000000000000000000000000000..ee9d5966b168689fb1a382f063893c40efa94775 --- /dev/null +++ b/examples/archive/authenticated.py @@ -0,0 +1,16 @@ +''' +A simple example used in the NOMAD webinar API tutorial +''' + +from nomad.client import ArchiveQuery, Auth + +query = ArchiveQuery( + owner='user', + required={ + 'run': { + 'system[-1]': '*' + } + }, + authentication=Auth(user='yourusername', password='yourpassword')) + +print(query) diff 
--git a/examples/archive/client.ipynb b/examples/archive/client.ipynb deleted file mode 100644 index 5a1a3d29aaa9ba7c2fc3074a7a3188d1b5300e72..0000000000000000000000000000000000000000 --- a/examples/archive/client.ipynb +++ /dev/null @@ -1,248 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "ExecuteTime": { - "end_time": "2020-04-07T16:02:49.442840Z", - "start_time": "2020-04-07T16:02:49.414197Z" - } - }, - "outputs": [], - "source": [ - "from nomad import config\n", - "# this will not be necessary, once this is the official NOMAD version\n", - "config.client.url = 'http://labdev-nomad.esc.rzg.mpg.de/fairdi/nomad/testing-major/api'" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": { - "ExecuteTime": { - "end_time": "2020-04-07T16:02:51.148955Z", - "start_time": "2020-04-07T16:02:49.788395Z" - } - }, - "outputs": [], - "source": [ - "import matplotlib.pyplot as plt\n", - "\n", - "from nomad.client import query_archive\n", - "from nomad.metainfo import units" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": { - "ExecuteTime": { - "end_time": "2020-04-07T16:02:55.371271Z", - "start_time": "2020-04-07T16:02:51.150679Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "total 2398\n", - "-4.506215193227115e-18 joule\n", - "-5.8515279132483456e-18 joule\n", - "-4.251737530685669e-18 joule\n", - "-3.2217721980540896e-18 joule\n", - "-4.437416946700958e-18 joule\n", - "Number queries entries: 2398\n", - "Number of entries loaded in the last api call: 100\n", - "Bytes loaded in the last api call: 34531\n", - "Bytes loaded from this query: 345235\n", - "Number of downloaded entries: 1000\n", - "Number of made api calls: 10\n", - "\n" - ] - } - ], - "source": [ - "aq = query_archive(\n", - " query={\n", - " 'upload_id': ['b5rGMO6dT4Gzqn3JaLjPpw']\n", - " },\n", - " required={\n", - " 'section_run': {\n", - " 
'section_single_configuration_calculation[0]': {\n", - " 'energy_total': '*'\n", - " } \n", - " }\n", - " }, \n", - " per_page=100, max=1000)\n", - "\n", - "print('total', aq.total)\n", - "\n", - "for i, e in enumerate(aq):\n", - " if i % 200 == 0:\n", - " print(e.section_run[0].section_single_configuration_calculation[0].energy_total)\n", - " \n", - "print(aq)" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": { - "ExecuteTime": { - "end_time": "2020-04-07T16:02:55.379162Z", - "start_time": "2020-04-07T16:02:55.373328Z" - } - }, - "outputs": [ - { - "data": { - "text/plain": [ - "{'section_run': [{'section_single_configuration_calculation': [{'energy_total': -4.506215193227115e-18}]}]}" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "aq[0].m_to_dict()" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": { - "ExecuteTime": { - "end_time": "2020-04-07T16:02:56.680987Z", - "start_time": "2020-04-07T16:02:55.381150Z" - } - }, - "outputs": [], - "source": [ - "aq = query_archive(\n", - " query={\n", - " 'dft.quantities': ['section_dos']\n", - " },\n", - " per_page=1)\n", - "entry = aq[0]" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": { - "ExecuteTime": { - "end_time": "2020-04-07T16:02:56.686165Z", - "start_time": "2020-04-07T16:02:56.682662Z" - } - }, - "outputs": [], - "source": [ - "run = entry.section_run[0]\n", - "dos = next(\n", - " scc.section_dos[0] \n", - " for scc in run.section_single_configuration_calculation\n", - " if len(scc.section_dos) > 0)" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": { - "ExecuteTime": { - "end_time": "2020-04-07T16:02:56.927561Z", - "start_time": "2020-04-07T16:02:56.687981Z" - } - }, - "outputs": [ - { - "data": { - "text/plain": [ - "[<matplotlib.lines.Line2D at 0x1157d5898>]" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - }, - 
{ - "data": { - "image/png": "\n", - "text/plain": [ - "<Figure size 432x288 with 1 Axes>" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - } - ], - "source": [ - "plt.plot(dos.dos_energies.to(units.hartree).m, dos.dos_values[0])" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": { - "ExecuteTime": { - "end_time": "2020-04-07T16:02:56.933874Z", - "start_time": "2020-04-07T16:02:56.929657Z" - } - }, - "outputs": [ - { - "data": { - "text/plain": [ - "'Al6Si40Sr8'" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "next(system for system in run.section_system if system.is_representative).chemical_composition_bulk_reduced" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "file_extension": ".py", - "kernelspec": { - "display_name": "Python 3.6.3 64-bit ('.pyenv': virtualenv)", - "language": "python", - "name": "python36364bitpyenvvirtualenv11a6404af23a4e178b049a429667c260" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.3" - }, - "mimetype": "text/x-python", - "name": "python", - "npconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": 3 - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/examples/archive/client.py b/examples/archive/client.py deleted file mode 100644 index 3241b4f0972d45aab5feb41d2ee8edcdd5a2ece0..0000000000000000000000000000000000000000 --- a/examples/archive/client.py +++ /dev/null @@ -1,30 +0,0 @@ -''' -A simple example that uses the NOMAD client library to access the archive. 
-''' - -from nomad.client import ArchiveQuery -from nomad.metainfo import units - - -query = ArchiveQuery( - # url='http://nomad-lab.eu/prod/rae/beta/api', - query={ - 'dft.code_name': 'VASP' - }, - required={ - 'section_run': { - 'section_single_configuration_calculation': '*', - 'section_system': '*' - } - }, - per_page=10, - max=100) - -print(query) - -for i, result in enumerate(query): - if i < 10: - calc = result.section_run[0].section_single_configuration_calculation[-1] - energy = calc.energy_total - formula = calc.single_configuration_calculation_to_system_ref.chemical_composition_reduced - print('%s: energy %s' % (formula, energy.to(units.hartree))) diff --git a/examples/archive/client_workflows.py b/examples/archive/client_workflows.py deleted file mode 100644 index ac6fe94843c6a8daf309db5cfad8cd1381c30d9f..0000000000000000000000000000000000000000 --- a/examples/archive/client_workflows.py +++ /dev/null @@ -1,38 +0,0 @@ -''' -A simple example that uses the NOMAD client library to access the archive. 
-''' - -from nomad.client import ArchiveQuery -from nomad.metainfo import units - - -query = ArchiveQuery( - query={ - '$and': [ - {'dft.code_name': 'VASP'}, - {'$gte': {'n_atoms': 3}}, - {'$lte': {'dft.workflow.section_geometry_optimization.final_energy_difference': 1e-24}} - ] - }, - required={ - 'section_workflow': { - 'calculation_result_ref': { - 'energy_total': '*', - 'single_configuration_calculation_to_system_ref': { - 'chemical_composition_reduced': '*' - } - } - } - }, - parallel=10, - per_page=10, - max=100) - -for i, result in enumerate(query): - if i < 10: - calc = result.section_workflow.calculation_result_ref - energy = calc.energy_total - formula = calc.single_configuration_calculation_to_system_ref.chemical_composition_reduced - print('%s: energy %s' % (formula, energy.to(units.hartree))) - -print(query) diff --git a/examples/parse.py b/examples/parse.py index a3516fd9684a1795bf4583f08fa7ca3c65b122df..7f194146324698b0d8d44b46ff30685db386045b 100644 --- a/examples/parse.py +++ b/examples/parse.py @@ -7,7 +7,7 @@ archive = parse(sys.argv[1]) normalize_all(archive) # get the 'main section' section_run as a metainfo object -section_run = archive.section_run[0] +section_run = archive.run[0] # get the same data as JSON serializable Python dict python_dict = section_run.m_to_dict() diff --git a/mkdocs.yml b/mkdocs.yml index 0b6f72f5922e8d61bd48e7198322abcfbf2fdb51..b2fd8274e6842a905b0547b6044bcb86309b03bd 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -6,8 +6,8 @@ nav: - Introduction: index.md - web.md - api.md - - Using the AI Toolkit and other remote tools: aitoolkit.md - - Using the Python library: pythonlib.md + - pythonlib.md + # - Using the AI Toolkit and other remote tools: aitoolkit.md - Extending and Developing NOMAD: - developers.md - metainfo.md diff --git a/nomad/client/archive.py b/nomad/client/archive.py index 4655a05469a75b04a0829ab2f36d1ea2b0c2f830..d899ceb7204e9ff678a4ed24e6562922a13b6dd6 100644 --- a/nomad/client/archive.py +++ 
b/nomad/client/archive.py @@ -17,117 +17,10 @@ # ''' -Access the NOMAD archive with the NOMAD client library -______________________________________________________ - Retrieve and analyse (large amounts) of NOMAD's archive data. This allows to use queries to filter for desired entries, bulk download the required parts of the respective archives, and navigate the results using NOMAD's metainfo Python API. - -.. literalinclude:: ../examples/archive/client.py - :language: python - -This script should yield a result like this: - -.. code:: - - Number queries entries: 7628 - Number of entries loaded in the last api call: 10 - Bytes loaded in the last api call: 118048 - Bytes loaded from this query: 118048 - Number of downloaded entries: 10 - Number of made api calls: 1 - - Cd2O2: energy -11467.827149010665 hartree - Sr2O2: energy -6551.45699684026 hartree - Sr2O2: energy -6551.461104765451 hartree - Be2O2: energy -178.6990610734937 hartree - Ca2O2: energy -1510.3938165430286 hartree - Ca2O2: energy -1510.3937761449583 hartree - Ba2O2: energy -16684.667362890417 hartree - Mg2O2: energy -548.9736595672932 hartree - Mg2O2: energy -548.9724185656775 hartree - Ca2O2: energy -1510.3908614326358 hartree - -Let's discuss the different elements here. First, we have a set of imports. The NOMAD source -codes comes with various sub-modules. The `client` module contains everything related -to what is described here; the `metainfo` is the Python interface to NOMAD's common -archive data format and its data type definitions; the `config` module simply contains -configuration values (like the URL to the NOMAD API). - -Next, we create an :class:`ArchiveQuery` instance. This object will be responsible for talking -to NOMAD's API for us in a transparent and lazy manner. This means, it will not download -all data right away, but do so when we are actually iterating through the results. - -The archive query takes several parameters: - -- The ``query`` is a dictionary of search criteria. 
The query is used to filter all of NOMAD's - entry down to a set of desired entries. You can use NOMAD's GUI to create queries and - copy their Python equivalent with the ``<>``-code button on the result list. -- The ``required`` part, allows to specify what parts of the archive should be downloaded. - Leave it out to download the whole archives. Based on NOMAD's Metainfo (the 'schema' of - all archives), you can determine what sections to include and which to leave out. Here, - we are interested in the first run (usually entries only have one run) and the first - calculation result. -- With the optional ``per_page`` you can determine, how many results are downloaded at - a time. For bulk downloading many results, we recommend ~100. If you are just interested - in the first results a lower number might increase performance. -- With the optional ``max``, we limit the maximum amount of entries that are downloaded, - just to avoid accidentely iterating through a result set of unknown and potentially large - size. - -When you print the archive query object, you will get some basic statistics about the -query and downloaded data. - -The archive query object can be treated as a Python list-like. You use indices and ranges -to select results. Here we iterate through a slice and print the calculated energies -from the first calculation of the entries. Each result is a Python object with attributes -governed by the NOMAD Metainfo. Quantities yield numbers, string, or numpy arrays, while -sub-sections return lists of further objects. Here we navigate the sections ``run`` and -sub-section ``energy`` and sub-section ``total`` to access the quantity ``value``. This quantity is a -number with an attached unit (Joule), which can be converted to something else (e.g. Hartree). - -The create query object keeps all results in memory. Keep this in mind, when you are -accessing a large amount of query results. You should use :func:`ArchiveQuery.clear` -to remove unnecessary results. 
- -The NOMAD Metainfo -__________________ - -You can imagine the NOMAD Metainfo as a complex schema for hiearchically organized scientific -data. In this sense, the NOMAD Metainfo is a set of data type definitions. These definitions -then govern how the archive for an data entry in NOMAD might look like. You can browse the -hierarchy of definitions in our `Metainfo browser <../metainfo>`_. - -Be aware, that the definitions entail everything that an entry could possibly contain, but -not all entries contain all sections and all quantities. What an entry contains depends -on the information that the respective uploaded data contained, what could be extracted, -and of course what was calculated in the first place. To see what the archive of an concrete -entry looks like, you can use the `search interface <../search>`_, select an entry from the -list fo search results, and click on the *Archive* tab. - -To *see inside* an archive object in Python, you can use :func:`nomad.metainfo.MSection.m_to_dict` -which is provided by all archive objects. This will convert a (part of an) archive into a -regular, JSON-serializable Python dictionary. - -For more details on the metainfo Python interface, consult the `metainfo documentation <metainfo.html>`_. - -The ArchiveQuery class -______________________ - -.. autoclass:: ArchiveQuery - -Working with private data -_________________________ - -Public NOMAD data can be accessed without any authentication; everyone can use our API -without the need for an account or login. However, if you want to work with your own -data that is not yet published, or embargoed data was shared with you, you need to -authenticate before accessing this data. Otherwise, you will simply not find it with -your queries. To authenticate simply provide your NOMAD username and password to the -:class:`ArchiveQuery` constructor. 
- ''' from typing import Dict, Any, List @@ -136,6 +29,7 @@ import requests from io import StringIO import math import multiprocessing +import json from nomad import config from nomad import metainfo as mi @@ -152,6 +46,9 @@ class QueryError(Exception): class ApiStatistics(mi.MSection): + total = mi.Quantity( + type=int, default=0, + description='Total number of entries that fulfil the query') nentries = mi.Quantity( type=int, default=0, @@ -194,7 +91,7 @@ class ProcState: self.url = archive_query.url self.query_and_list = archive_query.query_and_list self.request: Dict[str, Any] = dict( - owner='visible', + owner=archive_query.owner, required=archive_query.required) self.per_page = archive_query.per_page self.authentication = archive_query.authentication @@ -279,12 +176,13 @@ class ArchiveQuery(collections.abc.Sequence): call. ''' def __init__( - self, + self, owner: str = 'visible', query: dict = None, required: dict = None, url: str = None, username: str = None, password: str = None, parallel: int = 1, per_page: int = 10, max: int = 10000, - authentication: Auth = None): + authentication: Auth = None, auth: Auth = None): + self.owner = owner self.page = 1 self.parallel = parallel self.per_page = per_page @@ -300,32 +198,57 @@ class ArchiveQuery(collections.abc.Sequence): # results with those properties are returned. 
quantities = set() - def collect(required, parent_def_name: str = None): + def collect(required, parent_section: mi.Section, parent_path: str = None): if not isinstance(required, dict): return for key, value in required.items(): def_name = key.split('[')[0] - qualified_def_name = def_name - if parent_def_name: - qualified_def_name = f'{parent_def_name}.{def_name}' + definition = parent_section.all_properties.get(def_name) + if definition is None: + raise KeyError(f'{def_name} is not a property of {parent_section}') + + if parent_path: + qualified_def_name = f'{parent_path}.{def_name}' + else: + qualified_def_name = def_name quantities.add(qualified_def_name) - collect(value, qualified_def_name) - collect(required) + if isinstance(definition, mi.SubSection): + collect( + value, + parent_section=definition.section_def, + parent_path=qualified_def_name) + elif isinstance(definition, mi.Quantity) and isinstance(definition.type, mi.Reference): + next_parent_section = definition.type.target_section_def.m_resolved() + next_parent_path = next_parent_section.path  # keep parent_path for siblings + if next_parent_path in ['__ambiguous__', '__no_archive_path__']: + continue + collect( + value, + parent_section=next_parent_section, + parent_path=next_parent_path) + + collect(required, parent_section=EntryArchive.m_def) self.query_and_list.append({'quantities': list(quantities)}) self.password = password self.username = username self.url = config.client.url if url is None else url self._authentication = authentication + if not self._authentication: + self._authentication = auth self._total = -1 self._results: List[dict] = [] self._statistics = ApiStatistics() self._proc_states: List[ProcState] = None + @property + def query(self): + return {'and': self.query_and_list} + @property def authentication(self): ''' @@ -352,10 +275,8 @@ class ArchiveQuery(collections.abc.Sequence): while True: uploads_request = { - 'owner': 'visible', - 'query': { - 'and': self.query_and_list - }, + 'owner': self.owner, + 'query': self.query, 
'pagination': { 'page_size': 0 }, @@ -382,6 +303,7 @@ class ArchiveQuery(collections.abc.Sequence): raise Exception( 'Error requesting NOMAD API: HTTP %d' % response.status_code) + total = response_json['pagination']['total'] agg_data = response_json['aggregations']['uploads']['terms'] after = agg_data['pagination'].get('next_page_after_value', None) values = {bucket['value']: bucket for bucket in agg_data['data']} @@ -420,6 +342,7 @@ class ArchiveQuery(collections.abc.Sequence): self._proc_states.append(proc_state) self._total = nentries + self._statistics.total = total self._statistics.nentries = nentries def call_api(self): @@ -475,7 +398,7 @@ class ArchiveQuery(collections.abc.Sequence): if self._total == -1: self.call_api() - return str(self._statistics) + return f'Query: {json.dumps(self.query, indent=2)}\n{self._statistics}' def __getitem__(self, key): if isinstance(key, slice): diff --git a/nomad/datamodel/datamodel.py b/nomad/datamodel/datamodel.py index 76299f7c391ec37e1757d83373fa5ece2db85b2c..df3ee99c566963fc16aabd28c4f9029b6fa4945a 100644 --- a/nomad/datamodel/datamodel.py +++ b/nomad/datamodel/datamodel.py @@ -701,3 +701,6 @@ class EntryArchive(metainfo.MSection): sub_section=Results, categories=[FastAccess], a_elasticsearch=Elasticsearch(auto_include_subsections=True)) + + +m_package.__init_metainfo__() diff --git a/nomad/metainfo/metainfo.py b/nomad/metainfo/metainfo.py index dfd0b51675b052a59494394d538ae4569f5d1598..f06e352551bbcbb079ebf7135cb64f1ff6dd6a13 100644 --- a/nomad/metainfo/metainfo.py +++ b/nomad/metainfo/metainfo.py @@ -2637,6 +2637,8 @@ class SubSection(Property): times in the parent section. ''' + _used_sections: Dict['Section', Set['SubSection']] = {} + sub_section: 'Quantity' = _placeholder_quantity repeats: 'Quantity' = _placeholder_quantity @@ -2824,6 +2826,9 @@ class Section(Definition): A helper attribute that gives all inner_section_definitions including their aliases by name. 
+ path: Shortest path from a root section to this section. This is not the path + in the metainfo schema (`m_path`) but a archive path in potential data. + event_handlers: Event handler are functions that get called when the section data is changed. There are two types of events: ``set`` and ``add_sub_section``. The handler type @@ -2859,6 +2864,7 @@ class Section(Definition): all_sub_sections_by_section: 'Quantity' = _placeholder_quantity all_aliases: 'Quantity' = _placeholder_quantity all_inner_section_definitions: 'Quantity' = _placeholder_quantity + path: 'Quantity' = _placeholder_quantity def __init__(self, *args, validate: bool = True, **kwargs): self._section_cls: Type[MSection] = None @@ -3016,8 +3022,10 @@ class Package(Definition): if isinstance(content.type, MProxy): content.type.m_proxy_resolve() elif isinstance(content, SubSection): - if isinstance(content.sub_section, MProxy): - content.sub_section.m_proxy_resolve() + target = content.sub_section + if isinstance(target, MProxy): + target = target.m_proxy_resolve() + SubSection._used_sections.setdefault(target, []).append(content) elif isinstance(content, Section): for base_section in content.base_sections: if isinstance(base_section, MProxy): @@ -3227,6 +3235,30 @@ def all_inner_section_definitions(self) -> Dict[str, Section]: return result +@derived(cached=True) +def section_path(self) -> str: + used_in_sub_sections: List[SubSection] = SubSection._used_sections.get(self, []) # type: ignore + if len(used_in_sub_sections) == 0: + if self.name == 'EntryArchive': + return None + else: + return '__no_archive_path__' + + if len(used_in_sub_sections) > 1: + return '__ambiguous__' + + parent_section = used_in_sub_sections[0].m_parent + parent_path = parent_section.path + + if parent_path is None: + return used_in_sub_sections[0].name + + if parent_path.startswith('__'): + return parent_path + + return f'{parent_path}.{used_in_sub_sections[0].name}' + + Section.inherited_sections = inherited_sections 
Section.all_base_sections = all_base_sections Section.all_properties = all_properties @@ -3235,6 +3267,8 @@ Section.all_sub_sections = all_sub_sections Section.all_sub_sections_by_section = all_sub_sections_by_section Section.all_aliases = all_aliases Section.all_inner_section_definitions = all_inner_section_definitions +Section.path = section_path + Property.template = Quantity(type=bool, name='template', default=False) diff --git a/nomad/mkdocs.py b/nomad/mkdocs.py index a0082d1c20dbfc09d7c2de1a1c4f49b13e335b37..19fa2d2eef08b5c8bf55d66ea9e91378a8a571df 100644 --- a/nomad/mkdocs.py +++ b/nomad/mkdocs.py @@ -24,7 +24,7 @@ from nomad.app.v1.models import ( query_documentation, owner_documentation) from nomad.app.v1.routers.entries import archive_required_documentation -from nomad import config +from nomad import config, utils doc_snippets = { 'query': query_documentation, @@ -41,3 +41,10 @@ def define_env(env): @env.macro def doc_snippet(key): # pylint: disable=unused-variable return doc_snippets[key] + + @env.macro + def metainfo_data(): # pylint: disable=unused-variable + return utils.strip(''' + You can browse the [NOMAD metainfo schema]({{ nomad_url() }}/../gui/analyze/metainfo) + or the archive of each entry (e.g. 
[a VASP example]({{ nomad_url() }}/../gui/search/entries/entry/id/d5OYC0SJTDevHMPk7YHd4A/-7j8ojKkna2NLXdytv_OjV4zsBXw/archive)) + in the web-interface.''') diff --git a/tests/metainfo/test_sections.py b/tests/metainfo/test_sections.py index caaa18b6b45405e1719921597071a6c6a15d76d2..d8482c50ec559ef6843298930839fbc260493d58 100644 --- a/tests/metainfo/test_sections.py +++ b/tests/metainfo/test_sections.py @@ -22,7 +22,7 @@ import pytest from nomad.metainfo import MSection -from nomad.metainfo.metainfo import Quantity, SubSection, Section +from nomad.metainfo.metainfo import Package, Quantity, SubSection, Section def test_base_section(): @@ -193,3 +193,26 @@ def test_inner_sections_inheritance(): section = OuterSection( test_sub_section=OuterSection.InnerSection(test_quantity='test_value')) assert section.test_sub_section.test_quantity == 'test_value' + + +def test_path(): + class ChildSection(MSection): + pass + + class EntryArchive(MSection): + child = SubSection(sub_section=ChildSection.m_def) + + pkg = Package() + pkg.section_definitions.append(ChildSection.m_def) + pkg.section_definitions.append(EntryArchive.m_def) + pkg.__init_metainfo__() + + assert SubSection._used_sections[ChildSection.m_def] == [EntryArchive.child] + assert ChildSection.m_def.path == 'child' + + from nomad.datamodel.metainfo.simulation.calculation import Calculation, Energy + from nomad.datamodel.metainfo.simulation.system import System + from nomad.datamodel import EntryArchive # pylint: disable=unused-import + assert Calculation.m_def.path == 'run.calculation' + assert System.m_def.path == 'run.system' + assert Energy.m_def.path == '__no_archive_path__'