diff --git a/examples/client.ipynb b/examples/client.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..cb6d69439800616f9f1795d39359d4124009d049 --- /dev/null +++ b/examples/client.ipynb @@ -0,0 +1,103 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "from nomad.client import query_archive\n", + "import matplotlib.pyplot as plt" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "run = query_archive()[1]\n", + "dos = run.section_single_configuration_calculation[0].section_dos[0]" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[<matplotlib.lines.Line2D at 0x12f316b70>]" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAWoAAAEDCAYAAAAcI05xAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8QZhcZAAAMtElEQVR4nO3df8zudV3H8efLcwQ8UnLg3JoCddC1GjgDu8vUzSFYopQ6o0UbpbbGWmVmscLRhlluQr+sZbYzstwSMFE3V87AlKmrnbrPARU4IkdABEFusiRxSsS7P+7vgZu7m3Nf55zre1/vw/18bNfOdV/X57p4f869Pfnue13XuVJVSJL6etKsB5Ak7Z+hlqTmDLUkNWeoJak5Qy1JzRlqSWputFAneU+Se5PcMMHalyTZneShJOesuO+SJDcMl58da15J6mrMI+q/Bc6acO0dwOuBy5ffmORs4PnAqcALgAuSfPf0RpSk/kYLdVV9Cvj68tuSPCfJx5LsSvLpJD84rL29qj4HPLziaU4GPlVVD1XVA8DnmDz+kvSEsN7nqHcAb6yqHwYuAP5yjfWfBc5KsiXJNuClwIkjzyhJrWxer/9QkqOBFwEfSLLv5iP395iqujrJjwD/AiwC/wr875hzSlI36xZqlo7e/6uqTj2QB1XV24G3AyS5HPjiCLNJUlvrduqjqu4HbkvyMwBZ8kP7e0ySTUmOG64/D3gecPXow0pSIxnrX89LcgVwOrAN+BpwMfAJ4N3AM4EnA1dW1duG0xsfBrYC3wbuqapTkhwF7B6e8n7gl6vq+lEGlqSmRgu1JGk6/GSiJDU3youJ27Ztq+3bt4/x1JL0hLRr1677qmputftGCfX27dtZWFgY46kl6QkpyZcf7z5PfUhSc4Zakpoz1JLUnKGWpOYMtSQ1Z6glqTlDLUnNGWptGOddtpPzLts56zGkA7ae/8ypNFOf2XvfrEeQDopH1JLUnKGWpOYMtSQ1Z6glqTlDLUnNGWpJas5QS1JzhlqSmjPUktScoZak5gy1JDVnqCWpOUM
tSc1NFOokb05yY5IbklyR5KixB5MkLVkz1EmOB34dmK+q5wKbgHPHHkyStGTSUx+bgack2QxsAb463kiSpOXWDHVV3QX8EXAHcDfwjaq6euW6JOcnWUiysLi4OP1JJWmDmuTUx1bg1cBJwLOApyY5b+W6qtpRVfNVNT83Nzf9SSVpg5rk1MfLgNuqarGq/gf4EPCicceSJO0zSajvAH4syZYkAc4E9ow7liRpn0nOUe8ErgJ2A58fHrNj5LkkSYOJvoW8qi4GLh55FknSKvxkoiQ1Z6glqTlDLUnNGWpJas5QS1JzhlqSmjPUktScoZak5gy1JDVnqCWpOUMtSc0ZaklqzlBLUnOGWpKaM9SS1JyhlqTmDLUkNWeoJak5Qy1JzRlqSWrOUEtSc4Zakpoz1JLUnKGWpOYMtSQ1Z6glqTlDLUnNGWpJas5QS1JzhlqSmjPUktScoZak5gy1JDVnqCWpOUMtSc0ZaklqbqJQJzkmyVVJvpBkT5IXjj2YJGnJ5gnX/Rnwsao6J8kRwJYRZ5IkLbNmqJM8DXgJ8HqAqnoQeHDcsSRJ+0xy6uMkYBH4myTXJbksyVNXLkpyfpKFJAuLi4tTH1SSNqpJQr0ZeD7w7qo6DXgAuHDloqraUVXzVTU/Nzc35TElaeOaJNR3AndW1c7h56tYCrckaR2sGeqqugf4SpIfGG46E7hp1KkkSY+Y9F0fbwTeN7zj41bgDeONJElabqJQV9X1wPzIs0iSVuEnEyWpOUMtSc0ZaklqzlBLUnOGWpKaM9SS1JyhlqTmDLUkNWeoJak5Qy1JzRlqSWrOUEtSc4Zakpoz1JLUnKGWpOYMtSQ1Z6glqTlDLUnNGWpJas5QS1JzhlqSmjPUktScoZak5gy1JDVnqCWpOUMtSc0ZaklqzlBLUnOGWpKaM9SS1JyhlqTmDLUkNWeoJak5Qy1JzRlqSWrOUEtScxOHOsmmJNcl+YcxB5IkPdaBHFG/Cdgz1iCSpNVNFOokJwBnA5eNO44kaaVJj6jfCfw28PDjLUhyfpKFJAuLi4tTGU6SNEGok/wkcG9V7drfuqraUVXzVTU/Nzc3tQElaaOb5Ij6xcCrktwOXAmckeTvRp1KkvSINUNdVW+pqhOqajtwLvCJqjpv9MkkSYDvo5ak9jYfyOKquha4dpRJJEmr8ohakpoz1JLUnKGWpOYMtSQ1Z6glqTlDLUnNGWpJas5QS1JzhlqSmjPUktScoZak5gy1JDVnqCWpOUMtSc0ZaklqzlBLUnOGWpKaM9SS1JyhlqTmDLUkNWeoJak5Qy1JzRlqSWrOUEtSc4Zakpoz1JLUnKGWpOYMtSQ1Z6glqTlDLUnNGWpJas5QS1JzhlqSmjPUktScoZak5gy1JDW3ZqiTnJjkk0luSnJjkjetx2CSpCWbJ1jzEPBbVbU7yXcBu5JcU1U3jTybJIkJjqir6u6q2j1c/29gD3D82INJkpYc0DnqJNuB04CdYwwjSfr/Jg51kqOBDwK/UVX3r3L/+UkWkiwsLi5Oc0ZJ2tAmCnWSJ7MU6fdV1YdWW1NVO6pqvqrm5+bmpjmjJG1ok7zrI8BfA3uq6k/GH0mStNwkR9QvBn4eOCPJ9cPllSPPJUkarPn2vKr6DJB1mEWStAo/mShJzRlqSWrOUEtSc4Zakpoz1JLUnKGWpOYMtSQ1Z6glqTlDLUnNGWpJas5QS1JzhlqSmjPUktScoZak5gy1JDVnqCWpOUMtSc0ZaklqzlBLUnOGWpKaM9SS1JyhlqTmDLUkNWeoJak5Qy1JzRlqSWrOUEtSc4Zakpoz1JLUnKGWpOYMtSQ1Z6glqTlDLUnNGWpJas5QS1JzhlqSmjPUktTcRKFOclaSm5PsTXLh2ENJkh61ZqiTbALeBbwCOBn4uSQnjz2YJGnJJEfUPwrsrapbq+pB4Erg1eOOJUnaZ5JQHw98ZdnPdw63PUaS85MsJFlYXFyc1nyStOFtntYTVdUOYAfA/Px8Tet
5pWm5/R1nz3oE6aBMckR9F3Disp9PGG6TJK2DSUL978D3JzkpyRHAucBHxh1LkrTPmqc+quqhJL8G/BOwCXhPVd04+mSSJGDCc9RV9VHgoyPPIklahZ9MlKTmDLUkNWeoJak5Qy1JzaVq+p9NSbIIfHnqTzyubcB9sx5inbnnjcE9Hx6+r6rmVrtjlFAfjpIsVNX8rOdYT+55Y3DPhz9PfUhSc4Zakpoz1I/aMesBZsA9bwzu+TDnOWpJas4jaklqzlBLUnMbKtRJjk1yTZJbhj+3Ps661w1rbknyulXu/0iSG8af+NAdyp6TbEnyj0m+kOTGJO9Y3+kPzFpfwpzkyCTvH+7fmWT7svveMtx+c5KXr+fcB+tg95vkx5PsSvL54c8z1nv2g3Uov+Ph/u9N8s0kF6zXzFNRVRvmAlwKXDhcvxC4ZJU1xwK3Dn9uHa5vXXb/a4HLgRtmvZ+x9wxsAV46rDkC+DTwilnv6XH2uQn4EvDsYdbPAievWPMrwF8N188F3j9cP3lYfyRw0vA8m2a9pxH3exrwrOH6c4G7Zr2fsfe87P6rgA8AF8x6Pwdy2VBH1Cx9Ke97h+vvBV6zypqXA9dU1der6j+Ba4CzAJIcDfwm8AfrMOu0HPSeq+pbVfVJgFr6YuPdLH3DT0eTfAnz8r+Lq4Azk2S4/cqq+k5V3QbsHZ6vs4Peb1VdV1VfHW6/EXhKkiPXZepDcyi/Y5K8BriNpT0fVjZaqJ9RVXcP1+8BnrHKmv19me/vA38MfGu0CafvUPcMQJJjgJ8C/nmMIadgki9hfmRNVT0EfAM4bsLHdnMo+13up4HdVfWdkeacpoPe83CQ9TvA763DnFM3tS+37SLJx4HvWeWui5b/UFWVZOL3JiY5FXhOVb155XmvWRtrz8uefzNwBfDnVXXrwU2pbpKcAlwC/MSsZ1kHbwX+tKq+ORxgH1aecKGuqpc93n1JvpbkmVV1d5JnAveusuwu4PRlP58AXAu8EJhPcjtLf29PT3JtVZ3OjI245312ALdU1TunMO5YJvkS5n1r7hz+5/M04D8mfGw3h7JfkpwAfBj4har60vjjTsWh7PkFwDlJLgWOAR5O8u2q+ovxx56CWZ8kX88L8Ic89oW1S1dZcyxL57G2DpfbgGNXrNnO4fNi4iHtmaXz8R8EnjTrvayxz80svQh6Eo++0HTKijW/ymNfaPr74fopPPbFxFvp/2Lioez3mGH9a2e9j/Xa84o1b+UwezFx5gOs8y/6OJbOsd4CfHxZjOaBy5at+0WWXlDaC7xhlec5nEJ90Htm6YilgD3A9cPll2a9p/3s9ZXAF1l6Z8BFw21vA141XD+KpVf89wL/Bjx72WMvGh53M03f2TKt/QK/Czyw7Hd6PfD0We9n7N/xsuc47ELtR8glqbmN9q4PSTrsGGpJas5QS1JzhlqSmjPUktScoZak5gy1JDX3f7oRUgqc7vmOAAAAAElFTkSuQmCC\n", + "text/plain": [ + "<Figure size 432x288 with 1 Axes>" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "plt.plot(dos.dos_energies.m, dos.dos_values[0])" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'O3SrTi'" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + 
"run.section_system[0].chemical_composition_bulk_reduced" + ] + } + ], + "metadata": { + "file_extension": ".py", + "kernelspec": { + "display_name": "Python 3.6.3 64-bit ('.pyenv': virtualenv)", + "language": "python", + "name": "python36364bitpyenvvirtualenv11a6404af23a4e178b049a429667c260" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.3" + }, + "mimetype": "text/x-python", + "name": "python", + "npconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": 3 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/nomad/app/api/archive.py b/nomad/app/api/archive.py index 774eece9d6669128f401364b61ed946de596d39f..80594088106c0e3e257712ff6faba619d718172d 100644 --- a/nomad/app/api/archive.py +++ b/nomad/app/api/archive.py @@ -17,7 +17,7 @@ The archive API of the nomad@FAIRDI APIs. This API is about serving processed (parsed and normalized) calculation data in nomad's *meta-info* format. 
''' -from typing import Dict, Any +from typing import Dict, Any, List from io import BytesIO import os.path from flask import send_file, request, g @@ -29,9 +29,8 @@ import urllib.parse import metainfo from nomad.files import UploadFiles, Restricted -from nomad import search, config +from nomad import search, config, archive from nomad.app import common -from nomad.archive import query_archive from .auth import authenticate, create_authorization_predicate from .api import api @@ -265,7 +264,7 @@ class ArchiveQueryResource(Resource): search_request.owner('all') apply_search_parameters(search_request, query) - search_request.include('calc_id', 'upload_id', 'with_embargo') + search_request.include('calc_id', 'upload_id', 'with_embargo', 'parser_name') try: if scroll: @@ -286,29 +285,42 @@ class ArchiveQueryResource(Resource): data = [] calcs = results['results'] - archive_files = None + archive_readers: List[archive.ArchiveReader] = [] current_upload_id = None for entry in calcs: upload_id = entry['upload_id'] calc_id = entry['calc_id'] - if archive_files is None or current_upload_id != upload_id: + if current_upload_id is None or current_upload_id != upload_id: upload_files = UploadFiles.get(upload_id, create_authorization_predicate(upload_id)) if upload_files is None: return [] - archive_files = upload_files.archive_file_msgs() + for archive_reader in archive_readers: + if archive_reader is not None: + archive_reader.close() + + archive_readers = [ + archive.ArchiveReader(f) if f is not None else None + for f in upload_files.archive_file_msgs()] + current_upload_id = upload_id if entry['with_embargo']: - archive_file = archive_files[1] + archive_reader = archive_readers[1] else: - archive_file = archive_files[0] + archive_reader = archive_readers[0] - if archive_file is None: + if archive_reader is None: continue - data.append(query_archive(archive_file, {calc_id: query_schema})) + data.append( + { + 'calc_id': calc_id, + 'parser_name': entry['parser_name'], + 
'archive': archive.query_archive( + archive_reader, {calc_id: query_schema})[calc_id] + }) # assign archive data to results results['results'] = data diff --git a/nomad/archive.py b/nomad/archive.py index 67963f1d0268431be6485241c59fca03bbab3e26..a7d104ff86e115b4d91e427d63babf2cb13ca841 100644 --- a/nomad/archive.py +++ b/nomad/archive.py @@ -480,7 +480,7 @@ def read_archive(file_or_path: str, **kwargs) -> ArchiveReader: return ArchiveReader(file_or_path, **kwargs) -def query_archive(f, query_dict: dict): +def query_archive(f_or_archive_reader: Union[ArchiveReader, BytesIO], query_dict: dict): def _load_data(query_dict: Dict[str, Any], archive_item: ArchiveObject, main_section: bool = False): if not isinstance(query_dict, dict): @@ -529,8 +529,15 @@ def query_archive(f, query_dict: dict): return res - with ArchiveReader(f) as archive: - return _load_data(query_dict, archive, True) + if isinstance(f_or_archive_reader, ArchiveReader): + return _load_data(query_dict, f_or_archive_reader, True) + + elif isinstance(f_or_archive_reader, BytesIO): + with ArchiveReader(f_or_archive_reader) as archive: + return _load_data(query_dict, archive, True) + + else: + raise TypeError('%s is neither a file-like nor ArchiveReader' % f_or_archive_reader) if __name__ == '__main__': diff --git a/nomad/archive_query.py b/nomad/archive_query.py deleted file mode 100644 index cf1af8b3d510d1e1aa655be215cb732930496720..0000000000000000000000000000000000000000 --- a/nomad/archive_query.py +++ /dev/null @@ -1,298 +0,0 @@ -# Copyright 2019 Alvin Noe Ladines, Markus Scheidgen -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an"AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -''' -Contains interfaces to the archive metainfo and query. - -In module ``ArchiveMetainfo``, the data is provided either from raw -json data or as a filename of an existing msgpack database. The metainfo -can then queried by providing a schema. - -.. code-block: python - am = ArchiveMetainfo(archive_data) - for calc in am.calcs: - c.section_run.section_single_configuration_calculation[0]({'energy_total':None}) - -The ArchiveQuery enables a query interface to the archive data. A dict of query parameters -and a query schema similar to the archive json format can be provided to filter archive. - -.. 
code-block: python - q = ArchiveQuery({'atoms':'Si'}) - metainfo = q.query() - for c in metainfo.calcs: - print(c.section_run.section_single_configuration_calculation[0]({'energy_total':'*'})) -''' - -import numpy as np -import requests -import os.path -from urllib.parse import urlparse -from typing import Dict, List, Any - -from nomad.metainfo import MSection, Quantity, SubSection -from nomad.metainfo.metainfo import MObjectMeta -from nomad import config as nomad_config -from nomad.cli.client.client import KeycloakAuthenticator - - -class ArchiveMetainfo: - ''' - Converts archive data in json format to the new nomad metainfo model - Arguments: - archive_data: the archive data in json format - ''' - def __init__(self, archive_data: List[Dict[str, Any]]): - self._archive_data = archive_data - self.metainfo = None - self._metacls = None - self._calcs: Dict[str, MSection] = {} - self._calc_ids: List[str] = [] - self._base_metacls = None - self._base_data = None - self._prefix = 'calc' - self._init_calcs() - - def _init_calcs(self): - for calc in self._archive_data: - calc_id = list(calc.keys())[0] - data = calc[calc_id] - self._calc_ids.append(calc_id) - self._calcs[calc_id] = self._build_meta_cls(data, calc_id).m_from_dict(data) - - def __getitem__(self, key): - if isinstance(key, str): - calc = self._calcs.get(key, None) - if calc: - calc.calc_id = key - return calc - elif isinstance(key, int): - calc_id = self._calc_ids[key] - calc = self._calcs[calc_id] - calc.calc_id = calc_id - return calc - else: - calc_ids = self._calc_ids[key] - calcs = [] - for calc_id in calc_ids: - calc = self._calcs[calc_id] - calc.calc_id = calc_id - calcs.append(calc) - return calcs - - def __len__(self): - return len(self._calcs) - - def __iter__(self): - self._n = -1 - return self - - def __next__(self): - self._n += 1 - if self._n >= len(self): - raise StopIteration - calc = list(self._calcs.values())[self._n] - calc.calc_id = list(self._calcs.keys())[self._n] - return calc - - 
@property - def calcs(self): - ''' - Calculations in metainfo form which can be actively queried by using the get - functionality and providing a schema - ''' - if not self._calcs: - self._init_calcs() - for calc_id, calc in self._calcs.items(): - calc.calc_id = calc_id - yield calc - - @property - def base_data(self): - if self._base_data is None: - calc_id = self._calc_ids[0] - self._base_data = self._archive_data[calc_id] - return self._base_data - - @property - def base_metacls(self): - ''' - The base metaclass to apply a calculation - ''' - if self._base_metacls is None: - name = self._prefix - self._base_metacls = self._build_meta_cls(self.base_data, name) - return self._base_metacls - - def _get_dtype(self, data): - if isinstance(data, np.ndarray): - if len(data) == 0: - dtype = int - else: - dtype = self._get_dtype(data[0]) - else: - dtype = type(data) - return dtype - - def _to_meta_obj(self, content): - if isinstance(content, Quantity): - return content - if isinstance(content, MObjectMeta): - return SubSection(sub_section=content, repeats=content.repeats) - else: - if isinstance(content, list): - content = np.array(content) - dtype = self._get_dtype(content) - if isinstance(content, np.ndarray): - dtype = np.dtype(dtype) - shape = np.shape(content) - return Quantity(type=dtype, shape=shape) - else: - return Quantity(type=dtype) - - def _create_section(self, name, contents): - section = type(name.title(), (MSection,), contents) - return section - - def _build_meta_cls(self, data, name, return_section=True): - if isinstance(data, dict): - contents = {} - for key, val in data.items(): - content = self._build_meta_cls(val, key, True) - content = self._to_meta_obj(content) - contents[key] = content - if return_section: - section = self._create_section(name, contents) - section.repeats = False - return section - else: - return contents - - elif isinstance(data, list): - if not data: - return self._to_meta_obj(data) - if not isinstance(data[0], dict): - return 
self._to_meta_obj(data) - contents = {} - for i in range(len(data)): - content = self._build_meta_cls(data[i], name, False) - contents.update(content) - section = self._create_section(name, contents) - section.repeats = True - return section - - else: - return self._to_meta_obj(data) - - -class ArchiveQuery: - def __init__(self, *args, **kwargs): - self.archive_data = [] - self._scroll_id = None - self._page = None - self._query_params = {} - if args: - self._query_params = args[0] - if kwargs: - self._query_params.update(kwargs) - self._max_n_pages = self._query_params.pop('max_n_pages', 100000) - self._authentication = self._query_params.pop('authentication', None) - self._url = self._query_params.pop('url', None) - self._user = self._query_params.pop('user', None) - self._password = self._query_params.pop('password', None) - if self._url: - nomad_config.client.url = self._url - if self._user: - nomad_config.client.user = self._user - if self._password: - nomad_config.client.password = self._password - - def _get_value(self, name, in_dict): - if not isinstance(in_dict, dict): - return - for key, val in in_dict.items(): - if key == name: - res = val - else: - res = self._get_value(name, val) - return res - - def _set_value(self, name, value, in_dict): - if not isinstance(in_dict, dict): - return - for key, val in in_dict.items(): - if key == name: - in_dict[name] = value - return - else: - self._set_value(name, value, val) - in_dict[name] = value - - def _get_authentication(self): - if self._authentication is None: - host = urlparse(nomad_config.client.url).netloc.split(':')[0] - self._authentication = KeycloakAuthenticator( - host=host, - user=nomad_config.client.user, - password=nomad_config.client.password, - server_url=nomad_config.keycloak.server_external_url, - realm_name=nomad_config.keycloak.realm_name, - client_id=nomad_config.keycloak.public_client_id) - if isinstance(self._authentication, KeycloakAuthenticator): - return self._authentication.apply() - 
else: - return self._authentication - - def _api_query(self): - url = os.path.join(nomad_config.client.url, 'archive', 'query') - - if self._scroll_id is not None: - self._query_params['scroll']['scroll_id'] = self._scroll_id - elif self._page is not None: - self._query_params['pagination']['page'] = self._page + 1 - - response = requests.post(url, headers=self._get_authentication(), json=self._query_params) - if response.status_code != 200: - raise response.raise_for_status() - - data = response.json - if not isinstance(data, dict): - data = data() - - results = data.get('results', []) - self._scroll_id = data.get('scroll', {}).get('scroll_id', None) - self._page = data.get('pagination', {}).get('page', None) - - return results - - def _get_archive_data(self): - n_page = 0 - while True: - results = self._api_query() - self.archive_data += results - n_page += 1 - if n_page >= self._max_n_pages: - break - if len(results) == 0: - break - - def query(self): - self._get_archive_data() - if self.archive_data: - self.metainfo = ArchiveMetainfo(archive_data=self.archive_data) - - -def query(*args, **kwargs): - archive_query_obj = ArchiveQuery(*args, **kwargs) - archive_query_obj.query() - return archive_query_obj.metainfo diff --git a/nomad/client.py b/nomad/client.py new file mode 100644 index 0000000000000000000000000000000000000000..7567f8d12fb5be5e57c04b8690cbdc47f7c1da88 --- /dev/null +++ b/nomad/client.py @@ -0,0 +1,158 @@ +# Copyright 2019 Alvin Noe Ladines, Markus Scheidgen +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an"AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and
# limitations under the License.

'''
Contains the Python client side library to access the NOMAD archive.

Use :func:`query_archive` to create an :class:`ArchiveQuery`. The returned object behaves
like a read-only sequence of archive root sections. The entries are downloaded from the
``archive/query`` API endpoint on demand, one page (or one scroll step) per request.

.. code-block:: python

    from nomad.client import query_archive

    results = query_archive(query_schema={'section_run': {'section_system': '*'}})
    print(len(results))
    for run in results:
        print(run.section_system[0].atom_labels)
'''

from typing import Dict, Union, Any, List
from collections.abc import Sequence
import requests
from urllib.parse import urlparse

from nomad import config, metainfo, parsing
from nomad.cli.client.client import KeycloakAuthenticator


class ArchiveQuery(Sequence):
    '''
    A lazily loaded, read-only sequence of the archive entries that match a search query.

    Each element is the metainfo section object that is created from the first (root) key
    of an entry's archive data, using the metainfo environment of the parser named in the
    API result.

    Arguments:
        query: Search parameters; sent as ``query`` in the request body.
        query_schema: Optional schema that selects the archive data to return; sent as
            ``query_schema`` in the request body.
        url: Base URL of the API. Defaults to ``config.client.url``.
        username: User name, only used to create a :class:`KeycloakAuthenticator` if no
            ``authentication`` is given.
        password: Password, used together with ``username``.
        scroll: Use the scroll API instead of pagination to go through the results.
        authentication: Either ready-to-use request headers or a
            :class:`KeycloakAuthenticator` that produces them.
        **kwargs: Additional search parameters that are merged into ``query``.
    '''
    def __init__(
            self,
            query: dict = None, query_schema: dict = None,
            url: str = None, username: str = None, password: str = None,
            scroll: bool = False,
            authentication: Union[Dict[str, str], KeycloakAuthenticator] = None, **kwargs):

        self.scroll = scroll
        self._scroll_id = None
        self._page = 1

        # the request body; 'scroll' or 'pagination' are added by call_api
        self.query: Dict[str, Any] = {
            'query': {}
        }
        if query is not None:
            self.query['query'].update(query)
        if query_schema is not None:
            self.query['query_schema'] = query_schema
        self.query['query'].update(kwargs)

        self.password = password
        self.username = username
        self.url = config.client.url if url is None else url
        self._authentication = authentication

        # -1 marks that the API has not been called yet and the total is unknown
        self._total = -1
        self._results: List[dict] = []

    @property
    def authentication(self):
        '''
        The request headers used for authentication, or ``None``.

        A :class:`KeycloakAuthenticator` is created on first use if only username and
        password were given.
        '''
        if self._authentication is None and self.username is not None and self.password is not None:
            host = urlparse(self.url).netloc.split(':')[0]
            self._authentication = KeycloakAuthenticator(
                host=host,
                user=self.username,
                password=self.password,
                server_url=config.keycloak.server_url,
                realm_name=config.keycloak.realm_name,
                client_id=config.keycloak.client_id)

        if isinstance(self._authentication, KeycloakAuthenticator):
            return self._authentication.apply()

        return self._authentication

    def call_api(self):
        '''
        Performs one API request for the next page (or scroll step), updates the total
        and appends the received entries to the already loaded results.

        Raises:
            requests.HTTPError: If the API answers with a 4xx or 5xx status code.
            IOError: If the API answers with any other status code than 200.
        '''
        url = '%s/%s/%s' % (self.url, 'archive', 'query')

        if self.scroll:
            scroll_config = self.query.setdefault('scroll', {'scroll': True})
            if self._scroll_id is not None:
                scroll_config['scroll_id'] = self._scroll_id

        else:
            self.query.setdefault('pagination', {})['page'] = self._page

        response = requests.post(url, headers=self.authentication, json=self.query)
        if response.status_code != 200:
            # raise_for_status only raises for 4xx/5xx and returns None otherwise;
            # raising its return value would fail with a TypeError
            response.raise_for_status()
            raise IOError(
                'unexpected status code %d from %s' % (response.status_code, url))

        # json is a method on some response objects and a plain attribute on others
        data = response.json
        if not isinstance(data, dict):
            data = data()

        if self.scroll:
            scroll = data['scroll']
            self._scroll_id = scroll['scroll_id']
            self._total = scroll['total']

        else:
            pagination = data['pagination']
            self._total = pagination['total']
            self._page = pagination['page'] + 1

        results = data.get('results', [])

        for result in results:
            parser_name = result['parser_name']
            parser = parsing.parser_dict[parser_name]
            metainfo_env = parser.metainfo_env

            # the first key of the archive data names the root section definition
            root_section_key = next(iter(result['archive']))
            section_def = metainfo_env.resolve_definition(root_section_key, metainfo.Section)
            archive = section_def.section_cls.m_from_dict(result['archive'][root_section_key])

            self._results.append(archive)

    def __getitem__(self, key):
        '''
        Returns the entry at the given integer index and loads more results from the
        API as necessary. Negative indices count from the end of all results.

        Raises:
            IndexError: If the index is out of range, or if the API does not deliver the
                entry even though the reported total suggests that it exists.
        '''
        total = len(self)
        if key < 0:
            key += total

        if not 0 <= key < total:
            raise IndexError('archive query index out of range')

        # <= and not <: the entry at index key only exists once there are key + 1 results
        while len(self._results) <= key:
            n_loaded = len(self._results)
            self.call_api()
            if len(self._results) == n_loaded:
                # the API has nothing more to deliver; stop instead of looping forever
                raise IndexError('archive query index out of range')

        return self._results[key]

    def __len__(self):  # pylint: disable=invalid-length-returned
        ''' The total number of results as reported by the API. '''
        if self._total == -1:
            self.call_api()

        return self._total


def query_archive(*args, **kwargs):
    ''' Creates an :class:`ArchiveQuery`; all arguments are passed to its constructor. '''
    return ArchiveQuery(*args, **kwargs)


if __name__ == '__main__':
    # manual smoke test against the configured API
    run = query_archive()[1]
    run.section_system[1].atom_labels
parser_class) + module = importlib.import_module('.'.join(module_name + ['metainfo'])) + self.metainfo_env = getattr(module, 'm_env') + def run(self, mainfile: str, logger=None) -> Backend: # TODO we need a homogeneous interface to parsers, but we dont have it right now. # There are some hacks to distinguish between ParserInterface parser and simple_parser diff --git a/nomad/parsing/parser.py b/nomad/parsing/parser.py index 4abfbb33b535b5f27554c21af93116606cad36c9..9acddadb8f960a2cc29b0a8ba66632884d2746cf 100644 --- a/nomad/parsing/parser.py +++ b/nomad/parsing/parser.py @@ -16,6 +16,8 @@ from typing import List from abc import ABCMeta, abstractmethod import re +from nomad.metainfo import Environment + class Parser(metaclass=ABCMeta): ''' @@ -25,6 +27,7 @@ class Parser(metaclass=ABCMeta): def __init__(self): self.domain = 'dft' + self.metainfo_env: Environment = None @abstractmethod def is_mainfile( diff --git a/tests/app/test_api.py b/tests/app/test_api.py index b00c82167de3b093e54756dd38b9827db6bfc50a..a403a2f5863f4ba5f69121dd6d8a45c8c5c8c6d5 100644 --- a/tests/app/test_api.py +++ b/tests/app/test_api.py @@ -681,7 +681,7 @@ class TestArchive(UploadFilesBasedTests): assert rv.status_code == 200 assert_zip_file(rv, files=1) - def test_post_archive_query(self, api, published_wo_user_metadata): + def test_archive_query(self, api, published_wo_user_metadata): schema = { 'section_run': { 'section_single_configuration_calculation': { @@ -689,11 +689,19 @@ class TestArchive(UploadFilesBasedTests): data = {'results': [schema], 'per_page': 5} uri = '/archive/query' rv = api.post(uri, content_type='application/json', data=json.dumps(data)) + assert rv.status_code == 200 data = rv.get_json() + assert data results = data.get('results', None) - assert results is not None + assert len(results) > 0 + for result in results: + assert 'calc_id' in result + assert 'parser_name' in result + assert 'archive' in result + + # TODO assert archive contents class TestRepo(): diff --git 
a/tests/test_archive_query.py b/tests/test_archive_query.py deleted file mode 100644 index 1d78eacd5973cbd6f0f024fd58daf1acbf29c340..0000000000000000000000000000000000000000 --- a/tests/test_archive_query.py +++ /dev/null @@ -1,47 +0,0 @@ -import pytest - -from nomad.archive_query import ArchiveQuery, ArchiveMetainfo -from tests.app.test_app import BlueprintClient - - -class TestArchiveMetainfo: - @pytest.fixture(scope='function') - def data(self): - return [ - {'calc_1': {'secA': {'propA': 1.0, 'propB': 'X'}}}, - {'calc_2': {'secA': {'propA': 2.0, 'propB': 'Y'}}}] - - def assert_metainfo(self, metainfo): - for calc in metainfo.calcs: - assert isinstance(calc.secA.propA, float) - assert calc.secA.m_to_dict() is not None - - def test_query_from_data(self, data): - metainfo = ArchiveMetainfo(archive_data=data) - self.assert_metainfo(metainfo) - - -class TestArchiveQuery: - @pytest.fixture(scope='function') - def api(self, client, monkeypatch): - monkeypatch.setattr('nomad.config.client.url', '') - return BlueprintClient(client, '/api') - - def test_query_from_json(self, api, published_wo_user_metadata, test_user_auth, monkeypatch): - monkeypatch.setattr('nomad.archive_query.requests', api) - q_params = {'pagination': {'order': 1, 'per_page': 5}} - q_schema = {'section_entry_info': '*'} - q = ArchiveQuery(q_params, query_schema=q_schema, authentication=test_user_auth) - q.query() - for calc in q.metainfo: - assert calc.section_entry_info.calc_id is not None - - def test_query_from_kwargs(self, api, published_wo_user_metadata, other_test_user_auth, monkeypatch): - monkeypatch.setattr('nomad.archive_query.requests', api) - q_schema = {'section_entry_info': '*'} - q = ArchiveQuery( - scroll=dict(scroll=True), pagination=dict(per_page=5), query_schema=q_schema, - authentication=other_test_user_auth) - q.query() - for calc in q.metainfo: - assert calc.section_entry_info.calc_id is not None diff --git a/tests/test_bravado.py b/tests/test_bravado.py new file mode 100644 
# Copyright 2018 Markus Scheidgen
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an"AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import time

from nomad.processing import SUCCESS
from nomad.datamodel import EntryMetadata

from tests.test_files import example_file
from tests.test_search import create_entry


def test_get_upload_command(bravado, no_warn):
    ''' The API provides an upload command. '''
    result = bravado.uploads.get_upload_command().response().result
    assert result.upload_command is not None


def test_upload(bravado, proc_infra, no_warn):
    ''' Uploads the example file and polls the upload until all its tasks are done. '''
    with open(example_file, 'rb') as upload_file:
        upload = bravado.uploads.upload(file=upload_file, name='test_upload').response().result

    while True:
        if not upload.tasks_running:
            break

        # refresh the upload state, then wait a bit before looking at it again
        upload = bravado.uploads.get_upload(upload_id=upload.upload_id).response().result
        time.sleep(0.1)

    assert upload.tasks_status == SUCCESS


def test_get_repo_calc(bravado, proc_infra, raw_files):
    ''' A published entry without embargo can be retrieved via the repo API. '''
    entry_metadata = EntryMetadata(
        domain='dft', calc_id='0', upload_id='test_upload', published=True, with_embargo=False)
    create_entry(entry_metadata)

    repo_calc = bravado.repo.get_repo_calc(upload_id='test_upload', calc_id='0').response().result
    assert repo_calc is not None
    assert repo_calc['calc_id'] is not None
from typing import List
import pytest

from nomad.client import query_archive
from nomad.metainfo import MSection, SubSection
from nomad.datamodel.metainfo.public import section_run

from tests.app.test_app import BlueprintClient

# TODO The existing published_wo_user_metadata fixture provides only one entry. This is
# not enough to properly test pagination and scrolling.


@pytest.fixture(scope='function')
def api(client, monkeypatch):
    ''' Replaces the ``requests`` module used by ``nomad.client`` with the test API client. '''
    monkeypatch.setattr('nomad.config.client.url', '')
    blueprint_client = BlueprintClient(client, '/api')
    monkeypatch.setattr('nomad.client.requests', blueprint_client)
    return blueprint_client


def assert_results(
        results: List[MSection],
        sub_sections: List[SubSection] = None,
        total=1):
    '''
    Asserts the number of results and, if ``sub_sections`` are given, that each result
    has sub sections for exactly the given sub section definitions.
    '''
    assert len(results) == total

    for section in results:
        if not sub_sections:
            continue

        for definition in section.m_def.all_sub_sections.values():
            expected = definition in sub_sections
            found = len(section.m_get_sub_sections(definition))
            if expected:
                assert found > 0
            else:
                assert found == 0


def test_query(api, published_wo_user_metadata):
    ''' A query without parameters returns the one published entry. '''
    results = query_archive()
    assert_results(results)


def test_query_schema(api, published_wo_user_metadata):
    ''' The query schema limits the returned archive data to the requested sections. '''
    results = query_archive(query_schema={'section_run': {'section_system': '*'}})
    assert_results(results, sub_sections=[section_run.section_system])


def test_query_scroll(api, published_wo_user_metadata):
    ''' The results can be retrieved via scrolling. '''
    results = query_archive(scroll=True)
    assert_results(results)


def test_query_authentication(api, published, other_test_user_auth, test_user_auth):
    ''' The number of visible entries depends on the authenticated user. '''
    as_other_user = query_archive(authentication=other_test_user_auth)
    assert_results(as_other_user)

    as_test_user = query_archive(authentication=test_user_auth)
    assert_results(as_test_user, total=0)