Commit 64a67b2e authored by Alvin Noe Ladines
Browse files

Fixed archive_query after refactoring

parent 21616509
Pipeline #69804 passed with stages
in 20 minutes and 55 seconds
......@@ -126,7 +126,6 @@ def add_search_parameters(request_parser):
action=quantity.argparse_action if quantity.multi else None)
def apply_search_parameters(search_request: search.SearchRequest, args: Dict[str, Any]):
Helper that adds query-relevant request args to the given SearchRequest.
......@@ -20,7 +20,7 @@ json data or as a filename of an existing msgpack database. The metainfo
can then be queried by providing a schema.
.. code-block:: python
am = ArchiveMetainfo("db.msg")
am = ArchiveMetainfo(archive_data)
for calc in am.calcs:
......@@ -35,16 +35,13 @@ and a query schema similar to the archive json format can be provided to filter
import numpy as np
from io import BytesIO
import json
import requests
import os.path
from urllib.parse import urlparse
from typing import Dict, List, Any, Union
from typing import Dict, List, Any
from nomad.metainfo import MSection, Quantity, SubSection
from nomad.metainfo.metainfo import MObjectMeta
# from nomad.archive import ArchiveFileDB
from nomad import config as nomad_config
from nomad.cli.client.client import KeycloakAuthenticator
......@@ -53,54 +50,25 @@ class ArchiveMetainfo:
Converts archive data in json format to the new nomad metainfo model
archive_data: the archive data in json format or msgdb filename
archive_schema: dict with the desired quantities as keys and None as placeholder
for the values which are queried from the data
archive_data: the archive data in json format
def __init__(self, archive_data, archive_schema=None):
def __init__(self, archive_data: List[Dict[str, Any]]):
self._archive_data = archive_data
self._archive_schema = archive_schema
self.metainfo = None
self._metacls = None
self._calcs = {}
self._calc_ids = []
self._archive_db = None
self._calcs: Dict[str, MSection] = {}
self._calc_ids: List[str] = []
self._base_metacls = None
self._base_metainfo = None
self._base_data = None
self._prefix = 'calc'
def _load_archive_db(self):
if isinstance(self._archive_data, str):
self._archive_db = ArchiveFileDB(self._archive_data)
db = ArchiveFileDB(BytesIO(), mode='wb')
if isinstance(self._archive_data, dict):
for calc_id, run in self._archive_data.items():
db.add_data({calc_id: run})
elif isinstance(self._archive_data, list):
for entry in self._archive_data:
if not entry:
self._archive_db = db
def archive_schema(self):
return json.loads(json.dumps(self._archive_schema))
def _init_calcs(self):
for i in range(len(self.calc_ids)):
calc_id = self.calc_ids[i]
if self._archive_schema is None:
self._calcs[calc_id] = self.base_metainfo
data = self._archive_db.query({calc_id: self.archive_schema})[calc_id]
self._calcs[calc_id] = self.base_metacls.m_from_dict(data)
self._calcs[calc_id].archive_db = self._archive_db
for calc in self._archive_data:
calc_id = list(calc.keys())[0]
data = calc[calc_id]
self._calcs[calc_id] = self._build_meta_cls(data, calc_id).m_from_dict(data)
def __getitem__(self, key):
if isinstance(key, str):
......@@ -135,52 +103,8 @@ class ArchiveMetainfo:
raise StopIteration
calc = list(self._calcs.values())[self._n]
calc.calc_id = list(self._calcs.keys())[self._n]
calc.archive_db = self._archive_db
return calc
def to_nested_dict(path_str: Union[str, List]) -> Dict[str, Any]:
if isinstance(path_str, str):
path_str = path_str.split('/')
if len(path_str) == 1:
return {path_str[0]: '*'}
pdict = {}
pdict[path_str[0]] = ArchiveMetainfo.to_nested_dict(path_str[1:])
return pdict
def append_data(entry: Dict[str, Any], val: Any) -> Dict[str, Any]:
for k, v in entry.items():
if not isinstance(v, dict):
entry[k] = val
entry[k] = ArchiveMetainfo.append_data(v, val)
return entry
def get_path_from_section(content):
path = content.m_path()
path = path.split('/')
s = ''
for p in path:
p = int(p)
s += '[%s]' % p
except ValueError:
s += '/%s' % p
return s[1:]
def get_data_from_db(content, qschema):
db = content.m_root().archive_db
calc_id = content.m_root().calc_id
root = calc_id + ArchiveMetainfo.get_path_from_section(content)
qs = ArchiveMetainfo.append_data(ArchiveMetainfo.to_nested_dict(root), qschema)
data = db.query(qs)
return data
def calcs(self):
......@@ -191,39 +115,13 @@ class ArchiveMetainfo:
for calc_id, calc in self._calcs.items():
calc.calc_id = calc_id
calc.archive_db = self._archive_db
yield calc
def calc_ids(self):
if not self._calc_ids:
self._calc_ids = [s.strip() for s in self._archive_db.ids]
return self._calc_ids
def _nullify_metainfo(self, metainfo):
if hasattr(metainfo, 'm_contents'):
for content in metainfo.m_contents():
return metainfo
def _nullify_data(self, data):
if not data:
elif isinstance(data, dict):
for key, val in data.items():
data[key] = self._nullify_data(val)
elif isinstance(data, list) and isinstance(data[0], dict):
for i in range(len(data)):
data[i] = self._nullify_data(data[i])
data = None
return data
def base_data(self):
if self._base_data is None:
calc_id = self.calc_ids[0]
self._base_data = self._archive_db.query({calc_id: self.archive_schema})[calc_id]
calc_id = self._calc_ids[0]
self._base_data = self._archive_data[calc_id]
return self._base_data
......@@ -236,23 +134,12 @@ class ArchiveMetainfo:
self._base_metacls = self._build_meta_cls(self.base_data, name)
return self._base_metacls
def base_metainfo(self):
The base metainfo to enable auto completion for each calc
if self._base_metainfo is None:
metacls = self.base_metacls
base_data = self._nullify_data(self.base_data)
self._base_metainfo = metacls.m_from_dict(base_data)
return self._base_metainfo
def get_dtype(self, data):
def _get_dtype(self, data):
if isinstance(data, np.ndarray):
if len(data) == 0:
dtype = int
dtype = self.get_dtype(data[0])
dtype = self._get_dtype(data[0])
dtype = type(data)
return dtype
......@@ -265,7 +152,7 @@ class ArchiveMetainfo:
if isinstance(content, list):
content = np.array(content)
dtype = self.get_dtype(content)
dtype = self._get_dtype(content)
if isinstance(content, np.ndarray):
dtype = np.dtype(dtype)
shape = np.shape(content)
......@@ -274,17 +161,10 @@ class ArchiveMetainfo:
return Quantity(type=dtype)
def _create_section(self, name, contents):
contents['get'] = ArchiveMetainfo.get_data_from_db
section = type(name.title(), (MSection,), contents)
section.__call__ = ArchiveMetainfo.get_data_from_db
return section
def _build_meta_cls(self, data=None, name=None, return_section=True):
if name is None:
data = self._archive_data
name = self._prefix
if data is None:
def _build_meta_cls(self, data, name, return_section=True):
if isinstance(data, dict):
contents = {}
for key, val in data.items():
......@@ -314,16 +194,9 @@ class ArchiveMetainfo:
return self._to_meta_obj(data)
def to_metainfo(self, data=None):
if data is None:
data = self._archive_data
self.metainfo = self.base_metacls.m_from_dict(data)
class ArchiveQuery:
def __init__(self, *args, **kwargs):
self._archive_path = 'archive'
self._query_path = 'query'
self.archive_data = []
self._scroll_id = None
self._page = None
......@@ -332,9 +205,6 @@ class ArchiveQuery:
self._query_params = args[0]
if kwargs:
self._archive_schema = self._query_params.pop('archive_data', None)
if not isinstance(self._archive_schema, list):
self._archive_schema = [self._archive_schema]
self._max_n_pages = self._query_params.pop('max_n_pages', 100000)
self._authentication = self._query_params.pop('authentication', None)
self._url = self._query_params.pop('url', None)
......@@ -384,17 +254,14 @@ class ArchiveQuery:
return self._authentication
def _api_query(self):
url = os.path.join(nomad_config.client.url, self._archive_path, self._query_path)
data = self._query_params
data['results'] = self._archive_schema
url = os.path.join(nomad_config.client.url, 'archive', 'query')
if self._page is not None:
# increment the page number
self._set_value('page', self._page + 1, data)
if self._scroll_id is not None:
self._set_value('scroll_id', self._scroll_id, data)
self._query_params['scroll']['scroll_id'] = self._scroll_id
elif self._page is not None:
self._query_params['pagination']['page'] = self._page + 1
response = requests.post(url, headers=self._get_authentication(), json=data)
response = requests.post(url, headers=self._get_authentication(), json=self._query_params)
if response.status_code != 200:
raise response.raise_for_status()
......@@ -403,12 +270,8 @@ class ArchiveQuery:
data = data()
results = data.get('results', [])
scroll = data.get('Scroll', None)
if scroll:
self._scroll_id = scroll.get('scroll_id', None)
pagination = data.get('Pagination', None)
if pagination:
self._page = pagination.get('page', None)
self._scroll_id = data.get('scroll', {}).get('scroll_id', None)
self._page = data.get('pagination', {}).get('page', None)
return results
......@@ -426,8 +289,10 @@ class ArchiveQuery:
def query(self):
if self.archive_data:
self.metainfo = ArchiveMetainfo(archive_data=self.archive_data, archive_schema='*')
self.metainfo = ArchiveMetainfo(archive_data=self.archive_data)
# def query()...
\ No newline at end of file
def query(*args, **kwargs):
archive_query_obj = ArchiveQuery(*args, **kwargs)
return archive_query_obj.metainfo
import pytest
import os
from nomad.archive import ArchiveFileDB
from nomad.archive_query import ArchiveQuery, ArchiveMetainfo
from import BlueprintClient
def example_msgdb():
def create_msgdb(payload):
filename = 'archive_test.msg'
msgdbo = ArchiveFileDB(filename, mode='w', entry_toc_depth=1)
msgdbo = ArchiveFileDB(filename, mode='r')
return msgdbo
filename = 'archive_test.msg'
yield create_msgdb
class TestArchiveMetainfo:
def data(self):
......@@ -30,13 +13,8 @@ class TestArchiveMetainfo:
def assert_metainfo(self, metainfo):
for calc in metainfo.calcs:
assert calc.secA({'propA': '*'}) is not None
assert calc({'secA': {'propA': '*', 'propB': '*'}}) is not None
def test_query_from_file(self, data, example_msgdb):
_ = example_msgdb(data)
metainfo = ArchiveMetainfo(archive_data='archive_test.msg', archive_schema={'secA': '*'})
assert isinstance(calc.secA.propA, float)
assert calc.secA.m_to_dict() is not None
def test_query_from_data(self, data):
metainfo = ArchiveMetainfo(archive_data=data)
......@@ -51,9 +29,9 @@ class TestArchiveQuery:
def test_query_from_json(self, api, published_wo_user_metadata, test_user_auth, monkeypatch):
monkeypatch.setattr('nomad.archive_query.requests', api)
q_params = {'Pagination': {'order': 1, 'per_page': 5}}
q_params = {'pagination': {'order': 1, 'per_page': 5}}
q_schema = {'section_entry_info': '*'}
q = ArchiveQuery(q_params, archive_data=q_schema, authentication=test_user_auth)
q = ArchiveQuery(q_params, query_schema=q_schema, authentication=test_user_auth)
for calc in q.metainfo:
assert calc.section_entry_info.calc_id is not None
......@@ -61,7 +39,9 @@ class TestArchiveQuery:
def test_query_from_kwargs(self, api, published_wo_user_metadata, other_test_user_auth, monkeypatch):
monkeypatch.setattr('nomad.archive_query.requests', api)
q_schema = {'section_entry_info': '*'}
q = ArchiveQuery(order=1, per_page=5, scroll=True, archive_data=q_schema, authentication=other_test_user_auth)
q = ArchiveQuery(
scroll=dict(scroll=True), pagination=dict(per_page=5), query_schema=q_schema,
for calc in q.metainfo:
assert calc.section_entry_info.calc_id is not None
Supports Markdown
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment