Commit dfe0f7a4 authored by Markus Scheidgen's avatar Markus Scheidgen
Browse files

Client library uses new metainfo.

parent abe20c0c
%% Cell type:code id: tags:
``` python
from nomad.client import query_archive
import matplotlib.pyplot as plt
```
%% Cell type:code id: tags:
``` python
run = query_archive()[1]
dos = run.section_single_configuration_calculation[0].section_dos[0]
```
%% Cell type:code id: tags:
``` python
plt.plot(dos.dos_energies.m, dos.dos_values[0])
```
%% Output
[<matplotlib.lines.Line2D at 0x12f316b70>]
%% Cell type:code id: tags:
``` python
run.section_system[0].chemical_composition_bulk_reduced
```
%% Output
'O3SrTi'
......@@ -17,7 +17,7 @@ The archive API of the nomad@FAIRDI APIs. This API is about serving processed
(parsed and normalized) calculation data in nomad's *meta-info* format.
'''
from typing import Dict, Any
from typing import Dict, Any, List
from io import BytesIO
import os.path
from flask import send_file, request, g
......@@ -29,9 +29,8 @@ import urllib.parse
import metainfo
from nomad.files import UploadFiles, Restricted
from nomad import search, config
from nomad import search, config, archive
from nomad.app import common
from nomad.archive import query_archive
from .auth import authenticate, create_authorization_predicate
from .api import api
......@@ -265,7 +264,7 @@ class ArchiveQueryResource(Resource):
search_request.owner('all')
apply_search_parameters(search_request, query)
search_request.include('calc_id', 'upload_id', 'with_embargo')
search_request.include('calc_id', 'upload_id', 'with_embargo', 'parser_name')
try:
if scroll:
......@@ -286,29 +285,42 @@ class ArchiveQueryResource(Resource):
data = []
calcs = results['results']
archive_files = None
archive_readers: List[archive.ArchiveReader] = []
current_upload_id = None
for entry in calcs:
upload_id = entry['upload_id']
calc_id = entry['calc_id']
if archive_files is None or current_upload_id != upload_id:
if current_upload_id is None or current_upload_id != upload_id:
upload_files = UploadFiles.get(upload_id, create_authorization_predicate(upload_id))
if upload_files is None:
return []
archive_files = upload_files.archive_file_msgs()
for archive_reader in archive_readers:
if archive_reader is not None:
archive_reader.close()
archive_readers = [
archive.ArchiveReader(f) if f is not None else None
for f in upload_files.archive_file_msgs()]
current_upload_id = upload_id
if entry['with_embargo']:
archive_file = archive_files[1]
archive_reader = archive_readers[1]
else:
archive_file = archive_files[0]
archive_reader = archive_readers[0]
if archive_file is None:
if archive_reader is None:
continue
data.append(query_archive(archive_file, {calc_id: query_schema}))
data.append(
{
'calc_id': calc_id,
'parser_name': entry['parser_name'],
'archive': archive.query_archive(
archive_reader, {calc_id: query_schema})[calc_id]
})
# assign archive data to results
results['results'] = data
......
......@@ -480,7 +480,7 @@ def read_archive(file_or_path: str, **kwargs) -> ArchiveReader:
return ArchiveReader(file_or_path, **kwargs)
def query_archive(f, query_dict: dict):
def query_archive(f_or_archive_reader: Union[ArchiveReader, BytesIO], query_dict: dict):
def _load_data(query_dict: Dict[str, Any], archive_item: ArchiveObject, main_section: bool = False):
if not isinstance(query_dict, dict):
......@@ -529,8 +529,15 @@ def query_archive(f, query_dict: dict):
return res
with ArchiveReader(f) as archive:
return _load_data(query_dict, archive, True)
if isinstance(f_or_archive_reader, ArchiveReader):
return _load_data(query_dict, f_or_archive_reader, True)
elif isinstance(f_or_archive_reader, BytesIO):
with ArchiveReader(f_or_archive_reader) as archive:
return _load_data(query_dict, archive, True)
else:
raise TypeError('%s is neither a file-like nor ArchiveReader' % f_or_archive_reader)
if __name__ == '__main__':
......
# Copyright 2019 Alvin Noe Ladines, Markus Scheidgen
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
'''
Contains interfaces to the archive metainfo and query.
In module ``ArchiveMetainfo``, the data is provided either from raw
json data or as a filename of an existing msgpack database. The metainfo
can then be queried by providing a schema.
.. code-block:: python
am = ArchiveMetainfo(archive_data)
for calc in am.calcs:
calc.section_run.section_single_configuration_calculation[0]({'energy_total':None})
The ArchiveQuery enables a query interface to the archive data. A dict of query parameters
and a query schema similar to the archive json format can be provided to filter archive.
.. code-block:: python
q = ArchiveQuery({'atoms':'Si'})
metainfo = q.query()
for c in metainfo.calcs:
print(c.section_run.section_single_configuration_calculation[0]({'energy_total':'*'}))
'''
import numpy as np
import requests
import os.path
from urllib.parse import urlparse
from typing import Dict, List, Any
from nomad.metainfo import MSection, Quantity, SubSection
from nomad.metainfo.metainfo import MObjectMeta
from nomad import config as nomad_config
from nomad.cli.client.client import KeycloakAuthenticator
class ArchiveMetainfo:
    '''
    Converts archive data in json format to the new nomad metainfo model.

    Section classes are generated on the fly from the structure of the given data
    (dicts become sections, lists of dicts become repeating sub sections, everything
    else becomes a quantity) and the data is then loaded into them.

    Arguments:
        archive_data: the archive data in json format; a list of dicts, where the
            first key of each dict is the calc id and its value the calculation data
    '''
    def __init__(self, archive_data: List[Dict[str, Any]]):
        self._archive_data = archive_data
        self.metainfo = None
        self._metacls = None
        self._calcs: Dict[str, MSection] = {}
        self._calc_ids: List[str] = []
        self._base_metacls = None
        self._base_data = None
        self._prefix = 'calc'
        self._init_calcs()

    def _init_calcs(self):
        ''' Builds a section class and a section instance for each calculation. '''
        for calc in self._archive_data:
            # each entry is expected to be a single-key dict ``{calc_id: data}``
            calc_id = list(calc)[0]
            data = calc[calc_id]
            self._calc_ids.append(calc_id)
            self._calcs[calc_id] = self._build_meta_cls(data, calc_id).m_from_dict(data)

    def _calc_with_id(self, calc_id):
        ''' Returns the calculation for the given id with its ``calc_id`` attribute set. '''
        calc = self._calcs[calc_id]
        calc.calc_id = calc_id
        return calc

    def __getitem__(self, key):
        '''
        Returns the calculation for a calc id (``None`` if the id is unknown), the
        calculation at an integer position, or a list of calculations for a slice.
        '''
        if isinstance(key, str):
            calc = self._calcs.get(key)
            # compare against None; do not rely on the truthiness of the section object
            if calc is not None:
                calc.calc_id = key
            return calc

        if isinstance(key, int):
            return self._calc_with_id(self._calc_ids[key])

        return [self._calc_with_id(calc_id) for calc_id in self._calc_ids[key]]

    def __len__(self):
        return len(self._calcs)

    def __iter__(self):
        # The object is its own iterator; the position is kept in ``self._n`` and is
        # reset every time a new iteration is started.
        self._n = -1
        return self

    def __next__(self):
        self._n += 1
        if self._n >= len(self):
            raise StopIteration
        calc_id, calc = list(self._calcs.items())[self._n]
        calc.calc_id = calc_id
        return calc

    @property
    def calcs(self):
        '''
        Calculations in metainfo form which can be actively queried by using the get
        functionality and providing a schema
        '''
        if not self._calcs:
            self._init_calcs()
        for calc_id, calc in self._calcs.items():
            calc.calc_id = calc_id
            yield calc

    @property
    def base_data(self):
        ''' The data of the first calculation; used as template for the base metaclass. '''
        if self._base_data is None:
            calc_id = self._calc_ids[0]
            # ``_archive_data`` is a list of ``{calc_id: data}`` dicts; the first
            # calc id belongs to the first entry
            self._base_data = self._archive_data[0][calc_id]
        return self._base_data

    @property
    def base_metacls(self):
        '''
        The base metaclass to apply a calculation
        '''
        if self._base_metacls is None:
            self._base_metacls = self._build_meta_cls(self.base_data, self._prefix)
        return self._base_metacls

    def _get_dtype(self, data):
        '''
        Returns the python/numpy type of the given value. For arrays, the type of the
        first (recursively) element is used; empty arrays are treated as ``int``.
        '''
        if not isinstance(data, np.ndarray):
            return type(data)
        if len(data) == 0:
            return int
        return self._get_dtype(data[0])

    def _to_meta_obj(self, content):
        '''
        Wraps the given content into a metainfo property: quantities are passed
        through, section classes become sub sections, and plain values (or lists of
        values) become quantities with a type and shape derived from the value.
        '''
        if isinstance(content, Quantity):
            return content
        if isinstance(content, MObjectMeta):
            return SubSection(sub_section=content, repeats=content.repeats)

        if isinstance(content, list):
            content = np.array(content)
        dtype = self._get_dtype(content)
        if isinstance(content, np.ndarray):
            return Quantity(type=np.dtype(dtype), shape=np.shape(content))
        return Quantity(type=dtype)

    def _create_section(self, name, contents):
        ''' Creates a new section class with the title-cased name and given properties. '''
        return type(name.title(), (MSection,), contents)

    def _build_meta_cls(self, data, name, return_section=True):
        '''
        Recursively derives metainfo definitions from the given data.

        Arguments:
            data: the (partial) archive data to derive definitions from
            name: the name used for the created section class
            return_section: if False and data is a dict, the dict of property
                definitions is returned instead of a section class

        Returns: A section class for dicts and lists of dicts (with ``repeats`` set
            accordingly), a dict of properties (see ``return_section``), or a quantity
            for everything else.
        '''
        if isinstance(data, dict):
            contents = {
                key: self._to_meta_obj(self._build_meta_cls(val, key, True))
                for key, val in data.items()}
            if not return_section:
                return contents
            section = self._create_section(name, contents)
            section.repeats = False
            return section

        if isinstance(data, list) and data and isinstance(data[0], dict):
            # merge the properties of all items into one repeating section definition
            contents = {}
            for item in data:
                contents.update(self._build_meta_cls(item, name, False))
            section = self._create_section(name, contents)
            section.repeats = True
            return section

        # empty lists, lists of plain values, and plain values are quantities
        return self._to_meta_obj(data)
class ArchiveQuery:
    '''
    Client side query interface to the archive data.

    The query parameters are given as a dict (first positional argument) and/or as
    keyword arguments. The keys ``max_n_pages``, ``authentication``, ``url``, ``user``,
    and ``password`` are removed from the parameters and used to configure the client;
    the remaining parameters are posted as json to the ``archive/query`` endpoint.
    '''
    def __init__(self, *args, **kwargs):
        self.archive_data = []
        # always defined, stays None as long as no results were retrieved
        self.metainfo = None
        self._scroll_id = None
        self._page = None

        # copy the given parameters; the pops below must not alter the caller's dict
        self._query_params = dict(args[0]) if args else {}
        self._query_params.update(kwargs)

        self._max_n_pages = self._query_params.pop('max_n_pages', 100000)
        self._authentication = self._query_params.pop('authentication', None)
        self._url = self._query_params.pop('url', None)
        self._user = self._query_params.pop('user', None)
        self._password = self._query_params.pop('password', None)

        # given connection parameters override the global client configuration
        if self._url:
            nomad_config.client.url = self._url
        if self._user:
            nomad_config.client.user = self._user
        if self._password:
            nomad_config.client.password = self._password

    def _get_value(self, name, in_dict):
        '''
        Searches the given (nested) dict for the key ``name``. A key of ``in_dict``
        itself takes precedence over keys in nested dicts.

        Returns: The first value found, or None if there is no such key or
            ``in_dict`` is not a dict.
        '''
        if not isinstance(in_dict, dict):
            return None
        if name in in_dict:
            return in_dict[name]
        for val in in_dict.values():
            res = self._get_value(name, val)
            if res is not None:
                return res
        return None

    def _set_value(self, name, value, in_dict):
        '''
        Sets ``name`` to ``value`` in the given (nested) dict. Non-dict arguments are
        ignored.

        The keys are visited in order. If ``name`` is found, it is overwritten and the
        method returns; all values visited before are processed recursively. If
        ``name`` is not a key of ``in_dict``, it is added after all values were
        processed.
        '''
        # NOTE(review): this also adds ``name`` to every nested dict that does not
        # contain it - confirm that this is intended before relying on it
        if not isinstance(in_dict, dict):
            return
        for key, val in in_dict.items():
            if key == name:
                in_dict[name] = value
                return
            self._set_value(name, value, val)
        in_dict[name] = value

    def _get_authentication(self):
        '''
        Returns the authentication headers for the API request. If no authentication
        was given, a ``KeycloakAuthenticator`` based on the client configuration is
        created (once) and applied.
        '''
        if self._authentication is None:
            host = urlparse(nomad_config.client.url).netloc.split(':')[0]
            self._authentication = KeycloakAuthenticator(
                host=host,
                user=nomad_config.client.user,
                password=nomad_config.client.password,
                server_url=nomad_config.keycloak.server_external_url,
                realm_name=nomad_config.keycloak.realm_name,
                client_id=nomad_config.keycloak.public_client_id)

        if isinstance(self._authentication, KeycloakAuthenticator):
            return self._authentication.apply()
        return self._authentication

    def _api_query(self):
        '''
        Posts the query to the API and stores the returned scroll id / page for the
        next request.

        Returns: The list of results of the requested page.
        Raises: requests.HTTPError if the API does not answer with status 200.
        '''
        # urls are always joined with '/', os.path.join would be platform dependent
        url = '%s/archive/query' % nomad_config.client.url.rstrip('/')

        # continue where the last request ended; the sections are created if the
        # user did not provide them
        if self._scroll_id is not None:
            self._query_params.setdefault('scroll', {})['scroll_id'] = self._scroll_id
        elif self._page is not None:
            self._query_params.setdefault('pagination', {})['page'] = self._page + 1

        response = requests.post(url, headers=self._get_authentication(), json=self._query_params)
        if response.status_code != 200:
            # raises for 4xx/5xx; everything else that is not 200 is reported explicitly
            response.raise_for_status()
            raise requests.HTTPError(
                'Unexpected status code %d for %s' % (response.status_code, url),
                response=response)

        # ``json`` is a method on requests responses, but might already be the data
        # on other response implementations (e.g. test clients)
        data = response.json
        if not isinstance(data, dict):
            data = data()

        results = data.get('results', [])
        self._scroll_id = data.get('scroll', {}).get('scroll_id', None)
        self._page = data.get('pagination', {}).get('page', None)
        return results

    def _get_archive_data(self):
        '''
        Retrieves results page by page and appends them to ``archive_data`` until a
        page is empty, ``max_n_pages`` is reached, or the API provides no information
        to continue with.
        '''
        n_page = 0
        while True:
            results = self._api_query()
            self.archive_data += results
            n_page += 1
            if not results or n_page >= self._max_n_pages:
                break
            if self._scroll_id is None and self._page is None:
                # without scroll or pagination information the next request would
                # be identical and only yield the same results again
                break

    def query(self):
        '''
        Runs the query and provides the results as ``ArchiveMetainfo`` in ``metainfo``.
        ``metainfo`` is not changed if there are no results.
        '''
        self._get_archive_data()
        if self.archive_data:
            self.metainfo = ArchiveMetainfo(archive_data=self.archive_data)
def query(*args, **kwargs):
    '''
    Convenience function that creates an ``ArchiveQuery`` from the given arguments
    and runs it.

    Returns: The ``metainfo`` of the executed query, or None if the query did not
        provide any.
    '''
    archive_query_obj = ArchiveQuery(*args, **kwargs)
    archive_query_obj.query()
    # the attribute might not exist if the query had no results
    return getattr(archive_query_obj, 'metainfo', None)
# Copyright 2019 Alvin Noe Ladines, Markus Scheidgen
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
'''
Contains the Python client side library to access the NOMAD archive.
# TODO
In module ``ArchiveMetainfo``, the data is provided either from raw
json data or as a filename of an existing msgpack database. The metainfo
can then be queried by providing a schema.
.. code-block:: python
am = ArchiveMetainfo(archive_data)
for calc in am.calcs:
calc.section_run.section_single_configuration_calculation[0]({'energy_total':None})
The ArchiveQuery enables a query interface to the archive data. A dict of query parameters
and a query schema similar to the archive json format can be provided to filter archive.
.. code-block:: python
q = ArchiveQuery({'atoms':'Si'})
metainfo = q.query()
for c in metainfo.calcs:
print(c.section_run.section_single_configuration_calculation[0]({'energy_total':'*'}))
'''
from typing import Dict, Union, Any, List
from collections import Sequence
import requests
from urllib.parse import urlparse
from nomad import config, metainfo, parsing
from nomad.cli.client.client import KeycloakAuthenticator
class ArchiveQuery(Sequence):
def __init__(
self,
query: dict = None, query_schema: dict = None,
url: str = None, username: str = None, password: str = None,
scroll: bool = False,
authentication: Union[Dict[str, str], KeycloakAuthenticator] = None, **kwargs):
self.scroll = scroll
self._scroll_id = None
self._page = 1
self.query: Dict[str, Any] = {
'query': {}
}
if query is not None:
self.query['query'].update(query)
if query_schema is not None:
self.query['query_schema'] = query_schema
self.query['query'].update(kwargs)
self.password = password
self.username = username
self.url = config.client.url if url is None else url
self._authentication = authentication
self._total = -1
self._results: List[dict] = []
@property
def authentication(self):
if self._authentication is None and self.username is not None and self.password is not None:
host = urlparse(self.url).netloc.split(':')[0]
self._authentication = KeycloakAuthenticator(
host=host,
user=self.username,