Commit d17184d9 authored by Lauri Himanen's avatar Lauri Himanen
Browse files

Merge branch 'v0.8.0' into encyclopedia

parents bd66a69b 60364e1e
.DS_Store
.env/
.pyenv/
.*env/
.pytest/
.python-version
.ipynb_checkpoints/
......
Subproject commit 75a663a7e1ba8ff13c49bcdc62bca8bdb2f2d108
Subproject commit f0350d256e6d15d9ab089f6bb0ce9c87b5c42243
This diff is collapsed.
This diff is collapsed.
......@@ -5,4 +5,4 @@ from nomad.cli.parse import parse
utils.configure_logging()
parse(sys.argv[1], '.')
parse(sys.argv[1])
......@@ -30,7 +30,7 @@ import urllib.parse
import metainfo
from nomad.files import UploadFiles, Restricted
from nomad.archive import query_archive
from nomad.archive import query_archive, ArchiveQueryError
from nomad import search, config
from nomad.app import common
......@@ -265,7 +265,7 @@ class ArchiveQueryResource(Resource):
search_request.owner('all')
apply_search_parameters(search_request, query)
search_request.include('calc_id', 'upload_id', 'with_embargo', 'parser_name')
search_request.include('calc_id', 'upload_id', 'with_embargo', 'published', 'parser_name')
try:
if scroll:
......@@ -289,8 +289,11 @@ class ArchiveQueryResource(Resource):
upload_files = None
current_upload_id = None
for entry in calcs:
with_embargo = entry['with_embargo']
upload_id = entry['upload_id']
calc_id = entry['calc_id']
if upload_files is None or current_upload_id != upload_id:
if upload_files is not None:
upload_files.close()
......@@ -302,7 +305,7 @@ class ArchiveQueryResource(Resource):
current_upload_id = upload_id
if entry['with_embargo']:
if with_embargo:
access = 'restricted'
else:
access = 'public'
......@@ -315,9 +318,11 @@ class ArchiveQueryResource(Resource):
'archive': query_archive(
archive, {calc_id: query_schema})[calc_id]
})
except ArchiveQueryError as e:
abort(400, str(e))
except Restricted:
# optimize and not access restricted for same upload again
# TODO in reality this should not happen
pass
if upload_files is not None:
......
......@@ -320,11 +320,15 @@ def create_authorization_predicate(upload_id, calc_id=None):
if g.user.user_id == upload.user_id:
return True
try:
calc = processing.Calc.get(calc_id)
except KeyError:
return False
return g.user.user_id in calc.metadata.get('shared_with', [])
# TODO I doubt if shared_with is actually working
if calc_id is not None:
try:
calc = processing.Calc.get(calc_id)
except KeyError:
return False
return g.user.user_id in calc.metadata.get('shared_with', [])
return False
except KeyError as e:
logger = utils.get_logger(__name__, upload_id=upload_id, calc_id=calc_id)
......
......@@ -541,14 +541,14 @@ class UploadCommandResource(Resource):
# Upload via streaming data tends to work much easier, e.g. no mime type issues, etc.
# It is also easier for the user to understand IMHO.
upload_command = 'curl %s -T <local_file>' % upload_url
upload_command = 'curl "%s" -T <local_file>' % upload_url
upload_command_form = 'curl %s -X PUT -F file=@<local_file>' % upload_url
upload_command_form = 'curl "%s" -X PUT -F file=@<local_file>' % upload_url
upload_command_with_name = 'curl "%s" -X PUT -T <local_file>' % upload_url_with_name
upload_progress_command = upload_command + ' | xargs echo'
upload_tar_command = 'tar -cf - <local_folder> | curl -# -H %s -T - | xargs echo' % upload_url
upload_tar_command = 'tar -cf - <local_folder> | curl -# -H "%s" -T - | xargs echo' % upload_url
return dict(
upload_url=upload_url,
......
# Copyright 2018 Markus Scheidgen, Alvin Noe Ladines
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import Iterable, Any, Tuple, Dict, BinaryIO, Union, List, cast
from io import BytesIO, BufferedReader
from collections.abc import Mapping, Sequence
......@@ -29,6 +43,15 @@ def adjust_uuid_size(uuid):
class ArchiveError(Exception):
    ''' Signals that an archive is broken. '''
class ArchiveQueryError(Exception):
    '''
    Signals that an archive query is invalid, or that it does not fit to the archive
    it is applied to.
    '''
......@@ -511,60 +534,88 @@ def read_archive(file_or_path: str, **kwargs) -> ArchiveReader:
return ArchiveReader(file_or_path, **kwargs)
def query_archive(f_or_archive_reader: Union[str, ArchiveReader, BytesIO], query_dict: dict):
# Pattern for the keys of an archive query: a name that is optionally followed by an
# index (``key[1]``, ``key[-1]``) or a slice (``key[1:3]``, ``key[:-1]``, ``key[:]``).
# Group 1 is the name, group 2 the whole bracket part, group 3 the first index,
# group 4 the ``:<last>`` part, and group 5 the last index. Omitted indices match as
# empty strings. Empty brackets (``key[]``) and malformed numbers (``key[?]``,
# ``key[1-2]``) deliberately do not match, so that they are reported as an invalid key
# instead of failing later during the int conversion.
__query_archive_key_pattern = re.compile(
    r'^([\s\w\-]+)(\[(?!\])(-?[0-9]+|)(:(-?[0-9]+|))?\])?$')
def query_archive(f_or_archive_reader: Union[str, ArchiveReader, BytesIO], query_dict: dict, **kwargs):
def _to_son(data):
if isinstance(data, (ArchiveList, List)):
data = [_to_son(item) for item in data]
elif isinstance(data, ArchiveObject):
data = data.to_dict()
return data
def _load_data(query_dict: Dict[str, Any], archive_item: ArchiveObject, main_section: bool = False):
if not isinstance(query_dict, dict):
if isinstance(archive_item, ArchiveObject):
return archive_item.to_dict()
elif isinstance(archive_item, ArchiveList):
return list(archive_item)
else:
return archive_item
return _to_son(archive_item)
res = {}
result = {}
for key, val in query_dict.items():
key = key.strip()
# process array indices
match = re.match(r'(\w+)\[([-?0-9:]+)\]', key)
match = __query_archive_key_pattern.match(key)
index: Tuple[int, int] = None
if match:
archive_key = match.group(1)
index_str = match.group(2)
match = re.match(r'([-?0-9]*):([-?0-9]*)', index_str)
if match:
index = (
0 if match.group(1) == '' else int(match.group(1)),
None if match.group(2) == '' else int(match.group(2)))
key = match.group(1)
if match.group(2) is not None:
first_index, last_index = None, None
group = match.group(3)
first_index = None if group == '' else int(group)
if match.group(4) is not None:
group = match.group(5)
last_index = None if group == '' else int(group)
index = (0 if first_index is None else first_index, last_index)
else:
index = (first_index, first_index + 1) # one item
else:
index = int(index_str) # type: ignore
key = archive_key
index = None
else:
archive_key = key
index = None
raise ArchiveQueryError('invalid key format: %s' % key)
# support for shorter uuids
archive_key = key.split('[')[0]
if main_section:
archive_key = adjust_uuid_size(key)
else:
archive_key = key
try:
archive_child = archive_item[archive_key]
is_list = isinstance(archive_child, (ArchiveList, list))
if index is None and is_list:
index = (0, None)
elif index is not None and not is_list:
raise ArchiveQueryError('cannot use list key on none list %s' % key)
if index is None:
res[key] = _load_data(val, archive_item[archive_key])
elif isinstance(index, int):
res[key] = _load_data(val, archive_item[archive_key])[index]
pass
else:
res[key] = _load_data(val, archive_item[archive_key])[index[0]: index[1]]
archive_child = archive_child[index[0]: index[1]]
except Exception:
if isinstance(archive_child, (ArchiveList, list)):
result[key] = [_load_data(val, item) for item in archive_child]
else:
result[key] = _load_data(val, archive_child)
except (KeyError, IndexError):
continue
return res
return result
if isinstance(f_or_archive_reader, ArchiveReader):
return _load_data(query_dict, f_or_archive_reader, True)
elif isinstance(f_or_archive_reader, (BytesIO, str)):
with ArchiveReader(f_or_archive_reader) as archive:
with ArchiveReader(f_or_archive_reader, **kwargs) as archive:
return _load_data(query_dict, archive, True)
else:
......
......@@ -38,6 +38,7 @@ lazy_import.lazy_module('nomad.metainfo')
lazy_import.lazy_module('nomad.processing')
lazy_import.lazy_module('nomad.client')
lazy_import.lazy_module('nomadcore')
lazy_import.lazy_module('nomadcore.simple_parser')
from . import dev, parse, admin, client # noqa
from .cli import cli # noqa
......
# Copyright 2018 Markus Scheidgen
# Copyright 2018 Markus Scheidgen, Alvin Noe Ladines
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
......
......@@ -36,14 +36,16 @@ and a query schema similar to the archive json format can be provided to filter
'''
from typing import Dict, Union, Any, List
from collections import Sequence
import collections.abc
import requests
from urllib.parse import urlparse
from bravado import requests_client as bravado_requests_client
import time
from keycloak import KeycloakOpenID
from io import StringIO
from nomad import config
from nomad import metainfo as mi
from nomad.datamodel import EntryArchive
# TODO this import is necessary to load all metainfo definitions that the parsers are using
......@@ -77,26 +79,61 @@ class KeycloakAuthenticator(bravado_requests_client.Authenticator):
return dict(Authorization='Bearer %s' % self.token['access_token'])
class ArchiveQuery(Sequence):
class ApiStatistics(mi.MSection):
    '''
    Metainfo section with counters about entries, transferred bytes, and API calls.
    All quantities are integers that start at 0.
    '''

    nentries = mi.Quantity(
        type=int, default=0,
        description='Number of queried entries')

    last_response_nentries = mi.Quantity(
        type=int, default=0,
        description='Number of entries loaded in the last api call')

    last_response_data_size = mi.Quantity(
        type=int, unit=mi.units.bytes, default=0,
        description='Bytes loaded in the last api call')

    loaded_data_size = mi.Quantity(
        type=int, unit=mi.units.bytes, default=0,
        description='Bytes loaded from this query')

    loaded_nentries = mi.Quantity(
        type=int, default=0,
        description='Number of downloaded entries')

    napi_calls = mi.Quantity(
        type=int, default=0,
        description='Number of made api calls')

    def __repr__(self):
        ''' Returns one ``<description>: <value>`` line for each quantity of this section. '''
        return ''.join(
            '%s: %s\n' % (quantity.description, self.m_get(quantity))
            for quantity in self.m_def.all_quantities.values())
class ArchiveQuery(collections.abc.Sequence):
def __init__(
self,
query: dict = None, query_schema: dict = None,
query: dict = None, required: dict = None,
url: str = None, username: str = None, password: str = None,
scroll: bool = False,
authentication: Union[Dict[str, str], KeycloakAuthenticator] = None, **kwargs):
scroll: bool = False, per_page: int = 10, max: int = None,
authentication: Union[Dict[str, str], KeycloakAuthenticator] = None):
self.scroll = scroll
self._scroll_id = None
self._page = 1
self.page = 1
self.per_page = per_page
self.max = max
self.query: Dict[str, Any] = {
'query': {}
}
if query is not None:
self.query['query'].update(query)
if query_schema is not None:
self.query['query_schema'] = query_schema
self.query['query'].update(kwargs)
if required is not None:
self.query['query_schema'] = required
self.password = password
self.username = username
......@@ -104,7 +141,9 @@ class ArchiveQuery(Sequence):
self._authentication = authentication
self._total = -1
self._capped_total = -1
self._results: List[dict] = []
self._statistics = ApiStatistics()
@property
def authentication(self):
......@@ -133,7 +172,8 @@ class ArchiveQuery(Sequence):
scroll_config['scroll_id'] = self._scroll_id
else:
self.query.setdefault('pagination', {})['page'] = self._page
self.query.setdefault('pagination', {}).update(
page=self.page, per_page=self.per_page)
response = requests.post(url, headers=self.authentication, json=self.query)
if response.status_code != 200:
......@@ -151,7 +191,12 @@ class ArchiveQuery(Sequence):
else:
pagination = data['pagination']
self._total = pagination['total']
self._page = pagination['page'] + 1
self.page = pagination['page'] + 1
if self.max is not None:
self._capped_total = min(self.max, self._total)
else:
self._capped_total = self._total
results = data.get('results', [])
......@@ -160,21 +205,56 @@ class ArchiveQuery(Sequence):
self._results.append(archive)
try:
data_size = len(response.content)
self._statistics.last_response_data_size = data_size
self._statistics.loaded_data_size += data_size
self._statistics.nentries = self._total
self._statistics.last_response_nentries = len(results)
self._statistics.loaded_nentries = len(self._results)
self._statistics.napi_calls += 1
except Exception:
# fails in test due to mocked requests library
pass
def __repr__(self):
    '''
    Returns the string form of the statistics. If no total is known yet
    (``_total`` is still -1), the API is called once before.
    '''
    nothing_loaded_yet = self._total == -1
    if nothing_loaded_yet:
        self.call_api()

    return str(self._statistics)
def __getitem__(self, key):
    '''
    Returns the result at the given index, or a list of results for a slice. Results
    are loaded lazily: the API is called until the requested index is available.

    Arguments:
        key: An integer index (negative values count from the end) or a slice.

    Raises:
        IndexError: If the index lies outside of ``len(self)``, or if the API stops
            delivering results before the index is reached.
    '''
    if isinstance(key, slice):
        return [self[i] for i in range(*key.indices(len(self)))]

    length = len(self)
    if key < 0:
        # Resolve negative indices against the full length and not against the
        # results that happen to be loaded so far.
        key += length

    if key < 0 or key >= length:
        raise IndexError()

    while len(self._results) <= key:
        n_loaded = len(self._results)
        self.call_api()
        if len(self._results) == n_loaded:
            # The API did not deliver any further results; give up instead of
            # calling it forever.
            raise IndexError()

    return self._results[key]
def __len__(self):  # pylint: disable=invalid-length-returned
    '''
    Returns the capped total. If it is not known yet (``_capped_total`` is still -1),
    the API is called once before.
    '''
    capped_total_unknown = self._capped_total == -1
    if capped_total_unknown:
        self.call_api()

    return self._capped_total
@property
def total(self):
    '''
    The total as stored in ``_total``. If it is not known yet (``_total`` is still
    -1), the API is called once before.
    '''
    total_unknown = self._total == -1
    if total_unknown:
        self.call_api()

    return self._total
@property
def statistics(self):
    '''
    The statistics object stored in ``_statistics``. If no total is known yet
    (``_total`` is still -1), the API is called once before.
    '''
    nothing_loaded_yet = self._total == -1
    if nothing_loaded_yet:
        self.call_api()

    return self._statistics
def query_archive(*args, **kwargs):
    ''' Creates an :class:`ArchiveQuery` from the given arguments and returns it. '''
    archive_query = ArchiveQuery(*args, **kwargs)
    return archive_query
......
......@@ -442,7 +442,7 @@ class EntryMetadata(metainfo.MSection):
a_search=Search(many_and='append', derived=_only_atoms))
n_atoms = metainfo.Quantity(
type=int, categories=[DomainMetadata],
type=int, categories=[DomainMetadata], default=0,
description='The number of atoms in the entry\'s material',
a_search=Search())
......
......@@ -189,20 +189,20 @@ class DFTMetadata(MSection):
a_search=Search())
n_geometries = Quantity(
type=int, description='Number of unique geometries.',
type=int, default=0, description='Number of unique geometries.',
a_search=Search(metric_name='geometries', metric='sum'))
n_calculations = Quantity(
type=int,
type=int, default=0,
description='Number of single configuration calculation sections',
a_search=Search(metric_name='calculations', metric='sum'))
n_total_energies = Quantity(
type=int, description='Number of total energy calculations',
type=int, default=0, description='Number of total energy calculations',
a_search=Search(metric_name='total_energies', metric='sum'))
n_quantities = Quantity(
type=int, description='Number of metainfo quantities parsed from the entry.',
type=int, default=0, description='Number of metainfo quantities parsed from the entry.',
a_search=Search(metric='sum', metric_name='quantities'))
quantities = Quantity(
......
......@@ -110,7 +110,7 @@ class OptimadeEntry(MSection):
''')
nelements = Quantity(
type=int,
type=int, default=0,
links=optimade_links('h.6.2.2'),
a_search=Search(),
a_optimade=Optimade(query=True, entry=True),
......@@ -200,7 +200,7 @@ class OptimadeEntry(MSection):
''')
nsites = Quantity(
type=int,
type=int, default=0,
links=optimade_links('h.6.2.11'),
a_search=Search(),
a_optimade=Optimade(query=True, entry=True), description='''
......
......@@ -1812,15 +1812,6 @@ class Quantity(Property):
if self.synonym_for is not None:
self._synonym = self.m_parent.all_quantities[self.synonym_for]
if self.type == int and self.is_scalar and self.default is None:
self.default = 0
if self.type == float and self.is_scalar and self.default is None:
self.default = 0.0
if self.type == bool and self.is_scalar and self.default is None:
self.default = False
# replace the quantity implementation with an optimized version for the most
# primitive quantities if applicable
is_primitive = not self.synonym_for and not self.derived
......
This diff is collapsed.
This diff is collapsed.
......@@ -602,16 +602,16 @@ class TestM1:
assert copy.systems[0].m_parent_index == 0
assert copy.systems[0].m_parent_sub_section is run.systems[0].m_parent_sub_section
def test_default_defaults(self):
def test_not_default_defaults(self):
class TestSection(MSection):
int_quantity = Quantity(type=int)
float_quantity = Quantity(type=float)
bool_quantity = Quantity(type=bool)
section = TestSection()
assert section.int_quantity == 0
assert section.float_quantity == 0.0
assert section.bool_quantity is False
assert section.int_quantity is None
assert section.float_quantity is None
assert section.bool_quantity is None
class TestDatatypes:
......
# Copyright 2018 Markus Scheidgen, Alvin Noe Ladines
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import Dict, Any
import pytest
import msgpack
from io import BytesIO
import os.path
from nomad import utils, config
from nomad.archive import TOCPacker, write_archive, read_archive, ArchiveReader, query_archive
from nomad.archive import TOCPacker, write_archive, read_archive, ArchiveReader, ArchiveQueryError, query_archive
def create_example_uuid(index: int = 0):
......@@ -198,35 +212,58 @@ def test_read_archive_multi(example_uuid, example_entry, use_blocked_toc):
reader.get(create_example_uuid(i)) is not None
def test_query():
payload = {
'c1': {
's1': {
'ss1': [{'p1': 1.0, 'p2': 'x'}, {'p1': 1.5, 'p2': 'y'}]
},
's2': {'p1': ['a', 'b']}
test_query_example: Dict[Any, Any] = {
'c1': {
's1': {
'ss1': [{'p1': 1.0, 'p2': 'x'}, {'p1': 1.5, 'p2': 'y'}]
},
'c2': {
's1': {'ss1': [{'p1': 2.0}]},
's2': {'p1': ['c', 'd']}
}
's2': [{'p1': ['a', 'b'], 'p2': True}]
},
'c2': {
's1': {
'ss1': [{'p1': 2.0}]
},
's2': [{'p1': ['c', 'd']}]