Commit 76eeefe3 authored by Markus Scheidgen
Browse files

Minor metainfo serialization fix.

parent eacff261
Pipeline #71383 passed with stages
in 30 minutes and 40 seconds
Subproject commit 660338bb96b347b9118ea9942d8f76d6cfb1aa0e
Subproject commit 1fe73174d90df73f237cbe14a6456e2501acad52
......@@ -37,25 +37,9 @@ from .docs import blueprint as docs_blueprint
from . import common
__new_line = '\n'.encode()
# replace the json implementation of flask_restplus
def output_json(data, code, headers=None):
def default(data):
if isinstance(data, collections.OrderedDict):
return dict(data)
if isinstance(data, BaseList):
return list(data)
raise TypeError
# always end the json dumps with a new line
# see https://github.com/mitsuhiko/flask/pull/1262
dumped = orjson.dumps(
data, default=default,
option=orjson.OPT_INDENT_2 | orjson.OPT_NON_STR_KEYS) + __new_line
dumped = nomad_utils.dumps(data) + b'\n'
resp = make_response(dumped, code)
resp.headers.extend(headers or {})
......
......@@ -1051,6 +1051,73 @@ class MSection(metaclass=MObjectMeta): # TODO find a way to make this a subclas
else:
raise TypeError('%s is not a category' % category)
def serialize_quantity(quantity, is_set):
    '''
    Turns the value of one quantity of this section into serializable data.

    The section itself (``self``) is not a parameter; it is taken from the
    enclosing scope.

    Arguments:
        quantity: The quantity definition. Its ``type``, ``shape``, ``name``, and
            ``default`` are read.
        is_set: If true, the value is read from the section's ``__dict__`` under
            the quantity's name. Otherwise the quantity's default is serialized.

    Returns:
        The result of ``tolist()`` for numpy typed quantities with a non empty
        shape, the serialized value for quantities with an empty shape, or a list
        of serialized items for quantities with a shape of length one.

    Raises:
        MetainfoError: If there is no known way to serialize the quantity's type,
            or if the value of an ``Any`` typed quantity is not a python primitive.
        NotImplementedError: If a non numpy quantity has a shape with more than
            one dimension.
    '''
    quantity_type = quantity.type

    # Use isinstance rather than an exact type comparison: since numpy 1.20 dtype
    # instances are instances of np.dtype subclasses, so `type(x) == np.dtype`
    # is false for them and numpy quantities would end up in the error branch.
    is_numpy_type = isinstance(quantity_type, np.dtype)

    # pick the function that serializes a single (scalar) value
    serialize: TypingCallable[[Any], Any] = str
    if isinstance(quantity_type, Reference):
        def reference_serialize(value):
            # proxies are written as their url, everything else is delegated
            # to the reference type
            if isinstance(value, MProxy):
                return value.m_proxy_url
            else:
                return quantity_type.serialize(self, quantity, value)
        serialize = reference_serialize

    elif isinstance(quantity_type, DataType):
        def data_type_serialize(value):
            return quantity_type.serialize(self, quantity, value)
        serialize = data_type_serialize

    elif quantity_type in _primitive_types:
        serialize = _primitive_types[quantity_type]

    elif is_numpy_type:
        def serialize_dtype(x):
            return x.item()
        serialize = serialize_dtype

    elif isinstance(quantity_type, MEnum):
        serialize = str

    elif quantity_type == Any:
        def _serialize(value: Any):
            # exact type check: subclasses of the listed types are rejected as well
            if type(value) not in [str, int, float, bool, list, dict, type(None)]:
                raise MetainfoError(
                    'Only python primitives are allowed for Any typed non '
                    'virtual quantities: %s of quantity %s in section %s' %
                    (value, quantity, self))

            return value
        serialize = _serialize

    else:
        raise MetainfoError(
            'Do not know how to serialize data with type %s for quantity %s' %
            (quantity_type, quantity))

    if is_set:
        value = self.__dict__[quantity.name]
    else:
        value = quantity.default

    dimensions = len(quantity.shape)

    # numpy values with a shape are converted as a whole
    if is_numpy_type and dimensions > 0:
        return value.tolist()

    if dimensions == 0:
        return serialize(value)

    if dimensions == 1:
        return [serialize(i) for i in value]

    raise NotImplementedError('Higher shapes (%s) not supported: %s' % (quantity.shape, quantity))
def items() -> Iterable[Tuple[str, Any]]:
# metadata
if with_meta:
......@@ -1076,71 +1143,11 @@ class MSection(metaclass=MObjectMeta): # TODO find a way to make this a subclas
if not include_defaults or not quantity.m_is_set(Quantity.default):
continue
quantity_type = quantity.type
serialize: TypingCallable[[Any], Any] = str
if isinstance(quantity_type, Reference):
def reference_serialize(value):
if isinstance(value, MProxy):
return value.m_proxy_url
else:
return quantity_type.serialize(self, quantity, value)
serialize = reference_serialize
elif isinstance(quantity_type, DataType):
def data_type_serialize(value):
return quantity_type.serialize(self, quantity, value)
serialize = data_type_serialize
elif quantity_type in _primitive_types:
serialize = _primitive_types[quantity_type]
elif type(quantity_type) == np.dtype:
def serialize_dtype(x):
return x.item()
serialize = serialize_dtype
elif isinstance(quantity_type, MEnum):
serialize = str
elif quantity_type == Any:
def _serialize(value: Any):
if type(value) not in [str, int, float, bool, list, dict, type(None)]:
raise MetainfoError(
'Only python primitives are allowed for Any typed non '
'virtual quantities: %s of quantity %s in section %s' %
(value, quantity, self))
return value
serialize = _serialize
else:
raise MetainfoError(
'Do not know how to serialize data with type %s for quantity %s' %
(quantity_type, quantity))
if is_set:
value = self.__dict__[name]
else:
value = quantity.default
if type(quantity_type) == np.dtype and len(quantity.shape) > 0:
serializable_value = value.tolist()
else:
if len(quantity.shape) == 0:
serializable_value = serialize(value)
elif len(quantity.shape) == 1:
serializable_value = [serialize(i) for i in value]
else:
raise NotImplementedError('Higher shapes (%s) not supported: %s' % (quantity.shape, quantity))
try:
yield name, serialize_quantity(quantity, is_set)
yield name, serializable_value
except ValueError as e:
raise ValueError('Value error (%s) for %s' % (str(e), quantity))
# sub sections
for name, sub_section_def in self.m_def.all_sub_sections.items():
......
......@@ -524,6 +524,8 @@ class LegacyParser(MatchingParser):
# TODO we need a homogeneous interface to parsers, but we dont have it right now.
# There are some hacks to distinguish between ParserInterface parser and simple_parser
# using hasattr, kwargs, etc.
# Fall back to a logger named after this module when the caller gave none.
# The name has to be the module's __name__, not the literal string '__name__'.
if logger is None:
    logger = utils.get_logger(__name__)
if issubclass(self.parser_class, CoEParser):
# TODO reuse parser
......
......@@ -42,6 +42,7 @@ from structlog.stdlib import LoggerFactory
import logstash
from contextlib import contextmanager
import json
import orjson
import uuid
import time
import re
......@@ -49,6 +50,7 @@ from werkzeug.exceptions import HTTPException
import hashlib
import sys
from datetime import timedelta
import collections
from nomad import config
......@@ -56,6 +58,21 @@ default_hash_len = 28
''' Length of hashes and hash-based ids (e.g. calc, upload) in nomad. '''
def dumps(data):
    '''
    Serializes the given data with orjson and returns the result of
    ``orjson.dumps``. The output is indented by two spaces and non ``str``
    dictionary keys are allowed.

    Arguments:
        data: The data to serialize.
    '''
    def to_serializable(value):
        # Passed to orjson as its `default` hook; a TypeError signals that the
        # value cannot be converted.
        if isinstance(value, collections.OrderedDict):
            return dict(value)

        # identified by class name only, the class itself is not imported here
        if value.__class__.__name__ == 'BaseList':
            return list(value)

        raise TypeError

    options = orjson.OPT_INDENT_2 | orjson.OPT_NON_STR_KEYS
    return orjson.dumps(data, default=to_serializable, option=options)
def decode_handle_id(handle_str: str):
result = 0
for c in handle_str:
......
......@@ -17,7 +17,7 @@ import numpy as np
from ase import Atoms
import ase.build
from nomad import datamodel, config
from nomad import datamodel, config, utils
from nomad.parsing import Backend
from nomad.normalizing import normalizers
......@@ -241,6 +241,8 @@ def assert_normalized(backend: Backend):
assert metadata[key] != config.services.unavailable_value, '%s must not be unavailable' % key
utils.dumps(backend.entry_archive.m_to_dict())
def test_normalizer(normalized_example: Backend):
assert_normalized(normalized_example)
......
......@@ -402,9 +402,12 @@ def parser_in_dir(dir):
try:
backend = parser.run(file_path)
utils.dumps(backend.entry_archive.m_to_dict())
backend.resource.unload()
except Exception as e:
print(file_path, parser, 'FAILURE', e)
import traceback
traceback.print_exc()
else:
print(file_path, parser, 'SUCCESS')
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment