# Reconstructed from a git diff that touched three files:
#   .gitignore            -- adds `.coverage` and a trailing newline after
#                            `.pytest_cache/`
#   nomad/parsing.py      -- new module (JSONStreamGenerator, below)
#   tests/test_parsing.py -- new test module (bottom of this file)

import json
from io import StringIO

import pytest


# --- nomad/parsing.py -------------------------------------------------------

class JSONStreamGenerator:
    """
    Write JSON incrementally to a file-like object through 'event' calls.

    Callers drive the output with ``open_object``/``close_object``,
    ``open_array``/``close_array``, ``key`` and ``value``. The output is
    compact by default; with ``pretty=True`` it is laid out with a two-space
    indent, the same layout ``json.dumps(..., indent=2)`` produces.

    It is pure Python and could be replaced by some faster implementation,
    e.g. yajl-py. It does not do any checks: expect arbitrary exceptions or
    malformed output when events are out of order or incomplete.
    """

    # One nesting level of indentation. `_open` adds it and `_close` removes
    # it, so both must use the same width.
    _INDENT_STEP = '  '

    def __init__(self, fp, pretty=False):
        """
        Arguments:
            fp: Object with a ``write(str)`` method that receives the output.
            pretty: If true, emit newlines and indentation between tokens.
        """
        self._fp = fp
        self._pretty = pretty

        self._indent = ''
        # Text that has to be written before the next token. Every event pops
        # the pending entry and pushes the one for its successor.
        self._separators = ['']

    def _write(self, text):
        """Write raw text to the underlying stream."""
        self._fp.write(text)

    def _write_seperator(self):
        """Emit the separator that is pending before the next token."""
        self._write(self._separators.pop())

    def _seperator_with_newline(self, base=None):
        """
        Return ``base`` followed by a newline and the current indent.

        In compact mode nothing is appended. Without ``base`` only the
        (possibly empty) newline-plus-indent part is returned.
        """
        pretty_ext = '\n' + self._indent if self._pretty else ''
        if base is None:
            return pretty_ext
        return base + pretty_ext

    def _open(self, open_char):
        """Start a container and indent its content one level deeper."""
        self._write_seperator()
        self._write(open_char)
        self._indent += self._INDENT_STEP
        # The first element of a container is not preceded by a comma.
        self._separators.append(self._seperator_with_newline())

    def _close(self, close_char):
        """End the current container and return to the outer indent level."""
        pending = self._separators.pop()
        self._indent = self._indent[:-len(self._INDENT_STEP)]
        # A pending comma means at least one element was written. Only then
        # does the closing bracket go on its own line; an empty container
        # stays as '{}' / '[]', like json.dumps renders it.
        if pending.startswith(','):
            self._write(self._seperator_with_newline())
        self._write(close_char)
        self._separators.append(self._seperator_with_newline(','))

    def open_object(self):
        """Start a JSON object."""
        self._open('{')

    def close_object(self):
        """End the current JSON object."""
        self._close('}')

    def open_array(self):
        """Start a JSON array."""
        self._open('[')

    def close_array(self):
        """End the current JSON array."""
        self._close(']')

    def key_value(self, key, value):
        """Write an object member: ``key`` followed by ``value``."""
        self.key(key)
        self.value(value)

    def key(self, key):
        """Write an object key; a value or container has to follow."""
        self._write_seperator()
        json.dump(key, self._fp)
        self._separators.append(': ' if self._pretty else ':')

    def value(self, value):
        """Write ``value``, serialized with ``json.dump``."""
        self._write_seperator()
        json.dump(value, self._fp)
        self._separators.append(self._seperator_with_newline(','))


# --- tests/test_parsing.py --------------------------------------------------
# The original test module imports JSONStreamGenerator from nomad.parsing;
# here the class is defined above, so that import is not needed.

def create_reference(data, pretty):
    """Serialize ``data`` with the json module in the layout under test."""
    if pretty:
        return json.dumps(data, indent=2)
    return json.dumps(data, separators=(',', ':'))


@pytest.mark.parametrize("pretty", [False, True])
def test_stream_generator(pretty):
    """The event-driven output must equal what json.dumps produces."""
    example_data = [
        {
            'key1': 'value',
            'key2': 1
        },
        {
            'key': {
                'key': 'value'
            }
        }
    ]

    out = StringIO()
    generator = JSONStreamGenerator(out, pretty=pretty)
    generator.open_array()
    generator.open_object()
    generator.key('key1')
    generator.value('value')
    generator.key('key2')
    generator.value(1)
    generator.close_object()
    generator.open_object()
    generator.key('key')
    generator.open_object()
    generator.key('key')
    generator.value('value')
    generator.close_object()
    generator.close_object()
    generator.close_array()

    assert create_reference(example_data, pretty) == out.getvalue()