diff --git a/nomad/parsing/file_parser/file_parser.py b/nomad/parsing/file_parser/file_parser.py index 14bd2ad89bb3e91baa812f7caa305cc1cda81e2d..08fd87ee6abbe4401c62f72371c4fdb19ec5f309 100644 --- a/nomad/parsing/file_parser/file_parser.py +++ b/nomad/parsing/file_parser/file_parser.py @@ -22,6 +22,8 @@ import bz2 import lzma import tarfile +from nomad.metainfo import MSection + class FileParser: ''' @@ -32,6 +34,7 @@ class FileParser: Arguments: mainfile: the file to be parsed logger: optional logger + open: function to open file ''' def __init__(self, mainfile=None, logger=None, open=None): self._mainfile: Any = None @@ -152,8 +155,31 @@ class FileParser: def __getattr__(self, key): if self._results is None: self._results = dict() + self.parse() return self._results.get(key, None) + def to_dict(self): + ''' + Recursively converts the parser results into a dictionary. + ''' + results = {} + for key, val in self.results.items(): + if isinstance(val, FileParser): + val = val.to_dict() + elif isinstance(val, list) and val and isinstance(val[0], FileParser): + for n, val_n in enumerate(val): + val[n] = val_n.to_dict() + + results[key] = val + return results + + def write_to_archive(self, section: MSection): + ''' + Wrapper for the m_from_dict functionality of msection to write the parser + results to an archive section. 
+ ''' + return section.m_from_dict(self.to_dict()) + def parse(self, quantity_key: str = None, **kwargs): pass diff --git a/tests/parsing/test_file_parser.py b/tests/parsing/test_file_parser.py index d89572e5e945e8463319dc5c0e2f634b5e22bc0c..bf5569f49a1c32fbea6320a6941822b9d05009b6 100644 --- a/tests/parsing/test_file_parser.py +++ b/tests/parsing/test_file_parser.py @@ -2,11 +2,70 @@ import pytest import numpy as np import pint from nomad.units import ureg - from nomad.parsing.file_parser import TextParser, Quantity, ParsePattern,\ - XMLParser, BasicParser + XMLParser, BasicParser, FileParser from nomad.datamodel.metainfo.simulation.system import Atoms from nomad.datamodel import EntryArchive +from nomad.datamodel.metainfo.simulation.run import Run +from nomad.datamodel.metainfo.simulation.calculation import Calculation + + +class TestFileParser: + + @pytest.fixture(scope='class') + def calculation_parser(self): + class Parser(FileParser): + def parse(self, key): + self._results = {'time_calculation': 2.0} + + return Parser() + + @pytest.fixture(scope='class') + def run_parser(self, calculation_parser): + class Parser(FileParser): + def parse(self, key): + self._results = {'clean_end': True, 'calculation': [calculation_parser]} + + return Parser() + + @pytest.fixture(scope='function') + def text_parser(self): + return TextParser() + + @pytest.fixture(scope='class') + def parser(self, run_parser): + class Parser(FileParser): + def parse(self, key): + self._results = {'run': [run_parser]} + + return Parser() + + @pytest.mark.parametrize('mainfile', [ + 'tests/data/parsers/vasp/vasp.xml', + 'tests/data/parsers/vasp_compressed/vasp.xml.gz']) + def test_open(self, text_parser, mainfile): + text_parser.quantities = [ + Quantity('program', r'name="program" type="string">(.+?) 
*<') + ] + text_parser.mainfile = mainfile + assert text_parser.program == 'vasp' + + def test_get(self, text_parser): + text_parser.quantities = [ + Quantity('energy', r'free +energy +TOTEN += +(\S+) +eV', dtype=np.float64, repeats=True), + ] + text_parser.mainfile = 'tests/data/parsers/vasp_outcar/OUTCAR' + assert text_parser.energy[1] == 1.70437998 + assert text_parser.get('energy', unit='eV')[1].magnitude == 1.70437998 + + def test_write_to_archive(self, parser): + for create_section in [True, False]: + archive = EntryArchive() + if create_section: + archive.m_create(Run).m_create(Calculation) + archive = parser.write_to_archive(archive) + assert archive.run[0].clean_end + assert archive.run[0].calculation[0].time_calculation.magnitude == 2.0 class TestTextParser: