From c0fef89da9538c9a2d9eb10201b59798bdf9cffa Mon Sep 17 00:00:00 2001 From: Alvin Noe Ladines <ladinesalvinnoe@gmail.com> Date: Thu, 20 Apr 2023 21:19:38 +0000 Subject: [PATCH] Resolve "Extend text parser" Implement functionality to write file parser results to archive. Changelog: Added --- nomad/parsing/file_parser/file_parser.py | 26 ++++++++++ tests/parsing/test_file_parser.py | 63 +++++++++++++++++++++++- 2 files changed, 87 insertions(+), 2 deletions(-) diff --git a/nomad/parsing/file_parser/file_parser.py b/nomad/parsing/file_parser/file_parser.py index 14bd2ad89b..08fd87ee6a 100644 --- a/nomad/parsing/file_parser/file_parser.py +++ b/nomad/parsing/file_parser/file_parser.py @@ -22,6 +22,8 @@ import bz2 import lzma import tarfile +from nomad.metainfo import MSection + class FileParser: ''' @@ -32,6 +34,7 @@ class FileParser: Arguments: mainfile: the file to be parsed logger: optional logger + open: function to open file ''' def __init__(self, mainfile=None, logger=None, open=None): self._mainfile: Any = None @@ -152,8 +155,31 @@ class FileParser: def __getattr__(self, key): if self._results is None: self._results = dict() + self.parse() return self._results.get(key, None) + def to_dict(self): + ''' + Recursively converts the parser results into a dictionary. + ''' + results = {} + for key, val in self.results.items(): + if isinstance(val, FileParser): + val = val.to_dict() + elif isinstance(val, list) and val and isinstance(val[0], FileParser): + for n, val_n in enumerate(val): + val[n] = val_n.to_dict() + + results[key] = val + return results + + def write_to_archive(self, section: MSection): + ''' + Wrapper for the m_from_dict functionality of msection to write the parser + results to an archive section. 
+ ''' + return section.m_from_dict(self.to_dict()) + def parse(self, quantity_key: str = None, **kwargs): pass diff --git a/tests/parsing/test_file_parser.py b/tests/parsing/test_file_parser.py index d89572e5e9..bf5569f49a 100644 --- a/tests/parsing/test_file_parser.py +++ b/tests/parsing/test_file_parser.py @@ -2,11 +2,70 @@ import pytest import numpy as np import pint from nomad.units import ureg - from nomad.parsing.file_parser import TextParser, Quantity, ParsePattern,\ - XMLParser, BasicParser + XMLParser, BasicParser, FileParser from nomad.datamodel.metainfo.simulation.system import Atoms from nomad.datamodel import EntryArchive +from nomad.datamodel.metainfo.simulation.run import Run +from nomad.datamodel.metainfo.simulation.calculation import Calculation + + +class TestFileParser: + + @pytest.fixture(scope='class') + def calculation_parser(self): + class Parser(FileParser): + def parse(self, key): + self._results = {'time_calculation': 2.0} + + return Parser() + + @pytest.fixture(scope='class') + def run_parser(self, calculation_parser): + class Parser(FileParser): + def parse(self, key): + self._results = {'clean_end': True, 'calculation': [calculation_parser]} + + return Parser() + + @pytest.fixture(scope='function') + def text_parser(self): + return TextParser() + + @pytest.fixture(scope='class') + def parser(self, run_parser): + class Parser(FileParser): + def parse(self, key): + self._results = {'run': [run_parser]} + + return Parser() + + @pytest.mark.parametrize('mainfile', [ + 'tests/data/parsers/vasp/vasp.xml', + 'tests/data/parsers/vasp_compressed/vasp.xml.gz']) + def test_open(self, text_parser, mainfile): + text_parser.quantities = [ + Quantity('program', r'name="program" type="string">(.+?) 
*<') + ] + text_parser.mainfile = mainfile + assert text_parser.program == 'vasp' + + def test_get(self, text_parser): + text_parser.quantities = [ + Quantity('energy', r'free +energy +TOTEN += +(\S+) +eV', dtype=np.float64, repeats=True), + ] + text_parser.mainfile = 'tests/data/parsers/vasp_outcar/OUTCAR' + assert text_parser.energy[1] == 1.70437998 + assert text_parser.get('energy', unit='eV')[1].magnitude == 1.70437998 + + def test_write_to_archive(self, parser): + for create_section in [True, False]: + archive = EntryArchive() + if create_section: + archive.m_create(Run).m_create(Calculation) + archive = parser.write_to_archive(archive) + assert archive.run[0].clean_end + assert archive.run[0].calculation[0].time_calculation.magnitude == 2.0 class TestTextParser: -- GitLab