diff --git a/dependencies/parsers/aptfim b/dependencies/parsers/aptfim index d394f7ae375cd2355127abc67e07b1592f18d4f4..ed482b9956088163db9370dd5eee9b184c47870a 160000 --- a/dependencies/parsers/aptfim +++ b/dependencies/parsers/aptfim @@ -1 +1 @@ -Subproject commit d394f7ae375cd2355127abc67e07b1592f18d4f4 +Subproject commit ed482b9956088163db9370dd5eee9b184c47870a diff --git a/dependencies/parsers/eels b/dependencies/parsers/eels index 6f0e7ec897284382d8ef30b0ee0372f010ce468f..136ab4f5c17e340009a14df8dbb72dc101acb1b1 160000 --- a/dependencies/parsers/eels +++ b/dependencies/parsers/eels @@ -1 +1 @@ -Subproject commit 6f0e7ec897284382d8ef30b0ee0372f010ce468f +Subproject commit 136ab4f5c17e340009a14df8dbb72dc101acb1b1 diff --git a/dependencies/parsers/mpes b/dependencies/parsers/mpes index aa701e8f2780419e7911972c8e2a16d188008902..d4083cdadd4b34b9d99cbeed579c56c09aa7606b 160000 --- a/dependencies/parsers/mpes +++ b/dependencies/parsers/mpes @@ -1 +1 @@ -Subproject commit aa701e8f2780419e7911972c8e2a16d188008902 +Subproject commit d4083cdadd4b34b9d99cbeed579c56c09aa7606b diff --git a/dependencies/parsers/vasp b/dependencies/parsers/vasp index 2c8b7763eae8a27e7c1ca3a8ce86533978f42f73..8c2e56b0d9d17b777b5fa6a61c8e2877444ab603 160000 --- a/dependencies/parsers/vasp +++ b/dependencies/parsers/vasp @@ -1 +1 @@ -Subproject commit 2c8b7763eae8a27e7c1ca3a8ce86533978f42f73 +Subproject commit 8c2e56b0d9d17b777b5fa6a61c8e2877444ab603 diff --git a/nomad/cli/parse.py b/nomad/cli/parse.py index 32f8b5ab80f8e5e34ceec99577fb0ce3f2af496e..c998837e6e92d4072045aa647ff33215ca52121c 100644 --- a/nomad/cli/parse.py +++ b/nomad/cli/parse.py @@ -43,17 +43,6 @@ def parse( parser_backend = parser.run(mainfile_path, logger=logger) - from nomad.metainfo import MSection - from nomad.parsing.legacy import Backend - - if isinstance(parser_backend, MSection): - backend = Backend(parser._metainfo_env, parser.domain) - root_section = parser_backend.m_def.name - section_def = getattr(datamodel.EntryArchive, root_section) - backend.entry_archive.m_add_sub_section(section_def, parser_backend) - backend.resource.add(parser_backend) - parser_backend = backend - if not parser_backend.status[0] == 'ParseSuccess': logger.error('parsing was not successful', status=parser_backend.status) @@ -74,7 +63,7 @@ def normalize( if normalizer_instance.__class__.__name__ == normalizer) assert normalizer is not None, 'there is no normalizer %s' % str(normalizer) - normalizer_instance = typing.cast(typing.Callable, normalizer)(parser_backend) + normalizer_instance = typing.cast(typing.Callable, normalizer)(parser_backend.entry_archive) logger = logger.bind(normalizer=normalizer_instance.__class__.__name__) logger.info('identified normalizer') diff --git a/nomad/parsing/parser.py b/nomad/parsing/parser.py index b155f4e5bdab47dad3d1428f4d09d9075c3e4641..fb57f6c2e01da88456ff315954a6e22e244990c4 100644 --- a/nomad/parsing/parser.py +++ b/nomad/parsing/parser.py @@ -18,12 +18,18 @@ import re import importlib from nomad.metainfo import Environment +from nomad.datamodel import EntryArchive class Parser(metaclass=ABCMeta): ''' Instances specify a parser. It allows to find *main files* from given uploaded and extracted files. Further, allows to run the parser on those 'main files'. + + TODO: There are currently two "run" functions. :func:`run` and :func:`parse`. + Because we are in the middle of transitioning out of the backend dependence we currently + have both, where 'run' creates a backend and 'parse' simply gets an archive that the + parser is supposed to populate. Eventually, we will only have the 'parse' function. ''' name = "parsers/parser" @@ -64,6 +70,19 @@ class Parser(metaclass=ABCMeta): The used :class:`Backend` with status information and result data. ''' + def parse(self, mainfile: str, archive: EntryArchive, logger=None) -> None: + ''' + Runs the parser on the given mainfile and populates the result in the given + archive root_section. It allows to be run repeatedly for different mainfiles. + + Args: + mainfile: A path to a mainfile that this parser can parse. + archive: An instance of the section :class:`EntryArchive`. It might contain + a ``section_metadata`` with information about the entry. + logger: A optional logger + ''' + pass + class BrokenParser(Parser): ''' @@ -156,28 +175,22 @@ class MatchingParser(Parser): class FairdiParser(MatchingParser): - def __init__(self, parser_class_name: str, *args, **kwargs): - super().__init__(*args, **kwargs) - self.parser_class_name = parser_class_name - - module_name = self.parser_class_name.split('.')[:-1] - parser_class_name = self.parser_class_name.split('.')[-1] - self.__parser_impl = module_name, parser_class_name - self.__parser_class = None - - @property - def parser_class(self): - if self.__parser_class is None: - module_name, parser_class_name = self.__parser_impl - module = importlib.import_module('.'.join(module_name)) - self.__parser_class = getattr(module, parser_class_name) - - return self.__parser_class - def run(self, mainfile: str, logger=None): - parser = self.parser_class() # pylint: disable=not-callable - root_section = parser.parse(mainfile, logger) - return root_section + from .legacy import Backend + python_module = importlib.import_module(self.__module__ + '.metainfo') + metainfo = getattr(python_module, 'm_env') + backend = Backend(metainfo, domain=self.domain, logger=logger) + self.parse(mainfile, backend.entry_archive, logger=logger) + return backend + + def parse(self, mainfile: str, archive: EntryArchive, logger=None): + raise NotImplementedError() + + @classmethod + def main(cls, mainfile): + archive = EntryArchive() + cls().parse(mainfile, archive) # pylint: disable=no-value-for-parameter + return archive class MissingParser(MatchingParser): diff --git a/nomad/processing/data.py b/nomad/processing/data.py index 33492bbb35d2744daeec9aad168b89729ca4fe39..63fd024af153b38d7003b515c4ee9f4a5deea4ac 100644 --- a/nomad/processing/data.py +++ b/nomad/processing/data.py @@ -46,7 +46,6 @@ from nomad.archive import query_archive from nomad.datamodel.encyclopedia import ( EncyclopediaMetadata, ) -from nomad.metainfo import MSection import phonopyparser.metainfo @@ -386,14 +385,6 @@ class Calc(Proc): self._parser_backend = parser.run( self.upload_files.raw_file_object(self.mainfile).os_path, logger=logger) - if isinstance(self._parser_backend, MSection): - backend = Backend(parser._metainfo_env, parser.domain) - root_section = self._parser_backend.m_def.name - section_def = getattr(datamodel.EntryArchive, root_section) - backend.entry_archive.m_add_sub_section(section_def, self._parser_backend) - backend.resource.add(self._parser_backend) - self._parser_backend = backend - except Exception as e: self.fail('parser failed with exception', exc_info=e, error=str(e), **context) return diff --git a/tests/test_parsing.py b/tests/test_parsing.py index b5985b8f59a87f21bce331a38f683fd7abadebf3..a33e98927edc47d13d7b25810571b1b5e3ffc478 100644 --- a/tests/test_parsing.py +++ b/tests/test_parsing.py @@ -23,7 +23,6 @@ from nomad import utils, files, datamodel from nomad.parsing import BrokenParser, Backend from nomad.parsing.parsers import parser_dict, match_parser from nomad.app import dump_json -from nomad.metainfo import MSection parser_examples = [ ('parsers/random', 'test/data/parsers/random_0'), @@ -259,13 +258,7 @@ def assert_parser_dir_unchanged(previous_wd, current_wd): def run_parser(parser_name, mainfile): parser = parser_dict[parser_name] result = parser.run(mainfile, logger=utils.get_logger(__name__)) - if isinstance(result, MSection): - backend = Backend(parser._metainfo_env, parser.domain) - root_section = result.m_def.name - section_def = getattr(datamodel.EntryArchive, root_section) - backend.entry_archive.m_add_sub_section(section_def, result) - backend.resource.add(result) - result = backend + result.domain = parser.domain return add_calculation_info(result, parser_name=parser_name)