diff --git a/dependencies/parsers/crystal b/dependencies/parsers/crystal index 24a12012effc70d296ec45dcc0bd5fbeb0a0b2ff..a58cfed379d240e5241bb69689441d42a5fb5e0f 160000 --- a/dependencies/parsers/crystal +++ b/dependencies/parsers/crystal @@ -1 +1 @@ -Subproject commit 24a12012effc70d296ec45dcc0bd5fbeb0a0b2ff +Subproject commit a58cfed379d240e5241bb69689441d42a5fb5e0f diff --git a/dependencies/parsers/fhi-aims b/dependencies/parsers/fhi-aims index ad90ca934bb42fad1c95cfe8c1c6aac8317514c8..89e18d81a5a6fa658c1c0008f0078cb58dd2cbd2 160000 --- a/dependencies/parsers/fhi-aims +++ b/dependencies/parsers/fhi-aims @@ -1 +1 @@ -Subproject commit ad90ca934bb42fad1c95cfe8c1c6aac8317514c8 +Subproject commit 89e18d81a5a6fa658c1c0008f0078cb58dd2cbd2 diff --git a/dependencies/parsers/gaussian b/dependencies/parsers/gaussian index ab61bf9c4300d5c3bc91a444d646e486f68cfa40..2e2a4cc93fe2f2f91b8ce44f1da983f315cf1453 160000 --- a/dependencies/parsers/gaussian +++ b/dependencies/parsers/gaussian @@ -1 +1 @@ -Subproject commit ab61bf9c4300d5c3bc91a444d646e486f68cfa40 +Subproject commit 2e2a4cc93fe2f2f91b8ce44f1da983f315cf1453 diff --git a/dependencies/parsers/gpaw b/dependencies/parsers/gpaw index a3f52f6249c59899f7373ac1e9f88d4b7531490c..56354bd376ac8d97a0476be1c6dc396cd1e2c3c2 160000 --- a/dependencies/parsers/gpaw +++ b/dependencies/parsers/gpaw @@ -1 +1 @@ -Subproject commit a3f52f6249c59899f7373ac1e9f88d4b7531490c +Subproject commit 56354bd376ac8d97a0476be1c6dc396cd1e2c3c2 diff --git a/dependencies/parsers/octopus b/dependencies/parsers/octopus index cf7a94db5e606d1da5c65d16dac05b405bcf12ae..b2eaed00f3b74363c875826cb39ee4d24d27cd8d 160000 --- a/dependencies/parsers/octopus +++ b/dependencies/parsers/octopus @@ -1 +1 @@ -Subproject commit cf7a94db5e606d1da5c65d16dac05b405bcf12ae +Subproject commit b2eaed00f3b74363c875826cb39ee4d24d27cd8d diff --git a/dependencies/parsers/orca b/dependencies/parsers/orca index 18e24ef0a192c13f83098330a2a7575fe1608c57..cfc88f526cee932fa25fdc1fc6236383f843d025 160000 --- a/dependencies/parsers/orca +++ b/dependencies/parsers/orca @@ -1 +1 @@ -Subproject commit 18e24ef0a192c13f83098330a2a7575fe1608c57 +Subproject commit cfc88f526cee932fa25fdc1fc6236383f843d025 diff --git a/dependencies/parsers/quantum-espresso b/dependencies/parsers/quantum-espresso index 7c7a5c991123a3687c4f3bb659286aea54c3a45d..bc0e599c58252f791c3cb809b3c8cfbf652d3b73 160000 --- a/dependencies/parsers/quantum-espresso +++ b/dependencies/parsers/quantum-espresso @@ -1 +1 @@ -Subproject commit 7c7a5c991123a3687c4f3bb659286aea54c3a45d +Subproject commit bc0e599c58252f791c3cb809b3c8cfbf652d3b73 diff --git a/dependencies/parsers/vasp b/dependencies/parsers/vasp index edc78862afa35250d43cf7c4748f961f8fe57543..660338bb96b347b9118ea9942d8f76d6cfb1aa0e 160000 --- a/dependencies/parsers/vasp +++ b/dependencies/parsers/vasp @@ -1 +1 @@ -Subproject commit edc78862afa35250d43cf7c4748f961f8fe57543 +Subproject commit 660338bb96b347b9118ea9942d8f76d6cfb1aa0e diff --git a/dependencies/parsers/wien2k b/dependencies/parsers/wien2k index 2ef508e9fe2523af787581c4309a616b0da75365..16f7a7f6909dbe16908d1be2e1fa03d3bddd17b5 160000 --- a/dependencies/parsers/wien2k +++ b/dependencies/parsers/wien2k @@ -1 +1 @@ -Subproject commit 2ef508e9fe2523af787581c4309a616b0da75365 +Subproject commit 16f7a7f6909dbe16908d1be2e1fa03d3bddd17b5 diff --git a/dependencies/python_common b/dependencies/python_common index fd3199aed68b5c3f09a515e687d075755f415f06..75a663a7e1ba8ff13c49bcdc62bca8bdb2f2d108 160000 --- a/dependencies/python_common +++ b/dependencies/python_common @@ -1 +1 @@ -Subproject commit fd3199aed68b5c3f09a515e687d075755f415f06 +Subproject commit 75a663a7e1ba8ff13c49bcdc62bca8bdb2f2d108 diff --git a/nomad/parsing/__init__.py b/nomad/parsing/__init__.py index f92d76016c39f34de5f7d6a2c989ca46a702185b..1da3a95fcd15cf2a6c2b3a4a10fc7c22e52d9b29 100644 --- a/nomad/parsing/__init__.py +++ b/nomad/parsing/__init__.py @@ -173,7 +173,7 @@ parsers = [ ), LegacyParser( name='parsers/vasp', code_name='VASP', - parser_class_name='vaspparser.VASPRunParserInterface', + parser_class_name='vaspparser.VASPRunParser', mainfile_mime_re=r'(application/.*)|(text/.*)', mainfile_contents_re=( r'^\s*<\?xml version="1\.0" encoding="ISO-8859-1"\?>\s*' diff --git a/nomad/parsing/legacy.py b/nomad/parsing/legacy.py index 50efa6c905f2e6364fcd5a8df88354ed82862913..06b2b16e776a843ec6f91620bb52d7bd07b28681 100644 --- a/nomad/parsing/legacy.py +++ b/nomad/parsing/legacy.py @@ -18,7 +18,7 @@ new nomad@fairdi infrastructure. This covers aspects like the new metainfo, a un wrapper for parsers, parser logging, and a parser backend. ''' -from typing import Dict, List, Union, Any, Tuple, Type +from typing import Dict, List, Union, Any, Tuple, Type, cast from abc import ABCMeta, abstractmethod import importlib import os.path @@ -28,8 +28,6 @@ import logging import glob import sys -from nomadcore.local_meta_info import InfoKindEnv - from nomad import utils, datamodel, config from nomad.metainfo import ( SubSection, Quantity, Section, Reference, MResource, MSection, MSectionBound, Property) @@ -227,21 +225,21 @@ class Backend(AbstractParserBackend): warnings and errors. ''' - def __init__(self, metainfo: Union[str, InfoKindEnv], domain: str = None, logger=None): + def __init__(self, metainfo: Union[str, LegacyMetainfoEnvironment], domain: str = None, logger=None): + assert metainfo is not None if logger is None: logger = utils.get_logger(__name__) self.logger = logger self.domain = domain if domain is not None else 'dft' # TODO - if isinstance(metainfo, InfoKindEnv): - print('#################') # TODO remove - metainfo = metainfo.name + if isinstance(metainfo, str): + python_package_name, _ = python_package_mapping(metainfo) + python_package_name = '.'.join(python_package_name.split('.')[:-1]) + python_module = importlib.import_module(python_package_name) + metainfo = getattr(python_module, 'm_env') - python_package_name, _ = python_package_mapping(metainfo) - python_package_name = '.'.join(python_package_name.split('.')[:-1]) - python_module = importlib.import_module(python_package_name) - self.env: LegacyMetainfoEnvironment = getattr(python_module, 'm_env') + self.env: LegacyMetainfoEnvironment = cast(LegacyMetainfoEnvironment, metainfo) self.__legacy_env = None self.resource = MResource() @@ -540,27 +538,33 @@ class LegacyParser(MatchingParser): self.parser_class_name = parser_class_name self.backend_factory = backend_factory + module_name = self.parser_class_name.split('.')[:-1] + parser_class = self.parser_class_name.split('.')[-1] + module = importlib.import_module('.'.join(module_name)) + self.parser_class = getattr(module, parser_class) + def run(self, mainfile: str, logger=None) -> Backend: # TODO we need a homogeneous interface to parsers, but we dont have it right now. # There are some hacks to distinguish between ParserInterface parser and simple_parser # using hasattr, kwargs, etc. + + if issubclass(self.parser_class, CoEParser): + # TODO reuse parser + parser = self.parser_class() + return parser.run(mainfile, logger) + def create_backend(meta_info): if self.backend_factory is not None: return self.backend_factory(meta_info, logger=logger) return Backend(meta_info, logger=logger, domain=self.domain) - module_name = self.parser_class_name.split('.')[:-1] - parser_class = self.parser_class_name.split('.')[-1] - module = importlib.import_module('.'.join(module_name)) - Parser = getattr(module, parser_class) - - init_signature = inspect.getargspec(Parser.__init__) + init_signature = inspect.getargspec(self.parser_class.__init__) kwargs = dict(backend=create_backend, log_level=logging.DEBUG, debug=True) kwargs = {key: value for key, value in kwargs.items() if key in init_signature.args} with utils.legacy_logger(logger): - self.parser = Parser(**kwargs) + self.parser = self.parser_class(**kwargs) with patch.object(sys, 'argv', []): backend = self.parser.parse(mainfile) @@ -572,6 +576,86 @@ class LegacyParser(MatchingParser): return backend +class CoEParser(metaclass=ABCMeta): + + @abstractmethod + def run(self, mainfile, logger) -> Backend: + pass + + +class CoEInterfaceParser(CoEParser): + + def __init__(self, interface_class): + super().__init__() + self.interface_class = interface_class + self.__interface = None + + def run(self, mainfile, logger): + if self.__interface is None: + self.__interface = self.interface_class() + + self.__interface.setup_logger(logger) + self.__interface.parse(mainfile) + return self.__interface.parser_context.super_backend + + +class CoESimpleMatcherParser(CoEParser): + + def __init__(self): + super().__init__() + self.parser_description = self.create_parser_description() + self.simple_matcher = self.create_simple_matcher() + + self._metainfo_env = self.metainfo_env() + self.__legacy_metainfo_env = None + + self.caching_levels = self.create_caching_levels() + + @abstractmethod + def metainfo_env(self) -> LegacyMetainfoEnvironment: + pass + + @property + def metaInfoEnv(self) -> LegacyMetainfoEnvironment: + if self.__legacy_metainfo_env is None: + self.__legacy_metainfo_env = self._metainfo_env.legacy_info_env() + return self.__legacy_metainfo_env + + def create_caching_levels(self) -> dict: + return dict() + + @abstractmethod + def create_simple_matcher(self): + pass + + @abstractmethod + def create_parser_description(self) -> dict: + raise NotImplementedError() + + @abstractmethod + def create_super_context(self): + pass + + def simple_parser(self, mainfile, logger) -> Backend: + from nomadcore.simple_parser import mainFunction + backend = Backend(self._metainfo_env, logger=logger) + from unittest.mock import patch + with patch.object(sys, 'argv', ['<exe>', '--uri', 'nmd://uri', mainfile]): + mainFunction( + mainFileDescription=self.simple_matcher, + metaInfoEnv=self.metaInfoEnv, + parserInfo=self.parser_description, + cachingLevelForMetaName=self.caching_levels, + superContext=self.create_super_context(), + superBackend=backend) + + return backend + + def run(self, mainfile, logger) -> Backend: + with utils.legacy_logger(logger): + return self.simple_parser(mainfile, logger) + + class VaspOutcarParser(LegacyParser): ''' LegacyParser that only matches mailfiles, if there is no .xml in the diff --git a/tests/test_normalizing.py b/tests/test_normalizing.py index 0c14eb61e1f371883150a3fe43deb6a0d7c263c3..cf3d52fae737032120cadc4ea73244d3ac6f3b93 100644 --- a/tests/test_normalizing.py +++ b/tests/test_normalizing.py @@ -362,7 +362,6 @@ def test_vasp_incar_system(): # backend_value = backend.get_value('x_vasp_atom_kind_refs') # OK backend_value = backend.get_value('x_vasp_incar_SYSTEM') # OK - print("backend_value: ", backend_value) assert expected_value == backend_value diff --git a/tests/test_parsing.py b/tests/test_parsing.py index adce1e2da6ca8702f66bdd33df4caf9bcf283e25..96ad805a80f9ae7ebc0a570111d68f1b57c87aea 100644 --- a/tests/test_parsing.py +++ b/tests/test_parsing.py @@ -391,3 +391,32 @@ def test_match(raw_files, with_latin_1_file, no_warn): assert len(matched_mainfiles) == correct_num_output_files, ', '.join([ '%s: %s' % (parser.name, mainfile) for mainfile, parser in matched_mainfiles.items()]) + + +def parser_in_dir(dir): + for root, _, files in os.walk(dir): + for file_name in files: + file_path = os.path.join(root, file_name) + + if 'test' not in file_path: + continue + + parser = match_parser(file_path) + if parser is not None: + + try: + parser.run(file_path) + except Exception as e: + print(file_path, parser, 'FAILURE', e) + else: + print(file_path, parser, 'SUCCESS') + + +if __name__ == '__main__': + import sys + import os + + assert len(sys.argv) == 2 and os.path.isdir(sys.argv[1]), \ + 'One argument with an directory path is required.' + + parser_in_dir(sys.argv[1])