Commit 3f6bfc40 authored by Markus Scheidgen
Browse files

Parser optimizations.

parent e2bd0f79
Pipeline #71094 passed with stages
in 20 minutes and 37 seconds
Subproject commit 24a12012effc70d296ec45dcc0bd5fbeb0a0b2ff
Subproject commit a58cfed379d240e5241bb69689441d42a5fb5e0f
Subproject commit ad90ca934bb42fad1c95cfe8c1c6aac8317514c8
Subproject commit 89e18d81a5a6fa658c1c0008f0078cb58dd2cbd2
Subproject commit ab61bf9c4300d5c3bc91a444d646e486f68cfa40
Subproject commit 2e2a4cc93fe2f2f91b8ce44f1da983f315cf1453
Subproject commit a3f52f6249c59899f7373ac1e9f88d4b7531490c
Subproject commit 56354bd376ac8d97a0476be1c6dc396cd1e2c3c2
Subproject commit cf7a94db5e606d1da5c65d16dac05b405bcf12ae
Subproject commit b2eaed00f3b74363c875826cb39ee4d24d27cd8d
Subproject commit 18e24ef0a192c13f83098330a2a7575fe1608c57
Subproject commit cfc88f526cee932fa25fdc1fc6236383f843d025
Subproject commit 7c7a5c991123a3687c4f3bb659286aea54c3a45d
Subproject commit bc0e599c58252f791c3cb809b3c8cfbf652d3b73
Subproject commit edc78862afa35250d43cf7c4748f961f8fe57543
Subproject commit 660338bb96b347b9118ea9942d8f76d6cfb1aa0e
Subproject commit 2ef508e9fe2523af787581c4309a616b0da75365
Subproject commit 16f7a7f6909dbe16908d1be2e1fa03d3bddd17b5
Subproject commit fd3199aed68b5c3f09a515e687d075755f415f06
Subproject commit 75a663a7e1ba8ff13c49bcdc62bca8bdb2f2d108
......@@ -173,7 +173,7 @@ parsers = [
),
LegacyParser(
name='parsers/vasp', code_name='VASP',
parser_class_name='vaspparser.VASPRunParserInterface',
parser_class_name='vaspparser.VASPRunParser',
mainfile_mime_re=r'(application/.*)|(text/.*)',
mainfile_contents_re=(
r'^\s*<\?xml version="1\.0" encoding="ISO-8859-1"\?>\s*'
......
......@@ -18,7 +18,7 @@ new nomad@fairdi infrastructure. This covers aspects like the new metainfo, a un
wrapper for parsers, parser logging, and a parser backend.
'''
from typing import Dict, List, Union, Any, Tuple, Type
from typing import Dict, List, Union, Any, Tuple, Type, cast
from abc import ABCMeta, abstractmethod
import importlib
import os.path
......@@ -28,8 +28,6 @@ import logging
import glob
import sys
from nomadcore.local_meta_info import InfoKindEnv
from nomad import utils, datamodel, config
from nomad.metainfo import (
SubSection, Quantity, Section, Reference, MResource, MSection, MSectionBound, Property)
......@@ -227,21 +225,21 @@ class Backend(AbstractParserBackend):
warnings and errors.
'''
def __init__(self, metainfo: Union[str, InfoKindEnv], domain: str = None, logger=None):
def __init__(self, metainfo: Union[str, LegacyMetainfoEnvironment], domain: str = None, logger=None):
assert metainfo is not None
if logger is None:
logger = utils.get_logger(__name__)
self.logger = logger
self.domain = domain if domain is not None else 'dft' # TODO
if isinstance(metainfo, InfoKindEnv):
print('#################') # TODO remove
metainfo = metainfo.name
if isinstance(metainfo, str):
python_package_name, _ = python_package_mapping(metainfo)
python_package_name = '.'.join(python_package_name.split('.')[:-1])
python_module = importlib.import_module(python_package_name)
metainfo = getattr(python_module, 'm_env')
python_package_name, _ = python_package_mapping(metainfo)
python_package_name = '.'.join(python_package_name.split('.')[:-1])
python_module = importlib.import_module(python_package_name)
self.env: LegacyMetainfoEnvironment = getattr(python_module, 'm_env')
self.env: LegacyMetainfoEnvironment = cast(LegacyMetainfoEnvironment, metainfo)
self.__legacy_env = None
self.resource = MResource()
......@@ -540,27 +538,33 @@ class LegacyParser(MatchingParser):
self.parser_class_name = parser_class_name
self.backend_factory = backend_factory
module_name = self.parser_class_name.split('.')[:-1]
parser_class = self.parser_class_name.split('.')[-1]
module = importlib.import_module('.'.join(module_name))
self.parser_class = getattr(module, parser_class)
def run(self, mainfile: str, logger=None) -> Backend:
# TODO we need a homogeneous interface to parsers, but we don't have it right now.
# There are some hacks to distinguish between ParserInterface parser and simple_parser
# using hasattr, kwargs, etc.
if issubclass(self.parser_class, CoEParser):
# TODO reuse parser
parser = self.parser_class()
return parser.run(mainfile, logger)
def create_backend(meta_info):
if self.backend_factory is not None:
return self.backend_factory(meta_info, logger=logger)
return Backend(meta_info, logger=logger, domain=self.domain)
module_name = self.parser_class_name.split('.')[:-1]
parser_class = self.parser_class_name.split('.')[-1]
module = importlib.import_module('.'.join(module_name))
Parser = getattr(module, parser_class)
init_signature = inspect.getargspec(Parser.__init__)
init_signature = inspect.getargspec(self.parser_class.__init__)
kwargs = dict(backend=create_backend, log_level=logging.DEBUG, debug=True)
kwargs = {key: value for key, value in kwargs.items() if key in init_signature.args}
with utils.legacy_logger(logger):
self.parser = Parser(**kwargs)
self.parser = self.parser_class(**kwargs)
with patch.object(sys, 'argv', []):
backend = self.parser.parse(mainfile)
......@@ -572,6 +576,86 @@ class LegacyParser(MatchingParser):
return backend
class CoEParser(metaclass=ABCMeta):
    '''
    Abstract base class for parsers that expose a single :func:`run` entry point.

    Concrete subclasses have to implement :func:`run`.
    '''

    @abstractmethod
    def run(self, mainfile, logger) -> Backend:
        '''
        Parses the given mainfile.

        Arguments:
            mainfile: Path of the file to parse.
            logger: Logger to be used while parsing.

        Returns: The backend that holds the parse results.
        '''
class CoEInterfaceParser(CoEParser):
    '''
    A :class:`CoEParser` that delegates the parsing to an interface object.

    The interface object is created from ``interface_class`` on the first call of
    :func:`run` and is reused by all later calls on the same instance.

    Arguments:
        interface_class: Callable that is invoked without arguments to create the
            interface object.
    '''

    def __init__(self, interface_class):
        super().__init__()
        self.interface_class = interface_class
        # Created lazily by run().
        self.__interface = None

    def run(self, mainfile, logger):
        '''
        Parses ``mainfile`` with the (cached) interface object.

        The logger is handed to the interface via ``setup_logger`` before each
        parse. Returns ``parser_context.super_backend`` of the interface.
        '''
        interface = self.__interface
        if interface is None:
            interface = self.__interface = self.interface_class()

        interface.setup_logger(logger)
        interface.parse(mainfile)
        return interface.parser_context.super_backend
class CoESimpleMatcherParser(CoEParser):
    '''
    A :class:`CoEParser` that runs ``nomadcore.simple_parser.mainFunction``.

    The parser description, the simple matcher, the metainfo environment, and the
    caching levels are created once in the constructor through the respective
    factory methods. The super context is created anew for each parse.
    '''

    def __init__(self):
        super().__init__()
        # The factory methods are called in this fixed order.
        self.parser_description = self.create_parser_description()
        self.simple_matcher = self.create_simple_matcher()

        self._metainfo_env = self.metainfo_env()
        # Filled on first access of the metaInfoEnv property.
        self.__legacy_metainfo_env = None

        self.caching_levels = self.create_caching_levels()

    @abstractmethod
    def metainfo_env(self) -> LegacyMetainfoEnvironment:
        ''' Returns the metainfo environment that is used to create backends. '''

    @property
    def metaInfoEnv(self) -> LegacyMetainfoEnvironment:
        '''
        The result of ``legacy_info_env()`` on the metainfo environment, computed on
        first access and cached afterwards.
        '''
        # NOTE(review): the annotation says LegacyMetainfoEnvironment, but the value
        # is whatever legacy_info_env() returns -- confirm that the types agree.
        legacy_env = self.__legacy_metainfo_env
        if legacy_env is None:
            legacy_env = self._metainfo_env.legacy_info_env()
            self.__legacy_metainfo_env = legacy_env

        return legacy_env

    def create_caching_levels(self) -> dict:
        ''' Returns the caching levels; an empty dict unless overridden. '''
        return {}

    @abstractmethod
    def create_simple_matcher(self):
        ''' Returns the object passed as ``mainFileDescription`` to ``mainFunction``. '''

    @abstractmethod
    def create_parser_description(self) -> dict:
        ''' Returns the dict passed as ``parserInfo`` to ``mainFunction``. '''
        raise NotImplementedError()

    @abstractmethod
    def create_super_context(self):
        ''' Returns the object passed as ``superContext`` to ``mainFunction``. '''

    def simple_parser(self, mainfile, logger) -> Backend:
        '''
        Runs ``mainFunction`` on ``mainfile`` with a fresh :class:`Backend`.

        ``sys.argv`` is patched for the duration of the call so that it carries the
        mainfile path. Returns the backend that was passed as ``superBackend``.
        '''
        from nomadcore.simple_parser import mainFunction
        from unittest.mock import patch

        backend = Backend(self._metainfo_env, logger=logger)
        patched_argv = ['<exe>', '--uri', 'nmd://uri', mainfile]

        with patch.object(sys, 'argv', patched_argv):
            mainFunction(
                mainFileDescription=self.simple_matcher,
                metaInfoEnv=self.metaInfoEnv,
                parserInfo=self.parser_description,
                cachingLevelForMetaName=self.caching_levels,
                superContext=self.create_super_context(),
                superBackend=backend)

        return backend

    def run(self, mainfile, logger) -> Backend:
        ''' Runs :func:`simple_parser` within ``utils.legacy_logger(logger)``. '''
        with utils.legacy_logger(logger):
            return self.simple_parser(mainfile, logger)
class VaspOutcarParser(LegacyParser):
'''
LegacyParser that only matches mainfiles if there is no .xml in the
......
......@@ -362,7 +362,6 @@ def test_vasp_incar_system():
# backend_value = backend.get_value('x_vasp_atom_kind_refs') # OK
backend_value = backend.get_value('x_vasp_incar_SYSTEM') # OK
print("backend_value: ", backend_value)
assert expected_value == backend_value
......
......@@ -391,3 +391,32 @@ def test_match(raw_files, with_latin_1_file, no_warn):
assert len(matched_mainfiles) == correct_num_output_files, ', '.join([
'%s: %s' % (parser.name, mainfile)
for mainfile, parser in matched_mainfiles.items()])
def parser_in_dir(dir):
    '''
    Walks ``dir`` recursively and runs the matching parser on every file whose path
    contains ``'test'``.

    Files without a matching parser are skipped silently. For every parsed file one
    line with the path, the parser, and either ``SUCCESS`` or ``FAILURE`` (plus the
    exception) is printed. Exceptions raised by a parser do not stop the walk.
    '''
    for current_dir, _, file_names in os.walk(dir):
        for name in file_names:
            path = os.path.join(current_dir, name)
            if 'test' not in path:
                continue

            matched = match_parser(path)
            if matched is None:
                continue

            try:
                matched.run(path)
            except Exception as error:
                # Best effort: report the failure and carry on with the next file.
                print(path, matched, 'FAILURE', error)
            else:
                print(path, matched, 'SUCCESS')
if __name__ == '__main__':
    # Script entry point: expects exactly one argument, the directory to scan.
    import os
    import sys

    # Validate explicitly instead of using `assert`: assert statements are removed
    # under `python -O`, which would let an invalid invocation fall through.
    if len(sys.argv) != 2 or not os.path.isdir(sys.argv[1]):
        # Passing a string to sys.exit prints it to stderr and exits with status 1.
        sys.exit('One argument with a directory path is required.')

    parser_in_dir(sys.argv[1])
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment