diff --git a/.gitmodules b/.gitmodules index 732af5a777009594257e6ff4ac25c7e8da7b146c..0c41087904fe20841acbf1daec9c5d1a72db81d7 100644 --- a/.gitmodules +++ b/.gitmodules @@ -69,3 +69,6 @@ [submodule "dependencies/parsers/example"] path = dependencies/parsers/example url = https://github.com/nomad-coe/nomad-parser-example.git +[submodule "dependencies/parsers/simulation"] + path = dependencies/parsers/simulation + url = https://github.com/nomad-coe/simulation-parsers.git diff --git a/dependencies/parsers/simulation b/dependencies/parsers/simulation index 6c351b50b24b83ebdb873eba7491460be3abe747..4507e3a5528043a9bad076f04da2b6af88f13f39 160000 --- a/dependencies/parsers/simulation +++ b/dependencies/parsers/simulation @@ -1 +1 @@ -Subproject commit 6c351b50b24b83ebdb873eba7491460be3abe747 +Subproject commit 4507e3a5528043a9bad076f04da2b6af88f13f39 diff --git a/nomad/config/__init__.py b/nomad/config/__init__.py index 99a5666cfe3b603b703e5dc35ce6eb6c456c440a..21be03f11c06a333bf2895c705e6e094f510294b 100644 --- a/nomad/config/__init__.py +++ b/nomad/config/__init__.py @@ -353,7 +353,7 @@ plugins = Plugins( mainfile_contents_re=(r'Build.+\s+http://www\.yambo-code\.org'), ), 'parsers/abacus': Parser( - python_package='electronicparsers.abacus', + python_package='simulationparsers.abacus', mainfile_contents_re=(r'\s*\n\s*WELCOME TO ABACUS'), ), 'parsers/amber': Parser( diff --git a/nomad/parsing/file_parser/__init__.py b/nomad/parsing/file_parser/__init__.py index b29d0ae74464c0942c8619ee2a68f96fbb07db4a..e6a541c281a03db0a00a926a97293e57e8e9693c 100644 --- a/nomad/parsing/file_parser/__init__.py +++ b/nomad/parsing/file_parser/__init__.py @@ -1,4 +1,4 @@ -from .file_parser import FileParser +from .file_parser import FileParser, Parser from .text_parser import TextParser, DataTextParser, Quantity, ParsePattern from .xml_parser import XMLParser from .tar_parser import TarParser diff --git a/nomad/parsing/file_parser/file_parser.py b/nomad/parsing/file_parser/file_parser.py 
index 07ef0cae0ccdf5854f6e27bec65dc149d414e03c..1a45ffd2ee7e07da26ada7db48503b352efe0aa9 100644
--- a/nomad/parsing/file_parser/file_parser.py
+++ b/nomad/parsing/file_parser/file_parser.py
@@ -15,7 +15,7 @@
 from abc import ABC, abstractmethod
 import os
 import pint
-from typing import Any, Dict, Callable, IO, Union
+from typing import Any, Dict, Callable, IO, Union, List
 import gzip
 import bz2
 import lzma
@@ -23,6 +23,7 @@ import tarfile
 
 from nomad.metainfo import MSection
 from nomad.utils import get_logger
+from nomad.datamodel import EntryArchive
 
 
 class FileParser(ABC):
@@ -52,7 +53,7 @@ class FileParser(ABC):
         self.logger = logger if logger is not None else get_logger(__name__)
         # a key is necessary for xml parsers, where parsing is done dynamically
         self._key: str = None
-        self._kwargs: Dict[str, Any] = dict()
+        self._kwargs: Dict[str, Any] = {}
         self._results: Dict[str, Any] = None
         self._file_handler: Any = None
 
@@ -156,7 +157,7 @@ class FileParser(ABC):
         self._key = key
         self._kwargs = kwargs
 
-        val = self.results.get(key, None)
+        val = self.results.get(key)
         if val is None:
             val = default
 
@@ -214,6 +215,33 @@ class FileParser(ABC):
     def parse(self, quantity_key: str = None, **kwargs):
         pass
 
+    def __getitem__(self, key):
+        """
+        Returns the value of the quantity for a str key through get. Keys of any other
+        type resolve to None.
+        """
+        if isinstance(key, str):
+            return self.get(key)
+        # NOTE(review): the int branch used to evaluate self[int], passing the builtin
+        # type instead of key, which always came out as None. What integer keys are
+        # meant to address is not visible here, so that outcome is kept but explicit.
+        return None
+
+    def __getattr__(self, key):
+        """
+        Invoked when normal attribute lookup fails. Treats key as a quantity name,
+        initializing the results and calling parse when the results are still None.
+        """
+        # Reading self._results below re-enters this method when the attribute is
+        # not set on the instance (e.g. __init__ has not run yet), so the lookup of
+        # _results itself has to raise instead of recursing endlessly.
+        if key == '_results':
+            raise AttributeError(key)
+        if self._results is None:
+            self._results = {}
+            self.parse(key)
+        return self._results.get(key)
+
     def __repr__(self) -> str:
         results = list(self._results.keys()) if self._results else []
         string = f'{self.__class__.__name__}'
@@ -222,3 +250,57 @@ class FileParser(ABC):
         if results:
             string += f'--> {len(results)} parsed quantities ({", ".join(results[:5])}{", ..." if len(results) > 5 else ""})'
         return string
+
+
+class Parser(ABC):
+    """
+    Base class for parsers that write the metadata of a mainfile to an archive.
+    """
+
+    mainfile: str = None
+    archive: EntryArchive = None
+    logger = None
+    child_archives = None
+
+    def get_mainfile_keys(
+        self, filename: str, decoded_buffer: str
+    ) -> Union[bool, List[str]]:
+        """
+        If child archives are necessary for the entry, a list of keys for the archives are
+        returned. This base implementation always returns True.
+        """
+        return True
+
+    def to_dict(self) -> Dict[str, Any]:
+        """
+        Converts the parsed metadata into a dictionary following the nomad archive schema.
+        This base implementation returns an empty dictionary.
+        """
+        return {}
+
+    def write_to_archive(self) -> None:
+        """
+        Writes the parsed metadata from mainfile to archive. The parser may directly write
+        to the archive by overriding this method or convert to a dictionary following the
+        archive schema through the to_dict method which is then used to update the
+        archive. Nothing is written if no archive is set.
+        """
+        if self.archive is None:
+            return
+
+        self.archive.m_update_from_dict(self.to_dict())
+
+    def parse(
+        self, mainfile: str, archive: EntryArchive, logger=None, child_archives=None
+    ) -> None:
+        """
+        Main interface to the nomad parsing infrastructure. Stores the arguments on the
+        instance and calls write_to_archive.
+        """
+        self.mainfile = mainfile
+        self.archive = archive
+        # same None check as in FileParser so a passed logger is kept even if falsy
+        self.logger = logger if logger is not None else get_logger(__name__)
+        self.child_archives = child_archives
+
+        self.write_to_archive()