From bb53da6f40d1b5d32c209af552b80f55ad18b07e Mon Sep 17 00:00:00 2001
From: Alvin Noe Ladines <ladinesalvinnoe@gmail.com>
Date: Wed, 15 Nov 2023 22:31:39 +0100
Subject: [PATCH] Implement new parser interface to simulation parsers

---
 .gitmodules                              |  3 ++
 dependencies/parsers/simulation          |  2 +-
 nomad/config/__init__.py                 |  2 +-
 nomad/parsing/file_parser/__init__.py    |  2 +-
 nomad/parsing/file_parser/file_parser.py | 65 ++++++++++++++++++++++--
 5 files changed, 68 insertions(+), 6 deletions(-)

diff --git a/.gitmodules b/.gitmodules
index 732af5a777..0c41087904 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -69,3 +69,6 @@
 [submodule "dependencies/parsers/example"]
         path = dependencies/parsers/example
         url = https://github.com/nomad-coe/nomad-parser-example.git
+[submodule "dependencies/parsers/simulation"]
+	path = dependencies/parsers/simulation
+	url = https://github.com/nomad-coe/simulation-parsers.git
diff --git a/dependencies/parsers/simulation b/dependencies/parsers/simulation
index 6c351b50b2..4507e3a552 160000
--- a/dependencies/parsers/simulation
+++ b/dependencies/parsers/simulation
@@ -1 +1 @@
-Subproject commit 6c351b50b24b83ebdb873eba7491460be3abe747
+Subproject commit 4507e3a5528043a9bad076f04da2b6af88f13f39
diff --git a/nomad/config/__init__.py b/nomad/config/__init__.py
index 99a5666cfe..21be03f11c 100644
--- a/nomad/config/__init__.py
+++ b/nomad/config/__init__.py
@@ -353,7 +353,7 @@ plugins = Plugins(
             mainfile_contents_re=(r'Build.+\s+http://www\.yambo-code\.org'),
         ),
         'parsers/abacus': Parser(
-            python_package='electronicparsers.abacus',
+            python_package='simulationparsers.abacus',
             mainfile_contents_re=(r'\s*\n\s*WELCOME TO ABACUS'),
         ),
         'parsers/amber': Parser(
diff --git a/nomad/parsing/file_parser/__init__.py b/nomad/parsing/file_parser/__init__.py
index b29d0ae744..e6a541c281 100644
--- a/nomad/parsing/file_parser/__init__.py
+++ b/nomad/parsing/file_parser/__init__.py
@@ -1,4 +1,4 @@
-from .file_parser import FileParser
+from .file_parser import FileParser, Parser
 from .text_parser import TextParser, DataTextParser, Quantity, ParsePattern
 from .xml_parser import XMLParser
 from .tar_parser import TarParser
diff --git a/nomad/parsing/file_parser/file_parser.py b/nomad/parsing/file_parser/file_parser.py
index 07ef0cae0c..1a45ffd2ee 100644
--- a/nomad/parsing/file_parser/file_parser.py
+++ b/nomad/parsing/file_parser/file_parser.py
@@ -15,7 +15,7 @@
 from abc import ABC, abstractmethod
 import os
 import pint
-from typing import Any, Dict, Callable, IO, Union
+from typing import Any, Dict, Callable, IO, Union, List
 import gzip
 import bz2
 import lzma
@@ -23,6 +23,7 @@ import tarfile
 
 from nomad.metainfo import MSection
 from nomad.utils import get_logger
+from nomad.datamodel import EntryArchive
 
 
 class FileParser(ABC):
@@ -52,7 +53,7 @@ class FileParser(ABC):
         self.logger = logger if logger is not None else get_logger(__name__)
         # a key is necessary for xml parsers, where parsing is done dynamically
         self._key: str = None
-        self._kwargs: Dict[str, Any] = dict()
+        self._kwargs: Dict[str, Any] = {}
         self._results: Dict[str, Any] = None
         self._file_handler: Any = None
 
@@ -156,7 +157,7 @@ class FileParser(ABC):
 
         self._key = key
         self._kwargs = kwargs
-        val = self.results.get(key, None)
+        val = self.results.get(key)
         if val is None:
             val = default
 
@@ -214,6 +215,37 @@ class FileParser(ABC):
     def parse(self, quantity_key: str = None, **kwargs):
         pass
 
+    def __getitem__(self, key):
+        """
+        Dictionary-style access to a parsed quantity, equivalent to get(key).
+
+        Only quantity names (str) are valid keys. Other key types raise a TypeError:
+        returning None for integer keys would let the implicit sequence iteration
+        protocol (for ... in parser) loop forever, as it stops only on IndexError.
+        """
+        if isinstance(key, str):
+            return self.get(key)
+        raise TypeError(f'quantity key must be a str, not {type(key).__name__}')
+
+    def __getattr__(self, key):
+        """
+        Attribute-style access to a quantity, used when normal attribute lookup fails.
+        Calls parse if the results have not been initialized yet and returns None if
+        the quantity is not found in the results.
+
+        Names starting with an underscore raise AttributeError instead, so that
+        protocol probes (copy, pickle, hasattr) behave normally and looking up
+        _results on an uninitialized instance cannot recurse into this method.
+        """
+        if key.startswith('_'):
+            raise AttributeError(
+                f'{self.__class__.__name__!r} object has no attribute {key!r}'
+            )
+        if self._results is None:
+            self._results = {}
+            self.parse(key)
+        return self._results.get(key)
+
     def __repr__(self) -> str:
         results = list(self._results.keys()) if self._results else []
         string = f'{self.__class__.__name__}'
@@ -222,3 +235,61 @@ class FileParser(ABC):
         if results:
             string += f'--> {len(results)} parsed quantities ({", ".join(results[:5])}{", ..." if len(results) > 5 else ""})'
         return string
+
+
+class Parser(ABC):
+    """
+    Base class for parsers that write the metadata parsed from a mainfile to an
+    archive. Subclasses override to_dict and/or write_to_archive; the defaults
+    update the archive with an empty dictionary.
+    """
+
+    # The following are set from the arguments of parse.
+    mainfile: str = None
+    archive: EntryArchive = None
+    logger = None
+    child_archives = None
+
+    def get_mainfile_keys(
+        self, filename: str, decoded_buffer: str
+    ) -> Union[bool, List[str]]:
+        """
+        If child archives are necessary for the entry, a list of keys for the archives
+        is returned. The default implementation returns True.
+        """
+        return True
+
+    def to_dict(self) -> Dict[str, Any]:
+        """
+        Converts the parsed metadata into a dictionary following the nomad archive schema.
+        The default implementation returns an empty dictionary.
+        """
+        return {}
+
+    def write_to_archive(self) -> None:
+        """
+        Writes the parsed metadata from mainfile to archive. The parser may directly
+        write to the archive by overriding this method, or convert to a dictionary
+        following the archive schema through the to_dict method, which is then used
+        here to update the archive. Does nothing if no archive is set.
+        """
+        if self.archive is None:
+            return
+
+        self.archive.m_update_from_dict(self.to_dict())
+
+    def parse(
+        self, mainfile: str, archive: EntryArchive, logger=None, child_archives=None
+    ) -> None:
+        """
+        Main interface to the nomad parsing infrastructure. Stores the arguments on the
+        instance, falling back to get_logger if no logger is given, and then calls
+        write_to_archive.
+        """
+        self.mainfile = mainfile
+        self.archive = archive
+        # explicit None check, consistent with the logger fallback in FileParser
+        self.logger = logger if logger is not None else get_logger(__name__)
+        self.child_archives = child_archives
+
+        self.write_to_archive()
-- 
GitLab