diff --git a/nomad/client/processing.py b/nomad/client/processing.py index 169395753b50f9a06606ad702bd28ae2005f2150..e75d163790d8eb83022e948002a836100cbed5ca 100644 --- a/nomad/client/processing.py +++ b/nomad/client/processing.py @@ -55,27 +55,7 @@ def parse( if hasattr(parser, 'backend_factory'): setattr(parser, 'backend_factory', backend_factory) - entry_archives = [] - for mainfile_key in mainfile_keys: - entry_archive = datamodel.EntryArchive() - metadata = entry_archive.m_create(datamodel.EntryMetadata) - metadata.mainfile = mainfile_path - metadata.mainfile_key = mainfile_key - cwd = os.getcwd() - try: - mainfile_path = os.path.abspath(mainfile_path) - os.chdir(os.path.abspath(os.path.dirname(mainfile_path))) - parser.parse(mainfile_path, entry_archive, logger=logger) - except Exception as e: - logger.error('parsing was not successful', exc_info=e) - raise e - finally: - os.chdir(cwd) - - if metadata.domain is None: - metadata.domain = parser.domain - - entry_archives.append(entry_archive) + entry_archives = parsers.run_parser(mainfile_path, parser, mainfile_keys, logger) logger.info('ran parser') return entry_archives diff --git a/nomad/parsing/artificial.py b/nomad/parsing/artificial.py index fe104ae70b431ddcb7c9024d4c2c137a0180c753..638a07ec149d920533c0e3b3d5569d69f4cb192c 100644 --- a/nomad/parsing/artificial.py +++ b/nomad/parsing/artificial.py @@ -44,7 +44,7 @@ class EmptyParser(MatchingParser): ''' name = "parsers/empty" - def parse(self, mainfile: str, archive: EntryArchive, logger=None) -> None: + def parse(self, mainfile: str, archive: EntryArchive, logger=None, child_archives=None) -> None: run = archive.m_create(Run) run.program = Program(name=self.code_name) @@ -65,7 +65,7 @@ class TemplateParser(Parser): compression: str = None) -> bool: return filename.endswith('template.json') - def parse(self, mainfile: str, archive: EntryArchive, logger=None) -> None: + def parse(self, mainfile: str, archive: EntryArchive, logger=None, child_archives=None) -> None: # tell tests about received logger if logger is not None: logger.debug('received logger') @@ -99,7 +99,7 @@ class ChaosParser(Parser): compression: str = None) -> bool: return filename.endswith('chaos.json') - def parse(self, mainfile: str, archive: EntryArchive, logger=None) -> None: + def parse(self, mainfile: str, archive: EntryArchive, logger=None, child_archives=None) -> None: chaos_json = json.load(open(mainfile, 'r')) if isinstance(chaos_json, str): chaos = chaos_json @@ -153,7 +153,7 @@ class GenerateRandomParser(TemplateParser): compression: str = None) -> bool: return os.path.basename(filename).startswith('random_') - def parse(self, mainfile: str, archive: EntryArchive, logger=None) -> None: + def parse(self, mainfile: str, archive: EntryArchive, logger=None, child_archives=None) -> None: file_dir = os.path.dirname(os.path.abspath(__file__)) relative_template_file = "random_template.json" template_file = os.path.normpath(os.path.join(file_dir, relative_template_file)) diff --git a/nomad/parsing/file_parser/basic_parser.py b/nomad/parsing/file_parser/basic_parser.py index 072a3a3a45369f1d4b89f7c8c61590680938da57..436cbfff8926b2511f5a887ad8a3386bab241036 100644 --- a/nomad/parsing/file_parser/basic_parser.py +++ b/nomad/parsing/file_parser/basic_parser.py @@ -82,7 +82,7 @@ class BasicParser(FairdiParser): parser.logger = self.logger self.auxilliary_parsers.append(parser) - def parse(self, mainfile: str, archive: EntryArchive, logger=None) -> None: + def parse(self, mainfile: str, archive: EntryArchive, 
logger=None, child_archives=None) -> None: ''' Triggers parsing of mainfile and writing parsed quantities to archive. ''' diff --git a/nomad/parsing/parser.py b/nomad/parsing/parser.py index dc742fbc8ab8bb4a5d949885f6245dfa3a2f79dc..2ee6cf7f576e9494f53ea2b415ae054aebbc9231 100644 --- a/nomad/parsing/parser.py +++ b/nomad/parsing/parser.py @@ -16,7 +16,7 @@ # limitations under the License. # -from typing import List, Set, Union +from typing import List, Set, Dict, Union from abc import ABCMeta, abstractmethod import re import os @@ -34,6 +34,7 @@ class Parser(metaclass=ABCMeta): ''' name = "parsers/parser" level = 0 + creates_children = False ''' Level 0 parsers are run first, then level 1, and so on. Normally the value should be 0, use higher values only when a parser depends on other parsers. @@ -50,10 +51,11 @@ class Parser(metaclass=ABCMeta): Checks if a file is a mainfile for the parser. Should return True or a set of *keys* (non-empty strings) if it is a mainfile, otherwise a falsey value. - The option to return a set of keys should only be used by parsers that generate - multiple entries - namely a *main* entry and some number of *child* entries. - Most parsers, however, only generate a main entry, no child entries, and should thus - just return a boolean value. + The option to return a set of keys should only be used by parsers that have + `creates_children == True`. These create multiple entries for one mainfile, namely + a *main* entry and some number of *child* entries. Most parsers, however, have + `creates_children == False` and thus only generate a main entry, no child entries, + and these should thus just return a boolean value. If the return value is a set of keys, a main entry will be created when parsing, plus one child entry for each key in the returned set. The key value will be stored @@ -72,7 +74,12 @@ class Parser(metaclass=ABCMeta): pass @abstractmethod - def parse(self, mainfile: str, archive: EntryArchive, logger=None) -> None: + def parse( + self, + mainfile: str, + archive: EntryArchive, + logger=None, + child_archives: Dict[str, EntryArchive] = None) -> None: ''' Runs the parser on the given mainfile and populates the result in the given archive root_section. It allows to be run repeatedly for different mainfiles. @@ -82,6 +89,8 @@ class Parser(metaclass=ABCMeta): archive: An instance of the section :class:`EntryArchive`. It might contain a section ``metadata`` with information about the entry. logger: A optional logger + child_archives: a dictionary with {mainfile_key : EntryArchive} for each child, + for the parse function to populate with data. 
''' pass @@ -98,10 +107,20 @@ class Parser(metaclass=ABCMeta): pass @classmethod - def main(cls, mainfile): + def main(cls, mainfile, mainfile_keys: List[str] = None): archive = EntryArchive() archive.m_create(EntryMetadata) - cls().parse(mainfile, archive) # pylint: disable=no-value-for-parameter + if mainfile_keys: + child_archives = {} + for mainfile_key in mainfile_keys: + child_archive = EntryArchive() + child_archive.m_create(EntryMetadata) + child_archives[mainfile_key] = child_archive + kwargs = dict(child_archives=child_archives) + else: + kwargs = {} + + cls().parse(mainfile, archive, **kwargs) # pylint: disable=no-value-for-parameter return archive @@ -131,7 +150,7 @@ class BrokenParser(Parser): return False - def parse(self, mainfile: str, archive, logger=None): + def parse(self, mainfile: str, archive, logger=None, child_archives=None): raise Exception('Failed on purpose.') @@ -227,7 +246,7 @@ class MatchingParser(Parser): return True - def parse(self, mainfile: str, archive: EntryArchive, logger=None) -> None: + def parse(self, mainfile: str, archive: EntryArchive, logger=None, child_archives=None) -> None: raise NotImplementedError() def __repr__(self): @@ -247,7 +266,7 @@ class ArchiveParser(MatchingParser): mainfile_mime_re='.*', mainfile_name_re=r'.*(archive|metainfo)\.(json|yaml|yml)$') - def parse(self, mainfile: str, archive: EntryArchive, logger=None): + def parse(self, mainfile: str, archive: EntryArchive, logger=None, child_archives=None): if mainfile.endswith('.json'): import json with open(mainfile, 'rt') as f: @@ -276,5 +295,5 @@ class MissingParser(MatchingParser): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) - def parse(self, mainfile: str, archive: EntryArchive, logger=None): + def parse(self, mainfile: str, archive: EntryArchive, logger=None, child_archives=None): raise Exception('The code %s is not yet supported.' % self.code_name) diff --git a/nomad/parsing/parsers.py b/nomad/parsing/parsers.py index 7f4941d2668abc492820a6897fd155a750e1283b..138a0f65ea1b43d9a922830909cc98c12b687440 100644 --- a/nomad/parsing/parsers.py +++ b/nomad/parsing/parsers.py @@ -20,7 +20,7 @@ import os.path from typing import Tuple, List, Dict from nomad import config -from nomad.datamodel import results +from nomad.datamodel import EntryArchive, EntryMetadata, results from .parser import MissingParser, BrokenParser, Parser, ArchiveParser from .artificial import EmptyParser, GenerateRandomParser, TemplateParser, ChaosParser @@ -118,10 +118,8 @@ def match_parser(mainfile_path: str, strict=True, parser_name: str = None) -> Tu Returns: A tuple (`parser`, `mainfile_keys`). The `parser` is the matched parser, and - `mainfile_keys` defines the keys to use for the resulting entries. The `mainfile_keys` - list will always contain at least one value, namely None (for the main entry). - This is always the first key in the list (and usually also the only key in the list, - since most parsers don't produce any child entries). If no parser matches, we return + `mainfile_keys` defines the keys to use for child entries, if any. If there are + no child entries, `mainfile_keys` will be None. If no parser matches, we return (None, None). 
''' mainfile = os.path.basename(mainfile_path) @@ -164,12 +162,14 @@ def match_parser(mainfile_path: str, strict=True, parser_name: str = None) -> Tu match_result = parser.is_mainfile(mainfile_path, mime_type, buffer, decoded_buffer, compression) if match_result: - mainfile_keys: List[str] = [None] if type(match_result) == set: + assert parser.creates_children, 'Illegal return value - parser does not specify `creates_children`' for mainfile_key in match_result: # type: ignore assert mainfile_key and type(mainfile_key) == str, ( - f'Keys must be strings, got {type(mainfile_key)}') - mainfile_keys += sorted(match_result) # type: ignore + f'Child keys must be strings, got {type(mainfile_key)}') + mainfile_keys = sorted(match_result) # type: ignore + else: + mainfile_keys = None # potentially convert the file if encoding in ['iso-8859-1']: @@ -188,6 +188,49 @@ def match_parser(mainfile_path: str, strict=True, parser_name: str = None) -> Tu return None, None +def run_parser( + mainfile_path: str, parser: Parser, mainfile_keys: List[str] = None, logger=None) -> List[EntryArchive]: + ''' + Parses a file, given the path, the parser, and mainfile_keys, as returned by + :func:`match_parser`, and returns the resulting EntryArchive objects. Parsers that have + `creates_children == False` (the most common case) will generate a list with a single entry; + for parsers that create children, the list will consist of the main entry followed by the + child entries. The returned archive objects will have minimal metadata. + ''' + entry_archive = EntryArchive() + metadata = entry_archive.m_create(EntryMetadata) + metadata.mainfile = mainfile_path + entry_archives = [entry_archive] + if mainfile_keys: + child_archives = {} + for mainfile_key in mainfile_keys: + child_archive = EntryArchive() + child_metadata = child_archive.m_create(EntryMetadata) + child_metadata.mainfile = mainfile_path + child_metadata.mainfile_key = mainfile_key + child_archives[mainfile_key] = child_archive + entry_archives.append(child_archive) + kwargs = dict(child_archives=child_archives) + else: + kwargs = {} + + cwd = os.getcwd() + try: + mainfile_path = os.path.abspath(mainfile_path) + os.chdir(os.path.abspath(os.path.dirname(mainfile_path))) + parser.parse(mainfile_path, entry_archive, logger=logger, **kwargs) + except Exception as e: + if logger: + logger.error('parsing was not successful', exc_info=e) + raise e + finally: + os.chdir(cwd) + for entry_archive in entry_archives: + if entry_archive.metadata.domain is None: + entry_archive.metadata.domain = parser.domain + return entry_archives + + parsers = [ GenerateRandomParser(), TemplateParser(), diff --git a/nomad/processing/data.py b/nomad/processing/data.py index 4cfeceba85e56ead2395e53f5004afb10f9eecc9..e7b1730cdc1606786e221698f9ab31bcb419de89 100644 --- a/nomad/processing/data.py +++ b/nomad/processing/data.py @@ -731,6 +731,7 @@ class Entry(Proc): self._upload: Upload = None self._upload_files: StagingUploadFiles = None self._proc_logs: List[Any] = None + self._child_entries: List['Entry'] = [] self._entry_metadata: EntryMetadata = None self._perform_index = True @@ -851,7 +852,7 @@ class Entry(Proc): self._apply_metadata_from_mongo(upload, entry_metadata) return entry_metadata except KeyError: - # Due hard processing failures, it might be possible that an entry might not + # Due to hard processing failures, it might be possible that an entry might not # have an archive. Return the metadata that is available.
if self._entry_metadata is not None: return self._entry_metadata @@ -919,8 +920,16 @@ class Entry(Proc): def _process_entry_local(self): logger = self.get_logger() - if self.upload is None: - logger.error('upload does not exist') + assert self.upload is not None, 'upload does not exist' + assert self.mainfile_key is None, 'cannot process a child entry, only the parent entry' + + # Get child entries, if any + self._child_entries = list(Entry.objects( + upload_id=self.upload_id, mainfile=self.mainfile, mainfile_key__ne=None)) + for child_entry in self._child_entries: + child_entry._upload = self.upload # Optimization + child_entry.process_status = ProcessStatus.RUNNING + child_entry.set_last_status_message('Parent entry processing') # Load the reprocess settings from the upload, and apply defaults settings = config.reprocess.customize(self.upload.reprocess_settings) @@ -947,23 +956,20 @@ class Entry(Proc): self.warnings = ['no matching parser found during processing'] parser = parser_dict[self.parser_name] - if parser is not None: - should_parse = True - parser_changed = self.parser_name != parser.name and parser_dict[self.parser_name].name != parser.name - if parser_changed: - if not settings.use_original_parser: - logger.info( - 'different parser matches during process, use new parser', - parser=parser.name) - self.parser_name = parser.name # Parser renamed - else: - should_parse = False - logger.error('could not determine a parser for this entry') - self.errors = ['could not determine a parser for this entry'] + assert parser is not None, 'could not determine a parser for this entry' + should_parse = True + parser_changed = self.parser_name != parser.name and parser_dict[self.parser_name].name != parser.name + if parser_changed: + if not settings.use_original_parser: + logger.info( + 'different parser matches during process, use new parser', + parser=parser.name) + self.parser_name = parser.name # Parser renamed if should_parse: self.set_last_status_message('Initializing metadata') - self._initialize_metadata_for_processing() + for entry in self._main_and_child_entries(): + entry._initialize_metadata_for_processing() if len(self._entry_metadata.files) >= config.auxfile_cutoff: self.warning( @@ -971,8 +977,10 @@ class Entry(Proc): 'Have you placed many mainfiles in the same directory?') self.parsing() - self.normalizing() - self.archiving() + for entry in self._main_and_child_entries(): + entry.normalizing() + entry.archiving() + elif self.upload.published: self.set_last_status_message('Preserving entry data') try: @@ -984,7 +992,32 @@ class Entry(Proc): logger.error('could not copy archive for non-reprocessed entry', exc_info=e) raise + def _main_and_child_entries(self) -> Iterable['Entry']: + yield self + for child_entry in self._child_entries: + yield child_entry + + def on_success(self): + # Mark any child entries as successfully completed (necessary because the child entries + # are not processed the normal way) + for child_entry in self._child_entries: + child_entry.errors = [] + child_entry.process_status = ProcessStatus.SUCCESS + child_entry.last_status_message = 'Process process_entry completed successfully' + child_entry.save() + def on_fail(self): + self._on_fail() + # Mark any child entries as failed (necessary because the child entries + # are not processed the normal way) + for child_entry in self._child_entries: + child_entry.errors = self.errors + child_entry.process_status = ProcessStatus.FAILURE + child_entry.last_status_message = f'Process process_entry failed: 
{self.errors[-1]}' + child_entry._on_fail() + child_entry.save() + + def _on_fail(self): # in case of failure, create a minimum set of metadata and mark # processing failure try: @@ -1035,7 +1068,6 @@ class Entry(Proc): context = dict(step=self.parser_name) logger = self.get_logger(**context) parser = parser_dict[self.parser_name] - self._entry_metadata.parser_name = self.parser_name with utils.timer(logger, 'parser executed', input_size=self.mainfile_file.size): if not config.process.reuse_parser: @@ -1047,7 +1079,14 @@ class Entry(Proc): 'could not re-create parser instance', exc_info=e, error=str(e), **context) try: - parser.parse(self.mainfile_file.os_path, self._parser_results, logger=logger) + if self._child_entries: + child_archives = {} + for child_entry in self._child_entries: + child_archives[child_entry.mainfile_key] = child_entry._parser_results + kwargs = dict(child_archives=child_archives) + else: + kwargs = {} + parser.parse(self.mainfile_file.os_path, self._parser_results, logger=logger, **kwargs) except Exception as e: raise ProcessFailure('parser failed with exception', exc_info=e, error=str(e), **context) @@ -1516,12 +1555,13 @@ class Upload(Proc): e.entry_id: e for e in Entry.objects(upload_id=self.upload_id, mainfile=target_path)} entry_ids_to_delete = set(old_entries_dict.keys()) - rv: Entry = None + main_entry: Entry = None if parser: metadata_handler = MetadataEditRequestHandler( self.get_logger(), self.main_author_user, staging_upload_files, self.upload_id) - for mainfile_key in mainfile_keys: + mainfile_keys_including_main_entry: List[str] = [None] + (mainfile_keys or []) # type: ignore + for mainfile_key in mainfile_keys_including_main_entry: entry_id = generate_entry_id(self.upload_id, target_path, mainfile_key) entry = old_entries_dict.get(entry_id) if entry: @@ -1543,12 +1583,13 @@ class Upload(Proc): for quantity_name, mongo_value in entry_metadata.items(): setattr(entry, quantity_name, mongo_value) entry.save() - # process locally - self.set_last_status_message('Processing') - entry.process_entry_local() if not mainfile_key: - rv = entry # This is the main entry + main_entry = entry # This is the main entry + + # process locally + self.set_last_status_message('Processing') + main_entry.process_entry_local() # Delete existing unmatched entries if entry_ids_to_delete: @@ -1556,7 +1597,7 @@ class Upload(Proc): for entry_id in entry_ids_to_delete: search.delete_entry(entry_id=entry_id, update_materials=True) old_entries_dict[entry_id].delete() - return rv + return main_entry @property def upload_files(self) -> UploadFiles: @@ -1679,7 +1720,8 @@ class Upload(Proc): parser, mainfile_keys = match_parser( staging_upload_files.raw_file_object(path_info.path).os_path) if parser is not None: - for mainfile_key in mainfile_keys: + mainfile_keys_including_main_entry: List[str] = [None] + (mainfile_keys or []) # type: ignore + for mainfile_key in mainfile_keys_including_main_entry: yield path_info.path, mainfile_key, parser except Exception as e: self.get_logger().error( @@ -1774,7 +1816,7 @@ class Upload(Proc): except Exception as e: # try to remove the staging copy in failure case - logger.error('failed to trigger processing of all entries', exc_info=e) + logger.error('failed to perform matching', exc_info=e) if self.published: self._cleanup_staging_files() raise @@ -1790,7 +1832,7 @@ class Upload(Proc): next_entries: List[Entry] = None with utils.timer(logger, 'entries processing called'): # Determine what the next level is and which entries belongs to this level - for 
entry in Entry.objects(upload_id=self.upload_id): + for entry in Entry.objects(upload_id=self.upload_id, mainfile_key=None): parser = parser_dict.get(entry.parser_name) if parser: level = parser.level diff --git a/tests/conftest.py b/tests/conftest.py index 8fc8f3ccbf30782cd4f3257d9066e38f592dcbb1..d4472373bd5530d79b27f8357f78be23f62315a4 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -584,13 +584,13 @@ def internal_example_user_metadata(example_user_metadata) -> dict: def parsed(example_mainfile: Tuple[str, str]) -> EntryArchive: ''' Provides a parsed entry in the form of an EntryArchive. ''' parser, mainfile = example_mainfile - return test_parsing.run_parser(parser, mainfile) + return test_parsing.run_singular_parser(parser, mainfile) @pytest.fixture(scope='session') def parsed_ems() -> EntryArchive: ''' Provides a parsed experiment in the form of a EntryArchive. ''' - return test_parsing.run_parser('parsers/eels', 'tests/data/parsers/eels.json') + return test_parsing.run_singular_parser('parsers/eels', 'tests/data/parsers/eels.json') @pytest.fixture(scope='session') diff --git a/tests/parsing/test_parsing.py b/tests/parsing/test_parsing.py index daa94d6ab0d51b758a3326213c7cfd13fedef919..fe8fe7d0c144786cb5312572ff7d071ecb6e07d5 100644 --- a/tests/parsing/test_parsing.py +++ b/tests/parsing/test_parsing.py @@ -21,10 +21,10 @@ import pytest import os from shutil import copyfile -from nomad import utils, files, datamodel -from nomad.datamodel import EntryArchive, EntryMetadata +from nomad import utils, files +from nomad.datamodel import EntryArchive from nomad.parsing import BrokenParser -from nomad.parsing.parsers import parser_dict, match_parser +from nomad.parsing.parsers import parser_dict, match_parser, run_parser from nomad.utils import dump_json parser_examples = [ @@ -122,38 +122,35 @@ def assert_parser_dir_unchanged(previous_wd, current_wd): assert previous_wd == current_wd -def run_parser(parser_name, mainfile): +def run_singular_parser(parser_name, mainfile): + ''' Runs a singular parser (a parser which creates no child entries) and adds metadata. 
''' parser = parser_dict[parser_name] - entry_archive = EntryArchive() - metadata = entry_archive.m_create(EntryMetadata) - parser.parse(mainfile, entry_archive, logger=utils.get_logger(__name__)) - if metadata.domain is None: - metadata.domain = parser.domain - - return add_metadata(entry_archive, parser_name=parser_name) + assert not parser.creates_children + archives = run_parser(mainfile, parser, logger=utils.get_logger(__name__)) + return add_metadata(archives[0], parser_name=parser_name) @pytest.fixture def parsed_vasp_example() -> EntryArchive: - return run_parser( + return run_singular_parser( 'parsers/vasp', 'dependencies/parsers/vasp/test/examples/xml/perovskite.xml') @pytest.fixture def parsed_template_example() -> EntryArchive: - return run_parser( + return run_singular_parser( 'parsers/template', 'tests/data/templates/template.json') def parse_file(parser_name_and_mainfile) -> EntryArchive: parser_name, mainfile = parser_name_and_mainfile - return run_parser(parser_name, mainfile) + return run_singular_parser(parser_name, mainfile) @pytest.fixture(params=parser_examples, ids=lambda spec: '%s-%s' % spec) def parsed_example(request) -> EntryArchive: parser_name, mainfile = request.param - result = run_parser(parser_name, mainfile) + result = run_singular_parser(parser_name, mainfile) return result @@ -170,7 +167,7 @@ def add_metadata(entry_archive: EntryArchive, **kwargs) -> EntryArchive: @pytest.mark.parametrize('parser_name, mainfile', parser_examples) def test_parser(parser_name, mainfile, assert_parser_result): previous_wd = os.getcwd() # Get Working directory before parsing. - parsed_example = run_parser(parser_name, mainfile) + parsed_example = run_singular_parser(parser_name, mainfile) assert_parser_result(parsed_example) # Check that cwd has not changed. assert_parser_dir_unchanged(previous_wd, current_wd=os.getcwd()) @@ -179,7 +176,7 @@ def test_parser(parser_name, mainfile, assert_parser_result): def test_broken_xml_vasp(assert_parser_result): parser_name, mainfile = 'parsers/vasp', 'tests/data/parsers/vasp/broken.xml' previous_wd = os.getcwd() # Get Working directory before parsing. - parsed_example = run_parser(parser_name, mainfile) + parsed_example = run_singular_parser(parser_name, mainfile) assert_parser_result(parsed_example, has_warnings=True) # Check that cwd has not changed. 
assert_parser_dir_unchanged(previous_wd, current_wd=os.getcwd()) @@ -219,21 +216,18 @@ def parser_in_dir(dir): parser, mainfile_keys = match_parser(file_path) if parser is not None: - for mainfile_key in mainfile_keys: - try: - archive = datamodel.EntryArchive() - metadata = archive.m_create(datamodel.EntryMetadata) - metadata.mainfile = file_name - metadata.mainfile_key = mainfile_key - parser.parse(file_path, entry_archive=archive) - # check if the result can be dumped + try: + archives = run_parser(file_path, parser, mainfile_keys) + + # check if the result can be dumped + for archive in archives: dump_json(archive.m_to_dict()) - except Exception as e: - print(file_path, parser, 'FAILURE', e) - import traceback - traceback.print_exc() - else: - print(file_path, parser, 'SUCCESS') + except Exception as e: + print(file_path, parser, 'FAILURE', e) + import traceback + traceback.print_exc() + else: + print(file_path, parser, 'SUCCESS') if __name__ == '__main__': diff --git a/tests/processing/test_data.py b/tests/processing/test_data.py index cfc7322ef333d711e2b2c14fec570fe86ca8c00f..7172269a02cb90abc7686851df182e569fa4b558 100644 --- a/tests/processing/test_data.py +++ b/tests/processing/test_data.py @@ -17,7 +17,7 @@ # from nomad.datamodel.datamodel import EntryArchive -from typing import Generator, Tuple +from typing import Generator, Tuple, Dict import pytest import os.path import re @@ -657,6 +657,7 @@ def test_parent_child_parser(proc_infra, test_user, tmp): # Create a dummy parser which creates child entries class ParentChildParser(Parser): name = 'parsers/parentchild' + creates_children = True def is_mainfile( self, filename: str, mime: str, buffer: bytes, decoded_buffer: str, @@ -665,8 +666,12 @@ def test_parent_child_parser(proc_infra, test_user, tmp): return set([line.strip() for line in decoded_buffer.split('\n')[1:] if line.strip()]) return False - def parse(self, mainfile: str, archive: EntryArchive, logger=None): - archive.metadata.comment = archive.metadata.mainfile_key or 'parent' + def parse( + self, mainfile: str, archive: EntryArchive, logger=None, + child_archives: Dict[str, EntryArchive] = None): + archive.metadata.comment = 'parent' + for mainfile_key, child_archive in child_archives.items(): + child_archive.metadata.comment = mainfile_key # Register it test_parser = ParentChildParser() @@ -688,7 +693,7 @@ def test_parent_child_parser(proc_infra, test_user, tmp): assert set([e.mainfile_key for e in upload.successful_entries]) == set([None, *children]) for entry in upload.successful_entries: metadata = entry.full_entry_metadata(upload) - assert metadata.comment == entry.mainfile_key or 'parent' + assert metadata.comment == (entry.mainfile_key or 'parent') upload.process_upload(file_operation=dict(op='DELETE', path=example_filename)) upload.block_until_complete()
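Usage illustration (not part of the patch): a minimal sketch of a parser using the child-entry API introduced above, modeled on the ParentChildParser defined in tests/processing/test_data.py. The class name ExampleChildParser, the parser name, and the '#child_mainfile' header convention are made up for illustration; creates_children, is_mainfile returning a set of keys, parse with child_archives, and run_parser are the interfaces added by this diff.

from typing import Dict, Set, Union

from nomad.datamodel import EntryArchive
from nomad.parsing.parser import Parser
from nomad.parsing.parsers import run_parser


class ExampleChildParser(Parser):
    ''' Creates a main entry plus one child entry per non-empty line after the header. '''
    name = 'parsers/example-child'  # illustrative name
    creates_children = True

    def is_mainfile(
            self, filename: str, mime: str, buffer: bytes, decoded_buffer: str,
            compression: str = None) -> Union[bool, Set[str]]:
        if decoded_buffer and decoded_buffer.startswith('#child_mainfile'):
            # Returning a set of keys (instead of True) requests one child entry per key
            return set(line.strip() for line in decoded_buffer.split('\n')[1:] if line.strip())
        return False

    def parse(
            self, mainfile: str, archive: EntryArchive, logger=None,
            child_archives: Dict[str, EntryArchive] = None):
        # The caller (run_parser or Entry.parsing) supplies the main archive and one
        # archive per child key; the parser populates all of them in a single pass.
        archive.metadata.comment = 'parent'
        for mainfile_key, child_archive in (child_archives or {}).items():
            child_archive.metadata.comment = mainfile_key


# run_parser returns the main entry first, followed by the child entries:
# archives = run_parser('example_file', ExampleChildParser(), ['child1', 'child2'])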