diff --git a/nomad/config.py b/nomad/config.py index 43bd47d4ec096d06da3d21b42824e18f1880f1b9..8a4537107ef247dd2534950f21addacd924babdc 100644 --- a/nomad/config.py +++ b/nomad/config.py @@ -211,6 +211,7 @@ datacite = NomadConfig( version = '0.7.6' commit = gitinfo.commit release = 'devel' +default_domain = 'dft' service = 'unknown nomad service' auxfile_cutoff = 100 parser_matching_size = 9128 diff --git a/nomad/datamodel/base.py b/nomad/datamodel/base.py index c6a00d80dd49292c1d219a4d69ff5942a11b53bc..75918c6c0c4166904ac17381e955b0bfe4cbd8a8 100644 --- a/nomad/datamodel/base.py +++ b/nomad/datamodel/base.py @@ -388,7 +388,9 @@ class Domain: quantity = domain_quantities.get(quantity_name, None) if quantity is None: - domain_quantities[quantity_name] = DomainQuantity() + quantity = DomainQuantity() + quantity.domain = name + domain_quantities[quantity_name] = quantity # add all domain quantities for quantity_name, quantity in domain_quantities.items(): diff --git a/nomad/normalizing/normalizer.py b/nomad/normalizing/normalizer.py index bbd76b3fd57082ecba4d54e816f900c7e6ba5b05..2ca1efbf906e3479aa0aa5080537f5006b494e78 100644 --- a/nomad/normalizing/normalizer.py +++ b/nomad/normalizing/normalizer.py @@ -18,12 +18,6 @@ from typing import List from nomad.parsing import AbstractParserBackend from nomad.utils import get_logger -s_system = 'section_system' -s_scc = 'section_single_configuration_calculation' -s_frame_sequence = 'section_frame_sequence' -r_scc_to_system = 'single_configuration_calculation_to_system_ref' -r_frame_sequence_local_frames = 'frame_sequence_local_frames_ref' - class Normalizer(metaclass=ABCMeta): """ @@ -34,7 +28,7 @@ class Normalizer(metaclass=ABCMeta): backend: The backend used to read and write data from and to. """ - domain = 'DFT' + domain = 'dft' """ The domain this normalizer should be used in. Default for all normalizer is 'DFT'. """ def __init__(self, backend: AbstractParserBackend) -> None: @@ -93,50 +87,40 @@ class SystemBasedNormalizer(Normalizer, metaclass=ABCMeta): Attempt to find a single section_system that is representative for the entry. The selection depends on the type of calculation. """ - system_idx = None - # Try to find a frame sequence, only first found is considered try: - frame_seqs = self._backend[s_frame_sequence] - frame_seq = frame_seqs[0] - sampling_method_idx = frame_seq["frame_sequence_to_sampling_ref"] - sec_sampling_method = self._backend["section_sampling_method"][sampling_method_idx] - sampling_method = sec_sampling_method["sampling_method"] - frames = frame_seq["frame_sequence_local_frames_ref"] - if sampling_method == "molecular_dynamics": + frame_seq = self._backend['section_frame_sequence'][0] + sampling_method_idx = frame_seq['frame_sequence_to_sampling_ref'] + sec_sampling_method = self._backend['section_sampling_method'][sampling_method_idx] + sampling_method = sec_sampling_method['sampling_method'] + frames = frame_seq['frame_sequence_local_frames_ref'] + if sampling_method == 'molecular_dynamics': scc_idx = frames[0] else: scc_idx = frames[-1] - scc = self._backend[s_scc][scc_idx] - system_idx = scc["single_configuration_calculation_to_system_ref"] + scc = self._backend['section_single_configuration_calculation'][scc_idx] + return scc['single_configuration_calculation_to_system_ref'] except Exception: - frame_seqs = [] + pass # If no frame sequences detected, try to find scc - if len(frame_seqs) == 0: - try: - sccs = self._backend[s_scc] - scc = sccs[-1] - system_idx = scc["single_configuration_calculation_to_system_ref"] - except Exception: - sccs = [] - - # If no sccs exist, try to find systems - if len(sccs) == 0: - try: - systems = self._backend.get_sections(s_system) - system_idx = systems[-1] - except Exception: - sccs = [] - - if system_idx is None: - self.logger.error('no "representative" section system found') - else: - self.logger.info( - 'chose "representative" system for normalization', - ) + try: + sccs = self._backend['section_single_configuration_calculation'] + scc = sccs[-1] + return scc['single_configuration_calculation_to_system_ref'] + except Exception: + pass + + # If no sccs exist, try to find systems + try: + systems = self._backend.get_sections('section_system') + return systems[-1] + except Exception: + pass + + self.logger.error('no "representative" section system found') - return system_idx + return None def __normalize_system(self, g_index, representative, logger=None) -> bool: try: @@ -157,11 +141,12 @@ class SystemBasedNormalizer(Normalizer, metaclass=ABCMeta): def normalize(self, logger=None) -> None: super().normalize(logger) - all_systems = self._backend.get_sections(s_system) + all_systems = self._backend.get_sections('section_system') # Process representative system first representative_system_idx = self.__representative_system() if representative_system_idx is not None: + self.logger.info('chose "representative" section system') self.__normalize_system(representative_system_idx, True, logger) # All the rest if requested diff --git a/nomad/search.py b/nomad/search.py index 9be8df3abb8d98fa396524044318a8232bee78be..5714f84801a47bae8dccbb8e7538c958850fce60 100644 --- a/nomad/search.py +++ b/nomad/search.py @@ -286,7 +286,7 @@ class SearchRequest: There is also scrolling for quantities to go through all quantity values. There is no paging for aggregations. ''' - def __init__(self, domain: str, query=None): + def __init__(self, domain: str = config.default_domain, query=None): self._domain = domain self._query = query self._search = Search(index=config.elastic.index_name) diff --git a/tests/processing/test_data.py b/tests/processing/test_data.py index 3b44218f19f2594c3457f5ed4530c5005ad77bc7..4ace9feecd4340bcd253ad1b0205870bd2e9ea6f 100644 --- a/tests/processing/test_data.py +++ b/tests/processing/test_data.py @@ -20,7 +20,7 @@ import json import re import shutil -from nomad import utils, infrastructure, config, datamodel +from nomad import utils, infrastructure, config from nomad.files import UploadFiles, StagingUploadFiles, PublicUploadFiles from nomad.processing import Upload, Calc from nomad.processing.base import task as task_decorator, FAILURE, SUCCESS @@ -106,6 +106,10 @@ def assert_processing(upload: Upload, published: bool = False): with upload_files.raw_file(path) as f: f.read() + # check some domain metadata + assert calc.metadata['n_atoms'] > 0 + assert len(calc.metadata['atoms']) > 0 + assert upload.get_calc(calc.calc_id).metadata is not None @@ -378,14 +382,14 @@ def test_malicious_parser_task_failure(proc_infra, failure, test_user): assert len(calc.errors) == 1 -def test_ems_data(proc_infra, test_user, monkeypatch): - monkeypatch.setattr('nomad.config.domain', 'ems') - monkeypatch.setattr('nomad.datamodel.Domain.instance', datamodel.Domain.instances['ems']) - monkeypatch.setattr('nomad.datamodel.CalcWithMetadata', datamodel.Domain.instance.domain_entry_class) +def test_ems_data(proc_infra, test_user): + upload = run_processing(('test_ems_upload', 'tests/data/proc/example_ems.zip'), test_user) - additional_keys = ['method', 'experiment_location', 'experiment_time', 'formula', 'chemical'] + additional_keys = [ + 'ems.method', 'ems.experiment_location', 'ems.experiment_time', 'ems.formula', + 'ems.chemical'] assert upload.total_calcs == 1 assert len(upload.calcs) == 1 diff --git a/tests/test_search.py b/tests/test_search.py index 1f7691dcc18492751de36a186760fcebb067baa8..9b1f2d0e29c9209c72bf1c6391282911b0c25d03 100644 --- a/tests/test_search.py +++ b/tests/test_search.py @@ -151,18 +151,18 @@ def test_search_totals(elastic, example_search_data): def test_search_exclude(elastic, example_search_data): for item in SearchRequest().execute_paginated()['results']: - assert 'atoms' in item + assert 'dft.atoms' in item - for item in SearchRequest().exclude('atoms').execute_paginated()['results']: - assert 'atoms' not in item + for item in SearchRequest().exclude('dft.atoms').execute_paginated()['results']: + assert 'dft.atoms' not in item def test_search_include(elastic, example_search_data): for item in SearchRequest().execute_paginated()['results']: - assert 'atoms' in item + assert 'dft.atoms' in item for item in SearchRequest().include('calc_id').execute_paginated()['results']: - assert 'atoms' not in item + assert 'dft.atoms' not in item assert 'calc_id' in item @@ -236,7 +236,6 @@ def assert_search_upload(upload: datamodel.UploadWithMetadata, additional_keys: assert key in hit for key in additional_keys: - print(hit.keys()) assert key in hit assert hit[key] != config.services.unavailable_value