Skip to content
Snippets Groups Projects
Commit cfb09d27 authored by Markus Scheidgen's avatar Markus Scheidgen
Browse files

Search and proc tests work after refactoring domains.

parent a1e8a32b
No related branches found
No related tags found
2 merge requests!115V0.8.0 beta,!113V0.8.0
...@@ -211,6 +211,7 @@ datacite = NomadConfig( ...@@ -211,6 +211,7 @@ datacite = NomadConfig(
version = '0.7.6' version = '0.7.6'
commit = gitinfo.commit commit = gitinfo.commit
release = 'devel' release = 'devel'
default_domain = 'dft'
service = 'unknown nomad service' service = 'unknown nomad service'
auxfile_cutoff = 100 auxfile_cutoff = 100
parser_matching_size = 9128 parser_matching_size = 9128
......
...@@ -388,7 +388,9 @@ class Domain: ...@@ -388,7 +388,9 @@ class Domain:
quantity = domain_quantities.get(quantity_name, None) quantity = domain_quantities.get(quantity_name, None)
if quantity is None: if quantity is None:
domain_quantities[quantity_name] = DomainQuantity() quantity = DomainQuantity()
quantity.domain = name
domain_quantities[quantity_name] = quantity
# add all domain quantities # add all domain quantities
for quantity_name, quantity in domain_quantities.items(): for quantity_name, quantity in domain_quantities.items():
......
...@@ -18,12 +18,6 @@ from typing import List ...@@ -18,12 +18,6 @@ from typing import List
from nomad.parsing import AbstractParserBackend from nomad.parsing import AbstractParserBackend
from nomad.utils import get_logger from nomad.utils import get_logger
s_system = 'section_system'
s_scc = 'section_single_configuration_calculation'
s_frame_sequence = 'section_frame_sequence'
r_scc_to_system = 'single_configuration_calculation_to_system_ref'
r_frame_sequence_local_frames = 'frame_sequence_local_frames_ref'
class Normalizer(metaclass=ABCMeta): class Normalizer(metaclass=ABCMeta):
""" """
...@@ -34,7 +28,7 @@ class Normalizer(metaclass=ABCMeta): ...@@ -34,7 +28,7 @@ class Normalizer(metaclass=ABCMeta):
backend: The backend used to read and write data from and to. backend: The backend used to read and write data from and to.
""" """
domain = 'DFT' domain = 'dft'
""" The domain this normalizer should be used in. Default for all normalizer is 'DFT'. """ """ The domain this normalizer should be used in. Default for all normalizer is 'DFT'. """
def __init__(self, backend: AbstractParserBackend) -> None: def __init__(self, backend: AbstractParserBackend) -> None:
...@@ -93,50 +87,40 @@ class SystemBasedNormalizer(Normalizer, metaclass=ABCMeta): ...@@ -93,50 +87,40 @@ class SystemBasedNormalizer(Normalizer, metaclass=ABCMeta):
Attempt to find a single section_system that is representative for the Attempt to find a single section_system that is representative for the
entry. The selection depends on the type of calculation. entry. The selection depends on the type of calculation.
""" """
system_idx = None
# Try to find a frame sequence, only first found is considered # Try to find a frame sequence, only first found is considered
try: try:
frame_seqs = self._backend[s_frame_sequence] frame_seq = self._backend['section_frame_sequence'][0]
frame_seq = frame_seqs[0] sampling_method_idx = frame_seq['frame_sequence_to_sampling_ref']
sampling_method_idx = frame_seq["frame_sequence_to_sampling_ref"] sec_sampling_method = self._backend['section_sampling_method'][sampling_method_idx]
sec_sampling_method = self._backend["section_sampling_method"][sampling_method_idx] sampling_method = sec_sampling_method['sampling_method']
sampling_method = sec_sampling_method["sampling_method"] frames = frame_seq['frame_sequence_local_frames_ref']
frames = frame_seq["frame_sequence_local_frames_ref"] if sampling_method == 'molecular_dynamics':
if sampling_method == "molecular_dynamics":
scc_idx = frames[0] scc_idx = frames[0]
else: else:
scc_idx = frames[-1] scc_idx = frames[-1]
scc = self._backend[s_scc][scc_idx] scc = self._backend['section_single_configuration_calculation'][scc_idx]
system_idx = scc["single_configuration_calculation_to_system_ref"] return scc['single_configuration_calculation_to_system_ref']
except Exception: except Exception:
frame_seqs = [] pass
# If no frame sequences detected, try to find scc # If no frame sequences detected, try to find scc
if len(frame_seqs) == 0: try:
try: sccs = self._backend['section_single_configuration_calculation']
sccs = self._backend[s_scc] scc = sccs[-1]
scc = sccs[-1] return scc['single_configuration_calculation_to_system_ref']
system_idx = scc["single_configuration_calculation_to_system_ref"] except Exception:
except Exception: pass
sccs = []
# If no sccs exist, try to find systems
# If no sccs exist, try to find systems try:
if len(sccs) == 0: systems = self._backend.get_sections('section_system')
try: return systems[-1]
systems = self._backend.get_sections(s_system) except Exception:
system_idx = systems[-1] pass
except Exception:
sccs = [] self.logger.error('no "representative" section system found')
if system_idx is None:
self.logger.error('no "representative" section system found')
else:
self.logger.info(
'chose "representative" system for normalization',
)
return system_idx return None
def __normalize_system(self, g_index, representative, logger=None) -> bool: def __normalize_system(self, g_index, representative, logger=None) -> bool:
try: try:
...@@ -157,11 +141,12 @@ class SystemBasedNormalizer(Normalizer, metaclass=ABCMeta): ...@@ -157,11 +141,12 @@ class SystemBasedNormalizer(Normalizer, metaclass=ABCMeta):
def normalize(self, logger=None) -> None: def normalize(self, logger=None) -> None:
super().normalize(logger) super().normalize(logger)
all_systems = self._backend.get_sections(s_system) all_systems = self._backend.get_sections('section_system')
# Process representative system first # Process representative system first
representative_system_idx = self.__representative_system() representative_system_idx = self.__representative_system()
if representative_system_idx is not None: if representative_system_idx is not None:
self.logger.info('chose "representative" section system')
self.__normalize_system(representative_system_idx, True, logger) self.__normalize_system(representative_system_idx, True, logger)
# All the rest if requested # All the rest if requested
......
...@@ -286,7 +286,7 @@ class SearchRequest: ...@@ -286,7 +286,7 @@ class SearchRequest:
There is also scrolling for quantities to go through all quantity values. There is no There is also scrolling for quantities to go through all quantity values. There is no
paging for aggregations. paging for aggregations.
''' '''
def __init__(self, domain: str, query=None): def __init__(self, domain: str = config.default_domain, query=None):
self._domain = domain self._domain = domain
self._query = query self._query = query
self._search = Search(index=config.elastic.index_name) self._search = Search(index=config.elastic.index_name)
......
...@@ -20,7 +20,7 @@ import json ...@@ -20,7 +20,7 @@ import json
import re import re
import shutil import shutil
from nomad import utils, infrastructure, config, datamodel from nomad import utils, infrastructure, config
from nomad.files import UploadFiles, StagingUploadFiles, PublicUploadFiles from nomad.files import UploadFiles, StagingUploadFiles, PublicUploadFiles
from nomad.processing import Upload, Calc from nomad.processing import Upload, Calc
from nomad.processing.base import task as task_decorator, FAILURE, SUCCESS from nomad.processing.base import task as task_decorator, FAILURE, SUCCESS
...@@ -106,6 +106,10 @@ def assert_processing(upload: Upload, published: bool = False): ...@@ -106,6 +106,10 @@ def assert_processing(upload: Upload, published: bool = False):
with upload_files.raw_file(path) as f: with upload_files.raw_file(path) as f:
f.read() f.read()
# check some domain metadata
assert calc.metadata['n_atoms'] > 0
assert len(calc.metadata['atoms']) > 0
assert upload.get_calc(calc.calc_id).metadata is not None assert upload.get_calc(calc.calc_id).metadata is not None
...@@ -378,14 +382,14 @@ def test_malicious_parser_task_failure(proc_infra, failure, test_user): ...@@ -378,14 +382,14 @@ def test_malicious_parser_task_failure(proc_infra, failure, test_user):
assert len(calc.errors) == 1 assert len(calc.errors) == 1
def test_ems_data(proc_infra, test_user, monkeypatch): def test_ems_data(proc_infra, test_user):
monkeypatch.setattr('nomad.config.domain', 'ems')
monkeypatch.setattr('nomad.datamodel.Domain.instance', datamodel.Domain.instances['ems'])
monkeypatch.setattr('nomad.datamodel.CalcWithMetadata', datamodel.Domain.instance.domain_entry_class)
upload = run_processing(('test_ems_upload', 'tests/data/proc/example_ems.zip'), test_user) upload = run_processing(('test_ems_upload', 'tests/data/proc/example_ems.zip'), test_user)
additional_keys = ['method', 'experiment_location', 'experiment_time', 'formula', 'chemical'] additional_keys = [
'ems.method', 'ems.experiment_location', 'ems.experiment_time', 'ems.formula',
'ems.chemical']
assert upload.total_calcs == 1 assert upload.total_calcs == 1
assert len(upload.calcs) == 1 assert len(upload.calcs) == 1
......
...@@ -151,18 +151,18 @@ def test_search_totals(elastic, example_search_data): ...@@ -151,18 +151,18 @@ def test_search_totals(elastic, example_search_data):
def test_search_exclude(elastic, example_search_data): def test_search_exclude(elastic, example_search_data):
for item in SearchRequest().execute_paginated()['results']: for item in SearchRequest().execute_paginated()['results']:
assert 'atoms' in item assert 'dft.atoms' in item
for item in SearchRequest().exclude('atoms').execute_paginated()['results']: for item in SearchRequest().exclude('dft.atoms').execute_paginated()['results']:
assert 'atoms' not in item assert 'dft.atoms' not in item
def test_search_include(elastic, example_search_data): def test_search_include(elastic, example_search_data):
for item in SearchRequest().execute_paginated()['results']: for item in SearchRequest().execute_paginated()['results']:
assert 'atoms' in item assert 'dft.atoms' in item
for item in SearchRequest().include('calc_id').execute_paginated()['results']: for item in SearchRequest().include('calc_id').execute_paginated()['results']:
assert 'atoms' not in item assert 'dft.atoms' not in item
assert 'calc_id' in item assert 'calc_id' in item
...@@ -236,7 +236,6 @@ def assert_search_upload(upload: datamodel.UploadWithMetadata, additional_keys: ...@@ -236,7 +236,6 @@ def assert_search_upload(upload: datamodel.UploadWithMetadata, additional_keys:
assert key in hit assert key in hit
for key in additional_keys: for key in additional_keys:
print(hit.keys())
assert key in hit assert key in hit
assert hit[key] != config.services.unavailable_value assert hit[key] != config.services.unavailable_value
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment