Commit cfb09d27 authored by Markus Scheidgen's avatar Markus Scheidgen
Browse files

Search and proc test work after refactoring domains.

parent a1e8a32b
......@@ -211,6 +211,7 @@ datacite = NomadConfig(
version = '0.7.6'
commit = gitinfo.commit
release = 'devel'
default_domain = 'dft'
service = 'unknown nomad service'
auxfile_cutoff = 100
parser_matching_size = 9128
......
......@@ -388,7 +388,9 @@ class Domain:
quantity = domain_quantities.get(quantity_name, None)
if quantity is None:
domain_quantities[quantity_name] = DomainQuantity()
quantity = DomainQuantity()
quantity.domain = name
domain_quantities[quantity_name] = quantity
# add all domain quantities
for quantity_name, quantity in domain_quantities.items():
......
......@@ -18,12 +18,6 @@ from typing import List
from nomad.parsing import AbstractParserBackend
from nomad.utils import get_logger
s_system = 'section_system'
s_scc = 'section_single_configuration_calculation'
s_frame_sequence = 'section_frame_sequence'
r_scc_to_system = 'single_configuration_calculation_to_system_ref'
r_frame_sequence_local_frames = 'frame_sequence_local_frames_ref'
class Normalizer(metaclass=ABCMeta):
"""
......@@ -34,7 +28,7 @@ class Normalizer(metaclass=ABCMeta):
backend: The backend used to read and write data from and to.
"""
domain = 'DFT'
domain = 'dft'
""" The domain this normalizer should be used in. Default for all normalizer is 'DFT'. """
def __init__(self, backend: AbstractParserBackend) -> None:
......@@ -93,50 +87,40 @@ class SystemBasedNormalizer(Normalizer, metaclass=ABCMeta):
Attempt to find a single section_system that is representative for the
entry. The selection depends on the type of calculation.
"""
system_idx = None
# Try to find a frame sequence, only first found is considered
try:
frame_seqs = self._backend[s_frame_sequence]
frame_seq = frame_seqs[0]
sampling_method_idx = frame_seq["frame_sequence_to_sampling_ref"]
sec_sampling_method = self._backend["section_sampling_method"][sampling_method_idx]
sampling_method = sec_sampling_method["sampling_method"]
frames = frame_seq["frame_sequence_local_frames_ref"]
if sampling_method == "molecular_dynamics":
frame_seq = self._backend['section_frame_sequence'][0]
sampling_method_idx = frame_seq['frame_sequence_to_sampling_ref']
sec_sampling_method = self._backend['section_sampling_method'][sampling_method_idx]
sampling_method = sec_sampling_method['sampling_method']
frames = frame_seq['frame_sequence_local_frames_ref']
if sampling_method == 'molecular_dynamics':
scc_idx = frames[0]
else:
scc_idx = frames[-1]
scc = self._backend[s_scc][scc_idx]
system_idx = scc["single_configuration_calculation_to_system_ref"]
scc = self._backend['section_single_configuration_calculation'][scc_idx]
return scc['single_configuration_calculation_to_system_ref']
except Exception:
frame_seqs = []
pass
# If no frame sequences detected, try to find scc
if len(frame_seqs) == 0:
try:
sccs = self._backend[s_scc]
scc = sccs[-1]
system_idx = scc["single_configuration_calculation_to_system_ref"]
except Exception:
sccs = []
# If no sccs exist, try to find systems
if len(sccs) == 0:
try:
systems = self._backend.get_sections(s_system)
system_idx = systems[-1]
except Exception:
sccs = []
if system_idx is None:
self.logger.error('no "representative" section system found')
else:
self.logger.info(
'chose "representative" system for normalization',
)
try:
sccs = self._backend['section_single_configuration_calculation']
scc = sccs[-1]
return scc['single_configuration_calculation_to_system_ref']
except Exception:
pass
# If no sccs exist, try to find systems
try:
systems = self._backend.get_sections('section_system')
return systems[-1]
except Exception:
pass
self.logger.error('no "representative" section system found')
return system_idx
return None
def __normalize_system(self, g_index, representative, logger=None) -> bool:
try:
......@@ -157,11 +141,12 @@ class SystemBasedNormalizer(Normalizer, metaclass=ABCMeta):
def normalize(self, logger=None) -> None:
super().normalize(logger)
all_systems = self._backend.get_sections(s_system)
all_systems = self._backend.get_sections('section_system')
# Process representative system first
representative_system_idx = self.__representative_system()
if representative_system_idx is not None:
self.logger.info('chose "representative" section system')
self.__normalize_system(representative_system_idx, True, logger)
# All the rest if requested
......
......@@ -286,7 +286,7 @@ class SearchRequest:
There is also scrolling for quantities to go through all quantity values. There is no
paging for aggregations.
'''
def __init__(self, domain: str, query=None):
def __init__(self, domain: str = config.default_domain, query=None):
self._domain = domain
self._query = query
self._search = Search(index=config.elastic.index_name)
......
......@@ -20,7 +20,7 @@ import json
import re
import shutil
from nomad import utils, infrastructure, config, datamodel
from nomad import utils, infrastructure, config
from nomad.files import UploadFiles, StagingUploadFiles, PublicUploadFiles
from nomad.processing import Upload, Calc
from nomad.processing.base import task as task_decorator, FAILURE, SUCCESS
......@@ -106,6 +106,10 @@ def assert_processing(upload: Upload, published: bool = False):
with upload_files.raw_file(path) as f:
f.read()
# check some domain metadata
assert calc.metadata['n_atoms'] > 0
assert len(calc.metadata['atoms']) > 0
assert upload.get_calc(calc.calc_id).metadata is not None
......@@ -378,14 +382,14 @@ def test_malicious_parser_task_failure(proc_infra, failure, test_user):
assert len(calc.errors) == 1
def test_ems_data(proc_infra, test_user, monkeypatch):
monkeypatch.setattr('nomad.config.domain', 'ems')
monkeypatch.setattr('nomad.datamodel.Domain.instance', datamodel.Domain.instances['ems'])
monkeypatch.setattr('nomad.datamodel.CalcWithMetadata', datamodel.Domain.instance.domain_entry_class)
def test_ems_data(proc_infra, test_user):
upload = run_processing(('test_ems_upload', 'tests/data/proc/example_ems.zip'), test_user)
additional_keys = ['method', 'experiment_location', 'experiment_time', 'formula', 'chemical']
additional_keys = [
'ems.method', 'ems.experiment_location', 'ems.experiment_time', 'ems.formula',
'ems.chemical']
assert upload.total_calcs == 1
assert len(upload.calcs) == 1
......
......@@ -151,18 +151,18 @@ def test_search_totals(elastic, example_search_data):
def test_search_exclude(elastic, example_search_data):
for item in SearchRequest().execute_paginated()['results']:
assert 'atoms' in item
assert 'dft.atoms' in item
for item in SearchRequest().exclude('atoms').execute_paginated()['results']:
assert 'atoms' not in item
for item in SearchRequest().exclude('dft.atoms').execute_paginated()['results']:
assert 'dft.atoms' not in item
def test_search_include(elastic, example_search_data):
for item in SearchRequest().execute_paginated()['results']:
assert 'atoms' in item
assert 'dft.atoms' in item
for item in SearchRequest().include('calc_id').execute_paginated()['results']:
assert 'atoms' not in item
assert 'dft.atoms' not in item
assert 'calc_id' in item
......@@ -236,7 +236,6 @@ def assert_search_upload(upload: datamodel.UploadWithMetadata, additional_keys:
assert key in hit
for key in additional_keys:
print(hit.keys())
assert key in hit
assert hit[key] != config.services.unavailable_value
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment