Commit cfb09d27 authored by Markus Scheidgen's avatar Markus Scheidgen
Browse files

Search and proc test work after refactoring domains.

parent a1e8a32b
......@@ -211,6 +211,7 @@ datacite = NomadConfig(
version = '0.7.6'
commit = gitinfo.commit
release = 'devel'
default_domain = 'dft'
service = 'unknown nomad service'
auxfile_cutoff = 100
parser_matching_size = 9128
......
......@@ -388,7 +388,9 @@ class Domain:
quantity = domain_quantities.get(quantity_name, None)
if quantity is None:
domain_quantities[quantity_name] = DomainQuantity()
quantity = DomainQuantity()
quantity.domain = name
domain_quantities[quantity_name] = quantity
# add all domain quantities
for quantity_name, quantity in domain_quantities.items():
......
......@@ -18,12 +18,6 @@ from typing import List
from nomad.parsing import AbstractParserBackend
from nomad.utils import get_logger
s_system = 'section_system'
s_scc = 'section_single_configuration_calculation'
s_frame_sequence = 'section_frame_sequence'
r_scc_to_system = 'single_configuration_calculation_to_system_ref'
r_frame_sequence_local_frames = 'frame_sequence_local_frames_ref'
class Normalizer(metaclass=ABCMeta):
"""
......@@ -34,7 +28,7 @@ class Normalizer(metaclass=ABCMeta):
backend: The backend used to read and write data from and to.
"""
domain = 'DFT'
domain = 'dft'
""" The domain this normalizer should be used in. Default for all normalizer is 'DFT'. """
def __init__(self, backend: AbstractParserBackend) -> None:
......@@ -93,50 +87,40 @@ class SystemBasedNormalizer(Normalizer, metaclass=ABCMeta):
Attempt to find a single section_system that is representative for the
entry. The selection depends on the type of calculation.
"""
system_idx = None
# Try to find a frame sequence, only first found is considered
try:
frame_seqs = self._backend[s_frame_sequence]
frame_seq = frame_seqs[0]
sampling_method_idx = frame_seq["frame_sequence_to_sampling_ref"]
sec_sampling_method = self._backend["section_sampling_method"][sampling_method_idx]
sampling_method = sec_sampling_method["sampling_method"]
frames = frame_seq["frame_sequence_local_frames_ref"]
if sampling_method == "molecular_dynamics":
frame_seq = self._backend['section_frame_sequence'][0]
sampling_method_idx = frame_seq['frame_sequence_to_sampling_ref']
sec_sampling_method = self._backend['section_sampling_method'][sampling_method_idx]
sampling_method = sec_sampling_method['sampling_method']
frames = frame_seq['frame_sequence_local_frames_ref']
if sampling_method == 'molecular_dynamics':
scc_idx = frames[0]
else:
scc_idx = frames[-1]
scc = self._backend[s_scc][scc_idx]
system_idx = scc["single_configuration_calculation_to_system_ref"]
scc = self._backend['section_single_configuration_calculation'][scc_idx]
return scc['single_configuration_calculation_to_system_ref']
except Exception:
frame_seqs = []
pass
# If no frame sequences detected, try to find scc
if len(frame_seqs) == 0:
try:
sccs = self._backend[s_scc]
scc = sccs[-1]
system_idx = scc["single_configuration_calculation_to_system_ref"]
except Exception:
sccs = []
# If no sccs exist, try to find systems
if len(sccs) == 0:
try:
systems = self._backend.get_sections(s_system)
system_idx = systems[-1]
except Exception:
sccs = []
if system_idx is None:
self.logger.error('no "representative" section system found')
else:
self.logger.info(
'chose "representative" system for normalization',
)
try:
sccs = self._backend['section_single_configuration_calculation']
scc = sccs[-1]
return scc['single_configuration_calculation_to_system_ref']
except Exception:
pass
# If no sccs exist, try to find systems
try:
systems = self._backend.get_sections('section_system')
return systems[-1]
except Exception:
pass
self.logger.error('no "representative" section system found')
return system_idx
return None
def __normalize_system(self, g_index, representative, logger=None) -> bool:
try:
......@@ -157,11 +141,12 @@ class SystemBasedNormalizer(Normalizer, metaclass=ABCMeta):
def normalize(self, logger=None) -> None:
super().normalize(logger)
all_systems = self._backend.get_sections(s_system)
all_systems = self._backend.get_sections('section_system')
# Process representative system first
representative_system_idx = self.__representative_system()
if representative_system_idx is not None:
self.logger.info('chose "representative" section system')
self.__normalize_system(representative_system_idx, True, logger)
# All the rest if requested
......
......@@ -286,7 +286,7 @@ class SearchRequest:
There is also scrolling for quantities to go through all quantity values. There is no
paging for aggregations.
'''
def __init__(self, domain: str, query=None):
def __init__(self, domain: str = config.default_domain, query=None):
self._domain = domain
self._query = query
self._search = Search(index=config.elastic.index_name)
......
......@@ -20,7 +20,7 @@ import json
import re
import shutil
from nomad import utils, infrastructure, config, datamodel
from nomad import utils, infrastructure, config
from nomad.files import UploadFiles, StagingUploadFiles, PublicUploadFiles
from nomad.processing import Upload, Calc
from nomad.processing.base import task as task_decorator, FAILURE, SUCCESS
......@@ -106,6 +106,10 @@ def assert_processing(upload: Upload, published: bool = False):
with upload_files.raw_file(path) as f:
f.read()
# check some domain metadata
assert calc.metadata['n_atoms'] > 0
assert len(calc.metadata['atoms']) > 0
assert upload.get_calc(calc.calc_id).metadata is not None
......@@ -378,14 +382,14 @@ def test_malicious_parser_task_failure(proc_infra, failure, test_user):
assert len(calc.errors) == 1
def test_ems_data(proc_infra, test_user, monkeypatch):
monkeypatch.setattr('nomad.config.domain', 'ems')
monkeypatch.setattr('nomad.datamodel.Domain.instance', datamodel.Domain.instances['ems'])
monkeypatch.setattr('nomad.datamodel.CalcWithMetadata', datamodel.Domain.instance.domain_entry_class)
def test_ems_data(proc_infra, test_user):
upload = run_processing(('test_ems_upload', 'tests/data/proc/example_ems.zip'), test_user)
additional_keys = ['method', 'experiment_location', 'experiment_time', 'formula', 'chemical']
additional_keys = [
'ems.method', 'ems.experiment_location', 'ems.experiment_time', 'ems.formula',
'ems.chemical']
assert upload.total_calcs == 1
assert len(upload.calcs) == 1
......
......@@ -151,18 +151,18 @@ def test_search_totals(elastic, example_search_data):
def test_search_exclude(elastic, example_search_data):
for item in SearchRequest().execute_paginated()['results']:
assert 'atoms' in item
assert 'dft.atoms' in item
for item in SearchRequest().exclude('atoms').execute_paginated()['results']:
assert 'atoms' not in item
for item in SearchRequest().exclude('dft.atoms').execute_paginated()['results']:
assert 'dft.atoms' not in item
def test_search_include(elastic, example_search_data):
for item in SearchRequest().execute_paginated()['results']:
assert 'atoms' in item
assert 'dft.atoms' in item
for item in SearchRequest().include('calc_id').execute_paginated()['results']:
assert 'atoms' not in item
assert 'dft.atoms' not in item
assert 'calc_id' in item
......@@ -236,7 +236,6 @@ def assert_search_upload(upload: datamodel.UploadWithMetadata, additional_keys:
assert key in hit
for key in additional_keys:
print(hit.keys())
assert key in hit
assert hit[key] != config.services.unavailable_value
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment