Commit 435634c1 authored by Lauri Himanen's avatar Lauri Himanen
Browse files

Fixed multiple issues with the migration to MI2.

parent 6fe2d9a5
Pipeline #72495 failed with stages
in 24 minutes and 57 seconds
......@@ -30,6 +30,7 @@ import bs4
from matid import SymmetryAnalyzer
from nomad import processing as proc, search, datamodel, infrastructure, utils, config
from nomad import atomutils
from nomad import normalizing
from nomad.cli.cli import cli
......@@ -504,7 +505,7 @@ def prototypes_update(ctx, filepath, matches_only):
# letters to the data.
if spg_number == aflow_spg_number:
atomic_numbers = norm_system.get_atomic_numbers()
normalized_wyckoff_matid = normalizing.aflow_prototypes.get_normalized_wyckoff(atomic_numbers, wyckoff_matid)
normalized_wyckoff_matid = atomutils.get_normalized_wyckoff(atomic_numbers, wyckoff_matid)
prototype["normalized_wyckoff_matid"] = normalized_wyckoff_matid
else:
n_unmatched += 1
......
......@@ -19,46 +19,52 @@ import numpy as np
class DosNormalizer(Normalizer):
def normalize(self, logger=None) -> None:
if logger is not None:
self.logger = logger.bind(normalizer=self.__class__.__name__)
# Do nothing if section_run is not present
if self.section_run is None:
return
# 'scc': single_configuration_calculation
section_scc_indices = self._backend.get_sections('section_single_configuration_calculation')
section_sccs = self.section_run.section_single_configuration_calculation
if section_sccs is None:
return
for scc_index in section_scc_indices:
section_dos_indices = self._backend.get_sections('section_dos', scc_index)
for scc in section_sccs:
section_dos = scc.section_dos
if section_dos is None:
continue
for dos_index in section_dos_indices:
try:
dos = self._backend.get_value('dos_values', dos_index) # a numpy.ndarray
except KeyError:
# section dos without doc_values
for dos in section_dos:
dos_values = dos.dos_values
if dos_values is None:
# section dos without dos_values
continue
try:
system_index = self._backend.get_value(
'single_configuration_calculation_to_system_ref', scc_index)
except KeyError:
system_index = scc_index
try:
atom_positions = self._backend.get_value('atom_positions', system_index)
lattice_vectors = self._backend.get_value('lattice_vectors', system_index)
except IndexError:
system = scc.single_configuration_calculation_to_system_ref
if system is None:
self.logger.error('referenced system for dos calculation could not be found')
continue
atom_positions = system.atom_positions
lattice_vectors = system.lattice_vectors
if atom_positions is None:
self.logger.error('required quantity atom_positions is not available')
return
except KeyError as e:
self.logger.error('required quantity %s is not available' % e.args[0])
if lattice_vectors is None:
self.logger.error('required quantity lattice_vectors is not available')
return
number_of_atoms = np.shape(atom_positions)[0]
unit_cell_volume = np.linalg.det(lattice_vectors.magnitude)
# Final quantities
dos_normed = dos / (number_of_atoms * unit_cell_volume)
dos_normed = dos_values / (number_of_atoms * unit_cell_volume)
# Add quantities to NOMAD's Metainfo
scc_url = '/section_run/0/section_single_configuration_calculation/%d/section_dos/0' % scc_index
scc_url = '/section_run/0/section_single_configuration_calculation/%d/section_dos/0' % scc.m_parent_index
self._backend.openContext(scc_url)
self._backend.addArrayValues('dos_values_normalized', dos_normed, dos_index)
dos.dos_values_normalized = dos_normed
self._backend.closeContext(scc_url)
......@@ -64,7 +64,7 @@ class EncyclopediaNormalizer(Normalizer):
# No sequences, only a few calculations
if n_scc <= 3 and n_frame_seq == 0:
program_name = self._backend["program_name"]
program_name = self.section_run.program_name
if program_name == "elastic":
# TODO move to taylor expansion as soon as data is correct in archive
calc_type = calc_enums.elastic_constants
......@@ -77,9 +77,8 @@ class EncyclopediaNormalizer(Normalizer):
frame_seq = frame_sequences[0]
# See if sampling_method is present
try:
section_sampling_method = frame_seq.frame_sequence_to_sampling_ref
except KeyError:
section_sampling_method = frame_seq.frame_sequence_to_sampling_ref
if section_sampling_method is None:
self.logger.info(
"Cannot determine encyclopedia run type because missing "
"value for frame_sequence_to_sampling_ref."
......@@ -87,15 +86,8 @@ class EncyclopediaNormalizer(Normalizer):
return calc_type
# See if local frames are present
try:
frames = frame_seq.frame_sequence_local_frames_ref
except KeyError:
self.logger.info(
"section_frame_sequence_local_frames not found although a "
"frame_sequence exists."
)
return calc_type
if len(frames) == 0:
frames = frame_seq.frame_sequence_local_frames_ref
if not frames:
self.logger.info("No frames referenced in section_frame_sequence_local_frames.")
return calc_type
......@@ -215,6 +207,15 @@ class EncyclopediaNormalizer(Normalizer):
"""The caller will automatically log if the normalizer succeeds or ends
up with an exception.
"""
# Do nothing if section_run is not present
if self.section_run is None:
self.logger.info(
"Required data is missing or is invalid.",
enc_status="invalid_data",
invalid_metainfo="section_run",
)
return
try:
super().normalize(logger)
......
......@@ -233,19 +233,22 @@ class PropertiesNormalizer():
band_structure.scc_index = int(context.representative_scc_idx)
kpoints = []
energies = []
try:
segments = band_data['section_k_band_segment' + norm]
except KeyError:
segments = band_data['section_k_band_segment' + norm]
if not segments:
return
# Loop over segments
for segment_src in segments:
try:
seg_k_points = segment_src["band_k_points" + norm]
seg_energies = segment_src["band_energies" + norm].magnitude
seg_energies = segment_src["band_energies" + norm]
seg_labels = segment_src['band_segm_labels' + norm]
except KeyError:
except Exception:
return
if seg_k_points is None or seg_energies is None or seg_labels is None:
return
else:
seg_energies = seg_energies.magnitude
if "?" in seg_labels:
return
......
......@@ -34,7 +34,10 @@ class Normalizer(metaclass=ABCMeta):
def __init__(self, backend: Backend) -> None:
self._backend = backend
self.section_run = backend.entry_archive.section_run[0]
try:
self.section_run = backend.entry_archive.section_run[0]
except (AttributeError, IndexError):
self.section_run = None
self.logger = get_logger(__name__)
@abstractmethod
......@@ -165,6 +168,10 @@ class SystemBasedNormalizer(Normalizer, metaclass=ABCMeta):
def normalize(self, logger=None) -> None:
super().normalize(logger)
# If no section run detected, do nothing
if self.section_run is None:
return
# Process representative system first
repr_sys_idx = None
repr_sys = self.__representative_system()
......
......@@ -227,7 +227,7 @@ class SystemNormalizer(SystemBasedNormalizer):
# Save the Atoms as a temporary variable
system.m_cache["representative_atoms"] = atoms
# system type analysis
# System type analysis
if atom_positions is not None:
with utils.timer(
self.logger, 'system classification executed',
......@@ -236,12 +236,11 @@ class SystemNormalizer(SystemBasedNormalizer):
system_type = self._backend.get_value("system_type")
# symmetry analysis
# Symmetry analysis
if atom_positions is not None and (lattice_vectors is not None or not any(pbc)) and system_type == "bulk":
with utils.timer(
self.logger, 'symmetry analysis executed',
system_size=len(atoms)):
self.symmetry_analysis(system, atoms)
return True
......@@ -336,11 +335,11 @@ class SystemNormalizer(SystemBasedNormalizer):
self.logger.error('matid symmetry analysis fails with exception', exc_info=e)
return
# Write data extracted from MatID's symmetry analysis to the backend.
sec_symmetry = system.m_create(section_symmetry)
# Write data extracted from MatID's symmetry analysis to the
# representative section_system.
sec_symmetry = self._backend.openSection("section_symmetry", return_section=True)
sec_symmetry.m_cache["symmetry_analyzer"] = symm
# TODO: @dts, should we change the symmetry_method to MATID?
sec_symmetry.symmetry_method = 'MatID (spg)'
sec_symmetry.space_group_number = space_group_number
sec_symmetry.hall_number = hall_number
......@@ -352,21 +351,21 @@ class SystemNormalizer(SystemBasedNormalizer):
sec_symmetry.origin_shift = origin_shift
sec_symmetry.transformation_matrix = transform
sec_std = sec_symmetry.m_create(section_std_system)
sec_std = self._backend.openSection("section_std_system", return_section=True)
sec_std.lattice_vectors_std = conv_cell
sec_std.atom_positions_std = conv_pos
sec_std.atomic_numbers_std = conv_num
sec_std.wyckoff_letters_std = conv_wyckoff
sec_std.equivalent_atoms_std = conv_equivalent_atoms
sec_prim = sec_symmetry.m_create(section_primitive_system)
sec_prim = self._backend.openSection("section_primitive_system", return_section=True)
sec_prim.lattice_vectors_primitive = prim_cell
sec_prim.atom_positions_primitive = prim_pos
sec_prim.atomic_numbers_primitive = prim_num
sec_prim.wyckoff_letters_primitive = prim_wyckoff
sec_prim.equivalent_atoms_primitive = prim_equivalent_atoms
sec_orig = sec_symmetry.m_create(section_original_system)
sec_orig = self._backend.openSection("section_original_system", return_section=True)
sec_orig.wyckoff_letters_original = orig_wyckoff
sec_orig.equivalent_atoms_original = orig_equivalent_atoms
......@@ -433,7 +432,7 @@ class SystemNormalizer(SystemBasedNormalizer):
aflow_prototype_name,
protoDict.get("Pearsons Symbol", "-")
)
sec_prototype = system.m_create(section_prototype)
sec_prototype = self._backend.openSection("section_prototype", return_section=True)
sec_prototype.prototype_label = prototype_label
sec_prototype.prototype_aflow_id = aflow_prototype_id
sec_prototype.prototype_aflow_url = aflow_prototype_url
......
......@@ -313,12 +313,18 @@ class Backend(AbstractParserBackend):
# del(self.__open_sections[(section.m_def, -1)])
# del(self.__open_sections[(section.m_def, section.m_parent_index)])
def openSection(self, name, parent_index: int = -1):
def openSection(self, name, parent_index: int = -1, return_section=False):
'''
It will assume that there is a sub-section def with the given name.
It will use the latest opened section of the sub-sections parent as the parent
for the new section.
An exception to this are the known root sections, e.g. 'section_run'.
Args:
name: The section name
parent_index: Index of the parent section in which the section will
be opened.
return_section: If True, returns the section instead of the section index.
'''
section_def = self.resolve_definition(name, Section)
......@@ -335,6 +341,9 @@ class Backend(AbstractParserBackend):
section = parent.m_create(section_def.section_cls, sub_section_def)
self.__open(section)
if return_section:
return section
return section.m_parent_index
def get_open_section_for_quantity(self, name, g_index):
......
......@@ -17,7 +17,6 @@ import pytest
from ase import Atoms
from nomad.parsing.legacy import Backend
from nomad.datamodel import EntryArchive
from nomad.normalizing import normalizers
from tests.test_parsing import parsed_vasp_example # pylint: disable=unused-import
......@@ -39,8 +38,8 @@ def run_normalize(backend: Backend) -> Backend:
@pytest.fixture
def normalized_vasp_example(parsed_vasp_example: Backend) -> EntryArchive:
return run_normalize(parsed_vasp_example).entry_archive
def normalized_vasp_example(parsed_vasp_example: Backend) -> Backend:
return run_normalize(parsed_vasp_example)
@pytest.fixture
......@@ -53,7 +52,7 @@ def normalized_template_example(parsed_template_example) -> Backend:
return run_normalize(parsed_template_example)
def run_normalize_for_structure(atoms: Atoms) -> EntryArchive:
def run_normalize_for_structure(atoms: Atoms) -> Backend:
template = parsed_template_no_system()
# Fill structural information
......@@ -64,163 +63,163 @@ def run_normalize_for_structure(atoms: Atoms) -> EntryArchive:
template.addArrayValues("configuration_periodic_dimensions", atoms.get_pbc())
template.closeSection("section_system", gid)
return run_normalize(template).entry_archive
return run_normalize(template)
@pytest.fixture(scope='session')
def single_point(two_d) -> EntryArchive:
def single_point(two_d) -> Backend:
return two_d
@pytest.fixture(scope='session')
def gw(two_d) -> EntryArchive:
def gw(two_d) -> Backend:
parser_name = "parsers/template"
filepath = "tests/data/normalizers/gw.json"
backend = parse_file((parser_name, filepath))
backend = run_normalize(backend)
return backend.entry_archive
return backend
@pytest.fixture(scope='session')
def geometry_optimization() -> EntryArchive:
def geometry_optimization() -> Backend:
parser_name = "parsers/template"
filepath = "tests/data/normalizers/fcc_crystal_structure.json"
backend = parse_file((parser_name, filepath))
backend = run_normalize(backend)
return backend.entry_archive
return backend
@pytest.fixture(scope='session')
def molecular_dynamics(bulk) -> EntryArchive:
def molecular_dynamics(bulk) -> Backend:
return bulk
@pytest.fixture(scope='session')
def phonon() -> EntryArchive:
def phonon() -> Backend:
parser_name = "parsers/phonopy"
filepath = "tests/data/parsers/phonopy/phonopy-FHI-aims-displacement-01/control.in"
backend = parse_file((parser_name, filepath))
backend = run_normalize(backend)
return backend.entry_archive
return backend
@pytest.fixture(scope='session')
def bulk() -> EntryArchive:
def bulk() -> Backend:
parser_name = "parsers/cp2k"
filepath = "tests/data/normalizers/cp2k_bulk_md/si_md.out"
backend = parse_file((parser_name, filepath))
backend = run_normalize(backend)
return backend.entry_archive
return backend
@pytest.fixture(scope='session')
def two_d() -> EntryArchive:
def two_d() -> Backend:
parser_name = "parsers/fhi-aims"
filepath = "tests/data/normalizers/fhiaims_2d_singlepoint/aims.out"
backend = parse_file((parser_name, filepath))
backend = run_normalize(backend)
return backend.entry_archive
return backend
@pytest.fixture(scope='session')
def surface() -> EntryArchive:
def surface() -> Backend:
parser_name = "parsers/fhi-aims"
filepath = "tests/data/normalizers/fhiaims_surface_singlepoint/PBE-light+tight-rho2.out"
backend = parse_file((parser_name, filepath))
backend = run_normalize(backend)
return backend.entry_archive
return backend
@pytest.fixture(scope='session')
def molecule() -> EntryArchive:
def molecule() -> Backend:
parser_name = "parsers/fhi-aims"
filepath = "tests/data/normalizers/fhiaims_molecule_singlepoint/aims.out"
backend = parse_file((parser_name, filepath))
backend = run_normalize(backend)
return backend.entry_archive
return backend
@pytest.fixture(scope='session')
def atom() -> EntryArchive:
def atom() -> Backend:
parser_name = "parsers/gaussian"
filepath = "tests/data/normalizers/gaussian_atom_singlepoint/m9b7.out"
backend = parse_file((parser_name, filepath))
backend = run_normalize(backend)
return backend.entry_archive
return backend
@pytest.fixture(scope='session')
def one_d() -> EntryArchive:
def one_d() -> Backend:
parser_name = "parsers/exciting"
filepath = "tests/data/normalizers/exciting_1d_singlepoint/INFO.OUT"
backend = parse_file((parser_name, filepath))
backend = run_normalize(backend)
return backend.entry_archive
return backend
@pytest.fixture(scope='session')
def bands_unpolarized_gap_indirect() -> EntryArchive:
def bands_unpolarized_gap_indirect() -> Backend:
parser_name = "parsers/vasp"
filepath = "tests/data/normalizers/band_structure/unpolarized_gap/vasprun.xml.bands.xz"
backend = parse_file((parser_name, filepath))
backend = run_normalize(backend)
return backend.entry_archive
return backend
@pytest.fixture(scope='session')
def bands_polarized_no_gap() -> EntryArchive:
def bands_polarized_no_gap() -> Backend:
parser_name = "parsers/vasp"
filepath = "tests/data/normalizers/band_structure/polarized_no_gap/vasprun.xml.bands.xz"
backend = parse_file((parser_name, filepath))
backend = run_normalize(backend)
return backend.entry_archive
return backend
@pytest.fixture(scope='session')
def bands_unpolarized_no_gap() -> EntryArchive:
def bands_unpolarized_no_gap() -> Backend:
parser_name = "parsers/vasp"
filepath = "tests/data/normalizers/band_structure/unpolarized_no_gap/vasprun.xml.bands.xz"
backend = parse_file((parser_name, filepath))
backend = run_normalize(backend)
return backend.entry_archive
return backend
@pytest.fixture(scope='session')
def bands_polarized_gap_indirect() -> EntryArchive:
def bands_polarized_gap_indirect() -> Backend:
parser_name = "parsers/vasp"
filepath = "tests/data/normalizers/band_structure/polarized_gap/vasprun.xml.bands.xz"
backend = parse_file((parser_name, filepath))
backend = run_normalize(backend)
return backend.entry_archive
return backend
@pytest.fixture(scope='session')
def dos_polarized_vasp() -> EntryArchive:
def dos_polarized_vasp() -> Backend:
parser_name = "parsers/vasp"
filepath = "tests/data/normalizers/dos/polarized_vasp/vasprun.xml.relax2.xz"
backend = parse_file((parser_name, filepath))
backend = run_normalize(backend)
return backend.entry_archive
return backend
@pytest.fixture(scope='session')
def dos_unpolarized_vasp() -> EntryArchive:
def dos_unpolarized_vasp() -> Backend:
parser_name = "parsers/vasp"
filepath = "tests/data/normalizers/dos/unpolarized_vasp/vasprun.xml.xz"
backend = parse_file((parser_name, filepath))
backend = run_normalize(backend)
return backend.entry_archive
return backend
@pytest.fixture(scope='session')
def hash_exciting() -> EntryArchive:
def hash_exciting() -> Backend:
parser_name = "parsers/exciting"
filepath = "tests/data/normalizers/hashes/exciting/INFO.OUT"
backend = parse_file((parser_name, filepath))
backend = run_normalize(backend)
return backend.entry_archive
return backend
@pytest.fixture(scope='session')
def hash_vasp(bands_unpolarized_gap_indirect) -> EntryArchive:
def hash_vasp(bands_unpolarized_gap_indirect) -> Backend:
return bands_unpolarized_gap_indirect
......@@ -45,7 +45,7 @@ ureg = UnitRegistry()
def test_geometry_optimization(geometry_optimization: EntryArchive):
"""Tests that geometry optimizations are correctly processed."
"""
enc = geometry_optimization.section_encyclopedia
enc = geometry_optimization.entry_archive.section_encyclopedia
calc_type = enc.calculation.calculation_type
assert calc_type == "geometry optimization"
......@@ -53,7 +53,7 @@ def test_geometry_optimization(geometry_optimization: EntryArchive):
def test_molecular_dynamics(molecular_dynamics: EntryArchive):
"""Tests that geometry optimizations are correctly processed."
"""
enc = molecular_dynamics.section_encyclopedia
enc = molecular_dynamics.entry_archive.section_encyclopedia
calc_type = enc.calculation.calculation_type
assert calc_type == "molecular dynamics"
......@@ -70,7 +70,7 @@ def test_molecular_dynamics(molecular_dynamics: EntryArchive):
def test_1d_metainfo(one_d: EntryArchive):
"""Tests that metainfo for 1D systems is correctly processed.
"""
enc = one_d.section_encyclopedia
enc = one_d.entry_archive.section_encyclopedia
# Material
material = enc.material
assert material.material_type == "1D"
......@@ -90,7 +90,7 @@ def test_1d_metainfo(one_d: EntryArchive):
def test_2d_metainfo(two_d: EntryArchive):
"""Tests that metainfo for 2D systems is correctly processed.
"""
enc = two_d.section_encyclopedia
enc = two_d.entry_archive.section_encyclopedia
# Material
material = enc.material
assert material.material_type == "2D"
......@@ -111,7 +111,7 @@ def test_2d_metainfo(two_d: EntryArchive):
def test_bulk_metainfo(bulk: EntryArchive):
"""Tests that metainfo for bulk systems is correctly processed.
"""
enc = bulk.section_encyclopedia
enc = bulk.entry_archive.section_encyclopedia
# Material
material = enc.material
assert material.material_type == "bulk"
......@@ -152,20 +152,20 @@ def test_bulk_metainfo(bulk: EntryArchive):
def test_1d_material_identification():
# Original nanotube
nanotube1 = ase.build.nanotube(4, 4, vacuum=4)
enc = run_normalize_for_structure(nanotube1).section_encyclopedia
enc = run_normalize_for_structure(nanotube1).entry_archive.section_encyclopedia
hash1 = enc.material.material_hash
# Rotated copy
nanotube2 = nanotube1.copy()
nanotube2.rotate(90, "z", rotate_cell=True)
enc = run_normalize_for_structure(nanotube2).section_encyclopedia
enc = run_normalize_for_structure(nanotube2).entry_archive.section_encyclopedia
hash2 = enc.material.material_hash
assert hash2 == hash1
# Longer copy
nanotube3 = nanotube1.copy()
nanotube3 *= [1, 1, 2]
enc = run_normalize_for_structure(nanotube3).section_encyclopedia
enc = run_normalize_for_structure(nanotube3).entry_archive.section_encyclopedia
hash3 = enc.material.material_hash
assert hash3 == hash1
......@@ -176,7 +176,7 @@ def test_1d_material_identification():
pos = nanotube4.get_positions()
pos += 0.2 * np.random.rand(pos.shape[0], pos.shape[1])
nanotube4.set_positions(pos)
enc = run_normalize_for_structure(nanotube4).section_encyclopedia
enc = run_normalize_for_structure(nanotube4).entry_archive.section_encyclopedia
hash4 = enc.material.material_hash
assert hash4 == hash1
......@@ -186,7 +186,7 @@ def test_1d_material_identification():
np.random.seed(4)
pos += 1 * np.random.</