Commit 3523a24b authored by Markus Scheidgen

Continued working on normalizers. Added symmetry normalizer.

parent b76cf7b2
@@ -164,6 +164,10 @@ dependencies = [
PythonGit(
name='normalizers/stats',
git_url='https://gitlab.mpcdf.mpg.de/nomad-lab/normalizer-stats.git',
git_commit='nomad-xt'),
PythonGit(
name='normalizers/symmetry',
git_url='https://gitlab.mpcdf.mpg.de/nomad-lab/normalizer-symmetry',
git_commit='master')
]
import numpy as np
import logging

import setup_paths  # legacy NOMAD-coe dependency path setup
from ase import Atoms
from systax import Class3DAnalyzer
from systax.exceptions import SystaxError
import systax.geometry

from nomad.normalizing import Normalizer
class SymmetryNormalizer(Normalizer):
    """
    This is basically a copy of the legacy NOMAD-coe symmetry normalizer.
    """
    def normalize(self) -> None:
        # TODO maybe write a reasonable backend first?
        backend = self._backend
        for g_index in backend.get_sections('section_system'):
            try:
                atom_labels = backend.get_value('atom_labels', g_index)
                atom_pos = backend.get_value('atom_positions', g_index)

                # Try to first read the cell information from the renamed metainfo
                try:
                    cell = backend.get_value('lattice_vectors', g_index)
                except KeyError:
                    cell = backend.get_value('simulation_cell', g_index)

                # the value is a list with a single entry
                pbc = backend.get_value('configuration_periodic_dimensions', g_index)
                pbc = pbc[0]

            # If these values could not be read, then skip this system
            except KeyError:
                logging.exception(
                    'The necessary information is not available for system %d' % g_index)
                continue

            # Make the data into numpy arrays; the values still arrive in the
            # legacy {"flatData": ..., "shape": ...} form
            atom_labels = np.array(atom_labels["flatData"]).reshape(atom_labels["shape"])
            atom_pos = np.array(atom_pos["flatData"]).reshape(atom_pos["shape"])
            cell = np.array(cell["flatData"]).reshape(cell["shape"])
            pbc = np.array(pbc["flatData"]).reshape(pbc["shape"])

            # The pbc should be defined as a single-dimensional list
            if len(pbc.shape) == 2:
                pbc = pbc[0, :]

            # If not all dimensions are defined to be periodic, skip this system,
            # as it cannot represent a crystal with symmetries
            if not pbc.all():
                continue

            try:
                # the metainfo stores SI units (m), ASE expects Angstrom
                atoms = Atoms(
                    positions=1e10 * atom_pos,
                    symbols=atom_labels,
                    cell=1e10 * cell,
                    pbc=pbc
                )
            # If there is an issue in creating the Atoms object, e.g. because the
            # labels are invalid, then skip this system
            except Exception:
                logging.exception(
                    'Could not create an ASE.Atoms object for system %d. The '
                    'calculation might be using customized atomic labels.' % g_index)
                continue

            # Figure out the dimensionality of the system by using the
            # dimensionality detection included in the systax package. If the
            # system is not 3D, then it is skipped.
            try:
                dim, _ = systax.geometry.get_dimensionality(
                    atoms,
                    cluster_threshold=3.0)
            # If this exception is thrown, the dimensionality could not be detected
            # because there are multiple energetically isolated components in the
            # system.
            except SystaxError:
                continue

            if dim != 3:
                continue

            try:
                analyzer = Class3DAnalyzer(atoms)
                space_group_number = analyzer.get_space_group_number()
                hall_number = analyzer.get_hall_number()
                hall_symbol = analyzer.get_hall_symbol()
                international_short = analyzer.get_space_group_international_short()
                point_group = analyzer.get_point_group()
                crystal_system = analyzer.get_crystal_system()
                bravais_lattice = analyzer.get_bravais_lattice()

                conv_sys = analyzer._get_spglib_conventional_system()
                conv_pos = conv_sys.get_scaled_positions()
                conv_cell = conv_sys.get_cell()
                conv_num = conv_sys.get_atomic_numbers()
                conv_wyckoff = analyzer._get_spglib_wyckoff_letters_conventional()
                conv_equivalent_atoms = analyzer._get_spglib_equivalent_atoms_conventional()

                prim_sys = analyzer._get_spglib_primitive_system()
                prim_pos = prim_sys.get_scaled_positions()
                prim_cell = prim_sys.get_cell()
                prim_num = prim_sys.get_atomic_numbers()
                prim_wyckoff = analyzer._get_spglib_wyckoff_letters_primitive()
                prim_equivalent_atoms = analyzer._get_spglib_equivalent_atoms_primitive()

                orig_wyckoff = analyzer.get_wyckoff_letters_original()
                orig_equivalent_atoms = analyzer.get_equivalent_atoms_original()

                transform = analyzer._get_spglib_transformation_matrix()
                origin_shift = analyzer._get_spglib_origin_shift()
            # If there is an issue getting the symmetry data (happens e.g. when
            # atoms overlap), then skip this system
            except Exception:
                logging.exception(
                    'Error in getting the symmetry information for system %d. This '
                    'can e.g. be caused by overlapping atoms.' % g_index)
                continue

            # Push the derived values to the backend, within the context of the
            # already existing section_system.
            # TODO the full nmd:// context URI has to be reconstructed here
            context = 'section_system/%d' % g_index
            backend.openContext(context)

            symGid = backend.openSection("section_symmetry")
            backend.addValue("symmetry_method", "spg_normalized")
            backend.addValue("space_group_number", space_group_number)
            backend.addValue("hall_number", hall_number)
            backend.addValue("hall_symbol", hall_symbol)
            backend.addValue("international_short_symbol", international_short)
            backend.addValue("point_group", point_group)
            backend.addValue("crystal_system", crystal_system)
            backend.addValue("bravais_lattice", bravais_lattice)
            backend.addArrayValues("origin_shift", origin_shift)
            backend.addArrayValues("transformation_matrix", transform)

            stdGid = backend.openSection("section_std_system")
            backend.addArrayValues("lattice_vectors_std", conv_cell)
            backend.addArrayValues("atom_positions_std", conv_pos)
            backend.addArrayValues("atomic_numbers_std", conv_num)
            backend.addArrayValues("wyckoff_letters_std", conv_wyckoff)
            backend.addArrayValues("equivalent_atoms_std", conv_equivalent_atoms)
            backend.closeSection("section_std_system", stdGid)

            primGid = backend.openSection("section_primitive_system")
            backend.addArrayValues("lattice_vectors_primitive", prim_cell)
            backend.addArrayValues("atom_positions_primitive", prim_pos)
            backend.addArrayValues("atomic_numbers_primitive", prim_num)
            backend.addArrayValues("wyckoff_letters_primitive", prim_wyckoff)
            backend.addArrayValues("equivalent_atoms_primitive", prim_equivalent_atoms)
            backend.closeSection("section_primitive_system", primGid)

            origGid = backend.openSection("section_original_system")
            backend.addArrayValues("wyckoff_letters_original", orig_wyckoff)
            backend.addArrayValues("equivalent_atoms_original", orig_equivalent_atoms)
            backend.closeSection("section_original_system", origGid)

            backend.closeSection("section_symmetry", symGid)
            backend.closeContext(context)

        backend.finishedParsingSession("ParseSuccess", None)
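Usage note (not part of this commit): once a parser has filled a :class:`LocalBackend`, this normalizer can be run on it directly and the result serialized as archive JSON. A minimal, hypothetical driver:

from nomad.normalizing import SymmetryNormalizer

# `backend` is a LocalBackend that a parser has already filled
SymmetryNormalizer(backend).normalize()
with open('archive.json', 'w') as out:
    backend.write_json(out)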
# Copyright 2018 Markus Scheidgen
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .normalizer import Normalizer
from .system import SystemNormalizer
from .symmetry import SymmetryNormalizer
"""
After parsing calculations have to be normalized with a set of *normalizers*.
In NOMAD-coe those were programmed in python (we'll reuse) and scala (we'll rewrite).
"""
normalizers = [
SystemNormalizer,
SymmetryNormalizer
]
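A driver only has to iterate this list to run all registered normalizers on a parsed backend. A minimal sketch; the helper function is an assumption, not part of this commit:

from nomad.parsing import AbstractParserBackend


def run_normalizers(backend: AbstractParserBackend) -> None:
    # instantiate and run each registered normalizer, in list order
    for normalizer_class in normalizers:
        normalizer_class(backend).normalize()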
from abc import ABCMeta, abstractmethod
from nomad.parsing import AbstractParserBackend
class Normalizer(metaclass=ABCMeta):
    """
    A base class for normalizers. Normalizers work on a
    :class:`AbstractParserBackend` instance for reading and writing.
    """
    def __init__(self, backend: AbstractParserBackend) -> None:
        self._backend = backend

    @abstractmethod
    def normalize(self) -> None:
        pass
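For illustration, a concrete normalizer only needs to implement :func:`normalize` and can use the backend's read extensions; a hypothetical, read-only example:

import logging


class AtomCountNormalizer(Normalizer):
    """ Hypothetical example normalizer that logs the atom count per system. """
    def normalize(self) -> None:
        for g_index in self._backend.get_sections('section_system'):
            atom_labels = self._backend.get_value('atom_labels', g_index)
            logging.debug('system %d has %d atoms', g_index, len(atom_labels))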
# Copyright 2018 Markus Scheidgen
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from nomad.normalizing import Normalizer
from symmetrynormalizer.symmetry_analysis import normalize
class SymmetryNormalizer(Normalizer):
    """
    This is basically a copy of the legacy NOMAD-coe symmetry normalizer.
    """
    def normalize(self) -> None:
        quantities = [
            'atom_labels',
            'atom_positions',
            'lattice_vectors',
            'simulation_cell',
            'configuration_periodic_dimensions'
        ]
        for g_index in self._backend.get_sections('section_system'):
            input_data = dict()
            for quantity in quantities:
                try:
                    input_data[quantity] = self._backend.get_value(quantity, g_index)
                except KeyError:
                    # not all quantities are set for every system
                    pass

            normalize(self._backend, input_data)
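For reference, the values gathered into `input_data` still arrive in the legacy backend's array form, i.e. dictionaries with "flatData" and "shape" keys (compare the reshaping in the analysis code above). An illustrative example for a rock salt cell; the concrete values are made up:

input_data = {
    'atom_labels': {'flatData': ['Na', 'Cl'], 'shape': [2]},
    # positions and cell are in SI units (m)
    'atom_positions': {
        'flatData': [0.0, 0.0, 0.0, 2.8e-10, 2.8e-10, 2.8e-10],
        'shape': [2, 3]},
    'lattice_vectors': {
        'flatData': [5.6e-10, 0, 0, 0, 5.6e-10, 0, 0, 0, 5.6e-10],
        'shape': [3, 3]},
    # a list with a single entry, as in the analysis code above
    'configuration_periodic_dimensions': [
        {'flatData': [True, True, True], 'shape': [3]}]
}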
@@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from abc import ABCMeta, abstractmethod
import ase
import numpy
import spglib
@@ -21,28 +20,13 @@ from nomadcore.json_support import addShasOfJson
from statsnormalizer.stats import crystalSystem
from statsnormalizer.classify_structure import ClassifyStructure
from nomad.parsing import AbstractParserBackend
from nomad.normalizing import Normalizer
"""
After parsing calculations have to be normalized with a set of *normalizers*.
In NOMAD-coe those were programmed in python (we'll reuse) and scala (we'll rewrite).
"""
# TODO: check what is wrong, the commented meta names seem not to exist
# in the current meta info
class Normalizer(metaclass=ABCMeta):
"""
A base class for normalizers. Normalizers work on a :class:`AbstractParserBackend` instance
for read and write.
"""
def __init__(self, backend: AbstractParserBackend) -> None:
self._backend = backend
@abstractmethod
def normalize(self) -> None:
pass
class SystemNomalizer(Normalizer):
class SystemNormalizer(Normalizer):
"""
This normalizer performs all system (atoms, cells, etc.) related normalizations
of the legacy NOMAD-coe *stats* normalizer.
@@ -66,7 +50,7 @@ class SystemNomalizer(Normalizer):
atom_labels = backend.get_value('atom_labels', g_index)
atom_species = backend.get_value('atom_atom_numbers', g_index)
if atom_labels is not None and atom_species is None:
atom_label_to_num = SystemNomalizer.atom_label_to_num
atom_label_to_num = SystemNormalizer.atom_label_to_num
atom_species = [atom_label_to_num(atom_label) for atom_label in atom_labels]
periodic_dirs = backend.get_value('configuration_periodic_dimensions', g_index)
@@ -76,10 +60,10 @@ class SystemNomalizer(Normalizer):
atom_symbols = [ase.data.chemical_symbols[atom_number] for atom_number in atom_species]
formula = ase.Atoms(atom_symbols).get_chemical_formula(mode='all')
formula_reduced = ase.Atoms(atom_symbols).get_chemical_formula(mode='reduce')
if periodic_dirs is not None and any(periodic_dirs):
formula_bulk = formula_reduced
else:
formula_bulk = formula
# if periodic_dirs is not None and any(periodic_dirs):
# formula_bulk = formula_reduced
# else:
# formula_bulk = formula
cell = backend.get_value('simulation_cell', g_index)
if cell is not None:
@@ -133,8 +117,6 @@ class SystemNomalizer(Normalizer):
backend.closeNonOverlappingSection('section_topology')
if symm:
# TODO: check what is wrong, the commented meta names seem not to exist
# in the current meta info
# for quantity in ["number", "international", "hall", "choice", "pointgroup"]:
# v = symm.get(quantity)
# if v is not None:
@@ -149,7 +131,7 @@ class SystemNomalizer(Normalizer):
n = symm.get("number")
if n:
backend.openNonOverlappingSection('section_symmetry')
backend.addValue("bravais_lattice", crystalSystem(n), g_index)
backend.addValue("bravais_lattice", crystalSystem(n))
backend.closeNonOverlappingSection('section_symmetry')
# backend.addValue("chemical_composition", formula, g_index)
@@ -179,5 +161,3 @@ class SystemNomalizer(Normalizer):
def normalize(self) -> None:
for g_index in self._backend.get_sections('section_system'):
self._normalize_section_system(g_index)
normalizers = [SystemNomalizer]
@@ -42,7 +42,7 @@ Parsers in NOMAD-coe use a *backend* to create output.
.. autoclass:: nomad.parsing.LocalBackend
"""
from typing import TextIO, Tuple, List, Any
from typing import TextIO, Tuple, List, Any, Callable
from abc import ABCMeta, abstractmethod
import json
import re
@@ -50,7 +50,7 @@ import importlib
import logging
from nomadcore.local_backend import LocalBackend as LegacyLocalBackend
from nomadcore.local_backend import Section
from nomadcore.local_backend import Section, Results
from nomad.dependencies import dependencies_dict as dependencies, PythonGit
@@ -77,7 +78,8 @@ class DelegatingMeta(ABCMeta):
class AbstractParserBackend(metaclass=ABCMeta):
"""
This ABC provides the parser backend interface used by the NOMAD-coe parsers.
This ABC provides the parser backend interface used by the NOMAD-coe parsers
and normalizers.
"""
@abstractmethod
def metaInfoEnv(self):
@@ -100,6 +101,16 @@ class AbstractParserBackend(metaclass=ABCMeta):
""" Called when the parsing finishes. """
pass
@abstractmethod
def openContext(self, contextUri: str):
""" Open existing archive data to introduce new data into an existing section. """
pass
@abstractmethod
def closeContext(self, contextUri: str):
""" Close priorly opened existing archive data again. """
pass
@abstractmethod
def openSection(self, metaName):
""" Opens a new section and returns its new unique gIndex. """
@@ -176,122 +187,25 @@ class AbstractParserBackend(metaclass=ABCMeta):
""" Used to catch parser warnings. """
pass
# The following are extensions to the original NOMAD-coe parser backend.
# They allow modifying data after the fact.
# The following are extensions to the original NOMAD-coe parser backend.
# They allow access to existing data.
@property
@abstractmethod
def data(self) -> Results:
pass
@abstractmethod
def get_sections(self, meta_name) -> List[int]:
def get_sections(self, meta_name: str) -> List[int]:
""" Return all gIndices for existing sections of the given meta_name. """
pass
@abstractmethod
def get_value(self, metaName, g_index=-1) -> Any:
def get_value(self, metaName: str, g_index=-1) -> Any:
""" Return the value set to the given meta_name in its parent section of the given index. """
pass
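Together with the read extensions above, these context methods let a normalizer amend data that a parser has already written. A hypothetical usage sketch, where `backend` is a filled backend instance and the nmd:// URI layout is an assumption inferred from the prefix handling in :func:`LocalBackend.openContext` below:

# add symmetry data to an already parsed system (illustrative values)
context = 'nmd://some-upload/some-calc/section_run/0/section_system/0'
backend.openContext(context)
symGid = backend.openSection('section_symmetry')
backend.addValue('space_group_number', 225)
backend.closeSection('section_symmetry', symGid)
backend.closeContext(context)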
class LegacyParserBackend(AbstractParserBackend, metaclass=DelegatingMeta):
"""
Simple implementation of :class:`AbstractParserBackend` that delegates all calls to
another parser object that does not necessarily need to descend from the abstract base class.
"""
def __init__(self, legacy_backend):
self._delegate = legacy_backend
class LocalBackend(LegacyParserBackend):
"""
This implementation of :class:`AbstractParserBackend` is an extended version of
NOMAD-coe's ``LocalBackend`` that allows writing the results in an *archive*-style .json.
It can be used like the original, but additionally allows outputting archive JSON
after parsing via :func:`write_json`.
"""
def __init__(self, *args, **kwargs):
delegate = LegacyLocalBackend(*args, **kwargs)
super().__init__(delegate)
self._status = 'none'
self._errors = None
def finishedParsingSession(self, parserStatus, parserErrors, **kwargs):
self._delegate.finishedParsingSession(parserStatus, parserErrors, **kwargs)
self._status = parserStatus
self._errors = parserErrors
def pwarn(self, msg):
logger.debug('Warning in parser: %s' % msg)
def get_value(self, meta_name, g_index=-1):
datamanager = self._delegate.results._datamanagers.get(meta_name)
if datamanager is not None:
sectionmanager = datamanager.superSectionManager
sections = sectionmanager.openSections
if g_index != -1:
sections = [section for section in sections if section.gIndex == g_index]
assert len(sections) == 1
section = sections[0]
return section[meta_name]
def get_sections(self, meta_name):
sections = self._delegate.results[meta_name]
return [section.gIndex for section in sections]
@staticmethod
def _write(json_writer, value):
if isinstance(value, list):
json_writer.open_array()
for item in value:
LocalBackend._write(json_writer, item)
json_writer.close_array()
elif isinstance(value, Section):
section = value
json_writer.open_object()
json_writer.key_value('_name', section.name)
json_writer.key_value('_gIndex', section.gIndex)
for name, value in section.items():
json_writer.key(name)
LocalBackend._write(json_writer, value)
json_writer.close_object()
else:
json_writer.value(value)
@property
def status(self) -> ParserStatus:
""" Returns status and potential errors. """
return (self._status, self._errors)
def write_json(self, out: TextIO, pretty=True):
"""
Writes the results stored in the backend after parsing in an 'archive'.json
style format.
Arguments:
out: The file-like that is used to write the json to.
pretty: Format the json or not.
"""
json_writer = JSONStreamWriter(out, pretty=pretty)
json_writer.open_object()
json_writer.key_value('parser_status', self._status)
if self._errors is not None and len(self._errors) > 0:
json_writer.key('parser_errors')
json_writer.open_array()
for error in self._errors:
json_writer.value(error)
json_writer.close_array()
json_writer.key('section_run')
json_writer.open_array()
for run in self._delegate.results['section_run']:
LocalBackend._write(json_writer, run)
json_writer.close_array()
json_writer.close_object()
json_writer.close()
class JSONStreamWriter():
START = 0
OBJECT = 1
@@ -410,6 +324,132 @@ class JSONStreamWriter():
assert self._states[-1] == JSONStreamWriter.START, "Something was not closed."
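As :func:`write_json` above illustrates, the stream writer is driven by explicit open/close calls and checks on :func:`close` that all scopes were balanced. A small self-contained usage sketch:

import io

out = io.StringIO()
writer = JSONStreamWriter(out, pretty=True)
writer.open_object()
writer.key_value('parser_status', 'ParseSuccess')
writer.key('section_run')
writer.open_array()
writer.close_array()
writer.close_object()
writer.close()
print(out.getvalue())  # the JSON document built above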
class LegacyParserBackend(AbstractParserBackend, metaclass=DelegatingMeta):
"""
Simple implementation of :class:`AbstractParserBackend` that delegates all calls to
another parser object that does not necessarily need to descend from the abstract base class.
"""
def __init__(self, legacy_backend):
self._delegate = legacy_backend
class LocalBackend(LegacyParserBackend):
"""
This implementation of :class:`AbstractParserBackend` is an extended version of
NOMAD-coe's ``LocalBackend`` that allows writing the results in an *archive*-style .json.
It can be used like the original, but additionally allows outputting archive JSON
after parsing via :func:`write_json`.
"""
def __init__(self, *args, **kwargs):
delegate = LegacyLocalBackend(*args, **kwargs)
super().__init__(delegate)
self._status = 'none'
self._errors = None
def finishedParsingSession(self, parserStatus, parserErrors, **kwargs):
self._delegate.finishedParsingSession(parserStatus, parserErrors, **kwargs)
self._status = parserStatus
self._errors = parserErrors
def pwarn(self, msg):
logger.debug('Warning in parser: %s' % msg)
def openContext(self, contextUri: str):
# str.replace would treat the pattern literally; re.sub is needed to
# strip the nmd://<upload>/<calc>/ prefix
path_str = re.sub(r'nmd://[^/]+/[^/]+/', '', contextUri)
path = path_str.split('/')
# TODO actually open the sections referenced by path
def closeContext(self, contextUri):
pass
@property
def data(self) -> Results:
return self._delegate.results
def get_value(self, meta_name, g_index=-1):
datamanager = self._delegate.results._datamanagers.get(meta_name)
if datamanager is not None:
sectionmanager = datamanager.superSectionManager
sections = sectionmanager.openSections
if g_index != -1:
sections = [section for section in sections if section.gIndex == g_index]
assert len(sections) == 1
section = sections[0]
return section[meta_name]
def get_sections(self, meta_name):
sections = self._delegate.results[meta_name]
return [section.gIndex for section in sections]
@staticmethod
def _write(
json_writer: JSONStreamWriter,
value: Any,
filter: Callable[[str, Any], Any] = None):
if isinstance(value, list):
json_writer.open_array()
for item in value:
LocalBackend._write(json_writer, item, filter=filter)
json_writer.close_array()
elif isinstance(value, Section):
section = value
json_writer.open_object()
json_writer.key_value('_name', section.name)
json_writer.key_value('_gIndex', section.gIndex)