Commit 1995952d authored by Markus Scheidgen's avatar Markus Scheidgen
Browse files

Merge branch 'v0.8.4' into 'master'


See merge request !139
parents cc540ed5 8d9744c5
Pipeline #79921 passed with stages
in 20 minutes and 15 seconds
......@@ -24,12 +24,12 @@ from flask import request
from elasticsearch_dsl import Search, Q, A
from elasticsearch_dsl.utils import AttrDict
from nomad import config, files
from nomad import config, files, infrastructure
from nomad.units import ureg
from nomad.atomutils import get_hill_decomposition
from nomad.datamodel.datamodel import EntryArchive
from .api import api
from .common import enable_gzip
from .auth import authenticate
ns = api.namespace("encyclopedia", description="Access encyclopedia metadata.")
re_formula = re.compile(r"([A-Z][a-z]?)(\d*)")
......@@ -240,14 +240,65 @@ class EncMaterialsResource(Resource):
except Exception as e:
abort(400, message=str(e))
# The queries that correspond to AND queries typically need to access
# multiple calculations at once to find the material ids that
# correspond to the query. To implement this behaviour we need to run
# an initial aggregation that checks that the requested properties are
# present for a material. This is a very crude solution that does not
# scale to complex queries, but I'm not sure we can do much better
# until we have a separate index for materials.
property_map = {
"has_thermal_properties": "",
"has_band_structure": "",
"has_dos": "",
"has_fermi_surface": "",
requested_properties = []
# The size is set very large because all the results need to be
# returned. We cannot get the results in a paginated way with composite
# aggregation, because pipeline aggregations are not compatible with
# them.
agg_parent = A("terms", field="encyclopedia.material.material_id", size=5000000)
for key, value in property_map.items():
if data[key] is True:
agg = A("filter", exists={"field": value})
agg_parent.bucket(key, agg)
if len(requested_properties) > 1:
bool_query = Q(
s = Search(index=config.elastic.index_name)
s = s.query(bool_query)
s.aggs.bucket("materials", agg_parent)
buckets_path = {x: "{}._count".format(x) for x in requested_properties}
script = " && ".join(["params.{} > 0".format(x) for x in requested_properties])
agg_parent.pipeline("selector", A(
s = s.extra(**{
"size": 0,
response = s.execute()
material_ids = [x["key"] for x in response.aggs.materials.buckets]
if len(material_ids) == 0:
abort(404, message="No materials found for the given search criteria or pagination.")
# After finding the material ids that fill the AND conditions, continue
# with a simple OR query.
filters = get_enc_filter()
must_nots = []
musts = []
def add_terms_filter(source, target, query_type="terms"):
    # Only add a filter when the client actually supplied the parameter.
    value = data[source]
    if value is not None:
        filters.append(Q(query_type, **{target: value}))
if len(requested_properties) > 1:
filters.append(Q("terms", encyclopedia__material__material_id=material_ids))
add_terms_filter("material_name", "encyclopedia.material.material_name")
add_terms_filter("structure_type", "encyclopedia.material.bulk.structure_type")
add_terms_filter("space_group_number", "encyclopedia.material.bulk.space_group_number")
......@@ -258,7 +309,8 @@ class EncMaterialsResource(Resource):
add_terms_filter("basis_set_type", "dft.basis_set")
add_terms_filter("code_name", "dft.code_name")
# Add exists filters
# Add exists filters if only one property was requested. The initial
# aggregation will handle multiple simultaneous properties.
def add_exists_filter(source, target):
param = data[source]
if param is not None:
......@@ -267,11 +319,9 @@ class EncMaterialsResource(Resource):
elif param is False:
add_exists_filter("has_thermal_properties", "")
add_exists_filter("has_band_structure", "")
add_exists_filter("has_dos", "")
add_exists_filter("has_fermi_surface", "")
if len(requested_properties) == 1:
prop_name = requested_properties[0]
add_exists_filter(prop_name, property_map[prop_name])
# Add range filters
def add_range_filter(source, target, source_unit=None, target_unit=None):
......@@ -430,8 +480,8 @@ class EncMaterialsResource(Resource):
pages["total"] = n_materials
# 2. Collapse approach. Quickly provides a list of materials
# corresponding to the query, offers full pagination, doesn"t include
# the number of matches per material.
# corresponding to the query, offers full pagination, the number of
# matches per material needs to be requested with a separate query.
elif mode == "collapse":
s = Search(index=config.elastic.index_name)
s = s.query(bool_query)
......@@ -741,7 +791,6 @@ calculations_result = api.model("calculations_result", {
class EncCalculationsResource(Resource):
@api.response(404, "Suggestion not found")
@api.response(400, "Bad request")
@api.response(200, "Metadata send", fields.Raw)
......@@ -780,7 +829,9 @@ class EncCalculationsResource(Resource):
def calc_score(entry):
"""Custom scoring function used to sort results by their
"quality". Currently built to mimic the scoring that was used
in the old Encyclopedia GUI.
in the old Encyclopedia GUI. Primarily sorts by quality measure,
ties are broken by alphabetic sorting of entry_id in order to
return consistent results.
score = 0
functional_score = {
......@@ -800,7 +851,7 @@ class EncCalculationsResource(Resource):
if has_dos and has_bs:
score += 10
return score
return (score, entry["calc_id"])
# The calculations are first sorted by "quality"
sorted_calc = sorted(response, key=lambda x: calc_score(x), reverse=True)
......@@ -1081,7 +1132,6 @@ calculation_property_result = api.model("calculation_property_result", {
class EncCalculationResource(Resource):
@api.response(404, "Material or calculation not found")
@api.response(400, "Bad request")
@api.response(200, "Metadata send", fields.Raw)
......@@ -1175,15 +1225,20 @@ class EncCalculationResource(Resource):
for key, arch_path in arch_properties.items():
value = root[arch_path]
# Save derived properties and turn into dict
# Replace unnormalized thermodynamical properties with
# normalized ones and turn into dict
if key == "thermodynamical_properties":
specific_heat_capacity = value.specific_heat_capacity.magnitude.tolist()
specific_free_energy = value.specific_vibrational_free_energy_at_constant_volume.magnitude.tolist()
specific_heat_capacity = [x if np.isfinite(x) else None for x in specific_heat_capacity]
specific_free_energy = [x if np.isfinite(x) else None for x in specific_free_energy]
if isinstance(value, list):
value = [x.m_to_dict() for x in value]
value = value.m_to_dict()
if key == "thermodynamical_properties":
del value["thermodynamical_property_heat_capacity_C_v"]
del value["vibrational_free_energy_at_constant_volume"]
value["specific_heat_capacity"] = specific_heat_capacity
value["specific_vibrational_free_energy_at_constant_volume"] = specific_free_energy
......@@ -1226,6 +1281,63 @@ class EncCalculationResource(Resource):
return result, 200
report_query = api.model("report_query", {
"server": fields.String,
"username": fields.String,
"email": fields.String,
"first_name": fields.String,
"last_name": fields.String,
"category": fields.String,
"subcategory": fields.String(allow_null=True),
"representatives": fields.Raw(Raw=True),
"message": fields.String,
class ReportsResource(Resource):
@api.response(500, "Error sending report")
@api.response(400, "Bad request")
@api.response(204, "Report succesfully sent", fields.Raw)
@api.expect(calculation_property_query, validate=False)
@api.marshal_with(calculation_property_result, skip_none=True)
def post(self, material_id):
# Get query parameters as json
query = marshal(request.get_json(), report_query)
except Exception as e:
abort(400, message=str(e))
# Send the report as an email
query["material_id"] = material_id
representatives = query["representatives"]
if representatives is not None:
representatives = "\n" + "\n".join([" {}: {}".format(key, value) for key, value in representatives.items()])
query["representatives"] = representatives
mail = (
"Server: {server}\n\n"
"Username: {username}\n"
"First name: {first_name}\n"
"Last name: {last_name}\n"
"Email: {email}\n\n"
"Material id: {material_id}\n"
"Category: {category}\n"
"Subcategory: {subcategory}\n"
"Representative calculations: {representatives}\n\n"
"Message: {message}"
name="webmaster", email="", message=mail, subject='Encyclopedia error report')
except Exception as e:
abort(500, message="Error sending error report email.")
return "", 204
def read_archive(upload_id: str, calc_id: str) -> EntryArchive:
"""Used to read data from the archive.
......@@ -102,21 +102,12 @@ class LegacyMetainfoResource(Resource):
Other required packages might also be returned, e.g. a parser might organize its
definitions in multiple packages.
package = metainfo_package_name
if package.endswith('.nomadmetainfo.json'):
package = package[:-19]
if package.endswith('.json'):
package = package[:-5]
python_package_name, _ = python_package_mapping(package)
python_package_name = '.'.join(python_package_name.split('.')[:-1])
python_module = importlib.import_module(python_package_name)
metainfo = getattr(python_module, 'm_env')
metainfo = LegacyMetainfoEnvironment.from_legacy_package_path(metainfo_package_name)
except (ImportError, KeyError, FileNotFoundError, AttributeError):
abort(404, message='Metainfo package %s does not exist.' % package)
abort(404, message='Metainfo package %s does not exist.' % metainfo_package_name)
if isinstance(metainfo, LegacyMetainfoEnvironment):
return metainfo.to_legacy_dict(metainfo.packages)
abort(404, message='Metainfo package %s is not a legacy package.' % package)
abort(404, message='Metainfo package %s is not a legacy package.' % metainfo_package_name)
......@@ -123,7 +123,7 @@ class Meta():
self.more_data_available = available > returned if available is not None else False
self.provider = dict(,,
index_base_url=url(version=None, prefix='index')
......@@ -200,10 +200,10 @@ json_api_data_object_model = api.model('DataObject', {
description='The id of the object.'),
'attributes': fields.Raw(
description='A dictionary, containing key-value pairs representing the entries properties')
description='A dictionary, containing key-value pairs representing the entries properties'),
# further optional fields: links, meta, relationships
'relationships': fields.Raw(
description='In accordance with section Relationships, all entry types MAY use relationships to describe relations to other entries.')
......@@ -75,7 +75,7 @@ def find_match(pos: np.array, positions: np.array, eps: float) -> Union[int, Non
return None
def get_symmetry_string(space_group: int, wyckoff_sets: List[WyckoffSet]) -> str:
def get_symmetry_string(space_group: int, wyckoff_sets: List[WyckoffSet], is_2d: bool = False) -> str:
"""Used to serialize symmetry information into a string. The Wyckoff
positions are assumed to be normalized and ordered as is the case if using
the matid-library.
......@@ -84,6 +84,9 @@ def get_symmetry_string(space_group: int, wyckoff_sets: List[WyckoffSet]) -> str
space_group: 3D space group number
wyckoff_sets: Wyckoff sets that map a Wyckoff letter to related
is_2d: Whether the symmetry information is analyzed from a 2D
structure. If true, a prefix is added to the string to distinguish
2D from 3D.
A string that encodes the symmetry properties of an atomistic
......@@ -97,7 +100,10 @@ def get_symmetry_string(space_group: int, wyckoff_sets: List[WyckoffSet]) -> str
i_string = "{} {} {}".format(element, wyckoff_letter, n_atoms)
wyckoff_string = ", ".join(sorted(wyckoff_strings))
string = "{} {}".format(space_group, wyckoff_string)
if is_2d:
string = "2D {} {}".format(space_group, wyckoff_string)
string = "{} {}".format(space_group, wyckoff_string)
return string
......@@ -43,6 +43,22 @@ def qa(skip_tests: bool, exitfirst: bool):
@dev.command(help='Generates a JSON with all metainfo.')
def metainfo():
    """Dump all registered metainfo packages as one JSON object to stdout."""
    import json
    from nomad.metainfo import Package
    from nomad.parsing.parsers import parsers

    # Touching each parser's metainfo environment forces all metainfo
    # packages to be imported and registered.
    for parser in parsers:
        _ = parser.metainfo_env

    serialized = {
        name: package.m_to_dict()
        for name, package in Package.registry.items()
    }
    print(json.dumps(serialized, indent=2))
@dev.command(help='Generates source-code for the new metainfo from .json files of the old.')
@click.argument('path', nargs=-1)
def legacy_metainfo(path):
......@@ -43,16 +43,6 @@ def parse(
parser_backend =, logger=logger)
from nomad.metainfo import MSection
from nomad.parsing.legacy import Backend
if isinstance(parser_backend, MSection):
backend = Backend(parser._metainfo_env, parser.domain)
root_section = str([parser.domain]['root_section'])
setattr(backend.entry_archive, root_section, parser_backend)
parser_backend = backend
if not parser_backend.status[0] == 'ParseSuccess':
logger.error('parsing was not successful', status=parser_backend.status)
......@@ -73,7 +63,7 @@ def normalize(
if normalizer_instance.__class__.__name__ == normalizer)
assert normalizer is not None, 'there is no normalizer %s' % str(normalizer)
normalizer_instance = typing.cast(typing.Callable, normalizer)(parser_backend)
normalizer_instance = typing.cast(typing.Callable, normalizer)(parser_backend.entry_archive)
logger = logger.bind(normalizer=normalizer_instance.__class__.__name__)'identified normalizer')
......@@ -169,10 +169,14 @@ def api_url(ssl: bool = True):
def gui_url(page: str = None):
    """Return the URL of the GUI, optionally pointing at a specific page.

    Args:
        page: Optional path below the GUI root; appended when given.
    """
    # api_url(...) ends in 'api'; drop those three characters to get the root.
    root = api_url(True)[:-3]
    if root.endswith('/'):
        root = root[:-1]
    if page is None:
        return '%s/gui' % root
    return '%s/gui/%s' % (root, page)
......@@ -229,7 +233,7 @@ normalize = NomadConfig(
# The distance tolerance between atoms for grouping them into the same
# cluster. Used in detecting system type.
# Defines the "bin size" for rounding cell angles for the material hash
angle_rounding=float(10.0), # unit: degree
# The threshold for a system to be considered "flat". Used e.g. when
......@@ -239,8 +243,8 @@ normalize = NomadConfig(
# The threshold for point equality in k-space. Unit: 1/m.
# The energy threshold for how much a band can be on top or below the fermi
# level in order to detect a gap. k_B x T at room temperature. Unit: Joule
band_structure_energy_tolerance=300 * 1.38064852E-23,
# level in order to detect a gap. Unit: Joule.
band_structure_energy_tolerance=1.6022e-20, # 0.1 eV
......@@ -262,14 +266,14 @@ datacite = NomadConfig(
meta = NomadConfig(
service='unknown nomad service',
name='novel materials discovery (NOMAD)',
description='A FAIR data sharing platform for materials science data',
......@@ -397,7 +401,8 @@ def load_config(config_file: str = os.environ.get('NOMAD_CONFIG', 'nomad.yaml'))
logger.error('config key %s does not exist' % key)
adapt(globals(), config_data)
if config_data is not None:
adapt(globals(), config_data)
# load env and override yaml and defaults
kwargs = {
......@@ -315,7 +315,10 @@ class Material(MSection):
A fixed length, unique material identifier in the form of a hash
metric='cardinality', metric_name='materials',
description='Search for a particular material by its id.')
material_name = Quantity(
......@@ -1264,10 +1264,17 @@ class section_dos(MSection):
Array containing the set of discrete energy values with respect to the top of the
valence band for the density (electronic-energy) of states (DOS). This is the
total DOS, see atom_projected_dos_energies and species_projected_dos_energies for
Array containing the set of discrete energy values with respect to the
highest occupied energy level. This is the total DOS, see
atom_projected_dos_energies and species_projected_dos_energies for
partial density of states.
If not available through energy_reference_highest_occupied, the highest
occupied energy level is detected by searching for a non-zero DOS value
below (or nearby) the reported energy_reference_fermi. In case the
highest occupied energy level cannot be detected accurately, the
normalized values are not reported. For calculations with multiple
spin-channels, the normalization is determined by the first channel.
......@@ -1283,14 +1290,6 @@ class section_dos(MSection):
dos_fermi_energy = Quantity(
Stores the Fermi energy of the density of states.
dos_integrated_values = Quantity(
shape=['number_of_spin_channels', 'number_of_dos_values'],
......@@ -3440,6 +3439,11 @@ class section_run(MSection):
section_workflow = SubSection(
class section_sampling_method(MSection):
......@@ -5576,4 +5580,37 @@ class section_XC_functionals(MSection):
class section_workflow(MSection):
Section containing the results of a workflow.
m_def = Section(validate=False, a_legacy=LegacyDefinition(name='section_workflow'))
workflow_type = Quantity(
The type of calculation workflow. Can be one of relaxation, elastic, phonon,
molecular dynamics.
relaxation_energy_tolerance = Quantity(
The tolerance value in the energy between relaxation steps for convergence.
workflow_final_calculation_ref = Quantity(
Reference to last calculation step.
......@@ -433,6 +433,14 @@ def reset(remove: bool):
def send_mail(name: str, email: str, message: str, subject: str):
"""Used to programmatically send mails.
name: The email recipient name.
email: The email recipient address.
message: The email body.
subject: The subject line.
if not config.mail.enabled:
......@@ -453,7 +461,6 @@ def send_mail(name: str, email: str, message: str, subject: str):
msg = MIMEText(message)
msg['Subject'] = subject
msg['From'] = 'The nomad team <%s>' % config.mail.from_address
msg['To'] = name
to_addrs = [email]
......@@ -293,4 +293,5 @@ from .metainfo import (
......@@ -153,8 +153,9 @@ class ElasticDocument(SectionAnnotation):
# create an field for each sub section
for sub_section in section.all_sub_sections.values():
sub_sectoin_prefix = '%s.%s' % (prefix, if prefix else
inner_document = ElasticDocument.create_document(
sub_section.sub_section, inner_doc=True,
sub_section.sub_section, inner_doc=True, prefix=sub_sectoin_prefix)
if inner_document is not None:
# sub sections with no elastic quantities get a None document
attrs[] = Object(inner_document)
......@@ -22,6 +22,7 @@ from typing import cast, Dict, List, Union, Any, Set, Iterable, Tuple
import numpy as np
from pint.errors import UndefinedUnitError
import os.path
import importlib
from nomadcore.local_meta_info import loadJsonFile, InfoKindEl, InfoKindEnv
......@@ -105,6 +106,22 @@ class LegacyMetainfoEnvironment(Environment):
the environment.
def from_legacy_package_path(path):
    """Load the legacy metainfo environment for a legacy package file path.

    The basename of ``path`` is stripped of its ``.nomadmetainfo.json``
    (or plain ``.json``) suffix to obtain the legacy package name. The
    corresponding python package is then imported and its module-level
    ``m_env`` environment is returned.
    """
    package = os.path.basename(path)
    for suffix in ('.nomadmetainfo.json', '.json'):
        if package.endswith(suffix):
            package = package[:-len(suffix)]

    module_name, _ = python_package_mapping(package)
    # Drop the last segment to obtain the containing (parent) package.
    module_name = '.'.join(module_name.split('.')[:-1])
    module = importlib.import_module(module_name)
    return getattr(module, 'm_env')
legacy_package_name = Quantity(type=str)
def __init__(self, *args, **kwargs):
......@@ -31,7 +31,7 @@ import pytz
import docstring_parser
import jmespath