Commit 2ec86827 authored by Lauri Himanen's avatar Lauri Himanen
Browse files

Added a single Pint UnitRegistry at the project root, lot of new stuff in the encyclopedia API.

parent 5a083897
Pipeline #74853 failed with stages
in 15 minutes and 54 seconds
......@@ -15,58 +15,281 @@
'''
The encyclopedia API of the nomad@FAIRDI APIs.
'''
import re
from flask_restplus import Resource, abort, fields
from elasticsearch_dsl import Search
from .api import api
from .auth import authenticate
from flask_restplus import Resource, abort, fields, marshal
from flask import request
from elasticsearch_dsl import Search, Q
from nomad import config
from nomad.units import ureg
from nomad.atomutils import get_hill_decomposition, get_formula_string
from .api import api
# Namespace under which the encyclopedia routes of this module are registered
# (used by the ``@ns.route`` decorators below).
ns = api.namespace('encyclopedia', description='Access encyclopedia metadata.')
# Module-level search object bound to the configured ElasticSearch index.
search = Search(index=config.elastic.index_name)
# Matches one term of a chemical formula: an element symbol (one uppercase
# letter optionally followed by one lowercase letter) captured as group 1, and
# an optional run of digits (the count, possibly empty) captured as group 2.
re_formula = re.compile(r"([A-Z][a-z]?)(\d*)")
def add_result(result, key, function, default=""):
    """Evaluate ``function`` and store its outcome under ``result[key]``.

    The callable is invoked without arguments. If it raises any ``Exception``
    (for instance because an attribute is missing from an ElasticSearch
    document), ``default`` is stored instead. The function mutates ``result``
    in place and does not return anything.

    Args:
        result: Mutable mapping that receives the value.
        key: Key under which the value is stored.
        function: Zero-argument callable producing the value.
        default: Value stored when ``function`` raises.
    """
    try:
        outcome = function()
    except Exception:
        # Best-effort lookup: any failure falls back to the default.
        result[key] = default
    else:
        result[key] = outcome
def get_material(es_doc):
    """Build a material summary dictionary from an ElasticSearch root document.

    Every value is read from ``es_doc.encyclopedia.material`` (or its ``bulk``
    sub-object). A value that cannot be read is replaced by an empty string,
    so the returned dictionary always contains all ten keys.

    Args:
        es_doc: ElasticSearch hit exposing the ``encyclopedia`` attribute tree.

    Returns:
        dict mapping the result keys to the values found in the document.
    """
    # Result key -> attribute path below ``es_doc.encyclopedia.material``.
    # The order fixes the key order of the returned dictionary.
    attribute_paths = (
        ("material_id", ("material_id",)),
        ("bravais_lattice", ("bulk", "bravais_lattice")),
        ("crystal_system", ("bulk", "crystal_system")),
        ("formula", ("formula",)),
        ("formula_reduced", ("formula_reduced",)),
        ("material_name", ("material_name",)),
        ("point_group", ("bulk", "point_group")),
        ("space_group", ("bulk", "space_group_number")),
        ("structure_type", ("bulk", "structure_type")),
        ("system_type", ("material_type",)),
    )

    def resolve(path):
        # Walk the attribute chain; any missing link raises inside the
        # callable handed to add_result, which then stores the default.
        node = es_doc.encyclopedia.material
        for attribute in path:
            node = getattr(node, attribute)
        return node

    result = {}
    for key, path in attribute_paths:
        add_result(result, key, lambda path=path: resolve(path), "")
    return result
# Request parser attached to the single-material endpoint via ``api.expect``.
# NOTE(review): ``material_id`` is declared here as a query-string argument
# (location='args') while the route takes it as a path parameter - confirm
# that this declaration is still wanted.
material_query = api.parser()
material_query.add_argument('material_id', type=str, help='Identifier for the searched material.', location='args')

# Response model for one material. The keys mirror the dictionary produced by
# ``get_material``. ``material_id`` is included so that the identifier is not
# stripped from marshalled responses: only fields listed in the model are
# emitted, and without the id entries of a result list cannot be told apart
# or looked up again.
material_result = api.model('material_result', {
    "material_id": fields.String,
    "bravais_lattice": fields.String,
    "crystal_system": fields.String,
    "formula": fields.String,
    "formula_reduced": fields.String,
    "material_name": fields.String,
    "point_group": fields.String,
    "space_group": fields.Integer(),
    "structure_type": fields.String,
    "system_type": fields.String,
})
@ns.route('/materials/<string:material_id>')
class EncMaterialResource(Resource):
    @api.response(404, 'The material does not exist')
    @api.response(401, 'Not authorized to access the material')
    @api.response(200, 'Metadata send', fields.Raw)
    @api.doc('get_enc_material')
    @authenticate()
    @api.doc('material/<material_id>')
    @api.expect(material_query)
    @api.marshal_with(material_result)
    def get(self, material_id):
        """Used to retrieve basic information related to the specified material.

        Looks up the first public (published, not embargoed) entry carrying
        the given material id and reports the material information stored in
        that entry. Aborts with 404 when no such entry exists.
        """
        # Find the first public entry with this material id and take
        # information from there. In principle all other entries should have
        # the same information.
        #
        # Since we are looking for an exact match, we use filter context
        # together with term search for speed (instead of query context and
        # match search).
        query = Q(
            'bool',
            filter=[
                Q('term', published=True),
                Q('term', with_embargo=False),
                Q('term', encyclopedia__material__material_id=material_id),
            ]
        )
        response = Search(index=config.elastic.index_name).query(query).execute()

        # No such material
        if len(response) == 0:
            abort(404, message='There is no material {}'.format(material_id))

        # Create result JSON with the shared helper so that this endpoint and
        # the materials listing report identical fields.
        return get_material(response[0]), 200
# Closed numeric interval used by the range-type search options. Either bound
# may be left out.
range_query = api.model('range_query', {
    "max": fields.Float,
    "min": fields.Float,
})
# Request body accepted by the materials search endpoint.
materials_query = api.model('materials_input', {
    # Formula based search options.
    # NOTE(review): within this module only "formula" and "exclusive" are read
    # by the POST handler; "element", "page", "per_page" and "pagination"
    # appear to be declared but not yet used - confirm.
    'search_by': fields.Nested(api.model('search_query', {
        "exclusive": fields.Boolean(default=False),
        "formula": fields.String,
        "element": fields.List(fields.String),
        "page": fields.Integer(default=1),
        "per_page": fields.Integer(default=25),
        "pagination": fields.Boolean,
    })),
    # List-valued options: each is a list of accepted values.
    'material_name': fields.List(fields.String),
    'structure_type': fields.List(fields.String),
    'space_group': fields.List(fields.Integer),
    'system_type': fields.List(fields.String),
    'crystal_system': fields.List(fields.String),
    'band_gap': fields.Nested(range_query, description="Band gap range in eV."),
    'band_gap_direct': fields.Boolean,
    # Boolean flags about the presence of derived properties.
    'has_band_structure': fields.Boolean,
    'has_dos': fields.Boolean,
    'has_fermi_surface': fields.Boolean,
    'has_thermal_properties': fields.Boolean,
    'functional_type': fields.List(fields.String),
    'basis_set_type': fields.List(fields.String),
    'code_name': fields.List(fields.String),
    'mass_density': fields.Nested(range_query, description="Mass density range in kg / m ** 3."),
})
# Response body of the materials search endpoint: a list of material
# summaries together with result and page counters.
materials_result = api.model('materials_result', {
    'pages': fields.Integer(required=True),
    'results': fields.List(fields.Nested(material_result)),
    'total_results': fields.Integer(allow_null=False),
})
@ns.route('/materials')
class EncMaterialsResource(Resource):
    @api.response(404, 'No materials found')
    @api.response(400, 'Bad request')
    @api.response(200, 'Metadata send', fields.Raw)
    @api.expect(materials_query, validate=False)
    @api.marshal_with(materials_result)
    @api.doc('materials')
    def post(self):
        """Used to query a list of materials with the given search options.

        The JSON request body is marshalled against ``materials_query`` and
        translated into a single ElasticSearch boolean query. Only published
        entries without embargo are searched. Aborts with 400 when the body
        cannot be marshalled and with 404 when nothing matches.

        Returns:
            A ``(result, 200)`` tuple where ``result`` holds the keys
            ``total_results``, ``pages`` and ``results``.
        """
        # Get query parameters as json. Options that are absent from the
        # request are expected to show up as None after marshalling.
        try:
            data = marshal(request.get_json(), materials_query)
        except Exception as e:
            abort(400, message=str(e))

        s = Search(index=config.elastic.index_name)
        filters = []
        must_nots = []
        musts = []

        # Only public data is searched
        filters.append(Q('term', published=True))
        filters.append(Q('term', with_embargo=False))

        # Add term filters
        def add_terms_filter(source, target, query_type="terms"):
            value = data[source]
            # Skip options that were not given and empty value lists. An
            # explicit False (e.g. band_gap_direct=False) is a valid filter
            # value and must not be skipped.
            if value is None or (isinstance(value, (list, tuple)) and not value):
                return
            filters.append(Q(query_type, **{target: value}))

        add_terms_filter("material_name", "encyclopedia.material.material_name")
        add_terms_filter("structure_type", "encyclopedia.material.bulk.structure_type")
        add_terms_filter("space_group", "encyclopedia.material.bulk.space_group_number")
        add_terms_filter("system_type", "encyclopedia.material.material_type")
        add_terms_filter("crystal_system", "encyclopedia.material.bulk.crystal_system")
        add_terms_filter("band_gap_direct", "encyclopedia.properties.band_gap_direct", query_type="term")
        add_terms_filter("functional_type", "encyclopedia.method.functional_type")
        add_terms_filter("basis_set_type", "dft.basis_set")
        add_terms_filter("code_name", "dft.code_name")

        # Add exists filters: True requires the field to exist, False requires
        # it to be absent, anything else adds no constraint.
        def add_exists_filter(source, target):
            param = data[source]
            if param is not None:
                query = Q("exists", field=target)
                if param is True:
                    filters.append(query)
                elif param is False:
                    must_nots.append(query)

        add_exists_filter("has_thermal_properties", "encyclopedia.properties.thermodynamical_properties")
        add_exists_filter("has_band_structure", "encyclopedia.properties.electronic_band_structure")
        add_exists_filter("has_dos", "encyclopedia.properties.electronic_dos")
        add_exists_filter("has_fermi_surface", "encyclopedia.properties.fermi_surface")

        # Add range filters. When units are given, the bounds are converted
        # from source_unit to target_unit before they are sent to the index.
        def add_range_filter(source, target, source_unit=None, target_unit=None):
            param = data[source]
            query_dict = {}
            for bound, operator in (("min", "gte"), ("max", "lte")):
                value = param[bound]
                if value is None:
                    continue
                if not (source_unit is None and target_unit is None):
                    value = (value * source_unit).to(target_unit).magnitude
                query_dict[operator] = value
            if len(query_dict) != 0:
                filters.append(Q("range", **{target: query_dict}))

        add_range_filter("band_gap", "encyclopedia.properties.band_gap", ureg.eV, ureg.J)
        add_range_filter("mass_density", "encyclopedia.properties.mass_density")

        # Create query for elements or formula
        search_by = data["search_by"]
        formula = search_by["formula"]
        exclusive = search_by["exclusive"]

        if formula is not None:
            # The given formula is reformatted with the Hill system. Each
            # symbol is repeated as many times as its count says, a missing
            # count means one occurrence. The list must stay flat: one string
            # per atom.
            element_list = []
            for symbol, count in re_formula.findall(formula):
                if count == "":
                    element_list.append(symbol)
                else:
                    element_list.extend([symbol] * int(count))

            names, counts = get_hill_decomposition(element_list)

            # With exclusive search we look for exact match
            if exclusive:
                hill_formula = get_formula_string(names, counts)
                filters.append(Q("term", **{"encyclopedia.material.formula": hill_formula}))

            # With non-exclusive search we look for match that includes at
            # least all parts of the formula, possibly even more.
            else:
                parts = ["{}{}".format(name, count) for name, count in zip(names, counts)]
                musts.append(Q(
                    "match",
                    encyclopedia__material__formula_parts={"query": " ".join(parts), "operator": "and"}
                ))

        # Prepare the final boolean query that combines the different queries
        filter_query = Q('bool', filter=filters, must_not=must_nots, must=musts)
        s = s.query(filter_query)

        # Execute query
        response = s.execute()

        # No matches
        if len(response) == 0:
            abort(404, message='No materials found for the given search criteria.')

        # Create final result dictionary
        result_list = [get_material(es_doc) for es_doc in response]
        result = {
            "total_results": len(result_list),
            "pages": None,
            "results": result_list,
        }
        return result, 200
# @ns.route('/esmaterials')
# class EncESMaterialsResource(Resource):
# @api.response(404, 'No materials found')
# @api.response(200, 'Metadata send', fields.Raw)
# @api.doc('materials')
# def post(self):
# """Used to query a list of materials with the given ElasticSearch JSON
# query.
# """
......@@ -220,7 +220,7 @@ def get_hill_decomposition(atom_labels: np.ndarray, reduced: bool = False) -> Tu
def get_formula_string(symbols: List[str], counts: List[int]) -> str:
"""Used to form a single formula string from a list of chemical speices and
"""Used to form a single formula string from a list of chemical species and
their counts.
Args:
......
......@@ -145,6 +145,7 @@ from io import StringIO
from nomad import config
from nomad import metainfo as mi
from nomad.units import ureg
from nomad.datamodel import EntryArchive
# TODO this import is necessary to load all metainfo definitions that the parsers are using
......@@ -193,11 +194,11 @@ class ApiStatistics(mi.MSection):
description='Number of entries loaded in the last api call')
last_response_data_size = mi.Quantity(
type=int, unit=mi.units.bytes, default=0,
type=int, unit=ureg.bytes, default=0,
description='Bytes loaded in the last api call')
loaded_data_size = mi.Quantity(
type=int, unit=mi.units.bytes, default=0,
type=int, unit=ureg.bytes, default=0,
description='Bytes loaded from this query')
loaded_nentries = mi.Quantity(
......
......@@ -2,6 +2,7 @@ import numpy as np
from nomad.metainfo import MSection, Section, SubSection, Quantity, MEnum, Reference
from nomad.datamodel.metainfo.public import section_k_band, section_dos, section_thermodynamical_properties
from nomad.metainfo.search_extension import Search
from elasticsearch_dsl import Text
class WyckoffVariables(MSection):
......@@ -12,19 +13,19 @@ class WyckoffVariables(MSection):
"""
)
x = Quantity(
type=float,
type=np.dtype(np.float64),
description="""
The x variable if present.
"""
)
y = Quantity(
type=float,
type=np.dtype(np.float64),
description="""
The y variable if present.
"""
)
z = Quantity(
type=float,
type=np.dtype(np.float64),
description="""
The z variable if present.
"""
......@@ -128,7 +129,8 @@ class IdealizedStructure(MSection):
"""
)
cell_volume = Quantity(
type=float,
type=np.dtype(np.float64),
unit="m ** 3",
description="""
Volume of the idealized cell. The cell volume can only be reported
consistently after idealization and may not perfectly correspond to the
......@@ -141,7 +143,7 @@ class IdealizedStructure(MSection):
class Bulk(MSection):
m_def = Section(
a_flask=dict(skip_none=True),
a_elastic="bulk",
a_search="bulk",
description="""
Contains information that is specific to bulk crystalline materials.
"""
......@@ -239,7 +241,7 @@ class Bulk(MSection):
class Material(MSection):
m_def = Section(
a_flask=dict(skip_none=True),
a_elastic="material",
a_search="material",
description="""
Contains an overview of the type of material that was detected in this
entry.
......@@ -290,7 +292,14 @@ class Material(MSection):
Hill notation whre the number of occurences have been divided by the
greatest common divisor.
""",
a_search=Search()
)
formula_parts = Quantity(
type=str,
description="""
The formula separated into individual terms for easier search.
""",
a_search=Search(mapping=Text())
)
# Bulk-specific properties
......@@ -303,7 +312,7 @@ class Material(MSection):
class Method(MSection):
m_def = Section(
a_flask=dict(skip_none=True),
a_elastic="method",
a_search="method",
description="""
Contains an overview of the methodology that was detected in this
entry.
......@@ -315,12 +324,6 @@ class Method(MSection):
Generic name for the used methodology.
"""
)
basis_set_type = Quantity(
type=MEnum("Numeric AOs", "Gaussians", "(L)APW+lo", "FLAPW (full-potential linearized augmented planewave)", "Plane waves", "Real-space grid", "Local-orbital minimum-basis"),
description="""
Basic type of the used basis set.
"""
)
core_electron_treatment = Quantity(
type=MEnum("full all electron", "all electron frozen core", "pseudopotential", "unavailable"),
description="""
......@@ -385,7 +388,7 @@ class Method(MSection):
"""
)
smearing_parameter = Quantity(
type=float,
type=np.dtype(np.float64),
description="""
Parameter for smearing, usually the width.
"""
......@@ -395,7 +398,7 @@ class Method(MSection):
class Calculation(MSection):
m_def = Section(
a_flask=dict(skip_none=True),
a_elastic="calculation",
a_search="calculation",
description="""
Contains an overview of the type of calculation that was detected in
this entry.
......@@ -422,66 +425,92 @@ class Calculation(MSection):
class Properties(MSection):
m_def = Section(
a_flask=dict(skip_none=True),
a_elastic="properties",
a_search="properties",
description="""
Contains derived physical properties that are specific to the NOMAD
Encyclopedia.
"""
)
atomic_density = Quantity(
type=float,
type=np.dtype(np.float64),
unit="1 / m ** 3",
description="""
Atomic density of the material (atoms/volume)."
"""
""",
a_search=Search()
)
mass_density = Quantity(
type=float,
type=np.dtype(np.float64),
unit="kg / m ** 3",
description="""
Mass density of the material.
"""
""",
a_search=Search()
)
band_gap = Quantity(
type=np.dtype(np.float64),
unit="eV",
description="""
Band gap value. If multiple spin channels are present, this value is
taken from the channel with smallest band gap value.
""",
a_search=Search()
)
band_gap_direct = Quantity(
type=bool,
description="""
Whether band gap is direct or not. If multiple spin channels are
present, this value is taken from the channel with smallest band gap
value.
""",
a_search=Search()
)
energies = Quantity(
type=str,
description="""
Code dependent energy values, corrected to be per formula unit.
"""
""",
a_search=Search()
)
electronic_band_structure = Quantity(
type=Reference(section_k_band.m_def),
shape=[],
description="""
Reference to an electronic band structure.
"""
""",
a_search=Search(shallow=True)
)
electronic_dos = Quantity(
type=Reference(section_dos.m_def),
shape=[],
description="""
Reference to an electronic density of states.
"""
""",
a_search=Search(shallow=True)
)
phonon_band_structure = Quantity(
type=Reference(section_k_band.m_def),
shape=[],
description="""
Reference to a phonon band structure.
"""
""",
a_search=Search(shallow=True)
)
phonon_dos = Quantity(
type=Reference(section_dos.m_def),
shape=[],
description="""
Reference to a phonon density of states.
"""
""",
a_search=Search(shallow=True)
)
thermodynamical_properties = Quantity(
type=Reference(section_thermodynamical_properties.m_def),
shape=[],
description="""
Reference to a section containing thermodynamical properties.
"""
""",
a_search=Search(shallow=True)
)
......@@ -493,10 +522,10 @@ class EncyclopediaMetadata(MSection):
Section which stores information for the NOMAD Encyclopedia.
"""
)
material = SubSection(sub_section=Material.m_def, repeats=False)
method = SubSection(sub_section=Method.m_def, repeats=False)
properties = SubSection(sub_section=Properties.m_def, repeats=False)
calculation = SubSection(sub_section=Calculation.m_def, repeats=False)
material = SubSection(sub_section=Material.m_def, repeats=False, a_search='material')
method = SubSection(sub_section=Method.m_def, repeats=False, a_search='method')
properties = SubSection(sub_section=Properties.m_def, repeats=False, a_search='properties')
calculation = SubSection(sub_section=Calculation.m_def, repeats=False, a_search='calculation')
status = Quantity(
type=MEnum("success", "unsupported_material_type", "unsupported_calculation_type", "invalid_metainfo", "failure"),
description="""
......@@ -509,5 +538,6 @@ class EncyclopediaMetadata(MSection):
| `"unsupported_calculation_type"` | The detected calculation type is currenlty not supported by the Encyclopedia. |
| `"invalid_metainfo"` | The entry could not be processed due to missing or invalid metainfo. |
| `"failure"` | The entry could not be processed due to an unexpected exception. |
"""
""",
a_search=Search()
)
from ase.data import chemical_symbols
from elasticsearch_dsl import Keyword, Float, InnerDoc, Nested
from elasticsearch_dsl import Keyword, Float, InnerDoc, Nested, Integer
import numpy as np
from nomad.metainfo import MSection, Section, Quantity, SubSection, MEnum, units, DefinitionAnnotation
from nomad.units import ureg
from nomad.metainfo import MSection, Section, Quantity, SubSection, MEnum, DefinitionAnnotation
from nomad.metainfo.search_extension import Search
......@@ -82,7 +83,7 @@ class Species(MSection):
species).
''')
mass = Quantity(type=float, unit=units.amu, a_optimade=dict(entry='optional'))
mass = Quantity(type=float, unit=ureg.amu, a_optimade=dict(entry='optional'))