diff --git a/nomad/app/api/encyclopedia.py b/nomad/app/api/encyclopedia.py index 06a06ddba81be5f194559cb109cfbd2a4d51d896..6caf22d7b9e33340c2e4a11d9f864376f3a84965 100644 --- a/nomad/app/api/encyclopedia.py +++ b/nomad/app/api/encyclopedia.py @@ -15,58 +15,281 @@ ''' The encyclopedia API of the nomad@FAIRDI APIs. ''' +import re -from flask_restplus import Resource, abort, fields -from elasticsearch_dsl import Search - -from .api import api -from .auth import authenticate +from flask_restplus import Resource, abort, fields, marshal +from flask import request +from elasticsearch_dsl import Search, Q from nomad import config +from nomad.units import ureg +from nomad.atomutils import get_hill_decomposition, get_formula_string +from .api import api ns = api.namespace('encyclopedia', description='Access encyclopedia metadata.') -search = Search(index=config.elastic.index_name) +re_formula = re.compile(r"([A-Z][a-z]?)(\d*)") + + +def add_result(result, key, function, default=""): + """Convenience function that attempts to add a value from the ElasticSearch + result into the given result object. Upon failing returns the specified + default value. + """ + try: + value = function() + except Exception: + value = default + result[key] = value + + +def get_material(es_doc): + """Used to form a material definition from the given ElasticSearch root + document. 
+ """ + result = {} + add_result(result, "material_id", lambda: es_doc.encyclopedia.material.material_id, ""), + add_result(result, "bravais_lattice", lambda: es_doc.encyclopedia.material.bulk.bravais_lattice, ""), + add_result(result, "crystal_system", lambda: es_doc.encyclopedia.material.bulk.crystal_system, "") + add_result(result, "formula", lambda: es_doc.encyclopedia.material.formula, "") + add_result(result, "formula_reduced", lambda: es_doc.encyclopedia.material.formula_reduced, "") + add_result(result, "material_name", lambda: es_doc.encyclopedia.material.material_name, "") + add_result(result, "point_group", lambda: es_doc.encyclopedia.material.bulk.point_group, "") + add_result(result, "space_group", lambda: es_doc.encyclopedia.material.bulk.space_group_number, "") + add_result(result, "structure_type", lambda: es_doc.encyclopedia.material.bulk.structure_type, "") + add_result(result, "system_type", lambda: es_doc.encyclopedia.material.material_type, "") + + return result + + +material_query = api.parser() +material_query.add_argument('material_id', type=str, help='Identifier for the searched material.', location='args') +material_result = api.model('material_result', { + "bravais_lattice": fields.String, + "crystal_system": fields.String, + "formula": fields.String, + "formula_reduced": fields.String, + "material_name": fields.String, + "point_group": fields.String, + "space_group": fields.Integer(), + "structure_type": fields.String, + "system_type": fields.String, +}) @ns.route('/materials/<string:material_id>') class EncMaterialResource(Resource): @api.response(404, 'The material does not exist') - @api.response(401, 'Not authorized to access the material') @api.response(200, 'Metadata send', fields.Raw) - @api.doc('get_enc_material') - @authenticate() + @api.doc('material/<material_id>') + @api.expect(material_query) + @api.marshal_with(material_result) def get(self, material_id): """Used to retrive basic information related to the specified 
material. """ - def add_result(result, key, function, default=""): - try: - value = function() - except Exception: - value = default - result[key] = value - - # Find the first entry with this material id and take information from - # there. In principle all other entries should have the same - # information. - s = search.query('term', encyclopedia__material__material_id=material_id) + + # Find the first public entry with this material id and take + # information from there. In principle all other entries should have + # the same information. + s = Search(index=config.elastic.index_name) + + # Since we are looking for an exact match, we use filter context + # together with term search for speed (instead of query context and + # match search) + query = Q( + 'bool', + filter=[ + Q('term', published=True), + Q('term', with_embargo=False), + Q('term', encyclopedia__material__material_id=material_id), + ] + ) + s = s.query(query) response = s.execute() + # No such material if len(response) == 0: abort(404, message='There is no material {}'.format(material_id)) + # Create result JSON entry = response[0] + result = get_material(entry) - # Create result JSON - result = {} - result["material_id"] = material_id - add_result(result, "bravais_lattice", lambda: entry.encyclopedia.material.bulk.bravais_lattice, ""), - add_result(result, "crystal_system", lambda: entry.encyclopedia.material.bulk.crystal_system, "") - add_result(result, "formula", lambda: entry.encyclopedia.material.formula, "") - add_result(result, "formula_reduced", lambda: entry.encyclopedia.material.formula_reduced, "") - add_result(result, "material_name", lambda: entry.encyclopedia.material.material_name, "") - add_result(result, "point_group", lambda: entry.encyclopedia.material.bulk.point_group, "") - add_result(result, "space_group", lambda: entry.encyclopedia.material.bulk.space_group_number, "") - add_result(result, "structure_type", lambda: entry.encyclopedia.material.bulk.structure_type, "") - 
add_result(result, "system_type", lambda: entry.encyclopedia.material.material_type, "") + return result, 200 + + +range_query = api.model('range_query', { + "max": fields.Float, + "min": fields.Float, +}) +materials_query = api.model('materials_input', { + 'search_by': fields.Nested(api.model('search_query', { + "exclusive": fields.Boolean(default=False), + "formula": fields.String, + "element": fields.List(fields.String), + "page": fields.Integer(default=1), + "per_page": fields.Integer(default=25), + "pagination": fields.Boolean, + })), + 'material_name': fields.List(fields.String), + 'structure_type': fields.List(fields.String), + 'space_group': fields.List(fields.Integer), + 'system_type': fields.List(fields.String), + 'crystal_system': fields.List(fields.String), + 'band_gap': fields.Nested(range_query, description="Band gap range in eV."), + 'band_gap_direct': fields.Boolean, + 'has_band_structure': fields.Boolean, + 'has_dos': fields.Boolean, + 'has_fermi_surface': fields.Boolean, + 'has_thermal_properties': fields.Boolean, + 'functional_type': fields.List(fields.String), + 'basis_set_type': fields.List(fields.String), + 'code_name': fields.List(fields.String), + 'mass_density': fields.Nested(range_query, description="Mass density range in kg / m ** 3."), +}) +materials_result = api.model('materials_result', { + 'pages': fields.Integer(required=True), + 'results': fields.List(fields.Nested(material_result)), + 'total_results': fields.Integer(allow_null=False), +}) + + +@ns.route('/materials') +class EncMaterialsResource(Resource): + @api.response(404, 'No materials found') + @api.response(400, 'Bad request') + @api.response(200, 'Metadata send', fields.Raw) + @api.expect(materials_query, validate=False) + @api.marshal_with(materials_result) + @api.doc('materials') + def post(self): + """Used to query a list of materials with the given search options. 
+ """ + # Get query parameters as json + try: + data = marshal(request.get_json(), materials_query) + except Exception as e: + abort(400, message=str(e)) + + s = Search(index=config.elastic.index_name) + filters = [] + must_nots = [] + musts = [] + + # Add term filters + filters.append(Q('term', published=True)) + filters.append(Q('term', with_embargo=False)) + + def add_terms_filter(source, target, query_type="terms"): + if data[source]: + filters.append(Q(query_type, **{target: data[source]})) + + add_terms_filter("material_name", "encyclopedia.material.material_name") + add_terms_filter("structure_type", "encyclopedia.material.bulk.structure_type") + add_terms_filter("space_group", "encyclopedia.material.bulk.space_group_number") + add_terms_filter("system_type", "encyclopedia.material.material_type") + add_terms_filter("crystal_system", "encyclopedia.material.bulk.crystal_system") + add_terms_filter("band_gap_direct", "encyclopedia.properties.band_gap_direct", query_type="term") + add_terms_filter("functional_type", "encyclopedia.method.functional_type") + add_terms_filter("basis_set_type", "dft.basis_set") + add_terms_filter("code_name", "dft.code_name") + + # Add exists filters + def add_exists_filter(source, target): + param = data[source] + if param is not None: + query = Q("exists", field=target) + if param is True: + filters.append(query) + elif param is False: + must_nots.append(query) + + add_exists_filter("has_thermal_properties", "encyclopedia.properties.thermodynamical_properties") + add_exists_filter("has_band_structure", "encyclopedia.properties.electronic_band_structure") + add_exists_filter("has_dos", "encyclopedia.properties.electronic_dos") + add_exists_filter("has_fermi_surface", "encyclopedia.properties.fermi_surface") + + # Add range filters + def add_range_filter(source, target, source_unit=None, target_unit=None): + param = data[source] + query_dict = {} + if param["min"] is not None: + if source_unit is None and target_unit is None: + gte 
= param["min"] + else: + gte = (param["min"] * source_unit).to(target_unit).magnitude + query_dict["gte"] = gte + if param["max"] is not None: + if source_unit is None and target_unit is None: + lte = param["max"] + else: + lte = (param["max"] * source_unit).to(target_unit).magnitude + query_dict["lte"] = lte + if len(query_dict) != 0: + query = Q("range", **{target: query_dict}) + filters.append(query) + + add_range_filter("band_gap", "encyclopedia.properties.band_gap", ureg.eV, ureg.J) + add_range_filter("mass_density", "encyclopedia.properties.mass_density") + # Create query for elements or formula + search_by = data["search_by"] + formula = search_by["formula"] + exclusive = search_by["exclusive"] + + if formula is not None: + # The given formula is reformatted with the Hill system + element_list = [] + matches = re_formula.finditer(formula) + for match in matches: + groups = match.groups() + symbol = groups[0] + count = groups[1] + if symbol != "": + if count == "": + element_list.append(symbol) + else: + element_list += [[symbol] * int(count)] + names, counts = get_hill_decomposition(element_list) + + # With exclusive search we look for exact match + if exclusive: + hill_formula = get_formula_string(names, counts) + filters.append(Q("term", **{"encyclopedia.material.formula": hill_formula})) + # With non-exclusive search we look for match that includes at + # least all parts of the formula, possibly even more. 
+ else: + parts = ["{}{}".format(name, count) for name, count in zip(names, counts)] + musts.append(Q( + "match", + encyclopedia__material__formula_parts={"query": " ".join(parts), "operator": "and"} + )) + + # Prepare the final boolean query that combines the different queries + filter_query = Q('bool', filter=filters, must_not=must_nots, must=musts) + s = s.query(filter_query) + + # Execute query + response = s.execute() + + # No matches + if len(response) == 0: + abort(404, message='No materials found for the given search criteria.') + + # Create final result dictionary + result_list = [get_material(es_doc) for es_doc in response] + result = { + "total_results": len(result_list), + "pages": None, + "results": result_list, + } return result, 200 + +# @ns.route('/esmaterials') +# class EncESMaterialsResource(Resource): + # @api.response(404, 'No materials found') + # @api.response(200, 'Metadata send', fields.Raw) + # @api.doc('materials') + # def post(self): + # """Used to query a list of materials with the given ElasticSearch JSON + # query. + # """ diff --git a/nomad/atomutils.py b/nomad/atomutils.py index 6ec85c0ccab7d7ba76b940bea8d7a530e85a58cb..6f4f3eecf95b4898ca48c63d125062e223364a0c 100644 --- a/nomad/atomutils.py +++ b/nomad/atomutils.py @@ -220,7 +220,7 @@ def get_hill_decomposition(atom_labels: np.ndarray, reduced: bool = False) -> Tu def get_formula_string(symbols: List[str], counts: List[int]) -> str: - """Used to form a single formula string from a list of chemical speices and + """Used to form a single formula string from a list of chemical species and their counts. 
Args: diff --git a/nomad/client.py b/nomad/client.py index fd7a0dc4f9f4fe94f2e63f69471a852968ea9db1..33c5e1f3824120753e2349f797ebddd4c6b6482f 100644 --- a/nomad/client.py +++ b/nomad/client.py @@ -145,6 +145,7 @@ from io import StringIO from nomad import config from nomad import metainfo as mi +from nomad.units import ureg from nomad.datamodel import EntryArchive # TODO this import is necessary to load all metainfo defintions that the parsers are using @@ -193,11 +194,11 @@ class ApiStatistics(mi.MSection): description='Number of entries loaded in the last api call') last_response_data_size = mi.Quantity( - type=int, unit=mi.units.bytes, default=0, + type=int, unit=ureg.bytes, default=0, description='Bytes loaded in the last api call') loaded_data_size = mi.Quantity( - type=int, unit=mi.units.bytes, default=0, + type=int, unit=ureg.bytes, default=0, description='Bytes loaded from this query') loaded_nentries = mi.Quantity( diff --git a/nomad/datamodel/encyclopedia.py b/nomad/datamodel/encyclopedia.py index 45ff9dac2315c63f430517c5c27c850586aaf7ea..031c18b2717617ed1f3f5d248e59e295d8451c7f 100644 --- a/nomad/datamodel/encyclopedia.py +++ b/nomad/datamodel/encyclopedia.py @@ -2,6 +2,7 @@ import numpy as np from nomad.metainfo import MSection, Section, SubSection, Quantity, MEnum, Reference from nomad.datamodel.metainfo.public import section_k_band, section_dos, section_thermodynamical_properties from nomad.metainfo.search_extension import Search +from elasticsearch_dsl import Text class WyckoffVariables(MSection): @@ -12,19 +13,19 @@ class WyckoffVariables(MSection): """ ) x = Quantity( - type=float, + type=np.dtype(np.float64), description=""" The x variable if present. """ ) y = Quantity( - type=float, + type=np.dtype(np.float64), description=""" The y variable if present. """ ) z = Quantity( - type=float, + type=np.dtype(np.float64), description=""" The z variable if present. 
""" @@ -128,7 +129,8 @@ class IdealizedStructure(MSection): """ ) cell_volume = Quantity( - type=float, + type=np.dtype(np.float64), + unit="m ** 3", description=""" Volume of the idealized cell. The cell volume can only be reported consistently after idealization and may not perfectly correspond to the @@ -141,7 +143,7 @@ class IdealizedStructure(MSection): class Bulk(MSection): m_def = Section( a_flask=dict(skip_none=True), - a_elastic="bulk", + a_search="bulk", description=""" Contains information that is specific to bulk crystalline materials. """ @@ -239,7 +241,7 @@ class Bulk(MSection): class Material(MSection): m_def = Section( a_flask=dict(skip_none=True), - a_elastic="material", + a_search="material", description=""" Contains an overview of the type of material that was detected in this entry. @@ -290,7 +292,14 @@ class Material(MSection): Hill notation whre the number of occurences have been divided by the greatest common divisor. """, - a_search=Search() + ) + + formula_parts = Quantity( + type=str, + description=""" + The formula separated into individual terms for easier search. + """, + a_search=Search(mapping=Text()) ) # Bulk-specific properties @@ -303,7 +312,7 @@ class Material(MSection): class Method(MSection): m_def = Section( a_flask=dict(skip_none=True), - a_elastic="method", + a_search="method", description=""" Contains an overview of the methodology that was detected in this entry. @@ -315,12 +324,6 @@ class Method(MSection): Generic name for the used methodology. """ ) - basis_set_type = Quantity( - type=MEnum("Numeric AOs", "Gaussians", "(L)APW+lo", "FLAPW (full-potential linearized augmented planewave)", "Plane waves", "Real-space grid", "Local-orbital minimum-basis"), - description=""" - Basic type of the used basis set. 
- """ - ) core_electron_treatment = Quantity( type=MEnum("full all electron", "all electron frozen core", "pseudopotential", "unavailable"), description=""" @@ -385,7 +388,7 @@ class Method(MSection): """ ) smearing_parameter = Quantity( - type=float, + type=np.dtype(np.float64), description=""" Parameter for smearing, usually the width. """ @@ -395,7 +398,7 @@ class Method(MSection): class Calculation(MSection): m_def = Section( a_flask=dict(skip_none=True), - a_elastic="calculation", + a_search="calculation", description=""" Contains an overview of the type of calculation that was detected in this entry. @@ -422,66 +425,92 @@ class Calculation(MSection): class Properties(MSection): m_def = Section( a_flask=dict(skip_none=True), - a_elastic="properties", + a_search="properties", description=""" Contains derived physical properties that are specific to the NOMAD Encyclopedia. """ ) atomic_density = Quantity( - type=float, + type=np.dtype(np.float64), unit="1 / m ** 3", description=""" Atomic density of the material (atoms/volume)." - """ + """, + a_search=Search() ) mass_density = Quantity( - type=float, + type=np.dtype(np.float64), unit="kg / m ** 3", description=""" Mass density of the material. - """ + """, + a_search=Search() + ) + band_gap = Quantity( + type=np.dtype(np.float64), + unit="eV", + description=""" + Band gap value. If multiple spin channels are present, this value is + taken from the channel with smallest band gap value. + """, + a_search=Search() + ) + band_gap_direct = Quantity( + type=bool, + description=""" + Whether band gap is direct or not. If multiple spin channels are + present, this value is taken from the channel with smallest band gap + value. + """, + a_search=Search() ) energies = Quantity( type=str, description=""" Code dependent energy values, corrected to be per formula unit. 
- """ + """, + a_search=Search() ) electronic_band_structure = Quantity( type=Reference(section_k_band.m_def), shape=[], description=""" Reference to an electronic band structure. - """ + """, + a_search=Search(shallow=True) ) electronic_dos = Quantity( type=Reference(section_dos.m_def), shape=[], description=""" Reference to an electronic density of states. - """ + """, + a_search=Search(shallow=True) ) phonon_band_structure = Quantity( type=Reference(section_k_band.m_def), shape=[], description=""" Reference to a phonon band structure. - """ + """, + a_search=Search(shallow=True) ) phonon_dos = Quantity( type=Reference(section_dos.m_def), shape=[], description=""" Reference to a phonon density of states. - """ + """, + a_search=Search(shallow=True) ) thermodynamical_properties = Quantity( type=Reference(section_thermodynamical_properties.m_def), shape=[], description=""" Reference to a section containing thermodynamical properties. - """ + """, + a_search=Search(shallow=True) ) @@ -493,10 +522,10 @@ class EncyclopediaMetadata(MSection): Section which stores information for the NOMAD Encyclopedia. 
""" ) - material = SubSection(sub_section=Material.m_def, repeats=False) - method = SubSection(sub_section=Method.m_def, repeats=False) - properties = SubSection(sub_section=Properties.m_def, repeats=False) - calculation = SubSection(sub_section=Calculation.m_def, repeats=False) + material = SubSection(sub_section=Material.m_def, repeats=False, a_search='material') + method = SubSection(sub_section=Method.m_def, repeats=False, a_search='method') + properties = SubSection(sub_section=Properties.m_def, repeats=False, a_search='properties') + calculation = SubSection(sub_section=Calculation.m_def, repeats=False, a_search='calculation') status = Quantity( type=MEnum("success", "unsupported_material_type", "unsupported_calculation_type", "invalid_metainfo", "failure"), description=""" @@ -509,5 +538,6 @@ class EncyclopediaMetadata(MSection): | `"unsupported_calculation_type"` | The detected calculation type is currenlty not supported by the Encyclopedia. | | `"invalid_metainfo"` | The entry could not be processed due to missing or invalid metainfo. | | `"failure"` | The entry could not be processed due to an unexpected exception. | - """ + """, + a_search=Search() ) diff --git a/nomad/datamodel/optimade.py b/nomad/datamodel/optimade.py index de081078840a3ebe575f2bc517dad90f2e53e3d4..77d43abba7fc9136c70f913ee2954601668ac189 100644 --- a/nomad/datamodel/optimade.py +++ b/nomad/datamodel/optimade.py @@ -1,8 +1,9 @@ from ase.data import chemical_symbols -from elasticsearch_dsl import Keyword, Float, InnerDoc, Nested +from elasticsearch_dsl import Keyword, Float, InnerDoc, Nested, Integer import numpy as np -from nomad.metainfo import MSection, Section, Quantity, SubSection, MEnum, units, DefinitionAnnotation +from nomad.units import ureg +from nomad.metainfo import MSection, Section, Quantity, SubSection, MEnum, DefinitionAnnotation from nomad.metainfo.search_extension import Search @@ -82,7 +83,7 @@ class Species(MSection): species). 
''') - mass = Quantity(type=float, unit=units.amu, a_optimade=dict(entry='optional')) + mass = Quantity(type=float, unit=ureg.amu, a_optimade=dict(entry='optional')) original_name = Quantity(type=str, a_optimade=dict(entry='optional'), description=''' Can be any valid Unicode string, and SHOULD contain (if specified) the name of the @@ -172,7 +173,7 @@ class OptimadeEntry(MSection): dimension_types = Quantity( type=int, shape=[3], default=[0, 0, 0], links=optimade_links('h.6.2.8'), - a_search=Search(value=lambda a: sum(a.dimension_types)), + a_search=Search(value=lambda a: sum(a.dimension_types), type=Integer), a_optimade=Optimade(query=True, entry=True), description=''' List of three integers. For each of the three directions indicated by the three lattice @@ -183,7 +184,7 @@ class OptimadeEntry(MSection): ''') lattice_vectors = Quantity( - type=np.dtype('f8'), shape=[3, 3], unit=units.angstrom, + type=np.dtype('f8'), shape=[3, 3], unit=ureg.angstrom, links=optimade_links('h.6.2.9'), a_optimade=Optimade(query=False, entry=True), description=''' @@ -191,7 +192,7 @@ class OptimadeEntry(MSection): ''') cartesian_site_positions = Quantity( - type=np.dtype('f8'), shape=['nsites', 3], unit=units.angstrom, + type=np.dtype('f8'), shape=['nsites', 3], unit=ureg.angstrom, links=optimade_links('h.6.2.10'), a_optimade=Optimade(query=False, entry=True), description=''' Cartesian positions of each site. 
A site is an atom, a site potentially occupied by diff --git a/nomad/metainfo/__init__.py b/nomad/metainfo/__init__.py index 7d7ca15ee679dd44f957da0bbcfc5e3059739e1b..c401e348f60093b923952d7c92246026de3a9576 100644 --- a/nomad/metainfo/__init__.py +++ b/nomad/metainfo/__init__.py @@ -297,7 +297,6 @@ from .metainfo import ( Reference, MResource, m_package, - units, Annotation, DefinitionAnnotation, SectionAnnotation, diff --git a/nomad/metainfo/elastic_extension.py b/nomad/metainfo/elastic_extension.py index 99f33675ca06d56f893f6429c80e673b2353de68..9caa69cf0b958f753181e62f089638938baf937b 100644 --- a/nomad/metainfo/elastic_extension.py +++ b/nomad/metainfo/elastic_extension.py @@ -14,6 +14,8 @@ from typing import Callable, Any, Dict, cast import uuid +import numpy as np +import pint.quantity from .metainfo import ( @@ -37,13 +39,6 @@ class ElasticDocument(SectionAnnotation): classes, sub sections become inner documents, and quantities with the :class:`Elastic` extension become fields in their respective document. - Arguments: - index_name: This is used to optionally add the index_name to the resulting - elasticsearch_dsl document. - id: A callable that produces an id from a section instance that is used as id - for the respective elastic search index entry. The default will be randomly - generated UUID(4). - Attributes: document: The elasticsearch_dsl document class that was generated from the metainfo section @@ -52,6 +47,14 @@ class ElasticDocument(SectionAnnotation): _all_documents: Dict[str, Any] = {} def __init__(self, index_name: str = None, id: Callable[[Any], str] = None): + """ + Args: + index_name: This is used to optionally add the index_name to the resulting + elasticsearch_dsl document. + id: A callable that produces an id from a section instance that is used as id + for the respective elastic search index entry. The default will be randomly + generated UUID(4). 
+ """ self.index_name = index_name self.id = id @@ -85,12 +88,20 @@ class ElasticDocument(SectionAnnotation): if value is None or value == []: continue - quantity_type = quantity.type - if isinstance(quantity_type, Reference): - if quantity.is_scalar: - value = ElasticDocument.create_index_entry(cast(MSection, value)) + if isinstance(quantity.type, Reference): + # For shallow section references only the path is stored + if annotation.shallow: + value = value.m_proxy_url + # For deep references the full section is resolved else: - value = [ElasticDocument.create_index_entry(item) for item in value] + if quantity.is_scalar: + value = ElasticDocument.create_index_entry(cast(MSection, value)) + else: + value = [ElasticDocument.create_index_entry(item) for item in value] + + # Only the magnitude of scalar Pint quantity objects is stored + if quantity.is_scalar and isinstance(value, pint.quantity._Quantity): + value = value.magnitude setattr(obj, annotation.field, value) @@ -137,7 +148,7 @@ class ElasticDocument(SectionAnnotation): if document is not None: return document - from elasticsearch_dsl import Document, InnerDoc, Keyword, Date, Integer, Boolean, Object + from elasticsearch_dsl import Document, InnerDoc, Keyword, Date, Integer, Boolean, Object, Double, Float, Long if attrs is None: attrs = {} @@ -156,25 +167,39 @@ class ElasticDocument(SectionAnnotation): for annotation in quantity.m_get_annotations(Elastic, as_list=True): if annotation.mapping is None and first: kwargs = dict(index=annotation.index) - # find a mapping based on quantity type - if quantity.type == str: - annotation.mapping = Keyword(**kwargs) - elif quantity.type == int: - annotation.mapping = Integer(**kwargs) - elif quantity.type == bool: - annotation.mapping = Boolean(**kwargs) - elif quantity.type == Datetime: - annotation.mapping = Date(**kwargs) - elif isinstance(quantity.type, Reference): - inner_document = ElasticDocument.create_document( - cast(Section, quantity.type.target_section_def), 
inner_doc=True, - prefix=annotation.field) - annotation.mapping = Object(inner_document) - elif isinstance(quantity.type, MEnum): + + # Use keyword type for shallow references + if isinstance(quantity.type, Reference) and annotation.shallow: annotation.mapping = Keyword(**kwargs) + # If an explicit type is given, use it + elif annotation.type is not None: + annotation.mapping = annotation.type(**kwargs) + # Otherwise find a mapping based on quantity type else: - raise NotImplementedError( - 'Quantity type %s for quantity %s is not supported.' % (quantity.type, quantity)) + if quantity.type == str: + annotation.mapping = Keyword(**kwargs) + elif quantity.type in [float, np.float64] and quantity.is_scalar: + annotation.mapping = Double(**kwargs) + elif quantity.type == np.float32 and quantity.is_scalar: + annotation.mapping = Float(**kwargs) + elif quantity.type in [int, np.int32] and quantity.is_scalar: + annotation.mapping = Integer(**kwargs) + elif quantity.type == np.int64 and quantity.is_scalar: + annotation.mapping = Long(**kwargs) + elif quantity.type == bool: + annotation.mapping = Boolean(**kwargs) + elif quantity.type == Datetime: + annotation.mapping = Date(**kwargs) + elif isinstance(quantity.type, Reference): + inner_document = ElasticDocument.create_document( + cast(Section, quantity.type.target_section_def), inner_doc=True, + prefix=annotation.field) + annotation.mapping = Object(inner_document) + elif isinstance(quantity.type, MEnum): + annotation.mapping = Keyword(**kwargs) + else: + raise NotImplementedError( + 'Quantity type %s for quantity %s is not supported.' 
% (quantity.type, quantity)) assert first or annotation.mapping is None, 'Only the first Elastic annotation is mapped' diff --git a/nomad/metainfo/example.py b/nomad/metainfo/example.py index f585a2c4a2aef8ddac7f6a012ff008f419535e2f..e7ad6f57cc2720c2b895ed95191b39d3873206fe 100644 --- a/nomad/metainfo/example.py +++ b/nomad/metainfo/example.py @@ -17,9 +17,10 @@ import numpy as np from datetime import datetime +from nomad.units import ureg from nomad.metainfo import ( - MSection, MCategory, Section, Quantity, Package, SubSection, MEnum, Datetime, units, - constraint) + MSection, MCategory, Section, Quantity, Package, SubSection, MEnum, + Datetime, constraint) m_package = Package(links=['http://metainfo.nomad-coe.eu']) @@ -57,11 +58,11 @@ class System(MSection): description='The atoms in the simulated systems.') atom_positions = Quantity( - type=np.dtype('f'), shape=['n_atoms', 3], unit=units.m, categories=[SystemHash], + type=np.dtype('f'), shape=['n_atoms', 3], unit=ureg.m, categories=[SystemHash], description='The atom positions in the simulated system.') lattice_vectors = Quantity( - type=np.dtype('f'), shape=[3, 3], unit=units.m, categories=[SystemHash], + type=np.dtype('f'), shape=[3, 3], unit=ureg.m, categories=[SystemHash], description='The lattice vectors of the simulated unit cell.') unit_cell = Quantity(synonym_for='lattice_vectors') @@ -75,8 +76,8 @@ class System(MSection): class SCC(MSection): - energy_total = Quantity(type=float, default=0.0, unit=units.J) - energy_total_0 = Quantity(type=np.dtype(np.float32), default=0.0, unit=units.J) + energy_total = Quantity(type=float, default=0.0, unit=ureg.J) + energy_total_0 = Quantity(type=np.dtype(np.float32), default=0.0, unit=ureg.J) an_int = Quantity(type=np.dtype(np.int32)) system = Quantity(type=System, description='The system that this calculation is based on.') diff --git a/nomad/metainfo/legacy.py b/nomad/metainfo/legacy.py index 
a091aaf3d5b627a2bbad9566f3fb042d99b3e124..6c382daba7b93e0d04d44682acd4e7c78a4ae369 100644 --- a/nomad/metainfo/legacy.py +++ b/nomad/metainfo/legacy.py @@ -27,8 +27,9 @@ import os.path from nomadcore.local_meta_info import loadJsonFile, InfoKindEl, InfoKindEnv from nomad import utils +from nomad.units import ureg from nomad.metainfo import ( - Definition, SubSection, Package, Quantity, Category, Section, Reference, units, + Definition, SubSection, Package, Quantity, Category, Section, Reference, Environment, MEnum, MSection, DefinitionAnnotation) logger = utils.get_logger(__name__) @@ -438,7 +439,7 @@ class PackageConversion: # units if legacy_def.units is not None: try: - definition.unit = units.parse_units(legacy_def.units) + definition.unit = ureg.parse_units(legacy_def.units) except UndefinedUnitError: logger.error('unknown unit %s' % legacy_def.units) except ValueError as e: diff --git a/nomad/metainfo/metainfo.py b/nomad/metainfo/metainfo.py index e3e3e75b89cc9296b47d140fd01667ad2181c4fb..38dc15714980bc05dfe2620e3e75a28ea33faefb 100644 --- a/nomad/metainfo/metainfo.py +++ b/nomad/metainfo/metainfo.py @@ -30,6 +30,8 @@ from datetime import datetime import pytz import docstring_parser +from nomad.units import ureg + m_package: 'Package' = None @@ -211,7 +213,7 @@ class _Dimension(DataType): class _Unit(DataType): def set_normalize(self, section, quantity_def: 'Quantity', value): if isinstance(value, str): - value = units.parse_units(value) + value = ureg.parse_units(value) elif not isinstance(value, pint.unit._Unit): raise TypeError('Units must be given as str or pint Unit instances.') @@ -222,11 +224,7 @@ class _Unit(DataType): return value.__str__() def deserialize(self, section, quantity_def: 'Quantity', value): - return units.parse_units(value) - - -units = pint.UnitRegistry() -''' The default pint unit registry that should be used to give units to quantity definitions. 
''' + return ureg.parse_units(value) class _Callable(DataType): diff --git a/nomad/metainfo/search_extension.py b/nomad/metainfo/search_extension.py index 414df9417de9fa9d78d13a3a447f3cfb0a4c9c31..c8a0972a490d92d652c133d19ee7748d4a5939dd 100644 --- a/nomad/metainfo/search_extension.py +++ b/nomad/metainfo/search_extension.py @@ -69,11 +69,16 @@ class Search(Elastic): The information can be used (e.g. by the GUI) to fill in empty values. group: Indicates that his quantity can be used to group results. The value will be the name of the group. + derived: A callable that is applied to search parameter values before search. search_field: The qualified field in the elastic mapping that is used to search. This might be different from the field that is used to store the value in elastic search. This is especially useful if the field represents a inner document and a subfield of this inner object should be used for search. - derived: A callable that is applied to search parameter values before search. + type: Determines the data type class used in the ElasticSearch mapping. + Defaults to determining the type from the quantity type. + shallow: Determines how a reference is handled in the search index. If + set to True, only the unresolved reference proxy path (m_proxy_path) is + stored. If False, the whole referenced section will be stored. 
''' def __init__( @@ -87,6 +92,8 @@ class Search(Elastic): statistic_values: List[str] = None, derived: Callable[[Any], Any] = None, search_field: str = None, + type: str = None, + shallow: bool = False, **kwargs): super().__init__(field=None, **kwargs) @@ -105,6 +112,8 @@ class Search(Elastic): self.statistic_order = statistic_order self.statistic_values = statistic_values self.search_field = search_field + self.type = type + self.shallow = shallow self.derived = derived diff --git a/nomad/normalizing/encyclopedia/basisset.py b/nomad/normalizing/encyclopedia/basisset.py index 2e615c97e0588b56153419e374b4cb6193e5860d..f8d3eaaa908fa44fc19ea6cc6beed75030f7eab8 100644 --- a/nomad/normalizing/encyclopedia/basisset.py +++ b/nomad/normalizing/encyclopedia/basisset.py @@ -2,14 +2,12 @@ from abc import ABC, abstractmethod from collections import OrderedDict import numpy as np from typing import Tuple, List -from pint import UnitRegistry +from nomad.units import ureg from nomad.parsing.legacy import Backend from nomad.metainfo import Section from nomad.utils import RestrictedDict -ureg = UnitRegistry() - def get_basis_set(context, backend: Backend, logger) -> RestrictedDict: """Decide which type of basis set settings are applicable to the entry and diff --git a/nomad/normalizing/encyclopedia/material.py b/nomad/normalizing/encyclopedia/material.py index 5d5b4418c63aaeb7d78e5adb7508b4bb990bf241..0fe7b50c05bf177aa078d4b5f64153e512b55b12 100644 --- a/nomad/normalizing/encyclopedia/material.py +++ b/nomad/normalizing/encyclopedia/material.py @@ -68,6 +68,10 @@ class MaterialNormalizer(): formula = atomutils.get_formula_string(names, counts) material.formula = formula + def formula_parts(self, material: Material, names: List[str], counts: List[int]) -> None: + parts = ["{}{}".format(name, count) for name, count in zip(names, counts)] + material.formula_parts = " ".join(parts) + def formula_reduced(self, material: Material, names: list, counts_reduced: list) -> None: formula = 
atomutils.get_formula_string(names, counts_reduced) material.formula_reduced = formula @@ -382,6 +386,7 @@ class MaterialBulkNormalizer(MaterialNormalizer): self.crystal_system(bulk, sec_symmetry) self.lattice_vectors_primitive(ideal, prim_atoms) self.formula(material, names, counts) + self.formula_parts(material, names, counts) self.formula_reduced(material, names, reduced_counts) self.has_free_wyckoff_parameters(bulk, symmetry_analyzer) self.lattice_parameters(ideal, std_atoms) diff --git a/nomad/normalizing/encyclopedia/method.py b/nomad/normalizing/encyclopedia/method.py index 4a844e018ff16f39828f19bd300d335aaa507072..e93cd1c065d9057078374a7349c0bd65ec65c7c9 100644 --- a/nomad/normalizing/encyclopedia/method.py +++ b/nomad/normalizing/encyclopedia/method.py @@ -16,7 +16,7 @@ from typing import List from abc import abstractmethod from collections import OrderedDict import numpy as np -from pint import UnitRegistry +from nomad.units import ureg from nomad.datamodel.encyclopedia import ( Material, @@ -28,8 +28,6 @@ from nomad.normalizing.encyclopedia.context import Context from nomad.utils import RestrictedDict from nomad import config -ureg = UnitRegistry() - class MethodNormalizer(): """A base class that is used for processing method related information diff --git a/nomad/normalizing/encyclopedia/properties.py b/nomad/normalizing/encyclopedia/properties.py index 74f7b8cb312869cf268ef7956c921a41860d3776..d034f5e0d5da2273fe0de549759109492607b133 100644 --- a/nomad/normalizing/encyclopedia/properties.py +++ b/nomad/normalizing/encyclopedia/properties.py @@ -60,7 +60,24 @@ class PropertiesNormalizer(): except Exception: return if representative_band is not None: - properties.electronic_band_structure = representative_band + properties.electronic_band_structure = representative_band.m_path() + + # Add band gap information to metadata if present. The channel with + # smallest band gap index is chosen as a representative one. 
+ band_gaps = properties.electronic_band_structure.section_band_gap + if band_gaps is not None and len(band_gaps) > 0: + min_gap_index = 0 + min_gap = float("Inf") + for i, gap in enumerate(band_gaps): + value = gap.value + if value < min_gap: + min_gap_index = i + min_gap = value + representative_gap = band_gaps[min_gap_index] + bg_value = representative_gap.value + if bg_value is not None and bg_value > 0: + properties.band_gap = representative_gap.value + properties.band_gap_direct = representative_gap.type == "direct" def electronic_dos(self, properties: Properties, context: Context) -> None: """Tries to resolve a reference to a representative electonic density @@ -85,7 +102,7 @@ class PropertiesNormalizer(): except Exception: return if representative_dos is not None: - properties.electronic_dos = representative_dos + properties.electronic_dos = representative_dos.m_path() def elastic_constants_matrix(self) -> None: pass @@ -127,7 +144,7 @@ class PropertiesNormalizer(): except Exception: return if resolved_section is not None: - properties.thermodynamical_properties = resolved_section + properties.thermodynamical_properties = resolved_section.m_path() def phonon_band_structure(self, properties: Properties, context: Context) -> None: """Tries to resolve a reference to a representative phonon band @@ -158,7 +175,7 @@ class PropertiesNormalizer(): except Exception: return if representative_phonon_band is not None: - properties.phonon_band_structure = representative_phonon_band + properties.phonon_band_structure = representative_phonon_band.m_path() def phonon_dos(self, properties: Properties, context: Context) -> None: """Tries to resolve a reference to a representative phonon density of @@ -185,7 +202,7 @@ class PropertiesNormalizer(): except Exception: return if representative_phonon_dos is not None: - properties.phonon_dos = representative_phonon_dos + properties.phonon_dos = representative_phonon_dos.m_path() def energies(self, properties: Properties, gcd: 
int, representative_scc: Section) -> None: energy_dict = {} diff --git a/nomad/normalizing/optimade.py b/nomad/normalizing/optimade.py index 08528f6057792e76fa65ef23715e5eec339f1d71..a5296fa0628f6c9ff8e9c7bc08eb6c3c31b319d1 100644 --- a/nomad/normalizing/optimade.py +++ b/nomad/normalizing/optimade.py @@ -21,7 +21,7 @@ import pint.quantity from nomad.parsing.legacy import Backend from nomad.normalizing.normalizer import SystemBasedNormalizer -from nomad.metainfo import units +from nomad.units import ureg from nomad.datamodel import OptimadeEntry, Species, DFTMetadata, EntryMetadata species_re = re.compile(r'^([A-Z][a-z]?)(\d*)$') @@ -102,8 +102,8 @@ class OptimadeNormalizer(SystemBasedNormalizer): # sites optimade.nsites = len(nomad_species) optimade.species_at_sites = nomad_species - optimade.lattice_vectors = get_value('lattice_vectors', numpy=True, unit=units.m) - optimade.cartesian_site_positions = get_value('atom_positions', numpy=True, unit=units.m) + optimade.lattice_vectors = get_value('lattice_vectors', numpy=True, unit=ureg.m) + optimade.cartesian_site_positions = get_value('atom_positions', numpy=True, unit=ureg.m) optimade.dimension_types = [ 1 if value else 0 for value in get_value('configuration_periodic_dimensions')] diff --git a/nomad/processing/data.py b/nomad/processing/data.py index cf7863f0442329ed0550ffe8625f6e1eb2878169..411f6c894b92228e611743acc1117716e66323fb 100644 --- a/nomad/processing/data.py +++ b/nomad/processing/data.py @@ -424,8 +424,8 @@ class Calc(Proc): ref_archive = EntryArchive.m_from_dict(arch) # Get encyclopedia method information directly from the referenced calculation. - ref_enc_method = ref_archive.section_encyclopedia.method - backend.entry_archive.section_encyclopedia.method = ref_enc_method + ref_enc_method = ref_archive.section_metadata.encyclopedia.method + backend.entry_archive.section_metadata.encyclopedia.method = ref_enc_method # Overwrite old entry with new data. 
The metadata is updated with # new timestamp and method details taken from the referenced diff --git a/nomad/search.py b/nomad/search.py index c982a903633b959dfd2035d16a4fa36ef951ad0b..b77cdaed1e9600a10bc691a61e0c99300d5759f8 100644 --- a/nomad/search.py +++ b/nomad/search.py @@ -461,7 +461,7 @@ class SearchRequest: def execute(self): ''' - Exectutes without returning actual results. Only makes sense if the request + Executes without returning actual results. Only makes sense if the request was configured for statistics or quantity values. ''' search = self._search.query(self.q)[0:0] diff --git a/nomad/units.py b/nomad/units.py new file mode 100644 index 0000000000000000000000000000000000000000..5fbcc0ffe1e9f5f2baf1cc05ddca39823317b64c --- /dev/null +++ b/nomad/units.py @@ -0,0 +1,21 @@ +# Copyright 2018 Markus Scheidgen, empty_task +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +This module contains a global unit registry used throughout the nomad-FAIR +package. +""" +from pint import UnitRegistry + +ureg = UnitRegistry() diff --git a/tests/app/test_api_encyclopedia.py b/tests/app/test_api_encyclopedia.py new file mode 100644 index 0000000000000000000000000000000000000000..4fcaf35bda602ace643f08d43300345a1b7035cb --- /dev/null +++ b/tests/app/test_api_encyclopedia.py @@ -0,0 +1,51 @@ +# Copyright 2018 Markus Scheidgen +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def test_material(): + pass + + # Unpublished material should not be found + + # Embargoed material should not be found + + # Missing material causes 404 + + # Correctly found materials return all required values + + +def test_materials(): + pass + + # Unpublished material should not be found + + # Embargoed material should not be found + + # Missing material causes 404 + + # Correctly found materials return all required values + + # Exclusive formula works as expected + + # Inclusive formula works as expected + + # Exclusive elements works as expected + + # Inclusive elements works as expected + + # Band gap + + # Mass density + + # Property existence diff --git a/tests/normalizing/test_band_structure.py b/tests/normalizing/test_band_structure.py index fc722a60f439bebfe2ac9ae2537cac63c38c53a9..250fca9e88d43a36be0dad92bda4c9a2e6fa716c 100644 --- a/tests/normalizing/test_band_structure.py +++ b/tests/normalizing/test_band_structure.py @@ -27,8 +27,8 @@ from tests.normalizing.conftest import ( # pylint: disable=unused-import band_path_mP_nonstandard, band_path_cF_nonstandard, ) -from pint import UnitRegistry -ureg = UnitRegistry() + +from nomad.units import ureg def test_band_gaps(bands_unpolarized_no_gap, bands_polarized_no_gap, bands_unpolarized_gap_indirect, bands_polarized_gap_indirect): diff --git a/tests/normalizing/test_encyclopedia.py b/tests/normalizing/test_encyclopedia.py index 85a7f305ac09f0f7b3c847a266bca364f10981af..fea499c51ad7fc90026185626db3846c86f5a58f 100644 --- a/tests/normalizing/test_encyclopedia.py +++
b/tests/normalizing/test_encyclopedia.py @@ -17,7 +17,6 @@ import numpy as np from ase import Atoms import ase.build from matid.symmetry.wyckoffset import WyckoffSet -from pint import UnitRegistry from nomad.utils import hash from nomad import atomutils @@ -38,8 +37,6 @@ from tests.normalizing.conftest import ( # pylint: disable=unused-import hash_vasp, ) -ureg = UnitRegistry() - def test_geometry_optimization(geometry_optimization: EntryArchive): """Tests that geometry optimizations are correctly processed." @@ -131,12 +128,12 @@ def test_bulk_metainfo(bulk: EntryArchive): assert ideal.lattice_vectors_primitive is not None assert np.array_equal(ideal.periodicity, [True, True, True]) assert ideal.lattice_parameters is not None - assert ideal.cell_volume == pytest.approx(5.431**3 * 1e-30) + assert ideal.cell_volume.magnitude == pytest.approx(5.431**3 * 1e-30) # Properties prop = enc.properties - assert prop.atomic_density == pytest.approx(4.99402346512432e+28) - assert prop.mass_density == pytest.approx(8 * 28.0855 * 1.6605389e-27 / (5.431**3 * 1e-30)) # Atomic mass in kg/m^3 + assert prop.atomic_density.magnitude == pytest.approx(4.99402346512432e+28) + assert prop.mass_density.magnitude == pytest.approx(8 * 28.0855 * 1.6605389e-27 / (5.431**3 * 1e-30)) # Atomic mass in kg/m^3 def test_1d_material_identification():