diff --git a/nomad/app/api/__init__.py b/nomad/app/api/__init__.py index 3334aa661347982b8e7ada6a3c3c7c7ca424569e..dd22791410a8fd7448ee28b46887d94282d405db 100644 --- a/nomad/app/api/__init__.py +++ b/nomad/app/api/__init__.py @@ -25,4 +25,4 @@ There is a separate documentation for the API endpoints from a client perspectiv ''' from .api import api, blueprint -from . import info, auth, upload, repo, archive, raw, mirror, dataset, metainfo +from . import info, auth, upload, repo, archive, encyclopedia, raw, mirror, dataset, metainfo diff --git a/nomad/app/api/encyclopedia.py b/nomad/app/api/encyclopedia.py new file mode 100644 index 0000000000000000000000000000000000000000..ffc828a8936b19f7e225ba8b5ef0b97730d991a3 --- /dev/null +++ b/nomad/app/api/encyclopedia.py @@ -0,0 +1,1181 @@ +# Copyright 2018 Markus Scheidgen +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an"AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +The encyclopedia API of the nomad@FAIRDI APIs. 
+""" +import re +import numpy as np + +from flask_restplus import Resource, abort, fields, marshal +from flask import request +from elasticsearch_dsl import Search, Q, A +from elasticsearch_dsl.utils import AttrDict + +from nomad import config, files +from nomad.units import ureg +from nomad.atomutils import get_hill_decomposition +from nomad.datamodel.datamodel import EntryArchive +from .api import api + +ns = api.namespace("encyclopedia", description="Access encyclopedia metadata.") +re_formula = re.compile(r"([A-Z][a-z]?)(\d*)") + +material_prop_map = { + # General + "material_id": "encyclopedia.material.material_id", + "formula": "encyclopedia.material.formula", + "formula_reduced": "encyclopedia.material.formula_reduced", + "system_type": "encyclopedia.material.material_type", + # Bulk only + "has_free_wyckoff_parameters": "encyclopedia.material.bulk.has_free_wyckoff_parameters", + "strukturbericht_designation": "encyclopedia.material.bulk.strukturbericht_designation", + "material_name": "encyclopedia.material.material_name", + "bravais_lattice": "encyclopedia.material.bulk.bravais_lattice", + "crystal_system": "encyclopedia.material.bulk.crystal_system", + "point_group": "encyclopedia.material.bulk.point_group", + "space_group_number": "encyclopedia.material.bulk.space_group_number", + "space_group_international_short_symbol": "encyclopedia.material.bulk.space_group_international_short_symbol", + "structure_prototype": "encyclopedia.material.bulk.structure_prototype", + "structure_type": "encyclopedia.material.bulk.structure_type", +} + + +def rgetattr(obj, attr_name): + """Used to perform attribute access based on a (possibly nested) attribute + name given as string. + """ + try: + for attr in attr_name.split("."): + obj = obj[attr] + except KeyError: + return None + return obj + + +def get_es_doc_values(es_doc, mapping, keys=None): + """Used to form a material definition for "materials/<material_id>" from + the given ElasticSearch root document. 
+ """ + if keys is None: + keys = mapping.keys() + + result = {} + for key in keys: + es_key = mapping[key] + value = rgetattr(es_doc, es_key) + result[key] = value + + return result + + +material_query = api.parser() +material_query.add_argument( + "property", + type=str, + choices=tuple(material_prop_map.keys()), + help="Optional single property to retrieve for the given material. If not specified, all properties will be returned.", + location="args" +) +material_result = api.model("material_result", { + # General + "material_id": fields.String, + "formula": fields.String, + "formula_reduced": fields.String, + "system_type": fields.String, + "n_matches": fields.Integer, + # Bulk only + "has_free_wyckoff_parameters": fields.Boolean, + "strukturbericht_designation": fields.String, + "material_name": fields.String, + "bravais_lattice": fields.String, + "crystal_system": fields.String, + "point_group": fields.String, + "space_group_number": fields.Integer, + "space_group_international_short_symbol": fields.String, + "structure_prototype": fields.String, + "structure_type": fields.String, +}) +enc_filter = [ + Q("term", published=True), + Q("term", with_embargo=False), + Q("term", encyclopedia__status="success"), +] + + +@ns.route("/materials/<string:material_id>") +class EncMaterialResource(Resource): + @api.response(404, "The material does not exist") + @api.response(200, "Metadata send", fields.Raw) + @api.doc("material/<material_id>") + @api.expect(material_query) + @api.marshal_with(material_result, skip_none=True) + def get(self, material_id): + """Used to retrive basic information related to the specified material. 
+ """ + # Parse request arguments + args = material_query.parse_args() + prop = args.get("property", None) + if prop is not None: + keys = [prop] + es_keys = [material_prop_map[prop]] + else: + keys = list(material_prop_map.keys()) + es_keys = list(material_prop_map.values()) + + # Find the first public entry with this material id and take + # information from there. In principle all other entries should have + # the same information. + s = Search(index=config.elastic.index_name) + + # Since we are looking for an exact match, we use filtek context + # together with term search for speed (instead of query context and + # match search) + query = Q( + "bool", + filter=enc_filter + [ + Q("term", encyclopedia__material__material_id=material_id), + ] + ) + s = s.query(query) + + # If a representative calculation is requested, all calculations are + # returned in order to perform the scoring with a custom loop. + # Otherwise, only one representative entry is returned. + s = s.extra(**{ + "_source": {"includes": es_keys}, + "size": 10000, + "collapse": {"field": "encyclopedia.material.material_id"}, + }) + response = s.execute() + + # No such material + if len(response) == 0: + abort(404, message="There is no material {}".format(material_id)) + + # Add values from ES entry + entry = response[0] + result = get_es_doc_values(entry, material_prop_map, keys) + + return result, 200 + + +range_query = api.model("range_query", { + "max": fields.Float, + "min": fields.Float, +}) +materials_after = api.model("materials_after", { + "materials": fields.String, +}) +materials_query = api.model("materials_input", { + "search_by": fields.Nested(api.model("search_query", { + "exclusive": fields.Boolean(default=False), + "formula": fields.String, + "element": fields.String, + "page": fields.Integer(default=1), + "after": fields.Nested(materials_after, allow_null=True), + "per_page": fields.Integer(default=25), + "pagination": fields.Boolean, + })), + "material_name": 
fields.List(fields.String), + "structure_type": fields.List(fields.String), + "space_group_number": fields.List(fields.Integer), + "system_type": fields.List(fields.String), + "crystal_system": fields.List(fields.String), + "band_gap": fields.Nested(range_query, description="Band gap range in eV."), + "band_gap_direct": fields.Boolean, + "has_band_structure": fields.Boolean, + "has_dos": fields.Boolean, + "has_fermi_surface": fields.Boolean, + "has_thermal_properties": fields.Boolean, + "functional_type": fields.List(fields.String), + "basis_set_type": fields.List(fields.String), + "code_name": fields.List(fields.String), + "mass_density": fields.Nested(range_query, description="Mass density range in kg / m ** 3."), +}) +pages_result = api.model("page_info", { + "per_page": fields.Integer, + "total": fields.Integer, + "page": fields.Integer, + "pages": fields.Integer, + "after": fields.Nested(materials_after), +}) + +materials_result = api.model("materials_result", { + "total_results": fields.Integer(allow_null=False), + "results": fields.List(fields.Nested(material_result, skip_none=True)), + "pages": fields.Nested(pages_result, skip_none=True), + "es_query": fields.String(allow_null=False), +}) + + +@ns.route("/materials") +class EncMaterialsResource(Resource): + @api.response(404, "No materials found") + @api.response(400, "Bad request") + @api.response(200, "Metadata send", fields.Raw) + @api.expect(materials_query, validate=False) + @api.marshal_with(materials_result, skip_none=True) + @api.doc("materials") + def post(self): + """Used to query a list of materials with the given search options. 
+ """ + # Get query parameters as json + try: + data = marshal(request.get_json(), materials_query) + except Exception as e: + abort(400, message=str(e)) + + filters = enc_filter + must_nots = [] + musts = [] + + def add_terms_filter(source, target, query_type="terms"): + if data[source]: + filters.append(Q(query_type, **{target: data[source]})) + + add_terms_filter("material_name", "encyclopedia.material.material_name") + add_terms_filter("structure_type", "encyclopedia.material.bulk.structure_type") + add_terms_filter("space_group_number", "encyclopedia.material.bulk.space_group_number") + add_terms_filter("system_type", "encyclopedia.material.material_type") + add_terms_filter("crystal_system", "encyclopedia.material.bulk.crystal_system") + add_terms_filter("band_gap_direct", "encyclopedia.properties.band_gap_direct", query_type="term") + add_terms_filter("functional_type", "encyclopedia.method.functional_type") + add_terms_filter("basis_set_type", "dft.basis_set") + add_terms_filter("code_name", "dft.code_name") + + # Add exists filters + def add_exists_filter(source, target): + param = data[source] + if param is not None: + query = Q("exists", field=target) + if param is True: + filters.append(query) + elif param is False: + must_nots.append(query) + + add_exists_filter("has_thermal_properties", "encyclopedia.properties.thermodynamical_properties") + add_exists_filter("has_band_structure", "encyclopedia.properties.electronic_band_structure") + add_exists_filter("has_dos", "encyclopedia.properties.electronic_dos") + add_exists_filter("has_fermi_surface", "encyclopedia.properties.fermi_surface") + + # Add range filters + def add_range_filter(source, target, source_unit=None, target_unit=None): + param = data[source] + query_dict = {} + if param["min"] is not None: + if source_unit is None and target_unit is None: + gte = param["min"] + else: + gte = (param["min"] * source_unit).to(target_unit).magnitude + query_dict["gte"] = gte + if param["max"] is not None: + 
if source_unit is None and target_unit is None: + lte = param["max"] + else: + lte = (param["max"] * source_unit).to(target_unit).magnitude + query_dict["lte"] = lte + if len(query_dict) != 0: + query = Q("range", **{target: query_dict}) + filters.append(query) + + add_range_filter("band_gap", "encyclopedia.properties.band_gap", ureg.eV, ureg.J) + add_range_filter("mass_density", "encyclopedia.properties.mass_density") + + # Create query for elements or formula + search_by = data["search_by"] + formula = search_by["formula"] + elements = search_by["element"] + exclusive = search_by["exclusive"] + + if formula is not None: + # Here we determine a list of atom types. The types may occur + # multiple times and at multiple places. + element_list = [] + matches = re_formula.finditer(formula) + for match in matches: + groups = match.groups() + symbol = groups[0] + count = groups[1] + if symbol != "": + if count == "": + element_list.append(symbol) + else: + element_list += [symbol] * int(count) + + # The given list of species is reformatted with the Hill system + # into a query string. The counts are reduced by the greatest + # common divisor. + names, reduced_counts = get_hill_decomposition(element_list, reduced=True) + query_string = [] + for name, count in zip(names, reduced_counts): + if count == 1: + query_string.append(name) + else: + query_string.append("{}{}".format(name, int(count))) + query_string = " ".join(query_string) + + # With exclusive search we look for exact match + if exclusive: + filters.append(Q("term", **{"encyclopedia.material.species_and_counts.keyword": query_string})) + # With non-exclusive search we look for match that includes at + # least all parts of the formula, possibly even more. 
+ else: + musts.append(Q( + "match", + encyclopedia__material__species_and_counts={"query": query_string, "operator": "and"} + )) + elif elements is not None: + # The given list of species is reformatted with the Hill system into a query string + species, _ = get_hill_decomposition(elements.split(",")) + query_string = " ".join(species) + + # With exclusive search we look for exact match + if exclusive: + filters.append(Q("term", **{"encyclopedia.material.species.keyword": query_string})) + # With non-exclusive search we look for match that includes at + # least all species, possibly even more. + else: + musts.append(Q( + "match", + encyclopedia__material__species={"query": query_string, "operator": "and"} + )) + + page = search_by["page"] + per_page = search_by["per_page"] + after = search_by["after"] + bool_query = Q( + "bool", + filter=filters, + must_not=must_nots, + must=musts, + ) + + # The top query filters out entries based on the user query + s = Search(index=config.elastic.index_name) + s = s.query(bool_query) + + # The materials are grouped by using three aggregations: + # "Composite" to enable scrolling, "Terms" to enable selecting + # by material_id and "Top Hits" to fetch a single + # representative material document. Unnecessary fields are + # filtered to reduce data transfer. + terms_agg = A("terms", field="encyclopedia.material.material_id") + composite_kwargs = {"sources": {"materials": terms_agg}, "size": per_page} + + # The number of matched materials is only requested on the first + # search, not for each page. 
+ if after is not None: + composite_kwargs["after"] = after + else: + cardinality_agg = A("cardinality", field="encyclopedia.material.material_id", precision_threshold=1000) + s.aggs.metric("n_materials", cardinality_agg) + + composite_agg = A("composite", **composite_kwargs) + composite_agg.metric("representative", A( + "top_hits", + size=1, + _source={"includes": list(material_prop_map.values())}, + )) + s.aggs.bucket("materials", composite_agg) + + # We ignore the top level hits + s = s.extra(**{ + "size": 0, + }) + + response = s.execute() + materials = response.aggs.materials.buckets + if len(materials) == 0: + abort(404, message="No materials found for the given search criteria or pagination.") + after_new = response.aggs.materials["after_key"] + + # Gather results from aggregations + result_list = [] + materials = response.aggs.materials.buckets + keys = list(material_prop_map.keys()) + for material in materials: + representative = material["representative"][0] + mat_dict = get_es_doc_values(representative, material_prop_map, keys) + mat_dict["n_matches"] = material.doc_count + result_list.append(mat_dict) + + # Page information is incomplete for aggregations + pages = { + "page": page, + "per_page": per_page, + "after": after_new, + } + + if after is None: + n_materials = response.aggs.n_materials.value + pages["total"] = n_materials + + result = { + "results": result_list, + "pages": pages, + } + return result, 200 + + +groups_result = api.model("groups_result", { + "groups_eos": fields.Raw, + "groups_par": fields.Raw, +}) + + +@ns.route("/materials/<string:material_id>/groups") +class EncGroupsResource(Resource): + @api.response(404, "Material not found") + @api.response(400, "Bad request") + @api.response(200, "Metadata send", fields.Raw) + @api.marshal_with(groups_result) + @api.doc("enc_materials") + def get(self, material_id): + """Returns a summary of the calculation groups that were identified for + this material. 
+ """ + # Find entries for the given material, which have EOS or parameter + # variation hashes set. + bool_query = Q( + "bool", + filter=enc_filter + [Q("term", encyclopedia__material__material_id=material_id)], + must=[ + Q("exists", field="encyclopedia.properties.energies.energy_total"), + Q("exists", field="encyclopedia.material.idealized_structure.cell_volume"), + ], + should=[ + Q("exists", field="encyclopedia.method.group_eos_id"), + Q("exists", field="encyclopedia.method.group_parametervariation_id"), + ], + minimum_should_match=1, # At least one of the should query must match + ) + + s = Search(index=config.elastic.index_name) + s = s.query(bool_query) + + # Bucket the calculations by the group hashes. Only create a bucket if an + # above-minimum number of documents are found. + group_eos_bucket = A("terms", field="encyclopedia.method.group_eos_id", min_doc_count=4) + group_param_bucket = A("terms", field="encyclopedia.method.group_parametervariation_id", min_doc_count=2) + calc_aggregation = A( + "top_hits", + _source={"includes": ["calc_id"]}, + sort=[{"encyclopedia.properties.energies.energy_total": {"order": "asc"}}], + size=100, + ) + group_eos_bucket.bucket("calculations", calc_aggregation) + group_param_bucket.bucket("calculations", calc_aggregation) + s.aggs.bucket("groups_eos", group_eos_bucket) + s.aggs.bucket("groups_param", group_param_bucket) + + # We ignore the top level hits + s = s.extra(**{ + "size": 0, + }) + + # Collect information for each group from the aggregations + response = s.execute() + groups_eos = {group.key: [calc.calc_id for calc in group.calculations.hits] for group in response.aggs.groups_eos.buckets} + groups_param = {group.key: [calc.calc_id for calc in group.calculations.hits] for group in response.aggs.groups_param.buckets} + + # Return results + result = { + "groups_eos": groups_eos, + "groups_par": groups_param, + } + + return result, 200 + + +group_result = api.model("group_result", { + "calculations": 
fields.List(fields.String), + "energies": fields.List(fields.Float), + "volumes": fields.List(fields.Float), +}) +group_source = { + "includes": [ + "calc_id", + "encyclopedia.properties.energies.energy_total", + "encyclopedia.material.idealized_structure.cell_volume", + ] +} + + +@ns.route("/materials/<string:material_id>/groups/<string:group_type>/<string:group_id>") +class EncGroupResource(Resource): + @api.response(404, "Group not found") + @api.response(400, "Bad request") + @api.response(200, "Metadata send", fields.Raw) + @api.marshal_with(group_result) + @api.doc("enc_group") + def get(self, material_id, group_type, group_id): + """Used to query detailed information for a specific calculation group. + """ + # Find entries for the given material, which have EOS or parameter + # variation hashes set. + if group_type == "eos": + group_id_source = "encyclopedia.method.group_eos_id" + elif group_type == "par": + group_id_source = "encyclopedia.method.group_parametervariation_id" + else: + abort(400, message="Unsupported group type.") + + bool_query = Q( + "bool", + filter=enc_filter + [ + Q("term", encyclopedia__material__material_id=material_id), + Q("term", **{group_id_source: group_id}), + ], + ) + + s = Search(index=config.elastic.index_name) + s = s.query(bool_query) + + # calc_id and energy should be extracted for each matched document. The + # documents are sorted by energy so that the minimum energy one can be + # easily extracted. A maximum request size is set in order to limit the + # result size. ES also has an index-level property + # "index.max_inner_result_window" that limits the number of results + # that an inner result can contain. 
+ energy_aggregation = A( + "top_hits", + _source=group_source, + sort=[{"encyclopedia.properties.energies.energy_total": {"order": "asc"}}], + size=100, + ) + s.aggs.bucket("groups_eos", energy_aggregation) + + # We ignore the top level hits + s = s.extra(**{ + "size": 0, + }) + + # Collect information for each group from the aggregations + response = s.execute() + + hits = response.aggs.groups_eos.hits + calculations = [doc.calc_id for doc in hits] + energies = [doc.encyclopedia.properties.energies.energy_total for doc in hits] + volumes = [doc.encyclopedia.material.idealized_structure.cell_volume for doc in hits] + group_dict = { + "calculations": calculations, + "energies": energies, + "volumes": volumes, + } + + return group_dict, 200 + + +suggestions_map = { + "code_name": "dft.code_name", + "structure_type": "encyclopedia.material.bulk.structure_type", +} +suggestions_query = api.parser() +suggestions_query.add_argument( + "property", + type=str, + choices=("code_name", "structure_type"), + help="The property name for which suggestions are returned.", + location="args" +) +suggestions_result = api.model("suggestions_result", { + "code_name": fields.List(fields.String), + "structure_type": fields.List(fields.String), +}) + + +@ns.route("/suggestions") +class EncSuggestionsResource(Resource): + @api.response(404, "Suggestion not found") + @api.response(400, "Bad request") + @api.response(200, "Metadata send", fields.Raw) + @api.expect(suggestions_query, validate=False) + @api.marshal_with(suggestions_result, skip_none=True) + @api.doc("enc_suggestions") + def get(self): + + # Parse request arguments + args = suggestions_query.parse_args() + prop = args.get("property", None) + + # Use aggregation to return all unique terms for the requested field. + # Without using composite aggregations there is a size limit for the + # number of aggregation buckets. 
This should, however, not be a problem + # since the number of unique values is low for all supported properties. + s = Search(index=config.elastic.index_name) + query = Q( + "bool", + filter=enc_filter + ) + s = s.query(query) + s = s.extra(**{ + "size": 0, + }) + + terms_agg = A("terms", field=suggestions_map[prop]) + s.aggs.bucket("suggestions", terms_agg) + + # Gather unique values into a list + response = s.execute() + suggestions = [x.key for x in response.aggs.suggestions.buckets] + + return {prop: suggestions}, 200 + + +calc_prop_map = { + "calc_id": "calc_id", + "code_name": "dft.code_name", + "code_version": "dft.code_version", + "functional_type": "encyclopedia.method.functional_type", + "basis_set_type": "dft.basis_set", + "core_electron_treatment": "encyclopedia.method.core_electron_treatment", + "run_type": "encyclopedia.calculation.calculation_type", + "has_dos": "encyclopedia.properties.electronic_dos", + "has_band_structure": "encyclopedia.properties.electronic_band_structure", + "has_thermal_properties": "encyclopedia.properties.thermodynamical_properties", + "has_phonon_dos": "encyclopedia.properties.phonon_dos", + "has_phonon_band_structure": "encyclopedia.properties.phonon_band_structure", +} +calculation_result = api.model("calculation_result", { + "calc_id": fields.String, + "code_name": fields.String, + "code_version": fields.String, + "functional_type": fields.String, + "basis_set_type": fields.String, + "core_electron_treatment": fields.String, + "run_type": fields.String, + "has_dos": fields.Boolean, + "has_band_structure": fields.Boolean, + "has_thermal_properties": fields.Boolean, + "has_phonon_dos": fields.Boolean, + "has_phonon_band_structure": fields.Boolean, +}) +representatives_result = api.model("representatives_result", { + "idealized_structure": fields.String, + "electronic_band_structure": fields.String, + "electronic_dos": fields.String, + "thermodynamical_properties": fields.String, +}) +calculations_result = 
api.model("calculations_result", { + "total_results": fields.Integer, + "pages": fields.Nested(pages_result), + "results": fields.List(fields.Nested(calculation_result)), + "representatives": fields.Nested(representatives_result, skip_none=True), +}) + + +@ns.route("/materials/<string:material_id>/calculations") +class EncCalculationsResource(Resource): + @api.response(404, "Suggestion not found") + @api.response(400, "Bad request") + @api.response(200, "Metadata send", fields.Raw) + @api.doc("enc_calculations") + def get(self, material_id): + """Used to return all calculations related to the given material. Also + returns a representative calculation for each property shown in the + overview page. + """ + s = Search(index=config.elastic.index_name) + query = Q( + "bool", + filter=enc_filter + [ + Q("term", encyclopedia__material__material_id=material_id), + ] + ) + s = s.query(query) + + # The query is filtered already on the ES side so we don"t need to + # transfer so much data. + s = s.extra(**{ + "_source": {"includes": list(calc_prop_map.values()) + ["dft.xc_functional"]}, + "size": 10000, + "from": 0, + }) + response = s.execute() + + # No such material + if len(response) == 0: + abort(404, message="There is no material {}".format(material_id)) + + # Add representative properties. It might be possible to write a custom + # ES scoring mechanism or aggregation to also perform the selection. + representatives = {} + + def calc_score(entry): + """Custom scoring function used to sort results by their + "quality". Currently built to mimic the scoring that was used + in the old Encyclopedia GUI. 
+ """ + score = 0 + functional_score = { + "GGA": 100 + } + code_score = { + "FHI-aims": 3, + "VASP": 2, + "Quantum Espresso": 1, + } + code_name = entry.dft.code_name + functional = entry.dft.xc_functional + has_dos = rgetattr(entry, "encyclopedia.properties.electronic_band_structure") is not None + has_bs = rgetattr(entry, "encyclopedia.properties.electronic_dos") is not None + score += functional_score.get(functional, 0) + score += code_score.get(code_name, 0) + if has_dos and has_bs: + score += 10 + + return score + + # The calculations are first sorted by "quality" + sorted_calc = sorted(response, key=lambda x: calc_score(x), reverse=True) + + # Get the requested representative properties + representatives["idealized_structure"] = sorted_calc[0].calc_id + thermo_found = False + bs_found = False + dos_found = False + for calc in sorted_calc: + if rgetattr(calc, "encyclopedia.properties.thermodynamical_properties") is not None: + representatives["thermodynamical_properties"] = calc.calc_id + thermo_found = True + if rgetattr(calc, "encyclopedia.properties.electronic_band_structure") is not None: + representatives["electronic_band_structure"] = calc.calc_id + bs_found = True + if rgetattr(calc, "encyclopedia.properties.electronic_dos") is not None: + representatives["electronic_dos"] = calc.calc_id + dos_found = True + if thermo_found and bs_found and dos_found: + break + + # Create result JSON + results = [] + for entry in response: + calc_dict = get_es_doc_values(entry, calc_prop_map) + calc_dict["has_dos"] = calc_dict["has_dos"] is not None + calc_dict["has_band_structure"] = calc_dict["has_band_structure"] is not None + calc_dict["has_thermal_properties"] = calc_dict["has_thermal_properties"] is not None + calc_dict["has_phonon_dos"] = calc_dict["has_phonon_dos"] is not None + calc_dict["has_phonon_band_structure"] = calc_dict["has_phonon_band_structure"] is not None + results.append(calc_dict) + + result = { + "total_results": len(results), + "results": 
results, + "representatives": representatives, + } + + return result, 200 + + +histogram = api.model("histogram", { + "occurrences": fields.List(fields.Integer), + "values": fields.List(fields.Float), +}) +statistics_query = api.model("statistics_query", { + "calculations": fields.List(fields.String), + "properties": fields.List(fields.String), + "n_histogram_bins": fields.Integer, +}) +statistics = api.model("statistics", { + "min": fields.Float, + "max": fields.Float, + "avg": fields.Float, + "histogram": fields.Nested(histogram, skip_none=True) +}) +statistics_result = api.model("statistics_result", { + "cell_volume": fields.Nested(statistics, skip_none=True), + "atomic_density": fields.Nested(statistics, skip_none=True), + "mass_density": fields.Nested(statistics, skip_none=True), + "lattice_a": fields.Nested(statistics, skip_none=True), + "lattice_b": fields.Nested(statistics, skip_none=True), + "lattice_c": fields.Nested(statistics, skip_none=True), + "alpha": fields.Nested(statistics, skip_none=True), + "beta": fields.Nested(statistics, skip_none=True), + "gamma": fields.Nested(statistics, skip_none=True), + "band_gap": fields.Nested(statistics, skip_none=True), +}) +property_map = { + "cell_volume": "encyclopedia.material.idealized_structure.cell_volume", + "atomic_density": "encyclopedia.properties.atomic_density", + "mass_density": "encyclopedia.properties.mass_density", + "lattice_a": "encyclopedia.material.idealized_structure.lattice_parameters.a", + "lattice_b": "encyclopedia.material.idealized_structure.lattice_parameters.b", + "lattice_c": "encyclopedia.material.idealized_structure.lattice_parameters.c", + "alpha": "encyclopedia.material.idealized_structure.lattice_parameters.alpha", + "beta": "encyclopedia.material.idealized_structure.lattice_parameters.beta", + "gamma": "encyclopedia.material.idealized_structure.lattice_parameters.gamma", + "band_gap": "encyclopedia.properties.band_gap", +} + + 
+@ns.route("/materials/<string:material_id>/statistics") +class EncStatisticsResource(Resource): + @api.response(404, "Suggestion not found") + @api.response(400, "Bad request") + @api.response(200, "Metadata send", fields.Raw) + @api.expect(statistics_query, validate=False) + @api.marshal_with(statistics_result, skip_none=True) + @api.doc("enc_statistics") + def post(self, material_id): + """Used to return statistics related to the specified material and + calculations. + """ + # Get query parameters as json + try: + data = marshal(request.get_json(), statistics_query) + except Exception as e: + abort(400, message=str(e)) + + # Find entries for the given material. + bool_query = Q( + "bool", + filter=enc_filter + [ + Q("term", encyclopedia__material__material_id=material_id), + Q("terms", calc_id=data["calculations"]), + ] + ) + + s = Search(index=config.elastic.index_name) + s = s.query(bool_query) + s = s.extra(**{ + "size": 0, + }) + + # Add statistics aggregations for each requested property + properties = data["properties"] + for prop in properties: + stats_agg = A("stats", field=property_map[prop]) + s.aggs.bucket("{}_stats".format(prop), stats_agg) + + # No hits on the top query level + response = s.execute() + if response.hits.total == 0: + abort(404, message="Could not find matching calculations.") + + # Run a second query that creates histograms with fixed size buckets + # based on the min and max from previous query. Might make more sense + # to use the mean and sigma to define the range? 
+ s = Search(index=config.elastic.index_name) + s = s.query(bool_query) + s = s.extra(**{ + "size": 0, + }) + n_bins = data["n_histogram_bins"] + for prop in properties: + stats = getattr(response.aggs, "{}_stats".format(prop)) + if stats.count == 0: + continue + interval = (stats.max * 1.001 - stats.min) / n_bins + if interval == 0: + interval = 1 + hist_agg = A("histogram", field=property_map[prop], interval=interval, offset=stats.min, min_doc_count=0) + s.aggs.bucket("{}_hist".format(prop), hist_agg) + response_hist = s.execute() + + # Return results + result = {} + for prop in properties: + stats = getattr(response.aggs, "{}_stats".format(prop)) + if stats.count == 0: + continue + hist = getattr(response_hist.aggs, "{}_hist".format(prop)) + occurrences = [x.doc_count for x in hist.buckets] + values = [x.key for x in hist.buckets] + result[prop] = { + "min": stats.min, + "max": stats.max, + "avg": stats.avg, + "histogram": { + "occurrences": occurrences, + "values": values, + } + } + + return result, 200 + + +wyckoff_variables_result = api.model("wyckoff_variables_result", { + "x": fields.Float, + "y": fields.Float, + "z": fields.Float, +}) +wyckoff_set_result = api.model("wyckoff_set_result", { + "wyckoff_letter": fields.String, + "indices": fields.List(fields.Integer), + "element": fields.String, + "variables": fields.Nested(wyckoff_variables_result, skip_none=True), +}) +lattice_parameters = api.model("lattice_parameters", { + "a": fields.Float, + "b": fields.Float, + "c": fields.Float, + "alpha": fields.Float, + "beta": fields.Float, + "gamma": fields.Float, +}) + +idealized_structure_result = api.model("idealized_structure_result", { + "atom_labels": fields.List(fields.String), + "atom_positions": fields.List(fields.List(fields.Float)), + "lattice_vectors": fields.List(fields.List(fields.Float)), + "lattice_vectors_primitive": fields.List(fields.List(fields.Float)), + "lattice_parameters": fields.Nested(lattice_parameters, skip_none=True), + "periodicity": 
fields.List(fields.Boolean), + "number_of_atoms": fields.Integer, + "cell_volume": fields.Float, + "wyckoff_sets": fields.List(fields.Nested(wyckoff_set_result, skip_none=True)), +}) + +calculation_property_map = { + "lattice_parameters": { + "es_source": "encyclopedia.material.idealized_structure.lattice_parameters" + }, + "energies": { + "es_source": "encyclopedia.properties.energies", + }, + "mass_density": { + "es_source": "encyclopedia.properties.mass_density", + }, + "atomic_density": { + "es_source": "encyclopedia.properties.atomic_density", + }, + "cell_volume": { + "es_source": "encyclopedia.material.idealized_structure.cell_volume" + }, + "band_gap": { + "es_source": "encyclopedia.properties.band_gap" + }, + "electronic_band_structure": { + "es_source": "encyclopedia.properties.electronic_band_structure" + }, + "electronic_dos": { + "es_source": "encyclopedia.properties.electronic_dos" + }, + "phonon_band_structure": { + "es_source": "encyclopedia.properties.phonon_band_structure" + }, + "phonon_dos": { + "es_source": "encyclopedia.properties.phonon_dos" + }, + "thermodynamical_properties": { + "es_source": "encyclopedia.properties.thermodynamical_properties" + }, + "wyckoff_sets": { + "arch_source": "section_metadata/encyclopedia/material/idealized_structure/wyckoff_sets" + }, + "idealized_structure": { + "arch_source": "section_metadata/encyclopedia/material/idealized_structure" + }, +} + +calculation_property_query = api.model("calculation_query", { + "properties": fields.List(fields.String), +}) +energies = api.model("energies", { + "energy_total": fields.Float, + "energy_total_T0": fields.Float, + "energy_free": fields.Float, +}) +electronic_band_structure = api.model("electronic_band_structure", { + "reciprocal_cell": fields.List(fields.List(fields.Float)), + "brillouin_zone": fields.Raw, + "section_k_band_segment": fields.Raw, + "section_band_gap": fields.Raw, +}) +electronic_dos = api.model("electronic_dos", { + "dos_energies": 
fields.List(fields.Float), + "dos_values": fields.List(fields.List(fields.Float)), +}) +calculation_property_result = api.model("calculation_property_result", { + "lattice_parameters": fields.Nested(lattice_parameters, skip_none=True), + "energies": fields.Nested(energies, skip_none=True), + "mass_density": fields.Float, + "atomic_density": fields.Float, + "cell_volume": fields.Float, + "wyckoff_sets": fields.Nested(wyckoff_set_result, skip_none=True), + "idealized_structure": fields.Nested(idealized_structure_result, skip_none=True), + "band_gap": fields.Float, + "electronic_band_structure": fields.Nested(electronic_band_structure, skip_none=True), + "electronic_dos": fields.Nested(electronic_dos, skip_none=True), + "phonon_band_structure": fields.Raw, + "phonon_dos": fields.Raw, + "thermodynamical_properties": fields.Raw, +}) + + +@ns.route("/materials/<string:material_id>/calculations/<string:calc_id>") +class EncCalculationResource(Resource): + @api.response(404, "Material or calculation not found") + @api.response(400, "Bad request") + @api.response(200, "Metadata send", fields.Raw) + @api.expect(calculation_property_query, validate=False) + @api.marshal_with(calculation_property_result, skip_none=True) + @api.doc("enc_calculation") + def post(self, material_id, calc_id): + """Used to return calculation details. Some properties are not + available in the ES index and are instead read from the Archive + directly. 
+ """ + # Get query parameters as json + try: + data = marshal(request.get_json(), calculation_property_query) + except Exception as e: + abort(400, message=str(e)) + + s = Search(index=config.elastic.index_name) + query = Q( + "bool", + filter=enc_filter + [ + Q("term", encyclopedia__material__material_id=material_id), + Q("term", calc_id=calc_id), + ] + ) + s = s.query(query) + + # Create dictionaries for requested properties + references = [] + properties = data["properties"] + arch_properties = {} + es_properties = {} + ref_properties = set(( + "electronic_dos", + "electronic_band_structure", + "thermodynamical_properties", + "phonon_dos", + "phonon_band_structure", + )) + for prop in properties: + es_source = calculation_property_map[prop].get("es_source") + if es_source is not None: + es_properties[prop] = es_source + if prop in ref_properties: + references.append(prop) + arch_source = calculation_property_map[prop].get("arch_source") + if arch_source is not None: + arch_properties[prop] = arch_source + + # The query is filtered already on the ES side so we don't need to + # transfer so much data. + sources = [ + "upload_id", + "calc_id", + "encyclopedia", + ] + sources += list(es_properties.values()) + + s = s.extra(**{ + "_source": {"includes": sources}, + "size": 1, + }) + + response = s.execute() + + # No such material + if len(response) == 0: + abort(404, message="There is no material {} with calculation {}".format(material_id, calc_id)) + + # Add references that are to be read from the archive + for ref in references: + arch_path = response[0] + arch_path = rgetattr(arch_path, es_properties[ref]) + if arch_path is not None: + arch_properties[ref] = arch_path + del es_properties[ref] + + # If any of the requested properties require data from the Archive, the + # file is opened and read. 
+ result = {} + if len(arch_properties) != 0: + entry = response[0] + upload_id = entry.upload_id + calc_id = entry.calc_id + root = read_archive( + upload_id, + calc_id, + ) + + # Add results from archive + for key, arch_path in arch_properties.items(): + value = root[arch_path] + + # Save derived properties and turn into dict + if key == "thermodynamical_properties": + specific_heat_capacity = value.specific_heat_capacity.magnitude.tolist() + specific_free_energy = value.specific_vibrational_free_energy_at_constant_volume.magnitude.tolist() + if isinstance(value, list): + value = [x.m_to_dict() for x in value] + else: + value = value.m_to_dict() + if key == "thermodynamical_properties": + value["specific_heat_capacity"] = specific_heat_capacity + value["specific_vibrational_free_energy_at_constant_volume"] = specific_free_energy + + # DOS results are simplified. + if key == "electronic_dos": + if "dos_energies_normalized" in value: + value["dos_energies"] = value["dos_energies_normalized"] + del value["dos_energies_normalized"] + if "dos_values_normalized" in value: + value["dos_values"] = value["dos_values_normalized"] + del value["dos_values_normalized"] + + # Pre-calculate k-path length to be used as x-coordinate in + # plots. If the VBM and CBM information is needed later, it + # can be added as indices along the path. The exact + # k-points and occupations are removed to save band width. 
+ if key == "electronic_band_structure" or key == "phonon_band_structure": + segments = value["section_k_band_segment"] + k_path_length = 0 + for segment in segments: + k_points = np.array(segment["band_k_points"]) + segment_length = np.linalg.norm(k_points[-1, :] - k_points[0, :]) + k_path_distances = k_path_length + np.linalg.norm(k_points - k_points[0, :], axis=1) + k_path_length += segment_length + segment["k_path_distances"] = k_path_distances.tolist() + del segment["band_k_points"] + if "band_occupations" in segment: + del segment["band_occupations"] + + result[key] = value + + # Add results from ES + for prop, es_source in es_properties.items(): + value = rgetattr(response[0], es_source) + if value is not None: + if isinstance(value, AttrDict): + value = value.to_dict() + result[prop] = value + + return result, 200 + + +def read_archive(upload_id: str, calc_id: str) -> EntryArchive: + """Used to read data from the archive. + + Args: + upload_id: Upload id. + calc_id: Calculation id. + + Returns: + MSection: The section_run as MSection + For each path, a dictionary containing the path as key and the returned + section as value. + """ + upload_files = files.PublicUploadFiles(upload_id) + with upload_files.read_archive(calc_id, access="public") as archive: + data = archive[calc_id] + root = EntryArchive.m_from_dict(data.to_dict()) + + return root diff --git a/nomad/atomutils.py b/nomad/atomutils.py index 6ec85c0ccab7d7ba76b940bea8d7a530e85a58cb..e7cd4ced73a64e3d7036d6e81f34aba899d625f5 100644 --- a/nomad/atomutils.py +++ b/nomad/atomutils.py @@ -41,6 +41,19 @@ def get_summed_atomic_mass(atomic_numbers: np.ndarray) -> float: return mass +def get_volume(parallelepiped: np.ndarray) -> float: + """Calculates a volume of the given parallelepiped. + + Args: + cell: The parallellepiped as 3x3 matrix with cell basis vectors as + rows. + + Returns: + The cell volume. 
+ """ + return np.abs(np.linalg.det(parallelepiped)) + + def find_match(pos: np.array, positions: np.array, eps: float) -> Union[int, None]: """Attempts to find a position within a larger list of positions. @@ -220,7 +233,7 @@ def get_hill_decomposition(atom_labels: np.ndarray, reduced: bool = False) -> Tu def get_formula_string(symbols: List[str], counts: List[int]) -> str: - """Used to form a single formula string from a list of chemical speices and + """Used to form a single formula string from a list of chemical species and their counts. Args: diff --git a/nomad/client.py b/nomad/client.py index 1b73feeaa9ffbe5f4af81bc45c7ff8077c2dfffa..ba229a0d9a858e2b13bb49bbc6939024e888e0d8 100644 --- a/nomad/client.py +++ b/nomad/client.py @@ -151,6 +151,7 @@ from io import StringIO from nomad import config from nomad import metainfo as mi +from nomad.units import ureg from nomad.datamodel import EntryArchive # TODO this import is necessary to load all metainfo defintions that the parsers are using @@ -199,11 +200,11 @@ class ApiStatistics(mi.MSection): description='Number of entries loaded in the last api call') last_response_data_size = mi.Quantity( - type=int, unit=mi.units.bytes, default=0, + type=int, unit=ureg.bytes, default=0, description='Bytes loaded in the last api call') loaded_data_size = mi.Quantity( - type=int, unit=mi.units.bytes, default=0, + type=int, unit=ureg.bytes, default=0, description='Bytes loaded from this query') loaded_nentries = mi.Quantity( diff --git a/nomad/datamodel/datamodel.py b/nomad/datamodel/datamodel.py index c6eea77205aa1e8c53bca8dba9ca8e49578939ab..1ef64549881a471157591b2881546507f04c4c90 100644 --- a/nomad/datamodel/datamodel.py +++ b/nomad/datamodel/datamodel.py @@ -20,13 +20,13 @@ from elasticsearch_dsl import Keyword, Text, analyzer, tokenizer import ase.data from nomad import metainfo, config -from nomad.metainfo.encyclopedia import section_encyclopedia from nomad.metainfo.search_extension import Search from 
nomad.metainfo.elastic_extension import ElasticDocument from nomad.metainfo.mongoengine_extension import Mongo, MongoDocument from .dft import DFTMetadata from .ems import EMSMetadata +from .encyclopedia import EncyclopediaMetadata from .metainfo.public import section_run from .metainfo.general_experimental import section_experiment @@ -449,12 +449,15 @@ class EntryMetadata(metainfo.MSection): ems = metainfo.SubSection(sub_section=EMSMetadata, a_search='ems') dft = metainfo.SubSection(sub_section=DFTMetadata, a_search='dft') + encyclopedia = metainfo.SubSection(sub_section=EncyclopediaMetadata, a_search='encyclopedia') def apply_user_metadata(self, metadata: dict): ''' Applies a user provided metadata dict to this calc. ''' self.m_update(**metadata) def apply_domain_metadata(self, backend): + """Used to apply metadata that is related to the domain. + """ assert self.domain is not None, 'all entries must have a domain' domain_sub_section_def = self.m_def.all_sub_sections.get(self.domain) domain_section_def = domain_sub_section_def.sub_section @@ -473,7 +476,6 @@ class EntryArchive(metainfo.MSection): section_run = metainfo.SubSection(sub_section=section_run, repeats=True) section_experiment = metainfo.SubSection(sub_section=section_experiment) section_metadata = metainfo.SubSection(sub_section=EntryMetadata) - section_encyclopedia = metainfo.SubSection(sub_section=section_encyclopedia) processing_logs = metainfo.Quantity( type=Any, shape=['0..*'], diff --git a/nomad/metainfo/encyclopedia.py b/nomad/datamodel/encyclopedia.py similarity index 71% rename from nomad/metainfo/encyclopedia.py rename to nomad/datamodel/encyclopedia.py index f516652f5992902622a519809a2de86af0d1b074..d3c8dc3eb7096260b434e704dd56a848567fe2ff 100644 --- a/nomad/metainfo/encyclopedia.py +++ b/nomad/datamodel/encyclopedia.py @@ -1,31 +1,31 @@ import numpy as np -from elasticsearch_dsl import InnerDoc from nomad.metainfo import MSection, Section, SubSection, Quantity, MEnum, Reference from 
nomad.datamodel.metainfo.public import section_k_band, section_dos, section_thermodynamical_properties +from nomad.metainfo.search_extension import Search +from elasticsearch_dsl import Text, Keyword class WyckoffVariables(MSection): m_def = Section( a_flask=dict(skip_none=True), - a_elastic=dict(type=InnerDoc), description=""" Contains the variables associated with a Wyckoff set. """ ) x = Quantity( - type=float, + type=np.dtype(np.float64), description=""" The x variable if present. """ ) y = Quantity( - type=float, + type=np.dtype(np.float64), description=""" The y variable if present. """ ) z = Quantity( - type=float, + type=np.dtype(np.float64), description=""" The z variable if present. """ @@ -35,7 +35,6 @@ class WyckoffVariables(MSection): class WyckoffSet(MSection): m_def = Section( a_flask=dict(skip_none=True), - a_elastic=dict(type=InnerDoc), description=""" Section for storing Wyckoff set information. """ @@ -62,17 +61,71 @@ class WyckoffSet(MSection): variables = SubSection(sub_section=WyckoffVariables.m_def, repeats=False) +class LatticeParameters(MSection): + m_def = Section( + a_flask=dict(skip_none=True), + description=""" + Lattice parameters of the idealized cell. The lattice parameters can + only be reported consistently after idealization and may not perfectly + correspond to the original simulation cell. + """, + a_search="lattice_parameters" + ) + a = Quantity( + type=float, + description=""" + Length of the first basis vector. + """, + a_search=Search() + ) + b = Quantity( + type=float, + description=""" + Length of the second basis vector. + """, + a_search=Search() + ) + c = Quantity( + type=float, + description=""" + Length of the third basis vector. + """, + a_search=Search() + ) + alpha = Quantity( + type=float, + description=""" + Angle between second and third basis vector. + """, + a_search=Search() + ) + beta = Quantity( + type=float, + description=""" + Angle between first and third basis vector. 
+ """, + a_search=Search() + ) + gamma = Quantity( + type=float, + description=""" + Angle between first and second basis vector. + """, + a_search=Search() + ) + + class IdealizedStructure(MSection): m_def = Section( a_flask=dict(skip_none=True), - a_elastic=dict(type=InnerDoc), description=""" Contains structural information for an idealized representation of the material used in the calculation. This idealization is used for visualizing the material and for calculating the structural properties. The properties of the idealized structure may slightly vary from the original structure used in the calculation. - """ + """, + a_search="idealized_structure", ) atom_labels = Quantity( type=str, @@ -107,15 +160,6 @@ class IdealizedStructure(MSection): idealized to match the detected symmemtry properties. """ ) - lattice_parameters = Quantity( - type=np.dtype(np.float64), - shape=[6], - description=""" - Lattice parameters of the idealized cell. The lattice parameters can - only be reported consistently after idealization and may not perfectly - correspond to the original simulation cell. - """ - ) periodicity = Quantity( type=np.bool, shape=[3], @@ -131,19 +175,23 @@ class IdealizedStructure(MSection): """ ) cell_volume = Quantity( - type=float, + type=np.dtype(np.float64), + unit="m ** 3", description=""" Volume of the idealized cell. The cell volume can only be reported consistently after idealization and may not perfectly correspond to the original simulation cell. - """ + """, + a_search=Search() ) + wyckoff_sets = SubSection(sub_section=WyckoffSet.m_def, repeats=True) + lattice_parameters = SubSection(sub_section=LatticeParameters.m_def) class Bulk(MSection): m_def = Section( a_flask=dict(skip_none=True), - a_elastic=dict(type=InnerDoc), + a_search="bulk", description=""" Contains information that is specific to bulk crystalline materials. 
""" @@ -172,13 +220,15 @@ class Bulk(MSection): I = Body centered R = Rhombohedral centring F = All faces centred - """ + """, + a_search=Search() ) crystal_system = Quantity( type=MEnum("triclinic", "monoclinic", "orthorhombic", "tetragonal", "trigonal", "hexagonal", "cubic"), description=""" The detected crystal system. One of seven possibilities in three dimensions. - """ + """, + a_search=Search() ) has_free_wyckoff_parameters = Quantity( type=bool, @@ -187,54 +237,60 @@ class Bulk(MSection): materials has free Wyckoff parameters, at least some of the atoms are not bound to a particular location in the structure but are allowed to move with possible restrictions set by the symmetry. - """ + """, + a_search=Search() ) point_group = Quantity( type=MEnum("1", "-1", "2", "m", "2/m", "222", "mm2", "mmm", "4", "-4", "4/m", "422", "4mm", "-42m", "4/mmm", "3", "-3", "32", "3m", "-3m", "6", "-6", "6/m", "622", "6mm", "-6m2", "6/mmm", "23", "m-3", "432", "-43m", "m-3m"), description=""" Point group in Hermann-Mauguin notation, part of crystal structure classification. There are 32 point groups in three dimensional space. - """ + """, + a_search=Search() ) space_group_number = Quantity( type=int, description=""" Integer representation of the space group, part of crystal structure classification, part of material definition. - """ + """, + a_search=Search() ) space_group_international_short_symbol = Quantity( type=str, description=""" International short symbol notation of the space group. - """ + """, + a_search=Search() ) structure_prototype = Quantity( type=str, description=""" The prototypical material for this crystal structure. - """ + """, + a_search=Search() ) structure_type = Quantity( type=str, description=""" Classification according to known structure type, considering the point group of the crystal and the occupations with different atom types. 
- """ + """, + a_search=Search() ) strukturbericht_designation = Quantity( type=str, description=""" Classification of the material according to the historically grown "strukturbericht". - """ + """, + a_search=Search() ) - wyckoff_sets = SubSection(sub_section=WyckoffSet.m_def, repeats=True) class Material(MSection): m_def = Section( a_flask=dict(skip_none=True), - a_elastic=dict(type=InnerDoc), + a_search="material", description=""" Contains an overview of the type of material that was detected in this entry. @@ -244,20 +300,23 @@ class Material(MSection): type=MEnum(bulk="bulk", two_d="2D", one_d="1D", unavailable="unavailable"), description=""" "Broad structural classification for the material, e.g. bulk, 2D, 1D... ", - """ + """, + a_search=Search() ) - material_hash = Quantity( + material_id = Quantity( type=str, description=""" A fixed length, unique material identifier in the form of a hash digest. - """ + """, + a_search=Search() ) material_name = Quantity( type=str, description=""" Most meaningful name for a material. - """ + """, + a_search=Search() ) material_classification = Quantity( type=str, @@ -272,28 +331,46 @@ class Material(MSection): Formula giving the composition and occurrences of the elements in the Hill notation. For periodic materials the formula is calculated fom the primitive unit cell. - """ + """, + a_search=Search() ) formula_reduced = Quantity( type=str, description=""" Formula giving the composition and occurrences of the elements in the - Hill notation whre the number of occurences have been divided by the + Hill notation where the number of occurences have been divided by the greatest common divisor. - """ + """, + a_search=Search() + ) + species_and_counts = Quantity( + type=str, + description=""" + The formula separated into individual terms containing both the atom + type and count. Used for searching parts of a formula. 
+ """, + a_search=Search(mapping=Text(multi=True, fields={'keyword': Keyword()})) + ) + species = Quantity( + type=str, + description=""" + The formula separated into individual terms containing only unique atom + species. Used for searching materials containing specific elements. + """, + a_search=Search(mapping=Text(multi=True, fields={'keyword': Keyword()})) ) - - # The idealized structure for this material - idealized_structure = SubSection(sub_section=IdealizedStructure.m_def, repeats=False) # Bulk-specific properties bulk = SubSection(sub_section=Bulk.m_def, repeats=False) + # The idealized structure for this material + idealized_structure = SubSection(sub_section=IdealizedStructure.m_def, repeats=False) + class Method(MSection): m_def = Section( a_flask=dict(skip_none=True), - a_elastic=dict(type=InnerDoc), + a_search="method", description=""" Contains an overview of the methodology that was detected in this entry. @@ -303,33 +380,31 @@ class Method(MSection): type=MEnum("DFT", "GW", "unavailable", DFTU="DFT+U"), description=""" Generic name for the used methodology. - """ - ) - basis_set_type = Quantity( - type=MEnum("Numeric AOs", "Gaussians", "(L)APW+lo", "FLAPW (full-potential linearized augmented planewave)", "Plane waves", "Real-space grid", "Local-orbital minimum-basis"), - description=""" - Basic type of the used basis set. - """ + """, + a_search=Search() ) core_electron_treatment = Quantity( type=MEnum("full all electron", "all electron frozen core", "pseudopotential", "unavailable"), description=""" How the core electrons are described. - """ + """, + a_search=Search() ) functional_long_name = Quantity( type=str, description=""" Full identified for the used exchange-correlation functional. - """ + """, + a_search=Search() ) functional_type = Quantity( type=str, description=""" Basic type of the used exchange-correlation functional. 
- """ + """, + a_search=Search() ) - method_hash = Quantity( + method_id = Quantity( type=str, description=""" A fixed length, unique method identifier in the form of a hash digest. @@ -338,22 +413,24 @@ class Method(MSection): for the used program. """ ) - group_eos_hash = Quantity( + group_eos_id = Quantity( type=str, description=""" A fixed length, unique identifier for equation-of-state calculations. Only calculations within the same upload and with a method hash available will be grouped under the same hash. - """ + """, + a_search=Search() ) - group_parametervariation_hash = Quantity( + group_parametervariation_id = Quantity( type=str, description=""" A fixed length, unique identifier for calculations where structure is identical but the used computational parameters are varied. Only calculations within the same upload and with a method hash available will be grouped under the same hash. - """ + """, + a_search=Search() ) gw_starting_point = Quantity( type=str, @@ -375,7 +452,7 @@ class Method(MSection): """ ) smearing_parameter = Quantity( - type=float, + type=np.dtype(np.float64), description=""" Parameter for smearing, usually the width. """ @@ -385,7 +462,7 @@ class Method(MSection): class Calculation(MSection): m_def = Section( a_flask=dict(skip_none=True), - a_elastic=dict(type=InnerDoc), + a_search="calculation", description=""" Contains an overview of the type of calculation that was detected in this entry. @@ -405,90 +482,147 @@ class Calculation(MSection): unavailable="unavailable"), description=""" Defines the type of calculation that was detected for this entry. + """, + a_search=Search() + ) + + +class Energies(MSection): + m_def = Section( + a_flask=dict(skip_none=True), + a_search="energies", + description=""" + Contains different types of energies extracted from this entry. The + energies are extracted from a representative calculation: for geometry + optimization it is the last optimization step. 
""" ) + energy_total = Quantity( + type=np.dtype(np.float64), + unit="eV", + description=""" + Total energy. + """, + a_search=Search() + ) + energy_total_T0 = Quantity( + type=np.dtype(np.float64), + unit="eV", + description=""" + Total energy projected to T=0. + """, + a_search=Search() + ) + energy_free = Quantity( + type=np.dtype(np.float64), + unit="eV", + description=""" + Free energy. + """, + a_search=Search() + ) class Properties(MSection): m_def = Section( a_flask=dict(skip_none=True), - a_elastic=dict(type=InnerDoc), + a_search="properties", description=""" Contains derived physical properties that are specific to the NOMAD Encyclopedia. """ ) atomic_density = Quantity( - type=float, + type=np.dtype(np.float64), unit="1 / m ** 3", description=""" Atomic density of the material (atoms/volume)." - """ + """, + a_search=Search() ) mass_density = Quantity( - type=float, + type=np.dtype(np.float64), unit="kg / m ** 3", description=""" Mass density of the material. - """ + """, + a_search=Search() ) - energies = Quantity( - type=str, + band_gap = Quantity( + type=np.dtype(np.float64), + unit="eV", description=""" - Code dependent energy values, corrected to be per formula unit. - """ + Band gap value. If multiple spin channels are present, this value is + taken from the channel with smallest band gap value. + """, + a_search=Search() + ) + band_gap_direct = Quantity( + type=bool, + description=""" + Whether band gap is direct or not. If multiple spin channels are + present, this value is taken from the channel with smallest band gap + value. + """, + a_search=Search() ) + energies = SubSection(sub_section=Energies.m_def, repeats=False, a_search='energies') electronic_band_structure = Quantity( type=Reference(section_k_band.m_def), shape=[], description=""" Reference to an electronic band structure. 
- """ + """, + a_search=Search(value=lambda section: section.electronic_band_structure.m_proxy_url if section.electronic_band_structure is not None else None, mapping=Keyword()) ) electronic_dos = Quantity( type=Reference(section_dos.m_def), shape=[], description=""" Reference to an electronic density of states. - """ + """, + a_search=Search(value=lambda section: section.electronic_dos.m_proxy_url if section.electronic_dos is not None else None, mapping=Keyword()) ) phonon_band_structure = Quantity( type=Reference(section_k_band.m_def), shape=[], description=""" Reference to a phonon band structure. - """ + """, + a_search=Search(value=lambda section: section.phonon_band_structure.m_proxy_url if section.phonon_band_structure is not None else None, mapping=Keyword()) ) phonon_dos = Quantity( type=Reference(section_dos.m_def), shape=[], description=""" Reference to a phonon density of states. - """ + """, + a_search=Search(value=lambda section: section.phonon_dos.m_proxy_url if section.phonon_dos is not None else None, mapping=Keyword()) ) thermodynamical_properties = Quantity( type=Reference(section_thermodynamical_properties.m_def), shape=[], description=""" Reference to a section containing thermodynamical properties. - """ + """, + a_search=Search(value=lambda section: section.thermodynamical_properties.m_proxy_url if section.thermodynamical_properties is not None else None, mapping=Keyword()) ) -class section_encyclopedia(MSection): +class EncyclopediaMetadata(MSection): m_def = Section( a_flask=dict(skip_none=True), - a_elastic=dict(type=InnerDoc), + a_search='encyclopedia', description=""" Section which stores information for the NOMAD Encyclopedia. 
""" ) - material = SubSection(sub_section=Material.m_def, repeats=False) - method = SubSection(sub_section=Method.m_def, repeats=False) - properties = SubSection(sub_section=Properties.m_def, repeats=False) - calculation = SubSection(sub_section=Calculation.m_def, repeats=False) + material = SubSection(sub_section=Material.m_def, repeats=False, a_search='material') + method = SubSection(sub_section=Method.m_def, repeats=False, a_search='method') + properties = SubSection(sub_section=Properties.m_def, repeats=False, a_search='properties') + calculation = SubSection(sub_section=Calculation.m_def, repeats=False, a_search='calculation') status = Quantity( - type=MEnum("success", "unsupported_material_type", "unsupported_calculation_type", "invalid_metainfo", "failure"), + type=MEnum("success", "unsupported_material_type", "unsupported_method_type", "unsupported_calculation_type", "invalid_metainfo", "failure"), description=""" The final Encyclopedia processing status for this entry. The meaning of the status is as follows: @@ -499,5 +633,6 @@ class section_encyclopedia(MSection): | `"unsupported_calculation_type"` | The detected calculation type is currenlty not supported by the Encyclopedia. | | `"invalid_metainfo"` | The entry could not be processed due to missing or invalid metainfo. | | `"failure"` | The entry could not be processed due to an unexpected exception. | - """ + """, + a_search=Search() ) diff --git a/nomad/datamodel/metainfo/public.py b/nomad/datamodel/metainfo/public.py index 4ba8f84726497b28902abcb8d0c1f194d0a29c1c..19148b4bd1b797faf3f5eb5c0d2267cf54adb59a 100644 --- a/nomad/datamodel/metainfo/public.py +++ b/nomad/datamodel/metainfo/public.py @@ -2269,6 +2269,28 @@ class section_band_gap(MSection): ) +class section_brillouin_zone(MSection): + '''Defines a polyhedra for the Brillouin zone in reciprocal space. 
+ ''' + m_def = Section(validate=False, a_legacy=LegacyDefinition(name='section_brillouin_zone')) + + vertices = Quantity( + type=np.dtype(np.float64), + shape=[3, "1..*"], + description=''' + The vertices of the Brillouin zone corners as 3D coordinates in reciprocal space. + ''', + a_legacy=LegacyDefinition(name='vertices')) + faces = Quantity( + type=np.dtype(np.int32), + shape=["1..*", "3..*"], + description=''' + The faces of the Brillouin zone polyhedron as vertex indices. The + surface normal is determined by a right-hand ordering of the points. + ''', + a_legacy=LegacyDefinition(name='faces')) + + class section_k_band(MSection): ''' This section stores information on a $k$-band (electronic or vibrational band @@ -2297,22 +2319,10 @@ class section_k_band(MSection): a_legacy=LegacyDefinition(name='reciprocal_cell') ) - brillouin_zone = Quantity( - type=str, - description=""" - The Brillouin zone that corresponds to the reciprocal cell used in the - band calculation. The Brillouin Zone is defined as a list of vertices - and facets that are encoded with JSON. The vertices are 3D points in - the reciprocal space, and facets are determined by a chain of vertice - indices, with a right-hand ordering determining the surface normal - direction. - { - "vertices": [[3, 2, 1], ...] - "faces": [[0, 1, 2, 3], ...] - } - """, - a_legacy=LegacyDefinition(name='brillouin_zone') - ) + brillouin_zone = SubSection( + sub_section=SectionProxy('section_brillouin_zone'), + repeats=False, + a_legacy=LegacyDefinition(name='section_k_band_segment')) section_band_gap = SubSection( sub_section=section_band_gap.m_def, @@ -5306,6 +5316,32 @@ class section_thermodynamical_properties(MSection): ''', a_legacy=LegacyDefinition(name='vibrational_free_energy_at_constant_volume')) + @derived( + type=np.dtype(np.float64), + shape=['number_of_thermodynamical_property_values'], + unit='joule / kilogram', + description=''' + Stores the specific vibrational free energy at constant volume. 
+ ''', + a_legacy=LegacyDefinition(name='specific_vibrational_free_energy_at_constant_volume'), + cached=True + ) + def specific_vibrational_free_energy_at_constant_volume(self) -> np.array: + """Returns the specific vibrational free energy by dividing the vibrational free energy per + cell with the mass of the atoms in the cell. + """ + import nomad.atomutils + s_frame_sequence = self.m_parent + first_frame = s_frame_sequence.frame_sequence_local_frames_ref[0] + system = first_frame.single_configuration_calculation_to_system_ref + atomic_numbers = system.atom_species + n_atoms = len(atomic_numbers) + mass_per_atom = nomad.atomutils.get_summed_atomic_mass(atomic_numbers) / n_atoms + free_energy = self.vibrational_free_energy_at_constant_volume + specific_vibrational_free_energy_at_constant_volume = free_energy / mass_per_atom + + return specific_vibrational_free_energy_at_constant_volume + class section_volumetric_data(MSection): ''' diff --git a/nomad/datamodel/optimade.py b/nomad/datamodel/optimade.py index de081078840a3ebe575f2bc517dad90f2e53e3d4..905a1e99645b418d12f52a8cb6ed4873813b4f50 100644 --- a/nomad/datamodel/optimade.py +++ b/nomad/datamodel/optimade.py @@ -1,8 +1,9 @@ from ase.data import chemical_symbols -from elasticsearch_dsl import Keyword, Float, InnerDoc, Nested +from elasticsearch_dsl import Keyword, Float, InnerDoc, Nested, Integer import numpy as np -from nomad.metainfo import MSection, Section, Quantity, SubSection, MEnum, units, DefinitionAnnotation +from nomad.units import ureg +from nomad.metainfo import MSection, Section, Quantity, SubSection, MEnum, DefinitionAnnotation from nomad.metainfo.search_extension import Search @@ -82,7 +83,7 @@ class Species(MSection): species). 
''') - mass = Quantity(type=float, unit=units.amu, a_optimade=dict(entry='optional')) + mass = Quantity(type=float, unit=ureg.amu, a_optimade=dict(entry='optional')) original_name = Quantity(type=str, a_optimade=dict(entry='optional'), description=''' Can be any valid Unicode string, and SHOULD contain (if specified) the name of the @@ -172,7 +173,7 @@ class OptimadeEntry(MSection): dimension_types = Quantity( type=int, shape=[3], default=[0, 0, 0], links=optimade_links('h.6.2.8'), - a_search=Search(value=lambda a: sum(a.dimension_types)), + a_search=Search(value=lambda a: sum(a.dimension_types), mapping=Integer()), a_optimade=Optimade(query=True, entry=True), description=''' List of three integers. For each of the three directions indicated by the three lattice @@ -183,7 +184,7 @@ class OptimadeEntry(MSection): ''') lattice_vectors = Quantity( - type=np.dtype('f8'), shape=[3, 3], unit=units.angstrom, + type=np.dtype('f8'), shape=[3, 3], unit=ureg.angstrom, links=optimade_links('h.6.2.9'), a_optimade=Optimade(query=False, entry=True), description=''' @@ -191,7 +192,7 @@ class OptimadeEntry(MSection): ''') cartesian_site_positions = Quantity( - type=np.dtype('f8'), shape=['nsites', 3], unit=units.angstrom, + type=np.dtype('f8'), shape=['nsites', 3], unit=ureg.angstrom, links=optimade_links('h.6.2.10'), a_optimade=Optimade(query=False, entry=True), description=''' Cartesian positions of each site. 
A site is an atom, a site potentially occupied by diff --git a/nomad/metainfo/__init__.py b/nomad/metainfo/__init__.py index 031da060f8fd62424a9cf35c405fe7844ec24387..53da636f235499265cc99ea6f28233150c135111 100644 --- a/nomad/metainfo/__init__.py +++ b/nomad/metainfo/__init__.py @@ -288,7 +288,6 @@ from .metainfo import ( Reference, MResource, m_package, - units, Annotation, DefinitionAnnotation, SectionAnnotation, diff --git a/nomad/metainfo/elastic_extension.py b/nomad/metainfo/elastic_extension.py index 99f33675ca06d56f893f6429c80e673b2353de68..dec96961a992c8c19264a8d52449a239b8b91b3b 100644 --- a/nomad/metainfo/elastic_extension.py +++ b/nomad/metainfo/elastic_extension.py @@ -14,6 +14,8 @@ from typing import Callable, Any, Dict, cast import uuid +import numpy as np +import pint.quantity from .metainfo import ( @@ -37,13 +39,6 @@ class ElasticDocument(SectionAnnotation): classes, sub sections become inner documents, and quantities with the :class:`Elastic` extension become fields in their respective document. - Arguments: - index_name: This is used to optionally add the index_name to the resulting - elasticsearch_dsl document. - id: A callable that produces an id from a section instance that is used as id - for the respective elastic search index entry. The default will be randomly - generated UUID(4). - Attributes: document: The elasticsearch_dsl document class that was generated from the metainfo section @@ -52,6 +47,14 @@ class ElasticDocument(SectionAnnotation): _all_documents: Dict[str, Any] = {} def __init__(self, index_name: str = None, id: Callable[[Any], str] = None): + """ + Args: + index_name: This is used to optionally add the index_name to the resulting + elasticsearch_dsl document. + id: A callable that produces an id from a section instance that is used as id + for the respective elastic search index entry. The default will be randomly + generated UUID(4). 
+ """ self.index_name = index_name self.id = id @@ -63,6 +66,8 @@ class ElasticDocument(SectionAnnotation): @classmethod def create_index_entry(cls, section: MSection): ''' Creates an elasticsearch_dsl document instance for the given section. ''' + from elasticsearch_dsl import Object + m_def = section.m_def annotation = m_def.m_get_annotations(ElasticDocument) document_cls = ElasticDocument._all_documents[m_def.qualified_name()] @@ -85,13 +90,17 @@ class ElasticDocument(SectionAnnotation): if value is None or value == []: continue - quantity_type = quantity.type - if isinstance(quantity_type, Reference): + # By default the full section is resolved for references + if isinstance(quantity.type, Reference) and isinstance(annotation.mapping, Object): if quantity.is_scalar: value = ElasticDocument.create_index_entry(cast(MSection, value)) else: value = [ElasticDocument.create_index_entry(item) for item in value] + # Only the magnitude of scalar Pint quantity objects is stored + if quantity.is_scalar and isinstance(value, pint.quantity._Quantity): + value = value.magnitude + setattr(obj, annotation.field, value) for sub_section in m_def.all_sub_sections.values(): @@ -137,7 +146,7 @@ class ElasticDocument(SectionAnnotation): if document is not None: return document - from elasticsearch_dsl import Document, InnerDoc, Keyword, Date, Integer, Boolean, Object + from elasticsearch_dsl import Document, InnerDoc, Keyword, Date, Integer, Boolean, Object, Double, Float, Long if attrs is None: attrs = {} @@ -156,11 +165,18 @@ class ElasticDocument(SectionAnnotation): for annotation in quantity.m_get_annotations(Elastic, as_list=True): if annotation.mapping is None and first: kwargs = dict(index=annotation.index) - # find a mapping based on quantity type + + # Find a mapping based on quantity type if not explicitly given if quantity.type == str: annotation.mapping = Keyword(**kwargs) - elif quantity.type == int: + elif quantity.type in [float, np.float64] and quantity.is_scalar: + 
annotation.mapping = Double(**kwargs) + elif quantity.type == np.float32 and quantity.is_scalar: + annotation.mapping = Float(**kwargs) + elif quantity.type in [int, np.int32] and quantity.is_scalar: annotation.mapping = Integer(**kwargs) + elif quantity.type == np.int64 and quantity.is_scalar: + annotation.mapping = Long(**kwargs) elif quantity.type == bool: annotation.mapping = Boolean(**kwargs) elif quantity.type == Datetime: diff --git a/nomad/metainfo/example.py b/nomad/metainfo/example.py index f585a2c4a2aef8ddac7f6a012ff008f419535e2f..e7ad6f57cc2720c2b895ed95191b39d3873206fe 100644 --- a/nomad/metainfo/example.py +++ b/nomad/metainfo/example.py @@ -17,9 +17,10 @@ import numpy as np from datetime import datetime +from nomad.units import ureg from nomad.metainfo import ( - MSection, MCategory, Section, Quantity, Package, SubSection, MEnum, Datetime, units, - constraint) + MSection, MCategory, Section, Quantity, Package, SubSection, MEnum, + Datetime, constraint) m_package = Package(links=['http://metainfo.nomad-coe.eu']) @@ -57,11 +58,11 @@ class System(MSection): description='The atoms in the simulated systems.') atom_positions = Quantity( - type=np.dtype('f'), shape=['n_atoms', 3], unit=units.m, categories=[SystemHash], + type=np.dtype('f'), shape=['n_atoms', 3], unit=ureg.m, categories=[SystemHash], description='The atom positions in the simulated system.') lattice_vectors = Quantity( - type=np.dtype('f'), shape=[3, 3], unit=units.m, categories=[SystemHash], + type=np.dtype('f'), shape=[3, 3], unit=ureg.m, categories=[SystemHash], description='The lattice vectors of the simulated unit cell.') unit_cell = Quantity(synonym_for='lattice_vectors') @@ -75,8 +76,8 @@ class System(MSection): class SCC(MSection): - energy_total = Quantity(type=float, default=0.0, unit=units.J) - energy_total_0 = Quantity(type=np.dtype(np.float32), default=0.0, unit=units.J) + energy_total = Quantity(type=float, default=0.0, unit=ureg.J) + energy_total_0 = 
Quantity(type=np.dtype(np.float32), default=0.0, unit=ureg.J) an_int = Quantity(type=np.dtype(np.int32)) system = Quantity(type=System, description='The system that this calculation is based on.') diff --git a/nomad/metainfo/legacy.py b/nomad/metainfo/legacy.py index cb51e43d4577ba8d177cd83a3cab4e3f112d2798..113eb0b6a20b8d897a9d7a6c6e985b7bf048753e 100644 --- a/nomad/metainfo/legacy.py +++ b/nomad/metainfo/legacy.py @@ -27,8 +27,9 @@ import os.path from nomadcore.local_meta_info import loadJsonFile, InfoKindEl, InfoKindEnv from nomad import utils +from nomad.units import ureg from nomad.metainfo import ( - Definition, SubSection, Package, Quantity, Category, Section, Reference, units, + Definition, SubSection, Package, Quantity, Category, Section, Reference, Environment, MEnum, MSection, DefinitionAnnotation) logger = utils.get_logger(__name__) @@ -480,7 +481,7 @@ class PackageConversion: # units if legacy_def.units is not None: try: - definition.unit = units.parse_units(legacy_def.units) + definition.unit = ureg.parse_units(legacy_def.units) except UndefinedUnitError: logger.error('unknown unit %s' % legacy_def.units) except ValueError as e: diff --git a/nomad/metainfo/metainfo.py b/nomad/metainfo/metainfo.py index 9f6bc7afedee66ffc1d215358cc2ce144d7174c2..eb845b4d9a15cea2871777b55863cb23797a67a1 100644 --- a/nomad/metainfo/metainfo.py +++ b/nomad/metainfo/metainfo.py @@ -31,6 +31,8 @@ import pytz import docstring_parser import jmespath +from nomad.units import ureg + m_package: 'Package' = None @@ -219,7 +221,7 @@ class _Dimension(DataType): class _Unit(DataType): def set_normalize(self, section, quantity_def: 'Quantity', value): if isinstance(value, str): - value = units.parse_units(value) + value = ureg.parse_units(value) elif not isinstance(value, pint.unit._Unit): raise TypeError('Units must be given as str or pint Unit instances.') @@ -230,11 +232,7 @@ class _Unit(DataType): return value.__str__() def deserialize(self, section, quantity_def: 'Quantity', 
value): - return units.parse_units(value) - - -units = pint.UnitRegistry() -''' The default pint unit registry that should be used to give units to quantity definitions. ''' + return ureg.parse_units(value) class _Callable(DataType): @@ -1441,7 +1439,7 @@ class MSection(metaclass=MObjectMeta): # TODO find a way to make this a subclas Convinience method to get annotations Arguments: - key: Either the optional annoation name or an annotation class. In the first + key: Either the optional annotation name or an annotation class. In the first case the annotation is returned, regardless of its type. In the second case, all names and list for names are iterated and all annotations of the given class are returned. diff --git a/nomad/metainfo/search_extension.py b/nomad/metainfo/search_extension.py index 414df9417de9fa9d78d13a3a447f3cfb0a4c9c31..a520ab7c5c1a2c0257fd76bc283863bc7fdac170 100644 --- a/nomad/metainfo/search_extension.py +++ b/nomad/metainfo/search_extension.py @@ -69,11 +69,11 @@ class Search(Elastic): The information can be used (e.g. by the GUI) to fill in empty values. group: Indicates that his quantity can be used to group results. The value will be the name of the group. + derived: A callable that is applied to search parameter values before search. search_field: The qualified field in the elastic mapping that is used to search. This might be different from the field that is used to store the value in elastic search. This is especially useful if the field represents a inner document and a subfield of this inner object should be used for search. - derived: A callable that is applied to search parameter values before search. 
''' def __init__( diff --git a/nomad/normalizing/band_structure.py b/nomad/normalizing/band_structure.py index 8754670dd85eea2f10d5c2dfa2c2e6a6987841bd..2d68a8ca6f555517b4ce6718b2a7e36f01cbdb15 100644 --- a/nomad/normalizing/band_structure.py +++ b/nomad/normalizing/band_structure.py @@ -12,11 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. -import json import numpy as np import ase -from nomad.datamodel.metainfo.public import section_k_band, section_band_gap, section_system +from nomad.datamodel.metainfo.public import section_k_band, section_band_gap, section_system, section_brillouin_zone from nomad.normalizing.normalizer import Normalizer from nomad import config, atomutils from nomad.constants import pi @@ -42,8 +41,11 @@ class BandStructureNormalizer(Normalizer): for scc in self.section_run.section_single_configuration_calculation: # In order to resolve band gaps, we need a reference to the highest - # occupied energy. - valence_band_maximum = scc.energy_reference_highest_occupied + # occupied energy (semiconductors/insulators) or the Fermi energy + # (metals) + e_valence = scc.energy_reference_highest_occupied + e_fermi = scc.energy_reference_fermi + energy_reference = e_fermi if e_valence is None else e_valence # In order to resolve the special points and the reciprocal cell, # we need information about the system. 
@@ -53,7 +55,7 @@ class BandStructureNormalizer(Normalizer): if band.band_structure_kind != "vibrational": self.add_reciprocal_cell(band, system) self.add_brillouin_zone(band) - self.add_band_gaps(band, valence_band_maximum) + self.add_band_gaps(band, energy_reference) self.add_path_labels(band, system) def add_reciprocal_cell(self, band: section_k_band, system: section_system): @@ -91,10 +93,9 @@ class BandStructureNormalizer(Normalizer): band.reciprocal_cell = recip_cell def add_brillouin_zone(self, band: section_k_band) -> None: - """Adds a dictionary containing the information needed to display - the Brillouin zone for this material. This functionality could be put - into the GUI directly, with the Brillouin zone construction performed - from the reciprocal cell. + """Adds the information needed to display the Brillouin zone for this + material. This functionality could be put into the GUI directly, with + the Brillouin zone construction performed from the reciprocal cell. The Brillouin Zone is a Wigner-Seitz cell, and is thus uniquely defined. It's shape does not depend on the used primitive cell. 
@@ -104,9 +105,10 @@ class BandStructureNormalizer(Normalizer): self.logger.info("Could not resolve Brillouin zone as reciprocal cell is missing.") return - brillouin_zone = atomutils.get_brillouin_zone(recip_cell.magnitude) - bz_json = json.dumps(brillouin_zone) - band.brillouin_zone = bz_json + brillouin_zone_data = atomutils.get_brillouin_zone(recip_cell.magnitude) + section_bz = band.m_create(section_brillouin_zone) + section_bz.vertices = brillouin_zone_data["vertices"] + section_bz.faces = brillouin_zone_data["faces"] def get_k_space_distance(self, reciprocal_cell: np.array, point1: np.array, point2: np.array) -> float: """Used to calculate the Euclidean distance of two points in k-space, @@ -125,12 +127,11 @@ class BandStructureNormalizer(Normalizer): return k_point_distance - def add_band_gaps(self, band: section_k_band, valence_band_maximum: np.array) -> None: - """Given the band structure and fermi level, calculates the band gap - for spin channels and also reports the total band gap as the minum gap - found. + def add_band_gaps(self, band: section_k_band, energy_reference: np.array) -> None: + """Given the band structure and an energy reference, calculates the band gap + separately for all spin channels. 
""" - if valence_band_maximum is None: + if energy_reference is None: self.logger.info("Could not resolve band gaps as the energy reference is missing.") return @@ -142,7 +143,7 @@ class BandStructureNormalizer(Normalizer): # Gather the energies and k points from each segment into one big # array reciprocal_cell = reciprocal_cell.magnitude - valence_band_maximum = valence_band_maximum.magnitude + valence_band_maximum = energy_reference.magnitude path: np.array = [] energies: np.array = [] for segment in band.section_k_band_segment: diff --git a/nomad/normalizing/dos.py b/nomad/normalizing/dos.py index adfb38bbe79e13643a2ed83077594646a56a0ce9..a4d446881e3aee9013cfef38a1aadf03e98c2c00 100644 --- a/nomad/normalizing/dos.py +++ b/nomad/normalizing/dos.py @@ -13,6 +13,7 @@ # limitations under the License. from .normalizer import Normalizer +from nomad.atomutils import get_volume import numpy as np @@ -58,7 +59,7 @@ class DosNormalizer(Normalizer): return number_of_atoms = np.shape(atom_positions)[0] - unit_cell_volume = np.linalg.det(lattice_vectors.magnitude) + unit_cell_volume = get_volume(lattice_vectors.magnitude) # Final quantities dos_normed = dos_values / (number_of_atoms * unit_cell_volume) diff --git a/nomad/normalizing/encyclopedia/basisset.py b/nomad/normalizing/encyclopedia/basisset.py index 2e615c97e0588b56153419e374b4cb6193e5860d..f8d3eaaa908fa44fc19ea6cc6beed75030f7eab8 100644 --- a/nomad/normalizing/encyclopedia/basisset.py +++ b/nomad/normalizing/encyclopedia/basisset.py @@ -2,14 +2,12 @@ from abc import ABC, abstractmethod from collections import OrderedDict import numpy as np from typing import Tuple, List -from pint import UnitRegistry +from nomad.units import ureg from nomad.parsing.legacy import Backend from nomad.metainfo import Section from nomad.utils import RestrictedDict -ureg = UnitRegistry() - def get_basis_set(context, backend: Backend, logger) -> RestrictedDict: """Decide which type of basis set settings are applicable to the entry and 
diff --git a/nomad/normalizing/encyclopedia/encyclopedia.py b/nomad/normalizing/encyclopedia/encyclopedia.py index c0ab6201068eab73bcca5c6e58ce5e40d19d4669..5a88c70cf694cb13f598e674a2c957ed0ed9b802 100644 --- a/nomad/normalizing/encyclopedia/encyclopedia.py +++ b/nomad/normalizing/encyclopedia/encyclopedia.py @@ -15,8 +15,8 @@ from typing import Any from nomad.normalizing.normalizer import Normalizer -from nomad.metainfo.encyclopedia import ( - section_encyclopedia, +from nomad.datamodel.encyclopedia import ( + EncyclopediaMetadata, Material, Method, Properties, @@ -205,8 +205,8 @@ class EncyclopediaNormalizer(Normalizer): """The caller will automatically log if the normalizer succeeds or ends up with an exception. """ - sec_enc = self.backend.entry_archive.m_create(section_encyclopedia) - status_enums = section_encyclopedia.status.type + sec_enc = self.backend.entry_archive.section_metadata.m_create(EncyclopediaMetadata) + status_enums = EncyclopediaMetadata.status.type # Do nothing if section_run is not present if self.section_run is None: @@ -239,7 +239,7 @@ class EncyclopediaNormalizer(Normalizer): ) return - # Get the system type, stop if unknown + # Get the system type. material_enums = Material.material_type.type representative_system, material_type = self.material_type(material) if material_type != material_enums.bulk and material_type != material_enums.two_d and material_type != material_enums.one_d: @@ -249,12 +249,18 @@ class EncyclopediaNormalizer(Normalizer): "unsupported material type for encyclopedia", enc_status=status, ) - return - # Get the method type. For now, we allow unknown method type. - # Mostly to allow phonon calculations through. + # Get the method type. For now, we allow unknown method type to + # allow phonon calculations through. 
representative_method, method_type = self.method_type(method) + if method_type == config.services.unavailable_value: + status = status_enums.unsupported_method_type + sec_enc.status = status + self.logger.info( + "unsupported method type for encyclopedia", + enc_status=status, + ) # Get representative scc try: diff --git a/nomad/normalizing/encyclopedia/material.py b/nomad/normalizing/encyclopedia/material.py index fa59d12701a83ef3793e485c0fca3787f7291153..bf928e57fecd821237124e5b113c4d080437a1b8 100644 --- a/nomad/normalizing/encyclopedia/material.py +++ b/nomad/normalizing/encyclopedia/material.py @@ -13,7 +13,7 @@ # limitations under the License. from typing import Dict, List -from math import gcd as gcd +from math import gcd from functools import reduce from abc import abstractmethod import re @@ -25,13 +25,14 @@ import numpy as np from matid import SymmetryAnalyzer import matid.geometry -from nomad.metainfo.encyclopedia import ( +from nomad.datamodel.encyclopedia import ( Material, Properties, - WyckoffSet, Bulk, - WyckoffVariables, IdealizedStructure, + WyckoffSet, + WyckoffVariables, + LatticeParameters, ) from nomad.normalizing.encyclopedia.context import Context from nomad.parsing.legacy import Backend @@ -72,10 +73,22 @@ class MaterialNormalizer(): formula = atomutils.get_formula_string(names, counts_reduced) material.formula_reduced = formula - def material_hash(self, material: Material, spg_number: int, wyckoff_sets: List[WyckoffSet]) -> None: + def species_and_counts(self, material: Material, names: List[str], reduced_counts: List[int]) -> None: + parts = [] + for name, count in zip(names, reduced_counts): + if count == 1: + parts.append(name) + else: + parts.append("{}{}".format(name, int(count))) + material.species_and_counts = " ".join(parts) + + def species(self, material: Material, names: List[str]) -> None: + material.species = " ".join(names) + + def material_id(self, material: Material, spg_number: int, wyckoff_sets: List[WyckoffSet]) -> None: 
# Create and store hash based on SHA512 norm_hash_string = atomutils.get_symmetry_string(spg_number, wyckoff_sets) - material.material_hash = hash(norm_hash_string) + material.material_id = hash(norm_hash_string) def number_of_atoms(self, ideal: IdealizedStructure, std_atoms: Atoms) -> None: ideal.number_of_atoms = len(std_atoms) @@ -116,7 +129,14 @@ class MaterialBulkNormalizer(MaterialNormalizer): def lattice_parameters(self, ideal: IdealizedStructure, std_atoms: Atoms) -> None: cell_normalized = std_atoms.get_cell() * 1E-10 - ideal.lattice_parameters = atomutils.get_lattice_parameters(cell_normalized) + param_values = atomutils.get_lattice_parameters(cell_normalized) + param_section = ideal.m_create(LatticeParameters) + param_section.a = float(param_values[0]) + param_section.b = float(param_values[1]) + param_section.c = float(param_values[2]) + param_section.alpha = float(param_values[3]) + param_section.beta = float(param_values[4]) + param_section.gamma = float(param_values[5]) def mass_density(self, properties: Properties, repr_system: Atoms) -> None: mass = atomutils.get_summed_atomic_mass(repr_system.get_atomic_numbers()) @@ -334,9 +354,9 @@ class MaterialBulkNormalizer(MaterialNormalizer): strukturbericht = re.sub('[$_{}]', '', strukturbericht) bulk.strukturbericht_designation = strukturbericht - def wyckoff_sets(self, bulk: Bulk, wyckoff_sets: Dict) -> None: + def wyckoff_sets(self, ideal: IdealizedStructure, wyckoff_sets: Dict) -> None: for group in wyckoff_sets: - wset = bulk.m_create(WyckoffSet) + wset = ideal.m_create(WyckoffSet) if group.x is not None or group.y is not None or group.z is not None: variables = wset.m_create(WyckoffVariables) if group.x is not None: @@ -352,7 +372,7 @@ class MaterialBulkNormalizer(MaterialNormalizer): def normalize(self, context: Context) -> None: # Fetch resources sec_system = context.representative_system - sec_enc = self.backend.entry_archive.section_encyclopedia + sec_enc = 
self.backend.entry_archive.section_metadata.encyclopedia material = sec_enc.material properties = sec_enc.properties sec_symmetry = sec_system["section_symmetry"][0] @@ -371,7 +391,7 @@ class MaterialBulkNormalizer(MaterialNormalizer): bulk = material.m_create(Bulk) ideal = material.m_create(IdealizedStructure) self.mass_density(properties, repr_atoms) - self.material_hash(material, spg_number, wyckoff_sets) + self.material_id(material, spg_number, wyckoff_sets) self.number_of_atoms(ideal, std_atoms) self.atom_labels(ideal, std_atoms) self.atom_positions(ideal, std_atoms) @@ -383,6 +403,8 @@ class MaterialBulkNormalizer(MaterialNormalizer): self.lattice_vectors_primitive(ideal, prim_atoms) self.formula(material, names, counts) self.formula_reduced(material, names, reduced_counts) + self.species(material, names) + self.species_and_counts(material, names, reduced_counts) self.has_free_wyckoff_parameters(bulk, symmetry_analyzer) self.lattice_parameters(ideal, std_atoms) self.material_name(material, names, reduced_counts) @@ -394,7 +416,7 @@ class MaterialBulkNormalizer(MaterialNormalizer): self.structure_type(bulk, sec_system) self.structure_prototype(bulk, sec_system) self.strukturbericht_designation(bulk, sec_system) - self.wyckoff_sets(bulk, wyckoff_sets) + self.wyckoff_sets(ideal, wyckoff_sets) class Material2DNormalizer(MaterialNormalizer): @@ -411,16 +433,19 @@ class Material2DNormalizer(MaterialNormalizer): ideal.lattice_vectors_primitive = cell_prim def lattice_parameters(self, ideal: IdealizedStructure, std_atoms: Atoms, periodicity: np.array) -> None: - # 2D systems only have three lattice parameter: two length and angle between them + # 2D systems only have three lattice parameter: two lengths and angle between them periodic_indices = np.where(np.array(periodicity) == True)[0] # noqa: E712 cell = std_atoms.get_cell() a_vec = cell[periodic_indices[0], :] * 1e-10 b_vec = cell[periodic_indices[1], :] * 1e-10 a = np.linalg.norm(a_vec) b = np.linalg.norm(b_vec) 
- alpha = np.clip(np.dot(a_vec, b_vec) / (a * b), -1.0, 1.0) - alpha = np.arccos(alpha) - ideal.lattice_parameters = np.array([a, b, 0.0, alpha, 0.0, 0.0]) + gamma = np.clip(np.dot(a_vec, b_vec) / (a * b), -1.0, 1.0) + gamma = np.arccos(gamma) + param_section = ideal.m_create(LatticeParameters) + param_section.a = float(a) + param_section.b = float(b) + param_section.gamma = float(gamma) def periodicity(self, ideal: IdealizedStructure, std_atoms: Atoms) -> None: # MatID already provides the correct periodicity @@ -465,7 +490,7 @@ class Material2DNormalizer(MaterialNormalizer): def normalize(self, context: Context) -> None: # Fetch resources - sec_enc = self.backend.entry_archive.section_encyclopedia + sec_enc = self.backend.entry_archive.section_metadata.encyclopedia material = sec_enc.material repr_atoms = context.representative_system.m_cache["representative_atoms"] # Temporary value stored by SystemNormalizer symmetry_analyzer = self.get_symmetry_analyzer(repr_atoms) @@ -481,7 +506,7 @@ class Material2DNormalizer(MaterialNormalizer): # Fill metainfo ideal = material.m_create(IdealizedStructure) self.periodicity(ideal, std_atoms) - self.material_hash(material, spg_number, wyckoff_sets) + self.material_id(material, spg_number, wyckoff_sets) self.number_of_atoms(ideal, std_atoms) self.atom_labels(ideal, std_atoms) self.atom_positions(ideal, std_atoms) @@ -495,7 +520,7 @@ class Material2DNormalizer(MaterialNormalizer): class Material1DNormalizer(MaterialNormalizer): """Processes structure related metainfo for Encyclopedia 1D structures. """ - def material_hash_1d(self, material: Material, prim_atoms: Atoms) -> None: + def material_id_1d(self, material: Material, prim_atoms: Atoms) -> None: """Hash to be used as identifier for a material. Different 1D materials are defined by their Coulomb matrix eigenvalues and their Hill formulas. 
@@ -507,7 +532,7 @@ class Material1DNormalizer(MaterialNormalizer): id_strings.append(fingerprint) hash_seed = ", ".join(id_strings) hash_val = hash(hash_seed) - material.material_hash = hash_val + material.material_id = hash_val def lattice_vectors(self, ideal: IdealizedStructure, std_atoms: Atoms) -> None: cell_normalized = std_atoms.get_cell() @@ -519,7 +544,8 @@ class Material1DNormalizer(MaterialNormalizer): periodic_indices = np.where(np.array(periodicity) == True)[0] # noqa: E712 cell = std_atoms.get_cell() a = np.linalg.norm(cell[periodic_indices[0], :]) * 1e-10 - ideal.lattice_parameters = np.array([a, 0.0, 0.0, 0.0, 0.0, 0.0]) + params = ideal.m_create(LatticeParameters) + params.a = float(a) def periodicity(self, ideal: IdealizedStructure, prim_atoms: Atoms) -> None: # Get dimension of system by also taking into account the covalent radii @@ -680,7 +706,7 @@ class Material1DNormalizer(MaterialNormalizer): def normalize(self, context: Context) -> None: # Fetch resources sec_system = context.representative_system - sec_enc = self.backend.entry_archive.section_encyclopedia + sec_enc = self.backend.entry_archive.section_metadata.encyclopedia material = sec_enc.material repr_atoms = sec_system.m_cache["representative_atoms"] # Temporary value stored by SystemNormalizer symmetry_analyzer = self.get_symmetry_analyzer(repr_atoms) @@ -701,5 +727,5 @@ class Material1DNormalizer(MaterialNormalizer): self.lattice_vectors(ideal, std_atoms) self.formula(material, names, counts) self.formula_reduced(material, names, reduced_counts) - self.material_hash_1d(material, std_atoms) + self.material_id_1d(material, std_atoms) self.lattice_parameters(ideal, std_atoms, ideal.periodicity) diff --git a/nomad/normalizing/encyclopedia/method.py b/nomad/normalizing/encyclopedia/method.py index 033cda55cfaf60eeebb87f08205515c8df9c8f31..12ca4aed3c5525e2786548877e8c5fde2081beba 100644 --- a/nomad/normalizing/encyclopedia/method.py +++ b/nomad/normalizing/encyclopedia/method.py @@ -16,9 
+16,9 @@ from typing import List from abc import abstractmethod from collections import OrderedDict import numpy as np -from pint import UnitRegistry +from nomad.units import ureg -from nomad.metainfo.encyclopedia import ( +from nomad.datamodel.encyclopedia import ( Material, Method, ) @@ -28,8 +28,6 @@ from nomad.normalizing.encyclopedia.context import Context from nomad.utils import RestrictedDict from nomad import config -ureg = UnitRegistry() - class MethodNormalizer(): """A base class that is used for processing method related information @@ -40,7 +38,7 @@ class MethodNormalizer(): self.logger = logger self.section_run = backend.entry_archive.section_run[0] - def method_hash(self, method: Method, settings_basis_set: RestrictedDict, repr_method: Section): + def method_id(self, method: Method, settings_basis_set: RestrictedDict, repr_method: Section): method_dict = RestrictedDict( mandatory_keys=[ "program_name", @@ -52,7 +50,7 @@ class MethodNormalizer(): # The subclasses may define their own method properties that are to be # included here. 
- subsettings = self.method_hash_dict(method, settings_basis_set, repr_method) + subsettings = self.method_id_dict(method, settings_basis_set, repr_method) method_dict["subsettings"] = subsettings # If all required information is present, safe the hash @@ -61,17 +59,17 @@ class MethodNormalizer(): except (KeyError, ValueError) as e: self.logger.info("Could not create method hash: {}".format(e)) else: - method.method_hash = method_dict.hash() + method.method_id = method_dict.hash() @abstractmethod - def method_hash_dict(self, method: Method, settings_basis_set: RestrictedDict, repr_method: Section) -> RestrictedDict: + def method_id_dict(self, method: Method, settings_basis_set: RestrictedDict, repr_method: Section) -> RestrictedDict: pass - def group_eos_hash(self, method: Method, material: Material, repr_method: Section): + def group_eos_id(self, method: Method, material: Material, repr_method: Section): eos_dict = RestrictedDict( mandatory_keys=[ "upload_id", - "method_hash", + "method_id", "formula", ], forbidden_values=[None] @@ -81,7 +79,7 @@ class MethodNormalizer(): eos_dict['upload_id'] = self.backend.entry_archive.section_metadata.upload_id # Method - eos_dict["method_hash"] = method.method_hash + eos_dict["method_id"] = method.method_id # The formula should be same for EoS (maybe even symmetries) eos_dict["formula"] = material.formula @@ -92,9 +90,9 @@ class MethodNormalizer(): except (KeyError, ValueError) as e: self.logger.info("Could not create EOS hash: {}".format(e)) else: - method.group_eos_hash = eos_dict.hash() + method.group_eos_id = eos_dict.hash() - def group_parametervariation_hash(self, method: Method, settings_basis_set: RestrictedDict, repr_system: Section, repr_method: Section): + def group_parametervariation_id(self, method: Method, settings_basis_set: RestrictedDict, repr_system: Section, repr_method: Section): # Create ordered dictionary with the values. 
Order is important for param_dict = RestrictedDict( mandatory_keys=[ @@ -136,7 +134,7 @@ class MethodNormalizer(): # The subclasses may define their own method properties that are to be # included here. - subsettings = self.group_parametervariation_hash_dict(method, settings_basis_set, repr_method) + subsettings = self.group_parametervariation_id_dict(method, settings_basis_set, repr_method) param_dict["subsettings"] = subsettings # Form a hash from the dictionary @@ -145,10 +143,10 @@ class MethodNormalizer(): except (KeyError, ValueError) as e: self.logger.info("Could not create parameter variation hash: {}".format(e)) else: - method.group_parametervariation_hash = param_dict.hash() + method.group_parametervariation_id = param_dict.hash() @abstractmethod - def group_parametervariation_hash_dict(self, method: Method, settings_basis_set: RestrictedDict, repr_method: Section) -> RestrictedDict: + def group_parametervariation_id_dict(self, method: Method, settings_basis_set: RestrictedDict, repr_method: Section) -> RestrictedDict: pass def group_e_min(self) -> None: @@ -243,7 +241,7 @@ class MethodDFTNormalizer(MethodNormalizer): short_name = self.create_xc_functional_shortname(long_name) method.functional_type = short_name - def method_hash_dict(self, method: Method, settings_basis_set: RestrictedDict, repr_method: Section) -> RestrictedDict: + def method_id_dict(self, method: Method, settings_basis_set: RestrictedDict, repr_method: Section) -> RestrictedDict: # Extend by DFT settings. 
hash_dict = RestrictedDict( mandatory_keys=( @@ -294,7 +292,7 @@ class MethodDFTNormalizer(MethodNormalizer): return hash_dict - def group_parametervariation_hash_dict(self, method: Method, settings_basis_set: RestrictedDict, repr_method: Section): + def group_parametervariation_id_dict(self, method: Method, settings_basis_set: RestrictedDict, repr_method: Section): """Dictionary containing the parameters used for convergence test grouping This is the source for generating the related hash.""" @@ -384,7 +382,7 @@ class MethodDFTNormalizer(MethodNormalizer): # Fetch resources repr_method = context.representative_method repr_system = context.representative_system - sec_enc = self.backend.entry_archive.section_encyclopedia + sec_enc = self.backend.entry_archive.section_metadata.encyclopedia method = sec_enc.method material = sec_enc.material settings_basis_set = get_basis_set(context, self.backend, self.logger) @@ -393,9 +391,9 @@ class MethodDFTNormalizer(MethodNormalizer): self.core_electron_treatment(method) self.functional_long_name(method, repr_method) self.functional_type(method) - self.method_hash(method, settings_basis_set, repr_method) - self.group_eos_hash(method, material, repr_method) - self.group_parametervariation_hash(method, settings_basis_set, repr_system, repr_method) + self.method_id(method, settings_basis_set, repr_method) + self.group_eos_id(method, material, repr_method) + self.group_parametervariation_id(method, settings_basis_set, repr_system, repr_method) class MethodGWNormalizer(MethodDFTNormalizer): @@ -423,7 +421,7 @@ class MethodGWNormalizer(MethodDFTNormalizer): def normalize(self, context: Context) -> None: # Fetch resources repr_method = context.representative_method - sec_enc = self.backend.entry_archive.section_encyclopedia + sec_enc = self.backend.entry_archive.section_metadata.encyclopedia method = sec_enc.method # Fill metainfo diff --git a/nomad/normalizing/encyclopedia/properties.py b/nomad/normalizing/encyclopedia/properties.py 
index 102cca2f9c1e159fc387455eaacb642190fc98ff..aa8ae3af36e38490d69e80c60510f8868467583d 100644 --- a/nomad/normalizing/encyclopedia/properties.py +++ b/nomad/normalizing/encyclopedia/properties.py @@ -12,11 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. -import json - -from nomad.metainfo.encyclopedia import ( +from nomad.datamodel.encyclopedia import ( Calculation, Properties, + Energies, ) from nomad.parsing.legacy import Backend from nomad.metainfo import Section @@ -60,7 +59,24 @@ class PropertiesNormalizer(): except Exception: return if representative_band is not None: - properties.electronic_band_structure = representative_band + properties.electronic_band_structure = representative_band.m_path() + + # Add band gap information to metadata if present. The channel with + # smallest band gap index is chosen as a representative one. + band_gaps = properties.electronic_band_structure.section_band_gap + if band_gaps is not None and len(band_gaps) > 0: + min_gap_index = 0 + min_gap = float("Inf") + for i, gap in enumerate(band_gaps): + value = gap.value + if value < min_gap: + min_gap_index = i + min_gap = value + representative_gap = band_gaps[min_gap_index] + bg_value = representative_gap.value + if bg_value is not None: + properties.band_gap = representative_gap.value + properties.band_gap_direct = representative_gap.type == "direct" def electronic_dos(self, properties: Properties, context: Context) -> None: """Tries to resolve a reference to a representative electronic density @@ -85,7 +101,7 @@ class PropertiesNormalizer(): except Exception: return if representative_dos is not None: - properties.electronic_dos = representative_dos + properties.electronic_dos = representative_dos.m_path() def elastic_constants_matrix(self) -> None: pass @@ -127,7 +143,7 @@ class PropertiesNormalizer(): except Exception: return if resolved_section is not None: - properties.thermodynamical_properties = 
resolved_section + properties.thermodynamical_properties = resolved_section.m_path() def phonon_band_structure(self, properties: Properties, context: Context) -> None: """Tries to resolve a reference to a representative phonon band @@ -158,7 +174,7 @@ class PropertiesNormalizer(): except Exception: return if representative_phonon_band is not None: - properties.phonon_band_structure = representative_phonon_band + properties.phonon_band_structure = representative_phonon_band.m_path() def phonon_dos(self, properties: Properties, context: Context) -> None: """Tries to resolve a reference to a representative phonon density of @@ -185,25 +201,21 @@ class PropertiesNormalizer(): except Exception: return if representative_phonon_dos is not None: - properties.phonon_dos = representative_phonon_dos + properties.phonon_dos = representative_phonon_dos.m_path() - def energies(self, properties: Properties, gcd: int, representative_scc: Section) -> None: - energy_dict = {} + def energies(self, properties: Properties, n_atoms: int, representative_scc: Section) -> None: if representative_scc is not None: - energies_entries = { - "energy_total": "Total E", - "energy_total_T0": "Total E projected to T=0", - "energy_free": "Free E", - } - for energy_name, label in energies_entries.items(): - result = getattr(representative_scc, energy_name) - if result is not None: - energy_dict[label] = result.magnitude / gcd - - if len(energy_dict) == 0: - energy_dict = None - energies = json.dumps(energy_dict) - properties.energies = energies + energies = Energies() + energy_found = False + for energy_name in ["energy_total", "energy_total_T0", "energy_free"]: + energy_value = getattr(representative_scc, energy_name) + if energy_value is not None: + energy_found = True + + # The energies are normalized to be per atom + setattr(energies, energy_name, energy_value.magnitude / n_atoms) + if energy_found: + properties.m_add_sub_section(Properties.energies, energies) def normalize(self, context: 
Context) -> None: # There needs to be a valid SCC in order to extract any properties @@ -212,17 +224,17 @@ class PropertiesNormalizer(): return # Fetch resources - sec_enc = self.backend.entry_archive.section_encyclopedia + sec_enc = self.backend.entry_archive.section_metadata.encyclopedia properties = sec_enc.properties calc_type = context.calc_type material_type = context.material_type sec_system = context.representative_system - gcd = context.greatest_common_divisor + n_atoms = len(sec_system.atom_labels) # Save metainfo self.electronic_band_structure(properties, calc_type, material_type, context, sec_system) self.electronic_dos(properties, context) - self.energies(properties, gcd, representative_scc) + self.energies(properties, n_atoms, representative_scc) # Phonon calculations have a specific set of properties to extract if context.calc_type == Calculation.calculation_type.type.phonon_calculation: diff --git a/nomad/normalizing/optimade.py b/nomad/normalizing/optimade.py index 08528f6057792e76fa65ef23715e5eec339f1d71..a5296fa0628f6c9ff8e9c7bc08eb6c3c31b319d1 100644 --- a/nomad/normalizing/optimade.py +++ b/nomad/normalizing/optimade.py @@ -21,7 +21,7 @@ import pint.quantity from nomad.parsing.legacy import Backend from nomad.normalizing.normalizer import SystemBasedNormalizer -from nomad.metainfo import units +from nomad.units import ureg from nomad.datamodel import OptimadeEntry, Species, DFTMetadata, EntryMetadata species_re = re.compile(r'^([A-Z][a-z]?)(\d*)$') @@ -102,8 +102,8 @@ class OptimadeNormalizer(SystemBasedNormalizer): # sites optimade.nsites = len(nomad_species) optimade.species_at_sites = nomad_species - optimade.lattice_vectors = get_value('lattice_vectors', numpy=True, unit=units.m) - optimade.cartesian_site_positions = get_value('atom_positions', numpy=True, unit=units.m) + optimade.lattice_vectors = get_value('lattice_vectors', numpy=True, unit=ureg.m) + optimade.cartesian_site_positions = get_value('atom_positions', numpy=True, unit=ureg.m) 
optimade.dimension_types = [ 1 if value else 0 for value in get_value('configuration_periodic_dimensions')] diff --git a/nomad/processing/data.py b/nomad/processing/data.py index 69a54cdf7bdca39e7404727047073eff53cc5561..60fdd92c24d3aa4d6115c161f9637333fefa4870 100644 --- a/nomad/processing/data.py +++ b/nomad/processing/data.py @@ -42,6 +42,9 @@ from nomad.parsing import parser_dict, match_parser, Backend from nomad.normalizing import normalizers from nomad.datamodel import EntryArchive from nomad.archive import query_archive +from nomad.datamodel.encyclopedia import ( + EncyclopediaMetadata, +) import phonopyparser @@ -430,8 +433,10 @@ class Calc(Proc): ref_archive = EntryArchive.m_from_dict(arch) # Get encyclopedia method information directly from the referenced calculation. - ref_enc_method = ref_archive.section_encyclopedia.method - backend.entry_archive.section_encyclopedia.method = ref_enc_method + ref_enc_method = ref_archive.section_metadata.encyclopedia.method + if ref_enc_method is None or len(ref_enc_method) == 0: + raise ValueError("No method information available in referenced calculation.") + backend.entry_archive.section_metadata.encyclopedia.method = ref_enc_method # Overwrite old entry with new data. 
The metadata is updated with # new timestamp and method details taken from the referenced @@ -440,7 +445,10 @@ class Calc(Proc): self._entry_metadata.dft.xc_functional = ref_archive.section_metadata.dft.xc_functional self._entry_metadata.dft.basis_set = ref_archive.section_metadata.dft.basis_set self._entry_metadata.dft.update_group_hash() - + except Exception as e: + logger.error("Could not retrieve method information for phonon calculation.", exception=e) + self._entry_metadata.encyclopedia.status = EncyclopediaMetadata.status.type.failure + finally: # persist the calc metadata with utils.timer(logger, 'saved calc metadata', step='metadata'): self.apply_entry_metadata(self._entry_metadata) @@ -457,9 +465,6 @@ class Calc(Proc): archive_size = self.write_archive(self._parser_backend) log_data.update(archive_size=archive_size) - except Exception as e: - logger.error("Could not retrieve method information for phonon calculation.", exception=e) - @contextmanager def use_parser_backend(self, processor_name): self._parser_backend.reset_status() diff --git a/nomad/units.py b/nomad/units.py new file mode 100644 index 0000000000000000000000000000000000000000..5fbcc0ffe1e9f5f2baf1cc05ddca39823317b64c --- /dev/null +++ b/nomad/units.py @@ -0,0 +1,21 @@ +# Copyright 2018 Markus Scheidgen, empty_task +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +This module contains a global unit registry used throughout the nomad-FAIR +package. 
+""" +from pint import UnitRegistry + +ureg = UnitRegistry() diff --git a/tests/app/test_api_encyclopedia.py b/tests/app/test_api_encyclopedia.py new file mode 100644 index 0000000000000000000000000000000000000000..4fcaf35bda602ace643f08d43300345a1b7035cb --- /dev/null +++ b/tests/app/test_api_encyclopedia.py @@ -0,0 +1,51 @@ +# Copyright 2018 Markus Scheidgen +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an"AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def test_material(): + pass + + # Unpublished material should not be found + + # Embargoed material should not be found + + # Missing material causes 404 + + # Correctly found materials returns all required values + + +def test_materials(): + pass + + # Unpublished material should not be found + + # Embargoed material should not be found + + # Missing material causes 404 + + # Correctly found materials returns all required values + + # Exclusive formula works as expected + + # Inclusive formula works as expected + + # Exclusive elements works as expected + + # Inclusive elements works as expected + + # Band gap + + # Mass density + + # Property existence diff --git a/tests/metainfo/test_metainfo.py b/tests/metainfo/test_metainfo.py index f2bbc64ebb6a1496007649501473dcbbc7504490..66e05402217081ffa379d922fec21dfd6f6f2aaa 100644 --- a/tests/metainfo/test_metainfo.py +++ b/tests/metainfo/test_metainfo.py @@ -19,10 +19,11 @@ import datetime from nomad.metainfo.metainfo import ( MSection, MCategory, Section, Quantity, SubSection, Definition, Package, DeriveError, - 
MetainfoError, Environment, MResource, Datetime, units, Annotation, SectionAnnotation, + MetainfoError, Environment, MResource, Datetime, Annotation, SectionAnnotation, DefinitionAnnotation, Reference, MProxy, derived) from nomad.metainfo.example import Run, VaspRun, System, SystemHash, Parsing, SCC, m_package as example_package from nomad import utils +from nomad.units import ureg from tests import utils as test_utils @@ -389,9 +390,9 @@ class TestM1: def test_unit_conversion(self): system = System() - system.atom_positions = [[1, 2, 3]] * units.angstrom - assert system.atom_positions.units == units.meter - assert system.atom_positions[0][0] < 0.1 * units.meter + system.atom_positions = [[1, 2, 3]] * ureg.angstrom + assert system.atom_positions.units == ureg.meter + assert system.atom_positions[0][0] < 0.1 * ureg.meter def test_synonym(self): system = System() @@ -542,7 +543,7 @@ class TestM1: scc.energy_total_0 = 1.0 scc.an_int = 1 assert scc.energy_total_0.m == 1.0 # pylint: disable=no-member - assert scc.energy_total_0 == 1.0 * units.J + assert scc.energy_total_0 == 1.0 * ureg.J assert scc.m_to_dict()['energy_total_0'] == 1.0 assert scc.an_int == 1 assert scc.an_int.__class__ == np.int32 diff --git a/tests/normalizing/test_band_structure.py b/tests/normalizing/test_band_structure.py index fc722a60f439bebfe2ac9ae2537cac63c38c53a9..250fca9e88d43a36be0dad92bda4c9a2e6fa716c 100644 --- a/tests/normalizing/test_band_structure.py +++ b/tests/normalizing/test_band_structure.py @@ -27,8 +27,8 @@ from tests.normalizing.conftest import ( # pylint: disable=unused-import band_path_mP_nonstandard, band_path_cF_nonstandard, ) -from pint import UnitRegistry -ureg = UnitRegistry() + +from nomad.units import ureg def test_band_gaps(bands_unpolarized_no_gap, bands_polarized_no_gap, bands_unpolarized_gap_indirect, bands_polarized_gap_indirect): diff --git a/tests/normalizing/test_encyclopedia.py b/tests/normalizing/test_encyclopedia.py index 
c25840174d31fc687424975231af9937c9a8d807..17d096acfa680b2c559681210b254f0fe9556862 100644 --- a/tests/normalizing/test_encyclopedia.py +++ b/tests/normalizing/test_encyclopedia.py @@ -17,7 +17,6 @@ import numpy as np from ase import Atoms import ase.build from matid.symmetry.wyckoffset import WyckoffSet -from pint import UnitRegistry from nomad.utils import hash from nomad import atomutils @@ -38,13 +37,11 @@ from tests.normalizing.conftest import ( # pylint: disable=unused-import hash_vasp, ) -ureg = UnitRegistry() - def test_geometry_optimization(geometry_optimization: EntryArchive): """Tests that geometry optimizations are correctly processed." """ - enc = geometry_optimization.entry_archive.section_encyclopedia + enc = geometry_optimization.entry_archive.section_metadata.encyclopedia calc_type = enc.calculation.calculation_type assert calc_type == "geometry optimization" @@ -52,7 +49,7 @@ def test_geometry_optimization(geometry_optimization: EntryArchive): def test_molecular_dynamics(molecular_dynamics: EntryArchive): """Tests that geometry optimizations are correctly processed." """ - enc = molecular_dynamics.entry_archive.section_encyclopedia + enc = molecular_dynamics.entry_archive.section_metadata.encyclopedia calc_type = enc.calculation.calculation_type assert calc_type == "molecular dynamics" @@ -60,7 +57,7 @@ def test_molecular_dynamics(molecular_dynamics: EntryArchive): def test_1d_metainfo(one_d: EntryArchive): """Tests that metainfo for 1D systems is correctly processed. 
""" - enc = one_d.entry_archive.section_encyclopedia + enc = one_d.entry_archive.section_metadata.encyclopedia # Material material = enc.material assert material.material_type == "1D" @@ -74,13 +71,13 @@ def test_1d_metainfo(one_d: EntryArchive): assert ideal.atom_positions is not None assert ideal.lattice_vectors is not None assert np.array_equal(ideal.periodicity, [True, False, False]) - assert np.allclose(ideal.lattice_parameters, [4.33793652e-10, 0, 0, 0, 0, 0], atol=0) + assert ideal.lattice_parameters.a == pytest.approx(4.33793652e-10) def test_2d_metainfo(two_d: EntryArchive): """Tests that metainfo for 2D systems is correctly processed. """ - enc = two_d.entry_archive.section_encyclopedia + enc = two_d.entry_archive.section_metadata.encyclopedia # Material material = enc.material assert material.material_type == "2D" @@ -95,13 +92,18 @@ def test_2d_metainfo(two_d: EntryArchive): assert ideal.lattice_vectors is not None assert ideal.lattice_vectors_primitive is not None assert np.array_equal(ideal.periodicity, [True, True, False]) - assert np.allclose(ideal.lattice_parameters, [2.46559821e-10, 2.46559821e-10, 0, 120 / 180 * np.pi, 0, 0], atol=0) + assert ideal.lattice_parameters.a == pytest.approx(2.46559821e-10) + assert ideal.lattice_parameters.b == pytest.approx(2.46559821e-10) + assert ideal.lattice_parameters.c is None + assert ideal.lattice_parameters.alpha is None + assert ideal.lattice_parameters.beta is None + assert ideal.lattice_parameters.gamma == pytest.approx(120 / 180 * np.pi) def test_bulk_metainfo(bulk: EntryArchive): """Tests that metainfo for bulk systems is correctly processed. 
""" - enc = bulk.entry_archive.section_encyclopedia + enc = bulk.entry_archive.section_metadata.encyclopedia # Material material = enc.material assert material.material_type == "bulk" @@ -115,7 +117,6 @@ def test_bulk_metainfo(bulk: EntryArchive): assert bulk.bravais_lattice == "cF" assert bulk.has_free_wyckoff_parameters is False assert bulk.point_group == "m-3m" - assert bulk.wyckoff_sets is not None assert bulk.space_group_number == 227 assert bulk.structure_type == "diamond" assert bulk.structure_prototype == "C" @@ -124,6 +125,7 @@ def test_bulk_metainfo(bulk: EntryArchive): # Idealized structure ideal = enc.material.idealized_structure + assert ideal.wyckoff_sets is not None assert ideal.number_of_atoms == 8 assert ideal.atom_labels == ["Si", "Si", "Si", "Si", "Si", "Si", "Si", "Si"] assert ideal.atom_positions is not None @@ -131,32 +133,32 @@ def test_bulk_metainfo(bulk: EntryArchive): assert ideal.lattice_vectors_primitive is not None assert np.array_equal(ideal.periodicity, [True, True, True]) assert ideal.lattice_parameters is not None - assert ideal.cell_volume == pytest.approx(5.431**3 * 1e-30) + assert ideal.cell_volume.magnitude == pytest.approx(5.431**3 * 1e-30) # Properties prop = enc.properties - assert prop.atomic_density == pytest.approx(4.99402346512432e+28) - assert prop.mass_density == pytest.approx(8 * 28.0855 * 1.6605389e-27 / (5.431**3 * 1e-30)) # Atomic mass in kg/m^3 + assert prop.atomic_density.magnitude == pytest.approx(4.99402346512432e+28) + assert prop.mass_density.magnitude == pytest.approx(8 * 28.0855 * 1.6605389e-27 / (5.431**3 * 1e-30)) # Atomic mass in kg/m^3 def test_1d_material_identification(): # Original nanotube nanotube1 = ase.build.nanotube(4, 4, vacuum=4) - enc = run_normalize_for_structure(nanotube1).entry_archive.section_encyclopedia - hash1 = enc.material.material_hash + enc = run_normalize_for_structure(nanotube1).entry_archive.section_metadata.encyclopedia + hash1 = enc.material.material_id # Rotated copy nanotube2 
= nanotube1.copy() nanotube2.rotate(90, "z", rotate_cell=True) - enc = run_normalize_for_structure(nanotube2).entry_archive.section_encyclopedia - hash2 = enc.material.material_hash + enc = run_normalize_for_structure(nanotube2).entry_archive.section_metadata.encyclopedia + hash2 = enc.material.material_id assert hash2 == hash1 # Longer copy nanotube3 = nanotube1.copy() nanotube3 *= [1, 1, 2] - enc = run_normalize_for_structure(nanotube3).entry_archive.section_encyclopedia - hash3 = enc.material.material_hash + enc = run_normalize_for_structure(nanotube3).entry_archive.section_metadata.encyclopedia + hash3 = enc.material.material_id assert hash3 == hash1 # Slightly distorted copies should match @@ -166,8 +168,8 @@ def test_1d_material_identification(): pos = nanotube4.get_positions() pos += 0.2 * np.random.rand(pos.shape[0], pos.shape[1]) nanotube4.set_positions(pos) - enc = run_normalize_for_structure(nanotube4).entry_archive.section_encyclopedia - hash4 = enc.material.material_hash + enc = run_normalize_for_structure(nanotube4).entry_archive.section_metadata.encyclopedia + hash4 = enc.material.material_id assert hash4 == hash1 # Too distorted copy should not match @@ -176,8 +178,8 @@ def test_1d_material_identification(): np.random.seed(4) pos += 1 * np.random.rand(pos.shape[0], pos.shape[1]) nanotube5.set_positions(pos) - enc = run_normalize_for_structure(nanotube5).entry_archive.section_encyclopedia - hash5 = enc.material.material_hash + enc = run_normalize_for_structure(nanotube5).entry_archive.section_metadata.encyclopedia + hash5 = enc.material.material_id assert hash5 != hash1 @@ -191,7 +193,7 @@ def test_2d_material_identification(): )] space_group_number = 191 norm_hash_string = atomutils.get_symmetry_string(space_group_number, wyckoff_sets) - graphene_material_hash = hash(norm_hash_string) + graphene_material_id = hash(norm_hash_string) # Graphene orthogonal cell graphene = Atoms( @@ -209,14 +211,14 @@ def test_2d_material_identification(): ], pbc=True ) 
- enc = run_normalize_for_structure(graphene).entry_archive.section_encyclopedia - assert enc.material.material_hash == graphene_material_hash + enc = run_normalize_for_structure(graphene).entry_archive.section_metadata.encyclopedia + assert enc.material.material_id == graphene_material_id # Graphene orthogonal supercell graphene2 = graphene.copy() graphene2 *= [2, 1, 2] - enc = run_normalize_for_structure(graphene2).entry_archive.section_encyclopedia - assert enc.material.material_hash == graphene_material_hash + enc = run_normalize_for_structure(graphene2).entry_archive.section_metadata.encyclopedia + assert enc.material.material_id == graphene_material_id # Graphene primitive cell graphene3 = Atoms( @@ -232,8 +234,8 @@ def test_2d_material_identification(): ], pbc=True ) - enc = run_normalize_for_structure(graphene3).entry_archive.section_encyclopedia - assert enc.material.material_hash == graphene_material_hash + enc = run_normalize_for_structure(graphene3).entry_archive.section_metadata.encyclopedia + assert enc.material.material_id == graphene_material_id # Slightly distorted system should match np.random.seed(4) @@ -243,9 +245,9 @@ def test_2d_material_identification(): pos += 0.05 * np.random.rand(pos.shape[0], pos.shape[1]) graphene4.set_positions(pos) entry_archive = run_normalize_for_structure(graphene4) - enc = entry_archive.entry_archive.section_encyclopedia - hash4 = enc.material.material_hash - assert hash4 == graphene_material_hash + enc = entry_archive.entry_archive.section_metadata.encyclopedia + hash4 = enc.material.material_id + assert hash4 == graphene_material_id # Too distorted system should not match graphene5 = graphene.copy() @@ -253,9 +255,9 @@ def test_2d_material_identification(): np.random.seed(4) pos += 1 * np.random.rand(pos.shape[0], pos.shape[1]) graphene5.set_positions(pos) - enc = run_normalize_for_structure(graphene5).entry_archive.section_encyclopedia - hash5 = enc.material.material_hash - assert hash5 != graphene_material_hash 
+ enc = run_normalize_for_structure(graphene5).entry_archive.section_metadata.encyclopedia + hash5 = enc.material.material_id + assert hash5 != graphene_material_id # Expected information for MoS2. MoS2 has finite thickness unlike # graphene. The structure is thus treated differently and tested @@ -279,7 +281,7 @@ ] space_group_number = 11 norm_hash_string = atomutils.get_symmetry_string(space_group_number, wyckoff_sets) - mos2_material_hash = hash(norm_hash_string) + mos2_material_id = hash(norm_hash_string) # MoS2 orthogonal cell atoms = Atoms( @@ -300,33 +302,33 @@ pbc=True ) entry_archive = run_normalize_for_structure(atoms) - enc = entry_archive.entry_archive.section_encyclopedia - assert enc.material.material_hash == mos2_material_hash + enc = entry_archive.entry_archive.section_metadata.encyclopedia + assert enc.material.material_id == mos2_material_id # MoS2 orthogonal supercell atoms *= [2, 3, 1] - enc = run_normalize_for_structure(atoms).entry_archive.section_encyclopedia - assert enc.material.material_hash == mos2_material_hash + enc = run_normalize_for_structure(atoms).entry_archive.section_metadata.encyclopedia + assert enc.material.material_id == mos2_material_id def test_bulk_material_identification(): # Original system wurtzite = ase.build.bulk("SiC", crystalstructure="wurtzite", a=3.086, c=10.053) - enc = run_normalize_for_structure(wurtzite).entry_archive.section_encyclopedia - hash1 = enc.material.material_hash + enc = run_normalize_for_structure(wurtzite).entry_archive.section_metadata.encyclopedia + hash1 = enc.material.material_id # Rotated wurtzite2 = wurtzite.copy() wurtzite2.rotate(90, "z", rotate_cell=True) - enc = run_normalize_for_structure(wurtzite2).entry_archive.section_encyclopedia - hash2 = enc.material.material_id + enc = run_normalize_for_structure(wurtzite2).entry_archive.section_metadata.encyclopedia + hash2 = enc.material.material_id assert hash2 
== hash1 # Supercell wurtzite3 = wurtzite.copy() wurtzite3 *= [2, 3, 1] - enc = run_normalize_for_structure(wurtzite3).entry_archive.section_encyclopedia - hash3 = enc.material.material_hash + enc = run_normalize_for_structure(wurtzite3).entry_archive.section_metadata.encyclopedia + hash3 = enc.material.material_id assert hash3 == hash1 # Slightly distorted system should match @@ -336,8 +338,8 @@ def test_bulk_material_identification(): pos = wurtzite4.get_positions() pos += 0.05 * np.random.rand(pos.shape[0], pos.shape[1]) wurtzite4.set_positions(pos) - enc = run_normalize_for_structure(wurtzite4).entry_archive.section_encyclopedia - hash4 = enc.material.material_hash + enc = run_normalize_for_structure(wurtzite4).entry_archive.section_metadata.encyclopedia + hash4 = enc.material.material_id assert hash4 == hash1 # Too distorted system should not match @@ -346,8 +348,8 @@ def test_bulk_material_identification(): np.random.seed(4) pos += 1 * np.random.rand(pos.shape[0], pos.shape[1]) wurtzite5.set_positions(pos) - enc = run_normalize_for_structure(wurtzite5).entry_archive.section_encyclopedia - hash5 = enc.material.material_hash + enc = run_normalize_for_structure(wurtzite5).entry_archive.section_metadata.encyclopedia + hash5 = enc.material.material_id assert hash5 != hash1 @@ -369,7 +371,7 @@ def test_1d_structure_structure_at_cell_boundary(): ], pbc=True ) - enc = run_normalize_for_structure(atoms).entry_archive.section_encyclopedia + enc = run_normalize_for_structure(atoms).entry_archive.section_metadata.encyclopedia expected_cell = [ [0, 0, 0], @@ -409,7 +411,7 @@ def test_2d_structure_structure_at_cell_boundary(): ], pbc=True ) - enc = run_normalize_for_structure(atoms).entry_archive.section_encyclopedia + enc = run_normalize_for_structure(atoms).entry_archive.section_metadata.encyclopedia expected_cell = [ [2e-10, 0, 0], @@ -432,14 +434,14 @@ def test_2d_structure_structure_at_cell_boundary(): def test_method_dft_metainfo(single_point): - enc = 
single_point.entry_archive.section_encyclopedia + enc = single_point.entry_archive.section_metadata.encyclopedia assert enc.method.core_electron_treatment == "full all electron" assert enc.method.functional_long_name == "GGA_C_PBE+GGA_X_PBE" assert enc.method.functional_type == "GGA" def test_method_gw_metainfo(gw): - enc = gw.entry_archive.section_encyclopedia + enc = gw.entry_archive.section_metadata.encyclopedia assert enc.method.gw_type == "G0W0" assert enc.method.gw_starting_point == "GGA_C_PBE+0.75*GGA_X_PBE+0.25*HF_X" @@ -448,28 +450,28 @@ def test_hashes_exciting(hash_exciting): """Tests that the hashes has been successfully created for calculations from exciting. """ - enc = hash_exciting.entry_archive.section_encyclopedia - method_hash = enc.method.method_hash - group_eos_hash = enc.method.group_eos_hash - group_parametervariation_hash = enc.method.group_parametervariation_hash - assert method_hash is not None - assert group_eos_hash is not None - assert group_parametervariation_hash is not None + enc = hash_exciting.entry_archive.section_metadata.encyclopedia + method_id = enc.method.method_id + group_eos_id = enc.method.group_eos_id + group_parametervariation_id = enc.method.group_parametervariation_id + assert method_id is not None + assert group_eos_id is not None + assert group_parametervariation_id is not None def test_hashes_undefined(hash_vasp): """Tests that the hashes are not present when the method settings cannot be determined at a sufficient accuracy. """ - enc = hash_vasp.entry_archive.section_encyclopedia - method_hash = enc.method.method_hash - group_eos_hash = enc.method.group_eos_hash + enc = hash_vasp.entry_archive.section_metadata.encyclopedia + method_id = enc.method.method_id + group_eos_id = enc.method.group_eos_id # If the method cannot be determined accurately, the method hash and group # hash cannot be set. Parametervariation has may still be valid, as it does # not really need the method to be accurately defined. 
- assert method_hash is None - assert group_eos_hash is None + assert method_id is None + assert group_eos_id is None def test_dos(dos_unpolarized_vasp, dos_polarized_vasp): @@ -480,8 +482,8 @@ def test_dos(dos_unpolarized_vasp, dos_polarized_vasp): assert dos.dos_values_normalized.shape == (n_channels, 301) assert dos.dos_energies_normalized.shape == (301,) - generaltests(dos_unpolarized_vasp.entry_archive.section_encyclopedia.properties.electronic_dos, n_channels=1) - generaltests(dos_polarized_vasp.entry_archive.section_encyclopedia.properties.electronic_dos, n_channels=2) + generaltests(dos_unpolarized_vasp.entry_archive.section_metadata.encyclopedia.properties.electronic_dos, n_channels=1) + generaltests(dos_polarized_vasp.entry_archive.section_metadata.encyclopedia.properties.electronic_dos, n_channels=2) def test_electronic_bands(bands_unpolarized_no_gap, bands_polarized_no_gap, band_path_cF_nonstandard): @@ -495,18 +497,18 @@ def test_electronic_bands(bands_unpolarized_no_gap, bands_polarized_no_gap, band assert segment.band_segm_labels is not None # VASP bands - generaltests(bands_unpolarized_no_gap.entry_archive.section_encyclopedia.properties.electronic_band_structure) - generaltests(bands_polarized_no_gap.entry_archive.section_encyclopedia.properties.electronic_band_structure) + generaltests(bands_unpolarized_no_gap.entry_archive.section_metadata.encyclopedia.properties.electronic_band_structure) + generaltests(bands_polarized_no_gap.entry_archive.section_metadata.encyclopedia.properties.electronic_band_structure) # Band structure from exciting calculation where there are multiple sccs # and multiple bands present for some reason... 
- generaltests(band_path_cF_nonstandard.entry_archive.section_encyclopedia.properties.electronic_band_structure) + generaltests(band_path_cF_nonstandard.entry_archive.section_metadata.encyclopedia.properties.electronic_band_structure) def test_phonon(phonon: EntryArchive): """Tests that phonon calculations are correctly processed. """ - enc = phonon.entry_archive.section_encyclopedia + enc = phonon.entry_archive.section_metadata.encyclopedia calc_type = enc.calculation.calculation_type prop = enc.properties band = prop.phonon_band_structure