From d547f0bc51a70a32eef2e63fdf630033ef122620 Mon Sep 17 00:00:00 2001 From: Lauri Himanen <lauri.himanen@gmail.com> Date: Mon, 1 Jun 2020 12:44:46 +0300 Subject: [PATCH] Added working suggestions route, added dynamic property fetch for calculations. --- nomad/app/api/encyclopedia.py | 183 ++++++++++++++++++++++++++-------- 1 file changed, 141 insertions(+), 42 deletions(-) diff --git a/nomad/app/api/encyclopedia.py b/nomad/app/api/encyclopedia.py index 5d8ce72432..db80119272 100644 --- a/nomad/app/api/encyclopedia.py +++ b/nomad/app/api/encyclopedia.py @@ -22,8 +22,10 @@ from typing import List, Dict from flask_restplus import Resource, abort, fields, marshal from flask import request from elasticsearch_dsl import Search, Q, A +from elasticsearch_dsl.utils import AttrDict from nomad import config, files +from nomad.archive import ArchiveObject from nomad.units import ureg from nomad.metainfo import MSection from nomad.atomutils import get_hill_decomposition @@ -570,6 +572,10 @@ class EncGroupsResource(Resource): return result, 200 +suggestions_map = { + "code_name": "dft.code_name", + "structure_type": "encyclopedia.material.bulk.structure_type", +} suggestions_query = api.parser() suggestions_query.add_argument( "property", @@ -598,7 +604,31 @@ class EncSuggestionsResource(Resource): args = suggestions_query.parse_args() prop = args.get("property", None) - return {prop: []}, 200 + # Use aggregation to return all unique terms for the requested field. + # Without using composite aggregations there is a size limit for the + # number of aggregation buckets. This should, however, not be a problem + # since the number of unique values is low for all supported properties. + s = Search(index=config.elastic.index_name) + query = Q( + "bool", + filter=[ + Q("term", published=True), + Q("term", with_embargo=False), + ] + ) + s = s.query(query) + s = s.extra(**{ + "size": 0, + }) + + terms_agg = A("terms", field=suggestions_map[prop]) + s.aggs.bucket("suggestions", terms_agg) + + # Gather unique values into a list + response = s.execute() + suggestions = [x.key for x in response.aggs.suggestions.buckets] + + return {prop: suggestions}, 200 calcs_query = api.parser() @@ -922,16 +952,72 @@ class EncIdealizedStructureResource(Resource): return idealized_structure, 200 +calculation_property_map = { + "lattice_parameters": { + "es_source": "encyclopedia.material.idealized_structure.lattice_parameters" + }, + "energies": { + "es_source": "encyclopedia.properties.energies", + }, + "mass_density": { + "es_source": "encyclopedia.properties.mass_density", + }, + "atomic_density": { + "es_source": "encyclopedia.properties.atomic_density", + }, + "cell_volume": { + "es_source": "encyclopedia.material.idealized_structure.cell_volume" + }, + "electronic_band_structure": { + "es_source": "encyclopedia.properties.electronic_band_structure" + }, + "electronic_dos": { + "es_source": "encyclopedia.properties.electronic_dos" + }, + "wyckoff_sets": { + "arch_source": "section_metadata/encyclopedia/material/idealized_structure/wyckoff_sets" + }, +} + +calculation_property_query = api.model("calculation_query", { + "properties": fields.List(fields.String), +}) +energies = api.model("energies", { + "energy_total": fields.Float, + "energy_total_T0": fields.Float, + "energy_free": fields.Float, +}) +calculation_property_result = api.model("calculation_query", { + "lattice_parameters": fields.Nested(lattice_parameters), + "energies": fields.Nested(energies), + "mass_density": fields.Float, + "atomic_density": fields.Float, + "cell_volume": fields.Float, + "wyckoff_sets": fields.Nested(wyckoff_set_result), + # "electronic_band_structure": fields.Nested(electronic_band_structure), + # "electronic_dos": fields.Nested(electronic_dos), +}) + + @ns.route("/materials/<string:material_id>/calculations/<string:calc_id>") class EncCalculationResource(Resource): @api.response(404, "Material or calculation not found") @api.response(400, "Bad request") @api.response(200, "Metadata send", fields.Raw) + @api.expect(calculation_property_query, validate=False) + @api.marshal_with(calculation_property_result, skip_none=True) @api.doc("enc_calculation") - def get(self, material_id, calc_id): - """Used to return calculation details that are not available in the ES - index and are instead read from the Archive directly. + def post(self, material_id, calc_id): + """Used to return calculation details. Some properties are not + available in the ES index and are instead read from the Archive + directly. """ + # Get query parameters as json + try: + data = marshal(request.get_json(), calculation_property_query) + except Exception as e: + abort(400, message=str(e)) + s = Search(index=config.elastic.index_name) query = Q( "bool", @@ -944,16 +1030,30 @@ class EncCalculationResource(Resource): ) s = s.query(query) + # Create dictionaries for requested properties + properties = data["properties"] + arch_properties = {} + es_properties = {} + for prop in properties: + es_source = calculation_property_map[prop].get("es_source") + if es_source is not None: + es_properties[prop] = es_source + arch_source = calculation_property_map[prop].get("arch_source") + if arch_source is not None: + arch_properties[prop] = arch_source + # The query is filtered already on the ES side so we don"t need to # transfer so much data. + sources = [ + "upload_id", + "calc_id", + "encyclopedia.material.material_type", + "encyclopedia.material.bulk.has_free_wyckoff_parameters" + ] + sources += list(es_properties.values()) + s = s.extra(**{ - "_source": {"includes": [ - "upload_id", - "calc_id", - "encyclopedia.properties", - "encyclopedia.material.material_type", - "encyclopedia.material.bulk.has_free_wyckoff_parameters" - ]}, + "_source": {"includes": sources}, "size": 1, }) @@ -963,36 +1063,35 @@ class EncCalculationResource(Resource): if len(response) == 0: abort(404, message="There is no material {} with calculation {}".format(material_id, calc_id)) - # Read the idealized_structure from the Archive. The structure can be - # quite large and no direct search queries are performed against it, so - # it is not in the ES index. - entry = response[0] - upload_id = entry.upload_id - calc_id = entry.calc_id - paths = ['section_metadata/encyclopedia/material/idealized_structure'] - data = read_archive( - upload_id, - calc_id, - paths, - ) - - # Read the lattice parameters - ideal_struct = data['section_metadata/encyclopedia/material/idealized_structure'] - - # Final result - result = { - "lattice_parameters": ideal_struct["lattice_parameters"], - "energies": entry.encyclopedia.properties.energies.to_dict(), - "mass_density": entry.encyclopedia.properties.mass_density, - "atomic_density": entry.encyclopedia.properties.atomic_density, - "cell_volume": ideal_struct["cell_volume"], - } - - # Return full Wyckoff position information for bulk structures with - # free Wyckoff parameters - if entry.encyclopedia.material.material_type == "bulk": - if entry.encyclopedia.material.bulk.has_free_wyckoff_parameters: - result["wyckoff_sets"] = ideal_struct["wyckoff_sets"] + # If any of the requested properties require data from the Archive, the + # file is opened and read. + result = {} + if len(arch_properties) != 0: + arch_paths = set(arch_properties.values()) + entry = response[0] + upload_id = entry.upload_id + calc_id = entry.calc_id + data = read_archive( + upload_id, + calc_id, + arch_paths, + ) + + # Add results from archive + for key, value in arch_properties.items(): + value = data[value] + result[key] = value + + # Add results from ES + for prop in properties: + es_source = calculation_property_map[prop].get("es_source") + if es_source is not None: + value = response[0] + for attr in es_source.split("."): + value = value[attr] + if isinstance(value, AttrDict): + value = value.to_dict() + result[prop] = value return result, 200 @@ -1020,7 +1119,7 @@ def read_archive(upload_id: str, calc_id: str, paths: List[str]) -> Dict[str, MS parts = path.split("/") for part in parts: data = data[part] - if not isinstance(data, dict): + if isinstance(data, ArchiveObject): data = data.to_dict() result[path] = data -- GitLab