Commit 0d16e374 authored by Lauri Himanen
Browse files

Now the /materials route behaves exactly like the old one.

parent da904b62
Pipeline #77051 passed with stages
in 23 minutes and 4 seconds
......@@ -16,6 +16,7 @@
The encyclopedia API of the nomad@FAIRDI APIs.
"""
import re
import math
import numpy as np
from flask_restplus import Resource, abort, fields, marshal
......@@ -367,6 +368,11 @@ class EncMaterialsResource(Resource):
s = Search(index=config.elastic.index_name)
s = s.query(bool_query)
# 1: The paginated approach: No way to know the amount of materials,
# but can return aggregation results in a quick fashion including
# the number of calculation entries per material.
mode = "collapse"
if mode == "aggregation":
# The materials are grouped by using three aggregations:
# "Composite" to enable scrolling, "Terms" to enable selecting
# by material_id and "Top Hits" to fetch a single
......@@ -391,7 +397,7 @@ class EncMaterialsResource(Resource):
))
s.aggs.bucket("materials", composite_agg)
# We ignore the top level hits
# We ignore the top level hits and sort by reduced material formula.
s = s.extra(**{
"size": 0,
})
......@@ -418,11 +424,66 @@ class EncMaterialsResource(Resource):
"per_page": per_page,
"after": after_new,
}
if after is None:
n_materials = response.aggs.n_materials.value
pages["total"] = n_materials
# 2. Collapse approach. Quickly provides a list of materials
# corresponding to the query, offers full pagination, doesn't include
# the number of matches per material.
elif mode == "collapse":
s = Search(index=config.elastic.index_name)
s = s.query(bool_query)
# Add cardinality aggregation that gives out the total number of materials
cardinality_agg = A("cardinality", field="encyclopedia.material.material_id", precision_threshold=1000)
s.aggs.metric("n_materials", cardinality_agg)
s = s.extra(**{
"collapse": {"field": "encyclopedia.material.material_id"},
"size": per_page,
"from": (page - 1) * per_page,
"sort": [{"encyclopedia.material.formula_reduced": {"order": "asc"}}],
"explain": True,
})
# Execute query
response = s.execute()
# No matches
if len(response) == 0:
abort(404, message="No materials found for the given search criteria or pagination.")
# Gather number of entries per material with a separate query
material_ids = [x.encyclopedia.material.material_id for x in response]
s = Search(index=config.elastic.index_name)
bool_query = Q(
"bool",
filter=Q("terms", encyclopedia__material__material_id=material_ids),
)
s2 = s.query(bool_query)
s2.aggs.bucket("n_matches", A("terms", field="encyclopedia.material.material_id"))
response2 = s2.execute()
matmap = {x.key: x.doc_count for x in response2.aggs.n_matches}
# Loop over materials
result_list = []
keys = list(material_prop_map.keys())
for material in response:
# Get values from the collapsed doc
mat_result = get_es_doc_values(material, material_prop_map, keys)
mat_id = material.encyclopedia.material.material_id
mat_result["n_matches"] = matmap[mat_id]
result_list.append(mat_result)
# Full page information available for collapse
pages = {
"page": page,
"per_page": per_page,
"pages": math.ceil(response.hits.total / per_page),
"total": response.aggs.n_materials.value,
}
result = {
"results": result_list,
"pages": pages,
......@@ -1134,8 +1195,8 @@ class EncCalculationResource(Resource):
# Pre-calculate k-path length to be used as x-coordinate in
# plots. If the VBM and CBM information is needed later, it
# can be added as indices along the path. The exact
# k-points and occupations are removed to save band width.
# can be added as indices along the path. The exact k-points
# and occupations are removed to save some bandwidth.
if key == "electronic_band_structure" or key == "phonon_band_structure":
segments = value["section_k_band_segment"]
k_path_length = 0
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment