Commit 30ec7560 authored by Lauri Himanen's avatar Lauri Himanen
Browse files

Separated the groups route into two: one for getting group summary for a...

Separated the groups route into two: one for getting the group summary for a material and another for getting the group EOS information for a specific group.
parent f412a94a
Pipeline #76034 failed with stages
in 28 minutes and 3 seconds
...@@ -453,27 +453,10 @@ class EncMaterialsResource(Resource): ...@@ -453,27 +453,10 @@ class EncMaterialsResource(Resource):
return result, 200 return result, 200
group_result = api.model("group_result", {
"calculations": fields.List(fields.String),
"energies": fields.List(fields.Float),
"volumes": fields.List(fields.Float),
"energy_minimum": fields.Float,
"group_hash": fields.String,
"group_type": fields.String,
"nr_of_calculations": fields.Integer,
"representative_calc_id": fields.String,
})
groups_result = api.model("groups_result", { groups_result = api.model("groups_result", {
"total_groups": fields.Integer(allow_null=False), "groups_eos": fields.Raw,
"groups": fields.List(fields.Nested(group_result)), "groups_par": fields.Raw,
}) })
group_source = {
"includes": [
"calc_id",
"encyclopedia.properties.energies.energy_total",
"encyclopedia.material.idealized_structure.cell_volume",
]
}
@ns.route("/materials/<string:material_id>/groups") @ns.route("/materials/<string:material_id>/groups")
...@@ -481,11 +464,12 @@ class EncGroupsResource(Resource): ...@@ -481,11 +464,12 @@ class EncGroupsResource(Resource):
@api.response(404, "Material not found") @api.response(404, "Material not found")
@api.response(400, "Bad request") @api.response(400, "Bad request")
@api.response(200, "Metadata send", fields.Raw) @api.response(200, "Metadata send", fields.Raw)
@api.expect(material_query, validate=False)
@api.marshal_with(groups_result) @api.marshal_with(groups_result)
@api.doc("enc_materials") @api.doc("enc_materials")
def get(self, material_id): def get(self, material_id):
"""Returns a summary of the calculation groups that were identified for
this material.
"""
# Find entries for the given material, which have EOS or parameter # Find entries for the given material, which have EOS or parameter
# variation hashes set. # variation hashes set.
bool_query = Q( bool_query = Q(
...@@ -500,8 +484,8 @@ class EncGroupsResource(Resource): ...@@ -500,8 +484,8 @@ class EncGroupsResource(Resource):
Q("exists", field="encyclopedia.material.idealized_structure.cell_volume"), Q("exists", field="encyclopedia.material.idealized_structure.cell_volume"),
], ],
should=[ should=[
Q("exists", field="encyclopedia.method.group_eos_hash"), Q("exists", field="encyclopedia.method.group_eos_id"),
Q("exists", field="encyclopedia.method.group_parametervariation_hash"), Q("exists", field="encyclopedia.method.group_parametervariation_id"),
], ],
minimum_should_match=1, # At least one of the should query must match minimum_should_match=1, # At least one of the should query must match
) )
...@@ -511,8 +495,83 @@ class EncGroupsResource(Resource): ...@@ -511,8 +495,83 @@ class EncGroupsResource(Resource):
# Bucket the calculations by the group hashes. Only create a bucket if an # Bucket the calculations by the group hashes. Only create a bucket if an
# above-minimum number of documents are found. # above-minimum number of documents are found.
group_eos_bucket = A("terms", field="encyclopedia.method.group_eos_hash", min_doc_count=4) group_eos_bucket = A("terms", field="encyclopedia.method.group_eos_id", min_doc_count=4)
group_param_bucket = A("terms", field="encyclopedia.method.group_parametervariation_hash", min_doc_count=2) group_param_bucket = A("terms", field="encyclopedia.method.group_parametervariation_id", min_doc_count=2)
calc_aggregation = A(
"top_hits",
_source={"includes": ["calc_id"]},
sort=[{"encyclopedia.properties.energies.energy_total": {"order": "asc"}}],
size=100,
)
group_eos_bucket.bucket("calculations", calc_aggregation)
group_param_bucket.bucket("calculations", calc_aggregation)
s.aggs.bucket("groups_eos", group_eos_bucket)
s.aggs.bucket("groups_param", group_param_bucket)
# We ignore the top level hits
s = s.extra(**{
"size": 0,
})
# Collect information for each group from the aggregations
response = s.execute()
groups_eos = {group.key: [calc.calc_id for calc in group.calculations.hits] for group in response.aggs.groups_eos.buckets}
groups_param = {group.key: [calc.calc_id for calc in group.calculations.hits] for group in response.aggs.groups_param.buckets}
# Return results
result = {
"groups_eos": groups_eos,
"groups_par": groups_param,
}
return result, 200
group_result = api.model("group_result", {
"calculations": fields.List(fields.String),
"energies": fields.List(fields.Float),
"volumes": fields.List(fields.Float),
})
group_source = {
"includes": [
"calc_id",
"encyclopedia.properties.energies.energy_total",
"encyclopedia.material.idealized_structure.cell_volume",
]
}
@ns.route("/materials/<string:material_id>/groups/<string:group_type>/<string:group_id>")
class EncGroupResource(Resource):
@api.response(404, "Group not found")
@api.response(400, "Bad request")
@api.response(200, "Metadata send", fields.Raw)
@api.marshal_with(group_result)
@api.doc("enc_group")
def get(self, material_id, group_type, group_id):
"""Used to query detailed information for a specific calculation group.
"""
# Find entries for the given material, which have EOS or parameter
# variation hashes set.
if group_type == "eos":
group_id_source = "encyclopedia.method.group_eos_id"
elif group_type == "par":
group_id_source = "encyclopedia.method.group_parametervariation_id"
else:
abort(400, message="Unsupported group type.")
bool_query = Q(
"bool",
filter=[
Q("term", published=True),
Q("term", with_embargo=False),
Q("term", encyclopedia__material__material_id=material_id),
Q("term", **{group_id_source: group_id}),
],
)
s = Search(index=config.elastic.index_name)
s = s.query(bool_query)
# calc_id and energy should be extracted for each matched document. The # calc_id and energy should be extracted for each matched document. The
# documents are sorted by energy so that the minimum energy one can be # documents are sorted by energy so that the minimum energy one can be
...@@ -526,52 +585,27 @@ class EncGroupsResource(Resource): ...@@ -526,52 +585,27 @@ class EncGroupsResource(Resource):
sort=[{"encyclopedia.properties.energies.energy_total": {"order": "asc"}}], sort=[{"encyclopedia.properties.energies.energy_total": {"order": "asc"}}],
size=100, size=100,
) )
group_eos_bucket.bucket("energies", energy_aggregation) s.aggs.bucket("groups_eos", energy_aggregation)
group_param_bucket.bucket("energies", energy_aggregation)
s.aggs.bucket("groups_eos", group_eos_bucket)
s.aggs.bucket("groups_param", group_param_bucket)
# We ignore the top level hits # We ignore the top level hits
s = s.extra(**{ s = s.extra(**{
"size": 0, "size": 0,
}) })
# No hits on the top query level
response = s.execute()
groups = []
# Collect information for each group from the aggregations # Collect information for each group from the aggregations
groups_eos = response.aggs.groups_eos.buckets response = s.execute()
groups_param = response.aggs.groups_param.buckets
def get_group(group, group_type, group_hash):
hits = group.energies.hits
calculations = [doc.calc_id for doc in hits]
energies = [doc.encyclopedia.properties.energies.energy_total for doc in hits]
volumes = [doc.encyclopedia.material.idealized_structure.cell_volume for doc in hits]
group_dict = {
"group_hash": group_hash,
"group_type": group_type,
"nr_of_calculations": len(calculations),
"representative_calc_id": hits[0].calc_id,
"calculations": calculations,
"energies": energies,
"volumes": volumes,
"energy_minimum": hits[0].encyclopedia.properties.energies.energy_total,
}
return group_dict
for group in groups_eos:
groups.append(get_group(group, "equation of state", group.key))
for group in groups_param:
groups.append(get_group(group, "parameter variation", group.key))
# Return results hits = response.aggs.groups_eos.hits
result = { calculations = [doc.calc_id for doc in hits]
"groups": groups, energies = [doc.encyclopedia.properties.energies.energy_total for doc in hits]
"total_groups": len(groups), volumes = [doc.encyclopedia.material.idealized_structure.cell_volume for doc in hits]
group_dict = {
"calculations": calculations,
"energies": energies,
"volumes": volumes,
} }
return result, 200
return group_dict, 200
suggestions_map = { suggestions_map = {
......
...@@ -404,7 +404,7 @@ class Method(MSection): ...@@ -404,7 +404,7 @@ class Method(MSection):
""", """,
a_search=Search() a_search=Search()
) )
method_hash = Quantity( method_id = Quantity(
type=str, type=str,
description=""" description="""
A fixed length, unique method identifier in the form of a hash digest. A fixed length, unique method identifier in the form of a hash digest.
...@@ -413,7 +413,7 @@ class Method(MSection): ...@@ -413,7 +413,7 @@ class Method(MSection):
for the used program. for the used program.
""" """
) )
group_eos_hash = Quantity( group_eos_id = Quantity(
type=str, type=str,
description=""" description="""
A fixed length, unique identifier for equation-of-state calculations. A fixed length, unique identifier for equation-of-state calculations.
...@@ -422,7 +422,7 @@ class Method(MSection): ...@@ -422,7 +422,7 @@ class Method(MSection):
""", """,
a_search=Search() a_search=Search()
) )
group_parametervariation_hash = Quantity( group_parametervariation_id = Quantity(
type=str, type=str,
description=""" description="""
A fixed length, unique identifier for calculations where structure is A fixed length, unique identifier for calculations where structure is
......
...@@ -38,7 +38,7 @@ class MethodNormalizer(): ...@@ -38,7 +38,7 @@ class MethodNormalizer():
self.logger = logger self.logger = logger
self.section_run = backend.entry_archive.section_run[0] self.section_run = backend.entry_archive.section_run[0]
def method_hash(self, method: Method, settings_basis_set: RestrictedDict, repr_method: Section): def method_id(self, method: Method, settings_basis_set: RestrictedDict, repr_method: Section):
method_dict = RestrictedDict( method_dict = RestrictedDict(
mandatory_keys=[ mandatory_keys=[
"program_name", "program_name",
...@@ -50,7 +50,7 @@ class MethodNormalizer(): ...@@ -50,7 +50,7 @@ class MethodNormalizer():
# The subclasses may define their own method properties that are to be # The subclasses may define their own method properties that are to be
# included here. # included here.
subsettings = self.method_hash_dict(method, settings_basis_set, repr_method) subsettings = self.method_id_dict(method, settings_basis_set, repr_method)
method_dict["subsettings"] = subsettings method_dict["subsettings"] = subsettings
# If all required information is present, safe the hash # If all required information is present, safe the hash
...@@ -59,17 +59,17 @@ class MethodNormalizer(): ...@@ -59,17 +59,17 @@ class MethodNormalizer():
except (KeyError, ValueError) as e: except (KeyError, ValueError) as e:
self.logger.info("Could not create method hash: {}".format(e)) self.logger.info("Could not create method hash: {}".format(e))
else: else:
method.method_hash = method_dict.hash() method.method_id = method_dict.hash()
@abstractmethod @abstractmethod
def method_hash_dict(self, method: Method, settings_basis_set: RestrictedDict, repr_method: Section) -> RestrictedDict: def method_id_dict(self, method: Method, settings_basis_set: RestrictedDict, repr_method: Section) -> RestrictedDict:
pass pass
def group_eos_hash(self, method: Method, material: Material, repr_method: Section): def group_eos_id(self, method: Method, material: Material, repr_method: Section):
eos_dict = RestrictedDict( eos_dict = RestrictedDict(
mandatory_keys=[ mandatory_keys=[
"upload_id", "upload_id",
"method_hash", "method_id",
"formula", "formula",
], ],
forbidden_values=[None] forbidden_values=[None]
...@@ -79,7 +79,7 @@ class MethodNormalizer(): ...@@ -79,7 +79,7 @@ class MethodNormalizer():
eos_dict['upload_id'] = self.backend.entry_archive.section_metadata.upload_id eos_dict['upload_id'] = self.backend.entry_archive.section_metadata.upload_id
# Method # Method
eos_dict["method_hash"] = method.method_hash eos_dict["method_id"] = method.method_id
# The formula should be same for EoS (maybe even symmetries) # The formula should be same for EoS (maybe even symmetries)
eos_dict["formula"] = material.formula eos_dict["formula"] = material.formula
...@@ -90,9 +90,9 @@ class MethodNormalizer(): ...@@ -90,9 +90,9 @@ class MethodNormalizer():
except (KeyError, ValueError) as e: except (KeyError, ValueError) as e:
self.logger.info("Could not create EOS hash: {}".format(e)) self.logger.info("Could not create EOS hash: {}".format(e))
else: else:
method.group_eos_hash = eos_dict.hash() method.group_eos_id = eos_dict.hash()
def group_parametervariation_hash(self, method: Method, settings_basis_set: RestrictedDict, repr_system: Section, repr_method: Section): def group_parametervariation_id(self, method: Method, settings_basis_set: RestrictedDict, repr_system: Section, repr_method: Section):
# Create ordered dictionary with the values. Order is important for # Create ordered dictionary with the values. Order is important for
param_dict = RestrictedDict( param_dict = RestrictedDict(
mandatory_keys=[ mandatory_keys=[
...@@ -134,7 +134,7 @@ class MethodNormalizer(): ...@@ -134,7 +134,7 @@ class MethodNormalizer():
# The subclasses may define their own method properties that are to be # The subclasses may define their own method properties that are to be
# included here. # included here.
subsettings = self.group_parametervariation_hash_dict(method, settings_basis_set, repr_method) subsettings = self.group_parametervariation_id_dict(method, settings_basis_set, repr_method)
param_dict["subsettings"] = subsettings param_dict["subsettings"] = subsettings
# Form a hash from the dictionary # Form a hash from the dictionary
...@@ -143,10 +143,10 @@ class MethodNormalizer(): ...@@ -143,10 +143,10 @@ class MethodNormalizer():
except (KeyError, ValueError) as e: except (KeyError, ValueError) as e:
self.logger.info("Could not create parameter variation hash: {}".format(e)) self.logger.info("Could not create parameter variation hash: {}".format(e))
else: else:
method.group_parametervariation_hash = param_dict.hash() method.group_parametervariation_id = param_dict.hash()
@abstractmethod @abstractmethod
def group_parametervariation_hash_dict(self, method: Method, settings_basis_set: RestrictedDict, repr_method: Section) -> RestrictedDict: def group_parametervariation_id_dict(self, method: Method, settings_basis_set: RestrictedDict, repr_method: Section) -> RestrictedDict:
pass pass
def group_e_min(self) -> None: def group_e_min(self) -> None:
...@@ -241,7 +241,7 @@ class MethodDFTNormalizer(MethodNormalizer): ...@@ -241,7 +241,7 @@ class MethodDFTNormalizer(MethodNormalizer):
short_name = self.create_xc_functional_shortname(long_name) short_name = self.create_xc_functional_shortname(long_name)
method.functional_type = short_name method.functional_type = short_name
def method_hash_dict(self, method: Method, settings_basis_set: RestrictedDict, repr_method: Section) -> RestrictedDict: def method_id_dict(self, method: Method, settings_basis_set: RestrictedDict, repr_method: Section) -> RestrictedDict:
# Extend by DFT settings. # Extend by DFT settings.
hash_dict = RestrictedDict( hash_dict = RestrictedDict(
mandatory_keys=( mandatory_keys=(
...@@ -292,7 +292,7 @@ class MethodDFTNormalizer(MethodNormalizer): ...@@ -292,7 +292,7 @@ class MethodDFTNormalizer(MethodNormalizer):
return hash_dict return hash_dict
def group_parametervariation_hash_dict(self, method: Method, settings_basis_set: RestrictedDict, repr_method: Section): def group_parametervariation_id_dict(self, method: Method, settings_basis_set: RestrictedDict, repr_method: Section):
"""Dictionary containing the parameters used for convergence test """Dictionary containing the parameters used for convergence test
grouping grouping
This is the source for generating the related hash.""" This is the source for generating the related hash."""
...@@ -391,9 +391,9 @@ class MethodDFTNormalizer(MethodNormalizer): ...@@ -391,9 +391,9 @@ class MethodDFTNormalizer(MethodNormalizer):
self.core_electron_treatment(method) self.core_electron_treatment(method)
self.functional_long_name(method, repr_method) self.functional_long_name(method, repr_method)
self.functional_type(method) self.functional_type(method)
self.method_hash(method, settings_basis_set, repr_method) self.method_id(method, settings_basis_set, repr_method)
self.group_eos_hash(method, material, repr_method) self.group_eos_id(method, material, repr_method)
self.group_parametervariation_hash(method, settings_basis_set, repr_system, repr_method) self.group_parametervariation_id(method, settings_basis_set, repr_system, repr_method)
class MethodGWNormalizer(MethodDFTNormalizer): class MethodGWNormalizer(MethodDFTNormalizer):
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment