Commit d65328f1 authored by Lauri Himanen, committed by Markus Scheidgen
Browse files

Fixes issues with the Encyclopedia API parameter/response models (fixes #443),...

Fixes issues with the Encyclopedia API parameter/response models (fixes #443) and improves the materials indexing command.
parent b8277c4a
...@@ -240,7 +240,8 @@ def get_es_doc_values(es_doc, mapping, keys=None): ...@@ -240,7 +240,8 @@ def get_es_doc_values(es_doc, mapping, keys=None):
for key in keys: for key in keys:
es_key = mapping[key] es_key = mapping[key]
value = rgetattr(es_doc, es_key) value = rgetattr(es_doc, es_key)
result[key] = value if value is not None:
result[key] = value
return result return result
...@@ -326,10 +327,9 @@ material_result = api.model("material_result", { ...@@ -326,10 +327,9 @@ material_result = api.model("material_result", {
class EncMaterialResource(Resource): class EncMaterialResource(Resource):
@api.response(404, "The material does not exist") @api.response(404, "The material does not exist")
@api.response(200, "Metadata send", fields.Raw) @api.response(200, "Metadata send", fields.Raw)
@api.doc("get_material") @api.doc("get_material", params={"material_id": "28 character identifier for the material."})
@api.expect(material_query) @api.expect(material_query)
@api.marshal_with(material_result, skip_none=True) @api.marshal_with(material_result, skip_none=True)
@api.param("material_id", "28 character identifier for the material.")
@authenticate() @authenticate()
def get(self, material_id): def get(self, material_id):
"""Used to retrieve basic information related to a material. """Used to retrieve basic information related to a material.
...@@ -610,8 +610,7 @@ class EncGroupsResource(Resource): ...@@ -610,8 +610,7 @@ class EncGroupsResource(Resource):
@api.response(400, "Bad request") @api.response(400, "Bad request")
@api.response(200, "OK", groups_result) @api.response(200, "OK", groups_result)
@api.marshal_with(groups_result) @api.marshal_with(groups_result)
@api.doc("get_material_groups") @api.doc("get_material_groups", params={"material_id": "28 character identifier for the material."})
@api.param("material_id", "28 character identifier for the material.")
@authenticate() @authenticate()
def get(self, material_id): def get(self, material_id):
"""Returns a summary of the calculation groups that were identified for this material. """Returns a summary of the calculation groups that were identified for this material.
...@@ -695,10 +694,11 @@ class EncGroupResource(Resource): ...@@ -695,10 +694,11 @@ class EncGroupResource(Resource):
@api.response(400, "Bad request") @api.response(400, "Bad request")
@api.response(200, "OK", group_result) @api.response(200, "OK", group_result)
@api.marshal_with(group_result) @api.marshal_with(group_result)
@api.doc("get_material_group") @api.doc("get_material_group", params={
@api.param("group_type", "Type of group. Valid options are: 'eos' and 'par'.") "material_id": "28 character identifier for the material.",
@api.param("group_id", "28 character identifier for the group.") "group_type": "Type of group. Valid options are: 'eos' and 'par'.",
@api.param("material_id", "28 character identifier for the material.") "group_id": "28 character identifier for the group.",
})
@authenticate() @authenticate()
def get(self, material_id, group_type, group_id): def get(self, material_id, group_type, group_id):
"""Used to query detailed information about a specific calculation group. """Used to query detailed information about a specific calculation group.
...@@ -783,8 +783,8 @@ calculation_result = api.model("calculation_result", { ...@@ -783,8 +783,8 @@ calculation_result = api.model("calculation_result", {
"code_version": fields.String, "code_version": fields.String,
"functional_type": fields.String, "functional_type": fields.String,
"basis_set_type": fields.String, "basis_set_type": fields.String,
"core_electron_treatment": fields.String, "core_electron_treatment": fields.String(default="unavailable"),
"run_type": fields.String, "run_type": fields.String(default="unavailable"),
"has_dos": fields.Boolean, "has_dos": fields.Boolean,
"has_band_structure": fields.Boolean, "has_band_structure": fields.Boolean,
"has_thermal_properties": fields.Boolean, "has_thermal_properties": fields.Boolean,
...@@ -797,7 +797,6 @@ representatives_result = api.model("representatives_result", { ...@@ -797,7 +797,6 @@ representatives_result = api.model("representatives_result", {
}) })
calculations_result = api.model("calculations_result", { calculations_result = api.model("calculations_result", {
"total_results": fields.Integer, "total_results": fields.Integer,
"pages": fields.Nested(pages_result),
"results": fields.List(fields.Nested(calculation_result)), "results": fields.List(fields.Nested(calculation_result)),
"representatives": fields.Nested(representatives_result, skip_none=True), "representatives": fields.Nested(representatives_result, skip_none=True),
}) })
...@@ -809,6 +808,7 @@ class EncCalculationsResource(Resource): ...@@ -809,6 +808,7 @@ class EncCalculationsResource(Resource):
@api.response(400, "Bad request") @api.response(400, "Bad request")
@api.response(200, "OK", calculations_result) @api.response(200, "OK", calculations_result)
@api.doc("get_material_calculations") @api.doc("get_material_calculations")
@api.marshal_with(calculations_result)
@authenticate() @authenticate()
def get(self, material_id): def get(self, material_id):
"""Used to return information about all calculations related to the given material. """Used to return information about all calculations related to the given material.
...@@ -947,8 +947,7 @@ class EncStatisticsResource(Resource): ...@@ -947,8 +947,7 @@ class EncStatisticsResource(Resource):
@api.response(200, "OK", statistics_result) @api.response(200, "OK", statistics_result)
@api.expect(statistics_query, validate=False) @api.expect(statistics_query, validate=False)
@api.marshal_with(statistics_result, skip_none=True) @api.marshal_with(statistics_result, skip_none=True)
@api.doc("get_material_statistics") @api.doc("get_material_statistics", params={"material_id": "28 character identifier for the material."})
@api.param("material_id", "28 character identifier for the material.")
@authenticate() @authenticate()
def post(self, material_id): def post(self, material_id):
"""Used to return statistics related to the specified material and """Used to return statistics related to the specified material and
...@@ -1403,8 +1402,7 @@ class ReportsResource(Resource): ...@@ -1403,8 +1402,7 @@ class ReportsResource(Resource):
@api.response(400, "Bad request") @api.response(400, "Bad request")
@api.response(204, "Report succesfully sent") @api.response(204, "Report succesfully sent")
@api.expect(report_query) @api.expect(report_query)
@api.doc("post_material_report") @api.doc("post_material_report", params={"material_id": "28 character identifier for the material."})
@api.param("material_id", "28 character identifier for the material.")
@authenticate(required=True) @authenticate(required=True)
def post(self, material_id): def post(self, material_id):
"""Post an error report on a material. Requires authentication. """Post an error report on a material. Requires authentication.
......
...@@ -241,15 +241,47 @@ def index(threads, dry): ...@@ -241,15 +241,47 @@ def index(threads, dry):
def index_materials(threads, code, dry, in_place, n, source): def index_materials(threads, code, dry, in_place, n, source):
"""(Re-)index all materials. """(Re-)index all materials.
This command will completely rebuild the materials index. The index is This command will is used to completely rebuild the materials index or
built from the material metainfo stored in MongoDB. The materials index can parts of it. You can choose which source is used for building the index: In
be used normally during the reindexing. general ElasticSearch is preferred, but if it is not available also MongoDB
can be used.
By default this command will start to rebuild the index from scratch. This
can be done on a "live" system because a new temporary index is used. If
you use the --in-place option, the indexing will be run on the same index
that is currently in use.
""" """
from nomad.datamodel.material import Material, Calculation from nomad.datamodel.material import Material, Calculation
from nomad.datamodel.encyclopedia import EncyclopediaMetadata from nomad.datamodel.encyclopedia import EncyclopediaMetadata
from nomad.search import material_document from nomad.search import material_document
from nomad.datamodel.material import Material, Calculation, Method, Properties, IdealizedStructure, Energies, Workflow, Bulk from nomad.datamodel.material import Material, Calculation, Method, Properties, IdealizedStructure, Energies, Workflow, Bulk
def create_entry(material, calc, in_place):
    """Build the bulk ``update`` action for one material/calculation pair.

    The returned action carries the full material document as ``upsert``
    payload, used when no document with the material's id exists yet.
    Otherwise the script appends the calculation to the ``calculations``
    list of the existing document. When ``in_place`` is true, the script
    first removes any stored calculation with the same ``calc_id`` so the
    old one is replaced instead of duplicated.
    """
    append_calc = "ctx._source.calculations.add(params.calc)"
    if in_place:
        # Drop a previously indexed copy of this calculation before re-adding it.
        script_source = "ctx._source.calculations.removeIf(x -> x.calc_id == params.calc.calc_id); " + append_calc
    else:
        script_source = append_calc

    return {
        '_op_type': 'update',
        '_index': target_index_name,
        '_id': material.material_id,
        '_type': 'doc',
        '_source': {
            "upsert": material.m_to_dict(include_defaults=False, partial="es"),
            "doc_as_upsert": False,
            "script": {
                "params": {
                    "calc": calc.m_to_dict(include_defaults=False, partial="es"),
                },
                "source": script_source,
            },
        },
    }
chunk_size = 500 chunk_size = 500
infrastructure.setup_mongo() infrastructure.setup_mongo()
client = infrastructure.setup_elastic() client = infrastructure.setup_elastic()
...@@ -271,16 +303,20 @@ def index_materials(threads, code, dry, in_place, n, source): ...@@ -271,16 +303,20 @@ def index_materials(threads, code, dry, in_place, n, source):
) )
if source == "mongo": if source == "mongo":
all_calcs = proc.Calc.objects().count() mongo_db = infrastructure.mongo_client[config.mongo.db_name]
mongo_collection = mongo_db['archive']
if code:
collection = mongo_collection.find({"section_metadata.dft.code_name": {"$in": code}})
else:
collection = mongo_collection.find()
all_calcs = collection.count()
print('indexing materials from %d calculations ...' % all_calcs) print('indexing materials from %d calculations ...' % all_calcs)
# Bulk update # Bulk update
def elastic_updates(): def elastic_updates():
with utils.ETA(all_calcs, ' index %10d of %10d calcs, ETA %s') as eta: with utils.ETA(all_calcs, ' index %10d of %10d calcs, ETA %s') as eta:
mongo_db = infrastructure.mongo_client[config.mongo.db_name]
mongo_collection = mongo_db['archive']
i_calc = 0 i_calc = 0
for mongo_archive in mongo_collection.find(): for mongo_archive in collection:
i_calc += 1 i_calc += 1
if n is not None: if n is not None:
if i_calc > n: if i_calc > n:
...@@ -371,24 +407,10 @@ def index_materials(threads, code, dry, in_place, n, source): ...@@ -371,24 +407,10 @@ def index_materials(threads, code, dry, in_place, n, source):
material.m_add_sub_section(Material.calculations, calc) material.m_add_sub_section(Material.calculations, calc)
# Update entry that inserts the full material info if entry # Update entry that inserts the full material info if entry
# does not exists, otherwise only adds the calculation into the # does not exists, otherwise only adds the calculation into
# nested subdocument # the nested subdocument
entry = {} yield create_entry(material, calc, in_place)
entry['_op_type'] = 'update'
entry['_index'] = target_index_name
entry['_id'] = material.material_id
entry['_type'] = 'doc'
entry['_source'] = {
"upsert": material.m_to_dict(include_defaults=False, partial="es"),
"doc_as_upsert": False,
"script": {
"source": "ctx._source.calculations.add(params.calc)",
"params": {
"calc": calc.m_to_dict(include_defaults=False, partial="es")
},
}
}
yield entry
elif source == "es": elif source == "es":
s = elasticsearch_dsl.Search(index=config.elastic.index_name) s = elasticsearch_dsl.Search(index=config.elastic.index_name)
filters = [elasticsearch_dsl.Q("term", encyclopedia__status="success")] filters = [elasticsearch_dsl.Q("term", encyclopedia__status="success")]
...@@ -531,28 +553,7 @@ def index_materials(threads, code, dry, in_place, n, source): ...@@ -531,28 +553,7 @@ def index_materials(threads, code, dry, in_place, n, source):
material.m_add_sub_section(Material.calculations, calc) material.m_add_sub_section(Material.calculations, calc)
# Update entry that inserts the full material info if entry yield create_entry(material, calc, in_place)
# does not exists, otherwise only adds the calculation into
# the nested subdocument
entry = {}
entry['_op_type'] = 'update'
entry['_index'] = target_index_name
entry['_id'] = material.material_id
entry['_type'] = 'doc'
entry['_source'] = {
"upsert": material.m_to_dict(include_defaults=False, partial="es"),
"doc_as_upsert": False,
"script": {
"params": {
"calc": calc.m_to_dict(include_defaults=False, partial="es")
},
}
}
if in_place:
entry['_source']["script"]["source"] = "ctx._source.calculations.removeIf(x -> x.calc_id == params.calc.calc_id); ctx._source.calculations.add(params.calc)"
else:
entry['_source']["script"]["source"] = "ctx._source.calculations.add(params.calc)"
yield entry
if dry: if dry:
for _ in elastic_updates(): for _ in elastic_updates():
......
...@@ -214,7 +214,7 @@ class Method(MSection): ...@@ -214,7 +214,7 @@ class Method(MSection):
a_search=Search() a_search=Search()
) )
functional_type = Quantity( functional_type = Quantity(
type=MEnum("GGA", "LDA", "hybrid-GGA", "hybrid-meta-GGA" "HF", "GW", "meta-GGA"), type=MEnum("GGA", "LDA", "hybrid-GGA", "hybrid-meta-GGA", "HF", "GW", "meta-GGA"),
description=""" description="""
Basic type of the used exchange-correlation functional. Basic type of the used exchange-correlation functional.
""", """,
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment