Commit e2322615 authored by Markus Scheidgen
Browse files

Merge branch 'bravadofix' into 'v0.9.6'

Fixes issues with the Encyclopedia API parameter/response models (fixes #443) and improves the materials indexing command.

See merge request !211
parents b8277c4a d65328f1
Pipeline #86517 passed with stages
in 35 minutes and 6 seconds
......@@ -240,7 +240,8 @@ def get_es_doc_values(es_doc, mapping, keys=None):
for key in keys:
es_key = mapping[key]
value = rgetattr(es_doc, es_key)
result[key] = value
if value is not None:
result[key] = value
return result
......@@ -326,10 +327,9 @@ material_result = api.model("material_result", {
class EncMaterialResource(Resource):
@api.response(404, "The material does not exist")
@api.response(200, "Metadata send", fields.Raw)
@api.doc("get_material")
@api.doc("get_material", params={"material_id": "28 character identifier for the material."})
@api.expect(material_query)
@api.marshal_with(material_result, skip_none=True)
@api.param("material_id", "28 character identifier for the material.")
@authenticate()
def get(self, material_id):
"""Used to retrieve basic information related to a material.
......@@ -610,8 +610,7 @@ class EncGroupsResource(Resource):
@api.response(400, "Bad request")
@api.response(200, "OK", groups_result)
@api.marshal_with(groups_result)
@api.doc("get_material_groups")
@api.param("material_id", "28 character identifier for the material.")
@api.doc("get_material_groups", params={"material_id": "28 character identifier for the material."})
@authenticate()
def get(self, material_id):
"""Returns a summary of the calculation groups that were identified for this material.
......@@ -695,10 +694,11 @@ class EncGroupResource(Resource):
@api.response(400, "Bad request")
@api.response(200, "OK", group_result)
@api.marshal_with(group_result)
@api.doc("get_material_group")
@api.param("group_type", "Type of group. Valid options are: 'eos' and 'par'.")
@api.param("group_id", "28 character identifier for the group.")
@api.param("material_id", "28 character identifier for the material.")
@api.doc("get_material_group", params={
"material_id": "28 character identifier for the material.",
"group_type": "Type of group. Valid options are: 'eos' and 'par'.",
"group_id": "28 character identifier for the group.",
})
@authenticate()
def get(self, material_id, group_type, group_id):
"""Used to query detailed information about a specific calculation group.
......@@ -783,8 +783,8 @@ calculation_result = api.model("calculation_result", {
"code_version": fields.String,
"functional_type": fields.String,
"basis_set_type": fields.String,
"core_electron_treatment": fields.String,
"run_type": fields.String,
"core_electron_treatment": fields.String(default="unavailable"),
"run_type": fields.String(default="unavailable"),
"has_dos": fields.Boolean,
"has_band_structure": fields.Boolean,
"has_thermal_properties": fields.Boolean,
......@@ -797,7 +797,6 @@ representatives_result = api.model("representatives_result", {
})
calculations_result = api.model("calculations_result", {
"total_results": fields.Integer,
"pages": fields.Nested(pages_result),
"results": fields.List(fields.Nested(calculation_result)),
"representatives": fields.Nested(representatives_result, skip_none=True),
})
......@@ -809,6 +808,7 @@ class EncCalculationsResource(Resource):
@api.response(400, "Bad request")
@api.response(200, "OK", calculations_result)
@api.doc("get_material_calculations")
@api.marshal_with(calculations_result)
@authenticate()
def get(self, material_id):
"""Used to return information about all calculations related to the given material.
......@@ -947,8 +947,7 @@ class EncStatisticsResource(Resource):
@api.response(200, "OK", statistics_result)
@api.expect(statistics_query, validate=False)
@api.marshal_with(statistics_result, skip_none=True)
@api.doc("get_material_statistics")
@api.param("material_id", "28 character identifier for the material.")
@api.doc("get_material_statistics", params={"material_id": "28 character identifier for the material."})
@authenticate()
def post(self, material_id):
"""Used to return statistics related to the specified material and
......@@ -1403,8 +1402,7 @@ class ReportsResource(Resource):
@api.response(400, "Bad request")
@api.response(204, "Report succesfully sent")
@api.expect(report_query)
@api.doc("post_material_report")
@api.param("material_id", "28 character identifier for the material.")
@api.doc("post_material_report", params={"material_id": "28 character identifier for the material."})
@authenticate(required=True)
def post(self, material_id):
"""Post an error report on a material. Requires authentication.
......
......@@ -241,15 +241,47 @@ def index(threads, dry):
def index_materials(threads, code, dry, in_place, n, source):
"""(Re-)index all materials.
This command will completely rebuild the materials index. The index is
built from the material metainfo stored in MongoDB. The materials index can
be used normally during the reindexing.
This command is used to completely rebuild the materials index or
parts of it. You can choose which source is used for building the index: In
general ElasticSearch is preferred, but if it is not available also MongoDB
can be used.
By default this command will start to rebuild the index from scratch. This
can be done on a "live" system because a new temporary index is used. If
you use the --in-place option, the indexing will be run on the same index
that is currently in use.
"""
from nomad.datamodel.material import Material, Calculation
from nomad.datamodel.encyclopedia import EncyclopediaMetadata
from nomad.search import material_document
from nomad.datamodel.material import Material, Calculation, Method, Properties, IdealizedStructure, Energies, Workflow, Bulk
def create_entry(material, calc, in_place):
    """Build the ES update operation for one calculation of a material.

    The full material document is passed as the ``upsert`` body, so it is
    inserted when no entry with this material id exists yet. Otherwise the
    script only appends the calculation to the nested ``calculations``
    subdocument; when ``in_place`` is truthy, a calculation already stored
    with the same ``calc_id`` is removed before the new one is added.
    """
    # Pick the script first so the action can be built as a single literal.
    if in_place:
        script_source = "ctx._source.calculations.removeIf(x -> x.calc_id == params.calc.calc_id); ctx._source.calculations.add(params.calc)"
    else:
        script_source = "ctx._source.calculations.add(params.calc)"

    return {
        '_op_type': 'update',
        # target_index_name is not a parameter; it is resolved from the
        # enclosing scope.
        '_index': target_index_name,
        '_id': material.material_id,
        '_type': 'doc',
        '_source': {
            "upsert": material.m_to_dict(include_defaults=False, partial="es"),
            "doc_as_upsert": False,
            "script": {
                "params": {
                    "calc": calc.m_to_dict(include_defaults=False, partial="es"),
                },
                "source": script_source,
            },
        },
    }
chunk_size = 500
infrastructure.setup_mongo()
client = infrastructure.setup_elastic()
......@@ -271,16 +303,20 @@ def index_materials(threads, code, dry, in_place, n, source):
)
if source == "mongo":
all_calcs = proc.Calc.objects().count()
mongo_db = infrastructure.mongo_client[config.mongo.db_name]
mongo_collection = mongo_db['archive']
if code:
collection = mongo_collection.find({"section_metadata.dft.code_name": {"$in": code}})
else:
collection = mongo_collection.find()
all_calcs = collection.count()
print('indexing materials from %d calculations ...' % all_calcs)
# Bulk update
def elastic_updates():
with utils.ETA(all_calcs, ' index %10d of %10d calcs, ETA %s') as eta:
mongo_db = infrastructure.mongo_client[config.mongo.db_name]
mongo_collection = mongo_db['archive']
i_calc = 0
for mongo_archive in mongo_collection.find():
for mongo_archive in collection:
i_calc += 1
if n is not None:
if i_calc > n:
......@@ -371,24 +407,10 @@ def index_materials(threads, code, dry, in_place, n, source):
material.m_add_sub_section(Material.calculations, calc)
# Update entry that inserts the full material info if entry
# does not exists, otherwise only adds the calculation into the
# nested subdocument
entry = {}
entry['_op_type'] = 'update'
entry['_index'] = target_index_name
entry['_id'] = material.material_id
entry['_type'] = 'doc'
entry['_source'] = {
"upsert": material.m_to_dict(include_defaults=False, partial="es"),
"doc_as_upsert": False,
"script": {
"source": "ctx._source.calculations.add(params.calc)",
"params": {
"calc": calc.m_to_dict(include_defaults=False, partial="es")
},
}
}
yield entry
# does not exists, otherwise only adds the calculation into
# the nested subdocument
yield create_entry(material, calc, in_place)
elif source == "es":
s = elasticsearch_dsl.Search(index=config.elastic.index_name)
filters = [elasticsearch_dsl.Q("term", encyclopedia__status="success")]
......@@ -531,28 +553,7 @@ def index_materials(threads, code, dry, in_place, n, source):
material.m_add_sub_section(Material.calculations, calc)
# Update entry that inserts the full material info if entry
# does not exists, otherwise only adds the calculation into
# the nested subdocument
entry = {}
entry['_op_type'] = 'update'
entry['_index'] = target_index_name
entry['_id'] = material.material_id
entry['_type'] = 'doc'
entry['_source'] = {
"upsert": material.m_to_dict(include_defaults=False, partial="es"),
"doc_as_upsert": False,
"script": {
"params": {
"calc": calc.m_to_dict(include_defaults=False, partial="es")
},
}
}
if in_place:
entry['_source']["script"]["source"] = "ctx._source.calculations.removeIf(x -> x.calc_id == params.calc.calc_id); ctx._source.calculations.add(params.calc)"
else:
entry['_source']["script"]["source"] = "ctx._source.calculations.add(params.calc)"
yield entry
yield create_entry(material, calc, in_place)
if dry:
for _ in elastic_updates():
......
......@@ -214,7 +214,7 @@ class Method(MSection):
a_search=Search()
)
functional_type = Quantity(
type=MEnum("GGA", "LDA", "hybrid-GGA", "hybrid-meta-GGA" "HF", "GW", "meta-GGA"),
type=MEnum("GGA", "LDA", "hybrid-GGA", "hybrid-meta-GGA", "HF", "GW", "meta-GGA"),
description="""
Basic type of the used exchange-correlation functional.
""",
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment