encyclopedia.py 55.1 KB
Newer Older
Markus Scheidgen's avatar
Markus Scheidgen committed
1
2
3
4
#
# Copyright The NOMAD Authors.
#
# This file is part of NOMAD. See https://nomad-lab.eu for further info.
5
6
7
8
9
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
Markus Scheidgen's avatar
Markus Scheidgen committed
10
#     http://www.apache.org/licenses/LICENSE-2.0
11
12
#
# Unless required by applicable law or agreed to in writing, software
Markus Scheidgen's avatar
Markus Scheidgen committed
13
# distributed under the License is distributed on an "AS IS" BASIS,
14
15
16
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
Markus Scheidgen's avatar
Markus Scheidgen committed
17
#
18

19
"""
20
API for retrieving material information.
21
"""
22
import re
23
import math
24
import numpy as np
25
from collections import defaultdict
26

27
from flask_restplus import Resource, abort, fields, marshal
28
from flask import request, g
29
from elasticsearch_dsl import Search, Q, A
30
from elasticsearch_dsl.utils import AttrDict
31

32
33
from nomad import config, infrastructure, search
from nomad.files import UploadFiles
34
from nomad.units import ureg
Lauri Himanen's avatar
Lauri Himanen committed
35
from nomad.atomutils import get_hill_decomposition
36
from nomad.datamodel.datamodel import EntryArchive
37
from nomad.datamodel.material import Material, Bulk, Method
38
from .api import api
39
from .auth import authenticate, create_authorization_predicate
40

41
42
# Namespace under which the encyclopedia endpoints of this module are routed.
ns = api.namespace("encyclopedia", description="Access materials data.")
# Message template for 404 responses; format with the requested material id.
missing_material_msg = "The specified material {} could not be retrieved. It either does not exist or requires authentication."
43

44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217

class MaterialAccessError(Exception):
    """Raised when a queried material cannot be found or is not visible."""


class MaterialSearch():
    """Convenience class for material searches. Automatically ensures the
    correct visibility of materials when the search is constructed through the
    methods of this class.
    """
    # Calculation fields that are required for deciding the visibility of a
    # calculation.
    _visibility_fields = (
        "calculations.published",
        "calculations.with_embargo",
        "calculations.owners",
    )

    def __init__(self):
        self._s = Search(index=config.elastic.materials_index_name)
        self._filters = []
        self._musts = []
        self._extra = {}
        self._authenticated = False

    def add_material_filter(self, query):
        """Adds material based filters.
        """
        self._filters.append(query)

    def add_material_aggregation(self, name, aggregation):
        """Adds material based aggregation.
        """
        self._s.aggs.bucket(name, aggregation)

    def add_material_must(self, query):
        """Adds material based must query.
        """
        self._musts.append(query)

    def add_calculation_filter(self, queries):
        """Adds calculation based filters. The visibility of calculations is
        automatically checked.

        Args:
            queries: A single query or a list/tuple of queries. All given
                queries are combined with the visibility filters into one
                nested query, i.e. they have to match within the same
                calculation.
        """
        if not isinstance(queries, (list, tuple)):
            queries = [queries]
        # Convert to list explicitly: concatenating a list with a tuple raises
        # a TypeError.
        filters = self.get_authentication_filters_nested() + list(queries)
        nested_bool = Q(
            "bool",
            filter=filters,
        )
        nested_query = Q("nested", path="calculations", query=nested_bool)
        self._musts.append(nested_query)
        self._authenticated = True

    def includes(self, includes):
        """Sets the document fields that are returned in the results.
        """
        # Store a copy so that later modifications (see calculations()) do not
        # leak into the list owned by the caller.
        if isinstance(includes, (list, tuple)):
            includes = list(includes)
        self._extra["_source"] = {"includes": includes}

    def size(self, size):
        """Sets the maximum number of returned hits.
        """
        self._extra["size"] = size

    def extra(self, extra):
        """Replaces all extra search parameters with the given dictionary.
        """
        # Shallow copy: includes() and size() assign keys into this dictionary
        # and should not modify the dictionary owned by the caller.
        self._extra = dict(extra)

    def s(self):
        """Returns the final search object with all filters, must queries and
        extra parameters applied.
        """
        # If no authentication filters have been added already, add them now.
        if not self._authenticated:
            self._musts.append(Q(
                "nested",
                path="calculations",
                query=Q("bool", filter=self.get_authentication_filters_nested()),
            ))
            self._authenticated = True
        query = Q(
            "bool",
            filter=self._filters,
            must=self._musts,
        )
        s = self._s.query(query)
        s = s.extra(**self._extra)
        return s

    def execute(self):
        """Builds the search and executes it.
        """
        return self.s().execute()

    def get_authentication_filters_nested(self):
        """Returns a shared term filter that will leave out unpublished (of
        other users) or embargoed materials.
        """
        # Published and non-embargoed calculations are visible to everyone.
        q = Q('term', calculations__published=True) & Q('term', calculations__with_embargo=False)

        # Logged in users additionally see the calculations they own.
        user = g.user
        if user is not None and user.user_id is not None:
            q = q | Q('term', calculations__owners=user.user_id)

        return [q]

    def calculations(self):
        """Executes the query and returns a list of visible calculations
        associated with the first found material. Currently fetches all
        calculations associated with a material. If the number of calculations
        per material increases significantly then the inner_hits available for
        nested queries should be used instead.

        Returns:
            List of visible calculations for the first material matching the
            constructed query.

        Raises:
            MaterialAccessError if the queried material could not be found.
        """
        # Make sure that the fields needed for the visibility check are
        # returned. The source definition is rebuilt instead of modified in
        # place so that objects given by the caller stay untouched and calling
        # this method repeatedly does not add duplicates.
        source = dict(self._extra.get("_source") or {})
        includes = source.get("includes")
        if includes is None:
            includes = []
        elif isinstance(includes, str):
            includes = [includes]
        else:
            includes = list(includes)
        for field in self._visibility_fields:
            if field not in includes:
                includes.append(field)
        source["includes"] = includes
        self._extra["_source"] = source

        response = self.execute()
        if response.hits.total == 0:
            raise MaterialAccessError

        material = response.hits[0]

        # Filter out calculations based on their visibility
        visible_calcs = []
        for calc in material.calculations:
            if calc.published and not calc.with_embargo:
                visible_calcs.append(calc)
            elif g.user is not None and g.user.user_id in calc.owners:
                visible_calcs.append(calc)
        return visible_calcs


def get_authentication_filters():
    """Builds the filters that restrict a query on the calculations index to
    entries visible for the current user and with a successful encyclopedia
    status.

    Returns:
        List with two queries: the owner based visibility query and a term
        query requiring encyclopedia.status to be "success".
    """
    request_builder = search.SearchRequest()
    if g.user is None:
        # Anonymous access: only public entries
        request_builder.owner('public')
    else:
        request_builder.owner('visible', user_id=g.user.user_id)

    visibility_query = request_builder.q
    status_query = Q("term", encyclopedia__status="success")
    return [visibility_query, status_query]


def get_range_filter(field, minimum=None, maximum=None, source_unit=None, target_unit=None):
    """Creates a range query for the given field.

    Args:
        field: Name of the queried field.
        minimum: Optional inclusive lower limit ("gte").
        maximum: Optional inclusive upper limit ("lte").
        source_unit: Unit in which the limits are given. Must be given
            together with target_unit.
        target_unit: Unit into which the limits are converted before querying.
            Must be given together with source_unit.

    Returns:
        The range query. If neither minimum nor maximum is given, the range
        definition is empty.

    Raises:
        ValueError: If a limit needs to be converted but only one of
            source_unit and target_unit is given.
    """
    def convert(value):
        # No units given: use the value as it is.
        if source_unit is None and target_unit is None:
            return value
        # A conversion needs both units. Fail with a clear message instead of
        # an obscure error from the unit arithmetic.
        if source_unit is None or target_unit is None:
            raise ValueError(
                "Both source_unit and target_unit must be given in order to convert the range limits."
            )
        return (value * source_unit).to(target_unit).magnitude

    query_dict = {}
    if minimum is not None:
        query_dict["gte"] = convert(minimum)
    if maximum is not None:
        query_dict["lte"] = convert(maximum)
    return Q("range", **{field: query_dict})
218
219


220
221
222
223
224
225
226
227
228
229
230
231
def rgetattr(obj, attr_name):
    """Used to perform item access based on a (possibly nested) key given as
    a string, e.g. "bulk.space_group_number".

    Args:
        obj: Object supporting item access with string keys.
        attr_name: Key names separated by dots.

    Returns:
        The value found at the end of the path, or None if some part of the
        path is missing or cannot be accessed with a key.
    """
    try:
        for attr in attr_name.split("."):
            obj = obj[attr]
    except (KeyError, TypeError):
        # KeyError: the key is missing. TypeError: an intermediate value is
        # None or another value that does not support key access.
        return None
    return obj


232
def get_es_doc_values(es_doc, mapping, keys=None):
    """Collects values from an ElasticSearch document into a dictionary.

    Args:
        es_doc: The document from which the values are read.
        mapping: Dictionary that maps result keys into (possibly nested)
            key paths within the document.
        keys: The result keys to collect. If not given, all keys of the
            mapping are used.

    Returns:
        Dictionary containing an entry for each requested key for which a
        value other than None was found.
    """
    selected_keys = mapping.keys() if keys is None else keys
    found = ((key, rgetattr(es_doc, mapping[key])) for key in selected_keys)
    return {key: value for key, value in found if value is not None}


249
250
251
252
253
254
255
256
257
258
259
def read_archive(upload_id: str, calc_id: str) -> EntryArchive:
    """Used to read data from the archive.

    Args:
        upload_id: Upload id.
        calc_id: Calculation id.

    Returns:
        EntryArchive: The archive entry stored for the given calculation,
        reconstructed from its dictionary representation.
    """
    # Access to the upload files is guarded by an authorization predicate
    # created for this upload and calculation.
    is_authorized = create_authorization_predicate(upload_id, calc_id)
    upload_files = UploadFiles.get(upload_id, is_authorized=is_authorized)

    with upload_files.read_archive(calc_id) as archive:
        return EntryArchive.m_from_dict(archive[calc_id].to_dict())
269
270


271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
# Maps the property names used in the API into the (possibly nested) key paths
# with which the values are read from the material documents.
material_prop_map = {
    # General
    "material_id": "material_id",
    "formula": "formula",
    "formula_reduced": "formula_reduced",
    "material_type": "material_type",
    "material_name": "material_name",
    # Bulk
    "has_free_wyckoff_parameters": "bulk.has_free_wyckoff_parameters",
    "strukturbericht_designation": "bulk.strukturbericht_designation",
    "bravais_lattice": "bulk.bravais_lattice",
    "crystal_system": "bulk.crystal_system",
    "point_group": "bulk.point_group",
    "space_group_number": "bulk.space_group_number",
    "space_group_international_short_symbol": "bulk.space_group_international_short_symbol",
    "structure_type": "bulk.structure_type",
    "structure_prototype": "bulk.structure_prototype",
}
289
290
291
292
293
294
295
# Model for a single entry in the "similarity" list of a material result.
similarity = api.model("similarity", {
    # General
    "material_id": fields.String,
    "value": fields.Float,
    "formula": fields.String,
    "space_group_number": fields.Integer,
})
296
# Query string arguments accepted when requesting a single material.
material_query = api.parser()
material_query.add_argument(
    "property",
    type=str,
    # Only the properties known to the property mapping can be requested.
    choices=tuple(material_prop_map),
    help="Optional single property to retrieve for the given material. If not specified, all properties will be returned.",
    location="args",
)
# Model describing the information returned for a single material.
material_result = api.model("material_result", {
    # Identification and composition
    "material_id": fields.String,
    "formula": fields.String,
    "formula_reduced": fields.String,
    "material_type": fields.String,

    # Number of matching calculations, filled in by the materials search
    "n_matches": fields.Integer,

    # Structural classification
    "has_free_wyckoff_parameters": fields.Boolean,
    "strukturbericht_designation": fields.String,
    "material_name": fields.String,
    "bravais_lattice": fields.String,
    "crystal_system": fields.String,
    "point_group": fields.String,
    "space_group_number": fields.Integer,
    "space_group_international_short_symbol": fields.String,
    "structure_prototype": fields.String,
    "structure_type": fields.String,

    # Similar materials
    "similarity": fields.List(
        fields.Nested(similarity, skip_none=True),
        skip_none=True,
    ),
})
324
325


326
@ns.route("/materials/<string:material_id>")
class EncMaterialResource(Resource):
    @api.response(404, "The material does not exist")
    @api.response(200, "Metadata send", fields.Raw)
    @api.doc("get_material", params={"material_id": "28 character identifier for the material."})
    @api.expect(material_query)
    @api.marshal_with(material_result, skip_none=True)
    @authenticate()
    def get(self, material_id):
        """Used to retrieve basic information related to a material.
        """
        # Either a single requested property or all known properties
        requested = material_query.parse_args().get("property", None)
        if requested is None:
            keys = list(material_prop_map.keys())
            es_keys = list(material_prop_map.values())
        else:
            keys = [requested]
            es_keys = [material_prop_map[requested]]

        # The search only returns the material if it has at least one visible
        # calculation.
        material_search = MaterialSearch()
        material_search.add_material_filter(Q("term", material_id=material_id))
        material_search.includes(es_keys)
        response = material_search.execute()
        if response.hits.total == 0:
            abort(404, message=missing_material_msg.format(material_id))

        # Values coming from the ES entry
        result = get_es_doc_values(response[0], material_prop_map, keys)

        # Similarity data is stored in MongoDB. A KeyError is taken to mean
        # that there is no similarity data for this material.
        try:
            material = Material.m_def.a_mongo.get(material_id=material_id)
            dos_similarity = material.similarity.electronic_dos
        except KeyError:
            return result, 200

        # Only materials that can be found through the search on the current
        # deployment are reported in order to avoid dead links.
        similar_ids = dos_similarity.material_ids
        value_by_id = dict(zip(similar_ids, dos_similarity.values))
        similar_search = MaterialSearch()
        similar_search.add_material_filter(Q("terms", material_id=similar_ids))
        similar_search.includes(["material_id", "formula_reduced", "bulk.space_group_number"])
        similar_search.size(5)
        similar_response = similar_search.execute()

        similar_materials = []
        for hit in similar_response.hits:
            # Hits with missing attributes are left out
            try:
                similar_materials.append({
                    "material_id": hit.material_id,
                    "value": value_by_id[hit.material_id],
                    "formula": hit.formula_reduced,
                    "space_group_number": hit.bulk.space_group_number,
                })
            except AttributeError:
                pass
        if similar_materials:
            result["similarity"] = similar_materials

        return result, 200


396
# Matches one formula part: an uppercase letter optionally followed by one
# lowercase letter (group 1) and an optional run of digits (group 2).
re_formula = re.compile(r"([A-Z][a-z]?)(\d*)")
397
# Model for specifying a numeric range with optional limits.
range_query = api.model(
    "range_query",
    {
        "max": fields.Float,
        "min": fields.Float,
    },
)
401
402
# Model for the JSON body that is accepted by the materials search.
materials_query = api.model("materials_input", {
    # Composition based search together with paging and search mode
    "search_by": fields.Nested(api.model("search_query", {
        "exclusive": fields.Boolean(default=False),
        "formula": fields.String,
        "element": fields.String,
        "page": fields.Integer(
            default=1,
            description="Requested page number, indexing starts from 1.",
        ),
        "per_page": fields.Integer(
            default=25,
            description="Number of results per page.",
        ),
        "restricted": fields.Boolean(
            default=False,
            description="Select to restrict the query to individual calculations. If not selected, the query will combine results from several different calculations.",
        ),
    })),
    # Material properties
    "material_type": fields.List(
        fields.String(enum=list(Material.material_type.type)),
        description=Material.material_type.description,
    ),
    "material_name": fields.List(
        fields.String,
        description=Material.material_name.description,
    ),
    "structure_type": fields.List(
        fields.String,
        description=Bulk.structure_type.description,
    ),
    "space_group_number": fields.List(
        fields.Integer,
        description=Bulk.space_group_number.description,
    ),
    "crystal_system": fields.List(
        fields.String(enum=list(Bulk.crystal_system.type)),
        description=Bulk.crystal_system.description,
    ),
    # Calculation properties
    "band_gap": fields.Nested(
        range_query,
        description="Band gap range in eV.",
        allow_null=True,
    ),
    "has_band_structure": fields.Boolean(
        description="Set to True if electronic band structure needs to be available for this material.",
    ),
    "has_dos": fields.Boolean(
        description="Set to True if electronic density of states needs to be available for this material.",
    ),
    "has_thermal_properties": fields.Boolean(
        description="Set to True if thermodynamical properties need to be available for this material.",
    ),
    # Calculation methods
    "functional_type": fields.List(
        fields.String(enum=list(Method.functional_type.type)),
        description=Method.functional_type.description,
    ),
    "basis_set": fields.List(
        fields.String(enum=list(Method.basis_set.type)),
        description=Method.basis_set.description,
    ),
    "code_name": fields.List(
        fields.String(enum=list(Method.program_name.type)),
        description=Method.program_name.description,
    ),
})
423
424
425
426
427
428
429
# Model for the paging information that accompanies the search results.
pages_result = api.model("page_info", {
    "per_page": fields.Integer,
    "total": fields.Integer,
    "page": fields.Integer,
    "pages": fields.Integer,
})

430
431
# Model for the response of the materials search: the materials on the
# requested page together with the paging information.
materials_result = api.model(
    "materials_result",
    {
        "total_results": fields.Integer(allow_null=False),
        "results": fields.List(fields.Nested(material_result, skip_none=True)),
        "pages": fields.Nested(pages_result, skip_none=True),
    },
)


437
@ns.route("/materials/")
class EncMaterialsResource(Resource):
    # Pairs of (request key, ES field) for filters that target the material
    # itself. All of them are "terms" queries.
    _material_terms = (
        ("material_type", "material_type"),
        ("material_name", "material_name"),
        ("structure_type", "bulk__structure_type"),
        ("space_group_number", "bulk__space_group_number"),
        ("crystal_system", "bulk__crystal_system"),
    )

    # Triplets of (request key, query type, ES field) for filters that target
    # the nested calculations.
    _calculation_terms = (
        ("functional_type", "terms", "calculations__method__functional_type"),
        ("basis_set", "terms", "calculations__method__basis_set"),
        ("code_name", "terms", "calculations__method__program_name"),
        ("has_band_structure", "term", "calculations__properties__has_electronic_band_structure"),
        ("has_dos", "term", "calculations__properties__has_electronic_dos"),
        ("has_thermal_properties", "term", "calculations__properties__has_thermodynamical_properties"),
    )

    @staticmethod
    def _formula_to_query_string(formula):
        """Converts a formula into a string of reduced species and counts in
        Hill order, e.g. "H2 O".
        """
        # NOTE(review): the element list grows with the counts given in the
        # formula, very large counts will create very large lists.
        element_list = []
        for match in re_formula.finditer(formula):
            symbol, count = match.groups()
            element_list += [symbol] * (int(count) if count != "" else 1)

        names, reduced_counts = get_hill_decomposition(element_list, reduced=True)
        parts = []
        for name, count in zip(names, reduced_counts):
            if count == 1:
                parts.append(name)
            else:
                parts.append("{}{}".format(name, int(count)))
        return " ".join(parts)

    @staticmethod
    def _visible_calculation_counts(material_ids):
        """Returns a dictionary that maps each of the given material ids into
        the number of its calculations that pass the visibility filters.
        """
        s = MaterialSearch()
        s.size(0)
        matched = s._s.aggs.bucket("matched", A("filter", filter=Q("terms", material_id=material_ids)))
        materials = matched.bucket("materials", A("terms", field="material_id", size=len(material_ids)))
        nested = materials.bucket("nested", A("nested", path="calculations"))
        nested.bucket(
            "visible",
            A("filter", filter=Q("bool", filter=s.get_authentication_filters_nested()))
        )
        response = s.execute()
        return {
            agg.key: agg.nested.visible.doc_count
            for agg in response.aggs.matched.materials
        }

    @api.response(404, "No materials found")
    @api.response(400, "Bad request")
    @api.response(200, "OK", materials_result)
    @api.expect(materials_query, validate=False)
    @api.marshal_with(materials_result, skip_none=True)
    @api.doc("search_materials")
    @authenticate()
    def post(self):
        """Search materials based on their properties.
        """
        # Get query parameters as json
        try:
            data = marshal(request.get_json(), materials_query)
        except Exception as e:
            abort(400, message=str(e))

        # Validate paging before running any queries: a zero per_page would
        # cause a division by zero when counting the pages and a page below
        # one would result in a negative search offset.
        search_by = data["search_by"]
        page = search_by["page"]
        per_page = search_by["per_page"]
        if page < 1 or per_page < 1:
            abort(400, message="The values of 'page' and 'per_page' must be positive integers.")

        # Create filters from user query
        s = MaterialSearch()

        # Material level filters
        for key, es_field in self._material_terms:
            if data[key] is not None:
                s.add_material_filter(Q("terms", **{es_field: data[key]}))

        # Calculation filters
        calc_filters = []
        for key, query_type, es_field in self._calculation_terms:
            if data[key] is not None:
                calc_filters.append(Q(query_type, **{es_field: data[key]}))
        if data["band_gap"] is not None:
            calc_filters.append(get_range_filter(
                "calculations.properties.band_gap",
                minimum=data["band_gap"].get("min"),
                maximum=data["band_gap"].get("max"),
                source_unit=ureg.eV,
                target_unit=ureg.J,
            ))

        # In restricted mode all calculation filters have to match within a
        # single calculation, otherwise each filter may be fulfilled by a
        # different calculation of the material.
        if search_by["restricted"]:
            s.add_calculation_filter(calc_filters)
        else:
            for f in calc_filters:
                s.add_calculation_filter(f)

        formula = search_by["formula"]
        elements = search_by["element"]
        exclusive = search_by["exclusive"]

        # The given list of species/formula is reformatted with the Hill system into a
        # query string. With exclusive search we look for exact match, with
        # non-exclusive search we look for match that includes at least all
        # species, possibly even more.
        if formula is not None:
            query_string = self._formula_to_query_string(formula)
            if exclusive:
                s.add_material_filter(Q("term", **{"species_and_counts.keyword": query_string}))
            else:
                s.add_material_must(Q(
                    "match",
                    species_and_counts={"query": query_string, "operator": "and"}
                ))
        elif elements is not None:
            species, _ = get_hill_decomposition(elements.split(","))
            query_string = " ".join(species)
            if exclusive:
                s.add_material_filter(Q("term", **{"species.keyword": query_string}))
            else:
                s.add_material_must(Q(
                    "match",
                    species={"query": query_string, "operator": "and"}
                ))

        # Execute query
        s.extra({
            "size": per_page,
            "from": (page - 1) * per_page,
            "sort": [{"formula_reduced": {"order": "asc"}}],
            "_source": {"includes": list(material_prop_map.values())},
        })
        response = s.execute()

        # Form final response
        pages = {
            "page": page,
            "per_page": per_page,
            "pages": math.ceil(response.hits.total / per_page),
            "total": response.hits.total,
        }

        # Gather the number of visible calculation for each returned material
        # with an aggregation
        agg_dict = {}
        if len(response) != 0:
            agg_dict = self._visible_calculation_counts([x.material_id for x in response])

        # Form the final list of results
        result_list = []
        for x in response:
            res = get_es_doc_values(x, material_prop_map, list(material_prop_map.keys()))
            res["n_matches"] = agg_dict[x.material_id]
            result_list.append(res)

        return {"results": result_list, "pages": pages}, 200
599
600


601
# Model for the calculation groups of a material. Both entries are returned
# as raw dictionaries.
groups_result = api.model(
    "groups_result",
    {
        "groups_eos": fields.Raw,
        "groups_par": fields.Raw,
    },
)


607
@ns.route("/materials/<string:material_id>/groups")
class EncGroupsResource(Resource):
    @api.response(404, "Material not found")
    @api.response(400, "Bad request")
    @api.response(200, "OK", groups_result)
    @api.marshal_with(groups_result)
    @api.doc("get_material_groups", params={"material_id": "28 character identifier for the material."})
    @authenticate()
    def get(self, material_id):
        """Returns a summary of the calculation groups that were identified for this material.

        Two types of groups are reported: equation of state groups and
        parameter variation groups. Equation of state groups contain
        calculations with identical method and material, but different volume.
        Parameter variation groups contain identical structure but different
        methods. The response contains dictionaries for both groups
        ('groups_eos' and 'groups_par'). These dictionaries map a group id with
        a list of calculation ids.
        """
        # Only the calculation fields needed for the grouping are requested
        requested_fields = [
            "calculations.calc_id",
            "calculations.method.group_eos_id",
            "calculations.method.group_parametervariation_id",
            "calculations.properties.energies.energy_total",
            "calculations.idealized_structure.cell_volume",
        ]
        material_search = MaterialSearch()
        material_search.add_material_filter(Q("term", material_id=material_id))
        material_search.extra({"_source": {"includes": requested_fields}, "size": 1})

        # Respond with 404 if the material is not found
        try:
            calculations = material_search.calculations()
        except MaterialAccessError:
            abort(404, message=missing_material_msg.format(material_id))

        groups_eos = defaultdict(list)
        groups_param = defaultdict(list)
        group_sources = (
            ("group_eos_id", groups_eos),
            ("group_parametervariation_id", groups_param),
        )
        for calc in calculations:
            # Calculations without total energy or cell volume are ignored
            try:
                calc.properties.energies.energy_total
                calc.idealized_structure.cell_volume
            except AttributeError:
                continue
            # Missing group information is silently skipped
            for id_name, groups in group_sources:
                try:
                    group_id = getattr(calc.method, id_name)
                    if group_id:
                        groups[group_id].append(calc.calc_id)
                except AttributeError:
                    pass

        # Remove groups with too few entries: at least four calculations are
        # required for an EOS group and two for a parameter variation group.
        for groups, min_size in ((groups_eos, 4), (groups_param, 2)):
            too_small = [key for key, members in groups.items() if len(members) < min_size]
            for key in too_small:
                del groups[key]

        return {"groups_eos": groups_eos, "groups_par": groups_param}, 200


# Model for the detailed information about a single calculation group.
group_result = api.model(
    "group_result",
    {
        "calculations": fields.List(
            fields.String,
            description="List of calculation ids.",
        ),
        "energies": fields.List(
            fields.Float,
            description="List of total energies.",
        ),
        "volumes": fields.List(
            fields.Float,
            description="List of cell volumes.",
        ),
    },
)


@ns.route("/materials/<string:material_id>/groups/<string:group_type>/<string:group_id>")
class EncGroupResource(Resource):
    # Returns the members of a single EOS / parameter variation group of a
    # material together with their energies and volumes.

    @api.response(404, "Group not found")
    @api.response(400, "Bad request")
    @api.response(200, "OK", group_result)
    @api.marshal_with(group_result)
    @api.doc("get_material_group", params={
        "material_id": "28 character identifier for the material.",
        "group_type": "Type of group. Valid options are: 'eos' and 'par'.",
        "group_id": "28 character identifier for the group.",
    })
    @authenticate()
    def get(self, material_id, group_type, group_id):
        """Used to query detailed information about a specific calculation group.

        Responds with 400 for an unsupported group type and with 404 when the
        material cannot be retrieved. The returned lists are parallel and
        sorted by ascending total energy.
        """
        # Select the method attribute that stores the group id for the
        # requested group type.
        if group_type == "eos":
            group_id_source = "group_eos_id"
        elif group_type == "par":
            group_id_source = "group_parametervariation_id"
        else:
            abort(400, message="Unsupported group type.")

        # Only fetch the fields that are needed to build the response.
        s = MaterialSearch()
        s.add_material_filter(Q("term", material_id=material_id))
        s.extra({
            "_source": {"includes": [
                "calculations.calc_id",
                "calculations.properties.energies.energy_total",
                "calculations.idealized_structure.cell_volume",
                "calculations.method." + group_id_source,
            ]},
            "size": 1,
        })

        # Raise error if material not found
        try:
            calculations = s.calculations()
        except MaterialAccessError:
            abort(404, message=missing_material_msg.format(material_id))

        # Gather the members of the requested group. All values of an entry are
        # read before anything is appended, so that an entry with a missing
        # field is skipped as a whole and the three lists always stay aligned.
        calcs = []
        energies = []
        volumes = []
        for calc in calculations:
            try:
                if getattr(calc.method, group_id_source) != group_id:
                    continue
                calc_id = calc.calc_id
                volume = calc.idealized_structure.cell_volume
                energy = calc.properties.energies.energy_total
            except AttributeError:
                # Incomplete entries are ignored on purpose.
                continue
            calcs.append(calc_id)
            volumes.append(volume)
            energies.append(energy)

        # NOTE(review): a group without matching calculations currently yields
        # a 200 response with empty lists although a 404 is documented above --
        # confirm which behaviour clients expect.

        # Sort results by energy
        energies = np.array(energies)
        volumes = np.array(volumes)
        calcs = np.array(calcs)
        order = energies.argsort()
        energies = energies[order]
        volumes = volumes[order]
        calcs = calcs[order]

        # Return results
        group_dict = {
            "calculations": calcs.tolist(),
            "energies": energies.tolist(),
            "volumes": volumes.tolist(),
        }

        return group_dict, 200
764
765
766
767


# Maps the field names of the "calculation_result" model to dotted paths. It is
# handed to get_es_doc_values() together with a calculation entry when the
# calculation list of a material is built.
calc_prop_map = {
    "calc_id": "calc_id",
    "upload_id": "upload_id",
    "code_name": "method.program_name",
    "code_version": "method.program_version",
    "functional_type": "method.functional_type",
    "basis_set_type": "method.basis_set",
    "core_electron_treatment": "method.core_electron_treatment",
    "run_type": "workflow.workflow_type",
    "has_dos": "properties.has_electronic_dos",
    "has_band_structure": "properties.has_electronic_band_structure",
    "has_thermal_properties": "properties.has_thermodynamical_properties",
}
# Response model for a single calculation in the calculation list.
calculation_result = api.model("calculation_result", {
    "calc_id": fields.String,
    "upload_id": fields.String,
    "code_name": fields.String,
    "code_version": fields.String,
    "functional_type": fields.String,
    "basis_set_type": fields.String,
    "core_electron_treatment": fields.String(default="unavailable"),
    "run_type": fields.String(default="unavailable"),
    "has_dos": fields.Boolean,
    "has_band_structure": fields.Boolean,
    "has_thermal_properties": fields.Boolean,
})

# Calculation ids of the entries chosen to represent each quantity.
representatives_result = api.model("representatives_result", {
    "idealized_structure": fields.String,
    "electronic_band_structure": fields.String,
    "electronic_dos": fields.String,
    "thermodynamical_properties": fields.String,
})

# Full response model of the calculation list endpoint.
calculations_result = api.model("calculations_result", {
    "total_results": fields.Integer,
    "results": fields.List(fields.Nested(calculation_result)),
    "representatives": fields.Nested(representatives_result, skip_none=True),
})


@ns.route("/materials/<string:material_id>/calculations")
class EncCalculationsResource(Resource):
    # Lists all calculations of a material and picks representative
    # calculations for a few quantities.

    @api.response(404, "Material not found")
    @api.response(400, "Bad request")
    @api.response(200, "OK", calculations_result)
    @api.doc("get_material_calculations")
    @api.marshal_with(calculations_result)
    @authenticate()
    def get(self, material_id):
        """Used to return information about all calculations related to the given material.

        Returns a list of all calculations and a representative calculation for
        few select quantities that are shown in the material overview page.
        """
        s = MaterialSearch()
        s.add_material_filter(Q("term", material_id=material_id))
        s.extra({"_source": {"includes": ["calculations"]}})

        def has_property(entry, name):
            """Returns True if the given flag under 'properties' is set. A
            missing 'properties' section or a missing flag counts as False.
            """
            return bool(getattr(getattr(entry, "properties", None), name, False))

        def calc_score(entry):
            """Custom scoring function used to sort results by their
            "quality". Currently built to mimic the scoring that was used
            in the old Encyclopedia GUI. Primarily sorts by quality measure,
            ties are broken by alphabetic sorting of entry_id in order to
            return consistent results.
            """
            functional_score = {
                "GGA": 100
            }
            code_score = {
                "VASP": 3,  # Prefer VASP data as it is the "cleanest" on average
                "FHI-aims": 2,
                "Quantum Espresso": 1,
            }

            # Missing method information simply contributes nothing to the
            # score instead of failing the whole request.
            method = getattr(entry, "method", None)
            code_name = getattr(method, "program_name", None)
            functional = getattr(method, "functional_type", None)

            score = functional_score.get(functional, 0)
            score += code_score.get(code_name, 0)
            if has_property(entry, "has_electronic_dos") and has_property(entry, "has_electronic_band_structure"):
                score += 10

            return (score, entry.calc_id)

        # Raise error if material not found
        try:
            calculations = s.calculations()
        except MaterialAccessError:
            abort(404, message=missing_material_msg.format(material_id))

        # Sort calculations by "quality"
        sorted_calc = sorted(calculations, key=calc_score, reverse=True)

        # Without any calculation there is nothing to represent the material
        # with; report it as missing instead of failing on the index below.
        if not sorted_calc:
            abort(404, message=missing_material_msg.format(material_id))

        # Get the requested representative properties. The best scored
        # calculation represents the structure, for the other quantities the
        # best scored calculation that has the quantity is used.
        representatives = {}
        representatives["idealized_structure"] = sorted_calc[0].calc_id
        thermo_found = False
        bs_found = False
        dos_found = False
        for calc in sorted_calc:
            if not thermo_found and has_property(calc, "has_thermodynamical_properties"):
                representatives["thermodynamical_properties"] = calc.calc_id
                thermo_found = True
            if not bs_found and has_property(calc, "has_electronic_band_structure"):
                representatives["electronic_band_structure"] = calc.calc_id
                bs_found = True
            if not dos_found and has_property(calc, "has_electronic_dos"):
                representatives["electronic_dos"] = calc.calc_id
                dos_found = True
            if thermo_found and bs_found and dos_found:
                break

        # Create result JSON
        results = [get_es_doc_values(entry, calc_prop_map) for entry in sorted_calc]

        result = {
            "total_results": len(results),
            "results": results,
            "representatives": representatives,
        }

        return result, 200


902
903
904
905
# Histogram as two parallel lists: bucket keys and the number of hits in each.
histogram = api.model("histogram", {
    "occurrences": fields.List(fields.Integer),
    "values": fields.List(fields.Float),
})

# Request body of the statistics endpoint.
statistics_query = api.model("statistics_query", {
    "calculations": fields.List(fields.String),
    "properties": fields.List(fields.String),
    "n_histogram_bins": fields.Integer,
})

# Statistics of a single property.
statistics = api.model("statistics", {
    "min": fields.Float,
    "max": fields.Float,
    "avg": fields.Float,
    "histogram": fields.Nested(histogram, skip_none=True)
})

# Response of the statistics endpoint: one optional entry per property.
statistics_result = api.model("statistics_result", {
    "cell_volume": fields.Nested(statistics, skip_none=True),
    "atomic_density": fields.Nested(statistics, skip_none=True),
    "mass_density": fields.Nested(statistics, skip_none=True),
    "lattice_a": fields.Nested(statistics, skip_none=True),
    "lattice_b": fields.Nested(statistics, skip_none=True),
    "lattice_c": fields.Nested(statistics, skip_none=True),
    "alpha": fields.Nested(statistics, skip_none=True),
    "beta": fields.Nested(statistics, skip_none=True),
    "gamma": fields.Nested(statistics, skip_none=True),
    "band_gap": fields.Nested(statistics, skip_none=True),
})
929
930
931
932
933
934
935
936
937
938
# Maps the property names accepted by the statistics endpoint to the index
# fields that the "stats" and "histogram" aggregations are run on.
property_map = {
    "cell_volume": "encyclopedia.material.idealized_structure.cell_volume",
    "atomic_density": "encyclopedia.properties.atomic_density",
    "mass_density": "encyclopedia.properties.mass_density",
    "lattice_a": "encyclopedia.material.idealized_structure.lattice_parameters.a",
    "lattice_b": "encyclopedia.material.idealized_structure.lattice_parameters.b",
    "lattice_c": "encyclopedia.material.idealized_structure.lattice_parameters.c",
    "alpha": "encyclopedia.material.idealized_structure.lattice_parameters.alpha",
    "beta": "encyclopedia.material.idealized_structure.lattice_parameters.beta",
    "gamma": "encyclopedia.material.idealized_structure.lattice_parameters.gamma",
    "band_gap": "encyclopedia.properties.band_gap",
}
941
942
943


@ns.route("/materials/<string:material_id>/statistics")
class EncStatisticsResource(Resource):
    # Computes min/max/avg and a histogram for selected properties over a set
    # of calculations of one material.

    @api.response(404, "Material or calculations not found")
    @api.response(400, "Bad request")
    @api.response(200, "OK", statistics_result)
    @api.expect(statistics_query, validate=False)
    @api.marshal_with(statistics_result, skip_none=True)
    @api.doc("get_material_statistics", params={"material_id": "28 character identifier for the material."})
    @authenticate()
    def post(self, material_id):
        """Used to return statistics related to the specified material and
        calculations.

        Responds with 400 when the request body is incomplete or names an
        unknown property and with 404 when none of the given calculations is
        found for the material.
        """
        # Get query parameters as json
        try:
            data = marshal(request.get_json(), statistics_query)
        except Exception as e:
            abort(400, message=str(e))

        # Validate the request here, so that a bad request is reported as such
        # instead of surfacing later as an internal server error.
        calc_ids = data["calculations"]
        properties = data["properties"]
        n_bins = data["n_histogram_bins"]
        if calc_ids is None:
            abort(400, message="The list of calculations is missing.")
        if properties is None:
            abort(400, message="The list of properties is missing.")
        unknown = [prop for prop in properties if prop not in property_map]
        if unknown:
            abort(400, message="Unsupported properties: {}".format(", ".join(unknown)))
        if properties and (n_bins is None or n_bins <= 0):
            abort(400, message="The number of histogram bins must be a positive integer.")

        # Find entries for the given material.
        bool_query = Q(
            "bool",
            filter=get_authentication_filters() + [
                Q("term", encyclopedia__material__material_id=material_id),
                Q("terms", calc_id=calc_ids),
            ]
        )

        s = Search(index=config.elastic.index_name)
        s = s.query(bool_query)
        s = s.extra(**{
            "size": 0,
        })

        # Add statistics aggregations for each requested property
        for prop in properties:
            stats_agg = A("stats", field=property_map[prop])
            s.aggs.bucket("{}_stats".format(prop), stats_agg)

        # No hits on the top query level
        response = s.execute()
        if response.hits.total == 0:
            abort(404, message="The given calculations could not be found for material {}".format(material_id))

        # Run a second query that creates histograms with fixed size buckets
        # based on the min and max from previous query. Might make more sense
        # to use the mean and sigma to define the range?
        s = Search(index=config.elastic.index_name)
        s = s.query(bool_query)
        s = s.extra(**{
            "size": 0,
        })
        for prop in properties:
            stats = getattr(response.aggs, "{}_stats".format(prop))
            if stats.count == 0:
                continue
            interval = (stats.max * 1.001 - stats.min) / n_bins
            # The interval has to be positive. It is zero when all values are
            # zero and turns negative when the maximum is negative, because
            # the 1.001 factor then moves the upper bound below the maximum.
            if interval <= 0:
                interval = 1
            hist_agg = A("histogram", field=property_map[prop], interval=interval, offset=stats.min, min_doc_count=0)
            s.aggs.bucket("{}_hist".format(prop), hist_agg)
        response_hist = s.execute()

        # Return results. Properties without any values are left out.
        result = {}
        for prop in properties:
            stats = getattr(response.aggs, "{}_stats".format(prop))
            if stats.count == 0:
                continue
            hist = getattr(response_hist.aggs, "{}_hist".format(prop))
            occurrences = [x.doc_count for x in hist.buckets]
            values = [x.key for x in hist.buckets]
            result[prop] = {
                "min": stats.min,
                "max": stats.max,
                "avg": stats.avg,
                "histogram": {
                    "occurrences": occurrences,
                    "values": values,
                }
            }

        return result, 200
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038


# Free variables of a Wyckoff set.
wyckoff_variables_result = api.model("wyckoff_variables_result", {
    "x": fields.Float,
    "y": fields.Float,
    "z": fields.Float,
})

# A single Wyckoff set.
wyckoff_set_result = api.model("wyckoff_set_result", {
    "wyckoff_letter": fields.String,
    "indices": fields.List(fields.Integer),
    "element": fields.String,
    "variables": fields.Nested(wyckoff_variables_result, skip_none=True),
})

# Lattice parameters: three lengths and three angles.
lattice_parameters = api.model("lattice_parameters", {
    "a": fields.Float,
    "b": fields.Float,
    "c": fields.Float,
    "alpha": fields.Float,
    "beta": fields.Float,
    "gamma": fields.Float,
})

# The idealized structure of a calculation.
idealized_structure_result = api.model("idealized_structure_result", {
    "atom_labels": fields.List(fields.String),
    "atom_positions": fields.List(fields.List(fields.Float)),
    "lattice_vectors": fields.List(fields.List(fields.Float)),
    "lattice_vectors_primitive": fields.List(fields.List(fields.Float)),
    "lattice_parameters": fields.Nested(lattice_parameters, skip_none=True),
    "periodicity": fields.List(fields.Boolean),
    "number_of_atoms": fields.Integer,
    "cell_volume": fields.Float,
    "wyckoff_sets": fields.List(fields.Nested(wyckoff_set_result, skip_none=True)),
})

1062
1063
# Describes where each calculation property is stored: "source" names the
# storage ("es" or "archive") and "path" the location inside of it. Paths for
# "es" are dot separated, paths for "archive" are slash separated.
calculation_property_map = {
    "lattice_parameters": {
        "source": "es",
        "path": "encyclopedia.material.idealized_structure.lattice_parameters"
    },
    "energies": {
        "source": "es",
        "path": "encyclopedia.properties.energies",
    },
    "mass_density": {
        "source": "es",
        "path": "encyclopedia.properties.mass_density",
    },
    "atomic_density": {
        "source": "es",
        "path": "encyclopedia.properties.atomic_density",
    },
    "cell_volume": {
        "source": "es",
        "path": "encyclopedia.material.idealized_structure.cell_volume"
    },
    "band_gap": {
        "source": "es",
        "path": "encyclopedia.properties.band_gap"
    },
    "electronic_band_structure": {
        "source": "es",
        "path": "encyclopedia.properties.electronic_band_structure"
    },
    "electronic_dos": {
        "source": "es",
        "path": "encyclopedia.properties.electronic_dos"
    },
    "phonon_band_structure": {
        "source": "es",
        "path": "encyclopedia.properties.phonon_band_structure"
    },
    "phonon_dos": {
        "source": "es",
        "path": "encyclopedia.properties.phonon_dos"
    },
    "thermodynamical_properties": {
        "source": "es",
        "path": "encyclopedia.properties.thermodynamical_properties"
    },
    "wyckoff_sets": {
        "source": "archive",
        "path": "section_metadata/encyclopedia/material/idealized_structure/wyckoff_sets"
    },
    "idealized_structure": {
        "source": "archive",
        "path": "section_metadata/encyclopedia/material/idealized_structure"
    },
}

# Request body for querying the properties of a single calculation. The
# accepted property names are the keys of calculation_property_map.
calculation_property_query = api.model("calculation_query", {
    "properties": fields.List(fields.String(enum=list(calculation_property_map.keys())), description="List of calculation properties to return."),
})

# Energies of a calculation.
energies = api.model("energies", {
    "energy_total": fields.Float,
    "energy_total_T0": fields.Float,
    "energy_free": fields.Float,
})

# Electronic band structure of a calculation.
electronic_band_structure = api.model("electronic_band_structure", {
    "reciprocal_cell": fields.List(fields.List(fields.Float)),
    "brillouin_zone": fields.Raw,
    "section_k_band_segment": fields.Raw,
    "section_band_gap": fields.Raw,
})

# Electronic density of states of a calculation.
electronic_dos = api.model("electronic_dos", {
    "dos_energies": fields.List(fields.Float),
    "dos_values": fields.List(fields.List(fields.Float)),
})

# Response model with one optional entry per calculation property.
calculation_property_result = api.model("calculation_property_result", {
    "lattice_parameters": fields.Nested(lattice_parameters, skip_none=True),
    "energies": fields.Nested(energies, skip_none=True),
    "mass_density": fields.Float,
    "atomic_density": fields.Float,
    "cell_volume": fields.Float,
    # NOTE(review): idealized_structure_result declares "wyckoff_sets" as a
    # list of nested models, here it is a single nested model -- confirm which
    # declaration is intended.
    "wyckoff_sets": fields.Nested(wyckoff_set_result, skip_none=True),
    "idealized_structure": fields.Nested(idealized_structure_result, skip_none=True),
    "band_gap": fields.Float,
    "electronic_band_structure": fields.Nested(electronic_band_structure, skip_none=True),
    "electronic_dos": fields.Nested(electronic_dos, skip_none=True),
    "phonon_band_structure": fields.Raw,
    "phonon_dos": fields.Raw,
    "thermodynamical_properties": fields.Raw,
})


1152
1153
1154
1155
@ns.route("/materials/<string:material_id>/calculations/<string:calc_id>")
class EncCalculationResource(Resource):
    @api.response(404, "Material or calculation not found")
    @api.response(400, "Bad request")
1156
    @api.response(200, "OK", calculation_property_result)
1157
    @api.expect(calculation_property_query, validate=False)