encyclopedia.py 55 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
# Copyright 2018 Markus Scheidgen
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

15
"""
16
API for retrieving material information.
17
"""
18
import re
19
import math
20
import numpy as np
21
from collections import defaultdict
22

23
from flask_restplus import Resource, abort, fields, marshal
24
from flask import request, g
25
from elasticsearch_dsl import Search, Q, A
26
from elasticsearch_dsl.utils import AttrDict
27

28
29
from nomad import config, infrastructure, search
from nomad.files import UploadFiles
30
from nomad.units import ureg
Lauri Himanen's avatar
Lauri Himanen committed
31
from nomad.atomutils import get_hill_decomposition
32
from nomad.datamodel.datamodel import EntryArchive
33
from nomad.datamodel.material import Material, Bulk, Method
34
from .api import api
35
from .auth import authenticate, create_authorization_predicate
36

37
38
ns = api.namespace("encyclopedia", description="Access materials data.")
missing_material_msg = "The specified material {} could not be retrieved. It either does not exists or requires authentication."
39

40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213

class MaterialAccessError(Exception):
    """Signals that a material query did not return any material, either
    because the material does not exist or because it is not visible to the
    current user.
    """


class MaterialSearch():
    """Convenience class for material searches. Automatically ensures the
    correct visibility of materials when the search is constructed through the
    methods of this class.
    """
    def __init__(self):
        self._s = Search(index=config.elastic.materials_index_name)
        self._filters = []
        self._musts = []
        self._extra = {}
        # Set once a nested visibility query has been added to the musts.
        self._authenticated = False

    def add_material_filter(self, query):
        """Adds material based filters.
        """
        self._filters.append(query)

    def add_material_aggregation(self, name, aggregation):
        """Adds material based aggregation.
        """
        self._s.aggs.bucket(name, aggregation)

    def add_material_must(self, query):
        """Adds material based must query.
        """
        self._musts.append(query)

    def add_calculation_filter(self, queries):
        """Adds calculation based filters. The visibility of calculations is
        automatically checked.

        Args:
            queries: A single query or a list/tuple of queries. All queries
                given in one call are combined with the visibility filters
                into a single nested query, i.e. they have to be fulfilled by
                the same calculation.
        """
        if not isinstance(queries, (list, tuple)):
            queries = [queries]
        # list(...) makes the concatenation work for tuples as well.
        filters = self.get_authentication_filters_nested() + list(queries)
        nested_bool = Q(
            "bool",
            filter=filters,
        )
        nested_query = Q("nested", path="calculations", query=nested_bool)
        self._musts.append(nested_query)
        self._authenticated = True

    def includes(self, includes):
        """Sets the list of source fields that are returned for each hit.
        """
        self._extra["_source"] = {"includes": includes}

    def size(self, size):
        """Sets the maximum number of returned hits.
        """
        self._extra["size"] = size

    def extra(self, extra):
        """Replaces all extra search body options (including any previously
        set includes or size) with the given dictionary.
        """
        self._extra = extra

    def s(self):
        """Builds the final search object from the collected filters, must
        queries and extra options.
        """
        # If no authentication filters have been added already, add them now.
        if not self._authenticated:
            self._musts.append(Q(
                "nested",
                path="calculations",
                query=Q("bool", filter=self.get_authentication_filters_nested()),
            ))
            self._authenticated = True
        query = Q(
            "bool",
            filter=self._filters,
            must=self._musts,
        )
        s = self._s.query(query)
        s = s.extra(**self._extra)
        return s

    def execute(self):
        """Builds the search and executes it, returning the response.
        """
        return self.s().execute()

    def get_authentication_filters_nested(self):
        """Returns a shared term filter that will leave out unpublished (of
        other users) or embargoed materials.
        """
        # Published calculations without embargo are visible to everyone.
        q = Q('term', calculations__published=True) & Q('term', calculations__with_embargo=False)

        # Logged in users additionally see the calculations they own.
        if g.user is not None and g.user.user_id is not None:
            q = q | Q('term', calculations__owners=g.user.user_id)

        return [q]

    def calculations(self):
        """Executes the query and returns a list of visible calculations
        associated with the first found material. Currently fetches all
        calculations associated with a material. If the number of calculations
        per material increases significantly then the inner_hits available for
        nested queries should be used instead.

        Returns:
            List of visible calculations for the first material matching the
            constructed query.

        Raises:
            MaterialAccessError if the queried material could not be found.
        """
        # Make sure that the fields needed for the visibility check below are
        # part of the returned source.
        source = self._extra.get("_source")
        if source is None:
            source = {}
            self._extra["_source"] = source
        includes = source.get("includes")
        if includes is None:
            includes = []
            source["includes"] = includes
        includes.extend([
            "calculations.published",
            "calculations.with_embargo",
            "calculations.owners",
        ])

        response = self.execute()
        if response.hits.total == 0:
            raise MaterialAccessError

        material = response.hits[0]

        # Filter out calculations based on their visibility
        visible_calcs = []
        for calc in material.calculations:
            if calc.published and not calc.with_embargo:
                visible_calcs.append(calc)
            elif g.user is not None and g.user.user_id in calc.owners:
                visible_calcs.append(calc)
        return visible_calcs


def get_authentication_filters():
    """Builds the filters shared by queries against the calculations index:
    an ownership query that depends on whether a user is logged in, and a
    term filter that only accepts entries with a successful encyclopedia
    status.
    """
    owner_request = search.SearchRequest()
    user = g.user
    if user is None:
        # Anonymous access: only public entries.
        owner_request.owner('public')
    else:
        # Logged in: everything that is visible to this user.
        owner_request.owner('visible', user_id=user.user_id)

    status_filter = Q("term", encyclopedia__status="success")
    return [owner_request.q, status_filter]


def get_range_filter(field, minimum=None, maximum=None, source_unit=None, target_unit=None):
    """Creates a range query for the given field.

    Args:
        field: Name of the queried field.
        minimum: Optional inclusive lower limit ("gte").
        maximum: Optional inclusive upper limit ("lte").
        source_unit: Unit in which the limits are given.
        target_unit: Unit to which the limits are converted for the query.

    Returns:
        A "range" query. The limits are used as given if neither unit is
        specified, otherwise they are converted from source_unit to
        target_unit.
    """
    unitless = source_unit is None and target_unit is None

    def to_target(value):
        if unitless:
            return value
        return (value * source_unit).to(target_unit).magnitude

    limits = {}
    for operator, limit in (("gte", minimum), ("lte", maximum)):
        if limit is not None:
            limits[operator] = to_target(limit)

    return Q("range", **{field: limits})
214
215


216
217
218
219
220
221
222
223
224
225
226
227
def rgetattr(obj, attr_name):
    """Used to perform item access based on a (possibly nested) name given as
    a dot-separated string, e.g. "bulk.space_group_number".

    Args:
        obj: The root object. It and every intermediate value are accessed
            with the subscript operator.
        attr_name: Dot-separated path of keys.

    Returns:
        The value found at the end of the path, or None if any key along the
        path is missing or an intermediate value cannot be subscripted with
        the key (e.g. it is None).
    """
    try:
        for attr in attr_name.split("."):
            obj = obj[attr]
    except (KeyError, TypeError):
        # KeyError: key is missing. TypeError: an intermediate value is not a
        # mapping (e.g. None or a list), so the rest of the path cannot exist.
        return None
    return obj


228
def get_es_doc_values(es_doc, mapping, keys=None):
    """Used to form a material definition for "materials/<material_id>" from
    the given ElasticSearch root document.

    Args:
        es_doc: The ElasticSearch document from which the values are read.
        mapping: Dictionary that maps the names used in the result to the
            (possibly nested, dot-separated) names in the document.
        keys: The names from the mapping to include in the result. If not
            given, all names in the mapping are used.

    Returns:
        Dictionary with one entry per requested key. The value is None when
        rgetattr does not find the mapped name in the document.
    """
    if keys is None:
        keys = mapping.keys()

    return {key: rgetattr(es_doc, mapping[key]) for key in keys}


244
245
246
247
248
249
250
251
252
253
254
def read_archive(upload_id: str, calc_id: str) -> EntryArchive:
    """Used to read data from the archive.

    Args:
        upload_id: Upload id.
        calc_id: Calculation id.

    Returns:
        The archive contents stored for the given calculation, converted
        into an EntryArchive.
    """
    # Access to the files is guarded by an authorization predicate created
    # for this upload and calculation.
    upload_files = UploadFiles.get(
        upload_id, is_authorized=create_authorization_predicate(upload_id, calc_id))

    # The data is converted while the archive is still open.
    with upload_files.read_archive(calc_id) as archive:
        data = archive[calc_id]
        root = EntryArchive.m_from_dict(data.to_dict())

    return root
264
265


266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
# Maps the property names used in the API (keys) to the corresponding, possibly
# nested and dot-separated, field names in the ElasticSearch material document
# (values). The keys are also the valid choices for the "property" argument of
# the material query.
material_prop_map = {
    # General
    "material_id": "material_id",
    "formula": "formula",
    "formula_reduced": "formula_reduced",
    "material_type": "material_type",
    "material_name": "material_name",
    # Bulk
    "has_free_wyckoff_parameters": "bulk.has_free_wyckoff_parameters",
    "strukturbericht_designation": "bulk.strukturbericht_designation",
    "bravais_lattice": "bulk.bravais_lattice",
    "crystal_system": "bulk.crystal_system",
    "point_group": "bulk.point_group",
    "space_group_number": "bulk.space_group_number",
    "space_group_international_short_symbol": "bulk.space_group_international_short_symbol",
    "structure_type": "bulk.structure_type",
    "structure_prototype": "bulk.structure_prototype",
}
284
285
286
287
288
289
290
similarity = api.model("similarity", {
    # General
    "material_id": fields.String,
    "value": fields.Float,
    "formula": fields.String,
    "space_group_number": fields.Integer,
})
291
material_query = api.parser()
292
293
294
295
296
297
298
299
material_query.add_argument(
    "property",
    type=str,
    choices=tuple(material_prop_map.keys()),
    help="Optional single property to retrieve for the given material. If not specified, all properties will be returned.",
    location="args"
)
material_result = api.model("material_result", {
300
301
    # General
    "material_id": fields.String,
302
303
    "formula": fields.String,
    "formula_reduced": fields.String,
304
    "material_type": fields.String,
305
    "n_matches": fields.Integer,
306
    # Bulk only
307
    "has_free_wyckoff_parameters": fields.Boolean,
308
    "strukturbericht_designation": fields.String,
309
    "material_name": fields.String,
310
311
    "bravais_lattice": fields.String,
    "crystal_system": fields.String,
312
    "point_group": fields.String,
313
314
315
    "space_group_number": fields.Integer,
    "space_group_international_short_symbol": fields.String,
    "structure_prototype": fields.String,
316
    "structure_type": fields.String,
317
    "similarity": fields.List(fields.Nested(similarity, skip_none=True), skip_none=True),
318
})
319
320


321
@ns.route("/materials/<string:material_id>")
class EncMaterialResource(Resource):
    @api.response(404, "The material does not exist")
    @api.response(200, "Metadata send", fields.Raw)
    @api.doc("get_material")
    @api.expect(material_query)
    @api.marshal_with(material_result, skip_none=True)
    @api.param("material_id", "28 character identifier for the material.")
    @authenticate()
    def get(self, material_id):
        """Used to retrieve basic information related to a material.

        Returns either all properties listed in material_prop_map or only the
        single property requested with the "property" argument. If similarity
        data is available for the material, it is added under "similarity".
        Aborts with 404 if the search returns no hits for the material.
        """
        # Parse request arguments
        args = material_query.parse_args()
        prop = args.get("property", None)
        if prop is not None:
            keys = [prop]
            es_keys = [material_prop_map[prop]]
        else:
            keys = list(material_prop_map.keys())
            es_keys = list(material_prop_map.values())

        # Get the material info, check that at least one calculation is visible
        s = MaterialSearch()
        s.add_material_filter(Q("term", material_id=material_id))
        s.includes(es_keys)
        response = s.execute()

        # No such material
        if response.hits.total == 0:
            abort(404, message=missing_material_msg.format(material_id))

        # Add values from ES entry
        entry = response[0]
        result = get_es_doc_values(entry, material_prop_map, keys)

        # Add similarity data that is stored in MongoDB.
        try:
            material = Material.m_def.a_mongo.get(material_id=material_id)
            dos_similarity = material.similarity.electronic_dos
        except KeyError:
            # No similarity data for this material
            pass
        else:
            # Only include similarity for materials that exist on the current
            # deployment to avoid dead links.
            similar_ids = dos_similarity.material_ids
            id_value_map = dict(zip(similar_ids, dos_similarity.values))
            similar_search = MaterialSearch()
            similar_search.add_material_filter(Q("terms", material_id=similar_ids))
            similar_search.includes(["material_id", "formula_reduced", "bulk.space_group_number"])
            similar_search.size(5)
            similar_response = similar_search.execute()

            # Named differently from the module-level "similarity" model to
            # avoid shadowing it.
            similar_materials = []
            for hit in similar_response.hits:
                try:
                    similar_materials.append({
                        "material_id": hit.material_id,
                        "value": id_value_map[hit.material_id],
                        "formula": hit.formula_reduced,
                        "space_group_number": hit.bulk.space_group_number,
                    })
                except AttributeError:
                    # Hits that lack one of the reported fields are left out.
                    pass
            if similar_materials:
                result["similarity"] = similar_materials

        return result, 200


392
re_formula = re.compile(r"([A-Z][a-z]?)(\d*)")
393
range_query = api.model("range_query", {
394
395
396
    "max": fields.Float,
    "min": fields.Float,
})
397
398
materials_query = api.model("materials_input", {
    "search_by": fields.Nested(api.model("search_query", {
399
400
        "exclusive": fields.Boolean(default=False),
        "formula": fields.String,
Lauri Himanen's avatar
Lauri Himanen committed
401
        "element": fields.String,
402
403
404
        "page": fields.Integer(default=1, description="Requested page number, indexing starts from 1."),
        "per_page": fields.Integer(default=25, description="Number of results per page."),
        "restricted": fields.Boolean(default=False, description="Select to restrict the query to individual calculations. If not selected, the query will combine results from several different calculations."),
405
    })),
406
407
408
409
410
411
412
413
414
415
416
417
    "material_type": fields.List(fields.String(enum=list(Material.material_type.type)), description=Material.material_type.description),
    "material_name": fields.List(fields.String, description=Material.material_name.description),
    "structure_type": fields.List(fields.String, description=Bulk.structure_type.description),
    "space_group_number": fields.List(fields.Integer, description=Bulk.space_group_number.description),
    "crystal_system": fields.List(fields.String(enum=list(Bulk.crystal_system.type)), description=Bulk.crystal_system.description),
    "band_gap": fields.Nested(range_query, description="Band gap range in eV.", allow_null=True),
    "has_band_structure": fields.Boolean(description="Set to True if electronic band structure needs to be available for this material."),
    "has_dos": fields.Boolean(description="Set to True if electronic density of states needs to be available for this material."),
    "has_thermal_properties": fields.Boolean(description="Set to True if thermodynamical properties need to be available for this material."),
    "functional_type": fields.List(fields.String(enum=list(Method.functional_type.type)), description=Method.functional_type.description),
    "basis_set": fields.List(fields.String(enum=list(Method.basis_set.type)), description=Method.basis_set.description),
    "code_name": fields.List(fields.String(enum=list(Method.program_name.type)), description=Method.program_name.description),
418
})
419
420
421
422
423
424
425
pages_result = api.model("page_info", {
    "per_page": fields.Integer,
    "total": fields.Integer,
    "page": fields.Integer,
    "pages": fields.Integer,
})

426
427
materials_result = api.model("materials_result", {
    "total_results": fields.Integer(allow_null=False),
428
429
    "results": fields.List(fields.Nested(material_result, skip_none=True)),
    "pages": fields.Nested(pages_result, skip_none=True),
430
431
432
})


433
@ns.route("/materials/")
class EncMaterialsResource(Resource):
    @api.response(404, "No materials found")
    @api.response(400, "Bad request")
    @api.response(200, "OK", materials_result)
    @api.expect(materials_query, validate=False)
    @api.marshal_with(materials_result, skip_none=True)
    @api.doc("search_materials")
    @authenticate()
    def post(self):
        """Search materials based on their properties.

        The request body is marshalled with the materials_query model. The
        response contains the requested page of materials under "results" and
        the pagination information under "pages". Aborts with 400 if the
        request body cannot be marshalled or if the requested page or page
        size is smaller than one.
        """
        # Get query parameters as json
        try:
            data = marshal(request.get_json(), materials_query)
        except Exception as e:
            abort(400, message=str(e))

        search_by = data["search_by"]

        # Validate pagination early: a page size of zero would lead to a
        # division by zero below and a page smaller than one to a negative
        # offset in the search.
        page = search_by["page"]
        per_page = search_by["per_page"]
        if page < 1 or per_page < 1:
            abort(400, message="The values of 'page' and 'per_page' must be at least 1.")

        # Create filters from user query
        s = MaterialSearch()

        # Material level filters: (request key, ES field)
        material_terms = (
            ("material_type", "material_type"),
            ("material_name", "material_name"),
            ("structure_type", "bulk__structure_type"),
            ("space_group_number", "bulk__space_group_number"),
            ("crystal_system", "bulk__crystal_system"),
        )
        for key, es_field in material_terms:
            if data[key] is not None:
                s.add_material_filter(Q("terms", **{es_field: data[key]}))

        # Calculation filters: (request key, query type, ES field)
        calc_terms = (
            ("functional_type", "terms", "calculations__method__functional_type"),
            ("basis_set", "terms", "calculations__method__basis_set"),
            ("code_name", "terms", "calculations__method__program_name"),
            ("has_band_structure", "term", "calculations__properties__has_electronic_band_structure"),
            ("has_dos", "term", "calculations__properties__has_electronic_dos"),
            ("has_thermal_properties", "term", "calculations__properties__has_thermodynamical_properties"),
        )
        calc_filters = [
            Q(query_type, **{es_field: data[key]})
            for key, query_type, es_field in calc_terms
            if data[key] is not None
        ]
        band_gap = data["band_gap"]
        if band_gap is not None:
            calc_filters.append(get_range_filter(
                "calculations.properties.band_gap",
                minimum=band_gap.get("min"),
                maximum=band_gap.get("max"),
                source_unit=ureg.eV,
                target_unit=ureg.J,
            ))

        # In restricted mode all calculation filters have to be fulfilled by
        # the same calculation, so they go into one nested query. Otherwise
        # each filter gets its own nested query and may be fulfilled by a
        # different calculation of the material.
        if search_by["restricted"]:
            s.add_calculation_filter(calc_filters)
        else:
            for calc_filter in calc_filters:
                s.add_calculation_filter(calc_filter)

        formula = search_by["formula"]
        elements = search_by["element"]
        exclusive = search_by["exclusive"]

        # The given list of species/formula is reformatted with the Hill system into a
        # query string. With exclusive search we look for exact match, with
        # non-exclusive search we look for match that includes at least all
        # species, possibly even more.
        if formula is not None:
            element_list = []
            for match in re_formula.finditer(formula):
                symbol, count = match.groups()
                # A missing count means a single atom of the species.
                element_list += [symbol] * (int(count) if count else 1)

            names, reduced_counts = get_hill_decomposition(element_list, reduced=True)
            query_string = " ".join(
                name if count == 1 else "{}{}".format(name, int(count))
                for name, count in zip(names, reduced_counts)
            )

            if exclusive:
                s.add_material_filter(Q("term", **{"species_and_counts.keyword": query_string}))
            else:
                s.add_material_must(Q(
                    "match",
                    species_and_counts={"query": query_string, "operator": "and"}
                ))
        elif elements is not None:
            # Surrounding whitespace is removed so that e.g. "Si, O" works.
            symbols = [element.strip() for element in elements.split(",")]
            species, _ = get_hill_decomposition(symbols)
            query_string = " ".join(species)

            if exclusive:
                s.add_material_filter(Q("term", **{"species.keyword": query_string}))
            else:
                s.add_material_must(Q(
                    "match",
                    species={"query": query_string, "operator": "and"}
                ))

        # Execute query
        s.extra({
            "size": per_page,
            "from": (page - 1) * per_page,
            "sort": [{"formula_reduced": {"order": "asc"}}],
            "_source": {"includes": list(material_prop_map.values())},
        })
        response = s.execute()

        # Form final response
        total = response.hits.total
        pages = {
            "page": page,
            "per_page": per_page,
            "pages": math.ceil(total / per_page),
            "total": total,
        }

        # Gather the number of visible calculation for each returned material
        # with an aggregation
        agg_dict = {}
        if len(response) != 0:
            material_ids = [x.material_id for x in response]
            s2 = MaterialSearch()
            s2.size(0)
            matched = s2._s.aggs.bucket("matched", A("filter", filter=Q("terms", material_id=material_ids)))
            materials = matched.bucket("materials", A("terms", field="material_id", size=len(material_ids)))
            nested = materials.bucket("nested", A("nested", path="calculations"))
            nested.bucket(
                "visible",
                A("filter", filter=Q("bool", filter=s2.get_authentication_filters_nested()))
            )
            response2 = s2.execute()
            for agg in response2.aggs.matched.materials:
                agg_dict[agg.key] = agg.nested.visible.doc_count

        # Form the final list of results
        prop_keys = list(material_prop_map.keys())
        result_list = []
        for hit in response:
            res = get_es_doc_values(hit, material_prop_map, prop_keys)
            res["n_matches"] = agg_dict[hit.material_id]
            result_list.append(res)

        return {"results": result_list, "pages": pages}, 200
595
596


597
groups_result = api.model("groups_result", {
598
599
    "groups_eos": fields.Raw,
    "groups_par": fields.Raw,
600
601
602
})


603
@ns.route("/materials/<string:material_id>/groups")
class EncGroupsResource(Resource):
    @api.response(404, "Material not found")
    @api.response(400, "Bad request")
    @api.response(200, "OK", groups_result)
    @api.marshal_with(groups_result)
    @api.doc("get_material_groups")
    @api.param("material_id", "28 character identifier for the material.")
    @authenticate()
    def get(self, material_id):
        """Returns a summary of the calculation groups that were identified for this material.

        Two types of groups are reported: equation of state groups and
        parameter variation groups. Equation of state groups contain
        calculations with identical method and material, but different volume.
        Parameter variation groups contain identical structure but different
        methods. The response contains dictionaries for both groups
        ('groups_eos' and 'groups_par'). These dictionaries map a group id with
        a list of calculation ids.
        """
        # Get full entry for this material
        s = MaterialSearch()
        s.add_material_filter(Q("term", material_id=material_id))
        s.extra({
            "_source": {"includes": [
                "calculations.calc_id",
                "calculations.method.group_eos_id",
                "calculations.method.group_parametervariation_id",
                "calculations.properties.energies.energy_total",
                "calculations.idealized_structure.cell_volume",
            ]},
            "size": 1,
        })

        # Raise error if material not found
        try:
            calculations = s.calculations()
        except MaterialAccessError:
            abort(404, message=missing_material_msg.format(material_id))

        groups_eos = defaultdict(list)
        groups_param = defaultdict(list)
        for calc in calculations:
            # Calculations without a total energy or a cell volume are not
            # added to any group. The attributes are accessed only to find
            # out whether they exist.
            try:
                calc.properties.energies.energy_total
                calc.idealized_structure.cell_volume
            except AttributeError:
                continue
            try:
                group_eos_id = calc.method.group_eos_id
                if group_eos_id:
                    groups_eos[group_eos_id].append(calc.calc_id)
            except AttributeError:
                pass
            try:
                group_param_id = calc.method.group_parametervariation_id
                if group_param_id:
                    groups_param[group_param_id].append(calc.calc_id)
            except AttributeError:
                pass

        # Filter out groups with too few entries
        groups_eos = {
            group_id: calc_ids
            for group_id, calc_ids in groups_eos.items()
            if len(calc_ids) >= 4
        }
        groups_param = {
            group_id: calc_ids
            for group_id, calc_ids in groups_param.items()
            if len(calc_ids) >= 2
        }

        # Return results
        result = {
            "groups_eos": groups_eos,
            "groups_par": groups_param,
        }

        return result, 200


group_result = api.model("group_result", {
682
683
684
    "calculations": fields.List(fields.String, description="List of calculation ids."),
    "energies": fields.List(fields.Float, description="List of total energies."),
    "volumes": fields.List(fields.Float, description="List of cell volumes."),
685
686
687
688
689
690
691
})


@ns.route("/materials/<string:material_id>/groups/<string:group_type>/<string:group_id>")
class EncGroupResource(Resource):
    @api.response(404, "Group not found")
    @api.response(400, "Bad request")
    @api.response(200, "OK", group_result)
    @api.marshal_with(group_result)
    @api.doc("get_material_group")
    @api.param("group_type", "Type of group. Valid options are: 'eos' and 'par'.")
    @api.param("group_id", "28 character identifier for the group.")
    @api.param("material_id", "28 character identifier for the material.")
    @authenticate()
    def get(self, material_id, group_type, group_id):
        """Used to query detailed information about a specific calculation group.

        Collects every calculation of the material whose group identifier
        (EOS or parameter variation, selected by ``group_type``) equals
        ``group_id`` and returns their calculation ids, total energies and
        cell volumes, ordered by ascending total energy.

        Aborts with 400 for an unsupported ``group_type`` and with 404 if the
        material cannot be retrieved.
        """
        # Select the method attribute that stores the requested group id.
        if group_type == "eos":
            group_id_source = "group_eos_id"
        elif group_type == "par":
            group_id_source = "group_parametervariation_id"
        else:
            abort(400, message="Unsupported group type.")

        # Only fetch the fields that are needed to build the response.
        s = MaterialSearch()
        s.add_material_filter(Q("term", material_id=material_id))
        s.extra({
            "_source": {"includes": [
                "calculations.calc_id",
                "calculations.properties.energies.energy_total",
                "calculations.idealized_structure.cell_volume",
                "calculations.method." + group_id_source,
            ]},
            "size": 1,
        })

        # Raise error if material not found
        try:
            calculations = s.calculations()
        except MaterialAccessError:
            abort(404, message=missing_material_msg.format(material_id))

        # Gather the group members. All values of a calculation are read
        # before anything is stored, so that a calculation with a missing
        # value is skipped as a whole and the three result lists can never
        # get out of step with each other.
        members = []
        for calc in calculations:
            try:
                if getattr(calc.method, group_id_source) != group_id:
                    continue
                energy = calc.properties.energies.energy_total
                volume = calc.idealized_structure.cell_volume
                calc_id = calc.calc_id
            except (AttributeError, KeyError):
                continue
            # Null values cannot be ordered, treat them like missing values.
            if energy is None or volume is None:
                continue
            members.append((energy, volume, calc_id))

        # Sort results by energy
        members.sort(key=lambda member: member[0])

        # Return results
        group_dict = {
            "calculations": [calc_id for _, _, calc_id in members],
            "energies": [energy for energy, _, _ in members],
            "volumes": [volume for _, volume, _ in members],
        }

        return group_dict, 200
760
761
762
763


# Maps each key of a calculation result to the dotted path of the value
# within a calculation document (used together with get_es_doc_values).
calc_prop_map = {
    # Identifiers
    "calc_id": "calc_id",
    "upload_id": "upload_id",
    # Method information
    "code_name": "method.program_name",
    "code_version": "method.program_version",
    "functional_type": "method.functional_type",
    "basis_set_type": "method.basis_set",
    "core_electron_treatment": "method.core_electron_treatment",
    # Workflow information
    "run_type": "workflow.workflow_type",
    # Flags telling which properties are available
    "has_dos": "properties.has_electronic_dos",
    "has_band_structure": "properties.has_electronic_band_structure",
    "has_thermal_properties": "properties.has_thermodynamical_properties",
}

# Model of one calculation; its keys are the same as in calc_prop_map.
calculation_result = api.model(
    "calculation_result",
    {
        "calc_id": fields.String,
        "upload_id": fields.String,
        "code_name": fields.String,
        "code_version": fields.String,
        "functional_type": fields.String,
        "basis_set_type": fields.String,
        "core_electron_treatment": fields.String,
        "run_type": fields.String,
        "has_dos": fields.Boolean,
        "has_band_structure": fields.Boolean,
        "has_thermal_properties": fields.Boolean,
    },
)
788
789
790
791
792
793
# Model that holds, for each listed quantity, one string value (the
# calculations endpoint stores a calculation id in each of these keys).
representatives_result = api.model(
    "representatives_result",
    {
        "idealized_structure": fields.String,
        "electronic_band_structure": fields.String,
        "electronic_dos": fields.String,
        "thermodynamical_properties": fields.String,
    },
)
794
795
796
797
# Model of the response that lists the calculations of a material.
calculations_result = api.model(
    "calculations_result",
    {
        "total_results": fields.Integer,
        "pages": fields.Nested(pages_result),
        "results": fields.List(fields.Nested(calculation_result)),
        "representatives": fields.Nested(
            representatives_result,
            skip_none=True,
        ),
    },
)


@ns.route("/materials/<string:material_id>/calculations")
class EncCalculationsResource(Resource):
    @api.response(404, "Material not found")
    @api.response(400, "Bad request")
    @api.response(200, "OK", calculations_result)
    @api.doc("get_material_calculations")
    @authenticate()
    def get(self, material_id):
        """Used to return information about all calculations related to the given material.

        Returns a list of all calculations and a representative calculation for
        few select quantities that are shown in the material overview page.

        The calculations are ordered from the highest to the lowest quality
        score. The representative for a quantity is the best scoring
        calculation that provides the quantity; quantities that no calculation
        provides are left out. Aborts with 404 if the material cannot be
        retrieved.
        """
        s = MaterialSearch()
        s.add_material_filter(Q("term", material_id=material_id))
        s.extra({"_source": {"includes": ["calculations"]}})

        # Score tables, built once instead of once per scored calculation.
        functional_score = {
            "GGA": 100
        }
        code_score = {
            "VASP": 3,  # Prefer VASP data as it is the "cleanest" on average
            "FHI-aims": 2,
            "Quantum Espresso": 1,
        }

        def has_property(entry, flag):
            """Returns whether the given flag in the 'properties' of the
            calculation is set. Missing properties or a missing flag count as
            not set.
            """
            try:
                return bool(getattr(entry.properties, flag))
            except AttributeError:
                return False

        def method_value(entry, name):
            """Returns the given value from the 'method' of the calculation,
            or None if it is not available.
            """
            try:
                return getattr(entry.method, name)
            except AttributeError:
                return None

        def calc_score(entry):
            """Custom scoring function used to sort results by their
            "quality". Currently built to mimic the scoring that was used
            in the old Encyclopedia GUI. Primarily sorts by quality measure,
            ties are broken by alphabetic sorting of entry_id in order to
            return consistent results.
            """
            score = functional_score.get(method_value(entry, "functional_type"), 0)
            score += code_score.get(method_value(entry, "program_name"), 0)
            if has_property(entry, "has_electronic_dos") and has_property(entry, "has_electronic_band_structure"):
                score += 10

            return (score, entry.calc_id)

        # Raise error if material not found
        try:
            calculations = s.calculations()
        except MaterialAccessError:
            abort(404, message=missing_material_msg.format(material_id))

        # Sort calculations by "quality"
        sorted_calc = sorted(calculations, key=calc_score, reverse=True)

        # Get the requested representative properties. The best scoring
        # calculation represents the structure; there is none to pick if the
        # material has no calculations.
        representatives = {}
        if sorted_calc:
            representatives["idealized_structure"] = sorted_calc[0].calc_id

        # Response key -> flag that tells if a calculation has the property.
        property_flags = {
            "thermodynamical_properties": "has_thermodynamical_properties",
            "electronic_band_structure": "has_electronic_band_structure",
            "electronic_dos": "has_electronic_dos",
        }
        for calc in sorted_calc:
            for key, flag in property_flags.items():
                if key not in representatives and has_property(calc, flag):
                    representatives[key] = calc.calc_id
            # Stop as soon as every property has a representative.
            if all(key in representatives for key in property_flags):
                break

        # Create result JSON
        results = [get_es_doc_values(entry, calc_prop_map) for entry in sorted_calc]

        result = {
            "total_results": len(results),
            "results": results,
            "representatives": representatives,
        }

        return result, 200


898
899
900
901
# Histogram model: two lists, the statistics endpoint fills them with one
# entry per histogram bucket (document count and bucket key).
histogram = api.model(
    "histogram",
    {
        "occurrences": fields.List(fields.Integer),
        "values": fields.List(fields.Float),
    },
)
902
903
# Request body of the statistics endpoint.
statistics_query = api.model(
    "statistics_query",
    {
        "calculations": fields.List(fields.String),
        "properties": fields.List(fields.String),
        "n_histogram_bins": fields.Integer,
    },
)

# Statistics of a single property.
statistics = api.model(
    "statistics",
    {
        "min": fields.Float,
        "max": fields.Float,
        "avg": fields.Float,
        "histogram": fields.Nested(histogram, skip_none=True),
    },
)

# Response of the statistics endpoint: one optional statistics entry for
# each supported property.
statistics_result = api.model(
    "statistics_result",
    {
        name: fields.Nested(statistics, skip_none=True)
        for name in (
            "cell_volume",
            "atomic_density",
            "mass_density",
            "lattice_a",
            "lattice_b",
            "lattice_c",
            "alpha",
            "beta",
            "gamma",
            "band_gap",
        )
    },
)
925
926
927
928
929
930
931
932
933
934
# Maps the property names accepted by the statistics endpoint to the index
# field that is used in the aggregations for that property.
property_map = {
    # Structure
    "cell_volume": "encyclopedia.material.idealized_structure.cell_volume",
    # Densities
    "atomic_density": "encyclopedia.properties.atomic_density",
    "mass_density": "encyclopedia.properties.mass_density",
    # Lattice parameters
    "lattice_a": "encyclopedia.material.idealized_structure.lattice_parameters.a",
    "lattice_b": "encyclopedia.material.idealized_structure.lattice_parameters.b",
    "lattice_c": "encyclopedia.material.idealized_structure.lattice_parameters.c",
    "alpha": "encyclopedia.material.idealized_structure.lattice_parameters.alpha",
    "beta": "encyclopedia.material.idealized_structure.lattice_parameters.beta",
    "gamma": "encyclopedia.material.idealized_structure.lattice_parameters.gamma",
    # Electronic properties
    "band_gap": "encyclopedia.properties.band_gap",
}
937
938
939


@ns.route("/materials/<string:material_id>/statistics")
class EncStatisticsResource(Resource):
    @api.response(404, "Material or calculations not found")
    @api.response(400, "Bad request")
    @api.response(200, "OK", statistics_result)
    @api.expect(statistics_query, validate=False)
    @api.marshal_with(statistics_result, skip_none=True)
    @api.doc("get_material_statistics")
    @api.param("material_id", "28 character identifier for the material.")
    @authenticate()
    def post(self, material_id):
        """Used to return statistics related to the specified material and
        calculations.

        The request body follows ``statistics_query``. For every requested
        property that has values, the minimum, maximum, average and a
        histogram with ``n_histogram_bins`` buckets are returned.

        Aborts with 400 if the body cannot be read, if 'calculations' or
        'properties' is missing, if a property is not supported or if the
        number of histogram bins is not a positive integer. Aborts with 404
        if none of the given calculations is found for the material.
        """
        # Get query parameters as json
        try:
            data = marshal(request.get_json(), statistics_query)
        except Exception as e:
            abort(400, message=str(e))

        # Validate the query here, so that bad input is reported as a bad
        # request instead of failing later with a server error.
        calc_ids = data["calculations"]
        properties = data["properties"]
        n_bins = data["n_histogram_bins"]
        if calc_ids is None or properties is None:
            abort(400, message="Both 'calculations' and 'properties' have to be specified.")
        unknown = [prop for prop in properties if prop not in property_map]
        if unknown:
            abort(400, message="Unsupported properties: {}".format(", ".join(str(x) for x in unknown)))
        if properties and (n_bins is None or n_bins < 1):
            abort(400, message="'n_histogram_bins' has to be a positive integer.")

        # Find entries for the given material.
        bool_query = Q(
            "bool",
            filter=get_authentication_filters() + [
                Q("term", encyclopedia__material__material_id=material_id),
                Q("terms", calc_id=calc_ids),
            ]
        )

        def new_search():
            """Creates a search for the selected entries that returns only
            aggregations and no hits.
            """
            search_obj = Search(index=config.elastic.index_name)
            search_obj = search_obj.query(bool_query)
            return search_obj.extra(size=0)

        # Add statistics aggregations for each requested property
        s = new_search()
        for prop in properties:
            stats_agg = A("stats", field=property_map[prop])
            s.aggs.bucket("{}_stats".format(prop), stats_agg)

        # No hits on the top query level
        response = s.execute()
        if response.hits.total == 0:
            abort(404, message="The given calculations could not be found for material {}".format(material_id))

        # Only properties that have at least one value get statistics.
        stats_by_prop = {}
        for prop in properties:
            stats = getattr(response.aggs, "{}_stats".format(prop))
            if stats.count != 0:
                stats_by_prop[prop] = stats
        if not stats_by_prop:
            # Nothing to build histograms for, the second query is not needed.
            return {}, 200

        # Run a second query that creates histograms with fixed size buckets
        # based on the min and max from previous query. Might make more sense
        # to use the mean and sigma to define the range?
        s = new_search()
        for prop, stats in stats_by_prop.items():
            # The maximum is scaled up slightly before the range is divided
            # into the buckets.
            interval = (stats.max * 1.001 - stats.min) / n_bins
            # A histogram needs a positive interval. Fall back to a unit
            # interval when the range collapses to nothing.
            if interval <= 0:
                interval = 1
            hist_agg = A("histogram", field=property_map[prop], interval=interval, offset=stats.min, min_doc_count=0)
            s.aggs.bucket("{}_hist".format(prop), hist_agg)
        response_hist = s.execute()

        # Return results
        result = {}
        for prop, stats in stats_by_prop.items():
            hist = getattr(response_hist.aggs, "{}_hist".format(prop))
            result[prop] = {
                "min": stats.min,
                "max": stats.max,
                "avg": stats.avg,
                "histogram": {
                    "occurrences": [x.doc_count for x in hist.buckets],
                    "values": [x.key for x in hist.buckets],
                }
            }

        return result, 200
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035


# Model with the x, y and z variables of a Wyckoff set.
wyckoff_variables_result = api.model(
    "wyckoff_variables_result",
    {
        "x": fields.Float,
        "y": fields.Float,
        "z": fields.Float,
    },
)

# Model of a single Wyckoff set.
wyckoff_set_result = api.model(
    "wyckoff_set_result",
    {
        "wyckoff_letter": fields.String,
        "indices": fields.List(fields.Integer),
        "element": fields.String,
        "variables": fields.Nested(
            wyckoff_variables_result,
            skip_none=True,
        ),
    },
)
1038
1039
1040
1041
1042
1043
1044
1045
# Model of the lattice parameters: three float values a, b, c and three
# float values alpha, beta, gamma.
lattice_parameters = api.model(
    "lattice_parameters",
    {
        "a": fields.Float,
        "b": fields.Float,
        "c": fields.Float,
        "alpha": fields.Float,
        "beta": fields.Float,
        "gamma": fields.Float,
    },
)
1046
1047
1048
1049
1050
1051

# Model of an idealized structure.
idealized_structure_result = api.model(
    "idealized_structure_result",
    {
        # Atoms
        "atom_labels": fields.List(fields.String),
        "atom_positions": fields.List(fields.List(fields.Float)),
        # Lattice
        "lattice_vectors": fields.List(fields.List(fields.Float)),
        "lattice_vectors_primitive": fields.List(fields.List(fields.Float)),
        "lattice_parameters": fields.Nested(
            lattice_parameters,
            skip_none=True,
        ),
        # General information
        "periodicity": fields.List(fields.Boolean),
        "number_of_atoms": fields.Integer,
        "cell_volume": fields.Float,
        # Wyckoff sets
        "wyckoff_sets": fields.List(
            fields.Nested(wyckoff_set_result, skip_none=True)
        ),
    },
)

1059
1060
# Describes each calculation property that can be requested: "source" is
# either "es" or "archive" and "path" is the location of the value within
# that source.
calculation_property_map = {
    # Properties with source "es" (dot separated path)
    "lattice_parameters": {"source": "es", "path": "encyclopedia.material.idealized_structure.lattice_parameters"},
    "energies": {"source": "es", "path": "encyclopedia.properties.energies"},
    "mass_density": {"source": "es", "path": "encyclopedia.properties.mass_density"},
    "atomic_density": {"source": "es", "path": "encyclopedia.properties.atomic_density"},
    "cell_volume": {"source": "es", "path": "encyclopedia.material.idealized_structure.cell_volume"},
    "band_gap": {"source": "es", "path": "encyclopedia.properties.band_gap"},
    "electronic_band_structure": {"source": "es", "path": "encyclopedia.properties.electronic_band_structure"},
    "electronic_dos": {"source": "es", "path": "encyclopedia.properties.electronic_dos"},
    "phonon_band_structure": {"source": "es", "path": "encyclopedia.properties.phonon_band_structure"},
    "phonon_dos": {"source": "es", "path": "encyclopedia.properties.phonon_dos"},
    "thermodynamical_properties": {"source": "es", "path": "encyclopedia.properties.thermodynamical_properties"},
    # Properties with source "archive" (slash separated path)
    "wyckoff_sets": {"source": "archive", "path": "section_metadata/encyclopedia/material/idealized_structure/wyckoff_sets"},
    "idealized_structure": {"source": "archive", "path": "section_metadata/encyclopedia/material/idealized_structure"},
}

# Request body used to select calculation properties. The allowed names
# are the keys of calculation_property_map.
calculation_property_query = api.model(
    "calculation_query",
    {
        "properties": fields.List(
            fields.String(enum=list(calculation_property_map)),
            description="List of calculation properties to return.",
        ),
    },
)

# Model of the energies of a calculation.
energies = api.model(
    "energies",
    {
        "energy_total": fields.Float,
        "energy_total_T0": fields.Float,
        "energy_free": fields.Float,
    },
)
Lauri Himanen's avatar
Lauri Himanen committed
1122
1123
1124
1125
# Model of an electronic band structure. The raw fields are passed on
# without a fixed schema.
electronic_band_structure = api.model(
    "electronic_band_structure",
    {
        "reciprocal_cell": fields.List(fields.List(fields.Float)),
        "brillouin_zone": fields.Raw,
        "section_k_band_segment": fields.Raw,
        "section_band_gap": fields.Raw,
    },
)

# Model of an electronic density of states.
electronic_dos = api.model(
    "electronic_dos",
    {
        "dos_energies": fields.List(fields.Float),
        "dos_values": fields.List(fields.List(fields.Float)),
    },
)
1132
1133
1134
# Response model for the properties of a single calculation. Every key is
# also a key of calculation_property_map.
calculation_property_result = api.model("calculation_property_result", {
    "lattice_parameters": fields.Nested(lattice_parameters, skip_none=True),
    "energies": fields.Nested(energies, skip_none=True),
    "mass_density": fields.Float,
    "atomic_density": fields.Float,
    "cell_volume": fields.Float,
    # Declared as a list of sets, in the same way as in
    # idealized_structure_result, so that the documented schema describes a
    # list instead of a single object.
    "wyckoff_sets": fields.List(fields.Nested(wyckoff_set_result, skip_none=True)),
    "idealized_structure": fields.Nested(idealized_structure_result, skip_none=True),
    "band_gap": fields.Float,
    "electronic_band_structure": fields.Nested(electronic_band_structure, skip_none=True),
    "electronic_dos": fields.Nested(electronic_dos, skip_none=True),
    # Passed on without a fixed schema.
    "phonon_band_structure": fields.Raw,
    "phonon_dos": fields.Raw,
    "thermodynamical_properties": fields.Raw,
})


1149
1150
1151
1152
@ns.route("/materials/<string:material_id>/calculations/<string:calc_id>")
class EncCalculationResource(Resource):
    @api.response(404, "Material or calculation not found")
    @api.response(400, "Bad request")
1153
    @api.response(200, "OK", calculation_property_result)
1154
    @api.expect(calculation_property_query, validate=False)
1155
    @api.marshal_with(calculation_property_result, skip_none=True)
1156
    @api.doc("get_calculation")
1157
    @authenticate()
1158
    def post(self, material_id, calc_id):
1159
        """Get properties from a specific calculation related to a material.
1160
        """
1161
1162
1163
1164
1165
1166
        # Get query parameters as json
        try:
            data = marshal(request.get_json(), calculation_property_query)
        except Exception as e:
            abort(400, message=str(e))