repo.py 9.1 KB
Newer Older
Markus Scheidgen's avatar
Markus Scheidgen committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
# Copyright 2018 Markus Scheidgen
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
The repository API of the nomad@FAIRDI APIs. Currently allows resolving and searching
repository metadata.
"""

from flask_restplus import Resource, abort, fields
21
22
from flask import request, g
from elasticsearch_dsl import Q
Markus Scheidgen's avatar
Markus Scheidgen committed
23

24
from nomad.files import UploadFiles, Restricted
25
from nomad import search
Markus Scheidgen's avatar
Markus Scheidgen committed
26
27

from .app import api
28
from .auth import login_if_available, create_authorization_predicate
Markus Scheidgen's avatar
Markus Scheidgen committed
29
30
from .common import pagination_model, pagination_request_parser, calc_route

31
# API namespace 'repo'; the resources defined in this module are routed through it.
ns = api.namespace('repo', description='Access repository metadata.')
Markus Scheidgen's avatar
Markus Scheidgen committed
32
33
34
35
36


# Resource serving the repository metadata of a single calculation, addressed
# by its upload_id/calc_id pair.
@calc_route(ns)
class RepoCalcResource(Resource):
    @api.response(404, 'The upload or calculation does not exist')
    @api.response(401, 'Not authorized to access the calculation')
    @api.response(200, 'Metadata send', fields.Raw)
    @api.doc('get_repo_calc')
    @login_if_available
    def get(self, upload_id, calc_id):
        """
        Get calculation metadata in repository form.

        Repository metadata only comprises the quantities shown in the repository.
        Calcs are referenced via *upload_id*, *calc_id* pairs.
        """
        # TODO use elastic search instead of the files
        # TODO add missing user metadata (from elastic or repo db)
        is_authorized = create_authorization_predicate(upload_id, calc_id)
        files = UploadFiles.get(upload_id, is_authorized)
        if files is None:
            abort(404, message='There is no upload %s' % upload_id)

        try:
            calc_metadata = files.metadata.get(calc_id)
        except Restricted:
            # The authorization predicate denied access to this calculation.
            abort(401, message='Not authorized to access %s/%s.' % (upload_id, calc_id))
        except KeyError:
            # The upload exists, but it holds no calculation with this id.
            abort(404, message='There is no calculation for %s/%s' % (upload_id, calc_id))
        else:
            return calc_metadata, 200
Markus Scheidgen's avatar
Markus Scheidgen committed
60
61
62
63


# Descriptions of the individual fields in a search response, kept separate
# from the model definition for readability.
_results_description = (
    'A list of search results. Each result is a dict with quantitie names as key and '
    'values as values')
_scroll_id_description = 'Id of the current scroll view in scroll based search.'
_aggregations_description = (
    'A dict with all aggregations. Each aggregation is dictionary with a metrics dict as '
    'value and quantity value as key. The metrics are code runs(calcs), total energies, '
    'geometries, and datasets')
_metrics_description = (
    'A dict with the overall metrics. The metrics are code runs(calcs), total energies, '
    'geometries, and datasets')

# Response model of the repository search: pagination info, the result list,
# an optional scroll id, and aggregation/metrics data.
repo_calcs_model = api.model('RepoCalculations', {
    'pagination': fields.Nested(pagination_model),
    'results': fields.List(fields.Raw, description=_results_description),
    'scroll_id': fields.String(description=_scroll_id_description),
    'aggregations': fields.Raw(description=_aggregations_description),
    'metrics': fields.Raw(description=_metrics_description)
})

# Request parser of the repository search endpoint. It starts from a copy of
# the shared pagination parser and adds the owner, scroll, and metrics
# arguments plus one string argument per search quantity.
repo_request_parser = pagination_request_parser.copy()
repo_request_parser.add_argument(
    'owner', type=str,
    help='Specify which calcs to return: ``all``, ``user``, ``staging``, default is ``all``')
repo_request_parser.add_argument(
    'scroll', type=bool, help='Enable scrolling')
repo_request_parser.add_argument(
    'scroll_id', type=str, help='The id of the current scrolling window to use.')
repo_request_parser.add_argument(
    'total_metrics', type=str, help=(
        # The trailing space keeps the two concatenated sentences apart.
        'Metrics to aggregate all search results over. '
        'Possible values are total_energies, geometries, and datasets.'))
repo_request_parser.add_argument(
    'aggregation_metrics', type=str, help=(
        'Metrics to aggregate all aggregation buckets over as comma separated list. '
        'Possible values are total_energies, geometries, and datasets.'))

# Every search quantity becomes an optional string argument; the third element
# of each quantity definition is used as its help text.
for search_quantity, (_, _, description) in search.search_quantities.items():
    repo_request_parser.add_argument(search_quantity, type=str, help=description)

Markus Scheidgen's avatar
Markus Scheidgen committed
98
99
100

# Resource implementing the paginated/scrolled search over repository entries.
@ns.route('/')
class RepoCalcsResource(Resource):
    @api.doc('search')
    @api.response(400, 'Invalid requests, e.g. wrong owner type or bad quantities')
    @api.expect(repo_request_parser, validate=True)
    @api.marshal_with(repo_calcs_model, skip_none=True, code=200, description='Metadata send')
    @login_if_available
    def get(self):
        """
        Search for calculations in the repository form, paginated.

        The ``owner`` parameter determines the overall entries to search through.
        You can use the various quantities to search/filter for. For some of the
        indexed quantities this endpoint returns aggregation information. This means
        you will be given a list of all possible values and the number of entries
        that have the certain value. You can also use these aggregations on an empty
        search to determine the possible values.

        The pagination parameters allow determining which page to return via the
        ``page`` and ``per_page`` parameters. Pagination however, is limited to the first
        100k (depending on ES configuration) hits. An alternative to pagination is to use
        ``scroll`` and ``scroll_id``. With ``scroll`` you will get a ``scroll_id`` on
        the first request. Each call with ``scroll`` and the respective ``scroll_id`` will
        return the next ``per_page`` (here the default is 1000) results. Scroll however,
        ignores ordering and does not return aggregations. The scroll view used in the
        background will stay alive for 1 minute between requests. Scrolling is
        considered disabled if ``scroll`` is omitted, empty, or one of ``false``,
        ``0``, ``no``, ``off``.

        The search will return aggregations on a predefined set of quantities. Aggregations
        will tell you what quantity values exist and how many entries match those values.

        Ordering is determined by ``order_by`` and ``order`` parameters.
        """
        args = request.args

        # bool() of a query string is True for every non-empty value, including
        # 'false' and '0'. Interpret the common negative spellings explicitly.
        scroll = args.get('scroll', '').lower() not in ('', 'false', '0', 'no', 'off')
        scroll_id = args.get('scroll_id', None)

        # Only the integer conversions can fail here, so only they are guarded.
        try:
            page = int(args.get('page', 1))
            per_page = int(args.get('per_page', 10 if not scroll else 1000))
            order = int(args.get('order', -1))
        except (TypeError, ValueError):
            abort(400, message='bad parameter types')

        # Unknown metric names are silently dropped from the comma separated lists.
        valid_metrics = ('total_energies', 'geometries', 'datasets')
        total_metrics = [
            metric for metric in args.get('total_metrics', '').split(',')
            if metric in valid_metrics]
        aggregation_metrics = [
            metric for metric in args.get('aggregation_metrics', '').split(',')
            if metric in valid_metrics]

        owner = args.get('owner', 'all')
        order_by = args.get('order_by', 'formula')

        # Explicit checks instead of assert, which is removed when running with -O.
        if page < 1 or per_page <= 0:
            abort(400, message='invalid pagination')

        if order not in (-1, 1):
            abort(400, message='invalid order, valid values are -1 and 1')

        # Build the base query that restricts the searched entries by ownership.
        if owner == 'all':
            # Published entries without embargo, plus the user's own entries if logged in.
            q = Q('term', published=True) & Q('term', with_embargo=False)
            if g.user is not None:
                q = q | Q('term', owners__user_id=g.user.user_id)
        elif owner == 'user':
            if g.user is None:
                abort(401, message='Authentication required for owner value user.')

            q = Q('term', owners__user_id=g.user.user_id)
        elif owner == 'staging':
            if g.user is None:
                abort(401, message='Authentication required for owner value staging.')

            q = Q('term', published=False) & Q('term', owners__user_id=g.user.user_id)
        else:
            abort(400, message='Invalid owner value. Valid values are all|user|staging, default is all')

        # Everything that is not one of the endpoint's own parameters is passed
        # on to the search as a quantity to filter for.
        data = dict(**request.args)
        for own_parameter in (
                'owner', 'scroll', 'scroll_id', 'per_page', 'page', 'order', 'order_by',
                'total_metrics', 'aggregation_metrics'):
            data.pop(own_parameter, None)

        if scroll:
            data.update(scroll_id=scroll_id, size=per_page)
        else:
            data.update(
                per_page=per_page, page=page, order=order, order_by=order_by,
                total_metrics=total_metrics, aggregation_metrics=aggregation_metrics)

        try:
            if scroll:
                # Scrolling has no page concept and yields no aggregations or metrics.
                page = -1
                scroll_id, total, results = search.scroll_search(q=q, **data)
                aggregations = {}
                metrics = {}
            else:
                scroll_id = None
                total, results, aggregations, metrics = search.aggregate_search(q=q, **data)
        except KeyError as e:
            # The search functions raise KeyError; it is reported as a bad request.
            abort(400, str(e))

        return dict(
            pagination=dict(total=total, page=page, per_page=per_page),
            results=results,
            scroll_id=scroll_id,
            aggregations=aggregations,
            metrics=metrics), 200