repo.py 11.7 KB
Newer Older
Markus Scheidgen's avatar
Markus Scheidgen committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
# Copyright 2018 Markus Scheidgen
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
The repository API of the nomad@FAIRDI APIs. Currently allows resolving and searching
repository meta-data.
"""

from flask_restplus import Resource, abort, fields
21
22
from flask import request, g
from elasticsearch_dsl import Q
23
from elasticsearch.exceptions import NotFoundError
Markus Scheidgen's avatar
Markus Scheidgen committed
24
import datetime
Markus Scheidgen's avatar
Markus Scheidgen committed
25

Markus Scheidgen's avatar
Markus Scheidgen committed
26
from nomad import search
Markus Scheidgen's avatar
Markus Scheidgen committed
27

Markus Scheidgen's avatar
Markus Scheidgen committed
28
from .app import api, rfc3339DateTime
29
from .auth import login_if_available
Markus Scheidgen's avatar
Markus Scheidgen committed
30
31
from .common import pagination_model, pagination_request_parser, calc_route

32
ns = api.namespace('repo', description='Access repository metadata.')
Markus Scheidgen's avatar
Markus Scheidgen committed
33
34
35
36
37


@calc_route(ns)
class RepoCalcResource(Resource):
    # Serves the repository metadata of a single calculation.
    @api.response(404, 'The upload or calculation does not exist')
    @api.response(401, 'Not authorized to access the calculation')
    @api.response(200, 'Metadata send', fields.Raw)
    @api.doc('get_repo_calc')
    @login_if_available
    def get(self, upload_id, calc_id):
        """
        Get calculation metadata in repository form.

        Repository metadata only entails the quantities shown in the repository.
        Calcs are references via *upload_id*, *calc_id* pairs.
        """
        try:
            entry = search.Entry.get(calc_id)
        except NotFoundError:
            abort(404, message='There is no calculation %s/%s' % (upload_id, calc_id))

        # Published entries without embargo are visible to everybody.
        if not entry.with_embargo and entry.published:
            return entry.to_dict(), 200

        # Everything else requires an authenticated user ...
        user = g.user
        if user is None:
            abort(401, message='Not logged in to access %s/%s.' % (upload_id, calc_id))

        # ... that is either the user with id 0 or one of the entry's owners.
        # At somepoint ids will be emails (strings) anyways. Right now it is hard to
        # make sure that both are either str or int, hence the comparison as strings.
        has_access = user.user_id == 0 or any(
            str(owner.user_id) == str(user.user_id) for owner in entry.owners)
        if not has_access:
            abort(401, message='Not authorized to access %s/%s.' % (upload_id, calc_id))

        return entry.to_dict(), 200
Markus Scheidgen's avatar
Markus Scheidgen committed
70
71
72
73


# Comma separated list of all available metric names, used in the descriptions below.
_repo_metrics_names = ', '.join(search.metrics_names)

# Response model of the repository search: pagination information, the actual results,
# an optional scroll id, and the aggregations/metrics computed over the results.
repo_calcs_model = api.model('RepoCalculations', {
    'pagination': fields.Nested(pagination_model),
    'results': fields.List(
        fields.Raw,
        description=(
            'A list of search results. Each result is a dict with '
            'quantitie names as key and values as values')),
    'scroll_id': fields.String(
        description='Id of the current scroll view in scroll based search.'),
    'aggregations': fields.Raw(
        description=(
            'A dict with all aggregations. Each aggregation is dictionary with '
            'a metrics dict as value and quantity value as key. '
            'The metrics are code runs(calcs), %s. ' % _repo_metrics_names)),
    'metrics': fields.Raw(
        description=(
            'A dict with the overall metrics. '
            'The metrics are code runs(calcs), %s.' % _repo_metrics_names))
})

# Request parser for the repository search. It starts from a copy of the common
# pagination parser and adds the search specific arguments.
repo_request_parser = pagination_request_parser.copy()
repo_request_parser.add_argument(
    'owner', type=str,
    help='Specify which calcs to return: ``all``, ``public``, ``user``, ``staging``, default is ``all``')
repo_request_parser.add_argument(
    'from_time', type=lambda x: rfc3339DateTime.parse(x),
    help='A yyyy-MM-ddTHH:mm:ss (RFC3339) minimum entry time (e.g. upload time)')
repo_request_parser.add_argument(
    'until_time', type=lambda x: rfc3339DateTime.parse(x),
    help='A yyyy-MM-ddTHH:mm:ss (RFC3339) maximum entry time (e.g. upload time)')
repo_request_parser.add_argument(
    'scroll', type=bool, help='Enable scrolling')
repo_request_parser.add_argument(
    'scroll_id', type=str, help='The id of the current scrolling window to use.')
repo_request_parser.add_argument(
    'total_metrics', type=str, help=(
        # The trailing space keeps the two sentences apart in the rendered help.
        'Metrics to aggregate all search results over. '
        'Possible values are %s.' % ', '.join(search.metrics_names)))
repo_request_parser.add_argument(
    'aggregation_metrics', type=str, help=(
        'Metrics to aggregate all aggregation buckets over as comma separated list. '
        'Possible values are %s.' % ', '.join(search.metrics_names)))

# Every search quantity becomes an optional string argument; the third element of
# each quantity definition is used as the help text.
for search_quantity, (_, _, description) in search.search_quantities.items():
    repo_request_parser.add_argument(search_quantity, type=str, help=description)

Markus Scheidgen's avatar
Markus Scheidgen committed
114
115
116

@ns.route('/')
class RepoCalcsResource(Resource):
    # Search endpoint of the repository; see the docstring of ``get`` for the API.
    @api.doc('search')
    @api.response(400, 'Invalid requests, e.g. wrong owner type or bad quantities')
    @api.expect(repo_request_parser, validate=True)
    @api.marshal_with(repo_calcs_model, skip_none=True, code=200, description='Metadata send')
    @login_if_available
    def get(self):
        """
        Search for calculations in the repository, paginated.

        The ``owner`` parameter determines the overall entries to search through.
        Possible values are: ``all`` (show all entries visible to the current user), ``public``
        (show all publicly visible entries), ``user`` (show all user entries, requires login),
        ``staging`` (show all user entries in staging area, requires login).

        You can use the various quantities to search/filter for. For some of the
        indexed quantities this endpoint returns aggregation information. This means
        you will be given a list of all possible values and the number of entries
        that have the certain value. You can also use these aggregations on an empty
        search to determine the possible values.

        The pagination parameters allow to determine which page to return via the
        ``page`` and ``per_page`` parameters. Pagination however, is limited to the first
        100k (depending on ES configuration) hits.

        An alternative to pagination is to use ``scroll`` and ``scroll_id``. With ``scroll``
        you will get a ``scroll_id`` on the first request. Each call with ``scroll`` and
        the respective ``scroll_id`` will return the next ``per_page`` (here the default is 1000)
        results. Scroll however, ignores ordering and does not return aggregations.
        The scroll view used in the background will stay alive for 1 minute between requests.
        If the given ``scroll_id`` is not available anymore, a HTTP 400 is raised.

        The search will return aggregations on a predefined set of quantities. Aggregations
        will tell you what quantity values exist and how many entries match those values.

        Ordering is determined by ``order_by`` and ``order`` parameters.
        """

        try:
            # Query parameters arrive as strings and bool('false') is True. Therefore,
            # explicit "false" literals are recognized; any other non-empty value
            # still enables scrolling, as it did before.
            scroll_str = request.args.get('scroll', '')
            scroll = scroll_str.strip().lower() not in ('', '0', 'false', 'no', 'off')
            scroll_id = request.args.get('scroll_id', None)
            page = int(request.args.get('page', 1))
            per_page = int(request.args.get('per_page', 10 if not scroll else 1000))
            order = int(request.args.get('order', -1))
            total_metrics_str = request.args.get('total_metrics', '')
            aggregation_metrics_str = request.args.get('aggregation_metrics', '')

            from_time = rfc3339DateTime.parse(request.args.get('from_time', '2000-01-01'))
            until_time_str = request.args.get('until_time', None)
            until_time = rfc3339DateTime.parse(until_time_str) if until_time_str is not None else datetime.datetime.now()
            time_range = (from_time, until_time)

            # Unknown metric names are silently dropped.
            total_metrics = [
                metric for metric in total_metrics_str.split(',')
                if metric in search.metrics_names]
            aggregation_metrics = [
                metric for metric in aggregation_metrics_str.split(',')
                if metric in search.metrics_names]
        except Exception:
            abort(400, message='bad parameter types')

        owner = request.args.get('owner', 'all')
        order_by = request.args.get('order_by', 'formula')

        # Explicit checks instead of assert statements, because asserts are removed
        # when Python runs with -O and the validation would silently disappear.
        if page < 1 or per_page <= 0:
            abort(400, message='invalid pagination')

        if order not in [-1, 1]:
            abort(400, message='invalid pagination')

        # TODO this should be removed after migration
        # if owner == 'migrated':
        #     q = Q('term', published=True) & Q('term', with_embargo=False)
        #     if g.user is not None:
        #         q = q | Q('term', owners__user_id=g.user.user_id)
        #     q = q & ~Q('term', **{'uploader.user_id': 1})  # pylint: disable=invalid-unary-operand-type
        if owner == 'all':
            # everything public plus, if logged in, everything the user owns
            q = Q('term', published=True) & Q('term', with_embargo=False)
            if g.user is not None:
                q = q | Q('term', owners__user_id=g.user.user_id)
        elif owner == 'public':
            q = Q('term', published=True) & Q('term', with_embargo=False)
        elif owner == 'user':
            if g.user is None:
                abort(401, message='Authentication required for owner value user.')

            q = Q('term', owners__user_id=g.user.user_id)
        elif owner == 'staging':
            if g.user is None:
                abort(401, message='Authentication required for owner value staging.')
            q = Q('term', published=False) & Q('term', owners__user_id=g.user.user_id)
        elif owner == 'admin':
            if g.user is None or not g.user.is_admin:
                abort(401, message='This can only be used by the admin user.')
            # no owner based restriction for the admin
            q = None
        else:
            abort(400, message='Invalid owner value. Valid values are all|public|user|staging, default is all')

        # TODO this should be removed after migration
        without_currupted_mainfile = ~Q('term', code_name='currupted mainfile')  # pylint: disable=invalid-unary-operand-type
        q = q & without_currupted_mainfile if q is not None else without_currupted_mainfile

        # All remaining request arguments are passed on to the search as quantities;
        # the parameters handled above are removed first and re-added in parsed form.
        data = dict(**request.args)
        for handled_parameter in (
                'owner', 'scroll', 'scroll_id', 'per_page', 'page', 'order', 'order_by',
                'total_metrics', 'aggregation_metrics', 'from_time', 'until_time'):
            data.pop(handled_parameter, None)

        if scroll:
            data.update(scroll_id=scroll_id, size=per_page)
        else:
            data.update(
                per_page=per_page, page=page, order=order, order_by=order_by, time_range=time_range,
                total_metrics=total_metrics, aggregation_metrics=aggregation_metrics)

        try:
            if scroll:
                # scroll based search has no pages and returns no aggregations/metrics
                page = -1
                scroll_id, total, results = search.scroll_search(q=q, **data)
                aggregations = {}
                metrics = {}
            else:
                scroll_id = None
                total, results, aggregations, metrics = search.aggregate_search(q=q, **data)
        except search.ScrollIdNotFound:
            abort(400, 'The given scroll_id does not exist.')
        except KeyError as e:
            abort(400, str(e))

        # TODO just a workarround to make things prettier
        if 'code_name' in aggregations and 'currupted mainfile' in aggregations['code_name']:
            del aggregations['code_name']['currupted mainfile']

        return dict(
            pagination=dict(total=total, page=page, per_page=per_page),
            results=results,
            scroll_id=scroll_id,
            aggregations=aggregations,
            metrics=metrics), 200