repo.py 31.4 KB
Newer Older
Markus Scheidgen's avatar
Markus Scheidgen committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
# Copyright 2018 Markus Scheidgen
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

15
'''
Markus Scheidgen's avatar
Markus Scheidgen committed
16
17
The repository API of the nomad@FAIRDI APIs. Currently allows to resolve repository
meta-data.
18
'''
Markus Scheidgen's avatar
Markus Scheidgen committed
19

20
from typing import List, Dict, Any
Markus Scheidgen's avatar
Markus Scheidgen committed
21
from flask_restplus import Resource, abort, fields
22
from flask import request, g
23
from elasticsearch_dsl import Q
24
from elasticsearch.exceptions import NotFoundError
25
import elasticsearch.helpers
Markus Scheidgen's avatar
Markus Scheidgen committed
26
from datetime import datetime
Markus Scheidgen's avatar
Markus Scheidgen committed
27

28
from nomad import search, utils, datamodel, processing as proc, infrastructure
29
from nomad.metainfo import search_extension
30
from nomad.datamodel import Dataset, User, EditableUserMetadata
31
from nomad.app import common
32
from nomad.app.common import RFC3339DateTime, DotKeyNested
Markus Scheidgen's avatar
Markus Scheidgen committed
33

Markus Scheidgen's avatar
Markus Scheidgen committed
34
from .api import api
35
from .auth import authenticate
36
37
from .common import search_model, calc_route, add_pagination_parameters,\
    add_scroll_parameters, add_search_parameters, apply_search_parameters,\
38
    query_api_python, query_api_curl, _search_quantities
Markus Scheidgen's avatar
Markus Scheidgen committed
39

40
ns = api.namespace('repo', description='Access repository metadata.')
Markus Scheidgen's avatar
Markus Scheidgen committed
41
42
43
44
45


@calc_route(ns)
class RepoCalcResource(Resource):
    @api.response(404, 'The upload or calculation does not exist')
    @api.response(401, 'Not authorized to access the calculation')
    @api.response(200, 'Metadata send', fields.Raw)
    @api.doc('get_repo_calc')
    @authenticate()
    def get(self, upload_id, calc_id):
        '''
        Get calculation metadata in repository form.

        Repository metadata only entails the quantities shown in the repository.
        Calcs are references via *upload_id*, *calc_id* pairs.
        '''
        try:
            entry = search.entry_document.get(calc_id)
        except NotFoundError:
            abort(404, message='There is no calculation %s/%s' % (upload_id, calc_id))

        # entries under embargo or not yet published are only visible to their
        # owners and to admins
        restricted = entry.with_embargo or not entry.published
        if restricted:
            if g.user is None:
                abort(401, message='Not logged in to access %s/%s.' % (upload_id, calc_id))

            is_owner = any(owner.user_id == g.user.user_id for owner in entry.owners)
            if not (is_owner or g.user.is_admin):
                abort(401, message='Not authorized to access %s/%s.' % (upload_id, calc_id))

        # enrich the raw metadata with ready-made API usage snippets
        payload = entry.to_dict()
        payload['python'] = query_api_python('archive', upload_id, calc_id)
        payload['curl'] = query_api_curl('archive', upload_id, calc_id)

        return payload, 200
Markus Scheidgen's avatar
Markus Scheidgen committed
74

75

76
77
78
79
80
# Request parser for the main /repo/ search endpoint: standard pagination,
# scroll and search parameters plus repo-specific aggregation switches.
_search_request_parser = api.parser()
add_pagination_parameters(_search_request_parser)
add_scroll_parameters(_search_request_parser)
add_search_parameters(_search_request_parser)
_search_request_parser.add_argument(
    'date_histogram', type=bool, help='Add an additional aggregation over the upload time')
_search_request_parser.add_argument(
    'metrics', type=str, action='append', help=(
        'Metrics to aggregate over all quantities and their values as comma separated list. '
        'Possible values are %s.' % ', '.join(search_extension.metrics.keys())))
_search_request_parser.add_argument(
    'statistics', type=bool, help=('Return statistics.'))
_search_request_parser.add_argument(
    'exclude', type=str, action='split', help='Excludes the given keys in the returned data.')
# one boolean flag and one "<group>_after" scroll cursor per search group
for group_name in search_extension.groups:
    _search_request_parser.add_argument(
        group_name, type=bool, help=('Return %s group data.' % group_name))
    _search_request_parser.add_argument(
        '%s_after' % group_name, type=str,
        help='The last %s id of the last scroll window for the %s group' % (group_name, group_name))

# Response model for /repo/ search results; extends the generic search model
# with statistics, per-group results and all searchable quantities.
_repo_calcs_model_fields = {
    'statistics': fields.Raw(description=(
        'A dict with all statistics. Each statistic is dictionary with a metrics dict as '
        'value and quantity value as key. The possible metrics are code runs(calcs), %s. '
        'There is a pseudo quantity "total" with a single value "all" that contains the '
        ' metrics over all results. ' % ', '.join(search_extension.metrics.keys())))}

# group names may contain dots (nested keys); DotKeyNested handles those
for group_name in search_extension.groups:
    _repo_calcs_model_fields[group_name] = (DotKeyNested if '.' in group_name else fields.Nested)(api.model('RepoGroup', {
        'after': fields.String(description='The after value that can be used to retrieve the next %s.' % group_name),
        'values': fields.Raw(description='A dict with %s as key. The values are dicts with "total" and "examples" keys.' % group_name)
    }), skip_none=True)

for qualified_name, quantity in search_extension.search_quantities.items():
    _repo_calcs_model_fields[qualified_name] = fields.Raw(
        description=quantity.description, allow_null=True, skip_none=True)

_repo_calcs_model = api.inherit('RepoCalculations', search_model, _repo_calcs_model_fields)
115
116


Markus Scheidgen's avatar
Markus Scheidgen committed
117
118
@ns.route('/')
class RepoCalcsResource(Resource):
    @api.doc('search')
    @api.response(400, 'Invalid requests, e.g. wrong owner type or bad search parameters')
    @api.expect(_search_request_parser, validate=True)
    @api.marshal_with(_repo_calcs_model, skip_none=True, code=200, description='Search results send')
    @authenticate()
    def get(self):
        '''
        Search for calculations in the repository form, paginated.

        The ``owner`` parameter determines the overall entries to search through.
        Possible values are: ``all`` (show all entries visible to the current user), ``public``
        (show all publicly visible entries), ``user`` (show all user entries, requires login),
        ``staging`` (show all user entries in staging area, requires login).

        You can use the various quantities to search/filter for. For some of the
        indexed quantities this endpoint returns aggregation information. This means
        you will be given a list of all possible values and the number of entries
        that have the certain value. You can also use these aggregations on an empty
        search to determine the possible values.

        The pagination parameters allows determine which page to return via the
        ``page`` and ``per_page`` parameters. Pagination however, is limited to the first
        100k (depending on ES configuration) hits.

        An alternative to pagination is to use ``scroll`` and ``scroll_id``. With ``scroll``
        you will get a ``scroll_id`` on the first request. Each call with ``scroll`` and
        the respective ``scroll_id`` will return the next ``per_page`` (here the default is 1000)
        results. Scroll however, ignores ordering and does not return aggregations.
        The scroll view used in the background will stay alive for 1 minute between requests.
        If the given ``scroll_id`` is not available anymore, a HTTP 400 is raised.

        The search will return aggregations on a predefined set of quantities. Aggregations
        will tell you what quantity values exist and how many entries match those values.

        Ordering is determined by ``order_by`` and ``order`` parameters. Default is
        ``upload_time`` in descending order.
        '''
        # parse and normalize all request arguments; any parsing problem is a
        # client error, hence the broad except -> 400
        try:
            parsed_args = _search_request_parser.parse_args()
            args = {
                key: value for key, value in parsed_args.items()
                if value is not None}

            scroll = args.get('scroll', False)
            scroll_id = args.get('scroll_id', None)
            page = args.get('page', 1)
            # scrolling uses a larger default window than pagination
            per_page = args.get('per_page', 10 if not scroll else 1000)
            order = args.get('order', -1)
            order_by = args.get('order_by', 'upload_time')

            date_histogram = args.get('date_histogram', False)
            # read metrics from the raw request to get the repeated-arg list form
            metrics: List[str] = request.args.getlist('metrics')

            # statistics are also needed implicitly when any group data is requested
            with_statistics = args.get('statistics', False) or \
                any(args.get(group_name, False) for group_name in search_extension.groups)
        except Exception as e:
            abort(400, message='bad parameters: %s' % str(e))

        search_request = search.SearchRequest()
        apply_search_parameters(search_request, args)
        if date_histogram:
            search_request.date_histogram()

        # validate pagination and ordering parameters
        try:
            assert page >= 1
            assert per_page >= 0
        except AssertionError:
            abort(400, message='invalid pagination')

        if order not in [-1, 1]:
            abort(400, message='invalid pagination')

        for metric in metrics:
            if metric not in search_extension.metrics:
                abort(400, message='there is no metric %s' % metric)

        if with_statistics:
            search_request.default_statistics(metrics_to_use=metrics)

            # group aggregations need their own metric (e.g. entry counts per group)
            additional_metrics = [
                group_quantity.metric_name
                for group_name, group_quantity in search_extension.groups.items()
                if args.get(group_name, False)]

            total_metrics = metrics + additional_metrics

            search_request.totals(metrics_to_use=total_metrics)
            search_request.statistic('authors', 1000)
        elif len(metrics) > 0:
            search_request.totals(metrics_to_use=metrics)

        if 'exclude' in parsed_args:
            excludes = parsed_args['exclude']
            if excludes is not None:
                search_request.exclude(*excludes)

        try:
            if scroll:
                results = search_request.execute_scrolled(scroll_id=scroll_id, size=per_page)

            else:
                # register one quantity aggregation per requested group, with
                # per-group 'after' cursors for scrolling through group values
                for group_name, group_quantity in search_extension.groups.items():
                    if args.get(group_name, False):
                        kwargs: Dict[str, Any] = {}
                        if group_name == 'group_uploads':
                            kwargs.update(order_by='upload_time', order='desc')
                        search_request.quantity(
                            group_quantity.qualified_name, size=per_page, examples=1,
                            after=request.args.get('%s_after' % group_name, None),
                            **kwargs)

                results = search_request.execute_paginated(
                    per_page=per_page, page=page, order=order, order_by=order_by)

                # TODO just a work around to make things prettier
                # NOTE: 'currupted mainfile' (sic) must match the exact value
                # stored in the index -- do not "fix" the spelling here alone
                if with_statistics:
                    statistics = results['statistics']
                    if 'code_name' in statistics and 'currupted mainfile' in statistics['code_name']:
                        del(statistics['code_name']['currupted mainfile'])

                # move per-group quantity aggregations to top-level group keys
                if 'quantities' in results:
                    quantities = results.pop('quantities')

                for group_name, group_quantity in search_extension.groups.items():
                    if args.get(group_name, False):
                        results[group_name] = quantities[group_quantity.qualified_name]

            # build python code/curl snippet
            code_args = dict(request.args)
            if 'statistics' in code_args:
                del(code_args['statistics'])
            results['curl'] = query_api_curl('archive', 'query', query_string=code_args)
            results['python'] = query_api_python('archive', 'query', query_string=code_args)

            return results, 200
        except search.ScrollIdNotFound:
            abort(400, 'The given scroll_id does not exist.')
        except KeyError as e:
            import traceback
            traceback.print_exc()
            abort(400, str(e))
261

262

263
# Body model for query-based endpoints (e.g. /repo/edit): owner, time range,
# and one field per searchable quantity.
_query_model_parameters = {
    'owner': fields.String(description='Specify which calcs to return: ``all``, ``public``, ``user``, ``staging``, default is ``all``'),
    'from_time': RFC3339DateTime(description='A yyyy-MM-ddTHH:mm:ss (RFC3339) minimum entry time (e.g. upload time)'),
    'until_time': RFC3339DateTime(description='A yyyy-MM-ddTHH:mm:ss (RFC3339) maximum entry time (e.g. upload time)')
}

for qualified_name, quantity in search.search_quantities.items():
    # multi-valued quantities are modeled as lists of strings, everything else
    # as a plain string field
    if quantity.many_and:
        def field(**kwargs):
            return fields.List(fields.String(**kwargs))
    else:
        field = fields.String
    _query_model_parameters[qualified_name] = field(description=quantity.description)

_repo_query_model = api.model('RepoQuery', _query_model_parameters, skip_none=True)
278
279
280
281


def repo_edit_action_field(quantity):
    ''' Build the API model field that describes edit actions for *quantity*.

    Scalar quantities take a single nested action; shaped quantities take a
    list of nested actions.
    '''
    if quantity.is_scalar:
        return fields.Nested(
            _repo_edit_action_model, description=quantity.description, skip_none=True)

    action_field = fields.Nested(_repo_edit_action_model, skip_none=True)
    return fields.List(action_field, description=quantity.description)
286
287


288
# Model for a single edit action: the requested value plus per-action
# success/message fields that are only filled in the API response.
_repo_edit_action_model = api.model('RepoEditAction', {
    'value': fields.String(description='The value/values that is set as a string.'),
    'success': fields.Boolean(description='If this can/could be done. Only in API response.'),
    # bug fix: keyword was misspelled ``descriptin`` and therefore silently
    # ignored, leaving this field undocumented in the generated API spec
    'message': fields.String(description='A message that details the action result. Only in API response.')
})

294
# Request/response body of /repo/edit: a query selecting entries, the actions
# to apply, a verify flag, and overall success/message (response only).
_repo_edit_model = api.model('RepoEdit', {
    'verify': fields.Boolean(description='If true, no action is performed.'),
    'query': fields.Nested(_repo_query_model, skip_none=True, description='New metadata will be applied to query results.'),
    'actions': fields.Nested(
        api.model('RepoEditActions', {
            quantity.name: repo_edit_action_field(quantity)
            for quantity in EditableUserMetadata.m_def.definitions
        }), skip_none=True,
        description='Each action specifies a single value (even for multi valued quantities).'),
    'success': fields.Boolean(description='If the overall edit can/could be done. Only in API response.'),
    'message': fields.String(description='A message that details the overall edit result. Only in API response.')
})

# lookup of editable metadata quantities by name, used to validate edit actions
_editable_quantities = {
    quantity.name: quantity for quantity in EditableUserMetadata.m_def.definitions}

310

311
def edit(parsed_query: Dict[str, Any], mongo_update: Dict[str, Any] = None, re_index=True) -> List[str]:
    '''
    Apply *mongo_update* to all entries matching *parsed_query* and optionally
    re-index the changed entries in elastic search.

    Arguments:
        parsed_query: already validated search parameters selecting the entries.
        mongo_update: mongoengine-style update kwargs; ``None`` skips the mongo update.
        re_index: if true, the affected entries are re-indexed in elastic search.

    Returns:
        The ids of all uploads that contain affected entries.
    '''
    # get all calculations that have to change
    with utils.timer(common.logger, 'edit query executed'):
        search_request = search.SearchRequest().include('calc_id', 'upload_id')
        apply_search_parameters(search_request, parsed_query)
        upload_ids = set()
        calc_ids = []
        for hit in search_request.execute_scan():
            calc_ids.append(hit['calc_id'])
            upload_ids.add(hit['upload_id'])

    # perform the update on the mongo db
    with utils.timer(common.logger, 'edit mongo update executed', size=len(calc_ids)):
        if mongo_update is not None:
            n_updated = proc.Calc.objects(calc_id__in=calc_ids).update(multi=True, **mongo_update)
            if n_updated != len(calc_ids):
                common.logger.error('edit repo did not update all entries', payload=mongo_update)

    # re-index the affected entries in elastic search
    with utils.timer(common.logger, 'edit elastic update executed', size=len(calc_ids)):
        if re_index:
            def elastic_updates():
                # one bulk 'index' op per affected calc, rebuilt from mongo metadata
                for calc in proc.Calc.objects(calc_id__in=calc_ids):
                    entry = datamodel.EntryMetadata.m_def.m_x('elastic').create_index_entry(
                        datamodel.EntryMetadata.m_from_dict(calc['metadata']))
                    entry = entry.to_dict(include_meta=True)
                    entry['_op_type'] = 'index'
                    yield entry

            _, failed = elasticsearch.helpers.bulk(
                infrastructure.elastic_client, elastic_updates(), stats_only=True)
            search.refresh()
            if failed > 0:
                # bug fix: with stats_only=True ``failed`` is an int, so the
                # previous ``len(failed)`` raised TypeError in this error path
                common.logger.error(
                    'edit repo with failed elastic updates',
                    payload=mongo_update, nfailed=failed)

    return list(upload_ids)

350

351
def get_uploader_ids(query):
    ''' Collect the uploader ids of all entries matching *query*.

    Used to check coauthors and shared_with edits against existing uploaders.
    '''
    uploader_request = search.SearchRequest()
    apply_search_parameters(uploader_request, query)
    uploader_request.quantity(name='uploader_id')
    aggregated = uploader_request.execute()
    return aggregated['quantities']['uploader_id']['values']


359
360
@ns.route('/edit')
class EditRepoCalcsResource(Resource):
    @api.doc('edit_repo')
    @api.response(400, 'Invalid requests, e.g. wrong owner type or bad search parameters')
    @api.expect(_repo_edit_model)
    @api.marshal_with(_repo_edit_model, skip_none=True, code=200, description='Edit verified/performed')
    @authenticate()
    def post(self):
        ''' Edit repository metadata. '''

        # basic body parsing and some semantic checks
        json_data = request.get_json()
        if json_data is None:
            json_data = {}
        query = json_data.get('query', {})

        # edits are restricted to the user's own (or staging) entries
        owner = query.get('owner', 'user')
        if owner not in ['user', 'staging']:
            abort(400, 'Not a valid owner for edit %s. Edit can only be performed in user or staging' % owner)
        query['owner'] = owner

        if 'actions' not in json_data:
            abort(400, 'Missing key actions in edit data')
        actions = json_data['actions']
        verify = json_data.get('verify', False)

        # preparing the query of entries that are edited
        parsed_query = {}
        for quantity_name, value in query.items():
            if quantity_name in _search_quantities:
                quantity = search.search_quantities[quantity_name]
                if quantity.many:
                    # accept both list values and comma separated strings
                    if not isinstance(value, list):
                        value = value.split(',')
                parsed_query[quantity_name] = value
        parsed_query['owner'] = owner
        parsed_query['domain'] = query.get('domain')

        # checking the edit actions and preparing a mongo update on the fly
        json_data['success'] = True
        mongo_update = {}
        uploader_ids = None
        lift_embargo = False
        removed_datasets = None

        # NOTE(review): if ``actions`` is an empty dict, ``has_error`` below is
        # never assigned and the later ``if has_error`` would raise NameError --
        # confirm whether empty actions can reach this point
        with utils.timer(common.logger, 'edit verified'):
            for action_quantity_name, quantity_actions in actions.items():
                quantity = _editable_quantities.get(action_quantity_name)
                if quantity is None:
                    abort(400, 'Unknown quantity %s' % action_quantity_name)

                # some quantities may only be edited by an admin
                # NOTE(review): ``g.user.is_admin()`` is called here, but
                # RepoCalcResource.get uses ``g.user.is_admin`` as a property --
                # one of the two usages is likely wrong; also 404 (not 401) is
                # returned here -- confirm both are intentional
                quantity_flask = quantity.m_x('flask', {})
                if quantity_flask.get('admin_only', False):
                    if not g.user.is_admin():
                        abort(404, 'Only the admin user can set %s' % quantity.name)

                # list-shaped quantities require list actions, scalars a single one
                if isinstance(quantity_actions, list) == quantity.is_scalar:
                    abort(400, 'Wrong shape for quantity %s' % action_quantity_name)

                if not isinstance(quantity_actions, list):
                    quantity_actions = [quantity_actions]

                flask_verify = quantity_flask.get('verify', None)
                mongo_key = 'metadata__%s' % quantity.name
                has_error = False
                for action in quantity_actions:
                    action['success'] = True
                    action['message'] = None
                    action_value = action.get('value')
                    action_value = action_value if action_value is None else action_value.strip()

                    if action_value is None:
                        mongo_value = None

                    elif action_value == '':
                        mongo_value = None

                    elif flask_verify == datamodel.User:
                        # value must reference an existing user ...
                        try:
                            mongo_value = User.get(user_id=action_value).user_id
                        except KeyError:
                            action['success'] = False
                            has_error = True
                            action['message'] = 'User does not exist'
                            continue

                        # ... that is not already an uploader of a matching entry
                        if uploader_ids is None:
                            uploader_ids = get_uploader_ids(parsed_query)
                        if action_value in uploader_ids:
                            action['success'] = False
                            has_error = True
                            action['message'] = 'This user is already an uploader of one entry in the query'
                            continue

                    elif flask_verify == datamodel.Dataset:
                        # value references a dataset of the current user; a
                        # missing dataset is created (unless only verifying)
                        try:
                            mongo_value = Dataset.m_def.m_x('me').get(
                                user_id=g.user.user_id, name=action_value).dataset_id
                        except KeyError:
                            action['message'] = 'Dataset does not exist and will be created'
                            mongo_value = None
                            if not verify:
                                dataset = Dataset(
                                    dataset_id=utils.create_uuid(), user_id=g.user.user_id,
                                    name=action_value, created=datetime.utcnow())
                                dataset.m_x('me').create()
                                mongo_value = dataset.dataset_id

                    elif action_quantity_name == 'with_embargo':
                        # ignore the actual value ... just lift the embargo
                        mongo_value = False
                        lift_embargo = True

                        # check if necessary
                        search_request = search.SearchRequest()
                        apply_search_parameters(search_request, parsed_query)
                        search_request.q = search_request.q & Q('term', with_embargo=True)
                        if search_request.execute()['total'] == 0:
                            action['success'] = False
                            has_error = True
                            action['message'] = 'There is no embargo to lift'
                            continue
                    else:
                        mongo_value = action_value

                    # scalar quantities overwrite; shaped quantities accumulate
                    # into a list, rejecting duplicates
                    if len(quantity.shape) == 0:
                        mongo_update[mongo_key] = mongo_value
                    else:
                        mongo_values = mongo_update.setdefault(mongo_key, [])
                        if mongo_value is not None:
                            if mongo_value in mongo_values:
                                action['success'] = False
                                has_error = True
                                action['message'] = 'Duplicate values are not allowed'
                                continue
                            mongo_values.append(mongo_value)

                # an empty action list on a shaped quantity clears the values
                if len(quantity_actions) == 0 and len(quantity.shape) > 0:
                    mongo_update[mongo_key] = []

                if action_quantity_name == 'datasets':
                    # check if datasets edit is allowed and if datasets have to be removed
                    search_request = search.SearchRequest()
                    apply_search_parameters(search_request, parsed_query)
                    search_request.quantity(name='dataset_id')
                    old_datasets = list(
                        search_request.execute()['quantities']['dataset_id']['values'].keys())

                    removed_datasets = []
                    for dataset_id in old_datasets:
                        if dataset_id not in mongo_update.get(mongo_key, []):
                            removed_datasets.append(dataset_id)

                    # entries must not be removed from datasets that have a DOI
                    doi_ds = Dataset.m_def.m_x('me').objects(
                        dataset_id__in=removed_datasets, doi__ne=None).first()
                    if doi_ds is not None:
                        json_data['success'] = False
                        json_data['message'] = json_data.get('message', '') + \
                            'Edit would remove entries from a dataset with DOI (%s) ' % doi_ds.name
                        has_error = True

        # stop here, if client just wants to verify its actions
        if verify:
            return json_data, 200

        # stop if the action were not ok
        if has_error:
            return json_data, 400

        # perform the change
        mongo_update['metadata__last_edit'] = datetime.utcnow()
        upload_ids = edit(parsed_query, mongo_update, True)

        # lift embargo
        if lift_embargo:
            for upload_id in upload_ids:
                upload = proc.Upload.get(upload_id)
                upload.re_pack()

        # remove potentially empty old datasets
        if removed_datasets is not None:
            for dataset in removed_datasets:
                if proc.Calc.objects(metadata__dataset_id=dataset).first() is None:
                    Dataset.m_def.m_x('me').objects(dataset_id=dataset).delete()

        return json_data, 200
545

546

547
548
549
550
551
552
553
554
# Parser and response models for /repo/quantity/<quantity>: search parameters
# plus 'after'-cursor scrolling over the quantity's values.
_repo_quantity_search_request_parser = api.parser()
add_search_parameters(_repo_quantity_search_request_parser)
_repo_quantity_search_request_parser.add_argument(
    'after', type=str, help='The after value to use for "scrolling".')
_repo_quantity_search_request_parser.add_argument(
    'size', type=int, help='The max size of the returned values.')

_repo_quantity_model = api.model('RepoQuantity', {
    'after': fields.String(description='The after value that can be used to retrieve the next set of values.'),
    'values': fields.Raw(description='A dict with values as key. Values are dicts with "total" and "examples" keys.')
})

_repo_quantity_values_model = api.model('RepoQuantityValues', {
    'quantity': fields.Nested(_repo_quantity_model, allow_null=True)
})

563

564
@ns.route('/quantity/<string:quantity>')
class RepoQuantityResource(Resource):
    @api.doc('quantity_search')
    @api.response(400, 'Invalid requests, e.g. wrong owner type, bad quantity, bad search parameters')
    @api.expect(_repo_quantity_search_request_parser, validate=True)
    @api.marshal_with(_repo_quantity_values_model, skip_none=True, code=200, description='Search results send')
    @authenticate()
    def get(self, quantity: str):
        '''
        Retrieve quantity values from entries matching the search.

        You can use the various quantities to search/filter for. For some of the
        indexed quantities this endpoint returns aggregation information. This means
        you will be given a list of all possible values and the number of entries
        that have the certain value. You can also use these aggregations on an empty
        search to determine the possible values.

        There is no ordering and no pagination. Instead there is an 'after' key based
        scrolling. The result will contain an 'after' value, that can be specified
        for the next request. You can use the 'size' and 'after' parameters accordingly.

        The result will contain a 'quantity' key with quantity values and the "after"
        value. There will be up to 'size' many values. For the rest of the values use the
        "after" parameter in another request.
        '''
        search_request = search.SearchRequest()
        # keep only arguments the client actually provided
        args = {
            key: value
            for key, value in _repo_quantity_search_request_parser.parse_args().items()
            if value is not None}

        apply_search_parameters(search_request, args)
        after = args.get('after', None)
        size = args.get('size', 100)

        try:
            assert size >= 0
        except AssertionError:
            abort(400, message='invalid size')

        try:
            # aggregate the requested quantity with cursor-based scrolling
            search_request.quantity(quantity, size=size, after=after)
            results = search_request.execute()
            # expose the single requested aggregation under the 'quantity' key
            quantities = results.pop('quantities')
            results['quantity'] = quantities[quantity]

            return results, 200
        except KeyError as e:
            import traceback
            traceback.print_exc()
            abort(400, 'Given quantity does not exist: %s' % str(e))
616
617


618
619
620
# Parser and response model for /repo/quantities: retrieve values for several
# quantities at once (no per-quantity 'after' scrolling here).
_repo_quantities_search_request_parser = api.parser()
add_search_parameters(_repo_quantities_search_request_parser)
_repo_quantities_search_request_parser.add_argument(
    'quantities', type=str, action='append',
    help='The quantities to retrieve values from')
_repo_quantities_search_request_parser.add_argument(
    'size', type=int, help='The max size of the returned values.')

_repo_quantities_model = api.model('RepoQuantities', {
    'quantities': fields.List(fields.Nested(_repo_quantity_model))
})

630
631
632
633
634

@ns.route('/quantities')
class RepoQuantitiesResource(Resource):
    @api.doc('quantities_search')
    @api.response(400, 'Invalid requests, e.g. wrong owner type, bad quantity, bad search parameters')
    @api.expect(_repo_quantities_search_request_parser, validate=True)
    @api.marshal_with(_repo_quantities_model, skip_none=True, code=200, description='Search results send')
    @authenticate()
    def get(self):
        '''
        Retrieve quantity values for multiple quantities at once.

        You can use the various quantities to search/filter for. For some of the
        indexed quantities this endpoint returns aggregation information. This means
        you will be given a list of all possible values and the number of entries
        that have the certain value. You can also use these aggregations on an empty
        search to determine the possible values.

        There is no ordering and no pagination and not after key based scrolling. Instead
        there is an 'after' key based scrolling.

        The result will contain a 'quantities' key with a dict of quantity names and the
        retrieved values as values.
        '''
        search_request = search.SearchRequest()
        # Drop unset arguments so downstream code only sees explicit values.
        args = {
            key: value
            for key, value in _repo_quantities_search_request_parser.parse_args().items()
            if value is not None}

        apply_search_parameters(search_request, args)

        quantities = args.get('quantities', [])
        size = args.get('size', 5)

        # Validate explicitly: an `assert` would be stripped under `python -O`
        # and let a negative size through to elasticsearch.
        if size < 0:
            abort(400, message='invalid size')

        for quantity in quantities:
            try:
                search_request.quantity(quantity, size=size)
            except KeyError as e:
                # Unknown quantity name is a client error, not a server error.
                abort(400, 'Given quantity does not exist: %s' % str(e))

        return search_request.execute(), 200
# Response model for PID resolution: the two ids that uniquely locate an entry.
_repo_calc_id_model = api.model('RepoCalculationId', {
    'upload_id': fields.String(), 'calc_id': fields.String()
})
@ns.route('/pid/<path:pid>')
class RepoPidResource(Resource):
    @api.doc('resolve_pid')
    @api.response(404, 'Entry with PID does not exist')
    @api.marshal_with(_repo_calc_id_model, skip_none=True, code=200, description='Entry resolved')
    @authenticate()
    def get(self, pid: str):
        '''
        Resolve a PID to the entry's upload_id and calc_id.

        Accepts either a plain integer PID or a handle of the form
        '21.11132/<encoded pid>'. Returns 400 for malformed PIDs and 404 if no
        (visible) entry carries the PID.
        '''
        if '/' in pid:
            # Handle-style PID. The <path:> converter allows additional
            # slashes; treat anything that is not exactly 'prefix/suffix'
            # as a malformed PID (400) instead of crashing with a 500.
            try:
                prefix, pid = pid.split('/')
            except ValueError:
                abort(400, 'Wrong PID format')
            if prefix != '21.11132':
                abort(400, 'Wrong PID format')
            try:
                pid_int = utils.decode_handle_id(pid)
            except ValueError:
                abort(400, 'Wrong PID format')
        else:
            try:
                pid_int = int(pid)
            except ValueError:
                abort(400, 'Wrong PID format')

        # Only fetch the two ids we return; restrict visibility to what the
        # (possibly anonymous) caller is allowed to see.
        search_request = search.SearchRequest().include('upload_id', 'calc_id')

        if g.user is not None:
            search_request.owner('all', user_id=g.user.user_id)
        else:
            search_request.owner('all')

        search_request.search_parameter('pid', pid_int)

        results = list(search_request.execute_scan())
        total = len(results)

        if total == 0:
            abort(404, 'Entry with PID %s does not exist' % pid)

        if total > 1:
            # PIDs should be unique; log the inconsistency but still answer
            # with the first match.
            common.logger.error('Two entries for the same pid', pid=pid_int)

        result = results[0]
        return dict(
            upload_id=result['upload_id'],
            calc_id=result['calc_id'])