repo.py 31.1 KB
Newer Older
Markus Scheidgen's avatar
Markus Scheidgen committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
# Copyright 2018 Markus Scheidgen
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
The repository API of the nomad@FAIRDI APIs. Currently allows to resolve repository
meta-data.
"""

20
from typing import List, Dict, Any
Markus Scheidgen's avatar
Markus Scheidgen committed
21
from flask_restplus import Resource, abort, fields
22
from flask import request, g
23
from elasticsearch_dsl import Q
24
from elasticsearch.exceptions import NotFoundError
25
import elasticsearch.helpers
Markus Scheidgen's avatar
Markus Scheidgen committed
26
from datetime import datetime
Markus Scheidgen's avatar
Markus Scheidgen committed
27

28
from nomad import search, utils, datamodel, processing as proc, infrastructure
29
from nomad.datamodel import UserMetadata, Dataset, User, Domain
30
from nomad.app import common
31
from nomad.app.common import RFC3339DateTime, DotKeyNested
Markus Scheidgen's avatar
Markus Scheidgen committed
32

Markus Scheidgen's avatar
Markus Scheidgen committed
33
from .api import api
34
from .auth import authenticate
35
36
from .common import search_model, calc_route, add_pagination_parameters,\
    add_scroll_parameters, add_search_parameters, apply_search_parameters,\
37
    query_api_python, query_api_curl, _search_quantities
Markus Scheidgen's avatar
Markus Scheidgen committed
38

39
# All repository endpoints below are registered under the /repo namespace.
ns = api.namespace('repo', description='Access repository metadata.')
@calc_route(ns)
class RepoCalcResource(Resource):
    @api.response(404, 'The upload or calculation does not exist')
    @api.response(401, 'Not authorized to access the calculation')
    @api.response(200, 'Metadata send', fields.Raw)
    @api.doc('get_repo_calc')
    @authenticate()
    def get(self, upload_id, calc_id):
        """
        Get calculation metadata in repository form.

        Repository metadata only entails the quantities shown in the repository.
        Calcs are referenced via *upload_id*, *calc_id* pairs.
        """
        try:
            entry = search.Entry.get(calc_id)
        except NotFoundError:
            abort(404, message='There is no calculation %s/%s' % (upload_id, calc_id))

        # entries under embargo or not yet published are only visible to
        # their owners and to admins
        restricted = entry.with_embargo or not entry.published
        if restricted:
            if g.user is None:
                abort(401, message='Not logged in to access %s/%s.' % (upload_id, calc_id))

            is_owner = any(owner.user_id == g.user.user_id for owner in entry.owners)
            if not is_owner and not g.user.is_admin:
                abort(401, message='Not authorized to access %s/%s.' % (upload_id, calc_id))

        result = entry.to_dict()
        # attach ready-made API snippets for retrieving the archive data
        result['python'] = query_api_python('archive', upload_id, calc_id)
        result['curl'] = query_api_curl('archive', upload_id, calc_id)

        return result, 200
Markus Scheidgen's avatar
Markus Scheidgen committed
73

74

75
76
77
78
79
# Request parser for the main /repo/ search endpoint. Combines the shared
# pagination, scroll and domain search parameters with repo specific flags.
_search_request_parser = api.parser()
add_pagination_parameters(_search_request_parser)
add_scroll_parameters(_search_request_parser)
add_search_parameters(_search_request_parser)
_search_request_parser.add_argument(
    'date_histogram', type=bool, help='Add an additional aggregation over the upload time')
_search_request_parser.add_argument(
    'metrics', type=str, action='append', help=(
        'Metrics to aggregate over all quantities and their values as comma separated list. '
        'Possible values are %s.' % ', '.join(search.metrics_names)))
_search_request_parser.add_argument(
    'statistics', type=bool, help=('Return statistics.'))
_search_request_parser.add_argument(
    'exclude', type=str, action='split', help='Excludes the given keys in the returned data.')
# For every search group (e.g. uploads, datasets) add a boolean flag to
# request that group's data and an "<group>_after" cursor for scrolling it.
for group_name in search.groups:
    _search_request_parser.add_argument(
        group_name, type=bool, help=('Return %s group data.' % group_name))
    _search_request_parser.add_argument(
        '%s_after' % group_name, type=str,
        help='The last %s id of the last scroll window for the %s group' % (group_name, group_name))

96
97
98
99
100
# Response model for the /repo/ search endpoint: a statistics dict, one nested
# group model per search group, and one raw field per domain quantity. All of
# this is inherited on top of the shared search_model.
_repo_calcs_model_fields = {
    'statistics': fields.Raw(description=(
        'A dict with all statistics. Each statistic is dictionary with a metrics dict as '
        'value and quantity value as key. The possible metrics are code runs(calcs), %s. '
        'There is a pseudo quantity "total" with a single value "all" that contains the '
        ' metrics over all results. ' % ', '.join(search.metrics_names)))}

for group_name, (group_quantity, _) in search.groups.items():
    # DotKeyNested is used for group names that contain dots, so the key is
    # not interpreted as a nested attribute path during marshalling
    _repo_calcs_model_fields[group_name] = (DotKeyNested if '.' in group_name else fields.Nested)(api.model('RepoGroup', {
        'after': fields.String(description='The after value that can be used to retrieve the next %s.' % group_name),
        'values': fields.Raw(description='A dict with %s as key. The values are dicts with "total" and "examples" keys.' % group_quantity)
    }), skip_none=True)

for quantity in Domain.all_quantities():
    _repo_calcs_model_fields[quantity.name] = fields.Raw(
        description=quantity.description, allow_null=True, skip_none=True)

_repo_calcs_model = api.inherit('RepoCalculations', search_model, _repo_calcs_model_fields)
114
115


Markus Scheidgen's avatar
Markus Scheidgen committed
116
117
@ns.route('/')
class RepoCalcsResource(Resource):
    @api.doc('search')
    @api.response(400, 'Invalid requests, e.g. wrong owner type or bad search parameters')
    @api.expect(_search_request_parser, validate=True)
    @api.marshal_with(_repo_calcs_model, skip_none=True, code=200, description='Search results send')
    @authenticate()
    def get(self):
        """
        Search for calculations in the repository form, paginated.

        The ``owner`` parameter determines the overall entries to search through.
        Possible values are: ``all`` (show all entries visible to the current user), ``public``
        (show all publically visible entries), ``user`` (show all user entries, requires login),
        ``staging`` (show all user entries in staging area, requires login).

        You can use the various quantities to search/filter for. For some of the
        indexed quantities this endpoint returns aggregation information. This means
        you will be given a list of all possible values and the number of entries
        that have the certain value. You can also use these aggregations on an empty
        search to determine the possible values.

        The pagination parameters allows determine which page to return via the
        ``page`` and ``per_page`` parameters. Pagination however, is limited to the first
        100k (depending on ES configuration) hits.

        An alternative to pagination is to use ``scroll`` and ``scroll_id``. With ``scroll``
        you will get a ``scroll_id`` on the first request. Each call with ``scroll`` and
        the respective ``scroll_id`` will return the next ``per_page`` (here the default is 1000)
        results. Scroll however, ignores ordering and does not return aggregations.
        The scroll view used in the background will stay alive for 1 minute between requests.
        If the given ``scroll_id`` is not available anymore, a HTTP 400 is raised.

        The search will return aggregations on a predefined set of quantities. Aggregations
        will tell you what quantity values exist and how many entries match those values.

        Ordering is determined by ``order_by`` and ``order`` parameters. Default is
        ``upload_time`` in descending order.
        """
        # parse and normalize all request arguments; any parsing problem is
        # reported as a 400 instead of a 500
        try:
            parsed_args = _search_request_parser.parse_args()
            args = {
                key: value for key, value in parsed_args.items()
                if value is not None}

            scroll = args.get('scroll', False)
            scroll_id = args.get('scroll_id', None)
            page = args.get('page', 1)
            # scrolled requests default to a much larger window than pages
            per_page = args.get('per_page', 10 if not scroll else 1000)
            order = args.get('order', -1)
            order_by = args.get('order_by', 'upload_time')

            date_histogram = args.get('date_histogram', False)
            # metrics is read from the raw request args to support repeated
            # ?metrics=... parameters
            metrics: List[str] = request.args.getlist('metrics')

            # statistics are needed either when asked for explicitly or when
            # any group data was requested
            with_statistics = args.get('statistics', False) or \
                any(args.get(group_name, False) for group_name in search.groups)
        except Exception as e:
            abort(400, message='bad parameters: %s' % str(e))

        search_request = search.SearchRequest()
        apply_search_parameters(search_request, args)
        if date_histogram:
            search_request.date_histogram()

        # validate pagination, ordering and metric names before executing
        try:
            assert page >= 1
            assert per_page >= 0
        except AssertionError:
            abort(400, message='invalid pagination')

        if order not in [-1, 1]:
            abort(400, message='invalid pagination')

        for metric in metrics:
            if metric not in search.metrics_names:
                abort(400, message='there is no metric %s' % metric)

        if with_statistics:
            search_request.default_statistics(metrics_to_use=metrics)

            # groups that were requested contribute their own metric to the
            # totals aggregation
            additional_metrics = [
                metric
                for group_name, (_, metric) in search.groups.items()
                if args.get(group_name, False)]

            total_metrics = metrics + additional_metrics

            search_request.totals(metrics_to_use=total_metrics)
            search_request.statistic('authors', 1000)
        elif len(metrics) > 0:
            search_request.totals(metrics_to_use=metrics)

        if 'exclude' in parsed_args:
            excludes = parsed_args['exclude']
            if excludes is not None:
                search_request.exclude(*excludes)

        try:
            if scroll:
                # scroll path: no ordering, no aggregations
                results = search_request.execute_scrolled(scroll_id=scroll_id, size=per_page)

            else:
                # paginated path: add a quantity aggregation (with example)
                # for each requested group before executing
                for group_name, (group_quantity, _) in search.groups.items():
                    if args.get(group_name, False):
                        kwargs: Dict[str, Any] = {}
                        if group_name == 'uploads':
                            # uploads group is ordered by upload time, newest first
                            kwargs.update(order_by='upload_time', order='desc')
                        search_request.quantity(
                            group_quantity, size=per_page, examples=1,
                            after=request.args.get('%s_after' % group_name, None),
                            **kwargs)

                results = search_request.execute_paginated(
                    per_page=per_page, page=page, order=order, order_by=order_by)

                # TODO just a work around to make things prettier
                # NOTE(review): the misspelled 'currupted mainfile' value must
                # match whatever was produced at indexing time -- do not "fix"
                # the spelling here without migrating the index.
                if with_statistics:
                    statistics = results['statistics']
                    if 'code_name' in statistics and 'currupted mainfile' in statistics['code_name']:
                        del(statistics['code_name']['currupted mainfile'])

                # move the raw quantity aggregations into per-group result keys
                # NOTE(review): `quantities` is only bound when the results
                # contain a 'quantities' key -- presumably guaranteed whenever
                # a group aggregation was added above; verify against search.
                if 'quantities' in results:
                    quantities = results.pop('quantities')

                for group_name, (group_quantity, _) in search.groups.items():
                    if args.get(group_name, False):
                        results[group_name] = quantities[group_quantity]

            # build python code/curl snippet
            code_args = dict(request.args)
            if 'statistics' in code_args:
                del(code_args['statistics'])
            results['curl'] = query_api_curl('archive', 'query', query_string=code_args)
            results['python'] = query_api_python('archive', 'query', query_string=code_args)

            return results, 200
        except search.ScrollIdNotFound:
            abort(400, 'The given scroll_id does not exist.')
        except KeyError as e:
            import traceback
            traceback.print_exc()
            abort(400, str(e))
260

261

262
# Body model for query-based endpoints (e.g. /repo/edit): owner, time window
# and one field per domain quantity. Multi valued quantities without an
# argparse action are modelled as lists of strings.
_query_model_parameters = {
    'owner': fields.String(description='Specify which calcs to return: ``all``, ``public``, ``user``, ``staging``, default is ``all``'),
    'from_time': RFC3339DateTime(description='A yyyy-MM-ddTHH:mm:ss (RFC3339) minimum entry time (e.g. upload time)'),
    'until_time': RFC3339DateTime(description='A yyyy-MM-ddTHH:mm:ss (RFC3339) maximum entry time (e.g. upload time)')
}

for quantity in datamodel.Domain.all_quantities():
    if quantity.multi and quantity.argparse_action is None:
        # factory so each list field gets its own String instance
        def field(**kwargs):
            return fields.List(fields.String(**kwargs))
    else:
        field = fields.String
    _query_model_parameters[quantity.name] = field(description=quantity.description)

_repo_query_model = api.model('RepoQuery', _query_model_parameters, skip_none=True)
277
278
279
280


def repo_edit_action_field(quantity):
    """
    Return the restplus field used to model edit actions for *quantity*.

    Scalar quantities map to a single nested action model; shaped (multi
    valued) quantities map to a list of nested action models.
    """
    if not quantity.is_scalar:
        return fields.List(
            fields.Nested(_repo_edit_action_model, skip_none=True), description=quantity.description)
    return fields.Nested(_repo_edit_action_model, description=quantity.description, skip_none=True)
285
286


287
# Model for a single metadata edit action (one value plus per-action result).
_repo_edit_action_model = api.model('RepoEditAction', {
    'value': fields.String(description='The value/values that is set as a string.'),
    'success': fields.Boolean(description='If this can/could be done. Only in API response.'),
    # fixed typo: the keyword was misspelled 'descriptin', so the description
    # never reached the generated API documentation
    'message': fields.String(description='A message that details the action result. Only in API response.')
})

293
# Body/response model for /repo/edit: a query selecting the entries, one
# action (or list of actions) per UserMetadata quantity, and overall result
# flags that are only populated in the response.
_repo_edit_model = api.model('RepoEdit', {
    'verify': fields.Boolean(description='If true, no action is performed.'),
    'query': fields.Nested(_repo_query_model, skip_none=True, description='New metadata will be applied to query results.'),
    'actions': fields.Nested(
        api.model('RepoEditActions', {
            quantity.name: repo_edit_action_field(quantity)
            for quantity in UserMetadata.m_def.all_quantities.values()
        }), skip_none=True,
        description='Each action specifies a single value (even for multi valued quantities).'),
    'success': fields.Boolean(description='If the overall edit can/could be done. Only in API response.'),
    'message': fields.String(description='A message that details the overall edit result. Only in API response.')
})


307
def edit(parsed_query: Dict[str, Any], mongo_update: Dict[str, Any] = None, re_index=True) -> List[str]:
    """
    Apply *mongo_update* to all entries that match *parsed_query* and
    optionally re-index the affected entries in elastic search.

    Arguments:
        parsed_query: already parsed/normalized search parameters selecting
            the entries to edit.
        mongo_update: mongoengine-style update kwargs applied to the matching
            ``proc.Calc`` documents; ``None`` skips the mongo update.
        re_index: if True, the edited entries are re-indexed in elastic.

    Returns: the ids of all uploads that contain edited entries.
    """
    # get all calculations that have to change
    with utils.timer(common.logger, 'edit query executed'):
        search_request = search.SearchRequest().include('calc_id', 'upload_id')
        apply_search_parameters(search_request, parsed_query)
        upload_ids = set()
        calc_ids = []
        for hit in search_request.execute_scan():
            calc_ids.append(hit['calc_id'])
            upload_ids.add(hit['upload_id'])

    # perform the update on the mongo db
    with utils.timer(common.logger, 'edit mongo update executed', size=len(calc_ids)):
        if mongo_update is not None:
            n_updated = proc.Calc.objects(calc_id__in=calc_ids).update(multi=True, **mongo_update)
            if n_updated != len(calc_ids):
                common.logger.error('edit repo did not update all entries', payload=mongo_update)

    # re-index the affected entries in elastic search
    with utils.timer(common.logger, 'edit elastic update executed', size=len(calc_ids)):
        if re_index:
            def elastic_updates():
                # regenerate each entry's search document from its (updated)
                # mongo metadata
                for calc in proc.Calc.objects(calc_id__in=calc_ids):
                    entry = search.Entry.from_calc_with_metadata(
                        datamodel.CalcWithMetadata(**calc['metadata']))
                    entry = entry.to_dict(include_meta=True)
                    entry['_op_type'] = 'index'
                    yield entry

            _, failed = elasticsearch.helpers.bulk(
                infrastructure.elastic_client, elastic_updates(), stats_only=True)
            search.refresh()
            if failed > 0:
                # bug fix: with stats_only=True, ``failed`` is already the
                # *count* of failed actions (an int) -- calling len() on it
                # raised a TypeError whenever a bulk failure occurred
                common.logger.error(
                    'edit repo with failed elastic updates',
                    payload=mongo_update, nfailed=failed)

    return list(upload_ids)

346

347
348
349
def get_uploader_ids(query):
    """ Return all uploader ids of entries matching *query*; used to check
    coauthors and shared_with against existing uploaders. """
    uploader_request = search.SearchRequest()
    apply_search_parameters(uploader_request, query)
    uploader_request.quantity(name='uploader_id')
    results = uploader_request.execute()
    return results['quantities']['uploader_id']['values']


355
356
@ns.route('/edit')
class EditRepoCalcsResource(Resource):
    @api.doc('edit_repo')
    @api.response(400, 'Invalid requests, e.g. wrong owner type or bad search parameters')
    @api.expect(_repo_edit_model)
    @api.marshal_with(_repo_edit_model, skip_none=True, code=200, description='Edit verified/performed')
    @authenticate()
    def post(self):
        """ Edit repository metadata. """

        # basic body parsing and some semantic checks
        json_data = request.get_json()
        if json_data is None:
            json_data = {}
        query = json_data.get('query', {})

        # edits are restricted to the user's own (or staging) entries
        owner = query.get('owner', 'user')
        if owner not in ['user', 'staging']:
            abort(400, 'Not a valid owner for edit %s. Edit can only be performed in user or staging' % owner)
        query['owner'] = owner

        if 'actions' not in json_data:
            abort(400, 'Missing key actions in edit data')
        actions = json_data['actions']
        verify = json_data.get('verify', False)

        # preparing the query of entries that are edited
        parsed_query = {}
        for quantity_name, value in query.items():
            if quantity_name in _search_quantities:
                quantity = datamodel.Domain.get_quantity(quantity_name)
                # comma separated strings are split for multi valued
                # quantities that use the 'split' argparse action
                if quantity.multi and quantity.argparse_action == 'split' and not isinstance(value, list):
                    value = value.split(',')
                parsed_query[quantity_name] = value
        parsed_query['owner'] = owner
        parsed_query['domain'] = query.get('domain')

        # checking the edit actions and preparing a mongo update on the fly
        json_data['success'] = True
        mongo_update: Dict[str, Any] = {}
        uploader_ids = None  # lazily fetched, only when a User action occurs
        lift_embargo = False
        removed_datasets = None

        # NOTE(review): if `actions` is an empty dict, `has_error` (and
        # `mongo_key`) are never bound and the `if has_error:` below raises
        # NameError -- confirm whether empty actions are rejected upstream.
        with utils.timer(common.logger, 'edit verified'):
            for action_quantity_name, quantity_actions in actions.items():
                quantity = UserMetadata.m_def.all_quantities.get(action_quantity_name)
                if quantity is None:
                    abort(400, 'Unknown quantity %s' % action_quantity_name)

                quantity_flask = quantity.m_x('flask', {})
                if quantity_flask.get('admin_only', False):
                    # NOTE(review): is_admin is used as an attribute elsewhere
                    # in this file (RepoCalcResource.get) but called here --
                    # confirm which form is correct for the User model.
                    if not g.user.is_admin():
                        abort(404, 'Only the admin user can set %s' % quantity.name)

                # scalar quantities take a single action, shaped ones a list
                if isinstance(quantity_actions, list) == quantity.is_scalar:
                    abort(400, 'Wrong shape for quantity %s' % action_quantity_name)

                if not isinstance(quantity_actions, list):
                    quantity_actions = [quantity_actions]

                flask_verify = quantity_flask.get('verify', None)
                mongo_key = 'metadata__%s' % quantity.name
                has_error = False
                for action in quantity_actions:
                    action['success'] = True
                    action['message'] = None
                    action_value = action.get('value')
                    action_value = action_value if action_value is None else action_value.strip()

                    if action_value is None:
                        mongo_value = None

                    elif action_value == '':
                        # empty string unsets the value
                        mongo_value = None

                    elif flask_verify == datamodel.User:
                        # resolve the value as an existing user id
                        try:
                            mongo_value = User.get(user_id=action_value).user_id
                        except KeyError:
                            action['success'] = False
                            has_error = True
                            action['message'] = 'User does not exist'
                            continue

                        # a user cannot be added as coauthor/shared_with if
                        # they already upload entries matched by the query
                        if uploader_ids is None:
                            uploader_ids = get_uploader_ids(parsed_query)
                        if action_value in uploader_ids:
                            action['success'] = False
                            has_error = True
                            action['message'] = 'This user is already an uploader of one entry in the query'
                            continue

                    elif flask_verify == datamodel.Dataset:
                        # resolve the value as one of the user's datasets,
                        # creating it on the fly unless this is a dry run
                        try:
                            mongo_value = Dataset.m_def.m_x('me').get(
                                user_id=g.user.user_id, name=action_value).dataset_id
                        except KeyError:
                            action['message'] = 'Dataset does not exist and will be created'
                            mongo_value = None
                            if not verify:
                                dataset = Dataset(
                                    dataset_id=utils.create_uuid(), user_id=g.user.user_id,
                                    name=action_value, created=datetime.utcnow())
                                dataset.m_x('me').create()
                                mongo_value = dataset.dataset_id

                    elif action_quantity_name == 'with_embargo':
                        # ignore the actual value ... just lift the embargo
                        mongo_value = False
                        lift_embargo = True

                        # check if necessary
                        search_request = search.SearchRequest()
                        apply_search_parameters(search_request, parsed_query)
                        search_request.q = search_request.q & Q('term', with_embargo=True)
                        if search_request.execute()['total'] == 0:
                            action['success'] = False
                            has_error = True
                            action['message'] = 'There is no embargo to lift'
                            continue
                    else:
                        mongo_value = action_value

                    # scalar quantities overwrite; shaped quantities collect
                    # values into a list, rejecting duplicates
                    if len(quantity.shape) == 0:
                        mongo_update[mongo_key] = mongo_value
                    else:
                        mongo_values = mongo_update.setdefault(mongo_key, [])
                        if mongo_value is not None:
                            if mongo_value in mongo_values:
                                action['success'] = False
                                has_error = True
                                action['message'] = 'Duplicate values are not allowed'
                                continue
                            mongo_values.append(mongo_value)

                # an empty action list on a shaped quantity clears the values
                if len(quantity_actions) == 0 and len(quantity.shape) > 0:
                    mongo_update[mongo_key] = []

                if action_quantity_name == 'datasets':
                    # check if datasets edit is allowed and if datasets have to be removed
                    search_request = search.SearchRequest()
                    apply_search_parameters(search_request, parsed_query)
                    search_request.quantity(name='dataset_id')
                    old_datasets = list(
                        search_request.execute()['quantities']['dataset_id']['values'].keys())

                    removed_datasets = []
                    for dataset_id in old_datasets:
                        if dataset_id not in mongo_update.get(mongo_key, []):
                            removed_datasets.append(dataset_id)

                    # entries may not be removed from datasets that have a DOI
                    doi_ds = Dataset.m_def.m_x('me').objects(
                        dataset_id__in=removed_datasets, doi__ne=None).first()
                    if doi_ds is not None:
                        json_data['success'] = False
                        json_data['message'] = json_data.get('message', '') + \
                            'Edit would remove entries from a dataset with DOI (%s) ' % doi_ds.name
                        has_error = True

        # stop here, if client just wants to verify its actions
        if verify:
            return json_data, 200

        # stop if the action were not ok
        if has_error:
            return json_data, 400

        # perform the change
        mongo_update['metadata__last_edit'] = datetime.utcnow()
        upload_ids = edit(parsed_query, mongo_update, True)

        # lift embargo
        if lift_embargo:
            for upload_id in upload_ids:
                upload = proc.Upload.get(upload_id)
                upload.re_pack()

        # remove potentially empty old datasets
        if removed_datasets is not None:
            for dataset in removed_datasets:
                if proc.Calc.objects(metadata__dataset_id=dataset).first() is None:
                    Dataset.m_def.m_x('me').objects(dataset_id=dataset).delete()

        return json_data, 200
540

541

542
543
544
545
546
547
548
549
# Request parser and response models for the single-quantity values endpoint
# (/repo/quantity/<quantity>): search parameters plus 'after'/'size' scrolling.
_repo_quantity_search_request_parser = api.parser()
add_search_parameters(_repo_quantity_search_request_parser)
_repo_quantity_search_request_parser.add_argument(
    'after', type=str, help='The after value to use for "scrolling".')
_repo_quantity_search_request_parser.add_argument(
    'size', type=int, help='The max size of the returned values.')

_repo_quantity_model = api.model('RepoQuantity', {
    'after': fields.String(description='The after value that can be used to retrieve the next set of values.'),
    'values': fields.Raw(description='A dict with values as key. Values are dicts with "total" and "examples" keys.')
})

_repo_quantity_values_model = api.model('RepoQuantityValues', {
    'quantity': fields.Nested(_repo_quantity_model, allow_null=True)
})

558

559
@ns.route('/quantity/<string:quantity>')
class RepoQuantityResource(Resource):
    @api.doc('quantity_search')
    @api.response(400, 'Invalid requests, e.g. wrong owner type, bad quantity, bad search parameters')
    @api.expect(_repo_quantity_search_request_parser, validate=True)
    @api.marshal_with(_repo_quantity_values_model, skip_none=True, code=200, description='Search results send')
    @authenticate()
    def get(self, quantity: str):
        """
        Retrieve quantity values from entries matching the search.

        You can use the various quantities to search/filter for. For some of the
        indexed quantities this endpoint returns aggregation information. This means
        you will be given a list of all possible values and the number of entries
        that have the certain value. You can also use these aggregations on an empty
        search to determine the possible values.

        There is no ordering and no pagination. Instead there is an 'after' key based
        scrolling. The result will contain an 'after' value, that can be specified
        for the next request. You can use the 'size' and 'after' parameters accordingly.

        The result will contain a 'quantity' key with quantity values and the "after"
        value. There will be upto 'size' many values. For the rest of the values use the
        "after" parameter in another request.
        """
        search_request = search.SearchRequest()
        args = {
            key: value
            for key, value in _repo_quantity_search_request_parser.parse_args().items()
            if value is not None}

        apply_search_parameters(search_request, args)
        after = args.get('after', None)
        size = args.get('size', 100)

        # explicit validation instead of assert: asserts are stripped when
        # Python runs with -O, which would let negative sizes through
        if size < 0:
            abort(400, message='invalid size')

        try:
            search_request.quantity(quantity, size=size, after=after)
            results = search_request.execute()
            # move the requested quantity's aggregation to the 'quantity' key
            quantities = results.pop('quantities')
            results['quantity'] = quantities[quantity]

            return results, 200
        except KeyError as e:
            import traceback
            traceback.print_exc()
            abort(400, 'Given quantity does not exist: %s' % str(e))
611
612


613
614
615
# Request parser and response model for the multi-quantity values endpoint
# (/repo/quantities): repeated 'quantities' parameters plus a shared 'size'.
_repo_quantities_search_request_parser = api.parser()
add_search_parameters(_repo_quantities_search_request_parser)
_repo_quantities_search_request_parser.add_argument(
    'quantities', type=str, action='append',
    help='The quantities to retrieve values from')
_repo_quantities_search_request_parser.add_argument(
    'size', type=int, help='The max size of the returned values.')

_repo_quantities_model = api.model('RepoQuantities', {
    'quantities': fields.List(fields.Nested(_repo_quantity_model))
})

625
626
627
628
629

@ns.route('/quantities')
class RepoQuantitiesResource(Resource):
    @api.doc('quantities_search')
    @api.response(400, 'Invalid requests, e.g. wrong owner type, bad quantity, bad search parameters')
    @api.expect(_repo_quantities_search_request_parser, validate=True)
    @api.marshal_with(_repo_quantities_model, skip_none=True, code=200, description='Search results send')
    @authenticate()
    def get(self):
        """
        Retrieve quantity values for multiple quantities at once.

        You can use the various quantities to search/filter for. For some of the
        indexed quantities this endpoint returns aggregation information. This means
        you will be given a list of all possible values and the number of entries
        that have the certain value. You can also use these aggregations on an empty
        search to determine the possible values.

        There is no ordering, no pagination, and no 'after' key based scrolling;
        at most 'size' values are returned per quantity.

        The result will contain a 'quantities' key with a dict of quantity names and the
        retrieved values as values.
        """
        search_request = search.SearchRequest()
        # Drop parameters the client did not supply so that defaults apply.
        args = {
            key: value
            for key, value in _repo_quantities_search_request_parser.parse_args().items()
            if value is not None}

        apply_search_parameters(search_request, args)

        quantities = args.get('quantities', [])
        size = args.get('size', 5)

        # Validate explicitly: ``assert`` statements are stripped under
        # ``python -O`` and must not be used for request validation.
        if size < 0:
            abort(400, message='invalid size')

        for quantity in quantities:
            try:
                search_request.quantity(quantity, size=size)
            except KeyError as e:
                # KeyError means the quantity is not part of the searchable
                # metadata; report it as a client error.
                abort(400, 'Given quantity does not exist: %s' % str(e))

        return search_request.execute(), 200


# Response model for PID resolution: just the ids that identify the entry.
_repo_calc_id_model = api.model('RepoCalculationId', {
    'upload_id': fields.String(),
    'calc_id': fields.String(),
})


681
@ns.route('/pid/<path:pid>')
class RepoPidResource(Resource):
    @api.doc('resolve_pid')
    @api.response(404, 'Entry with PID does not exist')
    @api.marshal_with(_repo_calc_id_model, skip_none=True, code=200, description='Entry resolved')
    @authenticate()
    def get(self, pid: str):
        """
        Resolve a PID to the upload_id and calc_id of the respective entry.

        The pid is either a plain integer or a handle of the form
        '21.11132/<encoded id>'.
        """
        if '/' in pid:
            # Split only on the first '/': the route is <path:pid>, so a
            # malformed value with extra slashes reaches this code and must
            # yield a 400, not an unpacking ValueError (HTTP 500).
            prefix, pid = pid.split('/', 1)
            if prefix != '21.11132':
                abort(400, 'Wrong PID format')
            try:
                pid_int = utils.decode_handle_id(pid)
            except ValueError:
                abort(400, 'Wrong PID format')
        else:
            try:
                pid_int = int(pid)
            except ValueError:
                abort(400, 'Wrong PID format')

        search_request = search.SearchRequest().include('upload_id', 'calc_id')

        # Restrict visibility to what the (possibly anonymous) user may see.
        if g.user is not None:
            search_request.owner('all', user_id=g.user.user_id)
        else:
            search_request.owner('all')

        search_request.search_parameter('pid', pid_int)

        results = list(search_request.execute_scan())
        total = len(results)

        if total == 0:
            abort(404, 'Entry with PID %s does not exist' % pid)

        if total > 1:
            # PIDs are supposed to be unique; log the inconsistency but
            # still answer with the first match.
            common.logger.error('Two entries for the same pid', pid=pid_int)

        result = results[0]
        return dict(
            upload_id=result['upload_id'],
            calc_id=result['calc_id'])