repo.py 30.4 KB
Newer Older
Markus Scheidgen's avatar
Markus Scheidgen committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
# Copyright 2018 Markus Scheidgen
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
The repository API of the nomad@FAIRDI APIs. Currently allows resolving repository
meta-data.
"""

20
from typing import List, Dict, Any
Markus Scheidgen's avatar
Markus Scheidgen committed
21
from flask_restplus import Resource, abort, fields
22
from flask import request, g
23
from elasticsearch_dsl import Q
24
from elasticsearch.exceptions import NotFoundError
25
import elasticsearch.helpers
Markus Scheidgen's avatar
Markus Scheidgen committed
26
from datetime import datetime
Markus Scheidgen's avatar
Markus Scheidgen committed
27

28
from nomad import search, utils, datamodel, processing as proc, infrastructure
29
from nomad.app.utils import RFC3339DateTime, with_logger
30
from nomad.datamodel import UserMetadata, Dataset, User
Markus Scheidgen's avatar
Markus Scheidgen committed
31

Markus Scheidgen's avatar
Markus Scheidgen committed
32
from .api import api
33
from .auth import authenticate
34
35
36
from .common import search_model, calc_route, add_pagination_parameters,\
    add_scroll_parameters, add_search_parameters, apply_search_parameters,\
    query_api_python, query_api_curl
Markus Scheidgen's avatar
Markus Scheidgen committed
37

38
ns = api.namespace('repo', description='Access repository metadata.')
Markus Scheidgen's avatar
Markus Scheidgen committed
39
40
41
42
43


@calc_route(ns)
class RepoCalcResource(Resource):
    @api.response(404, 'The upload or calculation does not exist')
    @api.response(401, 'Not authorized to access the calculation')
    @api.response(200, 'Metadata send', fields.Raw)
    @api.doc('get_repo_calc')
    @authenticate()
    def get(self, upload_id, calc_id):
        """
        Get calculation metadata in repository form.

        Repository metadata only entails the quantities shown in the repository.
        Calcs are references via *upload_id*, *calc_id* pairs.
        """
        # Resolve the entry from the search index; unknown ids give a 404.
        try:
            calc = search.Entry.get(calc_id)
        except NotFoundError:
            abort(404, message='There is no calculation %s/%s' % (upload_id, calc_id))

        # Embargoed or not-yet-published entries are only visible to their
        # owners or an admin; anonymous users are rejected outright.
        restricted = calc.with_embargo or not calc.published
        if restricted:
            if g.user is None:
                abort(401, message='Not logged in to access %s/%s.' % (upload_id, calc_id))

            is_owner = any(owner.user_id == g.user.user_id for owner in calc.owners)
            if not is_owner and not g.user.is_admin:
                abort(401, message='Not authorized to access %s/%s.' % (upload_id, calc_id))

        data = calc.to_dict()
        # Attach ready-made code snippets that reproduce this API call.
        data['python'] = query_api_python('archive', upload_id, calc_id)
        data['curl'] = query_api_curl('archive', upload_id, calc_id)

        return data, 200
Markus Scheidgen's avatar
Markus Scheidgen committed
72

73

74
75
76
77
78
# Request parser for the main ``/repo/`` search endpoint: generic pagination,
# scrolling, and search parameters plus repo-specific arguments.
_search_request_parser = api.parser()
add_pagination_parameters(_search_request_parser)
add_scroll_parameters(_search_request_parser)
add_search_parameters(_search_request_parser)
_search_request_parser.add_argument(
    'date_histogram', type=bool, help='Add an additional aggregation over the upload time')
_search_request_parser.add_argument(
    'metrics', type=str, action='append', help=(
        'Metrics to aggregate over all quantities and their values as comma separated list. '
        'Possible values are %s.' % ', '.join(datamodel.Domain.instance.metrics_names)))
_search_request_parser.add_argument(
    'statistics', type=bool, help=('Return statistics.'))
# One boolean flag (request the group's data) and one ``<group>_after`` scroll
# key per search group, e.g. datasets.
for group_name in search.groups:
    _search_request_parser.add_argument(
        group_name, type=bool, help=('Return %s group data.' % group_name))
    _search_request_parser.add_argument(
        '%s_after' % group_name, type=str,
        help='The last %s id of the last scroll window for the %s group' % (group_name, group_name))

93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
# Response model for the main search endpoint: the generic ``search_model``
# extended with statistics, reproduction snippets, and one nested model per
# search group.
_repo_calcs_model_fields = {
    'statistics': fields.Raw(description=(
        'A dict with all statistics. Each statistic is dictionary with a metrics dict as '
        'value and quantity value as key. The possible metrics are code runs(calcs), %s. '
        'There is a pseudo quantity "total" with a single value "all" that contains the '
        ' metrics over all results. ' % ', '.join(datamodel.Domain.instance.metrics_names))),
    'python': fields.String(description=(
        'A string of python code snippet which can be executed to reproduce the api result.')),
    'curl': fields.String(description=(
        'A string of curl command which can be executed to reproduce the api result.')),
}
# NOTE(review): the nested model is registered under the name 'RepoDatasets'
# once per group — presumably intentional since all groups share the shape.
for group_name, (group_quantity, _) in search.groups.items():
    _repo_calcs_model_fields[group_name] = fields.Nested(api.model('RepoDatasets', {
        'after': fields.String(description='The after value that can be used to retrieve the next %s.' % group_name),
        'values': fields.Raw(description='A dict with %s as key. The values are dicts with "total" and "examples" keys.' % group_quantity)
    }), skip_none=True)
_repo_calcs_model = api.inherit('RepoCalculations', search_model, _repo_calcs_model_fields)
110
111


Markus Scheidgen's avatar
Markus Scheidgen committed
112
113
@ns.route('/')
class RepoCalcsResource(Resource):
    @api.doc('search')
    @api.response(400, 'Invalid requests, e.g. wrong owner type or bad search parameters')
    @api.expect(_search_request_parser, validate=True)
    @api.marshal_with(_repo_calcs_model, skip_none=True, code=200, description='Search results send')
    @authenticate()
    def get(self):
        """
        Search for calculations in the repository form, paginated.

        The ``owner`` parameter determines the overall entries to search through.
        Possible values are: ``all`` (show all entries visible to the current user), ``public``
        (show all publically visible entries), ``user`` (show all user entries, requires login),
        ``staging`` (show all user entries in staging area, requires login).

        You can use the various quantities to search/filter for. For some of the
        indexed quantities this endpoint returns aggregation information. This means
        you will be given a list of all possible values and the number of entries
        that have the certain value. You can also use these aggregations on an empty
        search to determine the possible values.

        The pagination parameters allows determine which page to return via the
        ``page`` and ``per_page`` parameters. Pagination however, is limited to the first
        100k (depending on ES configuration) hits.

        An alternative to pagination is to use ``scroll`` and ``scroll_id``. With ``scroll``
        you will get a ``scroll_id`` on the first request. Each call with ``scroll`` and
        the respective ``scroll_id`` will return the next ``per_page`` (here the default is 1000)
        results. Scroll however, ignores ordering and does not return aggregations.
        The scroll view used in the background will stay alive for 1 minute between requests.
        If the given ``scroll_id`` is not available anymore, a HTTP 400 is raised.

        The search will return aggregations on a predefined set of quantities. Aggregations
        will tell you what quantity values exist and how many entries match those values.

        Ordering is determined by ``order_by`` and ``order`` parameters.
        """

        # Parse all request arguments; any parsing error becomes a 400.
        try:
            args = {
                key: value for key, value in _search_request_parser.parse_args().items()
                if value is not None}

            scroll = args.get('scroll', False)
            scroll_id = args.get('scroll_id', None)
            page = args.get('page', 1)
            # scrolled requests use a larger default window than paginated ones
            per_page = args.get('per_page', 10 if not scroll else 1000)
            order = args.get('order', -1)
            order_by = args.get('order_by', 'formula')

            date_histogram = args.get('date_histogram', False)
            # read repeated ?metrics=... values directly from the raw request
            metrics: List[str] = request.args.getlist('metrics')

            # requesting any group implies that statistics are needed as well
            with_statistics = args.get('statistics', False) or \
                any(args.get(group_name, False) for group_name in search.groups)
        except Exception as e:
            abort(400, message='bad parameters: %s' % str(e))

        search_request = search.SearchRequest()
        apply_search_parameters(search_request, args)
        if date_histogram:
            search_request.date_histogram()

        # validate pagination, ordering and metric names before querying ES
        try:
            assert page >= 1
            assert per_page >= 0
        except AssertionError:
            abort(400, message='invalid pagination')

        if order not in [-1, 1]:
            abort(400, message='invalid pagination')

        for metric in metrics:
            if metric not in search.metrics_names:
                abort(400, message='there is no metric %s' % metric)

        if with_statistics:
            search_request.default_statistics(metrics_to_use=metrics)

            # each requested group contributes its own metric to the totals
            additional_metrics = [
                metric
                for group_name, (_, metric) in search.groups.items()
                if args.get(group_name, False)]

            total_metrics = metrics + additional_metrics

            search_request.totals(metrics_to_use=total_metrics)
            search_request.statistic('authors', 1000)

        elif len(metrics) > 0:
            search_request.totals(metrics_to_use=metrics)

        try:
            if scroll:
                results = search_request.execute_scrolled(scroll_id=scroll_id, size=per_page)

            else:
                # add an "after"-keyed aggregation for every requested group
                for group_name, (group_quantity, _) in search.groups.items():
                    if args.get(group_name, False):
                        search_request.quantity(
                            group_quantity, size=per_page, examples=1,
                            after=request.args.get('%s_after' % group_name, None))

                results = search_request.execute_paginated(
                    per_page=per_page, page=page, order=order, order_by=order_by)

                # TODO just a work around to make things prettier
                if with_statistics:
                    statistics = results['statistics']
                    if 'code_name' in statistics and 'currupted mainfile' in statistics['code_name']:
                        del(statistics['code_name']['currupted mainfile'])

                # move group aggregations from the internal 'quantities' key to
                # top-level keys named after the groups, as the model expects
                if 'quantities' in results:
                    quantities = results.pop('quantities')

                # NOTE(review): if a group was requested but 'quantities' was
                # missing from the results, ``quantities`` would be unbound
                # here — presumably ES always returns it in that case; verify.
                for group_name, (group_quantity, _) in search.groups.items():
                    if args.get(group_name, False):
                        results[group_name] = quantities[group_quantity]

            # build python code/curl snippet
            code_args = dict(request.args)
            if 'statistics' in code_args:
                del(code_args['statistics'])
            results['curl'] = query_api_curl('archive', 'query', query_string=code_args)
            results['python'] = query_api_python('archive', 'query', query_string=code_args)

            return results, 200
        except search.ScrollIdNotFound:
            abort(400, 'The given scroll_id does not exist.')
        except KeyError as e:
            import traceback
            traceback.print_exc()
            abort(400, str(e))
245

246

247
# Body model for queries used by POST-style endpoints (e.g. /repo/edit):
# fixed time/owner parameters plus one field per searchable quantity.
_query_model_parameters = {
    'owner': fields.String(description='Specify which calcs to return: ``all``, ``public``, ``user``, ``staging``, default is ``all``'),
    'from_time': RFC3339DateTime(description='A yyyy-MM-ddTHH:mm:ss (RFC3339) minimum entry time (e.g. upload time)'),
    'until_time': RFC3339DateTime(description='A yyyy-MM-ddTHH:mm:ss (RFC3339) maximum entry time (e.g. upload time)')
}

for quantity in search.quantities.values():
    # multi-valued quantities without a special argparse action are modelled
    # as lists of strings; everything else as a plain string
    if quantity.multi and quantity.argparse_action is None:
        def field(**kwargs):
            return fields.List(fields.String(**kwargs))
    else:
        field = fields.String
    _query_model_parameters[quantity.name] = field(description=quantity.description)

_repo_query_model = api.model('RepoQuery', _query_model_parameters, skip_none=True)
262
263
264
265


def repo_edit_action_field(quantity):
    """
    Return the restplus field that models edit actions for *quantity*.

    Scalar quantities take a single nested action model; shaped quantities
    take a list of nested action models.
    """
    if not quantity.is_scalar:
        return fields.List(
            fields.Nested(_repo_edit_action_model, skip_none=True), description=quantity.description)
    return fields.Nested(_repo_edit_action_model, description=quantity.description, skip_none=True)
270
271


272
# Model for a single edit action; ``success`` and ``message`` are only filled
# in API responses.
_repo_edit_action_model = api.model('RepoEditAction', {
    'value': fields.String(description='The value/values that is set as a string.'),
    'success': fields.Boolean(description='If this can/could be done. Only in API response.'),
    # fixed typo: the keyword was ``descriptin``, which flask-restplus silently
    # ignored, so the message field had no description in the API docs
    'message': fields.String(description='A message that details the action result. Only in API response.')
})

278
# Request/response model for /repo/edit: a query selecting the entries, the
# per-quantity actions to apply, and result flags filled in responses.
_repo_edit_model = api.model('RepoEdit', {
    'verify': fields.Boolean(description='If true, no action is performed.'),
    'query': fields.Nested(_repo_query_model, skip_none=True, description='New metadata will be applied to query results.'),
    'actions': fields.Nested(
        api.model('RepoEditActions', {
            quantity.name: repo_edit_action_field(quantity)
            for quantity in UserMetadata.m_def.all_quantities.values()
        }), skip_none=True,
        description='Each action specifies a single value (even for multi valued quantities).'),
    'success': fields.Boolean(description='If the overall edit can/could be done. Only in API response.'),
    'message': fields.String(description='A message that details the overall edit result. Only in API response.')
})


292
def edit(parsed_query: Dict[str, Any], logger, mongo_update: Dict[str, Any] = None, re_index=True) -> List[str]:
    """
    Apply a metadata edit to all entries that match the given query.

    Arguments:
        parsed_query: The already parsed search query that selects the entries.
        logger: A structured logger used for timings and error reporting.
        mongo_update: A mongoengine-style update dict applied to all matching
            calcs; if None, the mongo update is skipped.
        re_index: If True, the affected entries are re-indexed in elastic search.

    Returns: The ids of all uploads that contain affected entries.
    """
    # get all calculations that have to change
    with utils.timer(logger, 'edit query executed'):
        search_request = search.SearchRequest()
        apply_search_parameters(search_request, parsed_query)
        upload_ids = set()
        calc_ids = []
        for hit in search_request.execute_scan():
            calc_ids.append(hit['calc_id'])
            upload_ids.add(hit['upload_id'])

    # perform the update on the mongo db
    with utils.timer(logger, 'edit mongo update executed', size=len(calc_ids)):
        if mongo_update is not None:
            n_updated = proc.Calc.objects(calc_id__in=calc_ids).update(multi=True, **mongo_update)
            if n_updated != len(calc_ids):
                logger.error('edit repo did not update all entries', payload=mongo_update)

    # re-index the affected entries in elastic search
    with utils.timer(logger, 'edit elastic update executed', size=len(calc_ids)):
        if re_index:
            def elastic_updates():
                # regenerate the index document from the (updated) mongo data
                for calc in proc.Calc.objects(calc_id__in=calc_ids):
                    entry = search.Entry.from_calc_with_metadata(
                        datamodel.CalcWithMetadata(**calc['metadata']))
                    entry = entry.to_dict(include_meta=True)
                    entry['_op_type'] = 'index'
                    yield entry

            _, failed = elasticsearch.helpers.bulk(
                infrastructure.elastic_client, elastic_updates(), stats_only=True)
            search.refresh()
            if failed > 0:
                # with stats_only=True, bulk() returns ``failed`` as a count
                # (int); the old code called len(failed), which raised a
                # TypeError exactly when this error path was taken
                logger.error(
                    'edit repo with failed elastic updates',
                    payload=mongo_update, nfailed=failed)

    return list(upload_ids)

331

332
333
334
def get_uploader_ids(query):
    """
    Collect the uploader ids of all entries matching *query*.

    Used to check that coauthor and shared_with edits do not add a user who is
    already an uploader of one of the affected entries.
    """
    request = search.SearchRequest()
    apply_search_parameters(request, query)
    request.quantity(name='uploader_id')
    results = request.execute()
    return results['quantities']['uploader_id']['values']


340
341
@ns.route('/edit')
class EditRepoCalcsResource(Resource):
    @api.doc('edit_repo')
    @api.response(400, 'Invalid requests, e.g. wrong owner type or bad search parameters')
    @api.expect(_repo_edit_model)
    @api.marshal_with(_repo_edit_model, skip_none=True, code=200, description='Edit verified/performed')
    @authenticate()
    @with_logger
    def post(self, logger):
        """ Edit repository metadata. """

        # basic body parsing and some semantic checks
        json_data = request.get_json()
        if json_data is None:
            json_data = {}
        query = json_data.get('query', {})

        # edits are only allowed on the user's own (or staging) entries
        owner = query.get('owner', 'user')
        if owner not in ['user', 'staging']:
            abort(400, 'Not a valid owner for edit %s. Edit can only be performed in user or staging' % owner)
        query['owner'] = owner

        if 'actions' not in json_data:
            abort(400, 'Missing key actions in edit data')
        actions = json_data['actions']
        verify = json_data.get('verify', False)

        # preparing the query of entries that are edited
        parsed_query = {}
        for quantity_name, quantity in search.quantities.items():
            if quantity_name in query:
                value = query[quantity_name]
                # comma separated strings are split into lists for multi values
                if quantity.multi and quantity.argparse_action == 'split' and not isinstance(value, list):
                    value = value.split(',')
                parsed_query[quantity_name] = value
        parsed_query['owner'] = owner

        # checking the edit actions and preparing a mongo update on the fly
        json_data['success'] = True
        mongo_update = {}
        uploader_ids = None      # lazily filled cache of uploader ids for the query
        lift_embargo = False
        removed_datasets = None  # dataset ids no longer referenced after the edit

        with utils.timer(logger, 'edit verified'):
            for action_quantity_name, quantity_actions in actions.items():
                quantity = UserMetadata.m_def.all_quantities.get(action_quantity_name)
                if quantity is None:
                    abort(400, 'Unknown quantity %s' % action_quantity_name)

                quantity_flask = quantity.m_x('flask', {})
                if quantity_flask.get('admin_only', False):
                    # NOTE(review): ``is_admin`` is called as a method here but
                    # read as an attribute elsewhere in this file — confirm
                    # which form the User class actually provides
                    if not g.user.is_admin():
                        abort(404, 'Only the admin user can set %s' % quantity.name)

                # scalar quantities take a single action, shaped ones a list
                if isinstance(quantity_actions, list) == quantity.is_scalar:
                    abort(400, 'Wrong shape for quantity %s' % action_quantity_name)

                if not isinstance(quantity_actions, list):
                    quantity_actions = [quantity_actions]

                flask_verify = quantity_flask.get('verify', None)
                mongo_key = 'metadata__%s' % quantity.name
                # NOTE(review): has_error is re-initialized for every quantity,
                # so errors from earlier quantities are forgotten by the final
                # ``if has_error`` check below; it is also unbound if ``actions``
                # is an empty dict — verify whether this is intended
                has_error = False
                for action in quantity_actions:
                    action['success'] = True
                    action['message'] = None
                    action_value = action.get('value')
                    action_value = action_value if action_value is None else action_value.strip()

                    if action_value is None:
                        mongo_value = None

                    elif action_value == '':
                        mongo_value = None

                    elif flask_verify == datamodel.User:
                        # the value must name an existing user ...
                        try:
                            mongo_value = User.get(user_id=action_value).user_id
                        except KeyError:
                            action['success'] = False
                            has_error = True
                            action['message'] = 'User does not exist'
                            continue

                        # ... who is not already an uploader of a queried entry
                        if uploader_ids is None:
                            uploader_ids = get_uploader_ids(parsed_query)
                        if action_value in uploader_ids:
                            action['success'] = False
                            has_error = True
                            action['message'] = 'This user is already an uploader of one entry in the query'
                            continue

                    elif flask_verify == datamodel.Dataset:
                        # resolve the user's dataset by name, creating it on a
                        # real (non-verify) run if it does not exist yet
                        try:
                            mongo_value = Dataset.m_def.m_x('me').get(
                                user_id=g.user.user_id, name=action_value).dataset_id
                        except KeyError:
                            action['message'] = 'Dataset does not exist and will be created'
                            mongo_value = None
                            if not verify:
                                dataset = Dataset(
                                    dataset_id=utils.create_uuid(), user_id=g.user.user_id,
                                    name=action_value, created=datetime.utcnow())
                                dataset.m_x('me').create()
                                mongo_value = dataset.dataset_id

                    elif action_quantity_name == 'with_embargo':
                        # ignore the actual value ... just lift the embargo
                        mongo_value = False
                        lift_embargo = True

                        # check if necessary
                        search_request = search.SearchRequest()
                        apply_search_parameters(search_request, parsed_query)
                        search_request.q = search_request.q & Q('term', with_embargo=True)
                        if search_request.execute()['total'] == 0:
                            action['success'] = False
                            has_error = True
                            action['message'] = 'There is no embargo to lift'
                            continue
                    else:
                        mongo_value = action_value

                    # scalar quantities overwrite; shaped ones accumulate into
                    # a list, rejecting duplicates
                    if len(quantity.shape) == 0:
                        mongo_update[mongo_key] = mongo_value
                    else:
                        mongo_values = mongo_update.setdefault(mongo_key, [])
                        if mongo_value is not None:
                            if mongo_value in mongo_values:
                                action['success'] = False
                                has_error = True
                                action['message'] = 'Duplicate values are not allowed'
                                continue
                            mongo_values.append(mongo_value)

                # an empty action list on a shaped quantity clears the values
                if len(quantity_actions) == 0 and len(quantity.shape) > 0:
                    mongo_update[mongo_key] = []

                if action_quantity_name == 'datasets':
                    # check if datasets edit is allowed and if datasets have to be removed
                    search_request = search.SearchRequest()
                    apply_search_parameters(search_request, parsed_query)
                    search_request.quantity(name='dataset_id')
                    old_datasets = list(
                        search_request.execute()['quantities']['dataset_id']['values'].keys())

                    removed_datasets = []
                    for dataset_id in old_datasets:
                        if dataset_id not in mongo_update.get(mongo_key, []):
                            removed_datasets.append(dataset_id)

                    # entries must not be removed from datasets that have a DOI
                    doi_ds = Dataset.m_def.m_x('me').objects(
                        dataset_id__in=removed_datasets, doi__ne=None).first()
                    if doi_ds is not None:
                        json_data['success'] = False
                        json_data['message'] = json_data.get('message', '') + \
                            'Edit would remove entries from a dataset with DOI (%s) ' % doi_ds.name
                        has_error = True

        # stop here, if client just wants to verify its actions
        if verify:
            return json_data, 200

        # stop if the action were not ok
        if has_error:
            return json_data, 400

        # perform the change
        mongo_update['metadata__last_edit'] = datetime.utcnow()
        upload_ids = edit(parsed_query, logger, mongo_update, True)

        # lift embargo
        if lift_embargo:
            for upload_id in upload_ids:
                upload = proc.Upload.get(upload_id)
                upload.re_pack()

        # remove potentially empty old datasets
        if removed_datasets is not None:
            for dataset in removed_datasets:
                if proc.Calc.objects(metadata__dataset_id=dataset).first() is None:
                    Dataset.m_def.m_x('me').objects(dataset_id=dataset).delete()

        return json_data, 200
525

526

527
528
529
530
531
532
533
534
# Parser and models for the single-quantity aggregation endpoint
# (/repo/quantity/<quantity>): search parameters plus "after"-key scrolling.
_repo_quantity_search_request_parser = api.parser()
add_search_parameters(_repo_quantity_search_request_parser)
_repo_quantity_search_request_parser.add_argument(
    'after', type=str, help='The after value to use for "scrolling".')
_repo_quantity_search_request_parser.add_argument(
    'size', type=int, help='The max size of the returned values.')

_repo_quantity_model = api.model('RepoQuantity', {
    'after': fields.String(description='The after value that can be used to retrieve the next set of values.'),
    'values': fields.Raw(description='A dict with values as key. Values are dicts with "total" and "examples" keys.')
})

_repo_quantity_values_model = api.model('RepoQuantityValues', {
    'quantity': fields.Nested(_repo_quantity_model, allow_null=True)
})

543

544
@ns.route('/quantity/<string:quantity>')
class RepoQuantityResource(Resource):
    @api.doc('quantity_search')
    @api.response(400, 'Invalid requests, e.g. wrong owner type, bad quantity, bad search parameters')
    @api.expect(_repo_quantity_search_request_parser, validate=True)
    @api.marshal_with(_repo_quantity_values_model, skip_none=True, code=200, description='Search results send')
    @authenticate()
    def get(self, quantity: str):
        """
        Retrieve quantity values from entries matching the search.

        You can use the various quantities to search/filter for. For some of the
        indexed quantities this endpoint returns aggregation information. This means
        you will be given a list of all possible values and the number of entries
        that have the certain value. You can also use these aggregations on an empty
        search to determine the possible values.

        There is no ordering and no pagination. Instead there is an 'after' key based
        scrolling. The result will contain an 'after' value, that can be specified
        for the next request. You can use the 'size' and 'after' parameters accordingly.

        The result will contain a 'quantity' key with quantity values and the "after"
        value. There will be upto 'size' many values. For the rest of the values use the
        "after" parameter in another request.
        """

        search_request = search.SearchRequest()
        args = {
            key: value
            for key, value in _repo_quantity_search_request_parser.parse_args().items()
            if value is not None}

        apply_search_parameters(search_request, args)
        after = args.get('after', None)
        size = args.get('size', 100)

        # explicit validation instead of ``assert`` — asserts are stripped
        # under ``python -O`` and must not guard user input
        if size < 0:
            abort(400, message='invalid size')

        search_request.quantity(quantity, size=size, after=after)

        try:
            results = search_request.execute()
            # move the single aggregation from the internal 'quantities' dict
            # to the 'quantity' key the response model expects
            quantities = results.pop('quantities')
            results['quantity'] = quantities[quantity]

            return results, 200
        except KeyError as e:
            import traceback
            traceback.print_exc()
            abort(400, 'Given quantity does not exist: %s' % str(e))
597
598


599
600
601
# Parser and model for the multi-quantity aggregation endpoint
# (/repo/quantities): search parameters plus a repeatable 'quantities' arg.
_repo_quantities_search_request_parser = api.parser()
add_search_parameters(_repo_quantities_search_request_parser)
_repo_quantities_search_request_parser.add_argument(
    'quantities', type=str, action='append',
    help='The quantities to retrieve values from')
_repo_quantities_search_request_parser.add_argument(
    'size', type=int, help='The max size of the returned values.')

_repo_quantities_model = api.model('RepoQuantities', {
    'quantities': fields.List(fields.Nested(_repo_quantity_model))
})

611
612
613
614
615

@ns.route('/quantities')
class RepoQuantitiesResource(Resource):
    @api.doc('quantities_search')
    @api.response(400, 'Invalid requests, e.g. wrong owner type, bad quantity, bad search parameters')
    @api.expect(_repo_quantities_search_request_parser, validate=True)
    @api.marshal_with(_repo_quantities_model, skip_none=True, code=200, description='Search results send')
    @authenticate()
    def get(self):
        """
        Retrieve quantity values for multiple quantities at once.

        You can use the various quantities to search/filter for. For some of the
        indexed quantities this endpoint returns aggregation information. This means
        you will be given a list of all possible values and the number of entries
        that have the certain value. You can also use these aggregations on an empty
        search to determine the possible values.

        There is no ordering and no pagination and not after key based scrolling. Instead
        there is an 'after' key based scrolling.

        The result will contain a 'quantities' key with a dict of quantity names and the
        retrieved values as values.
        """

        search_request = search.SearchRequest()
        args = {
            key: value
            for key, value in _repo_quantities_search_request_parser.parse_args().items()
            if value is not None}

        apply_search_parameters(search_request, args)
        quantities = args.get('quantities', [])
        size = args.get('size', 5)

        # explicit validation instead of ``assert`` — asserts are stripped
        # under ``python -O`` and must not guard user input
        if size < 0:
            abort(400, message='invalid size')

        for quantity in quantities:
            try:
                search_request.quantity(quantity, size=size)
            except KeyError as e:
                import traceback
                traceback.print_exc()
                abort(400, 'Given quantity does not exist: %s' % str(e))

        return search_request.execute(), 200


Markus Scheidgen's avatar
Markus Scheidgen committed
662
# Minimal model carrying just the two ids that identify a single entry.
_repo_calc_id_model = api.model('RepoCalculationId', {
    'upload_id': fields.String(), 'calc_id': fields.String()
})


667
@ns.route('/pid/<path:pid>')
class RepoPidResource(Resource):
    @api.doc('resolve_pid')
    @api.response(404, 'Entry with PID does not exist')
    @api.marshal_with(_repo_calc_id_model, skip_none=True, code=200, description='Entry resolved')
    @authenticate()
    def get(self, pid: str):
        """
        Resolve a PID into the upload and calc id of the respective entry.

        Accepts either a plain integer PID or a handle-system PID of the form
        ``21.11132/<encoded id>``.
        """
        if '/' in pid:
            # split at most once: the route is <path:pid>, so a pid with
            # additional slashes previously raised an unhandled ValueError
            # (HTTP 500) here instead of a clean 400
            prefix, pid = pid.split('/', 1)
            if prefix != '21.11132':
                abort(400, 'Wrong PID format')
            try:
                pid_int = utils.decode_handle_id(pid)
            except ValueError:
                abort(400, 'Wrong PID format')
        else:
            try:
                pid_int = int(pid)
            except ValueError:
                abort(400, 'Wrong PID format')

        search_request = search.SearchRequest()

        # restrict results to what the current user is allowed to see
        if g.user is not None:
            search_request.owner('all', user_id=g.user.user_id)
        else:
            search_request.owner('all')

        search_request.search_parameter('pid', pid_int)

        results = list(search_request.execute_scan())
        total = len(results)

        if total == 0:
            abort(404, 'Entry with PID %s does not exist' % pid)

        # pids should be unique; log (but still answer) if they are not
        if total > 1:
            utils.get_logger(__name__).error('Two entries for the same pid', pid=pid_int)

        result = results[0]
        return dict(
            upload_id=result['upload_id'],
            calc_id=result['calc_id'])