repo.py 34.8 KB
Newer Older
Markus Scheidgen's avatar
Markus Scheidgen committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
# Copyright 2018 Markus Scheidgen
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

15
'''
Markus Scheidgen's avatar
Markus Scheidgen committed
16
17
The repository API of the nomad@FAIRDI APIs. Currently allows resolving repository
metadata.
18
'''
Markus Scheidgen's avatar
Markus Scheidgen committed
19

20
from typing import List, Dict, Any
Markus Scheidgen's avatar
Markus Scheidgen committed
21
from flask_restplus import Resource, abort, fields
22
from flask import request, g
23
from elasticsearch_dsl import Q
24
from elasticsearch.exceptions import NotFoundError
25
import elasticsearch.helpers
Markus Scheidgen's avatar
Markus Scheidgen committed
26
from datetime import datetime
Markus Scheidgen's avatar
Markus Scheidgen committed
27

28
from nomad import search, utils, datamodel, processing as proc, infrastructure, files
29
from nomad.metainfo import search_extension
30
from nomad.datamodel import Dataset, User, EditableUserMetadata
31
from nomad.app import common
32
from nomad.app.common import RFC3339DateTime, DotKeyNested
Markus Scheidgen's avatar
Markus Scheidgen committed
33

Markus Scheidgen's avatar
Markus Scheidgen committed
34
from .api import api
35
from .auth import authenticate
36
37
from .common import search_model, calc_route, add_pagination_parameters,\
    add_scroll_parameters, add_search_parameters, apply_search_parameters,\
38
    query_api_python, query_api_curl, query_api_clientlib, _search_quantities
Markus Scheidgen's avatar
Markus Scheidgen committed
39

40
ns = api.namespace('repo', description='Access repository metadata.')
Markus Scheidgen's avatar
Markus Scheidgen committed
41
42
43
44
45


@calc_route(ns)
class RepoCalcResource(Resource):
    @api.response(404, 'The upload or calculation does not exist')
    @api.response(401, 'Not authorized to access the calculation')
    @api.response(200, 'Metadata send', fields.Raw)
    @api.doc('get_repo_calc')
    @authenticate()
    def get(self, upload_id, calc_id):
        '''
        Get calculation metadata in repository form.

        Repository metadata only entails the quantities shown in the repository.
        Calcs are references via *upload_id*, *calc_id* pairs.
        '''
        # Resolve the entry directly by calc_id from the elastic index.
        try:
            calc = search.entry_document.get(calc_id)
        except NotFoundError:
            abort(404, message='There is no calculation %s/%s' % (upload_id, calc_id))

        # Entries under embargo or not yet published are only visible to
        # their owners and to admins.
        restricted = calc.with_embargo or not calc.published
        if restricted:
            if g.user is None:
                abort(401, message='Not logged in to access %s/%s.' % (upload_id, calc_id))

            is_owner = any(g.user.user_id == owner.user_id for owner in calc.owners)
            if not (is_owner or g.user.is_admin):
                abort(401, message='Not authorized to access %s/%s.' % (upload_id, calc_id))

        result = calc.to_dict()
        # Attach ready-made API usage snippets for this specific entry.
        result['code'] = dict(
            python=query_api_python('archive', upload_id, calc_id),
            curl=query_api_curl('archive', upload_id, calc_id),
            clientlib=query_api_clientlib(upload_id=[upload_id], calc_id=[calc_id]))

        return result, 200
Markus Scheidgen's avatar
Markus Scheidgen committed
77

78

79
80
81
82
83
# Request parser for the main /repo/ search endpoint. Combines the shared
# pagination, scroll and search parameters with repo specific aggregation
# arguments. The registration order below is kept as-is.
_search_request_parser = api.parser()
add_pagination_parameters(_search_request_parser)
add_scroll_parameters(_search_request_parser)
add_search_parameters(_search_request_parser)
_search_request_parser.add_argument(
    'date_histogram', type=bool, help='Add an additional aggregation over the upload time')
_search_request_parser.add_argument(
    'interval', type=str, help='Interval to use for upload time aggregation.')
_search_request_parser.add_argument(
    'metrics', type=str, action='append', help=(
        'Metrics to aggregate over all quantities and their values as comma separated list. '
        'Possible values are %s.' % ', '.join(search_extension.metrics.keys())))
_search_request_parser.add_argument(
    'statistics', type=str, action='append', help=(
        'Quantities for which to aggregate values and their metrics.'))
_search_request_parser.add_argument(
    'exclude', type=str, action='split', help='Excludes the given keys in the returned data.')
# For each search group: a boolean flag that requests the group's data and a
# "<group>_after" argument for after-key based scrolling through group values.
for group_name in search_extension.groups:
    _search_request_parser.add_argument(
        group_name, type=bool, help=('Return %s group data.' % group_name))
    _search_request_parser.add_argument(
        '%s_after' % group_name, type=str,
        help='The last %s id of the last scroll window for the %s group' % (group_name, group_name))

# Response model fields for the search endpoint, starting with the statistics
# aggregation and extended below with group and quantity fields.
_repo_calcs_model_fields = {
    'statistics': fields.Raw(description=(
        'A dict with all statistics. Each statistic is dictionary with a metrics dict as '
        'value and quantity value as key. The possible metrics are code runs(calcs), %s. '
        'There is a pseudo quantity "total" with a single value "all" that contains the '
        ' metrics over all results. ' % ', '.join(search_extension.metrics.keys())))}

# One nested group result per search group. DotKeyNested is used for group
# names that contain dots, since plain Nested cannot address such keys.
for group_name in search_extension.groups:
    _repo_calcs_model_fields[group_name] = (DotKeyNested if '.' in group_name else fields.Nested)(api.model('RepoGroup', {
        'after': fields.String(description='The after value that can be used to retrieve the next %s.' % group_name),
        'values': fields.Raw(description='A dict with %s as key. The values are dicts with "total" and "examples" keys.' % group_name)
    }), skip_none=True)

# Mirror every searchable quantity as a raw field in the response model.
for qualified_name, quantity in search_extension.search_quantities.items():
    _repo_calcs_model_fields[qualified_name] = fields.Raw(
        description=quantity.description, allow_null=True, skip_none=True)

_repo_calcs_model = api.inherit('RepoCalculations', search_model, _repo_calcs_model_fields)
121
122


Markus Scheidgen's avatar
Markus Scheidgen committed
123
124
@ns.route('/')
class RepoCalcsResource(Resource):
    @api.doc('search')
    @api.response(400, 'Invalid requests, e.g. wrong owner type or bad search parameters')
    @api.expect(_search_request_parser, validate=True)
    @api.marshal_with(_repo_calcs_model, skip_none=True, code=200, description='Search results send')
    @authenticate()
    def get(self):
        '''
        Search for calculations in the repository form, paginated.

        The ``owner`` parameter determines the overall entries to search through.
        Possible values are: ``all`` (show all entries visible to the current user), ``public``
        (show all publicly visible entries), ``user`` (show all user entries, requires login),
        ``staging`` (show all user entries in staging area, requires login).

        You can use the various quantities to search/filter for. For some of the
        indexed quantities this endpoint returns aggregation information. This means
        you will be given a list of all possible values and the number of entries
        that have the certain value. You can also use these aggregations on an empty
        search to determine the possible values.

        The pagination parameters allow determining which page to return via the
        ``page`` and ``per_page`` parameters. Pagination however, is limited to the first
        100k (depending on ES configuration) hits.

        An alternative to pagination is to use ``scroll`` and ``scroll_id``. With ``scroll``
        you will get a ``scroll_id`` on the first request. Each call with ``scroll`` and
        the respective ``scroll_id`` will return the next ``per_page`` (here the default is 1000)
        results. Scroll however, ignores ordering and does not return aggregations.
        The scroll view used in the background will stay alive for 1 minute between requests.
        If the given ``scroll_id`` is not available anymore, a HTTP 400 is raised.

        The search will return aggregations on a predefined set of quantities. Aggregations
        will tell you what quantity values exist and how many entries match those values.

        Ordering is determined by ``order_by`` and ``order`` parameters. Default is
        ``upload_time`` in descending order.
        '''

        # Parse and normalize the request arguments; any parse failure maps
        # to a 400 rather than a 500.
        try:
            parsed_args = _search_request_parser.parse_args()
            args = {
                key: value for key, value in parsed_args.items()
                if value is not None}

            scroll = args.get('scroll', False)
            scroll_id = args.get('scroll_id', None)
            page = args.get('page', 1)
            # scrolled requests default to a much larger window
            per_page = args.get('per_page', 10 if not scroll else 1000)
            order = args.get('order', -1)
            order_by = args.get('order_by', 'upload_time')

            date_histogram = args.get('date_histogram', False)
            interval = args.get('interval', '1M')
            # metrics is read from the raw flask args, not from parsed_args
            metrics: List[str] = request.args.getlist('metrics')
            statistics = args.get('statistics', [])
        except Exception as e:
            abort(400, message='bad parameters: %s' % str(e))

        search_request = search.SearchRequest()
        apply_search_parameters(search_request, args)
        if date_histogram:
            search_request.date_histogram(interval=interval)

        # Validate pagination, ordering and metrics before hitting ES.
        try:
            assert page >= 1
            assert per_page >= 0
        except AssertionError:
            abort(400, message='invalid pagination')

        if order not in [-1, 1]:
            abort(400, message='invalid pagination')

        for metric in metrics:
            if metric not in search_extension.metrics:
                abort(400, message='there is no metric %s' % metric)

        if len(statistics) > 0:
            search_request.statistics(statistics, metrics_to_use=metrics)

        # Requested groups contribute their own metric to the overall totals.
        group_metrics = [
            group_quantity.metric_name
            for group_name, group_quantity in search_extension.groups.items()
            if args.get(group_name, False)]
        total_metrics = metrics + group_metrics
        if len(total_metrics) > 0:
            search_request.totals(metrics_to_use=total_metrics)

        if 'exclude' in parsed_args:
            excludes = parsed_args['exclude']
            if excludes is not None:
                search_request.exclude(*excludes)

        try:
            if scroll:
                results = search_request.execute_scrolled(scroll_id=scroll_id, size=per_page)

            else:
                # Add a quantity aggregation for each requested group; uploads
                # are additionally ordered by upload time.
                for group_name, group_quantity in search_extension.groups.items():
                    if args.get(group_name, False):
                        kwargs: Dict[str, Any] = {}
                        if group_name == 'group_uploads':
                            kwargs.update(order_by='upload_time', order='desc')
                        search_request.quantity(
                            group_quantity.qualified_name, size=per_page, examples=1,
                            after=request.args.get('%s_after' % group_name, None),
                            **kwargs)

                results = search_request.execute_paginated(
                    per_page=per_page, page=page, order=order, order_by=order_by)

                # TODO just a work around to make things prettier
                # NOTE: the misspelled key matches the value actually indexed in
                # ES — do not "fix" the spelling here (verify against indexing code).
                if 'statistics' in results:
                    statistics = results['statistics']
                    if 'code_name' in statistics and 'currupted mainfile' in statistics['code_name']:
                        del(statistics['code_name']['currupted mainfile'])

                # Rename the generic 'quantities' result into per-group keys.
                # NOTE(review): if a group was requested but 'quantities' is
                # missing from the results, 'quantities' below would be unbound —
                # presumably execute_paginated always returns it in that case.
                if 'quantities' in results:
                    quantities = results.pop('quantities')

                for group_name, group_quantity in search_extension.groups.items():
                    if args.get(group_name, False):
                        results[group_name] = quantities[group_quantity.qualified_name]

            # build python code/curl snippet
            code_args = dict(request.args)
            if 'statistics' in code_args:
                del(code_args['statistics'])
            results['code'] = {
                'curl': query_api_curl('archive', 'query', query_string=code_args),
                'python': query_api_python('archive', 'query', query_string=code_args),
                'clientlib': query_api_clientlib(**code_args)
            }

            return results, 200
        except search.ScrollIdNotFound:
            abort(400, 'The given scroll_id does not exist.')
        except KeyError as e:
            # local import keeps the traceback dependency out of module scope
            import traceback
            traceback.print_exc()
            abort(400, str(e))
265

266

267
# Body fields common to all repo queries (used by the /repo/edit body model).
_query_model_parameters = {
    'owner': fields.String(description='Specify which calcs to return: ``all``, ``public``, ``user``, ``staging``, default is ``all``'),
    'from_time': RFC3339DateTime(description='A yyyy-MM-ddTHH:mm:ss (RFC3339) minimum entry time (e.g. upload time)'),
    'until_time': RFC3339DateTime(description='A yyyy-MM-ddTHH:mm:ss (RFC3339) maximum entry time (e.g. upload time)')
}

# One field per searchable quantity: quantities that combine multiple values
# via 'append' are modeled as lists of strings, all others as plain strings.
for qualified_name, quantity in search.search_quantities.items():
    if quantity.many_and == 'append' or quantity.many_or == 'append':
        # field is invoked immediately below, so redefining it per iteration
        # has no late-binding issue
        def field(**kwargs):
            return fields.List(fields.String(**kwargs))
    else:
        field = fields.String
    _query_model_parameters[qualified_name] = field(description=quantity.description)

_repo_query_model = api.model('RepoQuery', _query_model_parameters, skip_none=True)
282
283
284
285


def repo_edit_action_field(quantity):
    ''' Build the flask_restplus field modeling edit actions for the given quantity. '''
    # Shaped (non-scalar) quantities receive a list of single-value actions;
    # scalar quantities receive exactly one action object.
    if not quantity.is_scalar:
        return fields.List(
            fields.Nested(_repo_edit_action_model, skip_none=True), description=quantity.description)
    return fields.Nested(_repo_edit_action_model, description=quantity.description, skip_none=True)
290
291


292
# Model of a single edit action on one quantity value. Also used in responses,
# where success/message carry the per-action verification result.
_repo_edit_action_model = api.model('RepoEditAction', {
    'value': fields.String(description='The value/values that is set as a string.'),
    'success': fields.Boolean(description='If this can/could be done. Only in API response.'),
    # bugfix: keyword was misspelled `descriptin`, silently dropping the
    # description from the generated API documentation
    'message': fields.String(description='A message that details the action result. Only in API response.')
})

# Request/response body of the /repo/edit endpoint: a query selecting the
# entries, the actions to apply, and overall success/message in responses.
_repo_edit_model = api.model('RepoEdit', {
    'verify': fields.Boolean(description='If true, no action is performed.'),
    'query': fields.Nested(_repo_query_model, skip_none=True, description='New metadata will be applied to query results.'),
    'actions': fields.Nested(
        api.model('RepoEditActions', {
            quantity.name: repo_edit_action_field(quantity)
            for quantity in EditableUserMetadata.m_def.definitions
        }), skip_none=True,
        description='Each action specifies a single value (even for multi valued quantities).'),
    'success': fields.Boolean(description='If the overall edit can/could be done. Only in API response.'),
    'message': fields.String(description='A message that details the overall edit result. Only in API response.')
})

# Fast name-based lookup of the editable metadata quantity definitions.
_editable_quantities = {
    quantity.name: quantity for quantity in EditableUserMetadata.m_def.definitions}

314

315
def edit(parsed_query: Dict[str, Any], mongo_update: Dict[str, Any] = None, re_index=True) -> List[str]:
    '''
    Applies the given mongo update to all entries matched by the parsed query.

    Arguments:
        parsed_query: Parsed search parameters that select the affected entries.
        mongo_update: A mongoengine update dict applied to all matched calcs;
            the mongo update is skipped if None.
        re_index: If True, all affected entries are re-indexed in elastic search.

    Returns: The ids of all uploads that contain affected entries.
    '''
    # get all calculations that have to change
    with utils.timer(common.logger, 'edit query executed'):
        search_request = search.SearchRequest().include('calc_id', 'upload_id')
        apply_search_parameters(search_request, parsed_query)
        upload_ids = set()
        calc_ids = []

        for hit in search_request.execute_scan():
            calc_ids.append(hit['calc_id'])
            upload_ids.add(hit['upload_id'])

    # perform the update on the mongo db
    with utils.timer(common.logger, 'edit mongo update executed', size=len(calc_ids)):
        if mongo_update is not None:
            n_updated = proc.Calc.objects(calc_id__in=calc_ids).update(multi=True, **mongo_update)
            if n_updated != len(calc_ids):
                common.logger.error('edit repo did not update all entries', payload=mongo_update)

    # re-index the affected entries in elastic search
    with utils.timer(common.logger, 'edit elastic update executed', size=len(calc_ids)):
        if re_index:
            def elastic_updates():
                ''' Generates the bulk index operations, caching upload files per upload. '''
                upload_files_cache: Dict[str, files.UploadFiles] = dict()

                for calc in proc.Calc.objects(calc_id__in=calc_ids):
                    upload_id = calc.upload_id
                    upload_files = upload_files_cache.get(upload_id)
                    if upload_files is None:
                        upload_files = files.UploadFiles.get(upload_id, is_authorized=lambda: True)
                        upload_files_cache[upload_id] = upload_files

                    entry_metadata = calc.entry_metadata(upload_files)
                    entry = entry_metadata.a_elastic.create_index_entry().to_dict(include_meta=True)
                    entry['_op_type'] = 'index'

                    yield entry

                for upload_files in upload_files_cache.values():
                    upload_files.close()

            # with stats_only=True, bulk returns (n_success, n_failed) as ints
            _, failed = elasticsearch.helpers.bulk(
                infrastructure.elastic_client, elastic_updates(), stats_only=True)
            search.refresh()
            if failed > 0:
                # bugfix: failed is an int here; the previous len(failed)
                # raised TypeError whenever this error path was taken
                common.logger.error(
                    'edit repo with failed elastic updates',
                    payload=mongo_update, nfailed=failed)

    return list(upload_ids)

366

367
def get_uploader_ids(query):
    ''' Get all uploaders matched by the query, to check coauthors and shared_with against uploaders. '''
    uploader_search = search.SearchRequest()
    apply_search_parameters(uploader_search, query)
    uploader_search.quantity(name='uploader_id')
    results = uploader_search.execute()
    return results['quantities']['uploader_id']['values']


375
376
@ns.route('/edit')
class EditRepoCalcsResource(Resource):
    @api.doc('edit_repo')
    @api.response(400, 'Invalid requests, e.g. wrong owner type or bad search parameters')
    @api.expect(_repo_edit_model)
    @api.marshal_with(_repo_edit_model, skip_none=True, code=200, description='Edit verified/performed')
    @authenticate()
    def post(self):
        '''
        Edit repository metadata.

        Verifies all given actions against the entries selected by the body's
        query; with ``verify`` set, only the verification result is returned.
        Otherwise the prepared mongo update is applied, affected entries are
        re-indexed, embargoes are lifted and orphaned datasets removed.
        '''

        # basic body parsing and some semantic checks
        json_data = request.get_json()
        if json_data is None:
            json_data = {}
        query = json_data.get('query', {})

        # edits are only allowed on the user's own (or staging) entries
        owner = query.get('owner', 'user')
        if owner not in ['user', 'staging']:
            abort(400, 'Not a valid owner for edit %s. Edit can only be performed in user or staging' % owner)
        query['owner'] = owner

        if 'actions' not in json_data:
            abort(400, 'Missing key actions in edit data')
        actions = json_data['actions']
        verify = json_data.get('verify', False)

        # preparing the query of entries that are edited
        parsed_query = {}
        for quantity_name, value in query.items():
            if quantity_name in _search_quantities:
                quantity = search.search_quantities[quantity_name]
                # many-valued quantities accept comma separated strings
                if quantity.many:
                    if not isinstance(value, list):
                        value = value.split(',')
                parsed_query[quantity_name] = value
        parsed_query['owner'] = owner
        parsed_query['domain'] = query.get('domain')

        # checking the edit actions and preparing a mongo update on the fly
        json_data['success'] = True
        mongo_update = {}
        uploader_ids = None  # lazily fetched on first User action
        lift_embargo = False
        removed_datasets = None

        with utils.timer(common.logger, 'edit verified'):
            for action_quantity_name, quantity_actions in actions.items():
                quantity = _editable_quantities.get(action_quantity_name)
                if quantity is None:
                    abort(400, 'Unknown quantity %s' % action_quantity_name)

                quantity_flask = quantity.m_get_annotations('flask', {})
                # NOTE(review): is_admin is *called* here, while elsewhere in
                # this file it is read as an attribute — confirm which is the
                # actual API of the user object.
                if quantity_flask.get('admin_only', False):
                    if not g.user.is_admin():
                        abort(404, 'Only the admin user can set %s' % quantity.name)

                # a scalar quantity must not get a list of actions, and vice versa
                if isinstance(quantity_actions, list) == quantity.is_scalar:
                    abort(400, 'Wrong shape for quantity %s' % action_quantity_name)

                if not isinstance(quantity_actions, list):
                    quantity_actions = [quantity_actions]

                flask_verify = quantity_flask.get('verify', None)
                mongo_key = 'metadata__%s' % quantity.name
                # NOTE(review): has_error is re-initialized for every quantity,
                # so an error recorded for an earlier quantity is overwritten;
                # with an empty actions dict it is unbound below — confirm.
                has_error = False
                for action in quantity_actions:
                    action['success'] = True
                    action['message'] = None
                    action_value = action.get('value')
                    action_value = action_value if action_value is None else action_value.strip()

                    # empty/missing values clear the quantity
                    if action_value is None:
                        mongo_value = None

                    elif action_value == '':
                        mongo_value = None

                    # user-valued quantities: the value must be an existing
                    # user that is not already an uploader in the query results
                    elif flask_verify == datamodel.User:
                        try:
                            mongo_value = User.get(user_id=action_value).user_id
                        except KeyError:
                            action['success'] = False
                            has_error = True
                            action['message'] = 'User does not exist'
                            continue

                        if uploader_ids is None:
                            uploader_ids = get_uploader_ids(parsed_query)
                        if action_value in uploader_ids:
                            action['success'] = False
                            has_error = True
                            action['message'] = 'This user is already an uploader of one entry in the query'
                            continue

                    # dataset-valued quantities: resolve by name, creating a
                    # new dataset on the fly unless we are only verifying
                    elif flask_verify == datamodel.Dataset:
                        try:
                            mongo_value = Dataset.m_def.a_mongo.get(
                                user_id=g.user.user_id, name=action_value).dataset_id
                        except KeyError:
                            action['message'] = 'Dataset does not exist and will be created'
                            mongo_value = None
                            if not verify:
                                dataset = Dataset(
                                    dataset_id=utils.create_uuid(), user_id=g.user.user_id,
                                    name=action_value, created=datetime.utcnow())
                                dataset.a_mongo.create()
                                mongo_value = dataset.dataset_id

                    elif action_quantity_name == 'with_embargo':
                        # ignore the actual value ... just lift the embargo
                        mongo_value = False
                        lift_embargo = True

                        # check if necessary
                        search_request = search.SearchRequest()
                        apply_search_parameters(search_request, parsed_query)
                        search_request.q = search_request.q & Q('term', with_embargo=True)
                        if search_request.execute()['total'] == 0:
                            action['success'] = False
                            has_error = True
                            action['message'] = 'There is no embargo to lift'
                            continue
                    else:
                        mongo_value = action_value

                    # scalar quantities overwrite; shaped quantities collect
                    # distinct values into a list
                    if len(quantity.shape) == 0:
                        mongo_update[mongo_key] = mongo_value
                    else:
                        mongo_values = mongo_update.setdefault(mongo_key, [])
                        if mongo_value is not None:
                            if mongo_value in mongo_values:
                                action['success'] = False
                                has_error = True
                                action['message'] = 'Duplicate values are not allowed'
                                continue
                            mongo_values.append(mongo_value)

                # an empty action list on a shaped quantity clears it
                if len(quantity_actions) == 0 and len(quantity.shape) > 0:
                    mongo_update[mongo_key] = []

                if action_quantity_name == 'datasets':
                    # check if datasets edit is allowed and if datasets have to be removed
                    search_request = search.SearchRequest()
                    apply_search_parameters(search_request, parsed_query)
                    search_request.quantity(name='dataset_id')
                    old_datasets = list(
                        search_request.execute()['quantities']['dataset_id']['values'].keys())

                    removed_datasets = []
                    for dataset_id in old_datasets:
                        if dataset_id not in mongo_update.get(mongo_key, []):
                            removed_datasets.append(dataset_id)

                    # entries must not be removed from datasets that have a DOI
                    doi_ds = Dataset.m_def.a_mongo.objects(
                        dataset_id__in=removed_datasets, doi__ne=None).first()
                    if doi_ds is not None:
                        json_data['success'] = False
                        json_data['message'] = json_data.get('message', '') + \
                            'Edit would remove entries from a dataset with DOI (%s) ' % doi_ds.name
                        has_error = True

        # stop here, if client just wants to verify its actions
        if verify:
            return json_data, 200

        # stop if the action were not ok
        if has_error:
            return json_data, 400

        # perform the change
        mongo_update['metadata__last_edit'] = datetime.utcnow()
        upload_ids = edit(parsed_query, mongo_update, True)

        # lift embargo
        if lift_embargo:
            for upload_id in upload_ids:
                upload = proc.Upload.get(upload_id)
                upload.re_pack()

        # remove potentially empty old datasets
        if removed_datasets is not None:
            for dataset in removed_datasets:
                if proc.Calc.objects(metadata__datasets=dataset).first() is None:
                    Dataset.m_def.a_mongo.objects(dataset_id=dataset).delete()

        return json_data, 200
561

562

563
564
565
566
567
568
# Request parser for /repo/quantity/<quantity>: standard search parameters
# plus after-key based scrolling over the quantity's values.
_repo_quantity_search_request_parser = api.parser()
add_search_parameters(_repo_quantity_search_request_parser)
_repo_quantity_search_request_parser.add_argument(
    'after', type=str, help='The after value to use for "scrolling".')
_repo_quantity_search_request_parser.add_argument(
    'size', type=int, help='The max size of the returned values.')
_repo_quantity_search_request_parser.add_argument(
    'value', type=str, help='A partial value. Only values that include this will be returned')

# One scroll window of values for a single quantity.
_repo_quantity_model = api.model('RepoQuantity', {
    'after': fields.String(description='The after value that can be used to retrieve the next set of values.'),
    'values': fields.Raw(description='A dict with values as key. Values are dicts with "total" and "examples" keys.')
})

# Response envelope of the quantity endpoint.
_repo_quantity_values_model = api.model('RepoQuantityValues', {
    'quantity': fields.Nested(_repo_quantity_model, allow_null=True)
})

581

582
@ns.route('/quantity/<string:quantity>')
class RepoQuantityResource(Resource):
    @api.doc('quantity_search')
    @api.response(400, 'Invalid requests, e.g. wrong owner type, bad quantity, bad search parameters')
    @api.expect(_repo_quantity_search_request_parser, validate=True)
    @api.marshal_with(_repo_quantity_values_model, skip_none=True, code=200, description='Search results send')
    @authenticate()
    def get(self, quantity: str):
        '''
        Retrieve quantity values from entries matching the search.

        You can use the various quantities to search/filter for. For some of the
        indexed quantities this endpoint returns aggregation information. This means
        you will be given a list of all possible values and the number of entries
        that have the certain value. You can also use these aggregations on an empty
        search to determine the possible values.

        There is no ordering and no pagination. Instead there is an 'after' key based
        scrolling. The result will contain an 'after' value, that can be specified
        for the next request. You can use the 'size' and 'after' parameters accordingly.

        The result will contain a 'quantity' key with quantity values and the "after"
        value. There will be upto 'size' many values. For the rest of the values use the
        "after" parameter in another request.
        '''
        search_request = search.SearchRequest()
        # drop unset arguments so defaults below take effect
        args = {
            key: value
            for key, value in _repo_quantity_search_request_parser.parse_args().items()
            if value is not None}

        apply_search_parameters(search_request, args)
        after = args.get('after', None)
        size = args.get('size', 100)

        # explicit validation instead of assert: asserts are stripped under `python -O`
        if size < 0:
            abort(400, message='invalid size')

        try:
            search_request.quantity(quantity, size=size, after=after)
            results = search_request.execute()
            # expose the single requested aggregation under the 'quantity' key
            quantities = results.pop('quantities')
            results['quantity'] = quantities[quantity]

            return results, 200
        except KeyError as e:
            # unknown quantity names surface as KeyError from the search layer
            abort(400, 'Given quantity does not exist: %s' % str(e))
634
635


636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
# Request parser for value suggestions: the standard search filters plus a
# 'size' limit and an 'include' substring filter.
_repo_suggestions_search_request_parser = api.parser()
add_search_parameters(_repo_suggestions_search_request_parser)
_repo_suggestions_search_request_parser.add_argument(
    'size', type=int, help='The max size of the returned values.')
_repo_suggestions_search_request_parser.add_argument(
    'include', type=str, help='A substring that all values need to include.')

# Response model: a flat list of suggested values.
_repo_suggestions_model = api.model('RepoSuggestionsValues', {
    'suggestions': fields.List(fields.String, description='A list with the suggested values.')
})


@ns.route('/suggestions/<string:quantity>')
class RepoSuggestionsResource(Resource):
    @api.doc('suggestions_search')
    @api.response(400, 'Invalid requests, e.g. wrong owner type, bad quantity, bad search parameters')
    @api.expect(_repo_suggestions_search_request_parser, validate=True)
    @api.marshal_with(_repo_suggestions_model, skip_none=True, code=200, description='Suggestions send')
    @authenticate()
    def get(self, quantity: str):
        '''
        Retrieve the top values for the given quantity from entries matching the search.
        Values can be filtered to only include those containing a given substring.

        There is no ordering, no pagination, and no scroll interface.

        The result will contain a 'suggestions' key with values. There will be upto 'size' many values.
        '''
        search_request = search.SearchRequest()
        # drop unset arguments so defaults below take effect
        args = {
            key: value
            for key, value in _repo_suggestions_search_request_parser.parse_args().items()
            if value is not None}

        apply_search_parameters(search_request, args)
        size = args.get('size', 20)
        include = args.get('include', None)

        # explicit validation instead of assert: asserts are stripped under `python -O`
        if size < 0:
            abort(400, message='invalid size')

        try:
            search_request.statistic(quantity, size=size, include=include, order=dict(_key='desc'))
            results = search_request.execute()
            # keep only values that actually occur in the matched entries
            values = {
                value: metric['code_runs']
                for value, metric in results['statistics'][quantity].items()
                if metric['code_runs'] > 0}
            # most frequent values first
            results['suggestions'] = sorted(
                values.keys(), key=lambda value: values[value], reverse=True)

            return results, 200
        except KeyError as e:
            # unknown quantity names surface as KeyError from the search layer
            abort(400, 'Given quantity does not exist: %s' % str(e))


697
698
699
# Request parser for the multi-quantity endpoint: the standard search filters
# plus the list of quantities to aggregate and a per-quantity 'size'.
_repo_quantities_search_request_parser = api.parser()
add_search_parameters(_repo_quantities_search_request_parser)
_repo_quantities_search_request_parser.add_argument(
    'quantities', type=str, action='append',
    help='The quantities to retrieve values from')
_repo_quantities_search_request_parser.add_argument(
    'size', type=int, help='The max size of the returned values.')

# Response model: one RepoQuantity result per searchable quantity.
_repo_quantities_model = api.model('RepoQuantitiesResponse', {
    'quantities': fields.Nested(api.model('RepoQuantities', {
        quantity: fields.List(fields.Nested(_repo_quantity_model))
        for quantity in search_extension.search_quantities
    }))
})

712
713
714
715
716

@ns.route('/quantities')
class RepoQuantitiesResource(Resource):
    @api.doc('quantities_search')
    @api.response(400, 'Invalid requests, e.g. wrong owner type, bad quantity, bad search parameters')
    @api.expect(_repo_quantities_search_request_parser, validate=True)
    @api.marshal_with(_repo_quantities_model, skip_none=True, code=200, description='Search results send')
    @authenticate()
    def get(self):
        '''
        Retrieve quantity values for multiple quantities at once.

        You can use the various quantities to search/filter for. For some of the
        indexed quantities this endpoint returns aggregation information. This means
        you will be given a list of all possible values and the number of entries
        that have the certain value. You can also use these aggregations on an empty
        search to determine the possible values.

        There is no ordering, no pagination, and no 'after' key based scrolling.

        The result will contain a 'quantities' key with a dict of quantity names and the
        retrieved values as values.
        '''
        search_request = search.SearchRequest()
        # drop unset arguments so defaults below take effect
        args = {
            key: value
            for key, value in _repo_quantities_search_request_parser.parse_args().items()
            if value is not None}

        apply_search_parameters(search_request, args)
        quantities = args.get('quantities', [])
        size = args.get('size', 5)

        # explicit validation instead of assert: asserts are stripped under `python -O`
        if size < 0:
            abort(400, message='invalid size')

        for quantity in quantities:
            try:
                search_request.quantity(quantity, size=size)
            except KeyError as e:
                # unknown quantity names surface as KeyError from the search layer
                abort(400, 'Given quantity does not exist: %s' % str(e))

        return search_request.execute(), 200


Markus Scheidgen's avatar
Markus Scheidgen committed
763
# Minimal response model identifying a single entry by upload and calc id.
_repo_calc_id_model = api.model('RepoCalculationId', {
    'upload_id': fields.String(),
    'calc_id': fields.String()
})


768
@ns.route('/pid/<path:pid>')
class RepoPidResource(Resource):
    @api.doc('resolve_pid')
    @api.response(404, 'Entry with PID does not exist')
    @api.marshal_with(_repo_calc_id_model, skip_none=True, code=200, description='Entry resolved')
    @authenticate()
    def get(self, pid: str):
        '''
        Resolve a PID (plain integer or '21.11132/<handle>' form) to the
        upload_id/calc_id of the corresponding entry, respecting the
        current user's access rights.
        '''
        if '/' in pid:
            # The <path:pid> converter can pass additional slashes; split only
            # once so a malformed handle yields a 400 instead of an unhandled
            # ValueError (HTTP 500) from tuple unpacking.
            prefix, pid = pid.split('/', 1)
            if prefix != '21.11132':
                abort(400, 'Wrong PID format')
            try:
                pid_int = utils.decode_handle_id(pid)
            except ValueError:
                abort(400, 'Wrong PID format')
        else:
            try:
                pid_int = int(pid)
            except ValueError:
                abort(400, 'Wrong PID format')

        search_request = search.SearchRequest().include('upload_id', 'calc_id')

        # restrict results to entries visible to the authenticated user, if any
        if g.user is not None:
            search_request.owner('all', user_id=g.user.user_id)
        else:
            search_request.owner('all')

        search_request.search_parameter('pid', pid_int)

        results = list(search_request.execute_scan())
        total = len(results)

        if total == 0:
            abort(404, 'Entry with PID %s does not exist' % pid)

        if total > 1:
            # PIDs should be unique; log a data-consistency problem
            common.logger.error('Two entries for the same pid', pid=pid_int)

        result = results[0]
        return dict(
            upload_id=result['upload_id'],
            calc_id=result['calc_id'])