test_api.py 46.5 KB
Newer Older
Markus Scheidgen's avatar
Markus Scheidgen committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
# Copyright 2018 Markus Scheidgen
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

15
from typing import Any
16
17
18
import pytest
import time
import json
19
20
import zipfile
import io
21
import inspect
Markus Scheidgen's avatar
Markus Scheidgen committed
22
import datetime
23
import os.path
24
from urllib.parse import urlencode
25
import base64
26

Markus Scheidgen's avatar
Markus Scheidgen committed
27
from nomad.api.app import rfc3339DateTime
28
from nomad.api.auth import generate_upload_token
29
from nomad import search, parsing, files, config, utils, infrastructure
30
31
from nomad.files import UploadFiles, PublicUploadFiles
from nomad.processing import Upload, Calc, SUCCESS
32
from nomad.datamodel import UploadWithMetadata, CalcWithMetadata, User
33

34
from tests.conftest import create_auth_headers, clear_elastic
35
from tests.test_files import example_file, example_file_mainfile, example_file_contents
36
from tests.test_files import create_staging_upload, create_public_upload, assert_upload_files
37
from tests.test_search import assert_search_upload
38
39


40
41
# module-level logger, obtained from the project's logging utilities
logger = utils.get_logger(__name__)

Markus Scheidgen's avatar
Markus Scheidgen committed
42

43
44
45
46
47
def test_alive(client):
    """ The ``/alive`` endpoint answers with HTTP 200. """
    response = client.get('/alive')
    assert response.status_code == 200


48
49
@pytest.fixture(scope='function')
def test_user_signature_token(client, test_user_auth):
    """ Provides the signature token that ``/auth/`` returns for the test user. """
    response = client.get('/auth/', headers=test_user_auth)
    assert response.status_code == 200
    auth_info = json.loads(response.data)
    return auth_info['signature_token']
53
54


55
56
57
58
59
60
61
62
def get_upload_with_metadata(upload: dict) -> UploadWithMetadata:
    """ Build a :class:`UploadWithMetadata` from the json record the API returns for an upload. """
    upload_id = upload['upload_id']
    calcs = []
    for calc in upload['calcs']['results']:
        # only calc id and mainfile are taken over from the API record
        calcs.append(CalcWithMetadata(calc_id=calc['calc_id'], mainfile=calc['mainfile']))

    return UploadWithMetadata(upload_id=upload_id, calcs=calcs)


63
64
65
class TestInfo:
    def test_info(self, client):
        rv = client.get('/info/')
66
67
68
69
        data = json.loads(rv.data)
        assert 'codes' in data
        assert 'parsers' in data
        assert len(data['parsers']) >= len(data['codes'])
70
71
        assert rv.status_code == 200

72

73
class TestKeycloak:
    """ Authentication and user tests that run with the ``keycloak`` fixture. """

    def test_auth_wo_credentials(self, client, keycloak, no_warn):
        """ A request without credentials is answered with 401. """
        response = client.get('/auth/')
        assert response.status_code == 401

    @pytest.fixture(scope='function')
    def auth_headers(self, client, keycloak):
        """ Logs in with basic auth and provides headers with the returned bearer token. """
        credentials = base64.standard_b64encode(b'sheldon.cooper@nomad-coe.eu:password')
        basic_auth_headers = dict(Authorization='Basic %s' % credentials.decode('utf-8'))
        response = client.get('/auth/', headers=basic_auth_headers)
        assert response.status_code == 200

        auth = json.loads(response.data)
        assert 'access_token' in auth
        token = auth['access_token']
        assert token is not None
        return dict(Authorization='Bearer %s' % token)

    def test_auth_with_password(self, client, auth_headers):
        """ Has no assertions of its own; the ``auth_headers`` fixture does the login. """
        pass

    def test_auth_with_access_token(self, client, auth_headers):
        """ The bearer token from the password login is accepted by ``/auth/``. """
        response = client.get('/auth/', headers=auth_headers)
        assert response.status_code == 200

    def test_get_user(self, keycloak):
        """ The example user can be retrieved by email and has all expected attributes. """
        sheldon = infrastructure.keycloak.get_user(email='sheldon.cooper@nomad-coe.eu')
        assert sheldon.email is not None
        assert sheldon.name == 'Sheldon Cooper'
        assert sheldon.first_name == 'Sheldon'
        assert sheldon.last_name == 'Cooper'
        assert sheldon.created is not None
        assert sheldon.affiliation is not None
        assert sheldon.affiliation_address is not None

105

106
107
108
109
class TestAuth:
    def test_auth_wo_credentials(self, client, no_warn):
        rv = client.get('/auth/')
        assert rv.status_code == 401
110

111
112
113
114
    def test_auth_with_token(self, client, test_user_auth):
        rv = client.get('/auth/', headers=test_user_auth)
        assert rv.status_code == 200
        self.assert_auth(client, json.loads(rv.data))
115

116
    def assert_auth(self, client, auth):
117
        assert 'user' not in auth
118
119
120
        assert 'access_token' in auth
        assert 'upload_token' in auth
        assert 'signature_token' in auth
121

122
123
124
    def test_signature_token(self, test_user_signature_token, no_warn):
        assert test_user_signature_token is not None

125
126
127
128
129

class TestUploads:

    def assert_uploads(self, upload_json_str, count=0, **kwargs):
        """
        Asserts that the given json string is a paginated result with ``count`` uploads.
        If uploads are expected, the first one is checked with :func:`assert_upload`
        and the additional keyword arguments.
        """
        response = json.loads(upload_json_str)
        assert 'pagination' in response
        assert 'page' in response['pagination']

        uploads = response['results']
        assert isinstance(uploads, list)
        assert len(uploads) == count

        if count <= 0:
            return

        first_upload = json.dumps(uploads[0])
        self.assert_upload(first_upload, **kwargs)

    def assert_upload(self, upload_json_str, id=None, **kwargs):
        """
        Asserts that the given json string is an upload record with id and create time.
        If ``id`` is given, the record must have this upload id. Each additional keyword
        argument must match the value stored under the same key in the record.

        Returns: The parsed upload record.
        """
        upload = json.loads(upload_json_str)
        assert 'upload_id' in upload
        if id is not None:
            assert id == upload['upload_id']
        assert 'create_time' in upload

        for key, expected in kwargs.items():
            actual = upload.get(key, None)
            assert actual == expected

        return upload

    def assert_processing(self, client, test_user_auth, upload_id):
        """
        Waits until the upload is processed and asserts that upload and calcs were
        processed successfully, that calc logs and the paginated upload are available,
        and that staging files and search entries exist.
        """
        # poll until completed
        upload = self.block_until_completed(client, upload_id, test_user_auth)

        assert len(upload['tasks']) == 4
        assert upload['tasks_status'] == SUCCESS
        assert upload['current_task'] == 'cleanup'
        assert not upload['process_running']

        for calc in upload['calcs']['results']:
            assert calc['tasks_status'] == SUCCESS
            assert calc['current_task'] == 'archiving'
            assert len(calc['tasks']) == 3
            logs_url = '/archive/logs/%s/%s' % (calc['upload_id'], calc['calc_id'])
            assert client.get(logs_url, headers=test_user_auth).status_code == 200

        if upload['calcs']['pagination']['total'] > 1:
            # request the second page with one calc per page
            upload_endpoint = '/uploads/%s' % upload_id
            page_url = '%s?page=2&per_page=1&order_by=tasks_status' % upload_endpoint
            rv = client.get(page_url, headers=test_user_auth)
            assert rv.status_code == 200
            # from here on, upload refers to the paginated record
            upload = self.assert_upload(rv.data)
            assert len(upload['calcs']['results']) == 1

        upload_with_metadata = get_upload_with_metadata(upload)
        assert_upload_files(upload_with_metadata, files.StagingUploadFiles)
        assert_search_upload(upload_with_metadata, additional_keys=['atoms', 'system'])
179

180
    def assert_published(self, client, test_user_auth, upload_id, proc_infra, metadata=None):
        """
        Publishes the given upload via the API, waits for the publish process to
        complete, and asserts that the upload is marked as published and that its
        public files and search entries exist.

        Arguments:
            client: The API test client.
            test_user_auth: The auth headers used for all requests.
            upload_id: The id of the upload to publish.
            proc_infra: Not used in the body; presumably requested so that the
                processing infrastructure is available (TODO confirm).
            metadata: Optional metadata that is sent with the publish operation.
                Defaults to empty metadata.
        """
        # use a fresh dict per call instead of a shared mutable default argument
        if metadata is None:
            metadata = {}

        rv = client.get('/uploads/%s' % upload_id, headers=test_user_auth)
        upload = self.assert_upload(rv.data)

        # the result is not needed, but this raises a KeyError if the upload record
        # lacks the calc ids or mainfiles
        get_upload_with_metadata(upload)

        rv = client.post(
            '/uploads/%s' % upload_id,
            headers=test_user_auth,
            data=json.dumps(dict(operation='publish', metadata=metadata)),
            content_type='application/json')
        assert rv.status_code == 200
        upload = self.assert_upload(rv.data)
        assert upload['current_process'] == 'publish_upload'
        assert upload['process_running']

        additional_keys = ['with_embargo']

        self.block_until_completed(client, upload_id, test_user_auth)

        upload_proc = Upload.objects(upload_id=upload_id).first()
        assert upload_proc is not None
        assert upload_proc.published is True
        upload_with_metadata = upload_proc.to_upload_with_metadata()

        assert_upload_files(upload_with_metadata, files.PublicUploadFiles, published=True)
        assert_search_upload(upload_with_metadata, additional_keys=additional_keys, published=True)

    def block_until_completed(self, client, upload_id: str, test_user_auth):
        """
        Polls the upload every 0.1 seconds until neither a process nor tasks are running.

        Returns: The upload record, or None if the upload does not exist (anymore).
        Raises: Exception, if the API answers with a status other than 200 or 404.
        """
        upload_url = '/uploads/%s' % upload_id
        while True:
            time.sleep(0.1)
            response = client.get(upload_url, headers=test_user_auth)
            status = response.status_code

            if status == 404:
                return None

            if status != 200:
                raise Exception(
                    'unexpected status code while blocking for upload processing: %s' %
                    str(status))

            upload = self.assert_upload(response.data)
            if not (upload['process_running'] or upload['tasks_running']):
                return upload
222
223
224

    def assert_upload_does_not_exist(self, client, upload_id: str, test_user_auth):
        """
        Waits for running processes on the upload and asserts that the upload is
        gone from the API and from the Upload and Calc collections, and that no
        upload files other than public ones remain.
        """
        self.block_until_completed(client, upload_id, test_user_auth)

        rv = client.get('/uploads/%s' % upload_id, headers=test_user_auth)
        assert rv.status_code == 404
        assert Upload.objects(upload_id=upload_id).first() is None
        # compare by value; an identity comparison with an int literal only works
        # due to CPython's small int caching and causes a SyntaxWarning on 3.8+
        assert Calc.objects(upload_id=upload_id).count() == 0
        upload_files = UploadFiles.get(upload_id)
        assert upload_files is None or isinstance(upload_files, PublicUploadFiles)
Markus Scheidgen's avatar
Markus Scheidgen committed
232

233
234
235
236
237
238
239
    def test_get_command(self, client, test_user_auth, no_warn):
        """ The upload command endpoint provides an upload command and url. """
        response = client.get('/uploads/command', headers=test_user_auth)
        assert response.status_code == 200
        command_info = json.loads(response.data)
        for key in ('upload_command', 'upload_url'):
            assert key in command_info

240
241
    def test_get_empty(self, client, test_user_auth, no_warn):
        """ The upload list is empty when nothing was uploaded. """
        response = client.get('/uploads/', headers=test_user_auth)
        assert response.status_code == 200
        self.assert_uploads(response.data, count=0)
Markus Scheidgen's avatar
Markus Scheidgen committed
245

246
247
248
    def test_get_not_existing(self, client, test_user_auth, no_warn):
        """ Requesting an unknown upload id is answered with 404. """
        response = client.get('/uploads/123456789012123456789012', headers=test_user_auth)
        assert response.status_code == 404
249

250
    def test_put_upload_token(self, client, non_empty_example_upload, test_user):
        """ Uploading with an upload token in the url instead of auth headers works. """
        token = generate_upload_token(test_user)
        url = '/uploads/?token=%s&local_path=%s&name=test_upload' % (
            token, non_empty_example_upload)
        response = client.put(url)
        assert response.status_code == 200
        assert 'Thanks for uploading' in response.data.decode('utf-8')
256

257
258
    @pytest.mark.parametrize('mode', ['multipart', 'stream', 'local_path'])
    @pytest.mark.parametrize('name', [None, 'test_name'])
    def test_put(self, client, test_user_auth, proc_infra, example_upload, mode, name, no_warn):
        """
        Uploads the example upload as multipart form data, as a raw stream, or via
        a local path, with and without an upload name, and asserts that it is processed.
        """
        file = example_upload
        if name:
            url = '/uploads/?name=%s' % name
        else:
            url = '/uploads/'

        if mode == 'multipart':
            # open the file in a with block, so that the handle does not leak
            with open(file, 'rb') as f:
                rv = client.put(
                    url, data=dict(file=(f, 'the_name')), headers=test_user_auth)
            if not name:
                # without a name parameter, the multipart file name is expected as name
                name = 'the_name'
        elif mode == 'stream':
            with open(file, 'rb') as f:
                rv = client.put(url, data=f.read(), headers=test_user_auth)
        elif mode == 'local_path':
            url += '&' if name else '?'
            url += 'local_path=%s' % file
            rv = client.put(url, headers=test_user_auth)
        else:
            assert False, 'unknown upload mode %s' % mode

        assert rv.status_code == 200
        if mode == 'local_path':
            upload = self.assert_upload(rv.data, upload_path=file, name=name)
        else:
            upload = self.assert_upload(rv.data, name=name)
        assert upload['tasks_running']

        self.assert_processing(client, test_user_auth, upload['upload_id'])
289

290
291
292
293
294
295
296
297
    def test_upload_limit(self, client, mongo, test_user, test_user_auth, proc_infra):
        """ With as many uploads as the limit allows, a further upload is answered with 400. """
        limit = config.services.upload_limit
        for _ in range(limit):
            Upload.create(user=test_user)

        response = client.put('/uploads/?local_path=%s' % example_file, headers=test_user_auth)
        assert response.status_code == 400
        assert Upload.user_uploads(test_user).count() == limit

298
299
300
    def test_delete_not_existing(self, client, test_user_auth, no_warn):
        """ Deleting an unknown upload id is answered with 404. """
        response = client.delete('/uploads/123456789012123456789012', headers=test_user_auth)
        assert response.status_code == 404
301

302
303
304
305
306
307
308
309
310
311
312
313
    @pytest.fixture(scope='function')
    def slow_processing(self, monkeypatch):
        """ Delays ``Upload.cleanup`` by half a second for the duration of a test. """
        cleanup_target = 'nomad.processing.data.Upload.cleanup'
        original_cleanup = Upload.cleanup

        def delayed_cleanup(upload):
            time.sleep(0.5)
            original_cleanup(upload)

        monkeypatch.setattr(cleanup_target, delayed_cleanup)
        yield True
        # put the original cleanup back in place
        monkeypatch.setattr(cleanup_target, original_cleanup)

314
    def test_delete_published(self, client, test_user_auth, proc_infra, no_warn):
        """ Deleting a published upload is answered with 400. """
        response = client.put('/uploads/?local_path=%s' % example_file, headers=test_user_auth)
        upload_id = self.assert_upload(response.data)['upload_id']
        self.assert_processing(client, test_user_auth, upload_id)
        self.assert_published(client, test_user_auth, upload_id, proc_infra)

        response = client.delete('/uploads/%s' % upload_id, headers=test_user_auth)
        assert response.status_code == 400
321

Markus Scheidgen's avatar
Markus Scheidgen committed
322
    def test_delete(self, client, test_user_auth, proc_infra, no_warn):
        """ A processed, unpublished upload can be deleted and is gone afterwards. """
        response = client.put('/uploads/?local_path=%s' % example_file, headers=test_user_auth)
        upload_id = self.assert_upload(response.data)['upload_id']
        self.assert_processing(client, test_user_auth, upload_id)

        response = client.delete('/uploads/%s' % upload_id, headers=test_user_auth)
        assert response.status_code == 200
        self.assert_upload_does_not_exist(client, upload_id, test_user_auth)
329

330
331
332
333
334
335
336
337
338
339
340
    def test_post_empty(self, client, test_user_auth, empty_upload, proc_infra, no_warn):
        """ Publishing a processed empty upload is answered with 400. """
        response = client.put('/uploads/?local_path=%s' % empty_upload, headers=test_user_auth)
        assert response.status_code == 200
        upload_id = self.assert_upload(response.data)['upload_id']
        self.assert_processing(client, test_user_auth, upload_id)

        publish = json.dumps(dict(operation='publish'))
        response = client.post(
            '/uploads/%s' % upload_id, headers=test_user_auth,
            data=publish, content_type='application/json')
        assert response.status_code == 400

341
    def test_post(self, client, test_user_auth, non_empty_example_upload, proc_infra, no_warn):
        """
        A published upload can still be retrieved and is listed with ``state=all``,
        but it is not part of the default upload list anymore.
        """
        response = client.put(
            '/uploads/?local_path=%s' % non_empty_example_upload, headers=test_user_auth)
        assert response.status_code == 200
        upload_id = self.assert_upload(response.data)['upload_id']
        self.assert_processing(client, test_user_auth, upload_id)
        self.assert_published(client, test_user_auth, upload_id, proc_infra)

        # the upload itself can still be retrieved
        assert client.get('/uploads/%s' % upload_id, headers=test_user_auth).status_code == 200

        # it is listed when uploads in all states are requested
        response = client.get('/uploads/?state=all', headers=test_user_auth)
        assert response.status_code == 200
        all_uploads = json.loads(response.data)['results']
        assert len(all_uploads) > 0
        assert any(item['upload_id'] == upload_id for item in all_uploads)

        # it is not listed by default
        response = client.get('/uploads/', headers=test_user_auth)
        assert response.status_code == 200
        default_uploads = json.loads(response.data)['results']
        assert not any(item['upload_id'] == upload_id for item in default_uploads)

362
363
    def test_post_metadata(
            self, client, proc_infra, admin_user_auth, test_user_auth, test_user,
            other_test_user, no_warn, example_user_metadata):
        """ The admin user can publish an upload with user metadata and an upload time. """
        response = client.put('/uploads/?local_path=%s' % example_file, headers=test_user_auth)
        upload_id = self.assert_upload(response.data)['upload_id']
        self.assert_processing(client, test_user_auth, upload_id)

        metadata = dict(**example_user_metadata)
        metadata.update(_upload_time=datetime.datetime.utcnow().isoformat())
        self.assert_published(client, admin_user_auth, upload_id, proc_infra, metadata)
371

Markus Scheidgen's avatar
Markus Scheidgen committed
372
    def test_post_metadata_forbidden(self, client, proc_infra, test_user_auth, no_warn):
        """ Publishing with ``_pid`` metadata as the test user is answered with 401. """
        response = client.put('/uploads/?local_path=%s' % example_file, headers=test_user_auth)
        upload_id = self.assert_upload(response.data)['upload_id']
        self.assert_processing(client, test_user_auth, upload_id)

        publish = json.dumps(dict(operation='publish', metadata=dict(_pid=256)))
        response = client.post(
            '/uploads/%s' % upload_id,
            headers=test_user_auth,
            data=publish,
            content_type='application/json')
        assert response.status_code == 401

383
384
385
386
387
388
389
    def test_post_metadata_and_republish(
            self, client, proc_infra, admin_user_auth, test_user_auth, test_user,
            other_test_user, no_warn, example_user_metadata):
        """ An upload published with metadata can be published again without metadata. """
        response = client.put('/uploads/?local_path=%s' % example_file, headers=test_user_auth)
        upload_id = self.assert_upload(response.data)['upload_id']
        self.assert_processing(client, test_user_auth, upload_id)

        metadata = dict(**example_user_metadata)
        metadata.update(_upload_time=datetime.datetime.utcnow().isoformat())
        # publish with metadata first, then a second time with empty metadata
        for publish_metadata in (metadata, {}):
            self.assert_published(
                client, admin_user_auth, upload_id, proc_infra, publish_metadata)
393

394
395
396
397
398
399
400
401
402
403
404
405
406
407
    def test_post_re_process(self, client, published, test_user_auth, monkeypatch):
        """ A published upload can be re-processed after version and commit changed. """
        for target, value in (
                ('nomad.config.version', 're_process_test_version'),
                ('nomad.config.commit', 're_process_test_commit')):
            monkeypatch.setattr(target, value)

        upload_id = published.upload_id
        response = client.post(
            '/uploads/%s' % upload_id,
            headers=test_user_auth,
            data=json.dumps(dict(operation='re-process')),
            content_type='application/json')
        assert response.status_code == 200

        # the upload must still exist once the re-processing is done
        completed = self.block_until_completed(client, upload_id, test_user_auth)
        assert completed is not None

408
    # TODO validate metadata (or all input models in API for that matter)
409
    # def test_post_bad_metadata(self, client, proc_infra, test_user_auth):
410
411
412
413
414
415
    #     rv = client.put('/uploads/?local_path=%s' % example_file, headers=test_user_auth)
    #     upload = self.assert_upload(rv.data)
    #     self.assert_processing(client, test_user_auth, upload['upload_id'])
    #     rv = client.post(
    #         '/uploads/%s' % upload['upload_id'],
    #         headers=test_user_auth,
416
    #         data=json.dumps(dict(operation='publish', metadata=dict(doesnotexist='hi'))),
417
418
419
    #         content_type='application/json')
    #     assert rv.status_code == 400

420
    def test_potcar(self, client, proc_infra, test_user_auth):
        """
        The original POTCAR file of a published upload cannot be downloaded without
        authentication, while the stripped version can.
        """
        potcar_upload = 'tests/data/proc/examples_potcar.zip'
        response = client.put('/uploads/?local_path=%s' % potcar_upload, headers=test_user_auth)

        upload_id = self.assert_upload(response.data)['upload_id']
        self.assert_processing(client, test_user_auth, upload_id)
        self.assert_published(client, test_user_auth, upload_id, proc_infra)

        potcar_url = '/raw/%s/examples_potcar/POTCAR' % upload_id
        # no access to the original file without authentication
        assert client.get(potcar_url).status_code == 401
        # the uploader can access the original file
        assert client.get(potcar_url, headers=test_user_auth).status_code == 200
        # the stripped file is available without authentication
        stripped_url = '/raw/%s/examples_potcar/POTCAR.stripped' % upload_id
        assert client.get(stripped_url).status_code == 200

436

Markus Scheidgen's avatar
Markus Scheidgen committed
437
438
439
# the current UTC date, evaluated once when this module is imported
today = datetime.datetime.utcnow().date()


440
441
442
443
444
445
446
447
448
449
450
451
452
class UploadFilesBasedTests:

    @staticmethod
    def fix_signature(func, wrapper):
        additional_args = list(inspect.signature(func).parameters.values())[4:]
        wrapper_sig = inspect.signature(wrapper)
        wrapper_args = list(wrapper_sig.parameters.values())[:3] + additional_args
        wrapper_sig = wrapper_sig.replace(parameters=tuple(wrapper_args))
        wrapper.__signature__ = wrapper_sig

    @staticmethod
    def check_authorizaton(func):
        """
        Decorator that runs the test for all combinations of upload state, embargo,
        and requesting user that the ``test_data`` fixture provides. For unauthorized
        combinations the test is expected to fail with a 401 or an empty zip file.
        """
        @pytest.mark.parametrize('test_data', [
            [True, None, True],      # in staging for upload
            [True, None, False],     # in staging for different user
            [True, None, None],      # in staging for guest
            [True, None, 'admin'],   # in staging, for admin
            [False, True, True],     # in public, restricted for uploader
            [False, True, False],    # in public, restricted for different user
            [False, True, None],     # in public, restricted for guest
            [False, True, 'admin'],  # in public, restricted for admin
            [False, False, True],    # in public, public, for uploader
            [False, False, False],   # in public, public, for different user
            [False, False, None]     # in public, public, for guest
        ], indirect=True)
        def wrapper(self, client, test_data, *args, **kwargs):
            upload, authorized, auth_headers = test_data
            try:
                func(self, client, upload, auth_headers, *args, **kwargs)
            except AssertionError as assertion:
                message = str(assertion)
                # the user is not authorized and gets an empty zip as expected
                got_empty_zip = '0 == 5' in message and 'ZipFile' in message
                # the user is not authorized and gets a 401 as expected
                got_401 = '401' in message
                if not authorized and (got_empty_zip or got_401):
                    return
                raise assertion

            # the test must not pass for users that are not authorized
            assert authorized
        UploadFilesBasedTests.fix_signature(func, wrapper)
        return wrapper

    @staticmethod
    def ignore_authorization(func):
        """
        Decorator that runs the test once with an upload in staging and once with a
        public upload, without checking the outcome against the authorization.
        """
        @pytest.mark.parametrize('test_data', [
            [True, None, True],      # in staging
            [False, False, None],    # in public
        ], indirect=True)
        def wrapper(self, client, test_data, *args, **kwargs):
            upload, _authorized, auth_headers = test_data
            func(self, client, upload, auth_headers, *args, **kwargs)
        UploadFilesBasedTests.fix_signature(func, wrapper)
        return wrapper
496

497
    @pytest.fixture(scope='function')
    def test_data(self, request, mongo, raw_files, no_warn, test_user, other_test_user, admin_user):
        """
        Creates the upload ``test_upload`` according to the fixture parameter, a triple
        ``in_staging, restricted, for_uploader``. Provides the upload id, whether the
        requesting user is authorized, and the auth headers of the requesting user.
        The upload files are deleted afterwards.
        """
        # delete potential old test files
        for _ in range(2):
            old_upload_files = UploadFiles.get('test_upload')
            if old_upload_files:
                old_upload_files.delete()

        in_staging, restricted, for_uploader = request.param

        # uploader and admin are always authorized, everybody else only for
        # public uploads that are not restricted
        privileged = for_uploader is True or for_uploader == 'admin'
        if in_staging:
            authorized = privileged
        else:
            authorized = not restricted or privileged

        auth_headers = None
        if for_uploader is True:
            auth_headers = create_auth_headers(test_user)
        elif for_uploader is False:
            auth_headers = create_auth_headers(other_test_user)
        elif for_uploader == 'admin':
            auth_headers = create_auth_headers(admin_user)

        calc_specs = 'r' if restricted else 'p'
        Upload.create(user=test_user, upload_id='test_upload')
        create_upload = create_staging_upload if in_staging else create_public_upload
        _, upload_files = create_upload('test_upload', calc_specs=calc_specs)

        yield 'test_upload', authorized, auth_headers

        upload_files.delete()
531
532


533
534
535
536
class TestArchive(UploadFilesBasedTests):
    """ Tests for the ``/archive`` endpoints. """

    @UploadFilesBasedTests.check_authorizaton
    def test_get(self, client, upload, auth_headers):
        """ The archive of a calc is returned as json. """
        response = client.get('/archive/%s/0' % upload, headers=auth_headers)
        assert response.status_code == 200
        assert json.loads(response.data) is not None

    @UploadFilesBasedTests.ignore_authorization
    def test_get_signed(self, client, upload, _, test_user_signature_token):
        """ The archive of a calc is returned for a signature token in the url. """
        url = '/archive/%s/0?signature_token=%s' % (upload, test_user_signature_token)
        response = client.get(url)
        assert response.status_code == 200
        assert json.loads(response.data) is not None

    @UploadFilesBasedTests.check_authorizaton
    def test_get_calc_proc_log(self, client, upload, auth_headers):
        """ The processing log of a calc is returned and not empty. """
        response = client.get('/archive/logs/%s/0' % upload, headers=auth_headers)
        assert response.status_code == 200
        assert len(response.data) > 0

    @UploadFilesBasedTests.ignore_authorization
    def test_get_calc_proc_log_signed(self, client, upload, _, test_user_signature_token):
        """ The processing log of a calc is returned for a signature token in the url. """
        url = '/archive/logs/%s/0?signature_token=%s' % (upload, test_user_signature_token)
        response = client.get(url)
        assert response.status_code == 200
        assert len(response.data) > 0

    @UploadFilesBasedTests.ignore_authorization
    def test_get_non_existing_archive(self, client, upload, auth_headers):
        """ Requesting the archive of an unknown upload and calc is answered with 404. """
        response = client.get('/archive/%s' % 'doesnt/exist', headers=auth_headers)
        assert response.status_code == 404

    @pytest.mark.parametrize('info', [
        'all.nomadmetainfo.json',
        'all.experimental.nomadmetainfo.json',
        'vasp.nomadmetainfo.json',
        'mpes.nomadmetainfo.json'])
    def test_get_metainfo(self, client, info):
        """ The metainfo files are returned as non-empty json. """
        response = client.get('/archive/metainfo/%s' % info)
        assert response.status_code == 200
        metainfo = json.loads(response.data)
        assert len(metainfo) > 0
573

Markus Scheidgen's avatar
Markus Scheidgen committed
574

575
class TestRepo():
576
577
578
    @pytest.fixture(scope='class')
    def example_elastic_calcs(
            self, elastic_infra, normalized: parsing.LocalBackend,
            test_user: User, other_test_user: User):
        """
        Clears the search index and fills it with four entries (calc ids '1' to '4').

        All entries are produced from one ``CalcWithMetadata`` object that is updated
        and saved repeatedly:

        - '1': uploaded by ``test_user``, published, no embargo
        - '2': uploaded by ``other_test_user``, published, no embargo, uploaded five
          days earlier, with distinct atoms, comment, formula, and basis set
        - '3': uploaded by ``other_test_user``, not published
        - '4': uploaded by ``other_test_user``, published, with embargo
        """
        clear_elastic(elastic_infra)

        entry = CalcWithMetadata(upload_id=0, calc_id=0, upload_time=today)
        entry.files = ['test/mainfile.txt']
        entry.apply_domain_metadata(normalized)

        def save_entry():
            # index the current state of the shared entry object
            search.Entry.from_calc_with_metadata(entry).save(refresh=True)

        entry.update(
            calc_id='1', uploader=test_user.user_id, published=True, with_embargo=False)
        save_entry()

        five_days_ago = today - datetime.timedelta(days=5)
        entry.update(
            calc_id='2', uploader=other_test_user.user_id, published=True, with_embargo=False,
            upload_time=five_days_ago)
        entry.update(
            atoms=['Fe'], comment='this is a specific word', formula='AAA', basis_set='zzz')
        save_entry()

        entry.update(
            calc_id='3', uploader=other_test_user.user_id, published=False, with_embargo=False)
        save_entry()

        entry.update(
            calc_id='4', uploader=other_test_user.user_id, published=True, with_embargo=True)
        save_entry()

605
    def assert_search(self, rv: Any, number_of_calcs: int) -> dict:
        """
        Asserts that ``rv`` is a successful search response with the expected number
        of results.

        Arguments:
            rv: The response object; its ``status_code`` and ``data`` are read.
            number_of_calcs: The expected length of the ``results`` list.

        Returns: The decoded JSON body of the response.
        """
        status = rv.status_code
        if status != 200:
            # show the body of the failed response to ease debugging
            print(rv.data)
        assert status == 200

        payload = json.loads(rv.data)

        hits = payload.get('results', None)
        assert hits is not None
        assert isinstance(hits, list)
        assert len(hits) == number_of_calcs

        return payload

619
620
    def test_own_calc(self, client, example_elastic_calcs, no_warn, test_user_auth):
        """The uploader can read their own published calc ``/repo/0/1``."""
        response = client.get('/repo/0/1', headers=test_user_auth)
        assert response.status_code == 200

623
624
625
626
627
628
629
630
    def test_public_calc(self, client, example_elastic_calcs, no_warn, other_test_user_auth):
        """A published calc without embargo is readable by a user other than its uploader."""
        response = client.get('/repo/0/1', headers=other_test_user_auth)
        assert response.status_code == 200

    def test_embargo_calc(self, client, example_elastic_calcs, no_warn, test_user_auth):
        """An embargoed calc of another user (``/repo/0/4``) is answered with 401."""
        response = client.get('/repo/0/4', headers=test_user_auth)
        assert response.status_code == 401

631
632
633
634
    def test_own_embargo_calc(self, client, example_elastic_calcs, no_warn, other_test_user_auth):
        """The uploader can read their own embargoed calc ``/repo/0/4``."""
        response = client.get('/repo/0/4', headers=other_test_user_auth)
        assert response.status_code == 200

635
636
637
638
    def test_staging_calc(self, client, example_elastic_calcs, no_warn, test_user_auth):
        """An unpublished calc of another user (``/repo/0/3``) is answered with 401."""
        response = client.get('/repo/0/3', headers=test_user_auth)
        assert response.status_code == 401

639
640
641
642
    def test_own_staging_calc(self, client, example_elastic_calcs, no_warn, other_test_user_auth):
        """The uploader can read their own unpublished calc ``/repo/0/3``."""
        response = client.get('/repo/0/3', headers=other_test_user_auth)
        assert response.status_code == 200

643
644
    def test_non_existing_calcs(self, client, example_elastic_calcs, test_user_auth):
        """A calc id that was never indexed (``/repo/0/10``) is answered with 404."""
        response = client.get('/repo/0/10', headers=test_user_auth)
        assert response.status_code == 404

647
648
649
    @pytest.mark.parametrize('calcs, owner, auth', [
        (2, 'all', 'none'),
        (2, 'all', 'test_user'),
650
        (4, 'all', 'other_test_user'),
651
        (1, 'user', 'test_user'),
652
        (3, 'user', 'other_test_user'),
653
        (0, 'staging', 'test_user'),
654
        (1, 'staging', 'other_test_user')
655
    ])
656
    def test_search_owner(self, client, example_elastic_calcs, no_warn, test_user_auth, other_test_user_auth, calcs, owner, auth):
        """
        Searches with the given ``owner`` parameter as the user named by ``auth`` and
        expects ``calcs`` results; the first result must carry the basic entry keys.
        """
        # translate the parametrized user name into the respective auth headers
        headers_by_user = dict(
            none=None, test_user=test_user_auth, other_test_user=other_test_user_auth)
        response = client.get('/repo/?owner=%s' % owner, headers=headers_by_user.get(auth))
        data = self.assert_search(response, calcs)

        hits = data.get('results', None)
        if calcs <= 0:
            return

        for key in ['uploader', 'calc_id', 'formula', 'upload_id']:
            assert key in hits[0]

Markus Scheidgen's avatar
Markus Scheidgen committed
665
    @pytest.mark.parametrize('calcs, start, end', [
Markus Scheidgen's avatar
Markus Scheidgen committed
666
667
668
669
670
671
        (2, today - datetime.timedelta(days=6), today),
        (2, today - datetime.timedelta(days=5), today),
        (1, today - datetime.timedelta(days=4), today),
        (1, today, today),
        (1, today - datetime.timedelta(days=6), today - datetime.timedelta(days=5)),
        (0, today - datetime.timedelta(days=7), today - datetime.timedelta(days=6)),
Markus Scheidgen's avatar
Markus Scheidgen committed
672
        (2, None, None),
Markus Scheidgen's avatar
Markus Scheidgen committed
673
674
        (1, today, None),
        (2, None, today)
Markus Scheidgen's avatar
Markus Scheidgen committed
675
676
677
678
679
680
681
682
683
684
685
686
687
    ])
    def test_search_time(self, client, example_elastic_calcs, no_warn, calcs, start, end):
        """
        Searches with optional ``from_time``/``until_time`` parameters and expects
        ``calcs`` results. A bound that is ``None`` is left out of the query.
        """
        params = []
        if start is not None:
            params.append('from_time=%s' % rfc3339DateTime.format(start))
        if end is not None:
            params.append('until_time=%s' % rfc3339DateTime.format(end))

        # only add the '?' if there is at least one parameter
        query_string = ('?%s' % '&'.join(params)) if params else ''

        response = client.get('/repo/%s' % query_string)
        self.assert_search(response, calcs)
Markus Scheidgen's avatar
Markus Scheidgen committed
689

690
    @pytest.mark.parametrize('calcs, quantity, value', [
691
692
        (2, 'system', 'bulk'),
        (0, 'system', 'atom'),
693
694
        (1, 'atoms', 'Br'),
        (1, 'atoms', 'Fe'),
695
        (0, 'atoms', ['Fe', 'Br', 'A', 'B']),
696
697
        (0, 'only_atoms', ['Br', 'Si']),
        (1, 'only_atoms', ['Fe']),
698
699
        (1, 'only_atoms', ['Br', 'K', 'Si']),
        (1, 'only_atoms', ['Br', 'Si', 'K']),
700
        (1, 'comment', 'specific'),
701
        (1, 'authors', 'Leonard Hofstadter'),
702
703
704
705
706
707
708
        (2, 'files', 'test/mainfile.txt'),
        (2, 'paths', 'mainfile.txt'),
        (2, 'paths', 'test'),
        (2, 'quantities', ['wyckoff_letters_primitive', 'hall_number']),
        (0, 'quantities', 'dos')
    ])
    def test_search_quantities(self, client, example_elastic_calcs, no_warn, test_user_auth, calcs, quantity, value):
        """
        Searches for ``quantity=value`` (list values become repeated parameters) and
        expects ``calcs`` results plus a ``quantities`` section in the response.
        """
        query_string = urlencode({quantity: value}, doseq=True)

        response = client.get('/repo/?%s' % query_string, headers=test_user_auth)
        logger.debug('run search quantities test', query_string=query_string)
        data = self.assert_search(response, calcs)

        aggregations = data.get('quantities', None)
        assert aggregations is not None

        if quantity != 'system' or calcs == 0:
            return

        # for simplicity we only assert on quantities for this case
        assert 'system' in aggregations
        assert len(aggregations['system']) == 1
        assert value in aggregations['system']
722

723
724
    # metric combinations to request: none, all at once, and each metric on its own
    metrics_permutations = [[], search.metrics_names]
    metrics_permutations += [[name] for name in search.metrics_names]

725
726
727
728
729
730
731
732
733
734
735
    def test_search_admin(self, client, example_elastic_calcs, no_warn, admin_user_auth):
        """With admin credentials, ``owner=admin`` returns all four example entries."""
        response = client.get('/repo/?owner=admin', headers=admin_user_auth)
        self.assert_search(response, 4)

    def test_search_admin_auth(self, client, example_elastic_calcs, no_warn, test_user_auth):
        """``owner=admin`` is answered with 401 for a regular user and without credentials."""
        as_regular_user = client.get('/repo/?owner=admin', headers=test_user_auth)
        assert as_regular_user.status_code == 401

        anonymous = client.get('/repo/?owner=admin')
        assert anonymous.status_code == 401

736
    @pytest.mark.parametrize('metrics', metrics_permutations)
Markus Scheidgen's avatar
Markus Scheidgen committed
737
    def test_search_total_metrics(self, client, example_elastic_calcs, no_warn, metrics):
        """
        The ``quantities.total.all`` section of a search response contains ``code_runs``
        and every metric that was requested.
        """
        query_string = urlencode(dict(metrics=metrics), doseq=True)
        response = client.get('/repo/?%s' % query_string)
        assert response.status_code == 200, str(response.data)

        body = json.loads(response.data)
        totals = body.get('quantities', {}).get('total', {}).get('all', None)
        assert totals is not None

        # code_runs is expected regardless of the requested metrics
        assert 'code_runs' in totals
        for requested in metrics:
            assert requested in totals
Markus Scheidgen's avatar
Markus Scheidgen committed
746

747
    @pytest.mark.parametrize('metrics', metrics_permutations)
Markus Scheidgen's avatar
Markus Scheidgen committed
748
    def test_search_aggregation_metrics(self, client, example_elastic_calcs, no_warn, metrics):
        """
        Every value of every quantity aggregation contains ``code_runs`` and the
        requested metrics; ``authors`` is the exception and only carries ``code_runs``.
        """
        query_string = urlencode(dict(metrics=metrics), doseq=True)
        response = client.get('/repo/?%s' % query_string)
        assert response.status_code == 200

        body = json.loads(response.data)
        for name, aggregation in body.get('quantities').items():
            for value_metrics in aggregation.values():
                assert 'code_runs' in value_metrics
                if name == 'authors':
                    assert len(value_metrics) == 1  # code_runs is the only metric for authors
                else:
                    for requested in metrics:
                        assert requested in value_metrics
Markus Scheidgen's avatar
Markus Scheidgen committed
760

761
762
763
764
765
766
767
768
    def test_search_date_histogram(self, client, example_elastic_calcs, no_warn):
        """Requesting ``date_histogram=true`` adds a non-empty ``date_histogram`` quantity."""
        response = client.get('/repo/?date_histogram=true&metrics=total_energies')
        assert response.status_code == 200

        quantities = json.loads(response.data).get('quantities')
        histogram = quantities.get('date_histogram')
        print(histogram)
        assert len(histogram) > 0

769
770
771
    @pytest.mark.parametrize('n_results, page, per_page', [(2, 1, 5), (1, 1, 1), (0, 2, 3)])
    def test_search_pagination(self, client, example_elastic_calcs, no_warn, n_results, page, per_page):
        """
        The requested page holds ``n_results`` entries while the reported pagination
        total is 2 for every page.
        """
        response = client.get('/repo/?page=%d&per_page=%d' % (page, per_page))
        assert response.status_code == 200

        body = json.loads(response.data)
        page_results = body.get('results', None)
        assert body['pagination']['total'] == 2
        assert page_results is not None
        assert len(page_results) == n_results
778

779
780
    @pytest.mark.parametrize('first, order_by, order', [
        ('1', 'formula', -1), ('2', 'formula', 1),
781
782
        ('2', 'basis_set', -1), ('1', 'basis_set', 1),
        (None, 'authors', -1)])
783
784
785
786
787
788
789
    def test_search_order(self, client, example_elastic_calcs, no_warn, first, order_by, order):
        """
        Searches ordered by ``order_by`` in direction ``order`` and, unless ``first``
        is ``None``, expects the calc with id ``first`` to be the first result.
        """
        response = client.get('/repo/?order_by=%s&order=%d' % (order_by, order))
        assert response.status_code == 200

        body = json.loads(response.data)
        ordered = body.get('results', None)
        assert body['pagination']['total'] == 2
        assert len(ordered) == 2

        if first is None:
            return
        assert ordered[0]['calc_id'] == first
792

793
794
795
796
797
798
799
800
801
802
    @pytest.mark.parametrize('n_results, size', [(2, None), (2, 5), (1, 1)])
    def test_search_scroll(self, client, example_elastic_calcs, no_warn, n_results, size):
        """
        Runs a scrolled search and follows the returned ``scroll_id`` until the API
        stops returning one.

        Arguments:
            n_results: The expected number of results in the first response.
            size: The ``per_page`` value for the first request or ``None`` to omit it.

        If the first response holds fewer than two results, at least one of the
        follow-up responses has to deliver results.
        """
        if size is not None:
            # the parameters are separated by '&' only, i.e. the scroll value is '1'
            # just like in the request without per_page
            rv = client.get('/repo/?scroll=1&per_page=%d' % size)
        else:
            rv = client.get('/repo/?scroll=1')

        assert rv.status_code == 200
        data = json.loads(rv.data)
        results = data.get('results', None)
        assert data.get('scroll', {}).get('size', -1) > 0
        assert results is not None
        assert len(results) == n_results
        scroll_id = data.get('scroll', {}).get('scroll_id', None)
        assert scroll_id is not None

        has_another_page = False
        while scroll_id is not None:
            rv = client.get('/repo/?scroll=1&scroll_id=%s' % scroll_id)
            # fail with the response body instead of a TypeError on missing results
            assert rv.status_code == 200, str(rv.data)
            data = json.loads(rv.data)
            scroll_id = data.get('scroll', {}).get('scroll_id', None)
            has_another_page |= len(data.get('results')) > 0

        if n_results < 2:
            assert has_another_page

819
820
821
    def test_search_user_authrequired(self, client, example_elastic_calcs, no_warn):
        """``owner=user`` without credentials is answered with 401."""
        response = client.get('/repo/?owner=user')
        assert response.status_code == 401
822

823
824
825
826
827
    @pytest.mark.parametrize('calcs, quantity, value', [
        (2, 'system', 'bulk'),
        (0, 'system', 'atom'),
        (1, 'atoms', 'Br'),
        (1, 'atoms', 'Fe'),
828
        (1, 'authors', 'Leonard Hofstadter'),
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
        (2, 'files', 'test/mainfile.txt'),
        (0, 'quantities', 'dos')
    ])
    def test_quantity_search(self, client, example_elastic_calcs, no_warn, test_user_auth, calcs, quantity, value):
        """
        The values reported by ``/repo/<quantity>`` contain ``value`` with a count of
        ``calcs``; ``value`` is absent if ``calcs`` is 0.
        """
        response = client.get('/repo/%s' % quantity, headers=test_user_auth)
        assert response.status_code == 200

        aggregations = json.loads(response.data)['quantities']
        assert quantity in aggregations

        counts = aggregations[quantity]['values']
        is_present = value in counts
        assert is_present == (calcs > 0)
        assert counts.get(value, 0) == calcs

    def test_quantity_search_after(self, client, example_elastic_calcs, no_warn, test_user_auth):
        """
        Pages through the ``atoms`` values one value at a time using the ``after`` key.

        Each page has to hold exactly one value that differs from the first one and
        a new ``after`` key; a page without ``after`` has to be empty and ends the loop.
        """
        def fetch_atoms(url):
            # request the given url and return the 'atoms' quantity of the response
            response = client.get(url)
            assert response.status_code == 200
            return json.loads(response.data)['quantities']['atoms']

        page = fetch_atoms('/repo/atoms?size=1')
        assert 'after' in page
        after = page['after']
        assert len(page['values']) == 1
        first_value = next(iter(page['values']))

        while True:
            page = fetch_atoms('/repo/atoms?size=1&after=%s' % after)

            if 'after' not in page:
                assert len(page['values']) == 0
                break

            assert len(page['values']) == 1
            assert first_value != next(iter(page['values']))
            assert after != page['after']
            after = page['after']

870

871
class TestRaw(UploadFilesBasedTests):
Markus Scheidgen's avatar
Markus Scheidgen committed
872

873
874
875
876
877
878
879
880
881
882
883
884
885
886
    def test_raw_file_from_calc(self, client, non_empty_processed, test_user_auth):
        """
        The mainfile of the first calc is served under ``/raw/calc/<upload>/<calc>/<file>``
        and the calc's directory url returns a JSON body with non-empty ``contents``.
        """
        upload_id = non_empty_processed.upload_id
        calc = list(non_empty_processed.calcs)[0]

        mainfile_name = os.path.basename(calc.mainfile)
        file_url = '/raw/calc/%s/%s/%s' % (upload_id, calc.calc_id, mainfile_name)
        response = client.get(file_url, headers=test_user_auth)
        assert response.status_code == 200
        assert len(response.data) > 0

        listing_url = '/raw/calc/%s/%s/' % (upload_id, calc.calc_id)
        response = client.get(listing_url, headers=test_user_auth)
        assert response.status_code == 200
        listing = json.loads(response.data)
        assert len(listing['contents']) > 0

887
888
    @UploadFilesBasedTests.check_authorizaton
    def test_raw_file(self, client, upload, auth_headers):
        """The example mainfile of the upload is served with a non-empty body."""
        response = client.get(
            '/raw/%s/%s' % (upload, example_file_mainfile), headers=auth_headers)
        assert response.status_code == 200
        assert len(response.data) > 0

894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
    @UploadFilesBasedTests.check_authorizaton
    def test_raw_file_partial(self, client, upload, auth_headers):
        """
        ``offset`` and ``length`` select a byte range of the file: the 10 bytes from
        offset 10 equal the second half of the first 20 bytes.
        """
        def fetch(query):
            # request the example mainfile with the given offset/length query
            response = client.get(
                '/raw/%s/%s?%s' % (upload, example_file_mainfile, query), headers=auth_headers)
            assert response.status_code == 200
            return response.data

        start_data = fetch('offset=0&length=20')
        assert len(start_data) == 20

        next_data = fetch('offset=10&length=10')
        assert len(next_data) == 10
        assert start_data[10:] == next_data

909
910
    @UploadFilesBasedTests.ignore_authorization
    def test_raw_file_signed(self, client, upload, _, test_user_signature_token):
        """The example mainfile is served when a signature token is passed as query parameter."""
        url_parts = (upload, example_file_mainfile, test_user_signature_token)
        response = client.get('/raw/%s/%s?signature_token=%s' % url_parts)
        assert response.status_code == 200
        assert len(response.data) > 0

916
917
    @UploadFilesBasedTests.ignore_authorization
    def test_raw_file_missing_file(self, client, upload, auth_headers):
        """A path that does not exist in the upload yields 404 and a JSON body without ``files``."""
        response = client.get('/raw/%s/does/not/exist' % upload, headers=auth_headers)
        assert response.status_code == 404

        error_body = json.loads(response.data)
        assert 'files' not in error_body

924
    @pytest.mark.parametrize('compress', [True, False])
925
926
    @UploadFilesBasedTests.ignore_authorization
    def test_raw_file_wildcard(self, client, upload, auth_headers, compress):
        """
        A trailing ``*`` returns a valid zip file with as many members as there are
        example file contents, with and without ``compress=1``.
        """
        url = '/raw/%s/examples*' % upload
        response = client.get(
            '%s?compress=1' % url if compress else url, headers=auth_headers)

        assert response.status_code == 200
        assert len(response.data) > 0

        archive_bytes = io.BytesIO(response.data)
        with zipfile.ZipFile(archive_bytes) as zip_file:
            # testzip returns the first corrupt member or None
            assert zip_file.testzip() is None
            assert len(zip_file.namelist()) == len(example_file_contents)

938
939
940
941
    @UploadFilesBasedTests.ignore_authorization
    def test_raw_file_wildcard_missing(self, client, upload, auth_headers):
        """A wildcard path that matches nothing in the upload yields 404."""
        response = client.get('/raw/%s/does/not/exist*' % upload, headers=auth_headers)
        assert response.status_code == 404
943

944
945
    @UploadFilesBasedTests.ignore_authorization
    def test_raw_file_missing_upload(self, client, upload, auth_headers):
        """Requesting the example mainfile from an unknown upload id yields 404."""
        response = client.get(
            '/raw/doesnotexist/%s' % example_file_mainfile, headers=auth_headers)
        assert response.status_code == 404

950
    @pytest.mark.parametrize('compress', [True, False])
951
952
    @UploadFilesBasedTests.check_authorizaton
    def test_raw_files(self, client, upload, auth_headers, compress):
        """
        Requesting all example files via the ``files`` parameter returns a valid zip
        file with one member per requested file, with and without ``compress=1``.
        """
        requested_files = ','.join(example_file_contents)
        url = '/raw/%s?files=%s' % (upload, requested_files)
        response = client.get(
            '%s&compress=1' % url if compress else url, headers=auth_headers)

        assert response.status_code == 200
        assert len(response.data) > 0

        archive_bytes = io.BytesIO(response.data)
        with zipfile.ZipFile(archive_bytes) as zip_file:
            # testzip returns the first corrupt member or None
            assert zip_file.testzip() is None
            assert len(zip_file.namelist()) == len(example_file_contents)
Markus Scheidgen's avatar
Markus Scheidgen committed
964

965
966
967
    @pytest.mark.parametrize('compress', [False, True])
    def test_raw_files_from_query_upload_id(self, client, non_empty_processed, test_user_auth, compress):
        url = '/raw/query?upload_id=%s&compress=%s' % (non_empty_processed.upload_id, 'true' if compress else 'false')