test_api.py 47.5 KB
Newer Older
Markus Scheidgen's avatar
Markus Scheidgen committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
# Copyright 2018 Markus Scheidgen
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an"AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

15
from typing import Any
16
17
18
import pytest
import time
import json
19
import base64
20
21
import zipfile
import io
22
import inspect
23
from passlib.hash import bcrypt
Markus Scheidgen's avatar
Markus Scheidgen committed
24
import datetime
25
import os.path
26
from urllib.parse import urlencode
27

Markus Scheidgen's avatar
Markus Scheidgen committed
28
from nomad.api.app import rfc3339DateTime
29
from nomad import coe_repo, search, parsing, files, config, utils
30
31
from nomad.files import UploadFiles, PublicUploadFiles
from nomad.processing import Upload, Calc, SUCCESS
32
from nomad.datamodel import UploadWithMetadata, CalcWithMetadata
33

34
from tests.conftest import create_auth_headers, clear_elastic
35
from tests.test_files import example_file, example_file_mainfile, example_file_contents
36
from tests.test_files import create_staging_upload, create_public_upload, assert_upload_files
37
from tests.test_coe_repo import assert_coe_upload
38
from tests.test_search import assert_search_upload
39
40


41
42
logger = utils.get_logger(__name__)

Markus Scheidgen's avatar
Markus Scheidgen committed
43

44
45
46
47
48
def test_alive(client):
    rv = client.get('/alive')
    assert rv.status_code == 200


49
50
51
52
53
54
55
@pytest.fixture(scope='function')
def test_user_signature_token(client, test_user_auth):
    rv = client.get('/auth/token', headers=test_user_auth)
    assert rv.status_code == 200
    return json.loads(rv.data)['token']


56
57
58
59
60
61
62
63
def get_upload_with_metadata(upload: dict) -> UploadWithMetadata:
    """ Create a :class:`UploadWithMetadata` from a API upload json record. """
    return UploadWithMetadata(
        upload_id=upload['upload_id'], calcs=[
            CalcWithMetadata(calc_id=calc['calc_id'], mainfile=calc['mainfile'])
            for calc in upload['calcs']['results']])


64
65
66
67
68
class TestInfo:
    def test_info(self, client):
        rv = client.get('/info/')
        assert rv.status_code == 200

69

70
class TestAdmin:
Markus Scheidgen's avatar
Markus Scheidgen committed
71
    @pytest.mark.timeout(config.tests.default_timeout)
72
73
    def test_reset(self, client, admin_user_auth, expandable_postgres, monkeypatch):
        monkeypatch.setattr('nomad.config.services.disable_reset', False)
74
75
76
        rv = client.post('/admin/reset', headers=admin_user_auth)
        assert rv.status_code == 200

Markus Scheidgen's avatar
Markus Scheidgen committed
77
    @pytest.mark.timeout(config.tests.default_timeout)
78
79
    def test_remove(self, client, admin_user_auth, expandable_postgres, monkeypatch):
        monkeypatch.setattr('nomad.config.services.disable_reset', False)
80
81
        rv = client.post('/admin/remove', headers=admin_user_auth)
        assert rv.status_code == 200
82
83
84
85
86
87

    def test_doesnotexist(self, client, admin_user_auth):
        rv = client.post('/admin/doesnotexist', headers=admin_user_auth)
        assert rv.status_code == 404

    def test_only_admin(self, client, test_user_auth):
Markus Scheidgen's avatar
Markus Scheidgen committed
88
        rv = client.post('/admin/reset', headers=test_user_auth)
89
90
        assert rv.status_code == 401

91
92
    def test_disabled(self, client, admin_user_auth, expandable_postgres, monkeypatch):
        monkeypatch.setattr('nomad.config.services.disable_reset', True)
93
94
95
96
        rv = client.post('/admin/reset', headers=admin_user_auth)
        assert rv.status_code == 400


97
class TestAuth:
98
    def test_xtoken_auth(self, client, test_user: coe_repo.User, no_warn):
99
        rv = client.get('/uploads/', headers={
100
            'X-Token': test_user.first_name.lower()  # the test users have their firstname as tokens for convinience
101
        })
102

103
        assert rv.status_code == 200
Markus Scheidgen's avatar
Markus Scheidgen committed
104

105
    def test_xtoken_auth_denied(self, client, no_warn, postgres):
106
107
108
        rv = client.get('/uploads/', headers={
            'X-Token': 'invalid'
        })
Markus Scheidgen's avatar
Markus Scheidgen committed
109

110
        assert rv.status_code == 401
111

112
113
114
    def test_basic_auth(self, client, test_user_auth, no_warn):
        rv = client.get('/uploads/', headers=test_user_auth)
        assert rv.status_code == 200
115

116
117
118
119
120
121
122
    def test_basic_auth_denied(self, client, no_warn):
        basic_auth_base64 = base64.b64encode('invalid'.encode('utf-8')).decode('utf-8')
        rv = client.get('/uploads/', headers={
            'Authorization': 'Basic %s' % basic_auth_base64
        })
        assert rv.status_code == 401

123
    def test_get_user(self, client, test_user_auth, test_user: coe_repo.User, no_warn):
124
125
        rv = client.get('/auth/user', headers=test_user_auth)
        assert rv.status_code == 200
126
127
128
        self.assert_user(client, json.loads(rv.data))

    def assert_user(self, client, user):
129
130
131
132
133
134
135
        for key in ['first_name', 'last_name', 'email', 'token']:
            assert key in user

        rv = client.get('/uploads/', headers={
            'X-Token': user['token']
        })

136
137
        assert rv.status_code == 200

138
139
140
    def test_signature_token(self, test_user_signature_token, no_warn):
        assert test_user_signature_token is not None

141
142
143
144
145
146
147
148
149
150
151
    @pytest.mark.parametrize('token, affiliation', [
        ('test_token', dict(name='HU Berlin', address='Unter den Linden 6')),
        (None, None)])
    def test_put_user(self, client, postgres, admin_user_auth, token, affiliation):
        data = dict(
            email='test@email.com', last_name='Tester', first_name='Testi',
            token=token, affiliation=affiliation,
            password=bcrypt.encrypt('test_password', ident='2y'))

        data = {key: value for key, value in data.items() if value is not None}

152
153
        rv = client.put(
            '/auth/user', headers=admin_user_auth,
154
            content_type='application/json', data=json.dumps(data))
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191

        assert rv.status_code == 200
        self.assert_user(client, json.loads(rv.data))

    def test_put_user_admin_only(self, client, test_user_auth):
        rv = client.put(
            '/auth/user', headers=test_user_auth,
            content_type='application/json', data=json.dumps(dict(
                email='test@email.com', last_name='Tester', first_name='Testi',
                password=bcrypt.encrypt('test_password', ident='2y'))))
        assert rv.status_code == 401

    def test_put_user_required_field(self, client, admin_user_auth):
        rv = client.put(
            '/auth/user', headers=admin_user_auth,
            content_type='application/json', data=json.dumps(dict(
                email='test@email.com', password=bcrypt.encrypt('test_password', ident='2y'))))
        assert rv.status_code == 400

    def test_post_user(self, client, postgres, admin_user_auth):
        rv = client.put(
            '/auth/user', headers=admin_user_auth,
            content_type='application/json', data=json.dumps(dict(
                email='test@email.com', last_name='Tester', first_name='Testi',
                password=bcrypt.encrypt('test_password', ident='2y'))))

        assert rv.status_code == 200
        user = json.loads(rv.data)

        rv = client.post(
            '/auth/user', headers={'X-Token': user['token']},
            content_type='application/json', data=json.dumps(dict(
                last_name='Tester', first_name='Testi v.',
                password=bcrypt.encrypt('test_password_changed', ident='2y'))))
        assert rv.status_code == 200
        self.assert_user(client, json.loads(rv.data))

192
193
194
195
196

class TestUploads:

    def assert_uploads(self, upload_json_str, count=0, **kwargs):
        data = json.loads(upload_json_str)
197
198
199
200
        assert 'pagination' in data
        assert 'page' in data['pagination']

        data = data['results']
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
        assert isinstance(data, list)
        assert len(data) == count

        if count > 0:
            self.assert_upload(json.dumps(data[0]), **kwargs)

    def assert_upload(self, upload_json_str, id=None, **kwargs):
        data = json.loads(upload_json_str)
        assert 'upload_id' in data
        if id is not None:
            assert id == data['upload_id']
        assert 'create_time' in data

        for key, value in kwargs.items():
            assert data.get(key, None) == value

        return data

    def assert_processing(self, client, test_user_auth, upload_id):
        upload_endpoint = '/uploads/%s' % upload_id

        # poll until completed
223
        upload = self.block_until_completed(client, upload_id, test_user_auth)
224
225

        assert len(upload['tasks']) == 4
226
        assert upload['tasks_status'] == SUCCESS
227
        assert upload['current_task'] == 'cleanup'
228
        assert not upload['process_running']
229

230
231
        calcs = upload['calcs']['results']
        for calc in calcs:
232
            assert calc['tasks_status'] == SUCCESS
233
234
            assert calc['current_task'] == 'archiving'
            assert len(calc['tasks']) == 3
235
            assert client.get('/archive/logs/%s/%s' % (calc['upload_id'], calc['calc_id']), headers=test_user_auth).status_code == 200
236
237

        if upload['calcs']['pagination']['total'] > 1:
238
            rv = client.get('%s?page=2&per_page=1&order_by=tasks_status' % upload_endpoint, headers=test_user_auth)
239
240
241
242
            assert rv.status_code == 200
            upload = self.assert_upload(rv.data)
            assert len(upload['calcs']['results']) == 1

243
244
        upload_with_metadata = get_upload_with_metadata(upload)
        assert_upload_files(upload_with_metadata, files.StagingUploadFiles)
245
        assert_search_upload(upload_with_metadata, additional_keys=['atoms', 'system'])
246

247
    def assert_published(self, client, test_user_auth, upload_id, proc_infra, with_coe_repo=True, metadata={}, publish_with_metadata: bool = True):
248
249
        rv = client.get('/uploads/%s' % upload_id, headers=test_user_auth)
        upload = self.assert_upload(rv.data)
250
251

        upload_with_metadata = get_upload_with_metadata(upload)
252

253
254
255
        rv = client.post(
            '/uploads/%s' % upload_id,
            headers=test_user_auth,
256
            data=json.dumps(dict(operation='publish', metadata=metadata if publish_with_metadata else {})),
257
            content_type='application/json')
258
        assert rv.status_code == 200
259
        upload = self.assert_upload(rv.data)
260
        assert upload['current_process'] == 'publish_upload'
261
        assert upload['process_running']
262

263
        additional_keys = ['with_embargo']
264
        if with_coe_repo:
265
            additional_keys.append('pid')
266

267
268
269
270
271
272
273
        self.block_until_completed(client, upload_id, test_user_auth)
        upload_proc = Upload.objects(upload_id=upload_id).first()
        assert upload_proc is not None
        assert upload_proc.published is True

        if with_coe_repo:
            assert_coe_upload(upload_with_metadata.upload_id, user_metadata=metadata)
274
        assert_upload_files(upload_with_metadata, files.PublicUploadFiles, published=True)
275
276
277
        assert_search_upload(upload_with_metadata, additional_keys=additional_keys, published=True)

    def block_until_completed(self, client, upload_id: str, test_user_auth):
278
279
280
281
282
        while True:
            time.sleep(0.1)
            rv = client.get('/uploads/%s' % upload_id, headers=test_user_auth)
            if rv.status_code == 200:
                upload = self.assert_upload(rv.data)
283
284
                if not upload['process_running'] and not upload['tasks_running']:
                    return upload
285
            elif rv.status_code == 404:
286
                return None
287
288
289
290
            else:
                raise Exception(
                    'unexpected status code while blocking for upload processing: %s' %
                    str(rv.status_code))
291
292
293

    def assert_upload_does_not_exist(self, client, upload_id: str, test_user_auth):
        self.block_until_completed(client, upload_id, test_user_auth)
294

295
296
297
298
299
300
        rv = client.get('/uploads/%s' % upload_id, headers=test_user_auth)
        assert rv.status_code == 404
        assert Upload.objects(upload_id=upload_id).first() is None
        assert Calc.objects(upload_id=upload_id).count() is 0
        upload_files = UploadFiles.get(upload_id)
        assert upload_files is None or isinstance(upload_files, PublicUploadFiles)
Markus Scheidgen's avatar
Markus Scheidgen committed
301

302
303
304
305
306
307
308
    def test_get_command(self, client, test_user_auth, no_warn):
        rv = client.get('/uploads/command', headers=test_user_auth)
        assert rv.status_code == 200
        data = json.loads(rv.data)
        assert 'upload_command' in data
        assert 'upload_url' in data

309
310
    def test_get_empty(self, client, test_user_auth, no_warn):
        rv = client.get('/uploads/', headers=test_user_auth)
Markus Scheidgen's avatar
Markus Scheidgen committed
311

312
313
        assert rv.status_code == 200
        self.assert_uploads(rv.data, count=0)
Markus Scheidgen's avatar
Markus Scheidgen committed
314

315
316
317
    def test_get_not_existing(self, client, test_user_auth, no_warn):
        rv = client.get('/uploads/123456789012123456789012', headers=test_user_auth)
        assert rv.status_code == 404
318

319
320
    @pytest.mark.parametrize('mode', ['multipart', 'stream', 'local_path'])
    @pytest.mark.parametrize('name', [None, 'test_name'])
Markus Scheidgen's avatar
Markus Scheidgen committed
321
    def test_put(self, client, test_user_auth, proc_infra, example_upload, mode, name, no_warn):
322
        file = example_upload
323
324
325
326
327
328
329
        if name:
            url = '/uploads/?name=%s' % name
        else:
            url = '/uploads/'

        if mode == 'multipart':
            rv = client.put(
330
331
332
                url, data=dict(file=(open(file, 'rb'), 'the_name')), headers=test_user_auth)
            if not name:
                name = 'the_name'
333
334
335
336
337
338
339
340
341
        elif mode == 'stream':
            with open(file, 'rb') as f:
                rv = client.put(url, data=f.read(), headers=test_user_auth)
        elif mode == 'local_path':
            url += '&' if name else '?'
            url += 'local_path=%s' % file
            rv = client.put(url, headers=test_user_auth)
        else:
            assert False
342

343
344
        assert rv.status_code == 200
        if mode == 'local_path':
345
            upload = self.assert_upload(rv.data, upload_path=file, name=name)
346
347
        else:
            upload = self.assert_upload(rv.data, name=name)
348
        assert upload['tasks_running']
349

350
        self.assert_processing(client, test_user_auth, upload['upload_id'])
351

352
353
354
355
356
357
358
359
    def test_upload_limit(self, client, mongo, test_user, test_user_auth, proc_infra):
        for _ in range(0, config.services.upload_limit):
            Upload.create(user=test_user)
        file = example_file
        rv = client.put('/uploads/?local_path=%s' % file, headers=test_user_auth)
        assert rv.status_code == 400
        assert Upload.user_uploads(test_user).count() == config.services.upload_limit

360
361
362
    def test_delete_not_existing(self, client, test_user_auth, no_warn):
        rv = client.delete('/uploads/123456789012123456789012', headers=test_user_auth)
        assert rv.status_code == 404
363

364
365
366
367
368
369
370
371
372
373
374
375
    @pytest.fixture(scope='function')
    def slow_processing(self, monkeypatch):
        old_cleanup = Upload.cleanup

        def slow_cleanup(self):
            time.sleep(0.5)
            old_cleanup(self)

        monkeypatch.setattr('nomad.processing.data.Upload.cleanup', slow_cleanup)
        yield True
        monkeypatch.setattr('nomad.processing.data.Upload.cleanup', old_cleanup)

376
    def test_delete_published(self, client, test_user_auth, proc_infra, no_warn, with_publish_to_coe_repo):
377
378
379
        rv = client.put('/uploads/?local_path=%s' % example_file, headers=test_user_auth)
        upload = self.assert_upload(rv.data)
        self.assert_processing(client, test_user_auth, upload['upload_id'])
380
        self.assert_published(client, test_user_auth, upload['upload_id'], proc_infra, with_coe_repo=with_publish_to_coe_repo)
381
        rv = client.delete('/uploads/%s' % upload['upload_id'], headers=test_user_auth)
382
        assert rv.status_code == 400
383

Markus Scheidgen's avatar
Markus Scheidgen committed
384
    def test_delete(self, client, test_user_auth, proc_infra, no_warn):
385
386
387
388
389
        rv = client.put('/uploads/?local_path=%s' % example_file, headers=test_user_auth)
        upload = self.assert_upload(rv.data)
        self.assert_processing(client, test_user_auth, upload['upload_id'])
        rv = client.delete('/uploads/%s' % upload['upload_id'], headers=test_user_auth)
        assert rv.status_code == 200
390
        self.assert_upload_does_not_exist(client, upload['upload_id'], test_user_auth)
391

392
393
394
395
396
397
398
399
400
401
402
403
404
405
    def test_post_empty(self, client, test_user_auth, empty_upload, proc_infra, no_warn):
        rv = client.put('/uploads/?local_path=%s' % empty_upload, headers=test_user_auth)
        assert rv.status_code == 200
        upload = self.assert_upload(rv.data)
        self.assert_processing(client, test_user_auth, upload['upload_id'])
        rv = client.post(
            '/uploads/%s' % upload['upload_id'], headers=test_user_auth,
            data=json.dumps(dict(operation='publish')),
            content_type='application/json')
        assert rv.status_code == 400

    def test_post(self, client, test_user_auth, non_empty_example_upload, proc_infra, no_warn, with_publish_to_coe_repo):
        rv = client.put('/uploads/?local_path=%s' % non_empty_example_upload, headers=test_user_auth)
        assert rv.status_code == 200
406
407
        upload = self.assert_upload(rv.data)
        self.assert_processing(client, test_user_auth, upload['upload_id'])
408
        self.assert_published(client, test_user_auth, upload['upload_id'], proc_infra, with_coe_repo=with_publish_to_coe_repo)
409

410
411
412
        # still visible
        assert client.get('/uploads/%s' % upload['upload_id'], headers=test_user_auth).status_code == 200
        # still listed with all=True
413
        rv = client.get('/uploads/?state=all', headers=test_user_auth)
414
        assert rv.status_code == 200
415
        data = json.loads(rv.data)['results']
416
417
418
419
420
        assert len(data) > 0
        assert any(item['upload_id'] == upload['upload_id'] for item in data)
        # not listed with all=False
        rv = client.get('/uploads/', headers=test_user_auth)
        assert rv.status_code == 200
421
        data = json.loads(rv.data)['results']
422
423
        assert not any(item['upload_id'] == upload['upload_id'] for item in data)

424
425
    def test_post_metadata(
            self, client, proc_infra, admin_user_auth, test_user_auth, test_user,
426
            other_test_user, no_warn, example_user_metadata):
427
428
429
        rv = client.put('/uploads/?local_path=%s' % example_file, headers=test_user_auth)
        upload = self.assert_upload(rv.data)
        self.assert_processing(client, test_user_auth, upload['upload_id'])
430
        metadata = dict(**example_user_metadata)
431
        metadata['_upload_time'] = datetime.datetime.utcnow().isoformat()
432
        self.assert_published(client, admin_user_auth, upload['upload_id'], proc_infra, metadata)
433

Markus Scheidgen's avatar
Markus Scheidgen committed
434
    def test_post_metadata_forbidden(self, client, proc_infra, test_user_auth, no_warn):
435
436
437
438
439
440
        rv = client.put('/uploads/?local_path=%s' % example_file, headers=test_user_auth)
        upload = self.assert_upload(rv.data)
        self.assert_processing(client, test_user_auth, upload['upload_id'])
        rv = client.post(
            '/uploads/%s' % upload['upload_id'],
            headers=test_user_auth,
441
            data=json.dumps(dict(operation='publish', metadata=dict(_pid=256))),
442
443
444
            content_type='application/json')
        assert rv.status_code == 401

445
446
447
448
449
450
451
    def test_post_metadata_and_republish(
            self, client, proc_infra, admin_user_auth, test_user_auth, test_user,
            other_test_user, no_warn, example_user_metadata):
        rv = client.put('/uploads/?local_path=%s' % example_file, headers=test_user_auth)
        upload = self.assert_upload(rv.data)
        self.assert_processing(client, test_user_auth, upload['upload_id'])
        metadata = dict(**example_user_metadata)
452
        metadata['_upload_time'] = datetime.datetime.utcnow().isoformat()
453
454
455
        self.assert_published(client, admin_user_auth, upload['upload_id'], proc_infra, metadata)
        self.assert_published(client, admin_user_auth, upload['upload_id'], proc_infra, metadata, publish_with_metadata=False)

456
457
458
459
460
461
462
463
464
465
466
467
468
469
    def test_post_re_process(self, client, published, test_user_auth, monkeypatch):
        monkeypatch.setattr('nomad.config.version', 're_process_test_version')
        monkeypatch.setattr('nomad.config.commit', 're_process_test_commit')

        upload_id = published.upload_id
        rv = client.post(
            '/uploads/%s' % upload_id,
            headers=test_user_auth,
            data=json.dumps(dict(operation='re-process')),
            content_type='application/json')

        assert rv.status_code == 200
        assert self.block_until_completed(client, upload_id, test_user_auth) is not None

470
    # TODO validate metadata (or all input models in API for that matter)
471
    # def test_post_bad_metadata(self, client, proc_infra, test_user_auth, postgres):
472
473
474
475
476
477
    #     rv = client.put('/uploads/?local_path=%s' % example_file, headers=test_user_auth)
    #     upload = self.assert_upload(rv.data)
    #     self.assert_processing(client, test_user_auth, upload['upload_id'])
    #     rv = client.post(
    #         '/uploads/%s' % upload['upload_id'],
    #         headers=test_user_auth,
478
    #         data=json.dumps(dict(operation='publish', metadata=dict(doesnotexist='hi'))),
479
480
481
    #         content_type='application/json')
    #     assert rv.status_code == 400

482
    def test_potcar(self, client, proc_infra, test_user_auth):
483
        # only the owner, shared with people are supposed to download the original potcar file
484
485
486
487
488
489
490
491
492
493
494
495
496
497
        example_file = 'tests/data/proc/examples_potcar.zip'
        rv = client.put('/uploads/?local_path=%s' % example_file, headers=test_user_auth)

        upload = self.assert_upload(rv.data)
        upload_id = upload['upload_id']
        self.assert_processing(client, test_user_auth, upload_id)
        self.assert_published(client, test_user_auth, upload_id, proc_infra, with_coe_repo=True)
        rv = client.get('/raw/%s/examples_potcar/POTCAR' % upload_id)
        assert rv.status_code == 401
        rv = client.get('/raw/%s/examples_potcar/POTCAR' % upload_id, headers=test_user_auth)
        assert rv.status_code == 200
        rv = client.get('/raw/%s/examples_potcar/POTCAR.stripped' % upload_id)
        assert rv.status_code == 200

498

Markus Scheidgen's avatar
Markus Scheidgen committed
499
500
501
today = datetime.datetime.utcnow().date()


502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
class UploadFilesBasedTests:

    @staticmethod
    def fix_signature(func, wrapper):
        additional_args = list(inspect.signature(func).parameters.values())[4:]
        wrapper_sig = inspect.signature(wrapper)
        wrapper_args = list(wrapper_sig.parameters.values())[:3] + additional_args
        wrapper_sig = wrapper_sig.replace(parameters=tuple(wrapper_args))
        wrapper.__signature__ = wrapper_sig

    @staticmethod
    def check_authorizaton(func):
        @pytest.mark.parametrize('test_data', [
            [True, None, True],     # in staging for upload
            [True, None, False],    # in staging for different user
            [True, None, None],     # in staging for guest
            [False, True, True],    # in public, restricted for uploader
            [False, True, False],   # in public, restricted for different user
            [False, True, None],    # in public, restricted for guest
            [False, False, True],   # in public, public, for uploader
            [False, False, False],  # in public, public, for different user
            [False, False, None]    # in public, public, for guest
        ], indirect=True)
        def wrapper(self, client, test_data, *args, **kwargs):
            upload, authorized, auth_headers = test_data
            try:
                func(self, client, upload, auth_headers, *args, **kwargs)
            except AssertionError as assertion:
                assertion_str = str(assertion)
                if not authorized:
                    if '0 == 5' in assertion_str and 'ZipFile' in assertion_str:
                        # the user is not authorized an gets an empty zip as expected
                        return
                    if '401' in assertion_str:
                        # the user is not authorized and gets a 401 as expected
                        return
                raise assertion

            if not authorized:
                assert False
        UploadFilesBasedTests.fix_signature(func, wrapper)
        return wrapper

    @staticmethod
    def ignore_authorization(func):
        @pytest.mark.parametrize('test_data', [
            [True, None, True],      # in staging
            [False, False, None],    # in public
        ], indirect=True)
        def wrapper(self, client, test_data, *args, **kwargs):
            upload, _, auth_headers = test_data
            func(self, client, upload, auth_headers, *args, **kwargs)
        UploadFilesBasedTests.fix_signature(func, wrapper)
        return wrapper
556

557
    @pytest.fixture(scope='function')
558
    def test_data(self, request, postgres, mongo, raw_files, no_warn, test_user, other_test_user):
559
560
561
562
563
        # delete potential old test files
        for _ in [0, 1]:
            upload_files = UploadFiles.get('test_upload')
            if upload_files:
                upload_files.delete()
564

565
        in_staging, restricted, for_uploader = request.param
566

567
568
569
570
        if in_staging:
            authorized = for_uploader
        else:
            authorized = not restricted or for_uploader
571

572
573
574
575
576
577
        if for_uploader:
            auth_headers = create_auth_headers(test_user)
        elif for_uploader is False:
            auth_headers = create_auth_headers(other_test_user)
        else:
            auth_headers = None
578

579
580
        calc_specs = 'r' if restricted else 'p'
        if in_staging:
581
            Upload.create(user=test_user, upload_id='test_upload')
582
            _, upload_files = create_staging_upload('test_upload', calc_specs=calc_specs)
583
        else:
584
            _, upload_files = create_public_upload('test_upload', calc_specs=calc_specs)
585
            postgres.begin()
586
587
588
            coe_upload = coe_repo.Upload(
                upload_name='test_upload',
                user_id=test_user.user_id, is_processed=True)
589
590
            postgres.add(coe_upload)
            postgres.commit()
591

592
        yield 'test_upload', authorized, auth_headers
593

594
        upload_files.delete()
595
596


597
598
599
600
class TestArchive(UploadFilesBasedTests):
    @UploadFilesBasedTests.check_authorizaton
    def test_get(self, client, upload, auth_headers):
        rv = client.get('/archive/%s/0' % upload, headers=auth_headers)
601
        assert rv.status_code == 200
602
        assert json.loads(rv.data) is not None
603

604
605
606
607
608
609
    @UploadFilesBasedTests.ignore_authorization
    def test_get_signed(self, client, upload, _, test_user_signature_token):
        rv = client.get('/archive/%s/0?token=%s' % (upload, test_user_signature_token))
        assert rv.status_code == 200
        assert json.loads(rv.data) is not None

610
611
612
    @UploadFilesBasedTests.check_authorizaton
    def test_get_calc_proc_log(self, client, upload, auth_headers):
        rv = client.get('/archive/logs/%s/0' % upload, headers=auth_headers)
613
        assert rv.status_code == 200
614
        assert len(rv.data) > 0
615

616
617
618
619
620
621
    @UploadFilesBasedTests.ignore_authorization
    def test_get_calc_proc_log_signed(self, client, upload, _, test_user_signature_token):
        rv = client.get('/archive/logs/%s/0?token=%s' % (upload, test_user_signature_token))
        assert rv.status_code == 200
        assert len(rv.data) > 0

622
623
624
    @UploadFilesBasedTests.ignore_authorization
    def test_get_non_existing_archive(self, client, upload, auth_headers):
        rv = client.get('/archive/%s' % 'doesnt/exist', headers=auth_headers)
625
        assert rv.status_code == 404
Markus Scheidgen's avatar
Markus Scheidgen committed
626

627
628
629
630
631
632
633
    @pytest.mark.parametrize('info', [
        'all.nomadmetainfo.json',
        'all.experimental.nomadmetainfo.json',
        'vasp.nomadmetainfo.json',
        'mpes.nomadmetainfo.json'])
    def test_get_metainfo(self, client, info):
        rv = client.get('/archive/metainfo/%s' % info)
634
        assert rv.status_code == 200
635
636
        metainfo = json.loads((rv.data))
        assert len(metainfo) > 0
637

Markus Scheidgen's avatar
Markus Scheidgen committed
638

639
class TestRepo():
640
641
642
643
644
645
    @pytest.fixture(scope='class')
    def example_elastic_calcs(
            self, elastic_infra, normalized: parsing.LocalBackend,
            test_user: coe_repo.User, other_test_user: coe_repo.User):
        clear_elastic(elastic_infra)

Markus Scheidgen's avatar
Markus Scheidgen committed
646
        calc_with_metadata = CalcWithMetadata(upload_id=0, calc_id=0, upload_time=today)
647
        calc_with_metadata.files = ['test/mainfile.txt']
648
        calc_with_metadata.apply_domain_metadata(normalized)
649

Markus Scheidgen's avatar
Markus Scheidgen committed
650
651
        calc_with_metadata.update(
            calc_id='1', uploader=test_user.to_popo(), published=True, with_embargo=False)
652
653
        search.Entry.from_calc_with_metadata(calc_with_metadata).save(refresh=True)

Markus Scheidgen's avatar
Markus Scheidgen committed
654
655
        calc_with_metadata.update(
            calc_id='2', uploader=other_test_user.to_popo(), published=True, with_embargo=False,
Markus Scheidgen's avatar
Markus Scheidgen committed
656
            upload_time=today - datetime.timedelta(days=5))
Markus Scheidgen's avatar
Markus Scheidgen committed
657
658
        calc_with_metadata.update(
            atoms=['Fe'], comment='this is a specific word', formula='AAA', basis_set='zzz')
659
660
        search.Entry.from_calc_with_metadata(calc_with_metadata).save(refresh=True)

Markus Scheidgen's avatar
Markus Scheidgen committed
661
662
        calc_with_metadata.update(
            calc_id='3', uploader=other_test_user.to_popo(), published=False, with_embargo=False)
663
664
        search.Entry.from_calc_with_metadata(calc_with_metadata).save(refresh=True)

Markus Scheidgen's avatar
Markus Scheidgen committed
665
666
        calc_with_metadata.update(
            calc_id='4', uploader=other_test_user.to_popo(), published=True, with_embargo=True)
667
668
        search.Entry.from_calc_with_metadata(calc_with_metadata).save(refresh=True)

669
    def assert_search(self, rv: Any, number_of_calcs: int) -> dict:
670
671
        if rv.status_code != 200:
            print(rv.data)
672
        assert rv.status_code == 200
673

674
675
676
677
678
679
680
681
682
        data = json.loads(rv.data)

        results = data.get('results', None)
        assert results is not None
        assert isinstance(results, list)
        assert len(results) == number_of_calcs

        return data

683
684
    def test_own_calc(self, client, example_elastic_calcs, no_warn, test_user_auth):
        rv = client.get('/repo/0/1', headers=test_user_auth)
685
686
        assert rv.status_code == 200

687
688
689
690
691
692
693
694
    def test_public_calc(self, client, example_elastic_calcs, no_warn, other_test_user_auth):
        rv = client.get('/repo/0/1', headers=other_test_user_auth)
        assert rv.status_code == 200

    def test_embargo_calc(self, client, example_elastic_calcs, no_warn, test_user_auth):
        rv = client.get('/repo/0/4', headers=test_user_auth)
        assert rv.status_code == 401

695
696
697
698
    def test_own_embargo_calc(self, client, example_elastic_calcs, no_warn, other_test_user_auth):
        rv = client.get('/repo/0/4', headers=other_test_user_auth)
        assert rv.status_code == 200

699
700
701
702
    def test_staging_calc(self, client, example_elastic_calcs, no_warn, test_user_auth):
        rv = client.get('/repo/0/3', headers=test_user_auth)
        assert rv.status_code == 401

703
704
705
706
    def test_own_staging_calc(self, client, example_elastic_calcs, no_warn, other_test_user_auth):
        rv = client.get('/repo/0/3', headers=other_test_user_auth)
        assert rv.status_code == 200

707
708
    def test_non_existing_calcs(self, client, example_elastic_calcs, test_user_auth):
        rv = client.get('/repo/0/10', headers=test_user_auth)
709
710
        assert rv.status_code == 404

711
712
713
    @pytest.mark.parametrize('calcs, owner, auth', [
        (2, 'all', 'none'),
        (2, 'all', 'test_user'),
714
        (4, 'all', 'other_test_user'),
715
        (1, 'user', 'test_user'),
716
        (3, 'user', 'other_test_user'),
717
        (0, 'staging', 'test_user'),
718
        (1, 'staging', 'other_test_user')
719
    ])
720
    def test_search_owner(self, client, example_elastic_calcs, no_warn, test_user_auth, other_test_user_auth, calcs, owner, auth):
721
722
        auth = dict(none=None, test_user=test_user_auth, other_test_user=other_test_user_auth).get(auth)
        rv = client.get('/repo/?owner=%s' % owner, headers=auth)
723
        data = self.assert_search(rv, calcs)
724
725
726
727
728
        results = data.get('results', None)
        if calcs > 0:
            for key in ['uploader', 'calc_id', 'formula', 'upload_id']:
                assert key in results[0]

Markus Scheidgen's avatar
Markus Scheidgen committed
729
    @pytest.mark.parametrize('calcs, start, end', [
Markus Scheidgen's avatar
Markus Scheidgen committed
730
731
732
733
734
735
        (2, today - datetime.timedelta(days=6), today),
        (2, today - datetime.timedelta(days=5), today),
        (1, today - datetime.timedelta(days=4), today),
        (1, today, today),
        (1, today - datetime.timedelta(days=6), today - datetime.timedelta(days=5)),
        (0, today - datetime.timedelta(days=7), today - datetime.timedelta(days=6)),
Markus Scheidgen's avatar
Markus Scheidgen committed
736
        (2, None, None),
Markus Scheidgen's avatar
Markus Scheidgen committed
737
738
        (1, today, None),
        (2, None, today)
Markus Scheidgen's avatar
Markus Scheidgen committed
739
740
741
742
743
744
745
746
747
748
749
750
751
    ])
    def test_search_time(self, client, example_elastic_calcs, no_warn, calcs, start, end):
        query_string = ''
        if start is not None:
            query_string = 'from_time=%s' % rfc3339DateTime.format(start)
        if end is not None:
            if query_string != '':
                query_string += '&'
            query_string += 'until_time=%s' % rfc3339DateTime.format(end)
        if query_string != '':
            query_string = '?%s' % query_string

        rv = client.get('/repo/%s' % query_string)
752
        self.assert_search(rv, calcs)
Markus Scheidgen's avatar
Markus Scheidgen committed
753

754
    @pytest.mark.parametrize('calcs, quantity, value', [
755
756
        (2, 'system', 'bulk'),
        (0, 'system', 'atom'),
757
758
        (1, 'atoms', 'Br'),
        (1, 'atoms', 'Fe'),
759
        (0, 'atoms', ['Fe', 'Br', 'A', 'B']),
760
761
        (0, 'only_atoms', ['Br', 'Si']),
        (1, 'only_atoms', ['Fe']),
762
763
        (1, 'only_atoms', ['Br', 'K', 'Si']),
        (1, 'only_atoms', ['Br', 'Si', 'K']),
764
765
766
767
768
769
770
771
772
        (1, 'comment', 'specific'),
        (1, 'authors', 'Hofstadter, Leonard'),
        (2, 'files', 'test/mainfile.txt'),
        (2, 'paths', 'mainfile.txt'),
        (2, 'paths', 'test'),
        (2, 'quantities', ['wyckoff_letters_primitive', 'hall_number']),
        (0, 'quantities', 'dos')
    ])
    def test_search_quantities(self, client, example_elastic_calcs, no_warn, test_user_auth, calcs, quantity, value):
773
        query_string = urlencode({quantity: value}, doseq=True)
774

775
        rv = client.get('/repo/?%s' % query_string, headers=test_user_auth)
776
        logger.debug('run search quantities test', query_string=query_string)
777
        data = self.assert_search(rv, calcs)
778

779
780
        quantities = data.get('quantities', None)
        assert quantities is not None
781
        if quantity == 'system' and calcs != 0:
782
783
784
785
            # for simplicity we only assert on quantities for this case
            assert 'system' in quantities
            assert len(quantities['system']) == 1
            assert value in quantities['system']
786

787
788
    metrics_permutations = [[], search.metrics_names] + [[metric] for metric in search.metrics_names]

789
790
791
792
793
794
795
796
797
798
799
    def test_search_admin(self, client, example_elastic_calcs, no_warn, admin_user_auth):
        rv = client.get('/repo/?owner=admin', headers=admin_user_auth)
        self.assert_search(rv, 4)

    def test_search_admin_auth(self, client, example_elastic_calcs, no_warn, test_user_auth):
        rv = client.get('/repo/?owner=admin', headers=test_user_auth)
        assert rv.status_code == 401

        rv = client.get('/repo/?owner=admin')
        assert rv.status_code == 401

800
    @pytest.mark.parametrize('metrics', metrics_permutations)
Markus Scheidgen's avatar
Markus Scheidgen committed
801
    def test_search_total_metrics(self, client, example_elastic_calcs, no_warn, metrics):
802
803
        rv = client.get('/repo/?%s' % urlencode(dict(metrics=metrics), doseq=True))
        assert rv.status_code == 200, str(rv.data)
Markus Scheidgen's avatar
Markus Scheidgen committed
804
        data = json.loads(rv.data)
805
806
807
        total_metrics = data.get('quantities', {}).get('total', {}).get('all', None)
        assert total_metrics is not None
        assert 'code_runs' in total_metrics
Markus Scheidgen's avatar
Markus Scheidgen committed
808
        for metric in metrics:
809
            assert metric in total_metrics
Markus Scheidgen's avatar
Markus Scheidgen committed
810

811
    @pytest.mark.parametrize('metrics', metrics_permutations)
Markus Scheidgen's avatar
Markus Scheidgen committed
812
    def test_search_aggregation_metrics(self, client, example_elastic_calcs, no_warn, metrics):
813
        rv = client.get('/repo/?%s' % urlencode(dict(metrics=metrics), doseq=True))
Markus Scheidgen's avatar
Markus Scheidgen committed
814
815
        assert rv.status_code == 200
        data = json.loads(rv.data)
816
817
        for name, quantity in data.get('quantities').items():
            for metrics_result in quantity.values():
Markus Scheidgen's avatar
Markus Scheidgen committed
818
                assert 'code_runs' in metrics_result
819
820
821
822
823
                if name != 'authors':
                    for metric in metrics:
                        assert metric in metrics_result
                else:
                    assert len(metrics_result) == 1  # code_runs is the only metric for authors
Markus Scheidgen's avatar
Markus Scheidgen committed
824

825
826
827
    @pytest.mark.parametrize('n_results, page, per_page', [(2, 1, 5), (1, 1, 1), (0, 2, 3)])
    def test_search_pagination(self, client, example_elastic_calcs, no_warn, n_results, page, per_page):
        rv = client.get('/repo/?page=%d&per_page=%d' % (page, per_page))
828
829
830
        assert rv.status_code == 200
        data = json.loads(rv.data)
        results = data.get('results', None)
831
        assert data['pagination']['total'] == 2
832
        assert results is not None
833
        assert len(results) == n_results
834

835
836
    @pytest.mark.parametrize('first, order_by, order', [
        ('1', 'formula', -1), ('2', 'formula', 1),
837
838
        ('2', 'basis_set', -1), ('1', 'basis_set', 1),
        (None, 'authors', -1)])
839
840
841
842
843
844
845
    def test_search_order(self, client, example_elastic_calcs, no_warn, first, order_by, order):
        rv = client.get('/repo/?order_by=%s&order=%d' % (order_by, order))
        assert rv.status_code == 200
        data = json.loads(rv.data)
        results = data.get('results', None)
        assert data['pagination']['total'] == 2
        assert len(results) == 2
846
847
        if first is not None:
            assert results[0]['calc_id'] == first
848

849
850
851
852
853
854
855
856
857
858
    @pytest.mark.parametrize('n_results, size', [(2, None), (2, 5), (1, 1)])
    def test_search_scroll(self, client, example_elastic_calcs, no_warn, n_results, size):
        if size is not None:
            rv = client.get('/repo/?scroll=1,&per_page=%d' % size)
        else:
            rv = client.get('/repo/?scroll=1')

        assert rv.status_code == 200
        data = json.loads(rv.data)
        results = data.get('results', None)
859
        assert data.get('scroll', {}).get('size', -1) > 0
860
861
        assert results is not None
        assert len(results) == n_results
862
        scroll_id = data.get('scroll', {}).get('scroll_id', None)
863
864
865
866
867
868
        assert scroll_id is not None

        has_another_page = False
        while scroll_id is not None:
            rv = client.get('/repo/?scroll=1&scroll_id=%s' % scroll_id)
            data = json.loads(rv.data)
869
            scroll_id = data.get('scroll', {}).get('scroll_id', None)
870
871
872
873
874
            has_another_page |= len(data.get('results')) > 0

        if n_results < 2:
            assert has_another_page

875
876
877
    def test_search_user_authrequired(self, client, example_elastic_calcs, no_warn):
        rv = client.get('/repo/?owner=user')
        assert rv.status_code == 401
878

879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
    @pytest.mark.parametrize('calcs, quantity, value', [
        (2, 'system', 'bulk'),
        (0, 'system', 'atom'),
        (1, 'atoms', 'Br'),
        (1, 'atoms', 'Fe'),
        (1, 'authors', 'Hofstadter, Leonard'),
        (2, 'files', 'test/mainfile.txt'),
        (0, 'quantities', 'dos')
    ])
    def test_quantity_search(self, client, example_elastic_calcs, no_warn, test_user_auth, calcs, quantity, value):
        rv = client.get('/repo/%s' % quantity, headers=test_user_auth)
        assert rv.status_code == 200
        data = json.loads(rv.data)

        quantities = data['quantities']
        assert quantity in quantities
        values = quantities[quantity]['values']
        assert (value in values) == (calcs > 0)
        assert values.get(value, 0) == calcs

    def test_quantity_search_after(self, client, example_elastic_calcs, no_warn, test_user_auth):
        rv = client.get('/repo/atoms?size=1')
        assert rv.status_code == 200
        data = json.loads(rv.data)

        quantity = data['quantities']['atoms']
        assert 'after' in quantity
        after = quantity['after']
        assert len(quantity['values']) == 1
        value = list(quantity['values'].keys())[0]

        while True:
            rv = client.get('/repo/atoms?size=1&after=%s' % after)
            assert rv.status_code == 200
            data = json.loads(rv.data)

            quantity = data['quantities']['atoms']

            if 'after' not in quantity:
                assert len(quantity['values']) == 0
                break

            assert len(quantity['values']) == 1
            assert value != list(quantity['values'].keys())[0]
            assert after != quantity['after']
            after = quantity['after']

926

927
class TestRaw(UploadFilesBasedTests):
Markus Scheidgen's avatar
Markus Scheidgen committed
928

929
930
931
932
933
934
935
936
937
938
939
940
941
942
    def test_raw_file_from_calc(self, client, non_empty_processed, test_user_auth):
        calc = list(non_empty_processed.calcs)[0]
        url = '/raw/calc/%s/%s/%s' % (
            non_empty_processed.upload_id, calc.calc_id, os.path.basename(calc.mainfile))
        rv = client.get(url, headers=test_user_auth)
        assert rv.status_code == 200
        assert len(rv.data) > 0

        url = '/raw/calc/%s/%s/' % (non_empty_processed.upload_id, calc.calc_id)
        rv = client.get(url, headers=test_user_auth)
        assert rv.status_code == 200
        result = json.loads(rv.data)
        assert len(result['contents']) > 0

943
944
    @UploadFilesBasedTests.check_authorizaton
    def test_raw_file(self, client, upload, auth_headers):
945
        url = '/raw/%s/%s' % (upload, example_file_mainfile)
946
        rv = client.get(url, headers=auth_headers)
947
948
949
        assert rv.status_code == 200
        assert len(rv.data) > 0

950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
    @UploadFilesBasedTests.check_authorizaton
    def test_raw_file_partial(self, client, upload, auth_headers):
        url = '/raw/%s/%s?offset=0&length=20' % (upload, example_file_mainfile)
        rv = client.get(url, headers=auth_headers)
        assert rv.status_code == 200
        start_data = rv.data
        assert len(start_data) == 20

        url = '/raw/%s/%s?offset=10&length=10' % (upload, example_file_mainfile)
        rv = client.get(url, headers=auth_headers)
        assert rv.status_code == 200
        next_data = rv.data
        assert len(rv.data) == 10
        assert start_data[10:] == next_data

965
966
967
968
969
970
971
    @UploadFilesBasedTests.ignore_authorization
    def test_raw_file_signed(self, client, upload, _, test_user_signature_token):
        url = '/raw/%s/%s?token=%s' % (upload, example_file_mainfile, test_user_signature_token)
        rv = client.get(url)
        assert rv.status_code == 200
        assert len(rv.data) > 0

972
973
    @UploadFilesBasedTests.ignore_authorization
    def test_raw_file_missing_file(self, client, upload, auth_headers):
974
        url = '/raw/%s/does/not/exist' % upload
975
        rv = client.get(url, headers=auth_headers)
976
        assert rv.status_code == 404
977
978
979
        data = json.loads(rv.data)
        assert 'files' not in data

980
    @pytest.mark.parametrize('compress', [True, False])
981
982
    @UploadFilesBasedTests.ignore_authorization
    def test_raw_file_wildcard(self, client, upload, auth_headers, compress):
983
        url = '/raw/%s/examples*' % upload
984
985
        if compress:
            url = '%s?compress=1' % url
986
        rv = client.get(url, headers=auth_headers)
987
988
989
990
991
992
993

        assert rv.status_code == 200
        assert len(rv.data) > 0
        with zipfile.ZipFile(io.BytesIO(rv.data)) as zip_file:
            assert zip_file.testzip() is None
            assert len(zip_file.namelist()) == len(example_file_contents)

994
995
996
997
    @UploadFilesBasedTests.ignore_authorization
    def test_raw_file_wildcard_missing(self, client, upload, auth_headers):
        url = '/raw/%s/does/not/exist*' % upload
        rv = client.get(url, headers=auth_headers)
998
        assert rv.status_code == 404
999

1000
1001
    @UploadFilesBasedTests.ignore_authorization
    def test_raw_file_missing_upload(self, client, upload, auth_headers):
1002
        url = '/raw/doesnotexist/%s' % example_file_mainfile
1003
        rv = client.get(url, headers=auth_headers)
1004
1005
        assert rv.status_code == 404

1006
    @pytest.mark.parametrize('compress', [True, False])
1007
1008
    @UploadFilesBasedTests.check_authorizaton
    def test_raw_files(self, client, upload, auth_headers, compress):
1009
        url = '/raw/%s?files=%s' % (
1010
            upload, ','.join(example_file_contents))
1011
1012
        if compress:
            url = '%s&compress=1' % url
1013
        rv = client.get(url, headers=auth_headers)
Markus Scheidgen's avatar
Markus Scheidgen committed
1014

1015
1016
1017
1018
        assert rv.status_code == 200
        assert len(rv.data) > 0
        with zipfile.ZipFile(io.BytesIO(rv.data)) as zip_file:
            assert zip_file.testzip() is None
1019
            assert len(zip_file.namelist()) == len(example_file_contents)
Markus Scheidgen's avatar
Markus Scheidgen committed
1020

1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
    @UploadFilesBasedTests.ignore_authorization
    def test_raw_files_signed(self, client, upload, _, test_user_signature_token):
        url = '/raw/%s?files=%s&token=%s' % (
            upload, ','.join(example_file_contents), test_user_signature_token)
        rv = client.get(url)

        assert rv.status_code == 200
        assert len(rv.data) > 0
        with zipfile.ZipFile(io.BytesIO(</