test_api.py 49.8 KB
Newer Older
Markus Scheidgen's avatar
Markus Scheidgen committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
# Copyright 2018 Markus Scheidgen
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

15
from typing import Any
16
17
18
import pytest
import time
import json
19
import base64
20
21
import zipfile
import io
22
import inspect
23
from passlib.hash import bcrypt
Markus Scheidgen's avatar
Markus Scheidgen committed
24
import datetime
25
import os.path
26
from urllib.parse import urlencode
27

Markus Scheidgen's avatar
Markus Scheidgen committed
28
from nomad.api.app import rfc3339DateTime
29
from nomad import coe_repo, search, parsing, files, config, utils
30
31
from nomad.files import UploadFiles, PublicUploadFiles
from nomad.processing import Upload, Calc, SUCCESS
32
from nomad.datamodel import UploadWithMetadata, CalcWithMetadata
33

34
from tests.conftest import create_auth_headers, clear_elastic
35
from tests.test_files import example_file, example_file_mainfile, example_file_contents
36
from tests.test_files import create_staging_upload, create_public_upload, assert_upload_files
37
from tests.test_coe_repo import assert_coe_upload
38
from tests.test_search import assert_search_upload
39
40


41
42
# Module-level logger, obtained from nomad's ``utils.get_logger`` with this module's name.
logger = utils.get_logger(__name__)

Markus Scheidgen's avatar
Markus Scheidgen committed
43

44
45
46
47
48
def test_alive(client):
    """ The ``/alive`` endpoint answers with status 200. """
    response = client.get('/alive')
    assert response.status_code == 200


49
50
51
52
53
54
55
@pytest.fixture(scope='function')
def test_user_signature_token(client, test_user_auth):
    """ Provides the ``token`` returned by ``/auth/token`` for the test user's credentials. """
    response = client.get('/auth/token', headers=test_user_auth)
    assert response.status_code == 200
    payload = json.loads(response.data)
    return payload['token']


56
57
58
59
60
61
62
63
def get_upload_with_metadata(upload: dict) -> UploadWithMetadata:
    """ Builds an :class:`UploadWithMetadata` from the JSON record the API returns for an upload. """
    upload_id = upload['upload_id']

    # one CalcWithMetadata per calc listed in the (paginated) calcs results
    calcs = []
    for calc in upload['calcs']['results']:
        calcs.append(CalcWithMetadata(calc_id=calc['calc_id'], mainfile=calc['mainfile']))

    return UploadWithMetadata(upload_id=upload_id, calcs=calcs)


64
65
66
class TestInfo:
    """ Tests for the ``/info/`` endpoint. """

    def test_info(self, client):
        """ The info endpoint answers with 200 and lists codes and parsers. """
        rv = client.get('/info/')
        # Check the status before touching the body: an error response does not carry
        # the expected JSON and the failure should point at the status code, not at
        # the JSON parsing.
        assert rv.status_code == 200

        data = json.loads(rv.data)
        assert 'codes' in data
        assert 'parsers' in data
        assert len(data['parsers']) >= len(data['codes'])

73

74
class TestAdmin:
    """ Tests for the ``/admin`` operations. """

    @pytest.mark.timeout(config.tests.default_timeout)
    def test_reset(self, client, admin_user_auth, expandable_postgres, monkeypatch):
        """ An admin can reset while ``disable_reset`` is off. """
        monkeypatch.setattr('nomad.config.services.disable_reset', False)
        response = client.post('/admin/reset', headers=admin_user_auth)
        assert response.status_code == 200

    @pytest.mark.timeout(config.tests.default_timeout)
    def test_remove(self, client, admin_user_auth, expandable_postgres, monkeypatch):
        """ An admin can remove while ``disable_reset`` is off. """
        monkeypatch.setattr('nomad.config.services.disable_reset', False)
        response = client.post('/admin/remove', headers=admin_user_auth)
        assert response.status_code == 200

    def test_doesnotexist(self, client, admin_user_auth):
        """ Unknown admin operations answer with 404. """
        response = client.post('/admin/doesnotexist', headers=admin_user_auth)
        assert response.status_code == 404

    def test_only_admin(self, client, test_user_auth):
        """ A regular user's credentials are rejected with 401. """
        response = client.post('/admin/reset', headers=test_user_auth)
        assert response.status_code == 401

    def test_disabled(self, client, admin_user_auth, expandable_postgres, monkeypatch):
        """ With ``disable_reset`` on, even an admin gets a 400. """
        monkeypatch.setattr('nomad.config.services.disable_reset', True)
        response = client.post('/admin/reset', headers=admin_user_auth)
        assert response.status_code == 400


101
class TestAuth:
    """ Tests for the ``/auth`` endpoints and the supported ways to pass credentials. """

    def test_auth_wo_credentials(self, client, keycloak, no_warn):
        """ Without credentials ``/auth/`` answers with 401. """
        response = client.get('/auth/')
        assert response.status_code == 401

    def test_auth(self, client, test_user_auth, keycloak):
        """ With the test user's credentials ``/auth/`` answers with 200. """
        response = client.get('/auth/', headers=test_user_auth)
        assert response.status_code == 200

    def test_xtoken_auth(self, client, test_user: coe_repo.User, no_warn):
        """ A valid ``X-Token`` header grants access to the uploads. """
        # the test users have their first name as token for convenience
        token = test_user.first_name.lower()
        response = client.get('/uploads/', headers={'X-Token': token})
        assert response.status_code == 200

    def test_xtoken_auth_denied(self, client, no_warn, postgres):
        """ An invalid ``X-Token`` header is rejected with 401. """
        response = client.get('/uploads/', headers={'X-Token': 'invalid'})
        assert response.status_code == 401

    def test_basic_auth(self, client, test_user_auth, no_warn):
        """ The test user's auth headers grant access to the uploads. """
        response = client.get('/uploads/', headers=test_user_auth)
        assert response.status_code == 200

    def test_basic_auth_denied(self, client, no_warn):
        """ Invalid basic auth credentials are rejected with 401. """
        credentials = base64.b64encode(b'invalid').decode('utf-8')
        headers = {'Authorization': 'Basic %s' % credentials}
        response = client.get('/uploads/', headers=headers)
        assert response.status_code == 401

    def test_get_user(self, client, test_user_auth, test_user: coe_repo.User, no_warn):
        """ ``GET /auth/user`` returns a complete, usable user record. """
        response = client.get('/auth/user', headers=test_user_auth)
        assert response.status_code == 200
        self.assert_user(client, json.loads(response.data))

    def assert_user(self, client, user):
        """ Asserts that the user record has all expected keys and that its token authenticates. """
        for expected_key in ('first_name', 'last_name', 'email', 'token'):
            assert expected_key in user

        response = client.get('/uploads/', headers={'X-Token': user['token']})
        assert response.status_code == 200

    def test_signature_token(self, test_user_signature_token, no_warn):
        """ A signature token can be obtained for the test user. """
        assert test_user_signature_token is not None

    @pytest.mark.parametrize('token, affiliation', [
        ('test_token', dict(name='HU Berlin', address='Unter den Linden 6')),
        (None, None)])
    def test_put_user(self, client, postgres, admin_user_auth, token, affiliation):
        """ An admin can create a user, with or without explicit token and affiliation. """
        fields = [
            ('email', 'test@email.com'),
            ('last_name', 'Tester'),
            ('first_name', 'Testi'),
            ('token', token),
            ('affiliation', affiliation),
            ('password', bcrypt.encrypt('test_password', ident='2y'))]

        # fields without a value are not sent at all
        payload = dict((name, value) for name, value in fields if value is not None)

        response = client.put(
            '/auth/user', headers=admin_user_auth,
            content_type='application/json', data=json.dumps(payload))

        assert response.status_code == 200
        self.assert_user(client, json.loads(response.data))

    def test_put_user_admin_only(self, client, test_user_auth):
        """ Creating a user with regular user credentials is rejected with 401. """
        payload = dict(
            email='test@email.com', last_name='Tester', first_name='Testi',
            password=bcrypt.encrypt('test_password', ident='2y'))
        response = client.put(
            '/auth/user', headers=test_user_auth,
            content_type='application/json', data=json.dumps(payload))
        assert response.status_code == 401

    def test_put_user_required_field(self, client, admin_user_auth):
        """ Creating a user without first and last name is rejected with 400. """
        payload = dict(
            email='test@email.com', password=bcrypt.encrypt('test_password', ident='2y'))
        response = client.put(
            '/auth/user', headers=admin_user_auth,
            content_type='application/json', data=json.dumps(payload))
        assert response.status_code == 400

    def test_post_user(self, client, postgres, admin_user_auth):
        """ A user created by an admin can update its own record using its token. """
        create_payload = dict(
            email='test@email.com', last_name='Tester', first_name='Testi',
            password=bcrypt.encrypt('test_password', ident='2y'))
        response = client.put(
            '/auth/user', headers=admin_user_auth,
            content_type='application/json', data=json.dumps(create_payload))

        assert response.status_code == 200
        created_user = json.loads(response.data)

        update_payload = dict(
            last_name='Tester', first_name='Testi v.',
            password=bcrypt.encrypt('test_password_changed', ident='2y'))
        response = client.post(
            '/auth/user', headers={'X-Token': created_user['token']},
            content_type='application/json', data=json.dumps(update_payload))
        assert response.status_code == 200
        self.assert_user(client, json.loads(response.data))

204
205
206
207
208

class TestUploads:

    def assert_uploads(self, upload_json_str, count=0, **kwargs):
        """ Asserts that the given JSON is a paginated list of exactly ``count`` uploads.

        If uploads are expected, the first one is also checked with :func:`assert_upload`
        using the given ``kwargs``.
        """
        response = json.loads(upload_json_str)
        assert 'pagination' in response
        assert 'page' in response['pagination']

        uploads = response['results']
        assert isinstance(uploads, list)
        assert len(uploads) == count

        if count <= 0:
            return

        self.assert_upload(json.dumps(uploads[0]), **kwargs)

    def assert_upload(self, upload_json_str, id=None, **kwargs):
        """ Asserts that the given JSON is an upload record and returns it as dict.

        Arguments:
            upload_json_str: The upload's JSON as returned by the API.
            id: If given, the expected ``upload_id``.
            **kwargs: Further keys with the values the upload is expected to have.
        """
        upload = json.loads(upload_json_str)
        assert 'upload_id' in upload
        if id is not None:
            assert id == upload['upload_id']
        assert 'create_time' in upload

        for key, expected in kwargs.items():
            assert upload.get(key) == expected

        return upload

    def assert_processing(self, client, test_user_auth, upload_id):
        """ Waits for the upload's processing and asserts that upload and calcs were processed successfully. """
        endpoint = '/uploads/%s' % upload_id

        # poll until completed
        upload = self.block_until_completed(client, upload_id, test_user_auth)

        assert len(upload['tasks']) == 4
        assert upload['tasks_status'] == SUCCESS
        assert upload['current_task'] == 'cleanup'
        assert not upload['process_running']

        for calc in upload['calcs']['results']:
            assert calc['tasks_status'] == SUCCESS
            assert calc['current_task'] == 'archiving'
            assert len(calc['tasks']) == 3
            logs_url = '/archive/logs/%s/%s' % (calc['upload_id'], calc['calc_id'])
            assert client.get(logs_url, headers=test_user_auth).status_code == 200

        if upload['calcs']['pagination']['total'] > 1:
            # exercise the calc pagination; from here on upload refers to the paginated record
            page_url = '%s?page=2&per_page=1&order_by=tasks_status' % endpoint
            response = client.get(page_url, headers=test_user_auth)
            assert response.status_code == 200
            upload = self.assert_upload(response.data)
            assert len(upload['calcs']['results']) == 1

        with_metadata = get_upload_with_metadata(upload)
        assert_upload_files(with_metadata, files.StagingUploadFiles)
        assert_search_upload(with_metadata, additional_keys=['atoms', 'system'])
258

259
    def assert_published(self, client, test_user_auth, upload_id, proc_infra, with_coe_repo=True, metadata=None, publish_with_metadata: bool = True):
        """ Publishes the upload via the API and asserts that it ends up published.

        Arguments:
            client: The API test client.
            test_user_auth: The auth headers used for all requests.
            upload_id: The id of the upload to publish.
            proc_infra: Not used by this method itself.
            with_coe_repo: If true, the upload is also checked with ``assert_coe_upload``
                and ``pid`` is expected in the search entries.
            metadata: The user metadata; checked against the coe repo and sent with
                the publish operation. Defaults to no metadata.
            publish_with_metadata: If false, the publish operation is sent with empty
                metadata, while ``metadata`` is still used for the checks.
        """
        # use a fresh dict per call instead of a shared mutable default argument
        if metadata is None:
            metadata = {}

        rv = client.get('/uploads/%s' % upload_id, headers=test_user_auth)
        upload = self.assert_upload(rv.data)

        upload_with_metadata = get_upload_with_metadata(upload)

        rv = client.post(
            '/uploads/%s' % upload_id,
            headers=test_user_auth,
            data=json.dumps(dict(operation='publish', metadata=metadata if publish_with_metadata else {})),
            content_type='application/json')
        assert rv.status_code == 200
        upload = self.assert_upload(rv.data)
        assert upload['current_process'] == 'publish_upload'
        assert upload['process_running']

        additional_keys = ['with_embargo']
        if with_coe_repo:
            additional_keys.append('pid')

        # wait for the publish process before looking at its results
        self.block_until_completed(client, upload_id, test_user_auth)
        upload_proc = Upload.objects(upload_id=upload_id).first()
        assert upload_proc is not None
        assert upload_proc.published is True

        if with_coe_repo:
            assert_coe_upload(upload_with_metadata.upload_id, user_metadata=metadata)
        assert_upload_files(upload_with_metadata, files.PublicUploadFiles, published=True)
        assert_search_upload(upload_with_metadata, additional_keys=additional_keys, published=True)

    def block_until_completed(self, client, upload_id: str, test_user_auth):
        """ Polls the upload every 0.1s until neither its process nor its tasks are running.

        Returns:
            The upload record as dict, or None if the API answers with 404.

        Raises:
            Exception: On any status code other than 200 or 404.
        """
        upload_url = '/uploads/%s' % upload_id
        while True:
            time.sleep(0.1)
            response = client.get(upload_url, headers=test_user_auth)
            status = response.status_code

            if status == 404:
                return None

            if status != 200:
                raise Exception(
                    'unexpected status code while blocking for upload processing: %s' %
                    str(status))

            upload = self.assert_upload(response.data)
            if not (upload['process_running'] or upload['tasks_running']):
                return upload
303
304
305

    def assert_upload_does_not_exist(self, client, upload_id: str, test_user_auth):
        """ Waits for running processes and asserts that the upload is gone.

        Checks the API (404), the processing data (no upload, no calcs), and the upload
        files (either none or only public upload files).
        """
        self.block_until_completed(client, upload_id, test_user_auth)

        rv = client.get('/uploads/%s' % upload_id, headers=test_user_auth)
        assert rv.status_code == 404
        assert Upload.objects(upload_id=upload_id).first() is None
        # compare the count by value; an identity check against an int literal only
        # works by implementation accident
        assert Calc.objects(upload_id=upload_id).count() == 0
        upload_files = UploadFiles.get(upload_id)
        assert upload_files is None or isinstance(upload_files, PublicUploadFiles)
Markus Scheidgen's avatar
Markus Scheidgen committed
313

314
315
316
317
318
319
320
    def test_get_command(self, client, test_user_auth, no_warn):
        """ ``/uploads/command`` provides an upload command and an upload url. """
        response = client.get('/uploads/command', headers=test_user_auth)
        assert response.status_code == 200
        command_info = json.loads(response.data)
        for expected_key in ('upload_command', 'upload_url'):
            assert expected_key in command_info

321
322
    def test_get_empty(self, client, test_user_auth, no_warn):
        """ Without any uploads the uploads list is empty. """
        response = client.get('/uploads/', headers=test_user_auth)
        assert response.status_code == 200
        self.assert_uploads(response.data, count=0)
Markus Scheidgen's avatar
Markus Scheidgen committed
326

327
328
329
    def test_get_not_existing(self, client, test_user_auth, no_warn):
        """ Getting an unknown upload id answers with 404. """
        response = client.get('/uploads/123456789012123456789012', headers=test_user_auth)
        assert response.status_code == 404
330

331
332
    @pytest.mark.parametrize('mode', ['multipart', 'stream', 'local_path'])
    @pytest.mark.parametrize('name', [None, 'test_name'])
    def test_put(self, client, test_user_auth, proc_infra, example_upload, mode, name, no_warn):
        """ Uploads the example file in the given mode, with and without a name, and checks its processing.

        The modes are a multipart form upload, streaming the raw file contents, and
        referring to a file via the ``local_path`` query parameter.
        """
        file = example_upload
        if name:
            url = '/uploads/?name=%s' % name
        else:
            url = '/uploads/'

        if mode == 'multipart':
            # open the file in a context manager, like the stream mode does, so the
            # handle is closed even if the request raises
            with open(file, 'rb') as f:
                rv = client.put(
                    url, data=dict(file=(f, 'the_name')), headers=test_user_auth)
            if not name:
                # without an explicit name, the name of the multipart file is expected
                name = 'the_name'
        elif mode == 'stream':
            with open(file, 'rb') as f:
                rv = client.put(url, data=f.read(), headers=test_user_auth)
        elif mode == 'local_path':
            url += '&' if name else '?'
            url += 'local_path=%s' % file
            rv = client.put(url, headers=test_user_auth)
        else:
            assert False

        assert rv.status_code == 200
        if mode == 'local_path':
            upload = self.assert_upload(rv.data, upload_path=file, name=name)
        else:
            upload = self.assert_upload(rv.data, name=name)
        assert upload['tasks_running']

        self.assert_processing(client, test_user_auth, upload['upload_id'])
363

364
365
366
367
368
369
370
371
    def test_upload_limit(self, client, mongo, test_user, test_user_auth, proc_infra):
        """ With ``upload_limit`` uploads already created, a further upload is rejected with 400. """
        limit = config.services.upload_limit
        for _ in range(limit):
            Upload.create(user=test_user)

        response = client.put('/uploads/?local_path=%s' % example_file, headers=test_user_auth)
        assert response.status_code == 400
        assert Upload.user_uploads(test_user).count() == config.services.upload_limit

372
373
374
    def test_delete_not_existing(self, client, test_user_auth, no_warn):
        """ Deleting an unknown upload id answers with 404. """
        response = client.delete('/uploads/123456789012123456789012', headers=test_user_auth)
        assert response.status_code == 404
375

376
377
378
379
380
381
382
383
384
385
386
387
    @pytest.fixture(scope='function')
    def slow_processing(self, monkeypatch):
        """ Replaces ``Upload.cleanup`` with a version that sleeps half a second first. """
        cleanup_target = 'nomad.processing.data.Upload.cleanup'
        old_cleanup = Upload.cleanup

        def slow_cleanup(upload):
            time.sleep(0.5)
            old_cleanup(upload)

        monkeypatch.setattr(cleanup_target, slow_cleanup)
        yield True
        # explicitly put the original cleanup back after the test
        monkeypatch.setattr(cleanup_target, old_cleanup)

388
    def test_delete_published(self, client, test_user_auth, proc_infra, no_warn, with_publish_to_coe_repo):
        """ Deleting a published upload is rejected with 400. """
        response = client.put('/uploads/?local_path=%s' % example_file, headers=test_user_auth)
        upload_id = self.assert_upload(response.data)['upload_id']
        self.assert_processing(client, test_user_auth, upload_id)
        self.assert_published(client, test_user_auth, upload_id, proc_infra, with_coe_repo=with_publish_to_coe_repo)

        response = client.delete('/uploads/%s' % upload_id, headers=test_user_auth)
        assert response.status_code == 400
395

Markus Scheidgen's avatar
Markus Scheidgen committed
396
    def test_delete(self, client, test_user_auth, proc_infra, no_warn):
        """ A processed, unpublished upload can be deleted and is gone afterwards. """
        response = client.put('/uploads/?local_path=%s' % example_file, headers=test_user_auth)
        upload_id = self.assert_upload(response.data)['upload_id']
        self.assert_processing(client, test_user_auth, upload_id)

        response = client.delete('/uploads/%s' % upload_id, headers=test_user_auth)
        assert response.status_code == 200
        self.assert_upload_does_not_exist(client, upload_id, test_user_auth)
403

404
405
406
407
408
409
410
411
412
413
414
415
416
417
    def test_post_empty(self, client, test_user_auth, empty_upload, proc_infra, no_warn):
        """ Publishing the processed ``empty_upload`` is rejected with 400. """
        response = client.put('/uploads/?local_path=%s' % empty_upload, headers=test_user_auth)
        assert response.status_code == 200
        upload_id = self.assert_upload(response.data)['upload_id']
        self.assert_processing(client, test_user_auth, upload_id)

        publish = dict(operation='publish')
        response = client.post(
            '/uploads/%s' % upload_id, headers=test_user_auth,
            data=json.dumps(publish),
            content_type='application/json')
        assert response.status_code == 400

    def test_post(self, client, test_user_auth, non_empty_example_upload, proc_infra, no_warn, with_publish_to_coe_repo):
        """ A published upload stays accessible, but is only listed with ``state=all``. """
        response = client.put('/uploads/?local_path=%s' % non_empty_example_upload, headers=test_user_auth)
        assert response.status_code == 200
        upload_id = self.assert_upload(response.data)['upload_id']
        self.assert_processing(client, test_user_auth, upload_id)
        self.assert_published(client, test_user_auth, upload_id, proc_infra, with_coe_repo=with_publish_to_coe_repo)

        # still visible
        assert client.get('/uploads/%s' % upload_id, headers=test_user_auth).status_code == 200

        # still listed with state=all
        response = client.get('/uploads/?state=all', headers=test_user_auth)
        assert response.status_code == 200
        listed = json.loads(response.data)['results']
        assert len(listed) > 0
        assert any(item['upload_id'] == upload_id for item in listed)

        # not listed without state=all
        response = client.get('/uploads/', headers=test_user_auth)
        assert response.status_code == 200
        listed = json.loads(response.data)['results']
        assert all(item['upload_id'] != upload_id for item in listed)

436
437
    def test_post_metadata(
            self, client, proc_infra, admin_user_auth, test_user_auth, test_user,
            other_test_user, no_warn, example_user_metadata):
        """ An admin can publish an upload with user metadata, including an ``_upload_time``. """
        rv = client.put('/uploads/?local_path=%s' % example_file, headers=test_user_auth)
        upload = self.assert_upload(rv.data)
        self.assert_processing(client, test_user_auth, upload['upload_id'])
        metadata = dict(**example_user_metadata)
        metadata['_upload_time'] = datetime.datetime.utcnow().isoformat()
        # metadata has to be passed by keyword: positionally it would be taken as
        # with_coe_repo and the publish would silently run without any metadata
        self.assert_published(client, admin_user_auth, upload['upload_id'], proc_infra, metadata=metadata)
445

Markus Scheidgen's avatar
Markus Scheidgen committed
446
    def test_post_metadata_forbidden(self, client, proc_infra, test_user_auth, no_warn):
        """ Publishing with ``_pid`` metadata using the test user's credentials is rejected with 401. """
        response = client.put('/uploads/?local_path=%s' % example_file, headers=test_user_auth)
        upload_id = self.assert_upload(response.data)['upload_id']
        self.assert_processing(client, test_user_auth, upload_id)

        publish = dict(operation='publish', metadata=dict(_pid=256))
        response = client.post(
            '/uploads/%s' % upload_id,
            headers=test_user_auth,
            data=json.dumps(publish),
            content_type='application/json')
        assert response.status_code == 401

457
458
459
460
461
462
463
    def test_post_metadata_and_republish(
            self, client, proc_infra, admin_user_auth, test_user_auth, test_user,
            other_test_user, no_warn, example_user_metadata):
        """ Publishes with metadata and then publishes again without sending metadata.

        The second publish is still checked against the original metadata.
        """
        rv = client.put('/uploads/?local_path=%s' % example_file, headers=test_user_auth)
        upload = self.assert_upload(rv.data)
        self.assert_processing(client, test_user_auth, upload['upload_id'])
        metadata = dict(**example_user_metadata)
        metadata['_upload_time'] = datetime.datetime.utcnow().isoformat()
        # metadata has to be passed by keyword: positionally it would be taken as
        # with_coe_repo and neither publish would send or check any metadata
        self.assert_published(client, admin_user_auth, upload['upload_id'], proc_infra, metadata=metadata)
        self.assert_published(client, admin_user_auth, upload['upload_id'], proc_infra, metadata=metadata, publish_with_metadata=False)

468
469
470
471
472
473
474
475
476
477
478
479
480
481
    def test_post_re_process(self, client, published, test_user_auth, monkeypatch):
        """ The ``re-process`` operation is accepted for a published upload and runs to completion. """
        monkeypatch.setattr('nomad.config.version', 're_process_test_version')
        monkeypatch.setattr('nomad.config.commit', 're_process_test_commit')

        upload_id = published.upload_id
        operation = dict(operation='re-process')
        response = client.post(
            '/uploads/%s' % upload_id,
            headers=test_user_auth,
            data=json.dumps(operation),
            content_type='application/json')

        assert response.status_code == 200
        completed = self.block_until_completed(client, upload_id, test_user_auth)
        assert completed is not None

482
    # TODO validate metadata (or all input models in API for that matter)
483
    # def test_post_bad_metadata(self, client, proc_infra, test_user_auth, postgres):
484
485
486
487
488
489
    #     rv = client.put('/uploads/?local_path=%s' % example_file, headers=test_user_auth)
    #     upload = self.assert_upload(rv.data)
    #     self.assert_processing(client, test_user_auth, upload['upload_id'])
    #     rv = client.post(
    #         '/uploads/%s' % upload['upload_id'],
    #         headers=test_user_auth,
490
    #         data=json.dumps(dict(operation='publish', metadata=dict(doesnotexist='hi'))),
491
492
493
    #         content_type='application/json')
    #     assert rv.status_code == 400

494
    def test_potcar(self, client, proc_infra, test_user_auth):
        """ After publishing, the POTCAR needs credentials while the stripped POTCAR does not. """
        # only the owner, shared with people are supposed to download the original potcar file
        potcar_upload = 'tests/data/proc/examples_potcar.zip'
        response = client.put('/uploads/?local_path=%s' % potcar_upload, headers=test_user_auth)

        upload_id = self.assert_upload(response.data)['upload_id']
        self.assert_processing(client, test_user_auth, upload_id)
        self.assert_published(client, test_user_auth, upload_id, proc_infra, with_coe_repo=True)

        potcar_url = '/raw/%s/examples_potcar/POTCAR' % upload_id
        stripped_url = '/raw/%s/examples_potcar/POTCAR.stripped' % upload_id

        assert client.get(potcar_url).status_code == 401
        assert client.get(potcar_url, headers=test_user_auth).status_code == 200
        assert client.get(stripped_url).status_code == 200

510

Markus Scheidgen's avatar
Markus Scheidgen committed
511
512
513
# The current UTC date, evaluated once when this module is imported.
today = datetime.datetime.utcnow().date()


514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
class UploadFilesBasedTests:

    @staticmethod
    def fix_signature(func, wrapper):
        """ Sets an explicit ``__signature__`` on ``wrapper``, combining it with ``func``'s parameters.

        The resulting signature consists of the first three parameters of ``wrapper``
        followed by all parameters of ``func`` after its first four.
        """
        # NOTE(review): presumably this lets pytest see the extra fixtures requested by
        # func, which the wrapper only takes via *args/**kwargs; confirm.
        # everything func takes beyond its first four parameters
        additional_args = list(inspect.signature(func).parameters.values())[4:]
        wrapper_sig = inspect.signature(wrapper)
        # the wrapper's first three parameters; the remaining ones are dropped
        wrapper_args = list(wrapper_sig.parameters.values())[:3] + additional_args
        wrapper_sig = wrapper_sig.replace(parameters=tuple(wrapper_args))
        wrapper.__signature__ = wrapper_sig

    @staticmethod
    def check_authorizaton(func):
        """ Decorator that runs ``func`` for all combinations of upload state and user.

        The wrapper is parametrized indirectly via the ``test_data`` fixture with
        ``[in_staging, restricted, for_uploader]`` and calls ``func`` with the upload
        and auth headers the fixture provides. For unauthorized combinations ``func``
        is expected to fail with an assertion that reports either a 401 or an empty zip file.
        """
        @pytest.mark.parametrize('test_data', [
            [True, None, True],     # in staging for uploader
            [True, None, False],    # in staging for different user
            [True, None, None],     # in staging for guest
            [False, True, True],    # in public, restricted for uploader
            [False, True, False],   # in public, restricted for different user
            [False, True, None],    # in public, restricted for guest
            [False, False, True],   # in public, public, for uploader
            [False, False, False],  # in public, public, for different user
            [False, False, None]    # in public, public, for guest
        ], indirect=True)
        def wrapper(self, client, test_data, *args, **kwargs):
            upload, authorized, auth_headers = test_data
            try:
                func(self, client, upload, auth_headers, *args, **kwargs)
            except AssertionError as assertion:
                # expected failures are recognized by the text of the assertion message
                assertion_str = str(assertion)
                if not authorized:
                    if '0 == 5' in assertion_str and 'ZipFile' in assertion_str:
                        # the user is not authorized and gets an empty zip as expected
                        return
                    if '401' in assertion_str:
                        # the user is not authorized and gets a 401 as expected
                        return
                raise assertion

            # func passed without any assertion error, which must not happen if unauthorized
            if not authorized:
                assert False
        UploadFilesBasedTests.fix_signature(func, wrapper)
        return wrapper

    @staticmethod
    def ignore_authorization(func):
        """ Decorator that runs ``func`` once on a staging and once on a public upload.

        The wrapper is parametrized indirectly via the ``test_data`` fixture; the
        ``authorized`` value provided by the fixture is ignored.
        """
        @pytest.mark.parametrize('test_data', [
            [True, None, True],      # in staging
            [False, False, None],    # in public
        ], indirect=True)
        def wrapper(self, client, test_data, *args, **kwargs):
            upload, _, auth_headers = test_data
            func(self, client, upload, auth_headers, *args, **kwargs)
        UploadFilesBasedTests.fix_signature(func, wrapper)
        return wrapper
568

569
    @pytest.fixture(scope='function')
    def test_data(self, request, postgres, mongo, raw_files, no_warn, test_user, other_test_user):
        """ Creates the upload ``test_upload`` according to ``request.param`` and removes its files afterwards.

        The param is ``[in_staging, restricted, for_uploader]``. The fixture yields
        the upload id, whether the user is authorized, and the user's auth headers
        (None for a guest).
        """
        # delete potential old test files
        for _ in range(2):
            stale_files = UploadFiles.get('test_upload')
            if stale_files:
                stale_files.delete()

        in_staging, restricted, for_uploader = request.param

        # in staging only the uploader is authorized, in public everybody is
        # authorized unless the data is restricted
        authorized = for_uploader if in_staging else (not restricted or for_uploader)

        if for_uploader:
            auth_headers = create_auth_headers(test_user)
        elif for_uploader is False:
            auth_headers = create_auth_headers(other_test_user)
        else:
            auth_headers = None

        calc_specs = 'r' if restricted else 'p'
        if in_staging:
            Upload.create(user=test_user, upload_id='test_upload')
            _, upload_files = create_staging_upload('test_upload', calc_specs=calc_specs)
        else:
            _, upload_files = create_public_upload('test_upload', calc_specs=calc_specs)
            postgres.begin()
            coe_upload = coe_repo.Upload(
                upload_name='test_upload',
                user_id=test_user.user_id, is_processed=True)
            postgres.add(coe_upload)
            postgres.commit()

        yield 'test_upload', authorized, auth_headers

        upload_files.delete()
607
608


609
610
611
612
class TestArchive(UploadFilesBasedTests):
    """ Tests for the ``/archive`` endpoints: archive data, processing logs, and metainfo. """

    @UploadFilesBasedTests.check_authorizaton
    def test_get(self, client, upload, auth_headers):
        """ The archive of a calc is available as JSON. """
        response = client.get('/archive/%s/0' % upload, headers=auth_headers)
        assert response.status_code == 200
        assert json.loads(response.data) is not None

    @UploadFilesBasedTests.ignore_authorization
    def test_get_signed(self, client, upload, _, test_user_signature_token):
        """ The archive of a calc is available with a signature token instead of auth headers. """
        archive_url = '/archive/%s/0?token=%s' % (upload, test_user_signature_token)
        response = client.get(archive_url)
        assert response.status_code == 200
        assert json.loads(response.data) is not None

    @UploadFilesBasedTests.check_authorizaton
    def test_get_calc_proc_log(self, client, upload, auth_headers):
        """ The processing log of a calc is available and not empty. """
        response = client.get('/archive/logs/%s/0' % upload, headers=auth_headers)
        assert response.status_code == 200
        assert len(response.data) > 0

    @UploadFilesBasedTests.ignore_authorization
    def test_get_calc_proc_log_signed(self, client, upload, _, test_user_signature_token):
        """ The processing log of a calc is available with a signature token instead of auth headers. """
        logs_url = '/archive/logs/%s/0?token=%s' % (upload, test_user_signature_token)
        response = client.get(logs_url)
        assert response.status_code == 200
        assert len(response.data) > 0

    @UploadFilesBasedTests.ignore_authorization
    def test_get_non_existing_archive(self, client, upload, auth_headers):
        """ Getting an archive that does not exist answers with 404. """
        response = client.get('/archive/doesnt/exist', headers=auth_headers)
        assert response.status_code == 404

    @pytest.mark.parametrize('info', [
        'all.nomadmetainfo.json',
        'all.experimental.nomadmetainfo.json',
        'vasp.nomadmetainfo.json',
        'mpes.nomadmetainfo.json'])
    def test_get_metainfo(self, client, info):
        """ The given metainfo file is available as non-empty JSON. """
        response = client.get('/archive/metainfo/%s' % info)
        assert response.status_code == 200
        metainfo = json.loads(response.data)
        assert len(metainfo) > 0
649

Markus Scheidgen's avatar
Markus Scheidgen committed
650

651
class TestRepo():
652
653
654
655
656
657
    @pytest.fixture(scope='class')
    def example_elastic_calcs(
            self, elastic_infra, normalized: parsing.LocalBackend,
            test_user: coe_repo.User, other_test_user: coe_repo.User):
        """
        Class-scoped fixture that calls ``clear_elastic`` and then saves four search
        entries. All entries are derived from one ``CalcWithMetadata`` object that is
        updated between saves, so each entry keeps the values set for the previous
        ones unless they are overwritten:

        - ``'1'``: uploaded by ``test_user``, published, no embargo, uploaded ``today``
        - ``'2'``: uploaded by ``other_test_user``, published, no embargo, uploaded five
          days before ``today``; also sets atoms, comment, formula, and basis_set
        - ``'3'``: uploaded by ``other_test_user``, not published, no embargo
        - ``'4'``: uploaded by ``other_test_user``, published, with embargo
        """
        clear_elastic(elastic_infra)

        entry = CalcWithMetadata(upload_id=0, calc_id=0, upload_time=today)
        entry.files = ['test/mainfile.txt']
        entry.apply_domain_metadata(normalized)

        def save_entry():
            # index the current state of the shared metadata object
            search.Entry.from_calc_with_metadata(entry).save(refresh=True)

        entry.update(
            calc_id='1', uploader=test_user.to_popo(), published=True, with_embargo=False)
        save_entry()

        five_days_ago = today - datetime.timedelta(days=5)
        entry.update(
            calc_id='2', uploader=other_test_user.to_popo(), published=True, with_embargo=False,
            upload_time=five_days_ago)
        entry.update(
            atoms=['Fe'], comment='this is a specific word', formula='AAA', basis_set='zzz')
        save_entry()

        entry.update(
            calc_id='3', uploader=other_test_user.to_popo(), published=False, with_embargo=False)
        save_entry()

        entry.update(
            calc_id='4', uploader=other_test_user.to_popo(), published=True, with_embargo=True)
        save_entry()

681
    def assert_search(self, rv: Any, number_of_calcs: int) -> dict:
        """
        Asserts that ``rv`` is a successful search response with the expected number
        of results.

        Arguments:
            rv: The response to check; its ``status_code`` and ``data`` are read.
            number_of_calcs: The expected length of the ``results`` list in the body.

        Returns:
            The response body parsed from JSON.

        Raises:
            AssertionError: If the status code is not 200 (the message carries the
                response body), or if ``results`` is missing, not a list, or has a
                different length than ``number_of_calcs``.
        """
        # put the body into the failure message instead of printing it to stdout
        assert rv.status_code == 200, str(rv.data)

        data = json.loads(rv.data)

        results = data.get('results')
        assert results is not None
        assert isinstance(results, list)
        assert len(results) == number_of_calcs

        return data

695
696
    def test_own_calc(self, client, example_elastic_calcs, no_warn, test_user_auth):
        """``test_user`` requests ``/repo/0/1``, the entry indexed with them as uploader; expects 200."""
        response = client.get('/repo/0/1', headers=test_user_auth)
        assert response.status_code == 200

699
700
701
702
703
704
705
706
    def test_public_calc(self, client, example_elastic_calcs, no_warn, other_test_user_auth):
        """``other_test_user`` requests ``/repo/0/1``, a published entry without embargo; expects 200."""
        response = client.get('/repo/0/1', headers=other_test_user_auth)
        assert response.status_code == 200

    def test_embargo_calc(self, client, example_elastic_calcs, no_warn, test_user_auth):
        """``test_user`` requests ``/repo/0/4``, an embargoed entry of ``other_test_user``; expects 401."""
        response = client.get('/repo/0/4', headers=test_user_auth)
        assert response.status_code == 401

707
708
709
710
    def test_own_embargo_calc(self, client, example_elastic_calcs, no_warn, other_test_user_auth):
        """``other_test_user`` requests ``/repo/0/4``, their own embargoed entry; expects 200."""
        response = client.get('/repo/0/4', headers=other_test_user_auth)
        assert response.status_code == 200

711
712
713
714
    def test_staging_calc(self, client, example_elastic_calcs, no_warn, test_user_auth):
        """``test_user`` requests ``/repo/0/3``, an unpublished entry of ``other_test_user``; expects 401."""
        response = client.get('/repo/0/3', headers=test_user_auth)
        assert response.status_code == 401

715
716
717
718
    def test_own_staging_calc(self, client, example_elastic_calcs, no_warn, other_test_user_auth):
        """``other_test_user`` requests ``/repo/0/3``, their own unpublished entry; expects 200."""
        response = client.get('/repo/0/3', headers=other_test_user_auth)
        assert response.status_code == 200

719
720
    def test_non_existing_calcs(self, client, example_elastic_calcs, test_user_auth):
        """Requests ``/repo/0/10``, a calc id the fixture never indexes; expects 404."""
        response = client.get('/repo/0/10', headers=test_user_auth)
        assert response.status_code == 404

723
724
725
    @pytest.mark.parametrize('calcs, owner, auth', [
        (2, 'all', 'none'),
        (2, 'all', 'test_user'),
        (4, 'all', 'other_test_user'),
        (1, 'user', 'test_user'),
        (3, 'user', 'other_test_user'),
        (0, 'staging', 'test_user'),
        (1, 'staging', 'other_test_user')
    ])
    def test_search_owner(
            self, client, example_elastic_calcs, no_warn, test_user_auth, other_test_user_auth,
            calcs, owner, auth):
        """
        Searches ``/repo/`` with the given ``owner`` parameter and credentials and checks
        the number of results. ``auth`` names the headers to send; ``'none'`` sends none.
        If results are expected, the first one has to contain the basic entry keys.
        """
        headers_by_name = {
            'none': None,
            'test_user': test_user_auth,
            'other_test_user': other_test_user_auth}
        headers = headers_by_name.get(auth)

        response = client.get('/repo/?owner=%s' % owner, headers=headers)
        body = self.assert_search(response, calcs)

        hits = body.get('results', None)
        if calcs <= 0:
            return

        first_hit = hits[0]
        for key in ('uploader', 'calc_id', 'formula', 'upload_id'):
            assert key in first_hit

Markus Scheidgen's avatar
Markus Scheidgen committed
741
    @pytest.mark.parametrize('calcs, start, end', [
        (2, today - datetime.timedelta(days=6), today),
        (2, today - datetime.timedelta(days=5), today),
        (1, today - datetime.timedelta(days=4), today),
        (1, today, today),
        (1, today - datetime.timedelta(days=6), today - datetime.timedelta(days=5)),
        (0, today - datetime.timedelta(days=7), today - datetime.timedelta(days=6)),
        (2, None, None),
        (1, today, None),
        (2, None, today)
    ])
    def test_search_time(self, client, example_elastic_calcs, no_warn, calcs, start, end):
        """
        Searches ``/repo/`` with optional ``from_time`` and ``until_time`` parameters and
        checks the number of results. A bound that is ``None`` is left out of the query.
        """
        parameters = []
        if start is not None:
            parameters.append('from_time=%s' % rfc3339DateTime.format(start))
        if end is not None:
            parameters.append('until_time=%s' % rfc3339DateTime.format(end))

        # no '?' at all if neither bound is given
        query_string = ('?%s' % '&'.join(parameters)) if parameters else ''

        response = client.get('/repo/%s' % query_string)
        self.assert_search(response, calcs)
Markus Scheidgen's avatar
Markus Scheidgen committed
765

766
    @pytest.mark.parametrize('calcs, quantity, value', [
        (2, 'system', 'bulk'),
        (0, 'system', 'atom'),
        (1, 'atoms', 'Br'),
        (1, 'atoms', 'Fe'),
        (0, 'atoms', ['Fe', 'Br', 'A', 'B']),
        (0, 'only_atoms', ['Br', 'Si']),
        (1, 'only_atoms', ['Fe']),
        (1, 'only_atoms', ['Br', 'K', 'Si']),
        (1, 'only_atoms', ['Br', 'Si', 'K']),
        (1, 'comment', 'specific'),
        (1, 'authors', 'Hofstadter, Leonard'),
        (2, 'files', 'test/mainfile.txt'),
        (2, 'paths', 'mainfile.txt'),
        (2, 'paths', 'test'),
        (2, 'quantities', ['wyckoff_letters_primitive', 'hall_number']),
        (0, 'quantities', 'dos')
    ])
    def test_search_quantities(
            self, client, example_elastic_calcs, no_warn, test_user_auth, calcs, quantity, value):
        """
        Searches ``/repo/`` as ``test_user`` with ``quantity=value`` (list values are sent
        as repeated parameters) and checks the number of results and that the response
        contains ``quantities``.
        """
        query_string = urlencode({quantity: value}, doseq=True)

        response = client.get('/repo/?%s' % query_string, headers=test_user_auth)
        logger.debug('run search quantities test', query_string=query_string)
        body = self.assert_search(response, calcs)

        aggregations = body.get('quantities', None)
        assert aggregations is not None

        # for simplicity we only assert on quantities for the system case with hits
        if quantity != 'system' or calcs == 0:
            return

        assert 'system' in aggregations
        system_values = aggregations['system']
        assert len(system_values) == 1
        assert value in system_values
798

799
800
    # metric selections used to parametrize tests: no metric, all metrics at once,
    # and each metric of search.metrics_names on its own
    metrics_permutations = [[], search.metrics_names] + [[metric] for metric in search.metrics_names]

801
802
803
804
805
806
807
808
809
810
811
    def test_search_admin(self, client, example_elastic_calcs, no_warn, admin_user_auth):
        """Searches with ``owner=admin`` as the admin user and expects all four indexed entries."""
        response = client.get('/repo/?owner=admin', headers=admin_user_auth)
        self.assert_search(response, 4)

    def test_search_admin_auth(self, client, example_elastic_calcs, no_warn, test_user_auth):
        """Searches with ``owner=admin`` as ``test_user`` and without credentials; expects 401 both times."""
        admin_search_url = '/repo/?owner=admin'

        as_regular_user = client.get(admin_search_url, headers=test_user_auth)
        assert as_regular_user.status_code == 401

        without_credentials = client.get(admin_search_url)
        assert without_credentials.status_code == 401

812
    @pytest.mark.parametrize('metrics', metrics_permutations)
    def test_search_total_metrics(self, client, example_elastic_calcs, no_warn, metrics):
        """
        Searches ``/repo/`` with the given ``metrics`` and checks that
        ``quantities.total.all`` contains ``code_runs`` and every requested metric.
        """
        query_string = urlencode(dict(metrics=metrics), doseq=True)
        response = client.get('/repo/?%s' % query_string)
        assert response.status_code == 200, str(response.data)

        body = json.loads(response.data)
        totals = body.get('quantities', {}).get('total', {}).get('all', None)
        assert totals is not None
        assert 'code_runs' in totals
        for requested_metric in metrics:
            assert requested_metric in totals
Markus Scheidgen's avatar
Markus Scheidgen committed
822

823
    @pytest.mark.parametrize('metrics', metrics_permutations)
    def test_search_aggregation_metrics(self, client, example_elastic_calcs, no_warn, metrics):
        """
        Searches ``/repo/`` with the given ``metrics`` and checks every value of every
        returned quantity: ``code_runs`` is always present; all requested metrics are
        present too, except for ``authors`` where only one metric is expected.
        """
        query_string = urlencode(dict(metrics=metrics), doseq=True)
        response = client.get('/repo/?%s' % query_string)
        assert response.status_code == 200

        body = json.loads(response.data)
        for quantity_name, values in body.get('quantities').items():
            for value_metrics in values.values():
                assert 'code_runs' in value_metrics

                if quantity_name == 'authors':
                    # code_runs is the only metric for authors
                    assert len(value_metrics) == 1
                    continue

                for requested_metric in metrics:
                    assert requested_metric in value_metrics
Markus Scheidgen's avatar
Markus Scheidgen committed
836

837
838
839
840
841
842
843
844
    def test_search_date_histogram(self, client, example_elastic_calcs, no_warn):
        """
        Searches ``/repo/`` with ``date_histogram=true`` and the ``total_energies`` metric
        and expects a non-empty ``date_histogram`` under ``quantities`` in the response.
        """
        rv = client.get('/repo/?date_histogram=true&metrics=total_energies')
        assert rv.status_code == 200
        data = json.loads(rv.data)
        histogram = data.get('quantities').get('date_histogram')
        # show the histogram only when the assertion fails, instead of printing it on every run
        assert len(histogram) > 0, str(histogram)

845
846
847
    @pytest.mark.parametrize('n_results, page, per_page', [(2, 1, 5), (1, 1, 1), (0, 2, 3)])
    def test_search_pagination(self, client, example_elastic_calcs, no_warn, n_results, page, per_page):
        """
        Searches ``/repo/`` with ``page`` and ``per_page`` and checks that the pagination
        total is 2 and that the returned page holds ``n_results`` entries.
        """
        page_url = '/repo/?page=%d&per_page=%d' % (page, per_page)
        response = client.get(page_url)
        assert response.status_code == 200

        body = json.loads(response.data)
        hits = body.get('results', None)
        assert body['pagination']['total'] == 2
        assert hits is not None
        assert len(hits) == n_results
854

855
856
    @pytest.mark.parametrize('first, order_by, order', [
        ('1', 'formula', -1),
        ('2', 'formula', 1),
        ('2', 'basis_set', -1),
        ('1', 'basis_set', 1),
        (None, 'authors', -1)])
    def test_search_order(self, client, example_elastic_calcs, no_warn, first, order_by, order):
        """
        Searches ``/repo/`` with ``order_by`` and ``order`` and expects both visible entries.
        If ``first`` is given, it has to be the ``calc_id`` of the first result.
        """
        ordered_url = '/repo/?order_by=%s&order=%d' % (order_by, order)
        response = client.get(ordered_url)
        assert response.status_code == 200

        body = json.loads(response.data)
        hits = body.get('results', None)
        assert body['pagination']['total'] == 2
        assert len(hits) == 2

        if first is None:
            # no particular order is asserted for this case
            return
        assert hits[0]['calc_id'] == first
868

869
870
871
872
873
874
875
876
877
878
    @pytest.mark.parametrize('n_results, size', [(2, None), (2, 5), (1, 1)])
    def test_search_scroll(self, client, example_elastic_calcs, no_warn, n_results, size):
        """
        Starts a scroll search on ``/repo/``, optionally with a page size given as
        ``per_page``, and checks the first page. Then follows the returned ``scroll_id``
        until the response contains none anymore.

        If the first page holds fewer than the two visible entries, at least one of the
        following pages has to contain results.
        """
        # the scroll flag is passed as plain '1', also when combined with per_page
        if size is not None:
            rv = client.get('/repo/?scroll=1&per_page=%d' % size)
        else:
            rv = client.get('/repo/?scroll=1')

        assert rv.status_code == 200
        data = json.loads(rv.data)
        results = data.get('results', None)
        assert data.get('scroll', {}).get('size', -1) > 0
        assert results is not None
        assert len(results) == n_results
        scroll_id = data.get('scroll', {}).get('scroll_id', None)
        assert scroll_id is not None

        has_another_page = False
        while scroll_id is not None:
            rv = client.get('/repo/?scroll=1&scroll_id=%s' % scroll_id)
            data = json.loads(rv.data)
            scroll_id = data.get('scroll', {}).get('scroll_id', None)
            has_another_page |= len(data.get('results')) > 0

        if n_results < 2:
            assert has_another_page

895
896
897
    def test_search_user_authrequired(self, client, example_elastic_calcs, no_warn):
        """Searches with ``owner=user`` without credentials and expects 401."""
        response = client.get('/repo/?owner=user')
        assert response.status_code == 401
898

899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
    @pytest.mark.parametrize('calcs, quantity, value', [
        (2, 'system', 'bulk'),
        (0, 'system', 'atom'),
        (1, 'atoms', 'Br'),
        (1, 'atoms', 'Fe'),
        (1, 'authors', 'Hofstadter, Leonard'),
        (2, 'files', 'test/mainfile.txt'),
        (0, 'quantities', 'dos')
    ])
    def test_quantity_search(
            self, client, example_elastic_calcs, no_warn, test_user_auth, calcs, quantity, value):
        """
        Requests ``/repo/<quantity>`` as ``test_user`` and checks the reported values:
        ``value`` is listed exactly if ``calcs`` is positive, and its count equals ``calcs``.
        """
        response = client.get('/repo/%s' % quantity, headers=test_user_auth)
        assert response.status_code == 200

        body = json.loads(response.data)
        reported = body['quantities']
        assert quantity in reported

        counts = reported[quantity]['values']
        assert (value in counts) == (calcs > 0)
        assert counts.get(value, 0) == calcs

    def test_quantity_search_after(self, client, example_elastic_calcs, no_warn, test_user_auth):
        """
        Pages through ``/repo/atoms`` one value at a time using the ``after`` key.

        Every page with an ``after`` key has to hold exactly one value that differs from
        the value of the first page, and its ``after`` key has to differ from the one
        used for the request. The page without an ``after`` key has to be empty.
        """
        response = client.get('/repo/atoms?size=1')
        assert response.status_code == 200

        first_page = json.loads(response.data)['quantities']['atoms']
        assert 'after' in first_page
        after = first_page['after']
        assert len(first_page['values']) == 1
        first_value = list(first_page['values'].keys())[0]

        while True:
            response = client.get('/repo/atoms?size=1&after=%s' % after)
            assert response.status_code == 200

            page = json.loads(response.data)['quantities']['atoms']
            if 'after' not in page:
                # the last page has no values and ends the iteration
                assert len(page['values']) == 0
                break

            assert len(page['values']) == 1
            assert first_value != list(page['values'].keys())[0]
            assert after != page['after']
            after = page['after']

946

947
class TestRaw(UploadFilesBasedTests):
Markus Scheidgen's avatar
Markus Scheidgen committed
948

949
950
951
952
953
954
955
956
957
958
959
960
961
962
    def test_raw_file_from_calc(self, client, non_empty_processed, test_user_auth):
        """
        Uses the first calc of the processed upload: requests its mainfile (by base name)
        under ``/raw/calc/<upload_id>/<calc_id>/`` and expects a non-empty body, then
        requests the calc path itself and expects a JSON body with non-empty ``contents``.
        """
        first_calc = list(non_empty_processed.calcs)[0]
        upload_id = non_empty_processed.upload_id

        mainfile_name = os.path.basename(first_calc.mainfile)
        file_url = '/raw/calc/%s/%s/%s' % (upload_id, first_calc.calc_id, mainfile_name)
        response = client.get(file_url, headers=test_user_auth)
        assert response.status_code == 200
        assert len(response.data) > 0

        listing_url = '/raw/calc/%s/%s/' % (upload_id, first_calc.calc_id)
        response = client.get(listing_url, headers=test_user_auth)
        assert response.status_code == 200
        listing = json.loads(response.data)
        assert len(listing['contents']) > 0

963
964
    @UploadFilesBasedTests.check_authorizaton
    def test_raw_file(self, client, upload, auth_headers):
        """Requests the example mainfile under ``/raw/<upload>/`` and expects 200 with a non-empty body."""
        response = client.get(
            '/raw/%s/%s' % (upload, example_file_mainfile), headers=auth_headers)
        assert response.status_code == 200
        assert len(response.data) > 0

970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
    @UploadFilesBasedTests.check_authorizaton
    def test_raw_file_partial(self, client, upload, auth_headers):
        """
        Requests two ranges of the example mainfile via ``offset`` and ``length``: the
        first 20 bytes, and 10 bytes from offset 10. The second range has to equal the
        second half of the first one.
        """
        head_url = '/raw/%s/%s?offset=0&length=20' % (upload, example_file_mainfile)
        response = client.get(head_url, headers=auth_headers)
        assert response.status_code == 200
        start_data = response.data
        assert len(start_data) == 20

        overlap_url = '/raw/%s/%s?offset=10&length=10' % (upload, example_file_mainfile)
        response = client.get(overlap_url, headers=auth_headers)
        assert response.status_code == 200
        next_data = response.data
        assert len(next_data) == 10
        assert start_data[10:] == next_data

985
986
987
988
989
990
991
    @UploadFilesBasedTests.ignore_authorization
    def test_raw_file_signed(self, client, upload, _, test_user_signature_token):
        """
        Requests the example mainfile without headers, passing the signature token as
        ``token`` query parameter, and expects 200 with a non-empty body.
        """
        signed_url = '/raw/%s/%s?token=%s' % (
            upload, example_file_mainfile, test_user_signature_token)
        response = client.get(signed_url)
        assert response.status_code == 200
        assert len(response.data) > 0

992
993
    @UploadFilesBasedTests.ignore_authorization
    def test_raw_file_missing_file(self, client, upload, auth_headers):
        """
        Requests ``/raw/<upload>/does/not/exist`` and expects 404 with a JSON body that
        has no ``files`` key.
        """
        response = client.get('/raw/%s/does/not/exist' % upload, headers=auth_headers)
        assert response.status_code == 404

        body = json.loads(response.data)
        assert 'files' not in body

1000
    @pytest.mark.parametrize('compress', [True, False])
    @UploadFilesBasedTests.ignore_authorization
    def test_raw_file_wildcard(self, client, upload, auth_headers, compress):
        """
        Requests ``/raw/<upload>/examples*``, with ``compress=1`` if ``compress`` is set,
        and expects a valid zip file with one member per example file.
        """
        wildcard_url = '/raw/%s/examples*' % upload
        url = ('%s?compress=1' % wildcard_url) if compress else wildcard_url

        response = client.get(url, headers=auth_headers)
        assert response.status_code == 200
        assert len(response.data) > 0

        with zipfile.ZipFile(io.BytesIO(response.data)) as archive:
            # testzip returns None if no member is corrupt
            assert archive.testzip() is None
            assert len(archive.namelist()) == len(example_file_contents)

1014
1015
1016
1017
    @UploadFilesBasedTests.ignore_authorization
    def test_raw_file_wildcard_missing(self, client, upload, auth_headers):
        """Requests ``/raw/<upload>/does/not/exist*`` and expects 404."""
        response = client.get('/raw/%s/does/not/exist*' % upload, headers=auth_headers)
        assert response.status_code == 404
1019

1020
1021
    @UploadFilesBasedTests.ignore_authorization
    def test_raw_file_missing_upload(self, client, upload, auth_headers):
        """Requests the example mainfile under the upload id ``doesnotexist`` and expects 404."""
        response = client.get(
            '/raw/doesnotexist/%s' % example_file_mainfile, headers=auth_headers)
        assert response.status_code == 404

1026
    @pytest.mark.parametrize('compress', [True, False])
1027
1028
    @UploadFilesBasedTests.check_authorizaton
    def test_raw_files(self, client, upload, auth_headers, compress):
1029
        url = '/raw/%s?files=%s' % (
1030
            upload, ','.join(example_file_contents))
1031
1032
        if compress:
            url = '%s&compress=1' % url
1033
        rv = client.get(url, headers=auth_headers)