test_api.py 47.7 KB
Newer Older
Markus Scheidgen's avatar
Markus Scheidgen committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
# Copyright 2018 Markus Scheidgen
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an"AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

15
from typing import Any
16
17
18
import pytest
import time
import json
19
import base64
20
21
import zipfile
import io
22
import inspect
23
from passlib.hash import bcrypt
Markus Scheidgen's avatar
Markus Scheidgen committed
24
import datetime
25
import os.path
26
from urllib.parse import urlencode
27

Markus Scheidgen's avatar
Markus Scheidgen committed
28
from nomad.api.app import rfc3339DateTime
29
from nomad import coe_repo, search, parsing, files, config, utils
30
31
from nomad.files import UploadFiles, PublicUploadFiles
from nomad.processing import Upload, Calc, SUCCESS
32
from nomad.datamodel import UploadWithMetadata, CalcWithMetadata
33

34
from tests.conftest import create_auth_headers, clear_elastic
35
from tests.test_files import example_file, example_file_mainfile, example_file_contents
36
from tests.test_files import create_staging_upload, create_public_upload, assert_upload_files
37
from tests.test_coe_repo import assert_coe_upload
38
from tests.test_search import assert_search_upload
39
40


41
42
logger = utils.get_logger(__name__)

Markus Scheidgen's avatar
Markus Scheidgen committed
43

44
45
46
47
48
def test_alive(client):
    rv = client.get('/alive')
    assert rv.status_code == 200


49
50
51
52
53
54
55
@pytest.fixture(scope='function')
def test_user_signature_token(client, test_user_auth):
    rv = client.get('/auth/token', headers=test_user_auth)
    assert rv.status_code == 200
    return json.loads(rv.data)['token']


56
57
58
59
60
61
62
63
def get_upload_with_metadata(upload: dict) -> UploadWithMetadata:
    """ Create a :class:`UploadWithMetadata` from a API upload json record. """
    return UploadWithMetadata(
        upload_id=upload['upload_id'], calcs=[
            CalcWithMetadata(calc_id=calc['calc_id'], mainfile=calc['mainfile'])
            for calc in upload['calcs']['results']])


64
65
66
class TestInfo:
    def test_info(self, client):
        rv = client.get('/info/')
67
68
69
70
        data = json.loads(rv.data)
        assert 'codes' in data
        assert 'parsers' in data
        assert len(data['parsers']) >= len(data['codes'])
71
72
        assert rv.status_code == 200

73

74
class TestAdmin:
Markus Scheidgen's avatar
Markus Scheidgen committed
75
    @pytest.mark.timeout(config.tests.default_timeout)
76
77
    def test_reset(self, client, admin_user_auth, expandable_postgres, monkeypatch):
        monkeypatch.setattr('nomad.config.services.disable_reset', False)
78
79
80
        rv = client.post('/admin/reset', headers=admin_user_auth)
        assert rv.status_code == 200

Markus Scheidgen's avatar
Markus Scheidgen committed
81
    @pytest.mark.timeout(config.tests.default_timeout)
82
83
    def test_remove(self, client, admin_user_auth, expandable_postgres, monkeypatch):
        monkeypatch.setattr('nomad.config.services.disable_reset', False)
84
85
        rv = client.post('/admin/remove', headers=admin_user_auth)
        assert rv.status_code == 200
86
87
88
89
90
91

    def test_doesnotexist(self, client, admin_user_auth):
        rv = client.post('/admin/doesnotexist', headers=admin_user_auth)
        assert rv.status_code == 404

    def test_only_admin(self, client, test_user_auth):
Markus Scheidgen's avatar
Markus Scheidgen committed
92
        rv = client.post('/admin/reset', headers=test_user_auth)
93
94
        assert rv.status_code == 401

95
96
    def test_disabled(self, client, admin_user_auth, expandable_postgres, monkeypatch):
        monkeypatch.setattr('nomad.config.services.disable_reset', True)
97
98
99
100
        rv = client.post('/admin/reset', headers=admin_user_auth)
        assert rv.status_code == 400


101
class TestAuth:
102
    def test_xtoken_auth(self, client, test_user: coe_repo.User, no_warn):
103
        rv = client.get('/uploads/', headers={
104
            'X-Token': test_user.first_name.lower()  # the test users have their firstname as tokens for convinience
105
        })
106

107
        assert rv.status_code == 200
Markus Scheidgen's avatar
Markus Scheidgen committed
108

109
    def test_xtoken_auth_denied(self, client, no_warn, postgres):
110
111
112
        rv = client.get('/uploads/', headers={
            'X-Token': 'invalid'
        })
Markus Scheidgen's avatar
Markus Scheidgen committed
113

114
        assert rv.status_code == 401
115

116
117
118
    def test_basic_auth(self, client, test_user_auth, no_warn):
        rv = client.get('/uploads/', headers=test_user_auth)
        assert rv.status_code == 200
119

120
121
122
123
124
125
126
    def test_basic_auth_denied(self, client, no_warn):
        basic_auth_base64 = base64.b64encode('invalid'.encode('utf-8')).decode('utf-8')
        rv = client.get('/uploads/', headers={
            'Authorization': 'Basic %s' % basic_auth_base64
        })
        assert rv.status_code == 401

127
    def test_get_user(self, client, test_user_auth, test_user: coe_repo.User, no_warn):
128
129
        rv = client.get('/auth/user', headers=test_user_auth)
        assert rv.status_code == 200
130
131
132
        self.assert_user(client, json.loads(rv.data))

    def assert_user(self, client, user):
133
134
135
136
137
138
139
        for key in ['first_name', 'last_name', 'email', 'token']:
            assert key in user

        rv = client.get('/uploads/', headers={
            'X-Token': user['token']
        })

140
141
        assert rv.status_code == 200

142
143
144
    def test_signature_token(self, test_user_signature_token, no_warn):
        assert test_user_signature_token is not None

145
146
147
148
149
150
151
152
153
154
155
    @pytest.mark.parametrize('token, affiliation', [
        ('test_token', dict(name='HU Berlin', address='Unter den Linden 6')),
        (None, None)])
    def test_put_user(self, client, postgres, admin_user_auth, token, affiliation):
        data = dict(
            email='test@email.com', last_name='Tester', first_name='Testi',
            token=token, affiliation=affiliation,
            password=bcrypt.encrypt('test_password', ident='2y'))

        data = {key: value for key, value in data.items() if value is not None}

156
157
        rv = client.put(
            '/auth/user', headers=admin_user_auth,
158
            content_type='application/json', data=json.dumps(data))
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195

        assert rv.status_code == 200
        self.assert_user(client, json.loads(rv.data))

    def test_put_user_admin_only(self, client, test_user_auth):
        rv = client.put(
            '/auth/user', headers=test_user_auth,
            content_type='application/json', data=json.dumps(dict(
                email='test@email.com', last_name='Tester', first_name='Testi',
                password=bcrypt.encrypt('test_password', ident='2y'))))
        assert rv.status_code == 401

    def test_put_user_required_field(self, client, admin_user_auth):
        rv = client.put(
            '/auth/user', headers=admin_user_auth,
            content_type='application/json', data=json.dumps(dict(
                email='test@email.com', password=bcrypt.encrypt('test_password', ident='2y'))))
        assert rv.status_code == 400

    def test_post_user(self, client, postgres, admin_user_auth):
        rv = client.put(
            '/auth/user', headers=admin_user_auth,
            content_type='application/json', data=json.dumps(dict(
                email='test@email.com', last_name='Tester', first_name='Testi',
                password=bcrypt.encrypt('test_password', ident='2y'))))

        assert rv.status_code == 200
        user = json.loads(rv.data)

        rv = client.post(
            '/auth/user', headers={'X-Token': user['token']},
            content_type='application/json', data=json.dumps(dict(
                last_name='Tester', first_name='Testi v.',
                password=bcrypt.encrypt('test_password_changed', ident='2y'))))
        assert rv.status_code == 200
        self.assert_user(client, json.loads(rv.data))

196
197
198
199
200

class TestUploads:

    def assert_uploads(self, upload_json_str, count=0, **kwargs):
        data = json.loads(upload_json_str)
201
202
203
204
        assert 'pagination' in data
        assert 'page' in data['pagination']

        data = data['results']
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
        assert isinstance(data, list)
        assert len(data) == count

        if count > 0:
            self.assert_upload(json.dumps(data[0]), **kwargs)

    def assert_upload(self, upload_json_str, id=None, **kwargs):
        data = json.loads(upload_json_str)
        assert 'upload_id' in data
        if id is not None:
            assert id == data['upload_id']
        assert 'create_time' in data

        for key, value in kwargs.items():
            assert data.get(key, None) == value

        return data

    def assert_processing(self, client, test_user_auth, upload_id):
        upload_endpoint = '/uploads/%s' % upload_id

        # poll until completed
227
        upload = self.block_until_completed(client, upload_id, test_user_auth)
228
229

        assert len(upload['tasks']) == 4
230
        assert upload['tasks_status'] == SUCCESS
231
        assert upload['current_task'] == 'cleanup'
232
        assert not upload['process_running']
233

234
235
        calcs = upload['calcs']['results']
        for calc in calcs:
236
            assert calc['tasks_status'] == SUCCESS
237
238
            assert calc['current_task'] == 'archiving'
            assert len(calc['tasks']) == 3
239
            assert client.get('/archive/logs/%s/%s' % (calc['upload_id'], calc['calc_id']), headers=test_user_auth).status_code == 200
240
241

        if upload['calcs']['pagination']['total'] > 1:
242
            rv = client.get('%s?page=2&per_page=1&order_by=tasks_status' % upload_endpoint, headers=test_user_auth)
243
244
245
246
            assert rv.status_code == 200
            upload = self.assert_upload(rv.data)
            assert len(upload['calcs']['results']) == 1

247
248
        upload_with_metadata = get_upload_with_metadata(upload)
        assert_upload_files(upload_with_metadata, files.StagingUploadFiles)
249
        assert_search_upload(upload_with_metadata, additional_keys=['atoms', 'system'])
250

251
    def assert_published(self, client, test_user_auth, upload_id, proc_infra, with_coe_repo=True, metadata={}, publish_with_metadata: bool = True):
252
253
        rv = client.get('/uploads/%s' % upload_id, headers=test_user_auth)
        upload = self.assert_upload(rv.data)
254
255

        upload_with_metadata = get_upload_with_metadata(upload)
256

257
258
259
        rv = client.post(
            '/uploads/%s' % upload_id,
            headers=test_user_auth,
260
            data=json.dumps(dict(operation='publish', metadata=metadata if publish_with_metadata else {})),
261
            content_type='application/json')
262
        assert rv.status_code == 200
263
        upload = self.assert_upload(rv.data)
264
        assert upload['current_process'] == 'publish_upload'
265
        assert upload['process_running']
266

267
        additional_keys = ['with_embargo']
268
        if with_coe_repo:
269
            additional_keys.append('pid')
270

271
272
273
274
275
276
277
        self.block_until_completed(client, upload_id, test_user_auth)
        upload_proc = Upload.objects(upload_id=upload_id).first()
        assert upload_proc is not None
        assert upload_proc.published is True

        if with_coe_repo:
            assert_coe_upload(upload_with_metadata.upload_id, user_metadata=metadata)
278
        assert_upload_files(upload_with_metadata, files.PublicUploadFiles, published=True)
279
280
281
        assert_search_upload(upload_with_metadata, additional_keys=additional_keys, published=True)

    def block_until_completed(self, client, upload_id: str, test_user_auth):
282
283
284
285
286
        while True:
            time.sleep(0.1)
            rv = client.get('/uploads/%s' % upload_id, headers=test_user_auth)
            if rv.status_code == 200:
                upload = self.assert_upload(rv.data)
287
288
                if not upload['process_running'] and not upload['tasks_running']:
                    return upload
289
            elif rv.status_code == 404:
290
                return None
291
292
293
294
            else:
                raise Exception(
                    'unexpected status code while blocking for upload processing: %s' %
                    str(rv.status_code))
295
296
297

    def assert_upload_does_not_exist(self, client, upload_id: str, test_user_auth):
        self.block_until_completed(client, upload_id, test_user_auth)
298

299
300
301
302
303
304
        rv = client.get('/uploads/%s' % upload_id, headers=test_user_auth)
        assert rv.status_code == 404
        assert Upload.objects(upload_id=upload_id).first() is None
        assert Calc.objects(upload_id=upload_id).count() is 0
        upload_files = UploadFiles.get(upload_id)
        assert upload_files is None or isinstance(upload_files, PublicUploadFiles)
Markus Scheidgen's avatar
Markus Scheidgen committed
305

306
307
308
309
310
311
312
    def test_get_command(self, client, test_user_auth, no_warn):
        rv = client.get('/uploads/command', headers=test_user_auth)
        assert rv.status_code == 200
        data = json.loads(rv.data)
        assert 'upload_command' in data
        assert 'upload_url' in data

313
314
    def test_get_empty(self, client, test_user_auth, no_warn):
        rv = client.get('/uploads/', headers=test_user_auth)
Markus Scheidgen's avatar
Markus Scheidgen committed
315

316
317
        assert rv.status_code == 200
        self.assert_uploads(rv.data, count=0)
Markus Scheidgen's avatar
Markus Scheidgen committed
318

319
320
321
    def test_get_not_existing(self, client, test_user_auth, no_warn):
        rv = client.get('/uploads/123456789012123456789012', headers=test_user_auth)
        assert rv.status_code == 404
322

323
324
    @pytest.mark.parametrize('mode', ['multipart', 'stream', 'local_path'])
    @pytest.mark.parametrize('name', [None, 'test_name'])
Markus Scheidgen's avatar
Markus Scheidgen committed
325
    def test_put(self, client, test_user_auth, proc_infra, example_upload, mode, name, no_warn):
326
        file = example_upload
327
328
329
330
331
332
333
        if name:
            url = '/uploads/?name=%s' % name
        else:
            url = '/uploads/'

        if mode == 'multipart':
            rv = client.put(
334
335
336
                url, data=dict(file=(open(file, 'rb'), 'the_name')), headers=test_user_auth)
            if not name:
                name = 'the_name'
337
338
339
340
341
342
343
344
345
        elif mode == 'stream':
            with open(file, 'rb') as f:
                rv = client.put(url, data=f.read(), headers=test_user_auth)
        elif mode == 'local_path':
            url += '&' if name else '?'
            url += 'local_path=%s' % file
            rv = client.put(url, headers=test_user_auth)
        else:
            assert False
346

347
348
        assert rv.status_code == 200
        if mode == 'local_path':
349
            upload = self.assert_upload(rv.data, upload_path=file, name=name)
350
351
        else:
            upload = self.assert_upload(rv.data, name=name)
352
        assert upload['tasks_running']
353

354
        self.assert_processing(client, test_user_auth, upload['upload_id'])
355

356
357
358
359
360
361
362
363
    def test_upload_limit(self, client, mongo, test_user, test_user_auth, proc_infra):
        for _ in range(0, config.services.upload_limit):
            Upload.create(user=test_user)
        file = example_file
        rv = client.put('/uploads/?local_path=%s' % file, headers=test_user_auth)
        assert rv.status_code == 400
        assert Upload.user_uploads(test_user).count() == config.services.upload_limit

364
365
366
    def test_delete_not_existing(self, client, test_user_auth, no_warn):
        rv = client.delete('/uploads/123456789012123456789012', headers=test_user_auth)
        assert rv.status_code == 404
367

368
369
370
371
372
373
374
375
376
377
378
379
    @pytest.fixture(scope='function')
    def slow_processing(self, monkeypatch):
        old_cleanup = Upload.cleanup

        def slow_cleanup(self):
            time.sleep(0.5)
            old_cleanup(self)

        monkeypatch.setattr('nomad.processing.data.Upload.cleanup', slow_cleanup)
        yield True
        monkeypatch.setattr('nomad.processing.data.Upload.cleanup', old_cleanup)

380
    def test_delete_published(self, client, test_user_auth, proc_infra, no_warn, with_publish_to_coe_repo):
381
382
383
        rv = client.put('/uploads/?local_path=%s' % example_file, headers=test_user_auth)
        upload = self.assert_upload(rv.data)
        self.assert_processing(client, test_user_auth, upload['upload_id'])
384
        self.assert_published(client, test_user_auth, upload['upload_id'], proc_infra, with_coe_repo=with_publish_to_coe_repo)
385
        rv = client.delete('/uploads/%s' % upload['upload_id'], headers=test_user_auth)
386
        assert rv.status_code == 400
387

Markus Scheidgen's avatar
Markus Scheidgen committed
388
    def test_delete(self, client, test_user_auth, proc_infra, no_warn):
389
390
391
392
393
        rv = client.put('/uploads/?local_path=%s' % example_file, headers=test_user_auth)
        upload = self.assert_upload(rv.data)
        self.assert_processing(client, test_user_auth, upload['upload_id'])
        rv = client.delete('/uploads/%s' % upload['upload_id'], headers=test_user_auth)
        assert rv.status_code == 200
394
        self.assert_upload_does_not_exist(client, upload['upload_id'], test_user_auth)
395

396
397
398
399
400
401
402
403
404
405
406
407
408
409
    def test_post_empty(self, client, test_user_auth, empty_upload, proc_infra, no_warn):
        rv = client.put('/uploads/?local_path=%s' % empty_upload, headers=test_user_auth)
        assert rv.status_code == 200
        upload = self.assert_upload(rv.data)
        self.assert_processing(client, test_user_auth, upload['upload_id'])
        rv = client.post(
            '/uploads/%s' % upload['upload_id'], headers=test_user_auth,
            data=json.dumps(dict(operation='publish')),
            content_type='application/json')
        assert rv.status_code == 400

    def test_post(self, client, test_user_auth, non_empty_example_upload, proc_infra, no_warn, with_publish_to_coe_repo):
        rv = client.put('/uploads/?local_path=%s' % non_empty_example_upload, headers=test_user_auth)
        assert rv.status_code == 200
410
411
        upload = self.assert_upload(rv.data)
        self.assert_processing(client, test_user_auth, upload['upload_id'])
412
        self.assert_published(client, test_user_auth, upload['upload_id'], proc_infra, with_coe_repo=with_publish_to_coe_repo)
413

414
415
416
        # still visible
        assert client.get('/uploads/%s' % upload['upload_id'], headers=test_user_auth).status_code == 200
        # still listed with all=True
417
        rv = client.get('/uploads/?state=all', headers=test_user_auth)
418
        assert rv.status_code == 200
419
        data = json.loads(rv.data)['results']
420
421
422
423
424
        assert len(data) > 0
        assert any(item['upload_id'] == upload['upload_id'] for item in data)
        # not listed with all=False
        rv = client.get('/uploads/', headers=test_user_auth)
        assert rv.status_code == 200
425
        data = json.loads(rv.data)['results']
426
427
        assert not any(item['upload_id'] == upload['upload_id'] for item in data)

428
429
    def test_post_metadata(
            self, client, proc_infra, admin_user_auth, test_user_auth, test_user,
430
            other_test_user, no_warn, example_user_metadata):
431
432
433
        rv = client.put('/uploads/?local_path=%s' % example_file, headers=test_user_auth)
        upload = self.assert_upload(rv.data)
        self.assert_processing(client, test_user_auth, upload['upload_id'])
434
        metadata = dict(**example_user_metadata)
435
        metadata['_upload_time'] = datetime.datetime.utcnow().isoformat()
436
        self.assert_published(client, admin_user_auth, upload['upload_id'], proc_infra, metadata)
437

Markus Scheidgen's avatar
Markus Scheidgen committed
438
    def test_post_metadata_forbidden(self, client, proc_infra, test_user_auth, no_warn):
439
440
441
442
443
444
        rv = client.put('/uploads/?local_path=%s' % example_file, headers=test_user_auth)
        upload = self.assert_upload(rv.data)
        self.assert_processing(client, test_user_auth, upload['upload_id'])
        rv = client.post(
            '/uploads/%s' % upload['upload_id'],
            headers=test_user_auth,
445
            data=json.dumps(dict(operation='publish', metadata=dict(_pid=256))),
446
447
448
            content_type='application/json')
        assert rv.status_code == 401

449
450
451
452
453
454
455
    def test_post_metadata_and_republish(
            self, client, proc_infra, admin_user_auth, test_user_auth, test_user,
            other_test_user, no_warn, example_user_metadata):
        rv = client.put('/uploads/?local_path=%s' % example_file, headers=test_user_auth)
        upload = self.assert_upload(rv.data)
        self.assert_processing(client, test_user_auth, upload['upload_id'])
        metadata = dict(**example_user_metadata)
456
        metadata['_upload_time'] = datetime.datetime.utcnow().isoformat()
457
458
459
        self.assert_published(client, admin_user_auth, upload['upload_id'], proc_infra, metadata)
        self.assert_published(client, admin_user_auth, upload['upload_id'], proc_infra, metadata, publish_with_metadata=False)

460
461
462
463
464
465
466
467
468
469
470
471
472
473
    def test_post_re_process(self, client, published, test_user_auth, monkeypatch):
        monkeypatch.setattr('nomad.config.version', 're_process_test_version')
        monkeypatch.setattr('nomad.config.commit', 're_process_test_commit')

        upload_id = published.upload_id
        rv = client.post(
            '/uploads/%s' % upload_id,
            headers=test_user_auth,
            data=json.dumps(dict(operation='re-process')),
            content_type='application/json')

        assert rv.status_code == 200
        assert self.block_until_completed(client, upload_id, test_user_auth) is not None

474
    # TODO validate metadata (or all input models in API for that matter)
475
    # def test_post_bad_metadata(self, client, proc_infra, test_user_auth, postgres):
476
477
478
479
480
481
    #     rv = client.put('/uploads/?local_path=%s' % example_file, headers=test_user_auth)
    #     upload = self.assert_upload(rv.data)
    #     self.assert_processing(client, test_user_auth, upload['upload_id'])
    #     rv = client.post(
    #         '/uploads/%s' % upload['upload_id'],
    #         headers=test_user_auth,
482
    #         data=json.dumps(dict(operation='publish', metadata=dict(doesnotexist='hi'))),
483
484
485
    #         content_type='application/json')
    #     assert rv.status_code == 400

486
    def test_potcar(self, client, proc_infra, test_user_auth):
487
        # only the owner, shared with people are supposed to download the original potcar file
488
489
490
491
492
493
494
495
496
497
498
499
500
501
        example_file = 'tests/data/proc/examples_potcar.zip'
        rv = client.put('/uploads/?local_path=%s' % example_file, headers=test_user_auth)

        upload = self.assert_upload(rv.data)
        upload_id = upload['upload_id']
        self.assert_processing(client, test_user_auth, upload_id)
        self.assert_published(client, test_user_auth, upload_id, proc_infra, with_coe_repo=True)
        rv = client.get('/raw/%s/examples_potcar/POTCAR' % upload_id)
        assert rv.status_code == 401
        rv = client.get('/raw/%s/examples_potcar/POTCAR' % upload_id, headers=test_user_auth)
        assert rv.status_code == 200
        rv = client.get('/raw/%s/examples_potcar/POTCAR.stripped' % upload_id)
        assert rv.status_code == 200

502

Markus Scheidgen's avatar
Markus Scheidgen committed
503
504
505
today = datetime.datetime.utcnow().date()


506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
class UploadFilesBasedTests:

    @staticmethod
    def fix_signature(func, wrapper):
        additional_args = list(inspect.signature(func).parameters.values())[4:]
        wrapper_sig = inspect.signature(wrapper)
        wrapper_args = list(wrapper_sig.parameters.values())[:3] + additional_args
        wrapper_sig = wrapper_sig.replace(parameters=tuple(wrapper_args))
        wrapper.__signature__ = wrapper_sig

    @staticmethod
    def check_authorizaton(func):
        @pytest.mark.parametrize('test_data', [
            [True, None, True],     # in staging for upload
            [True, None, False],    # in staging for different user
            [True, None, None],     # in staging for guest
            [False, True, True],    # in public, restricted for uploader
            [False, True, False],   # in public, restricted for different user
            [False, True, None],    # in public, restricted for guest
            [False, False, True],   # in public, public, for uploader
            [False, False, False],  # in public, public, for different user
            [False, False, None]    # in public, public, for guest
        ], indirect=True)
        def wrapper(self, client, test_data, *args, **kwargs):
            upload, authorized, auth_headers = test_data
            try:
                func(self, client, upload, auth_headers, *args, **kwargs)
            except AssertionError as assertion:
                assertion_str = str(assertion)
                if not authorized:
                    if '0 == 5' in assertion_str and 'ZipFile' in assertion_str:
                        # the user is not authorized an gets an empty zip as expected
                        return
                    if '401' in assertion_str:
                        # the user is not authorized and gets a 401 as expected
                        return
                raise assertion

            if not authorized:
                assert False
        UploadFilesBasedTests.fix_signature(func, wrapper)
        return wrapper

    @staticmethod
    def ignore_authorization(func):
        @pytest.mark.parametrize('test_data', [
            [True, None, True],      # in staging
            [False, False, None],    # in public
        ], indirect=True)
        def wrapper(self, client, test_data, *args, **kwargs):
            upload, _, auth_headers = test_data
            func(self, client, upload, auth_headers, *args, **kwargs)
        UploadFilesBasedTests.fix_signature(func, wrapper)
        return wrapper
560

561
    @pytest.fixture(scope='function')
562
    def test_data(self, request, postgres, mongo, raw_files, no_warn, test_user, other_test_user):
563
564
565
566
567
        # delete potential old test files
        for _ in [0, 1]:
            upload_files = UploadFiles.get('test_upload')
            if upload_files:
                upload_files.delete()
568

569
        in_staging, restricted, for_uploader = request.param
570

571
572
573
574
        if in_staging:
            authorized = for_uploader
        else:
            authorized = not restricted or for_uploader
575

576
577
578
579
580
581
        if for_uploader:
            auth_headers = create_auth_headers(test_user)
        elif for_uploader is False:
            auth_headers = create_auth_headers(other_test_user)
        else:
            auth_headers = None
582

583
584
        calc_specs = 'r' if restricted else 'p'
        if in_staging:
585
            Upload.create(user=test_user, upload_id='test_upload')
586
            _, upload_files = create_staging_upload('test_upload', calc_specs=calc_specs)
587
        else:
588
            _, upload_files = create_public_upload('test_upload', calc_specs=calc_specs)
589
            postgres.begin()
590
591
592
            coe_upload = coe_repo.Upload(
                upload_name='test_upload',
                user_id=test_user.user_id, is_processed=True)
593
594
            postgres.add(coe_upload)
            postgres.commit()
595

596
        yield 'test_upload', authorized, auth_headers
597

598
        upload_files.delete()
599
600


601
602
603
604
class TestArchive(UploadFilesBasedTests):
    @UploadFilesBasedTests.check_authorizaton
    def test_get(self, client, upload, auth_headers):
        rv = client.get('/archive/%s/0' % upload, headers=auth_headers)
605
        assert rv.status_code == 200
606
        assert json.loads(rv.data) is not None
607

608
609
610
611
612
613
    @UploadFilesBasedTests.ignore_authorization
    def test_get_signed(self, client, upload, _, test_user_signature_token):
        rv = client.get('/archive/%s/0?token=%s' % (upload, test_user_signature_token))
        assert rv.status_code == 200
        assert json.loads(rv.data) is not None

614
615
616
    @UploadFilesBasedTests.check_authorizaton
    def test_get_calc_proc_log(self, client, upload, auth_headers):
        rv = client.get('/archive/logs/%s/0' % upload, headers=auth_headers)
617
        assert rv.status_code == 200
618
        assert len(rv.data) > 0
619

620
621
622
623
624
625
    @UploadFilesBasedTests.ignore_authorization
    def test_get_calc_proc_log_signed(self, client, upload, _, test_user_signature_token):
        rv = client.get('/archive/logs/%s/0?token=%s' % (upload, test_user_signature_token))
        assert rv.status_code == 200
        assert len(rv.data) > 0

626
627
628
    @UploadFilesBasedTests.ignore_authorization
    def test_get_non_existing_archive(self, client, upload, auth_headers):
        rv = client.get('/archive/%s' % 'doesnt/exist', headers=auth_headers)
629
        assert rv.status_code == 404
Markus Scheidgen's avatar
Markus Scheidgen committed
630

631
632
633
634
635
636
637
    @pytest.mark.parametrize('info', [
        'all.nomadmetainfo.json',
        'all.experimental.nomadmetainfo.json',
        'vasp.nomadmetainfo.json',
        'mpes.nomadmetainfo.json'])
    def test_get_metainfo(self, client, info):
        rv = client.get('/archive/metainfo/%s' % info)
638
        assert rv.status_code == 200
639
640
        metainfo = json.loads((rv.data))
        assert len(metainfo) > 0
641

Markus Scheidgen's avatar
Markus Scheidgen committed
642

643
class TestRepo():
644
645
646
647
648
649
    @pytest.fixture(scope='class')
    def example_elastic_calcs(
            self, elastic_infra, normalized: parsing.LocalBackend,
            test_user: coe_repo.User, other_test_user: coe_repo.User):
        clear_elastic(elastic_infra)

Markus Scheidgen's avatar
Markus Scheidgen committed
650
        calc_with_metadata = CalcWithMetadata(upload_id=0, calc_id=0, upload_time=today)
651
        calc_with_metadata.files = ['test/mainfile.txt']
652
        calc_with_metadata.apply_domain_metadata(normalized)
653

Markus Scheidgen's avatar
Markus Scheidgen committed
654
655
        calc_with_metadata.update(
            calc_id='1', uploader=test_user.to_popo(), published=True, with_embargo=False)
656
657
        search.Entry.from_calc_with_metadata(calc_with_metadata).save(refresh=True)

Markus Scheidgen's avatar
Markus Scheidgen committed
658
659
        calc_with_metadata.update(
            calc_id='2', uploader=other_test_user.to_popo(), published=True, with_embargo=False,
Markus Scheidgen's avatar
Markus Scheidgen committed
660
            upload_time=today - datetime.timedelta(days=5))
Markus Scheidgen's avatar
Markus Scheidgen committed
661
662
        calc_with_metadata.update(
            atoms=['Fe'], comment='this is a specific word', formula='AAA', basis_set='zzz')
663
664
        search.Entry.from_calc_with_metadata(calc_with_metadata).save(refresh=True)

Markus Scheidgen's avatar
Markus Scheidgen committed
665
666
        calc_with_metadata.update(
            calc_id='3', uploader=other_test_user.to_popo(), published=False, with_embargo=False)
667
668
        search.Entry.from_calc_with_metadata(calc_with_metadata).save(refresh=True)

Markus Scheidgen's avatar
Markus Scheidgen committed
669
670
        calc_with_metadata.update(
            calc_id='4', uploader=other_test_user.to_popo(), published=True, with_embargo=True)
671
672
        search.Entry.from_calc_with_metadata(calc_with_metadata).save(refresh=True)

673
    def assert_search(self, rv: Any, number_of_calcs: int) -> dict:
674
675
        if rv.status_code != 200:
            print(rv.data)
676
        assert rv.status_code == 200
677

678
679
680
681
682
683
684
685
686
        data = json.loads(rv.data)

        results = data.get('results', None)
        assert results is not None
        assert isinstance(results, list)
        assert len(results) == number_of_calcs

        return data

687
688
    def test_own_calc(self, client, example_elastic_calcs, no_warn, test_user_auth):
        rv = client.get('/repo/0/1', headers=test_user_auth)
689
690
        assert rv.status_code == 200

691
692
693
694
695
696
697
698
    def test_public_calc(self, client, example_elastic_calcs, no_warn, other_test_user_auth):
        rv = client.get('/repo/0/1', headers=other_test_user_auth)
        assert rv.status_code == 200

    def test_embargo_calc(self, client, example_elastic_calcs, no_warn, test_user_auth):
        rv = client.get('/repo/0/4', headers=test_user_auth)
        assert rv.status_code == 401

699
700
701
702
    def test_own_embargo_calc(self, client, example_elastic_calcs, no_warn, other_test_user_auth):
        rv = client.get('/repo/0/4', headers=other_test_user_auth)
        assert rv.status_code == 200

703
704
705
706
    def test_staging_calc(self, client, example_elastic_calcs, no_warn, test_user_auth):
        rv = client.get('/repo/0/3', headers=test_user_auth)
        assert rv.status_code == 401

707
708
709
710
    def test_own_staging_calc(self, client, example_elastic_calcs, no_warn, other_test_user_auth):
        rv = client.get('/repo/0/3', headers=other_test_user_auth)
        assert rv.status_code == 200

711
712
    def test_non_existing_calcs(self, client, example_elastic_calcs, test_user_auth):
        rv = client.get('/repo/0/10', headers=test_user_auth)
713
714
        assert rv.status_code == 404

715
716
717
    @pytest.mark.parametrize('calcs, owner, auth', [
        (2, 'all', 'none'),
        (2, 'all', 'test_user'),
718
        (4, 'all', 'other_test_user'),
719
        (1, 'user', 'test_user'),
720
        (3, 'user', 'other_test_user'),
721
        (0, 'staging', 'test_user'),
722
        (1, 'staging', 'other_test_user')
723
    ])
724
    def test_search_owner(self, client, example_elastic_calcs, no_warn, test_user_auth, other_test_user_auth, calcs, owner, auth):
725
726
        auth = dict(none=None, test_user=test_user_auth, other_test_user=other_test_user_auth).get(auth)
        rv = client.get('/repo/?owner=%s' % owner, headers=auth)
727
        data = self.assert_search(rv, calcs)
728
729
730
731
732
        results = data.get('results', None)
        if calcs > 0:
            for key in ['uploader', 'calc_id', 'formula', 'upload_id']:
                assert key in results[0]

Markus Scheidgen's avatar
Markus Scheidgen committed
733
    @pytest.mark.parametrize('calcs, start, end', [
Markus Scheidgen's avatar
Markus Scheidgen committed
734
735
736
737
738
739
        (2, today - datetime.timedelta(days=6), today),
        (2, today - datetime.timedelta(days=5), today),
        (1, today - datetime.timedelta(days=4), today),
        (1, today, today),
        (1, today - datetime.timedelta(days=6), today - datetime.timedelta(days=5)),
        (0, today - datetime.timedelta(days=7), today - datetime.timedelta(days=6)),
Markus Scheidgen's avatar
Markus Scheidgen committed
740
        (2, None, None),
Markus Scheidgen's avatar
Markus Scheidgen committed
741
742
        (1, today, None),
        (2, None, today)
Markus Scheidgen's avatar
Markus Scheidgen committed
743
744
745
746
747
748
749
750
751
752
753
754
755
    ])
    def test_search_time(self, client, example_elastic_calcs, no_warn, calcs, start, end):
        query_string = ''
        if start is not None:
            query_string = 'from_time=%s' % rfc3339DateTime.format(start)
        if end is not None:
            if query_string != '':
                query_string += '&'
            query_string += 'until_time=%s' % rfc3339DateTime.format(end)
        if query_string != '':
            query_string = '?%s' % query_string

        rv = client.get('/repo/%s' % query_string)
756
        self.assert_search(rv, calcs)
Markus Scheidgen's avatar
Markus Scheidgen committed
757

758
    @pytest.mark.parametrize('calcs, quantity, value', [
759
760
        (2, 'system', 'bulk'),
        (0, 'system', 'atom'),
761
762
        (1, 'atoms', 'Br'),
        (1, 'atoms', 'Fe'),
763
        (0, 'atoms', ['Fe', 'Br', 'A', 'B']),
764
765
        (0, 'only_atoms', ['Br', 'Si']),
        (1, 'only_atoms', ['Fe']),
766
767
        (1, 'only_atoms', ['Br', 'K', 'Si']),
        (1, 'only_atoms', ['Br', 'Si', 'K']),
768
769
770
771
772
773
774
775
776
        (1, 'comment', 'specific'),
        (1, 'authors', 'Hofstadter, Leonard'),
        (2, 'files', 'test/mainfile.txt'),
        (2, 'paths', 'mainfile.txt'),
        (2, 'paths', 'test'),
        (2, 'quantities', ['wyckoff_letters_primitive', 'hall_number']),
        (0, 'quantities', 'dos')
    ])
    def test_search_quantities(self, client, example_elastic_calcs, no_warn, test_user_auth, calcs, quantity, value):
777
        query_string = urlencode({quantity: value}, doseq=True)
778

779
        rv = client.get('/repo/?%s' % query_string, headers=test_user_auth)
780
        logger.debug('run search quantities test', query_string=query_string)
781
        data = self.assert_search(rv, calcs)
782

783
784
        quantities = data.get('quantities', None)
        assert quantities is not None
785
        if quantity == 'system' and calcs != 0:
786
787
788
789
            # for simplicity we only assert on quantities for this case
            assert 'system' in quantities
            assert len(quantities['system']) == 1
            assert value in quantities['system']
790

791
792
    metrics_permutations = [[], search.metrics_names] + [[metric] for metric in search.metrics_names]

793
794
795
796
797
798
799
800
801
802
803
    def test_search_admin(self, client, example_elastic_calcs, no_warn, admin_user_auth):
        rv = client.get('/repo/?owner=admin', headers=admin_user_auth)
        self.assert_search(rv, 4)

    def test_search_admin_auth(self, client, example_elastic_calcs, no_warn, test_user_auth):
        rv = client.get('/repo/?owner=admin', headers=test_user_auth)
        assert rv.status_code == 401

        rv = client.get('/repo/?owner=admin')
        assert rv.status_code == 401

804
    @pytest.mark.parametrize('metrics', metrics_permutations)
Markus Scheidgen's avatar
Markus Scheidgen committed
805
    def test_search_total_metrics(self, client, example_elastic_calcs, no_warn, metrics):
806
807
        rv = client.get('/repo/?%s' % urlencode(dict(metrics=metrics), doseq=True))
        assert rv.status_code == 200, str(rv.data)
Markus Scheidgen's avatar
Markus Scheidgen committed
808
        data = json.loads(rv.data)
809
810
811
        total_metrics = data.get('quantities', {}).get('total', {}).get('all', None)
        assert total_metrics is not None
        assert 'code_runs' in total_metrics
Markus Scheidgen's avatar
Markus Scheidgen committed
812
        for metric in metrics:
813
            assert metric in total_metrics
Markus Scheidgen's avatar
Markus Scheidgen committed
814

815
    @pytest.mark.parametrize('metrics', metrics_permutations)
Markus Scheidgen's avatar
Markus Scheidgen committed
816
    def test_search_aggregation_metrics(self, client, example_elastic_calcs, no_warn, metrics):
817
        rv = client.get('/repo/?%s' % urlencode(dict(metrics=metrics), doseq=True))
Markus Scheidgen's avatar
Markus Scheidgen committed
818
819
        assert rv.status_code == 200
        data = json.loads(rv.data)
820
821
        for name, quantity in data.get('quantities').items():
            for metrics_result in quantity.values():
Markus Scheidgen's avatar
Markus Scheidgen committed
822
                assert 'code_runs' in metrics_result
823
824
825
826
827
                if name != 'authors':
                    for metric in metrics:
                        assert metric in metrics_result
                else:
                    assert len(metrics_result) == 1  # code_runs is the only metric for authors
Markus Scheidgen's avatar
Markus Scheidgen committed
828

829
830
831
    @pytest.mark.parametrize('n_results, page, per_page', [(2, 1, 5), (1, 1, 1), (0, 2, 3)])
    def test_search_pagination(self, client, example_elastic_calcs, no_warn, n_results, page, per_page):
        rv = client.get('/repo/?page=%d&per_page=%d' % (page, per_page))
832
833
834
        assert rv.status_code == 200
        data = json.loads(rv.data)
        results = data.get('results', None)
835
        assert data['pagination']['total'] == 2
836
        assert results is not None
837
        assert len(results) == n_results
838

839
840
    @pytest.mark.parametrize('first, order_by, order', [
        ('1', 'formula', -1), ('2', 'formula', 1),
841
842
        ('2', 'basis_set', -1), ('1', 'basis_set', 1),
        (None, 'authors', -1)])
843
844
845
846
847
848
849
    def test_search_order(self, client, example_elastic_calcs, no_warn, first, order_by, order):
        rv = client.get('/repo/?order_by=%s&order=%d' % (order_by, order))
        assert rv.status_code == 200
        data = json.loads(rv.data)
        results = data.get('results', None)
        assert data['pagination']['total'] == 2
        assert len(results) == 2
850
851
        if first is not None:
            assert results[0]['calc_id'] == first
852

853
854
855
856
857
858
859
860
861
862
    @pytest.mark.parametrize('n_results, size', [(2, None), (2, 5), (1, 1)])
    def test_search_scroll(self, client, example_elastic_calcs, no_warn, n_results, size):
        if size is not None:
            rv = client.get('/repo/?scroll=1,&per_page=%d' % size)
        else:
            rv = client.get('/repo/?scroll=1')

        assert rv.status_code == 200
        data = json.loads(rv.data)
        results = data.get('results', None)
863
        assert data.get('scroll', {}).get('size', -1) > 0
864
865
        assert results is not None
        assert len(results) == n_results
866
        scroll_id = data.get('scroll', {}).get('scroll_id', None)
867
868
869
870
871
872
        assert scroll_id is not None

        has_another_page = False
        while scroll_id is not None:
            rv = client.get('/repo/?scroll=1&scroll_id=%s' % scroll_id)
            data = json.loads(rv.data)
873
            scroll_id = data.get('scroll', {}).get('scroll_id', None)
874
875
876
877
878
            has_another_page |= len(data.get('results')) > 0

        if n_results < 2:
            assert has_another_page

879
880
881
    def test_search_user_authrequired(self, client, example_elastic_calcs, no_warn):
        rv = client.get('/repo/?owner=user')
        assert rv.status_code == 401
882

883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
    @pytest.mark.parametrize('calcs, quantity, value', [
        (2, 'system', 'bulk'),
        (0, 'system', 'atom'),
        (1, 'atoms', 'Br'),
        (1, 'atoms', 'Fe'),
        (1, 'authors', 'Hofstadter, Leonard'),
        (2, 'files', 'test/mainfile.txt'),
        (0, 'quantities', 'dos')
    ])
    def test_quantity_search(self, client, example_elastic_calcs, no_warn, test_user_auth, calcs, quantity, value):
        rv = client.get('/repo/%s' % quantity, headers=test_user_auth)
        assert rv.status_code == 200
        data = json.loads(rv.data)

        quantities = data['quantities']
        assert quantity in quantities
        values = quantities[quantity]['values']
        assert (value in values) == (calcs > 0)
        assert values.get(value, 0) == calcs

    def test_quantity_search_after(self, client, example_elastic_calcs, no_warn, test_user_auth):
        rv = client.get('/repo/atoms?size=1')
        assert rv.status_code == 200
        data = json.loads(rv.data)

        quantity = data['quantities']['atoms']
        assert 'after' in quantity
        after = quantity['after']
        assert len(quantity['values']) == 1
        value = list(quantity['values'].keys())[0]

        while True:
            rv = client.get('/repo/atoms?size=1&after=%s' % after)
            assert rv.status_code == 200
            data = json.loads(rv.data)

            quantity = data['quantities']['atoms']

            if 'after' not in quantity:
                assert len(quantity['values']) == 0
                break

            assert len(quantity['values']) == 1
            assert value != list(quantity['values'].keys())[0]
            assert after != quantity['after']
            after = quantity['after']

930

931
class TestRaw(UploadFilesBasedTests):
Markus Scheidgen's avatar
Markus Scheidgen committed
932

933
934
935
936
937
938
939
940
941
942
943
944
945
946
    def test_raw_file_from_calc(self, client, non_empty_processed, test_user_auth):
        calc = list(non_empty_processed.calcs)[0]
        url = '/raw/calc/%s/%s/%s' % (
            non_empty_processed.upload_id, calc.calc_id, os.path.basename(calc.mainfile))
        rv = client.get(url, headers=test_user_auth)
        assert rv.status_code == 200
        assert len(rv.data) > 0

        url = '/raw/calc/%s/%s/' % (non_empty_processed.upload_id, calc.calc_id)
        rv = client.get(url, headers=test_user_auth)
        assert rv.status_code == 200
        result = json.loads(rv.data)
        assert len(result['contents']) > 0

947
948
    @UploadFilesBasedTests.check_authorizaton
    def test_raw_file(self, client, upload, auth_headers):
949
        url = '/raw/%s/%s' % (upload, example_file_mainfile)
950
        rv = client.get(url, headers=auth_headers)
951
952
953
        assert rv.status_code == 200
        assert len(rv.data) > 0

954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
    @UploadFilesBasedTests.check_authorizaton
    def test_raw_file_partial(self, client, upload, auth_headers):
        url = '/raw/%s/%s?offset=0&length=20' % (upload, example_file_mainfile)
        rv = client.get(url, headers=auth_headers)
        assert rv.status_code == 200
        start_data = rv.data
        assert len(start_data) == 20

        url = '/raw/%s/%s?offset=10&length=10' % (upload, example_file_mainfile)
        rv = client.get(url, headers=auth_headers)
        assert rv.status_code == 200
        next_data = rv.data
        assert len(rv.data) == 10
        assert start_data[10:] == next_data

969
970
971
972
973
974
975
    @UploadFilesBasedTests.ignore_authorization
    def test_raw_file_signed(self, client, upload, _, test_user_signature_token):
        url = '/raw/%s/%s?token=%s' % (upload, example_file_mainfile, test_user_signature_token)
        rv = client.get(url)
        assert rv.status_code == 200
        assert len(rv.data) > 0

976
977
    @UploadFilesBasedTests.ignore_authorization
    def test_raw_file_missing_file(self, client, upload, auth_headers):
978
        url = '/raw/%s/does/not/exist' % upload
979
        rv = client.get(url, headers=auth_headers)
980
        assert rv.status_code == 404
981
982
983
        data = json.loads(rv.data)
        assert 'files' not in data

984
    @pytest.mark.parametrize('compress', [True, False])
985
986
    @UploadFilesBasedTests.ignore_authorization
    def test_raw_file_wildcard(self, client, upload, auth_headers, compress):
987
        url = '/raw/%s/examples*' % upload
988
989
        if compress:
            url = '%s?compress=1' % url
990
        rv = client.get(url, headers=auth_headers)
991
992
993
994
995
996
997

        assert rv.status_code == 200
        assert len(rv.data) > 0
        with zipfile.ZipFile(io.BytesIO(rv.data)) as zip_file:
            assert zip_file.testzip() is None
            assert len(zip_file.namelist()) == len(example_file_contents)

998
999
1000
1001
    @UploadFilesBasedTests.ignore_authorization
    def test_raw_file_wildcard_missing(self, client, upload, auth_headers):
        url = '/raw/%s/does/not/exist*' % upload
        rv = client.get(url, headers=auth_headers)
1002
        assert rv.status_code == 404
1003

1004
1005
    @UploadFilesBasedTests.ignore_authorization
    def test_raw_file_missing_upload(self, client, upload, auth_headers):
1006
        url = '/raw/doesnotexist/%s' % example_file_mainfile
1007
        rv = client.get(url, headers=auth_headers)
1008
1009
        assert rv.status_code == 404

1010
    @pytest.mark.parametrize('compress', [True, False])
1011
1012
    @UploadFilesBasedTests.check_authorizaton
    def test_raw_files(self, client, upload, auth_headers, compress):
1013
        url = '/raw/%s?files=%s' % (
1014
            upload, ','.join(example_file_contents))
1015
1016
        if compress:
            url = '%s&compress=1' % url
1017
        rv = client.get(url, headers=auth_headers)
Markus Scheidgen's avatar
Markus Scheidgen committed
1018

1019
1020
1021
1022
        assert rv.status_code == 200
        assert len(rv.data) > 0
        with zipfile.ZipFile(io.BytesIO(rv.data)) as zip_file:
            assert zip_file.testzip() is None
1023
            assert len(zip_file.namelist()) == len(example_file_contents)
Markus Scheidgen's avatar
Markus Scheidgen committed
1024