test_api.py 46.2 KB
Newer Older
Markus Scheidgen's avatar
Markus Scheidgen committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
# Copyright 2018 Markus Scheidgen
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

15
from typing import Any
16
17
18
import pytest
import time
import json
19
20
import zipfile
import io
21
import inspect
Markus Scheidgen's avatar
Markus Scheidgen committed
22
import datetime
23
import os.path
24
from urllib.parse import urlencode
25
import base64
26

Markus Scheidgen's avatar
Markus Scheidgen committed
27
from nomad.api.app import rfc3339DateTime
28
from nomad.api.auth import generate_upload_token
29
from nomad import search, parsing, files, config, utils
30
31
from nomad.files import UploadFiles, PublicUploadFiles
from nomad.processing import Upload, Calc, SUCCESS
32
from nomad.datamodel import UploadWithMetadata, CalcWithMetadata, User
33

34
from tests.conftest import create_auth_headers, clear_elastic
35
from tests.test_files import example_file, example_file_mainfile, example_file_contents
36
from tests.test_files import create_staging_upload, create_public_upload, assert_upload_files
37
from tests.test_search import assert_search_upload
38
39


40
41
logger = utils.get_logger(__name__)

Markus Scheidgen's avatar
Markus Scheidgen committed
42

43
44
45
46
47
def test_alive(client):
    """ The ``/alive`` endpoint answers a plain GET request with HTTP 200. """
    response = client.get('/alive')
    status = response.status_code
    assert status == 200


48
49
@pytest.fixture(scope='function')
def test_user_signature_token(client, test_user_auth):
    """
    Fixture that provides a signature token for the test user.

    Requests ``/auth/`` with the test user's auth headers, asserts a 200 response,
    and returns the ``signature_token`` entry of the JSON response body.
    """
    rv = client.get('/auth/', headers=test_user_auth)
    assert rv.status_code == 200
    return json.loads(rv.data)['signature_token']
53
54


55
56
57
58
59
60
61
62
def get_upload_with_metadata(upload: dict) -> UploadWithMetadata:
    """
    Build a :class:`UploadWithMetadata` from the JSON record that the API returns
    for an upload. Every entry of ``upload['calcs']['results']`` becomes a
    :class:`CalcWithMetadata` that carries the entry's ``calc_id`` and ``mainfile``.
    """
    upload_id = upload['upload_id']

    calcs = []
    for calc_json in upload['calcs']['results']:
        calcs.append(CalcWithMetadata(
            calc_id=calc_json['calc_id'], mainfile=calc_json['mainfile']))

    return UploadWithMetadata(upload_id=upload_id, calcs=calcs)


63
64
65
class TestInfo:
    """ Tests for the ``/info/`` endpoint. """

    def test_info(self, client):
        """
        ``/info/`` answers with 200 and a JSON body that has ``codes`` and
        ``parsers``, with at least as many parsers as codes.
        """
        rv = client.get('/info/')
        # check the status before parsing, so that a failed request is not
        # reported as a JSON decoding problem
        assert rv.status_code == 200

        data = json.loads(rv.data)
        assert 'codes' in data
        assert 'parsers' in data
        assert len(data['parsers']) >= len(data['codes'])

72

73
class TestKeycloak:
    """ Tests for ``/auth/`` that run with the ``keycloak`` fixture. """

    def test_auth_wo_credentials(self, client, keycloak, no_warn):
        """ A request without any credentials is rejected with 401. """
        rv = client.get('/auth/')
        assert rv.status_code == 401

    @pytest.fixture(scope='function')
    def auth_headers(self, client, keycloak):
        """
        Authenticates with HTTP basic auth against ``/auth/`` and returns
        ``Authorization: Bearer ...`` headers built from the ``access_token``
        of the response.
        """
        basic_auth = base64.standard_b64encode(b'sheldon.cooper@nomad-coe.eu:password')
        rv = client.get('/auth/', headers=dict(Authorization='Basic %s' % basic_auth.decode('utf-8')))
        assert rv.status_code == 200
        auth = json.loads(rv.data)
        assert 'access_token' in auth
        assert auth['access_token'] is not None
        return dict(Authorization='Bearer %s' % auth['access_token'])

    def test_auth_with_password(self, client, auth_headers):
        """
        Password based auth works. All assertions live in the ``auth_headers``
        fixture; this test passes if the fixture could be created.
        """
        pass

    def test_auth_with_access_token(self, client, auth_headers):
        """ The bearer token obtained via basic auth is accepted by ``/auth/``. """
        rv = client.get('/auth/', headers=auth_headers)
        assert rv.status_code == 200

95

96
97
98
99
class TestAuth:
    """ Tests for the ``/auth/`` endpoint with the regular test user. """

    def test_auth_wo_credentials(self, client, no_warn):
        """ A request without any credentials is rejected with 401. """
        rv = client.get('/auth/')
        assert rv.status_code == 401

    def test_auth_with_token(self, client, test_user_auth):
        """ A request with the test user's auth headers returns the full auth record. """
        rv = client.get('/auth/', headers=test_user_auth)
        assert rv.status_code == 200
        self.assert_auth(client, json.loads(rv.data))

    def assert_auth(self, client, auth):
        """
        Asserts that the given (already parsed) auth response contains a ``user``
        with the basic user keys and all three tokens.
        """
        assert 'user' in auth
        user = auth['user']
        for key in ['first_name', 'last_name', 'email', 'name', 'user_id']:
            assert key in user

        assert 'access_token' in auth
        assert 'upload_token' in auth
        assert 'signature_token' in auth

    def test_signature_token(self, test_user_signature_token, no_warn):
        """ The ``test_user_signature_token`` fixture yields an actual token. """
        assert test_user_signature_token is not None

119
120
121
122
123

class TestUploads:

    def assert_uploads(self, upload_json_str, count=0, **kwargs):
        """
        Asserts that the given JSON string is a paginated list of uploads.

        Arguments:
            upload_json_str: The raw JSON response body of an uploads list request.
            count: The expected number of uploads in ``results``.
            **kwargs: Passed on to :func:`assert_upload` for the first upload,
                if ``count`` is greater than zero.
        """
        data = json.loads(upload_json_str)
        assert 'pagination' in data
        assert 'page' in data['pagination']

        data = data['results']
        assert isinstance(data, list)
        assert len(data) == count

        if count > 0:
            # assert_upload expects a JSON string, hence the re-serialization
            self.assert_upload(json.dumps(data[0]), **kwargs)

    def assert_upload(self, upload_json_str, id=None, **kwargs):
        """
        Parses the given JSON string as a single upload record and asserts that
        it has an ``upload_id`` (equal to ``id``, if given), a ``create_time``,
        and that each key in ``kwargs`` has the given value in the record
        (missing keys count as ``None``).

        Returns: The parsed upload record as a dict.
        """
        upload = json.loads(upload_json_str)

        assert 'upload_id' in upload
        if id is not None:
            assert id == upload['upload_id']
        assert 'create_time' in upload

        for key in kwargs:
            expected = kwargs[key]
            assert upload.get(key, None) == expected

        return upload

    def assert_processing(self, client, test_user_auth, upload_id):
        """
        Blocks until the processing of the given upload has finished and asserts
        that it succeeded: the upload went through all 4 tasks and ended in
        ``cleanup``, each calc went through all 3 tasks and ended in ``archiving``,
        and the processing logs of each calc can be downloaded. Finally, the
        staging files and the search index are checked for the upload.
        """
        upload_endpoint = '/uploads/%s' % upload_id

        # poll until completed
        upload = self.block_until_completed(client, upload_id, test_user_auth)
        # block_until_completed returns None if the upload does not exist (anymore)
        assert upload is not None, 'upload %s does not exist' % upload_id

        assert len(upload['tasks']) == 4
        assert upload['tasks_status'] == SUCCESS
        assert upload['current_task'] == 'cleanup'
        assert not upload['process_running']

        calcs = upload['calcs']['results']
        for calc in calcs:
            assert calc['tasks_status'] == SUCCESS
            assert calc['current_task'] == 'archiving'
            assert len(calc['tasks']) == 3
            assert client.get('/archive/logs/%s/%s' % (calc['upload_id'], calc['calc_id']), headers=test_user_auth).status_code == 200

        if upload['calcs']['pagination']['total'] > 1:
            # check the calc pagination of the upload endpoint; note that from here
            # on `upload` only holds the calcs of the requested page
            rv = client.get('%s?page=2&per_page=1&order_by=tasks_status' % upload_endpoint, headers=test_user_auth)
            assert rv.status_code == 200
            upload = self.assert_upload(rv.data)
            assert len(upload['calcs']['results']) == 1

        upload_with_metadata = get_upload_with_metadata(upload)
        assert_upload_files(upload_with_metadata, files.StagingUploadFiles)
        assert_search_upload(upload_with_metadata, additional_keys=['atoms', 'system'])
173

174
    def assert_published(self, client, test_user_auth, upload_id, proc_infra, metadata=None):
        """
        Publishes the given upload via the API, blocks until the publish process
        has finished, and asserts that the upload is published with public files
        and a matching search index.

        Arguments:
            client: The API test client.
            test_user_auth: Auth headers used for all requests.
            upload_id: The id of the (already processed) upload.
            proc_infra: Unused here; requesting it makes sure the processing
                infrastructure fixture is active.
            metadata: Optional metadata sent with the publish operation.
                Defaults to no metadata (an empty dict).
        """
        # avoid a shared mutable default; the request still carries an empty dict
        if metadata is None:
            metadata = {}

        # make sure the upload exists and is a valid upload record before publishing
        rv = client.get('/uploads/%s' % upload_id, headers=test_user_auth)
        self.assert_upload(rv.data)

        rv = client.post(
            '/uploads/%s' % upload_id,
            headers=test_user_auth,
            data=json.dumps(dict(operation='publish', metadata=metadata)),
            content_type='application/json')
        assert rv.status_code == 200
        upload = self.assert_upload(rv.data)
        assert upload['current_process'] == 'publish_upload'
        assert upload['process_running']

        additional_keys = ['with_embargo']

        self.block_until_completed(client, upload_id, test_user_auth)

        upload_proc = Upload.objects(upload_id=upload_id).first()
        assert upload_proc is not None
        assert upload_proc.published is True
        upload_with_metadata = upload_proc.to_upload_with_metadata()

        assert_upload_files(upload_with_metadata, files.PublicUploadFiles, published=True)
        assert_search_upload(upload_with_metadata, additional_keys=additional_keys, published=True)

    def block_until_completed(self, client, upload_id: str, test_user_auth):
        """
        Polls the upload endpoint every 0.1 seconds until neither a process nor
        tasks are running for the given upload. There is no timeout.

        Returns: The last upload record (dict), or ``None`` if the API answers
            with 404 for the upload.

        Raises:
            Exception: If the API answers with a status other than 200 or 404.
        """
        while True:
            time.sleep(0.1)
            rv = client.get('/uploads/%s' % upload_id, headers=test_user_auth)
            if rv.status_code == 200:
                upload = self.assert_upload(rv.data)
                if not upload['process_running'] and not upload['tasks_running']:
                    return upload
            elif rv.status_code == 404:
                return None
            else:
                raise Exception(
                    'unexpected status code while blocking for upload processing: %s' %
                    str(rv.status_code))
216
217
218

    def assert_upload_does_not_exist(self, client, upload_id: str, test_user_auth):
        """
        Waits for running processes on the upload to finish and then asserts that
        the upload is gone: the API answers with 404, there are no upload and calc
        processing records left, and there are no upload files, except for
        public upload files.
        """
        self.block_until_completed(client, upload_id, test_user_auth)

        rv = client.get('/uploads/%s' % upload_id, headers=test_user_auth)
        assert rv.status_code == 404
        assert Upload.objects(upload_id=upload_id).first() is None
        # compare by value; `is 0` relies on int identity and raises a SyntaxWarning
        assert Calc.objects(upload_id=upload_id).count() == 0
        upload_files = UploadFiles.get(upload_id)
        assert upload_files is None or isinstance(upload_files, PublicUploadFiles)
Markus Scheidgen's avatar
Markus Scheidgen committed
226

227
228
229
230
231
232
233
    def test_get_command(self, client, test_user_auth, no_warn):
        """ ``/uploads/command`` returns both an upload command and an upload url. """
        response = client.get('/uploads/command', headers=test_user_auth)
        assert response.status_code == 200

        command_info = json.loads(response.data)
        for expected_key in ('upload_command', 'upload_url'):
            assert expected_key in command_info

234
235
    def test_get_empty(self, client, test_user_auth, no_warn):
        """ Without any uploads, the uploads list is an empty paginated result. """
        rv = client.get('/uploads/', headers=test_user_auth)
        assert rv.status_code == 200
        self.assert_uploads(rv.data, count=0)
Markus Scheidgen's avatar
Markus Scheidgen committed
239

240
241
242
    def test_get_not_existing(self, client, test_user_auth, no_warn):
        """ Requesting an upload id that does not exist yields a 404. """
        response = client.get('/uploads/123456789012123456789012', headers=test_user_auth)
        status = response.status_code
        assert status == 404
243

244
245
246
247
248
249
250
    def test_put_upload_token(self, client, non_empty_example_upload, test_user, no_warn):
        """
        An upload can be created without auth headers, if a valid upload token
        is given as ``token`` query parameter.
        """
        token = generate_upload_token(test_user)
        url = '/uploads/?token=%s&local_path=%s&name=test_upload' % (
            token, non_empty_example_upload)

        response = client.put(url)

        assert response.status_code == 200
        self.assert_upload(response.data, name='test_upload')

251
252
    @pytest.mark.parametrize('mode', ['multipart', 'stream', 'local_path'])
    @pytest.mark.parametrize('name', [None, 'test_name'])
    def test_put(self, client, test_user_auth, proc_infra, example_upload, mode, name, no_warn):
        """
        Uploads the example file via each supported upload mode (multipart form
        data, raw request body, server local path), with and without a name, and
        asserts that the upload is created and successfully processed.
        """
        file = example_upload
        if name:
            url = '/uploads/?name=%s' % name
        else:
            url = '/uploads/'

        if mode == 'multipart':
            # use a context manager, so the file handle does not leak
            with open(file, 'rb') as f:
                rv = client.put(
                    url, data=dict(file=(f, 'the_name')), headers=test_user_auth)
            if not name:
                # without an explicit name, the multipart file name is expected
                name = 'the_name'
        elif mode == 'stream':
            with open(file, 'rb') as f:
                rv = client.put(url, data=f.read(), headers=test_user_auth)
        elif mode == 'local_path':
            url += '&' if name else '?'
            url += 'local_path=%s' % file
            rv = client.put(url, headers=test_user_auth)
        else:
            assert False

        assert rv.status_code == 200
        if mode == 'local_path':
            upload = self.assert_upload(rv.data, upload_path=file, name=name)
        else:
            upload = self.assert_upload(rv.data, name=name)
        assert upload['tasks_running']

        self.assert_processing(client, test_user_auth, upload['upload_id'])
283

284
285
286
287
288
289
290
291
    def test_upload_limit(self, client, mongo, test_user, test_user_auth, proc_infra):
        """
        Once the user has ``config.services.upload_limit`` uploads, a further
        upload is rejected with 400 and no additional upload is created.
        """
        limit = config.services.upload_limit
        for _ in range(limit):
            Upload.create(user=test_user)

        url = '/uploads/?local_path=%s' % example_file
        response = client.put(url, headers=test_user_auth)

        assert response.status_code == 400
        assert Upload.user_uploads(test_user).count() == limit

292
293
294
    def test_delete_not_existing(self, client, test_user_auth, no_warn):
        """ Deleting an upload id that does not exist yields a 404. """
        response = client.delete('/uploads/123456789012123456789012', headers=test_user_auth)
        status = response.status_code
        assert status == 404
295

296
297
298
299
300
301
302
303
304
305
306
307
    @pytest.fixture(scope='function')
    def slow_processing(self, monkeypatch):
        """
        Fixture that replaces ``Upload.cleanup`` with a version that sleeps for
        half a second before running the original cleanup. The original is put
        back in place after the test.
        """
        target = 'nomad.processing.data.Upload.cleanup'
        old_cleanup = Upload.cleanup

        def slow_cleanup(self):
            # delay first, then do the real work
            time.sleep(0.5)
            old_cleanup(self)

        monkeypatch.setattr(target, slow_cleanup)
        yield True
        monkeypatch.setattr(target, old_cleanup)

308
    def test_delete_published(self, client, test_user_auth, proc_infra, no_warn):
        """ A published upload cannot be deleted; the API answers with 400. """
        rv = client.put('/uploads/?local_path=%s' % example_file, headers=test_user_auth)
        upload = self.assert_upload(rv.data)
        self.assert_processing(client, test_user_auth, upload['upload_id'])
        self.assert_published(client, test_user_auth, upload['upload_id'], proc_infra)
        rv = client.delete('/uploads/%s' % upload['upload_id'], headers=test_user_auth)
        assert rv.status_code == 400
315

Markus Scheidgen's avatar
Markus Scheidgen committed
316
    def test_delete(self, client, test_user_auth, proc_infra, no_warn):
        """ A processed, unpublished upload can be deleted and is gone afterwards. """
        rv = client.put('/uploads/?local_path=%s' % example_file, headers=test_user_auth)
        upload = self.assert_upload(rv.data)
        self.assert_processing(client, test_user_auth, upload['upload_id'])
        rv = client.delete('/uploads/%s' % upload['upload_id'], headers=test_user_auth)
        assert rv.status_code == 200
        self.assert_upload_does_not_exist(client, upload['upload_id'], test_user_auth)
323

324
325
326
327
328
329
330
331
332
333
334
    def test_post_empty(self, client, test_user_auth, empty_upload, proc_infra, no_warn):
        """ Publishing the processed ``empty_upload`` is rejected with 400. """
        response = client.put('/uploads/?local_path=%s' % empty_upload, headers=test_user_auth)
        assert response.status_code == 200

        upload_id = self.assert_upload(response.data)['upload_id']
        self.assert_processing(client, test_user_auth, upload_id)

        publish_request = json.dumps(dict(operation='publish'))
        response = client.post(
            '/uploads/%s' % upload_id,
            headers=test_user_auth,
            data=publish_request,
            content_type='application/json')
        assert response.status_code == 400

335
    def test_post(self, client, test_user_auth, non_empty_example_upload, proc_infra, no_warn):
        """
        After publishing, the upload is still accessible by id and listed with
        ``state=all``, but it is not part of the default uploads list anymore.
        """
        rv = client.put('/uploads/?local_path=%s' % non_empty_example_upload, headers=test_user_auth)
        assert rv.status_code == 200
        upload = self.assert_upload(rv.data)
        self.assert_processing(client, test_user_auth, upload['upload_id'])
        self.assert_published(client, test_user_auth, upload['upload_id'], proc_infra)

        # still visible
        assert client.get('/uploads/%s' % upload['upload_id'], headers=test_user_auth).status_code == 200
        # still listed with state=all
        rv = client.get('/uploads/?state=all', headers=test_user_auth)
        assert rv.status_code == 200
        data = json.loads(rv.data)['results']
        assert len(data) > 0
        assert any(item['upload_id'] == upload['upload_id'] for item in data)
        # not listed by default, i.e. without state=all
        rv = client.get('/uploads/', headers=test_user_auth)
        assert rv.status_code == 200
        data = json.loads(rv.data)['results']
        assert not any(item['upload_id'] == upload['upload_id'] for item in data)

356
357
    def test_post_metadata(
            self, client, proc_infra, admin_user_auth, test_user_auth, test_user,
            other_test_user, no_warn, example_user_metadata):
        """
        An upload of the test user can be published by the admin user with
        additional metadata, including an explicit ``_upload_time``.
        """
        rv = client.put('/uploads/?local_path=%s' % example_file, headers=test_user_auth)
        upload = self.assert_upload(rv.data)
        self.assert_processing(client, test_user_auth, upload['upload_id'])
        # copy, so the fixture value itself is not modified
        metadata = dict(**example_user_metadata)
        metadata['_upload_time'] = datetime.datetime.utcnow().isoformat()
        self.assert_published(client, admin_user_auth, upload['upload_id'], proc_infra, metadata)
365

Markus Scheidgen's avatar
Markus Scheidgen committed
366
    def test_post_metadata_forbidden(self, client, proc_infra, test_user_auth, no_warn):
        """
        Publishing with ``_pid`` metadata using the regular test user's
        credentials is rejected with 401.
        """
        rv = client.put('/uploads/?local_path=%s' % example_file, headers=test_user_auth)
        upload = self.assert_upload(rv.data)
        self.assert_processing(client, test_user_auth, upload['upload_id'])
        rv = client.post(
            '/uploads/%s' % upload['upload_id'],
            headers=test_user_auth,
            data=json.dumps(dict(operation='publish', metadata=dict(_pid=256))),
            content_type='application/json')
        assert rv.status_code == 401

377
378
379
380
381
382
383
    def test_post_metadata_and_republish(
            self, client, proc_infra, admin_user_auth, test_user_auth, test_user,
            other_test_user, no_warn, example_user_metadata):
        """
        An upload that was published with metadata by the admin user can be
        published a second time, this time with empty metadata.
        """
        rv = client.put('/uploads/?local_path=%s' % example_file, headers=test_user_auth)
        upload = self.assert_upload(rv.data)
        self.assert_processing(client, test_user_auth, upload['upload_id'])
        # copy, so the fixture value itself is not modified
        metadata = dict(**example_user_metadata)
        metadata['_upload_time'] = datetime.datetime.utcnow().isoformat()
        self.assert_published(client, admin_user_auth, upload['upload_id'], proc_infra, metadata)
        self.assert_published(client, admin_user_auth, upload['upload_id'], proc_infra, {})
387

388
389
390
391
392
393
394
395
396
397
398
399
400
401
    def test_post_re_process(self, client, published, test_user_auth, monkeypatch):
        """
        With patched ``nomad.config.version`` and ``nomad.config.commit``, the
        ``re-process`` operation on a published upload is accepted with 200 and
        the upload still exists once processing has finished.
        """
        for attribute, value in [
                ('nomad.config.version', 're_process_test_version'),
                ('nomad.config.commit', 're_process_test_commit')]:
            monkeypatch.setattr(attribute, value)

        upload_id = published.upload_id
        payload = json.dumps(dict(operation='re-process'))
        response = client.post(
            '/uploads/%s' % upload_id,
            headers=test_user_auth,
            data=payload,
            content_type='application/json')
        assert response.status_code == 200

        final_upload = self.block_until_completed(client, upload_id, test_user_auth)
        assert final_upload is not None

402
    # TODO validate metadata (or all input models in API for that matter)
    # def test_post_bad_metadata(self, client, proc_infra, test_user_auth):
    #     rv = client.put('/uploads/?local_path=%s' % example_file, headers=test_user_auth)
    #     upload = self.assert_upload(rv.data)
    #     self.assert_processing(client, test_user_auth, upload['upload_id'])
    #     rv = client.post(
    #         '/uploads/%s' % upload['upload_id'],
    #         headers=test_user_auth,
    #         data=json.dumps(dict(operation='publish', metadata=dict(doesnotexist='hi'))),
    #         content_type='application/json')
    #     assert rv.status_code == 400

414
    def test_potcar(self, client, proc_infra, test_user_auth):
        """
        After publishing an upload with a POTCAR file, the original file is only
        served to authenticated requests (401 without credentials), while the
        ``POTCAR.stripped`` file is served without credentials.
        """
        # only the owner and people the upload is shared with are supposed to
        # download the original potcar file
        example_file = 'tests/data/proc/examples_potcar.zip'
        rv = client.put('/uploads/?local_path=%s' % example_file, headers=test_user_auth)

        upload = self.assert_upload(rv.data)
        upload_id = upload['upload_id']
        self.assert_processing(client, test_user_auth, upload_id)
        self.assert_published(client, test_user_auth, upload_id, proc_infra)
        rv = client.get('/raw/%s/examples_potcar/POTCAR' % upload_id)
        assert rv.status_code == 401
        rv = client.get('/raw/%s/examples_potcar/POTCAR' % upload_id, headers=test_user_auth)
        assert rv.status_code == 200
        rv = client.get('/raw/%s/examples_potcar/POTCAR.stripped' % upload_id)
        assert rv.status_code == 200

430

Markus Scheidgen's avatar
Markus Scheidgen committed
431
432
433
# The current UTC date, evaluated once when this module is imported.
today = datetime.datetime.utcnow().date()


434
435
436
437
438
439
440
441
442
443
444
445
446
class UploadFilesBasedTests:

    @staticmethod
    def fix_signature(func, wrapper):
        """
        Sets ``wrapper.__signature__`` to a signature made of the first three
        parameters of ``wrapper`` followed by all parameters of ``func`` after
        its first four.
        """
        func_params = tuple(inspect.signature(func).parameters.values())
        signature = inspect.signature(wrapper)
        wrapper_params = tuple(signature.parameters.values())

        merged_params = wrapper_params[:3] + func_params[4:]
        wrapper.__signature__ = signature.replace(parameters=merged_params)

    @staticmethod
    def check_authorizaton(func):
        """
        Decorator for tests with the signature ``(self, client, upload, auth_headers, ...)``.

        The test is parametrized over the ``test_data`` fixture with the listed
        ``[in_staging, restricted, for_uploader]`` combinations. In scenarios
        where the user is not authorized, the wrapped test is expected to fail
        with an assertion about a 401 or about an empty zip file; such failures
        are swallowed, while an unexpectedly passing test is turned into a failure.
        """
        @pytest.mark.parametrize('test_data', [
            [True, None, True],      # in staging for upload
            [True, None, False],     # in staging for different user
            [True, None, None],      # in staging for guest
            [True, None, 'admin'],   # in staging, for admin
            [False, True, True],     # in public, restricted for uploader
            [False, True, False],    # in public, restricted for different user
            [False, True, None],     # in public, restricted for guest
            [False, True, 'admin'],  # in public, restricted for admin
            [False, False, True],    # in public, public, for uploader
            [False, False, False],   # in public, public, for different user
            [False, False, None]     # in public, public, for guest
        ], indirect=True)
        def wrapper(self, client, test_data, *args, **kwargs):
            upload, authorized, auth_headers = test_data
            try:
                func(self, client, upload, auth_headers, *args, **kwargs)
            except AssertionError as assertion:
                assertion_str = str(assertion)
                if not authorized:
                    if '0 == 5' in assertion_str and 'ZipFile' in assertion_str:
                        # the user is not authorized and gets an empty zip as expected
                        return
                    if '401' in assertion_str:
                        # the user is not authorized and gets a 401 as expected
                        return
                # any other failure is a real one; re-raise with the original traceback
                raise

            if not authorized:
                assert False, 'the request succeeded although the user is not authorized'
        UploadFilesBasedTests.fix_signature(func, wrapper)
        return wrapper

    @staticmethod
    def ignore_authorization(func):
        """
        Decorator for tests with the signature ``(self, client, upload, auth_headers, ...)``.
        Runs the test once with staging and once with public ``test_data`` and
        ignores the ``authorized`` flag that the fixture provides.
        """
        scenarios = [
            [True, None, True],      # in staging
            [False, False, None],    # in public
        ]

        @pytest.mark.parametrize('test_data', scenarios, indirect=True)
        def wrapper(self, client, test_data, *args, **kwargs):
            upload, _authorized, auth_headers = test_data
            func(self, client, upload, auth_headers, *args, **kwargs)

        UploadFilesBasedTests.fix_signature(func, wrapper)
        return wrapper
490

491
    @pytest.fixture(scope='function')
    def test_data(self, request, mongo, raw_files, no_warn, test_user, other_test_user, admin_user):
        """
        Indirectly parametrized fixture that creates the upload ``test_upload``
        for the test user, with either staging or public upload files.

        ``request.param`` is ``[in_staging, restricted, for_uploader]``, where
        ``for_uploader`` selects the requesting user: ``True`` for the uploader,
        ``False`` for a different user, ``'admin'`` for the admin user, and
        ``None`` for no credentials.

        Yields: A tuple ``(upload_id, authorized, auth_headers)``. The upload
            files are deleted after the test.
        """
        # delete potential old test files
        # NOTE(review): two passes, presumably one for staging and one for
        # public files of the same upload id; confirm against UploadFiles.get
        for _ in [0, 1]:
            upload_files = UploadFiles.get('test_upload')
            if upload_files:
                upload_files.delete()

        in_staging, restricted, for_uploader = request.param

        if in_staging:
            authorized = for_uploader is True or for_uploader == 'admin'
        else:
            authorized = not restricted or for_uploader is True or for_uploader == 'admin'

        if for_uploader is True:
            auth_headers = create_auth_headers(test_user)
        elif for_uploader is False:
            auth_headers = create_auth_headers(other_test_user)
        elif for_uploader == 'admin':
            auth_headers = create_auth_headers(admin_user)
        else:
            auth_headers = None

        calc_specs = 'r' if restricted else 'p'
        Upload.create(user=test_user, upload_id='test_upload')
        if in_staging:
            _, upload_files = create_staging_upload('test_upload', calc_specs=calc_specs)
        else:
            _, upload_files = create_public_upload('test_upload', calc_specs=calc_specs)

        yield 'test_upload', authorized, auth_headers

        upload_files.delete()
525
526


527
528
529
530
class TestArchive(UploadFilesBasedTests):
    """ Tests for the ``/archive/`` endpoints (archive data, processing logs, metainfo). """

    @UploadFilesBasedTests.check_authorizaton
    def test_get(self, client, upload, auth_headers):
        """ The archive of calc ``0`` is served as JSON to authorized users. """
        rv = client.get('/archive/%s/0' % upload, headers=auth_headers)
        assert rv.status_code == 200
        assert json.loads(rv.data) is not None

    @UploadFilesBasedTests.ignore_authorization
    def test_get_signed(self, client, upload, _, test_user_signature_token):
        """ The archive can be accessed with a signature token instead of auth headers. """
        rv = client.get('/archive/%s/0?signature_token=%s' % (upload, test_user_signature_token))
        assert rv.status_code == 200
        assert json.loads(rv.data) is not None

    @UploadFilesBasedTests.check_authorizaton
    def test_get_calc_proc_log(self, client, upload, auth_headers):
        """ The processing log of calc ``0`` is served non-empty to authorized users. """
        rv = client.get('/archive/logs/%s/0' % upload, headers=auth_headers)
        assert rv.status_code == 200
        assert len(rv.data) > 0

    @UploadFilesBasedTests.ignore_authorization
    def test_get_calc_proc_log_signed(self, client, upload, _, test_user_signature_token):
        """ The processing log can be accessed with a signature token instead of auth headers. """
        rv = client.get('/archive/logs/%s/0?signature_token=%s' % (upload, test_user_signature_token))
        assert rv.status_code == 200
        assert len(rv.data) > 0

    @UploadFilesBasedTests.ignore_authorization
    def test_get_non_existing_archive(self, client, upload, auth_headers):
        """ Requesting an archive that does not exist yields a 404. """
        rv = client.get('/archive/%s' % 'doesnt/exist', headers=auth_headers)
        assert rv.status_code == 404

    @pytest.mark.parametrize('info', [
        'all.nomadmetainfo.json',
        'all.experimental.nomadmetainfo.json',
        'vasp.nomadmetainfo.json',
        'mpes.nomadmetainfo.json'])
    def test_get_metainfo(self, client, info):
        """ Each listed metainfo file is served as non-empty JSON. """
        rv = client.get('/archive/metainfo/%s' % info)
        assert rv.status_code == 200
        metainfo = json.loads(rv.data)
        assert len(metainfo) > 0
567

Markus Scheidgen's avatar
Markus Scheidgen committed
568

569
class TestRepo():
570
571
572
    @pytest.fixture(scope='class')
    def example_elastic_calcs(
            self, elastic_infra, normalized: parsing.LocalBackend,
            test_user: User, other_test_user: User):
        """
        Clears the search index and fills it with four entries (calc ids ``'1'``
        to ``'4'``) that are all derived from one, repeatedly updated, calc.

        Entry ``'1'`` is published by ``test_user``. Entries ``'2'`` to ``'4'`` belong
        to ``other_test_user``: ``'2'`` is published (with an older upload time and
        some distinct metadata), ``'3'`` is unpublished, ``'4'`` is published with embargo.
        """
        clear_elastic(elastic_infra)

        entry = CalcWithMetadata(upload_id=0, calc_id=0, upload_time=today)
        entry.files = ['test/mainfile.txt']
        entry.apply_domain_metadata(normalized)

        def index_current_state():
            # Each call indexes the state the shared object has at that moment.
            search.Entry.from_calc_with_metadata(entry).save(refresh=True)

        entry.update(
            calc_id='1', uploader=test_user.user_id, published=True, with_embargo=False)
        index_current_state()

        # Updates are cumulative: everything set from here on also applies to '3' and '4'.
        entry.update(
            calc_id='2', uploader=other_test_user.user_id, published=True, with_embargo=False,
            upload_time=today - datetime.timedelta(days=5))
        entry.update(
            atoms=['Fe'], comment='this is a specific word', formula='AAA', basis_set='zzz')
        index_current_state()

        entry.update(
            calc_id='3', uploader=other_test_user.user_id, published=False, with_embargo=False)
        index_current_state()

        entry.update(
            calc_id='4', uploader=other_test_user.user_id, published=True, with_embargo=True)
        index_current_state()

599
    def assert_search(self, rv: Any, number_of_calcs: int) -> dict:
        """
        Asserts that ``rv`` is a successful (200) response whose JSON body has a
        ``results`` list of exactly ``number_of_calcs`` items.

        Returns the decoded JSON body.
        """
        succeeded = rv.status_code == 200
        if not succeeded:
            # Show the raw body so that a failed request can be diagnosed.
            print(rv.data)
        assert rv.status_code == 200

        payload = json.loads(rv.data)

        hits = payload.get('results', None)
        assert hits is not None
        assert isinstance(hits, list)
        assert len(hits) == number_of_calcs

        return payload

613
614
    def test_own_calc(self, client, example_elastic_calcs, no_warn, test_user_auth):
        """ The uploader of calc ``1`` can read its repository entry. """
        response = client.get('/repo/0/1', headers=test_user_auth)
        assert response.status_code == 200

617
618
619
620
621
622
623
624
    def test_public_calc(self, client, example_elastic_calcs, no_warn, other_test_user_auth):
        """ The published calc ``1`` can be read by a user that did not upload it. """
        response = client.get('/repo/0/1', headers=other_test_user_auth)
        assert response.status_code == 200

    def test_embargo_calc(self, client, example_elastic_calcs, no_warn, test_user_auth):
        """ The embargoed calc ``4`` is refused (401) for a user that did not upload it. """
        response = client.get('/repo/0/4', headers=test_user_auth)
        assert response.status_code == 401

625
626
627
628
    def test_own_embargo_calc(self, client, example_elastic_calcs, no_warn, other_test_user_auth):
        """ The embargoed calc ``4`` can be read by its uploader. """
        response = client.get('/repo/0/4', headers=other_test_user_auth)
        assert response.status_code == 200

629
630
631
632
    def test_staging_calc(self, client, example_elastic_calcs, no_warn, test_user_auth):
        """ The unpublished calc ``3`` is refused (401) for a user that did not upload it. """
        response = client.get('/repo/0/3', headers=test_user_auth)
        assert response.status_code == 401

633
634
635
636
    def test_own_staging_calc(self, client, example_elastic_calcs, no_warn, other_test_user_auth):
        """ The unpublished calc ``3`` can be read by its uploader. """
        response = client.get('/repo/0/3', headers=other_test_user_auth)
        assert response.status_code == 200

637
638
    def test_non_existing_calcs(self, client, example_elastic_calcs, test_user_auth):
        """ A calc id that the fixture never indexed yields a 404. """
        response = client.get('/repo/0/10', headers=test_user_auth)
        assert response.status_code == 404

641
642
643
    @pytest.mark.parametrize('calcs, owner, auth', [
        (2, 'all', 'none'),
        (2, 'all', 'test_user'),
        (4, 'all', 'other_test_user'),
        (1, 'user', 'test_user'),
        (3, 'user', 'other_test_user'),
        (0, 'staging', 'test_user'),
        (1, 'staging', 'other_test_user')
    ])
    def test_search_owner(
            self, client, example_elastic_calcs, no_warn, test_user_auth, other_test_user_auth,
            calcs, owner, auth):
        """
        Searches with the given ``owner`` parameter, authenticated as the user named
        by ``auth`` (``'none'`` sends no headers), and expects ``calcs`` results.
        """
        headers_by_name = {
            'none': None,
            'test_user': test_user_auth,
            'other_test_user': other_test_user_auth}
        auth = headers_by_name.get(auth)

        rv = client.get('/repo/?owner=%s' % owner, headers=auth)
        data = self.assert_search(rv, calcs)

        results = data.get('results', None)
        if calcs <= 0:
            return

        # Only the first hit is checked for the basic keys.
        first_hit = results[0]
        for key in ['uploader', 'calc_id', 'formula', 'upload_id']:
            assert key in first_hit

Markus Scheidgen's avatar
Markus Scheidgen committed
659
    @pytest.mark.parametrize('calcs, start, end', [
        (2, today - datetime.timedelta(days=6), today),
        (2, today - datetime.timedelta(days=5), today),
        (1, today - datetime.timedelta(days=4), today),
        (1, today, today),
        (1, today - datetime.timedelta(days=6), today - datetime.timedelta(days=5)),
        (0, today - datetime.timedelta(days=7), today - datetime.timedelta(days=6)),
        (2, None, None),
        (1, today, None),
        (2, None, today)
    ])
    def test_search_time(self, client, example_elastic_calcs, no_warn, calcs, start, end):
        """
        Searches with optional ``from_time``/``until_time`` parameters and expects
        ``calcs`` results. A bound that is ``None`` is left out of the query.
        """
        params = []
        if start is not None:
            params.append('from_time=%s' % rfc3339DateTime.format(start))
        if end is not None:
            params.append('until_time=%s' % rfc3339DateTime.format(end))

        # Without any bound the plain '/repo/' URL is requested.
        query_string = ('?%s' % '&'.join(params)) if params else ''

        rv = client.get('/repo/%s' % query_string)
        self.assert_search(rv, calcs)
Markus Scheidgen's avatar
Markus Scheidgen committed
683

684
    @pytest.mark.parametrize('calcs, quantity, value', [
        (2, 'system', 'bulk'),
        (0, 'system', 'atom'),
        (1, 'atoms', 'Br'),
        (1, 'atoms', 'Fe'),
        (0, 'atoms', ['Fe', 'Br', 'A', 'B']),
        (0, 'only_atoms', ['Br', 'Si']),
        (1, 'only_atoms', ['Fe']),
        (1, 'only_atoms', ['Br', 'K', 'Si']),
        (1, 'only_atoms', ['Br', 'Si', 'K']),
        (1, 'comment', 'specific'),
        (1, 'authors', 'Hofstadter, Leonard'),
        (2, 'files', 'test/mainfile.txt'),
        (2, 'paths', 'mainfile.txt'),
        (2, 'paths', 'test'),
        (2, 'quantities', ['wyckoff_letters_primitive', 'hall_number']),
        (0, 'quantities', 'dos')
    ])
    def test_search_quantities(
            self, client, example_elastic_calcs, no_warn, test_user_auth, calcs, quantity, value):
        """
        Searches for ``quantity=value`` (list values become repeated parameters) and
        expects ``calcs`` results plus a ``quantities`` section in the response.
        """
        query_string = urlencode({quantity: value}, doseq=True)

        rv = client.get('/repo/?%s' % query_string, headers=test_user_auth)
        logger.debug('run search quantities test', query_string=query_string)
        data = self.assert_search(rv, calcs)

        aggregations = data.get('quantities', None)
        assert aggregations is not None

        if quantity != 'system' or calcs == 0:
            return

        # for simplicity we only assert on quantities for this case
        assert 'system' in aggregations
        assert len(aggregations['system']) == 1
        assert value in aggregations['system']
716

717
718
    # Metric selections used by the metrics tests: none, all at once, and each one alone.
    metrics_permutations = [[], search.metrics_names]
    metrics_permutations.extend([metric] for metric in search.metrics_names)

719
720
721
722
723
724
725
726
727
728
729
    def test_search_admin(self, client, example_elastic_calcs, no_warn, admin_user_auth):
        """ With ``owner=admin`` an admin user gets all four example entries. """
        response = client.get('/repo/?owner=admin', headers=admin_user_auth)
        self.assert_search(response, 4)

    def test_search_admin_auth(self, client, example_elastic_calcs, no_warn, test_user_auth):
        """ ``owner=admin`` is refused (401) for the regular test user and without headers. """
        url = '/repo/?owner=admin'

        as_regular_user = client.get(url, headers=test_user_auth)
        assert as_regular_user.status_code == 401

        without_headers = client.get(url)
        assert without_headers.status_code == 401

730
    @pytest.mark.parametrize('metrics', metrics_permutations)
    def test_search_total_metrics(self, client, example_elastic_calcs, no_warn, metrics):
        """
        Searches with the given ``metrics`` and expects each of them, and always
        ``code_runs``, under ``quantities.total.all`` in the response.
        """
        query = urlencode(dict(metrics=metrics), doseq=True)
        rv = client.get('/repo/?%s' % query)
        assert rv.status_code == 200, str(rv.data)

        data = json.loads(rv.data)
        quantities = data.get('quantities', {})
        total_metrics = quantities.get('total', {}).get('all', None)
        assert total_metrics is not None
        assert 'code_runs' in total_metrics
        for requested in metrics:
            assert requested in total_metrics
Markus Scheidgen's avatar
Markus Scheidgen committed
740

741
    @pytest.mark.parametrize('metrics', metrics_permutations)
    def test_search_aggregation_metrics(self, client, example_elastic_calcs, no_warn, metrics):
        """
        Searches with the given ``metrics`` and expects them in every value of every
        returned quantity, except ``authors`` which only carries ``code_runs``.
        """
        query = urlencode(dict(metrics=metrics), doseq=True)
        rv = client.get('/repo/?%s' % query)
        assert rv.status_code == 200

        data = json.loads(rv.data)
        for name, quantity in data.get('quantities').items():
            for metrics_result in quantity.values():
                assert 'code_runs' in metrics_result
                if name == 'authors':
                    assert len(metrics_result) == 1  # code_runs is the only metric for authors
                    continue

                for requested in metrics:
                    assert requested in metrics_result
Markus Scheidgen's avatar
Markus Scheidgen committed
754

755
756
757
758
759
760
761
762
    def test_search_date_histogram(self, client, example_elastic_calcs, no_warn):
        """ Requests a date histogram and expects a non-empty one under ``quantities``. """
        rv = client.get('/repo/?date_histogram=true&metrics=total_energies')
        assert rv.status_code == 200
        data = json.loads(rv.data)
        # Defaulting to {} turns a missing 'quantities' section into a failed
        # assertion below instead of an AttributeError on None.
        histogram = data.get('quantities', {}).get('date_histogram')
        assert histogram is not None
        assert len(histogram) > 0

763
764
765
    @pytest.mark.parametrize('n_results, page, per_page', [(2, 1, 5), (1, 1, 1), (0, 2, 3)])
    def test_search_pagination(self, client, example_elastic_calcs, no_warn, n_results, page, per_page):
        """
        Requests one page of the unauthenticated search and expects ``n_results``
        entries on it, while the reported total stays at 2.
        """
        url = '/repo/?page=%d&per_page=%d' % (page, per_page)
        rv = client.get(url)
        assert rv.status_code == 200

        data = json.loads(rv.data)
        page_results = data.get('results', None)
        assert data['pagination']['total'] == 2
        assert page_results is not None
        assert len(page_results) == n_results
772

773
774
    @pytest.mark.parametrize('first, order_by, order', [
        ('1', 'formula', -1), ('2', 'formula', 1),
        ('2', 'basis_set', -1), ('1', 'basis_set', 1),
        (None, 'authors', -1)])
    def test_search_order(self, client, example_elastic_calcs, no_warn, first, order_by, order):
        """
        Searches with ``order_by``/``order`` and expects the calc id ``first`` as
        first result. With ``first=None`` only the result count is checked.
        """
        url = '/repo/?order_by=%s&order=%d' % (order_by, order)
        rv = client.get(url)
        assert rv.status_code == 200

        data = json.loads(rv.data)
        ordered = data.get('results', None)
        assert data['pagination']['total'] == 2
        assert len(ordered) == 2

        if first is None:
            return
        assert ordered[0]['calc_id'] == first
786

787
788
789
790
791
792
793
794
795
796
    @pytest.mark.parametrize('n_results, size', [(2, None), (2, 5), (1, 1)])
    def test_search_scroll(self, client, example_elastic_calcs, no_warn, n_results, size):
        """
        Starts a scroll search (optionally with ``per_page=size``), checks the first
        page for ``n_results`` entries, and then follows the returned ``scroll_id``
        until the response does not contain one anymore.

        If the first page holds fewer than 2 results, at least one follow-up page
        must have results.
        """
        if size is not None:
            # 'scroll=1' without a trailing comma, like the other scroll requests here.
            rv = client.get('/repo/?scroll=1&per_page=%d' % size)
        else:
            rv = client.get('/repo/?scroll=1')

        assert rv.status_code == 200
        data = json.loads(rv.data)
        results = data.get('results', None)
        assert data.get('scroll', {}).get('size', -1) > 0
        assert results is not None
        assert len(results) == n_results
        scroll_id = data.get('scroll', {}).get('scroll_id', None)
        assert scroll_id is not None

        has_another_page = False
        while scroll_id is not None:
            rv = client.get('/repo/?scroll=1&scroll_id=%s' % scroll_id)
            # Fail on the status code rather than on an unrelated error while
            # reading the body of a failed request.
            assert rv.status_code == 200
            data = json.loads(rv.data)
            scroll_id = data.get('scroll', {}).get('scroll_id', None)
            has_another_page |= len(data.get('results')) > 0

        if n_results < 2:
            assert has_another_page

813
814
815
    def test_search_user_authrequired(self, client, example_elastic_calcs, no_warn):
        """ ``owner=user`` without any authentication headers is refused (401). """
        response = client.get('/repo/?owner=user')
        assert response.status_code == 401
816

817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
    @pytest.mark.parametrize('calcs, quantity, value', [
        (2, 'system', 'bulk'),
        (0, 'system', 'atom'),
        (1, 'atoms', 'Br'),
        (1, 'atoms', 'Fe'),
        (1, 'authors', 'Hofstadter, Leonard'),
        (2, 'files', 'test/mainfile.txt'),
        (0, 'quantities', 'dos')
    ])
    def test_quantity_search(self, client, example_elastic_calcs, no_warn, test_user_auth, calcs, quantity, value):
        """
        Requests the values of ``quantity`` and expects ``value`` to be listed with
        the count ``calcs``, or to be absent when ``calcs`` is 0.
        """
        rv = client.get('/repo/%s' % quantity, headers=test_user_auth)
        assert rv.status_code == 200

        data = json.loads(rv.data)
        quantities = data['quantities']
        assert quantity in quantities

        counts = quantities[quantity]['values']
        # The value must be present exactly when a count is expected for it.
        assert (value in counts) == (calcs > 0)
        assert counts.get(value, 0) == calcs

    def test_quantity_search_after(self, client, example_elastic_calcs, no_warn, test_user_auth):
        """
        Pages through the ``atoms`` values one at a time via the ``after`` key and
        expects every page to differ from the first one, until a response without
        ``after`` (and without values) ends the iteration.
        """
        rv = client.get('/repo/atoms?size=1')
        assert rv.status_code == 200
        first_page = json.loads(rv.data)['quantities']['atoms']

        assert 'after' in first_page
        after = first_page['after']
        assert len(first_page['values']) == 1
        first_value = list(first_page['values'].keys())[0]

        while True:
            rv = client.get('/repo/atoms?size=1&after=%s' % after)
            assert rv.status_code == 200
            page = json.loads(rv.data)['quantities']['atoms']

            if 'after' not in page:
                # The last response carries neither a cursor nor values.
                assert len(page['values']) == 0
                break

            assert len(page['values']) == 1
            assert first_value != list(page['values'].keys())[0]
            assert after != page['after']
            after = page['after']

864

865
class TestRaw(UploadFilesBasedTests):
Markus Scheidgen's avatar
Markus Scheidgen committed
866

867
868
869
870
871
872
873
874
875
876
877
878
879
880
    def test_raw_file_from_calc(self, client, non_empty_processed, test_user_auth):
        """
        Downloads the mainfile of the first calc through ``/raw/calc/`` and then
        requests the calc's path with a trailing slash, expecting non-empty ``contents``.
        """
        calc = list(non_empty_processed.calcs)[0]
        upload_id = non_empty_processed.upload_id

        file_url = '/raw/calc/%s/%s/%s' % (
            upload_id, calc.calc_id, os.path.basename(calc.mainfile))
        file_rv = client.get(file_url, headers=test_user_auth)
        assert file_rv.status_code == 200
        assert len(file_rv.data) > 0

        listing_url = '/raw/calc/%s/%s/' % (upload_id, calc.calc_id)
        listing_rv = client.get(listing_url, headers=test_user_auth)
        assert listing_rv.status_code == 200
        listing = json.loads(listing_rv.data)
        assert len(listing['contents']) > 0

881
882
    @UploadFilesBasedTests.check_authorizaton
    def test_raw_file(self, client, upload, auth_headers):
        """ Downloads the example mainfile of the upload and expects a non-empty body. """
        response = client.get(
            '/raw/%s/%s' % (upload, example_file_mainfile), headers=auth_headers)
        assert response.status_code == 200
        assert len(response.data) > 0

888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
    @UploadFilesBasedTests.check_authorizaton
    def test_raw_file_partial(self, client, upload, auth_headers):
        """
        Reads the first 20 bytes of the mainfile and then bytes 10 to 20, and expects
        the second read to equal the tail of the first.
        """
        head_url = '/raw/%s/%s?offset=0&length=20' % (upload, example_file_mainfile)
        head_rv = client.get(head_url, headers=auth_headers)
        assert head_rv.status_code == 200
        start_data = head_rv.data
        assert len(start_data) == 20

        tail_url = '/raw/%s/%s?offset=10&length=10' % (upload, example_file_mainfile)
        tail_rv = client.get(tail_url, headers=auth_headers)
        assert tail_rv.status_code == 200
        next_data = tail_rv.data
        assert len(tail_rv.data) == 10
        assert start_data[10:] == next_data

903
904
    @UploadFilesBasedTests.ignore_authorization
    def test_raw_file_signed(self, client, upload, _, test_user_signature_token):
        """ Downloads the mainfile with a ``signature_token`` query parameter and no headers. """
        url = '/raw/%s/%s?signature_token=%s' % (
            upload, example_file_mainfile, test_user_signature_token)
        response = client.get(url)
        assert response.status_code == 200
        assert len(response.data) > 0

910
911
    @UploadFilesBasedTests.ignore_authorization
    def test_raw_file_missing_file(self, client, upload, auth_headers):
        """ A path that is not in the upload yields a 404 whose JSON body has no ``files`` key. """
        response = client.get('/raw/%s/does/not/exist' % upload, headers=auth_headers)
        assert response.status_code == 404
        body = json.loads(response.data)
        assert 'files' not in body

918
    @pytest.mark.parametrize('compress', [True, False])
    @UploadFilesBasedTests.ignore_authorization
    def test_raw_file_wildcard(self, client, upload, auth_headers, compress):
        """
        Requests ``examples*`` of the upload, with and without ``compress=1``, and
        expects a valid zip with one member per example file.
        """
        url = '/raw/%s/examples*' % upload
        url = '%s?compress=1' % url if compress else url
        rv = client.get(url, headers=auth_headers)

        assert rv.status_code == 200
        assert len(rv.data) > 0
        archive_bytes = io.BytesIO(rv.data)
        with zipfile.ZipFile(archive_bytes) as zip_file:
            # testzip() returns None if no member is corrupt.
            assert zip_file.testzip() is None
            assert len(zip_file.namelist()) == len(example_file_contents)

932
933
934
935
    @UploadFilesBasedTests.ignore_authorization
    def test_raw_file_wildcard_missing(self, client, upload, auth_headers):
        """ A wildcard path without any match yields a 404. """
        response = client.get('/raw/%s/does/not/exist*' % upload, headers=auth_headers)
        assert response.status_code == 404
937

938
939
    @UploadFilesBasedTests.ignore_authorization
    def test_raw_file_missing_upload(self, client, upload, auth_headers):
        """ Requesting the mainfile from an upload id that does not exist yields a 404. """
        response = client.get(
            '/raw/doesnotexist/%s' % example_file_mainfile, headers=auth_headers)
        assert response.status_code == 404

944
    @pytest.mark.parametrize('compress', [True, False])
    @UploadFilesBasedTests.check_authorizaton
    def test_raw_files(self, client, upload, auth_headers, compress):
        """
        Requests all example files at once through the ``files`` parameter, with and
        without ``compress=1``, and expects a valid zip with one member per file.
        """
        requested_files = ','.join(example_file_contents)
        url = '/raw/%s?files=%s' % (upload, requested_files)
        url = '%s&compress=1' % url if compress else url
        rv = client.get(url, headers=auth_headers)

        assert rv.status_code == 200
        assert len(rv.data) > 0
        archive_bytes = io.BytesIO(rv.data)
        with zipfile.ZipFile(archive_bytes) as zip_file:
            # testzip() returns None if no member is corrupt.
            assert zip_file.testzip() is None
            assert len(zip_file.namelist()) == len(example_file_contents)
Markus Scheidgen's avatar
Markus Scheidgen committed
958

959
960
961
    @pytest.mark.parametrize('compress', [False, True])
    def test_raw_files_from_query_upload_id(self, client, non_empty_processed, test_user_auth, compress):
        """
        Downloads the raw files of a query for the upload's id and expects a valid
        zip with one member per example file.
        """
        compress_value = 'true' if compress else 'false'
        url = '/raw/query?upload_id=%s&compress=%s' % (
            non_empty_processed.upload_id, compress_value)
        rv = client.get(url, headers=test_user_auth)

        assert rv.status_code == 200
        assert len(rv.data) > 0
        archive_bytes = io.BytesIO(rv.data)
        with zipfile.ZipFile(archive_bytes) as zip_file:
            # testzip() returns None if no member is corrupt.
            assert zip_file.testzip() is None
            assert len(zip_file.namelist()) == len(example_file_contents)

970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
    @pytest.mark.parametrize('query_params', [
        {'atoms': 'Si'},
        {'authors': 'Cooper, Sheldon'}
    ])
    def test_raw_files_from_query(self, client, processeds, test_user_auth, query_params):
        """
        Downloads the raw files of a search query and expects a valid zip that holds
        the example files once per processed upload.
        """
        rv = client.get('/raw/query?%s' % urlencode(query_params), headers=test_user_auth)

        assert rv.status_code == 200
        assert len(rv.data) > 0
        archive_bytes = io.BytesIO(rv.data)
        with zipfile.ZipFile(archive_bytes) as zip_file:
            # testzip() returns None if no member is corrupt.
            assert zip_file.testzip() is None
            assert len(zip_file.namelist()) == len(example_file_contents) * len(processeds)