test_api.py 45.7 KB
Newer Older
Markus Scheidgen's avatar
Markus Scheidgen committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
# Copyright 2018 Markus Scheidgen
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

15
from typing import Any
16
17
18
import pytest
import time
import json
19
20
import zipfile
import io
21
import inspect
Markus Scheidgen's avatar
Markus Scheidgen committed
22
import datetime
23
import os.path
24
from urllib.parse import urlencode
25

Markus Scheidgen's avatar
Markus Scheidgen committed
26
from nomad.api.app import rfc3339DateTime
27
from nomad.api.auth import generate_upload_token, verify_upload_token
28
from nomad import search, parsing, files, config, utils
29
30
from nomad.files import UploadFiles, PublicUploadFiles
from nomad.processing import Upload, Calc, SUCCESS
31
from nomad.datamodel import UploadWithMetadata, CalcWithMetadata, User
32

33
from tests.conftest import create_auth_headers, clear_elastic
34
from tests.test_files import example_file, example_file_mainfile, example_file_contents
35
from tests.test_files import create_staging_upload, create_public_upload, assert_upload_files
36
from tests.test_search import assert_search_upload
37
38


39
40
# module-level logger, obtained from the project's ``utils.get_logger`` helper
logger = utils.get_logger(__name__)

Markus Scheidgen's avatar
Markus Scheidgen committed
41

42
43
44
45
46
def test_alive(client):
    rv = client.get('/alive')
    assert rv.status_code == 200


47
48
49
50
51
52
53
@pytest.fixture(scope='function')
def test_user_signature_token(client, test_user_auth):
    """ Fixture that requests ``/auth/token`` with the given auth headers and returns the token. """
    response = client.get('/auth/token', headers=test_user_auth)
    assert response.status_code == 200
    payload = json.loads(response.data)
    return payload['token']


54
55
56
57
58
59
60
61
def get_upload_with_metadata(upload: dict) -> UploadWithMetadata:
    """
    Create a :class:`UploadWithMetadata` from an API upload json record.

    The record has to provide ``upload_id`` and a ``calcs`` dict whose ``results``
    list holds one dict with ``calc_id`` and ``mainfile`` per calculation.
    """
    upload_id = upload['upload_id']
    calcs = []
    for calc in upload['calcs']['results']:
        calcs.append(CalcWithMetadata(calc_id=calc['calc_id'], mainfile=calc['mainfile']))

    return UploadWithMetadata(upload_id=upload_id, calcs=calcs)


62
63
64
class TestInfo:
    def test_info(self, client):
        rv = client.get('/info/')
65
66
67
68
        data = json.loads(rv.data)
        assert 'codes' in data
        assert 'parsers' in data
        assert len(data['parsers']) >= len(data['codes'])
69
70
        assert rv.status_code == 200

71

72
class TestAuth:
73
74
75
76
    def test_auth_wo_credentials(self, client, keycloak, no_warn):
        rv = client.get('/auth/')
        assert rv.status_code == 401

77
    def test_auth_with_token(self, client, test_user_auth, keycloak):
78
79
        rv = client.get('/auth/', headers=test_user_auth)
        assert rv.status_code == 200
80
        self.assert_auth(client, json.loads(rv.data))
81

82
83
84
85
    # def test_auth_with_password(self, client, test_user_auth, keycloak):
    #     rv = client.get('/auth/', headers=test_user_auth)
    #     assert rv.status_code == 200
    #     self.assert_auth(client, json.loads(rv.data))
86

87
88
89
    def test_upload_token(self, test_user):
        token = generate_upload_token(test_user)
        assert verify_upload_token(token) == test_user.user_id
90

91
92
    def assert_auth(self, client, user):
        for key in ['first_name', 'last_name', 'email', 'name', 'user_id']:
93
94
            assert key in user

95
96
97
        # rv = client.get('/uploads/', headers={
        #     'X-Token': user['token']
        # })
98

99
        # assert rv.status_code == 200
100

101
102
103
    def test_signature_token(self, test_user_signature_token, no_warn):
        assert test_user_signature_token is not None

104
105
106
107
108

class TestUploads:

    def assert_uploads(self, upload_json_str, count=0, **kwargs):
        data = json.loads(upload_json_str)
109
110
111
112
        assert 'pagination' in data
        assert 'page' in data['pagination']

        data = data['results']
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
        assert isinstance(data, list)
        assert len(data) == count

        if count > 0:
            self.assert_upload(json.dumps(data[0]), **kwargs)

    def assert_upload(self, upload_json_str, id=None, **kwargs):
        """
        Asserts the shape of a single upload json record and returns the parsed record.

        The record needs an ``upload_id`` (equal to ``id`` if given) and a ``create_time``.
        Each additional keyword argument has to match the record's value for that key;
        keys missing from the record count as ``None``.
        """
        upload = json.loads(upload_json_str)
        assert 'upload_id' in upload
        if id is not None:
            assert id == upload['upload_id']
        assert 'create_time' in upload

        for expected_key, expected_value in kwargs.items():
            assert upload.get(expected_key) == expected_value

        return upload

    def assert_processing(self, client, test_user_auth, upload_id):
        """
        Waits until the given upload is processed and asserts a successful outcome.

        Checks the task state of the upload and of each of its calcs, that the
        processing logs of each calc can be retrieved, the calc pagination of the
        upload endpoint (if there is more than one calc), and finally the upload's
        files and search entries.
        """
        # poll until completed
        upload = self.block_until_completed(client, upload_id, test_user_auth)

        assert len(upload['tasks']) == 4
        assert upload['tasks_status'] == SUCCESS
        assert upload['current_task'] == 'cleanup'
        assert not upload['process_running']

        for calc in upload['calcs']['results']:
            assert calc['tasks_status'] == SUCCESS
            assert calc['current_task'] == 'archiving'
            assert len(calc['tasks']) == 3
            logs_response = client.get(
                '/archive/logs/%s/%s' % (calc['upload_id'], calc['calc_id']),
                headers=test_user_auth)
            assert logs_response.status_code == 200

        if upload['calcs']['pagination']['total'] > 1:
            upload_endpoint = '/uploads/%s' % upload_id
            page_response = client.get(
                '%s?page=2&per_page=1&order_by=tasks_status' % upload_endpoint,
                headers=test_user_auth)
            assert page_response.status_code == 200
            # NOTE(review): from here on ``upload`` only holds the calc of the
            # requested page; the checks below run on that reduced record
            upload = self.assert_upload(page_response.data)
            assert len(upload['calcs']['results']) == 1

        upload_with_metadata = get_upload_with_metadata(upload)
        assert_upload_files(upload_with_metadata, files.StagingUploadFiles)
        assert_search_upload(upload_with_metadata, additional_keys=['atoms', 'system'])
158

159
    def assert_published(self, client, test_user_auth, upload_id, proc_infra, metadata=None):
        """
        Publishes the given upload via the API and asserts the published state.

        Arguments:
            client: The API test client.
            test_user_auth: The auth headers used for all requests.
            upload_id: The id of the upload to publish.
            proc_infra: Not used in the body of this method.
            metadata: Optional metadata dict sent along with the publish operation.
                An empty dict is sent if omitted.
        """
        # a fresh dict per call instead of a shared mutable default argument
        if metadata is None:
            metadata = {}

        rv = client.get('/uploads/%s' % upload_id, headers=test_user_auth)
        upload = self.assert_upload(rv.data)

        # the result is not needed, but the conversion fails early if the record
        # does not have the expected upload/calcs structure
        get_upload_with_metadata(upload)

        rv = client.post(
            '/uploads/%s' % upload_id,
            headers=test_user_auth,
            data=json.dumps(dict(operation='publish', metadata=metadata)),
            content_type='application/json')
        assert rv.status_code == 200
        upload = self.assert_upload(rv.data)
        assert upload['current_process'] == 'publish_upload'
        assert upload['process_running']

        additional_keys = ['with_embargo']

        self.block_until_completed(client, upload_id, test_user_auth)

        upload_proc = Upload.objects(upload_id=upload_id).first()
        assert upload_proc is not None
        assert upload_proc.published is True
        upload_with_metadata = upload_proc.to_upload_with_metadata()

        assert_upload_files(upload_with_metadata, files.PublicUploadFiles, published=True)
        assert_search_upload(upload_with_metadata, additional_keys=additional_keys, published=True)

    def block_until_completed(self, client, upload_id: str, test_user_auth):
188
189
190
191
192
        while True:
            time.sleep(0.1)
            rv = client.get('/uploads/%s' % upload_id, headers=test_user_auth)
            if rv.status_code == 200:
                upload = self.assert_upload(rv.data)
193
194
                if not upload['process_running'] and not upload['tasks_running']:
                    return upload
195
            elif rv.status_code == 404:
196
                return None
197
198
199
200
            else:
                raise Exception(
                    'unexpected status code while blocking for upload processing: %s' %
                    str(rv.status_code))
201
202
203

    def assert_upload_does_not_exist(self, client, upload_id: str, test_user_auth):
        """
        Asserts that the given upload is gone from the API, the database, and the files.

        Waits for running processes first. Afterwards the upload endpoint has to
        answer with 404, there must be no ``Upload`` or ``Calc`` objects for the
        upload, and there are either no upload files or only public ones.
        """
        self.block_until_completed(client, upload_id, test_user_auth)

        rv = client.get('/uploads/%s' % upload_id, headers=test_user_auth)
        assert rv.status_code == 404
        assert Upload.objects(upload_id=upload_id).first() is None
        # compare by value: identity checks against int literals only work by
        # accident of the interpreter's small integer caching
        assert Calc.objects(upload_id=upload_id).count() == 0
        upload_files = UploadFiles.get(upload_id)
        assert upload_files is None or isinstance(upload_files, PublicUploadFiles)
Markus Scheidgen's avatar
Markus Scheidgen committed
211

212
213
214
215
216
217
218
    def test_get_command(self, client, test_user_auth, no_warn):
        rv = client.get('/uploads/command', headers=test_user_auth)
        assert rv.status_code == 200
        data = json.loads(rv.data)
        assert 'upload_command' in data
        assert 'upload_url' in data

219
220
    def test_get_empty(self, client, test_user_auth, no_warn):
        rv = client.get('/uploads/', headers=test_user_auth)
Markus Scheidgen's avatar
Markus Scheidgen committed
221

222
223
        assert rv.status_code == 200
        self.assert_uploads(rv.data, count=0)
Markus Scheidgen's avatar
Markus Scheidgen committed
224

225
226
227
    def test_get_not_existing(self, client, test_user_auth, no_warn):
        rv = client.get('/uploads/123456789012123456789012', headers=test_user_auth)
        assert rv.status_code == 404
228

229
230
231
232
233
234
235
    def test_put_upload_token(self, client, non_empty_example_upload, test_user, no_warn):
        """ An upload can be created with an upload token in the url instead of auth headers. """
        upload_token = generate_upload_token(test_user)
        url = '/uploads/?token=%s&local_path=%s&name=test_upload' % (
            upload_token, non_empty_example_upload)
        response = client.put(url)
        assert response.status_code == 200
        self.assert_upload(response.data, name='test_upload')

236
237
    @pytest.mark.parametrize('mode', ['multipart', 'stream', 'local_path'])
    @pytest.mark.parametrize('name', [None, 'test_name'])
    def test_put(self, client, test_user_auth, proc_infra, example_upload, mode, name, no_warn):
        """
        Uploads the example file in the given ``mode``, with and without a ``name``
        url parameter, and asserts that the upload gets processed.
        """
        file = example_upload
        if name:
            url = '/uploads/?name=%s' % name
        else:
            url = '/uploads/'

        if mode == 'multipart':
            # the context manager makes sure the handle is closed after the request
            with open(file, 'rb') as f:
                rv = client.put(
                    url, data=dict(file=(f, 'the_name')), headers=test_user_auth)
            if not name:
                # without an explicit name, the multipart file name is expected
                name = 'the_name'
        elif mode == 'stream':
            with open(file, 'rb') as f:
                rv = client.put(url, data=f.read(), headers=test_user_auth)
        elif mode == 'local_path':
            url += '&' if name else '?'
            url += 'local_path=%s' % file
            rv = client.put(url, headers=test_user_auth)
        else:
            assert False, 'unknown upload mode %s' % mode

        assert rv.status_code == 200
        if mode == 'local_path':
            upload = self.assert_upload(rv.data, upload_path=file, name=name)
        else:
            upload = self.assert_upload(rv.data, name=name)
        assert upload['tasks_running']

        self.assert_processing(client, test_user_auth, upload['upload_id'])
268

269
270
271
272
273
274
275
276
    def test_upload_limit(self, client, mongo, test_user, test_user_auth, proc_infra):
        """ Once the user has ``upload_limit`` uploads, a further upload is rejected with 400. """
        upload_limit = config.services.upload_limit
        for _ in range(upload_limit):
            Upload.create(user=test_user)

        response = client.put('/uploads/?local_path=%s' % example_file, headers=test_user_auth)
        assert response.status_code == 400
        # the rejected request must not have created another upload
        assert Upload.user_uploads(test_user).count() == config.services.upload_limit

277
278
279
    def test_delete_not_existing(self, client, test_user_auth, no_warn):
        rv = client.delete('/uploads/123456789012123456789012', headers=test_user_auth)
        assert rv.status_code == 404
280

281
282
283
284
285
286
287
288
289
290
291
292
    @pytest.fixture(scope='function')
    def slow_processing(self, monkeypatch):
        """ Fixture that delays ``Upload.cleanup`` by half a second for the duration of a test. """
        cleanup_target = 'nomad.processing.data.Upload.cleanup'
        old_cleanup = Upload.cleanup

        def slow_cleanup(self):
            # sleep first, then run the original cleanup
            time.sleep(0.5)
            old_cleanup(self)

        monkeypatch.setattr(cleanup_target, slow_cleanup)
        yield True
        # explicitly put the original cleanup back once the test is done
        monkeypatch.setattr(cleanup_target, old_cleanup)

293
    def test_delete_published(self, client, test_user_auth, proc_infra, no_warn):
        """ Deleting an upload that was already published is rejected with 400. """
        response = client.put('/uploads/?local_path=%s' % example_file, headers=test_user_auth)
        upload = self.assert_upload(response.data)
        upload_id = upload['upload_id']

        self.assert_processing(client, test_user_auth, upload_id)
        self.assert_published(client, test_user_auth, upload_id, proc_infra)

        response = client.delete('/uploads/%s' % upload_id, headers=test_user_auth)
        assert response.status_code == 400
300

Markus Scheidgen's avatar
Markus Scheidgen committed
301
    def test_delete(self, client, test_user_auth, proc_infra, no_warn):
        """ A processed but unpublished upload can be deleted and is gone afterwards. """
        response = client.put('/uploads/?local_path=%s' % example_file, headers=test_user_auth)
        upload = self.assert_upload(response.data)
        upload_id = upload['upload_id']

        self.assert_processing(client, test_user_auth, upload_id)

        response = client.delete('/uploads/%s' % upload_id, headers=test_user_auth)
        assert response.status_code == 200
        self.assert_upload_does_not_exist(client, upload_id, test_user_auth)
308

309
310
311
312
313
314
315
316
317
318
319
    def test_post_empty(self, client, test_user_auth, empty_upload, proc_infra, no_warn):
        """ Publishing the processed ``empty_upload`` is rejected with 400. """
        response = client.put('/uploads/?local_path=%s' % empty_upload, headers=test_user_auth)
        assert response.status_code == 200
        upload = self.assert_upload(response.data)
        upload_id = upload['upload_id']
        self.assert_processing(client, test_user_auth, upload_id)

        publish_request = json.dumps(dict(operation='publish'))
        response = client.post(
            '/uploads/%s' % upload_id, headers=test_user_auth,
            data=publish_request,
            content_type='application/json')
        assert response.status_code == 400

320
    def test_post(self, client, test_user_auth, non_empty_example_upload, proc_infra, no_warn):
        """
        Publishes a processed upload and checks its visibility afterwards: it can
        still be retrieved directly and is listed with ``state=all``, but it is not
        part of the default uploads listing anymore.
        """
        response = client.put(
            '/uploads/?local_path=%s' % non_empty_example_upload, headers=test_user_auth)
        assert response.status_code == 200
        upload = self.assert_upload(response.data)
        self.assert_processing(client, test_user_auth, upload['upload_id'])
        self.assert_published(client, test_user_auth, upload['upload_id'], proc_infra)

        # still visible
        response = client.get('/uploads/%s' % upload['upload_id'], headers=test_user_auth)
        assert response.status_code == 200

        # still listed with state=all
        response = client.get('/uploads/?state=all', headers=test_user_auth)
        assert response.status_code == 200
        all_uploads = json.loads(response.data)['results']
        assert len(all_uploads) > 0
        assert any(item['upload_id'] == upload['upload_id'] for item in all_uploads)

        # not listed without the state parameter
        response = client.get('/uploads/', headers=test_user_auth)
        assert response.status_code == 200
        default_uploads = json.loads(response.data)['results']
        assert not any(item['upload_id'] == upload['upload_id'] for item in default_uploads)

341
342
    def test_post_metadata(
            self, client, proc_infra, admin_user_auth, test_user_auth, test_user,
            other_test_user, no_warn, example_user_metadata):
        """
        Uploads as the test user and publishes with the admin's auth headers, sending
        the example user metadata plus an ``_upload_time``.
        """
        response = client.put('/uploads/?local_path=%s' % example_file, headers=test_user_auth)
        upload = self.assert_upload(response.data)
        upload_id = upload['upload_id']
        self.assert_processing(client, test_user_auth, upload_id)

        # work on a copy, the fixture value is left untouched
        metadata = dict(**example_user_metadata)
        metadata['_upload_time'] = datetime.datetime.utcnow().isoformat()
        self.assert_published(client, admin_user_auth, upload_id, proc_infra, metadata)
350

Markus Scheidgen's avatar
Markus Scheidgen committed
351
    def test_post_metadata_forbidden(self, client, proc_infra, test_user_auth, no_warn):
        """ Publishing with ``_pid`` metadata using the test user's auth headers results in 401. """
        response = client.put('/uploads/?local_path=%s' % example_file, headers=test_user_auth)
        upload = self.assert_upload(response.data)
        upload_id = upload['upload_id']
        self.assert_processing(client, test_user_auth, upload_id)

        publish_request = json.dumps(dict(operation='publish', metadata=dict(_pid=256)))
        response = client.post(
            '/uploads/%s' % upload_id,
            headers=test_user_auth,
            data=publish_request,
            content_type='application/json')
        assert response.status_code == 401

362
363
364
365
366
367
368
    def test_post_metadata_and_republish(
            self, client, proc_infra, admin_user_auth, test_user_auth, test_user,
            other_test_user, no_warn, example_user_metadata):
        """
        Publishes an upload with metadata using the admin's auth headers and then
        publishes it a second time with empty metadata.
        """
        response = client.put('/uploads/?local_path=%s' % example_file, headers=test_user_auth)
        upload = self.assert_upload(response.data)
        upload_id = upload['upload_id']
        self.assert_processing(client, test_user_auth, upload_id)

        # work on a copy, the fixture value is left untouched
        metadata = dict(**example_user_metadata)
        metadata['_upload_time'] = datetime.datetime.utcnow().isoformat()

        # first publish with metadata, then publish again without
        self.assert_published(client, admin_user_auth, upload_id, proc_infra, metadata)
        self.assert_published(client, admin_user_auth, upload_id, proc_infra, {})
372

373
374
375
376
377
378
379
380
381
382
383
384
385
386
    def test_post_re_process(self, client, published, test_user_auth, monkeypatch):
        monkeypatch.setattr('nomad.config.version', 're_process_test_version')
        monkeypatch.setattr('nomad.config.commit', 're_process_test_commit')

        upload_id = published.upload_id
        rv = client.post(
            '/uploads/%s' % upload_id,
            headers=test_user_auth,
            data=json.dumps(dict(operation='re-process')),
            content_type='application/json')

        assert rv.status_code == 200
        assert self.block_until_completed(client, upload_id, test_user_auth) is not None

387
    # TODO validate metadata (or all input models in API for that matter)
388
    # def test_post_bad_metadata(self, client, proc_infra, test_user_auth):
389
390
391
392
393
394
    #     rv = client.put('/uploads/?local_path=%s' % example_file, headers=test_user_auth)
    #     upload = self.assert_upload(rv.data)
    #     self.assert_processing(client, test_user_auth, upload['upload_id'])
    #     rv = client.post(
    #         '/uploads/%s' % upload['upload_id'],
    #         headers=test_user_auth,
395
    #         data=json.dumps(dict(operation='publish', metadata=dict(doesnotexist='hi'))),
396
397
398
    #         content_type='application/json')
    #     assert rv.status_code == 400

399
    def test_potcar(self, client, proc_infra, test_user_auth):
        """
        After publishing, the original POTCAR file requires authentication, while
        the stripped version can be downloaded without.
        """
        # only the owner and people the upload is shared with are supposed to
        # download the original potcar file
        example_file = 'tests/data/proc/examples_potcar.zip'
        response = client.put('/uploads/?local_path=%s' % example_file, headers=test_user_auth)

        upload = self.assert_upload(response.data)
        upload_id = upload['upload_id']
        self.assert_processing(client, test_user_auth, upload_id)
        self.assert_published(client, test_user_auth, upload_id, proc_infra)

        potcar_url = '/raw/%s/examples_potcar/POTCAR' % upload_id
        stripped_url = '/raw/%s/examples_potcar/POTCAR.stripped' % upload_id

        # the original file: denied for guests, served to the uploader
        assert client.get(potcar_url).status_code == 401
        assert client.get(potcar_url, headers=test_user_auth).status_code == 200
        # the stripped file: served to guests
        assert client.get(stripped_url).status_code == 200

415

Markus Scheidgen's avatar
Markus Scheidgen committed
416
417
418
# the current UTC date, evaluated once when this module is imported
today = datetime.datetime.utcnow().date()


419
420
421
422
423
424
425
426
427
428
429
430
431
class UploadFilesBasedTests:

    @staticmethod
    def fix_signature(func, wrapper):
        additional_args = list(inspect.signature(func).parameters.values())[4:]
        wrapper_sig = inspect.signature(wrapper)
        wrapper_args = list(wrapper_sig.parameters.values())[:3] + additional_args
        wrapper_sig = wrapper_sig.replace(parameters=tuple(wrapper_args))
        wrapper.__signature__ = wrapper_sig

    @staticmethod
    def check_authorizaton(func):
        """
        Decorator that runs the test ``func`` for all combinations of upload state
        and requesting user and checks that authorization is enforced.

        The parameters are passed indirectly to the ``test_data`` fixture; each is a
        list of ``[in_staging, restricted, for_uploader]``. The decorated function is
        called as ``func(self, client, upload, auth_headers, ...)``.

        For authorized combinations ``func`` simply has to pass. For unauthorized
        combinations ``func`` is expected to fail with an assertion that either
        mentions ``401`` or describes an empty zip file; passing without such a
        failure is an error.
        """
        @pytest.mark.parametrize('test_data', [
            [True, None, True],      # in staging for upload
            [True, None, False],     # in staging for different user
            [True, None, None],      # in staging for guest
            [True, None, 'admin'],   # in staging, for admin
            [False, True, True],     # in public, restricted for uploader
            [False, True, False],    # in public, restricted for different user
            [False, True, None],     # in public, restricted for guest
            [False, True, 'admin'],  # in public, restricted for admin
            [False, False, True],    # in public, public, for uploader
            [False, False, False],   # in public, public, for different user
            [False, False, None]     # in public, public, for guest
        ], indirect=True)
        def wrapper(self, client, test_data, *args, **kwargs):
            upload, authorized, auth_headers = test_data
            try:
                func(self, client, upload, auth_headers, *args, **kwargs)
            except AssertionError as assertion:
                assertion_str = str(assertion)
                if not authorized:
                    if '0 == 5' in assertion_str and 'ZipFile' in assertion_str:
                        # the user is not authorized and gets an empty zip as expected
                        return
                    if '401' in assertion_str:
                        # the user is not authorized and gets a 401 as expected
                        return
                # any other failure is a real one; a bare raise re-raises it unchanged
                raise

            # reaching this point means func passed, which must not happen for
            # users that are not authorized
            assert authorized, 'the request succeeded although the user is not authorized'
        UploadFilesBasedTests.fix_signature(func, wrapper)
        return wrapper

    @staticmethod
    def ignore_authorization(func):
        """
        Decorator that runs the test ``func`` once for a staging and once for a
        public upload, without evaluating whether the request is authorized.

        The decorated function is called as ``func(self, client, upload, auth_headers, ...)``.
        """
        staging_and_public = [
            [True, None, True],      # in staging
            [False, False, None],    # in public
        ]

        @pytest.mark.parametrize('test_data', staging_and_public, indirect=True)
        def wrapper(self, client, test_data, *args, **kwargs):
            # the authorized flag provided by the fixture is deliberately dropped
            upload, _, auth_headers = test_data
            func(self, client, upload, auth_headers, *args, **kwargs)

        UploadFilesBasedTests.fix_signature(func, wrapper)
        return wrapper
475

476
    @pytest.fixture(scope='function')
    def test_data(self, request, mongo, raw_files, no_warn, test_user, other_test_user, admin_user):
        """
        Fixture that creates the upload ``test_upload`` according to its parameter.

        The parameter is ``(in_staging, restricted, for_uploader)``; ``for_uploader``
        is ``True`` for the uploader, ``False`` for a different user, ``'admin'``
        for the admin, and anything else for a guest without auth headers.

        Yields a tuple of upload id, whether the requesting user is expected to be
        authorized, and the auth headers to use. The upload files are deleted
        after the test.
        """
        # delete potential old test files
        for _ in [0, 1]:
            old_upload_files = UploadFiles.get('test_upload')
            if old_upload_files:
                old_upload_files.delete()

        in_staging, restricted, for_uploader = request.param

        # uploader and admin are always authorized, everybody else only for
        # public uploads that are not restricted
        is_uploader_or_admin = for_uploader is True or for_uploader == 'admin'
        if in_staging:
            authorized = is_uploader_or_admin
        else:
            authorized = not restricted or is_uploader_or_admin

        if for_uploader is True:
            auth_headers = create_auth_headers(test_user)
        elif for_uploader is False:
            auth_headers = create_auth_headers(other_test_user)
        elif for_uploader == 'admin':
            auth_headers = create_auth_headers(admin_user)
        else:
            auth_headers = None

        calc_specs = 'r' if restricted else 'p'
        Upload.create(user=test_user, upload_id='test_upload')
        create_upload = create_staging_upload if in_staging else create_public_upload
        _, upload_files = create_upload('test_upload', calc_specs=calc_specs)

        yield 'test_upload', authorized, auth_headers

        upload_files.delete()
510
511


512
513
514
515
class TestArchive(UploadFilesBasedTests):
    """ Tests for the ``/archive`` endpoints: archive data, processing logs, and metainfo. """

    @UploadFilesBasedTests.check_authorizaton
    def test_get(self, client, upload, auth_headers):
        """ The archive of a calc can be retrieved as JSON. """
        response = client.get('/archive/%s/0' % upload, headers=auth_headers)
        assert response.status_code == 200
        assert json.loads(response.data) is not None

    @UploadFilesBasedTests.ignore_authorization
    def test_get_signed(self, client, upload, _, test_user_signature_token):
        """ The archive can be retrieved with a signature token instead of auth headers. """
        url = '/archive/%s/0?signature_token=%s' % (upload, test_user_signature_token)
        response = client.get(url)
        assert response.status_code == 200
        assert json.loads(response.data) is not None

    @UploadFilesBasedTests.check_authorizaton
    def test_get_calc_proc_log(self, client, upload, auth_headers):
        """ The processing log of a calc can be retrieved and is not empty. """
        response = client.get('/archive/logs/%s/0' % upload, headers=auth_headers)
        assert response.status_code == 200
        assert len(response.data) > 0

    @UploadFilesBasedTests.ignore_authorization
    def test_get_calc_proc_log_signed(self, client, upload, _, test_user_signature_token):
        """ The processing log can be retrieved with a signature token instead of auth headers. """
        url = '/archive/logs/%s/0?signature_token=%s' % (upload, test_user_signature_token)
        response = client.get(url)
        assert response.status_code == 200
        assert len(response.data) > 0

    @UploadFilesBasedTests.ignore_authorization
    def test_get_non_existing_archive(self, client, upload, auth_headers):
        """ Requesting the archive of an unknown upload/calc results in 404. """
        response = client.get('/archive/%s' % 'doesnt/exist', headers=auth_headers)
        assert response.status_code == 404

    @pytest.mark.parametrize('info', [
        'all.nomadmetainfo.json',
        'all.experimental.nomadmetainfo.json',
        'vasp.nomadmetainfo.json',
        'mpes.nomadmetainfo.json'])
    def test_get_metainfo(self, client, info):
        """ The listed metainfo files are served as non-empty JSON. """
        response = client.get('/archive/metainfo/%s' % info)
        assert response.status_code == 200
        metainfo = json.loads(response.data)
        assert len(metainfo) > 0
552

Markus Scheidgen's avatar
Markus Scheidgen committed
553

554
class TestRepo():
555
556
557
    @pytest.fixture(scope='class')
    def example_elastic_calcs(
            self, elastic_infra, normalized: parsing.LocalBackend,
            test_user: User, other_test_user: User):
        """Fill a freshly cleared elastic index with four search entries.

        One ``CalcWithMetadata`` object is created from ``normalized`` and then
        repeatedly updated and re-indexed, in this order:

        * calc ``1``: uploaded by ``test_user``, published, no embargo
        * calc ``2``: uploaded by ``other_test_user``, published, no embargo,
          upload time five days before ``today``, with distinct atoms, comment,
          formula and basis set
        * calc ``3``: uploaded by ``other_test_user``, not published
        * calc ``4``: uploaded by ``other_test_user``, published, with embargo
        """
        clear_elastic(elastic_infra)

        calc = CalcWithMetadata(upload_id=0, calc_id=0, upload_time=today)
        calc.files = ['test/mainfile.txt']
        calc.apply_domain_metadata(normalized)

        def index_current_state():
            # store the calc as it currently is; refresh makes it searchable right away
            search.Entry.from_calc_with_metadata(calc).save(refresh=True)

        calc.update(
            calc_id='1', uploader=test_user.user_id, published=True, with_embargo=False)
        index_current_state()

        five_days_ago = today - datetime.timedelta(days=5)
        calc.update(
            calc_id='2', uploader=other_test_user.user_id, published=True, with_embargo=False,
            upload_time=five_days_ago)
        calc.update(
            atoms=['Fe'], comment='this is a specific word', formula='AAA', basis_set='zzz')
        index_current_state()

        calc.update(
            calc_id='3', uploader=other_test_user.user_id, published=False, with_embargo=False)
        index_current_state()

        calc.update(
            calc_id='4', uploader=other_test_user.user_id, published=True, with_embargo=True)
        index_current_state()

584
    def assert_search(self, rv: Any, number_of_calcs: int) -> dict:
        """Validate a search response and return its decoded JSON body.

        Asserts that the status code is 200 (the raw body is printed first if
        it is not) and that the body holds a ``results`` list with exactly
        ``number_of_calcs`` entries.
        """
        request_failed = rv.status_code != 200
        if request_failed:
            # show the raw body to help diagnose the failing request
            print(rv.data)
        assert rv.status_code == 200

        body = json.loads(rv.data)

        hits = body.get('results')
        assert hits is not None
        assert isinstance(hits, list)
        assert len(hits) == number_of_calcs

        return body

598
599
    def test_own_calc(self, client, example_elastic_calcs, no_warn, test_user_auth):
        """``test_user`` can read calc ``1``, which the fixture indexes with it as uploader."""
        response = client.get('/repo/0/1', headers=test_user_auth)
        assert response.status_code == 200

602
603
604
605
606
607
608
609
    def test_public_calc(self, client, example_elastic_calcs, no_warn, other_test_user_auth):
        """The published, non-embargoed calc ``1`` is readable by the other test user."""
        response = client.get('/repo/0/1', headers=other_test_user_auth)
        assert response.status_code == 200

    def test_embargo_calc(self, client, example_elastic_calcs, no_warn, test_user_auth):
        """Calc ``4`` is embargoed by the other test user, so ``test_user`` gets 401."""
        response = client.get('/repo/0/4', headers=test_user_auth)
        assert response.status_code == 401

610
611
612
613
    def test_own_embargo_calc(self, client, example_elastic_calcs, no_warn, other_test_user_auth):
        """The uploader of the embargoed calc ``4`` can still read it."""
        response = client.get('/repo/0/4', headers=other_test_user_auth)
        assert response.status_code == 200

614
615
616
617
    def test_staging_calc(self, client, example_elastic_calcs, no_warn, test_user_auth):
        """Calc ``3`` is unpublished and belongs to the other test user, so ``test_user`` gets 401."""
        response = client.get('/repo/0/3', headers=test_user_auth)
        assert response.status_code == 401

618
619
620
621
    def test_own_staging_calc(self, client, example_elastic_calcs, no_warn, other_test_user_auth):
        """The uploader of the unpublished calc ``3`` can read it."""
        response = client.get('/repo/0/3', headers=other_test_user_auth)
        assert response.status_code == 200

622
623
    def test_non_existing_calcs(self, client, example_elastic_calcs, test_user_auth):
        """A calc id that the fixture never indexed (``10``) yields 404."""
        response = client.get('/repo/0/10', headers=test_user_auth)
        assert response.status_code == 404

626
627
628
    @pytest.mark.parametrize('calcs, owner, auth', [
        (2, 'all', 'none'),
        (2, 'all', 'test_user'),
        (4, 'all', 'other_test_user'),
        (1, 'user', 'test_user'),
        (3, 'user', 'other_test_user'),
        (0, 'staging', 'test_user'),
        (1, 'staging', 'other_test_user')
    ])
    def test_search_owner(
            self, client, example_elastic_calcs, no_warn, test_user_auth,
            other_test_user_auth, calcs, owner, auth):
        """Search with an ``owner`` filter and check the number of hits.

        ``auth`` names which credentials to send (``'none'`` sends no headers).
        When hits are expected, the first one must carry the basic repo keys.
        """
        headers_by_name = {
            'none': None,
            'test_user': test_user_auth,
            'other_test_user': other_test_user_auth,
        }
        headers = headers_by_name.get(auth)

        response = client.get('/repo/?owner=%s' % owner, headers=headers)
        body = self.assert_search(response, calcs)

        hits = body.get('results')
        if calcs <= 0:
            return

        first_hit = hits[0]
        for expected_key in ('uploader', 'calc_id', 'formula', 'upload_id'):
            assert expected_key in first_hit

Markus Scheidgen's avatar
Markus Scheidgen committed
644
    @pytest.mark.parametrize('calcs, start, end', [
        (2, today - datetime.timedelta(days=6), today),
        (2, today - datetime.timedelta(days=5), today),
        (1, today - datetime.timedelta(days=4), today),
        (1, today, today),
        (1, today - datetime.timedelta(days=6), today - datetime.timedelta(days=5)),
        (0, today - datetime.timedelta(days=7), today - datetime.timedelta(days=6)),
        (2, None, None),
        (1, today, None),
        (2, None, today)
    ])
    def test_search_time(self, client, example_elastic_calcs, no_warn, calcs, start, end):
        """Search with optional ``from_time``/``until_time`` bounds.

        ``start`` and ``end`` may each be ``None``, in which case the
        corresponding parameter is left out of the request. ``calcs`` is the
        number of hits expected for the given time window.
        """
        params = []
        if start is not None:
            params.append(('from_time', rfc3339DateTime.format(start)))
        if end is not None:
            params.append(('until_time', rfc3339DateTime.format(end)))

        url = '/repo/'
        if params:
            # urlencode quotes the values, so characters such as '+' or ':' in a
            # formatted timestamp survive the round trip through the query string
            url = '%s?%s' % (url, urlencode(params))

        rv = client.get(url)
        self.assert_search(rv, calcs)
Markus Scheidgen's avatar
Markus Scheidgen committed
668

669
    @pytest.mark.parametrize('calcs, quantity, value', [
        (2, 'system', 'bulk'),
        (0, 'system', 'atom'),
        (1, 'atoms', 'Br'),
        (1, 'atoms', 'Fe'),
        (0, 'atoms', ['Fe', 'Br', 'A', 'B']),
        (0, 'only_atoms', ['Br', 'Si']),
        (1, 'only_atoms', ['Fe']),
        (1, 'only_atoms', ['Br', 'K', 'Si']),
        (1, 'only_atoms', ['Br', 'Si', 'K']),
        (1, 'comment', 'specific'),
        (1, 'authors', 'Hofstadter, Leonard'),
        (2, 'files', 'test/mainfile.txt'),
        (2, 'paths', 'mainfile.txt'),
        (2, 'paths', 'test'),
        (2, 'quantities', ['wyckoff_letters_primitive', 'hall_number']),
        (0, 'quantities', 'dos')
    ])
    def test_search_quantities(
            self, client, example_elastic_calcs, no_warn, test_user_auth,
            calcs, quantity, value):
        """Search for ``quantity=value`` and check the number of hits.

        List values are sent as repeated query parameters. The response must
        always contain a ``quantities`` section; its content is only checked
        for the ``system`` quantity when hits are expected.
        """
        query_string = urlencode({quantity: value}, doseq=True)

        response = client.get('/repo/?%s' % query_string, headers=test_user_auth)
        logger.debug('run search quantities test', query_string=query_string)
        body = self.assert_search(response, calcs)

        aggregated = body.get('quantities')
        assert aggregated is not None
        if quantity != 'system' or calcs == 0:
            # for simplicity we only assert on quantities for the system case
            return

        assert 'system' in aggregated
        assert len(aggregated['system']) == 1
        assert value in aggregated['system']
701

702
703
    # metric selections used to parametrize the metrics tests: no metrics,
    # search.metrics_names as a whole, and each metric on its own
    metrics_permutations = [[], search.metrics_names]
    metrics_permutations += [[name] for name in search.metrics_names]

704
705
706
707
708
709
710
711
712
713
714
    def test_search_admin(self, client, example_elastic_calcs, no_warn, admin_user_auth):
        """With admin credentials, ``owner=admin`` returns all four indexed calcs."""
        response = client.get('/repo/?owner=admin', headers=admin_user_auth)
        self.assert_search(response, 4)

    def test_search_admin_auth(self, client, example_elastic_calcs, no_warn, test_user_auth):
        """``owner=admin`` is rejected with 401 for a regular user and without credentials."""
        admin_search_url = '/repo/?owner=admin'

        as_regular_user = client.get(admin_search_url, headers=test_user_auth)
        assert as_regular_user.status_code == 401

        anonymous = client.get(admin_search_url)
        assert anonymous.status_code == 401

715
    @pytest.mark.parametrize('metrics', metrics_permutations)
    def test_search_total_metrics(self, client, example_elastic_calcs, no_warn, metrics):
        """The ``total``/``all`` quantity reports ``code_runs`` and every requested metric."""
        query_string = urlencode(dict(metrics=metrics), doseq=True)
        response = client.get('/repo/?%s' % query_string)
        assert response.status_code == 200, str(response.data)

        body = json.loads(response.data)
        totals = body.get('quantities', {}).get('total', {}).get('all', None)
        assert totals is not None
        assert 'code_runs' in totals
        for requested in metrics:
            assert requested in totals
Markus Scheidgen's avatar
Markus Scheidgen committed
725

726
    @pytest.mark.parametrize('metrics', metrics_permutations)
    def test_search_aggregation_metrics(self, client, example_elastic_calcs, no_warn, metrics):
        """Every aggregated value reports ``code_runs`` plus the requested metrics.

        The ``authors`` quantity is the exception: its values must carry
        exactly one metric.
        """
        query_string = urlencode(dict(metrics=metrics), doseq=True)
        response = client.get('/repo/?%s' % query_string)
        assert response.status_code == 200

        body = json.loads(response.data)
        for quantity_name, values in body.get('quantities').items():
            for value_metrics in values.values():
                assert 'code_runs' in value_metrics
                if quantity_name == 'authors':
                    # code_runs is the only metric for authors
                    assert len(value_metrics) == 1
                    continue
                for requested in metrics:
                    assert requested in value_metrics
Markus Scheidgen's avatar
Markus Scheidgen committed
739

740
741
742
743
744
745
746
747
    def test_search_date_histogram(self, client, example_elastic_calcs, no_warn):
        """Requesting ``date_histogram=true`` adds a non-empty ``date_histogram`` quantity."""
        rv = client.get('/repo/?date_histogram=true&metrics=total_energies')
        assert rv.status_code == 200
        data = json.loads(rv.data)
        histogram = data.get('quantities').get('date_histogram')
        # the leftover debugging print of the histogram was removed here
        assert len(histogram) > 0

748
749
750
    @pytest.mark.parametrize('n_results, page, per_page', [(2, 1, 5), (1, 1, 1), (0, 2, 3)])
    def test_search_pagination(self, client, example_elastic_calcs, no_warn, n_results, page, per_page):
        """The total stays at 2 while the page holds ``n_results`` entries."""
        page_url = '/repo/?page=%d&per_page=%d' % (page, per_page)
        response = client.get(page_url)
        assert response.status_code == 200

        body = json.loads(response.data)
        hits = body.get('results')
        assert body['pagination']['total'] == 2
        assert hits is not None
        assert len(hits) == n_results
757

758
759
    @pytest.mark.parametrize('first, order_by, order', [
        ('1', 'formula', -1), ('2', 'formula', 1),
        ('2', 'basis_set', -1), ('1', 'basis_set', 1),
        (None, 'authors', -1)])
    def test_search_order(self, client, example_elastic_calcs, no_warn, first, order_by, order):
        """Sort by ``order_by`` in direction ``order`` and check which calc comes first.

        ``first`` is the expected ``calc_id`` of the first hit; ``None`` skips
        that check and only verifies that both calcs are returned.
        """
        order_url = '/repo/?order_by=%s&order=%d' % (order_by, order)
        response = client.get(order_url)
        assert response.status_code == 200

        body = json.loads(response.data)
        hits = body.get('results')
        assert body['pagination']['total'] == 2
        assert len(hits) == 2
        if first is None:
            return
        assert hits[0]['calc_id'] == first
771

772
773
774
775
776
777
778
779
780
781
    @pytest.mark.parametrize('n_results, size', [(2, None), (2, 5), (1, 1)])
    def test_search_scroll(self, client, example_elastic_calcs, no_warn, n_results, size):
        """Start a scroll search and follow the scroll ids until none is returned.

        ``size`` is sent as ``per_page`` when given. The first page must hold
        ``n_results`` hits and provide a scroll id. If the first page holds
        fewer than two hits, a later page must deliver results.
        """
        if size is not None:
            # the URL used to read 'scroll=1,&per_page=...'; the stray comma
            # became part of the scroll parameter's value
            rv = client.get('/repo/?scroll=1&per_page=%d' % size)
        else:
            rv = client.get('/repo/?scroll=1')

        assert rv.status_code == 200
        data = json.loads(rv.data)
        results = data.get('results', None)
        assert data.get('scroll', {}).get('size', -1) > 0
        assert results is not None
        assert len(results) == n_results
        scroll_id = data.get('scroll', {}).get('scroll_id', None)
        assert scroll_id is not None

        has_another_page = False
        while scroll_id is not None:
            rv = client.get('/repo/?scroll=1&scroll_id=%s' % scroll_id)
            data = json.loads(rv.data)
            scroll_id = data.get('scroll', {}).get('scroll_id', None)
            has_another_page |= len(data.get('results')) > 0

        if n_results < 2:
            assert has_another_page

798
799
800
    def test_search_user_authrequired(self, client, example_elastic_calcs, no_warn):
        """``owner=user`` without credentials is rejected with 401."""
        response = client.get('/repo/?owner=user')
        assert response.status_code == 401
801

802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
    @pytest.mark.parametrize('calcs, quantity, value', [
        (2, 'system', 'bulk'),
        (0, 'system', 'atom'),
        (1, 'atoms', 'Br'),
        (1, 'atoms', 'Fe'),
        (1, 'authors', 'Hofstadter, Leonard'),
        (2, 'files', 'test/mainfile.txt'),
        (0, 'quantities', 'dos')
    ])
    def test_quantity_search(
            self, client, example_elastic_calcs, no_warn, test_user_auth,
            calcs, quantity, value):
        """``/repo/<quantity>`` lists ``value`` with a count of ``calcs``.

        A value with an expected count of zero must not be listed at all.
        """
        response = client.get('/repo/%s' % quantity, headers=test_user_auth)
        assert response.status_code == 200

        aggregated = json.loads(response.data)['quantities']
        assert quantity in aggregated

        counts = aggregated[quantity]['values']
        assert (value in counts) == (calcs > 0)
        assert counts.get(value, 0) == calcs

    def test_quantity_search_after(self, client, example_elastic_calcs, no_warn, test_user_auth):
        """Page through the ``atoms`` values one at a time using ``after``.

        Every page but the last holds one value, differs from the first value
        and provides a new ``after`` key. The last page has no ``after`` key
        and no values.
        """
        def fetch_atoms(url):
            # request one page and return its 'atoms' quantity section
            response = client.get(url)
            assert response.status_code == 200
            return json.loads(response.data)['quantities']['atoms']

        page = fetch_atoms('/repo/atoms?size=1')
        assert 'after' in page
        cursor = page['after']
        assert len(page['values']) == 1
        first_value = list(page['values'].keys())[0]

        while True:
            page = fetch_atoms('/repo/atoms?size=1&after=%s' % cursor)

            if 'after' not in page:
                assert len(page['values']) == 0
                break

            assert len(page['values']) == 1
            assert first_value != list(page['values'].keys())[0]
            assert cursor != page['after']
            cursor = page['after']

849

850
class TestRaw(UploadFilesBasedTests):
Markus Scheidgen's avatar
Markus Scheidgen committed
851

852
853
854
855
856
857
858
859
860
861
862
863
864
865
    def test_raw_file_from_calc(self, client, non_empty_processed, test_user_auth):
        """Raw files can be addressed through upload id and calc id.

        First the mainfile of the first calc is fetched by its base name, then
        the calc's directory listing is fetched and must have ``contents``.
        """
        first_calc = list(non_empty_processed.calcs)[0]
        upload_id = non_empty_processed.upload_id

        mainfile_name = os.path.basename(first_calc.mainfile)
        file_url = '/raw/calc/%s/%s/%s' % (upload_id, first_calc.calc_id, mainfile_name)
        response = client.get(file_url, headers=test_user_auth)
        assert response.status_code == 200
        assert len(response.data) > 0

        listing_url = '/raw/calc/%s/%s/' % (upload_id, first_calc.calc_id)
        response = client.get(listing_url, headers=test_user_auth)
        assert response.status_code == 200
        listing = json.loads(response.data)
        assert len(listing['contents']) > 0

866
867
    @UploadFilesBasedTests.check_authorizaton
    def test_raw_file(self, client, upload, auth_headers):
        """The example mainfile of the upload is served with non-empty content."""
        response = client.get(
            '/raw/%s/%s' % (upload, example_file_mainfile), headers=auth_headers)
        assert response.status_code == 200
        assert len(response.data) > 0

873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
    @UploadFilesBasedTests.check_authorizaton
    def test_raw_file_partial(self, client, upload, auth_headers):
        """``offset`` and ``length`` select a byte range of the raw file.

        The 10 bytes read at offset 10 must equal the second half of the
        20 bytes read at offset 0.
        """
        head_url = '/raw/%s/%s?offset=0&length=20' % (upload, example_file_mainfile)
        response = client.get(head_url, headers=auth_headers)
        assert response.status_code == 200
        first_twenty = response.data
        assert len(first_twenty) == 20

        tail_url = '/raw/%s/%s?offset=10&length=10' % (upload, example_file_mainfile)
        response = client.get(tail_url, headers=auth_headers)
        assert response.status_code == 200
        second_ten = response.data
        assert len(response.data) == 10
        assert first_twenty[10:] == second_ten

888
889
    @UploadFilesBasedTests.ignore_authorization
    def test_raw_file_signed(self, client, upload, _, test_user_signature_token):
        """The raw mainfile is also reachable with a signature token only."""
        signed_url = '/raw/%s/%s?signature_token=%s' % (
            upload, example_file_mainfile, test_user_signature_token)
        response = client.get(signed_url)
        assert response.status_code == 200
        assert len(response.data) > 0

895
896
    @UploadFilesBasedTests.ignore_authorization
    def test_raw_file_missing_file(self, client, upload, auth_headers):
        """A missing path yields 404 with a JSON body that has no ``files`` key."""
        response = client.get('/raw/%s/does/not/exist' % upload, headers=auth_headers)
        assert response.status_code == 404
        body = json.loads(response.data)
        assert 'files' not in body

903
    @pytest.mark.parametrize('compress', [True, False])
    @UploadFilesBasedTests.ignore_authorization
    def test_raw_file_wildcard(self, client, upload, auth_headers, compress):
        """A wildcard path returns a valid zip with all example files.

        Runs once with and once without ``compress=1``.
        """
        wildcard_url = '/raw/%s/examples*' % upload
        if compress:
            wildcard_url = '%s?compress=1' % wildcard_url
        response = client.get(wildcard_url, headers=auth_headers)

        assert response.status_code == 200
        assert len(response.data) > 0
        zip_stream = io.BytesIO(response.data)
        with zipfile.ZipFile(zip_stream) as archive:
            assert archive.testzip() is None
            assert len(archive.namelist()) == len(example_file_contents)

917
918
919
920
    @UploadFilesBasedTests.ignore_authorization
    def test_raw_file_wildcard_missing(self, client, upload, auth_headers):
        """A wildcard on a path that does not exist yields 404."""
        response = client.get('/raw/%s/does/not/exist*' % upload, headers=auth_headers)
        assert response.status_code == 404
922

923
924
    @UploadFilesBasedTests.ignore_authorization
    def test_raw_file_missing_upload(self, client, upload, auth_headers):
        """Requesting the mainfile of an upload id that does not exist yields 404."""
        response = client.get(
            '/raw/doesnotexist/%s' % example_file_mainfile, headers=auth_headers)
        assert response.status_code == 404

929
    @pytest.mark.parametrize('compress', [True, False])
    @UploadFilesBasedTests.check_authorizaton
    def test_raw_files(self, client, upload, auth_headers, compress):
        """Requesting all example files by name returns a valid zip holding each of them.

        Runs once with and once without ``compress=1``.
        """
        requested_files = ','.join(example_file_contents)
        files_url = '/raw/%s?files=%s' % (upload, requested_files)
        if compress:
            files_url = '%s&compress=1' % files_url
        response = client.get(files_url, headers=auth_headers)

        assert response.status_code == 200
        assert len(response.data) > 0
        zip_stream = io.BytesIO(response.data)
        with zipfile.ZipFile(zip_stream) as archive:
            assert archive.testzip() is None
            assert len(archive.namelist()) == len(example_file_contents)
Markus Scheidgen's avatar
Markus Scheidgen committed
943

944
945
946
    @pytest.mark.parametrize('compress', [False, True])
    def test_raw_files_from_query_upload_id(self, client, non_empty_processed, test_user_auth, compress):
        """Querying raw files by upload id returns a valid zip with all example files."""
        compress_flag = 'true' if compress else 'false'
        query_url = '/raw/query?upload_id=%s&compress=%s' % (
            non_empty_processed.upload_id, compress_flag)
        response = client.get(query_url, headers=test_user_auth)

        assert response.status_code == 200
        assert len(response.data) > 0
        zip_stream = io.BytesIO(response.data)
        with zipfile.ZipFile(zip_stream) as archive:
            assert archive.testzip() is None
            assert len(archive.namelist()) == len(example_file_contents)

955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
    @pytest.mark.parametrize('query_params', [
        {'atoms': 'Si'},
        {'authors': 'Cooper, Sheldon'}
    ])
    def test_raw_files_from_query(self, client, processeds, test_user_auth, query_params):
        """A search query returns one zip with the example files of every processed upload."""
        query_url = '/raw/query?%s' % urlencode(query_params)
        response = client.get(query_url, headers=test_user_auth)

        assert response.status_code == 200
        assert len(response.data) > 0
        zip_stream = io.BytesIO(response.data)
        with zipfile.ZipFile(zip_stream) as archive:
            assert archive.testzip() is None
            assert len(archive.namelist()) == len(example_file_contents) * len(processeds)

970
971
972
973
974
975
976
977
978
979
    def test_raw_files_from_empty_query(self, client, elastic):
        """A query without matches still returns 200 and a valid zip with no entries."""
        response = client.get('/raw/query?upload_id=doesNotExist')

        assert response.status_code == 200
        assert len(response.data) > 0
        zip_stream = io.BytesIO(response.data)
        with zipfile.ZipFile(zip_stream) as archive:
            assert archive.testzip() is None
            assert len(archive.namelist()) == 0

980
981
    @UploadFilesBasedTests.ignore_authorization
    def test_raw_files_signed(self, client, upload, _, test_user_signature_token):
982
        url = '/raw/%s?files=%s&signature_token=%s' % (
983
984
985
986
987
988
989
990
991
            upload, ','.join(example_file_contents), test_user_signature_token)
        rv = client.get(url)

        assert rv.status_code == 200
        assert len(rv.data) > 0
<