diff --git a/.vscode/launch.json b/.vscode/launch.json
index 45f353f6037f90e4c8a7fa8bb5d291991391278e..7b934870170c205e77a6d7f3bb3aaa7d31b5d6e2 100644
--- a/.vscode/launch.json
+++ b/.vscode/launch.json
@@ -44,7 +44,7 @@
             "cwd": "${workspaceFolder}",
             "program": "${workspaceFolder}/.pyenv/bin/pytest",
             "args": [
-                "-sv", "tests/test_api.py::TestUploads::test_put[None-multipart-tests/data/proc/examples_template.zip]"
+                "-sv", "tests/test_api.py::TestUploads::test_post[tests/data/proc/empty.zip]"
             ]
         },
         {
diff --git a/docs/reference.rst b/docs/reference.rst
index 00c9e286368ceee0ac389c6422aa521ce678c4d0..db8b7b75ffc5129a45f5b20d278e81015b358efb 100644
--- a/docs/reference.rst
+++ b/docs/reference.rst
@@ -57,3 +57,7 @@ nomad.utils
 nomad.migration
 ---------------
 .. automodule:: nomad.migration
+
+tests
+-----
+.. automodule:: tests
diff --git a/docs/test_fixtures.png b/docs/test_fixtures.png
new file mode 100644
index 0000000000000000000000000000000000000000..3ade0e7335913fcabd61fb5848159c6f59a6f5bd
Binary files /dev/null and b/docs/test_fixtures.png differ
diff --git a/nomad/coe_repo/calc.py b/nomad/coe_repo/calc.py
index 76cecefc35ac2aad324938ff91cf1b26e3e75e31..f1a949d276e478bfec4fa64e1ff75992e036696b 100644
--- a/nomad/coe_repo/calc.py
+++ b/nomad/coe_repo/calc.py
@@ -17,6 +17,7 @@ import json
 from sqlalchemy import Column, Integer, String, ForeignKey
 from sqlalchemy.orm import relationship, aliased
 from sqlalchemy.sql.expression import literal
+from datetime import datetime

 from nomad import infrastructure, utils
 from nomad.datamodel import CalcWithMetadata
@@ -149,9 +150,16 @@ class Calc(Base):
             code_version_obj = CodeVersion(content=source_code_version)
             repo_db.add(code_version_obj)

+        if calc.upload_time is not None:
+            added_time = calc.upload_time
+        elif self.upload is not None and self.upload.upload_time is not None:
+            added_time = self.upload.upload_time
+        else:
+            added_time = datetime.now()
+
         metadata = CalcMetaData(
             calc=self,
-            added=calc.upload_time if calc.upload_time is not None else self.upload.upload_time,
+            added=added_time,
             chemical_formula=calc.formula,
             filenames=('[%s]' % ','.join(['"%s"' % filename for filename in calc.files])).encode('utf-8'),
             location=calc.mainfile,
@@ -183,41 +191,45 @@ class Calc(Base):
             self._set_value(base.topic_basis_set_type, calc.basis_set)

         # user relations
-        if calc.uploader is not None:
-            uploader = repo_db.query(User).get(calc.uploader.id)
-        else:
-            uploader = self.upload.user
+        def add_users_to_relation(source_users, relation):
+            for source_user in source_users:
+                coe_user = repo_db.query(User).get(source_user.id)
+                source_user.update(coe_user.to_popo())
+                relation.append(coe_user)

-        self.owners.append(uploader)
-
-        for coauthor in calc.coauthors:
-            self.coauthors.append(repo_db.query(User).get(coauthor.id))
+        if calc.uploader is not None:
+            add_users_to_relation([calc.uploader], self.owners)
+        elif self.upload is not None and self.upload.user is not None:
+            self.owners.append(self.upload.user)
+            calc.uploader = self.upload.user.to_popo()

-        for shared_with in calc.shared_with:
-            self.shared_with.append(repo_db.query(User).get(shared_with.id))
+        add_users_to_relation(calc.coauthors, self.coauthors)
+        add_users_to_relation(calc.shared_with, self.shared_with)

         # datasets
         for dataset in calc.datasets:
             dataset_id = dataset.id
-            coe_dataset = repo_db.query(Calc).get(dataset_id)
-            if coe_dataset is None:
-                coe_dataset = Calc(coe_calc_id=dataset_id)
-                repo_db.add(coe_dataset)
+            coe_dataset_calc: Calc = repo_db.query(Calc).get(dataset_id)
+            if coe_dataset_calc is None:
+                coe_dataset_calc = Calc(coe_calc_id=dataset_id)
+                repo_db.add(coe_dataset_calc)

             metadata = CalcMetaData(
-                calc=coe_dataset,
+                calc=coe_dataset_calc,
                 added=self.upload.upload_time,
                 chemical_formula=dataset.name)
             repo_db.add(metadata)

             if dataset.doi is not None:
-                self._add_citation(coe_dataset, dataset.doi['value'], 'INTERNAL')
+                self._add_citation(coe_dataset_calc, dataset.doi['value'], 'INTERNAL')

             # cause a flush to avoid future inconsistencies
-            coe_dataset = repo_db.query(Calc).get(dataset_id)
+            coe_dataset_calc = repo_db.query(Calc).get(dataset_id)

-            dataset = CalcSet(parent_calc_id=dataset_id, children_calc_id=self.coe_calc_id)
-            repo_db.add(dataset)
+            coe_dataset_rel = CalcSet(parent_calc_id=dataset_id, children_calc_id=self.coe_calc_id)
+            repo_db.add(coe_dataset_rel)
+
+            dataset.update(DataSet(coe_dataset_calc).to_popo())

         # references
         for reference in calc.references:
@@ -284,9 +296,7 @@ class Calc(Base):
         result.pid = self.pid
         result.uploader = self.uploader.to_popo()
         result.upload_time = self.calc_metadata.added
-        result.datasets = list(
-            utils.POPO(id=ds.id, doi=ds.doi.to_popo(), name=ds.name)
-            for ds in datasets)
+        result.datasets = list(ds.to_popo() for ds in datasets)
         result.with_embargo = self.with_embargo
         result.comment = self.comment
         result.references = list(
@@ -320,3 +330,6 @@ class DataSet:
     @property
     def name(self):
         return self._dataset_calc.calc_metadata.chemical_formula
+
+    def to_popo(self):
+        return utils.POPO(id=self.id, doi=self.doi.to_popo(), name=self.name)
diff --git a/nomad/coe_repo/user.py b/nomad/coe_repo/user.py
index 23820f8de867f4de945845c6435b40ee514a0a10..f3c8244fc458aff5a56a6e9825fe9fffb71b8d48 100644
--- a/nomad/coe_repo/user.py
+++ b/nomad/coe_repo/user.py
@@ -150,7 +150,7 @@ class User(Base):  # type: ignore

     def to_popo(self) -> utils.POPO:
         return utils.POPO(
-            user_id=self.user_id,
+            id=self.user_id,
             first_name=self.first_name,
             last_name=self.last_name,
             email=self.email,
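
Note: User.to_popo() now publishes the repository user id under the key 'id' instead of
'user_id'; the lookups in nomad/migration.py and nomad/search.py below are adjusted to
match. A minimal sketch of the new contract (the values are made up for illustration;
utils.POPO behaves like a dict with attribute access):

    user_popo = utils.POPO(
        id=42,  # previously exposed as 'user_id'
        first_name='Sheldon',
        last_name='Cooper',
        email='sheldon.cooper@nomad-fairdi.tests.de')
    assert user_popo['id'] == 42
    assert 'user_id' not in user_popo
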
diff --git a/nomad/datamodel.py b/nomad/datamodel.py
index 1e06cbc04f607a9e5e16b268ad60bf56831f0498..2df4069f63b260a9130cb2da3bb98f924d19fc52 100644
--- a/nomad/datamodel.py
+++ b/nomad/datamodel.py
@@ -122,3 +122,23 @@ class CalcWithMetadata():
             key: value for key, value in self.__dict__.items()
             if value is not None
         }
+
+    def apply_user_metadata(self, metadata: dict):
+        """
+        Applies a user provided metadata dict to this calc.
+        """
+        self.pid = metadata.get('_pid')
+        self.comment = metadata.get('comment')
+        self.upload_time = metadata.get('_upload_time')
+        uploader_id = metadata.get('_uploader')
+        if uploader_id is not None:
+            self.uploader = utils.POPO(id=uploader_id)
+        self.references = [utils.POPO(value=ref) for ref in metadata.get('references', [])]
+        self.with_embargo = metadata.get('with_embargo', False)
+        self.coauthors = [
+            utils.POPO(id=user) for user in metadata.get('coauthors', [])]
+        self.shared_with = [
+            utils.POPO(id=user) for user in metadata.get('shared_with', [])]
+        self.datasets = [
+            utils.POPO(id=ds['id'], doi=utils.POPO(value=ds.get('_doi')), name=ds.get('_name'))
+            for ds in metadata.get('datasets', [])]
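
Note: a quick usage sketch for the new CalcWithMetadata.apply_user_metadata. The keys
are the ones read by the implementation above; the values are made up for illustration:

    calc = CalcWithMetadata(upload_id='example_upload', calc_id='example_calc')
    calc.apply_user_metadata(dict(
        _pid=256,
        comment='a user comment',
        _uploader=1,  # a repository user id
        references=['http://external.ref/one'],
        coauthors=[2],
        shared_with=[3],
        with_embargo=True,
        datasets=[dict(id=4, _doi='10.1000/example', _name='example dataset')]))
    assert calc.uploader.id == 1
    assert calc.references[0].value == 'http://external.ref/one'
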
""" return dict( _upload_time=source.upload_time, - _uploader=source.uploader['user_id'], + _uploader=source.uploader['id'], _pid=source.pid, references=[ref['value'] for ref in source.references], datasets=[dict( @@ -433,8 +433,8 @@ class NomadCOEMigration: mainfile=source.mainfile, with_embargo=source.with_embargo, comment=source.comment, - coauthors=list(user['user_id'] for user in source.coauthors), - shared_with=list(user['user_id'] for user in source.shared_with) + coauthors=list(user['id'] for user in source.coauthors), + shared_with=list(user['id'] for user in source.shared_with) ) def index(self, *args, **kwargs): diff --git a/nomad/processing/data.py b/nomad/processing/data.py index 84adbc8b6ec001e04bc0f6578ba46b7eb5678788..11517ce755b879a6e5ecdd96e50128255b404670 100644 --- a/nomad/processing/data.py +++ b/nomad/processing/data.py @@ -30,7 +30,7 @@ import logging from structlog import wrap_logger from contextlib import contextmanager -from nomad import utils, coe_repo, config, infrastructure +from nomad import utils, coe_repo, config, infrastructure, search from nomad.files import PathObject, UploadFiles, ExtractError, ArchiveBasedStagingUploadFiles from nomad.processing.base import Proc, Chord, process, task, PENDING, SUCCESS, FAILURE from nomad.parsing import parsers, parser_dict @@ -231,14 +231,19 @@ class Calc(Proc): def archiving(self): logger = self.get_logger() + calc_with_metadata = self._parser_backend.to_calc_with_metadata() + # persist the repository metadata - with utils.timer(logger, 'indexed', step='index'): - self.upload_files.metadata.insert( - self._parser_backend.to_calc_with_metadata().to_dict()) + with utils.timer(logger, 'saved repo metadata', step='persist'): + self.upload_files.metadata.insert(calc_with_metadata.to_dict()) + + # index in search + with utils.timer(logger, 'indexed', step='persist'): + search.Entry.from_calc_with_metadata(calc_with_metadata, published=False).persist() # persist the archive with utils.timer( - logger, 'archived', step='archive', + logger, 'archived', step='persist', input_size=self.mainfile_file.size) as log_data: with self.upload_files.archive_file(self.calc_id, 'wt') as out: self._parser_backend.write_json(out, pretty=True) @@ -248,7 +253,7 @@ class Calc(Proc): # close loghandler if self._calc_proc_logwriter is not None: with utils.timer( - logger, 'archived log', step='archive_log', + logger, 'archived log', step='persist', input_size=self.mainfile_file.size) as log_data: self._calc_proc_logwriter_ctx.__exit__(None, None, None) # pylint: disable=E1101 self._calc_proc_logwriter = None @@ -350,6 +355,11 @@ class Upload(Chord): logger = self.get_logger() with utils.lnr(logger, 'staged upload delete failed'): + with utils.timer( + logger, 'upload deleted from index', step='delete', + upload_size=self.upload_files.size): + search.Entry.delete_upload(self.upload_id) + with utils.timer( logger, 'staged upload deleted', step='delete', upload_size=self.upload_files.size): @@ -369,16 +379,23 @@ class Upload(Chord): logger = self.get_logger() with utils.lnr(logger, 'publish failed'): + upload_with_metadata = self.to_upload_with_metadata() + with utils.timer( logger, 'upload added to repository', step='publish', upload_size=self.upload_files.size): - coe_repo.Upload.add(self.to_upload_with_metadata()) + coe_repo.Upload.add(upload_with_metadata) with utils.timer( logger, 'staged upload files packed', step='publish', upload_size=self.upload_files.size): self.upload_files.pack() + with utils.timer( + logger, 'index updated', 
step='publish', + upload_size=self.upload_files.size): + search.Entry.publish_upload(upload_with_metadata) + with utils.timer( logger, 'staged upload deleted', step='publish', upload_size=self.upload_files.size): @@ -523,25 +540,8 @@ class Upload(Chord): def apply_metadata(calc): metadata = calc_metadata.get(calc.mainfile, self.metadata) - if metadata is None: - return calc - - calc.pid = metadata.get('_pid') - calc.comment = metadata.get('comment') - calc.upload_time = metadata.get('_upload_time') - uploader_id = metadata.get('_uploader') - if uploader_id is not None: - calc.uploader = utils.POPO(id=uploader_id) - calc.references = [utils.POPO(value=ref) for ref in metadata.get('references', [])] - calc.with_embargo = metadata.get('with_embargo', False) - calc.coauthors = [ - utils.POPO(id=user) for user in metadata.get('coauthors', [])] - calc.shared_with = [ - utils.POPO(id=user) for user in metadata.get('shared_with', [])] - calc.datasets = [ - utils.POPO(id=ds['id'], doi=utils.POPO(value=ds.get('_doi')), name=ds.get('_name')) - for ds in metadata.get('datasets', [])] - + if metadata is not None: + calc.apply_user_metadata(metadata) return calc result = UploadWithMetadata( diff --git a/nomad/search.py b/nomad/search.py index 474ec683429cc660cd8eaa968bd293dc620fd06a..c30f4efc30072b7bf52c57129967530be29adc43 100644 --- a/nomad/search.py +++ b/nomad/search.py @@ -20,20 +20,27 @@ from elasticsearch.exceptions import ConflictError, ConnectionTimeout from datetime import datetime import time from elasticsearch_dsl import Document, InnerDoc, Keyword, Text, Date, \ - Nested + Nested, Boolean, Search -from nomad import config, datamodel, infrastructure, datamodel +from nomad import config, datamodel, infrastructure, datamodel, coe_repo class AlreadyExists(Exception): pass class User(InnerDoc): - def __init__(self, user): - super().__init__( - id=user.user_id, - name='%s %s' % (user.first_name, user.last_name), - name_keyword='%s %s' % (user.first_name, user.last_name)) + + @classmethod + def from_user_popo(cls, user): + self = cls(id=user.id) + + if 'first_name' not in user: + user = coe_repo.User.from_user_id(user.id).to_popo() + + self.name = '%s %s' % (user['first_name'], user['last_name']) + self.name_keyword = '%s %s' % (user['first_name'], user['last_name']) + + return self id = Keyword() name = Text() @@ -41,8 +48,10 @@ class User(InnerDoc): class Dataset(InnerDoc): - def __init__(self, dataset): - super().__init__( + + @classmethod + def from_dataset_popo(cls, dataset): + return cls( id=dataset.id, doi=dataset.doi.value if dataset.doi is not None else None, name=dataset.name) @@ -57,15 +66,17 @@ class Entry(Document): name = config.elastic.index_name upload_id = Keyword() - upload_time = Date(format='epoch_millis') + upload_time = Date() calc_id = Keyword() calc_hash = Keyword() pid = Keyword() mainfile = Keyword() - files = Keyword() + files = Keyword(multi=True) uploader = Nested(User) - with_embargo = Keyword() + with_embargo = Boolean() + published = Boolean() + coauthors = Nested(User) shared_with = Nested(User) comment = Text() @@ -73,7 +84,7 @@ class Entry(Document): datasets = Nested(Dataset) formula = Keyword() - atoms = Keyword() + atoms = Keyword(multi=True) basis_set = Keyword() xc_functional = Keyword() system = Keyword() @@ -83,7 +94,7 @@ class Entry(Document): code_version = Keyword() @classmethod - def from_calc_with_metadata(cls, source: datamodel.CalcWithMetadata) -> 'Entry': + def from_calc_with_metadata(cls, source: datamodel.CalcWithMetadata, published: bool = 
False) -> 'Entry': return Entry( meta=dict(id=source.calc_id), upload_id=source.upload_id, @@ -93,14 +104,15 @@ class Entry(Document): pid=str(source.pid), mainfile=source.mainfile, files=source.files, - uploader=User(source.uploader) if source.uploader is not None else None, + uploader=User.from_user_popo(source.uploader) if source.uploader is not None else None, with_embargo=source.with_embargo, - coauthors=[User(user) for user in source.coauthors], - shared_with=[User(user) for user in source.shared_with], + published=published, + coauthors=[User.from_user_popo(user) for user in source.coauthors], + shared_with=[User.from_user_popo(user) for user in source.shared_with], comment=source.comment, references=[ref.value for ref in source.references], - datasets=[Dataset(ds) for ds in source.datasets], + datasets=[Dataset.from_dataset_popo(ds) for ds in source.datasets], formula=source.formula, atoms=list(set(source.atoms)), @@ -112,6 +124,11 @@ class Entry(Document): code_name=source.code_name, code_version=source.code_version) + @classmethod + def add_upload(cls, source: datamodel.UploadWithMetadata): + for calc in source.calcs: + cls.from_calc_with_metadata(calc).save(op_type='create') + def persist(self, **kwargs): """ Persist this entry to elastic search. Kwargs are passed to elastic search. @@ -160,6 +177,16 @@ class Entry(Document): } conn.update_by_query(index, doc_type=[doc_type], body=body) + @classmethod + def publish_upload(cls, upload: datamodel.UploadWithMetadata): + cls.update_by_query(upload.upload_id, 'ctx._source["published"] = true') + # TODO run update on all calcs with their new metadata + + @classmethod + def delete_upload(cls, upload_id): + index = cls._default_index() + Search(index=index).query('match', upload_id=upload_id).delete() + @staticmethod def es_search(body): """ Perform an elasticsearch and not elasticsearch_dsl search on the Calc index. """ @@ -175,6 +202,3 @@ class Entry(Document): data['upload_time'] = data['upload_time'].isoformat() return {key: value for key, value in data.items() if value is not None} - - -# Entry.register_mapping(datamodel.CalcWithMetadata, Entry.from_calc_with_metadata) diff --git a/tests/__init__.py b/tests/__init__.py index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..d0a8968a51a41d145789408d2bfffaec7f6568b9 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -0,0 +1,33 @@ +# Copyright 2018 Markus Scheidgen +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an"AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +The nomad@FAIRDI tests are based on the pytest library. Pytest uses *fixtures* to +modularize setup and teardown of mocks, infrastructure, and other context objects. +The following depicts the used hierarchy of fixtures: + +.. image:: test_fixtures.png + +Otherwise the test submodules follow the names of the nomad code modules. +""" + +from nomad import config + +# For convinience we test the api without path prefix. +# This should be setup with a fixture with in conftest.py, but it will be too late. 
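
Note: together these classmethods form the search module's entry lifecycle. A sketch
of the intended call sequence, mirroring how nomad/processing/data.py above uses them
(assuming calc is a CalcWithMetadata and upload an UploadWithMetadata):

    # during processing, index the calc as unpublished
    search.Entry.from_calc_with_metadata(calc, published=False).persist()

    # on publish, flip the published flag on all of the upload's entries
    search.Entry.publish_upload(upload)

    # on delete of a staged upload, drop its entries from the index
    search.Entry.delete_upload(upload.upload_id)
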
diff --git a/tests/__init__.py b/tests/__init__.py
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..d0a8968a51a41d145789408d2bfffaec7f6568b9 100644
--- a/tests/__init__.py
+++ b/tests/__init__.py
@@ -0,0 +1,33 @@
+# Copyright 2018 Markus Scheidgen
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+The nomad@FAIRDI tests are based on the pytest library. Pytest uses *fixtures* to
+modularize setup and teardown of mocks, infrastructure, and other context objects.
+The following depicts the hierarchy of fixtures used:
+
+.. image:: test_fixtures.png
+
+Otherwise the test submodules follow the names of the nomad code modules.
+"""
+
+from nomad import config
+
+# For convenience we test the API without a path prefix.
+# This should be set up with a fixture in conftest.py, but that would be too late:
+# after importing the api module, the config values have already been used and
+# changing them afterwards does not change anything anymore.
+services_config = config.services._asdict()
+services_config.update(api_base_path='')
+config.services = config.NomadServicesConfig(**services_config)
diff --git a/tests/bravado_flaks.py b/tests/bravado_flask.py
similarity index 100%
rename from tests/bravado_flaks.py
rename to tests/bravado_flask.py
diff --git a/tests/conftest.py b/tests/conftest.py
index 3416451c4a4305c7aa720cac0610726b1a4d09cf..dc01e9ec597c269c378ab3b99206e1b84ce49823 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -1,3 +1,18 @@
+# Copyright 2018 Markus Scheidgen
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import Tuple
 import pytest
 import logging
 from sqlalchemy.orm import Session
@@ -10,9 +25,20 @@ from threading import Lock, Thread
 import asyncore
 import time
 import pytest
-import elasticsearch.exceptions
+import shutil
+import os.path
+import datetime
+import base64
+from bravado.client import SwaggerClient

-from nomad import config, infrastructure
+from nomad import config, infrastructure, files, parsing, processing, coe_repo, api
+
+from tests import test_parsing, test_normalizing
+from tests.processing import test_data as test_processing
+from tests.test_files import example_file, empty_file
+from tests.bravado_flask import FlaskTestHttpClient
+
+example_files = [empty_file, example_file]


 @pytest.fixture(scope="session")
@@ -23,12 +49,6 @@ def monkeysession(request):
     mpatch.undo()


-@pytest.fixture(scope='session', autouse=True)
-def nomad_files(monkeysession):
-    monkeysession.setattr('nomad.config.fs', config.FSConfig(
-        tmp='.volumes/test_fs/tmp', objects='.volumes/test_fs/objects'))
-
-
 @pytest.fixture(scope='session', autouse=True)
 def nomad_logging():
     config.logstash = config.logstash._replace(enabled=False)
@@ -36,6 +56,36 @@ def nomad_logging():
     infrastructure.setup_logging()


+@pytest.fixture(scope='session', autouse=True)
+def raw_files_infra(monkeysession):
+    monkeysession.setattr('nomad.config.fs', config.FSConfig(
+        tmp='.volumes/test_fs/tmp', objects='.volumes/test_fs/objects'))
+
+
+@pytest.fixture(scope='function')
+def raw_files(raw_files_infra):
+    """ Provides a cleaned-out files directory structure per function. Clears files after test. """
+    try:
+        yield
+    finally:
+        try:
+            shutil.rmtree(config.fs.objects)
+        except FileNotFoundError:
+            pass
+        try:
+            shutil.rmtree(config.fs.tmp)
+        except FileNotFoundError:
+            pass
+
+
+@pytest.fixture(scope='function')
+def client(monkeysession):
+    api.app.config['TESTING'] = True
+    client = api.app.test_client()
+
+    yield client
+
+
 @pytest.fixture(scope='session')
 def celery_includes():
     return ['nomad.processing.base']
@@ -49,57 +99,14 @@ def celery_config():


 @pytest.fixture(scope='session')
-def purged_app(celery_session_app):
-    """
-    Purges all pending tasks of the celery app before test. This is necessary to
-    remove tasks from the queue that might be 'left over' from prior tests.
-    """
-    celery_session_app.control.purge()
-    yield celery_session_app
-
-
-@pytest.fixture()
-def patched_celery(monkeypatch):
-    # There is a bug in celery, which prevents to use the celery_worker for multiple
-    # tests: https://github.com/celery/celery/issues/4088
-    # The bug has a fix from Aug 2018, but it is not yet released (TODO).
-    # We monkeypatch a similar solution here.
-    def add_reader(self, fds, callback, *args):
-        from kombu.utils.eventio import ERR, READ, poll
-
-        if self.poller is None:
-            self.poller = poll()
-
-        return self.add(fds, callback, READ | ERR, args)
-
-    monkeypatch.setattr('kombu.asynchronous.hub.Hub.add_reader', add_reader)
-    yield
-
-
-@pytest.fixture(scope='session')
-def celery_inspect(purged_app):
-    yield purged_app.control.inspect()
-
-
-@pytest.fixture()
-def worker(patched_celery, celery_inspect, celery_session_worker):
-    """
-    Extension of the celery_session_worker fixture that ensures a clean task queue.
-    """
-    yield
-
-    # wait until there no more active tasks, to leave clean worker and queues for the next
-    # test.
-    while True:
-        empty = True
-        for value in celery_inspect.active().values():
-            empty = empty and len(value) == 0
-        if empty:
-            break
+def worker(celery_session_worker):
+    """ Provides a clean worker (no old tasks) per function. Waits for all tasks to be completed. """
+    pass


 @pytest.fixture(scope='function')
-def mockmongo(monkeypatch):
+def mongo(monkeypatch):
+    """ Provides a cleaned mocked mongo per function. """
     disconnect()
     connection = connect('test_db', host='mongomock://localhost')

@@ -110,22 +117,25 @@ def mockmongo(monkeypatch):
     connection.drop_database('test_db')


-@pytest.fixture(scope='function')
-def elastic(monkeysession):
+@pytest.fixture(scope='session')
+def elastic_infra(monkeysession):
+    """ Provides elastic infrastructure to the session """
     monkeysession.setattr('nomad.config.elastic', config.elastic._replace(index_name='test_nomad_fairdi_calcs'))
-    infrastructure.setup_elastic()
-    try:
-        from nomad.search import Entry
-        Entry._index.delete()
-        Entry.init(index=config.elastic.index_name)
-    except elasticsearch.exceptions.NotFoundError:
-        pass
+    return infrastructure.setup_elastic()
+
+
+@pytest.fixture(scope='function')
+def elastic(elastic_infra):
+    """ Provides a clean elastic per function. Clears elastic before test. """
+    elastic_infra.delete_by_query(
+        index='test_nomad_fairdi_calcs', body=dict(query=dict(match_all={})),
+        wait_for_completion=True, refresh=True)

     assert infrastructure.elastic_client is not None
+    return elastic_infra


 @contextmanager
-def create_repository_db(monkeysession=None, **kwargs):
+def create_postgres_infra(monkeysession=None, **kwargs):
     """
     A generator that sets up and tears down a test db and monkeypatches it to the
     respective global infrastructure variables.
@@ -176,42 +186,83 @@

 @pytest.fixture(scope='module')
-def repository_db(monkeysession):
-    with create_repository_db(monkeysession, exists=False) as db:
+def postgres_infra(monkeysession):
+    """ Provides a clean coe repository db per module """
+    with create_postgres_infra(monkeysession, exists=False) as db:
         yield db


 @pytest.fixture(scope='function')
-def expandable_repo_db(monkeysession, repository_db):
-    with create_repository_db(monkeysession, dbname='test_nomad_fairdi_expandable_repo_db', exists=False) as db:
+def proc_infra(postgres, elastic, mongo, worker):
+    """ Combines all fixtures necessary for processing (postgres, elastic, worker, files, mongo) """
+    return dict(
+        postgres=postgres,
+        elastic=elastic)
+
+
+@pytest.fixture(scope='function')
+def expandable_postgres(monkeysession, postgres_infra):
+    """ Provides a coe repository db that can be deleted during test """
+    with create_postgres_infra(monkeysession, dbname='test_nomad_fairdi_expandable_repo_db', exists=False) as db:
         yield db


 @pytest.fixture(scope='function')
-def clean_repository_db(repository_db):
+def postgres(postgres_infra):
+    """ Provides a clean coe repository db per function. Clears db before test. """
     # do not wonder, this will not setback the id counters
-    repository_db.execute('TRUNCATE uploads CASCADE;')
-    yield repository_db
+    postgres_infra.execute('TRUNCATE uploads CASCADE;')
+    yield postgres_infra


 @pytest.fixture(scope='module')
-def test_user(repository_db):
+def test_user(postgres_infra):
     from nomad import coe_repo
     return coe_repo.ensure_test_user(email='sheldon.cooper@nomad-fairdi.tests.de')


 @pytest.fixture(scope='module')
-def other_test_user(repository_db):
+def other_test_user(postgres_infra):
     from nomad import coe_repo
     return coe_repo.ensure_test_user(email='leonard.hofstadter@nomad-fairdi.tests.de')


 @pytest.fixture(scope='module')
-def admin_user(repository_db):
+def admin_user(postgres_infra):
     from nomad import coe_repo
     return coe_repo.admin_user()


+def create_auth_headers(user):
+    basic_auth_str = '%s:password' % user.email
+    basic_auth_bytes = basic_auth_str.encode('utf-8')
+    basic_auth_base64 = base64.b64encode(basic_auth_bytes).decode('utf-8')
+    return {
+        'Authorization': 'Basic %s' % basic_auth_base64
+    }
+
+
+@pytest.fixture(scope='module')
+def test_user_auth(test_user: coe_repo.User):
+    return create_auth_headers(test_user)
+
+
+@pytest.fixture(scope='module')
+def test_other_user_auth(other_test_user: coe_repo.User):
+    return create_auth_headers(other_test_user)
+
+
+@pytest.fixture(scope='module')
+def admin_user_auth(admin_user: coe_repo.User):
+    return create_auth_headers(admin_user)
+
+
+@pytest.fixture(scope='function')
+def bravado(client, postgres, test_user_auth):
+    http_client = FlaskTestHttpClient(client, headers=test_user_auth)
+    return SwaggerClient.from_url('/swagger.json', http_client=http_client)
+
+
 @pytest.fixture(scope='function')
 def no_warn(caplog):
     yield caplog
@@ -231,6 +282,17 @@
     assert count > 0


+@pytest.fixture(scope='function')
+def with_warn(caplog):
+    yield caplog
+    count = 0
+    for record in caplog.get_records(when='call'):
+        if record.levelname in ['WARNING']:
+            count += 1
+
+    assert count > 0
+
+
 """
 Fixture for mocked SMTP server for testing.
 Based on https://gist.github.com/akheron/cf3863cdc424f08929e4cb7dc365ef23.
@@ -342,3 +404,61 @@
     monkeypatch.setattr('nomad.config.mail', new_config)

     yield smtpd
+
+
+@pytest.fixture(scope='session')
+def example_mainfile() -> Tuple[str, str]:
+    return ('parsers/template', 'tests/data/parsers/template.json')
+
+
+@pytest.fixture(scope='session', params=example_files)
+def example_upload(request) -> str:
+    return request.param
+
+
+@pytest.fixture(scope='module')
+def example_user_metadata(other_test_user, test_user) -> dict:
+    return {
+        'comment': 'test comment',
+        'with_embargo': True,
+        'references': ['http://external.ref/one', 'http://external.ref/two'],
+        '_uploader': other_test_user.user_id,
+        'coauthors': [test_user.user_id],
+        '_upload_time': datetime.datetime.now(),
+        '_pid': 256
+    }
+
+
+@pytest.fixture(scope='function')
+def parsed(example_mainfile: Tuple[str, str]) -> parsing.LocalBackend:
+    """ Provides a parsed calculation in the form of a LocalBackend. """
+    parser, mainfile = example_mainfile
+    return test_parsing.run_parser(parser, mainfile)
+
+
+@pytest.fixture(scope='function')
+def normalized(parsed: parsing.LocalBackend) -> parsing.LocalBackend:
+    """ Provides a normalized calculation in the form of a LocalBackend. """
+    return test_normalizing.run_normalize(parsed)
+
+
+@pytest.fixture(scope='function')
+def uploaded(example_upload: str, raw_files) -> str:
+    """
+    Provides an upload with an uploaded example file and returns the upload_id.
+    Clears files after test.
+    """
+    example_upload_id = os.path.basename(example_upload).replace('.zip', '')
+    upload_files = files.ArchiveBasedStagingUploadFiles(example_upload_id, create=True)
+    shutil.copyfile(example_upload, upload_files.upload_file_os_path)
+
+    return example_upload_id
+
+
+@pytest.mark.timeout(10)
+@pytest.fixture(scope='function')
+def processed(uploaded: str, test_user: coe_repo.User, proc_infra) -> processing.Upload:
+    """
+    Provides a processed upload. Upload was uploaded with test_user.
+    """
+    return test_processing.run_processing(uploaded, test_user)
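
Note: with this fixture hierarchy a test only names the top-level fixtures it needs and
pytest assembles the rest (raw_files, mongo, postgres, elastic, worker) transitively.
A hypothetical sketch:

    def test_my_processing(processed, no_warn):
        # 'processed' is a fully processed processing.Upload for one of the example files
        assert processed.current_task == 'cleanup'
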
diff --git a/tests/processing/test_base.py b/tests/processing/test_base.py
index e873a42986bf5df312ee80ec6c2497967e9641e3..ec524b05475ef7653c2b49383cb071bdde990510 100644
--- a/tests/processing/test_base.py
+++ b/tests/processing/test_base.py
@@ -34,7 +34,7 @@ class SingleTask(Proc):
         pass


-def test_tasks(mockmongo):
+def test_tasks(mongo):
     p = Tasks.create()
     assert p.tasks == ['a', 'b']
     assert_proc(p, None, PENDING)
@@ -56,7 +56,7 @@ class FailTasks(Proc):
         self.fail('fail fail fail')


-def test_fail(mockmongo, with_error):
+def test_fail(mongo, with_error):
     p = FailTasks.create()
     p.will_fail()

@@ -84,7 +84,7 @@ class SimpleProc(Proc):
         pass


-def test_simple_process(mockmongo, worker, no_warn):
+def test_simple_process(mongo, worker, no_warn):
     p = SimpleProc.create()
     p.process()
     p.block_until_complete()
@@ -99,7 +99,7 @@ class TaskInProc(Proc):


 @pytest.mark.timeout(5)
-def test_task_as_proc(mockmongo, worker, no_warn):
+def test_task_as_proc(mongo, worker, no_warn):
     p = TaskInProc.create()
     p.process()
     p.block_until_complete()
@@ -118,7 +118,7 @@ class ProcInProc(Proc):
         pass


-def test_fail_on_proc_in_proc(mockmongo, worker):
+def test_fail_on_proc_in_proc(mongo, worker):
     p = ProcInProc.create()
     p.one()
     p.block_until_complete()
@@ -152,7 +152,7 @@ class ChildProc(Proc):


 @pytest.mark.timeout(10)
-def test_counter(mockmongo, worker, no_warn):
+def test_counter(mongo, worker, no_warn):
     p = ParentProc.create()
     p.spawn_children()
     p.block_until_complete()
diff --git a/tests/processing/test_data.py b/tests/processing/test_data.py
index f45a8222865c45d4e7a36d5240843089db92c0f6..770c122a268c86ac19fd22630427d54060730f00 100644
--- a/tests/processing/test_data.py
+++ b/tests/processing/test_data.py
@@ -31,13 +31,6 @@ from nomad.files import ArchiveBasedStagingUploadFiles, UploadFiles, StagingUplo
 from nomad.processing import Upload, Calc
 from nomad.processing.base import task as task_decorator, FAILURE, SUCCESS

-from tests.test_files import example_file, empty_file
-
-# import fixtures
-from tests.test_files import clear_files  # pylint: disable=unused-import
-
-example_files = [empty_file, example_file]
-

 def test_send_mail(mails):
     infrastructure.send_mail('test name', 'test@email.de', 'test message', 'subjct')
@@ -47,22 +40,12 @@

 @pytest.fixture(scope='function', autouse=True)
-def mocks_forall(mockmongo):
+def mongo_forall(mongo):
     pass


-@pytest.fixture(scope='function', params=example_files)
-def uploaded_id(request, clear_files) -> Generator[str, None, None]:
-    example_file = request.param
-    example_upload_id = os.path.basename(example_file).replace('.zip', '')
-    upload_files = ArchiveBasedStagingUploadFiles(example_upload_id, create=True)
-    shutil.copyfile(example_file, upload_files.upload_file_os_path)
-
-    yield example_upload_id
-
-
 @pytest.fixture
-def uploaded_id_with_warning(request, clear_files) -> Generator[str, None, None]:
+def uploaded_id_with_warning(raw_files) -> Generator[str, None, None]:
     example_file = 'tests/data/proc/examples_with_warning_template.zip'
     example_upload_id = os.path.basename(example_file).replace('.zip', '')
     upload_files = ArchiveBasedStagingUploadFiles(example_upload_id, create=True)
@@ -84,11 +67,6 @@ def run_processing(uploaded_id: str, test_user) -> Upload:
     return upload


-@pytest.fixture
-def processed_upload(uploaded_id, test_user, worker, no_warn) -> Upload:
-    return run_processing(uploaded_id, test_user)
-
-
 def assert_processing(upload: Upload):
     assert not upload.tasks_running
     assert upload.current_task == 'cleanup'
@@ -119,22 +97,25 @@ def assert_processing(upload: Upload):
     assert upload_files.metadata.get(calc.calc_id) is not None


-@pytest.mark.timeout(30)
-def test_processing(uploaded_id, worker, test_user, no_warn, mails):
-    upload = run_processing(uploaded_id, test_user)
-    assert_processing(upload)
+def test_processing(processed, no_warn, mails):
+    assert_processing(processed)

     assert len(mails.messages) == 1
     assert re.search(r'Processing completed', mails.messages[0].data.decode('utf-8')) is not None


-@pytest.mark.timeout(30)
-def test_processing_with_warning(uploaded_id_with_warning, worker, test_user):
-    upload = run_processing(uploaded_id_with_warning, test_user)
+@pytest.mark.timeout(10)
+def test_processing_with_warning(raw_files, worker, test_user, with_warn):
+    example_file = 'tests/data/proc/examples_with_warning_template.zip'
+    example_upload_id = os.path.basename(example_file).replace('.zip', '')
+    upload_files = ArchiveBasedStagingUploadFiles(example_upload_id, create=True)
+    shutil.copyfile(example_file, upload_files.upload_file_os_path)
+
+    upload = run_processing(example_upload_id, test_user)
     assert_processing(upload)


-@pytest.mark.timeout(30)
+@pytest.mark.timeout(10)
 def test_process_non_existing(worker, test_user, with_error):
     upload = run_processing('__does_not_exist', test_user)

@@ -145,8 +126,8 @@

 @pytest.mark.parametrize('task', ['extracting', 'parse_all', 'cleanup', 'parsing'])
-@pytest.mark.timeout(30)
-def test_task_failure(monkeypatch, uploaded_id, worker, task, test_user, with_error):
+@pytest.mark.timeout(10)
+def test_task_failure(monkeypatch, uploaded, worker, task, test_user, with_error):
     # mock the task method to through exceptions
     if hasattr(Upload, task):
         cls = Upload
@@ -163,7 +144,7 @@
     monkeypatch.setattr('nomad.processing.data.%s.%s' % (cls.__name__, task), mock)

     # run the test
-    upload = run_processing(uploaded_id, test_user)
+    upload = run_processing(uploaded, test_user)

     assert not upload.tasks_running
diff --git a/tests/test_api.py b/tests/test_api.py
index d1f2b04310eb69ceb1bd643c88dd2593b1984810..782fa002b837c8f6b0f046df041aaa3e9fe71c83 100644
--- a/tests/test_api.py
+++ b/tests/test_api.py
@@ -15,45 +15,20 @@
 import pytest
 import time
 import json
-from mongoengine import connect
-from mongoengine.connection import disconnect
 import base64
 import zipfile
 import io
 import inspect

-from nomad import config
-# for convinience we test the api without path prefix
-services_config = config.services._asdict()
-services_config.update(api_base_path='')
-config.services = config.NomadServicesConfig(**services_config)
+from nomad import config, coe_repo
+from nomad.files import UploadFiles, PublicUploadFiles
+from nomad.processing import Upload, Calc, SUCCESS
+from nomad.coe_repo import User

-from nomad import api, coe_repo  # noqa
-from nomad.files import UploadFiles, PublicUploadFiles  # noqa
-from nomad.processing import Upload, Calc, SUCCESS  # noqa
-from nomad.coe_repo import User  # noqa
-
-from tests.processing.test_data import example_files  # noqa
-from tests.test_files import example_file, example_file_mainfile, example_file_contents  # noqa
-from tests.test_files import create_staging_upload, create_public_upload  # noqa
-
-# import fixtures
-from tests.test_normalizing import normalized_template_example  # noqa pylint: disable=unused-import
-from tests.test_parsing import parsed_template_example  # noqa pylint: disable=unused-import
-# from tests.test_repo import example_elastic_calc  # noqa pylint: disable=unused-import
-from tests.test_coe_repo import assert_coe_upload  # noqa
-
-
-@pytest.fixture(scope='function')
-def client(mockmongo):
-    disconnect()
-    connect('users_test', host=config.mongo.host, port=config.mongo.port, is_mock=True)
-
-    api.app.config['TESTING'] = True
-    client = api.app.test_client()
-
-    yield client
-    Upload._get_collection().drop()
+from tests.conftest import create_auth_headers
+from tests.test_files import example_file, example_file_mainfile, example_file_contents
+from tests.test_files import create_staging_upload, create_public_upload
+from tests.test_coe_repo import assert_coe_upload


 def test_alive(client):
@@ -61,30 +36,6 @@
     assert rv.status_code == 200


-def create_auth_headers(user):
-    basic_auth_str = '%s:password' % user.email
-    basic_auth_bytes = basic_auth_str.encode('utf-8')
-    basic_auth_base64 = base64.b64encode(basic_auth_bytes).decode('utf-8')
-    return {
-        'Authorization': 'Basic %s' % basic_auth_base64
-    }
-
-
-@pytest.fixture(scope='module')
-def test_user_auth(test_user: User):
-    return create_auth_headers(test_user)
-
-
-@pytest.fixture(scope='module')
-def test_other_user_auth(other_test_user: User):
-    return create_auth_headers(other_test_user)
-
-
-@pytest.fixture(scope='module')
-def admin_user_auth(admin_user: User):
-    return create_auth_headers(admin_user)
-
-
 @pytest.fixture(scope='function')
 def test_user_signature_token(client, test_user_auth):
     rv = client.get('/auth/token', headers=test_user_auth)
@@ -95,13 +46,12 @@

 class TestAdmin:

     @pytest.mark.timeout(10)
-    def test_reset(self, client, admin_user_auth, expandable_repo_db):
+    def test_reset(self, client, admin_user_auth, expandable_postgres):
         rv = client.post('/admin/reset', headers=admin_user_auth)
         assert rv.status_code == 200

-    # TODO disabled as this will destroy the session repository_db beyond repair.
     @pytest.mark.timeout(10)
-    def test_remove(self, client, admin_user_auth, expandable_repo_db):
+    def test_remove(self, client, admin_user_auth, expandable_postgres):
         rv = client.post('/admin/remove', headers=admin_user_auth)
         assert rv.status_code == 200

@@ -128,7 +78,7 @@
         yield None
         monkeypatch.setattr(config, 'services', old_config)

-    def test_disabled(self, client, admin_user_auth, disable_reset, repository_db):
+    def test_disabled(self, client, admin_user_auth, disable_reset, postgres):
         rv = client.post('/admin/reset', headers=admin_user_auth)
         assert rv.status_code == 400

@@ -141,7 +91,7 @@ class TestAuth:

         assert rv.status_code == 200

-    def test_xtoken_auth_denied(self, client, no_warn, repository_db):
+    def test_xtoken_auth_denied(self, client, no_warn, postgres):
         rv = client.get('/uploads/', headers={
             'X-Token': 'invalid'
         })
@@ -178,10 +128,6 @@

 class TestUploads:

-    @pytest.fixture(scope='function')
-    def proc_infra(self, repository_db, worker, no_warn):
-        return dict(repository_db=repository_db)
-
     def assert_uploads(self, upload_json_str, count=0, **kwargs):
         data = json.loads(upload_json_str)
         assert isinstance(data, list)
@@ -237,7 +183,6 @@
     def assert_unstage(self, client, test_user_auth, upload_id, proc_infra, metadata={}):
         rv = client.get('/uploads/%s' % upload_id, headers=test_user_auth)
         upload = self.assert_upload(rv.data)
-        empty_upload = upload['calcs']['pagination']['total'] == 0

         rv = client.post(
             '/uploads/%s' % upload_id,
@@ -250,7 +195,7 @@
         assert upload['process_running']

         self.assert_upload_does_not_exist(client, upload_id, test_user_auth)
-        assert_coe_upload(upload_id, empty=empty_upload, metadata=metadata)
+        assert_coe_upload(upload_id, user_metadata=metadata)

     def assert_upload_does_not_exist(self, client, upload_id: str, test_user_auth):
         # poll until publish/delete completed
@@ -289,11 +234,10 @@
         rv = client.get('/uploads/123456789012123456789012', headers=test_user_auth)
         assert rv.status_code == 404

-    @pytest.mark.timeout(30)
-    @pytest.mark.parametrize('file', example_files)
     @pytest.mark.parametrize('mode', ['multipart', 'stream', 'local_path'])
     @pytest.mark.parametrize('name', [None, 'test_name'])
-    def test_put(self, client, test_user_auth, proc_infra, file, mode, name):
+    def test_put(self, client, test_user_auth, proc_infra, example_upload, mode, name):
+        file = example_upload
         if name:
             url = '/uploads/?name=%s' % name
         else:
@@ -345,7 +289,7 @@
         assert rv.status_code == 400
         self.assert_processing(client, test_user_auth, upload['upload_id'])

-    def test_delete_unstaged(self, client, test_user_auth, proc_infra, clean_repository_db):
+    def test_delete_unstaged(self, client, test_user_auth, proc_infra):
         rv = client.put('/uploads/?local_path=%s' % example_file, headers=test_user_auth)
         upload = self.assert_upload(rv.data)
         self.assert_processing(client, test_user_auth, upload['upload_id'])
@@ -361,23 +305,22 @@
         assert rv.status_code == 200
         self.assert_upload_does_not_exist(client, upload['upload_id'], test_user_auth)

-    @pytest.mark.parametrize('example_file', example_files)
-    def test_post(self, client, test_user_auth, example_file, proc_infra, clean_repository_db):
-        rv = client.put('/uploads/?local_path=%s' % example_file, headers=test_user_auth)
+    def test_post(self, client, test_user_auth, example_upload, proc_infra):
+        rv = client.put('/uploads/?local_path=%s' % example_upload, headers=test_user_auth)
         upload = self.assert_upload(rv.data)
         self.assert_processing(client, test_user_auth, upload['upload_id'])
         self.assert_unstage(client, test_user_auth, upload['upload_id'], proc_infra)

     def test_post_metadata(
             self, client, proc_infra, admin_user_auth, test_user_auth, test_user,
-            other_test_user, clean_repository_db):
+            other_test_user):
         rv = client.put('/uploads/?local_path=%s' % example_file, headers=test_user_auth)
         upload = self.assert_upload(rv.data)
         self.assert_processing(client, test_user_auth, upload['upload_id'])
         metadata = dict(comment='test comment')
         self.assert_unstage(client, admin_user_auth, upload['upload_id'], proc_infra, metadata)

-    def test_post_metadata_forbidden(self, client, proc_infra, test_user_auth, clean_repository_db):
+    def test_post_metadata_forbidden(self, client, proc_infra, test_user_auth):
         rv = client.put('/uploads/?local_path=%s' % example_file, headers=test_user_auth)
         upload = self.assert_upload(rv.data)
         self.assert_processing(client, test_user_auth, upload['upload_id'])
@@ -389,7 +332,7 @@
         assert rv.status_code == 401

     # TODO validate metadata (or all input models in API for that matter)
-    # def test_post_bad_metadata(self, client, proc_infra, test_user_auth, clean_repository_db):
+    # def test_post_bad_metadata(self, client, proc_infra, test_user_auth, postgres):
     #     rv = client.put('/uploads/?local_path=%s' % example_file, headers=test_user_auth)
     #     upload = self.assert_upload(rv.data)
     #     self.assert_processing(client, test_user_auth, upload['upload_id'])
@@ -457,7 +400,7 @@
         return wrapper

     @pytest.fixture(scope='function')
-    def test_data(self, request, clean_repository_db, no_warn, test_user, other_test_user):
+    def test_data(self, request, postgres, mongo, no_warn, test_user, other_test_user):
         # delete potential old test files
         for _ in [0, 1]:
             upload_files = UploadFiles.get('test_upload')
@@ -484,12 +427,12 @@
             upload_files = create_staging_upload('test_upload', calc_specs=calc_specs)
         else:
             upload_files = create_public_upload('test_upload', calc_specs=calc_specs)
-            clean_repository_db.begin()
+            postgres.begin()
             coe_upload = coe_repo.Upload(
                 upload_name='test_upload',
                 user_id=test_user.user_id, is_processed=True)
-            clean_repository_db.add(coe_upload)
-            clean_repository_db.commit()
+            postgres.add(coe_upload)
+            postgres.commit()

         yield 'test_upload', authorized, auth_headers

diff --git a/tests/test_client.py b/tests/test_client.py
index 95c63484339077fc56e1af2aa587cf366b6c7cbe..ad9a09f94cb76ba42f4fdef156d7d081f3ae610f 100644
--- a/tests/test_client.py
+++ b/tests/test_client.py
@@ -12,40 +12,30 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-import pytest
-from bravado.client import SwaggerClient
 import time

 from nomad.processing import SUCCESS

-from tests.test_files import example_file, create_public_upload, clear_files  # noqa pylint: disable=unused-import
-from tests.test_api import client as flask_client, test_user_auth  # noqa pylint: disable=unused-import
-from tests.bravado_flaks import FlaskTestHttpClient
+from tests.test_files import example_file, create_public_upload


-@pytest.fixture(scope='function')
-def client(flask_client, repository_db, test_user_auth):
-    http_client = FlaskTestHttpClient(flask_client, headers=test_user_auth)
-    return SwaggerClient.from_url('/swagger.json', http_client=http_client)
+def test_get_upload_command(bravado):
+    assert bravado.uploads.get_upload_command().response().result.upload_command is not None


-def test_get_upload_command(client):
-    assert client.uploads.get_upload_command().response().result.upload_command is not None
-
-
-def test_upload(client, worker):
+def test_upload(bravado, proc_infra):
     with open(example_file, 'rb') as f:
-        upload = client.uploads.upload(file=f, name='test_upload').response().result
+        upload = bravado.uploads.upload(file=f, name='test_upload').response().result

     while upload.tasks_running:
-        upload = client.uploads.get_upload(upload_id=upload.upload_id).response().result
+        upload = bravado.uploads.get_upload(upload_id=upload.upload_id).response().result
         time.sleep(0.1)

     assert upload.tasks_status == SUCCESS


-def test_get_repo_calc(client, clear_files):
+def test_get_repo_calc(bravado, raw_files):
     create_public_upload('test_upload', 'pp')
-    repo = client.repo.get_repo_calc(upload_id='test_upload', calc_id='0').response().result
+    repo = bravado.repo.get_repo_calc(upload_id='test_upload', calc_id='0').response().result
     assert repo is not None
     assert repo['calc_id'] is not None
diff --git a/tests/test_coe_repo.py b/tests/test_coe_repo.py
index 2894d612fcf088b1193c78ede04f8143055d46ac..20a481af81fd38ef691ef2858259a845c53d5d68 100644
--- a/tests/test_coe_repo.py
+++ b/tests/test_coe_repo.py
@@ -13,15 +13,9 @@
 # limitations under the License.

 import pytest
-import datetime

 from nomad.coe_repo import User, Calc, Upload
-from nomad import processing
-
-from tests.processing.test_data import processed_upload  # pylint: disable=unused-import
-from tests.processing.test_data import uploaded_id  # pylint: disable=unused-import
-from tests.processing.test_data import mocks_forall  # pylint: disable=unused-import
-from tests.test_files import clear_files  # pylint: disable=unused-import
+from nomad import processing, parsing, datamodel


 def assert_user(user, reference):
@@ -40,79 +34,103 @@
     assert_user(user, test_user)


-def assert_coe_upload(upload_id, empty=False, metadata={}):
+def assert_coe_upload(upload_id, upload: datamodel.UploadWithMetadata = None, user_metadata: dict = None):
     coe_upload = Upload.from_upload_id(upload_id)

-    if empty:
+    if upload is not None:
+        calcs = list(upload.calcs)
+    elif coe_upload is None:
+        calcs = []
+    else:
+        calcs = list(calc.to_calc_with_metadata() for calc in coe_upload.calcs)
+
+    if len(calcs) == 0:
         assert coe_upload is None
     else:
         assert coe_upload is not None
-        assert len(coe_upload.calcs) > 0
-        for calc in coe_upload.calcs:
-            assert_coe_calc(calc, metadata=metadata)
+        assert len(coe_upload.calcs) == len(calcs)
+        for coe_calc, calc in zip(coe_upload.calcs, calcs):
+            if user_metadata is not None:
+                calc.apply_user_metadata(user_metadata)

-        if '_upload_time' in metadata:
-            assert coe_upload.created.isoformat()[:26] == metadata['_upload_time']
+            assert_coe_calc(coe_calc, calc)

+        if upload is not None and upload.upload_time is not None:
+            assert coe_upload.created.isoformat()[:26] == upload.upload_time.isoformat()

-def assert_coe_calc(calc: Calc, metadata={}):
-    assert calc.pid == int(metadata.get('_pid', calc.pid))
+
+def assert_coe_calc(coe_calc: Calc, calc: datamodel.CalcWithMetadata):
+    if calc.pid is not None:
+        assert coe_calc.pid == calc.pid

     # calc data
-    assert len(calc.files) == 5
-    assert calc.formula is not None
+    assert len(coe_calc.files) == len(calc.files)
+    assert coe_calc.formula == calc.formula

     # user meta data
-    assert calc.comment == metadata.get('comment', None)
-    assert sorted(calc.references) == sorted(metadata.get('references', []))
-    assert calc.uploader is not None
-    assert calc.uploader.user_id == metadata.get('_uploader', calc.uploader.user_id)
-    assert sorted(user.user_id for user in calc.coauthors) == sorted(metadata.get('coauthors', []))
-    assert sorted(user.user_id for user in calc.shared_with) == sorted(metadata.get('shared_with', []))
-    assert calc.with_embargo == metadata.get('with_embargo', False)
-
-
-@pytest.mark.timeout(10)
-def test_add_upload(clean_repository_db, processed_upload: processing.Upload):
-    empty = processed_upload.total_calcs == 0
-
-    Upload.add(processed_upload.to_upload_with_metadata())
-    assert_coe_upload(processed_upload.upload_id, empty=empty)
-
-
-@pytest.mark.timeout(10)
-def test_add_upload_metadata(clean_repository_db, processed_upload, other_test_user, test_user):
-    empty = processed_upload.total_calcs == 0
-
-    processed_upload.metadata = {
-        'comment': 'test comment',
-        'with_embargo': True,
-        'references': ['http://external.ref/one', 'http://external.ref/two'],
-        '_uploader': other_test_user.user_id,
-        'coauthors': [test_user.user_id],
-        '_upload_time': datetime.datetime.now().isoformat(),
-        '_pid': 256
-    }
-
-    Upload.add(processed_upload.to_upload_with_metadata())
+    assert coe_calc.comment == calc.comment
+    assert len(coe_calc.references) == len(calc.references)
+    assert coe_calc.uploader is not None
+    if calc.uploader is not None:
+        assert coe_calc.uploader.user_id == calc.uploader.id
+    assert sorted(user.user_id for user in coe_calc.coauthors) == sorted(user.id for user in calc.coauthors)
+    assert sorted(user.user_id for user in coe_calc.shared_with) == sorted(user.id for user in calc.shared_with)
+    if calc.with_embargo is not None:
+        assert coe_calc.with_embargo == calc.with_embargo
+    else:
+        assert not coe_calc.with_embargo
+
+
+def test_add_normalized_calc(postgres, normalized: parsing.LocalBackend, test_user):
+    calc_with_metadata = normalized.to_calc_with_metadata()
+    calc_with_metadata.uploader = test_user.to_popo()
+    calc_with_metadata.files = [calc_with_metadata.mainfile, '1', '2', '3', '4']
+    coe_calc = Calc()
+    coe_calc.apply_calc_with_metadata(calc_with_metadata)
+
+    assert_coe_calc(coe_calc, calc_with_metadata)
+
+
+def test_add_normalized_calc_with_metadata(
+        postgres, normalized: parsing.LocalBackend, example_user_metadata: dict):
+
+    calc_with_metadata = normalized.to_calc_with_metadata()
+    calc_with_metadata.files = [calc_with_metadata.mainfile, '1', '2', '3', '4']
+    calc_with_metadata.apply_user_metadata(example_user_metadata)
+    coe_calc = Calc(coe_calc_id=calc_with_metadata.pid)
+    coe_calc.apply_calc_with_metadata(calc_with_metadata)
+
+    assert_coe_calc(coe_calc, calc_with_metadata)
+
+
+def test_add_upload(processed: processing.Upload):
+    upload_with_metadata = processed.to_upload_with_metadata()
+    Upload.add(upload_with_metadata)
+    assert_coe_upload(processed.upload_id, upload_with_metadata)
+
+
+def test_add_upload_with_metadata(processed, example_user_metadata):
+    processed.metadata = example_user_metadata
+    upload_with_metadata = processed.to_upload_with_metadata()
+    Upload.add(upload_with_metadata)
     assert_coe_upload(
-        processed_upload.upload_id, empty=empty, metadata=processed_upload.metadata)
+        processed.upload_id, upload_with_metadata)


 class TestDataSets:

     @pytest.fixture(scope='function')
-    def datasets(self, clean_repository_db):
-        clean_repository_db.begin()
+    def datasets(self, postgres):
+        postgres.begin()
         one = Calc()
         two = Calc()
         three = Calc()
-        clean_repository_db.add(one)
-        clean_repository_db.add(two)
-        clean_repository_db.add(three)
+        postgres.add(one)
+        postgres.add(two)
+        postgres.add(three)
         one.children.append(two)
         two.children.append(three)
-        clean_repository_db.commit()
+        postgres.commit()

         return one, two, three
diff --git a/tests/test_files.py b/tests/test_files.py
index 888b447cabd15d71a8f23317249e02dff6870b72..3dbc93675946711b885dc293c804e0046e394d1c 100644
--- a/tests/test_files.py
+++ b/tests/test_files.py
@@ -42,22 +42,6 @@
 example_bucket = 'test_bucket'
 example_data = dict(test_key='test_value')


-@pytest.fixture(scope='function')
-def clear_files():
-    """ Utility fixture that removes all files from files and tmp after test. """
-    try:
-        yield
-    finally:
-        try:
-            shutil.rmtree(config.fs.objects)
-        except FileNotFoundError:
-            pass
-        try:
-            shutil.rmtree(config.fs.tmp)
-        except FileNotFoundError:
-            pass
-
-
 class TestObjects:

     @pytest.fixture(scope='function')
diff --git a/tests/test_migration.py b/tests/test_migration.py
index 59ea6ebbd2b29188ba87edc8b0475e07c80b49a0..9957195e40792c6c3072a7c9821c23e7e3fea76b 100644
--- a/tests/test_migration.py
+++ b/tests/test_migration.py
@@ -23,17 +23,15 @@
 from nomad import infrastructure, coe_repo
 from nomad.migration import NomadCOEMigration, SourceCalc
 from nomad.infrastructure import repository_db_connection

-from .bravado_flaks import FlaskTestHttpClient
-from tests.conftest import create_repository_db
-from tests.test_api import client as flask_client, create_auth_headers  # noqa pylint: disable=unused-import
-from tests.test_client import client as bravado_client  # noqa pylint: disable=unused-import
+from tests.conftest import create_postgres_infra, create_auth_headers
+from tests.bravado_flask import FlaskTestHttpClient

 test_source_db_name = 'test_nomad_fair_migration_source'
 test_target_db_name = 'test_nomad_fair_migration_target'


 @pytest.fixture(scope='module')
-def source_repo(monkeysession, repository_db):
+def source_repo(monkeysession, postgres_infra):
     """
     Fixture for an example migration source db with:
     - two user
@@ -63,13 +62,13 @@
         with open(sql_file, 'r') as f:
             cur.execute(f.read())

-    with create_repository_db(monkeysession, exists=True, readonly=True, dbname=test_source_db_name) as db:
+    with create_postgres_infra(monkeysession, exists=True, readonly=True, dbname=test_source_db_name) as db:
         yield db


 @pytest.fixture(scope='function')
-def target_repo(repository_db):
-    with create_repository_db(readonly=False, exists=False, dbname=test_target_db_name) as db:
+def target_repo(postgres):
+    with create_postgres_infra(readonly=False, exists=False, dbname=test_target_db_name) as db:
         db.execute('TRUNCATE users CASCADE;')
         yield db
         db.execute('TRUNCATE uploads CASCADE;')
@@ -103,23 +102,23 @@
     assert test_calc is not None

     if with_metadata:
-        assert test_calc.metadata['uploader']['user_id'] == 1
+        assert test_calc.metadata['uploader']['id'] == 1
         assert test_calc.metadata['comment'] == 'label1'


 @pytest.mark.parametrize('with_metadata', [False, True])
-def test_create_index(migration, mockmongo, with_metadata: bool):
+def test_create_index(migration, mongo, with_metadata: bool):
     perform_index(migration, has_indexed=True, drop=True, with_metadata=with_metadata)


 @pytest.mark.parametrize('with_metadata', [True, False])
-def test_update_index(migration, mockmongo, with_metadata: bool):
+def test_update_index(migration, mongo, with_metadata: bool):
     perform_index(migration, has_indexed=True, drop=True, with_metadata=with_metadata)
     perform_index(migration, has_indexed=False, drop=False, with_metadata=with_metadata)


 @pytest.fixture(scope='function')
-def migrate_infra(migration, target_repo, flask_client, worker, monkeysession):
+def migrate_infra(migration, target_repo, proc_infra, client, monkeysession):
     """
     Parameters to test
     - missing upload, extracted, archive, broken archive
@@ -143,7 +142,7 @@
     # target repo is the infrastructure repo
     def create_client():
         admin = target_repo.query(coe_repo.User).filter_by(email='admin').first()
-        http_client = FlaskTestHttpClient(flask_client, headers=create_auth_headers(admin))
+        http_client = FlaskTestHttpClient(client, headers=create_auth_headers(admin))
         return SwaggerClient.from_url('/swagger.json', http_client=http_client)

     old_repo = infrastructure.repository_db
@@ -193,7 +192,7 @@
         assert calc_1 is not None
         metadata = calc_1.to_calc_with_metadata()
         assert metadata.pid <= 2
-        assert metadata.uploader['user_id'] == 1
+        assert metadata.uploader['id'] == 1
         assert metadata.upload_time.isoformat() == '2019-01-01T12:00:00+00:00'
         assert len(metadata.datasets) == 1
         assert metadata.datasets[0]['id'] == 3
@@ -201,7 +200,7 @@
         assert metadata.datasets[0]['doi']['value'] == 'internal_ref'
         assert metadata.comment == 'label1'
         assert len(metadata.coauthors) == 1
-        assert metadata.coauthors[0]['user_id'] == 2
+        assert metadata.coauthors[0]['id'] == 2
         assert len(metadata.references) == 1
         assert metadata.references[0]['value'] == 'external_ref'

@@ -210,7 +209,7 @@
         assert calc_1 is not None
         metadata = calc_2.to_calc_with_metadata()
         assert len(metadata.shared_with) == 1
-        assert metadata.shared_with[0]['user_id'] == 1
+        assert metadata.shared_with[0]['id'] == 1

     # assert pid prefix of new calcs
     if assertions.get('new', 0) > 0:
diff --git a/tests/test_search.py b/tests/test_search.py
index c3c5a7e907f6ed635f53e581ec23ca0cb8508199..8de0ef9e21e1068c7f1ee373075eb00c97bdec2f 100644
--- a/tests/test_search.py
+++ b/tests/test_search.py
@@ -12,13 +12,44 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-from nomad import datamodel, search
+from nomad import datamodel, search, processing, parsing
+from nomad.search import Entry


 def test_init_mapping(elastic):
     pass


-def test_index_calc(elastic):
-    calc = datamodel.CalcWithMetadata(upload_id='test_upload', calc_id='test_calc')
-    search.Entry.from_calc_with_metadata(calc).save(op_type='create')
+def test_index_skeleton_calc(elastic):
+    calc_with_metadata = datamodel.CalcWithMetadata(upload_id='test_upload', calc_id='test_calc')
+
+    create_entry(calc_with_metadata)
+
+
+def test_index_normalized_calc(elastic, normalized: parsing.LocalBackend):
+    calc_with_metadata = normalized.to_calc_with_metadata()
+
+    create_entry(calc_with_metadata)
+
+
+def test_index_normalized_calc_with_metadata(
+        elastic, normalized: parsing.LocalBackend, example_user_metadata: dict):
+
+    calc_with_metadata = normalized.to_calc_with_metadata()
+    calc_with_metadata.apply_user_metadata(example_user_metadata)
+
+    create_entry(calc_with_metadata)
+
+
+def test_index_upload(elastic, processed: processing.Upload):
+    pass
+
+
+def create_entry(calc_with_metadata: datamodel.CalcWithMetadata):
+    search.Entry.from_calc_with_metadata(calc_with_metadata).save(op_type='create')
+    assert_entry(calc_with_metadata.calc_id)
+
+
+def assert_entry(calc_id):
+    calc = Entry.get(calc_id)
+    assert calc is not None
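
Note: if upload-level assertions are needed later (for example in test_index_upload),
the same elasticsearch_dsl pattern used by Entry.delete_upload could count an upload's
entries. This helper is a hypothetical sketch, not part of this change:

    from elasticsearch_dsl import Search
    from nomad import config

    def assert_upload_entries(upload_id: str, expected: int):
        # count the index entries that belong to the given upload
        search_request = Search(index=config.elastic.index_name) \
            .query('match', upload_id=upload_id)
        assert search_request.count() == expected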