diff --git a/.vscode/launch.json b/.vscode/launch.json index ff04f5a491a24b0f9234a3ad300adc3a9f193152..4e37dcb07a09e2a8bcd00ed2c76884ee5976c023 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -44,7 +44,7 @@ "cwd": "${workspaceFolder}", "program": "${workspaceFolder}/.pyenv/bin/pytest", "args": [ - "-sv", "tests/test_api.py::test_repo_calcs_user" + "-sv", "tests/test_coe_repo.py::test_add_upload" ] }, { diff --git a/gui/src/components/Repo.js b/gui/src/components/Repo.js index b8f5ded2137eab98c725b5f589c0f2ea836bbddd..bb8fc56d921d51db4bb2a88d17990afdf1f5aa3c 100644 --- a/gui/src/components/Repo.js +++ b/gui/src/components/Repo.js @@ -48,9 +48,9 @@ class Repo extends React.Component { }) static rowConfig = { - chemical_composition_bulk_reduced: 'Formula', + chemical_composition: 'Formula', program_name: 'Code', - program_basis_set_type: 'Basis set', + basis_set_type: 'Basis set', system_type: 'System', crystal_system: 'Crystal', space_group_number: 'Space group', diff --git a/nomad/api/app.py b/nomad/api/app.py index cdc1c065ec237d72c036fad6e9a0cfd144e2e4de..947e82eddefcd26b0431ffdbd59fc9edcfa45c36 100644 --- a/nomad/api/app.py +++ b/nomad/api/app.py @@ -29,7 +29,7 @@ from flask_httpauth import HTTPBasicAuth import os.path from nomad import config, infrastructure -from nomad.user import User +from nomad.coe_repo import User from nomad.processing import Upload base_path = config.services.api_base_path diff --git a/nomad/api/repository.py b/nomad/api/repository.py index ccbb805a571d5827d639f1d13eeee6bb92ce3901..586210b80f4ed045bbfddda42099e5e63424f168 100644 --- a/nomad/api/repository.py +++ b/nomad/api/repository.py @@ -67,8 +67,8 @@ class RepoCalcRes(Resource): "mainfile":"RopD3Mo8oMV_-E5bh8uW5PiiCRkH1/data/BrK_svSi/TFCC010.CAB/vasprun.xml.relax1", "program_name":"VASP", "program_version":"4.6.35 3Apr08 complex parallel LinuxIFC", - "chemical_composition_bulk_reduced":"BrKSi2", - "program_basis_set_type":"plane waves", + "chemical_composition":"BrKSi2", + "basis_set_type":"plane waves", "atom_species":[ 35, 19, @@ -135,8 +135,8 @@ class RepoCalcsRes(Resource): "mainfile":"RopD3Mo8oMV_-E5bh8uW5PiiCRkH1/data/BrK_svSi/TFCC010.CAB/vasprun.xml.relax1", "program_name":"VASP", "program_version":"4.6.35 3Apr08 complex parallel LinuxIFC", - "chemical_composition_bulk_reduced":"BrKSi2", - "program_basis_set_type":"plane waves", + "chemical_composition":"BrKSi2", + "basis_set_type":"plane waves", "atom_species":[ 35, 19, @@ -176,12 +176,12 @@ class RepoCalcsRes(Resource): if g.user is None: abort(401, message='Authentication required for owner value user.') search = RepoCalc.search().query('match_all') - search = search.filter('term', user_id=g.user.email) + search = search.filter('term', user_id=str(g.user.user_id)) elif owner == 'staging': if g.user is None: abort(401, message='Authentication required for owner value user.') search = RepoCalc.search().query('match_all') - search = search.filter('term', user_id=g.user.email).filter('term', staging=True) + search = search.filter('term', user_id=str(g.user.user_id)).filter('term', staging=True) else: abort(400, message='Invalid owner value. Valid values are all|user|staging, default is all') diff --git a/nomad/api/upload.py b/nomad/api/upload.py index 75a24169b8a8eef5e62569751fbd4e8ddea8c78a..7cf2ce81c4dfe12a020607b76a6e3661d72f4ec2 100644 --- a/nomad/api/upload.py +++ b/nomad/api/upload.py @@ -233,7 +233,7 @@ class UploadRes(Resource): except KeyError: abort(404, message='Upload with id %s does not exist.' 
% upload_id)
-        if upload.user_id != g.user.email:
+        if upload.user_id != str(g.user.user_id):
             abort(404, message='Upload with id %s does not exist.' % upload_id)
 
         try:
@@ -300,7 +300,7 @@ class UploadRes(Resource):
         except KeyError:
             abort(404, message='Upload with id %s does not exist.' % upload_id)
 
-        if upload.user_id != g.user.email:
+        if upload.user_id != str(g.user.user_id):
             abort(404, message='Upload with id %s does not exist.' % upload_id)
 
         json_data = request.get_json()
@@ -341,7 +341,7 @@ class UploadRes(Resource):
         except KeyError:
             abort(404, message='Upload with id %s does not exist.' % upload_id)
 
-        if upload.user_id != g.user.email:
+        if upload.user_id != str(g.user.user_id):
             abort(404, message='Upload with id %s does not exist.' % upload_id)
 
         try:
diff --git a/nomad/coe_repo.py b/nomad/coe_repo.py
new file mode 100644
index 0000000000000000000000000000000000000000..d04d406fe0f30b7d76807761324d4f7268d4f442
--- /dev/null
+++ b/nomad/coe_repo.py
@@ -0,0 +1,313 @@
+# Copyright 2018 Markus Scheidgen
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+Interface to the NOMAD-coe repository postgres database. This implementation is based on
+SQLAlchemy. There are model classes that represent entries in the *users* and *session*
+tables.
+
+This module allows users to be authenticated via password or session token and gives
+access to user data such as names and the user_id.
+
+.. autoclass:: User
+    :members:
+    :undoc-members:
+
+.. autoclass:: Session
+    :members:
+    :undoc-members:
+
+.. autofunction:: ensure_test_user
+
+This module also provides functionality to add parsed calculation data to the db:
+
+.. autofunction:: add_upload
+"""
+
+from passlib.hash import bcrypt
+from sqlalchemy import Column, Integer, String, Boolean, DateTime, ForeignKey
+from sqlalchemy.orm import relationship
+from sqlalchemy.ext.declarative import declarative_base
+from sqlalchemy.dialects.postgresql import BYTEA
+
+from nomad import utils, infrastructure
+from nomad.repo import RepoCalc
+
+
+Base = declarative_base()
+
+
+def add_upload(upload, restricted: bool) -> int:
+    """
+    Adds the processed upload to the NOMAD-coe repository db. It creates an uploads
+    entry and the respective calculation and property entries, all in one transaction.
+    Triggers an update of the NOMAD-coe repository elastic search index after
+    success.
+
+    TODO deal with the restricted parameter
+    """
+    repo_db = infrastructure.repository_db
+    logger = utils.get_logger(
+        __name__,
+        upload_id=upload.upload_id,
+        upload_hash=upload.upload_hash)
+
+    result = None
+
+    try:
+        # create upload
+        coe_upload = Upload(
+            upload_name=upload.upload_hash,
+            created=upload.upload_time,
+            user_id=int(upload.user_id),
+            is_processed=True)
+
+        # add calculations and metadata
+        has_calcs = False
+        for repo_calc in RepoCalc.upload_calcs(upload.upload_id):
+            has_calcs = True
+            add_calculation(upload, coe_upload, repo_calc)
+
+        # commit
+        if has_calcs:
+            # empty upload case
+            repo_db.commit()
+            result = coe_upload.upload_id
+        else:
+            repo_db.rollback()
+    except Exception as e:
+        logger.error('Unexpected exception.', exc_info=e)
+        repo_db.rollback()
+        raise e
+
+    # trigger index update
+    pass
+
+    return result
+
+
+def add_calculation(upload, coe_upload, calc: RepoCalc) -> None:
+    repo_db = infrastructure.repository_db
+
+    # table based properties
+    coe_calc = Calc(checksum=calc.calc_hash, upload=coe_upload)
+    repo_db.add(coe_calc)
+
+    program_version = calc.program_version  # TODO shorten version names
+    code_version = repo_db.query(CodeVersion).filter_by(content=program_version).first()
+    if code_version is None:
+        code_version = CodeVersion(content=program_version)
+        repo_db.add(code_version)
+
+    metadata = CalcMetaData(
+        calc=coe_calc,
+        added=upload.upload_time,
+        chemical_formula=calc.chemical_composition,
+        filenames=','.join(calc.aux_files).encode('utf-8'),  # TODO fix paths, has to be aligned with API
+        location=calc.mainfile,  # TODO fix paths, has to be aligned with API
+        version=code_version)
+    repo_db.add(metadata)
+
+    spacegroup = Spacegroup(
+        calc=coe_calc,
+        n=int(calc.space_group_number)
+    )
+    repo_db.add(spacegroup)
+
+    # topic based properties
+    coe_calc.set_value(topic_code, calc.program_name)
+    for atom in set(calc.atom_species):
+        coe_calc.set_value(topic_atoms, str(atom))  # TODO atom label not number
+    coe_calc.set_value(topic_system_type, calc.system_type)
+    coe_calc.set_value(topic_xc_treatment, calc.XC_functional_name)  # TODO function->treatment
+    coe_calc.set_value(topic_crystal_system, calc.crystal_system)
+    coe_calc.set_value(topic_basis_set_type, calc.basis_set_type)
+
+
+class Calc(Base):  # type: ignore
+    __tablename__ = 'calculations'
+
+    calc_id = Column(Integer, primary_key=True, autoincrement=True)
+    origin_id = Column(Integer, ForeignKey('uploads.upload_id'))
+    upload = relationship('Upload')
+    checksum = Column(String)
+
+    def set_value(self, topic_cid: int, value: str) -> None:
+        if value is None:
+            return
+
+        repo_db = infrastructure.repository_db
+        topic = repo_db.query(Topics).filter_by(topic=value).first()
+        if not topic:
+            topic = Topics(cid=topic_cid, topic=value)
+            repo_db.add(topic)
+
+        tag = Tag(calc=self, topic=topic)
+        repo_db.add(tag)
+
+
+class CalcMetaData(Base):  # type: ignore
+    __tablename__ = 'metadata'
+
+    calc_id = Column(Integer, ForeignKey('calculations.calc_id'), primary_key=True)
+    calc = relationship('Calc')
+    added = Column(DateTime)
+    chemical_formula = Column(String)
+    filenames = Column(BYTEA)
+    location = Column(String)
+    version_id = Column(Integer, ForeignKey('codeversions.version_id'))
+    version = relationship('CodeVersion')
+
+
+class CodeVersion(Base):  # type: ignore
+    __tablename__ = 'codeversions'
+
+    version_id = Column(Integer, primary_key=True, autoincrement=True)
+    content = Column(String)
+
+
+class Spacegroup(Base):  # type: ignore
+    __tablename__ = 'spacegroups'
+
+    calc_id = Column(Integer, ForeignKey('calculations.calc_id'), primary_key=True)
+    calc = relationship('Calc')
+    n = Column(Integer)
+
+
+class Tag(Base):  # type: ignore
+    __tablename__ = 'tags'
+    calc_id = Column(Integer, ForeignKey('calculations.calc_id'), primary_key=True)
+    calc = relationship('Calc')
+    tid = Column(Integer, ForeignKey('topics.tid'), primary_key=True)
+    topic = relationship('Topics')
+
+    def __repr__(self):
+        return '<Tag(calc_id="%d", tid="%d")>' % (self.calc_id, self.tid)
+
+
+topic_code = 220
+topic_atoms = 10
+topic_system_type = 50
+topic_xc_treatment = 75
+topic_crystal_system = 90
+topic_basis_set_type = 80
+
+
+class Topics(Base):  # type: ignore
+    __tablename__ = 'topics'
+    tid = Column(Integer, primary_key=True, autoincrement=True)
+    cid = Column(Integer)
+    topic = Column(String)
+
+
+class Upload(Base):  # type: ignore
+    __tablename__ = 'uploads'
+
+    upload_id = Column(Integer, primary_key=True, autoincrement=True)
+    upload_name = Column(String)
+    user_id = Column(Integer, ForeignKey('users.user_id'))
+    user = relationship('User')
+    is_processed = Column(Boolean)
+    created = Column(DateTime)
+
+
+class Session(Base):  # type: ignore
+    __tablename__ = 'sessions'
+
+    token = Column(String, primary_key=True)
+    user_id = Column(String)
+
+
+class LoginException(Exception):
+    pass
+
+
+class User(Base):  # type: ignore
+    """
+    SQLAlchemy model class that represents NOMAD-coe repository postgres db *users*.
+    Provides functions for authenticating via password or session token.
+
+    It is not intended to create or update users. This should be done via the
+    NOMAD-coe repository GUI.
+    """
+    __tablename__ = 'users'
+
+    user_id = Column(Integer, primary_key=True)
+    email = Column(String)
+    firstname = Column(String)
+    lastname = Column(String)
+    password = Column(String)
+
+    def __repr__(self):
+        return '<User(email="%s")>' % self.email
+
+    def _hash_password(self, password):
+        assert False, 'Login functions are done by the NOMAD-coe repository GUI'
+        # password_hash = bcrypt.encrypt(password, ident='2y')
+        # self.password = password_hash
+
+    def _verify_password(self, password):
+        return bcrypt.verify(password, self.password)
+
+    def _generate_auth_token(self, expiration=600):
+        assert False, 'Login functions are done by the NOMAD-coe repository GUI'
+
+    def get_auth_token(self):
+        repo_db = infrastructure.repository_db
+        session = repo_db.query(Session).filter_by(user_id=self.user_id).first()
+        if not session:
+            raise LoginException('No session, user probably not logged in at NOMAD-coe repository GUI')
+
+        return session.token.encode('utf-8')
+
+    @staticmethod
+    def verify_user_password(email, password):
+        repo_db = infrastructure.repository_db
+        user = repo_db.query(User).filter_by(email=email).first()
+        if not user:
+            return None
+
+        if user._verify_password(password):
+            return user
+        else:
+            raise LoginException('Wrong password')
+
+    @staticmethod
+    def verify_auth_token(token):
+        repo_db = infrastructure.repository_db
+        session = repo_db.query(Session).filter_by(token=token).first()
+        if session is None:
+            return None
+
+        user = repo_db.query(User).filter_by(user_id=session.user_id).first()
+        assert user, 'User in sessions must exist.'
+        return user
+
+
+def ensure_test_user(email):
+    """
+    Allows tests to make sure that the default test users exist in the database.
+    Returns:
+        The user as :class:`User` instance.
+    """
+    repo_db = infrastructure.repository_db
+    existing = repo_db.query(User).filter_by(email=email).first()
+    assert existing, 'Test user %s does not exist.'
% email + + session = repo_db.query(Session).filter_by( + user_id=existing.user_id).first() + assert session, 'Test user %s has no session.' % email + assert session.token == email, 'Test user %s session has unexpected token.' % email + + return existing diff --git a/nomad/infrastructure.py b/nomad/infrastructure.py index 84b387d34ef6f353dcafcf62b0ea55cfc49dab77..155784a23357cae074ebf14a9e3554114bf41b1d 100644 --- a/nomad/infrastructure.py +++ b/nomad/infrastructure.py @@ -22,6 +22,8 @@ import shutil from contextlib import contextmanager import psycopg2 +from sqlalchemy import create_engine +from sqlalchemy.orm import Session from elasticsearch.exceptions import RequestError from elasticsearch_dsl import connections from mongoengine import connect @@ -37,7 +39,9 @@ mongo_client = None """ The pymongo mongodb client. """ repository_db = None -""" The repository postgres db sqlalchemy client. """ +""" The repository postgres db sqlalchemy session. """ +repository_db_conn = None +""" The repository postgres db sqlalchemy connection. """ def setup(): @@ -106,18 +110,19 @@ def setup_repository_db(): if not exists: reset_repository_db() - from sqlalchemy import create_engine - from sqlalchemy.orm import sessionmaker - global repository_db + global repository_db_conn + url = 'postgresql://%s:%s@%s:%d/%s' % ( config.repository_db.user, config.repository_db.password, config.repository_db.host, config.repository_db.port, config.repository_db.dbname) - engine = create_engine(url, echo=False, isolation_level='AUTOCOMMIT') - repository_db = sessionmaker(bind=engine)() + engine = create_engine(url, echo=False) + + repository_db_conn = engine.connect() + repository_db = Session(bind=repository_db_conn) logger.info('setup repository db') @@ -150,7 +155,14 @@ def repository_db_connection(): config.repository_db.password) conn = psycopg2.connect(conn_str) - yield conn + try: + yield conn + except Exception as e: + logger.error('Unhandled exception within repository db connection.', exc_info=e) + conn.rollback() + conn.close() + return + conn.commit() conn.close() diff --git a/nomad/processing/data.py b/nomad/processing/data.py index 25ab0d609931e62bbb5bf728cc81316fb57c0fa5..7544807874ad760fdcd01ca4d71da0cb57c809df 100644 --- a/nomad/processing/data.py +++ b/nomad/processing/data.py @@ -34,10 +34,9 @@ import time from structlog import wrap_logger from contextlib import contextmanager -from nomad import config, utils +from nomad import config, utils, coe_repo from nomad.files import UploadFile, ArchiveFile, ArchiveLogFile, File from nomad.repo import RepoCalc -from nomad.user import User from nomad.processing.base import Proc, Chord, process, task, PENDING, SUCCESS, FAILURE, RUNNING from nomad.parsing import parsers, parser_dict from nomad.normalizing import normalizers @@ -284,7 +283,7 @@ class Calc(Proc): additional=additional, upload_hash=upload_hash, calc_hash=calc_hash, - upload_id=self.upload_id) + upload_id=self.upload_id).persist() with utils.timer( logger, 'archived', step='archive', @@ -342,6 +341,8 @@ class Upload(Chord): upload_url = StringField(default=None) upload_command = StringField(default=None) + coe_repo_upload_id = IntField(default=None) + _initiated_parsers = IntField(default=-1) meta: Any = { @@ -359,9 +360,9 @@ class Upload(Chord): return cls.get_by_id(id, 'upload_id') @classmethod - def user_uploads(cls, user: User) -> List['Upload']: + def user_uploads(cls, user: coe_repo.User) -> List['Upload']: """ Returns all uploads for the given user. Currently returns all uploads. 
""" - return cls.objects(user_id=user.email, in_staging=True) + return cls.objects(user_id=str(user.user_id), in_staging=True) def get_logger(self, **kwargs): logger = super().get_logger() @@ -413,13 +414,13 @@ class Upload(Chord): The upload will be already saved to the database. Arguments: - user (User): The user that created the upload. + user (coe_repo.User): The user that created the upload. """ - user: User = kwargs['user'] + user: coe_repo.User = kwargs['user'] del(kwargs['user']) if 'upload_id' not in kwargs: kwargs.update(upload_id=utils.create_uuid()) - kwargs.update(user_id=user.email) + kwargs.update(user_id=str(user.user_id)) self = super().create(**kwargs) basic_auth_token = base64.b64encode(b'%s:' % user.get_auth_token()).decode('utf-8') @@ -443,6 +444,7 @@ class Upload(Chord): self.get_logger().info('unstage') self.in_staging = False RepoCalc.unstage(upload_id=self.upload_id) + # coe_repo.add_upload(self, restricted=False) # TODO allow users to choose restricted self.save() @property @@ -479,6 +481,12 @@ class Upload(Chord): @task def extracting(self): + """ + Task performed before the actual parsing/normalizing. Extracting and bagging + the uploaded files, computing all keys, create an *upload* entry in the NOMAD-coe + repository db, etc. + """ + # extract the uploaded file, this will also create a bagit bag. logger = self.get_logger() try: with utils.timer( @@ -489,12 +497,14 @@ class Upload(Chord): self.fail('process request for non existing upload', level=logging.INFO) return + # create and save a hash for the upload try: self.upload_hash = self.upload_file.upload_hash() except Exception as e: self.fail('could not create upload hash', e) return + # check if the file was already uploaded and processed before if RepoCalc.upload_exists(self.upload_hash): self.fail('The same file was already uploaded and processed.', level=logging.INFO) return diff --git a/nomad/repo.py b/nomad/repo.py index c60aedae0b4c6112049882eb404093dd34461c27..3a322e8a88d8f988fc73ee805db0ccb6122d200c 100644 --- a/nomad/repo.py +++ b/nomad/repo.py @@ -86,8 +86,7 @@ class RepoCalc(ElasticDocument): @classmethod def create_from_backend( cls, backend: LocalBackend, additional: Dict[str, Any], - upload_id: str, upload_hash: str, calc_hash: str, - **kwargs) -> 'RepoCalc': + upload_id: str, upload_hash: str, calc_hash: str) -> 'RepoCalc': """ Create a new calculation instance in elastic search. The data from the given backend will be used. Additional meta-data can be given as *kwargs*. ``upload_id``, @@ -100,12 +99,9 @@ class RepoCalc(ElasticDocument): upload_hash: The upload hash of the originating upload. upload_id: The upload id of the originating upload. calc_hash: The upload unique hash for this calculation. - kwargs: Arguments are passed to elasticsearch index operation. - Raises: - AlreadyExists: If the calculation already exists in elastic search. We use - the elastic document lock here. The elastic document is IDed via the - ``archive_id``. + Returns: + The created instance. 
""" assert upload_hash is not None and calc_hash is not None and upload_id is not None additional.update(dict(upload_hash=upload_hash, calc_hash=calc_hash, upload_id=upload_id)) @@ -113,33 +109,45 @@ class RepoCalc(ElasticDocument): # prepare the entry with all necessary properties from the backend calc = cls(meta=dict(id='%s/%s' % (upload_hash, calc_hash))) for property in cls._doc_type.mapping: - property = key_mappings.get(property, property) + mapped_property = key_mappings.get(property, property) - if property in additional: - value = additional[property] + if mapped_property in additional: + value = additional[mapped_property] else: try: - value = backend.get_value(property, 0) + value = backend.get_value(mapped_property, 0) + if value is None: + raise KeyError except KeyError: try: program_name = backend.get_value('program_name', 0) except KeyError: program_name = 'unknown' logger.warning( - 'Missing property value', property=property, upload_id=upload_id, + 'Missing property value', property=mapped_property, upload_id=upload_id, upload_hash=upload_hash, calc_hash=calc_hash, code=program_name) continue setattr(calc, property, value) - # persist to elastic search + return calc + + def persist(self, **kwargs): + """ + Persist this entry to elastic search. Kwargs are passed to elastic search. + + Raises: + AlreadyExists: If the calculation already exists in elastic search. We use + the elastic document lock here. The elastic document is IDed via the + ``archive_id``. + """ try: # In practive es operation might fail due to timeout under heavy loads/ # bad configuration. Retries with a small delay is a pragmatic solution. e_after_retries = None for _ in range(0, 2): try: - calc.save(op_type='create', **kwargs) + self.save(op_type='create', **kwargs) e_after_retries = None break except ConnectionTimeout as e: @@ -154,9 +162,7 @@ class RepoCalc(ElasticDocument): # if we had and exception and could not fix with retries, throw it raise e_after_retries # pylint: disable=E0702 except ConflictError: - raise AlreadyExists('Calculation %s does already exist.' % (calc.archive_id)) - - return calc + raise AlreadyExists('Calculation %s does already exist.' % (self.archive_id)) @staticmethod def delete_upload(upload_id): @@ -208,6 +214,13 @@ class RepoCalc(ElasticDocument): return len(search) > 0 + @staticmethod + def upload_calcs(upload_id): + """ Returns an iterable over all entries for the given upload_id. """ + return Search(using=infrastructure.elastic_client, index=config.elastic.calc_index) \ + .query('match', upload_id=upload_id) \ + .scan() + @property def json_dict(self): """ A json serializable dictionary representation. """ diff --git a/nomad/user.py b/nomad/user.py deleted file mode 100644 index 10bc84777541c37f2262e7a0506bf377ad498d29..0000000000000000000000000000000000000000 --- a/nomad/user.py +++ /dev/null @@ -1,133 +0,0 @@ -# Copyright 2018 Markus Scheidgen -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an"AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -""" -Module with some prototypes/placeholder for future user management in nomad@FAIR. -It is currently based on the NOMAD-coe repository postgres API. This module allows -to authenticate users based on user password or session tokens. It allows to access -the user data like names and user_id. - -This implementation is based on SQLAlchemy. There are model classes that represent -entries in the *users* and *session* tables. - -.. autoclass:: User - :members: - :undoc-members: - -.. autoclass:: Session - :members: - :undoc-members: - -.. autofunction:: ensure_test_user -""" - -from passlib.hash import bcrypt -from sqlalchemy import Column, Integer, String -from sqlalchemy.ext.declarative import declarative_base - -from nomad import infrastructure - - -Base = declarative_base() - - -class Session(Base): # type: ignore - __tablename__ = 'sessions' - - token = Column(String, primary_key=True) - user_id = Column(String) - - -class LoginException(Exception): - pass - - -class User(Base): # type: ignore - """ - SQLAlchemy model class that represents NOMAD-coe repository postgresdb *users*. - Provides functions for authenticating via password or session token. - - It is not intended to create or update users. This should be done via the - NOMAD-coe repository GUI. - """ - __tablename__ = 'users' - - user_id = Column(Integer, primary_key=True) - email = Column(String) - firstname = Column(String) - lastname = Column(String) - password = Column(String) - - def __repr__(self): - return '<User(email="%s")>' % self.email - - def _hash_password(self, password): - assert False, 'Login functions are done by the NOMAD-coe repository GUI' - # password_hash = bcrypt.encrypt(password, ident='2y') - # self.password = password_hash - - def _verify_password(self, password): - return bcrypt.verify(password, self.password) - - def _generate_auth_token(self, expiration=600): - assert False, 'Login functions are done by the NOMAD-coe repository GUI' - - def get_auth_token(self): - repository_db = infrastructure.repository_db - session = repository_db.query(Session).filter_by(user_id=self.user_id).first() - if not session: - raise LoginException('No session, user probably not logged in at NOMAD-coe repository GUI') - - return session.token.encode('utf-8') - - @staticmethod - def verify_user_password(email, password): - repository_db = infrastructure.repository_db - user = repository_db.query(User).filter_by(email=email).first() - if not user: - return None - - if user._verify_password(password): - return user - else: - raise LoginException('Wrong password') - - @staticmethod - def verify_auth_token(token): - repository_db = infrastructure.repository_db - session = repository_db.query(Session).filter_by(token=token).first() - if session is None: - return None - - user = repository_db.query(User).filter_by(user_id=session.user_id).first() - assert user, 'User in sessions must exist.' - return user - - -def ensure_test_user(email): - """ - Allows tests to make sure that the default test users exist in the database. - Returns: - The user as :class:`User` instance. - """ - existing = infrastructure.repository_db.query(User).filter_by( - email=email).first() - assert existing, 'Test user %s does not exist.' % email - - session = infrastructure.repository_db.query(Session).filter_by( - user_id=existing.user_id).first() - assert session, 'Test user %s has no session.' % email - assert session.token == email, 'Test user %s session has unexpected token.' 
% email - - return existing diff --git a/repository/repo_postgres_schema b/repository/repo_postgres_schema index c473a1789122fbf991afb83874afd0671a18737a..ed90960172ff0b84df14bf8fe42691961c705e4c 100644 --- a/repository/repo_postgres_schema +++ b/repository/repo_postgres_schema @@ -1 +1,10 @@ -- "login_token" or "sessions", whats the difference, what should be used for authenticating uploader. \ No newline at end of file +- "login_token" or "sessions", whats the difference, what should be used for authenticating uploader. +- uploads + - upload name + ? is this supposed to be unique, check for doublets + ! there is no index + ? is this a hash/checksum, what kind? + - target_path ? + - is_all_uploaded, skip_extraction ? necessary +- calculation + ? what is the "checksum" \ No newline at end of file diff --git a/tests/conftest.py b/tests/conftest.py index 06aa62d576dbac567d6519d8387c27768c790a10..d10b2cde2065f409f9f87538b34f39010d9b1857 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,9 +1,18 @@ import pytest import logging +from sqlalchemy.orm import Session from mongoengine import connect from mongoengine.connection import disconnect -from nomad import config, user, infrastructure +from nomad import config, coe_repo, infrastructure + + +@pytest.fixture(scope="session") +def monkeysession(request): + from _pytest.monkeypatch import MonkeyPatch + mpatch = MonkeyPatch() + yield mpatch + mpatch.undo() @pytest.fixture(scope='session', autouse=True) @@ -94,19 +103,28 @@ def elastic(): @pytest.fixture(scope='session') -def repository_db(): +def repository_db(monkeysession): infrastructure.setup_repository_db() - assert infrastructure.repository_db is not None + assert infrastructure.repository_db_conn is not None + + # we use a transaction around the session to rollback anything that happens within + # test execution + trans = infrastructure.repository_db_conn.begin() + session = Session(bind=infrastructure.repository_db_conn) + monkeysession.setattr('nomad.infrastructure.repository_db', session) + yield infrastructure.repository_db + trans.rollback() + session.close() @pytest.fixture(scope='session') def test_user(repository_db): - return user.ensure_test_user(email='sheldon.cooper@nomad-fairdi.tests.de') + return coe_repo.ensure_test_user(email='sheldon.cooper@nomad-fairdi.tests.de') @pytest.fixture(scope='session') def other_test_user(repository_db): - return user.ensure_test_user(email='leonard.hofstadter@nomad-fairdi.tests.de') + return coe_repo.ensure_test_user(email='leonard.hofstadter@nomad-fairdi.tests.de') @pytest.fixture(scope='function') @@ -115,30 +133,29 @@ def mocksearch(monkeypatch): uploads_by_id = {} by_archive_id = {} - def create_from_backend(_, **kwargs): - upload_hash = kwargs['upload_hash'] - upload_id = kwargs['upload_id'] - uploads_by_hash[upload_hash] = (upload_id, upload_hash) - uploads_by_id[upload_id] = (upload_id, upload_hash) - archive_id = '%s/%s' % (upload_hash, kwargs['calc_hash']) - - additional = kwargs.pop('additional') - kwargs.update(additional) - by_archive_id[archive_id] = kwargs - return {} + def persist(calc): + uploads_by_hash.setdefault(calc.upload_hash, []).append(calc) + uploads_by_id.setdefault(calc.upload_id, []).append(calc) + by_archive_id[calc.archive_id] = calc def upload_exists(upload_hash): return upload_hash in uploads_by_hash def delete_upload(upload_id): if upload_id in uploads_by_id: - hash, id = uploads_by_id[upload_id] - del(uploads_by_id[id]) - del(uploads_by_hash[hash]) + for calc in uploads_by_id[upload_id]: + 
del(by_archive_id[calc.archive_id]) + upload_hash = next(uploads_by_id[upload_id]).upload_hash + del(uploads_by_id[upload_id]) + del(uploads_by_hash[upload_hash]) + + def upload_calcs(upload_id): + return uploads_by_id.get(upload_id, []) - monkeypatch.setattr('nomad.repo.RepoCalc.create_from_backend', create_from_backend) + monkeypatch.setattr('nomad.repo.RepoCalc.persist', persist) monkeypatch.setattr('nomad.repo.RepoCalc.upload_exists', upload_exists) monkeypatch.setattr('nomad.repo.RepoCalc.delete_upload', delete_upload) + monkeypatch.setattr('nomad.repo.RepoCalc.upload_calcs', upload_calcs) monkeypatch.setattr('nomad.repo.RepoCalc.unstage', lambda *args, **kwargs: None) return by_archive_id diff --git a/tests/processing/test_data.py b/tests/processing/test_data.py index 14033040c459b7059206d1513ed81b72855b9393..a2dd113be86fc73e940f65732e1663305f7ffcfc 100644 --- a/tests/processing/test_data.py +++ b/tests/processing/test_data.py @@ -25,7 +25,7 @@ import shutil import os.path import json -from nomad import user, utils +from nomad import utils from nomad.files import UploadFile, ArchiveFile, ArchiveLogFile, RepositoryFile from nomad.processing import Upload, Calc from nomad.processing.base import task as task_decorator @@ -77,6 +77,11 @@ def run_processing(uploaded_id: str, test_user) -> Upload: return upload +@pytest.fixture +def processed_upload(uploaded_id, test_user, worker, no_warn) -> Upload: + return run_processing(uploaded_id, test_user) + + def assert_processing(upload: Upload, mocksearch=None): assert upload.completed assert upload.current_task == 'cleanup' @@ -104,7 +109,9 @@ def assert_processing(upload: Upload, mocksearch=None): if mocksearch: repo = mocksearch[calc.archive_id] assert repo is not None - assert len(repo.get('aux_files')) == 4 + assert repo.chemical_composition is not None + assert repo.basis_set_type is not None + assert len(repo.aux_files) == 4 assert RepositoryFile(upload.upload_hash).exists() diff --git a/tests/test_coe_repo.py b/tests/test_coe_repo.py new file mode 100644 index 0000000000000000000000000000000000000000..3b0602022b9b85e835fe0ebde07804f35cd6cb47 --- /dev/null +++ b/tests/test_coe_repo.py @@ -0,0 +1,53 @@ +from nomad.coe_repo import User, Calc, CalcMetaData, Upload, add_upload + +from tests.processing.test_data import processed_upload # pylint: disable=unused-import +from tests.processing.test_data import uploaded_id # pylint: disable=unused-import +from tests.processing.test_data import mocks_forall # pylint: disable=unused-import +from tests.test_files import clear_files # pylint: disable=unused-import + + +def assert_user(user, reference): + assert user is not None + assert user.user_id == reference.user_id + assert user.email == reference.email + + +def test_token_authorize(test_user): + user = User.verify_auth_token(test_user.email) + assert_user(user, test_user) + + +def test_password_authorize(test_user): + user = User.verify_user_password(test_user.email, 'password') + assert_user(user, test_user) + + +def test_rollback(repository_db): + calc = Calc(checksum='test') + repository_db.add(calc) + repository_db.flush() + calc_id = calc.calc_id + + repository_db.rollback() + + assert repository_db.query(Calc).filter_by(calc_id=calc_id).first() is None + + +def assert_upload(coe_upload_id, repository_db): + upload = repository_db.query(Upload).filter_by(upload_id=coe_upload_id).first() + assert upload is not None + for calc in repository_db.query(Calc).filter_by(origin_id=coe_upload_id): + assert calc.origin_id == coe_upload_id + metadata = 
repository_db.query(CalcMetaData).filter_by(calc_id=calc.calc_id).first() + assert metadata is not None + assert metadata.chemical_formula is not None + + +def test_add_upload(repository_db, processed_upload): + coe_upload_id = add_upload(processed_upload, restricted=False) + if coe_upload_id: + assert_upload(coe_upload_id, repository_db) + + coe_upload_id = add_upload(processed_upload, restricted=False) + if coe_upload_id: + assert_upload(coe_upload_id, repository_db) diff --git a/tests/test_repo.py b/tests/test_repo.py index ef572bf29a59028de3fa546f9abc4082c71e5768..7731b0355b9ebe5824a601a848b71ed9ab752109 100644 --- a/tests/test_repo.py +++ b/tests/test_repo.py @@ -19,7 +19,7 @@ from elasticsearch import NotFoundError from nomad.files import ArchiveFile, UploadFile from nomad.parsing import LocalBackend -from nomad.repo import AlreadyExists, RepoCalc, key_mappings +from nomad.repo import AlreadyExists, RepoCalc from tests.test_files import example_file # noqa from tests.test_normalizing import normalized_template_example # pylint: disable=unused-import @@ -49,9 +49,10 @@ def example_elastic_calc(normalized_template_example: LocalBackend, elastic, tes additional=dict( mainfile=mainfile, upload_time=datetime.now(), - staging=True, restricted=False, user_id=test_user.email, - aux_files=auxfiles), - refresh='true') + staging=True, restricted=False, user_id=str(test_user.user_id), + aux_files=auxfiles)) + + entry.persist(refresh='true') yield entry @@ -66,7 +67,6 @@ def example_elastic_calc(normalized_template_example: LocalBackend, elastic, tes def assert_elastic_calc(calc: RepoCalc): assert calc is not None for property in RepoCalc._doc_type.mapping: - property = key_mappings.get(property, property) assert getattr(calc, property) is not None assert len(getattr(calc, 'aux_files')) > 0 @@ -83,17 +83,19 @@ def test_create_elastic_calc(example_elastic_calc: RepoCalc, no_warn): def test_create_existing_elastic_calc( example_elastic_calc: RepoCalc, normalized_template_example, test_user): + + calc = RepoCalc.create_from_backend( + normalized_template_example, + upload_hash='test_upload_hash', + calc_hash='test_calc_hash', + upload_id='test_upload_id', + additional=dict( + mainfile='/test/mainfile', + upload_time=datetime.now(), + staging=True, restricted=False, user_id=str(test_user.user_id))) + try: - RepoCalc.create_from_backend( - normalized_template_example, - upload_hash='test_upload_hash', - calc_hash='test_calc_hash', - upload_id='test_upload_id', - additional=dict( - mainfile='/test/mainfile', - upload_time=datetime.now(), - staging=True, restricted=False, user_id=test_user.email), - refresh='true') + calc.persist(refresh='true') assert False except AlreadyExists: pass diff --git a/tests/test_user.py b/tests/test_user.py deleted file mode 100644 index 84b0b200bf6051bd5811adb65b79546ab015f909..0000000000000000000000000000000000000000 --- a/tests/test_user.py +++ /dev/null @@ -1,16 +0,0 @@ -from nomad.user import User - - -def assert_user(user, reference): - assert user is not None - assert user.user_id == reference.user_id - - -def test_token_authorize(test_user): - user = User.verify_auth_token(test_user.email) - assert_user(user, test_user) - - -def test_password_authorize(test_user): - user = User.verify_user_password(test_user.email, 'password') - assert_user(user, test_user)
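
Note on the deferred wiring in Upload.unstage(): the patch adds a coe_repo_upload_id field to the processing Upload document, but the call to coe_repo.add_upload in unstage() is left commented out (TODO allow users to choose restricted). A minimal sketch of how the two could be connected once a restricted flag is exposed to users; the method body and the restricted argument below are assumptions, not part of this patch:

    # sketch only: assumes unstage() later receives a user-chosen `restricted` flag
    def unstage(self, restricted: bool = False):
        self.get_logger().info('unstage')
        self.in_staging = False
        RepoCalc.unstage(upload_id=self.upload_id)
        # add_upload runs in one transaction and returns the coe repository upload_id,
        # or None for uploads without calculations; keep it for later reference
        self.coe_repo_upload_id = coe_repo.add_upload(self, restricted=restricted)
        self.save()

Because add_upload rolls back and re-raises on any error, unstage would fail loudly instead of silently dropping the repository entry.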