diff --git a/nomad/coe_repo.py b/nomad/coe_repo.py index c9136e761fb1e08569bd82994f652359ad7ac43b..5f03cabcf59fe29dcb9dfc8430f9c0020da286c0 100644 --- a/nomad/coe_repo.py +++ b/nomad/coe_repo.py @@ -35,14 +35,17 @@ This module also provides functionality to add parsed calculation data to the db .. autofunction:: add_upload """ +from typing import List, Type import itertools +import json +import datetime from passlib.hash import bcrypt -from sqlalchemy import Column, Integer, String, Boolean, DateTime, ForeignKey, Enum +from sqlalchemy import Column, Integer, String, Boolean, DateTime, ForeignKey, Enum, Table from sqlalchemy.orm import relationship from sqlalchemy.ext.declarative import declarative_base from sqlalchemy.dialects.postgresql import BYTEA -from nomad import utils, infrastructure +from nomad import utils, infrastructure, datamodel from nomad.repo import RepoCalc @@ -177,16 +180,13 @@ def add_calculation(upload, coe_upload, calc: RepoCalc, calc_meta_data: dict) -> # user relations owner_user_id = calc_meta_data.get('_uploader', int(upload.user_id)) - ownership = Ownership(calc_id=coe_calc.calc_id, user_id=owner_user_id) - repo_db.add(ownership) + coe_calc.owners.append(repo_db.query(User).get(owner_user_id)) for coauthor_id in calc_meta_data.get('coauthors', []): - coauthorship = CoAuthorship(calc_id=coe_calc.calc_id, user_id=int(coauthor_id)) - repo_db.add(coauthorship) + coe_calc.coauthors.append(repo_db.query(User).get(coauthor_id)) for shared_with_id in calc_meta_data.get('shared_with', []): - shareship = Shareship(calc_id=coe_calc.calc_id, user_id=int(shared_with_id)) - repo_db.add(shareship) + coe_calc.shared_with.append(repo_db.query(User).get(shared_with_id)) # datasets for dataset_id in calc_meta_data.get('datasets', []): @@ -203,13 +203,32 @@ def add_calculation(upload, coe_upload, calc: RepoCalc, calc_meta_data: dict) -> citation = Citation(value=reference, kind='EXTERNAL') repo_db.add(citation) - metadata_citation = MetaDataCitation( - 
calc_id=coe_calc.calc_id, - citation=citation) - repo_db.add(metadata_citation) + coe_calc.citations.append(citation) -class Calc(Base): # type: ignore +calc_citation_association = Table( + 'metadata_citations', Base.metadata, + Column('calc_id', Integer, ForeignKey('calculations.calc_id')), + Column('citation_id', Integer, ForeignKey('citations.citation_id'))) + + +ownership = Table( + 'ownerships', Base.metadata, + Column('calc_id', Integer, ForeignKey('calculations.calc_id')), + Column('user_id', Integer, ForeignKey('users.user_id'))) + +co_authorship = Table( + 'coauthorships', Base.metadata, + Column('calc_id', Integer, ForeignKey('calculations.calc_id')), + Column('user_id', Integer, ForeignKey('users.user_id'))) + +shareship = Table( + 'shareships', Base.metadata, + Column('calc_id', Integer, ForeignKey('calculations.calc_id')), + Column('user_id', Integer, ForeignKey('users.user_id'))) + + +class Calc(Base, datamodel.Calc): # type: ignore __tablename__ = 'calculations' calc_id = Column(Integer, primary_key=True, autoincrement=True) @@ -217,6 +236,56 @@ class Calc(Base): # type: ignore upload = relationship('Upload') checksum = Column(String) + calc_meta_data = relationship('CalcMetaData', uselist=False) + user_meta_data = relationship('UserMetaData', uselist=False) + citations = relationship('Citation', secondary=calc_citation_association) + owners = relationship('User', secondary=ownership) + coauthors = relationship('User', secondary=co_authorship) + shared_with = relationship('User', secondary=shareship) + + @classmethod + def create_from(cls, obj): + repo_db = infrastructure.repository_db + return repo_db.query(Calc).filter_by(calc_id=int(obj.pid)).first() + + @property + def mainfile(self) -> str: + return self.calc_meta_data.location + + @property + def pid(self): + return self.calc_id + + @property + def comment(self) -> str: + return self.user_meta_data.label + + @property + def calc_hash(self) -> str: + return self.checksum + + @property + def 
references(self) -> List[str]: + return list(citation.value for citation in self.citations if citation.kind == 'EXTERNAL') + + @property + def uploader(self) -> 'User': + assert len(self.owners) == 1, 'A calculation can only have one owner.' + return self.owners[0] + + @property + def with_embargo(self) -> bool: + return self.user_meta_data.permission == 1 + + @property + def chemical_formula(self) -> str: + return self.calc_meta_data.chemical_formula + + @property + def filenames(self) -> List[str]: + filenames = self.calc_meta_data.filenames.decode('utf-8') + return json.loads(filenames) + def set_value(self, topic_cid: int, value: str) -> None: if value is None: return @@ -304,43 +373,47 @@ class Topics(Base): # type: ignore topic = Column(String) -class Upload(Base): # type: ignore +class Upload(Base, datamodel.Upload): # type: ignore __tablename__ = 'uploads' upload_id = Column(Integer, primary_key=True, autoincrement=True) upload_name = Column(String) user_id = Column(Integer, ForeignKey('users.user_id')) - user = relationship('User') is_processed = Column(Boolean) created = Column(DateTime) + user = relationship('User') + calcs = relationship('Calc') -class Session(Base): # type: ignore - __tablename__ = 'sessions' - - token = Column(String, primary_key=True) - user_id = Column(String) - - -class Ownership(Base): # type: ignore - __tablename__ = 'ownerships' + @classmethod + def create_from(cls, obj): + return Upload.from_upload_hash(obj.upload_hash) - calc_id = Column(Integer, ForeignKey('calculations.calc_id'), primary_key=True) - user_id = Column(Integer, ForeignKey('users.user_id'), primary_key=True) + @staticmethod + def from_upload_hash(upload_hash) -> 'Upload': + repo_db = infrastructure.repository_db + uploads = repo_db.query(Upload).filter_by(upload_name=upload_hash) + assert uploads.count() <= 1, 'Upload hash/name must be unique' + return uploads.first() + @property + def upload_hash(self): + return self.upload_name -class CoAuthorship(Base): # 
type: ignore - __tablename__ = 'coauthorships' + @property + def uploader(self) -> 'User': + return self.user - calc_id = Column(Integer, ForeignKey('calculations.calc_id'), primary_key=True) - user_id = Column(Integer, ForeignKey('users.user_id'), primary_key=True) + @property + def upload_time(self) -> Type[datetime.datetime]: + return self.created -class Shareship(Base): # type: ignore - __tablename__ = 'shareships' +class Session(Base): # type: ignore + __tablename__ = 'sessions' - calc_id = Column(Integer, ForeignKey('calculations.calc_id'), primary_key=True) - user_id = Column(Integer, ForeignKey('users.user_id'), primary_key=True) + token = Column(String, primary_key=True) + user_id = Column(String) class CalcSet(Base): # type: ignore @@ -358,14 +431,6 @@ class Citation(Base): # type: ignore kind = Column(Enum('INTERNAL', 'EXTERNAL', name='citation_kind_enum')) -class MetaDataCitation(Base): # type: ignore - __tablename__ = 'metadata_citations' - - calc_id = Column(Integer, ForeignKey('calculations.calc_id'), primary_key=True) - citation_id = Column(Integer, ForeignKey('citations.citation_id'), primary_key=True) - citation = relationship('Citation') - - class LoginException(Exception): pass diff --git a/nomad/datamodel.py b/nomad/datamodel.py new file mode 100644 index 0000000000000000000000000000000000000000..39d6d40251a2793b3ab920904187be4c6cc2bc48 --- /dev/null +++ b/nomad/datamodel.py @@ -0,0 +1,76 @@ +# Copyright 2018 Markus Scheidgen +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and +# limitations under the License. + +""" +This module contains classes that allow representing and manipulating the core +nomad data entities at a high level of abstraction, independent of their representation +in the coe repository db, the elastic index, json-files, or archive data. It is not +about representing every detail, but only those parts that are directly involved in +api, processing, migration, mirroring, or other 'infrastructure' operations. +""" + +from typing import Type, TypeVar, Union, Iterable, cast +import datetime + +T = TypeVar('T') + + +class Entity: + @classmethod + def create_from(cls: Type[T], obj) -> T: + raise NotImplementedError + + def to(self, entity_cls: Type[T]) -> T: + if (isinstance(self, entity_cls)): + return cast(T, self) + else: + return cast(T, cast(Type[Entity], entity_cls).create_from(self)) + + +class Calc(Entity): + + @property + def pid(self) -> Union[int, str]: + raise NotImplementedError + + @property + def mainfile(self) -> str: + raise NotImplementedError + + @property + def upload(self) -> 'Upload': + raise NotImplementedError + + +class Upload(Entity): + + @property + def upload_uuid(self) -> str: + return '<not assigned>' + + @property + def upload_hash(self) -> str: + raise NotImplementedError + + @property + def upload_time(self) -> Type[datetime.datetime]: + raise NotImplementedError + + @property + def uploader(self): + raise NotImplementedError + + @property + def calcs(self) -> Iterable[Calc]: + raise NotImplementedError diff --git a/tests/test_api.py b/tests/test_api.py index 9ce043dec894e7869b4eaaef8dd33c2ef3ccbdb0..6f65e9b545027d8975509bab8a260c6befc7aaa8 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -222,9 +222,7 @@ class TestUploads: upload = self.assert_upload(rv.data) empty_upload = upload['calcs']['pagination']['total'] == 0 - assert_coe_upload( - upload['upload_hash'], proc_infra['repository_db'], - empty=empty_upload,
meta_data=meta_data) + assert_coe_upload(upload['upload_hash'], empty=empty_upload, meta_data=meta_data) def test_get_command(self, client, test_user_auth, no_warn): rv = client.get('/uploads/command', headers=test_user_auth) diff --git a/tests/test_coe_repo.py b/tests/test_coe_repo.py index 49f825b9c9affa63c76fbf21c5a8d46b0533fa62..dbdf382beafebbe6c749ea7eac0d64466a936f90 100644 --- a/tests/test_coe_repo.py +++ b/tests/test_coe_repo.py @@ -13,11 +13,9 @@ # limitations under the License. import pytest -import json import datetime -from nomad.coe_repo import User, Calc, CalcMetaData, StructRatio, Upload, add_upload, \ - UserMetaData, Citation, MetaDataCitation, Shareship, CoAuthorship, Ownership +from nomad.coe_repo import User, Calc, Upload, add_upload from tests.processing.test_data import processed_upload # pylint: disable=unused-import from tests.processing.test_data import uploaded_id # pylint: disable=unused-import @@ -41,97 +39,36 @@ def test_password_authorize(test_user): assert_user(user, test_user) -def assert_coe_upload(upload_hash, repository_db, empty=False, meta_data={}): - coe_uploads = repository_db.query(Upload).filter_by(upload_name=upload_hash) +def assert_coe_upload(upload_hash, empty=False, meta_data={}): + coe_upload = Upload.from_upload_hash(upload_hash) + if empty: - assert coe_uploads.count() == 0 + assert coe_upload is None else: - assert coe_uploads.count() == 1 - coe_upload = coe_uploads.first() - coe_upload_id = coe_upload.upload_id - one_calc_exist = False - for calc in repository_db.query(Calc).filter_by(origin_id=coe_upload_id): - one_calc_exist = True - assert calc.origin_id == coe_upload_id - assert_coe_calc(calc, repository_db, meta_data=meta_data) + assert len(coe_upload.calcs) > 0 + for calc in coe_upload.calcs: + assert_coe_calc(calc, meta_data=meta_data) if '_upload_time' in meta_data: assert coe_upload.created.isoformat()[:26] == meta_data['_upload_time'] - assert one_calc_exist - - -def assert_coe_calc(calc, repository_db, 
meta_data={}): - calc_id = calc.calc_id - calc_meta_data = repository_db.query(CalcMetaData).filter_by(calc_id=calc_id).first() - - assert calc_meta_data is not None - assert calc_meta_data.calc is not None - assert calc_meta_data.chemical_formula is not None - filenames = calc_meta_data.filenames.decode('utf-8') - assert len(json.loads(filenames)) == 5 - - # struct ratio - struct_ratio = repository_db.query(StructRatio).filter_by(calc_id=calc_id).first() - assert struct_ratio is not None - assert struct_ratio.chemical_formula == calc_meta_data.chemical_formula - assert struct_ratio.formula_units == 1 - - # pid - if '_pid' in meta_data: - assert calc_id == int(meta_data['_pid']) - - # checksum - if '_checksum' in meta_data: - calc.checksum == meta_data['_checksum'] - # comments - comment = repository_db.query(UserMetaData).filter_by( - label=meta_data.get('comment', 'not existing comment'), - calc_id=calc_id).first() - if 'comment' in meta_data: - assert comment is not None - else: - assert comment is None - - # references - if 'references' in meta_data: - for reference in meta_data['references']: - citation = repository_db.query(Citation).filter_by( - value=reference, kind='EXTERNAL').first() - assert citation is not None - assert repository_db.query(MetaDataCitation).filter_by( - citation_id=citation.citation_id, calc_id=calc_id).first() is not None - else: - repository_db.query(MetaDataCitation).filter_by(calc_id=calc_id).first() is None - - # coauthors - if 'coauthors' in meta_data: - for coauthor in meta_data['coauthors']: - assert repository_db.query(CoAuthorship).filter_by( - user_id=coauthor, calc_id=calc_id).first() is not None - else: - assert repository_db.query(CoAuthorship).filter_by(calc_id=calc_id).first() is None - - # coauthors - if 'shared_with' in meta_data: - for coauthor in meta_data['shared_with']: - assert repository_db.query(Shareship).filter_by( - user_id=coauthor, calc_id=calc_id).first() is not None - else: - assert 
repository_db.query(Shareship).filter_by(calc_id=calc_id).first() is None +def assert_coe_calc(calc: Calc, meta_data={}): + assert int(calc.pid) == int(meta_data.get('_pid', calc.pid)) + assert calc.calc_hash == meta_data.get('_checksum', calc.calc_hash) - # ownership - owners = repository_db.query(Ownership).filter_by(calc_id=calc_id) - assert owners.count() == 1 - if '_uploader' in meta_data: - assert owners.first().user_id == meta_data['_uploader'] + # calc data + assert len(calc.filenames) == 5 + assert calc.chemical_formula is not None - # embargo/restriction/permission - user_meta_data = repository_db.query(UserMetaData).filter_by( - calc_id=calc_meta_data.calc_id).first() - assert user_meta_data is not None - assert user_meta_data.permission == (1 if meta_data.get('with_embargo', False) else 0) + # user meta data + assert calc.comment == meta_data.get('comment', None) + assert sorted(calc.references) == sorted(meta_data.get('references', [])) + assert calc.uploader is not None + assert calc.uploader.user_id == meta_data.get('_uploader', calc.uploader.user_id) + assert sorted(user.user_id for user in calc.coauthors) == sorted(meta_data.get('coauthors', [])) + assert sorted(user.user_id for user in calc.shared_with) == sorted(meta_data.get('shared_with', [])) + assert calc.with_embargo == meta_data.get('with_embargo', False) @pytest.mark.timeout(10) @@ -140,11 +77,11 @@ def test_add_upload(clean_repository_db, processed_upload): processed_upload.upload_hash = str(1) add_upload(processed_upload) - assert_coe_upload(processed_upload.upload_hash, clean_repository_db, empty=empty) + assert_coe_upload(processed_upload.upload_hash, empty=empty) processed_upload.upload_hash = str(2) add_upload(processed_upload) - assert_coe_upload(processed_upload.upload_hash, clean_repository_db, empty=empty) + assert_coe_upload(processed_upload.upload_hash, empty=empty) @pytest.mark.timeout(10) @@ -157,10 +94,10 @@ def test_add_upload_metadata(clean_repository_db, processed_upload, 
other_test_u 'references': ['http://external.ref/one', 'http://external.ref/two'], '_uploader': other_test_user.user_id, 'coauthors': [test_user.user_id], - '_checksum': 1, + '_checksum': '1', '_upload_time': datetime.datetime.now().isoformat(), '_pid': 256 } add_upload(processed_upload, meta_data=meta_data) - assert_coe_upload(processed_upload.upload_hash, clean_repository_db, empty=empty, meta_data=meta_data) + assert_coe_upload(processed_upload.upload_hash, empty=empty, meta_data=meta_data)