Skip to content
Snippets Groups Projects
Commit 971beb83 authored by Markus Scheidgen's avatar Markus Scheidgen
Browse files

Refactored coe_repo with new datamodel.

parent 82220c0b
No related branches found
No related tags found
1 merge request!23Merge new upload file structure
...@@ -35,14 +35,17 @@ This module also provides functionality to add parsed calculation data to the db ...@@ -35,14 +35,17 @@ This module also provides functionality to add parsed calculation data to the db
.. autofunction:: add_upload .. autofunction:: add_upload
""" """
from typing import List, Type
import itertools import itertools
import json
import datetime
from passlib.hash import bcrypt from passlib.hash import bcrypt
from sqlalchemy import Column, Integer, String, Boolean, DateTime, ForeignKey, Enum from sqlalchemy import Column, Integer, String, Boolean, DateTime, ForeignKey, Enum, Table
from sqlalchemy.orm import relationship from sqlalchemy.orm import relationship
from sqlalchemy.ext.declarative import declarative_base from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.dialects.postgresql import BYTEA from sqlalchemy.dialects.postgresql import BYTEA
from nomad import utils, infrastructure from nomad import utils, infrastructure, datamodel
from nomad.repo import RepoCalc from nomad.repo import RepoCalc
...@@ -177,16 +180,13 @@ def add_calculation(upload, coe_upload, calc: RepoCalc, calc_meta_data: dict) -> ...@@ -177,16 +180,13 @@ def add_calculation(upload, coe_upload, calc: RepoCalc, calc_meta_data: dict) ->
# user relations # user relations
owner_user_id = calc_meta_data.get('_uploader', int(upload.user_id)) owner_user_id = calc_meta_data.get('_uploader', int(upload.user_id))
ownership = Ownership(calc_id=coe_calc.calc_id, user_id=owner_user_id) coe_calc.owners.append(repo_db.query(User).get(owner_user_id))
repo_db.add(ownership)
for coauthor_id in calc_meta_data.get('coauthors', []): for coauthor_id in calc_meta_data.get('coauthors', []):
coauthorship = CoAuthorship(calc_id=coe_calc.calc_id, user_id=int(coauthor_id)) coe_calc.coauthors.append(repo_db.query(User).get(coauthor_id))
repo_db.add(coauthorship)
for shared_with_id in calc_meta_data.get('shared_with', []): for shared_with_id in calc_meta_data.get('shared_with', []):
shareship = Shareship(calc_id=coe_calc.calc_id, user_id=int(shared_with_id)) coe_calc.shared_with.append(repo_db.query(User).get(shared_with_id))
repo_db.add(shareship)
# datasets # datasets
for dataset_id in calc_meta_data.get('datasets', []): for dataset_id in calc_meta_data.get('datasets', []):
...@@ -203,13 +203,32 @@ def add_calculation(upload, coe_upload, calc: RepoCalc, calc_meta_data: dict) -> ...@@ -203,13 +203,32 @@ def add_calculation(upload, coe_upload, calc: RepoCalc, calc_meta_data: dict) ->
citation = Citation(value=reference, kind='EXTERNAL') citation = Citation(value=reference, kind='EXTERNAL')
repo_db.add(citation) repo_db.add(citation)
metadata_citation = MetaDataCitation( coe_calc.citations.append(citation)
calc_id=coe_calc.calc_id,
citation=citation)
repo_db.add(metadata_citation)
class Calc(Base): # type: ignore calc_citation_association = Table(
'metadata_citations', Base.metadata,
Column('calc_id', Integer, ForeignKey('calculations.calc_id')),
Column('citation_id', Integer, ForeignKey('citations.citation_id')))
ownership = Table(
'ownerships', Base.metadata,
Column('calc_id', Integer, ForeignKey('calculations.calc_id')),
Column('user_id', Integer, ForeignKey('users.user_id')))
co_authorship = Table(
'coauthorships', Base.metadata,
Column('calc_id', Integer, ForeignKey('calculations.calc_id')),
Column('user_id', Integer, ForeignKey('users.user_id')))
shareship = Table(
'shareships', Base.metadata,
Column('calc_id', Integer, ForeignKey('calculations.calc_id')),
Column('user_id', Integer, ForeignKey('users.user_id')))
class Calc(Base, datamodel.Calc): # type: ignore
__tablename__ = 'calculations' __tablename__ = 'calculations'
calc_id = Column(Integer, primary_key=True, autoincrement=True) calc_id = Column(Integer, primary_key=True, autoincrement=True)
...@@ -217,6 +236,56 @@ class Calc(Base): # type: ignore ...@@ -217,6 +236,56 @@ class Calc(Base): # type: ignore
upload = relationship('Upload') upload = relationship('Upload')
checksum = Column(String) checksum = Column(String)
calc_meta_data = relationship('CalcMetaData', uselist=False)
user_meta_data = relationship('UserMetaData', uselist=False)
citations = relationship('Citation', secondary=calc_citation_association)
owners = relationship('User', secondary=ownership)
coauthors = relationship('User', secondary=co_authorship)
shared_with = relationship('User', secondary=shareship)
@classmethod
def create_from(cls, obj):
    """Fetch the repository db calculation that corresponds to ``obj``.

    ``obj.pid`` is converted to ``int`` and matched against ``calc_id``;
    the first matching row of the query is returned.
    """
    calcs = infrastructure.repository_db.query(Calc)
    return calcs.filter_by(calc_id=int(obj.pid)).first()
@property
def mainfile(self) -> str:
    """The ``location`` recorded in this calculation's ``calc_meta_data``."""
    meta = self.calc_meta_data
    return meta.location
@property
def pid(self) -> int:
    """The calculation's pid, which is its ``calc_id`` primary key."""
    return self.calc_id
@property
def comment(self) -> str:
    """The ``label`` stored in this calculation's ``user_meta_data``."""
    user_meta = self.user_meta_data
    return user_meta.label
@property
def calc_hash(self) -> str:
    """The calculation hash, which is the value of the ``checksum`` column."""
    checksum = self.checksum
    return checksum
@property
def references(self) -> List[str]:
    """Values of all citations of this calculation whose kind is 'EXTERNAL'.

    Citations of any other kind are skipped; the order of ``self.citations``
    is preserved.
    """
    return [citation.value for citation in self.citations if citation.kind == 'EXTERNAL']
@property
def uploader(self) -> 'User':
    """The single owner of this calculation.

    Asserts that ``self.owners`` holds exactly one user and returns it.
    """
    owners = self.owners
    assert len(owners) == 1, 'A calculation can only have one owner.'
    return owners[0]
@property
def with_embargo(self) -> bool:
    """True if the ``permission`` in ``user_meta_data`` equals 1."""
    permission = self.user_meta_data.permission
    return permission == 1
@property
def chemical_formula(self) -> str:
    """The ``chemical_formula`` stored in this calculation's ``calc_meta_data``."""
    meta = self.calc_meta_data
    return meta.chemical_formula
@property
def filenames(self) -> List[str]:
    """The file names of this calculation.

    ``calc_meta_data.filenames`` holds UTF-8 encoded JSON bytes; they are
    decoded and parsed here.
    """
    return json.loads(self.calc_meta_data.filenames.decode('utf-8'))
def set_value(self, topic_cid: int, value: str) -> None: def set_value(self, topic_cid: int, value: str) -> None:
if value is None: if value is None:
return return
...@@ -304,43 +373,47 @@ class Topics(Base): # type: ignore ...@@ -304,43 +373,47 @@ class Topics(Base): # type: ignore
topic = Column(String) topic = Column(String)
class Upload(Base): # type: ignore class Upload(Base, datamodel.Upload): # type: ignore
__tablename__ = 'uploads' __tablename__ = 'uploads'
upload_id = Column(Integer, primary_key=True, autoincrement=True) upload_id = Column(Integer, primary_key=True, autoincrement=True)
upload_name = Column(String) upload_name = Column(String)
user_id = Column(Integer, ForeignKey('users.user_id')) user_id = Column(Integer, ForeignKey('users.user_id'))
user = relationship('User')
is_processed = Column(Boolean) is_processed = Column(Boolean)
created = Column(DateTime) created = Column(DateTime)
user = relationship('User')
calcs = relationship('Calc')
class Session(Base): # type: ignore @classmethod
__tablename__ = 'sessions' def create_from(cls, obj):
return Upload.from_upload_hash(obj.upload_hash)
token = Column(String, primary_key=True)
user_id = Column(String)
class Ownership(Base): # type: ignore
__tablename__ = 'ownerships'
calc_id = Column(Integer, ForeignKey('calculations.calc_id'), primary_key=True) @staticmethod
user_id = Column(Integer, ForeignKey('users.user_id'), primary_key=True) def from_upload_hash(upload_hash) -> 'Upload':
repo_db = infrastructure.repository_db
uploads = repo_db.query(Upload).filter_by(upload_name=upload_hash)
assert uploads.count() <= 1, 'Upload hash/name must be unique'
return uploads.first()
@property
def upload_hash(self) -> str:
    """The upload hash, which is stored in the ``upload_name`` column."""
    return self.upload_name
class CoAuthorship(Base): # type: ignore @property
__tablename__ = 'coauthorships' def uploader(self) -> 'User':
return self.user
calc_id = Column(Integer, ForeignKey('calculations.calc_id'), primary_key=True) @property
user_id = Column(Integer, ForeignKey('users.user_id'), primary_key=True) def upload_time(self) -> Type[datetime.datetime]:
return self.created
class Shareship(Base): # type: ignore class Session(Base): # type: ignore
__tablename__ = 'shareships' __tablename__ = 'sessions'
calc_id = Column(Integer, ForeignKey('calculations.calc_id'), primary_key=True) token = Column(String, primary_key=True)
user_id = Column(Integer, ForeignKey('users.user_id'), primary_key=True) user_id = Column(String)
class CalcSet(Base): # type: ignore class CalcSet(Base): # type: ignore
...@@ -358,14 +431,6 @@ class Citation(Base): # type: ignore ...@@ -358,14 +431,6 @@ class Citation(Base): # type: ignore
kind = Column(Enum('INTERNAL', 'EXTERNAL', name='citation_kind_enum')) kind = Column(Enum('INTERNAL', 'EXTERNAL', name='citation_kind_enum'))
class MetaDataCitation(Base): # type: ignore
__tablename__ = 'metadata_citations'
calc_id = Column(Integer, ForeignKey('calculations.calc_id'), primary_key=True)
citation_id = Column(Integer, ForeignKey('citations.citation_id'), primary_key=True)
citation = relationship('Citation')
class LoginException(Exception): class LoginException(Exception):
pass pass
......
# Copyright 2018 Markus Scheidgen
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This module contains classes for representing and manipulating the core
nomad data entities at a high level of abstraction, independent of their representation
in the coe repository db, the elastic index, json-files, or archive data. It is not
about representing every detail, but only those parts that are directly involved in
api, processing, migration, mirroring, or other 'infrastructure' operations.
"""
from typing import Type, TypeVar, Union, Iterable, cast
import datetime
T = TypeVar('T')


class Entity:
    """Base class for entities that can be converted into other entity classes."""

    @classmethod
    def create_from(cls: Type[T], obj) -> T:
        """Create an instance of ``cls`` from ``obj``.

        Not implemented here; raises NotImplementedError unless overridden.
        """
        raise NotImplementedError()

    def to(self, entity_cls: Type[T]) -> T:
        """Return this entity as an instance of ``entity_cls``.

        ``self`` is returned unchanged if it already is an instance of
        ``entity_cls``; otherwise the result of ``entity_cls.create_from(self)``
        is returned.
        """
        if not isinstance(self, entity_cls):
            target_cls = cast(Type[Entity], entity_cls)
            return cast(T, target_cls.create_from(self))
        return cast(T, self)
class Calc(Entity):
    """Abstract calculation entity; every property must be overridden by subclasses."""

    @property
    def pid(self) -> Union[int, str]:
        """The calculation's pid. Not implemented in this base class."""
        raise NotImplementedError()

    @property
    def mainfile(self) -> str:
        """The calculation's mainfile. Not implemented in this base class."""
        raise NotImplementedError()

    @property
    def upload(self) -> 'Upload':
        """The upload of this calculation. Not implemented in this base class."""
        raise NotImplementedError()
class Upload(Entity):
    """Abstract upload entity.

    Only ``upload_uuid`` has a default; all other properties raise
    NotImplementedError and must be overridden by subclasses.
    """

    @property
    def upload_uuid(self) -> str:
        """The upload's uuid; defaults to the placeholder '<not assigned>'."""
        return '<not assigned>'

    @property
    def upload_hash(self) -> str:
        """The upload's hash. Not implemented in this base class."""
        raise NotImplementedError

    @property
    def upload_time(self) -> datetime.datetime:
        """The time of the upload. Not implemented in this base class.

        Annotated as a ``datetime`` instance: ``Type[datetime.datetime]``
        would denote the class object itself, not a point in time.
        """
        raise NotImplementedError

    @property
    def uploader(self):
        """The uploader of this upload. Not implemented in this base class."""
        raise NotImplementedError

    @property
    def calcs(self) -> Iterable[Calc]:
        """The calculations of this upload. Not implemented in this base class."""
        raise NotImplementedError
...@@ -222,9 +222,7 @@ class TestUploads: ...@@ -222,9 +222,7 @@ class TestUploads:
upload = self.assert_upload(rv.data) upload = self.assert_upload(rv.data)
empty_upload = upload['calcs']['pagination']['total'] == 0 empty_upload = upload['calcs']['pagination']['total'] == 0
assert_coe_upload( assert_coe_upload(upload['upload_hash'], empty=empty_upload, meta_data=meta_data)
upload['upload_hash'], proc_infra['repository_db'],
empty=empty_upload, meta_data=meta_data)
def test_get_command(self, client, test_user_auth, no_warn): def test_get_command(self, client, test_user_auth, no_warn):
rv = client.get('/uploads/command', headers=test_user_auth) rv = client.get('/uploads/command', headers=test_user_auth)
......
...@@ -13,11 +13,9 @@ ...@@ -13,11 +13,9 @@
# limitations under the License. # limitations under the License.
import pytest import pytest
import json
import datetime import datetime
from nomad.coe_repo import User, Calc, CalcMetaData, StructRatio, Upload, add_upload, \ from nomad.coe_repo import User, Calc, Upload, add_upload
UserMetaData, Citation, MetaDataCitation, Shareship, CoAuthorship, Ownership
from tests.processing.test_data import processed_upload # pylint: disable=unused-import from tests.processing.test_data import processed_upload # pylint: disable=unused-import
from tests.processing.test_data import uploaded_id # pylint: disable=unused-import from tests.processing.test_data import uploaded_id # pylint: disable=unused-import
...@@ -41,97 +39,36 @@ def test_password_authorize(test_user): ...@@ -41,97 +39,36 @@ def test_password_authorize(test_user):
assert_user(user, test_user) assert_user(user, test_user)
def assert_coe_upload(upload_hash, repository_db, empty=False, meta_data={}): def assert_coe_upload(upload_hash, empty=False, meta_data={}):
coe_uploads = repository_db.query(Upload).filter_by(upload_name=upload_hash) coe_upload = Upload.from_upload_hash(upload_hash)
if empty: if empty:
assert coe_uploads.count() == 0 assert coe_upload is None
else: else:
assert coe_uploads.count() == 1 assert len(coe_upload.calcs) > 0
coe_upload = coe_uploads.first() for calc in coe_upload.calcs:
coe_upload_id = coe_upload.upload_id assert_coe_calc(calc, meta_data=meta_data)
one_calc_exist = False
for calc in repository_db.query(Calc).filter_by(origin_id=coe_upload_id):
one_calc_exist = True
assert calc.origin_id == coe_upload_id
assert_coe_calc(calc, repository_db, meta_data=meta_data)
if '_upload_time' in meta_data: if '_upload_time' in meta_data:
assert coe_upload.created.isoformat()[:26] == meta_data['_upload_time'] assert coe_upload.created.isoformat()[:26] == meta_data['_upload_time']
assert one_calc_exist
def assert_coe_calc(calc, repository_db, meta_data={}):
calc_id = calc.calc_id
calc_meta_data = repository_db.query(CalcMetaData).filter_by(calc_id=calc_id).first()
assert calc_meta_data is not None
assert calc_meta_data.calc is not None
assert calc_meta_data.chemical_formula is not None
filenames = calc_meta_data.filenames.decode('utf-8')
assert len(json.loads(filenames)) == 5
# struct ratio
struct_ratio = repository_db.query(StructRatio).filter_by(calc_id=calc_id).first()
assert struct_ratio is not None
assert struct_ratio.chemical_formula == calc_meta_data.chemical_formula
assert struct_ratio.formula_units == 1
# pid
if '_pid' in meta_data:
assert calc_id == int(meta_data['_pid'])
# checksum
if '_checksum' in meta_data:
calc.checksum == meta_data['_checksum']
# comments def assert_coe_calc(calc: Calc, meta_data={}):
comment = repository_db.query(UserMetaData).filter_by( assert int(calc.pid) == int(meta_data.get('_pid', calc.pid))
label=meta_data.get('comment', 'not existing comment'), assert calc.calc_hash == meta_data.get('_checksum', calc.calc_hash)
calc_id=calc_id).first()
if 'comment' in meta_data:
assert comment is not None
else:
assert comment is None
# references
if 'references' in meta_data:
for reference in meta_data['references']:
citation = repository_db.query(Citation).filter_by(
value=reference, kind='EXTERNAL').first()
assert citation is not None
assert repository_db.query(MetaDataCitation).filter_by(
citation_id=citation.citation_id, calc_id=calc_id).first() is not None
else:
repository_db.query(MetaDataCitation).filter_by(calc_id=calc_id).first() is None
# coauthors
if 'coauthors' in meta_data:
for coauthor in meta_data['coauthors']:
assert repository_db.query(CoAuthorship).filter_by(
user_id=coauthor, calc_id=calc_id).first() is not None
else:
assert repository_db.query(CoAuthorship).filter_by(calc_id=calc_id).first() is None
# coauthors
if 'shared_with' in meta_data:
for coauthor in meta_data['shared_with']:
assert repository_db.query(Shareship).filter_by(
user_id=coauthor, calc_id=calc_id).first() is not None
else:
assert repository_db.query(Shareship).filter_by(calc_id=calc_id).first() is None
# ownership # calc data
owners = repository_db.query(Ownership).filter_by(calc_id=calc_id) assert len(calc.filenames) == 5
assert owners.count() == 1 assert calc.chemical_formula is not None
if '_uploader' in meta_data:
assert owners.first().user_id == meta_data['_uploader']
# embargo/restriction/permission # user meta data
user_meta_data = repository_db.query(UserMetaData).filter_by( assert calc.comment == meta_data.get('comment', None)
calc_id=calc_meta_data.calc_id).first() assert sorted(calc.references) == sorted(meta_data.get('references', []))
assert user_meta_data is not None assert calc.uploader is not None
assert user_meta_data.permission == (1 if meta_data.get('with_embargo', False) else 0) assert calc.uploader.user_id == meta_data.get('_uploader', calc.uploader.user_id)
assert sorted(user.user_id for user in calc.coauthors) == sorted(meta_data.get('coauthors', []))
assert sorted(user.user_id for user in calc.shared_with) == sorted(meta_data.get('shared_with', []))
assert calc.with_embargo == meta_data.get('with_embargo', False)
@pytest.mark.timeout(10) @pytest.mark.timeout(10)
...@@ -140,11 +77,11 @@ def test_add_upload(clean_repository_db, processed_upload): ...@@ -140,11 +77,11 @@ def test_add_upload(clean_repository_db, processed_upload):
processed_upload.upload_hash = str(1) processed_upload.upload_hash = str(1)
add_upload(processed_upload) add_upload(processed_upload)
assert_coe_upload(processed_upload.upload_hash, clean_repository_db, empty=empty) assert_coe_upload(processed_upload.upload_hash, empty=empty)
processed_upload.upload_hash = str(2) processed_upload.upload_hash = str(2)
add_upload(processed_upload) add_upload(processed_upload)
assert_coe_upload(processed_upload.upload_hash, clean_repository_db, empty=empty) assert_coe_upload(processed_upload.upload_hash, empty=empty)
@pytest.mark.timeout(10) @pytest.mark.timeout(10)
...@@ -157,10 +94,10 @@ def test_add_upload_metadata(clean_repository_db, processed_upload, other_test_u ...@@ -157,10 +94,10 @@ def test_add_upload_metadata(clean_repository_db, processed_upload, other_test_u
'references': ['http://external.ref/one', 'http://external.ref/two'], 'references': ['http://external.ref/one', 'http://external.ref/two'],
'_uploader': other_test_user.user_id, '_uploader': other_test_user.user_id,
'coauthors': [test_user.user_id], 'coauthors': [test_user.user_id],
'_checksum': 1, '_checksum': '1',
'_upload_time': datetime.datetime.now().isoformat(), '_upload_time': datetime.datetime.now().isoformat(),
'_pid': 256 '_pid': 256
} }
add_upload(processed_upload, meta_data=meta_data) add_upload(processed_upload, meta_data=meta_data)
assert_coe_upload(processed_upload.upload_hash, clean_repository_db, empty=empty, meta_data=meta_data) assert_coe_upload(processed_upload.upload_hash, empty=empty, meta_data=meta_data)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment