Commit 971beb83 authored by Markus Scheidgen's avatar Markus Scheidgen
Browse files

Refactored coe_repo with new datamodel.

parent 82220c0b
......@@ -35,14 +35,17 @@ This module also provides functionality to add parsed calculation data to the db
.. autofunction:: add_upload
"""
from typing import List, Type
import itertools
import json
import datetime
from passlib.hash import bcrypt
from sqlalchemy import Column, Integer, String, Boolean, DateTime, ForeignKey, Enum
from sqlalchemy import Column, Integer, String, Boolean, DateTime, ForeignKey, Enum, Table
from sqlalchemy.orm import relationship
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.dialects.postgresql import BYTEA
from nomad import utils, infrastructure
from nomad import utils, infrastructure, datamodel
from nomad.repo import RepoCalc
......@@ -177,16 +180,13 @@ def add_calculation(upload, coe_upload, calc: RepoCalc, calc_meta_data: dict) ->
# user relations
owner_user_id = calc_meta_data.get('_uploader', int(upload.user_id))
ownership = Ownership(calc_id=coe_calc.calc_id, user_id=owner_user_id)
repo_db.add(ownership)
coe_calc.owners.append(repo_db.query(User).get(owner_user_id))
for coauthor_id in calc_meta_data.get('coauthors', []):
coauthorship = CoAuthorship(calc_id=coe_calc.calc_id, user_id=int(coauthor_id))
repo_db.add(coauthorship)
coe_calc.coauthors.append(repo_db.query(User).get(coauthor_id))
for shared_with_id in calc_meta_data.get('shared_with', []):
shareship = Shareship(calc_id=coe_calc.calc_id, user_id=int(shared_with_id))
repo_db.add(shareship)
coe_calc.shared_with.append(repo_db.query(User).get(shared_with_id))
# datasets
for dataset_id in calc_meta_data.get('datasets', []):
......@@ -203,13 +203,32 @@ def add_calculation(upload, coe_upload, calc: RepoCalc, calc_meta_data: dict) ->
citation = Citation(value=reference, kind='EXTERNAL')
repo_db.add(citation)
metadata_citation = MetaDataCitation(
calc_id=coe_calc.calc_id,
citation=citation)
repo_db.add(metadata_citation)
coe_calc.citations.append(citation)
class Calc(Base): # type: ignore
calc_citation_association = Table(
'metadata_citations', Base.metadata,
Column('calc_id', Integer, ForeignKey('calculations.calc_id')),
Column('citation_id', Integer, ForeignKey('citations.citation_id')))
ownership = Table(
'ownerships', Base.metadata,
Column('calc_id', Integer, ForeignKey('calculations.calc_id')),
Column('user_id', Integer, ForeignKey('users.user_id')))
co_authorship = Table(
'coauthorships', Base.metadata,
Column('calc_id', Integer, ForeignKey('calculations.calc_id')),
Column('user_id', Integer, ForeignKey('users.user_id')))
shareship = Table(
'shareships', Base.metadata,
Column('calc_id', Integer, ForeignKey('calculations.calc_id')),
Column('user_id', Integer, ForeignKey('users.user_id')))
class Calc(Base, datamodel.Calc): # type: ignore
__tablename__ = 'calculations'
calc_id = Column(Integer, primary_key=True, autoincrement=True)
......@@ -217,6 +236,56 @@ class Calc(Base): # type: ignore
upload = relationship('Upload')
checksum = Column(String)
calc_meta_data = relationship('CalcMetaData', uselist=False)
user_meta_data = relationship('UserMetaData', uselist=False)
citations = relationship('Citation', secondary=calc_citation_association)
owners = relationship('User', secondary=ownership)
coauthors = relationship('User', secondary=co_authorship)
shared_with = relationship('User', secondary=shareship)
@classmethod
def create_from(cls, obj):
repo_db = infrastructure.repository_db
return repo_db.query(Calc).filter_by(calc_id=int(obj.pid)).first()
@property
def mainfile(self) -> str:
return self.calc_meta_data.location
@property
def pid(self):
return self.calc_id
@property
def comment(self) -> str:
return self.user_meta_data.label
@property
def calc_hash(self) -> str:
return self.checksum
@property
def references(self) -> List[str]:
return list(citation.value for citation in self.citations if citation.kind == 'EXTERNAL')
@property
def uploader(self) -> 'User':
assert len(self.owners) == 1, 'A calculation can only have one owner.'
return self.owners[0]
@property
def with_embargo(self) -> bool:
return self.user_meta_data.permission == 1
@property
def chemical_formula(self) -> str:
return self.calc_meta_data.chemical_formula
@property
def filenames(self) -> List[str]:
filenames = self.calc_meta_data.filenames.decode('utf-8')
return json.loads(filenames)
def set_value(self, topic_cid: int, value: str) -> None:
if value is None:
return
......@@ -304,43 +373,47 @@ class Topics(Base): # type: ignore
topic = Column(String)
class Upload(Base): # type: ignore
class Upload(Base, datamodel.Upload): # type: ignore
__tablename__ = 'uploads'
upload_id = Column(Integer, primary_key=True, autoincrement=True)
upload_name = Column(String)
user_id = Column(Integer, ForeignKey('users.user_id'))
user = relationship('User')
is_processed = Column(Boolean)
created = Column(DateTime)
user = relationship('User')
calcs = relationship('Calc')
class Session(Base): # type: ignore
__tablename__ = 'sessions'
token = Column(String, primary_key=True)
user_id = Column(String)
class Ownership(Base): # type: ignore
__tablename__ = 'ownerships'
@classmethod
def create_from(cls, obj):
return Upload.from_upload_hash(obj.upload_hash)
calc_id = Column(Integer, ForeignKey('calculations.calc_id'), primary_key=True)
user_id = Column(Integer, ForeignKey('users.user_id'), primary_key=True)
@staticmethod
def from_upload_hash(upload_hash) -> 'Upload':
repo_db = infrastructure.repository_db
uploads = repo_db.query(Upload).filter_by(upload_name=upload_hash)
assert uploads.count() <= 1, 'Upload hash/name must be unique'
return uploads.first()
@property
def upload_hash(self):
return self.upload_name
class CoAuthorship(Base): # type: ignore
__tablename__ = 'coauthorships'
@property
def uploader(self) -> 'User':
return self.user
calc_id = Column(Integer, ForeignKey('calculations.calc_id'), primary_key=True)
user_id = Column(Integer, ForeignKey('users.user_id'), primary_key=True)
@property
def upload_time(self) -> Type[datetime.datetime]:
return self.created
class Shareship(Base): # type: ignore
__tablename__ = 'shareships'
class Session(Base): # type: ignore
__tablename__ = 'sessions'
calc_id = Column(Integer, ForeignKey('calculations.calc_id'), primary_key=True)
user_id = Column(Integer, ForeignKey('users.user_id'), primary_key=True)
token = Column(String, primary_key=True)
user_id = Column(String)
class CalcSet(Base): # type: ignore
......@@ -358,14 +431,6 @@ class Citation(Base): # type: ignore
kind = Column(Enum('INTERNAL', 'EXTERNAL', name='citation_kind_enum'))
class MetaDataCitation(Base): # type: ignore
__tablename__ = 'metadata_citations'
calc_id = Column(Integer, ForeignKey('calculations.calc_id'), primary_key=True)
citation_id = Column(Integer, ForeignKey('citations.citation_id'), primary_key=True)
citation = relationship('Citation')
class LoginException(Exception):
pass
......
# Copyright 2018 Markus Scheidgen
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This module contains classes for representing and manipulating the core
nomad data entities at a high level of abstraction, independent of their representation
in the coe repository db, the elastic index, json-files, or archive data. It does not
aim to represent every detail, only those parts that are directly involved in
api, processing, migration, mirroring, or other 'infrastructure' operations.
"""
from typing import Type, TypeVar, Union, Iterable, cast
import datetime
T = TypeVar('T')


class Entity:
    """
    Common base for the data entities of this module.

    An entity can be converted into another entity class via :func:`to`. The
    target class performs the actual conversion in its :func:`create_from`
    classmethod, which subclasses are expected to override.
    """

    @classmethod
    def create_from(cls: Type[T], obj) -> T:
        """
        Creates an instance of this class from the given object.

        Arguments:
            obj: The object (usually another entity) to convert.

        Raises:
            NotImplementedError: Always, in this base implementation. The message
                names the target class and the type of the source object, so a
                failing :func:`to` call shows which conversion is missing.
        """
        raise NotImplementedError(
            '%s does not implement create_from (source object: %s)' % (
                cls.__name__, type(obj).__name__))

    def to(self, entity_cls: Type[T]) -> T:
        """
        Returns this entity as an instance of the given entity class.

        Arguments:
            entity_cls: The class to convert to.

        Returns:
            This very object if it already is an instance of ``entity_cls``,
            otherwise the result of ``entity_cls.create_from(self)``.
        """
        if isinstance(self, entity_cls):
            # Already of the requested kind, no conversion necessary.
            return cast(T, self)

        # The casts only help the type checker; they have no runtime effect.
        return cast(T, cast(Type[Entity], entity_cls).create_from(self))
class Calc(Entity):
    """
    Abstract interface of a calculation.

    Every property raises ``NotImplementedError`` here; concrete representations
    are expected to provide the actual values.
    """

    @property
    def pid(self) -> Union[int, str]:
        """ The id of the calculation, either as int or as str. """
        raise NotImplementedError()

    @property
    def mainfile(self) -> str:
        """ The mainfile of the calculation as a string. """
        raise NotImplementedError()

    @property
    def upload(self) -> 'Upload':
        """ The :class:`Upload` associated with this calculation. """
        raise NotImplementedError()
class Upload(Entity):
    """
    Abstract interface of an upload.

    Apart from :attr:`upload_uuid`, which has a placeholder default, every
    property raises ``NotImplementedError`` here and has to be provided by the
    concrete representation.
    """

    @property
    def upload_uuid(self) -> str:
        """ The uuid of the upload; this base implementation yields a fixed placeholder. """
        placeholder = '<not assigned>'
        return placeholder

    @property
    def upload_hash(self) -> str:
        """ The hash of the upload as a string. """
        raise NotImplementedError()

    @property
    def upload_time(self) -> Type[datetime.datetime]:
        """ The time of the upload. """
        # NOTE(review): the annotation denotes the datetime class itself, while an
        # implementation presumably returns a datetime instance. Confirm, and if
        # so change it together with the overriding declarations in subclasses.
        raise NotImplementedError()

    @property
    def uploader(self):
        """ The uploader of this upload; no return type is declared (presumably a user object). """
        raise NotImplementedError()

    @property
    def calcs(self) -> Iterable[Calc]:
        """ An iterable over the :class:`Calc` entities of this upload. """
        raise NotImplementedError()
......@@ -222,9 +222,7 @@ class TestUploads:
upload = self.assert_upload(rv.data)
empty_upload = upload['calcs']['pagination']['total'] == 0
assert_coe_upload(
upload['upload_hash'], proc_infra['repository_db'],
empty=empty_upload, meta_data=meta_data)
assert_coe_upload(upload['upload_hash'], empty=empty_upload, meta_data=meta_data)
def test_get_command(self, client, test_user_auth, no_warn):
rv = client.get('/uploads/command', headers=test_user_auth)
......
......@@ -13,11 +13,9 @@
# limitations under the License.
import pytest
import json
import datetime
from nomad.coe_repo import User, Calc, CalcMetaData, StructRatio, Upload, add_upload, \
UserMetaData, Citation, MetaDataCitation, Shareship, CoAuthorship, Ownership
from nomad.coe_repo import User, Calc, Upload, add_upload
from tests.processing.test_data import processed_upload # pylint: disable=unused-import
from tests.processing.test_data import uploaded_id # pylint: disable=unused-import
......@@ -41,97 +39,36 @@ def test_password_authorize(test_user):
assert_user(user, test_user)
def assert_coe_upload(upload_hash, repository_db, empty=False, meta_data={}):
coe_uploads = repository_db.query(Upload).filter_by(upload_name=upload_hash)
def assert_coe_upload(upload_hash, empty=False, meta_data={}):
coe_upload = Upload.from_upload_hash(upload_hash)
if empty:
assert coe_uploads.count() == 0
assert coe_upload is None
else:
assert coe_uploads.count() == 1
coe_upload = coe_uploads.first()
coe_upload_id = coe_upload.upload_id
one_calc_exist = False
for calc in repository_db.query(Calc).filter_by(origin_id=coe_upload_id):
one_calc_exist = True
assert calc.origin_id == coe_upload_id
assert_coe_calc(calc, repository_db, meta_data=meta_data)
assert len(coe_upload.calcs) > 0
for calc in coe_upload.calcs:
assert_coe_calc(calc, meta_data=meta_data)
if '_upload_time' in meta_data:
assert coe_upload.created.isoformat()[:26] == meta_data['_upload_time']
assert one_calc_exist
def assert_coe_calc(calc, repository_db, meta_data={}):
calc_id = calc.calc_id
calc_meta_data = repository_db.query(CalcMetaData).filter_by(calc_id=calc_id).first()
assert calc_meta_data is not None
assert calc_meta_data.calc is not None
assert calc_meta_data.chemical_formula is not None
filenames = calc_meta_data.filenames.decode('utf-8')
assert len(json.loads(filenames)) == 5
# struct ratio
struct_ratio = repository_db.query(StructRatio).filter_by(calc_id=calc_id).first()
assert struct_ratio is not None
assert struct_ratio.chemical_formula == calc_meta_data.chemical_formula
assert struct_ratio.formula_units == 1
# pid
if '_pid' in meta_data:
assert calc_id == int(meta_data['_pid'])
# checksum
if '_checksum' in meta_data:
calc.checksum == meta_data['_checksum']
# comments
comment = repository_db.query(UserMetaData).filter_by(
label=meta_data.get('comment', 'not existing comment'),
calc_id=calc_id).first()
if 'comment' in meta_data:
assert comment is not None
else:
assert comment is None
# references
if 'references' in meta_data:
for reference in meta_data['references']:
citation = repository_db.query(Citation).filter_by(
value=reference, kind='EXTERNAL').first()
assert citation is not None
assert repository_db.query(MetaDataCitation).filter_by(
citation_id=citation.citation_id, calc_id=calc_id).first() is not None
else:
repository_db.query(MetaDataCitation).filter_by(calc_id=calc_id).first() is None
# coauthors
if 'coauthors' in meta_data:
for coauthor in meta_data['coauthors']:
assert repository_db.query(CoAuthorship).filter_by(
user_id=coauthor, calc_id=calc_id).first() is not None
else:
assert repository_db.query(CoAuthorship).filter_by(calc_id=calc_id).first() is None
# coauthors
if 'shared_with' in meta_data:
for coauthor in meta_data['shared_with']:
assert repository_db.query(Shareship).filter_by(
user_id=coauthor, calc_id=calc_id).first() is not None
else:
assert repository_db.query(Shareship).filter_by(calc_id=calc_id).first() is None
def assert_coe_calc(calc: Calc, meta_data={}):
assert int(calc.pid) == int(meta_data.get('_pid', calc.pid))
assert calc.calc_hash == meta_data.get('_checksum', calc.calc_hash)
# ownership
owners = repository_db.query(Ownership).filter_by(calc_id=calc_id)
assert owners.count() == 1
if '_uploader' in meta_data:
assert owners.first().user_id == meta_data['_uploader']
# calc data
assert len(calc.filenames) == 5
assert calc.chemical_formula is not None
# embargo/restriction/permission
user_meta_data = repository_db.query(UserMetaData).filter_by(
calc_id=calc_meta_data.calc_id).first()
assert user_meta_data is not None
assert user_meta_data.permission == (1 if meta_data.get('with_embargo', False) else 0)
# user meta data
assert calc.comment == meta_data.get('comment', None)
assert sorted(calc.references) == sorted(meta_data.get('references', []))
assert calc.uploader is not None
assert calc.uploader.user_id == meta_data.get('_uploader', calc.uploader.user_id)
assert sorted(user.user_id for user in calc.coauthors) == sorted(meta_data.get('coauthors', []))
assert sorted(user.user_id for user in calc.shared_with) == sorted(meta_data.get('shared_with', []))
assert calc.with_embargo == meta_data.get('with_embargo', False)
@pytest.mark.timeout(10)
......@@ -140,11 +77,11 @@ def test_add_upload(clean_repository_db, processed_upload):
processed_upload.upload_hash = str(1)
add_upload(processed_upload)
assert_coe_upload(processed_upload.upload_hash, clean_repository_db, empty=empty)
assert_coe_upload(processed_upload.upload_hash, empty=empty)
processed_upload.upload_hash = str(2)
add_upload(processed_upload)
assert_coe_upload(processed_upload.upload_hash, clean_repository_db, empty=empty)
assert_coe_upload(processed_upload.upload_hash, empty=empty)
@pytest.mark.timeout(10)
......@@ -157,10 +94,10 @@ def test_add_upload_metadata(clean_repository_db, processed_upload, other_test_u
'references': ['http://external.ref/one', 'http://external.ref/two'],
'_uploader': other_test_user.user_id,
'coauthors': [test_user.user_id],
'_checksum': 1,
'_checksum': '1',
'_upload_time': datetime.datetime.now().isoformat(),
'_pid': 256
}
add_upload(processed_upload, meta_data=meta_data)
assert_coe_upload(processed_upload.upload_hash, clean_repository_db, empty=empty, meta_data=meta_data)
assert_coe_upload(processed_upload.upload_hash, empty=empty, meta_data=meta_data)
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment