Commit 369779fa authored by Markus Scheidgen
Browse files

Extended the datamodel to also cover the repo.py and processing module.

parent 971beb83
......@@ -46,7 +46,7 @@ from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.dialects.postgresql import BYTEA
from nomad import utils, infrastructure, datamodel
from nomad.repo import RepoCalc
from nomad.repo import RepoUpload, RepoCalc
Base = declarative_base()
......@@ -63,7 +63,7 @@ class UploadMetaData:
return self._calc_data.get(mainfile, self._upload_data)
def add_upload(upload, meta_data: dict = {}) -> int:
def add_upload(upload: datamodel.Upload, meta_data: dict = {}) -> int:
"""
Add the processed upload to the NOMAD-coe repository db. It creates an
uploads-entry, respective calculation and property entries. Everything in one
......@@ -85,7 +85,7 @@ def add_upload(upload, meta_data: dict = {}) -> int:
logger = utils.get_logger(
__name__,
upload_id=upload.upload_id,
upload_id=upload.upload_uuid,
upload_hash=upload.upload_hash)
result = None
......@@ -95,16 +95,16 @@ def add_upload(upload, meta_data: dict = {}) -> int:
coe_upload = Upload(
upload_name=upload.upload_hash,
created=meta_data.get('_upload_time', upload.upload_time),
user_id=int(upload.user_id),
user=upload.uploader,
is_processed=True)
repo_db.add(coe_upload)
# add calculations and metadata
has_calcs = False
for repo_calc in RepoCalc.upload_calcs(upload.upload_id):
for calc in upload.to(RepoUpload).calcs:
has_calcs = True
add_calculation(
upload, coe_upload, repo_calc, upload_meta_data.get(repo_calc.mainfile))
coe_upload, calc.to(RepoCalc), upload_meta_data.get(calc.mainfile))
# commit
if has_calcs:
......@@ -124,14 +124,14 @@ def add_upload(upload, meta_data: dict = {}) -> int:
return result
def add_calculation(upload, coe_upload, calc: RepoCalc, calc_meta_data: dict) -> None:
def add_calculation(upload: 'Upload', calc: RepoCalc, calc_meta_data: dict) -> None:
repo_db = infrastructure.repository_db
# table based properties
coe_calc = Calc(
calc_id=calc_meta_data.get('_pid', None),
checksum=calc_meta_data.get('_checksum', calc.calc_hash),
upload=coe_upload)
upload=upload)
repo_db.add(coe_calc)
program_version = calc.program_version # TODO shorten version names
......@@ -465,6 +465,10 @@ class User(Base): # type: ignore
def _generate_auth_token(self, expiration=600):
assert False, 'Login functions are done by the NOMAD-coe repository GUI'
@staticmethod
def from_user_id(user_id) -> 'User':
    """ Queries the repository db for the :class:`User` identified by ``user_id``. """
    user_query = infrastructure.repository_db.query(User)
    return user_query.get(user_id)
def get_auth_token(self):
repo_db = infrastructure.repository_db
session = repo_db.query(Session).filter_by(user_id=self.user_id).first()
......
......@@ -26,7 +26,7 @@ import datetime
T = TypeVar('T')
class Entity:
class Entity():
@classmethod
def create_from(cls: Type[T], obj) -> T:
raise NotImplementedError
......@@ -39,7 +39,13 @@ class Entity:
class Calc(Entity):
"""
Attributes:
pid: The persistent id (pid) for the calculation
mainfile: The mainfile path relative to upload root
calc_hash: A unique hash/checksum that describes unique calculations
upload: The upload object that this calculation belongs to.
"""
@property
def pid(self) -> Union[int, str]:
raise NotImplementedError
......@@ -48,13 +54,24 @@ class Calc(Entity):
def mainfile(self) -> str:
raise NotImplementedError
@property
def calc_hash(self) -> str:
    """
    A unique hash/checksum that describes unique calculations. This base
    implementation always raises; implementing classes provide the value.
    """
    raise NotImplementedError()
@property
def upload(self) -> 'Upload':
    """
    The upload object that this calculation belongs to. This base
    implementation always raises; implementing classes provide the value.
    """
    raise NotImplementedError()
class Upload(Entity):
"""
Attributes:
upload_uuid(str): The unique random id that each upload has
upload_hash(str): The hash/checksum that describes unique uploads
upload_time(datetime): The upload time
uploader(repo.User): The user that uploaded this upload
calcs(Iterable[Calc]): An iterable over the calculations of this upload
"""
@property
def upload_uuid(self) -> str:
return '<not assigned>'
......
......@@ -31,7 +31,7 @@ import logging
from structlog import wrap_logger
from contextlib import contextmanager
from nomad import utils, coe_repo
from nomad import utils, coe_repo, datamodel
from nomad.files import UploadFile, ArchiveFile, ArchiveLogFile, File
from nomad.repo import RepoCalc
from nomad.processing.base import Proc, Chord, process, task, PENDING, SUCCESS, FAILURE
......@@ -43,7 +43,7 @@ from nomad.utils import lnr
class NotAllowedDuringProcessing(Exception):
    """
    Plain :class:`Exception` subclass without additional behavior.
    NOTE(review): by its name, presumably raised for operations attempted while
    processing is still running -- confirm against the raise sites.
    """
class Calc(Proc):
class Calc(Proc, datamodel.Calc):
"""
Instances of this class represent calculations. This class manages the elastic
search index entry, files, and archive for the respective calculation.
......@@ -88,6 +88,10 @@ class Calc(Proc):
def mainfile_file(self) -> File:
return File(self.mainfile_tmp_path)
@property
def calc_hash(self) -> str:
    """ The calc hash, obtained from this calculation's ``archive_id`` via ``utils.archive.calc_hash``. """
    archive_id = self.archive_id
    return utils.archive.calc_hash(archive_id)
@property
def upload(self) -> 'Upload':
if not self._upload:
......@@ -263,12 +267,13 @@ class Calc(Proc):
with utils.timer(logger, 'indexed', step='index'):
# persist to elastic search
RepoCalc.create_from_backend(
repo_calc = RepoCalc.create_from_backend(
self._parser_backend,
additional=additional,
upload_hash=upload_hash,
calc_hash=calc_hash,
upload_id=self.upload_id).persist()
upload_id=self.upload_id)
repo_calc.persist()
with utils.timer(
logger, 'archived', step='archive',
......@@ -292,7 +297,7 @@ class Calc(Proc):
log_data.update(log_size=self._calc_proc_logfile.size)
class Upload(Chord):
class Upload(Chord, datamodel.Upload):
"""
Represents uploads in the databases. Provides persistence access to the files storage,
and processing state.
......@@ -347,6 +352,14 @@ class Upload(Chord):
""" Returns all uploads for the given user. Currently returns all uploads. """
return cls.objects(user_id=str(user.user_id), in_staging=True)
@property
def upload_uuid(self):
    """ The upload's uuid; this is the same value as ``upload_id``. """
    upload_id = self.upload_id
    return upload_id
@property
def uploader(self):
    """ The user looked up through ``coe_repo.User.from_user_id`` for this upload's ``user_id``. """
    user_id = self.user_id
    return coe_repo.User.from_user_id(user_id)
def get_logger(self, **kwargs):
logger = super().get_logger()
logger = logger.bind(upload_id=self.upload_id, **kwargs)
......@@ -547,3 +560,7 @@ class Upload(Chord):
def all_calcs(self, start, end, order_by='mainfile'):
    """
    Returns the calcs of this upload, sliced to ``[start:end]`` and then ordered
    by the given ``order_by`` field (default ``'mainfile'``).
    """
    upload_calcs = Calc.objects(upload_id=self.upload_id)
    window = upload_calcs[start:end]
    return window.order_by(order_by)
@property
def calcs(self):
    """ All :class:`Calc` objects that carry this upload's ``upload_id``. """
    upload_id = self.upload_id
    return Calc.objects(upload_id=upload_id)
......@@ -29,7 +29,7 @@ from elasticsearch_dsl import Document as ElasticDocument, Search, Date, Keyword
from datetime import datetime
import time
from nomad import config, infrastructure
from nomad import config, infrastructure, datamodel
from nomad.parsing import LocalBackend
from nomad.utils import get_logger
......@@ -44,7 +44,21 @@ key_mappings = {
class AlreadyExists(Exception):
    """
    Plain :class:`Exception` subclass without additional behavior.
    NOTE(review): by its name, presumably signals an attempt to create something
    that is already present -- confirm against the raise sites.
    """
class RepoCalc(ElasticDocument):
class RepoUpload(datamodel.Entity):
    """
    An upload as seen by the repo module, identified by its uuid and hash.

    Attributes:
        upload_uuid: The uuid of the upload, used to look up its calcs
        upload_hash: The hash of the upload
    """

    def __init__(self, upload_uuid, upload_hash):
        self.upload_uuid = upload_uuid
        self.upload_hash = upload_hash

    @classmethod
    def create_from(cls, obj):
        """
        Creates an instance from any object that provides ``upload_uuid`` and
        ``upload_hash`` attributes.
        """
        # Instantiate through cls instead of naming RepoUpload directly, so that
        # subclasses receive instances of their own type.
        return cls(obj.upload_uuid, obj.upload_hash)

    @property
    def calcs(self):
        """ The result of ``RepoCalc.upload_calcs`` for this upload's uuid. """
        return RepoCalc.upload_calcs(self.upload_uuid)
class RepoCalc(ElasticDocument, datamodel.Entity):
"""
Elastic search document that represents a calculation. It is supposed to be a
component of :class:`Calc`. Should only be created by its parent :class:`Calc`
......@@ -78,6 +92,10 @@ class RepoCalc(ElasticDocument):
aux_files = Keyword()
@property
def upload(self):
    """ A :class:`RepoUpload` built from this calc's ``upload_id`` and ``upload_hash``. """
    upload_uuid, upload_hash = self.upload_id, self.upload_hash
    return RepoUpload(upload_uuid, upload_hash)
@property
def archive_id(self) -> str:
""" The unique id for this calculation. """
......
......@@ -45,6 +45,7 @@ def assert_coe_upload(upload_hash, empty=False, meta_data={}):
if empty:
assert coe_upload is None
else:
assert coe_upload is not None
assert len(coe_upload.calcs) > 0
for calc in coe_upload.calcs:
assert_coe_calc(calc, meta_data=meta_data)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment