Commit 435efe1f authored by Markus Scheidgen

Replaced calc_hash with calc_id.

parent 14cb7dee
@@ -95,7 +95,7 @@ def with_logger(func):
         args = inspect.getcallargs(wrapper, *args, **kwargs)
         logger_args = {
             k: v for k, v in args.items()
-            if k in ['upload_id', 'calc_hash']}
+            if k in ['upload_id', 'calc_id']}
         logger = utils.get_logger(__name__, **logger_args)
         args.update(logger=logger)
         try:
...
@@ -42,28 +42,28 @@ class ArchiveCalcLogResource(Resource):
     @api.response(401, 'Not authorized to access the data.')
     @api.response(200, 'Archive data send', headers={'Content-Type': 'application/plain'})
     @login_if_available
-    def get(self, upload_id, calc_hash):
+    def get(self, upload_id, calc_id):
         """
         Get calculation processing log.

-        Calcs are references via *upload_id*, *calc_hash* pairs.
+        Calcs are references via *upload_id*, *calc_id* pairs.
         """
-        archive_id = '%s/%s' % (upload_id, calc_hash)
+        archive_id = '%s/%s' % (upload_id, calc_id)

         upload_files = UploadFiles.get(
-            upload_id, is_authorized=create_authorization_predicate(upload_id, calc_hash))
+            upload_id, is_authorized=create_authorization_predicate(upload_id, calc_id))

         if upload_files is None:
             abort(404, message='Upload %s does not exist.' % upload_id)

         try:
             return send_file(
-                upload_files.archive_log_file(calc_hash, 'rt'),
+                upload_files.archive_log_file(calc_id, 'rt'),
                 mimetype='text/plain',
                 as_attachment=True,
                 attachment_filename='%s.log' % archive_id)
         except Restricted:
-            abort(401, message='Not authorized to access %s/%s.' % (upload_id, calc_hash))
+            abort(401, message='Not authorized to access %s/%s.' % (upload_id, calc_id))
         except KeyError:
             abort(404, message='Calculation %s does not exist.' % archive_id)
@@ -75,28 +75,28 @@ class ArchiveCalcResource(Resource):
     @api.response(401, 'Not authorized to access the data.')
     @api.response(200, 'Archive data send')
     @login_if_available
-    def get(self, upload_id, calc_hash):
+    def get(self, upload_id, calc_id):
         """
         Get calculation data in archive form.

-        Calcs are references via *upload_id*, *calc_hash* pairs.
+        Calcs are references via *upload_id*, *calc_id* pairs.
         """
-        archive_id = '%s/%s' % (upload_id, calc_hash)
+        archive_id = '%s/%s' % (upload_id, calc_id)

         upload_file = UploadFiles.get(
-            upload_id, is_authorized=create_authorization_predicate(upload_id, calc_hash))
+            upload_id, is_authorized=create_authorization_predicate(upload_id, calc_id))

         if upload_file is None:
             abort(404, message='Archive %s does not exist.' % upload_id)

         try:
             return send_file(
-                upload_file.archive_file(calc_hash, 'rt'),
+                upload_file.archive_file(calc_id, 'rt'),
                 mimetype='application/json',
                 as_attachment=True,
                 attachment_filename='%s.json' % archive_id)
         except Restricted:
-            abort(401, message='Not authorized to access %s/%s.' % (upload_id, calc_hash))
+            abort(401, message='Not authorized to access %s/%s.' % (upload_id, calc_id))
         except KeyError:
             abort(404, message='Calculation %s does not exist.' % archive_id)
...
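For orientation, a client call against the renamed archive routes might look like the sketch below. The host, port, and the /archive and /archive/logs path prefixes are assumptions not shown in this diff; only the upload_id/calc_id pairing is taken from it.

# Hypothetical client sketch for the renamed routes; base URL and path prefixes are assumptions.
import requests

upload_id, calc_id = 'some_upload_id', 'some_calc_id'  # placeholder ids
base = 'http://localhost/api'

log = requests.get('%s/archive/logs/%s/%s' % (base, upload_id, calc_id))
print(log.text)        # plain-text processing log on 200, otherwise 401/404

archive = requests.get('%s/archive/%s/%s' % (base, upload_id, calc_id))
print(archive.json())  # archive data as JSON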
@@ -149,7 +149,7 @@ class TokenResource(Resource):
         'there is no token for you.')


-def create_authorization_predicate(upload_id, calc_hash=None):
+def create_authorization_predicate(upload_id, calc_id=None):
     """
     Returns a predicate that determines if the logged in user has the authorization
     to access the given upload and calculation.
@@ -171,7 +171,7 @@ def create_authorization_predicate(upload_id, calc_hash=None):
         # There are no db entries for the given resource
         if files.UploadFiles.get(upload_id) is not None:
-            logger = utils.get_logger(__name__, upload_id=upload_id, calc_hash=calc_hash)
+            logger = utils.get_logger(__name__, upload_id=upload_id, calc_id=calc_id)
             logger.error('Upload files without respective db entry')
             raise KeyError
...
@@ -45,10 +45,10 @@ pagination_request_parser.add_argument(
 def calc_route(ns, prefix: str = ''):
     """ A resource decorator for /<upload>/<calc> based routes. """
     def decorator(func):
-        ns.route('%s/<string:upload_id>/<string:calc_hash>' % prefix)(
+        ns.route('%s/<string:upload_id>/<string:calc_id>' % prefix)(
             api.doc(params={
                 'upload_id': 'The unique id for the requested upload.',
-                'calc_hash': 'The upload unique hash for the requested calculation.'
+                'calc_id': 'The unique id for the requested calculation.'
             })(func)
         )
     return decorator
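To show how the renamed URL parameters reach a resource, here is a minimal sketch of a decorated resource; the namespace ns, the '/example' prefix, and the class name are hypothetical and only illustrate that Flask passes upload_id and calc_id as view arguments.

# Hypothetical resource using calc_route; ns, the '/example' prefix, and the class name are illustrative only.
@calc_route(ns, '/example')
class ExampleCalcResource(Resource):
    def get(self, upload_id, calc_id):
        # upload_id and calc_id come from <string:upload_id>/<string:calc_id> in the route
        return {'upload_id': upload_id, 'calc_id': calc_id}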
@@ -40,7 +40,7 @@ raw_file_from_path_parser.add_argument(**raw_file_compress_argument)

 @ns.route('/<string:upload_id>/<path:path>')
 @api.doc(params={
-    'upload_id': 'The unique hash for the requested upload.',
+    'upload_id': 'The unique id for the requested upload.',
     'path': 'The path to a file or directory.'
 })
 @api.header('Content-Type', 'application/gz')
@@ -65,7 +65,7 @@ class RawFileFromPathResource(Resource):
         upload_files = UploadFiles.get(
             upload_id, create_authorization_predicate(upload_id))
         if upload_files is None:
-            abort(404, message='The upload with hash %s does not exist.' % upload_id)
+            abort(404, message='The upload with id %s does not exist.' % upload_id)

         if upload_filepath[-1:] == '*':
             upload_filepath = upload_filepath[0:-1]
@@ -108,7 +108,7 @@ raw_files_request_parser.add_argument(

 @ns.route('/<string:upload_id>')
 @api.doc(params={
-    'upload_id': 'The unique hash for the requested upload.'
+    'upload_id': 'The unique id for the requested upload.'
 })
 class RawFilesResource(Resource):
     @api.doc('get_files')
@@ -154,7 +154,7 @@ def respond_to_get_raw_files(upload_id, files, compress=False):
     upload_files = UploadFiles.get(
         upload_id, create_authorization_predicate(upload_id))
     if upload_files is None:
-        abort(404, message='The upload with hash %s does not exist.' % upload_id)
+        abort(404, message='The upload with id %s does not exist.' % upload_id)

     def generator():
         """ Stream a zip file with all files using zipstream. """
...
@@ -35,19 +35,19 @@ class RepoCalcResource(Resource):
     @api.response(404, 'The upload or calculation does not exist')
     @api.response(200, 'Metadata send')
     @api.doc('get_repo_calc')
-    def get(self, upload_id, calc_hash):
+    def get(self, upload_id, calc_id):
         """
         Get calculation metadata in repository form.

         Repository metadata only entails the quanties shown in the repository.
         This is basically the elastic search index entry for the
-        requested calculations. Calcs are references via *upload_id*, *calc_hash*
+        requested calculations. Calcs are references via *upload_id*, *calc_id*
         pairs.
         """
         try:
-            return RepoCalc.get(id='%s/%s' % (upload_id, calc_hash)).json_dict, 200
+            return RepoCalc.get(id='%s/%s' % (upload_id, calc_id)).json_dict, 200
         except NotFoundError:
-            abort(404, message='There is no calculation for %s/%s' % (upload_id, calc_hash))
+            abort(404, message='There is no calculation for %s/%s' % (upload_id, calc_id))
         except Exception as e:
             abort(500, message=str(e))
...
@@ -130,7 +130,7 @@ class CalcProcReproduction:
     (parsing, normalizing) with the locally installed parsers and normalizers.

     The use-case is error/warning reproduction. Use ELK to identify errors, use
-    the upload, archive ids/hashes to given by ELK, and reproduce and fix the error
+    the upload, archive ids to given by ELK, and reproduce and fix the error
     in your development environment.

     This is a class of :class:`UploadFile` the downloaded raw data will be treated as
@@ -142,7 +142,7 @@ class CalcProcReproduction:
         override: Set to true to override any existing local calculation data.
     """
     def __init__(self, archive_id: str, override: bool = False) -> None:
-        self.calc_hash = utils.archive.calc_hash(archive_id)
+        self.calc_id = utils.archive.calc_id(archive_id)
         self.upload_id = utils.archive.upload_id(archive_id)
         self.mainfile = None
         self.parser = None
@@ -170,10 +170,10 @@ class CalcProcReproduction:
         self.logger.info('Extracting calc data.')
         self.upload_files.extract()

-        # find mainfile matching calc_hash
+        # find mainfile matching calc_id
         self.mainfile = next(
             filename for filename in self.upload_files.raw_file_manifest()
-            if self.upload_files.calc_hash(filename) == self.calc_hash)
+            if self.upload_files.calc_id(filename) == self.calc_id)

         assert self.mainfile is not None, 'The mainfile could not be found.'
         self.logger = self.logger.bind(mainfile=self.mainfile)
...
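A short usage sketch of the reproduction helper under the new naming; the archive id value is a placeholder, and any further processing steps (parsing, normalizing) are not shown in this hunk.

# Hypothetical usage: archive ids have the form '<upload_id>/<calc_id>'.
local_calc = CalcProcReproduction('some_upload_id/some_calc_id', override=True)
print(local_calc.upload_id, local_calc.calc_id)  # ids split from the archive id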
@@ -28,7 +28,7 @@ from .base import Base, calc_citation_association, ownership, co_authorship, sha
 class Calc(Base, datamodel.Calc):  # type: ignore
     __tablename__ = 'calculations'

-    calc_id = Column(Integer, primary_key=True, autoincrement=True)
+    coe_calc_id = Column('calc_id', Integer, primary_key=True, autoincrement=True)
     origin_id = Column(Integer, ForeignKey('uploads.upload_id'))
     upload = relationship('Upload')
     checksum = Column(String)
@@ -43,14 +43,14 @@ class Calc(Base, datamodel.Calc):  # type: ignore
     parents = relationship(
         'Calc',
         secondary=calc_dataset_containment,
-        primaryjoin=calc_dataset_containment.c.children_calc_id == calc_id,
-        secondaryjoin=calc_dataset_containment.c.parent_calc_id == calc_id,
+        primaryjoin=calc_dataset_containment.c.children_calc_id == coe_calc_id,
+        secondaryjoin=calc_dataset_containment.c.parent_calc_id == coe_calc_id,
         backref='children')

     @classmethod
     def load_from(cls, obj):
         repo_db = infrastructure.repository_db
-        return repo_db.query(Calc).filter_by(calc_id=int(obj.pid)).first()
+        return repo_db.query(Calc).filter_by(coe_calc_id=int(obj.pid)).first()

     @property
     def mainfile(self) -> str:
@@ -58,14 +58,14 @@ class Calc(Base, datamodel.Calc):  # type: ignore
     @property
     def pid(self):
-        return self.calc_id
+        return self.coe_calc_id

     @property
     def comment(self) -> str:
         return self.user_meta_data.label

     @property
-    def calc_hash(self) -> str:
+    def calc_id(self) -> str:
         return self.checksum

     @property
@@ -92,19 +92,19 @@ class Calc(Base, datamodel.Calc):  # type: ignore
     @property
     def all_datasets(self) -> List['DataSet']:
-        assert self.calc_id is not None
+        assert self.coe_calc_id is not None
         repo_db = infrastructure.repository_db
-        query = repo_db.query(literal(self.calc_id).label('calc_id')).cte(recursive=True)
+        query = repo_db.query(literal(self.coe_calc_id).label('coe_calc_id')).cte(recursive=True)
         right = aliased(query)
         left = aliased(CalcSet)
         query = query.union_all(repo_db.query(left.parent_calc_id).join(
-            right, right.c.calc_id == left.children_calc_id))
+            right, right.c.coe_calc_id == left.children_calc_id))
         query = repo_db.query(query)
-        dataset_calc_ids = list(r[0] for r in query if not r[0] == self.calc_id)
+        dataset_calc_ids = list(r[0] for r in query if not r[0] == self.coe_calc_id)
         if len(dataset_calc_ids) > 0:
             return [
                 DataSet(dataset_calc)
-                for dataset_calc in repo_db.query(Calc).filter(Calc.calc_id.in_(dataset_calc_ids))]
+                for dataset_calc in repo_db.query(Calc).filter(Calc.coe_calc_id.in_(dataset_calc_ids))]
         else:
             return []
@@ -132,7 +132,7 @@ class DataSet:
     @property
     def id(self):
-        return self._dataset_calc.calc_id
+        return self._dataset_calc.coe_calc_id

     @property
     def dois(self) -> List[Citation]:
...
@@ -100,17 +100,17 @@ class Upload(Base, datamodel.Upload):  # type: ignore
     @classmethod
     def load_from(cls, obj):
-        return Upload.from_upload_id(obj.upload_id)
+        return Upload.from_upload_id(str(obj.upload_id))

     @staticmethod
-    def from_upload_id(upload_id) -> 'Upload':
+    def from_upload_id(upload_id: str) -> 'Upload':
         repo_db = infrastructure.repository_db
         uploads = repo_db.query(Upload).filter_by(upload_name=upload_id)
-        assert uploads.count() <= 1, 'Upload hash/name must be unique'
+        assert uploads.count() <= 1, 'Upload id/name must be unique'
         return uploads.first()

     @property
-    def upload_id(self):
+    def upload_id(self) -> str:
         return self.upload_name

     @property
@@ -163,7 +163,7 @@ class Upload(Base, datamodel.Upload):  # type: ignore
             if has_calcs:
                 # empty upload case
                 repo_db.commit()
-                result = coe_upload.upload_id
+                result = coe_upload.coe_upload_id
             else:
                 repo_db.rollback()
         except Exception as e:
@@ -181,8 +181,8 @@ class Upload(Base, datamodel.Upload):  # type: ignore
         # table based properties
         coe_calc = Calc(
-            calc_id=calc_meta_data.get('_pid', None),
-            checksum=calc_meta_data.get('_checksum', calc.calc_hash),
+            coe_calc_id=calc_meta_data.get('_pid', None),
+            checksum=calc_meta_data.get('_checksum', calc.calc_id),
             upload=self)
         repo_db.add(coe_calc)
@@ -242,7 +242,7 @@ class Upload(Base, datamodel.Upload):  # type: ignore
         # datasets
         for dataset_id in calc_meta_data.get('datasets', []):
-            dataset = CalcSet(parent_calc_id=dataset_id, children_calc_id=coe_calc.calc_id)
+            dataset = CalcSet(parent_calc_id=dataset_id, children_calc_id=coe_calc.coe_calc_id)
             repo_db.add(dataset)

         # references
...
@@ -50,7 +50,7 @@ class Calc(Entity):
     Attributes:
         pid: The persistent id (pid) for the calculation
         mainfile: The mainfile path relative to upload root
-        calc_hash: A unique hash/checksum that describes unique calculations
+        calc_id: A unique id/checksum that describes unique calculations
        upload: The upload object that this calculation belongs to.
     """
     @property
@@ -62,7 +62,7 @@ class Calc(Entity):
         raise NotImplementedError

     @property
-    def calc_hash(self) -> str:
+    def calc_id(self) -> str:
         raise NotImplementedError

     @property
...
@@ -46,6 +46,7 @@ import shutil
 from zipfile import ZipFile, BadZipFile, is_zipfile
 from bagit import make_bag
 import hashlib
+import base64
 import io

 from nomad import config, utils
@@ -140,11 +141,11 @@ class Metadata(metaclass=ABCMeta):
         pass

     def insert(self, calc: dict) -> None:
-        """ Insert a calc, using hash as key. """
+        """ Insert a calc, using calc_id as key. """
         raise NotImplementedError()

-    def update(self, calc_hash: str, updates: dict) -> dict:
-        """ Updating a calc, using hash as key and running dict update with the given data. """
+    def update(self, calc_id: str, updates: dict) -> dict:
+        """ Updating a calc, using calc_id as key and running dict update with the given data. """
         raise NotImplementedError()

     def get(self, calc_id: str) -> dict:
@@ -181,16 +182,16 @@ class StagingMetadata(Metadata):
         pass

     def insert(self, calc: dict) -> None:
-        id = calc['hash']
+        id = calc['calc_id']
         path = self._dir.join_file('%s.json' % id)
         assert not path.exists()
         with open(path.os_path, 'wt') as f:
             ujson.dump(calc, f)

-    def update(self, calc_hash: str, updates: dict) -> dict:
-        metadata = self.get(calc_hash)
+    def update(self, calc_id: str, updates: dict) -> dict:
+        metadata = self.get(calc_id)
         metadata.update(updates)
-        path = self._dir.join_file('%s.json' % calc_hash)
+        path = self._dir.join_file('%s.json' % calc_id)
         with open(path.os_path, 'wt') as f:
             ujson.dump(metadata, f)
         return metadata
@@ -263,24 +264,24 @@ class PublicMetadata(Metadata):
     def insert(self, calc: dict) -> None:
         assert self.data is not None, "Metadata is not open."
-        id = calc['hash']
+        id = calc['calc_id']
         assert id not in self.data
         self.data[id] = calc
         self._modified = True

-    def update(self, calc_hash: str, updates: dict) -> dict:
+    def update(self, calc_id: str, updates: dict) -> dict:
         assert self.data is not None, "Metadata is not open."
-        if calc_hash not in self.data:
+        if calc_id not in self.data:
             raise KeyError()
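The net effect on the metadata stores is that calc dicts are carried and looked up by 'calc_id' rather than 'hash'. A hedged sketch of the new contract follows; how a concrete Metadata instance is constructed is not shown in this diff, and the field names and values besides calc_id are placeholders.

# Hypothetical use of the Metadata API after the rename; 'metadata' stands for any
# concrete Metadata implementation (e.g. StagingMetadata), constructed elsewhere.
calc = {'calc_id': 'abc123', 'mainfile': 'some/path/mainfile'}  # placeholder values
metadata.insert(calc)                             # stored under calc['calc_id']
metadata.update('abc123', {'restricted': True})   # looked up by calc_id
print(metadata.get('abc123'))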