Commit 375a62fc authored by Markus Scheidgen

Removed archive_id and replaced it with calc_id, which is now unique on its own.

parent 435efe1f
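
The gist of the change: a calc used to be identified by the composite archive_id, while calc_id alone was not unique; now calc_id is unique on its own and serves as the primary key everywhere. A minimal sketch of the two schemes, with hypothetical example values:

    # before: the composite archive_id was the primary key and had to be
    # split apart wherever only the calc part was needed
    upload_id, calc_id = 'upload_1', 'calc_a'      # hypothetical example values
    archive_id = '%s/%s' % (upload_id, calc_id)    # -> 'upload_1/calc_a'
    _, calc_part = archive_id.split('/')           # the awkward round-trip

    # after: calc_id is unique on its own and used directly as the primary
    # key; upload_id stays a separate field for queries that need it
    assert calc_part == calc_id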
@@ -62,7 +62,7 @@ upload_model = api.inherit('UploadProcessing', proc_model, {
 })
 calc_model = api.inherit('UploadCalculationProcessing', proc_model, {
-    'archive_id': fields.String,
+    'calc_id': fields.String,
     'mainfile': fields.String,
     'upload_id': fields.String,
     'parser': fields.String
...
@@ -12,8 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from nomad import utils
 from .normalizer import Normalizer
@@ -29,7 +27,7 @@ class RepositoryNormalizer(Normalizer):
         b.openNonOverlappingSection('section_repository_info')
         b.openNonOverlappingSection('section_repository_parserdata')
-        b.addValue('repository_checksum', utils.archive.calc_id(b.get_value('archive_id', 0)))
+        b.addValue('repository_checksum', b.get_value('calc_hash', 0))
         b.addValue('repository_chemical_formula', b.get_value('chemical_composition_bulk_reduced', 0))
         b.addValue('repository_parser_id', b.get_value('parser_name', 0))
         atoms = b.get_value('atom_labels', 0)
...
@@ -513,7 +513,7 @@ class LocalBackend(LegacyParserBackend):
         json_writer.open_object()
         # TODO the root sections should be determined programmatically
-        for root_section in ['section_run', 'section_calculation_info']:
+        for root_section in ['section_run', 'section_calculation_info', 'section_repository_info']:
             json_writer.key(root_section)
             json_writer.open_array()
             for run in self._delegate.results[root_section]:
...
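With 'section_repository_info' added to the list of root sections, the archive JSON written by LocalBackend gains a third top-level key. A sketch of the resulting shape; the contents are hypothetical, the keys come from the hunk above:

    # hypothetical shape of the archive JSON written by LocalBackend
    archive = {
        'section_run': [],                # parsed calculation data
        'section_calculation_info': [],   # processing metadata: calc_id, calc_hash, ...
        'section_repository_info': [],    # repository metadata, now also written out
    }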
@@ -52,12 +52,12 @@ class Calc(Proc, datamodel.Calc):
     while parsing, including ``program_name``, ``program_version``, etc.

     Attributes:
-        archive_id: the full id, composed of upload_id and calc_id
+        calc_id: the unique id of this calc
         parser: the name of the parser used to process this calc
         upload_id: the id of the upload used to create this calculation
         mainfile: the mainfile (including path in upload) that was used to create this calc
     """
-    archive_id = StringField(primary_key=True)
+    calc_id = StringField(primary_key=True)
     upload_id = StringField()
     mainfile = StringField()
     parser = StringField()
@@ -78,16 +78,12 @@ class Calc(Proc, datamodel.Calc):
     @classmethod
     def get(cls, id):
-        return cls.get_by_id(id, 'archive_id')
+        return cls.get_by_id(id, 'calc_id')

     @property
     def mainfile_file(self) -> PathObject:
         return self.upload_files.raw_file_object(self.mainfile)

-    @property
-    def calc_id(self) -> str:
-        return utils.archive.calc_id(self.archive_id)
-
     @property
     def upload(self) -> 'Upload':
         if not self._upload:
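
Since calc_id is now the primary key (see the StringField change above), the removed calc_id property is obsolete and lookups take the plain id. A hypothetical usage sketch; it assumes a configured MongoDB behind the Proc documents and an example id value:

    from nomad.processing import Calc

    # look up a calc by its now globally unique calc_id (hypothetical value)
    calc = Calc.get('calc_a')
    assert calc.calc_id == 'calc_a'
    assert calc.upload_id is not None   # upload_id remains a separate field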
@@ -103,8 +99,7 @@ class Calc(Proc, datamodel.Calc):
     def get_logger(self, **kwargs):
         logger = super().get_logger()
         logger = logger.bind(
-            upload_id=self.upload_id, mainfile=self.mainfile, calc_id=self.calc_id,
-            archive_id=self.archive_id, **kwargs)
+            upload_id=self.upload_id, mainfile=self.mainfile, calc_id=self.calc_id, **kwargs)
         return logger
@@ -167,7 +162,8 @@ class Calc(Proc, datamodel.Calc):
         self._parser_backend.openNonOverlappingSection('section_calculation_info')
         self._parser_backend.addValue('upload_id', self.upload_id)
-        self._parser_backend.addValue('archive_id', self.archive_id)
+        self._parser_backend.addValue('calc_id', self.calc_id)
+        self._parser_backend.addValue('calc_hash', self.upload_files.calc_hash(self.mainfile))
         self._parser_backend.addValue('main_file', self.mainfile)
         self._parser_backend.addValue('parser_name', self.parser)
@@ -235,7 +231,6 @@ class Calc(Proc, datamodel.Calc):
     def archiving(self):
         logger = self.get_logger()

-        _, calc_id = self.archive_id.split('/')
         additional = dict(
             mainfile=self.mainfile,
             upload_time=self.upload.upload_time,
@@ -249,7 +244,7 @@ class Calc(Proc, datamodel.Calc):
         repo_calc = RepoCalc.create_from_backend(
             self._parser_backend,
             additional=additional,
-            calc_id=calc_id,
+            calc_id=self.calc_id,
             upload_id=self.upload_id)
         repo_calc.persist()
@@ -448,7 +443,7 @@ class Upload(Chord, datamodel.Upload):
         total_calcs = 0
         for filename, parser in self.match_mainfiles():
             calc = Calc.create(
-                archive_id='%s/%s' % (self.upload_id, self.upload_files.calc_id(filename)),
+                calc_id=self.upload_files.calc_id(filename),
                 mainfile=filename, parser=parser.name,
                 upload_id=self.upload_id)
...
@@ -116,11 +116,6 @@ class RepoCalc(ElasticDocument, datamodel.Entity):
     def upload(self):
         return RepoUpload(self.upload_id)

-    @property
-    def archive_id(self) -> str:
-        """ The unique id for this calculation. """
-        return '%s/%s' % (self.upload_id, self.calc_id)
-
     @classmethod
     def create_from_backend(
             cls, backend: LocalBackend, additional: Dict[str, Any],
@@ -144,7 +139,7 @@ class RepoCalc(ElasticDocument, datamodel.Entity):
         additional.update(dict(calc_id=calc_id, upload_id=upload_id))

         # prepare the entry with all necessary properties from the backend
-        calc = cls(meta=dict(id='%s/%s' % (upload_id, calc_id)))
+        calc = cls(meta=dict(id=calc_id))
         for property in cls._doc_type.mapping:
             mapped_property = key_mappings.get(property, property)
@@ -176,7 +171,7 @@ class RepoCalc(ElasticDocument, datamodel.Entity):
         Raises:
             AlreadyExists: If the calculation already exists in elastic search. We use
                 the elastic document lock here. The elastic document is IDed via the
-                ``archive_id``.
+                ``calc_id``.
         """
         try:
             # In practice es operations might fail due to timeout under heavy loads/
@@ -199,7 +194,7 @@ class RepoCalc(ElasticDocument, datamodel.Entity):
             # if we had an exception and could not fix it with retries, throw it
             raise e_after_retries  # pylint: disable=E0702
         except ConflictError:
-            raise AlreadyExists('Calculation %s does already exist.' % (self.archive_id))
+            raise AlreadyExists('Calculation %s/%s does already exist.' % (self.upload_id, self.calc_id))

     @classmethod
     def update_by_query(cls, upload_id, script):
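
The persist docstring above uses the elastic document id, now plain calc_id, as a lock against duplicate entries. A minimal stand-alone sketch of that create-only pattern with elasticsearch_dsl; the class, index name, and values are hypothetical and simplified compared to RepoCalc:

    from elasticsearch.exceptions import ConflictError
    from elasticsearch_dsl import Document, Keyword
    from elasticsearch_dsl.connections import connections

    # assumes a locally running elasticsearch instance
    connections.create_connection(hosts=['localhost'])

    class SketchCalc(Document):  # hypothetical stand-in for RepoCalc
        upload_id = Keyword()

        class Index:
            name = 'sketch_calcs'

    SketchCalc.init()  # create index and mapping if not present

    doc = SketchCalc(meta=dict(id='calc_a'), upload_id='upload_1')
    try:
        # op_type='create' makes elasticsearch reject a second document
        # with the same id instead of silently overwriting it
        doc.save(op_type='create')
    except ConflictError:
        print('Calculation upload_1/calc_a does already exist.')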
@@ -231,6 +226,4 @@ class RepoCalc(ElasticDocument, datamodel.Entity):
         if upload_time is not None and isinstance(upload_time, datetime):
             data['upload_time'] = data['upload_time'].isoformat()

-        data['archive_id'] = self.archive_id
-
         return {key: value for key, value in data.items() if value is not None}
@@ -97,7 +97,7 @@ class LogstashFormatter(logstash.formatter.LogstashFormatterBase):
             if key in ('event', 'stack_info', 'id', 'timestamp'):
                 continue
             elif key in (
-                    'archive_id', 'upload_id', 'calc_id', 'mainfile',
+                    'upload_id', 'calc_id', 'mainfile',
                     'service', 'release'):
                 key = 'nomad.%s' % key
             else:
...
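For context on the hunk above: the formatter namespaces known structured-log keys under nomad.*, and archive_id simply leaves that set. A small stand-alone sketch that mirrors, rather than copies, the mapping logic; all names are hypothetical:

    # hypothetical stand-alone version of the key prefixing shown above
    NOMAD_KEYS = ('upload_id', 'calc_id', 'mainfile', 'service', 'release')

    def logstash_key(key: str) -> str:
        # keys known to nomad are namespaced so they group together in logstash
        return 'nomad.%s' % key if key in NOMAD_KEYS else key

    assert logstash_key('calc_id') == 'nomad.calc_id'
    assert logstash_key('custom') == 'custom'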
@@ -149,7 +149,7 @@ def mocksearch(monkeypatch):
     def persist(calc):
         uploads_by_id.setdefault(calc.upload_id, []).append(calc)
-        by_archive_id[calc.archive_id] = calc
+        by_archive_id[calc.calc_id] = calc

     def upload_exists(self):
         return self.upload_id in uploads_by_id
@@ -158,7 +158,7 @@ def mocksearch(monkeypatch):
         upload_id = self.upload_id
         if upload_id in uploads_by_id:
             for calc in uploads_by_id[upload_id]:
-                del(by_archive_id[calc.archive_id])
+                del(by_archive_id[calc.calc_id])
             del(uploads_by_id[upload_id])

     @property
...
@@ -29,7 +29,6 @@ from nomad import utils
 from nomad.files import ArchiveBasedStagingUploadFiles, UploadFiles, StagingUploadFiles
 from nomad.processing import Upload, Calc
 from nomad.processing.base import task as task_decorator
-from nomad.repo import RepoUpload

 from tests.test_files import example_file, empty_file
@@ -95,15 +94,14 @@ def assert_processing(upload: Upload, mocksearch=None):
     for calc in Calc.objects(upload_id=upload.upload_id):
         assert calc.parser is not None
         assert calc.mainfile is not None
-        assert calc.status == 'SUCCESS', calc.archive_id
-        calc_id = utils.archive.calc_id(calc.archive_id)
+        assert calc.status == 'SUCCESS'

-        with upload_files.archive_file(calc_id) as archive_json:
+        with upload_files.archive_file(calc.calc_id) as archive_json:
             archive = json.load(archive_json)
         assert 'section_run' in archive
         assert 'section_calculation_info' in archive

-        with upload_files.archive_log_file(calc_id) as f:
+        with upload_files.archive_log_file(calc.calc_id) as f:
             assert 'a test' in f.read()

         assert len(calc.errors) == 0
@@ -111,7 +109,7 @@ def assert_processing(upload: Upload, mocksearch=None):
             f.read()

     if mocksearch:
-        repo = mocksearch[calc.archive_id]
+        repo = mocksearch[calc.calc_id]
         assert repo is not None
         assert repo.chemical_composition is not None
         assert repo.basis_set_type is not None
...
@@ -201,7 +201,7 @@ class TestUploads:
             assert calc['status'] == 'SUCCESS'
             assert calc['current_task'] == 'archiving'
             assert len(calc['tasks']) == 3
-            assert client.get('/archive/logs/%s' % calc['archive_id'], headers=test_user_auth).status_code == 200
+            assert client.get('/archive/logs/%s/%s' % (calc['upload_id'], calc['calc_id']), headers=test_user_auth).status_code == 200

         if upload['calcs']['pagination']['total'] > 1:
             rv = client.get('%s?page=2&per_page=1&order_by=status' % upload_endpoint, headers=test_user_auth)
...
@@ -242,7 +242,8 @@ def parsed_example(request) -> LocalBackend:
 def add_calculation_info(backend: LocalBackend) -> LocalBackend:
     backend.openNonOverlappingSection('section_calculation_info')
     backend.addValue('upload_id', 'test_upload_id')
-    backend.addValue('archive_id', 'test_upload_id/test_calc_id')
+    backend.addValue('calc_id', 'test_calc_id')
+    backend.addValue('calc_hash', 'test_calc_hash')
     backend.addValue('main_file', 'test/mainfile.txt')
     backend.addValue('parser_name', 'testParser')
     backend.closeNonOverlappingSection('section_calculation_info')
...