Commit d8fd0dc1 authored by Markus Scheidgen's avatar Markus Scheidgen
Browse files

Save calc proc logs to archive.

parent 292cc8a1
Pipeline #37149 failed with stages
in 1 minute and 2 seconds
......@@ -25,7 +25,7 @@ build:
stage: build
script:
- docker login -u gitlab-ci-token -p $CI_BUILD_TOKEN gitlab-registry.mpcdf.mpg.de
- docker build -t $TEST_IMAGE .
- docker build -t $TEST_IMAGE --build-args CACHEBUST=3 .
- docker push $TEST_IMAGE
buildgui:
......
......@@ -37,7 +37,7 @@ RUN pip install -r requirements-dep.txt
COPY requirements.txt requirements.txt
RUN pip install -r requirements.txt
# Use docker build --build-args CACHEBUST=2 to not cache this (e.g. when you know deps have changed)
ARG CACHEBUST=2
ARG CACHEBUST=1
COPY nomad/dependencies.py /install/nomad/dependencies.py
COPY nomad/config.py /install/nomad/config.py
RUN python nomad/dependencies.py
......
......@@ -21,7 +21,7 @@ from datetime import datetime
import os.path
from nomad import config
from nomad.files import UploadFile, ArchiveFile
from nomad.files import UploadFile, ArchiveFile, ArchiveLogFile
from nomad.utils import get_logger
from nomad.processing import Upload, NotAllowedDuringProcessing
from nomad.repo import RepoCalc
......@@ -617,6 +617,53 @@ class RepoCalcsRes(Resource):
}
@app.route('%s/logs/<string:upload_hash>/<string:calc_hash>' % base_path, methods=['GET'])
def get_calc_proc_log(upload_hash, calc_hash):
    """
    Get calculation processing log. Calcs are references via *upload_hash*, *calc_hash*
    pairs.

    .. :quickref: logs; Get calculation processing logs.

    **Example request**:

    .. sourcecode:: http

        GET /nomad/api/logs/W36aqCzAKxOCfIiMFsBJh3nHPb4a/7ddvtfRfZAvc3Crr7jOJ8UH0T34I HTTP/1.1
        Accept: application/json

    :param string upload_hash: the hash of the upload (from uploaded file contents)
    :param string calc_hash: the hash of the calculation (from mainfile)
    :resheader Content-Type: application/json
    :status 200: calc successfully retrieved
    :status 404: calc with given hashes does not exist
    :returns: the log data, a line by line sequence of structured logs
    """
    archive_id = '%s/%s' % (upload_hash, calc_hash)
    try:
        archive = ArchiveLogFile(archive_id)
        archive_path = archive.os_path  # fixed typo: was `arhchive_path`
        # NOTE(review): 'application/text' is not a registered MIME type;
        # 'text/plain' is probably intended — kept as-is to not change the
        # response header clients may already expect.
        rv = send_file(
            archive_path,
            mimetype='application/text',
            as_attachment=True,
            attachment_filename=os.path.basename(archive_path))
        return rv
    except (KeyError, FileNotFoundError):
        # both a missing object id and a missing file on disk mean "not found";
        # the two previously separate handlers were byte-identical
        abort(404, message='Archive/calculation %s does not exist.' % archive_id)
    except Exception as e:
        logger = get_logger(
            __name__, endpoint='logs', action='get',
            upload_hash=upload_hash, calc_hash=calc_hash)
        logger.error('Exception on accessing calc proc log', exc_info=e)
        abort(500, message='Could not access the logs.')
@app.route('%s/archive/<string:upload_hash>/<string:calc_hash>' % base_path, methods=['GET'])
def get_calc(upload_hash, calc_hash):
"""
......
......@@ -48,6 +48,7 @@ from contextlib import contextmanager
import gzip
import io
import shutil
import logging
from nomad import config, utils
......@@ -316,3 +317,21 @@ class ArchiveFile(File):
""" Delete all archives of one upload with the given hash. """
bucket = config.files.archive_bucket
Objects.delete_all(bucket, upload_hash)
class ArchiveLogFile(File):
    """
    A processing log file that belongs to a single calculation's archive.
    Stored in the *archive_bucket* next to the archive files; offers a
    logging handler that writes into this file.
    """
    def __init__(self, archive_id: str) -> None:
        super().__init__(
            ext='log',
            bucket=config.files.archive_bucket,
            object_id=archive_id)

    def create_loghandler(self) -> logging.FileHandler:
        # mode 'w': each call starts the log file from scratch
        handler = logging.FileHandler(self.os_path, 'w')
        handler.setLevel(logging.DEBUG)
        return handler
......@@ -33,7 +33,7 @@ import base64
import time
from nomad import config, utils
from nomad.files import UploadFile, ArchiveFile
from nomad.files import UploadFile, ArchiveFile, ArchiveLogFile
from nomad.repo import RepoCalc
from nomad.user import User
from nomad.processing.base import Proc, Chord, process, task, PENDING, SUCCESS, FAILURE, RUNNING
......@@ -78,6 +78,7 @@ class Calc(Proc):
super().__init__(*args, **kwargs)
self._parser_backend = None
self._upload = None
self._loghandler = None
@classmethod
def get(cls, id):
......@@ -110,6 +111,11 @@ class Calc(Proc):
logger = logger.bind(
upload_id=self.upload_id, mainfile=self.mainfile,
upload_hash=upload_hash, calc_hash=calc_hash, **kwargs)
if self._loghandler is None:
self._loghandler = ArchiveLogFile(self.archive_id).create_loghandler()
logger.addHandler(self._loghandler)
return logger
@property
......@@ -127,14 +133,24 @@ class Calc(Proc):
@process
def process(self):
self._upload = Upload.get(self.upload_id)
logger = self.get_logger()
if self._upload is None:
self.get_logger().error('calculation upload does not exist')
logger.error('calculation upload does not exist')
try:
self.parsing()
self.normalizing()
self.archiving()
finally:
# close open loghandler
try:
if self._loghandler is not None:
self._loghandler.close()
self._loghandler = None
except Exception as e:
logger.error('could not close calculation proc log', exc_info=e)
# inform parent proc about completion
self._upload.completed_child()
@task
......
......@@ -110,7 +110,10 @@ if not _logging_is_configured:
logger.setLevel(logging.DEBUG)
return logger
structlog.configure(processors=log_processors, logger_factory=logger_factory)
structlog.configure(
processors=log_processors,
logger_factory=logger_factory,
wrapper_class=structlog.stdlib.BoundLogger)
# configure logging in general
logging.basicConfig(level=logging.DEBUG, stream=sys.stdout)
......
......@@ -25,7 +25,7 @@ import shutil
import os.path
from nomad import config
from nomad.files import UploadFile
from nomad.files import UploadFile, ArchiveFile, ArchiveLogFile
from nomad.processing import Upload, Calc
from nomad.processing.base import task as task_decorator
from nomad.user import me
......@@ -78,6 +78,8 @@ def assert_processing(upload: Upload):
assert calc.parser is not None
assert calc.mainfile is not None
assert calc.status == 'SUCCESS', calc.archive_id
assert ArchiveFile(calc.archive_id).exists()
assert ArchiveLogFile(calc.archive_id).exists()
assert len(calc.errors) == 0
......
......@@ -21,7 +21,7 @@ from nomad.processing import Upload # noqa
from tests.processing.test_data import example_files # noqa
# import fixtures
from tests.test_files import clear_files, archive, archive_config # noqa pylint: disable=unused-import
from tests.test_files import clear_files, archive, archive_log, archive_config # noqa pylint: disable=unused-import
from tests.test_normalizing import normalized_template_example # noqa pylint: disable=unused-import
from tests.test_parsing import parsed_template_example # noqa pylint: disable=unused-import
from tests.test_repo import example_elastic_calc # noqa pylint: disable=unused-import
......@@ -186,12 +186,13 @@ def test_processing(client, file, mode, worker, mocksearch, test_user_auth, no_w
assert len(upload['tasks']) == 4
assert upload['status'] == 'SUCCESS'
assert upload['current_task'] == 'cleanup'
assert UploadFile(upload['upload_id']).exists()
calcs = upload['calcs']['results']
for calc in calcs:
assert calc['status'] == 'SUCCESS'
assert calc['current_task'] == 'archiving'
assert len(calc['tasks']) == 3
assert UploadFile(upload['upload_id']).exists()
assert client.get('/logs/%s' % calc['archive_id']).status_code == 200
if upload['calcs']['pagination']['total'] > 1:
rv = client.get('%s?page=2&per_page=1&order_by=status' % upload_endpoint)
......@@ -276,6 +277,13 @@ def test_get_archive(client, archive, no_warn):
assert rv.status_code == 200
def test_get_calc_proc_log(client, archive_log, no_warn):
    """The logs endpoint serves the stored processing log for an existing calc."""
    rv = client.get('/logs/%s' % archive_log.object_id)

    # check the status first, so a 404/500 fails with a clear message
    # instead of a confusing empty-body length assertion
    assert rv.status_code == 200
    assert len(rv.data) > 0
def test_get_non_existing_archive(client, no_warn):
    """Requesting an archive for an unknown upload/calc pair yields 404."""
    missing_id = 'doesnt/exist'
    rv = client.get('/archive/%s' % missing_id)
    assert rv.status_code == 404
......@@ -15,9 +15,10 @@
import pytest
import json
import shutil
import logging
from nomad.files import Objects, ArchiveFile, UploadFile
import nomad.config as config
from nomad.files import Objects, ArchiveFile, UploadFile, ArchiveLogFile
from nomad import config, utils
# example_file uses an artificial parser for faster test execution, can also be
# changed to examples_vasp.zip for using vasp parser
......@@ -145,3 +146,24 @@ class TestUploadFile:
with upload_same_file:
assert hash == upload_same_file.hash()
@pytest.fixture(scope='function')
def archive_log(clear_files, archive_config):
    """Yields an ArchiveLogFile that already contains one structured test entry."""
    log_file = ArchiveLogFile('__test_upload_hash/__test_calc_hash')
    handler = log_file.create_loghandler()

    logger = utils.get_logger('test')
    logger.setLevel(logging.DEBUG)
    logger.addHandler(handler)
    logger.debug('This is a test')
    handler.close()

    yield log_file
class TestArchiveLogFile:

    def test_archive_log_file(self, archive_log):
        """The fixture-written log file exists and holds the structured test entry."""
        assert archive_log.exists()
        # close the handle instead of leaking it (original left the file open);
        # assumes File.open returns a regular file object — TODO confirm
        with archive_log.open('rt') as f:
            log_entry = json.loads(f.read())
        assert log_entry['event'] == 'This is a test'
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment