diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 4cf8382d610e2f0ff1288c6835692703f0f1f1b4..96c367e1f39eb6c2679c36432c10b09675395032 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -25,7 +25,7 @@ build:
   stage: build
   script:
     - docker login -u gitlab-ci-token -p $CI_BUILD_TOKEN gitlab-registry.mpcdf.mpg.de
-    - docker build -t $TEST_IMAGE .
+    - docker build -t $TEST_IMAGE --build-arg CACHEBUST=3 .
     - docker push $TEST_IMAGE
 
 buildgui:
diff --git a/Dockerfile b/Dockerfile
index fb49d18eb060192aa450630b14a6a6ddff0604cf..39b973bed0e5acc1c4d849ae8af891c9b1725b85 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -37,7 +37,7 @@ RUN pip install -r requirements-dep.txt
 COPY requirements.txt requirements.txt
 RUN pip install -r requirements.txt
-# Use docker build --build-args CACHEBUST=2 to not cache this (e.g. when you know deps have changed)
-ARG CACHEBUST=2
+# Use docker build --build-arg CACHEBUST=2 to not cache this (e.g. when you know deps have changed)
+ARG CACHEBUST=1
 COPY nomad/dependencies.py /install/nomad/dependencies.py
 COPY nomad/config.py /install/nomad/config.py
 RUN python nomad/dependencies.py
diff --git a/nomad/api.py b/nomad/api.py
index 21a4026f5ebf7a8062f48ba4167d4668603a5628..1875a067f1c6d4214c46a72e2e903d47ef5080ae 100644
--- a/nomad/api.py
+++ b/nomad/api.py
@@ -21,7 +21,7 @@ from datetime import datetime
 import os.path
 
 from nomad import config
-from nomad.files import UploadFile, ArchiveFile
+from nomad.files import UploadFile, ArchiveFile, ArchiveLogFile
 from nomad.utils import get_logger
 from nomad.processing import Upload, NotAllowedDuringProcessing
 from nomad.repo import RepoCalc
@@ -617,6 +617,50 @@ class RepoCalcsRes(Resource):
         }
 
 
+@app.route('%s/logs/<string:upload_hash>/<string:calc_hash>' % base_path, methods=['GET'])
+def get_calc_proc_log(upload_hash, calc_hash):
+    """
+    Get calculation processing log. Calcs are referenced via *upload_hash*, *calc_hash*
+    pairs.
+
+    .. :quickref: logs; Get the processing log of a calculation.
+
+    **Example request**:
+
+    .. sourcecode:: http
+
+        GET /nomad/api/logs/W36aqCzAKxOCfIiMFsBJh3nHPb4a/7ddvtfRfZAvc3Crr7jOJ8UH0T34I HTTP/1.1
+        Accept: application/json
+
+    :param string upload_hash: the hash of the upload (from uploaded file contents)
+    :param string calc_hash: the hash of the calculation (from mainfile)
+    :resheader Content-Type: application/text
+    :status 200: calc log successfully retrieved
+    :status 404: log for a calc with the given hashes does not exist
+    :status 500: the log could not be accessed due to an unexpected error
+    :returns: the log data, a line by line sequence of structured logs
+    """
+    archive_id = '%s/%s' % (upload_hash, calc_hash)
+
+    try:
+        log_path = ArchiveLogFile(archive_id).os_path
+
+        return send_file(
+            log_path,
+            mimetype='application/text',
+            as_attachment=True,
+            attachment_filename=os.path.basename(log_path))
+    except (KeyError, FileNotFoundError):
+        # both are reported to the client as "not found"
+        abort(404, message='Archive/calculation %s does not exist.' % archive_id)
+    except Exception as e:
+        logger = get_logger(
+            __name__, endpoint='logs', action='get',
+            upload_hash=upload_hash, calc_hash=calc_hash)
+        logger.error('Exception on accessing calc proc log', exc_info=e)
+        abort(500, message='Could not access the logs.')
+
+
 @app.route('%s/archive/<string:upload_hash>/<string:calc_hash>' % base_path, methods=['GET'])
 def get_calc(upload_hash, calc_hash):
     """
diff --git a/nomad/files.py b/nomad/files.py
index 457ac638c0e514a870ae10448fa47f611bd722e0..76a6252b0c73f2e3db34849e1003f7317dd3f8c7 100644
--- a/nomad/files.py
+++ b/nomad/files.py
@@ -48,6 +48,7 @@ from contextlib import contextmanager
 import gzip
 import io
 import shutil
+import logging
 
 from nomad import config, utils
 
@@ -316,3 +317,26 @@ class ArchiveFile(File):
         """ Delete all archives of one upload with the given hash. """
         bucket = config.files.archive_bucket
         Objects.delete_all(bucket, upload_hash)
+
+
+class ArchiveLogFile(File):
+    """
+    Represents a log file that was created for processing a single calculation to create
+    an archive. Provides a loghandler that can be used to write to this logfile.
+    Logfiles are stored within the *archive_bucket* alongside the archive files.
+    """
+    def __init__(self, archive_id: str) -> None:
+        super().__init__(
+            bucket=config.files.archive_bucket,
+            object_id=archive_id,
+            ext='log')
+
+    def create_loghandler(self) -> logging.FileHandler:
+        """
+        Creates a :class:`logging.FileHandler` that writes to this log file. The file is
+        opened in ``'w'`` mode and the handler level is set to DEBUG. The caller is
+        responsible for closing the returned handler.
+        """
+        handler = logging.FileHandler(self.os_path, 'w')
+        handler.setLevel(logging.DEBUG)
+        return handler
diff --git a/nomad/processing/data.py b/nomad/processing/data.py
index 493e5e31365cded82f69dd79e44f27b173c98d6b..42d68605c1e7d73d082e906592e973bcfe283c10 100644
--- a/nomad/processing/data.py
+++ b/nomad/processing/data.py
@@ -33,7 +33,7 @@ import base64
 import time
 
 from nomad import config, utils
-from nomad.files import UploadFile, ArchiveFile
+from nomad.files import UploadFile, ArchiveFile, ArchiveLogFile
 from nomad.repo import RepoCalc
 from nomad.user import User
 from nomad.processing.base import Proc, Chord, process, task, PENDING, SUCCESS, FAILURE, RUNNING
@@ -78,6 +78,7 @@ class Calc(Proc):
         super().__init__(*args, **kwargs)
         self._parser_backend = None
         self._upload = None
+        self._loghandler = None
 
     @classmethod
     def get(cls, id):
@@ -110,6 +111,11 @@ class Calc(Proc):
         logger = logger.bind(
             upload_id=self.upload_id, mainfile=self.mainfile,
             upload_hash=upload_hash, calc_hash=calc_hash, **kwargs)
+
+        if self._loghandler is None:
+            self._loghandler = ArchiveLogFile(self.archive_id).create_loghandler()
+
+        logger.addHandler(self._loghandler)
         return logger
 
     @property
@@ -127,14 +133,26 @@ class Calc(Proc):
     @process
     def process(self):
         self._upload = Upload.get(self.upload_id)
+        logger = self.get_logger()
         if self._upload is None:
-            self.get_logger().error('calculation upload does not exist')
+            logger.error('calculation upload does not exist')
 
         try:
             self.parsing()
             self.normalizing()
             self.archiving()
         finally:
+            # detach and close the open loghandler; a handler that is only closed stays
+            # registered with the logger and would still receive later records
+            try:
+                if self._loghandler is not None:
+                    logger.removeHandler(self._loghandler)
+                    self._loghandler.close()
+                    self._loghandler = None
+            except Exception as e:
+                logger.error('could not close calculation proc log', exc_info=e)
+
+            # inform parent proc about completion
             self._upload.completed_child()
 
     @task
diff --git a/nomad/utils.py b/nomad/utils.py
index 97d4e4d0f130d43adb39ef7e9f89219b4e5a3cab..fa48143ae9fe482007950cad561ff1aad90da07e 100644
--- a/nomad/utils.py
+++ b/nomad/utils.py
@@ -110,7 +110,10 @@ if not _logging_is_configured:
         logger.setLevel(logging.DEBUG)
         return logger
 
-    structlog.configure(processors=log_processors, logger_factory=logger_factory)
+    structlog.configure(
+        processors=log_processors,
+        logger_factory=logger_factory,
+        wrapper_class=structlog.stdlib.BoundLogger)
 
     # configure logging in general
     logging.basicConfig(level=logging.DEBUG, stream=sys.stdout)
diff --git a/tests/processing/test_data.py b/tests/processing/test_data.py
index 396a93154c5a7857f339b12708bb40b187e42ee9..beab5c110931c7aa4935b3c9ad34b096a85f14ed 100644
--- a/tests/processing/test_data.py
+++ b/tests/processing/test_data.py
@@ -25,7 +25,7 @@ import shutil
 import os.path
 
 from nomad import config
-from nomad.files import UploadFile
+from nomad.files import UploadFile, ArchiveFile, ArchiveLogFile
 from nomad.processing import Upload, Calc
 from nomad.processing.base import task as task_decorator
 from nomad.user import me
@@ -78,6 +78,8 @@ def assert_processing(upload: Upload):
         assert calc.parser is not None
         assert calc.mainfile is not None
         assert calc.status == 'SUCCESS', calc.archive_id
+        assert ArchiveFile(calc.archive_id).exists()
+        assert ArchiveLogFile(calc.archive_id).exists()
         assert len(calc.errors) == 0
 
 
diff --git a/tests/test_api.py b/tests/test_api.py
index 7543c0cc8cf48b9d66622a362203056b03238eea..918985dc9a33e6744c7ed624e48e44e972f0a33a 100644
--- a/tests/test_api.py
+++ b/tests/test_api.py
@@ -21,7 +21,7 @@ from nomad.processing import Upload  # noqa
 from tests.processing.test_data import example_files  # noqa
 
 # import fixtures
-from tests.test_files import clear_files, archive, archive_config  # noqa pylint: disable=unused-import
+from tests.test_files import clear_files, archive, archive_log, archive_config  # noqa pylint: disable=unused-import
 from tests.test_normalizing import normalized_template_example  # noqa pylint: disable=unused-import
 from tests.test_parsing import parsed_template_example  # noqa pylint: disable=unused-import
 from tests.test_repo import example_elastic_calc  # noqa pylint: disable=unused-import
@@ -186,12 +186,13 @@ def test_processing(client, file, mode, worker, mocksearch, test_user_auth, no_w
     assert len(upload['tasks']) == 4
     assert upload['status'] == 'SUCCESS'
     assert upload['current_task'] == 'cleanup'
+    assert UploadFile(upload['upload_id']).exists()
     calcs = upload['calcs']['results']
     for calc in calcs:
         assert calc['status'] == 'SUCCESS'
         assert calc['current_task'] == 'archiving'
         assert len(calc['tasks']) == 3
-        assert UploadFile(upload['upload_id']).exists()
+        assert client.get('/logs/%s' % calc['archive_id']).status_code == 200
 
     if upload['calcs']['pagination']['total'] > 1:
         rv = client.get('%s?page=2&per_page=1&order_by=status' % upload_endpoint)
@@ -276,6 +277,13 @@ def test_get_archive(client, archive, no_warn):
     assert rv.status_code == 200
 
 
+def test_get_calc_proc_log(client, archive_log, no_warn):
+    rv = client.get('/logs/%s' % archive_log.object_id)
+
+    assert rv.status_code == 200
+    assert len(rv.data) > 0
+
+
 def test_get_non_existing_archive(client, no_warn):
     rv = client.get('/archive/%s' % 'doesnt/exist')
     assert rv.status_code == 404
diff --git a/tests/test_files.py b/tests/test_files.py
index 3abcebd7aec7b40bf623340c0e4024afe57aac4e..fef5b8fe2b6ab25793998839d9bfc03c9bdf2ba2 100644
--- a/tests/test_files.py
+++ b/tests/test_files.py
@@ -15,9 +15,10 @@
 import pytest
 import json
 import shutil
+import logging
 
-from nomad.files import Objects, ArchiveFile, UploadFile
-import nomad.config as config
+from nomad.files import Objects, ArchiveFile, UploadFile, ArchiveLogFile
+from nomad import config, utils
 
 # example_file uses an artificial parser for faster test execution, can also be
 # changed to examples_vasp.zip for using vasp parser
@@ -145,3 +146,27 @@ class TestUploadFile:
 
         with upload_same_file:
             assert hash == upload_same_file.hash()
+
+
+@pytest.fixture(scope='function')
+def archive_log(clear_files, archive_config):
+    """ Provides an :class:`ArchiveLogFile` that one test entry was logged to. """
+    archive_log = ArchiveLogFile('__test_upload_hash/__test_calc_hash')
+    archive_loghandler = archive_log.create_loghandler()
+    logger = utils.get_logger('test')
+    logger.addHandler(archive_loghandler)
+    logger.setLevel(logging.DEBUG)
+    logger.debug('This is a test')
+    # detach before closing, so that the closed handler does not stay on the logger
+    logger.removeHandler(archive_loghandler)
+    archive_loghandler.close()
+
+    yield archive_log
+
+
+class TestArchiveLogFile:
+
+    def test_archive_log_file(self, archive_log):
+        assert archive_log.exists()
+        log_entry = json.loads(archive_log.open('rt').read())
+        assert log_entry['event'] == 'This is a test'