From ef8d65392b2bf643fd862e28e8f9f7074b531912 Mon Sep 17 00:00:00 2001
From: Markus Scheidgen <markus.scheidgen@gmail.com>
Date: Tue, 25 Sep 2018 17:58:36 +0200
Subject: [PATCH] Use the right logger for calc proc logs and log formatting.

---
 nomad/files.py                |  6 +----
 nomad/parsing/artificial.py   |  1 +
 nomad/processing/base.py      |  2 +-
 nomad/processing/data.py      | 48 +++++++++++++++++++++++++----------
 tests/processing/test_data.py |  1 +
 tests/test_files.py           | 12 +++------
 6 files changed, 43 insertions(+), 27 deletions(-)

diff --git a/nomad/files.py b/nomad/files.py
index f0ad0b90a6..75c9bca2ef 100644
--- a/nomad/files.py
+++ b/nomad/files.py
@@ -323,7 +323,7 @@ class ArchiveFile(File):
 class ArchiveLogFile(File):
     """
     Represents a log file that was created for processing a single calculation to create
-    an archive. Provides a loghandler that can be used to write to this logfile.
+    an archive.
     Logfiles are stored within the *archive_bucket* alongside the archive files.
     """
     def __init__(self, archive_id: str) -> None:
@@ -332,7 +332,3 @@ class ArchiveLogFile(File):
             object_id=archive_id,
             ext='log')
 
-    def create_loghandler(self):
-        fh = logging.FileHandler(self.os_path, 'w')
-        fh.setLevel(logging.DEBUG)
-        return fh
diff --git a/nomad/parsing/artificial.py b/nomad/parsing/artificial.py
index 5311aa0003..9a14971e33 100644
--- a/nomad/parsing/artificial.py
+++ b/nomad/parsing/artificial.py
@@ -99,6 +99,7 @@ class TemplateParser(ArtificalParser):
         section = template_json['section_run'][0]
         self.add_section(section)
         self.backend.finishedParsingSession('ParseSuccess', [])
+        logger.debug('a test log entry')
         return self.backend
 
 
diff --git a/nomad/processing/base.py b/nomad/processing/base.py
index aff048a833..56d4e08180 100644
--- a/nomad/processing/base.py
+++ b/nomad/processing/base.py
@@ -130,7 +130,7 @@ class Proc(Document, metaclass=ProcMetaclass):
 
     def get_logger(self):
         return utils.get_logger(
-            __name__, current_task=self.current_task, process=self.__class__.__name__,
+            'nomad.processing', current_task=self.current_task, process=self.__class__.__name__,
             status=self.status)
 
     @classmethod
diff --git a/nomad/processing/data.py b/nomad/processing/data.py
index d96f5591c0..f19ea082f1 100644
--- a/nomad/processing/data.py
+++ b/nomad/processing/data.py
@@ -31,6 +31,8 @@ from mongoengine import StringField, BooleanField, DateTimeField, DictField, Int
 import logging
 import base64
 import time
+from structlog import wrap_logger
+from structlog.processors import StackInfoRenderer, format_exc_info, JSONRenderer
 
 from nomad import config, utils
 from nomad.files import UploadFile, ArchiveFile, ArchiveLogFile
@@ -78,7 +80,7 @@ class Calc(Proc):
         super().__init__(*args, **kwargs)
         self._parser_backend = None
         self._upload = None
-        self._loghandler = None
+        self._calc_proc_logwriter = None
 
     @classmethod
     def get(cls, id):
@@ -112,12 +114,32 @@ class Calc(Proc):
             upload_id=self.upload_id, mainfile=self.mainfile,
             upload_hash=upload_hash, calc_hash=calc_hash, **kwargs)
 
-        if self._loghandler is None:
-            self._loghandler = ArchiveLogFile(self.archive_id).create_loghandler()
-
-        logger.addHandler(self._loghandler)
         return logger
 
+    def get_calc_logger(self, **kwargs):
+        """
+        Returns a wrapped logger that additionally saves all entries to the calculation
+        processing log in the archive.
+        """
+        logger = self.get_logger(**kwargs)
+
+        if self._calc_proc_logwriter is None:
+            self._calc_proc_logwriter = ArchiveLogFile(self.archive_id).open('wt')
+
+        def save_to_calc_log(logger, method_name, event_dict):
+            program = event_dict.get('normalizer', 'parser')
+            event = event_dict.get('event', '')
+            entry = '[%s] %s: %s' % (method_name, program, event)
+            if len(entry) > 120:
+                self._calc_proc_logwriter.write(entry[:120])
+                self._calc_proc_logwriter.write('...')
+            else:
+                self._calc_proc_logwriter.write(entry)
+            self._calc_proc_logwriter.write('\n')
+            return event_dict
+
+        return wrap_logger(logger, processors=[save_to_calc_log])
+
     @property
     def json_dict(self):
         """ A json serializable dictionary representation. """
@@ -144,9 +166,9 @@ class Calc(Proc):
         finally:
             # close loghandler that was not closed due to failures
             try:
-                if self._loghandler is not None:
-                    self._loghandler.close()
-                    self._loghandler = None
+                if self._calc_proc_logwriter is not None:
+                    self._calc_proc_logwriter.close()
+                    self._calc_proc_logwriter = None
             except Exception as e:
                 logger.error('could not close calculation proc log', exc_info=e)
 
@@ -155,7 +177,7 @@ class Calc(Proc):
 
     @task
     def parsing(self):
-        logger = self.get_logger()
+        logger = self.get_calc_logger()
         parser = parser_dict[self.parser]
         self._parser_backend = parser.run(self.mainfile_tmp_path, logger=logger)
         if self._parser_backend.status[0] != 'ParseSuccess':
@@ -164,7 +186,7 @@ class Calc(Proc):
 
     @task
     def normalizing(self):
-        logger = self.get_logger()
+        logger = self.get_calc_logger()
         for normalizer in normalizers:
             normalizer_name = normalizer.__name__
             normalizer(self._parser_backend).normalize(logger=logger)
@@ -197,9 +219,9 @@ class Calc(Proc):
             self._parser_backend.write_json(out, pretty=True)
 
         # close loghandler
-        if self._loghandler is not None:
-            self._loghandler.close()
-            self._loghandler = None
+        if self._calc_proc_logwriter is not None:
+            self._calc_proc_logwriter.close()
+            self._calc_proc_logwriter = None
 
 
 class Upload(Chord):
diff --git a/tests/processing/test_data.py b/tests/processing/test_data.py
index beab5c1109..a7a17e3eca 100644
--- a/tests/processing/test_data.py
+++ b/tests/processing/test_data.py
@@ -80,6 +80,7 @@ def assert_processing(upload: Upload):
         assert calc.status == 'SUCCESS', calc.archive_id
         assert ArchiveFile(calc.archive_id).exists()
         assert ArchiveLogFile(calc.archive_id).exists()
+        assert 'a test' in ArchiveLogFile(calc.archive_id).open('rt').read()
         assert len(calc.errors) == 0
 
 
diff --git a/tests/test_files.py b/tests/test_files.py
index fef5b8fe2b..2769a310a6 100644
--- a/tests/test_files.py
+++ b/tests/test_files.py
@@ -151,12 +151,9 @@ class TestUploadFile:
 @pytest.fixture(scope='function')
 def archive_log(clear_files, archive_config):
     archive_log = ArchiveLogFile('__test_upload_hash/__test_calc_hash')
-    archive_loghandler = archive_log.create_loghandler()
-    logger = utils.get_logger('test')
-    logger.addHandler(archive_loghandler)
-    logger.setLevel(logging.DEBUG)
-    logger.debug('This is a test')
-    archive_loghandler.close()
+    f = archive_log.open('wt')
+    f.write('This is a test')
+    f.close()
 
     yield archive_log
 
@@ -165,5 +162,4 @@ class TestArchiveLogFile:
 
     def test_archive_log_file(self, archive_log):
         assert archive_log.exists()
-        log_entry = json.loads(archive_log.open('rt').read())
-        assert log_entry['event'] == 'This is a test'
+        assert 'test' in archive_log.open('rt').read()
-- 
GitLab