From f40d3738bb15760f7effab55ac161d2839f583a7 Mon Sep 17 00:00:00 2001
From: Daniel Lehmberg <d.lehmberg@tum.de>
Date: Wed, 17 Jan 2024 12:18:02 +0000
Subject: [PATCH] Integrate statistics logger to logtransfer

---
 docs/howto/oasis/install.md                 |  14 +-
 nomad/cli/admin/run.py                      |  31 ++-
 nomad/config/models.py                      |  21 +-
 nomad/logtransfer.py                        | 182 ++++++++++--------
 nomad/statistics_logger.py                  | 103 ++++++++++
 .../nomad-oasis/configs/nomad.yaml          |   2 +-
 tests/conftest.py                           |  19 +-
 tests/logtransfer/conftest.py               |  40 +++-
 tests/logtransfer/test_logtransfer.py       |  27 ++-
 tests/logtransfer/test_statistics_logger.py |  53 +++++
 10 files changed, 369 insertions(+), 123 deletions(-)
 create mode 100644 nomad/statistics_logger.py
 create mode 100644 tests/logtransfer/test_statistics_logger.py

diff --git a/docs/howto/oasis/install.md b/docs/howto/oasis/install.md
index f581cdd1b8..0191e7d4fd 100644
--- a/docs/howto/oasis/install.md
+++ b/docs/howto/oasis/install.md
@@ -10,12 +10,10 @@ central NOMAD installation.
 
 !!! note
 
-    **Register your oasis**
-
-    If you installed (or even just plan to install) a NOMAD Oasis, please take
-    the time to register your Oasis with FAIRmat. This will help us to assist
-    you in an problems and keep you updated on new releases. You can register
-    by filling out this [simple form](https://www.fairmat-nfdi.eu/fairmat/oasis_registration){:target="_blank"}.
+    **Register your Oasis**
+    If you installed (or even just plan to install) a NOMAD Oasis, please
+    [register your Oasis with FAIRmat](https://www.fairmat-nfdi.eu/fairmat/oasis_registration)
+    and help us assist you in the future.
 
 ## Quick-start
 
@@ -80,10 +78,10 @@ RAM and CPU for running tools like jupyter, if you opt to use NOMAD NORTH.
 
 ### Sharing data through the logtransfer service and data privacy notice
 
-The NOMAD includes a `logtransfer` service. When enabled this service automatically collects
+NOMAD includes a `logtransfer` service. When enabled, this service automatically collects
 and transfers non-personalized log-data to us. Currently, this service is experimental and
 requires opt-in. However, in upcoming versions of NOMAD Oasis, we might change to opt-out.
-See the instructions in the configuration below on how to enable/disable the `logtransfer`.
+See the instructions in the configuration below on how to enable/disable `logtransfer`.
 
 The service collects log-data and aggregated statistics, such as the number of users or the
 number of uploaded datasets. In any case this data does not personally identify any users or
diff --git a/nomad/cli/admin/run.py b/nomad/cli/admin/run.py
index d52a0666a1..c190616867 100644
--- a/nomad/cli/admin/run.py
+++ b/nomad/cli/admin/run.py
@@ -54,14 +54,41 @@ def app(with_gui: bool, **kwargs):
 
 
 @run.command(
-    help='Run server that collects and submits logs to the central Nomad instance.'
+    help='Run service to collect and submit logs to the central Nomad instance.'
 )
 def logtransfer():
     config.meta.service = 'logtransfer'
     from nomad.logtransfer import start_logtransfer_service
+    from nomad.statistics_logger import start_statistics_logger
 
-    start_logtransfer_service()
+    if not config.logstash.enabled:
+        raise RuntimeError(
+            'To run the logtransfer service, logstash formatting must be '
+            f'enabled (found {config.logstash.enabled=}).'
+        )
+
+    is_logtransfer_enabled = config.logtransfer.enable_logtransfer
+    is_statistics_logger_enabled = config.logtransfer.enable_statistics
+
+    if not is_logtransfer_enabled and is_statistics_logger_enabled:
+        raise ValueError(
+            f'If {config.logtransfer.enable_statistics=} then the logtransfer '
+            f'service must also be enabled (Got: {config.logtransfer.enable_logtransfer=})'
+        )
+
+    if config.logtransfer.enable_statistics:
+        statistics_process = start_statistics_logger()
+    else:
+        statistics_process = None
+
+    if config.logtransfer.enable_logtransfer:
+        start_logtransfer_service()
+
+    if config.logtransfer.enable_statistics:
+        if statistics_process.is_alive():
+            statistics_process.kill()
+            statistics_process.join()
 
 
 def run_app(
diff --git a/nomad/config/models.py b/nomad/config/models.py
index d18fd71e27..89a59d2b58 100644
--- a/nomad/config/models.py
+++ b/nomad/config/models.py
@@ -501,16 +501,20 @@ class Logstash(NomadSettings):
 
 
 class Logtransfer(NomadSettings):
-    """
-    Configuration of logtransfer server.
+    """Configuration of the logtransfer and statistics services.
 
     Note that other configurations are also used within logtransfer
 
     * class Logstash (Configs: enabled, host, level, tcp_port) such that logs are sent to the logstash proxy
-    * class Oasis (Config: central_nomad_api_url) address to which so send the logs
-    * class FS (Config: tmp) path where collected logfiles are stored.
+    * class Oasis (Config: central_nomad_api_url) address to which the logs are sent
+    * class FS (Config: tmp) path where collected logfiles are stored until they are transferred
     """
 
+    # for logtransfer, see nomad/logtransfer.py
+    enable_logtransfer: bool = Field(
+        False,
+        description='If enabled, collected logs are transferred to the central NOMAD deployment.',
+    )
     submit_interval: int = Field(
         60 * 60 * 24,
         description='Time interval in seconds after which logs are transferred.',
@@ -531,6 +535,15 @@ class Logtransfer(NomadSettings):
         False,
         description='Whether to keep the server alive if an unexpected exception is raised. Set to True for testing.',
     )
+    # for statistics (which are submitted to logstash/logtransfer), see nomad/statistics_logger.py
+    enable_statistics: bool = Field(
+        True,
+        description='If enabled, this starts a process that frequently generates logs with statistics.',
+    )
+    statistics_interval: int = Field(
+        60 * 60 * 24,
+        description='Time interval in seconds in which statistics are logged.',
+    )
 
 
 class Tests(NomadSettings):
diff --git a/nomad/logtransfer.py b/nomad/logtransfer.py
index ced11999ab..c3682d3fb6 100644
--- a/nomad/logtransfer.py
+++ b/nomad/logtransfer.py
@@ -1,3 +1,21 @@
+#
+# Copyright The NOMAD Authors.
+#
+# This file is part of NOMAD. See https://nomad-lab.eu for further info.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
 import logging
 import socketserver
 from logging.handlers import RotatingFileHandler
@@ -11,19 +29,16 @@ from datetime import datetime, timedelta
 
 from nomad import config
 
-# It is important to make sure that logstash itself is disabled (otherwise it can happen that logs from the logtransfer
-# server are also included to the logs
-config.logstash.enabled = False
+# For logtransfer no Python logging is used, but plain prints. The reason is that logging can interfere with
+# Nomad logging, and logs from the logtransfer service itself could otherwise be sent to the central deployment.
+enable_log_print = True
 
-# logger only for the logstash server (none of the logs should be transferred to the central Nomad instance)
-server_logger = logging.getLogger(name='logtransfer_server')
-server_logger.setLevel(logging.INFO)
-server_handler = logging.StreamHandler()
-server_handler.setFormatter(
-    logging.Formatter('%(levelname)s: %(asctime)s %(message)s', '%Y-%m-%d %H:%M:%S')
-)
-server_logger.addHandler(server_handler)
 
+def log_print(message, level='INFO'):
+    if enable_log_print:
+        current_time = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
+        formatted_message = f'{level}: {current_time} {message}'
+        print(formatted_message)
 
 
 def get_log_filepath():
@@ -41,8 +56,9 @@ def get_all_rotated_logfiles():
     where [index] is an integer (the higher the index the older the logs in the file).
 
-    In contrast, for the active logfile, where logs may still be written, [index] is empty.
-    The list is sorted such that the oldest file with the highest index appears first.
+    In contrast, for the active logfile, where logs may still be written,
+    [index] is empty. The list is sorted such that the oldest file with the
+    highest index appears first.
     """
 
     rotated_logfiles = []
@@ -66,8 +82,9 @@ def clear_logfiles():
     """
     Perform a rollover on the current logfile and then remove all rotated logfiles.
 
-    Note that if the logger is still used than this operation is unsafe and potentially breaks the logger. This
-    function is mainly intended for testing.
+    Note that if the logger is still used, then this operation is unsafe and
+    potentially breaks the logger. This function is mainly intended for
+    testing.
     """
 
     try:
@@ -94,19 +111,21 @@ def is_empty_logfile():
 
 class NotifyRotatingFileHandler(RotatingFileHandler):
     """Adapted `RotatingFileHandler` used within the `Logtransfer` class.
 
-    This adaptation of RotatingFileHandler does not perform a file rollover when `maxBytes` is reached. Instead, it
-    notifies the Logtransfer class thread. This is because data is being sent to the central Nomad, and performing a
-    file rollover at the same time could result in file conflicts and race conditions.
+    This adaptation of RotatingFileHandler does not perform a file rollover
+    when `maxBytes` is reached. Instead, it notifies the Logtransfer class
+    thread. This is because data is being sent to the central Nomad, and
+    performing a file rollover at the same time could result in file conflicts
+    and race conditions.
-    The Lock is needed to protect `shouldRollover()` and `doRollover()` as they are not thread-safe (see attribute
-    `stream` in the handler)."""
+    The Lock is needed to protect `shouldRollover()` and `doRollover()` as they
+    are not thread-safe (see attribute `stream` in the handler)."""
 
     rollover_lock = threading.Lock()
     notify_rollover_event: threading.Event = None
 
     def set_event_attribute(self, event: threading.Event):
-        """Set the event to the class on which this handler can notify the thread to safely perform the file
-        rollover."""
+        """Set the event through which this handler can notify the thread to
+        safely perform the file rollover."""
         self.notify_rollover_event = event
 
     def is_event_attribute_set(self):
@@ -115,15 +134,17 @@ class NotifyRotatingFileHandler(RotatingFileHandler):
     def emit(self, record) -> None:
         """Emit a record.
 
-        Output the record to the file, if `maxBytes` (see `RotatingFileHandler`) is reached a
-        `threading.Event` is set (for the event see `Logtransfer.transfer_logs`.
+        Output the record to the file. If `maxBytes` (see
+        `RotatingFileHandler`) is reached, a `threading.Event` is set (for the
+        event, see `Logtransfer.transfer_logs`).
         """
         try:
-            with self.rollover_lock:  # shouldRollover and doRollover are not thread-safe
-                # mypy does not recognize the method in the superclass
+            # shouldRollover and doRollover are not thread-safe (needs lock)
+            with self.rollover_lock:
+                # Ignore since mypy does not recognize the method in the superclass
                 if self.shouldRollover(record):  # type: ignore
-                    # print(f'notify main logstransfer thread self.stream.tell()={self.stream.tell()}')
-                    # notify a thread which is responsible for the rollover (and other tasks)
+                    # notify a thread which is responsible for the rollover
+                    # (and other tasks)
                     self.notify_rollover_event.set()
 
                 # write to file
@@ -136,8 +157,8 @@ class Logtransfer:
     """
     Responsible for rotating the logfile and transferring the logs to central Nomad.
 
-    The main method is `transfer_logs` which is intended to run in a thread (concurrently to the logstash proxy server
-    that receives and writes new logs).
+    The main method is `transfer_logs` which is intended to run in a thread
+    (concurrently to the logstash proxy server that receives and writes new logs).
     """
 
     def __init__(self, rotating_file: NotifyRotatingFileHandler):
@@ -149,7 +170,8 @@ class Logtransfer:
 
         if self.submit_interval <= 0 and not isinstance(self.submit_interval, int):
             raise TypeError(
-                f'submit_interval must be a positive integer value. Got {self.submit_interval} '
+                'submit_interval must be a positive integer value. '
+                f'Got {self.submit_interval} '
                 f'with type {type(self.submit_interval)}'
             )
@@ -166,8 +188,9 @@ class Logtransfer:
         all_rotated_logfiles = get_all_rotated_logfiles()
 
         if len(all_rotated_logfiles) > 0:
-            server_logger.info(
-                f'collected files: {[os.path.basename(f) for f in all_rotated_logfiles]} in '
+            log_print(
+                f'collected files: '
+                f'{[os.path.basename(f) for f in all_rotated_logfiles]} in '
                 f'directory {config.fs.tmp}'
            )
 
@@ -177,7 +200,7 @@ class Logtransfer:
 
                 yield file_content, path
         else:
-            server_logger.info('No logfiles to submit.')
+            log_print('No logfiles to submit.')
 
     def _submit_logfile_to_central(self, logfile_content: bytes):
         central_post_federation_url = f'{self.central_nomad_api_url}federation/logs/'
@@ -195,12 +218,12 @@ class Logtransfer:
 
             if is_successful:
                 submitted_bytes = ret_request.json()['filesize']
-                server_logger.info(
+                log_print(
                     f'Successfully submitted logfile ({submitted_bytes} bytes) with HTTP status code '
                     f'{ret_request.status_code} to central Oasis at {ret_request.url}.'
                 )
             else:
-                server_logger.info(
+                log_print(
                     f'Submission of logfiles to {central_post_federation_url} failed with HTTP '
                     f'status code {ret_request.status_code}. \n '
                     'logfiles will be included again in next submission.'
                 )
 
         except requests.exceptions.ConnectionError:
             is_successful = False
-            server_logger.info(
+            log_print(
                 f'HTTP connection to {central_post_federation_url} could not be established'
             )
@@ -222,8 +245,8 @@ class Logtransfer:
 
     def transfer_logs(self) -> None:
         # (mypy does not recognize the superclass attribute)
-        server_logger.info(
-            f'Start logtransfer thread with '  # type: ignore
+        log_print(
+            'Start logtransfer thread with '  # type: ignore
             f'{self.submit_interval=} sec | '
             f'{self.rotating_file.maxBytes=} bytes'
        )
@@ -244,34 +267,36 @@ class Logtransfer:
             except Exception as e:
                 if not self.raise_unexpected_exceptions:
                     # do not kill the thread and hope it recovers by the next iteration
-                    server_logger.info(f'unexpected exception was raised \n{e}')
+                    log_print(f'Unexpected exception was raised:\n{e}')
                 else:
                     raise e
 
             next_submission = datetime.now() + timedelta(seconds=self.submit_interval)
             next_submission = next_submission.strftime('%Y-%m-%dT%H:%M:%S')  # type: ignore
-            server_logger.info(f'The next planned submission is at {next_submission}.')
+            log_print(f'The next planned submission is at {next_submission}.')
 
             self.reached_max_bytes_event.clear()
             self.reached_max_bytes_event.wait(self.submit_interval)
 
             is_notified = self.reached_max_bytes_event.is_set()
-            server_logger.info(f'Thread is waking up (is_notified={is_notified}).')
+            log_print(f'Thread is waking up (is_notified={is_notified}).')
 
             with self.rotating_file.rollover_lock:
                 # mypy does not recognize the superclass attribute
                 if self.rotating_file.stream.tell() != 0:  # type: ignore
-                    server_logger.info(
+                    log_print(
                         'Perform rollover on logfile. stream position in '  # type: ignore
                         f'bytes={self.rotating_file.stream.tell()}'
                     )
-                    # Note: this should be the only place where doRollover() is called. A call outside this thread
-                    # causes a race condition on the methods where the logfiles are submitted and removed
+                    # Note: this should be the only place where doRollover() is called.
+                    # A call outside this thread causes a race condition on the methods
+                    # where the logfiles are submitted and removed
                     self.rotating_file.doRollover()
                 else:
-                    server_logger.info(
-                        'No rollover on logfile performed because rotating_file.stream.tell() is at position 0.'
+                    log_print(
+                        'No rollover on logfile performed because '
+                        'rotating_file.stream.tell() is at position 0.'
                     )
 
 
@@ -283,10 +308,12 @@ class LogstashTCPHandler(socketserver.StreamRequestHandler):
             logline = self.rfile.readline()
 
             if logline == b'':
-                # "empty byte" signals that client closed connection, nothing more to read
+                # "empty byte" signals that the client closed the connection,
+                # nothing more to read
                 break
 
-            # remove newlines from the right, because a newline is also internally included in the logstash_logger
+            # remove newlines from the right, because a newline is also
+            # internally included in the logstash_logger
            logline = logline.rstrip()
 
             if len(logline) > 0:
@@ -296,26 +323,18 @@ class LogstashTCPHandler(socketserver.StreamRequestHandler):
         self.server.rotating_file_handler.flush()
 
 
-class TCPServerReuseAddress(socketserver.TCPServer):
+class TCPServerReuseAddress(socketserver.ThreadingTCPServer):
     """
     This class overwrites default class parameters of TCPServer.
 
-    This is recommended at https://stackoverflow.com/a/42147927)
+    This is recommended at https://stackoverflow.com/a/42147927
     """
 
-    # TODO: consider also if it is better to inherit from ThreadingTCPServer because (from Python docu):
-    # https://docs.python.org/3/library/socketserver.html#socketserver.ThreadingTCPServer
-    # On the other hand, if you are building an HTTP server where all data is stored externally (for instance, in the
-    # file system), a synchronous class will essentially render the service “deaf” while one request is being handled
-    # – which may be for a very long time if a client is slow to receive all the data it has requested. Here a
-    # threading or forking server prevents this.
-    # -- BUT NOTE: this may not work for Windows, because the ThreadingMixIn is only supported for POSIX systems
-
     # this allows to quickly set up the server again after it may have terminated
     allow_reuse_port = True
     allow_reuse_address = True
 
-    request_queue_size = (
-        20  # default is 5, set to higher value here to avoid ConnectionRefused errors
-    )
+    # default value of request_queue_size is 5, set to a higher value here to
+    # avoid ConnectionRefused errors
+    request_queue_size = 20
 
     def __init__(self, logger, *args, **kwargs):
         super(TCPServerReuseAddress, self).__init__(*args, **kwargs)
@@ -330,37 +349,40 @@ def _start_logstash_proxy_server(logger, host, port):
         port = int(port)
     except ValueError:
         raise TypeError(
-            f'Server port must be of type integer (or parseable string). Got port={port} of type {type(port)}'
+            'Server port must be of type integer (or parseable string). '
+            f'Got port={port} of type {type(port)}'
        )
 
     logstash_proxy_server = TCPServerReuseAddress(
         logger, (host, port), LogstashTCPHandler, bind_and_activate=True
     )
-    logstash_proxy_server.timeout = (
-        30  # is closed after timeout period with no requests being received
-    )
+
+    # is closed after timeout period with no requests being received
+    logstash_proxy_server.timeout = 30
 
     with logstash_proxy_server as server:
-        server_logger.info(f'Start logstash proxy server on host={host} port={port}.')
+        log_print(f'Start logstash proxy server on host={host} port={port}.')
 
         try:
-            # NOTE: don't use the serve_forever() method as it does not account for the timeout
+            # NOTE: don't use the serve_forever() method as it does not account
+            # for the timeout
             while True:
                 server.handle_request()
         except Exception as e:
-            server_logger.info('logstash proxy server shutdown unexpectedly')
+            log_print('logstash proxy server shut down unexpectedly')
             raise e
 
 
 def _initialize_logger_and_handler():
-    server_logger.info(
+    log_print(
         f'Initialize logger and handler. Location of intermediate logfiles before '
         f'submission:\n{get_log_filepath()}'
     )
 
     logstash_logger = logging.getLogger(name='logstash')
 
-    # Note that backupCount must be a positive number (otherwise no file rollovers is performed).
-    # the number should for performance reasons not be too large. If backupCount is reached then the oldest log file
-    # is deleted. See https://stackoverflow.com/a/56954614
+    # Note that backupCount must be a positive number (otherwise no file
+    # rollover is performed). For performance reasons, the number should not
+    # be too large. If backupCount is reached, then the oldest log file is
+    # deleted. See https://stackoverflow.com/a/56954614
     rotating_logfile_handler = NotifyRotatingFileHandler(
         filename=get_log_filepath(),
         mode='a',
@@ -379,13 +401,11 @@ def start_logtransfer_service(host=None, port=None):
     """
     Start logtransfer service.
 
-    The two parameters host and port are mainly necessary for testing. If left None, then the values from the
-    Nomad config are used.
+    The two parameters 'host' and 'port' are mainly necessary for testing. If
+    left None, the values from the Nomad config are used.
     """
 
-    # for appropriate arguments see attributes in
-    # nomad.config.logstash and nomad.config.logstash_proxy and config.oasis.central_nomad_api_url
-    server_logger.info(f'Start logtransfer service')
+    log_print('Start logtransfer service')
 
     logstash_logger, rotating_logfile_handler = _initialize_logger_and_handler()
 
@@ -396,9 +416,9 @@ def start_logtransfer_service(host=None, port=None):
     assert rotating_logfile_handler.is_event_attribute_set()
 
     d = threading.Thread(target=transfer_to_central.transfer_logs, name='logtransfer')
-    d.setDaemon(
-        True
-    )  # kill thread immediately if main thread (running the server) terminates
+
+    # kill thread immediately if main thread (running the server) terminates
+    d.daemon = True
     d.start()
 
     if host is None:
diff --git a/nomad/statistics_logger.py b/nomad/statistics_logger.py
new file mode 100644
index 0000000000..caa53f86c7
--- /dev/null
+++ b/nomad/statistics_logger.py
@@ -0,0 +1,103 @@
+#
+# Copyright The NOMAD Authors.
+#
+# This file is part of NOMAD. See https://nomad-lab.eu for further info.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+import sys
+
+import requests
+import threading
+from multiprocessing import Process
+
+from nomad import config
+from nomad import utils
+
+
+# The logger (typically with LogstashHandler enabled in the root logger) to
+# submit the logs.
+statistics_logstash_log = utils.get_logger(__name__)
+
+
+class StatisticsLogger:
+    """
+    Service that frequently collects statistics and logs them in logstash format.
+
+    Currently, the statistics are collected from the local FastAPI endpoint /info.
+    The statistics logs are eventually transferred to
+    * logstash directly (on central NOMAD)
+    * logtransfer (on a NOMAD Oasis, see logtransfer.py).
+
+    In the future, it is also possible to collect statistics from different
+    sources or to perform aggregations.
+    """
+
+    def __init__(self):
+        self.collect_interval = config.logtransfer.statistics_interval
+        self.raise_exceptions = config.logtransfer.raise_unexpected_exceptions
+        self.stop_thread_event = threading.Event()
+
+    def _collect_statistics(self):
+        """Collect statistics from the FastAPI /info endpoint."""
+        info_response = requests.get(
+            f'http://localhost:{config.services.api_port}/api/v1/info'
+        )
+        statistics = info_response.json()['statistics']
+        return statistics
+
+    def run_statistics_service(self):
+        """Main loop of the statistics service."""
+
+        while True:
+            try:
+                statistics = self._collect_statistics()
+
+                for key, value in statistics.items():
+                    statistics_logstash_log.info('statistics', key=key, value=value)
+
+            except Exception as e:
+                if self.raise_exceptions:
+                    raise e
+
+            sys.stdout.flush()
+
+            # The stop event can be set to terminate the loop (mainly required
+            # for testing); otherwise wait() just sleeps for the interval.
+            if self.stop_thread_event.wait(self.collect_interval):
+                break
+
+
+def start_statistics_logger(fork_mode='process'):
+    """
+    Initialize and start the statistics logger in a new process or thread.
+    The returned object is the running process or thread, depending on `fork_mode`.
+    """
+
+    statistics_logger = StatisticsLogger()
+
+    if fork_mode == 'process':
+        process = Process(target=statistics_logger.run_statistics_service)
+        process.start()
+        return process
+    elif fork_mode == 'thread':
+        statistics_thread = threading.Thread(
+            target=statistics_logger.run_statistics_service
+        )
+
+        # kill thread immediately if the main thread terminates
+        statistics_thread.daemon = True
+        statistics_thread.start()
+        return statistics_thread
+    else:
+        raise ValueError(f'{fork_mode=} is not valid (select "process" or "thread").')
diff --git a/ops/docker-compose/nomad-oasis/configs/nomad.yaml b/ops/docker-compose/nomad-oasis/configs/nomad.yaml
index db2189e1bf..6418fd5e12 100644
--- a/ops/docker-compose/nomad-oasis/configs/nomad.yaml
+++ b/ops/docker-compose/nomad-oasis/configs/nomad.yaml
@@ -15,7 +15,7 @@ meta:
   maintainer_email: 'me@my-oasis.org'
 
 logstash:
-  enable: false
+  enabled: false
 
 mongo:
   db_name: nomad_oasis_v1
diff --git a/tests/conftest.py b/tests/conftest.py
index c1ce1a9e50..d9e19c8f53 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -24,6 +24,7 @@ import time
 from datetime import datetime, timezone
 import shutil
 import os
+import socket
 import elasticsearch.exceptions
 import json
 import logging
@@ -1330,14 +1331,24 @@ def central_logstash_mock():
     class TCPServerStore(socketserver.TCPServer):
         received_content = []
 
+        def set_request_timeout(self, timeout):
+            # Note: the timeout acts on the LogstashCentralHandler socket.
+            # This seems to behave differently from self.timeout and is
+            # particularly useful to interrupt a blocking handle_request()
+            self.RequestHandlerClass.timeout = timeout
+
     class LogstashCentralHandler(socketserver.StreamRequestHandler):
         def handle(self):
             while True:
-                line = self.rfile.readline()
-                # print(f"received line {line}")
+                try:
+                    line = self.rfile.readline()
+                    # print(f'received {line=}')
+                except socket.timeout:
+                    # print(f'server timed out')
+                    line = b''  # if timed out, close the connection
+
                 if line == b'':
-                    # print("received closing for LogstashCentralHandler")
+                    # print(f'received closing for LogstashCentralHandler')
                     break
 
                 line = line.strip()
diff --git a/tests/logtransfer/conftest.py b/tests/logtransfer/conftest.py
index 17440c01f3..7bcd4b6a0a 100644
--- a/tests/logtransfer/conftest.py
+++ b/tests/logtransfer/conftest.py
@@ -13,10 +13,10 @@ logging.getLogger('logtransfer_server').setLevel(level=logging.CRITICAL)
 
 
 def start_logtransfer_service():
-    # Change address to such that api_v1 fixture can replace the address to the testserver
+    # Change the address such that the api_v1 fixture can replace it with the testserver address
     config.oasis.central_nomad_deployment_url = config.client.url + '/v1'
 
-    # Use a slightly different port to the logstash mock (which uses 5000 by default)
+    # Use a different port from the logstash mock (which uses 5000 by default)
     host, port = config.logstash.host, int(config.logstash.tcp_port) + 1
 
     server_process = Process(
@@ -30,8 +30,8 @@ def start_logtransfer_service():
 
     client_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
 
-    # NOTE: it is in the responsibility of the tests to timeout if no connection can be established
-    # (use @pytest.mark.timeout( SECONDS ))
+    # NOTE: it is the responsibility of the tests to time out if no connection
+    # can be established (use @pytest.mark.timeout( SECONDS ))
     while True:
         time.sleep(0.001)
         try:
@@ -53,8 +53,21 @@ def kill_logtransfer_service(server_process, client_socket):
 
 
 @pytest.fixture(scope='function')
-def logtransfer_rollover_time(monkeysession):
+def logstash_enabled(monkeysession):
+    monkeysession.setattr('nomad.config.logstash.enabled', True)
+
+    # make sure the root logger has the logstash handler
+    from nomad.utils.structlogging import add_logstash_handler, root
+
+    add_logstash_handler(root)
+
+
+@pytest.fixture(scope='function')
+def logtransfer_rollover_time(api_v1, monkeysession):
+    monkeysession.setattr('nomad.config.logtransfer.enable_statistics', False)
     monkeysession.setattr('nomad.config.logtransfer.submit_interval', 0.1)
+    monkeysession.setattr('nomad.config.logtransfer.raise_unexpected_exceptions', True)
+
     server_process, client_socket = start_logtransfer_service()
 
     yield client_socket
@@ -63,9 +76,11 @@ def logtransfer_rollover_time(monkeysession):
 
 
 @pytest.fixture(scope='function')
-def logtransfer_rollover_space(monkeysession):
+def logtransfer_rollover_space(api_v1, monkeysession):
+    monkeysession.setattr('nomad.config.logtransfer.enable_statistics', False)
     monkeysession.setattr('nomad.config.logtransfer.max_bytes', 500)
     monkeysession.setattr('nomad.config.logtransfer.backup_count', 100)
+    monkeysession.setattr('nomad.config.logtransfer.raise_unexpected_exceptions', True)
 
     server_process, client_socket = start_logtransfer_service()
 
@@ -75,12 +90,19 @@ def logtransfer_rollover_space(monkeysession):
 
 
 @pytest.fixture(scope='function')
-def logtransfer_no_rollover(monkeysession):
-    monkeysession.setattr('nomad.config.logtransfer.max_bytes', 1e100)
-    monkeysession.setattr('nomad.config.logtransfer.submit_interval', 1e100)
+def logtransfer_no_rollover(api_v1, monkeysession):
+    monkeysession.setattr('nomad.config.logtransfer.enable_statistics', False)
+    monkeysession.setattr('nomad.config.logtransfer.max_bytes', 1e8)
+    monkeysession.setattr('nomad.config.logtransfer.submit_interval', 1e8)
+    monkeysession.setattr('nomad.config.logtransfer.raise_unexpected_exceptions', True)
 
     server_process, client_socket = start_logtransfer_service()
 
     yield client_socket
 
     kill_logtransfer_service(server_process, client_socket)
+
+
+@pytest.fixture(scope='function')
+def collect_statistics(monkeysession):
+    monkeysession.setattr('nomad.config.logtransfer.statistics_interval', 1)
diff --git a/tests/logtransfer/test_logtransfer.py b/tests/logtransfer/test_logtransfer.py
index 6fd6b81655..1ef1beb769 100644
--- a/tests/logtransfer/test_logtransfer.py
+++ b/tests/logtransfer/test_logtransfer.py
@@ -9,6 +9,8 @@ import logging
 from nomad.utils.structlogging import LogstashFormatter
 from nomad import config
 
+logtransfer.enable_log_print = False
+
 
 def standard_test_log_record(msg='testmsg') -> bytes:
     record = LogRecord(
@@ -102,9 +104,9 @@ def test_logstash_submit_multiple_logs(
 
     is_successful = False
     while not is_successful:
-        # The logfile may also contain a subset of the messages that were sent at this point
-        # leading to an AssertionError. This loop frequently checks if at some all asserts are True and
-        # then terminates
+        # At this point, the logfile may contain only a subset of the messages
+        # that were sent, leading to an AssertionError. This loop repeatedly
+        # checks whether all asserts eventually pass and then terminates.
         if not logtransfer.is_empty_logfile():
             with open(logtransfer.get_log_filepath(), 'rb') as f:
                 file_content = f.read()
@@ -130,7 +132,8 @@ def test_logstash_rollover_time(logtransfer_rollover_time):
     ret1 = logtransfer_rollover_time.send(log1)
     assert ret1 > 0
 
-    # this sleep is needed to trigger the rollover after some time interval (check the fixture for the actual setting)
+    # this sleep is needed to trigger the rollover after some time interval
+    # (check the fixture for the actual setting)
     time.sleep(0.15)
 
     log2 = standard_test_log_record(msg=testlog[1]) + b'\n'
@@ -139,8 +142,8 @@ def test_logstash_rollover_time(logtransfer_rollover_time):
 
     is_successful_assert = False
     while not is_successful_assert:
-        # finish the test as soon as all asserts are passed (this can vary depending on when the server has processed
-        # the log records)
+        # finish the test as soon as all asserts have passed (this can vary
+        # depending on when the server has processed the log records)
         try:
             rolled_log_filepath = logtransfer.get_log_filepath() + '.1'
             assert not logtransfer.is_empty_logfile()
@@ -149,7 +152,6 @@ def test_logstash_rollover_time(logtransfer_rollover_time):
 
             with open(logtransfer.get_log_filepath(), 'rb') as f:
                 # check active logfile
                 file_content = f.read()
-                # print(f"file_content:\n{file_content}")
 
                 assert testlog[0].encode() not in file_content
                 assert testlog[1].encode() in file_content
@@ -207,9 +209,8 @@ def test_logstash_rollover_space(logtransfer_rollover_space):
         if len(logtransfer.get_all_rotated_logfiles()) > 2:
             break
         else:
-            time.sleep(
-                0.01
-            )  # slow down logging a bit, so that server can perform rollovers
+            # slow down logging a bit, so that the server can perform rollovers
+            time.sleep(0.01)
 
     expected_logs = b''.join(expected_logs)
     assert_equal_content_logfiles(expected_logs)
@@ -242,14 +243,11 @@ def test_logtransfer_to_federation_backend(
 
     with central_logstash_mock as server:  # server is closed after context manager exits
         ret = logtransfer_rollover_time.send(logstash_line)
-        # print(f"sent logstash line {logstash_line} with ret={ret}")
         server.handle_request()
 
     # only one message was sent
     assert len(central_logstash_mock.received_content) == 1
 
-    # print(f'central_logstash_mock.received_content[0]={central_logstash_mock.received_content[0]}')
-
     # transform to json (also validates that both are valid)
     sent_json = json.loads(logstash_line.rstrip())
     received_json = json.loads(central_logstash_mock.received_content[0])
@@ -258,7 +256,8 @@ def test_logtransfer_to_federation_backend(
     assert 'ip_address' not in sent_json.keys()
     assert 'ip_address' in received_json.keys()
 
-    # remove key and convert back to string to compare that both logs have identical key/values
+    # remove key and convert back to string to compare that both logs have
+    # identical key/values
     received_json.pop('ip_address')
     assert json.dumps(sent_json, sort_keys=True) == json.dumps(
         received_json, sort_keys=True
diff --git a/tests/logtransfer/test_statistics_logger.py b/tests/logtransfer/test_statistics_logger.py
new file mode 100644
index 0000000000..aeef7e8f30
--- /dev/null
+++ b/tests/logtransfer/test_statistics_logger.py
@@ -0,0 +1,53 @@
+import json
+import time
+
+import pytest
+import requests
+
+from nomad import config
+import nomad.statistics_logger as stats
+
+
+@pytest.mark.timeout(3)
+def test_statistics_service(
+    logstash_enabled, central_logstash_mock, collect_statistics, api_v1, elastic_infra
+):
+    # Note: the "elastic_infra" fixture is required here so that the /info
+    # request at the FastAPI backend actually collects statistics.
+
+    statistics_process = stats.start_statistics_logger(fork_mode='process')
+
+    with central_logstash_mock as server:
+        # timeout must be smaller than the interval set in the
+        # collect_statistics fixture
+        timeout = 0.5
+        server.set_request_timeout(timeout)
+
+        # server closes when the context manager is left
+        server.handle_request()
+
+    while statistics_process.is_alive():
+        statistics_process.kill()
+        time.sleep(0.01)
+
+    # assert that all current statistics end up in the logtransfer file
+    info_url = f'http://localhost:{config.services.api_port}/api/v1/info'
+    collected_statistics = json.loads(requests.get(info_url).content)['statistics']
+
+    received_content = []
+    for item in central_logstash_mock.received_content:
+        received_content += [json.loads(item.decode())]
+
+    for expected_key, expected_value in collected_statistics.items():
+        for log_entry in received_content:
+            try:
+                actual_key = log_entry['nomad.statistics_logger.key']
+                actual_value = log_entry['nomad.statistics_logger.value']
+                if actual_key == expected_key and actual_value == expected_value:
+                    # success: found the key-value pair - break the inner loop
+                    break
+
+            except KeyError:
+                # there may be other logs that do not contain the statistics keys
+                pass
+        else:
+            # executes if the inner loop was *not* terminated with 'break'
+            assert False, f'Could not find {expected_key=}, {expected_value=} pair.'
--
GitLab
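
For reference, a minimal sketch of the `nomad.yaml` settings that the patched code
reads (key names come from the `Logtransfer` settings model above; the interval
values shown are just the defaults, 60 * 60 * 24 seconds):

    logstash:
      enabled: true  # required by the check in nomad/cli/admin/run.py
    logtransfer:
      enable_logtransfer: true  # must be true whenever enable_statistics is
      enable_statistics: true
      submit_interval: 86400      # seconds between log submissions
      statistics_interval: 86400  # seconds between statistics log entries

The service is then started via the command registered above in
nomad/cli/admin/run.py (the `run` group under `admin`), i.e.
`nomad admin run logtransfer`.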