From a74fdc4055993a17b80c567d175795991b2bf5f9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Hampus=20N=C3=A4sstr=C3=B6m?= <hampus.nasstrom@gmail.com>
Date: Wed, 22 Feb 2023 16:37:24 +0100
Subject: [PATCH] Added Function for Generating NumericalArray Class

---
 nomad/datamodel/metainfo/eln/__init__.py | 132 ++++++++++++++++++++++-
 1 file changed, 131 insertions(+), 1 deletion(-)

diff --git a/nomad/datamodel/metainfo/eln/__init__.py b/nomad/datamodel/metainfo/eln/__init__.py
index e6dfc4f011..df1102916e 100644
--- a/nomad/datamodel/metainfo/eln/__init__.py
+++ b/nomad/datamodel/metainfo/eln/__init__.py
@@ -19,7 +19,7 @@
 import numpy as np
 import datetime
 import re
-from typing import Any, Dict, List
+from typing import Any, Dict, List, Type, Union
 from nomad import utils
 from nomad.units import ureg
 from nomad.datamodel.data import EntryData, ArchiveSection, author_reference, BasicElnCategory
@@ -40,6 +40,136 @@ from nomad.datamodel.metainfo.eln.nexus_data_converter import (
 m_package = Package(name='eln')
 
 
+def numerical_array(unit: str = None, shape: List[Union[str, int]] = None,
+                    dimensionality: int = None) -> Type[ArchiveSection]:
+    '''
+    Function for generating a NumericalArray section with the specified shape and units.
+
+    Args:
+        unit (str, optional): Unit of the values and their statistics. Defaults to None.
+        shape (List[Union[str, int]], optional): The shape of the array. If omitted, one
+            '*' entry per dimension is used.
+        dimensionality (int, optional): The number of dimensions. Without a shape it
+            defaults to 1; with a shape it is only checked against the shape's length.
+
+    Raises:
+        ValueError: If both shape and dimensionality are specified but do not match.
+
+    Returns:
+        Type[ArchiveSection]: The NumericalArray class with the specified shape and units.
+    '''
+    if shape is None:
+        shape = ['*'] * (1 if dimensionality is None else dimensionality)
+    elif dimensionality is not None and dimensionality != len(shape):
+        raise ValueError(f'Missmatch between dimensionality "{dimensionality}" and '
+                         + f'shape "{shape}"')
+    # The class body defines its own `shape` quantity, so the argument is captured under
+    # another name; copying keeps later changes to the caller's list out of the section.
+    array_shape = list(shape)
+
+    class NumericalArray(ArchiveSection):
+        '''
+        A section for describing a numerical array with a normalizer for calculating
+        statistical descriptors.
+        '''
+        values = Quantity(
+            type=np.dtype(np.float64),
+            shape=array_shape,
+            description='The values of the numerical array',
+            unit=unit)
+
+        dimensionality = Quantity(
+            type=np.dtype(np.int8),
+            description='The dimensionality of the numerical array')
+
+        shape = Quantity(
+            type=np.dtype(np.int64),
+            shape=[len(array_shape)],
+            description='The shape of the numerical array')
+
+        mean = Quantity(
+            type=np.dtype(np.float64),
+            description='The mean value of the numerical array',
+            unit=unit)
+
+        minimum = Quantity(
+            type=np.dtype(np.float64),
+            description='The minimum value of the numerical array',
+            unit=unit)
+
+        maximum = Quantity(
+            type=np.dtype(np.float64),
+            description='The maximum value of the numerical array',
+            unit=unit)
+
+        standard_deviation = Quantity(
+            type=np.dtype(np.float64),
+            description='The standard deviation of the values in the numerical array',
+            unit=unit)
+
+        first_ventile = Quantity(
+            type=np.dtype(np.float64),
+            description='The first ventile of the values in the numerical array',
+            unit=unit)
+
+        first_quartile = Quantity(
+            type=np.dtype(np.float64),
+            description='The first quartile of the values in the numerical array',
+            unit=unit)
+
+        median = Quantity(
+            type=np.dtype(np.float64),
+            description='The median of the values in the numerical array',
+            unit=unit)
+
+        third_quartile = Quantity(
+            type=np.dtype(np.float64),
+            description='The third quartile of the values in the numerical array',
+            unit=unit)
+
+        nineteenth_ventile = Quantity(
+            type=np.dtype(np.float64),
+            description='The nineteenth ventile of the values in the numerical array',
+            unit=unit)
+
+        def normalize(self, archive, logger: Any) -> None:
+            '''The normalizer for the `NumericalArray` class. If values exist, the
+            descriptors that are not filled yet are computed from them with NumPy. For
+            an empty array only `dimensionality` and `shape` are filled.
+
+            Args:
+                archive (EntryArchive): The archive containing the normalized section.
+                logger (Any): A structlog logger.
+            '''
+            super(NumericalArray, self).normalize(archive, logger)
+
+            values = self.values
+            if values is None:
+                return
+            if self.dimensionality is None:
+                self.dimensionality = values.ndim
+            if self.shape is None:
+                self.shape = values.shape
+            # min() and max() raise on zero-size arrays; leave the statistics unset.
+            if values.size == 0:
+                return
+            if self.mean is None:
+                self.mean = values.mean()
+            if self.minimum is None:
+                self.minimum = values.min()
+            if self.maximum is None:
+                self.maximum = values.max()
+            if self.standard_deviation is None:
+                self.standard_deviation = values.std()
+            quantiles = {'first_ventile': 0.05, 'first_quartile': 0.25, 'median': 0.5,
+                         'third_quartile': 0.75, 'nineteenth_ventile': 0.95}
+            for name, fraction in quantiles.items():
+                if getattr(self, name) is None:
+                    setattr(self, name, np.quantile(values, fraction))
+
+    return NumericalArray
+
+
 class User(MSection):
     user = Quantity(
         type=author_reference,
-- 
GitLab