def numerical_array(unit: str = None, shape: List[Union[str, int]] = None,
                    dimensionality: int = None) -> 'Type[ArchiveSection]':
    '''
    Function for generating a NumericalArray section with the specified shape and units.

    Every call defines and returns a new `NumericalArray` class whose `values`
    quantity uses the requested shape and unit.

    Args:
        unit (str, optional): The units of the values in the array. Also used as the
            unit of all statistical descriptors. Defaults to None.
        shape (List[Union[str, int]], optional): The shape of the array. If omitted,
            a shape with one `'*'` entry per dimension is used. Defaults to None.
        dimensionality (int, optional): The dimensionality of the array. Only used to
            build the shape when `shape` is omitted, in which case it defaults to 1.
            Defaults to None.

    Raises:
        ValueError: If both shape and dimensionality are specified but do not match.

    Returns:
        Type[ArchiveSection]: The NumericalArray class with the specified shape and units.
    '''
    if shape is None:
        array_shape = ['*'] * (1 if dimensionality is None else dimensionality)
    else:
        if dimensionality is not None and dimensionality != len(shape):
            raise ValueError(
                f'Mismatch between dimensionality "{dimensionality}" and '
                f'shape "{shape}"')
        # Copy the list so that a later mutation of the caller's list cannot change
        # the shape stored in the generated quantity definitions.
        array_shape = list(shape)

    # Name of each quantile descriptor and the quantile level used to fill it.
    quantile_levels = {
        'first_ventile': 0.05,
        'first_quartile': 0.25,
        'median': 0.5,
        'third_quartile': 0.75,
        'nineteenth_ventile': 0.95,
    }

    class NumericalArray(ArchiveSection):
        '''
        A section for describing a numerical array with a normalizer for calculating
        statistical descriptors.
        '''
        values = Quantity(
            type=np.dtype(np.float64),
            shape=array_shape,
            description='The values of the numerical array',
            unit=unit)

        dimensionality = Quantity(
            type=np.dtype(np.int8),
            description='The dimensionality of the numerical array')

        shape = Quantity(
            type=np.dtype(np.int64),
            shape=[len(array_shape)],
            description='The shape of the numerical array')

        mean = Quantity(
            type=np.dtype(np.float64),
            description='The mean value of the numerical array',
            unit=unit)

        minimum = Quantity(
            type=np.dtype(np.float64),
            description='The minimum value of the numerical array',
            unit=unit)

        maximum = Quantity(
            type=np.dtype(np.float64),
            description='The maximum value of the numerical array',
            unit=unit)

        standard_deviation = Quantity(
            type=np.dtype(np.float64),
            description='The standard deviation of the values in the numerical array',
            unit=unit)

        first_ventile = Quantity(
            type=np.dtype(np.float64),
            description='The first ventile of the values in the numerical array',
            unit=unit)

        first_quartile = Quantity(
            type=np.dtype(np.float64),
            description='The first quartile of the values in the numerical array',
            unit=unit)

        median = Quantity(
            type=np.dtype(np.float64),
            description='The median of the values in the numerical array',
            unit=unit)

        third_quartile = Quantity(
            type=np.dtype(np.float64),
            description='The third quartile of the values in the numerical array',
            unit=unit)

        nineteenth_ventile = Quantity(
            type=np.dtype(np.float64),
            description='The nineteenth ventile of the values in the numerical array',
            unit=unit)

        def normalize(self, archive, logger: Any) -> None:
            '''The normalizer for the `NumericalArray` class. If values exist and the
            statistical descriptors are not filled the normalizer will fill the
            descriptors using NumPy properties, methods and functions. Descriptors
            that already hold a value are never overwritten.

            Args:
                archive (EntryArchive): The archive containing the section that is being
                    normalized.
                logger (Any): A structlog logger.
            '''
            super(NumericalArray, self).normalize(archive, logger)

            # Read the quantity once and reuse it for every descriptor.
            values = self.values
            if values is None:
                return

            if self.dimensionality is None:
                self.dimensionality = values.ndim
            if self.shape is None:
                self.shape = values.shape

            # min() and max() raise a ValueError on zero-size arrays, so the
            # statistical descriptors are left unset for empty arrays.
            if 0 in values.shape:
                return

            if self.mean is None:
                self.mean = values.mean()
            if self.minimum is None:
                self.minimum = values.min()
            if self.maximum is None:
                self.maximum = values.max()
            if self.standard_deviation is None:
                self.standard_deviation = values.std()
            for name, level in quantile_levels.items():
                if getattr(self, name) is None:
                    setattr(self, name, np.quantile(values, level))

    return NumericalArray