Commit f3e8a9f0 authored by Mohammad Nakhaee's avatar Mohammad Nakhaee Committed by Lauri Himanen
Browse files

Validate eln annotations (#941)

parent 9f63f1ee
......@@ -20,10 +20,9 @@
from typing import List, Any
from enum import Enum
from cachetools import cached, TTLCache
from elasticsearch_dsl import analyzer, tokenizer
from nomad import metainfo, config
from nomad import metainfo
from nomad.metainfo.mongoengine_extension import Mongo, MongoDocument
from nomad.datamodel.metainfo.common import FastAccess
from nomad.metainfo.pydantic_extension import PydanticModel
......@@ -31,6 +30,8 @@ from nomad.metainfo.elasticsearch_extension import Elasticsearch, material_entry
# This is usually defined automatically when the first metainfo definition is evaluated, but
# because the next imports already require the m_package, this would be too late.
from ..metainfo.metainfo import User, Author
m_package = metainfo.Package()
from .results import Results # noqa
......@@ -83,76 +84,6 @@ def QuantitySearch():
field='path', _es_field='')]
class Author(metainfo.MSection):
    ''' A person that is author of data in NOMAD or references by NOMAD. '''

    # Display name derived from `first_name` and `last_name`: both are joined
    # with a single space and surrounding whitespace is stripped. It carries
    # three Elasticsearch annotations: one with `_es_field='keyword'`, one with
    # a `text` mapping, and one suggestion annotation.
    name = metainfo.Quantity(
        type=str,
        derived=lambda user: ('%s %s' % (user.first_name, user.last_name)).strip(),
        a_elasticsearch=[
            Elasticsearch(material_entry_type, _es_field='keyword'),
            Elasticsearch(material_entry_type, mapping='text', field='text', _es_field=''),
            Elasticsearch(suggestion="default")
        ])

    # Name parts use the `Capitalized` metainfo datatype.
    first_name = metainfo.Quantity(type=metainfo.Capitalized)
    last_name = metainfo.Quantity(type=metainfo.Capitalized)

    # Plain string contact and affiliation details.
    email = metainfo.Quantity(type=str)
    affiliation = metainfo.Quantity(type=str)
    affiliation_address = metainfo.Quantity(type=str)
class User(Author):
    ''' A NOMAD user.

    Typically a NOMAD user has a NOMAD account. The user related data is managed by
    NOMAD keycloak user-management system. Users are used to denote authors,
    reviewers, and owners of datasets.

    Args:
        user_id: The unique, persistent keycloak UUID
        username: The unique, persistent, user chosen username
        first_name: The users first name (including all other given names)
        last_name: The users last name
        affiliation: The name of the company and institutes the user identifies with
        affiliation_address: The address of the given affiliation
        created: The time the account was created
        repo_user_id: The id that was used to identify this user in the NOMAD CoE Repository
        is_admin: Bool that indicated, iff the user the use admin user
    '''

    # Section definition carrying a pydantic annotation.
    m_def = metainfo.Section(a_pydantic=PydanticModel())

    user_id = metainfo.Quantity(
        type=str,
        a_elasticsearch=Elasticsearch(material_entry_type))

    username = metainfo.Quantity(type=str)

    created = metainfo.Quantity(type=metainfo.Datetime)

    repo_user_id = metainfo.Quantity(
        type=str,
        description='Optional, legacy user id from the old NOMAD CoE repository.')

    # Derived: true when `user_id` equals `config.services.admin_user_id`.
    is_admin = metainfo.Quantity(
        type=bool, derived=lambda user: user.user_id == config.services.admin_user_id)

    is_oasis_admin = metainfo.Quantity(type=bool, default=False)

    # Results are cached per argument combination in a TTL cache holding up to
    # 2048 entries with a ttl of 24 * 3600.
    @staticmethod
    @cached(cache=TTLCache(maxsize=2048, ttl=24 * 3600))
    def get(*args, **kwargs) -> 'User':
        # Imported inside the function rather than at module level
        # (presumably to avoid a circular import - confirm).
        from nomad import infrastructure
        return infrastructure.user_management.get_user(*args, **kwargs)  # type: ignore

    def full_user(self) -> 'User':
        ''' Returns a User object with all attributes loaded from the user management system. '''
        from nomad import infrastructure
        # A user without id cannot be looked up.
        assert self.user_id is not None
        return infrastructure.user_management.get_user(user_id=self.user_id)  # type: ignore
class UserReference(metainfo.Reference):
'''
Special metainfo reference type that allows to use user_ids as values. It automatically
......
......@@ -41,8 +41,10 @@ import importlib
import email.utils
from urllib.parse import urlsplit, urlunsplit, SplitResult
from nomad import config
from nomad.config import process
from nomad.units import ureg as units
from cachetools import cached, TTLCache
m_package: 'Package' = None
......@@ -53,6 +55,28 @@ SectionDefOrCls = Union['Section', 'SectionProxy', Type['MSection']]
T = TypeVar('T')
_hash_method = 'sha1' # choose from hashlib.algorithms_guaranteed
# Maps each ELN type category to the list of quantity type names (as they are
# written in a schema) that belong to it. The key order is significant for
# anything iterating over the mapping, so it is kept as declared.
validElnTypes = dict(
    str=['str'],
    bool=['bool'],
    number=[
        'int', 'float',
        'np.int', 'np.int32', 'np.int64',
        'np.float', 'np.float32', 'np.float64',
    ],
    datetime=['Datetime'],
    enum=['{type_kind: Enum, type_data: [Operator, Responsible_person]}'],
    user=['User'],
    author=['Author'],
    reference=[''],
)
# Maps each ELN type category to the list of ELN edit components that are
# accepted for it. Values stay plain lists because callers concatenate them.
validElnComponents = dict(
    str=[
        'StringEditQuantity',
        'FileEditQuantity',
        'RichTextEditQuantity',
        'EnumEditQuantity',
    ],
    bool=['BoolEditQuantity'],
    number=['NumberEditQuantity', 'SliderEditQuantity'],
    datetime=['DateTimeEditQuantity'],
    enum=[
        'EnumEditQuantity',
        'AutocompleteEditQuantity',
        'RadioEnumEditQuantity',
    ],
    user=['UserEditQuantity'],
    author=['AuthorEditQuantity'],
    reference=['ReferenceEditQuantity'],
)
def _default_hash():
    ''' Returns a fresh hashlib hash object for the algorithm named by `_hash_method`. '''
    hash_object = hashlib.new(_hash_method)
    return hash_object
......@@ -1002,10 +1026,6 @@ Bytes = _Bytes()
File = _File()
URL = _URL()
predefined_datatypes = {
'Dimension': Dimension, 'Unit': Unit, 'Datetime': Datetime,
'JSON': JSON, 'Capitalized': Capitalized, 'bytes': Bytes, 'File': File, 'URL': URL}
# Metainfo data storage and reflection interface
......@@ -3593,6 +3613,78 @@ class Section(Definition):
'Alias %s of %s in %s already exists in %s.' % (alias, definition, definition.m_parent, self)
names.add(alias)
@constraint
def compatible_eln_annotation(self):
    '''
    Asserts that the `component` given in the `eln` annotation of each of this
    section's quantities and sub-sections is one of the components accepted
    for the type of the annotated definition (see `validElnComponents`).

    Definitions that have no annotations, no `eln` annotation, no `component`
    key, or an empty `component` are skipped. Definitions whose type matches
    none of the branches below are not checked.

    Raises:
        AssertionError: If a component is not in the accepted list. The message
            names the component, the definition, the type name, and the
            accepted components.
    '''
    # NOTE(review): presumably the `constraint` machinery reports the failed
    # assertion as a constraint violation - confirm.
    # NOTE(review): the nesting of the branches was reconstructed from a view
    # without indentation; the numpy branch is taken to be a sibling of the
    # `isinstance(definition.type, type)` check - confirm against the file.

    def assert_component(component_name, quantity_name, quantity_type, accepted_components):
        assert component_name in accepted_components, \
            'The component `%s` is not compatible with the quantity `%s` of the type `%s`. Accepted components: %s.' \
            % (component_name, quantity_name, quantity_type, ', '.join(accepted_components))

    # Built once instead of on every comparison. `float` and `int` stand in for
    # `np.float` and `np.int`: those were plain aliases of the builtins and
    # were removed in NumPy 1.24, where merely naming them raises
    # AttributeError.
    numpy_number_types = (
        np.float64, np.float32, float,
        np.uint64, np.uint32, np.uint,
        np.int64, np.int32, int)

    # NOTE(review): sub-section definitions are visited as well; this assumes
    # they expose a `type` attribute like quantities do - confirm.
    for def_list in [self.quantities, self.sub_sections]:
        for definition in def_list:
            if bool(definition.m_annotations) and 'eln' in definition.m_annotations and 'component' in definition.m_annotations['eln']:
                component = definition.m_annotations['eln']['component']
                if component:
                    if isinstance(definition.type, type):
                        # Plain classes are told apart by their class name.
                        if definition.type.__name__ == 'str':
                            assert_component(
                                component, definition.name, definition.type.__name__,
                                validElnComponents['str']
                            )
                        elif definition.type.__name__ == 'bool':
                            assert_component(
                                component, definition.name, definition.type.__name__, validElnComponents['bool']
                            )
                        elif definition.type.__name__ in ['float', 'int']:
                            assert_component(
                                component, definition.name, definition.type.__name__,
                                validElnComponents['number']
                            )
                        elif definition.type.__name__ == 'User':
                            assert_component(
                                component, definition.name, definition.type.__name__,
                                validElnComponents['user']
                            )
                        elif definition.type.__name__ == 'Author':
                            assert_component(
                                component, definition.name, definition.type.__name__,
                                validElnComponents['author']
                            )
                    elif definition.type in numpy_number_types:
                        # Non-class type objects that compare equal to one of
                        # the numeric types.
                        assert_component(
                            component, definition.name, type(definition.type).__name__,
                            validElnComponents['number']
                        )
                    elif isinstance(definition.type, _Datetime):
                        assert_component(
                            component, definition.name, type(definition.type).__name__,
                            validElnComponents['datetime']
                        )
                    elif isinstance(definition.type, MEnum):
                        assert_component(
                            component, definition.name, type(definition.type).__name__,
                            validElnComponents['enum']
                        )
                    elif isinstance(definition.type, Reference):
                        # References to `User` or `Author` sections use the
                        # dedicated components; any other target uses the
                        # generic reference components.
                        target_class = definition.type.target_section_def.section_cls
                        if target_class.__name__ == 'User':
                            assert_component(
                                component, definition.name, target_class.__name__,
                                validElnComponents['user']
                            )
                        elif target_class.__name__ == 'Author':
                            assert_component(
                                component, definition.name, target_class.__name__,
                                validElnComponents['author']
                            )
                        else:
                            assert_component(
                                component, definition.name, type(definition.type).__name__,
                                validElnComponents['reference']
                            )
@constraint
def resolved_base_sections(self):
for base_section in self.base_sections:
......@@ -4180,3 +4272,84 @@ class Environment(MSection):
raise KeyError('Could not uniquely identify %s, candidates are %s' % (name, defs))
else:
raise KeyError('Could not resolve %s' % name)
class Author(MSection):
    ''' A person that is author of data in NOMAD or references by NOMAD. '''

    # The docstring has to be the first statement of the class body; placed
    # after the import it was a discarded expression and `Author.__doc__` was
    # None.
    # The import stays inside the class body (presumably to avoid a circular
    # import with the elasticsearch extension - confirm).
    from nomad.metainfo.elasticsearch_extension import material_entry_type, Elasticsearch as ElasticSearch

    # Display name derived from `first_name` and `last_name`: both are joined
    # with a single space and surrounding whitespace is stripped. It carries
    # three Elasticsearch annotations: one with `_es_field='keyword'`, one with
    # a `text` mapping, and one suggestion annotation.
    name = Quantity(
        type=str,
        derived=lambda user: ('%s %s' % (user.first_name, user.last_name)).strip(),
        a_elasticsearch=[
            ElasticSearch(material_entry_type, _es_field='keyword'),  # type: ignore
            ElasticSearch(material_entry_type, mapping='text', field='text', _es_field=''),  # type: ignore
            ElasticSearch(suggestion="default")
        ])

    # Name parts use the `Capitalized` metainfo datatype.
    first_name = Quantity(type=Capitalized)
    last_name = Quantity(type=Capitalized)

    # Plain string contact and affiliation details.
    email = Quantity(type=str)
    affiliation = Quantity(type=str)
    affiliation_address = Quantity(type=str)
class User(Author):
    ''' A NOMAD user.

    Typically a NOMAD user has a NOMAD account. The user related data is managed by
    NOMAD keycloak user-management system. Users are used to denote authors,
    reviewers, and owners of datasets.

    Args:
        user_id: The unique, persistent keycloak UUID
        username: The unique, persistent, user chosen username
        first_name: The users first name (including all other given names)
        last_name: The users last name
        affiliation: The name of the company and institutes the user identifies with
        affiliation_address: The address of the given affiliation
        created: The time the account was created
        repo_user_id: The id that was used to identify this user in the NOMAD CoE Repository
        is_admin: Bool that indicated, iff the user the use admin user
    '''

    # The docstring has to be the first statement of the class body; placed
    # after the imports it was a discarded expression and `User.__doc__` was
    # None.
    # The imports stay inside the class body (presumably to avoid circular
    # imports with the extension modules - confirm).
    from nomad.metainfo.pydantic_extension import PydanticModel
    from nomad.metainfo.elasticsearch_extension import material_entry_type, Elasticsearch as ElasticSearch

    # Section definition carrying a pydantic annotation.
    m_def = Section(a_pydantic=PydanticModel())

    user_id = Quantity(
        type=str,
        a_elasticsearch=ElasticSearch(material_entry_type))  # type: ignore

    username = Quantity(type=str)

    created = Quantity(type=Datetime)

    repo_user_id = Quantity(
        type=str,
        description='Optional, legacy user id from the old NOMAD CoE repository.')

    # Derived: true when `user_id` equals `config.services.admin_user_id`.
    is_admin = Quantity(
        type=bool, derived=lambda user: user.user_id == config.services.admin_user_id)

    is_oasis_admin = Quantity(type=bool, default=False)

    @staticmethod
    @cached(cache=TTLCache(maxsize=2048, ttl=24 * 3600))
    def get(*args, **kwargs) -> 'User':
        '''
        Returns the user that the user management system finds for the given
        arguments. Results are cached per argument combination in a TTL cache
        holding up to 2048 entries with a ttl of 24 * 3600.
        '''
        # Imported inside the function rather than at module level
        # (presumably to avoid a circular import - confirm).
        from nomad import infrastructure
        return infrastructure.user_management.get_user(*args, **kwargs)  # type: ignore

    def full_user(self) -> 'User':
        ''' Returns a User object with all attributes loaded from the user management system. '''
        from nomad import infrastructure
        # A user without id cannot be looked up.
        assert self.user_id is not None
        return infrastructure.user_management.get_user(user_id=self.user_id)  # type: ignore
# Maps the names of the predefined datatypes to the objects that implement
# them. `User` and `Author` are section classes and are included here as well.
predefined_datatypes = dict(
    Dimension=Dimension,
    Unit=Unit,
    Datetime=Datetime,
    JSON=JSON,
    Capitalized=Capitalized,
    bytes=Bytes,
    File=File,
    URL=URL,
    User=User,
    Author=Author,
)
import numpy as np # pylint: disable=unused-import
import pytest
import yaml
from nomad.metainfo.metainfo import validElnComponents, validElnTypes
from nomad.utils import strip
from nomad.metainfo import Package, MSection, Quantity, Reference, SubSection, Section, MProxy, MetainfoError
......@@ -11,7 +13,7 @@ m_package = Package()
class Sample(MSection):
sample_id = Quantity(
type=str,
a_eln=dict(component="StringEditComponent"),
a_eln=dict(component="StringEditQuantity"),
description='''
This is a description with *markup* using [markdown](https://markdown.org).
It can have multiple lines, because yaml allows to easily do this.
......@@ -49,7 +51,7 @@ sections:
It can have multiple lines, because yaml allows to easily do this.
m_annotations:
eln:
component: StringEditComponent
component: StringEditQuantity
Process:
quantities:
samples:
......@@ -186,3 +188,47 @@ def test_sub_section_tree():
''')
assert yaml.m_to_dict() == reference.m_to_dict()
@pytest.mark.parametrize("eln_type", validElnTypes.keys())
@pytest.mark.parametrize("eln_component", sum(validElnComponents.values(), []))
def test_datatype_component_annotations(eln_type, eln_component):
    '''
    Runs once per combination of an ELN type category (keys of `validElnTypes`)
    and an ELN component (all lists of `validElnComponents` concatenated).

    For every type name of the category, a schema is built from `base_schema`
    by textual substitution of the `quantity_type` and `eln_component`
    placeholders. If the component is not accepted for the category, loading
    and initializing the schema is expected to raise, and the error message is
    compared against the expected constraint violation text. Combinations with
    an accepted component perform no checks.
    '''
    # NOTE(review): the nesting of this function and the indentation inside the
    # YAML literal below were lost in the view this was edited from. The
    # literal is reproduced exactly as seen; restore the YAML nesting from the
    # original file before relying on it.
    base_schema = '''
m_def: 'nomad.metainfo.metainfo.Package'
sections:
Sample:
base_section: 'nomad.datamodel.metainfo.measurements.Sample'
quantities:
sample_id:
type: str
m_annotations:
eln:
component: StringEditQuantity
Process:
quantities:
quantity_name:
type: quantity_type
m_annotations:
eln:
component: eln_component
'''
    for quantity_type in validElnTypes[eln_type]:
        # References have no type name of their own; they point at the
        # `Sample` section of the same schema instead.
        if eln_type == 'reference':
            yaml_schema = base_schema.replace("quantity_type", "'#/Sample'").replace("eln_component", eln_component)
        else:
            yaml_schema = base_schema.replace("quantity_type", quantity_type).replace("eln_component", eln_component)
        if eln_component not in validElnComponents[eln_type]:
            with pytest.raises(Exception) as exception:
                package = yaml_to_package(yaml_schema)
                # The expected message contains the schema type name, unless
                # the loaded quantity type is an instance of a dedicated type
                # class; then that class name is used.
                type_name = quantity_type
                if eln_type == 'number' or eln_type == 'datetime' or eln_type == 'enum' or eln_type == 'reference':
                    process = next(filter(lambda section: section['name'] == 'Process', package['section_definitions']), None)
                    quantity = process['quantities'][0]
                    if type(quantity.type).__name__ != 'type':
                        type_name = type(quantity.type).__name__
                # Presumably the call that evaluates the constraints and raises - confirm.
                package.__init_metainfo__()
            assert isinstance(exception.value, MetainfoError)
            assert exception.value.args[0] == 'One constraint was violated: The component `%s` is not compatible with the quantity `%s` of the type `%s`. Accepted components: %s (there are 0 more violations)' \
                % (eln_component, 'quantity_name', type_name, ', '.join(validElnComponents[eln_type]))
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment