Commit 249559b5 authored by Markus Scheidgen
Browse files

Refactored the mongoengine metainfo extension.

parent 3b68e81a
Pipeline #70254 passed with stages
in 14 minutes and 58 seconds
......@@ -62,12 +62,12 @@ class DatasetListResource(Resource):
if prefix is not '':
query_params.update(name=re.compile('^%s.*' % prefix, re.IGNORECASE))
result_query = Dataset.m_def.m_x('me').objects(**query_params)
result_query = Dataset.m_def.a_mongo.objects(**query_params)
return dict(
pagination=dict(total=result_query.count(), page=page, per_page=per_page),
results=[
Dataset.m_def.m_x('me').to_metainfo(result)
Dataset.m_def.a_mongo.to_metainfo(result)
for result in result_query[(page - 1) * per_page: page * per_page]]), 200
@api.doc('create_dataset')
......@@ -86,7 +86,7 @@ class DatasetListResource(Resource):
if name is None:
abort(400, 'Must provide a dataset name.')
if Dataset.m_def.m_x('me').objects(user_id=g.user.user_id, name=name).count() > 0:
if Dataset.m_def.a_mongo.objects(user_id=g.user.user_id, name=name).count() > 0:
abort(400, 'A dataset with name %s does already exist for the current user.' % name)
# only admin can set user or doi
......@@ -101,7 +101,7 @@ class DatasetListResource(Resource):
if 'user_id' not in data:
data['user_id'] = g.user.user_id
dataset_id = data.pop('dataset_id', utils.create_uuid())
return Dataset(dataset_id=dataset_id, **data).m_x('me').create(), 200
return Dataset(dataset_id=dataset_id, **data).a_mongo.create(), 200
@ns.route('/<path:name>')
......@@ -114,7 +114,7 @@ class DatasetResource(Resource):
def get(self, name: str):
''' Retrieve a dataset by name. '''
try:
result = Dataset.m_def.m_x('me').get(user_id=g.user.user_id, name=name)
result = Dataset.m_def.a_mongo.get(user_id=g.user.user_id, name=name)
except KeyError:
abort(404, 'Dataset with name %s does not exist for current user' % name)
......@@ -128,7 +128,7 @@ class DatasetResource(Resource):
def post(self, name: str):
''' Assign a DOI to the dataset. '''
try:
result = Dataset.m_def.m_x('me').get(user_id=g.user.user_id, name=name)
result = Dataset.m_def.a_mongo.get(user_id=g.user.user_id, name=name)
except KeyError:
abort(404, 'Dataset with name %s does not exist for current user' % name)
......@@ -151,7 +151,7 @@ class DatasetResource(Resource):
result.doi = doi.doi
result.m_x('me').save()
result.a_mongo.save()
if doi.state != 'findable':
common.logger.warning(
'doi was created, but is not findable', doi=doi.doi, doi_state=doi.state,
......@@ -170,7 +170,7 @@ class DatasetResource(Resource):
def delete(self, name: str):
''' Delete the dataset. '''
try:
result = Dataset.m_def.m_x('me').get(user_id=g.user.user_id, name=name)
result = Dataset.m_def.a_mongo.get(user_id=g.user.user_id, name=name)
except KeyError:
abort(404, 'Dataset with name %s does not exist for current user' % name)
......@@ -183,7 +183,7 @@ class DatasetResource(Resource):
{'__raw__': {'$pull': {'metadata.datasets': result.dataset_id}}})
# delete the dataset
result.m_x('me').delete()
result.a_mongo.delete()
return result
......@@ -195,7 +195,7 @@ class RepoPidResource(Resource):
@api.marshal_with(dataset_model, skip_none=True, code=200, description='DOI resolved')
@authenticate()
def get(self, doi: str):
dataset_me = Dataset.m_def.m_x('me').objects(doi=doi).first()
dataset_me = Dataset.m_def.a_mongo.objects(doi=doi).first()
if dataset_me is None:
abort(404, 'Dataset with DOI %s does not exist' % doi)
......
......@@ -44,7 +44,7 @@ mirror_query_model = api.model('MirrorQuery', {
description='Mongoengine query that is used to search for uploads to mirror.')
})
_Dataset = Dataset.m_def.m_x('me').me_cls
_Dataset = Dataset.m_def.a_mongo.mongo_cls
@ns.route('/')
......
......@@ -451,7 +451,7 @@ class EditRepoCalcsResource(Resource):
elif flask_verify == datamodel.Dataset:
try:
mongo_value = Dataset.m_def.m_x('me').get(
mongo_value = Dataset.m_def.a_mongo.get(
user_id=g.user.user_id, name=action_value).dataset_id
except KeyError:
action['message'] = 'Dataset does not exist and will be created'
......@@ -460,7 +460,7 @@ class EditRepoCalcsResource(Resource):
dataset = Dataset(
dataset_id=utils.create_uuid(), user_id=g.user.user_id,
name=action_value, created=datetime.utcnow())
dataset.m_x('me').create()
dataset.a_mongo.create()
mongo_value = dataset.dataset_id
elif action_quantity_name == 'with_embargo':
......@@ -508,7 +508,7 @@ class EditRepoCalcsResource(Resource):
if dataset_id not in mongo_update.get(mongo_key, []):
removed_datasets.append(dataset_id)
doi_ds = Dataset.m_def.m_x('me').objects(
doi_ds = Dataset.m_def.a_mongo.objects(
dataset_id__in=removed_datasets, doi__ne=None).first()
if doi_ds is not None:
json_data['success'] = False
......@@ -538,7 +538,7 @@ class EditRepoCalcsResource(Resource):
if removed_datasets is not None:
for dataset in removed_datasets:
if proc.Calc.objects(metadata__dataset_id=dataset).first() is None:
Dataset.m_def.m_x('me').objects(dataset_id=dataset).delete()
Dataset.m_def.a_mongo.objects(dataset_id=dataset).delete()
return json_data, 200
......
......@@ -32,7 +32,7 @@ from .client import client
__in_test = False
''' Will be monkeypatched by tests to alter behavior for testing. '''
_Dataset = Dataset.m_def.m_x('me').me_cls
_Dataset = Dataset.m_def.a_mongo.mongo_cls
__logger = utils.get_logger(__name__)
......
......@@ -22,7 +22,7 @@ import ase.data
from nomad import metainfo, config
from nomad.metainfo.search_extension import Search
from nomad.metainfo.elastic_extension import ElasticDocument
import nomad.metainfo.mongoengine_extension
from nomad.metainfo.mongoengine_extension import Mongo, MongoDocument
from .dft import DFTMetadata
from .ems import EMSMetadata
......@@ -60,7 +60,6 @@ class User(metainfo.MSection):
user_id = metainfo.Quantity(
type=str,
a_me=dict(primary_key=True),
a_search=Search())
name = metainfo.Quantity(
......@@ -72,7 +71,6 @@ class User(metainfo.MSection):
last_name = metainfo.Quantity(type=str)
email = metainfo.Quantity(
type=str,
a_me=dict(index=True),
a_elastic=dict(mapping=Keyword), # TODO remove?
a_search=Search())
......@@ -145,27 +143,29 @@ class Dataset(metainfo.MSection):
datasets based on this id. Is not used for new datasets.
created: The date when the dataset was first created.
'''
m_def = metainfo.Section(a_mongo=MongoDocument())
dataset_id = metainfo.Quantity(
type=str,
a_me=dict(primary_key=True),
a_mongo=Mongo(primary_key=True),
a_search=Search())
name = metainfo.Quantity(
type=str,
a_me=dict(index=True),
a_mongo=Mongo(index=True),
a_search=Search())
user_id = metainfo.Quantity(
type=str,
a_me=dict(index=True))
a_mongo=Mongo(index=True))
doi = metainfo.Quantity(
type=str,
a_me=dict(index=True),
a_mongo=Mongo(index=True),
a_search=Search())
pid = metainfo.Quantity(
type=str,
a_me=dict(index=True))
a_mongo=Mongo(index=True))
created = metainfo.Quantity(
type=metainfo.Datetime,
a_me=dict(index=True),
a_mongo=Mongo(index=True),
a_search=Search())
......@@ -185,7 +185,7 @@ class DatasetReference(metainfo.Reference):
return super().set_normalize(section, quantity_def, value)
def resolve(self, section: metainfo.MSection, quantity_def: metainfo.Quantity, value: Any) -> metainfo.MSection:
return Dataset.m_def.m_x('me').get(dataset_id=value.url)
return Dataset.m_def.a_mongo.get(dataset_id=value.url)
def serialize(self, section: metainfo.MSection, quantity_def: metainfo.Quantity, value: Any) -> Any:
if isinstance(value, metainfo.MProxy):
......@@ -460,7 +460,3 @@ class EntryMetadata(metainfo.MSection):
assert domain_section_def is not None, 'unknown domain %s' % self.domain
domain_section = self.m_create(domain_section_def.section_cls)
domain_section.apply_domain_metadata(backend)
nomad.metainfo.mongoengine_extension.init_section(User)
nomad.metainfo.mongoengine_extension.init_section(Dataset)
......@@ -53,16 +53,11 @@ class ElasticDocument(SectionAnnotation):
self.index_name = index_name
self.id = id
self.m_def: Section = None
self.fields: Dict[Quantity, str] = {}
def new(self, section):
return dict(elastic=ElasticEntry(section))
def init_annotation(self, definition):
assert isinstance(definition, Section), 'The ElasticDocument annotation is only usable with Sections.'
self.m_def = definition
@classmethod
def create_index_entry(cls, section: MSection):
''' Creates an elasticsearch_dsl document instance for the given section. '''
......@@ -127,7 +122,7 @@ class ElasticDocument(SectionAnnotation):
@property
def document(self):
return ElasticDocument.create_document(self.m_def)
return ElasticDocument.create_document(self.definition)
@classmethod
def create_document(
......
......@@ -14,134 +14,171 @@
'''
Adds mongoengine support to the metainfo. Allows to create, save, and get metainfo
sections from mongoengine. Currently no sub-section support. The annotation key is "a_me",
the annotation object support the following keys:
- ``primary_key``: *Bool*, renders the quantity to be the primary key.
- ``index``: *Bool*, adds this quantity to the index
sections from mongoengine. Currently no sub-section support. The annotation key is 'mongo'.
'''
from typing import Any, Dict
import mongoengine as me
from .metainfo import Section, Quantity, Datetime
def init_section(section_cls):
section_def = section_cls.m_def
assert section_def.m_annotations.get('me') is None, 'Can only initialize once'
section_def.m_annotations['me'] = MESection(section_cls)
# assert getattr(section_cls, '__init__', None) is None, 'Only section classes without constructor can be used for mongoengine'
def __init__(self, *args, **kwargs):
super(section_cls, self).__init__(*args, **kwargs)
self.m_annotations['me'] = MEInstance(section_cls.m_def.m_x('me'), self)
section_cls.__init__ = __init__
class MESection():
def __init__(self, section_cls):
self.section_cls = section_cls
self.me_cls = generate_mongoengine(section_cls.m_def)
section_def = self.section_cls.m_def
id_quantity = None
for quantity in section_def.all_quantities.values():
annotation = quantity.m_annotations.get('me', None)
if annotation is not None and annotation.get('primary_key', False):
id_quantity = quantity.name
assert id_quantity is not None, 'Section %s has no mongoengine primary key' % section_def
self.id_quantity = id_quantity
from typing import Any, Dict, List
from .metainfo import DefinitionAnnotation, SectionAnnotation, Annotation, MSection, Datetime, Quantity
class Mongo(DefinitionAnnotation):
    '''
    This annotation class can be used to extend metainfo quantities. It enables and
    details the mapping of quantities to fields in mongoengine documents.

    Attributes:
        index: A boolean indicating that this quantity should be indexed.
        primary_key: A boolean indicating that this quantity is the primary key.
        kwargs: Additional keyword arguments that are passed on to the generated
            mongoengine field. Contains ``primary_key`` if the quantity is the
            primary key.
    '''
    def __init__(
            self, index: bool = False, primary_key: bool = False,
            **kwargs):
        self.primary_key = primary_key
        self.index = index
        # ``**kwargs`` is always a dict (never None), so it can be stored as is.
        self.kwargs = kwargs
        # Only the primary key flag becomes part of the field arguments; ``index``
        # is kept separately on the annotation.
        if primary_key:
            self.kwargs['primary_key'] = primary_key
class MongoDocument(SectionAnnotation):
'''
This annotation class can be used to extend metainfo section. It allows to get
the mongoengine document class to store instances of this section in mongodb. It
also provides access to the respective mongodb collection.
'''
def __init__(self):
self._mongoengine_cls = None
self.primary_key: Mongo = None
def new(self, section):
return dict(mongo=MongoInstance(section))
@property
def mongo_cls(self):
    '''
    The mongoengine document class for this section. It is generated on first
    access and cached afterwards. Only quantities with :class:`Mongo` annotation
    are mapped to fields.
    '''
    if self._mongoengine_cls is None:
        import mongoengine as me

        # Supported scalar quantity types and their mongoengine field classes,
        # checked in this order.
        scalar_fields = (
            (int, me.IntField),
            (float, me.FloatField),
            (str, me.StringField),
            (bool, me.BooleanField),
            (Datetime, me.DateTimeField))

        def generate_field(quantity: Quantity, annotation: Mongo):
            ''' Creates the mongoengine field for the given annotated quantity. '''
            for quantity_type, field_cls in scalar_fields:
                if quantity.type == quantity_type:
                    break
            else:
                raise NotImplementedError

            scalar = field_cls(default=quantity.default, **annotation.kwargs)
            dimensions = len(quantity.shape)
            if dimensions == 0:
                return scalar
            if dimensions == 1:
                # one dimensional quantities are stored as lists of the scalar field
                return me.ListField(scalar)
            raise NotImplementedError

        indexed_names: List[str] = []
        attributes: Dict[str, Any] = {}
        for quantity in self.definition.all_quantities.values():
            mongo_annotation = quantity.m_get_annotations(Mongo)
            if mongo_annotation is None:
                # quantities without Mongo annotation are not stored
                continue

            if mongo_annotation.index:
                indexed_names.append(quantity.name)

            attributes[quantity.name] = generate_field(quantity, mongo_annotation)

            if mongo_annotation.primary_key:
                self.primary_key = mongo_annotation

        if indexed_names:
            attributes['meta'] = {'indexes': indexed_names}

        self._mongoengine_cls = type(self.definition.name, (me.Document,), attributes)

    return self._mongoengine_cls
def objects(self, *args, **kwargs):
return self.me_cls.objects(*args, **kwargs)
'''
Allows access to the underlying collection objects function.
Returns mongoengine document instances, not metainfo section instances.
'''
return self.mongo_cls.objects(*args, **kwargs)
def get(self, **kwargs):
me_obj = self.objects(**kwargs).first()
if me_obj is None:
raise KeyError
return self.to_metainfo(me_obj)
def to_metainfo(self, me_obj):
section = self.section_cls()
section.m_x('me').me_obj = me_obj
for quantity in self.section_cls.m_def.all_quantities.keys():
value = getattr(me_obj, quantity)
if value is not None:
setattr(section, quantity, value)
'''
Returns the first entry that matches the given objects query as metainfo
section instance. Raises KeyError.
'''
mongo_instance = self.objects(**kwargs).first()
if mongo_instance is None:
raise KeyError()
return self.to_metainfo(mongo_instance)
def to_metainfo(self, mongo_instance):
    '''
    Creates a new metainfo section instance from the given mongoengine document
    instance. The document is attached to the section's mongo annotation and the
    values of all :class:`Mongo` annotated quantities that are not None are copied
    over.
    '''
    result = self.definition.section_cls()
    result.a_mongo.mongo_instance = mongo_instance

    for name, quantity in self.definition.all_quantities.items():
        if quantity.m_get_annotations(Mongo) is None:
            # not a mongo mapped quantity
            continue

        stored_value = getattr(mongo_instance, name)
        if stored_value is None:
            continue

        result.m_set(quantity, stored_value)

    return result
class MEInstance():
def __init__(self, me_section: MESection, metainfo):
self.me_section = me_section
self.metainfo = metainfo
self.me_obj = None
class MongoInstance(Annotation):
'''
The annotation that is automatically added to all instances of sections that
feature the :class:`MongoDocument` annotation.
'''
def __init__(self, section: MSection):
self.section = section
self.mongo_instance = None
def save(self):
if self.me_obj is None:
''' Saves the section as mongo entry. Does an upsert. '''
if self.mongo_instance is None:
return self.create()
for quantity_name, quantity in self.metainfo.m_def.all_quantities.items():
me_value = self.metainfo.m_get(quantity)
for quantity_name, quantity in self.section.m_def.all_quantities.items():
value = self.section.m_get(quantity)
setattr(self.me_obj, quantity_name, me_value)
setattr(self.mongo_instance, quantity_name, value)
self.me_obj.save()
return self.metainfo
self.mongo_instance.save()
return self.section
def create(self):
self.me_obj = self.me_section.me_cls()
''' Creates a new mongo entry and saves it. '''
self.mongo_instance = self.section.m_def.a_mongo.mongo_cls()
return self.save()
def delete(self):
self.me_obj.delete()
self.me_obj = None
return self.metainfo
def generate_mongoengine(section_def: Section):
def generate_field(quantity: Quantity):
annotation = quantity.m_x('me', {})
annotation.pop('index', None)
field = None
if quantity.type == int:
field = me.IntField
elif quantity.type == float:
field = me.FloatField
elif quantity.type == str:
field = me.StringField
elif quantity.type == bool:
field = me.BooleanField
elif quantity.type == Datetime:
field = me.DateTimeField
else:
raise NotImplementedError
result = field(default=quantity.default, **annotation)
if len(quantity.shape) == 0:
return result
elif len(quantity.shape) == 1:
return me.ListField(result)
else:
raise NotImplementedError
indexes = [
quantity.name
for quantity in section_def.all_quantities.values()
if quantity.m_annotations.get('a_me', {}).get('index', False)]
dct: Dict[str, Any] = dict()
if len(indexes) > 0:
dct.update(meta=dict(indexes=indexes))
dct.update(**{
name: generate_field(quantity)
for name, quantity in section_def.all_quantities.items()
})
return type(section_def.name, (me.Document,), dct)
''' Deletes the respective entry from mongodb. '''
self.mongo_instance.delete()
self.mongo_instance = None
return self.section
......@@ -706,7 +706,7 @@ class TestRepo():
example_dataset = Dataset(
dataset_id='ds_id', name='ds_name', user_id=test_user.user_id, doi='ds_doi')
example_dataset.m_x('me').create()
example_dataset.a_mongo.create()
entry_metadata = EntryMetadata(
domain='dft', upload_id='example_upload_id', calc_id='0', upload_time=today_datetime)
......@@ -740,7 +740,7 @@ class TestRepo():
yield
example_dataset.m_x('me').me_obj.delete()
example_dataset.a_mongo.delete()
def assert_search(self, rv: Any, number_of_calcs: int) -> dict:
if rv.status_code != 200:
......@@ -1156,22 +1156,22 @@ class TestEditRepo():
def example_datasets(self, test_user, other_test_user):
self.example_dataset = Dataset(
dataset_id='example_ds', name='example_ds', user_id=test_user.user_id)
self.example_dataset.m_x('me').create()
self.example_dataset.a_mongo.create()
self.other_example_dataset = Dataset(
dataset_id='other_example_ds', name='other_example_ds',
user_id=other_test_user.user_id)
self.other_example_dataset.m_x('me').create()
self.other_example_dataset.a_mongo.create()
yield
self.example_dataset.m_x('me').me_obj.delete()
self.other_example_dataset.m_x('me').me_obj.delete()
self.example_dataset.a_mongo.delete()
self.other_example_dataset.a_mongo.delete()
@pytest.fixture(autouse=True)
def remove_new_dataset(self):
yield 'new_ds'
Dataset.m_def.m_x('me').objects(name='new_ds').delete()
Dataset.m_def.a_mongo.objects(name='new_ds').delete()
@pytest.fixture(autouse=True)
def example_data(self, meta_info, class_api, test_user, other_test_user):
......@@ -1346,7 +1346,7 @@ class TestEditRepo():
data = json.loads(rv.data)
assert not data['success']
assert self.example_dataset.name in data['message']
assert Dataset.m_def.m_x('me').get(dataset_id=self.example_dataset.dataset_id) is not None
assert Dataset.m_def.a_mongo.get(dataset_id=self.example_dataset.dataset_id) is not None
def test_edit_ds_remove(self):
rv = self.perform_edit(
......@@ -1355,17 +1355,17 @@ class TestEditRepo():
rv = self.perform_edit(datasets=[], query=dict(upload_id='upload_1'))
assert rv.status_code == 200
with assert_exception(KeyError):
assert Dataset.m_def.m_x('me').get(dataset_id=self.example_dataset.dataset_id) is None
assert Dataset.m_def.a_mongo.get(dataset_id=self.example_dataset.dataset_id) is None
def test_edit_ds_user_namespace(self, test_user):
assert Dataset.m_def.m_x('me').objects(
assert Dataset.m_def.a_mongo.objects(
name=self.other_example_dataset.name).first() is not None
rv = self.perform_edit(
datasets=[self.other_example_dataset.name], query=dict(upload_id='upload_1'))
self.assert_edit(rv, quantity='datasets', success=True, message=True)
new_dataset = Dataset.m_def.m_x('me').objects(