diff --git a/docs/datamodel_metadataflow.png b/docs/datamodel_metadataflow.png deleted file mode 100644 index 5dc4c1f634d93ebd6911022d5bd3e9763781ecee..0000000000000000000000000000000000000000 Binary files a/docs/datamodel_metadataflow.png and /dev/null differ diff --git a/docs/datamodel_transformations.png b/docs/datamodel_transformations.png deleted file mode 100644 index 7898b158421f07d6e6e1b2d714185c968f848e59..0000000000000000000000000000000000000000 Binary files a/docs/datamodel_transformations.png and /dev/null differ diff --git a/docs/introduction.md b/docs/introduction.md index 2d297945d78a39776cee2ff7a77f218f4282dadf..e1b6d17fb8f735d453425dfb5cf77c628a5e903c 100644 --- a/docs/introduction.md +++ b/docs/introduction.md @@ -216,9 +216,6 @@ There are three catergories of metadata: Those sets of metadata along with the actual raw and archive data are often transformed, passed, stored, etc. by the various nomad modules. -.. figure:: datamodel_metadataflow.png - :alt: nomad's metadata flow - ### Implementation The different entities have often multiple implementations for different storage systems. For example, aspects of calculations are stored in files (raw files, calc metadata, archive data), diff --git a/examples/domain.py b/examples/domain.py index 0454da98da5af84e5ebfa74aedee810a32b90e44..5342cb684a236e77fcabc7c91b1ba16340ce9126 100644 --- a/examples/domain.py +++ b/examples/domain.py @@ -1,5 +1,5 @@ from nomad import datamodel -print(datamodel.CalcWithMetadata(domain='DFT', calc_id='test').__class__.__name__) -print(datamodel.CalcWithMetadata(calc_id='test').__class__.__name__) -print(datamodel.CalcWithMetadata(domain='EMS', calc_id='test').__class__.__name__) +print(datamodel.EntryMetadata(domain='DFT', calc_id='test').__class__.__name__) +print(datamodel.EntryMetadata(calc_id='test').__class__.__name__) +print(datamodel.EntryMetadata(domain='EMS', calc_id='test').__class__.__name__) diff --git a/gui/src/components/metaInfoBrowser/MetaInfoBrowser.js b/gui/src/components/metaInfoBrowser/MetaInfoBrowser.js index 73ae7513b9c9181aa3c6da36905c713cf0aec743..f6b61bafed640795ed7d8d5ca74f22c9ac453fc0 100644 --- a/gui/src/components/metaInfoBrowser/MetaInfoBrowser.js +++ b/gui/src/components/metaInfoBrowser/MetaInfoBrowser.js @@ -89,7 +89,7 @@ class MetaInfoBrowser extends Component { update(pkg) { this.props.api.getInfo().then(info => { - const domain = info.domains.find(domain => domain.name === 'dft') // TODO deal with domains + const domain = info.domains.find(domain => domain.name === 'dft') // TODO deal with domains this.props.api.getMetaInfo(pkg || domain.metainfo.all_package).then(metainfos => { const metainfoName = this.props.metainfo || domain.metainfo.root_sections[0] const definition = metainfos.get(metainfoName) @@ -108,7 +108,7 @@ class MetaInfoBrowser extends Component { init() { this.props.api.getInfo().then(info => { - const domain = info.domains.find(domain => domain.name === 'dft') // TODO deal with domains + const domain = info.domains.find(domain => domain.name === 'dft') // TODO deal with domains this.props.api.getMetaInfo(domain.metainfo.all_package).then(metainfos => { const metainfoName = this.props.metainfo || domain.metainfo.root_sections[0] const definition = metainfos.get(metainfoName) diff --git a/nomad/app/__init__.py b/nomad/app/__init__.py index 0a8d0f71fbb1c32f7f40f69f0b2dd4835a7deedb..7b1d3a3ee58c38ae59794825c3eb0a8734401048 100644 --- a/nomad/app/__init__.py +++ b/nomad/app/__init__.py @@ -12,11 +12,11 @@ # See the License for the specific language 
governing permissions and # limitations under the License. -""" +''' This module comprises the nomad@FAIRDI APIs. Currently there is NOMAD's official api, and we will soon at the optimade api. The app module also servers documentation, gui, and alive. -""" +''' from flask import Flask, Blueprint, jsonify, url_for, abort, request from flask_restplus import Api from flask_cors import CORS @@ -36,11 +36,11 @@ from . import common @property # type: ignore def specs_url(self): - """ + ''' Fixes issue where swagger-ui makes a call to swagger.json over HTTP. This can ONLY be used on servers that actually use HTTPS. On servers that use HTTP, this code should not be used at all. - """ + ''' return url_for(self.endpoint('specs'), _external=True, _scheme='https') @@ -49,7 +49,7 @@ if config.services.https: app = Flask(__name__) -""" The Flask app that serves all APIs. """ +''' The Flask app that serves all APIs. ''' app.config.APPLICATION_ROOT = common.base_path # type: ignore app.config.RESTPLUS_MASK_HEADER = False # type: ignore @@ -105,7 +105,7 @@ def handle(error: Exception): @app.route('/alive') def alive(): - """ Simple endpoint to utilize kubernetes liveness/readiness probing. """ + ''' Simple endpoint to utilize kubernetes liveness/readiness probing. ''' return "I am, alive!" diff --git a/nomad/app/api/__init__.py b/nomad/app/api/__init__.py index e154b7e29fb63183555efa6fcffa92773cc4b5da..de4ed2fde4ba792edb0da23eaf7f06a380b3658d 100644 --- a/nomad/app/api/__init__.py +++ b/nomad/app/api/__init__.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -""" +''' The official NOMAD API. There is a separate documentation for the API endpoints from a client perspective. @@ -22,7 +22,7 @@ There is a separate documentation for the API endpoints from a client perspectiv .. automodule:: nomad.app.api.upload .. automodule:: nomad.app.api.repo .. automodule:: nomad.app.api.archive -""" +''' from .api import blueprint from . import info, auth, upload, repo, archive, raw, mirror, dataset diff --git a/nomad/app/api/api.py b/nomad/app/api/api.py index 9a949d703978c9f7401e1e4a52fa3b9d54e7b244..45df901c6cf1dc50c081728925f3416ba5b100aa 100644 --- a/nomad/app/api/api.py +++ b/nomad/app/api/api.py @@ -23,7 +23,7 @@ api = Api( version='1.0', title='NOMAD API', description='Official NOMAD API', validate=True) -""" Provides the flask restplus api instance for the regular NOMAD api""" +''' Provides the flask restplus api instance for the regular NOMAD api''' # For some unknown reason it is necessary for each fr api to have a handler. # Otherwise the global app error handler won't be called. diff --git a/nomad/app/api/archive.py b/nomad/app/api/archive.py index 5c635f7e7652a8b54d4f06114e7a09b21ffd164b..4c9f66dd50cf1deea65b3e5ee90ad07606ab06bf 100644 --- a/nomad/app/api/archive.py +++ b/nomad/app/api/archive.py @@ -12,10 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. -""" +''' The archive API of the nomad@FAIRDI APIs. This API is about serving processed (parsed and normalized) calculation data in nomad's *meta-info* format. -""" +''' from typing import Dict, Any from io import BytesIO @@ -51,11 +51,11 @@ class ArchiveCalcLogResource(Resource): @api.response(200, 'Archive data send', headers={'Content-Type': 'application/plain'}) @authenticate(signature_token=True) def get(self, upload_id, calc_id): - """ + ''' Get calculation processing log. 
Calcs are references via *upload_id*, *calc_id* pairs. - """ + ''' archive_id = '%s/%s' % (upload_id, calc_id) upload_files = UploadFiles.get( @@ -85,11 +85,11 @@ class ArchiveCalcResource(Resource): @api.response(200, 'Archive data send') @authenticate(signature_token=True) def get(self, upload_id, calc_id): - """ + ''' Get calculation data in archive form. Calcs are references via *upload_id*, *calc_id* pairs. - """ + ''' archive_id = '%s/%s' % (upload_id, calc_id) upload_file = UploadFiles.get( @@ -128,7 +128,7 @@ class ArchiveDownloadResource(Resource): @api.response(200, 'File(s) send', headers={'Content-Type': 'application/zip'}) @authenticate(signature_token=True) def get(self): - """ + ''' Get calculation data in archive form from all query results. See ``/repo`` endpoint for documentation on the search @@ -138,7 +138,7 @@ class ArchiveDownloadResource(Resource): any files that the user is not authorized to access. The zip file will contain a ``manifest.json`` with the repository meta data. - """ + ''' try: args = _archive_download_parser.parse_args() compress = args.get('compress', False) @@ -229,7 +229,7 @@ class ArchiveQueryResource(Resource): @api.marshal_with(_archive_query_model, skip_none=True, code=200, description='Search results sent') @authenticate() def post(self): - """ + ''' Post a query schema and return it filled with archive data. See ``/repo`` endpoint for documentation on the search @@ -237,7 +237,7 @@ class ArchiveQueryResource(Resource): The actual data are in results and a supplementary python code (curl) to execute search is in python (curl). - """ + ''' try: data_in = request.get_json() scroll = data_in.get('scroll', None) @@ -323,9 +323,9 @@ class MetainfoResource(Resource): @api.response(404, 'The metainfo does not exist') @api.response(200, 'Metainfo data send') def get(self, metainfo_package_name): - """ + ''' Get a metainfo definition file. - """ + ''' try: return load_metainfo(metainfo_package_name), 200 except FileNotFoundError: @@ -345,7 +345,7 @@ metainfo_main_path = os.path.dirname(os.path.abspath(nomad_meta_info.__file__)) def load_metainfo( package_name_or_dependency: str, dependency_source: str = None, loaded_packages: Dict[str, Any] = None) -> Dict[str, Any]: - """ + ''' Loads the given metainfo package and all its dependencies. Returns a dict with all loaded package_names and respective packages. @@ -354,7 +354,7 @@ def load_metainfo( dependency_source: The path of the metainfo that uses this function to load a relative dependency. loaded_packages: Give a dict and the function will added freshly loaded packages to it and return it. - """ + ''' if loaded_packages is None: loaded_packages = {} diff --git a/nomad/app/api/auth.py b/nomad/app/api/auth.py index 0ba9876d9a96b49c665259e4141ff8b8a30ef16a..61b1fbc2399ccca2b5390426bb6308c730b88cd9 100644 --- a/nomad/app/api/auth.py +++ b/nomad/app/api/auth.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -""" +''' The API is protected with *keycloak* and *OpenIDConnect*. All API endpoints that require or support authentication accept OIDC bearer tokens via HTTP header (``Authentication``). These token can be acquired from the NOMAD keycloak server or through the ``/auth`` endpoint @@ -29,7 +29,7 @@ decorator. To allow authentification with signed urls, use this decorator: .. 
autofunction:: with_signature_token -""" +''' from flask import g, request from flask_restplus import abort, Resource, fields import functools @@ -69,11 +69,11 @@ api.authorizations = { def _verify_upload_token(token) -> str: - """ + ''' Verifies the upload token generated with :func:`generate_upload_token`. Returns: The user UUID or None if the toke could not be verified. - """ + ''' payload, signature = token.split('.') payload = utils.base64_decode(payload) signature = utils.base64_decode(signature) @@ -92,7 +92,7 @@ def _verify_upload_token(token) -> str: def authenticate( basic: bool = False, upload_token: bool = False, signature_token: bool = False, required: bool = False, admin_only: bool = False): - """ + ''' A decorator to protect API endpoints with authentication. Uses keycloak access token to authenticate users. Other methods might apply. Will abort with 401 if necessary. @@ -103,7 +103,7 @@ def authenticate( signature_token: Also allow signed urls required: Authentication is required admin_only: Only the admin user is allowed to use the endpoint. - """ + ''' methods = ['OpenIDConnect Bearer Token'] if basic: methods.append('HTTP Basic Authentication') @@ -192,7 +192,7 @@ class AuthResource(Resource): @api.marshal_with(auth_model, skip_none=True, code=200, description='Auth info send') @authenticate(required=True, basic=True) def get(self): - """ + ''' Provides authentication information. This endpoint requires authentification. Like all endpoints the OIDC access token based authentification. In additional, basic HTTP authentification can be used. This allows to login and acquire an @@ -202,7 +202,7 @@ class AuthResource(Resource): URLs with a ``signature_token`` query parameter, e.g. for file downloads on the raw or archive api endpoints; a short ``upload_token`` that is used in ``curl`` command line based uploads; and the OIDC JWT access token. - """ + ''' def signature_token(): expires_at = datetime.datetime.utcnow() + datetime.timedelta(seconds=10) @@ -239,7 +239,7 @@ class UsersResource(Resource): @api.marshal_with(users_model, code=200, description='User suggestions send') @api.expect(users_parser, validate=True) def get(self): - """ Get existing users. """ + ''' Get existing users. ''' args = users_parser.parse_args() return dict(users=infrastructure.keycloak.search_user(args.get('query'))) @@ -248,7 +248,7 @@ class UsersResource(Resource): @api.marshal_with(user_model, code=200, skip_none=True, description='User invited') @api.expect(user_model, validate=True) def put(self): - """ Invite a new user. """ + ''' Invite a new user. ''' if config.keycloak.oasis: abort(400, 'User invide does not work this NOMAD OASIS') @@ -273,10 +273,10 @@ class UsersResource(Resource): def with_signature_token(func): - """ + ''' A decorator for API endpoint implementations that validates signed URLs. Token to sign URLs can be retrieved via the ``/auth`` endpoint. - """ + ''' @functools.wraps(func) @api.response(401, 'Invalid or expired signature token') def wrapper(*args, **kwargs): @@ -302,10 +302,10 @@ def with_signature_token(func): def create_authorization_predicate(upload_id, calc_id=None): - """ + ''' Returns a predicate that determines if the logged in user has the authorization to access the given upload and calculation. 
- """ + ''' def func(): if g.user is None: # guest users don't have authorized access to anything diff --git a/nomad/app/api/common.py b/nomad/app/api/common.py index f9e0692969d9a398da10c53725a32d07516dbf12..f69f5570b60d0fec82dabcb802f86d78b4a33367 100644 --- a/nomad/app/api/common.py +++ b/nomad/app/api/common.py @@ -12,9 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. -""" +''' Common data, variables, decorators, models used throughout the API. -""" +''' from typing import Callable, IO, Set, Tuple, Iterable, Dict, Any from flask_restplus import fields import zipstream @@ -24,8 +24,7 @@ from urllib.parse import urlencode import sys import os.path -from nomad import search, config -from nomad.datamodel import Domain +from nomad import search, config, datamodel from nomad.app.optimade import filterparser from nomad.app.common import RFC3339DateTime, rfc3339DateTime from nomad.files import Restricted @@ -57,7 +56,7 @@ pagination_model = api.model('Pagination', { 'order_by': fields.String(description='Sorting criterion.'), 'order': fields.Integer(description='Sorting order -1 for descending, 1 for asceding.') }) -""" Model used in responses with pagination. """ +''' Model used in responses with pagination. ''' scroll_model = api.model('Scroll', { 'scroll': fields.Boolean(default=False, description='Flag if scrolling is enables.'), @@ -79,13 +78,13 @@ search_model_fields = { search_model = api.model('Search', search_model_fields) query_model_fields = { - quantity.qualified_name: fields.Raw(description=quantity.description) - for quantity in Domain.all_quantities()} + qualified_name: fields.Raw(description=quantity.description) + for qualified_name, quantity in search.search_quantities.items()} query_model_fields.update(**{ 'owner': fields.String(description='The group the calculations belong to.', allow_null=True, skip_none=True), 'domain': fields.String(description='Specify the domain to search in: %s, default is ``%s``' % ( - ', '.join(['``%s``' % key for key in Domain.instances.keys()]), config.default_domain)), + ', '.join(['``%s``' % domain for domain in datamodel.domains]), config.default_domain)), 'from_time': fields.Raw(description='The minimum entry time.', allow_null=True, skip_none=True), 'until_time': fields.Raw(description='The maximum entry time.', allow_null=True, skip_none=True) }) @@ -94,7 +93,7 @@ query_model = api.model('Query', query_model_fields) def add_pagination_parameters(request_parser): - """ Add pagination parameters to Flask querystring parser. """ + ''' Add pagination parameters to Flask querystring parser. ''' request_parser.add_argument( 'page', type=int, help='The page, starting with 1.', location='args') request_parser.add_argument( @@ -111,7 +110,7 @@ pagination_request_parser = request_parser.copy() def add_scroll_parameters(request_parser): - """ Add scroll parameters to Flask querystring parser. """ + ''' Add scroll parameters to Flask querystring parser. ''' request_parser.add_argument( 'scroll', type=bool, help='Enable scrolling') request_parser.add_argument( @@ -119,12 +118,12 @@ def add_scroll_parameters(request_parser): def add_search_parameters(request_parser): - """ Add search parameters to Flask querystring parser. """ + ''' Add search parameters to Flask querystring parser. 
''' # more search parameters request_parser.add_argument( 'domain', type=str, help='Specify the domain to search in: %s, default is ``%s``' % ( - ', '.join(['``%s``' % key for key in Domain.instances.keys()]), + ', '.join(['``%s``' % domain for domain in datamodel.domains]), config.default_domain)) request_parser.add_argument( 'owner', type=str, @@ -137,20 +136,18 @@ def add_search_parameters(request_parser): help='A yyyy-MM-ddTHH:mm:ss (RFC3339) maximum entry time (e.g. upload time)') # main search parameters - for quantity in Domain.all_quantities(): + for qualified_name, quantity in search.search_quantities.items(): request_parser.add_argument( - quantity.qualified_name, help=quantity.description, - action=quantity.argparse_action if quantity.multi else None) + qualified_name, help=quantity.description, action=quantity.argparse_action) -_search_quantities = set([ - domain.qualified_name for domain in Domain.all_quantities()]) +_search_quantities = set(search.search_quantities.keys()) def apply_search_parameters(search_request: search.SearchRequest, args: Dict[str, Any]): - """ + ''' Help that adds query relevant request args to the given SearchRequest. - """ + ''' args = {key: value for key, value in args.items() if value is not None} # domain @@ -196,7 +193,7 @@ def apply_search_parameters(search_request: search.SearchRequest, args: Dict[str def calc_route(ns, prefix: str = ''): - """ A resource decorator for /<upload>/<calc> based routes. """ + ''' A resource decorator for /<upload>/<calc> based routes. ''' def decorator(func): ns.route('%s/<string:upload_id>/<string:calc_id>' % prefix)( api.doc(params={ @@ -208,7 +205,7 @@ def calc_route(ns, prefix: str = ''): def upload_route(ns, prefix: str = ''): - """ A resource decorator for /<upload> based routes. """ + ''' A resource decorator for /<upload> based routes. ''' def decorator(func): ns.route('%s/<string:upload_id>' % prefix)( api.doc(params={ @@ -221,7 +218,7 @@ def upload_route(ns, prefix: str = ''): def streamed_zipfile( files: Iterable[Tuple[str, str, Callable[[str], IO], Callable[[str], int]]], zipfile_name: str, compress: bool = False): - """ + ''' Creates a response that streams the given files as a streamed zip file. Ensures that each given file is only streamed once, based on its filename in the resulting zipfile. @@ -232,17 +229,17 @@ def streamed_zipfile( zipfile_name: A name that will be used in the content disposition attachment used as an HTTP respone. compress: Uses compression. Default is stored only. - """ + ''' streamed_files: Set[str] = set() def generator(): - """ Stream a zip file with all files using zipstream. """ + ''' Stream a zip file with all files using zipstream. ''' def iterator(): - """ + ''' Replace the directory based iter of zipstream with an iter over all given files. - """ + ''' # the actual contents for zipped_filename, file_id, open_io, file_size in files: if zipped_filename in streamed_files: @@ -286,12 +283,12 @@ def streamed_zipfile( def query_api_url(*args, query_string: Dict[str, Any] = None): - """ + ''' Creates a API URL. 
Arguments: *args: URL path segments after the API base URL query_string: A dict with query string parameters - """ + ''' url = os.path.join(config.api_url(False), *args) if query_string is not None: url = '%s?%s' % (url, urlencode(query_string, doseq=True)) @@ -300,10 +297,10 @@ def query_api_url(*args, query_string: Dict[str, Any] = None): def query_api_python(*args, **kwargs): - """ + ''' Creates a string of python code to execute a search query to the repository using the requests library. - """ + ''' url = query_api_url(*args, **kwargs) return '''import requests response = requests.post("{}") @@ -311,8 +308,8 @@ data = response.json()'''.format(url) def query_api_curl(*args, **kwargs): - """ + ''' Creates a string of curl command to execute a search query to the repository. - """ + ''' url = query_api_url(*args, **kwargs) return 'curl -X POST %s -H "accept: application/json" --output "nomad.json"' % url diff --git a/nomad/app/api/dataset.py b/nomad/app/api/dataset.py index 7ff4d875675fbf4400ed55223823a5f1eeed8054..5adb190e15f8c0e15f32c724eed877000cbf1c1b 100644 --- a/nomad/app/api/dataset.py +++ b/nomad/app/api/dataset.py @@ -49,7 +49,7 @@ class DatasetListResource(Resource): @api.expect(list_datasets_parser) @authenticate(required=True) def get(self): - """ Retrieve a list of all datasets of the authenticated user. """ + ''' Retrieve a list of all datasets of the authenticated user. ''' args = { key: value for key, value in list_datasets_parser.parse_args().items() if value is not None} @@ -76,7 +76,7 @@ class DatasetListResource(Resource): @api.expect(dataset_model) @authenticate(required=True) def put(self): - """ Creates a new dataset. """ + ''' Creates a new dataset. ''' data = request.get_json() if data is None: data = {} @@ -112,7 +112,7 @@ class DatasetResource(Resource): @api.marshal_with(dataset_model, skip_none=True, code=200, description='Dateset send') @authenticate(required=True) def get(self, name: str): - """ Retrieve a dataset by name. """ + ''' Retrieve a dataset by name. ''' try: result = Dataset.m_def.m_x('me').get(user_id=g.user.user_id, name=name) except KeyError: @@ -126,7 +126,7 @@ class DatasetResource(Resource): @api.marshal_with(dataset_model, skip_none=True, code=200, description='DOI assigned') @authenticate(required=True) def post(self, name: str): - """ Assign a DOI to the dataset. """ + ''' Assign a DOI to the dataset. ''' try: result = Dataset.m_def.m_x('me').get(user_id=g.user.user_id, name=name) except KeyError: @@ -168,7 +168,7 @@ class DatasetResource(Resource): @api.marshal_with(dataset_model, skip_none=True, code=200, description='Dateset deleted') @authenticate(required=True) def delete(self, name: str): - """ Delete the dataset. """ + ''' Delete the dataset. ''' try: result = Dataset.m_def.m_x('me').get(user_id=g.user.user_id, name=name) except KeyError: diff --git a/nomad/app/api/info.py b/nomad/app/api/info.py index e6e87e43781ebd07708901660c23b9aae87794d7..0630498d380f87faa7109ffb86f63b6df8d925ad 100644 --- a/nomad/app/api/info.py +++ b/nomad/app/api/info.py @@ -12,9 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. -""" +''' API endpoint that deliver backend configuration details. -""" +''' from flask_restplus import Resource, fields @@ -69,7 +69,7 @@ class InfoResource(Resource): @api.doc('get_info') @api.marshal_with(info_model, skip_none=True, code=200, description='Info send') def get(self): - """ Return information about the nomad backend and its configuration. 
""" + ''' Return information about the nomad backend and its configuration. ''' codes = [ parser.code_name for parser in parsing.parser_dict.values() @@ -83,16 +83,13 @@ class InfoResource(Resource): 'normalizers': [normalizer.__name__ for normalizer in normalizing.normalizers], 'domains': [ { - 'name': domain.name, - 'quantities': [quantity for quantity in domain.quantities.values()], - 'metrics_names': domain.metrics_names, - 'aggregations_names': domain.aggregations_names, + 'name': domain_name, 'metainfo': { - 'all_package': domain.metainfo_all_package, - 'root_sections': domain.root_sections + 'all_package': domain['metainfo_all_package'], + 'root_section': domain['root_section'] } } - for domain in datamodel.Domain.instances.values() + for domain_name, domain in datamodel.domains.items() ], 'version': config.version, 'release': config.release, diff --git a/nomad/app/api/mirror.py b/nomad/app/api/mirror.py index 9353600c16fd8044b2986978bb754a0733a0f8bb..e0794d0da9baa521132ff8dc2f77af1af6e0ec10 100644 --- a/nomad/app/api/mirror.py +++ b/nomad/app/api/mirror.py @@ -12,9 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. -""" +''' The mirror API of the nomad@FAIRDI APIs. Allows to export upload metadata. -""" +''' from flask import request from flask_restplus import Resource, abort, fields @@ -82,9 +82,9 @@ class MirrorUploadResource(Resource): @api.doc('get_upload_mirror') @authenticate(admin_only=True) def get(self, upload_id): - """ + ''' Export upload (and all calc) metadata for mirrors. - """ + ''' try: upload = proc.Upload.get(upload_id) except KeyError: diff --git a/nomad/app/api/raw.py b/nomad/app/api/raw.py index b08abd4b056a34d42dffe4472352d24edb81d179..b529c7b43909e3897fa097f307cf4ada02091cc5 100644 --- a/nomad/app/api/raw.py +++ b/nomad/app/api/raw.py @@ -12,9 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. -""" +''' The raw API of the nomad@FAIRDI APIs. Can be used to retrieve raw calculation files. -""" +''' from typing import IO, Any, Union, List import os.path @@ -71,13 +71,13 @@ _raw_file_from_path_parser.add_argument( class FileView: - """ + ''' File-like wrapper that restricts the contents to a portion of the file. Arguments: f: the file-like offset: the offset length: the amount of bytes - """ + ''' def __init__(self, f, offset, length): self.f = f self.f_offset = offset @@ -110,10 +110,10 @@ class FileView: def get_raw_file_from_upload_path( upload_files, upload_filepath, authorization_predicate, mainfile: str = None): - """ + ''' Helper method used by func:`RawFileFromUploadPathResource.get` and func:`RawFileFromCalcPathResource.get`. - """ + ''' upload_filepath = upload_filepath.rstrip('/') if upload_filepath[-1:] == '*': @@ -197,7 +197,7 @@ class RawFileFromUploadPathResource(Resource): @api.expect(_raw_file_from_path_parser, validate=True) @authenticate(signature_token=True) def get(self, upload_id: str, path: str): - """ Get a single raw calculation file, directory contents, or whole directory sub-tree + ''' Get a single raw calculation file, directory contents, or whole directory sub-tree from a given upload. The 'upload_id' parameter needs to identify an existing upload. @@ -223,7 +223,7 @@ class RawFileFromUploadPathResource(Resource): match the given path at the start, will be returned as a .zip file body. 
Zip files are streamed; instead of 401 errors, the zip file will just not contain any files that the user is not authorized to access. - """ + ''' # TODO this is a quick fix, since swagger cannot deal with not encoded path parameters if path is not None: path = urllib.parse.unquote(path) @@ -258,7 +258,7 @@ class RawFileFromCalcPathResource(Resource): @api.expect(_raw_file_from_path_parser, validate=True) @authenticate(signature_token=True) def get(self, upload_id: str, calc_id: str, path: str): - """ Get a single raw calculation file, calculation contents, or all files for a + ''' Get a single raw calculation file, calculation contents, or all files for a given calculation. The 'upload_id' parameter needs to identify an existing upload. @@ -266,7 +266,7 @@ class RawFileFromCalcPathResource(Resource): This endpoint behaves exactly like /raw/<upload_id>/<path>, but the path is now relative to the calculation and not the upload. - """ + ''' # TODO this is a quick fix, since swagger cannot deal with not encoded path parameters if path is not None: path = urllib.parse.unquote(path) @@ -300,11 +300,11 @@ class RawFileFromCalcEmptyPathResource(RawFileFromCalcPathResource): @api.expect(_raw_file_from_path_parser, validate=True) @authenticate(signature_token=True) def get(self, upload_id: str, calc_id: str): - """ Get calculation contents. + ''' Get calculation contents. This is basically /raw/calc/<upload_id>/<calc_id>/<path> with an empty path, since having an empty path parameter is not possible. - """ + ''' return super().get(upload_id, calc_id, None) @@ -336,11 +336,11 @@ class RawFilesResource(Resource): @api.expect(_raw_files_request_model, validate=True) @authenticate() def post(self, upload_id): - """ Download multiple raw calculation files in a .zip file. + ''' Download multiple raw calculation files in a .zip file. Zip files are streamed; instead of 401 errors, the zip file will just not contain any files that the user is not authorized to access. - """ + ''' json_data = request.get_json() compress = json_data.get('compress', False) files = [file.strip() for file in json_data['files']] @@ -353,12 +353,12 @@ class RawFilesResource(Resource): @api.expect(_raw_files_request_parser, validate=True) @authenticate(signature_token=True) def get(self, upload_id): - """ + ''' Download multiple raw calculation files. Download multiple raw calculation files in a .zip file. Zip files are streamed; instead of 401 errors, the zip file will just not contain any files that the user is not authorized to access. - """ + ''' args = _raw_files_request_parser.parse_args() files_str = args.get('files') @@ -401,7 +401,7 @@ class RawFileQueryResource(Resource): @api.response(200, 'File(s) send', headers={'Content-Type': 'application/zip'}) @authenticate(signature_token=True) def get(self): - """ Download a .zip file with all raw-files for all entries that match the given + ''' Download a .zip file with all raw-files for all entries that match the given search parameters. See ``/repo`` endpoint for documentation on the search @@ -411,7 +411,7 @@ class RawFileQueryResource(Resource): any files that the user is not authorized to access. The zip file will contain a ``manifest.json`` with the repository meta data. 
- """ + ''' logger = common.logger.bind(query=urllib.parse.urlencode(request.args, doseq=True)) patterns: List[str] = None diff --git a/nomad/app/api/repo.py b/nomad/app/api/repo.py index 6864a2909efa5fc0cb6c883df50dcc2566c904db..dfd783519be1d191f1a5cb7f22e8fa04f5f1fb8d 100644 --- a/nomad/app/api/repo.py +++ b/nomad/app/api/repo.py @@ -12,10 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. -""" +''' The repository API of the nomad@FAIRDI APIs. Currently allows to resolve repository meta-data. -""" +''' from typing import List, Dict, Any from flask_restplus import Resource, abort, fields @@ -26,7 +26,7 @@ import elasticsearch.helpers from datetime import datetime from nomad import search, utils, datamodel, processing as proc, infrastructure -from nomad.datamodel import UserMetadata, Dataset, User, Domain +from nomad.datamodel import Dataset, User, EditableUserMetadata from nomad.app import common from nomad.app.common import RFC3339DateTime, DotKeyNested @@ -47,12 +47,12 @@ class RepoCalcResource(Resource): @api.doc('get_repo_calc') @authenticate() def get(self, upload_id, calc_id): - """ + ''' Get calculation metadata in repository form. Repository metadata only entails the quantities shown in the repository. Calcs are references via *upload_id*, *calc_id* pairs. - """ + ''' try: calc = search.Entry.get(calc_id) except NotFoundError: @@ -88,7 +88,7 @@ _search_request_parser.add_argument( 'exclude', type=str, action='split', help='Excludes the given keys in the returned data.') for group_name in search.groups: _search_request_parser.add_argument( - group_name, type=bool, help=('Return %s group data.' % group_name)) + 'group_%s' % group_name, type=bool, help=('Return %s group data.' % group_name)) _search_request_parser.add_argument( '%s_after' % group_name, type=str, help='The last %s id of the last scroll window for the %s group' % (group_name, group_name)) @@ -100,14 +100,14 @@ _repo_calcs_model_fields = { 'There is a pseudo quantity "total" with a single value "all" that contains the ' ' metrics over all results. ' % ', '.join(search.metrics_names)))} -for group_name, (group_quantity, _) in search.groups.items(): +for group_name in search.groups: _repo_calcs_model_fields[group_name] = (DotKeyNested if '.' in group_name else fields.Nested)(api.model('RepoGroup', { 'after': fields.String(description='The after value that can be used to retrieve the next %s.' % group_name), - 'values': fields.Raw(description='A dict with %s as key. The values are dicts with "total" and "examples" keys.' % group_quantity) + 'values': fields.Raw(description='A dict with %s as key. The values are dicts with "total" and "examples" keys.' % group_name) }), skip_none=True) -for quantity in Domain.all_quantities(): - _repo_calcs_model_fields[quantity.name] = fields.Raw( +for qualified_name, quantity in search.search_quantities.items(): + _repo_calcs_model_fields[qualified_name] = fields.Raw( description=quantity.description, allow_null=True, skip_none=True) _repo_calcs_model = api.inherit('RepoCalculations', search_model, _repo_calcs_model_fields) @@ -121,7 +121,7 @@ class RepoCalcsResource(Resource): @api.marshal_with(_repo_calcs_model, skip_none=True, code=200, description='Search results send') @authenticate() def get(self): - """ + ''' Search for calculations in the repository form, paginated. The ``owner`` parameter determines the overall entries to search through. 
@@ -151,7 +151,7 @@ class RepoCalcsResource(Resource): Ordering is determined by ``order_by`` and ``order`` parameters. Default is ``upload_time`` in decending order. - """ + ''' try: parsed_args = _search_request_parser.parse_args() @@ -170,7 +170,7 @@ class RepoCalcsResource(Resource): metrics: List[str] = request.args.getlist('metrics') with_statistics = args.get('statistics', False) or \ - any(args.get(group_name, False) for group_name in search.groups) + any(args.get('group_%s' % group_name, False) for group_name in search.groups) except Exception as e: abort(400, message='bad parameters: %s' % str(e)) @@ -196,9 +196,9 @@ class RepoCalcsResource(Resource): search_request.default_statistics(metrics_to_use=metrics) additional_metrics = [ - metric - for group_name, (_, metric) in search.groups.items() - if args.get(group_name, False)] + group_quantity.metric_name + for group_name, group_quantity in search.groups.items() + if args.get('group_%s' % group_name, False)] total_metrics = metrics + additional_metrics @@ -217,13 +217,13 @@ class RepoCalcsResource(Resource): results = search_request.execute_scrolled(scroll_id=scroll_id, size=per_page) else: - for group_name, (group_quantity, _) in search.groups.items(): - if args.get(group_name, False): + for group_name, group_quantity in search.groups.items(): + if args.get('group_%s' % group_name, False): kwargs: Dict[str, Any] = {} - if group_name == 'uploads': + if group_name == 'group_uploads': kwargs.update(order_by='upload_time', order='desc') search_request.quantity( - group_quantity, size=per_page, examples=1, + group_quantity.qualified_name, size=per_page, examples=1, after=request.args.get('%s_after' % group_name, None), **kwargs) @@ -239,9 +239,9 @@ class RepoCalcsResource(Resource): if 'quantities' in results: quantities = results.pop('quantities') - for group_name, (group_quantity, _) in search.groups.items(): - if args.get(group_name, False): - results[group_name] = quantities[group_quantity] + for group_name, group_quantity in search.groups.items(): + if args.get('group_%s' % group_name, False): + results[group_name] = quantities[group_quantity.qualified_name] # build python code/curl snippet code_args = dict(request.args) @@ -265,13 +265,13 @@ _query_model_parameters = { 'until_time': RFC3339DateTime(description='A yyyy-MM-ddTHH:mm:ss (RFC3339) maximum entry time (e.g. upload time)') } -for quantity in datamodel.Domain.all_quantities(): - if quantity.multi and quantity.argparse_action is None: +for qualified_name, quantity in search.search_quantities.items(): + if quantity.many_and: def field(**kwargs): return fields.List(fields.String(**kwargs)) else: field = fields.String - _query_model_parameters[quantity.name] = field(description=quantity.description) + _query_model_parameters[qualified_name] = field(description=quantity.description) _repo_query_model = api.model('RepoQuery', _query_model_parameters, skip_none=True) @@ -296,13 +296,16 @@ _repo_edit_model = api.model('RepoEdit', { 'actions': fields.Nested( api.model('RepoEditActions', { quantity.name: repo_edit_action_field(quantity) - for quantity in UserMetadata.m_def.all_quantities.values() + for quantity in EditableUserMetadata.m_def.definitions }), skip_none=True, description='Each action specifies a single value (even for multi valued quantities).'), 'success': fields.Boolean(description='If the overall edit can/could be done. Only in API response.'), 'message': fields.String(description='A message that details the overall edit result. 
Only in API response.') }) +_editable_quantities = { + quantity.name: quantity for quantity in EditableUserMetadata.m_def.definitions} + def edit(parsed_query: Dict[str, Any], mongo_update: Dict[str, Any] = None, re_index=True) -> List[str]: # get all calculations that have to change @@ -327,8 +330,8 @@ def edit(parsed_query: Dict[str, Any], mongo_update: Dict[str, Any] = None, re_i if re_index: def elastic_updates(): for calc in proc.Calc.objects(calc_id__in=calc_ids): - entry = search.Entry.from_calc_with_metadata( - datamodel.CalcWithMetadata(**calc['metadata'])) + entry = search.create_entry( + datamodel.EntryMetadata.m_from_dict(calc['metadata'])) entry = entry.to_dict(include_meta=True) entry['_op_type'] = 'index' yield entry @@ -345,7 +348,7 @@ def edit(parsed_query: Dict[str, Any], mongo_update: Dict[str, Any] = None, re_i def get_uploader_ids(query): - """ Get all the uploader from the query, to check coauthers and shared_with for uploaders. """ + ''' Get all the uploader from the query, to check coauthers and shared_with for uploaders. ''' search_request = search.SearchRequest() apply_search_parameters(search_request, query) search_request.quantity(name='uploader_id') @@ -360,7 +363,7 @@ class EditRepoCalcsResource(Resource): @api.marshal_with(_repo_edit_model, skip_none=True, code=200, description='Edit verified/performed') @authenticate() def post(self): - """ Edit repository metadata. """ + ''' Edit repository metadata. ''' # basic body parsing and some semantic checks json_data = request.get_json() @@ -382,9 +385,10 @@ class EditRepoCalcsResource(Resource): parsed_query = {} for quantity_name, value in query.items(): if quantity_name in _search_quantities: - quantity = datamodel.Domain.get_quantity(quantity_name) - if quantity.multi and quantity.argparse_action == 'split' and not isinstance(value, list): - value = value.split(',') + quantity = search.search_quantities[quantity_name] + if quantity.many: + if not isinstance(value, list): + value = value.split(',') parsed_query[quantity_name] = value parsed_query['owner'] = owner parsed_query['domain'] = query.get('domain') @@ -398,7 +402,7 @@ class EditRepoCalcsResource(Resource): with utils.timer(common.logger, 'edit verified'): for action_quantity_name, quantity_actions in actions.items(): - quantity = UserMetadata.m_def.all_quantities.get(action_quantity_name) + quantity = _editable_quantities.get(action_quantity_name) if quantity is None: abort(400, 'Unknown quantity %s' % action_quantity_name) @@ -564,7 +568,7 @@ class RepoQuantityResource(Resource): @api.marshal_with(_repo_quantity_values_model, skip_none=True, code=200, description='Search results send') @authenticate() def get(self, quantity: str): - """ + ''' Retrieve quantity values from entries matching the search. You can use the various quantities to search/filter for. For some of the @@ -580,7 +584,7 @@ class RepoQuantityResource(Resource): The result will contain a 'quantity' key with quantity values and the "after" value. There will be upto 'size' many values. For the rest of the values use the "after" parameter in another request. - """ + ''' search_request = search.SearchRequest() args = { @@ -631,7 +635,7 @@ class RepoQuantitiesResource(Resource): @api.marshal_with(_repo_quantities_model, skip_none=True, code=200, description='Search results send') @authenticate() def get(self): - """ + ''' Retrieve quantity values for multiple quantities at once. You can use the various quantities to search/filter for. 
For some of the @@ -645,7 +649,7 @@ class RepoQuantitiesResource(Resource): The result will contain a 'quantities' key with a dict of quantity names and the retrieved values as values. - """ + ''' search_request = search.SearchRequest() args = { diff --git a/nomad/app/api/upload.py b/nomad/app/api/upload.py index 8655a0e8db4ecf05480369b7652ca7d66587cda5..cb04f51589a96fbf16b75489d3fbef593a642a3f 100644 --- a/nomad/app/api/upload.py +++ b/nomad/app/api/upload.py @@ -12,11 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. -""" +''' The upload API of the nomad@FAIRDI APIs. Provides endpoints to upload files and get the processing status of uploads. -""" +''' +from typing import Dict, Any from flask import g, request, Response from flask_restplus import Resource, fields, abort from datetime import datetime @@ -44,8 +45,8 @@ ns = api.namespace( class CalcMetadata(fields.Raw): def format(self, value): - calc_with_metadata = datamodel.CalcWithMetadata(**value) - return search.Entry.from_calc_with_metadata(calc_with_metadata).to_dict() + entry_metadata = datamodel.EntryMetadata.m_from_dict(value) + return search.create_entry(entry_metadata).to_dict() proc_model = api.model('Processing', { @@ -141,10 +142,10 @@ def disable_marshalling(f): def marshal_with(*args, **kwargs): - """ + ''' A special version of the RESTPlus marshal_with decorator that allows to disable marshalling at runtime by raising DisableMarshalling. - """ + ''' def decorator(func): @api.marshal_with(*args, **kwargs) def with_marshalling(*args, **kwargs): @@ -175,7 +176,7 @@ class UploadListResource(Resource): @api.expect(upload_list_parser) @authenticate(required=True) def get(self): - """ Get the list of all uploads from the authenticated user. """ + ''' Get the list of all uploads from the authenticated user. ''' try: state = request.args.get('state', 'unpublished') name = request.args.get('name', None) @@ -220,7 +221,7 @@ class UploadListResource(Resource): @marshal_with(upload_model, skip_none=True, code=200, description='Upload received') @authenticate(required=True, upload_token=True) def put(self): - """ + ''' Upload a file and automatically create a new upload in the process. Can be used to upload files via browser or other http clients like curl. This will also start the processing of the upload. @@ -237,7 +238,7 @@ class UploadListResource(Resource): There is a general limit on how many unpublished uploads a user can have. Will return 400 if this limit is exceeded. - """ + ''' # check existence of local_path if local_path is used local_path = request.args.get('local_path') if local_path: @@ -345,12 +346,12 @@ class UploadResource(Resource): @api.expect(pagination_request_parser) @authenticate(required=True) def get(self, upload_id: str): - """ + ''' Get an update for an existing upload. Will not only return the upload, but also its calculations paginated. Use the pagination params to determine the page. - """ + ''' try: upload = Upload.get(upload_id) except KeyError: @@ -398,12 +399,12 @@ class UploadResource(Resource): @api.marshal_with(upload_model, skip_none=True, code=200, description='Upload deleted') @authenticate(required=True) def delete(self, upload_id: str): - """ + ''' Delete an existing upload. Only uploads that are sill in staging, not already deleted, not still uploaded, and not currently processed, can be deleted. 
- """ + ''' try: upload = Upload.get(upload_id) except KeyError: @@ -436,7 +437,7 @@ class UploadResource(Resource): @api.expect(upload_operation_model) @authenticate(required=True) def post(self, upload_id): - """ + ''' Execute an upload operation. Available operations are ``publish`` and ``re-process`` Publish accepts further meta data that allows to provide coauthors, comments, @@ -449,7 +450,7 @@ class UploadResource(Resource): Re-process will re-process the upload and produce updated repository metadata and archive. Only published uploads that are not processing at the moment are allowed. Only for uploads where calculations have been processed with an older nomad version. - """ + ''' try: upload = Upload.get(upload_id) except KeyError: @@ -464,12 +465,18 @@ class UploadResource(Resource): operation = json_data.get('operation') - metadata = json_data.get('metadata', {}) - for key in metadata: - if key.startswith('_'): + user_metadata: Dict[str, Any] = json_data.get('metadata', {}) + metadata: Dict[str, Any] = {} + for user_key in user_metadata: + if user_key.startswith('_'): if not g.user.is_admin: abort(401, message='Only admin users can use _metadata_keys.') - break + + key = user_key[1:] + else: + key = user_key + + metadata[key] = user_metadata[user_key] if operation == 'publish': if upload.tasks_running: @@ -519,7 +526,7 @@ class UploadCommandResource(Resource): @api.marshal_with(upload_command_model, code=200, description='Upload command send') @authenticate(required=True) def get(self): - """ Get url and example command for shell based uploads. """ + ''' Get url and example command for shell based uploads. ''' token = generate_upload_token(g.user) upload_url = '%s/uploads/?token=%s' % (config.api_url(ssl=False), token) upload_url_with_name = upload_url + '&name=<name>' diff --git a/nomad/app/common.py b/nomad/app/common.py index 3ca5c2999592ae2abfe58b6e649d08461ce44a18..b4c5323b864af04864976c2097c0daad7210a211 100644 --- a/nomad/app/common.py +++ b/nomad/app/common.py @@ -22,10 +22,10 @@ from nomad import config logger: BoundLogger = None -""" A logger pre configured with information about the current request. """ +''' A logger pre configured with information about the current request. ''' base_path = config.services.api_base_path -""" Provides the root path of the nomad APIs. """ +''' Provides the root path of the nomad APIs. ''' class RFC3339DateTime(fields.DateTime): @@ -41,7 +41,7 @@ rfc3339DateTime = RFC3339DateTime() class DotKeyFieldMixin: - """ Allows use of flask_restplus fields with '.' in key names. By default, '.' + ''' Allows use of flask_restplus fields with '.' in key names. By default, '.' is used as a separator for accessing nested properties. Mixin prevents this, allowing fields to use '.' in the key names. @@ -53,7 +53,7 @@ class DotKeyFieldMixin: flask_restplus tries to fetch values for data['my']['dot']['field'] instead of data['my.dot.field'] which is the desired behaviour in this case. 
- """ + ''' def output(self, key, obj, **kwargs): transformed_obj = {k.replace(".", "___"): v for k, v in obj.items()} @@ -67,10 +67,10 @@ class DotKeyFieldMixin: @contextmanager def toggle_attribute(self): - """ Context manager to temporarily set self.attribute to None + ''' Context manager to temporarily set self.attribute to None Yields self.attribute before setting to None - """ + ''' attribute = self.attribute self.attribute = None yield attribute diff --git a/nomad/app/optimade/__init__.py b/nomad/app/optimade/__init__.py index b2573598d83d2481db75ddc24a7bd71adc42f59e..913892a96a55341268645f3f0519086b17df30fb 100644 --- a/nomad/app/optimade/__init__.py +++ b/nomad/app/optimade/__init__.py @@ -12,9 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. -""" +''' The optimade implementation of NOMAD. -""" +''' from flask import Blueprint from flask_restplus import Api diff --git a/nomad/app/optimade/api.py b/nomad/app/optimade/api.py index 6a9057b4729da46f4c0bbb12f5276bad68d44f4b..c974d712e234498921401f9d0e428db739ee6167 100644 --- a/nomad/app/optimade/api.py +++ b/nomad/app/optimade/api.py @@ -26,7 +26,7 @@ base_url = 'http://%s/%s/optimade' % ( def url(endpoint: str = None, **kwargs): - """ Returns the full optimade api url (for a given endpoint) including query parameters. """ + ''' Returns the full optimade api url (for a given endpoint) including query parameters. ''' if endpoint is None: url = base_url else: @@ -43,7 +43,7 @@ api = Api( version='1.0', title='NOMAD\'s OPTiMaDe API implementation', description='NOMAD\'s OPTiMaDe API implementation, version 0.10.0.', validate=True) -""" Provides the flask restplust api instance for the optimade api""" +''' Provides the flask restplust api instance for the optimade api''' # For some unknown reason it is necessary for each fr api to have a handler. diff --git a/nomad/app/optimade/endpoints.py b/nomad/app/optimade/endpoints.py index 9efb314e01825993b6037f159fbcccd1e508b0de..5f41a8f292bf282c189112b95e1fd8a0beef36e4 100644 --- a/nomad/app/optimade/endpoints.py +++ b/nomad/app/optimade/endpoints.py @@ -41,9 +41,9 @@ def base_request_args(): def base_search_request(): - """ Creates a search request for all public and optimade enabled data. """ + ''' Creates a search request for all public and optimade enabled data. ''' return search.SearchRequest().owner('all', None).query( - Q('exists', field='dft.optimade.nelements')) # TODO use the elastic annotations when done + Q('exists', field='dft.optimade.elements')) # TODO use the elastic annotations when done @ns.route('/calculations') @@ -53,7 +53,7 @@ class CalculationList(Resource): @api.expect(entry_listing_endpoint_parser, validate=True) @api.marshal_with(json_api_list_response_model, skip_none=True, code=200) def get(self): - """ Retrieve a list of calculations that match the given Optimade filter expression. """ + ''' Retrieve a list of calculations that match the given Optimade filter expression. ''' request_fields = base_request_args() try: @@ -106,7 +106,7 @@ class Calculation(Resource): @api.expect(single_entry_endpoint_parser, validate=True) @api.marshal_with(json_api_single_response_model, skip_none=True, code=200) def get(self, id: str): - """ Retrieve a single calculation for the given id. """ + ''' Retrieve a single calculation for the given id. 
''' request_fields = base_request_args() search_request = base_search_request().search_parameters(calc_id=id) @@ -134,7 +134,7 @@ class CalculationInfo(Resource): @api.expect(base_endpoint_parser, validate=True) @api.marshal_with(json_api_info_response_model, skip_none=True, code=200) def get(self): - """ Returns information relating to the API implementation- """ + ''' Returns information relating to the API implementation- ''' base_request_args() result = { @@ -160,7 +160,7 @@ class Info(Resource): @api.expect(base_endpoint_parser, validate=True) @api.marshal_with(json_api_single_response_model, skip_none=True, code=200) def get(self): - """ Returns information relating to the API implementation- """ + ''' Returns information relating to the API implementation- ''' base_request_args() result = { diff --git a/nomad/app/optimade/filterparser.py b/nomad/app/optimade/filterparser.py index c238709292e9ef938c78cd04eaf380769425826a..a95e78100af84b5bd8a457ed596ea40919f5b95a 100644 --- a/nomad/app/optimade/filterparser.py +++ b/nomad/app/optimade/filterparser.py @@ -20,17 +20,17 @@ from nomad.metainfo.optimade import OptimadeEntry class FilterException(Exception): - """ Raised on parsing a filter expression with syntactic of semantic errors. """ + ''' Raised on parsing a filter expression with syntactic of semantic errors. ''' pass quantities: Dict[str, Quantity] = { q.name: Quantity( q.name, es_field='dft.optimade.%s' % q.name, - elastic_mapping_type=q.m_annotations['elastic']['type']) + elastic_mapping_type=q.m_x('search').es_mapping.__class__) for q in OptimadeEntry.m_def.all_quantities.values() - if 'elastic' in q.m_annotations} + if 'search' in q.m_annotations} quantities['elements'].length_quantity = quantities['nelements'] quantities['dimension_types'].length_quantity = quantities['dimension_types'] @@ -43,7 +43,7 @@ _transformer = Transformer(quantities=quantities.values()) def parse_filter(filter_str: str) -> Q: - """ Parses the given optimade filter str and returns a suitable elastic search query. + ''' Parses the given optimade filter str and returns a suitable elastic search query. Arguments: filter_str: Can be direct user input with no prior processing. @@ -51,7 +51,7 @@ def parse_filter(filter_str: str) -> Q: Raises: FilterException: If the given str cannot be parsed, or if there are any semantic errors in the given expression. - """ + ''' try: parse_tree = _parser.parse(filter_str) diff --git a/nomad/app/optimade/models.py b/nomad/app/optimade/models.py index ac0643b283cf5b32381f4e06b03108cbb21f072f..9f423a171279a491b21e7ba57c5df9c3cc499386 100644 --- a/nomad/app/optimade/models.py +++ b/nomad/app/optimade/models.py @@ -12,9 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. -""" +''' All the API flask restplus models. 
-""" +''' from typing import Set from flask_restplus import fields @@ -23,7 +23,7 @@ import math from nomad import config from nomad.app.common import RFC3339DateTime -from nomad.datamodel import CalcWithMetadata +from nomad.datamodel import EntryMetadata from .api import api, base_url, url @@ -235,7 +235,7 @@ json_api_calculation_info_model = api.model('CalculationInfo', { class CalculationDataObject: - def __init__(self, calc: CalcWithMetadata, request_fields: Set[str] = None): + def __init__(self, calc: EntryMetadata, request_fields: Set[str] = None): def include(key): if request_fields is None or (key in request_fields): @@ -243,7 +243,7 @@ class CalculationDataObject: return False - attrs = {key: value for key, value in calc['optimade'].items() if include(key)} + attrs = {key: value for key, value in calc.dft.optimade.m_to_dict().items() if include(key)} self.type = 'calculation' self.id = calc.calc_id diff --git a/nomad/archive.py b/nomad/archive.py index f68c5d85399ac3c8e8b1d97303115cc0aad9ed68..67963f1d0268431be6485241c59fca03bbab3e26 100644 --- a/nomad/archive.py +++ b/nomad/archive.py @@ -33,12 +33,12 @@ class ArchiveError(Exception): class TOCPacker(Packer): - """ + ''' A special msgpack packer that records a TOC while packing. Uses a combination of the pure python msgpack fallback packer and the "real" c-based packing. - """ + ''' def __init__(self, toc_depth: int, *args, **kwargs): self.toc_depth = toc_depth self.toc: Dict[str, Any] = None @@ -403,7 +403,7 @@ class ArchiveReader(ArchiveObject): def write_archive( path_or_file: Union[str, BytesIO], n_entries: int, data: Iterable[Tuple[str, Any]], entry_toc_depth: int = 2) -> None: - """ + ''' Writes a msgpack-based archive file. The file contents will be a valid msgpack-object. The data will contain extra table-of-contents (TOC) objects that map some keys to positions in the file. Data can be partially read from these positions and deserialized @@ -456,14 +456,14 @@ def write_archive( data: The file contents as an iterator of entry id, data tuples. entry_toc_depth: The depth of the table of contents in each entry. Only objects will count for calculating the depth. - """ + ''' with ArchiveWriter(path_or_file, n_entries, entry_toc_depth=entry_toc_depth) as writer: for uuid, entry in data: writer.add(uuid, entry) def read_archive(file_or_path: str, **kwargs) -> ArchiveReader: - """ + ''' Allows to read a msgpack-based archive. Arguments: @@ -475,7 +475,7 @@ def read_archive(file_or_path: str, **kwargs) -> ArchiveReader: A mapping (dict-like) that can be used to access the archive data. The mapping will lazyly load data as it is used. The mapping needs to be closed or used within a 'with' statement to free the underlying file resource after use. - """ + ''' return ArchiveReader(file_or_path, **kwargs) diff --git a/nomad/archive_query.py b/nomad/archive_query.py index 893c39e09975691aef8fb84241bd2a73c7bb8f37..cf1af8b3d510d1e1aa655be215cb732930496720 100644 --- a/nomad/archive_query.py +++ b/nomad/archive_query.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -""" +''' Contains interfaces to the archive metainfo and query. 
In module ``ArchiveMetainfo``, the data is provided either from raw @@ -32,7 +32,7 @@ and a query schema similar to the archive json format can be provided to filter metainfo = q.query() for c in metainfo.calcs: print(c.section_run.section_single_configuration_calculation[0]({'energy_total':'*'})) -""" +''' import numpy as np import requests @@ -47,11 +47,11 @@ from nomad.cli.client.client import KeycloakAuthenticator class ArchiveMetainfo: - """ + ''' Converts archive data in json format to the new nomad metainfo model Arguments: archive_data: the archive data in json format - """ + ''' def __init__(self, archive_data: List[Dict[str, Any]]): self._archive_data = archive_data self.metainfo = None @@ -107,10 +107,10 @@ class ArchiveMetainfo: @property def calcs(self): - """ + ''' Calculations in metainfo form which can be actively queried by using the get functionality and providing a schema - """ + ''' if not self._calcs: self._init_calcs() for calc_id, calc in self._calcs.items(): @@ -126,9 +126,9 @@ class ArchiveMetainfo: @property def base_metacls(self): - """ + ''' The base metaclass to apply a calculation - """ + ''' if self._base_metacls is None: name = self._prefix self._base_metacls = self._build_meta_cls(self.base_data, name) diff --git a/nomad/cli/__init__.py b/nomad/cli/__init__.py index a04890b79dcae5ef7e0d963514de3712bc05a814..7fb192f3956e326735f05cac81aa2d00584bb1c1 100644 --- a/nomad/cli/__init__.py +++ b/nomad/cli/__init__.py @@ -12,13 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. -""" +''' Command line interface (CLI) for nomad. Provides a group/sub-command structure, think git, that offers various functionality to the command line user. Use it from the command line with ``nomad --help`` or ``python -m nomad.cli --help`` to learn more. -""" +''' from nomad.utils import POPO diff --git a/nomad/cli/admin/admin.py b/nomad/cli/admin/admin.py index c44107b25cbd4420568abb9de8e451c2f721682f..8ee72493e58939b1df9a6f5c0a2f1b8d112b694a 100644 --- a/nomad/cli/admin/admin.py +++ b/nomad/cli/admin/admin.py @@ -158,9 +158,7 @@ def lift_embargo(dry, parallel): uploads_to_repack.append(upload) upload.save() - upload_with_metadata = upload.to_upload_with_metadata() - calcs = upload_with_metadata.calcs - search.index_all(calcs) + search.index_all(upload.entries_metadata()) if not dry: __run_processing(uploads_to_repack, parallel, lambda upload: upload.re_pack(), 're-packing') @@ -182,8 +180,8 @@ def index(threads, dry): for calc in proc.Calc.objects(): eta.add() entry = None - entry = search.Entry.from_calc_with_metadata( - datamodel.CalcWithMetadata(**calc.metadata)) + entry = search.create_entry( + datamodel.EntryMetadata.m_from_dict(calc.metadata)) entry = entry.to_dict(include_meta=True) entry['_op_type'] = 'index' yield entry @@ -335,20 +333,20 @@ AllowEncodedSlashes On def write_prototype_data_file(aflow_prototypes: dict, filepath) -> None: - """Writes the prototype data file in a compressed format to a python + '''Writes the prototype data file in a compressed format to a python module. Args: aflow_prototypes - """ + ''' class NoIndent(object): def __init__(self, value): self.value = value class NoIndentEncoder(json.JSONEncoder): - """A custom JSON encoder that can pretty-print objects wrapped in the + '''A custom JSON encoder that can pretty-print objects wrapped in the NoIndent class. 
- """ + ''' def __init__(self, *args, **kwargs): super(NoIndentEncoder, self).__init__(*args, **kwargs) self.kwargs = dict(kwargs) diff --git a/nomad/cli/admin/migration.py b/nomad/cli/admin/migration.py index 72a4de7d7e6672942170e79fc4b3c01e1d3c25ef..a515b66dd738779b4e49ea0464dd0d15bd5693f3 100644 --- a/nomad/cli/admin/migration.py +++ b/nomad/cli/admin/migration.py @@ -20,7 +20,7 @@ import datetime import json from nomad import utils, processing as proc, search -from nomad.datamodel import CalcWithMetadata +from nomad.datamodel import EntryMetadata from nomad.cli.client.mirror import transform_reference, tarnsform_user_id, transform_dataset @@ -28,14 +28,14 @@ __logger = utils.get_logger(__name__) class SourceCalc(Document): - """ + ''' Mongo document used as a calculation, upload, and metadata db and index build from a given source db. Each :class:`SourceCacl` entry relates a pid, mainfile, upload "id" with each other for a corressponding calculation. It might alos contain the user metadata. The uploads are "id"ed via the specific path segment that identifies an upload on the CoE repo FS(s) without any prefixes (e.g. $EXTRACTED, /data/upload, etc.) - """ + ''' pid = IntField(primary_key=True) mainfile = StringField() upload = StringField() @@ -53,14 +53,14 @@ class SourceCalc(Document): def update_user_metadata(bulk_size: int = 1000, update_index: bool = False, **kwargs): - """ Goes through the whole source index to sync differences between repo user metadata + ''' Goes through the whole source index to sync differences between repo user metadata and metadata in fairdi. It goes through the source index calc by calc, working in bulks. Getting the samedata for fairdi and updating the different calcs in mongo. Will only update user metadata. Uses kwargs as filters for the used source index query. 
- """ + ''' logger = utils.get_logger(__name__) start_time = time.time() @@ -96,7 +96,7 @@ def update_user_metadata(bulk_size: int = 1000, update_index: bool = False, **kw important_changes['missing_calcs'].setdefault(source.upload, []).append(source.pid) continue - target_metadata = CalcWithMetadata(**target.metadata) + target_metadata = EntryMetadata(**target.metadata) source_metadata_normalized: Dict[str, Any] = dict( comment=source.metadata.get('comment'), references={transform_reference(ref) for ref in source.metadata['references']}, diff --git a/nomad/cli/admin/uploads.py b/nomad/cli/admin/uploads.py index 7d7feb6271b03cc32c54aa3bde72c80c457f2ddd..3fe4d3e87f687700831df158d038c1091c3937aa 100644 --- a/nomad/cli/admin/uploads.py +++ b/nomad/cli/admin/uploads.py @@ -144,8 +144,7 @@ def chown(ctx, username, uploads): for upload in uploads: upload.user_id = user.user_id - upload_with_metadata = upload.to_upload_with_metadata() - calcs = upload_with_metadata.calcs + calcs = upload.entries_metadata() def create_update(calc): return UpdateOne( @@ -155,8 +154,7 @@ def chown(ctx, username, uploads): proc.Calc._get_collection().bulk_write([create_update(calc) for calc in calcs]) upload.save() - upload_with_metadata = upload.to_upload_with_metadata() - calcs = upload_with_metadata.calcs + calcs = upload.entries_metadata() search.index_all(calcs, do_refresh=False) search.refresh() @@ -194,8 +192,7 @@ def index(ctx, uploads): i, failed = 0, 0 for upload in uploads: - upload_with_metadata = upload.to_upload_with_metadata() - calcs = upload_with_metadata.calcs + calcs = upload.entries_metadata() failed += search.index_all(calcs) i += 1 diff --git a/nomad/cli/client/client.py b/nomad/cli/client/client.py index 48c7265a76f85e00f749e98fc1c67f5828740a95..d846a42f8fe2f2915fd4b545d13abba949bd4836 100644 --- a/nomad/cli/client/client.py +++ b/nomad/cli/client/client.py @@ -65,7 +65,7 @@ def __create_client( user: str = nomad_config.client.user, password: str = nomad_config.client.password, ssl_verify: bool = True, use_token: bool = True): - """ A factory method to create the client. """ + ''' A factory method to create the client. ''' if not ssl_verify: import warnings warnings.filterwarnings("ignore") diff --git a/nomad/cli/client/integrationtests.py b/nomad/cli/client/integrationtests.py index 1baca89ed3296311c9a350aa9e7dbe2eafc0d0ee..f22dde7a6f5e2af9361c4353c7854a1da7a6f469 100644 --- a/nomad/cli/client/integrationtests.py +++ b/nomad/cli/client/integrationtests.py @@ -12,10 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. -""" +''' A command that runs some example operations on a working nomad@FAIRDI installation as a final integration test. -""" +''' import time import os diff --git a/nomad/cli/client/local.py b/nomad/cli/client/local.py index e6001c09ca63810fbf469a50a43f1ec0800ee960..427210522eda2c04944f91dc708eb55ff59a8dbc 100644 --- a/nomad/cli/client/local.py +++ b/nomad/cli/client/local.py @@ -24,7 +24,7 @@ import bravado.exception from nomad import config, utils from nomad.files import ArchiveBasedStagingUploadFiles -from nomad.datamodel import CalcWithMetadata +from nomad.datamodel import EntryMetadata from nomad.parsing import LocalBackend from nomad.cli.parse import parse, normalize, normalize_all @@ -32,7 +32,7 @@ from .client import client class CalcProcReproduction: - """ + ''' Instances represent a local reproduction of the processing for a single calculation. 
It allows to download raw data from a nomad server and reproduce its processing (parsing, normalizing) with the locally installed parsers and normalizers. @@ -44,7 +44,7 @@ class CalcProcReproduction: Arguments: calc_id: The calc_id of the calculation to locally process. override: Set to true to override any existing local calculation data. - """ + ''' def __init__(self, archive_id: str, override: bool = False, mainfile: str = None) -> None: if '/' in archive_id: self.calc_id = utils.archive.calc_id(archive_id) @@ -125,25 +125,25 @@ class CalcProcReproduction: self.upload_files.delete() def parse(self, parser_name: str = None, **kwargs) -> LocalBackend: - """ + ''' Run the given parser on the downloaded calculation. If no parser is given, do parser matching and use the respective parser. - """ + ''' return parse(self.mainfile, self.upload_files, parser_name=parser_name, logger=self.logger, **kwargs) def normalize(self, normalizer: Union[str, Callable], parser_backend: LocalBackend = None): - """ + ''' Parse the downloaded calculation and run the given normalizer. - """ + ''' if parser_backend is None: parser_backend = self.parse() return normalize(parser_backend=parser_backend, normalizer=normalizer, logger=self.logger) def normalize_all(self, parser_backend: LocalBackend = None): - """ + ''' Parse the downloaded calculation and run the whole normalizer chain. - """ + ''' return normalize_all(parser_backend=parser_backend, logger=self.logger) @@ -173,6 +173,6 @@ def local(calc_id, show_backend, show_metadata, skip_normalizers, not_strict, ** backend.write_json(sys.stdout, pretty=True) if show_metadata: - metadata = CalcWithMetadata(domain=local.parser.domain) + metadata = EntryMetadata(domain=local.parser.domain) metadata.apply_domain_metadata(backend) - ujson.dump(metadata.to_dict(), sys.stdout, indent=4) + ujson.dump(metadata.m_to_dict(), sys.stdout, indent=4) diff --git a/nomad/cli/client/mirror.py b/nomad/cli/client/mirror.py index 85b388d65a5d8488949948524a522ee79b339ed8..d3f07fe9ba435458f9574905549a680925491328 100644 --- a/nomad/cli/client/mirror.py +++ b/nomad/cli/client/mirror.py @@ -30,7 +30,7 @@ from .client import client __in_test = False -""" Will be monkeypatched by tests to alter behavior for testing. """ +''' Will be monkeypatched by tests to alter behavior for testing. ''' _Dataset = Dataset.m_def.m_x('me').me_cls __logger = utils.get_logger(__name__) @@ -82,7 +82,7 @@ def transform_reference(reference): def v0Dot6(upload_data): - """ Inplace transforms v0.6.x upload data into v0.7.x upload data. """ + ''' Inplace transforms v0.6.x upload data into v0.7.x upload data. ''' upload = json.loads(upload_data.upload) upload['user_id'] = tarnsform_user_id(upload['user_id']) upload_data.upload = json.dumps(upload) @@ -318,7 +318,7 @@ def mirror( proc.Calc._get_collection().insert(upload_data.calcs) # index es - search.index_all(upload.to_upload_with_metadata().calcs) + search.index_all(upload.entries_metadata()) print( 'Mirrored %s with %d calcs at %s' % diff --git a/nomad/cli/client/statistics.py b/nomad/cli/client/statistics.py index 58339bc2f28e9dccb58541d67f343a513e85a701..1878c5f2a474995c48d4d1d6c1e65f80c0830a7d 100644 --- a/nomad/cli/client/statistics.py +++ b/nomad/cli/client/statistics.py @@ -12,9 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. -""" +''' A command that generates various statistics. 
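A usage sketch for the CalcProcReproduction class above, put together from its constructor and methods in this hunk; the archive id is a placeholder and a reachable nomad installation is assumed, since the class downloads the calculation's raw data.

import sys
from nomad.cli.client.local import CalcProcReproduction

# '/' separated ids are split into upload and calc id by the constructor
repro = CalcProcReproduction('some-upload-id/some-calc-id')
backend = repro.parse()                  # parser matching plus parsing
backend = repro.normalize_all(backend)   # run the whole normalizer chain
backend.write_json(sys.stdout, pretty=True)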
-""" +''' from matplotlib import scale as mscale from matplotlib import transforms as mtransforms diff --git a/nomad/cli/client/update_database.py b/nomad/cli/client/update_database.py index 703796fc2ade509c657a10216bc09d440614d0c6..4d1f44bdfc58db3934b1de8e2b06fb30c086f7da 100644 --- a/nomad/cli/client/update_database.py +++ b/nomad/cli/client/update_database.py @@ -12,12 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. -""" +''' Automatically synchronizes nomad it with a given database. It creates a list of paths to mainfiles in nomad and compares it with paths in the external database. The missing paths in nomad will then be downloaded from the external database and subsequently uploaded to nomad. The downloaded files are by default saved in '/nomad/fairdi/external'. -""" +''' import requests import re diff --git a/nomad/cli/client/upload.py b/nomad/cli/client/upload.py index fd1ac0848de191fae226cd660f89d5eb28c1a933..a37fe80853a99a8d08be13f710a46bf3084ea080 100644 --- a/nomad/cli/client/upload.py +++ b/nomad/cli/client/upload.py @@ -41,7 +41,7 @@ def stream_upload_with_client(client, stream, name=None): def upload_file(file_path: str, name: str = None, offline: bool = False, publish: bool = False, client=None): - """ + ''' Upload a file to nomad. Arguments: @@ -51,7 +51,7 @@ def upload_file(file_path: str, name: str = None, offline: bool = False, publish publish: automatically publish after successful processing Returns: The upload_id - """ + ''' if client is None: from nomad.cli.client import create_client client = create_client() diff --git a/nomad/cli/parse.py b/nomad/cli/parse.py index f1faaa30cb83c050c02f516b75caacd2926ee43f..be462f842cd0554af68ea08b486d72a5618a14cf 100644 --- a/nomad/cli/parse.py +++ b/nomad/cli/parse.py @@ -8,7 +8,7 @@ from nomad import config, utils, files from nomad.parsing import LocalBackend, parser_dict, match_parser, MatchingParser, MetainfoBackend from nomad.metainfo.legacy import LegacyMetainfoEnvironment from nomad.normalizing import normalizers -from nomad.datamodel import CalcWithMetadata +from nomad.datamodel import EntryMetadata from nomadcore import simple_parser @@ -20,10 +20,10 @@ def parse( parser_name: str = None, backend_factory: Callable = None, strict: bool = True, logger=None) -> LocalBackend: - """ + ''' Run the given parser on the downloaded calculation. If no parser is given, do parser matching and use the respective parser. - """ + ''' if logger is None: logger = utils.get_logger(__name__) if parser_name is not None: @@ -87,9 +87,9 @@ def normalize( def normalize_all(parser_backend: LocalBackend = None, logger=None) -> LocalBackend: - """ + ''' Parse the downloaded calculation and run the whole normalizer chain. 
- """ + ''' for normalizer in normalizers: parser_backend = normalize(normalizer, parser_backend=parser_backend, logger=logger) @@ -129,6 +129,6 @@ def _parse( if show_backend: backend.write_json(sys.stdout, pretty=True) if show_metadata: - metadata = CalcWithMetadata(domain='dft') # TODO take domain from matched parser + metadata = EntryMetadata(domain='dft') # TODO take domain from matched parser metadata.apply_domain_metadata(backend) - json.dump(metadata.to_dict(), sys.stdout, indent=4) + json.dump(metadata.m_to_dict(), sys.stdout, indent=4) diff --git a/nomad/config.py b/nomad/config.py index 06bcb228eeb28059950f5cbec69193d491dcb990..e195cc62533207246b56cf19864386983df28c6b 100644 --- a/nomad/config.py +++ b/nomad/config.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -""" +''' This module describes all configurable parameters for the nomad python code. The configuration is used for all executed python code including API, worker, CLI, and other scripts. To use the configuration in your own scripts or new modules, simply import @@ -30,7 +30,7 @@ over defaults. .. autoclass:: nomad.config.NomadConfig .. autofunction:: nomad.config.apply .. autofunction:: nomad.config.load_config -""" +''' import logging import os @@ -46,10 +46,10 @@ warnings.filterwarnings("ignore", message="numpy.ufunc size changed") class NomadConfig(dict): - """ + ''' A class for configuration categories. It is a dict subclass that uses attributes as key/value pairs. - """ + ''' def __init__(self, **kwargs): super().__init__(**kwargs) @@ -246,11 +246,11 @@ logger = logging.getLogger(__name__) def apply(key, value) -> None: - """ + ''' Changes the config according to given key and value. The keys are interpreted as paths to config values with ``_`` as a separator. E.g. ``fs_staging`` leading to ``config.fs.staging`` - """ + ''' path = list(reversed(key.split('_'))) child_segment = None current_value = None @@ -299,13 +299,13 @@ def apply(key, value) -> None: def load_config(config_file: str = os.environ.get('NOMAD_CONFIG', 'nomad.yaml')) -> None: - """ + ''' Loads the configuration from the ``config_file`` and environment. Arguments: config_file: Override the configfile, default is file stored in env variable NOMAD_CONFIG or ``nomad.yaml``. - """ + ''' # load yaml and override defaults (only when not in test) if os.path.exists(config_file): with open(config_file, 'r') as stream: diff --git a/nomad/datamodel/__init__.py b/nomad/datamodel/__init__.py index ef643602377f3bd3222377cb7a78e2762c411873..c4f152be38c377a9defbf687b879f9b393e96b16 100644 --- a/nomad/datamodel/__init__.py +++ b/nomad/datamodel/__init__.py @@ -12,39 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. -""" +''' This module contains classes that allow to represent the core -nomad data entities :class:`Upload` and :class:`Calc` on a high level of abstraction +nomad data entities (entries/calculations, users, datasets) on a high level of abstraction independent from their representation in the different modules :py:mod:`nomad.processing`, :py:mod:`nomad.coe_repo`, :py:mod:`nomad.parsing`, :py:mod:`nomad.search`, :py:mod:`nomad.app`, :py:mod:`nomad.migration`. It is not about representing every detail, but those parts that are directly involved in -api, processing, migration, mirroring, or other 'infrastructure' operations. - -Transformations between different implementations of the same entity can be build -and used. 
To ease the number of necessary transformations the classes -:class:`UploadWithMetadata` and :class:`CalcWithMetadata` can act as intermediate -representations. Therefore, implement only transformation from and to these -classes. These are the implemented transformations: - -.. image:: datamodel_transformations.png - -.. autoclass:: nomad.datamodel.UploadWithMetadata - :members: -.. autoclass:: nomad.datamodel.CalcWithMetadata - :members: - -The class :class:`CalcWithMetadata` only defines non domain specific metadata quantities -about ids, user metadata, etc. To define domain specific quantities :class:`CalcWithMetadata` -must be subclassed. The classes -:class:`Domain` and :class:`DomainQuantity` can be used to further define domain specific -quantities. - -.. autoclass:: nomad.datamodel.Domain - :members: -.. autoclass:: nomad.datamodel.DomainQuantity - :members: +api, processing, mirroring, or other 'infrastructure' operations. The class :class:`User` is used to represent users and their attributes. @@ -55,12 +31,33 @@ The class :class:`Dataset` is used to represent datasets and their attributes. .. autoclass:: nomad.datamodel.Dataset :members: -""" -import sys +The class :class:`UserMetadata` is used to represent user determined entry metadata. + +.. autoclass:: nomad.datamodel.UserMetadata + :members: + +The class :class:`EntryMetadata` is used to represent all metadata about an entry. -from nomad.datamodel.base import UploadWithMetadata, CalcWithMetadata, Domain, DomainQuantity -from nomad.datamodel import ems, dft -from nomad.datamodel.dft import DFTCalcWithMetadata -from nomad.datamodel.ems import EMSEntryWithMetadata -from nomad.datamodel.metainfo import Dataset, User, UserMetadata +.. autoclass:: nomad.datamodel.EntryMetadata + :members: +''' + +from .dft import DFTMetadata +from .ems import EMSMetadata +from .metainfo import Dataset, User, EditableUserMetadata, UserMetadata, EntryMetadata + +domains = { + 'dft': { + 'metadata': DFTMetadata, + 'metainfo_all_package': 'all.nomadmetainfo.json', + 'root_section': 'section_run' + }, + 'ems': { + 'metadata': EMSMetadata, + 'metainfo_all_package': 'all.experimental.nomadmetainfo.json', + 'root_section': 'section_experiment' + } +} + +root_sections = [domain['root_section'] for domain in domains.values()] + ['section_entry_info'] diff --git a/nomad/datamodel/base.py b/nomad/datamodel/base.py index 5dd084ebc92f3825b81e5e9390aca271323d08a4..99705430c081d9468eec637d3538da74a3e1a34e 100644 --- a/nomad/datamodel/base.py +++ b/nomad/datamodel/base.py @@ -12,508 +12,335 @@ # See the License for the specific language governing permissions and # limitations under the License. -from typing import Iterable, List, Dict, Type, Tuple, Callable, Any -import datetime -from elasticsearch_dsl import Keyword, Integer -from collections.abc import Mapping import numpy as np -import ase.data from nomad import config -from .metainfo import Dataset, User - - -class UploadWithMetadata(): - """ - See :class:`CalcWithMetadata`. - """ - - def __init__(self, **kwargs): - self.upload_id: str = None - self.uploader: str = None - self.upload_time: datetime.datetime = None - - self.calcs: Iterable['CalcWithMetadata'] = list() - - for key, value in kwargs.items(): - setattr(self, key, value) - - @property - def calcs_dict(self) -> Dict[str, 'CalcWithMetadata']: - return {calc.calc_id: calc for calc in self.calcs} - - -class CalcWithMetadata(Mapping): - """ - A dict/POPO class that can be used for mapping calc representations with calc metadata. 
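The new module-level domains registry added to nomad/datamodel/__init__.py above replaces the Domain class machinery that the rest of this hunk removes; a short sketch of how the registry can be consulted.

from nomad.datamodel import domains, root_sections

dft = domains['dft']
print(dft['root_section'])            # 'section_run'
print(dft['metainfo_all_package'])    # 'all.nomadmetainfo.json'
DomainMetadata = dft['metadata']      # the DFTMetadata section class
print(root_sections)                  # all domain root sections plus 'section_entry_info'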
- We have multi representations of calcs and their calc metadata. To avoid implement - mappings between all combinations, just implement mappings with the class and use - mapping transitivity. E.g. instead of A -> B, A -> this -> B. - - This is basically an abstract class and it has to be subclassed for each :class:`Domain`. - Subclasses can define additional attributes and have to implement :func:`apply_domain_metadata` - to fill these attributes from processed entries, i.e. instance of :class:`nomad.parsing.LocalBackend`. - - Attributes: - domain: Must be the key for a registered domain. This determines which actual - subclass is instantiated. - upload_id: The ``upload_id`` of the calculations upload (random UUID). - calc_id: The unique mainfile based calculation id. - calc_hash: The raw file content based checksum/hash of this calculation. - pid: The unique persistent id of this calculation. - mainfile: The upload relative mainfile path. - - files: A list of all files, relative to upload. - upload_time: The time when the calc was uploaded. - uploader: An object describing the uploading user, has at least ``user_id`` - processed: Boolean indicating if this calc was successfully processed and archive - data and calc metadata is available. - last_processing: A datatime with the time of the last successful processing. - nomad_version: A string that describes the version of the nomad software that was - used to do the last successful processing. - - with_embargo: Show if user set an embargo on the calculation. - coauthors: List of coauther user objects with at ``user_id``. - shared_with: List of users this calcs ownership is shared with, objects with at ``user_id``. - comment: String comment. - references: Objects describing user provided references, keys are ``id`` and ``value``. - datasets: A list of dataset ids. The corresponding :class:`Dataset`s must exist. - """ - - def __new__(cls, domain: str = None, **kwargs): - if domain is not None: - domain_obj = Domain.instances.get(domain) - assert domain_obj is not None - return super().__new__(domain_obj.domain_entry_class) - else: - return super().__new__(cls) - - def __init__(self, domain: str = None, **kwargs): - self.domain = domain - - # id relevant metadata - self.upload_id: str = None - self.calc_id: str = None - self.calc_hash: str = None - self.mainfile: str = None - self.pid: int = None - self.raw_id: str = None - - # basic upload and processing related metadata - self.upload_time: datetime.datetime = None - self.upload_name: str = None - self.files: List[str] = None - self.uploader: str = None - self.processed: bool = False - self.last_processing: datetime.datetime = None - self.nomad_version: str = None - self.nomad_commit: str = None - - # user metadata, i.e. 
quantities given and editable by the user - self.with_embargo: bool = None - self.published: bool = False - self.coauthors: List[str] = [] - self.shared_with: List[str] = [] - self.comment: str = None - self.references: List[str] = [] - self.datasets: List[str] = [] - self.external_id: str = None - self.last_edit: datetime.datetime = None - - # parser related general (not domain specific) metadata - self.parser_name = None - - # domain generic metadata - self.formula: str = None - self.atoms: List[str] = [] - self.n_atoms: int = 0 - - self.update(**kwargs) - - def __getitem__(self, key): - value = getattr(self, key, None) - - if value is None or key in ['backend']: - raise KeyError() - - return value - - def __iter__(self): - for key, value in self.__dict__.items(): - if value is None or key in ['backend']: - continue - - yield key - - def __len__(self): - count = 0 - for key, value in self.__dict__.items(): - if value is None or key in ['backend']: - continue - count += 1 - - return count - - def to_dict(self): - return {key: value for key, value in self.items()} - - def __str__(self): - return str(self.to_dict()) - - def update(self, **kwargs): - for key, value in kwargs.items(): - if value is None: - continue - - setattr(self, key, value) - - def apply_user_metadata(self, metadata: dict): - """ - Applies a user provided metadata dict to this calc. - """ - self.pid = metadata.get('_pid', self.pid) - self.comment = metadata.get('comment', self.comment) - self.upload_time = metadata.get('_upload_time', self.upload_time) - uploader_id = metadata.get('_uploader') - if uploader_id is not None: - self.uploader = uploader_id - self.references = metadata.get('references', []) - self.with_embargo = metadata.get('with_embargo', self.with_embargo) - self.coauthors = [ - user_id for user_id in metadata.get('coauthors', self.coauthors) - if User.get(user_id=user_id) is not None] - self.shared_with = [ - user_id for user_id in metadata.get('shared_with', self.shared_with) - if User.get(user_id=user_id) is not None] - self.datasets = [ - dataset_id for dataset_id in metadata.get('datasets', self.datasets) - if Dataset.m_def.m_x('me').get(dataset_id=dataset_id) is not None] - self.external_id = metadata.get('external_id') - - def apply_domain_metadata(self, backend): - raise NotImplementedError() - - -class DomainQuantity: - """ - This class can be used to define further details about a domain specific metadata - quantity. - - Attributes: - name: The name of the quantity, also the key used to store values in - :class:`CalcWithMetadata` - description: A human friendly description. The description is used to define - the swagger documentation on the relevant API endpoints. - multi: Indicates a list of values. This is important for the elastic mapping. - order_default: Indicates that this metric should be used for the default order of - search results. - aggregations: Indicates that search aggregations (and how many) should be provided. - 0 (the default) means no aggregations. - metric: Indicates that this quantity should be used as search metric. Values need - to be tuples with metric name and elastic aggregation (e.g. sum, cardinality) - elastic_mapping: An optional elasticsearch_dsl mapping. Default is ``Keyword``. - elastic_search_type: An optional elasticsearch search type. Default is ``term``. - elastic_field: An optional elasticsearch key. Default is the name of the quantity. 
- elastic_value: A collable that takes a :class:`CalcWithMetadata` as input and produces the - value for the elastic search index. - argparse_action: Action to use on argparse, either append or split for multi values. Append is default. - """ - - def __init__( - self, description: str = None, multi: bool = False, aggregations: int = 0, - order_default: bool = False, metric: Tuple[str, str] = None, - metadata_field: str = None, elastic_mapping: type = None, - elastic_search_type: str = 'term', elastic_field: str = None, - elastic_value: Callable[[Any], Any] = None, - argparse_action: str = 'append'): - - self.domain: str = None - self._name: str = None - self.description = description - self.multi = multi - self.order_default = order_default - self.aggregations = aggregations - self.metric = metric - self.elastic_mapping = elastic_mapping - self.elastic_search_type = elastic_search_type - self.metadata_field = metadata_field - self.elastic_field = elastic_field - self.argparse_action = argparse_action - - self.elastic_value = elastic_value - if self.elastic_value is None: - self.elastic_value = lambda o: o - - if self.elastic_mapping is None: - self.elastic_mapping = Keyword(multi=self.multi) - - @property - def name(self) -> str: - return self._name - - @name.setter - def name(self, name: str) -> None: - self._name = name - if self.metadata_field is None: - self.metadata_field = name - if self.elastic_field is None: - self.elastic_field = self.name - - @property - def qualified_elastic_field(self) -> str: - if self.domain is None: - return self.elastic_field - else: - return '%s.%s' % (self.domain, self.elastic_field) - - @property - def qualified_name(self) -> str: - if self.domain is None: - return self.name - else: - return '%s.%s' % (self.domain, self.name) - - -def only_atoms(atoms): - numbers = [ase.data.atomic_numbers[atom] for atom in atoms] - only_atoms = [ase.data.chemical_symbols[number] for number in sorted(numbers)] - return ''.join(only_atoms) - - -class Domain: - """ - A domain defines all metadata quantities that are specific to a certain scientific - domain, e.g. DFT calculations, or experimental material science. - - Each domain needs to define a subclass of :class:`CalcWithMetadata`. This - class has to define the necessary domain specific metadata quantities and how these - are filled from parser results (usually an instance of :class:LocalBackend). - - Furthermore, the class method :func:`register_domain` of this ``Domain`` class has - to be used to register a domain with ``domain_nam``. This also allows to provide - further descriptions on each domain specific quantity via instance of :class:`DomainQuantity`. - - While there can be multiple domains registered. Currently, only one domain can be - active. This active domain is define in the configuration using the ``domain_name``. - - Arguments: - name: A name for the domain. This is used as key in the configuration ``config.domain``. - domain_entry_class: A subclass of :class:`CalcWithMetadata` that adds the - domain specific quantities. - quantities: Additional specifications for the quantities in ``domain_entry_class`` as - instances of :class:`DomainQuantity`. - metrics: Tuples of elastic field name and elastic aggregation operation that - can be used to create statistic values. - groups: Tuple of quantity name and metric that describes quantities that - can be used to group entries by quantity values. - root_sections: The name of the possible root sections for this domain. 
- metainfo_all_package: The name of the full metainfo package for this domain. - """ - instances: Dict[str, 'Domain'] = {} - - base_quantities = dict( - authors=DomainQuantity( - elastic_field='authors.name.keyword', multi=True, aggregations=1000, - description=( - 'Search for the given author. Exact keyword matches in the form "Lastname, ' - 'Firstname".')), - uploader_id=DomainQuantity( - elastic_field='uploader.user_id', multi=False, aggregations=5, - description=('Search for the given uploader id.')), - uploader_name=DomainQuantity( - elastic_field='uploader.name.keyword', multi=False, - description=('Search for the exact uploader\'s full name')), - comment=DomainQuantity( - elastic_search_type='match', multi=True, - description='Search within the comments. This is a text search ala google.'), - paths=DomainQuantity( - elastic_search_type='match', elastic_field='files', multi=True, - description='Search for elements in one of the file paths. The paths are split at all "/".'), - files=DomainQuantity( - elastic_field='files.keyword', multi=True, - description='Search for exact file name with full path.'), - quantities=DomainQuantity( - multi=True, - description='Search for the existence of a certain meta-info quantity'), - upload_id=DomainQuantity( - description='Search for the upload_id.', - multi=True, argparse_action='split', elastic_search_type='terms'), - upload_time=DomainQuantity( - description='Search for the exact upload time.', elastic_search_type='terms'), - upload_name=DomainQuantity( - description='Search for the upload_name.', - multi=True, argparse_action='split', elastic_search_type='terms'), - calc_id=DomainQuantity( - description='Search for the calc_id.', - multi=True, argparse_action='split', elastic_search_type='terms'), - pid=DomainQuantity( - description='Search for the pid.', - multi=True, argparse_action='split', elastic_search_type='terms'), - raw_id=DomainQuantity( - description='Search for the raw_id.', - multi=True, argparse_action='split', elastic_search_type='terms'), - mainfile=DomainQuantity( - description='Search for the mainfile.', - multi=True, argparse_action='append', elastic_search_type='terms'), - external_id=DomainQuantity( - description='External user provided id. Does not have to be unique necessarily.', - multi=True, argparse_action='split', elastic_search_type='terms'), - calc_hash=DomainQuantity( - description='Search for the entries hash.', - multi=True, argparse_action='split', elastic_search_type='terms'), - dataset=DomainQuantity( - elastic_field='datasets.name', multi=True, elastic_search_type='match', - description='Search for a particular dataset by name.'), - dataset_id=DomainQuantity( - elastic_field='datasets.id', multi=True, - description='Search for a particular dataset by its id.'), - doi=DomainQuantity( - elastic_field='datasets.doi', multi=True, - description='Search for a particular dataset by doi (incl. http://dx.doi.org).'), - formula=DomainQuantity( - 'The chemical (hill) formula of the simulated system.', - order_default=True), - atoms=DomainQuantity( - 'The atom labels of all atoms in the simulated system.', - aggregations=len(ase.data.chemical_symbols), multi=True), - only_atoms=DomainQuantity( - 'The atom labels concatenated in species-number order. 
Used with keyword search ' - 'to facilitate exclusive searches.', - elastic_value=only_atoms, metadata_field='atoms', multi=True), - n_atoms=DomainQuantity( - 'Number of atoms in the simulated system', - elastic_mapping=Integer())) - - base_metrics = dict( - datasets=('dataset_id', 'cardinality'), - uploads=('upload_id', 'cardinality'), - uploaders=('uploader_name', 'cardinality'), - authors=('authors', 'cardinality'), - unique_entries=('calc_hash', 'cardinality')) - - base_groups = dict( - datasets=('dataset_id', 'datasets'), - uploads=('upload_id', 'uploads')) - - @classmethod - def get_quantity(cls, name_spec) -> DomainQuantity: - """ - Returns the quantity definition for the given quantity name. The name can be the - qualified name (``domain.quantity``) or in Django-style (``domain__quantity``). - """ - qualified_name = name_spec.replace('__', '.') - split_name = qualified_name.split('.') - if len(split_name) == 1: - return cls.base_quantities[split_name[0]] - elif len(split_name) == 2: - return cls.instances[split_name[0]].quantities[split_name[1]] - else: - assert False, 'qualified quantity name depth must be 2 max' - - @classmethod - def all_quantities(cls) -> Iterable[DomainQuantity]: - return set([quantity for domain in cls.instances.values() for quantity in domain.quantities.values()]) - - def __init__( - self, name: str, domain_entry_class: Type[CalcWithMetadata], - quantities: Dict[str, DomainQuantity], - metrics: Dict[str, Tuple[str, str]], - groups: Dict[str, Tuple[str, str]], - default_statistics: List[str], - root_sections=['section_run', 'section_entry_info'], - metainfo_all_package='all.nomadmetainfo.json') -> None: - - domain_quantities = quantities - - Domain.instances[name] = self - - self.name = name - self.domain_entry_class = domain_entry_class - self.domain_quantities: Dict[str, DomainQuantity] = {} - self.root_sections = root_sections - self.metainfo_all_package = metainfo_all_package - self.default_statistics = default_statistics - - reference_domain_calc = CalcWithMetadata(domain=name) - reference_general_calc = CalcWithMetadata(domain=None) - - # add non specified quantities from additional metadata class fields - for quantity_name in reference_domain_calc.__dict__.keys(): - if not hasattr(reference_general_calc, quantity_name): - quantity = domain_quantities.get(quantity_name, None) - if quantity is None: - domain_quantities[quantity_name] = DomainQuantity() - - # ensure domain quantity names and domains - for quantity_name, quantity in domain_quantities.items(): - quantity.domain = name - quantity.name = quantity_name - - # add domain prefix to domain metrics and groups - domain_metrics = { - '%s.%s' % (name, key): (quantities[quantity].qualified_elastic_field, es_op) - for key, (quantity, es_op) in metrics.items()} - domain_groups = { - '%s.%s' % (name, key): (quantities[quantity].qualified_name, '%s.%s' % (name, metric)) - for key, (quantity, metric) in groups.items()} - - # add all domain quantities - for quantity_name, quantity in domain_quantities.items(): - self.domain_quantities[quantity.name] = quantity - - # update the multi status from an example value - if quantity.metadata_field in reference_domain_calc.__dict__: - quantity.multi = isinstance( - reference_domain_calc.__dict__[quantity.metadata_field], list) - - assert not hasattr(reference_general_calc, quantity_name), \ - 'quantity overrides general non domain quantity: %s' % quantity_name - - # construct search quantities from base and domain quantities - self.quantities = 
dict(**Domain.base_quantities) - for quantity_name, quantity in self.quantities.items(): - quantity.name = quantity_name - self.quantities.update(self.domain_quantities) - - assert any(quantity.order_default for quantity in Domain.instances[name].quantities.values()), \ - 'you need to define a order default quantity' - - # construct metrics from base and domain metrics - self.metrics = dict(**Domain.base_metrics) - self.metrics.update(**domain_metrics) - self.groups = dict(**Domain.base_groups) - self.groups.update(**domain_groups) - - @property - def metrics_names(self) -> Iterable[str]: - """ Just the names of all metrics. """ - return list(self.metrics.keys()) - - @property - def aggregations(self) -> Dict[str, int]: - """ - The search aggregations and the default maximum number of calculated buckets. See also - :func:`nomad.search.aggregations`. - """ - return { - quantity.name: quantity.aggregations - for quantity in self.quantities.values() - if quantity.aggregations > 0 - } - - @property - def aggregations_names(self) -> Iterable[str]: - """ Just the names of all metrics. """ - return list(self.aggregations.keys()) - - @property - def order_default_quantity(self) -> str: - for quantity in self.quantities.values(): - if quantity.order_default: - return quantity.qualified_name - - assert False, 'each domain must defina an order_default quantity' +# from .metainfo import Dataset, User, EntryMetadata + + +# class DomainQuantity: +# ''' +# This class can be used to define further details about a domain specific metadata +# quantity. + +# Attributes: +# name: The name of the quantity, also the key used to store values in +# :class:`EntryMetadata` +# description: A human friendly description. The description is used to define +# the swagger documentation on the relevant API endpoints. +# multi: Indicates a list of values. This is important for the elastic mapping. +# order_default: Indicates that this metric should be used for the default order of +# search results. +# aggregations: Indicates that search aggregations (and how many) should be provided. +# 0 (the default) means no aggregations. +# metric: Indicates that this quantity should be used as search metric. Values need +# to be tuples with metric name and elastic aggregation (e.g. sum, cardinality) +# elastic_mapping: An optional elasticsearch_dsl mapping. Default is ``Keyword``. +# elastic_search_type: An optional elasticsearch search type. Default is ``term``. +# elastic_field: An optional elasticsearch key. Default is the name of the quantity. +# elastic_value: A collable that takes a :class:`EntryMetadata` as input and produces the +# value for the elastic search index. +# argparse_action: Action to use on argparse, either append or split for multi values. Append is default. 
+# ''' + +# def __init__( +# self, description: str = None, multi: bool = False, aggregations: int = 0, +# order_default: bool = False, metric: Tuple[str, str] = None, +# metadata_field: str = None, elastic_mapping: type = None, +# elastic_search_type: str = 'term', elastic_field: str = None, +# elastic_value: Callable[[Any], Any] = None, +# argparse_action: str = 'append'): + +# self.domain: str = None +# self._name: str = None +# self.description = description +# self.multi = multi +# self.order_default = order_default +# self.aggregations = aggregations +# self.metric = metric +# self.elastic_mapping = elastic_mapping +# self.elastic_search_type = elastic_search_type +# self.metadata_field = metadata_field +# self.elastic_field = elastic_field +# self.argparse_action = argparse_action + +# self.elastic_value = elastic_value +# if self.elastic_value is None: +# self.elastic_value = lambda o: o + +# if self.elastic_mapping is None: +# self.elastic_mapping = Keyword(multi=self.multi) + +# @property +# def name(self) -> str: +# return self._name + +# @name.setter +# def name(self, name: str) -> None: +# self._name = name +# if self.metadata_field is None: +# self.metadata_field = name +# if self.elastic_field is None: +# self.elastic_field = self.name + +# @property +# def qualified_elastic_field(self) -> str: +# if self.domain is None: +# return self.elastic_field +# else: +# return '%s.%s' % (self.domain, self.elastic_field) + +# @property +# def qualified_name(self) -> str: +# if self.domain is None: +# return self.name +# else: +# return '%s.%s' % (self.domain, self.name) + + +# def only_atoms(atoms): +# numbers = [ase.data.atomic_numbers[atom] for atom in atoms] +# only_atoms = [ase.data.chemical_symbols[number] for number in sorted(numbers)] +# return ''.join(only_atoms) + + +# class Domain: +# ''' +# A domain defines all metadata quantities that are specific to a certain scientific +# domain, e.g. DFT calculations, or experimental material science. + +# Each domain needs to define a subclass of :class:`EntryMetadata`. This +# class has to define the necessary domain specific metadata quantities and how these +# are filled from parser results (usually an instance of :class:LocalBackend). + +# Furthermore, the class method :func:`register_domain` of this ``Domain`` class has +# to be used to register a domain with ``domain_nam``. This also allows to provide +# further descriptions on each domain specific quantity via instance of :class:`DomainQuantity`. + +# While there can be multiple domains registered. Currently, only one domain can be +# active. This active domain is define in the configuration using the ``domain_name``. + +# Arguments: +# name: A name for the domain. This is used as key in the configuration ``config.domain``. +# domain_entry_class: A subclass of :class:`EntryMetadata` that adds the +# domain specific quantities. +# quantities: Additional specifications for the quantities in ``domain_entry_class`` as +# instances of :class:`DomainQuantity`. +# metrics: Tuples of elastic field name and elastic aggregation operation that +# can be used to create statistic values. +# groups: Tuple of quantity name and metric that describes quantities that +# can be used to group entries by quantity values. +# root_sections: The name of the possible root sections for this domain. +# metainfo_all_package: The name of the full metainfo package for this domain. 
+# ''' +# instances: Dict[str, 'Domain'] = {} + +# base_quantities = dict( +# authors=DomainQuantity( +# elastic_field='authors.name.keyword', multi=True, aggregations=1000, +# description=( +# 'Search for the given author. Exact keyword matches in the form "Lastname, ' +# 'Firstname".')), +# uploader_id=DomainQuantity( +# elastic_field='uploader.user_id', multi=False, aggregations=5, +# description=('Search for the given uploader id.')), +# uploader_name=DomainQuantity( +# elastic_field='uploader.name.keyword', multi=False, +# description=('Search for the exact uploader\'s full name')), +# comment=DomainQuantity( +# elastic_search_type='match', multi=True, +# description='Search within the comments. This is a text search ala google.'), +# paths=DomainQuantity( +# elastic_search_type='match', elastic_field='files', multi=True, +# description='Search for elements in one of the file paths. The paths are split at all "/".'), +# files=DomainQuantity( +# elastic_field='files.keyword', multi=True, +# description='Search for exact file name with full path.'), +# quantities=DomainQuantity( +# multi=True, +# description='Search for the existence of a certain meta-info quantity'), +# upload_id=DomainQuantity( +# description='Search for the upload_id.', +# multi=True, argparse_action='split', elastic_search_type='terms'), +# upload_time=DomainQuantity( +# description='Search for the exact upload time.', elastic_search_type='terms'), +# upload_name=DomainQuantity( +# description='Search for the upload_name.', +# multi=True, argparse_action='split', elastic_search_type='terms'), +# calc_id=DomainQuantity( +# description='Search for the calc_id.', +# multi=True, argparse_action='split', elastic_search_type='terms'), +# pid=DomainQuantity( +# description='Search for the pid.', +# multi=True, argparse_action='split', elastic_search_type='terms'), +# raw_id=DomainQuantity( +# description='Search for the raw_id.', +# multi=True, argparse_action='split', elastic_search_type='terms'), +# mainfile=DomainQuantity( +# description='Search for the mainfile.', +# multi=True, argparse_action='append', elastic_search_type='terms'), +# external_id=DomainQuantity( +# description='External user provided id. Does not have to be unique necessarily.', +# multi=True, argparse_action='split', elastic_search_type='terms'), +# calc_hash=DomainQuantity( +# description='Search for the entries hash.', +# multi=True, argparse_action='split', elastic_search_type='terms'), +# dataset=DomainQuantity( +# elastic_field='datasets.name', multi=True, elastic_search_type='match', +# description='Search for a particular dataset by name.'), +# dataset_id=DomainQuantity( +# elastic_field='datasets.id', multi=True, +# description='Search for a particular dataset by its id.'), +# doi=DomainQuantity( +# elastic_field='datasets.doi', multi=True, +# description='Search for a particular dataset by doi (incl. http://dx.doi.org).'), +# formula=DomainQuantity( +# 'The chemical (hill) formula of the simulated system.', +# order_default=True), +# atoms=DomainQuantity( +# 'The atom labels of all atoms in the simulated system.', +# aggregations=len(ase.data.chemical_symbols), multi=True), +# only_atoms=DomainQuantity( +# 'The atom labels concatenated in species-number order. 
Used with keyword search ' +# 'to facilitate exclusive searches.', +# elastic_value=only_atoms, metadata_field='atoms', multi=True), +# n_atoms=DomainQuantity( +# 'Number of atoms in the simulated system', +# elastic_mapping=Integer())) + +# base_metrics = dict( +# datasets=('dataset_id', 'cardinality'), +# uploads=('upload_id', 'cardinality'), +# uploaders=('uploader_name', 'cardinality'), +# authors=('authors', 'cardinality'), +# unique_entries=('calc_hash', 'cardinality')) + +# base_groups = dict( +# datasets=('dataset_id', 'datasets'), +# uploads=('upload_id', 'uploads')) + +# @classmethod +# def get_quantity(cls, name_spec) -> DomainQuantity: +# ''' +# Returns the quantity definition for the given quantity name. The name can be the +# qualified name (``domain.quantity``) or in Django-style (``domain__quantity``). +# ''' +# qualified_name = name_spec.replace('__', '.') +# split_name = qualified_name.split('.') +# if len(split_name) == 1: +# return cls.base_quantities[split_name[0]] +# elif len(split_name) == 2: +# return cls.instances[split_name[0]].quantities[split_name[1]] +# else: +# assert False, 'qualified quantity name depth must be 2 max' + +# @classmethod +# def all_quantities(cls) -> Iterable[DomainQuantity]: +# return set([quantity for domain in cls.instances.values() for quantity in domain.quantities.values()]) + +# def __init__( +# self, name: str, domain_entry_class: Type[EntryMetadata], +# quantities: Dict[str, DomainQuantity], +# metrics: Dict[str, Tuple[str, str]], +# groups: Dict[str, Tuple[str, str]], +# default_statistics: List[str], +# root_sections=['section_run', 'section_entry_info'], +# metainfo_all_package='all.nomadmetainfo.json') -> None: + +# domain_quantities = quantities + +# Domain.instances[name] = self + +# self.name = name +# self.domain_entry_class = domain_entry_class +# self.domain_quantities: Dict[str, DomainQuantity] = {} +# self.root_sections = root_sections +# self.metainfo_all_package = metainfo_all_package +# self.default_statistics = default_statistics + +# # TODO +# return + +# reference_domain_calc = EntryMetadata(domain=name) +# reference_general_calc = EntryMetadata(domain=None) + +# # add non specified quantities from additional metadata class fields +# for quantity_name in reference_domain_calc.__dict__.keys(): +# if not hasattr(reference_general_calc, quantity_name): +# quantity = domain_quantities.get(quantity_name, None) +# if quantity is None: +# domain_quantities[quantity_name] = DomainQuantity() + +# # ensure domain quantity names and domains +# for quantity_name, quantity in domain_quantities.items(): +# quantity.domain = name +# quantity.name = quantity_name + +# # add domain prefix to domain metrics and groups +# domain_metrics = { +# '%s.%s' % (name, key): (quantities[quantity].qualified_elastic_field, es_op) +# for key, (quantity, es_op) in metrics.items()} +# domain_groups = { +# '%s.%s' % (name, key): (quantities[quantity].qualified_name, '%s.%s' % (name, metric)) +# for key, (quantity, metric) in groups.items()} + +# # add all domain quantities +# for quantity_name, quantity in domain_quantities.items(): +# self.domain_quantities[quantity.name] = quantity + +# # update the multi status from an example value +# if quantity.metadata_field in reference_domain_calc.__dict__: +# quantity.multi = isinstance( +# reference_domain_calc.__dict__[quantity.metadata_field], list) + +# assert not hasattr(reference_general_calc, quantity_name), \ +# 'quantity overrides general non domain quantity: %s' % quantity_name + +# # construct 
search quantities from base and domain quantities +# self.quantities = dict(**Domain.base_quantities) +# for quantity_name, quantity in self.quantities.items(): +# quantity.name = quantity_name +# self.quantities.update(self.domain_quantities) + +# assert any(quantity.order_default for quantity in Domain.instances[name].quantities.values()), \ +# 'you need to define a order default quantity' + +# # construct metrics from base and domain metrics +# self.metrics = dict(**Domain.base_metrics) +# self.metrics.update(**domain_metrics) +# self.groups = dict(**Domain.base_groups) +# self.groups.update(**domain_groups) + +# @property +# def metrics_names(self) -> Iterable[str]: +# ''' Just the names of all metrics. ''' +# return list(self.metrics.keys()) + +# @property +# def aggregations(self) -> Dict[str, int]: +# ''' +# The search aggregations and the default maximum number of calculated buckets. See also +# :func:`nomad.search.aggregations`. +# ''' +# return { +# quantity.name: quantity.aggregations +# for quantity in self.quantities.values() +# if quantity.aggregations > 0 +# } + +# @property +# def aggregations_names(self) -> Iterable[str]: +# ''' Just the names of all metrics. ''' +# return list(self.aggregations.keys()) + +# @property +# def order_default_quantity(self) -> str: +# for quantity in self.quantities.values(): +# if quantity.order_default: +# return quantity.qualified_name + +# assert False, 'each domain must defina an order_default quantity' def get_optional_backend_value(backend, key, section, unavailable_value=None, logger=None): diff --git a/nomad/datamodel/dft.py b/nomad/datamodel/dft.py index f9cdb22341f0dd2ee7fdb677852a72afa8aa444f..49efdfcd8165e1c7aadbd3c1fe95e443f2852a08 100644 --- a/nomad/datamodel/dft.py +++ b/nomad/datamodel/dft.py @@ -12,21 +12,19 @@ # See the License for the specific language governing permissions and # limitations under the License. -""" +''' DFT specific metadata -""" +''' -from typing import List import re -from elasticsearch_dsl import Integer, Object, InnerDoc, Keyword from nomadcore.local_backend import ParserEvent from nomad import utils, config -from nomad.metainfo import optimade, MSection, Section, Quantity, MEnum -from nomad.metainfo.elastic import elastic_mapping, elastic_obj +from nomad.metainfo import optimade, MSection, Section, Quantity, MEnum, SubSection +from nomad.metainfo.search import SearchQuantity -from .base import CalcWithMetadata, DomainQuantity, Domain, get_optional_backend_value +from .base import get_optional_backend_value xc_treatments = { @@ -38,7 +36,7 @@ xc_treatments = { 'vdw': 'vdW', 'lda': 'LDA', } -""" https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-meta-info/wikis/metainfo/XC-functional """ +''' https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-meta-info/wikis/metainfo/XC-functional ''' basis_sets = { 'gaussians': 'gaussians', @@ -70,81 +68,130 @@ def simplify_version(version): class Label(MSection): - """ + ''' Label that further classify a structure. Attributes: label: The label as a string type: The type of the label source: The source that this label was taken from. 
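A small sketch for the Label section described above and the DFTMetadata section defined next in this hunk; it assumes that metainfo sections accept their quantities as keyword arguments (as EntryMetadata is used elsewhere in this diff) and all values are made up.

from nomad.datamodel.dft import DFTMetadata, Label

dft = DFTMetadata(code_name='VASP', basis_set='plane waves', spacegroup=221)
# labels is a repeating sub-section; list assignment mirrors the m_update code below
dft.labels = [Label(label='bcc', type='classification', source='springer')]
print(dft.m_to_dict())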
- """ - m_def = Section(a_elastic=dict(type=InnerDoc)) - - label = Quantity(type=str, a_elastic=dict(type=Keyword)) + ''' + label = Quantity(type=str, a_search=SearchQuantity()) type = Quantity(type=MEnum( 'compound_class', 'classification', 'prototype', 'prototype_id'), - a_elastic=dict(type=Keyword)) + a_search=SearchQuantity()) source = Quantity( type=MEnum('springer', 'aflow_prototype_library'), - a_elastic=dict(type=Keyword)) - - -ESLabel = elastic_mapping(Label.m_def, InnerDoc) - - -class DFTCalcWithMetadata(CalcWithMetadata): - - def __init__(self, **kwargs): - self.basis_set: str = None - self.xc_functional: str = None - self.system: str = None - self.crystal_system: str = None - self.spacegroup: str = None - self.spacegroup_symbol: str = None - self.code_name: str = None - self.code_version: str = None - - self.n_geometries = 0 - self.n_calculations = 0 - self.n_total_energies = 0 - self.n_quantities = 0 - self.quantities = [] - self.geometries = [] - self.group_hash: str = None - - self.labels: List[Label] = [] - self.optimade: optimade.OptimadeEntry = None - - super().__init__(**kwargs) - - def update(self, **kwargs): - super().update(**kwargs) - - if len(self.labels) > 0: - self.labels = [Label.m_from_dict(label) for label in self.labels] - - if self.optimade is not None and isinstance(self.optimade, dict): - self.optimade = optimade.OptimadeEntry.m_from_dict(self.optimade) - - def __getitem__(self, key): - value = super().__getitem__(key) - - if key == 'labels': - return [item.m_to_dict() for item in value] - - if key == 'optimade': - return value.m_to_dict() - - return value + a_search=SearchQuantity()) + + +class DFTMetadata(MSection): + m_def = Section(a_domain='dft') + + basis_set = Quantity( + type=str, default='not processed', + description='The used basis set functions.', + a_search=SearchQuantity(statistic_size=20, default_statistic=True)) + + xc_functional = Quantity( + type=str, default='not processed', + description='The libXC based xc functional classification used in the simulation.', + a_search=SearchQuantity(statistic_size=20, default_statistic=True)) + + system = Quantity( + type=str, default='not processed', + description='The system type of the simulated system.', + a_search=SearchQuantity(default_statistic=True)) + + crystal_system = Quantity( + type=str, default='not processed', + description='The crystal system type of the simulated system.', + a_search=SearchQuantity(default_statistic=True)) + + spacegroup = Quantity( + type=int, default='not processed', + description='The spacegroup of the simulated system as number.', + a_search=SearchQuantity()) + + spacegroup_symbol = Quantity( + type=str, default='not processed', + description='The spacegroup as international short symbol.', + a_search=SearchQuantity()) + + code_name = Quantity( + type=str, default='not processed', + description='The name of the used code.', + a_search=SearchQuantity(statistic_size=40, default_statistic=True)) + + code_version = Quantity( + type=str, default='not processed', + description='The version of the used code.', + a_search=SearchQuantity()) + + n_geometries = Quantity( + type=int, description='Number of unique geometries.', + a_sesrch=SearchQuantity(metric_name='geometries', metric='sum')) + + n_calculations = Quantity( + type=int, + description='Number of single configuration calculation sections', + a_search=SearchQuantity(metric_name='calculations', metric='sum')) + + n_total_energies = Quantity( + type=int, description='Number of total energy calculations', + 
a_search=SearchQuantity(metric_name='total_energies', metric='sum')) + + n_quantities = Quantity( + type=int, description='Number of metainfo quantities parsed from the entry.', + a_search=SearchQuantity(metric='sum', metric_name='quantities')) + + quantities = Quantity( + type=str, shape=['0..*'], + description='All quantities that are used by this entry.', + a_search=SearchQuantity( + metric_name='distinct_quantities', metric='cardinality', many_and='append')) + + geometries = Quantity( + type=str, shape=['0..*'], + description='Hashes for each simulated geometry', + a_search=SearchQuantity(metric_name='unique_geometries', metric='cardinality')) + + group_hash = Quantity( + type=str, + description='Hashes that describe unique geometries simulated by this code run.', + a_search=SearchQuantity(many_or='append', group='groups', metric_name='groups', metric='cardinality')) + + labels = SubSection( + sub_section=Label, repeats=True, + description='The labels taken from AFLOW prototypes and springer.', + a_search='labels') + + optimade = SubSection( + sub_section=optimade.OptimadeEntry, + description='Metadata used for the optimade API.', + a_search='optimade') + + def m_update(self, **kwargs): + # TODO necessary? + if 'labels' in kwargs: + print('########################## A') + self.labels = [Label.m_from_dict(label) for label in kwargs.pop('labels')] + + if 'optimade' in kwargs: + print('########################## B') + self.optimade = optimade.OptimadeEntry.m_from_dict(kwargs.pop('optimade')) + + super().m_update(**kwargs) def apply_domain_metadata(self, backend): from nomad.normalizing.system import normalized_atom_labels + entry = self.m_parent logger = utils.get_logger(__name__).bind( - upload_id=self.upload_id, calc_id=self.calc_id, mainfile=self.mainfile) + upload_id=entry.upload_id, calc_id=entry.calc_id, mainfile=entry.mainfile) # code and code specific ids self.code_name = backend.get_value('program_name', 0) @@ -153,44 +200,44 @@ class DFTCalcWithMetadata(CalcWithMetadata): except KeyError: self.code_version = config.services.unavailable_value - self.raw_id = get_optional_backend_value(backend, 'raw_id', 'section_run', 0) + raw_id = get_optional_backend_value(backend, 'raw_id', 'section_run', None) + if raw_id is not None: + entry.raw_id = raw_id # metadata (system, method, chemistry) - self.atoms = get_optional_backend_value(backend, 'atom_labels', 'section_system', [], logger=logger) - if hasattr(self.atoms, 'tolist'): - self.atoms = self.atoms.tolist() - self.n_atoms = len(self.atoms) - self.atoms = list(set(normalized_atom_labels(set(self.atoms)))) - self.atoms.sort() + atoms = get_optional_backend_value(backend, 'atom_labels', 'section_system', [], logger=logger) + if hasattr(atoms, 'tolist'): + atoms = atoms.tolist() + entry.n_atoms = len(atoms) + atoms = list(set(normalized_atom_labels(set(atoms)))) + atoms.sort() + entry.atoms = atoms self.crystal_system = get_optional_backend_value( backend, 'crystal_system', 'section_symmetry', logger=logger) self.spacegroup = get_optional_backend_value( backend, 'space_group_number', 'section_symmetry', 0, logger=logger) self.spacegroup_symbol = get_optional_backend_value( - backend, 'international_short_symbol', 'section_symmetry', 0, logger=logger) + backend, 'international_short_symbol', 'section_symmetry', logger=logger) self.basis_set = map_basis_set_to_basis_set_label( get_optional_backend_value(backend, 'program_basis_set_type', 'section_run', logger=logger)) self.system = get_optional_backend_value( backend, 'system_type', 
'section_system', logger=logger) - self.formula = get_optional_backend_value( + entry.formula = get_optional_backend_value( backend, 'chemical_composition_bulk_reduced', 'section_system', logger=logger) self.xc_functional = map_functional_name_to_xc_treatment( get_optional_backend_value(backend, 'XC_functional_name', 'section_method', logger=logger)) # grouping self.group_hash = utils.hash( - self.formula, + entry.formula, self.spacegroup, self.basis_set, self.xc_functional, self.code_name, self.code_version, - self.with_embargo, - self.comment, - self.references, - self.uploader, - self.coauthors) + entry.with_embargo, + entry.uploader) # metrics and quantities quantities = set() @@ -247,69 +294,3 @@ class DFTCalcWithMetadata(CalcWithMetadata): # optimade self.optimade = backend.get_mi2_section(optimade.OptimadeEntry.m_def) - - -def _elastic_label_value(label): - if isinstance(label, str): - return label - else: - return elastic_obj(label, ESLabel) - - -Domain( - 'dft', DFTCalcWithMetadata, - quantities=dict( - basis_set=DomainQuantity( - 'The used basis set functions.', aggregations=20), - xc_functional=DomainQuantity( - 'The xc functional type used for the simulation.', aggregations=20), - system=DomainQuantity( - 'The system type of the simulated system.', aggregations=10), - crystal_system=DomainQuantity( - 'The crystal system type of the simulated system.', aggregations=10), - code_name=DomainQuantity( - 'The code name.', aggregations=40), - spacegroup=DomainQuantity('The spacegroup of the simulated system as number'), - spacegroup_symbol=DomainQuantity('The spacegroup as international short symbol'), - geometries=DomainQuantity( - 'Hashes that describe unique geometries simulated by this code run.', multi=True), - group_hash=DomainQuantity( - 'A hash from key metadata used to group similar entries.'), - quantities=DomainQuantity( - 'All quantities that are used by this calculation', - metric=('quantities', 'value_count'), multi=True), - n_total_energies=DomainQuantity( - 'Number of total energy calculations', - elastic_mapping=Integer()), - n_calculations=DomainQuantity( - 'Number of single configuration calculation sections', - elastic_mapping=Integer()), - n_quantities=DomainQuantity( - 'Number of overall parsed quantities', - elastic_mapping=Integer()), - n_geometries=DomainQuantity( - 'Number of unique geometries', - elastic_mapping=Integer()), - labels=DomainQuantity( - 'Search based for springer classification and aflow prototypes', - elastic_field='labels.label', - elastic_mapping=Object(ESLabel), - elastic_value=lambda labels: [_elastic_label_value(label) for label in labels], - multi=True), - optimade=DomainQuantity( - 'Search based on optimade\'s filter query language', - elastic_mapping=Object(optimade.ESOptimadeEntry), - elastic_value=lambda entry: elastic_obj(entry, optimade.ESOptimadeEntry) - )), - metrics=dict( - total_energies=('n_total_energies', 'sum'), - calculations=('n_calculations', 'sum'), - quantities=('n_quantities', 'sum'), - geometries=('n_geometries', 'sum'), - unique_geometries=('geometries', 'cardinality'), - groups=('group_hash', 'cardinality') - ), - groups=dict( - groups=('group_hash', 'groups')), - default_statistics=[ - 'atoms', 'dft.basis_set', 'dft.xc_functional', 'dft.system', 'dft.crystal_system', 'dft.code_name']) diff --git a/nomad/datamodel/ems.py b/nomad/datamodel/ems.py index 14277f4a9f213a3b4086e1b84c732001780e6912..ff6a983567124ba34fe46e22ba52c314e18f2c01 100644 --- a/nomad/datamodel/ems.py +++ b/nomad/datamodel/ems.py @@ -12,55 +12,60 
@@ # See the License for the specific language governing permissions and # limitations under the License. -""" +''' Experimental material science specific metadata -""" +''' from nomad import utils +from nomad.metainfo import Quantity, MSection, Section, Datetime +from nomad.metainfo.search import SearchQuantity -from .base import CalcWithMetadata, DomainQuantity, Domain, get_optional_backend_value +from .base import get_optional_backend_value -class EMSEntryWithMetadata(CalcWithMetadata): +class EMSMetadata(MSection): + m_def = Section(a_domain='ems') - def __init__(self, **kwargs): - # sample quantities - self.chemical: str = None - self.sample_constituents: str = None - self.sample_microstructure: str = None + # sample quantities + chemical = Quantity(type=str, default='not processed', a_search=SearchQuantity()) + sample_constituents = Quantity(type=str, default='not processed', a_search=SearchQuantity(default_statistic=True)) + sample_microstructure = Quantity(type=str, default='not processed', a_search=SearchQuantity(default_statistic=True)) - # general metadata - self.experiment_summary: str = None - self.experiment_location: str = None - self.experiment_time: str = None + # general metadata + experiment_summary = Quantity(type=str, default='not processed', a_search=SearchQuantity()) + experiment_location = Quantity(type=str, default='not processed', a_search=SearchQuantity()) + experiment_time = Quantity(type=Datetime, default='not processed', a_search=SearchQuantity()) - # method - self.method: str = None - self.probing_method: str = None + # method + method = Quantity(type=str, default='not processed', a_search=SearchQuantity(default_statistic=True)) + probing_method = Quantity(type=str, default='not processed', a_search=SearchQuantity(default_statistic=True)) - # data metadata - self.repository_name: str = None - self.repository_url: str = None - self.preview_url: str = None + # data metadata + repository_name = Quantity(type=str, default='not processed', a_search=SearchQuantity()) + repository_url = Quantity(type=str, default='not processed', a_search=SearchQuantity()) + preview_url = Quantity(type=str, default='not processed', a_search=SearchQuantity()) - self.quantities = [] - self.group_hash: str = None - - super().__init__(**kwargs) + # TODO move + quantities = Quantity(type=str, shape=['0..*'], default=[], a_search=SearchQuantity()) + group_hash = Quantity(type=str, a_search=SearchQuantity()) def apply_domain_metadata(self, backend): + entry = self.m_parent logger = utils.get_logger(__name__).bind( - upload_id=self.upload_id, calc_id=self.calc_id, mainfile=self.mainfile) + upload_id=entry.upload_id, calc_id=entry.calc_id, mainfile=entry.mainfile) - self.formula = get_optional_backend_value( + entry.formula = get_optional_backend_value( backend, 'sample_chemical_formula', 'section_sample', logger=logger) - self.atoms = get_optional_backend_value( + atoms = get_optional_backend_value( backend, 'sample_atom_labels', 'section_sample', logger=logger) - if hasattr(self.atoms, 'tolist'): - self.atoms = self.atoms.tolist() - self.n_atoms = len(self.atoms) - self.atoms = list(set(self.atoms)) - self.atoms.sort() + if hasattr(atoms, 'tolist'): + atoms = atoms.tolist() + entry.n_atoms = len(atoms) + + atoms = list(set(atoms)) + atoms.sort() + entry.atoms = atoms + self.chemical = get_optional_backend_value( backend, 'sample_chemical_name', 'section_sample', logger=logger) self.sample_microstructure = get_optional_backend_value( @@ -88,14 +93,11 @@ class 
EMSEntryWithMetadata(CalcWithMetadata): backend, 'data_preview_url', 'section_data', logger=logger) self.group_hash = utils.hash( - self.formula, + entry.formula, self.method, self.experiment_location, - self.with_embargo, - self.comment, - self.references, - self.uploader, - self.coauthors) + entry.with_embargo, + entry.uploader) quantities = set() @@ -103,26 +105,3 @@ class EMSEntryWithMetadata(CalcWithMetadata): quantities.add(meta_info) self.quantities = list(quantities) - - -Domain( - 'ems', EMSEntryWithMetadata, - root_sections=['section_experiment', 'section_entry_info'], - metainfo_all_package='all.experimental.nomadmetainfo.json', - quantities=dict( - method=DomainQuantity( - 'The experimental method used.', aggregations=20), - probing_method=DomainQuantity( - 'The used probing method.', aggregations=10), - sample_microstructure=DomainQuantity( - 'The sample micro structure.', aggregations=10), - sample_constituents=DomainQuantity( - 'The sample constituents.', aggregations=10), - quantities=DomainQuantity( - 'All quantities that are used by this calculation')), - metrics=dict( - quantities=('quantities', 'value_count')), - groups=dict(), - default_statistics=[ - 'atoms', 'ems.method', 'ems.probing_method', 'ems.sample_microstructure', - 'ems.sample_constituents']) diff --git a/nomad/datamodel/metainfo.py b/nomad/datamodel/metainfo.py index ff6eaebd254ae01b4d462eeeeaaa74c59d2022ba..07212222e06dfcb19a171c8d298daff921a857de 100644 --- a/nomad/datamodel/metainfo.py +++ b/nomad/datamodel/metainfo.py @@ -12,20 +12,36 @@ # See the License for the specific language governing permissions and # limitations under the License. -""" +''' This duplicates functionality for .base.py. It represents first pieces of a transition towards using the new metainfo system for all repository metadata. -""" -from typing import Dict +''' +from typing import Dict, Any from cachetools import cached, TTLCache -from elasticsearch_dsl import Keyword +from elasticsearch_dsl import Keyword, Text, analyzer, tokenizer +import ase.data from nomad import metainfo, config +from nomad.metainfo.search import SearchQuantity import nomad.metainfo.mongoengine +from .dft import DFTMetadata +from .ems import EMSMetadata + + +def _only_atoms(atoms): + numbers = [ase.data.atomic_numbers[atom] for atom in atoms] + only_atoms = [ase.data.chemical_symbols[number] for number in sorted(numbers)] + return ''.join(only_atoms) + + +path_analyzer = analyzer( + 'path_analyzer', + tokenizer=tokenizer('path_tokenizer', 'pattern', pattern='/')) + class User(metainfo.MSection): - """ A NOMAD user. + ''' A NOMAD user. Typically a NOMAD user has a NOMAD account. The user related data is managed by NOMAD keycloak user-management system. 
Users are used to denote uploaders, authors, @@ -41,16 +57,26 @@ class User(metainfo.MSection): create: The time the account was created repo_user_id: The id that was used to identify this user in the NOMAD CoE Repository is_admin: Bool that indicated, iff the user the use admin user - """ + ''' + + user_id = metainfo.Quantity( + type=str, + a_me=dict(primary_key=True), + a_search=SearchQuantity()) - user_id = metainfo.Quantity(type=str, a_me=dict(primary_key=True)) name = metainfo.Quantity( type=str, - derived=lambda user: ('%s %s' % (user.first_name, user.last_name)).strip()) + derived=lambda user: ('%s %s' % (user.first_name, user.last_name)).strip(), + a_search=SearchQuantity(es_mapping=Text(fields={'keyword': Keyword()}))) + first_name = metainfo.Quantity(type=str) last_name = metainfo.Quantity(type=str) email = metainfo.Quantity( - type=str, a_me=dict(index=True), a_elastic=dict(mapping=Keyword)) + type=str, + a_me=dict(index=True), + a_elastic=dict(mapping=Keyword), # TODO remove? + a_search=SearchQuantity()) + username = metainfo.Quantity(type=str) affiliation = metainfo.Quantity(type=str) affiliation_address = metainfo.Quantity(type=str) @@ -76,8 +102,33 @@ class User(metainfo.MSection): } +class UserReference(metainfo.Reference): + ''' + Special metainfo reference type that allows to use user_ids as values. It automatically + resolves user_ids to User objects. This is done lazily on getting the value. + ''' + + def __init__(self): + super().__init__(User.m_def) + + def set_normalize(self, section: metainfo.MSection, quantity_def: metainfo.Quantity, value: Any) -> Any: + if isinstance(value, str): + return metainfo.MProxy(value) + else: + return super().set_normalize(section, quantity_def, value) + + def resolve(self, section: metainfo.MSection, quantity_def: metainfo.Quantity, value: Any) -> metainfo.MSection: + return User.get(user_id=value.url) + + def serialize(self, section: metainfo.MSection, quantity_def: metainfo.Quantity, value: Any) -> Any: + return value.user_id + + +user_reference = UserReference() + + class Dataset(metainfo.MSection): - """ A Dataset is attached to one or many entries to form a set of data. + ''' A Dataset is attached to one or many entries to form a set of data. Args: dataset_id: The unique identifier for this dataset as a string. It should be @@ -94,31 +145,96 @@ class Dataset(metainfo.MSection): pid: The original NOMAD CoE Repository dataset PID. Old DOIs still reference datasets based on this id. Is not used for new datasets. created: The date when the dataset was first created. - """ + ''' dataset_id = metainfo.Quantity( type=str, - a_me=dict(primary_key=True)) + a_me=dict(primary_key=True), + a_search=SearchQuantity()) name = metainfo.Quantity( type=str, - a_me=dict(index=True)) + a_me=dict(index=True), + a_search=SearchQuantity()) user_id = metainfo.Quantity( type=str, a_me=dict(index=True)) doi = metainfo.Quantity( type=str, - a_me=dict(index=True)) + a_me=dict(index=True), + a_search=SearchQuantity()) pid = metainfo.Quantity( type=str, a_me=dict(index=True)) created = metainfo.Quantity( type=metainfo.Datetime, - a_me=dict(index=True)) + a_me=dict(index=True), + a_search=SearchQuantity()) -class UserMetadata(metainfo.MSection): - """ NOMAD entry quantities that are given by the user or determined by user actions. +class DatasetReference(metainfo.Reference): + ''' + Special metainfo reference type that allows to use dataset_ids as values. It automatically + resolves dataset_ids to Dataset objects. This is done lazily on getting the value. 
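
A brief, illustrative sketch of how the reference quantities above are intended to be used; it is not part of the patch. `EntryMetadata` and `user_reference` are the definitions from this diff, the user id is a placeholder, and lazy resolution assumes a configured NOMAD installation because `User.get()` goes through keycloak.

```python
from nomad import datamodel

entry = datamodel.EntryMetadata(domain='dft', calc_id='test')
entry.uploader = '<some-keycloak-user-id>'  # set_normalize stores the id as an MProxy
print(entry.uploader.name)                  # resolve() lazily turns the proxy into a User on access
```
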
+    '''
+
+    def __init__(self):
+        super().__init__(Dataset.m_def)
+
+    def set_normalize(self, section: metainfo.MSection, quantity_def: metainfo.Quantity, value: Any) -> Any:
+        if isinstance(value, str):
+            return metainfo.MProxy(value)
+        else:
+            return super().set_normalize(section, quantity_def, value)
+
+    def resolve(self, section: metainfo.MSection, quantity_def: metainfo.Quantity, value: Any) -> metainfo.MSection:
+        return Dataset.m_def.m_x('me').get(dataset_id=value.url)
+
+    def serialize(self, section: metainfo.MSection, quantity_def: metainfo.Quantity, value: Any) -> Any:
+        if isinstance(value, metainfo.MProxy):
+            return value.url
+        else:
+            return value.dataset_id
+
+    def deserialize(self, section: metainfo.MSection, quantity_def: metainfo.Quantity, value: Any) -> Any:
+        return metainfo.MProxy(value)
+
+
+dataset_reference = DatasetReference()
+
+
+class EditableUserMetadata(metainfo.MCategory):
+    ''' NOMAD entry quantities that can be edited by the user after publish. '''
+
+
+class UserMetadata(metainfo.MCategory):
+    ''' NOMAD entry quantities that are given by the user or determined by user actions. '''
+    pass
+
+
+class DomainMetadata(metainfo.MCategory):
+    ''' NOMAD entry quantities that are determined by the uploaded data. '''
+    pass
+
+
+class EntryMetadata(metainfo.MSection):
+    '''
+    Attributes:
+        upload_id: The ``upload_id`` of the calculations upload (random UUID).
+        calc_id: The unique mainfile based calculation id.
+        calc_hash: The raw file content based checksum/hash of this calculation.
+        pid: The unique persistent id of this calculation.
+        mainfile: The upload relative mainfile path.
+        domain: Must be the key for a registered domain. This determines which actual
+            subclass is instantiated.
+
+        files: A list of all files, relative to upload.
+        upload_time: The time when the calc was uploaded.
+        uploader: An object describing the uploading user, has at least ``user_id``
+        processed: Boolean indicating if this calc was successfully processed and archive
+            data and calc metadata is available.
+        last_processing: A datetime with the time of the last successful processing.
+        nomad_version: A string that describes the version of the nomad software that was
+            used to do the last successful processing.
 
-    Args:
         comment: An arbitrary string with user provided information about the entry.
         references: A list of URLs for resources that are related to the entry.
         uploader: Id of the uploader of this entry.
@@ -131,16 +247,217 @@ class UserMetadata(metainfo.MSection):
            user, and users the entry is shared with (see shared_with).
upload_time: The time that this entry was uploaded datasets: Ids of all datasets that this entry appears in - """ - - comment = metainfo.Quantity(type=str) - references = metainfo.Quantity(type=str, shape=['0..*']) - uploader = metainfo.Quantity(type=str, a_flask=dict(admin_only=True, verify=User)) - coauthors = metainfo.Quantity(type=str, shape=['0..*'], a_flask=dict(verify=User)) - shared_with = metainfo.Quantity(type=str, shape=['0..*'], a_flask=dict(verify=User)) - with_embargo = metainfo.Quantity(type=bool) - upload_time = metainfo.Quantity(type=metainfo.Datetime, a_flask=dict(admin_only=True)) - datasets = metainfo.Quantity(type=str, shape=['0..*'], a_flask=dict(verify=Dataset)) + ''' + upload_id = metainfo.Quantity( + type=str, + description='A random UUID that uniquely identifies the upload of the entry.', + a_search=SearchQuantity( + many_or='append', group='uploads', metric_name='uploads', metric='cardinality')) + + calc_id = metainfo.Quantity( + type=str, + description='A unique ID based on the upload id and entry\'s mainfile.', + a_search=SearchQuantity(many_or='append')) + + calc_hash = metainfo.Quantity( + type=str, + description='A raw file content based checksum/hash.', + a_search=SearchQuantity( + many_or='append', metric_name='unique_entries', metric='cardinality')) + + mainfile = metainfo.Quantity( + type=str, + description='The upload relative mainfile path.', + a_search=[ + SearchQuantity( + description='Search within the mainfile path.', + es_mapping=Text(multi=True, analyzer=path_analyzer, fields={'keyword': Keyword()}), + many_or='append', es_quantity='mainfile.keyword'), + SearchQuantity( + description='Search for the exact mainfile.', + many_and='append', name='mainfile_path', es_quantity='mainfile.keyword')]) + + files = metainfo.Quantity( + type=str, shape=['0..*'], + description='The entries raw file paths relative to its upload.', + a_search=[ + SearchQuantity( + description='Search within the paths.', name='path', + es_mapping=Text( + multi=True, analyzer=path_analyzer, fields={'keyword': Keyword()}) + ), + SearchQuantity( + description='Search for exact paths.', + many_or='append', name='files', es_quantity='files.keyword')]) + + pid = metainfo.Quantity( + type=int, + description='The unique, sequentially enumerated, integer persistent identifier', + a_search=SearchQuantity(many_or='append')) + + raw_id = metainfo.Quantity( + type=str, + description='A raw format specific id that was acquired from the files of this entry', + a_search=SearchQuantity(many_or='append')) + + domain = metainfo.Quantity( + type=metainfo.MEnum('dft', 'ems'), + description='The material science domain', + a_search=SearchQuantity()) + + published = metainfo.Quantity( + type=bool, default=False, + description='Indicates if the entry is published', + a_search=SearchQuantity()) + + processed = metainfo.Quantity( + type=bool, default=False, + description='Indicates that the entry is successfully processed.', + a_search=SearchQuantity()) + + last_processing = metainfo.Quantity( + type=metainfo.Datetime, + description='The datetime of the last attempted processing.') + + nomad_version = metainfo.Quantity( + type=str, + description='The NOMAD version used for the last processing attempt.', + a_search=SearchQuantity(many_or='append')) + nomad_commit = metainfo.Quantity( + type=str, + description='The NOMAD commit used for the last processing attempt.', + a_search=SearchQuantity(many_or='append')) + parser_name = metainfo.Quantity( + type=str, + description='The NOMAD parser used for the last 
processing attempt.',
+        a_search=SearchQuantity(many_or='append'))
+
+    comment = metainfo.Quantity(
+        type=str, categories=[UserMetadata, EditableUserMetadata],
+        description='A user provided comment.',
+        a_search=SearchQuantity(es_mapping=Text()))
+
+    references = metainfo.Quantity(
+        type=str, shape=['0..*'], categories=[UserMetadata, EditableUserMetadata],
+        description='User provided references (URLs).',
+        a_search=SearchQuantity())
+
+    uploader = metainfo.Quantity(
+        type=user_reference, categories=[UserMetadata],
+        description='The uploader of the entry',
+        a_flask=dict(admin_only=True, verify=User),
+        a_search=[
+            SearchQuantity(
+                description='Search uploader with exact names.',
+                metric_name='uploaders', metric='cardinality',
+                many_or='append', es_quantity='uploader.name.keyword'),
+            SearchQuantity(
+                name='uploader_id', es_quantity='uploader.user_id')
+        ])
+
+    coauthors = metainfo.Quantity(
+        type=user_reference, shape=['0..*'], default=[], categories=[UserMetadata, EditableUserMetadata],
+        description='A user provided list of co-authors.',
+        a_flask=dict(verify=User))
+
+    authors = metainfo.Quantity(
+        type=user_reference, shape=['0..*'],
+        description='All authors (uploader and co-authors).',
+        derived=lambda entry: ([entry.uploader] if entry.uploader is not None else []) + entry.coauthors,
+        a_search=SearchQuantity(
+            description='Search authors with exact names.',
+            metric='cardinality',
+            many_or='append', es_quantity='authors.name.keyword', statistic_size=1000))
+
+    shared_with = metainfo.Quantity(
+        type=user_reference, shape=['0..*'], default=[], categories=[UserMetadata, EditableUserMetadata],
+        description='A user provided list of users to share the entry with.',
+        a_flask=dict(verify=User))
+
+    owners = metainfo.Quantity(
+        type=user_reference, shape=['0..*'],
+        description='All owners (uploader and shared with users).',
+        derived=lambda entry: ([entry.uploader] if entry.uploader is not None else []) + entry.shared_with,
+        a_search=SearchQuantity(
+            description='Search owners with exact names.',
+            many_or='append', es_quantity='owners.name.keyword'))
+
+    with_embargo = metainfo.Quantity(
+        type=bool, default=False, categories=[UserMetadata, EditableUserMetadata],
+        description='Indicates if this entry is under an embargo',
+        a_search=SearchQuantity())
+
+    upload_time = metainfo.Quantity(
+        type=metainfo.Datetime, categories=[UserMetadata],
+        description='The datetime this entry was uploaded to nomad',
+        a_flask=dict(admin_only=True),
+        a_search=SearchQuantity(order_default=True))
+
+    upload_name = metainfo.Quantity(
+        type=str, categories=[UserMetadata],
+        description='The user provided upload name',
+        a_search=SearchQuantity(many_or='append'))
+
+    datasets = metainfo.Quantity(
+        type=dataset_reference, shape=['0..*'], default=[],
+        categories=[UserMetadata, EditableUserMetadata],
+        description='A list of user curated datasets this entry belongs to.',
+        a_flask=dict(verify=Dataset),
+        a_search=[
+            SearchQuantity(
+                es_quantity='datasets.name', many_or='append',
+                description='Search for a particular dataset by exact name.'),
+            SearchQuantity(
+                name='dataset_id', es_quantity='datasets.dataset_id', many_or='append',
+                group='datasets',
+                metric='cardinality', metric_name='datasets',
+                description='Search for a particular dataset by its id.')])
+
+    external_id = metainfo.Quantity(
+        type=str, categories=[UserMetadata],
+        description='A user provided external id.',
+        a_search=SearchQuantity(many_or='split'))
+
+    last_edit = metainfo.Quantity(
+        type=metainfo.Datetime,
categories=[UserMetadata], + description='The datetime the user metadata was edited last.', + a_search=SearchQuantity()) + + formula = metainfo.Quantity( + type=str, categories=[DomainMetadata], + description='A (reduced) chemical formula.', + a_search=SearchQuantity()) + + atoms = metainfo.Quantity( + type=str, shape=['n_atoms'], default=[], categories=[DomainMetadata], + description='The atom labels of all atoms of the entry\'s material.', + a_search=SearchQuantity( + many_and='append', default_statistic=True, statistic_size=len(ase.data.chemical_symbols))) + + only_atoms = metainfo.Quantity( + type=str, categories=[DomainMetadata], + description='The atom labels concatenated in order-number order.', + derived=lambda entry: _only_atoms(entry.atoms), + a_search=SearchQuantity(many_and='append', derived=_only_atoms)) + + n_atoms = metainfo.Quantity( + type=int, categories=[DomainMetadata], + description='The number of atoms in the entry\'s material', + a_search=SearchQuantity()) + + ems = metainfo.SubSection(sub_section=EMSMetadata, a_search='ems') + dft = metainfo.SubSection(sub_section=DFTMetadata, a_search='dft') + + def apply_user_metadata(self, metadata: dict): + ''' Applies a user provided metadata dict to this calc. ''' + self.m_update(**metadata) + + def apply_domain_metadata(self, backend): + assert self.domain is not None, 'all entries must have a domain' + domain_section_def = self.m_def.all_sub_sections.get(self.domain).sub_section + assert domain_section_def is not None, 'unknown domain %s' % self.domain + domain_section = self.m_create(domain_section_def.section_cls) + domain_section.apply_domain_metadata(backend) nomad.metainfo.mongoengine.init_section(User) diff --git a/nomad/doi.py b/nomad/doi.py index f05cf45a15bbff8a50c459cc4277f0764752c11b..8582a2fd125a0af658a8382f77e461e0cb98720a 100644 --- a/nomad/doi.py +++ b/nomad/doi.py @@ -12,10 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. -""" +''' This module contains all functions necessary to manage DOI via datacite.org and its MDS API (https://support.datacite.org/docs/mds-api-guide). -""" +''' import xml.etree.ElementTree as ET import datetime import requests @@ -28,7 +28,7 @@ from nomad import config, utils def edit_url(doi: str, url: str = None): - """ Changes the URL of an already findable DOI. """ + ''' Changes the URL of an already findable DOI. ''' if url is None: url = 'https://repository.nomad-coe.eu/app/gui/datasets/doi/%s' % doi @@ -70,7 +70,7 @@ class DOI(Document): @staticmethod def create(title: str, user: User) -> 'DOI': - """ Creates a unique DOI with the NOMAD DOI prefix. """ + ''' Creates a unique DOI with the NOMAD DOI prefix. ''' # TODO We use a collection of all DOIs in mongo to ensure uniqueness. We attempt # to create new DOIs based on a counter per day until we find a non existing DOI. # This might be bad if many DOIs per day are to be expected. diff --git a/nomad/files.py b/nomad/files.py index 40a0456a7dbb7c33e50f4a7e0e6dfcaab1b7fe36..f6918865b653bbcf157e910a36bf08dd1ce3669e 100644 --- a/nomad/files.py +++ b/nomad/files.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -""" +''' Uploads contains classes and functions to create and maintain file structures for uploads. @@ -46,7 +46,7 @@ might be published! There are multiple ways to solve this. 
Due to the rarity of the case, we take the most simple solution: if one file is public, all files are made public, execpt those being other mainfiles. Therefore, the aux files of a restricted calc might become public! -""" +''' from abc import ABCMeta import sys @@ -60,8 +60,7 @@ import io import pickle import json -from nomad import config, utils -from nomad.datamodel import UploadWithMetadata +from nomad import config, utils, datamodel from nomad.archive import write_archive # TODO this should become obsolete, once we are going beyong python 3.6. For now @@ -76,21 +75,21 @@ user_metadata_filename = 'user_metadata.pickle' def always_restricted(path: str): - """ + ''' Used to put general restrictions on files, e.g. due to licensing issues. Will be called during packing and while accessing public files. - """ + ''' basename = os.path.basename(path) if basename.startswith('POTCAR') and not basename.endswith('.stripped'): return True def copytree(src, dst): - """ + ''' A close on ``shutils.copytree`` that does not try to copy the stats on all files. This is unecessary for our usecase and also causes permission denies for unknown reasons. - """ + ''' os.makedirs(dst, exist_ok=False) for item in os.listdir(src): @@ -103,7 +102,7 @@ def copytree(src, dst): class PathObject: - """ + ''' Object storage-like abstraction for paths in general. Arguments: bucket: The bucket to store this object in @@ -111,7 +110,7 @@ class PathObject: os_path: Override the "object storage" path with the given path. prefix: Add a x-digit prefix directory, e.g. foo/test/ -> foo/tes/test create_prefix: Create the prefix right away - """ + ''' def __init__( self, bucket: str, object_id: str, os_path: str = None, prefix: bool = False, create_prefix: bool = False) -> None: @@ -153,7 +152,7 @@ class PathObject: @property def size(self) -> int: - """ The os determined file size. """ + ''' The os determined file size. ''' return os.stat(self.os_path).st_size def __repr__(self) -> str: @@ -161,13 +160,13 @@ class PathObject: class DirectoryObject(PathObject): - """ + ''' Object storage-like abstraction for directories. Arguments: bucket: The bucket to store this object in object_id: The object id (i.e. directory path) create: True if the directory structure should be created. Default is False. - """ + ''' def __init__(self, bucket: str, object_id: str, create: bool = False, **kwargs) -> None: super().__init__(bucket, object_id, **kwargs) self._create = create @@ -234,7 +233,7 @@ class UploadFiles(DirectoryObject, metaclass=ABCMeta): pickle.dump(data, f) def to_staging_upload_files(self, create: bool = False) -> 'StagingUploadFiles': - """ Casts to or creates corresponding staging upload files or returns None. """ + ''' Casts to or creates corresponding staging upload files or returns None. ''' raise NotImplementedError() @staticmethod @@ -247,7 +246,7 @@ class UploadFiles(DirectoryObject, metaclass=ABCMeta): return None def raw_file(self, file_path: str, *args, **kwargs) -> IO: - """ + ''' Opens a raw file and returns a file-like object. Additional args, kwargs are delegated to the respective `open` call. Arguments: @@ -255,38 +254,38 @@ class UploadFiles(DirectoryObject, metaclass=ABCMeta): Raises: KeyError: If the file does not exist. Restricted: If the file is restricted and upload access evaluated to False. - """ + ''' raise NotImplementedError() def raw_file_size(self, file_path: str) -> int: - """ + ''' Returns: The size of the given raw file. 
- """ + ''' raise NotImplementedError() def raw_file_manifest(self, path_prefix: str = None) -> Generator[str, None, None]: - """ + ''' Returns the path for all raw files in the archive (with a given prefix). Arguments: path_prefix: An optional prefix; only returns those files that have the prefix. Returns: An iterable over all (matching) raw files. - """ + ''' raise NotImplementedError() def raw_file_list(self, directory: str) -> List[Tuple[str, int]]: - """ + ''' Gives a list of directory contents and its size. Arguments: directory: The directory to list Returns: A list of tuples with file name and size. - """ + ''' raise NotImplementedError() def archive_file(self, calc_id: str, *args, **kwargs) -> IO: - """ + ''' Opens a archive file and returns a file-like objects. Additional args, kwargs are delegated to the respective `open` call. Arguments: @@ -294,18 +293,18 @@ class UploadFiles(DirectoryObject, metaclass=ABCMeta): Raises: KeyError: If the calc does not exist. Restricted: If the file is restricted and upload access evaluated to False. - """ + ''' raise NotImplementedError() def archive_file_size(self, calc_id: str) -> int: - """ + ''' Returns: The size of the archive. - """ + ''' raise NotImplementedError() def archive_log_file(self, calc_id: str, *args, **kwargs) -> IO: - """ + ''' Opens a archive log file and returns a file-like objects. Additional args, kwargs are delegated to the respective `open` call. Arguments: @@ -313,11 +312,11 @@ class UploadFiles(DirectoryObject, metaclass=ABCMeta): Raises: KeyError: If the calc does not exist. Restricted: If the file is restricted and upload access evaluated to False. - """ + ''' raise NotImplementedError() def open_zipfile_cache(self): - """ Allows to reuse the same zipfile for multiple file operations. Must be closed. """ + ''' Allows to reuse the same zipfile for multiple file operations. Must be closed. ''' pass def close_zipfile_cache(self): @@ -398,7 +397,7 @@ class StagingUploadFiles(UploadFiles): def add_rawfiles( self, path: str, move: bool = False, prefix: str = None, force_archive: bool = False, target_dir: DirectoryObject = None) -> None: - """ + ''' Add rawfiles to the upload. The given file will be copied, moved, or extracted. Arguments: @@ -408,7 +407,7 @@ class StagingUploadFiles(UploadFiles): force_archive: Expect the file to be a zip or other support archive file. Usually those files are only extracted if they can be extracted and copied instead. target_dir: Overwrite the used directory to extract to. Default is the raw directory of this upload. - """ + ''' assert not self.is_frozen assert os.path.exists(path) self._size += os.stat(path).st_size @@ -449,13 +448,13 @@ class StagingUploadFiles(UploadFiles): @property def is_frozen(self) -> bool: - """ Returns True if this upload is already *bagged*. """ + ''' Returns True if this upload is already *bagged*. ''' return self._frozen_file.exists() def pack( - self, upload: UploadWithMetadata, target_dir: DirectoryObject = None, + self, entries: Iterable[datamodel.EntryMetadata], target_dir: DirectoryObject = None, skip_raw: bool = False, skip_archive: bool = False) -> None: - """ + ''' Replaces the staging upload data with a public upload record by packing all data into files. It is only available if upload *is_bag*. This is potentially a long running operation. @@ -466,7 +465,7 @@ class StagingUploadFiles(UploadFiles): is the corresponding public upload files directory. 
skip_raw: determine to not pack the raw data, only archive and user metadata skip_raw: determine to not pack the archive data, only raw and user metadata - """ + ''' self.logger.info('started to pack upload') # freeze the upload @@ -501,25 +500,25 @@ class StagingUploadFiles(UploadFiles): # zip archives if not skip_archive: with utils.timer(self.logger, 'packed zip json archive'): - self._pack_archive_files(upload, create_zipfile) + self._pack_archive_files(entries, create_zipfile) with utils.timer(self.logger, 'packed msgpack archive'): - self._pack_archive_files_msgpack(upload, write_msgfile) + self._pack_archive_files_msgpack(entries, write_msgfile) # zip raw files if not skip_raw: with utils.timer(self.logger, 'packed raw files'): - self._pack_raw_files(upload, create_zipfile) + self._pack_raw_files(entries, create_zipfile) - def _pack_archive_files_msgpack(self, upload: UploadWithMetadata, write_msgfile): + def _pack_archive_files_msgpack(self, entries: Iterable[datamodel.EntryMetadata], write_msgfile): restricted, public = 0, 0 - for calc in upload.calcs: + for calc in entries: if calc.with_embargo: restricted += 1 else: public += 1 def create_iterator(with_embargo: bool): - for calc in upload.calcs: + for calc in entries: if with_embargo == calc.with_embargo: archive_file = self.archive_file_object(calc.calc_id) if archive_file.exists(): @@ -535,12 +534,12 @@ class StagingUploadFiles(UploadFiles): except Exception as e: self.logger.error('exception during packing archives', exc_info=e) - def _pack_archive_files(self, upload: UploadWithMetadata, create_zipfile): + def _pack_archive_files(self, entries: Iterable[datamodel.EntryMetadata], create_zipfile): archive_public_zip = create_zipfile('archive', 'public', self._archive_ext) archive_restricted_zip = create_zipfile('archive', 'restricted', self._archive_ext) try: - for calc in upload.calcs: + for calc in entries: archive_zip = archive_restricted_zip if calc.with_embargo else archive_public_zip archive_filename = '%s.%s' % (calc.calc_id, self._archive_ext) @@ -560,7 +559,7 @@ class StagingUploadFiles(UploadFiles): archive_restricted_zip.close() archive_public_zip.close() - def _pack_raw_files(self, upload: UploadWithMetadata, create_zipfile): + def _pack_raw_files(self, entries: Iterable[datamodel.EntryMetadata], create_zipfile): raw_public_zip = create_zipfile('raw', 'public', 'plain') raw_restricted_zip = create_zipfile('raw', 'restricted', 'plain') @@ -568,7 +567,7 @@ class StagingUploadFiles(UploadFiles): # 1. add all public raw files # 1.1 collect all public mainfiles and aux files public_files: Dict[str, str] = {} - for calc in upload.calcs: + for calc in entries: if not calc.with_embargo: mainfile = calc.mainfile assert mainfile is not None @@ -578,7 +577,7 @@ class StagingUploadFiles(UploadFiles): if not always_restricted(filepath): public_files[filepath] = None # 1.2 remove the non public mainfiles that have been added as auxfiles of public mainfiles - for calc in upload.calcs: + for calc in entries: if calc.with_embargo: mainfile = calc.mainfile assert mainfile is not None @@ -629,14 +628,14 @@ class StagingUploadFiles(UploadFiles): return results def calc_files(self, mainfile: str, with_mainfile: bool = True, with_cutoff: bool = True) -> Iterable[str]: - """ + ''' Returns all the auxfiles and mainfile for a given mainfile. This implements nomad's logic about what is part of a calculation and what not. The mainfile is first entry, the rest is sorted. 
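
An illustrative sketch of the changed packing API, not part of the patch: callers now pass an iterable of `EntryMetadata` instead of an `UploadWithMetadata` object. The `publish` wrapper, its `upload_files` argument, and the `entries` list are hypothetical stand-ins for the processing code that owns them.

```python
from typing import List
from nomad import datamodel

def publish(upload_files, entries: List[datamodel.EntryMetadata]) -> None:
    # previously this call was upload_files.pack(upload_with_metadata)
    upload_files.pack(entries)
```
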
Arguments: mainfile: The mainfile relative to upload with_mainfile: Do include the mainfile, default is True - """ + ''' mainfile_object = self._raw_dir.join_file(mainfile) if not mainfile_object.exists(): raise KeyError(mainfile) @@ -666,7 +665,7 @@ class StagingUploadFiles(UploadFiles): return aux_files def calc_id(self, mainfile: str) -> str: - """ + ''' Calculates a id for the given calc. Arguments: mainfile: The mainfile path relative to the upload that identifies the calc in the folder structure. @@ -674,11 +673,11 @@ class StagingUploadFiles(UploadFiles): The calc id Raises: KeyError: If the mainfile does not exist. - """ + ''' return utils.hash(self.upload_id, mainfile) def calc_hash(self, mainfile: str) -> str: - """ + ''' Calculates a hash for the given calc based on file contents and aux file contents. Arguments: mainfile: The mainfile path relative to the upload that identifies the calc in the folder structure. @@ -686,7 +685,7 @@ class StagingUploadFiles(UploadFiles): The calculated hash Raises: KeyError: If the mainfile does not exist. - """ + ''' hash = hashlib.sha512() for filepath in self.calc_files(mainfile): with open(self._raw_dir.join_file(filepath).os_path, 'rb') as f: @@ -702,12 +701,12 @@ class StagingUploadFiles(UploadFiles): class ArchiveBasedStagingUploadFiles(StagingUploadFiles): - """ + ''' :class:`StagingUploadFiles` based on a single uploaded archive file (.zip) Arguments: upload_path: The path to the uploaded file. - """ + ''' def __init__( self, upload_id: str, upload_path: str, *args, **kwargs) -> None: @@ -736,12 +735,12 @@ class ArchiveBasedStagingUploadFiles(StagingUploadFiles): class PublicUploadFilesBasedStagingUploadFiles(StagingUploadFiles): - """ + ''' :class:`StagingUploadFiles` based on a single uploaded archive file (.zip) Arguments: upload_path: The path to the uploaded file. - """ + ''' def __init__( self, public_upload_files: 'PublicUploadFiles', *args, **kwargs) -> None: @@ -763,9 +762,9 @@ class PublicUploadFilesBasedStagingUploadFiles(StagingUploadFiles): def add_rawfiles(self, *args, **kwargs) -> None: assert False, 'do not add_rawfiles to a %s' % self.__class__.__name__ - def pack(self, upload: UploadWithMetadata, *args, **kwargs) -> None: - """ Packs only the archive contents and stores it in the existing public upload files. """ - super().pack(upload, target_dir=self.public_upload_files, skip_raw=True) + def pack(self, entries: Iterable[datamodel.EntryMetadata], *args, **kwargs) -> None: + ''' Packs only the archive contents and stores it in the existing public upload files. ''' + super().pack(entries, target_dir=self.public_upload_files, skip_raw=True) class PublicUploadFiles(UploadFiles): @@ -952,13 +951,13 @@ class PublicUploadFiles(UploadFiles): return self._file('archive', self._archive_ext, '%s.log' % calc_id, *args, **kwargs) def re_pack( - self, upload: UploadWithMetadata, skip_raw: bool = False, + self, entries: Iterable[datamodel.EntryMetadata], skip_raw: bool = False, skip_archive: bool = False) -> None: - """ + ''' Replaces the existing public/restricted data file pairs with new ones, based on current restricted information in the metadata. Should be used after updating the restrictions on calculations. This is potentially a long running operation. 
- """ + ''' # compute a list of files to repack files = [] kinds = [] @@ -991,10 +990,10 @@ class PublicUploadFiles(UploadFiles): # perform the repacking try: if not skip_archive: - staging_upload._pack_archive_files(upload, create_zipfile) - staging_upload._pack_archive_files_msgpack(upload, write_msgfile) + staging_upload._pack_archive_files(entries, create_zipfile) + staging_upload._pack_archive_files_msgpack(entries, write_msgfile) if not skip_raw: - staging_upload._pack_raw_files(upload, create_zipfile) + staging_upload._pack_raw_files(entries, create_zipfile) finally: staging_upload.delete() diff --git a/nomad/infrastructure.py b/nomad/infrastructure.py index 84da75147b87b5309d6558aff2ff6f111902ab74..11aa8d2d6f9ff50813684d0121df6499c56ead2d 100644 --- a/nomad/infrastructure.py +++ b/nomad/infrastructure.py @@ -12,12 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. -""" +''' This module provides function to establish connections to the database, searchengine, etc. infrastructure services. Usually everything is setup at once with :func:`setup`. This is run once for each *api* and *worker* process. Individual functions for partial setups exist to facilitate testing, :py:mod:`nomad.migration`, aspects of :py:mod:`nomad.cli`, etc. -""" +''' import os.path import shutil @@ -42,19 +42,19 @@ from nomad import config, utils logger = None elastic_client = None -""" The elastic search client. """ +''' The elastic search client. ''' mongo_client = None -""" The pymongo mongodb client. """ +''' The pymongo mongodb client. ''' def setup(): - """ + ''' Uses the current configuration (nomad/config.py and environment) to setup all the infrastructure services (repository db, mongo, elastic search) and logging. Will create client instances for the databases and has to be called before they can be used. - """ + ''' setup_logging() setup_mongo() setup_elastic() @@ -75,7 +75,7 @@ def setup_logging(): def setup_mongo(): - """ Creates connection to mongodb. """ + ''' Creates connection to mongodb. ''' global mongo_client try: mongo_client = connect(db=config.mongo.db_name, host=config.mongo.host, port=config.mongo.port) @@ -88,7 +88,7 @@ def setup_mongo(): def setup_elastic(): - """ Creates connection to elastic search. """ + ''' Creates connection to elastic search. ''' global elastic_client elastic_client = connections.create_connection( hosts=['%s:%d' % (config.elastic.host, config.elastic.port)], @@ -111,10 +111,10 @@ def setup_elastic(): class Keycloak(): - """ + ''' A class that encapsulates all keycloak related functions for easier mocking and configuration - """ + ''' def __init__(self): self.__oidc_client = None self.__admin_client = None @@ -148,7 +148,7 @@ class Keycloak(): return self.__public_keys def authorize_flask(self, basic: bool = True) -> str: - """ + ''' Authorizes the current flask request with keycloak. Uses either Bearer or Basic authentication, depending on available headers in the request. Bearer auth is basically offline (besides retrieving and caching keycloaks public key for signature @@ -157,7 +157,7 @@ class Keycloak(): Will set ``g.user``, either with None or user data from the respective OIDC token. 
Returns: An error message or None - """ + ''' g.oidc_access_token = None if 'Authorization' in request.headers and request.headers['Authorization'].startswith('Bearer '): g.oidc_access_token = request.headers['Authorization'].split(None, 1)[1].strip() @@ -235,10 +235,10 @@ class Keycloak(): pass def add_user(self, user, bcrypt_password=None, invite=False): - """ + ''' Adds the given :class:`nomad.datamodel.User` instance to the configured keycloak realm using the keycloak admin API. - """ + ''' from nomad import datamodel if not isinstance(user, datamodel.User): if 'user_id' not in user: @@ -337,12 +337,12 @@ class Keycloak(): for keycloak_user in keycloak_results] def get_user(self, user_id: str = None, username: str = None, user=None) -> object: - """ + ''' Retrives all available information about a user from the keycloak admin interface. This must be used to retrieve complete user information, because the info solely gathered from tokens (i.e. for the authenticated user ``g.user``) is generally incomplete. - """ + ''' if user is not None and user_id is None: user_id = user.user_id @@ -390,7 +390,7 @@ keycloak = Keycloak() def reset(remove: bool): - """ + ''' Resets the databases mongo, elastic/calcs, and all files. Be careful. In contrast to :func:`remove`, it will only remove the contents of dbs and indicies. This function just attempts to remove everything, there is no exception handling @@ -398,7 +398,7 @@ def reset(remove: bool): Args: remove: Do not try to recreate empty databases, remove entirely. - """ + ''' try: if not mongo_client: setup_mongo() diff --git a/nomad/metainfo/CONCEPT.md b/nomad/metainfo/CONCEPT.md index 9d1fb4324d5695a4bdd3dfda7db36c345cdb97be..f99214f37c77f143e75a1e41dff2d5e1adad0e05 100644 --- a/nomad/metainfo/CONCEPT.md +++ b/nomad/metainfo/CONCEPT.md @@ -179,9 +179,9 @@ Arbitrary serializable objects that can contain additional information. This could be code, from a python module that represents the NOMAD *common* package `nomad.metainfo.common`: ```python class System(MSection): - """ + ''' The system is ... - """ + ''' n_atoms = Quantity(type=int, derived_from='atom_labels') @@ -189,9 +189,9 @@ class System(MSection): shape=['n_atoms'], type=MEnum(ase.data.chemical_symbols), annotations=[ElasticSearchQuantity('keyword')]) - """ + ''' Atom labels are ... - """ + ''' formula_hill = Quantity(type=str, derived_from=['atom_labels']) diff --git a/nomad/metainfo/__init__.py b/nomad/metainfo/__init__.py index 9a54c3c36ac07652ba13b80bf5d239907dbd5400..0521e9d1cb0d621e00d45355459a6db6c56f0f1f 100644 --- a/nomad/metainfo/__init__.py +++ b/nomad/metainfo/__init__.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -""" +''' The NOMAD meta-info allows to define schemas for physics data independent of the used storage format. It allows to define physics quantities with types, complex shapes (vetors, matrices, etc.), units, links, and descriptions. It allows to organize large @@ -32,15 +32,15 @@ Starting example from nomad.metainfo import MSection, Quantity, SubSection, Units class System(MSection): - \"\"\" + \'\'\' A system section includes all quantities that describe a single a simulated system (a.k.a. geometry). - \"\"\" + \'\'\' n_atoms = Quantity( - type=int, description=''' + type=int, description=\'\'\' A Defines the number of atoms in the system. 
- ''') + \'\'\') atom_labels = Quantity(type=MEnum(ase.data.chemical_symbols), shape['n_atoms']) atom_positions = Quantity(type=float, shape=['n_atoms', 3], unit=Units.m) @@ -146,7 +146,7 @@ A `section class` looks like this: .. code-block:: python class SectionName(BaseSection): - ''' Section description ''' + \'\'\' Section description \'\'\' m_def = Section(**section_attributes) quantity_name = Quantity(**quantity_attributes) @@ -186,7 +186,7 @@ category looks like this: .. code-block:: python class CategoryName(MCategory): - ''' Category description ''' + \'\'\' Category description \'\'\' m_def = Category(links=['http://further.explanation.eu'], categories=[ParentCategory]) Packages @@ -272,7 +272,7 @@ A more complex example .. literalinclude:: ../nomad/metainfo/example.py :language: python -""" +''' from .metainfo import MSection, MCategory, Definition, Property, Quantity, SubSection, \ Section, Category, Package, Environment, MEnum, Datetime, MProxy, MetainfoError, DeriveError, \ diff --git a/nomad/metainfo/elastic.py b/nomad/metainfo/elastic.py index 352b74b7aec3eda5f355ea1653065a2167c1ada0..c386703864b22cda1bf727d4b6cedeaedfad3d3f 100644 --- a/nomad/metainfo/elastic.py +++ b/nomad/metainfo/elastic.py @@ -12,15 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. -""" +''' Adds elastic search support to the metainfo. -""" +''' from . import Section, MSection def elastic_mapping(section: Section, base_cls: type) -> type: - """ Creates an elasticsearch_dsl document class from a section definition. """ + ''' Creates an elasticsearch_dsl document class from a section definition. ''' dct = { name: quantity.m_annotations['elastic']['type']() diff --git a/nomad/metainfo/example.py b/nomad/metainfo/example.py index 3be207f5678cbee913ab704b4168074a935dd2f9..3f04abd9cb0324ac0f5646dc18f51f4b09a168b5 100644 --- a/nomad/metainfo/example.py +++ b/nomad/metainfo/example.py @@ -1,4 +1,4 @@ -""" An example metainfo package. """ +''' An example metainfo package. ''' import numpy as np from datetime import datetime @@ -9,28 +9,28 @@ m_package = Package(links=['http://metainfo.nomad-coe.eu']) class SystemHash(MCategory): - """ All quantities that contribute to what makes a system unique. """ + ''' All quantities that contribute to what makes a system unique. ''' class Parsing(MSection): - """ All data that describes the NOMAD parsing of this run. + ''' All data that describes the NOMAD parsing of this run. Quantities can also be documented like this: Args: parser_name: 'Name of the used parser' parser_version: 'Version of the used parser' - """ + ''' parser_name = Quantity(type=str) parser_version = Quantity(type=str) - nomad_version = Quantity(type=str) + nomad_version = Quantity(type=str, default='latest') warnings = Quantity(type=str, shape=['0..*']) parse_time = Quantity(type=Datetime) class System(MSection): - """ All data that describes a simulated system. """ + ''' All data that describes a simulated system. ''' n_atoms = Quantity( type=int, derived=lambda system: len(system.atom_labels), @@ -63,7 +63,7 @@ class SCC(MSection): class Run(MSection): - """ All data that belongs to a single code run. """ + ''' All data that belongs to a single code run. ''' code_name = Quantity(type=str, description='The name of the code that was run.') code_version = Quantity(type=str, description='The version of the code that was run.') @@ -78,7 +78,7 @@ class Run(MSection): class VaspRun(Run): - """ All VASP specific quantities for section Run. 
""" + ''' All VASP specific quantities for section Run. ''' m_def = Section(extends_base_section=True) x_vasp_raw_format = Quantity( diff --git a/nomad/metainfo/flask_restplus.py b/nomad/metainfo/flask_restplus.py index 5ee7fa53dea772927a579d05141a1e3e37a5f733..a7621e5089cfff53a93827cf56421c1d75095cd2 100644 --- a/nomad/metainfo/flask_restplus.py +++ b/nomad/metainfo/flask_restplus.py @@ -6,7 +6,7 @@ from .metainfo import Section, Quantity, Datetime def field(quantity: Quantity): - """ Returns a flask restplus field with quantity type and shape. """ + ''' Returns a flask restplus field with quantity type and shape. ''' field = None if quantity.type == int: field = fields.Integer diff --git a/nomad/metainfo/legacy.py b/nomad/metainfo/legacy.py index 3b0a3c4f8485b035f84a9443ae781edc548d09e0..e66b5cf4c5e8e903da9aaee4f7c7fa3d33f09d7e 100644 --- a/nomad/metainfo/legacy.py +++ b/nomad/metainfo/legacy.py @@ -50,7 +50,7 @@ def from_legacy_metainfo(meta_info_env, package_names: List[str] = None) \ class LegacyMetainfoEnvironment: - """ + ''' Args: env: The metainfo environment that is used to manage the definitions. orig_legacy_env: The old metainfo :class:`InfoKindEnv` environment with the @@ -59,7 +59,7 @@ class LegacyMetainfoEnvironment: converted metainfo environment. all_legacy_defs: A dict that stores the original :class:`InfoKindEl`s by name. all_defs: A dict that stroed the converted section and category definitions. - """ + ''' def __init__(self, metainfo=Union[InfoKindEnv, str], package_names: List[str] = None, logger=None): self.logger = utils.get_logger(__name__) if logger is None else logger self.env = Environment() @@ -109,9 +109,9 @@ class LegacyMetainfoEnvironment: def convert_package( self, legacy_definitions: List[InfoKindEl], **kwargs) -> Package: - """ Converts a single legacy metainfo package, i.e. a list of :class:`InfoKindEl` + ''' Converts a single legacy metainfo package, i.e. a list of :class:`InfoKindEl` into a metainfo package. - """ + ''' package = Package(**kwargs) definition: Definition = None @@ -212,7 +212,7 @@ class LegacyMetainfoEnvironment: return package def legacy_info(self, definition: Definition, *args, **kwargs) -> InfoKindEl: - """ Creates a legacy metainfo objects for the given definition. """ + ''' Creates a legacy metainfo objects for the given definition. ''' super_names: List[str] = list() result: Dict[str, Any] = dict( name=definition.name, @@ -266,7 +266,7 @@ class LegacyMetainfoEnvironment: return InfoKindEl(*args, **result, **kwargs) def legacy_info_env(self, packages: List[Package] = None, *args, **kwargs) -> InfoKindEnv: - """ Creates a legacy metainfo environment with all definitions from the given packages. """ + ''' Creates a legacy metainfo environment with all definitions from the given packages. 
''' if packages is None: packages = self.env.packages @@ -331,7 +331,7 @@ class LegacyMetainfoEnvironment: if __name__ == '__main__': - """ Converts the old metainfo and code-generates definitions for the new metainfo """ + ''' Converts the old metainfo and code-generates definitions for the new metainfo ''' env = LegacyMetainfoEnvironment( metainfo='vasp.nomadmetainfo.json', package_names=['%s.nomadmetainfo.json' % pkg for pkg in ['common', 'public', 'vasp']]) diff --git a/nomad/metainfo/metainfo.py b/nomad/metainfo/metainfo.py index 9c96ca4b204683414f4cbe1ed646da89bc0517ef..4febbc2e5a0bb54e1fe29a560e067f3c3f055d53 100644 --- a/nomad/metainfo/metainfo.py +++ b/nomad/metainfo/metainfo.py @@ -29,6 +29,7 @@ import aniso8601 from datetime import datetime import pytz import docstring_parser +import flask_restplus.inputs m_package: 'Package' = None @@ -41,24 +42,24 @@ T = TypeVar('T') # Metainfo errors class MetainfoError(Exception): - """ Metainfo related errors. """ + ''' Metainfo related errors. ''' pass class DeriveError(MetainfoError): - """ An error occurred while computing a derived value. """ + ''' An error occurred while computing a derived value. ''' pass class MetainfoReferenceError(MetainfoError): - """ An error indicating that a reference could not be resolved. """ + ''' An error indicating that a reference could not be resolved. ''' pass # Metainfo quantity data types class MEnum(): - """Allows to define str types with values limited to a pre-set list of possible values.""" + '''Allows to define str types with values limited to a pre-set list of possible values.''' def __init__(self, *args, **kwargs): # Supports one big list in place of args if len(args) == 1 and isinstance(args[0], list): @@ -80,18 +81,18 @@ class MEnum(): class MProxy(): - """ A placeholder object that acts as reference to a value that is not yet resolved. + ''' A placeholder object that acts as reference to a value that is not yet resolved. Attributes: url: The reference represented as an URL string. - """ + ''' def __init__(self, url: str): self.url = url class DataType: - """ + ''' Allows to define custom data types that can be used in the meta-info. The metainfo supports the most types out of the box. These includes the python build-in @@ -102,21 +103,21 @@ class DataType: type checks and various value transformations. This allows to store values in the section differently from how the usermight set/get them, and it allows to have non serializeable values that are transformed on de-/serialization. - """ + ''' def set_normalize(self, section: 'MSection', quantity_def: 'Quantity', value: Any) -> Any: - """ Transforms the given value before it is set and checks its type. """ + ''' Transforms the given value before it is set and checks its type. ''' return value def get_normalize(self, section: 'MSection', quantity_def: 'Quantity', value: Any) -> Any: - """ Transforms the given value when it is get. """ + ''' Transforms the given value when it is get. ''' return value def serialize(self, section: 'MSection', quantity_def: 'Quantity', value: Any) -> Any: - """ Transforms the given value when making the section serializeable. """ + ''' Transforms the given value when making the section serializeable. ''' return value def deserialize(self, section: 'MSection', quantity_def: 'Quantity', value: Any) -> Any: - """ Transforms the given value from its serializeable form. """ + ''' Transforms the given value from its serializeable form. 
''' return value @@ -175,7 +176,7 @@ class _Unit(DataType): units = pint.UnitRegistry() -""" The default pint unit registry that should be used to give units to quantity definitions. """ +''' The default pint unit registry that should be used to give units to quantity definitions. ''' class _Callable(DataType): @@ -187,7 +188,7 @@ class _Callable(DataType): class _QuantityType(DataType): - """ Data type for defining the type of a metainfo quantity. + ''' Data type for defining the type of a metainfo quantity. A metainfo quantity type can be one of @@ -197,7 +198,7 @@ class _QuantityType(DataType): - an MEnum instance to use it's values as possible str values - a custom datatype, i.e. instance of :class:`DataType` - Any - """ + ''' def set_normalize(self, section, quantity_def, value): if value in [str, int, float, bool]: @@ -261,7 +262,7 @@ class _QuantityType(DataType): class Reference(DataType): - """ Datatype used for reference quantities. """ + ''' Datatype used for reference quantities. ''' def __init__(self, section_def: 'Section'): if not isinstance(section_def, Section): @@ -292,12 +293,15 @@ class Reference(DataType): return value + def resolve(self, section: 'MSection', quantity_def: 'Quantity', value: Any) -> 'MSection': + return section.m_resolve(value.url) + def get_normalize(self, section: 'MSection', quantity_def: 'Quantity', value: Any) -> Any: if isinstance(value, MProxy): - resolved: 'MSection' = section.m_resolve(value.url) + resolved: 'MSection' = self.resolve(section, quantity_def, value) if resolved is None: raise ReferenceError('Could not resolve %s from %s.' % (value, section)) - section.m_set(quantity_def, value) + return resolved return value @@ -311,31 +315,46 @@ class Reference(DataType): class _Datetime(DataType): - def __parse(self, datetime_str: str) -> datetime: + def _parse(self, datetime_str: str) -> datetime: try: - try: - return aniso8601.parse_datetime(datetime_str) - except ValueError: - date = aniso8601.parse_date(datetime_str) - return datetime(date.year, date.month, date.day) - except Exception: - raise TypeError('Invalid date literal "{0}"'.format(datetime_str)) + return aniso8601.parse_datetime(datetime_str) + except ValueError: + pass - def set_normalize(self, section: 'MSection', quantity_def: 'Quantity', value: Any) -> Any: + try: + return aniso8601.parse_date(datetime_str) + except ValueError: + pass + + try: + # TODO necessary? + return flask_restplus.inputs.datetime_from_rfc822(datetime_str) + except ValueError: + pass + + raise TypeError('Invalid date literal "{0}"'.format(datetime_str)) + + def _convert(self, value): if isinstance(value, str): - value = self.__parse(value) + value = self._parse(value) + + elif isinstance(value, (int, float)): + value = datetime.fromtimestamp(value) if not isinstance(value, datetime): raise TypeError('%s is not a datetime.' % value) return value + def set_normalize(self, section: 'MSection', quantity_def: 'Quantity', value: Any) -> Any: + return self._convert(value) + def serialize(self, section: 'MSection', quantity_def: 'Quantity', value: Any) -> Any: value.replace(tzinfo=pytz.utc) return value.isoformat() def deserialize(self, section: 'MSection', quantity_def: 'Quantity', value: Any) -> Any: - return self.__parse(value) + return self._convert(value) Dimension = _Dimension() @@ -365,7 +384,7 @@ class MObjectMeta(type): SectionDef = Union[str, 'Section', 'SubSection', Type[MSectionBound]] -""" Type for section definition references. +''' Type for section definition references. 
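
A minimal sketch, not part of the patch, of what the reworked `_Datetime` type accepts when a `Datetime` quantity is set: ISO 8601 strings, numeric timestamps, and `datetime` objects. The `Example` section is hypothetical; only the conversion behaviour is taken from the diff above.

```python
from datetime import datetime
from nomad.metainfo import MSection, Quantity, Datetime

class Example(MSection):
    timestamp = Quantity(type=Datetime)

example = Example()
example.timestamp = '2019-10-01T10:30:00'          # ISO 8601 string, parsed via aniso8601
example.timestamp = 1569925800                     # int/float, converted via datetime.fromtimestamp
example.timestamp = datetime(2019, 10, 1, 10, 30)  # datetime instances pass through unchanged
```
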
This can either be : @@ -373,11 +392,11 @@ This can either be : - the section definition itself - the definition of a sub section - or the section definition Python class -""" +''' class MData: - """ An interface for low-level metainfo data objects. + ''' An interface for low-level metainfo data objects. Metainfo data objects store the data of a single section instance. This interface constitutes the minimal functionality for accessing and modifying section data. @@ -386,7 +405,7 @@ class MData: All section instances will implement this interface, usually be delegating calls to a standalone implementation of this interface. This allows to configure various data backends on section instance creation. - """ + ''' def __getitem__(self, key): raise NotImplementedError() @@ -395,47 +414,52 @@ class MData: raise NotImplementedError() def m_set(self, section: 'MSection', quantity_def: 'Quantity', value: Any) -> None: - """ Set the given value for the given quantity. """ + ''' Set the given value for the given quantity. ''' raise NotImplementedError() def m_get(self, section: 'MSection', quantity_def: 'Quantity') -> Any: - """ Retrieve the given value for the given quantity. """ + ''' Retrieve the given value for the given quantity. ''' raise NotImplementedError() def m_is_set(self, section: 'MSection', quantity_def: 'Quantity') -> bool: - """ True iff this quantity was explicitely set. """ + ''' True iff this quantity was explicitely set. ''' raise NotImplementedError() def m_add_values( self, section: 'MSection', quantity_def: 'Quantity', values: Any, offset: int) -> None: - """ Add (partial) values for the given quantity of higher dimensionality. """ + ''' Add (partial) values for the given quantity of higher dimensionality. ''' raise NotImplementedError() def m_add_sub_section( self, section: 'MSection', sub_section_def: 'SubSection', sub_section: 'MSection') -> None: - """ Adds the given section instance as a sub section of the given sub section definition. """ + ''' Adds the given section instance as a sub section of the given sub section definition. ''' + raise NotImplementedError() + + def m_remove_sub_section( + self, section: 'MSection', sub_section_def: 'SubSection', index: int) -> None: + ''' Removes the given section instance as a sub section of the given sub section definition. ''' raise NotImplementedError() def m_get_sub_section( self, section: 'MSection', sub_section_def: 'SubSection', index: int) -> 'MSection': - """ Retrieves a single sub section of the given sub section definition. """ + ''' Retrieves a single sub section of the given sub section definition. ''' raise NotImplementedError() def m_get_sub_sections( self, section: 'MSection', sub_section_def: 'SubSection') -> Iterable['MSection']: - """ Retrieves all sub sections of the given sub section definition. """ + ''' Retrieves all sub sections of the given sub section definition. ''' raise NotImplementedError() def m_sub_section_count(self, section: 'MSection', sub_section_def: 'SubSection') -> int: - """ Returns the number of sub sections for the given sub section definition. """ + ''' Returns the number of sub sections for the given sub section definition. ''' raise NotImplementedError() class MDataDict(MData): - """ A simple dict backed implementaton of :class:`MData`. It is used by default. """ + ''' A simple dict backed implementaton of :class:`MData`. It is used by default. 
''' def __init__(self, dct: Dict[str, Any] = None): if dct is None: @@ -484,6 +508,15 @@ class MDataDict(MData): else: self.dct[sub_section_name] = sub_section + def m_remove_sub_section( + self, section: 'MSection', sub_section_def: 'SubSection', index: int) -> None: + + if sub_section_def.repeats: + del(self.dct[sub_section_def.name][index]) + + elif sub_section_def.name in self.dct: + del(self.dct[sub_section_def.name]) + def m_get_sub_section( self, section: 'MSection', sub_section_def: 'SubSection', index: int) -> 'MSection': @@ -510,17 +543,17 @@ class MDataDict(MData): class MResource(): - """Represents a collection of related metainfo data, i.e. a set of :class:`MSection` instances. + '''Represents a collection of related metainfo data, i.e. a set of :class:`MSection` instances. MResource allows to keep related objects together and resolve sections of certain section definitions. - """ + ''' def __init__(self): self.__data: Dict['Section', List['MSection']] = dict() self.contents: List['MSection'] = [] def create(self, section_cls: Type[MSectionBound], *args, **kwargs) -> MSectionBound: - """ Create an instance of the given section class and adds it to this resource. """ + ''' Create an instance of the given section class and adds it to this resource. ''' result = section_cls(*args, **kwargs) self.add(result) return cast(MSectionBound, result) @@ -539,11 +572,11 @@ class MResource(): self.contents.remove(section) def all(self, section_cls: Type[MSectionBound]) -> List[MSectionBound]: - """ Returns all instances of the given section class in this resource. """ + ''' Returns all instances of the given section class in this resource. ''' return cast(List[MSectionBound], self.__data.get(section_cls.m_def, [])) def unload(self): - """ Breaks all references among the contain metainfo sections to allow GC. """ + ''' Breaks all references among the contain metainfo sections to allow GC. ''' for collections in self.__data.values(): for section in collections: section.m_parent = None @@ -552,8 +585,8 @@ class MResource(): # TODO break actual references via quantities -class MSection(metaclass=MObjectMeta): - """Base class for all section instances on all meta-info levels. +class MSection(metaclass=MObjectMeta): # TODO find a way to make this a subclass of collections.abs.Mapping + '''Base class for all section instances on all meta-info levels. All `section instances` indirectly instantiate the :class:`MSection` and therefore all members of :class:`MSection` are available on all `section instances`. :class:`MSection` @@ -585,7 +618,7 @@ class MSection(metaclass=MObjectMeta): m_resource: The :class:`MResource` that contains and manages this section. - """ + ''' m_def: 'Section' = None @@ -689,7 +722,7 @@ class MSection(metaclass=MObjectMeta): constraints: Set[str] = set() event_handlers: Set[Callable] = set(m_def.event_handlers) for name, attr in cls.__dict__.items(): - # transfer names and descriptions for properties + # transfer names and descriptions for properties, init properties if isinstance(attr, Property): attr.name = name if attr.description is not None: @@ -703,6 +736,8 @@ class MSection(metaclass=MObjectMeta): else: raise NotImplementedError('Unknown property kind.') + attr.__init_property__() + if inspect.isfunction(attr): method_name = attr.__name__ @@ -856,7 +891,7 @@ class MSection(metaclass=MObjectMeta): return self.__check_np(quantity_def, value) def m_set(self, quantity_def: 'Quantity', value: Any) -> None: - """ Set the given value for the given quantity. 
""" + ''' Set the given value for the given quantity. ''' quantity_def = self.__resolve_synonym(quantity_def) if quantity_def.derived is not None: @@ -890,7 +925,7 @@ class MSection(metaclass=MObjectMeta): handler(self, quantity_def, value) def m_get(self, quantity_def: 'Quantity') -> Any: - """ Retrieve the given value for the given quantity. """ + ''' Retrieve the given value for the given quantity. ''' quantity_def = self.__resolve_synonym(quantity_def) if quantity_def.derived is not None: try: @@ -918,6 +953,10 @@ class MSection(metaclass=MObjectMeta): 'Only numpy arrays and dtypes can be used for higher dimensional ' 'quantities.') + if isinstance(quantity_def.type, Reference): + # save the resolved values for the next access to avoid re-resolve + self.m_data.m_set(self, quantity_def, value) + elif type(quantity_def.type) == np.dtype: if quantity_def.unit is not None: value = value * quantity_def.unit @@ -925,7 +964,7 @@ class MSection(metaclass=MObjectMeta): return value def m_is_set(self, quantity_def: 'Quantity') -> bool: - """ True if the given quantity is set. """ + ''' True if the given quantity is set. ''' quantity_def = self.__resolve_synonym(quantity_def) if quantity_def.derived is not None: return True @@ -933,15 +972,25 @@ class MSection(metaclass=MObjectMeta): return self.m_data.m_is_set(self, quantity_def) def m_add_values(self, quantity_def: 'Quantity', values: Any, offset: int) -> None: - """ Add (partial) values for the given quantity of higher dimensionality. """ + ''' Add (partial) values for the given quantity of higher dimensionality. ''' self.m_data.m_add_values(self, quantity_def, values, offset) def m_add_sub_section(self, sub_section_def: 'SubSection', sub_section: 'MSection') -> None: - """ Adds the given section instance as a sub section of the given sub section definition. """ + ''' Adds the given section instance as a sub section of the given sub section definition. ''' parent_index = -1 if sub_section_def.repeats: parent_index = self.m_sub_section_count(sub_section_def) + + else: + old_sub_section = self.m_data.m_get_sub_section(self, sub_section_def, -1) + if old_sub_section is not None: + old_sub_section.m_parent = None + old_sub_section.m_parent_sub_section = None + old_sub_section.m_parent_index = -1 + if self.m_resource is not None: + self.m_resource.remove(sub_section) + sub_section.m_parent = self sub_section.m_parent_sub_section = sub_section_def sub_section.m_parent_index = parent_index @@ -956,29 +1005,33 @@ class MSection(metaclass=MObjectMeta): if handler.__name__.startswith('on_add_sub_section'): handler(self, sub_section_def, sub_section) + def m_remove_sub_section(self, sub_section_def: 'SubSection', index: int) -> None: + ''' Removes the exiting section for a non repeatable sub section ''' + self.m_data.m_remove_sub_section(self, sub_section_def, index) + def m_get_sub_section(self, sub_section_def: 'SubSection', index: int) -> 'MSection': - """ Retrieves a single sub section of the given sub section definition. """ + ''' Retrieves a single sub section of the given sub section definition. ''' return self.m_data.m_get_sub_section(self, sub_section_def, index) def m_get_sub_sections(self, sub_section_def: 'SubSection') -> Iterable['MSection']: - """ Retrieves all sub sections of the given sub section definition. """ + ''' Retrieves all sub sections of the given sub section definition. 
''' return self.m_data.m_get_sub_sections(self, sub_section_def) def m_sub_section_count(self, sub_section_def: 'SubSection') -> int: - """ Returns the number of sub sections for the given sub section definition. """ + ''' Returns the number of sub sections for the given sub section definition. ''' return self.m_data.m_sub_section_count(self, sub_section_def) def m_create( self, section_cls: Type[MSectionBound], sub_section_def: 'SubSection' = None, **kwargs) -> MSectionBound: - """ Creates a section instance and adds it to this section provided there is a + ''' Creates a section instance and adds it to this section provided there is a corresponding sub section. Args: section_cls: The section class for the sub-secton to create sub_section_def: If there are multiple sub-sections for the given class, this must be used to explicitely state the sub-section definition. - """ + ''' section_def = section_cls.m_def sub_section_defs = self.m_def.all_sub_sections_by_section.get(section_def, []) @@ -1005,7 +1058,7 @@ class MSection(metaclass=MObjectMeta): return cast(MSectionBound, sub_section) def m_update(self, safe: bool = True, **kwargs): - """ Updates all quantities and sub-sections with the given arguments. """ + ''' Updates all quantities and sub-sections with the given arguments. ''' if safe: for name, value in kwargs.items(): prop = self.m_def.all_properties.get(name, None) @@ -1029,15 +1082,22 @@ class MSection(metaclass=MObjectMeta): self.m_data.m_data.dct.update(**kwargs) # type: ignore def m_as(self, section_cls: Type[MSectionBound]) -> MSectionBound: - """ 'Casts' this section to the given extending sections. """ + ''' 'Casts' this section to the given extending sections. ''' return cast(MSectionBound, self) def m_follows(self, definition: 'Section') -> bool: - """ Determines if this section's definition is or is derived from the given definition. """ + ''' Determines if this section's definition is or is derived from the given definition. ''' return self.m_def == definition or definition in self.m_def.all_base_sections - def m_to_dict(self, with_meta: bool = False) -> Dict[str, Any]: - """Returns the data of this section as a json serializeable dictionary. """ + def m_to_dict(self, with_meta: bool = False, include_defaults: bool = False) -> Dict[str, Any]: + ''' + Returns the data of this section as a json serializeable dictionary. + + Arguments: + with_meta: Include information about the section definition and the sections + position in its parent. + include_defaults: Include default values of unset quantities. 
+ ''' def items() -> Iterable[Tuple[str, Any]]: # metadata @@ -1050,81 +1110,100 @@ class MSection(metaclass=MObjectMeta): # quantities for name, quantity in self.m_def.all_quantities.items(): - if quantity.virtual or not self.m_is_set(quantity): + if quantity.virtual: continue - if self.m_is_set(quantity) and quantity.derived is None: - serialize: TypingCallable[[Any], Any] = str - if isinstance(quantity.type, DataType): + is_set = self.m_is_set(quantity) + if not is_set: + if not include_defaults or not quantity.m_is_set(Quantity.default): + continue - def data_type_serialize(value): - return quantity.type.serialize(self, quantity, value) + quantity_type = quantity.type - serialize = data_type_serialize + serialize: TypingCallable[[Any], Any] = str + if isinstance(quantity_type, Reference): - elif quantity.type in [str, int, float, bool]: - serialize = quantity.type + def reference_serialize(value): + if isinstance(value, MProxy): + return value.url + else: + return quantity_type.serialize(self, quantity, value) - elif type(quantity.type) == np.dtype: - pass + serialize = reference_serialize - elif isinstance(quantity.type, MEnum): - pass + elif isinstance(quantity_type, DataType): - elif quantity.type == Any: - def _serialize(value: Any): - if type(value) not in [str, int, float, bool, list, type(None)]: - raise MetainfoError( - 'Only python primitives are allowed for Any typed non ' - 'virtual quantities: %s of quantity %s in section %s' % - (value, quantity, self)) + def data_type_serialize(value): + return quantity_type.serialize(self, quantity, value) - return value + serialize = data_type_serialize - serialize = _serialize + elif quantity_type in [str, int, float, bool]: + serialize = quantity_type - else: - raise MetainfoError( - 'Do not know how to serialize data with type %s for quantity %s' % - (quantity.type, quantity)) + elif type(quantity_type) == np.dtype: + pass + + elif isinstance(quantity_type, MEnum): + pass + + elif quantity_type == Any: + def _serialize(value: Any): + if type(value) not in [str, int, float, bool, list, type(None)]: + raise MetainfoError( + 'Only python primitives are allowed for Any typed non ' + 'virtual quantities: %s of quantity %s in section %s' % + (value, quantity, self)) + + return value + serialize = _serialize + + else: + raise MetainfoError( + 'Do not know how to serialize data with type %s for quantity %s' % + (quantity_type, quantity)) + + if is_set: value = cast(MDataDict, self.m_data).dct[name] + else: + value = quantity.default - if type(quantity.type) == np.dtype: - serializable_value = value.tolist() + if type(quantity_type) == np.dtype: + serializable_value = value.tolist() + else: + if len(quantity.shape) == 0: + serializable_value = serialize(value) + elif len(quantity.shape) == 1: + serializable_value = [serialize(i) for i in value] else: - if len(quantity.shape) == 0: - serializable_value = serialize(value) - elif len(quantity.shape) == 1: - serializable_value = [serialize(i) for i in value] - else: - raise NotImplementedError('Higher shapes (%s) not supported: %s' % (quantity.shape, quantity)) + raise NotImplementedError('Higher shapes (%s) not supported: %s' % (quantity.shape, quantity)) - yield name, serializable_value + yield name, serializable_value # sub sections for name, sub_section_def in self.m_def.all_sub_sections.items(): if sub_section_def.repeats: if self.m_sub_section_count(sub_section_def) > 0: yield name, [ - item.m_to_dict() + item.m_to_dict(with_meta=with_meta, include_defaults=include_defaults) for item in 
self.m_get_sub_sections(sub_section_def)] else: sub_section = self.m_get_sub_section(sub_section_def, -1) if sub_section is not None: - yield name, sub_section.m_to_dict() + yield name, sub_section.m_to_dict(with_meta=with_meta, include_defaults=include_defaults) return {key: value for key, value in items()} @classmethod def m_from_dict(cls: Type[MSectionBound], dct: Dict[str, Any]) -> MSectionBound: - """ Creates a section from the given serializable data dictionary. + ''' Creates a section from the given serializable data dictionary. This is the 'opposite' of :func:`m_to_dict`. It takes a deserialised dict, e.g loaded from JSON, and turns it into a proper section, i.e. instance of the given section class. - """ + ''' section_def = cls.m_def @@ -1173,11 +1252,11 @@ class MSection(metaclass=MObjectMeta): return section def m_to_json(self, **kwargs): - """ Returns the data of this section as a json string. """ + ''' Returns the data of this section as a json string. ''' return json.dumps(self.m_to_dict(), **kwargs) def m_all_contents(self) -> Iterable['MSection']: - """ Returns an iterable over all sub and sub subs sections. """ + ''' Returns an iterable over all sub and sub subs sections. ''' for content in self.m_contents(): for sub_content in content.m_all_contents(): yield sub_content @@ -1185,7 +1264,7 @@ class MSection(metaclass=MObjectMeta): yield content def m_contents(self) -> Iterable['MSection']: - """ Returns an iterable over all direct subs sections. """ + ''' Returns an iterable over all direct subs sections. ''' for sub_section_def in self.m_def.all_sub_sections.values(): if sub_section_def.repeats: index = 0 @@ -1198,7 +1277,7 @@ class MSection(metaclass=MObjectMeta): yield sub_section def m_path(self, quantity_def: 'Quantity' = None) -> str: - """ Returns the path of this section or the given quantity within the section hierarchy. """ + ''' Returns the path of this section or the given quantity within the section hierarchy. ''' if self.m_parent is None: return '/' @@ -1213,19 +1292,21 @@ class MSection(metaclass=MObjectMeta): return '%s/%s' % (self.m_parent.m_path().rstrip('/'), segment) def m_root(self, cls: Type[MSectionBound] = None) -> MSectionBound: - """ Returns the first parent of the parent section that has no parent; the root. """ + ''' Returns the first parent of the parent section that has no parent; the root. ''' if self.m_parent is None: return cast(MSectionBound, self) else: return self.m_parent.m_root(cls) def m_parent_as(self, cls: Type[MSectionBound] = None) -> MSectionBound: - """ Returns the parent section with the given section class type. """ + ''' Returns the parent section with the given section class type. ''' return cast(MSectionBound, self.m_parent) def m_resolve(self, path: str, cls: Type[MSectionBound] = None) -> MSectionBound: - """ Resolves the given path using this section as context. """ - + ''' + Resolves the given path or dotted quantity name using this section as context and + returns the sub_section or value. 
+ ''' if path.startswith('/'): context: 'MSection' = self.m_root() else: @@ -1233,7 +1314,7 @@ class MSection(metaclass=MObjectMeta): path_stack = path.strip('/').split('/') path_stack.reverse() - while len(path_stack) > 1: + while len(path_stack) > 0: prop_name = path_stack.pop() prop_def = context.m_def.all_properties.get(prop_name, None) @@ -1275,7 +1356,7 @@ class MSection(metaclass=MObjectMeta): return cast(MSectionBound, context) def m_x(self, key: str, default=None): - """ Convinience method for get the annotation with name ``key``. """ + ''' Convinience method for get the annotation with name ``key``. ''' return self.m_annotations.get(key, default) def __validate_shape(self, quantity_def: 'Quantity', value): @@ -1301,7 +1382,7 @@ class MSection(metaclass=MObjectMeta): return True def m_validate(self): - """ Evaluates all constraints and shapes of this section and returns a list of errors. """ + ''' Evaluates all constraints and shapes of this section and returns a list of errors. ''' errors: List[str] = [] for constraint_name in self.m_def.constraints: constraint = getattr(self, 'c_%s' % constraint_name, None) @@ -1327,7 +1408,7 @@ class MSection(metaclass=MObjectMeta): return errors def m_all_validate(self): - """ Evaluates all constraints in the whole section hierarchy, incl. this section. """ + ''' Evaluates all constraints in the whole section hierarchy, incl. this section. ''' errors: List[str] = [] for section in itertools.chain([self], self.m_all_contents()): for error in section.m_validate(): @@ -1347,6 +1428,16 @@ class MSection(metaclass=MObjectMeta): return '%s:%s' % (name, m_section_name) + def __getitem__(self, key): + key = key.replace('.', '/') + return self.m_resolve(key) + + def __iter__(self): + return self.m_def.all_properties.__iter__() + + def __len__(self): + return len(self.m_def.all_properties) + class MCategory(metaclass=MObjectMeta): @@ -1374,7 +1465,7 @@ class MCategory(metaclass=MObjectMeta): # Metainfo M3 (i.e. definitions of definitions) class Definition(MSection): - """ A common base for all metainfo definitions. + ''' A common base for all metainfo definitions. All metainfo `definitions` (sections, quantities, sub-sections, packages, ...) share some common attributes. These are defined in a common base: all @@ -1403,7 +1494,7 @@ class Definition(MSection): Additional helper functions for `definitions`: .. automethod:: all_definitions - """ + ''' __all_definitions: Dict[Type[MSection], List[MSection]] = {} @@ -1423,11 +1514,11 @@ class Definition(MSection): @classmethod def all_definitions(cls: Type[MSectionBound]) -> Iterable[MSectionBound]: - """ Class method that returns all definitions of this class. + ''' Class method that returns all definitions of this class. This can be used to get a list of all globally available `defintions` or a certain kind. E.g. to get all `quantities`: ``Quantity.all_definitions()``. - """ + ''' return cast(Iterable[MSectionBound], Definition.__all_definitions.get(cls, [])) def qualified_name(self): @@ -1449,11 +1540,14 @@ class Definition(MSection): class Property(Definition): - pass + + def __init_property__(self): + ''' Is called during section initialisation to allow property initialisation ''' + pass class Quantity(Property): - """ Definition of an atomic piece of data. + ''' Definition of an atomic piece of data. Quantity definitions are the main building block of meta-info schemas. Each quantity represents a single piece of data. 
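As an illustration only (not part of the patch; the Sample class and its quantities are hypothetical), quantity definitions like the ones described here are declared as class attributes of an MSection subclass and then set like normal attributes:

from nomad.metainfo import MSection, Quantity, units

class Sample(MSection):
    # each Quantity class attribute defines one piece of data of this section
    formula = Quantity(type=str, description='A chemical formula for this sample.')
    n_atoms = Quantity(type=int)
    temperature = Quantity(type=float, unit=units.K)

sample = Sample()
sample.formula = 'H2O'      # assignment is routed through m_set, including type checks
sample.n_atoms = 3
print(sample.m_to_dict())   # -> {'formula': 'H2O', 'n_atoms': 3}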
@@ -1551,7 +1645,7 @@ class Quantity(Property): is_scalar: Derived quantity that is True, iff this quantity has shape of length 0 - """ + ''' type: 'Quantity' = None shape: 'Quantity' = None @@ -1564,6 +1658,10 @@ class Quantity(Property): # TODO derived_from = Quantity(type=Quantity, shape=['0..*']) + def __init_property__(self): + if self.derived is not None: + self.virtual = True + def __get__(self, obj, cls): if obj is None: # class (def) attribute case @@ -1610,7 +1708,7 @@ class Quantity(Property): class DirectQuantity(Quantity): - """ Used for quantities that would cause indefinite loops due to bootstrapping. """ + ''' Used for quantities that would cause indefinite loops due to bootstrapping. ''' def __init__(self, **kwargs): super().__init__(**kwargs) @@ -1637,7 +1735,7 @@ class DirectQuantity(Quantity): class SubSection(Property): - """ Defines what sections can appear as sub-sections of another section. + ''' Defines what sections can appear as sub-sections of another section. Like quantities, sub-sections are defined in a `section class` as attributes of this class. An like quantities, each sub-section definition becomes a property of @@ -1656,7 +1754,7 @@ class SubSection(Property): repeats: A boolean that determines wether this sub-section can appear multiple times in the parent section. - """ + ''' sub_section: 'Quantity' = None repeats: 'Quantity' = None @@ -1674,14 +1772,24 @@ class SubSection(Property): return obj.m_get_sub_section(self, -1) def __set__(self, obj, value): - raise NotImplementedError('Sub sections cannot be set directly. Use m_create.') + if obj is None: + raise NotImplementedError() + + if self.repeats: + raise NotImplementedError('Cannot set a repeating sub section use m_create or m_add_sub_section.') + + else: + if value is None: + obj.m_remove_sub_section(self, -1) + else: + obj.m_add_sub_section(self, value) def __delete__(self, obj): raise NotImplementedError('Deleting sub sections is not supported.') class Section(Definition): - """ Sections define blocks of related quantities and allows hierarchical data. + ''' Sections define blocks of related quantities and allows hierarchical data. Section definitions determine what quantities and sub-sections can appear in a following section instance. @@ -1756,7 +1864,7 @@ class Section(Definition): parent_section_sub_section_defs: A helper attribute that gives all sub-section definitions that this section is used in. - """ + ''' section_cls: Type[MSection] = None @@ -1820,7 +1928,7 @@ class Section(Definition): class Package(Definition): - """ Packages organize metainfo defintions alongside Python modules + ''' Packages organize metainfo defintions alongside Python modules Each Python module with metainfo Definition (explicitely or implicitely) has a member ``m_package`` with an instance of this class. Definitions (categories, sections) in @@ -1843,7 +1951,7 @@ class Package(Definition): all_definitions: A helper attribute that provides all section definitions by name. - """ + ''' section_definitions: 'SubSection' = None category_definitions: 'SubSection' = None @@ -1874,7 +1982,7 @@ class Package(Definition): class Category(Definition): - """ Categories allow to organize metainfo definitions (not metainfo data like sections do) + ''' Categories allow to organize metainfo definitions (not metainfo data like sections do) Each definition, including categories themselves, can belong to a set of categories. Categories therefore form a hierarchy of concepts that definitions can belong to, i.e. 
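A minimal, hedged sketch of what the new SubSection.__set__ above enables (System and Run are hypothetical section classes): a non-repeating sub-section can now be assigned and cleared through plain attribute access, while repeating sub-sections still require m_create or m_add_sub_section:

from nomad.metainfo import MSection, SubSection, Quantity

class System(MSection):
    formula = Quantity(type=str)

class Run(MSection):
    # a single, non-repeating sub-section
    system = SubSection(sub_section=System.m_def, repeats=False)

run = Run()
system = System()
system.formula = 'H2O'
run.system = system   # now allowed: delegates to m_add_sub_section
run.system = None     # clears it again via m_remove_sub_section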
@@ -1883,7 +1991,7 @@ class Category(Definition): Args: definitions: A helper attribute that gives all definitions that are directly or indirectly in this category. - """ + ''' def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) @@ -1955,7 +2063,7 @@ SubSection.__init_cls__() class Environment(MSection): - """ Environments allow to manage many metainfo packages and quickly access all definitions. + ''' Environments allow to manage many metainfo packages and quickly access all definitions. Environments provide a name-table for large-sets of metainfo definitions that span multiple packages. It provides various functions to resolve metainfo definitions by @@ -1963,7 +2071,7 @@ class Environment(MSection): Args: packages: Packages in this environment. - """ + ''' packages = SubSection(sub_section=Package, repeats=True) diff --git a/nomad/metainfo/mongoengine.py b/nomad/metainfo/mongoengine.py index 47455a28d6e209a36547d4440ef632a47777aef8..020704f9db5175ee4954bc86ad9a6254ab356025 100644 --- a/nomad/metainfo/mongoengine.py +++ b/nomad/metainfo/mongoengine.py @@ -12,14 +12,14 @@ # See the License for the specific language governing permissions and # limitations under the License. -""" +''' Adds mongoengine supports to the metainfo. Allows to create, save, and get metainfo sections from mongoengine. Currently no sub-section support. The annotation key is "a_me", the annotation object support the following keys: - ``primary_key``: *Bool*, renders the quantity to be the primary key. - ``index``: *Bool*, adds this quantity to the index -""" +''' from typing import Any, Dict import mongoengine as me diff --git a/nomad/metainfo/optimade.py b/nomad/metainfo/optimade.py index 62378b1fab4ad0bb6ad99478a4e682ff4b578b7f..18d45824c96d1172c5bddfbad692d9ae5c8dac0d 100644 --- a/nomad/metainfo/optimade.py +++ b/nomad/metainfo/optimade.py @@ -1,11 +1,13 @@ from ase.data import chemical_symbols -from elasticsearch_dsl import Keyword, Integer, Float, InnerDoc, Nested +from elasticsearch_dsl import Keyword, Float, InnerDoc, Nested import numpy as np from . import MSection, Section, Quantity, SubSection, MEnum, units -from .elastic import elastic_mapping +from .search import SearchQuantity +# TODO move the module + def optimade_links(section: str): return [ 'https://github.com/Materials-Consortia/OPTiMaDe/blob/develop/optimade.md#%s' % @@ -29,11 +31,11 @@ class Optimade(): class Species(MSection): - """ + ''' Used to describe the species of the sites of this structure. Species can be pure chemical elements, or virtual-crystal atoms representing a statistical occupation of a given site by multiple chemical elements. - """ + ''' m_def = Section(links=optimade_links('h.6.2.13')) @@ -96,13 +98,12 @@ class Species(MSection): class OptimadeEntry(MSection): m_def = Section( links=optimade_links('h.6.2'), - a_flask=dict(skip_none=True), - a_elastic=dict(type=InnerDoc)) + a_flask=dict(skip_none=True)) elements = Quantity( type=MEnum(chemical_symbols), shape=['1..*'], links=optimade_links('h.6.2.1'), - a_elastic=dict(type=Keyword), + a_search=SearchQuantity(), a_optimade=Optimade(query=True, entry=True), description=''' Names of the different elements present in the structure. @@ -111,7 +112,7 @@ class OptimadeEntry(MSection): nelements = Quantity( type=int, links=optimade_links('h.6.2.2'), - a_elastic=dict(type=Integer), + a_search=SearchQuantity(), a_optimade=Optimade(query=True, entry=True), description=''' Number of different elements in the structure as an integer. 
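To illustrate the annotation switch in the hunks above (ExampleEntry, formula, n_atoms and total_atoms are hypothetical names), the new pattern attaches a SearchQuantity annotation, defined in the nomad/metainfo/search.py module added later in this patch, via the a_search keyword:

from nomad.metainfo import MSection, Quantity
from nomad.metainfo.search import SearchQuantity

class ExampleEntry(MSection):
    formula = Quantity(
        type=str,
        a_search=SearchQuantity())  # indexed under its own name, Keyword mapping by default

    n_atoms = Quantity(
        type=int,
        a_search=SearchQuantity(metric='sum', metric_name='total_atoms'))  # also usable as a statistics metric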
@@ -120,7 +121,7 @@ class OptimadeEntry(MSection): elements_ratios = Quantity( type=float, shape=['nelements'], links=optimade_links('h.6.2.3'), - a_elastic=dict(type=lambda: Nested(ElementRatio), mapping=ElementRatio.from_structure_entry), + a_search=SearchQuantity(es_mapping=Nested(ElementRatio), es_value=ElementRatio.from_structure_entry), a_optimade=Optimade(query=True, entry=True), description=''' Relative proportions of different elements in the structure. @@ -129,7 +130,7 @@ class OptimadeEntry(MSection): chemical_formula_descriptive = Quantity( type=str, links=optimade_links('h.6.2.4'), - a_elastic=dict(type=Keyword), + a_search=SearchQuantity(), a_optimade=Optimade(query=True, entry=True), description=''' The chemical formula for a structure as a string in a form chosen by the API @@ -139,7 +140,7 @@ class OptimadeEntry(MSection): chemical_formula_reduced = Quantity( type=str, links=optimade_links('h.6.2.5'), - a_elastic=dict(type=Keyword), + a_search=SearchQuantity(), a_optimade=Optimade(query=True, entry=True), description=''' The reduced chemical formula for a structure as a string with element symbols and @@ -149,7 +150,7 @@ class OptimadeEntry(MSection): chemical_formula_hill = Quantity( type=str, links=optimade_links('h.6.2.6'), - a_elastic=dict(type=Keyword), + a_search=SearchQuantity(), a_optimade=Optimade(query=True, entry=False), description=''' The chemical formula for a structure in Hill form with element symbols followed by @@ -159,7 +160,7 @@ class OptimadeEntry(MSection): chemical_formula_anonymous = Quantity( type=str, links=optimade_links('h.6.2.7'), - a_elastic=dict(type=Keyword), + a_search=SearchQuantity(), a_optimade=Optimade(query=True, entry=True), description=''' The anonymous formula is the chemical_formula_reduced, but where the elements are @@ -171,7 +172,7 @@ class OptimadeEntry(MSection): dimension_types = Quantity( type=int, shape=[3], links=optimade_links('h.6.2.8'), - a_elastic=dict(type=Integer, mapping=lambda a: sum(a.dimension_types)), + a_search=SearchQuantity(es_value=lambda a: sum(a.dimension_types)), a_optimade=Optimade(query=True, entry=True), description=''' List of three integers. For each of the three directions indicated by the three lattice @@ -201,7 +202,7 @@ class OptimadeEntry(MSection): nsites = Quantity( type=int, links=optimade_links('h.6.2.11'), - a_elastic=dict(type=Integer), + a_search=SearchQuantity(), a_optimade=Optimade(query=True, entry=True), description=''' An integer specifying the length of the cartesian_site_positions property. ''') @@ -220,7 +221,7 @@ class OptimadeEntry(MSection): structure_features = Quantity( type=MEnum(['disorder', 'unknown_positions', 'assemblies']), shape=['1..*'], links=optimade_links('h.6.2.15'), - a_elastic=dict(type=Keyword), + a_search=SearchQuantity(), a_optimade=Optimade(query=True, entry=True), description=''' A list of strings that flag which special features are used by the structure. 
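A brief, hedged note on the es_value callables above: rather than indexing the raw quantity, the callable derives the indexed value from the section. For dimension_types, which presumably holds one flag per lattice direction (1 for periodic, 0 otherwise, following the linked OPTiMaDe field), the indexed value is just the number of periodic directions:

# Hypothetical values, illustration only.
dimension_types = [1, 1, 0]           # e.g. a slab that is periodic along two lattice directions
indexed_value = sum(dimension_types)  # what es_value=lambda a: sum(a.dimension_types) stores, here 2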
@@ -232,6 +233,3 @@ ''') species = SubSection(sub_section=Species.m_def, repeats=True) - - -ESOptimadeEntry = elastic_mapping(OptimadeEntry.m_def, InnerDoc) diff --git a/nomad/metainfo/search.py b/nomad/metainfo/search.py new file mode 100644 index 0000000000000000000000000000000000000000..45f1d317fa3859f3c973ff8ae9d2aea9174197b8 --- /dev/null +++ b/nomad/metainfo/search.py @@ -0,0 +1,116 @@ +from typing import Callable, Any + +from nomad import metainfo + + +# TODO multi, split are more flask related +class SearchQuantity: + ''' + A metainfo quantity annotation class that defines additional properties that determine + how to search for the respective quantity. Only quantities that have this annotation will + be mapped to elastic search. + + Attributes: + name: The name of this search quantity. Will be the name in the elastic index and + the name for the search parameter. Default is the metainfo quantity name. + many_or: Indicates that an 'or' (es terms) search is performed if many values are given. + Otherwise an 'and' (es bool->should->match) is performed. Values are 'split' and + 'append' to indicate how URL search parameters should be treated. + many_and: Indicates that many values can be supplied for search. Values are 'split' and + 'append' to indicate how URL search parameters should be treated. + order_default: Indicates that this quantity is used to order search results + if no other ordering was specified. + metric: Quantity can be used to build statistics. Statistics provide a metric + value for each value of the quantity. E.g. number of datasets with a given atom label. + This defines a metric based on this quantity. Values need to be a valid + elastic search aggregation (e.g. sum, cardinality, etc.). + metric_name: If this quantity is indicated to function as a metric, the metric + needs a name. By default the quantity's name is used. + default_statistic: Indicates that this quantity is part of the default statistics. + statistic_size: + The maximum number of values in a statistic. Default is 10. + group: Indicates that this quantity can be used to group results. The value will + be the name of the group. + es_quantity: The quantity in the elastic mapping that is used to search. This is + especially useful if the quantity represents an inner document and only one + quantity of this inner object is used. Default is the name of the quantity. + es_mapping: A valid elasticsearch_dsl mapping. Default is ``Keyword()``. + es_value: A callable that is applied to a section to get a value for this quantity in the elastic index. + derived: A callable that is applied to search parameter values before search.
+ ''' + + def __init__( + self, + name: str = None, description: str = None, + many_and: str = None, many_or: str = None, + order_default: bool = False, + group: str = None, metric: str = None, metric_name: str = None, + default_statistic: bool = False, + statistic_size: int = 10, + es_quantity: str = None, + es_mapping: Any = None, + es_value: Callable[[Any], Any] = None, + derived: Callable[[Any], Any] = None): + + self.name = name + self.description = description + self.many_and = many_and + self.many_or = many_or + self.order_default = order_default + self.group = group + self.default_statistic = default_statistic + self.metric = metric + self.metric_name = metric_name + self.statistic_size = statistic_size + self.es_quantity = es_quantity + self.es_mapping = es_mapping + self.es_value = es_value + self.derived = derived + + self.prefix: str = None + self.qualified_name: str = None + + assert many_and is None or many_or is None, 'A search quantity can only be used for multi or many search' + assert many_and in [None, 'split', 'append'], 'Only split and append are valid values' + assert many_or in [None, 'split', 'append'], 'Only split and append are valid values' + + def configure(self, quantity: metainfo.Quantity, prefix: str = None): + if self.name is None: + self.name = quantity.name + + if self.description is None: + self.description = quantity.description + + if prefix is not None: + self.qualified_name = '%s.%s' % (prefix, self.name) + if self.es_quantity is not None: + self.es_quantity = '%s.%s' % (prefix, self.es_quantity) + if self.metric_name is not None: + self.metric_name = '%s.%s' % (prefix, self.metric_name) + if self.group is not None: + self.group = '%s.%s' % (prefix, self.group) + else: + self.qualified_name = self.name + + if self.es_quantity is None: + self.es_quantity = self.qualified_name + if self.metric_name is None and self.metric is not None: + self.metric_name = self.qualified_name + + @property + def argparse_action(self): + if self.many_or is not None: + return self.many_or + + if self.many_and is not None: + return self.many_and + + return None + + @property + def many(self): + return self.many_and is not None or self.many_or is not None + + +def init(section: metainfo.MSection): + pass diff --git a/nomad/normalizing/__init__.py b/nomad/normalizing/__init__.py index 20502f9951d5738732309ee998d667a7c654f207..cd527d48da12258045901808535e62c8baf18517 100644 --- a/nomad/normalizing/__init__.py +++ b/nomad/normalizing/__init__.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -""" +''' After parsing calculations have to be normalized with a set of *normalizers*. In NOMAD-coe those were programmed in python (we'll reuse) and scala (we'll rewrite). @@ -29,7 +29,7 @@ There is one ABC for all normalizer: .. autoclass::nomad.normalizing.normalizer.Normalizer :members: -""" +''' from typing import List, Any, Iterable, Type diff --git a/nomad/normalizing/data/springer_msgpack.py b/nomad/normalizing/data/springer_msgpack.py index b0590f20dafcb54dfd6f4c98279e098330f73b9a..9a9bfdda45998ee9a24a7da09cec2d2744ad77f8 100644 --- a/nomad/normalizing/data/springer_msgpack.py +++ b/nomad/normalizing/data/springer_msgpack.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -""" +''' Generates and queries a msgpack database of springer-related quantities downloaded from http://materials.springer.com. 
The database is stuctured as @@ -21,7 +21,7 @@ space_group_number : normalized_formula : springer_id : entry The msgpack file can be queried using ArchiveFileDB. The html parser was taken from a collection of scripts from FHI without further testing. -""" +''' import requests import re @@ -89,9 +89,9 @@ def normalize_formula(formula_str: str) -> str: def parse(htmltext: str) -> Dict[str, str]: - """ + ''' Parser the quantities in required_items from an html text. - """ + ''' soup = BeautifulSoup(htmltext, "html.parser") results = {} for item in soup.find_all(attrs={"class": "data-list__item"}): @@ -158,10 +158,10 @@ def _download(path: str, max_n_query: int = 10) -> str: def download_springer_data(max_n_query: int = 10): - """ + ''' Downloads the springer quantities related to a structure from springer and updates database. - """ + ''' # load database # querying database with unvailable dataset leads to error, # get toc keys first by making an empty query @@ -219,9 +219,9 @@ def download_springer_data(max_n_query: int = 10): def query_springer_data(normalized_formula: str, space_group_number: int) -> Dict[str, Any]: - """ + ''' Queries a msgpack database for springer-related quantities. - """ + ''' entries = query_archive(DB_NAME, {str(space_group_number): {normalized_formula: '*'}}) db_dict = {} entries = entries.get(str(space_group_number), {}).get(normalized_formula, {}) diff --git a/nomad/normalizing/normalizer.py b/nomad/normalizing/normalizer.py index 2ca1efbf906e3479aa0aa5080537f5006b494e78..ea0d70f2bfe77e8b5945f97ffae19c91464ae9ab 100644 --- a/nomad/normalizing/normalizer.py +++ b/nomad/normalizing/normalizer.py @@ -20,16 +20,16 @@ from nomad.utils import get_logger class Normalizer(metaclass=ABCMeta): - """ + ''' A base class for normalizers. Normalizers work on a :class:`AbstractParserBackend` instance for read and write. Normalizer instances are reused. Arguments: backend: The backend used to read and write data from and to. - """ + ''' domain = 'dft' - """ The domain this normalizer should be used in. Default for all normalizer is 'DFT'. """ + ''' The domain this normalizer should be used in. Default for all normalizer is 'DFT'. ''' def __init__(self, backend: AbstractParserBackend) -> None: self._backend = backend @@ -42,7 +42,7 @@ class Normalizer(metaclass=ABCMeta): class SystemBasedNormalizer(Normalizer, metaclass=ABCMeta): - """ + ''' A normalizer base class for normalizers that only touch a section_system. The normalizer is run on all section systems in a run. However, some systems, @@ -51,7 +51,7 @@ class SystemBasedNormalizer(Normalizer, metaclass=ABCMeta): Args: only_representatives: Will only normalize the `representative` systems. - """ + ''' def __init__(self, backend: AbstractParserBackend, only_representatives: bool = False): super().__init__(backend) self.only_representatives = only_representatives @@ -78,15 +78,15 @@ class SystemBasedNormalizer(Normalizer, metaclass=ABCMeta): @abstractmethod def normalize_system(self, section_system_index: int, is_representative: bool) -> bool: - """ Normalize the given section and returns True, iff successful""" + ''' Normalize the given section and returns True, iff successful''' pass def __representative_system(self): - """Used to select a representative system for this entry. + '''Used to select a representative system for this entry. Attempt to find a single section_system that is representative for the entry. The selection depends on the type of calculation. 
- """ + ''' # Try to find a frame sequence, only first found is considered try: frame_seq = self._backend['section_frame_sequence'][0] diff --git a/nomad/normalizing/optimade.py b/nomad/normalizing/optimade.py index 114b2064143fd6b0d463f193a82cad7521e22797..d0ddf643ac1f4e497cc831d48645c4fcec926314 100644 --- a/nomad/normalizing/optimade.py +++ b/nomad/normalizing/optimade.py @@ -28,19 +28,19 @@ species_re = re.compile(r'^([A-Z][a-z]?)(\d*)$') class OptimadeNormalizer(SystemBasedNormalizer): - """ + ''' This normalizer performs all produces a section all data necessary for the Optimade API. It assumes that the :class:`SystemNormalizer` was run before. - """ + ''' def __init__(self, backend): super().__init__(backend, only_representatives=True) def get_optimade_data(self, index) -> OptimadeEntry: - """ + ''' The 'main' method of this :class:`SystemBasedNormalizer`. Normalizes the section with the given `index`. Normalizes geometry, classifies, system_type, and runs symmetry analysis. - """ + ''' optimade = OptimadeEntry() def get_value(key: str, default: Any = None, numpy: bool = False, unit=None) -> Any: diff --git a/nomad/normalizing/structure.py b/nomad/normalizing/structure.py index 5632d16f118f637554468df4db012878f40079b2..b1661c56616d89882101259a0cf01694e1f524e9 100644 --- a/nomad/normalizing/structure.py +++ b/nomad/normalizing/structure.py @@ -32,7 +32,7 @@ if old_symmetry_tolerance != symmetry_tolerance: def get_normalized_wyckoff(atomic_numbers: np.array, wyckoff_letters: np.array) -> Dict[str, Dict[str, int]]: - """Returns a normalized Wyckoff sequence for the given atomic numbers and + '''Returns a normalized Wyckoff sequence for the given atomic numbers and corresponding wyckoff letters. In a normalized sequence the chemical species are "anonymized" by replacing them with upper case alphabets. @@ -45,7 +45,7 @@ def get_normalized_wyckoff(atomic_numbers: np.array, wyckoff_letters: np.array) dictionary. The dictionary contains the number of atoms for each species, where the species names have been anomymized in the form "X_<index>". - """ + ''' # Count the occurrence of each chemical species atom_count: Dict[int, int] = {} for atomic_number in atomic_numbers: @@ -106,7 +106,7 @@ def get_normalized_wyckoff(atomic_numbers: np.array, wyckoff_letters: np.array) def search_aflow_prototype(space_group: int, norm_wyckoff: dict) -> dict: - """Searches the AFLOW prototype library for a match for the given space + '''Searches the AFLOW prototype library for a match for the given space group and normalized Wyckoff sequence. The normalized Wyckoff sequence is assumed to come from the MatID symmetry routine. @@ -121,7 +121,7 @@ def search_aflow_prototype(space_group: int, norm_wyckoff: dict) -> dict: Returns: Dictionary containing the AFLOW prototype information. - """ + ''' structure_type_info = None type_descriptions = aflow_prototypes["prototypes_by_spacegroup"].get(space_group, []) for type_description in type_descriptions: diff --git a/nomad/normalizing/system.py b/nomad/normalizing/system.py index e91043ab88c15176e26e35edacce3136a8a1ceca..eaca47f82f3ea461d9765fb63234b957f1d4eb86 100644 --- a/nomad/normalizing/system.py +++ b/nomad/normalizing/system.py @@ -40,10 +40,10 @@ springer_db_connection = None def open_springer_database(): - """ + ''' Create a global connection to the Springer database in a way that each worker opens the database just once. 
- """ + ''' global springer_db_connection if springer_db_connection is None: # filepath definition in 'nomad-FAIR/nomad/config.py' @@ -59,22 +59,22 @@ def open_springer_database(): def normalized_atom_labels(atom_labels): - """ + ''' Normalizes the given atom labels: they either are labels right away, or contain additional numbers (to distinguish same species but different labels, see meta-info), or we replace them with ase placeholder atom for unknown elements 'X'. - """ + ''' return [ ase.data.chemical_symbols[0] if match is None else match.group(0) for match in [re.search(atom_label_re, atom_label) for atom_label in atom_labels]] def formula_normalizer(atoms): - """ + ''' Reads the chemical symbols in ase.atoms and returns a normalized formula. Formula normalization is on the basis of atom counting, e.g., Tc -> Tc100, SZn -> S50Zn50, Co2Nb -> Co67Nb33 - """ + ''' # chem_symb = atoms.get_chemical_symbols() atoms_counter = Counter(chem_symb) # dictionary @@ -91,10 +91,10 @@ def formula_normalizer(atoms): class SystemNormalizer(SystemBasedNormalizer): - """ + ''' This normalizer performs all system (atoms, cells, etc.) related normalizations of the legacy NOMAD-coe *stats* normalizer. - """ + ''' @staticmethod def atom_label_to_num(atom_label): @@ -109,13 +109,13 @@ class SystemNormalizer(SystemBasedNormalizer): return 0 def normalize_system(self, index, is_representative) -> bool: - """ + ''' The 'main' method of this :class:`SystemBasedNormalizer`. Normalizes the section with the given `index`. Normalizes geometry, classifies, system_type, and runs symmetry analysis. Returns: True, iff the normalization was successful - """ + ''' def get_value(key: str, default: Any = None, numpy: bool = True) -> Any: try: @@ -262,13 +262,13 @@ class SystemNormalizer(SystemBasedNormalizer): return True def system_type_analysis(self, atoms: Atoms) -> None: - """ + ''' Determine the system type with MatID. Write the system type to the backend. Args: atoms: The structure to analyse - """ + ''' system_type = config.services.unavailable_value if atoms.get_number_of_atoms() <= config.normalize.system_classification_with_clusters_threshold: try: @@ -297,7 +297,7 @@ class SystemNormalizer(SystemBasedNormalizer): self._backend.addValue('system_type', system_type) def symmetry_analysis(self, atoms) -> None: - """Analyze the symmetry of the material being simulated. + '''Analyze the symmetry of the material being simulated. We feed in the parsed values in section_system to the the symmetry analyzer. We then use the Matid library @@ -312,7 +312,7 @@ class SystemNormalizer(SystemBasedNormalizer): Returns: None: The method should write symmetry variables to the backend which is member of this class. - """ + ''' # Try to use Matid's symmetry analyzer to analyze the ASE object. try: symm = SymmetryAnalyzer(atoms, symmetry_tol=config.normalize.symmetry_tolerance) @@ -410,7 +410,7 @@ class SystemNormalizer(SystemBasedNormalizer): # SQL QUERY # (this replaces the four queries done in the old 'classify4me_SM_normalizer.py') - cur.execute(""" + cur.execute(''' SELECT entry.entry_id, entry.alphabetic_formula, @@ -425,7 +425,7 @@ class SystemNormalizer(SystemBasedNormalizer): LEFT JOIN reference ON reference.reference_nr = er.entry_nr WHERE entry.normalized_formula = ( %r ) and entry.space_group_number = '%d' GROUP BY entry.entry_id; - """ % (normalized_formula, space_group_number)) + ''' % (normalized_formula, space_group_number)) results = cur.fetchall() # 'results' is a list of tuples, i.e. 
'[(a,b,c,d), ..., (a,b,c,d)]' @@ -487,14 +487,14 @@ class SystemNormalizer(SystemBasedNormalizer): self.logger.warning('Mismatch in Springer classification or compounds') def prototypes(self, atom_species: np.array, wyckoffs: np.array, spg_number: int) -> None: - """Tries to match the material to an entry in the AFLOW prototype data. + '''Tries to match the material to an entry in the AFLOW prototype data. If a match is found, a section_prototype is added to section_system. Args: atomic_numbers: Array of atomic numbers. wyckoff_letters: Array of Wyckoff letters as strings. spg_number: Space group number. - """ + ''' norm_wyckoff = structure.get_normalized_wyckoff(atom_species, wyckoffs) protoDict = structure.search_aflow_prototype(spg_number, norm_wyckoff) if protoDict is not None: diff --git a/nomad/parsing/__init__.py b/nomad/parsing/__init__.py index 4144ce44a4cb306f1c7b423e338bb01ea35cd074..7327895df2314156ddd87da4f552763f5eb1a72a 100644 --- a/nomad/parsing/__init__.py +++ b/nomad/parsing/__init__.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -""" +''' The *parsing* module is an interface for the existing NOMAD-coe parsers. This module redefines some of the old NOMAD-coe python-common functionality to create a more coherent interface to the parsers. @@ -69,7 +69,7 @@ based on NOMAD-coe's *python-common* module. :members: .. autoclass:: nomad.parsing.LocalBackend :members: -""" +''' from typing import Callable, IO, Union, Dict import magic @@ -96,7 +96,7 @@ encoding_magic = magic.Magic(mime_encoding=True) def match_parser(mainfile: str, upload_files: Union[str, files.StagingUploadFiles], strict=True) -> 'Parser': - """ + ''' Performs parser matching. This means it take the given mainfile and potentially opens it with the given callback and tries to identify a parser that can parse the file. @@ -111,7 +111,7 @@ def match_parser(mainfile: str, upload_files: Union[str, files.StagingUploadFile strict: Only match strict parsers, e.g. no artificial parsers for missing or empty entries. Returns: The parser, or None if no parser could be matched. - """ + ''' if mainfile.startswith('.') or mainfile.startswith('~'): return None @@ -484,7 +484,7 @@ if config.use_empty_parsers: parsers.append(BrokenParser()) -""" Instantiation and constructor based config of all parsers. """ +''' Instantiation and constructor based config of all parsers. ''' parser_dict = {parser.name: parser for parser in parsers} # type: ignore -""" A dict to access parsers by name. Usually 'parsers/<...>', e.g. 'parsers/vasp'. """ +''' A dict to access parsers by name. Usually 'parsers/<...>', e.g. 'parsers/vasp'. ''' diff --git a/nomad/parsing/artificial.py b/nomad/parsing/artificial.py index 969f8f2009a5bdd88548cd97b9afddc3d6a68619..fe85e9bf53dc8d0f996ff0c928aca1c75d2c629f 100644 --- a/nomad/parsing/artificial.py +++ b/nomad/parsing/artificial.py @@ -12,9 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. -""" +''' Parser for creating artificial test, brenchmark, and demonstration data. -""" +''' import json import os.path @@ -40,7 +40,7 @@ meta_info_env, _ = loadJsonFile(filePath=meta_info_path, dependencyLoader=None, class ArtificalParser(Parser): - """ Base class for artifical parsers based on VASP metainfo. """ + ''' Base class for artifical parsers based on VASP metainfo. 
''' def __init__(self): super().__init__() self.backend = None @@ -54,9 +54,9 @@ class ArtificalParser(Parser): class EmptyParser(MatchingParser): - """ + ''' Implementation that produces an empty code_run - """ + ''' def run(self, mainfile: str, logger=None) -> LocalBackend: backend = LocalBackend(metaInfoEnv=meta_info_env, debug=False) # type: ignore backend.openSection('section_run') @@ -66,10 +66,10 @@ class EmptyParser(MatchingParser): class TemplateParser(ArtificalParser): - """ + ''' A parser that generates data based on a template given via the mainfile. The template is basically some archive json. Only - """ + ''' name = 'parsers/template' def is_mainfile( @@ -78,11 +78,11 @@ class TemplateParser(ArtificalParser): return filename.endswith('template.json') def transform_value(self, name, value): - """ allow subclasses to modify values """ + ''' allow subclasses to modify values ''' return value def transform_section(self, name, section): - """ allow subclasses to modify sections """ + ''' allow subclasses to modify sections ''' return section def add_section(self, section): @@ -130,7 +130,7 @@ class TemplateParser(ArtificalParser): class ChaosParser(ArtificalParser): - """ + ''' Parser that emulates typical error situations. Files can contain a json string (or object with key `chaos`) with one of the following string values: - exit @@ -139,7 +139,7 @@ class ChaosParser(ArtificalParser): - exception - segfault - random - """ + ''' name = 'parsers/chaos' def is_mainfile( @@ -212,7 +212,7 @@ class GenerateRandomParser(TemplateParser): return os.path.basename(filename).startswith('random_') def transform_section(self, name, section): - """ allow subclasses to modify sections """ + ''' allow subclasses to modify sections ''' if name == 'section_system': atoms = [] atom_positions = [] diff --git a/nomad/parsing/backend.py b/nomad/parsing/backend.py index 28b118b10a86facbf1e569aac217b582669db63f..0f25a91b546063789ea34d9d910041bd3a713794 100644 --- a/nomad/parsing/backend.py +++ b/nomad/parsing/backend.py @@ -55,124 +55,124 @@ class WrongContextState(Exception): class AbstractParserBackend(metaclass=ABCMeta): - """ + ''' This ABS provides the parser backend interface used by the NOMAD-coe parsers and normalizers. - """ + ''' @abstractmethod def metaInfoEnv(self): - """ Returns the meta info used by this backend. """ + ''' Returns the meta info used by this backend. ''' pass @abstractmethod def startedParsingSession( self, mainFileUri, parserInfo, parserStatus=None, parserErrors=None): - """ + ''' Should be called when the parsing starts. ParserInfo should be a valid json dictionary. - """ + ''' pass @abstractmethod def finishedParsingSession( self, parserStatus, parserErrors, mainFileUri=None, parserInfo=None, parsingStats=None): - """ Called when the parsing finishes. """ + ''' Called when the parsing finishes. ''' pass @abstractmethod def openContext(self, contextUri: str): - """ Open existing archive data to introduce new data into an existing section. """ + ''' Open existing archive data to introduce new data into an existing section. ''' pass @abstractmethod def closeContext(self, contextUri: str): - """ Close priorly opened existing archive data again. """ + ''' Close priorly opened existing archive data again. ''' pass @abstractmethod def openSection(self, metaName, parent_index=-1): - """ Opens a new section and returns its new unique gIndex. """ + ''' Opens a new section and returns its new unique gIndex. 
''' pass @abstractmethod def closeSection(self, metaName, gIndex): - """ + ''' Closes the section with the given meta name and index. After this, no more value can be added to this section. - """ + ''' pass @abstractmethod def openNonOverlappingSection(self, metaName): - """ Opens a new non overlapping section. """ + ''' Opens a new non overlapping section. ''' pass @abstractmethod def setSectionInfo(self, metaName, gIndex, references): - """ + ''' Sets info values of an open section references should be a dictionary with the gIndexes of the root sections this section refers to. - """ + ''' pass @abstractmethod def closeNonOverlappingSection(self, metaName): - """ + ''' Closes the current non overlapping section for the given meta name. After this, no more value can be added to this section. - """ + ''' pass @abstractmethod def openSections(self): - """ Returns the sections that are still open as metaName, gIndex tuples. """ + ''' Returns the sections that are still open as metaName, gIndex tuples. ''' pass @abstractmethod def addValue(self, metaName, value, gIndex=-1): - """ + ''' Adds a json value for the given metaName. The gIndex is used to identify the right parent section. - """ + ''' pass @abstractmethod def addRealValue(self, metaName, value, gIndex=-1): - """ + ''' Adds a float value for the given metaName. The gIndex is used to identify the right parent section. - """ + ''' pass @abstractmethod def addArray(self, metaName, shape, gIndex=-1): - """ + ''' Adds an unannitialized array of the given shape for the given metaName. The gIndex is used to identify the right parent section. This is neccessary before array values can be set with :func:`setArrayValues`. - """ + ''' @abstractmethod def setArrayValues(self, metaName, values, offset=None, gIndex=-1): - """ + ''' Adds values of the given numpy array to the last array added for the given metaName and parent gIndex. - """ + ''' pass @abstractmethod def addArrayValues(self, metaName, values, gIndex=-1, override: bool = False): - """ + ''' Adds an array with the given numpy array values for the given metaName and parent section gIndex. Override determines whether to rewrite exisiting values in the backend. - """ + ''' pass @abstractmethod def pwarn(self, msg): - """ Used to catch parser warnings. """ + ''' Used to catch parser warnings. ''' pass # The following are extensions to the origin NOMAD-coe parser backend. And allow @@ -185,34 +185,34 @@ class AbstractParserBackend(metaclass=ABCMeta): @abstractmethod def get_sections(self, meta_name: str, g_index: int = -1) -> List[int]: - """ Return all gIndices for existing sections of the given meta_name and parent section index. """ + ''' Return all gIndices for existing sections of the given meta_name and parent section index. ''' pass @abstractmethod def get_value(self, metaName: str, g_index=-1) -> Any: - """ + ''' Return the value set to the given meta_name in its parent section of the given index. An index of -1 (default) is only allowed if there is exactly one parent section. - """ + ''' pass def write_json( self, out: TextIO, pretty=True, filter: Callable[[str, Any], Any] = None, root_sections: List[str] = ['section_run', 'section_entry_info']): - """ Writes the backend contents. """ + ''' Writes the backend contents. ''' pass def add_mi2_section(self, section: MSection): - """ Allows to mix a metainfo2 style section into backend. """ + ''' Allows to mix a metainfo2 style section into backend. 
''' pass def get_mi2_section(self, section_def: MI2Section): - """ Allows to mix a metainfo2 style section into backend. """ + ''' Allows to mix a metainfo2 style section into backend. ''' pass def traverse(self, *args, **kwargs) -> Iterable[Tuple[str, str, Any]]: - """ Traverses the backend data and yiels tuples with metainfo name, event type, - and value """ + ''' Traverses the backend data and yiels tuples with metainfo name, event type, + and value ''' pass @@ -222,7 +222,7 @@ class JSONStreamWriter(): ARRAY = 2 KEY_VALUE = 3 - """ + ''' A generator that allows to output JSON based on calling 'event' functions. Its pure python and could be replaced by some faster implementation, e.g. yajl-py. It uses standard json decode to write values. This allows to mix streaming with @@ -236,7 +236,7 @@ class JSONStreamWriter(): Raises: AssertionError: If methods were called in a non JSON fashion. Call :func:`close` to make sure everything was closed properly. - """ + ''' def __init__(self, file, pretty=False): self._fp = file self._pretty = pretty @@ -335,10 +335,10 @@ class JSONStreamWriter(): class LegacyParserBackend(AbstractParserBackend): - """ + ''' Partial implementation of :class:`AbstractParserBackend` that implements some methods that are independent from the core backend implementation. - """ + ''' def __init__(self, logger): self.logger = logger if logger is not None else get_logger(__name__) @@ -365,10 +365,10 @@ class LegacyParserBackend(AbstractParserBackend): self._warnings.append('There are more warnings, check the processing logs.') def _parse_context_uri(self, context_uri: str) -> Tuple[str, int]: - """ + ''' Returns the last segment of the given context uri, i.e. the section that constitutes the context. - """ + ''' path_str = re.sub(r'^(nmd://[^/]+/[^/]+)?/', '', context_uri, count=1) path = path_str.split('/')[::-1] # reversed path via extended slice syntax @@ -388,7 +388,7 @@ class LegacyParserBackend(AbstractParserBackend): @property def status(self) -> ParserStatus: - """ Returns status and potential errors. """ + ''' Returns status and potential errors. ''' return (self._status, self._errors) def reset_status(self) -> None: @@ -398,12 +398,12 @@ class LegacyParserBackend(AbstractParserBackend): class LocalBackend(LegacyParserBackend, metaclass=DelegatingMeta): - """ + ''' This implementation of :class:`AbstractParserBackend` is a extended version of NOMAD-coe's ``LocalBackend`` that allows to write the results in an *archive*-style .json. It can be used like the original thing, but also allows to output archive JSON after parsing via :func:`write_json`. - """ + ''' def __init__(self, *args, **kwargs): logger = kwargs.pop('logger', None) super().__init__(logger=logger) @@ -417,7 +417,7 @@ class LocalBackend(LegacyParserBackend, metaclass=DelegatingMeta): return self.data[metaname] def __getattr__(self, name): - """ Support for unimplemented and unexpected methods. """ + ''' Support for unimplemented and unexpected methods. ''' if name not in self._known_attributes and self._unknown_attributes.get(name) is None: self.logger.debug('Access of unexpected backend attribute/method', attribute=name) self._unknown_attributes[name] = name @@ -425,11 +425,11 @@ class LocalBackend(LegacyParserBackend, metaclass=DelegatingMeta): return getattr(self._delegate, name) def add_mi2_section(self, section: MSection): - """ Allows to mix a metainfo2 style section into backend. """ + ''' Allows to mix a metainfo2 style section into backend. 
''' self.mi2_data[section.m_def.name] = section def get_mi2_section(self, section_def: MI2Section): - """ Allows to mix a metainfo2 style section into backend. """ + ''' Allows to mix a metainfo2 style section into backend. ''' return self.mi2_data.get(section_def.name, None) def finishedParsingSession(self, *args, **kwargs): @@ -558,7 +558,7 @@ class LocalBackend(LegacyParserBackend, metaclass=DelegatingMeta): def write_json( self, out: TextIO, pretty=True, filter: Callable[[str, Any], Any] = None, root_sections: List[str] = ['section_run', 'section_entry_info']): - """ + ''' Writes the results stored in the backend after parsing in an 'archive'.json style format. @@ -566,7 +566,7 @@ class LocalBackend(LegacyParserBackend, metaclass=DelegatingMeta): out: The file-like that is used to write the json to. pretty: Format the json or not. filter: Optional filter that takes metaname, value pairs and returns a new value. - """ + ''' json_writer = JSONStreamWriter(out, pretty=pretty) json_writer.open_object() diff --git a/nomad/parsing/metainfo.py b/nomad/parsing/metainfo.py index 618b1bd8638b4d0100ac4cdfd2a4295e1c5edb1a..8db70c5b27c330b5950fd6918c380a210c3664bb 100644 --- a/nomad/parsing/metainfo.py +++ b/nomad/parsing/metainfo.py @@ -29,7 +29,7 @@ from .backend import LegacyParserBackend class MetainfoBackend(LegacyParserBackend): - """ A backend that uses the new metainfo to store all data. """ + ''' A backend that uses the new metainfo to store all data. ''' def __init__(self, env: LegacyMetainfoEnvironment, logger=None): super().__init__(logger=logger) @@ -69,22 +69,22 @@ class MetainfoBackend(LegacyParserBackend): return current def openContext(self, context_uri: str): - """ Open existing archive data to introduce new data into an existing section. """ + ''' Open existing archive data to introduce new data into an existing section. ''' resolved = self.resolve_context(context_uri) self.open_sections_by_def.setdefault(resolved.m_def, []).append(resolved) def closeContext(self, context_uri: str): - """ Close priorly opened existing archive data again. """ + ''' Close priorly opened existing archive data again. ''' resolved = self.resolve_context(context_uri) self.open_sections_by_def.setdefault(resolved.m_def, []).remove(resolved) def openSection(self, name): - """ + ''' It will assume that there is a sub-section def with the given name. It will use the latest opened section of the sub-sections parent as the parent for the new section. An Exception will be known root sections, e.g. 'section_run'. - """ + ''' if name in ['section_run', 'section_entry_info']: section_def = self.env.resolve_definition(name, Section) sub_section = self.resource.create(section_def.section_cls) @@ -108,7 +108,7 @@ class MetainfoBackend(LegacyParserBackend): return sub_section.m_parent_index def get_open_section_for_quantity(self, name, g_index): - """ Returns the open section that contains the quantity of the given name. """ + ''' Returns the open section that contains the quantity of the given name. ''' quantity_def = self.env.resolve_definition(name, Quantity) section_def = quantity_def.m_parent_as(Section) sections = self.open_sections_by_def.get(section_def, []) @@ -149,10 +149,10 @@ class MetainfoBackend(LegacyParserBackend): return self.openSection(metaName) def setSectionInfo(self, metaName, gIndex, references): - """ + ''' Sets info values of an open section references should be a dictionary with the gIndexes of the root sections this section refers to. 
- """ + ''' # TODO might be necessary to make references work? pass @@ -160,7 +160,7 @@ class MetainfoBackend(LegacyParserBackend): return self.closeSection(name, -1) def openSections(self): - """ Returns the sections that are still open as metaName, gIndex tuples. """ + ''' Returns the sections that are still open as metaName, gIndex tuples. ''' for section_def, sub_sections in self.open_sections_by_def: for sub_section in sub_sections: yield section_def.name, sub_section.m_parent_index @@ -187,26 +187,26 @@ class MetainfoBackend(LegacyParserBackend): self.addValue(name, value, g_index) def addArray(self, name, shape, g_index=-1): - """ + ''' Adds an unannitialized array of the given shape for the given metaName. The gIndex is used to identify the right parent section. This is neccessary before array values can be set with :func:`setArrayValues`. - """ + ''' raise NotImplementedError() def setArrayValues(self, metaName, values, offset=None, gIndex=-1): - """ + ''' Adds values of the given numpy array to the last array added for the given metaName and parent gIndex. - """ + ''' raise NotImplementedError() def addArrayValues(self, name, values, g_index=-1, override: bool = False): - """ + ''' Adds an array with the given numpy array values for the given metaName and parent section gIndex. Override determines whether to rewrite exisiting values in the backend. - """ + ''' section, quantity_def = self.get_open_section_for_quantity(name, g_index) if isinstance(quantity_def.type, Reference): # quantity is a reference @@ -239,17 +239,17 @@ class MetainfoBackend(LegacyParserBackend): 'This method does not make sense in the context of the new metainfo.') def get_sections(self, meta_name: str, g_index: int = -1) -> List[int]: - """ Return all gIndices for existing sections of the given meta_name and parent index. """ + ''' Return all gIndices for existing sections of the given meta_name and parent index. ''' section_def = self.env.resolve_definition(meta_name, Section) return [ section.m_parent_index for section in self.resource.all(section_def.section_cls) if g_index == -1 or section.m_parent.m_parent_index == g_index] def get_value(self, meta_name: str, g_index=-1) -> Any: - """ + ''' Return the value set to the given meta_name in its parent section of the given index. An index of -1 (default) is only allowed if there is exactly one parent section. - """ + ''' try: quantity = self.env.resolve_definition(meta_name, Quantity) except KeyError: diff --git a/nomad/parsing/parser.py b/nomad/parsing/parser.py index a9c14db93fd156ae2c7a248704d4c869ab0c7cff..b4204634ff5c3d6cd42dc8f9f6f04743cf4757ef 100644 --- a/nomad/parsing/parser.py +++ b/nomad/parsing/parser.py @@ -29,10 +29,10 @@ from nomad.parsing.backend import LocalBackend class Parser(metaclass=ABCMeta): - """ + ''' Instances specify a parser. It allows to find *main files* from given uploaded and extracted files. Further, allows to run the parser on those 'main files'. - """ + ''' def __init__(self): self.domain = 'dft' @@ -41,7 +41,7 @@ class Parser(metaclass=ABCMeta): def is_mainfile( self, filename: str, mime: str, buffer: bytes, decoded_buffer: str, compression: str = None) -> bool: - """ + ''' Checks if a file is a mainfile for the parsers. 
Arguments: @@ -49,12 +49,12 @@ class Parser(metaclass=ABCMeta): mime: The mimetype of the mainfile guessed with libmagic buffer: The first 2k of the mainfile contents compression: The compression of the mainfile ``[None, 'gz', 'bz2']`` - """ + ''' pass @abstractmethod def run(self, mainfile: str, logger=None) -> LocalBackend: - """ + ''' Runs the parser on the given mainfile. It uses :class:`LocalBackend` as a backend. The meta-info access is handled by the underlying NOMAD-coe parser. @@ -64,14 +64,14 @@ class Parser(metaclass=ABCMeta): Returns: The used :class:`LocalBackend` with status information and result data. - """ + ''' class BrokenParser(Parser): - """ + ''' A parser implementation that just fails and is used to match mainfiles with known patterns of corruption. - """ + ''' def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self.name = 'parser/broken' @@ -97,7 +97,7 @@ class BrokenParser(Parser): class MatchingParser(Parser): - """ + ''' A parser implementation that used regular experessions to match mainfiles. Arguments: @@ -107,7 +107,7 @@ class MatchingParser(Parser): mainfile_name_re: A regexp that is used to match the paths of potential mainfiles domain: The domain that this parser should be used for. Default is 'dft'. supported_compressions: A list of [gz, bz2], if the parser supports compressed files - """ + ''' def __init__( self, name: str, code_name: str, mainfile_contents_re: str = None, @@ -153,10 +153,10 @@ class MatchingParser(Parser): class MissingParser(MatchingParser): - """ + ''' A parser implementation that just fails and is used to match mainfiles with known patterns of corruption. - """ + ''' def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) @@ -165,14 +165,14 @@ class MissingParser(MatchingParser): class LegacyParser(MatchingParser): - """ + ''' A parser implementation for legacy NOMAD-coe parsers. It assumes that parsers are installed to the python environment. Arguments: parser_class_name: the main parser class that implements NOMAD-coe's backend_factory: a callable that returns a backend, takes meta_info and logger as argument - """ + ''' def __init__(self, parser_class_name: str, *args, backend_factory=None, **kwargs) -> None: super().__init__(*args, **kwargs) @@ -212,11 +212,11 @@ class LegacyParser(MatchingParser): class VaspOutcarParser(LegacyParser): - """ + ''' LegacyParser that only matches mailfiles, if there is no .xml in the same directory, i.e. to use the VASP OUTCAR parser in absence of .xml output file. - """ + ''' def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self.name = 'parsers/vaspoutcar' diff --git a/nomad/processing/__init__.py b/nomad/processing/__init__.py index 4454cdbc60f56ba267f741b804d5d7de86c11dbc..34cd30e2e9ad969498f7d486c937adec238e8e02 100644 --- a/nomad/processing/__init__.py +++ b/nomad/processing/__init__.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -""" +''' Processing comprises everything that is necessary to take an uploaded user file, processes it, and store all necessary data for *repository*, *archive*, and potential future services (e.g. *encyclopedia*). @@ -54,7 +54,7 @@ classes do represent the processing state, as well as the respective entity. :members: .. 
autoclass:: nomad.processing.data.Calc :members: -""" +''' from nomad.processing.base import app, InvalidId, ProcNotRegistered, SUCCESS, FAILURE, \ RUNNING, PENDING, PROCESS_COMPLETED, PROCESS_RUNNING, ProcessAlreadyRunning diff --git a/nomad/processing/base.py b/nomad/processing/base.py index 0fd5c3dc9b17a95aac3c946812a8741934de4a11..3dc11be8791e83a41a0ff1ca623e6899f1e563ea 100644 --- a/nomad/processing/base.py +++ b/nomad/processing/base.py @@ -107,7 +107,7 @@ class ProcMetaclass(TopLevelDocumentMetaclass): class Proc(Document, metaclass=ProcMetaclass): - """ + ''' Base class for objects that are involved in processing and need persistent processing state. @@ -133,14 +133,14 @@ class Proc(Document, metaclass=ProcMetaclass): complete_time: the time that processing completed (successfully or not) current_process: the currently or last run asyncronous process process_status: the status of the currently or last run asyncronous process - """ + ''' meta: Any = { 'abstract': True, } tasks: List[str] = None - """ the ordered list of tasks that comprise a processing run """ + ''' the ordered list of tasks that comprise a processing run ''' current_task = StringField(default=None) tasks_status = StringField(default=CREATED) @@ -158,17 +158,17 @@ class Proc(Document, metaclass=ProcMetaclass): @property def tasks_running(self) -> bool: - """ Returns True of the process has failed or succeeded. """ + ''' Returns True of the process has failed or succeeded. ''' return self.tasks_status not in [SUCCESS, FAILURE] @property def process_running(self) -> bool: - """ Returns True of an asynchrounous process is currently running. """ + ''' Returns True of an asynchrounous process is currently running. ''' return self.process_status is not None and self.process_status != PROCESS_COMPLETED @classmethod def process_running_mongoengine_query(cls): - """ Returns a mongoengine query dict (to be used in objects) to find running processes. """ + ''' Returns a mongoengine query dict (to be used in objects) to find running processes. ''' return dict(process_status__in=[PROCESS_CALLED, PROCESS_RUNNING]) def get_logger(self): @@ -179,9 +179,9 @@ class Proc(Document, metaclass=ProcMetaclass): @classmethod def create(cls, **kwargs): - """ Factory method that must be used instead of regular constructor. """ + ''' Factory method that must be used instead of regular constructor. ''' assert 'tasks_status' not in kwargs, \ - """ do not set the status manually, its managed """ + ''' do not set the status manually, its managed ''' kwargs.setdefault('create_time', datetime.utcnow()) self = cls(**kwargs) @@ -194,7 +194,7 @@ class Proc(Document, metaclass=ProcMetaclass): return self def reset(self, worker_hostname: str = None): - """ Resets the task chain. Assumes there no current running process. """ + ''' Resets the task chain. Assumes there no current running process. ''' assert not self.process_running self.current_task = None @@ -206,7 +206,7 @@ class Proc(Document, metaclass=ProcMetaclass): @classmethod def reset_pymongo_update(cls, worker_hostname: str = None): - """ Returns a pymongo update dict part to reset calculations. """ + ''' Returns a pymongo update dict part to reset calculations. ''' return dict( current_task=None, process_status=None, tasks_status=PENDING, errors=[], warnings=[], worker_hostname=worker_hostname) @@ -244,7 +244,7 @@ class Proc(Document, metaclass=ProcMetaclass): logger.critical(msg, **kwargs) def fail(self, *errors, log_level=logging.ERROR, **kwargs): - """ Allows to fail the process. 
Takes strings or exceptions as args. """ + ''' Allows to fail the process. Takes strings or exceptions as args. ''' assert self.process_running or self.tasks_running, 'Cannot fail a completed process.' failed_with_exception = False @@ -274,7 +274,7 @@ class Proc(Document, metaclass=ProcMetaclass): self.save() def warning(self, *warnings, log_level=logging.WARNING, **kwargs): - """ Allows to save warnings. Takes strings or exceptions as args. """ + ''' Allows to save warnings. Takes strings or exceptions as args. ''' assert self.process_running or self.tasks_running logger = self.get_logger(**kwargs) @@ -326,30 +326,30 @@ class Proc(Document, metaclass=ProcMetaclass): self.get_logger().info('completed process') def on_tasks_complete(self): - """ Callback that is called when the list of task are completed """ + ''' Callback that is called when the list of tasks is completed ''' pass def on_process_complete(self, process_name): - """ Callback that is called when the corrent process completed """ + ''' Callback that is called when the current process completed ''' pass def block_until_complete(self, interval=0.01): - """ + ''' Reloads the process constantly until it sees a completed process. Should be used with care as it can block indefinitely. Just intended for testing purposes. - """ + ''' while self.tasks_running or self.process_running: time.sleep(interval) self.reload() @classmethod def process_all(cls, func, query: Dict[str, Any], exclude: List[str] = []): - """ + ''' Allows to run process functions for all objects on the given query. Calling process functions though the func:`process` wrapper might be slow, because it causes a save on each call. This function will use a query based update to do the same for all objects at once. - """ + ''' running_query = dict(cls.process_running_mongoengine_query()) running_query.update(query) @@ -388,14 +388,14 @@ class Proc(Document, metaclass=ProcMetaclass): def task(func): - """ + ''' The decorator for tasks that will be wrapped in exception handling that will fail the process. The task methods of a :class:`Proc` class/document comprise a sequence (order of methods in class namespace) of tasks. Tasks must be executed in that order. Completion of the last task, will put the :class:`Proc` instance into the SUCCESS state. Calling the first task will put it into RUNNING state. Tasks will only be executed, if the process has not yet reached FAILURE state. - """ + ''' @functools.wraps(func) def wrapper(self, *args, **kwargs): try: @@ -425,20 +425,20 @@ def task(func): def all_subclasses(cls): - """ Helper method to calculate set of all subclasses of a given class. """ + ''' Helper method to calculate set of all subclasses of a given class. ''' return set(cls.__subclasses__()).union( [s for c in cls.__subclasses__() for s in all_subclasses(c)]) all_proc_cls = {cls.__name__: cls for cls in all_subclasses(Proc)} -""" Name dictionary for all Proc classes. """ +''' Name dictionary for all Proc classes. ''' class NomadCeleryRequest(Request): - """ + ''' A custom celery request class that allows to catch error in the worker main thread, which cannot be caught on the worker threads themselves. - """ + ''' def _fail(self, event, **kwargs): args = self._payload[0] @@ -480,9 +480,9 @@ class NomadCeleryTask(Task): def unwarp_task(task, cls_name, self_id, *args, **kwargs): - """ + ''' Retrieves the proc object that the given task is executed on from the database. 
- """ + ''' logger = utils.get_logger(__name__, cls=cls_name, id=self_id) # get the process class @@ -521,13 +521,13 @@ def unwarp_task(task, cls_name, self_id, *args, **kwargs): acks_late=config.celery.acks_late, soft_time_limit=config.celery.timeout, time_limit=config.celery.timeout * 2) def proc_task(task, cls_name, self_id, func_attr): - """ + ''' The celery task that is used to execute async process functions. It ignores results, since all results are handled via the self document. It retries for 3 times with a countdown of 3 on missing 'selfs', since this might happen in sharded, distributed mongo setups where the object might not have yet been propagated and therefore appear missing. - """ + ''' self = unwarp_task(task, cls_name, self_id) logger = self.get_logger() @@ -576,14 +576,14 @@ def proc_task(task, cls_name, self_id, func_attr): def process(func): - """ + ''' The decorator for process functions that will be called async via celery. All calls to the decorated method will result in celery task requests. To transfer state, the instance will be saved to the database and loading on the celery task worker. Process methods can call other (process) functions/methods on other :class:`Proc` instances. Each :class:`Proc` instance can only run one any process at a time. - """ + ''' @functools.wraps(func) def wrapper(self, *args, **kwargs): assert len(args) == 0 and len(kwargs) == 0, 'process functions must not have arguments' diff --git a/nomad/processing/data.py b/nomad/processing/data.py index 465c684af773bc69abd70f5b58bb25af1abc4110..7a58323da1798476f82a242b6a4e3a8f450f7afd 100644 --- a/nomad/processing/data.py +++ b/nomad/processing/data.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -""" +''' This module comprises a set of persistent document classes that hold all user related data. These are information about users, their uploads and datasets, the associated calculations, and files @@ -22,9 +22,9 @@ calculations, and files .. autoclass:: Upload -""" +''' -from typing import cast, List, Any, ContextManager, Tuple, Generator, Dict, cast +from typing import cast, List, Any, ContextManager, Tuple, Generator, Dict, cast, Iterable from mongoengine import StringField, DateTimeField, DictField, BooleanField, IntField import logging from structlog import wrap_logger @@ -41,7 +41,6 @@ from nomad.files import PathObject, UploadFiles, ExtractError, ArchiveBasedStagi from nomad.processing.base import Proc, process, task, PENDING, SUCCESS, FAILURE from nomad.parsing import parser_dict, match_parser, LocalBackend from nomad.normalizing import normalizers -from nomad.datamodel import UploadWithMetadata def _pack_log_event(logger, method_name, event_dict): @@ -66,14 +65,8 @@ _log_processors = [ TimeStamper(fmt="%Y-%m-%d %H:%M.%S", utc=False)] -_all_root_sections = [] -for domain in datamodel.Domain.instances.values(): - for root_section in domain.root_sections: - _all_root_sections.append(root_section) - - class Calc(Proc): - """ + ''' Instances of this class represent calculations. This class manages the elastic search index entry, files, and archive for the respective calculation. 
@@ -88,8 +81,8 @@ class Calc(Proc): upload_id: the id of the upload used to create this calculation mainfile: the mainfile (including path in upload) that was used to create this calc - metadata: the metadata record wit calc and user metadata, see :class:`datamodel.CalcWithMetadata` - """ + metadata: the metadata record with calc and user metadata, see :class:`datamodel.EntryMetadata` + ''' calc_id = StringField(primary_key=True) upload_id = StringField() mainfile = StringField() @@ -120,12 +113,12 @@ class Calc(Proc): self._calc_proc_logwriter_ctx: ContextManager = None @classmethod - def from_calc_with_metadata(cls, calc_with_metadata): + def from_entry_metadata(cls, entry_metadata): calc = Calc.create( - calc_id=calc_with_metadata.calc_id, - upload_id=calc_with_metadata.upload_id, - mainfile=calc_with_metadata.mainfile, - metadata=calc_with_metadata.to_dict()) + calc_id=entry_metadata.calc_id, + upload_id=entry_metadata.upload_id, + mainfile=entry_metadata.mainfile, + metadata=entry_metadata.m_to_dict(include_defaults=True)) return calc @@ -152,10 +145,10 @@ class Calc(Proc): return self._upload_files def get_logger(self, **kwargs): - """ + ''' Returns a wrapped logger that additionally saves all entries to the calculation processing log in the archive. - """ + ''' logger = super().get_logger() logger = logger.bind( upload_id=self.upload_id, mainfile=self.mainfile, calc_id=self.calc_id, **kwargs) @@ -189,11 +182,11 @@ class Calc(Proc): @process def re_process_calc(self): - """ + ''' Processes a calculation again. This means there is already metadata and instead of creating it initially, we are just updating the existing records. - """ + ''' parser = match_parser(self.mainfile, self.upload_files, strict=False) if parser is None and not config.reprocess_unmatched: @@ -228,16 +221,16 @@ class Calc(Proc): parser=parser.name) try: - calc_with_metadata = datamodel.CalcWithMetadata(**self.metadata) - calc_with_metadata.upload_id = self.upload_id - calc_with_metadata.calc_id = self.calc_id - calc_with_metadata.calc_hash = self.upload_files.calc_hash(self.mainfile) - calc_with_metadata.mainfile = self.mainfile - calc_with_metadata.nomad_version = config.version - calc_with_metadata.nomad_commit = config.commit - calc_with_metadata.last_processing = datetime.utcnow() - calc_with_metadata.files = self.upload_files.calc_files(self.mainfile) - self.metadata = calc_with_metadata.to_dict() + entry_metadata = datamodel.EntryMetadata.m_from_dict(self.metadata) + entry_metadata.upload_id = self.upload_id + entry_metadata.calc_id = self.calc_id + entry_metadata.calc_hash = self.upload_files.calc_hash(self.mainfile) + entry_metadata.mainfile = self.mainfile + entry_metadata.nomad_version = config.version + entry_metadata.nomad_commit = config.commit + entry_metadata.last_processing = datetime.utcnow() + entry_metadata.files = self.upload_files.calc_files(self.mainfile) + self.metadata = entry_metadata.m_to_dict(include_defaults=True) self.parsing() self.normalizing() @@ -253,10 +246,10 @@ class Calc(Proc): @process def process_calc(self): - """ + ''' Processes a new calculation that has no prior records in the mongo, elastic, or filesystem storage. It will create an initial set of (user) metadata. 
- """ + ''' logger = self.get_logger() if self.upload is None: logger.error('calculation upload does not exist') @@ -264,23 +257,23 @@ class Calc(Proc): try: # save preliminary minimum calc metadata in case processing fails # successful processing will replace it with the actual metadata - calc_with_metadata = datamodel.CalcWithMetadata( - domain=parser_dict[self.parser].domain, - upload_id=self.upload_id, - calc_id=self.calc_id, - calc_hash=self.upload_files.calc_hash(self.mainfile), - mainfile=self.mainfile) - calc_with_metadata.published = False - calc_with_metadata.uploader = self.upload.user_id - calc_with_metadata.upload_time = self.upload.upload_time - calc_with_metadata.upload_name = self.upload.name - calc_with_metadata.nomad_version = config.version - calc_with_metadata.nomad_commit = config.commit - calc_with_metadata.last_processing = datetime.utcnow() - calc_with_metadata.files = self.upload_files.calc_files(self.mainfile) - self.metadata = calc_with_metadata.to_dict() - - if len(calc_with_metadata.files) >= config.auxfile_cutoff: + calc_metadata = datamodel.EntryMetadata() + calc_metadata.domain = parser_dict[self.parser].domain + calc_metadata.upload_id = self.upload_id + calc_metadata.calc_id = self.calc_id + calc_metadata.calc_hash = self.upload_files.calc_hash(self.mainfile) + calc_metadata.mainfile = self.mainfile + calc_metadata.calc_hash = self.upload_files.calc_hash(self.mainfile) + calc_metadata.nomad_version = config.version + calc_metadata.nomad_commit = config.commit + calc_metadata.last_processing = datetime.utcnow() + calc_metadata.files = self.upload_files.calc_files(self.mainfile) + calc_metadata.uploader = self.upload.user_id + calc_metadata.upload_time = self.upload.upload_time + calc_metadata.upload_name = self.upload.name + self.metadata = calc_metadata.m_to_dict(include_defaults=True) # TODO use embedded doc? + + if len(calc_metadata.files) >= config.auxfile_cutoff: self.warning( 'This calc has many aux files in its directory. 
' 'Have you placed many calculations in the same directory?') @@ -301,25 +294,16 @@ class Calc(Proc): # in case of failure, index a minimum set of metadata and mark # processing failure try: - calc_with_metadata = datamodel.CalcWithMetadata(**self.metadata) - calc_with_metadata.formula = config.services.not_processed_value - calc_with_metadata.basis_set = config.services.not_processed_value - calc_with_metadata.xc_functional = config.services.not_processed_value - calc_with_metadata.system = config.services.not_processed_value - calc_with_metadata.crystal_system = config.services.not_processed_value - calc_with_metadata.spacegroup = config.services.not_processed_value - calc_with_metadata.spacegroup_symbol = config.services.not_processed_value - calc_with_metadata.code_version = config.services.not_processed_value - - calc_with_metadata.code_name = config.services.not_processed_value + entry_metadata = datamodel.EntryMetadata.m_from_dict(self.metadata) if self.parser is not None: parser = parser_dict[self.parser] if hasattr(parser, 'code_name'): - calc_with_metadata.code_name = parser.code_name + entry_metadata.code_name = parser.code_name - calc_with_metadata.processed = False - self.metadata = calc_with_metadata.to_dict() - search.Entry.from_calc_with_metadata(calc_with_metadata).save() + entry_metadata.processed = False + self.metadata = entry_metadata.m_to_dict(include_defaults=True) + + search.create_entry(entry_metadata).save() except Exception as e: self.get_logger().error('could not index after processing failure', exc_info=e) @@ -335,7 +319,7 @@ class Calc(Proc): @task def parsing(self): - """ The *task* that encapsulates all parsing related actions. """ + ''' The *task* that encapsulates all parsing related actions. ''' context = dict(parser=self.parser, step=self.parser) logger = self.get_logger(**context) parser = parser_dict[self.parser] @@ -405,7 +389,7 @@ class Calc(Proc): @task def normalizing(self): - """ The *task* that encapsulates all normalizing related actions. """ + ''' The *task* that encapsulates all normalizing related actions. ''' for normalizer in normalizers: if normalizer.domain != parser_dict[self.parser].domain: continue @@ -435,27 +419,27 @@ class Calc(Proc): @task def archiving(self): - """ The *task* that encapsulates all archival related actions. """ + ''' The *task* that encapsulates all archival related actions. 
''' logger = self.get_logger() - calc_with_metadata = datamodel.CalcWithMetadata(**self.metadata) - calc_with_metadata.apply_domain_metadata(self._parser_backend) - calc_with_metadata.processed = True + entry_metadata = datamodel.EntryMetadata.m_from_dict(self.metadata) + entry_metadata.apply_domain_metadata(self._parser_backend) + entry_metadata.processed = True # persist the calc metadata with utils.timer(logger, 'saved calc metadata', step='metadata'): - self.metadata = calc_with_metadata.to_dict() + self.metadata = entry_metadata.m_to_dict(include_defaults=True) # index in search with utils.timer(logger, 'indexed', step='index'): - search.Entry.from_calc_with_metadata(calc_with_metadata).save() + search.create_entry(entry_metadata).save() # persist the archive with utils.timer( logger, 'archived', step='archive', input_size=self.mainfile_file.size) as log_data: with self.upload_files.archive_file(self.calc_id, 'wt') as out: - self._parser_backend.write_json(out, pretty=True, root_sections=_all_root_sections) + self._parser_backend.write_json(out, pretty=True, root_sections=datamodel.root_sections) log_data.update(archive_size=self.upload_files.archive_file_object(self.calc_id).size) @@ -474,7 +458,7 @@ class Calc(Proc): class Upload(Proc): - """ + ''' Represents uploads in the databases. Provides persistence access to the files storage, and processing state. @@ -489,7 +473,7 @@ class Upload(Proc): publish_time: Date when the upload was initially published last_update: Date of the last publishing/re-processing joined: Boolean indicates if the running processing has joined (:func:`check_join`) - """ + ''' id_field = 'upload_id' upload_id = StringField(primary_key=True) @@ -518,13 +502,13 @@ class Upload(Proc): @property def metadata(self) -> dict: - """ + ''' Getter, setter for user metadata. Metadata is pickled to and from the public bucket to allow sharing among all processes. Usually uploads do not have (much) user defined metadata, but users provide all metadata per upload as part of the publish process. This will change, when we introduce editing functionality and metadata will be provided through different means. - """ + ''' try: upload_files = PublicUploadFiles(self.upload_id, is_authorized=lambda: True) except KeyError: @@ -542,7 +526,7 @@ class Upload(Proc): @classmethod def user_uploads(cls, user: datamodel.User, **kwargs) -> List['Upload']: - """ Returns all uploads for the given user. Kwargs are passed to mongo query. """ + ''' Returns all uploads for the given user. Kwargs are passed to mongo query. ''' return cls.objects(user_id=str(user.user_id), **kwargs) @property @@ -561,14 +545,14 @@ class Upload(Proc): @classmethod def create(cls, **kwargs) -> 'Upload': - """ + ''' Creates a new upload for the given user, a user given name is optional. It will populate the record with a signed url and pending :class:`UploadProc`. The upload will be already saved to the database. Arguments: user: The user that created the upload. - """ + ''' # use kwargs to keep compatibility with super method user: datamodel.User = kwargs['user'] del(kwargs['user']) @@ -583,15 +567,15 @@ class Upload(Proc): return self def delete(self): - """ Deletes this upload process state entry and its calcs. """ + ''' Deletes this upload process state entry and its calcs. ''' Calc.objects(upload_id=self.upload_id).delete() super().delete() def delete_upload_local(self): - """ + ''' Deletes the upload, including its processing state and staging files. Local version without celery processing. 
- """ + ''' logger = self.get_logger() with utils.lnr(logger, 'staged upload delete failed'): @@ -609,28 +593,27 @@ class Upload(Proc): @process def delete_upload(self): - """ + ''' Deletes of the upload, including its processing state and staging files. This starts the celery process of deleting the upload. - """ + ''' self.delete_upload_local() return True # do not save the process status on the delete upload @process def publish_upload(self): - """ + ''' Moves the upload out of staging to the public area. It will pack the staging upload files in to public upload files. - """ + ''' assert self.processed_calcs > 0 logger = self.get_logger() logger.info('started to publish') with utils.lnr(logger, 'publish failed'): - upload_with_metadata = self.to_upload_with_metadata(self.metadata) - calcs = upload_with_metadata.calcs + calcs = self.entries_metadata(self.metadata) with utils.timer( logger, 'upload metadata updated', step='metadata', @@ -641,7 +624,7 @@ class Upload(Proc): calc.with_embargo = calc.with_embargo if calc.with_embargo is not None else False return UpdateOne( {'_id': calc.calc_id}, - {'$set': {'metadata': calc.to_dict()}}) + {'$set': {'metadata': calc.m_to_dict(include_defaults=True)}}) Calc._get_collection().bulk_write([create_update(calc) for calc in calcs]) @@ -649,7 +632,7 @@ class Upload(Proc): with utils.timer( logger, 'staged upload files packed', step='pack', upload_size=self.upload_files.size): - self.upload_files.pack(upload_with_metadata) + self.upload_files.pack(calcs) with utils.timer( logger, 'index updated', step='index', @@ -671,7 +654,7 @@ class Upload(Proc): @process def re_process_upload(self): - """ + ''' A *process* that performs the re-processing of a earlier processed upload. @@ -681,7 +664,7 @@ class Upload(Proc): TODO this implementation does not do any re-matching. This will be more complex due to handling of new or missing matches. - """ + ''' assert self.published logger = self.get_logger() @@ -730,7 +713,7 @@ class Upload(Proc): @process def re_pack(self): - """ A *process* that repacks the raw and archive data based on the current embargo data. """ + ''' A *process* that repacks the raw and archive data based on the current embargo data. ''' assert self.published # mock the steps of actual processing @@ -739,19 +722,19 @@ class Upload(Proc): self._continue_with('parse_all') self._continue_with('cleanup') - self.upload_files.re_pack(self.to_upload_with_metadata()) + self.upload_files.re_pack(self.entries_metadata()) self.joined = True self._complete() @process def process_upload(self): - """ A *process* that performs the initial upload processing. """ + ''' A *process* that performs the initial upload processing. ''' self.extracting() self.parse_all() @task def uploading(self): - """ A no-op *task* as a stand-in for receiving upload data. """ + ''' A no-op *task* as a stand-in for receiving upload data. ''' pass @property @@ -772,10 +755,10 @@ class Upload(Proc): @task def extracting(self): - """ + ''' The *task* performed before the actual parsing/normalizing: extracting the uploaded files. - """ + ''' # extract the uploaded file self._upload_files = ArchiveBasedStagingUploadFiles( upload_id=self.upload_id, is_authorized=lambda: True, create=True, @@ -800,10 +783,10 @@ class Upload(Proc): return def _preprocess_files(self, path): - """ + ''' Some files need preprocessing. Currently we need to add a stripped POTCAR version and always restrict/embargo the original. 
- """ + ''' if os.path.basename(path).startswith('POTCAR'): # create checksum hash = hashlib.sha224() @@ -829,13 +812,13 @@ class Upload(Proc): self.staging_upload_files.raw_file_object(stripped_path).os_path)) def match_mainfiles(self) -> Generator[Tuple[str, object], None, None]: - """ + ''' Generator function that matches all files in the upload to all parsers to determine the upload's mainfiles. Returns: Tuples of mainfile, filename, and parsers - """ + ''' directories_with_match: Dict[str, str] = dict() upload_files = self.staging_upload_files for filename in upload_files.raw_file_manifest(): @@ -859,10 +842,10 @@ class Upload(Proc): @task def parse_all(self): - """ + ''' The *task* used to identify mainfile/parser combinations among the upload's files, creates respective :class:`Calc` instances, and triggers their processing. - """ + ''' logger = self.get_logger() with utils.timer( @@ -882,14 +865,14 @@ class Upload(Proc): self.check_join() def check_join(self): - """ + ''' Performs an evaluation of the join condition and triggers the :func:`cleanup` task if necessary. The join condition allows to run the ``cleanup`` after all calculations have been processed. The upload processing stops after all calculation processings have been triggered (:func:`parse_all` or :func:`re_process_upload`). The cleanup task is then run within the last calculation process (the one that triggered the join by calling this method). - """ + ''' total_calcs = self.total_calcs processed_calcs = self.processed_calcs @@ -951,7 +934,7 @@ class Upload(Proc): logger, 'reprocessed staged upload packed', step='delete staged', upload_size=self.upload_files.size): - staging_upload_files.pack(self.to_upload_with_metadata(), skip_raw=True) + staging_upload_files.pack(self.entries_metadata(), skip_raw=True) with utils.timer( logger, 'reprocessed staged upload deleted', step='delete staged', @@ -963,10 +946,10 @@ class Upload(Proc): @task def cleanup(self): - """ + ''' The *task* that "cleans" the processing, i.e. removed obsolete files and performs pending archival operations. Depends on the type of processing. - """ + ''' search.refresh() if self.current_process == 're_process_upload': @@ -975,58 +958,58 @@ class Upload(Proc): self._cleanup_after_processing() def get_calc(self, calc_id) -> Calc: - """ Returns the upload calc with the given id or ``None``. """ + ''' Returns the upload calc with the given id or ``None``. ''' return Calc.objects(upload_id=self.upload_id, calc_id=calc_id).first() @property def processed_calcs(self): - """ + ''' The number of successfully or not successfully processed calculations. I.e. calculations that have finished processing. - """ + ''' return Calc.objects(upload_id=self.upload_id, tasks_status__in=[SUCCESS, FAILURE]).count() @property def total_calcs(self): - """ The number of all calculations. """ + ''' The number of all calculations. ''' return Calc.objects(upload_id=self.upload_id).count() @property def failed_calcs(self): - """ The number of calculations with failed processing. """ + ''' The number of calculations with failed processing. ''' return Calc.objects(upload_id=self.upload_id, tasks_status=FAILURE).count() @property def pending_calcs(self) -> int: - """ The number of calculations with pending processing. """ + ''' The number of calculations with pending processing. ''' return Calc.objects(upload_id=self.upload_id, tasks_status=PENDING).count() def all_calcs(self, start, end, order_by=None): - """ + ''' Returns all calculations, paginated and ordered. 
Arguments: start: the start index of the requested page end: the end index of the requested page order_by: the property to order by - """ + ''' query = Calc.objects(upload_id=self.upload_id)[start:end] return query.order_by(order_by) if order_by is not None else query @property def outdated_calcs(self): - """ All successfully processed and outdated calculations. """ + ''' All successfully processed and outdated calculations. ''' return Calc.objects( upload_id=self.upload_id, tasks_status=SUCCESS, metadata__nomad_version__ne=config.version) @property def calcs(self): - """ All successfully processed calculations. """ + ''' All successfully processed calculations. ''' return Calc.objects(upload_id=self.upload_id, tasks_status=SUCCESS) - def to_upload_with_metadata(self, user_metadata: dict = None) -> UploadWithMetadata: - """ + def entries_metadata(self, user_metadata: dict = None) -> Iterable[datamodel.EntryMetadata]: + ''' This is the :py:mod:`nomad.datamodel` transformation method to transform processing uploads into datamodel uploads. It will also implicitely transform all calculations of this upload. @@ -1034,10 +1017,10 @@ class Upload(Proc): Arguments: user_metadata: A dict of user metadata that is applied to the resulting datamodel data and the respective calculations. - """ + ''' # prepare user metadata per upload and per calc if user_metadata is not None: - calc_metadatas: Dict[str, Any] = dict() + entries_metadata_dict: Dict[str, Any] = dict() upload_metadata: Dict[str, Any] = dict() upload_metadata.update(user_metadata) @@ -1045,53 +1028,42 @@ class Upload(Proc): del(upload_metadata['calculations']) for calc in user_metadata.get('calculations', []): # pylint: disable=no-member - calc_metadatas[calc['mainfile']] = calc + entries_metadata_dict[calc['mainfile']] = calc - user_upload_time = upload_metadata.get('_upload_time', None) - user_upload_name = upload_metadata.get('_upload_name', None) + user_upload_time = upload_metadata.get('upload_time', None) + user_upload_name = upload_metadata.get('upload_name', None) def get_metadata(calc: Calc): - """ - Assemble metadata from calc's processed calc metadata and the uploads - user metadata. 
- """ - calc_data = calc.metadata - calc_with_metadata = datamodel.CalcWithMetadata(**calc_data) - calc_metadata = dict(upload_metadata) - calc_metadata.update(calc_metadatas.get(calc.mainfile, {})) - calc_with_metadata.apply_user_metadata(calc_metadata) - if calc_with_metadata.upload_time is None: - calc_with_metadata.upload_time = self.upload_time if user_upload_time is None else user_upload_time - if calc_with_metadata.upload_name is None: - calc_with_metadata.upload_name = self.name if user_upload_name is None else user_upload_name - - return calc_with_metadata + entry_metadata = datamodel.EntryMetadata.m_from_dict(calc.metadata) + entry_user_metadata = dict(upload_metadata) + entry_user_metadata.pop('embargo_length', None) # this is for uploads only + entry_user_metadata.update(entries_metadata_dict.get(calc.mainfile, {})) + entry_metadata.apply_user_metadata(entry_user_metadata) + if entry_metadata.upload_time is None: + entry_metadata.upload_time = self.upload_time if user_upload_time is None else user_upload_time + if entry_metadata.upload_name is None: + entry_metadata.upload_name = self.name if user_upload_name is None else user_upload_name + + return entry_metadata else: user_upload_time = None def get_metadata(calc: Calc): - calc_with_metadata = datamodel.CalcWithMetadata(**calc.metadata) - calc_with_metadata.upload_time = self.upload_time - calc_with_metadata.upload_name = self.name - - return calc_with_metadata - - result = UploadWithMetadata( - upload_id=self.upload_id, - uploader=self.user_id, - upload_time=self.upload_time if user_upload_time is None else user_upload_time) + entry_metadata = datamodel.EntryMetadata.m_from_dict(calc.metadata) + entry_metadata.upload_time = self.upload_time + entry_metadata.upload_name = self.name - result.calcs = [get_metadata(calc) for calc in Calc.objects(upload_id=self.upload_id)] + return entry_metadata - return result + return [get_metadata(calc) for calc in Calc.objects(upload_id=self.upload_id)] def compress_and_set_metadata(self, metadata: Dict[str, Any]) -> None: - """ + ''' Stores the given user metadata in the upload document. This is the metadata adhering to the API model (``UploadMetaData``). Most quantities can be stored for the upload and for each calculation. This method will try to move same values from the calculation to the upload to "compress" the data. - """ + ''' self.embargo_length = min(metadata.get('embargo_length', 36), 36) compressed = { @@ -1103,7 +1075,7 @@ class Upload(Proc): compressed_calc: Dict[str, Any] = {} calculations.append(compressed_calc) for key, value in calc.items(): - if key in ['_pid', 'mainfile', 'external_id']: + if key in ['pid', 'mainfile', 'external_id']: # these quantities are explicitly calc specific and have to stay with # the calc compressed_calc[key] = value diff --git a/nomad/search.py b/nomad/search.py index 0644b12f676790836752d4d9d8f5928580be98b6..d30268fe1ecd287a57f136eca67dc716029bdd67 100644 --- a/nomad/search.py +++ b/nomad/search.py @@ -12,22 +12,20 @@ # See the License for the specific language governing permissions and # limitations under the License. -""" +''' This module represents calculations in elastic search. 
-""" +''' -from typing import Iterable, Dict, List, Any -from elasticsearch_dsl import Document, InnerDoc, Keyword, Text, Date, \ - Object, Boolean, Search, Q, A, analyzer, tokenizer -from elasticsearch_dsl.document import IndexMeta +from typing import Iterable, Dict, List, Any, Union, cast +from elasticsearch_dsl import Document, InnerDoc, Keyword, Date, \ + Object, Boolean, Integer, Search, Q, A, analyzer, tokenizer import elasticsearch.helpers from elasticsearch.exceptions import NotFoundError from datetime import datetime import json -from nomad import config, datamodel, infrastructure, datamodel, utils, processing as proc -from nomad.datamodel import Domain -import nomad.datamodel.base +from nomad import config, datamodel, infrastructure, datamodel, utils, metainfo, processing as proc +from nomad.metainfo.search import SearchQuantity path_analyzer = analyzer( @@ -44,173 +42,374 @@ class ElasticSearchError(Exception): pass class ScrollIdNotFound(Exception): pass -class User(InnerDoc): +_elastic_documents: Dict[str, Union[Document, InnerDoc]] = {} - @classmethod - def from_user(cls, user): - self = cls(user_id=user.user_id) - self.name = user.name - self.email = user.email +search_quantities: Dict[str, SearchQuantity] = {} +''' All available search quantities by their full qualified name. ''' - return self +metrics: Dict[str, SearchQuantity] = {} +''' +The available search metrics. Metrics are integer values given for each entry that can +be used in statistics (aggregations), e.g. the sum of all total energy calculations or cardinality of +all unique geometries. +''' + +groups: Dict[str, SearchQuantity] = {} +''' The available groupable quantities ''' + +order_default_quantities: Dict[str, SearchQuantity] = {} + +default_statistics: Dict[str, List[SearchQuantity]] = {} + + +# TODO make search the search quantities are initialized even without/before creating an elastic document +# otherwise a dependency on import order is created +def create_elastic_document( + section: metainfo.Section, document_name: str = None, super_cls=Document, + prefix: str = None, domain: str = None, + attrs: Dict[str, Any] = None) -> Union[Document, InnerDoc]: + ''' + Create all elasticsearch_dsl mapping classes for the section and its sub sections. 
+ ''' + domain = section.m_x('domain', domain) + domain_or_all = domain if domain is not None else '__all__' + + if document_name is None: + document_name = section.name + + if attrs is None: + attrs = {} + + def get_inner_document(section: metainfo.Section, **kwargs) -> type: + inner_document = _elastic_documents.get(section.qualified_name()) + if inner_document is None: + inner_document = create_elastic_document( + section, super_cls=InnerDoc, **kwargs) + + return inner_document + + # create an attribute for each sub section + for sub_section in section.all_sub_sections.values(): + sub_section_prefix = sub_section.m_x('search') + if sub_section_prefix is None: + continue + + if prefix is not None: + sub_section_prefix = '%s.%s' % (prefix, sub_section_prefix) + + inner_document = get_inner_document( + sub_section.sub_section, domain=domain, prefix=sub_section_prefix) + attrs[sub_section.name] = Object(inner_document) + + # create an attribute for each quantity + for quantity in section.all_quantities.values(): + local_search_quantities = quantity.m_x('search') + + if local_search_quantities is None: + continue + + if not isinstance(local_search_quantities, List): + local_search_quantities = [local_search_quantities] + + for i, search_quantity in enumerate(local_search_quantities): + search_quantity.configure(quantity=quantity, prefix=prefix) + + # only prefixed or top-level quantities are considered for being + # searched directly. Other nested quantities can only be used via + # other search_quantities's es_quantity + if prefix is not None or super_cls == Document: + qualified_name = search_quantity.qualified_name + assert qualified_name not in search_quantities, 'Search quantities must have a unique name: %s' % qualified_name + search_quantities[qualified_name] = search_quantity + + if search_quantity.metric is not None: + qualified_metric_name = search_quantity.metric_name + assert qualified_metric_name not in metrics, 'Metric names must be unique: %s' % qualified_metric_name + metrics[qualified_metric_name] = search_quantity + + if search_quantity.group is not None: + qualified_group = search_quantity.group + assert qualified_group not in groups, 'Groups must be unique' + groups[qualified_group] = search_quantity + + if search_quantity.default_statistic: + default_statistics.setdefault(domain_or_all, []).append(search_quantity) + + if search_quantity.order_default: + assert order_default_quantities.get(domain_or_all) is None, 'Only one quantity can be the order default' + order_default_quantities[domain_or_all] = search_quantity + + if i != 0: + # only the first quantity gets is mapped, unless the other has an + # explicit mapping + assert search_quantity.es_mapping is None, 'only the first quantity gets is mapped' + continue + + if search_quantity.es_mapping is None: + # find a mapping based on quantity type + if quantity.type == str: + search_quantity.es_mapping = Keyword() + elif quantity.type == int: + search_quantity.es_mapping = Integer() + elif quantity.type == bool: + search_quantity.es_mapping = Boolean() + elif quantity.type == metainfo.Datetime: + search_quantity.es_mapping = Date() + elif isinstance(quantity.type, metainfo.Reference): + inner_document = get_inner_document(quantity.type.target_section_def) + search_quantity.es_mapping = Object(inner_document) + elif isinstance(quantity.type, metainfo.MEnum): + search_quantity.es_mapping = Keyword() + else: + raise NotImplementedError( + 'Quantity type %s for quantity %s is not supported.' 
% (quantity.type, quantity)) + + attrs[quantity.name] = search_quantity.es_mapping + + document = type(document_name, (super_cls,), attrs) + _elastic_documents[section.qualified_name()] = document + return document + + +# TODO move to a init function that is triggered by elastic setup in infrastructure +Entry = cast(Document, create_elastic_document( + datamodel.EntryMetadata.m_def, document_name='Entry', + attrs=dict(Index=type('Index', (), dict(name=config.elastic.index_name))))) +''' The elasticsearch_dsl Document class that constitutes the entry index. ''' + +metrics_names = list(metrics.keys()) +''' Names of all available metrics ''' + +for domain in datamodel.domains: + order_default_quantities.setdefault(domain, order_default_quantities.get('__all__')) + default_statistics.setdefault(domain, []).append(*default_statistics.get('__all__')) + + +# class User(InnerDoc): + +# @classmethod +# def from_user(cls, user): +# self = cls(user_id=user.user_id) +# self.name = user.name +# self.email = user.email + +# return self + +# user_id = Keyword() +# email = Keyword() +# name = Text(fields={'keyword': Keyword()}) + + +# class Dataset(InnerDoc): + +# @classmethod +# def from_dataset_id(cls, dataset_id): +# dataset = datamodel.Dataset.m_def.m_x('me').get(dataset_id=dataset_id) +# return cls(id=dataset.dataset_id, doi=dataset.doi, name=dataset.name, created=dataset.created) + +# id = Keyword() +# doi = Keyword() +# name = Keyword() +# created = Date() + + +# _domain_inner_doc_types: Dict[str, type] = {} - user_id = Keyword() - email = Keyword() - name = Text(fields={'keyword': Keyword()}) - - -class Dataset(InnerDoc): - - @classmethod - def from_dataset_id(cls, dataset_id): - dataset = datamodel.Dataset.m_def.m_x('me').get(dataset_id=dataset_id) - return cls(id=dataset.dataset_id, doi=dataset.doi, name=dataset.name, created=dataset.created) - - id = Keyword() - doi = Keyword() - name = Keyword() - created = Date() - - -_domain_inner_doc_types: Dict[str, type] = {} - - -class WithDomain(IndexMeta): - """ Override elasticsearch_dsl metaclass to sneak in domain specific mappings """ - def __new__(cls, name, bases, attrs): - for domain in Domain.instances.values(): - inner_doc_type = _domain_inner_doc_types.get(domain.name) - if inner_doc_type is None: - domain_attrs = { - quantity.elastic_field: quantity.elastic_mapping - for quantity in domain.domain_quantities.values()} - - inner_doc_type = type(domain.name, (InnerDoc,), domain_attrs) - _domain_inner_doc_types[domain.name] = inner_doc_type - - attrs[domain.name] = Object(inner_doc_type) - - return super(WithDomain, cls).__new__(cls, name, bases, attrs) - - -class Entry(Document, metaclass=WithDomain): - - class Index: - name = config.elastic.index_name - - domain = Keyword() - upload_id = Keyword() - upload_time = Date() - upload_name = Keyword() - calc_id = Keyword() - calc_hash = Keyword() - pid = Keyword() - raw_id = Keyword() - mainfile = Keyword() - files = Text(multi=True, analyzer=path_analyzer, fields={'keyword': Keyword()}) - uploader = Object(User) - - with_embargo = Boolean() - published = Boolean() - - processed = Boolean() - last_processing = Date() - nomad_version = Keyword() - nomad_commit = Keyword() - - authors = Object(User, multi=True) - owners = Object(User, multi=True) - comment = Text() - references = Keyword() - datasets = Object(Dataset) - external_id = Keyword() - - atoms = Keyword() - only_atoms = Keyword() - formula = Keyword() - - @classmethod - def from_calc_with_metadata(cls, source: datamodel.CalcWithMetadata) -> 
'Entry': - entry = Entry(meta=dict(id=source.calc_id)) - entry.update(source) - return entry - - def update(self, source: datamodel.CalcWithMetadata) -> None: - self.domain = source.domain - self.upload_id = source.upload_id - self.upload_time = source.upload_time - self.upload_name = source.upload_name - self.calc_id = source.calc_id - self.calc_hash = source.calc_hash - self.pid = None if source.pid is None else str(source.pid) - self.raw_id = None if source.raw_id is None else str(source.raw_id) - - self.processed = source.processed - self.last_processing = source.last_processing - self.nomad_version = source.nomad_version - self.nomad_commit = source.nomad_commit - - self.mainfile = source.mainfile - if source.files is None: - self.files = [self.mainfile] - elif self.mainfile not in source.files: - self.files = [self.mainfile] + source.files - else: - self.files = source.files - self.with_embargo = bool(source.with_embargo) - self.published = source.published +# class WithDomain(IndexMeta): +# ''' Override elasticsearch_dsl metaclass to sneak in domain specific mappings ''' +# def __new__(cls, name, bases, attrs): +# for domain in Domain.instances.values(): +# inner_doc_type = _domain_inner_doc_types.get(domain.name) +# if inner_doc_type is None: +# domain_attrs = { +# quantity.elastic_field: quantity.elastic_mapping +# for quantity in domain.domain_quantities.values()} + +# inner_doc_type = type(domain.name, (InnerDoc,), domain_attrs) +# _domain_inner_doc_types[domain.name] = inner_doc_type + +# attrs[domain.name] = Object(inner_doc_type) + +# return super(WithDomain, cls).__new__(cls, name, bases, attrs) + + +# class Entry(Document, metaclass=WithDomain): - uploader = datamodel.User.get(user_id=source.uploader) if source.uploader is not None else None - authors = [datamodel.User.get(user_id) for user_id in source.coauthors] - owners = [datamodel.User.get(user_id) for user_id in source.shared_with] - if uploader is not None: - authors.append(uploader) - owners.append(uploader) - authors.sort(key=lambda user: user.last_name + ' ' + user.first_name) - owners.sort(key=lambda user: user.last_name + ' ' + user.first_name) +# class Index: +# name = config.elastic.index_name - self.uploader = User.from_user(uploader) if uploader is not None else None - self.authors = [User.from_user(user) for user in authors] - self.owners = [User.from_user(user) for user in owners] +# domain = Keyword() +# upload_id = Keyword() +# upload_time = Date() +# upload_name = Keyword() +# calc_id = Keyword() +# calc_hash = Keyword() +# pid = Keyword() +# raw_id = Keyword() +# mainfile = Keyword() +# files = Text(multi=True, analyzer=path_analyzer, fields={'keyword': Keyword()}) +# uploader = Object(User) + +# with_embargo = Boolean() +# published = Boolean() + +# processed = Boolean() +# last_processing = Date() +# nomad_version = Keyword() +# nomad_commit = Keyword() + +# authors = Object(User, multi=True) +# owners = Object(User, multi=True) +# comment = Text() +# references = Keyword() +# datasets = Object(Dataset) +# external_id = Keyword() + +# atoms = Keyword() +# only_atoms = Keyword() +# formula = Keyword() + +# @classmethod +# def from_entry_metadata(cls, source: datamodel.EntryMetadata) -> 'Entry': +# entry = Entry(meta=dict(id=source.calc_id)) +# entry.update(source) +# return entry + +# def update(self, source: datamodel.EntryMetadata) -> None: +# self.domain = source.domain +# self.upload_id = source.upload_id +# self.upload_time = source.upload_time +# self.upload_name = source.upload_name +# 
self.calc_id = source.calc_id +# self.calc_hash = source.calc_hash +# self.pid = None if source.pid is None else str(source.pid) +# self.raw_id = None if source.raw_id is None else str(source.raw_id) + +# self.processed = source.processed +# self.last_processing = source.last_processing +# self.nomad_version = source.nomad_version +# self.nomad_commit = source.nomad_commit + +# self.mainfile = source.mainfile +# if source.files is None: +# self.files = [self.mainfile] +# elif self.mainfile not in source.files: +# self.files = [self.mainfile] + source.files +# else: +# self.files = source.files + +# self.with_embargo = bool(source.with_embargo) +# self.published = source.published + +# uploader = datamodel.User.get(user_id=source.uploader) if source.uploader is not None else None +# authors = [datamodel.User.get(user_id) for user_id in source.coauthors] +# owners = [datamodel.User.get(user_id) for user_id in source.shared_with] +# if uploader is not None: +# authors.append(uploader) +# owners.append(uploader) +# authors.sort(key=lambda user: user.last_name + ' ' + user.first_name) +# owners.sort(key=lambda user: user.last_name + ' ' + user.first_name) + +# self.uploader = User.from_user(uploader) if uploader is not None else None +# self.authors = [User.from_user(user) for user in authors] +# self.owners = [User.from_user(user) for user in owners] + +# self.comment = source.comment +# self.references = source.references +# self.datasets = [Dataset.from_dataset_id(dataset_id) for dataset_id in source.datasets] +# self.external_id = source.external_id + +# self.atoms = source.atoms +# self.only_atoms = nomad.datamodel.base.only_atoms(source.atoms) +# self.formula = source.formula +# self.n_atoms = source.n_atoms + +# if self.domain is not None: +# inner_doc_type = _domain_inner_doc_types[self.domain] +# inner_doc = inner_doc_type() +# for quantity in Domain.instances[self.domain].domain_quantities.values(): +# quantity_value = quantity.elastic_value(getattr(source, quantity.metadata_field)) +# setattr(inner_doc, quantity.elastic_field, quantity_value) + +# setattr(self, self.domain, inner_doc) + + +def create_entry(section: metainfo.MSection) -> Any: + ''' Creates a elasticsearch_dsl document for the given section. 
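+
+    An illustrative sketch (not part of this change; ``entry_metadata`` stands for an
+    existing :class:`EntryMetadata` instance)::
+
+        entry = create_entry(entry_metadata)
+        entry.save(refresh=True)
+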
''' + cls = _elastic_documents[section.m_def.qualified_name()] + + if section.m_def == datamodel.EntryMetadata.m_def: + obj = cls(meta=dict(id=section.m_get(datamodel.EntryMetadata.calc_id))) + else: + obj = cls() + + for quantity in section.m_def.all_quantities.values(): + search_quantities = quantity.m_x('search') + if search_quantities is None: + continue + + if not isinstance(search_quantities, list): + search_quantities = [search_quantities] + + value = section.m_get(quantity) + if value is None or value == []: + continue - self.comment = source.comment - self.references = source.references - self.datasets = [Dataset.from_dataset_id(dataset_id) for dataset_id in source.datasets] - self.external_id = source.external_id + for i, search_quantity in enumerate(search_quantities): + if i != 0: + # Only the value is only written for the first quantity + continue - self.atoms = source.atoms - self.only_atoms = nomad.datamodel.base.only_atoms(source.atoms) - self.formula = source.formula - self.n_atoms = source.n_atoms + quantity_type = quantity.type + if isinstance(quantity_type, metainfo.Reference): + if quantity.is_scalar: + value = create_entry(cast(metainfo.MSection, value)) + else: + value = [create_entry(item) for item in value] + + elif search_quantity.es_value is not None: + value = search_quantity.es_value(section) - if self.domain is not None: - inner_doc_type = _domain_inner_doc_types[self.domain] - inner_doc = inner_doc_type() - for quantity in Domain.instances[self.domain].domain_quantities.values(): - quantity_value = quantity.elastic_value(getattr(source, quantity.metadata_field)) - setattr(inner_doc, quantity.elastic_field, quantity_value) + setattr(obj, quantity.name, value) - setattr(self, self.domain, inner_doc) + for sub_section in section.m_def.all_sub_sections.values(): + if not sub_section.m_x('search'): + continue + + if sub_section.repeats: + mi_values = list(section.m_get_sub_sections(sub_section)) + if len(mi_values) == 0: + continue + value = [create_entry(value) for value in mi_values] + else: + mi_value = section.m_get_sub_section(sub_section, -1) + if mi_value is None: + continue + value = create_entry(mi_value) + + setattr(obj, sub_section.name, value) + + return obj def delete_upload(upload_id): - """ Delete all entries with given ``upload_id`` from the index. """ + ''' Delete all entries with given ``upload_id`` from the index. ''' index = Entry._default_index() Search(index=index).query('match', upload_id=upload_id).delete() def delete_entry(calc_id): - """ Delete the entry with the given ``calc_id`` from the index. """ + ''' Delete the entry with the given ``calc_id`` from the index. ''' index = Entry._default_index() Search(index=index).query('match', calc_id=calc_id).delete() -def publish(calcs: Iterable[datamodel.CalcWithMetadata]) -> None: - """ Update all given calcs with their metadata and set ``publish = True``. """ +def publish(calcs: Iterable[datamodel.EntryMetadata]) -> None: + ''' Update all given calcs with their metadata and set ``publish = True``. 
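+
+    Illustrative only (assumes a processing ``Upload`` object ``upload`` whose
+    ``entries_metadata()`` yields :class:`EntryMetadata` instances)::
+
+        publish(upload.entries_metadata())
+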
''' def elastic_updates(): for calc in calcs: - entry = Entry.from_calc_with_metadata(calc) + entry = create_entry(calc) entry.published = True entry = entry.to_dict(include_meta=True) source = entry.pop('_source') @@ -222,16 +421,16 @@ def publish(calcs: Iterable[datamodel.CalcWithMetadata]) -> None: refresh() -def index_all(calcs: Iterable[datamodel.CalcWithMetadata], do_refresh=True) -> None: - """ +def index_all(calcs: Iterable[datamodel.EntryMetadata], do_refresh=True) -> None: + ''' Adds all given calcs with their metadata to the index. Returns: Number of failed entries. - """ + ''' def elastic_updates(): for calc in calcs: - entry = Entry.from_calc_with_metadata(calc) + entry = create_entry(calc) entry = entry.to_dict(include_meta=True) entry['_op_type'] = 'index' yield entry @@ -248,36 +447,6 @@ def refresh(): infrastructure.elastic_client.indices.refresh(config.elastic.index_name) -metrics = { - metric_name: metric - for domain in Domain.instances.values() - for metric_name, metric in domain.metrics.items()} -""" -The available search metrics. Metrics are integer values given for each entry that can -be used in statistics (aggregations), e.g. the sum of all total energy calculations or cardinality of -all unique geometries. -""" - -metrics_names = [metric_name for domain in Domain.instances.values() for metric_name in domain.metrics_names] -""" Names of all available metrics """ - -groups = { - key: value - for domain in Domain.instances.values() - for key, value in domain.groups.items()} -"""The available groupable quantities""" - -order_default_quantities = { - domain_name: domain.order_default_quantity - for domain_name, domain in Domain.instances.items() -} - -default_statistics = { - domain_name: domain.default_statistics - for domain_name, domain in Domain.instances.items() -} - - class SearchRequest: ''' Represents a search request and allows to execute that request. @@ -313,10 +482,10 @@ class SearchRequest: self._search = Search(index=config.elastic.index_name) def domain(self, domain: str = None): - """ + ''' Applies the domain of this request to the query. Allows to optionally update the domain of this request. - """ + ''' if domain is not None: self._domain = domain @@ -324,7 +493,7 @@ class SearchRequest: return self def owner(self, owner_type: str = 'all', user_id: str = None): - """ + ''' Uses the query part of the search to restrict the results based on the owner. The possible types are: ``all`` for all calculations; ``public`` for calculations visible by everyone, excluding embargo-ed entries and entries only visible @@ -340,7 +509,7 @@ class SearchRequest: KeyError: If the given owner_type is not supported ValueError: If the owner_type requires a user but none is given, or the given user is not allowed to use the given owner_type. - """ + ''' if owner_type == 'all': q = Q('term', published=True) if user_id is not None: @@ -378,31 +547,31 @@ class SearchRequest: return self def search_parameters(self, **kwargs): - """ + ''' Configures the existing query with additional search parameters. Kwargs are interpreted as key value pairs. Keys have to coresspond to valid entry quantities in the domain's (DFT calculations) datamodel. Alternatively search parameters can be set via attributes. 
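
        A hedged example; the quantity names and values are just examples taken from
        the tests in this change::

            request = SearchRequest().owner('public')
            request.search_parameters(**{'atoms': ['Fe'], 'dft.system': 'bulk'})
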
- """ + ''' for name, value in kwargs.items(): self.search_parameter(name, value) return self def search_parameter(self, name, value): - quantity = Domain.get_quantity(name) + quantity = search_quantities[name] - if quantity.multi and not isinstance(value, list): + if quantity.many and not isinstance(value, list): value = [value] - value = quantity.elastic_value(value) + if quantity.many_or and isinstance(value, List): + self.q &= Q('terms', **{quantity.es_quantity: value}) + return self - if quantity.elastic_search_type == 'terms': - if not isinstance(value, list): + if quantity.derived: + if quantity.many and not isinstance(value, list): value = [value] - self.q &= Q('terms', **{quantity.qualified_elastic_field: value}) - - return self + value = quantity.derived(value) if isinstance(value, list): values = value @@ -410,18 +579,18 @@ class SearchRequest: values = [value] for item in values: - self.q &= Q(quantity.elastic_search_type, **{quantity.qualified_elastic_field: item}) + self.q &= Q('match', **{quantity.es_quantity: item}) return self def query(self, query): - """ Adds the given query as a 'and' (i.e. 'must') clause to the request. """ + ''' Adds the given query as a 'and' (i.e. 'must') clause to the request. ''' self._query &= query return self def time_range(self, start: datetime, end: datetime): - """ Adds a time range to the query. """ + ''' Adds a time range to the query. ''' if start is None and end is None: return self @@ -436,7 +605,7 @@ class SearchRequest: @property def q(self): - """ The underlying elasticsearch_dsl query object """ + ''' The underlying elasticsearch_dsl query object ''' if self._query is None: return Q('match_all') else: @@ -447,30 +616,30 @@ class SearchRequest: self._query = q def totals(self, metrics_to_use: List[str] = []): - """ + ''' Configure the request to return overall totals for the given metrics. The statics are returned with the other quantity statistics under the pseudo quantity name 'total'. 'total' contains the pseudo value 'all'. It is used to store the metrics aggregated over all entries in the search results. - """ + ''' self._add_metrics(self._search.aggs, metrics_to_use) return self def default_statistics(self, metrics_to_use: List[str] = []): - """ + ''' Configures the domain's default statistics. - """ - for name in default_statistics[self._domain]: + ''' + for search_quantity in default_statistics[self._domain]: self.statistic( - name, - Domain.get_quantity(name).aggregations, + search_quantity.qualified_name, + search_quantity.statistic_size, metrics_to_use=metrics_to_use) return self def statistic(self, quantity_name: str, size: int, metrics_to_use: List[str] = []): - """ + ''' This can be used to display statistics over the searched entries and allows to implement faceted search on the top values for each quantity. @@ -493,9 +662,9 @@ class SearchRequest: metrics_to_use: The metrics calculated over the aggregations. Can be ``unique_code_runs``, ``datasets``, other domain specific metrics. The basic doc_count metric ``code_runs`` is always given. 
- """ - quantity = Domain.get_quantity(quantity_name) - terms = A('terms', field=quantity.qualified_elastic_field, size=size, order=dict(_key='asc')) + ''' + quantity = search_quantities[quantity_name] + terms = A('terms', field=quantity.es_quantity, size=size, order=dict(_key='asc')) buckets = self._search.aggs.bucket('statistics:%s' % quantity_name, terms) self._add_metrics(buckets, metrics_to_use) @@ -507,24 +676,26 @@ class SearchRequest: parent = self._search.aggs for metric in metrics_to_use: - quantity, metric_kind = metrics[metric] - field = Domain.get_quantity(quantity).elastic_field - parent.metric('metric:%s' % metric, A(metric_kind, field=field)) + metric_quantity = metrics[metric] + field = metric_quantity.es_quantity + parent.metric( + 'metric:%s' % metric_quantity.metric_name, + A(metric_quantity.metric, field=field)) def date_histogram(self, metrics_to_use: List[str] = []): - """ + ''' Adds a date histogram on the given metrics to the statistics part. - """ + ''' histogram = A('date_histogram', field='upload_time', interval='1M', format='yyyy-MM-dd') self._add_metrics(self._search.aggs.bucket('statistics:date_histogram', histogram), metrics_to_use) return self def quantities(self, **kwargs): - """ + ''' Shorthand for adding multiple quantities. See :func:`quantity`. Keywork argument keys are quantity name, values are tuples of size and after value. - """ + ''' for name, spec in kwargs: size, after = spec self.quantity(name, after=after, size=size) @@ -534,7 +705,7 @@ class SearchRequest: def quantity( self, name, size=100, after=None, examples=0, examples_source=None, order_by: str = None, order: str = 'desc'): - """ + ''' Adds a requests for values of the given quantity. It allows to scroll through all values via elasticsearch's composite aggregations. The response will contain the quantity values and @@ -564,12 +735,12 @@ class SearchRequest: value bucket is used. order: "desc" or "asc" - """ + ''' if size is None: size = 100 - quantity = Domain.get_quantity(name) - terms = A('terms', field=quantity.qualified_elastic_field) + quantity = search_quantities[name] + terms = A('terms', field=quantity.es_quantity) # We are using elastic searchs 'composite aggregations' here. We do not really # compose aggregations, but only those pseudo composites allow us to use the @@ -597,36 +768,36 @@ class SearchRequest: return self def exclude(self, *args): - """ Exclude certain elastic fields from the search results. """ + ''' Exclude certain elastic fields from the search results. ''' self._search = self._search.source(excludes=args) return self def include(self, *args): - """ Include only the given fields in the search results. """ + ''' Include only the given fields in the search results. ''' self._search = self._search.source(includes=args) return self def execute(self): - """ + ''' Exectutes without returning actual results. Only makes sense if the request was configured for statistics or quantity values. - """ + ''' return self._response(self._search.query(self.q)[0:0].execute()) def execute_scan(self, order_by: str = None, order: int = -1, **kwargs): - """ + ''' This execute the search as scan. The result will be a generator over the found entries. Everything but the query part of this object, will be ignored. 
- """ + ''' search = self._search.query(self.q) if order_by is not None: - order_by_quantity = Domain.get_quantity(order_by) + order_by_quantity = search_quantities[order_by] if order == 1: - search = search.sort(order_by_quantity.qualified_elastic_field) + search = search.sort(order_by_quantity.es_quantity) else: - search = search.sort('-%s' % order_by_quantity.qualified_elastic_field) + search = search.sort('-%s' % order_by_quantity.es_quantity) search = search.params(preserve_order=True) @@ -636,7 +807,7 @@ class SearchRequest: def execute_paginated( self, page: int = 1, per_page=10, order_by: str = None, order: int = -1): - """ + ''' Executes the search and returns paginated results. Those are sorted. Arguments: @@ -644,21 +815,22 @@ class SearchRequest: per_page: The number of entries per page. order_by: The quantity to order by. order: -1 or 1 for descending or ascending order. - """ + ''' if order_by is None: - order_by = order_default_quantities[self._domain] + order_by_quantity = order_default_quantities[self._domain] + else: + order_by_quantity = search_quantities[order_by] search = self._search.query(self.q) - order_by_quantity = Domain.get_quantity(order_by) - if order == 1: - search = search.sort(order_by_quantity.qualified_elastic_field) + search = search.sort(order_by_quantity.es_quantity) else: - search = search.sort('-%s' % order_by_quantity.qualified_elastic_field) + search = search.sort('-%s' % order_by_quantity.es_quantity) search = search[(page - 1) * per_page: page * per_page] es_result = search.execute() + result = self._response(es_result, with_hits=True) result.update(pagination=dict(total=result['total'], page=page, per_page=per_page)) @@ -667,7 +839,7 @@ class SearchRequest: def execute_scrolled( self, scroll_id: str = None, size: int = 1000, scroll: str = u'5m', order_by: str = None, order: int = -1): - """ + ''' Executes a scrolling search. based on ES scroll API. Pagination is replaced with scrolling, no ordering is available, no statistics, no quantities will be provided. @@ -687,7 +859,7 @@ class SearchRequest: to this method) in ES time units. Default is 5 minutes. TODO support order and order_by - """ + ''' es = infrastructure.elastic_client if scroll_id is None: @@ -726,11 +898,11 @@ class SearchRequest: return dict(scroll=scroll_info, results=results) def _response(self, response, with_hits: bool = False) -> Dict[str, Any]: - """ + ''' Prepares a response object covering the total number of results, hits, statistics, and quantities. Other aspects like pagination and scrolling have to be added elsewhere. - """ + ''' result: Dict[str, Any] = dict() aggs = response.aggregations.to_dict() @@ -809,24 +981,25 @@ class SearchRequest: def to_calc_with_metadata(results: List[Dict[str, Any]]): - """ Translates search results into :class:`CalcWithMetadata` objects read from mongo. """ + ''' Translates search results into :class:`EntryMetadata` objects read from mongo. ''' ids = [result['calc_id'] for result in results] return [ - datamodel.CalcWithMetadata(**calc.metadata) + datamodel.EntryMetadata.m_from_dict(calc.metadata) for calc in proc.Calc.objects(calc_id__in=ids)] def flat(obj, prefix=None): - """ + ''' Helper that translates nested result objects into flattened dicts with ``domain.quantity`` as keys. 
- """ + ''' if isinstance(obj, dict): result = {} for key, value in obj.items(): if isinstance(value, dict): + value = flat(value) for child_key, child_value in value.items(): - result['%s.%s' % (key, child_key)] = flat(child_value) + result['%s.%s' % (key, child_key)] = child_value else: result[key] = value diff --git a/nomad/utils.py b/nomad/utils.py index 1cdb8c933b0956e586fbd2d4b499a86c675d13f2..c2a71853c3cbedad352c04f9dcd39a5ef198264e 100644 --- a/nomad/utils.py +++ b/nomad/utils.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -""" +''' .. autofunc::nomad.utils.create_uuid .. autofunc::nomad.utils.hash .. autofunc::nomad.utils.timer @@ -31,7 +31,7 @@ Depending on the configuration all logs will also be send to a central logstash. .. autofunc::nomad.utils.create_uuid .. autofunc::nomad.utils.timer .. autofunc::nomad.utils.lnr -""" +''' from typing import List import base64 @@ -53,7 +53,7 @@ from datetime import timedelta from nomad import config default_hash_len = 28 -""" Length of hashes and hash-based ids (e.g. calc, upload) in nomad. """ +''' Length of hashes and hash-based ids (e.g. calc, upload) in nomad. ''' def decode_handle_id(handle_str: str): @@ -73,7 +73,7 @@ def decode_handle_id(handle_str: str): def hash(*args, length: int = default_hash_len) -> str: - """ Creates a websave hash of the given length based on the repr of the given arguments. """ + ''' Creates a websave hash of the given length based on the repr of the given arguments. ''' hash = hashlib.sha512() for arg in args: hash.update(str(arg).encode('utf-8')) @@ -82,7 +82,7 @@ def hash(*args, length: int = default_hash_len) -> str: def make_websave(hash, length: int = default_hash_len) -> str: - """ Creates a websave string for a hashlib hash object. """ + ''' Creates a websave string for a hashlib hash object. ''' if length > 0: return base64.b64encode(hash.digest(), altchars=b'-_')[:length].decode('utf-8') else: @@ -90,30 +90,30 @@ def make_websave(hash, length: int = default_hash_len) -> str: def base64_encode(string): - """ + ''' Removes any `=` used as padding from the encoded string. - """ + ''' encoded = base64.urlsafe_b64encode(string).decode('utf-8') return encoded.rstrip("=") def base64_decode(string): - """ + ''' Adds back in the required padding before decoding. - """ + ''' padding = 4 - (len(string) % 4) bytes = (string + ("=" * padding)).encode('utf-8') return base64.urlsafe_b64decode(bytes) def sanitize_logevent(event: str) -> str: - """ + ''' Prepares a log event or message for analysis in elastic stack. It removes numbers, list, and matrices of numbers from the event string and limits its size. The goal is to make it easier to define aggregations over events by using event strings as representatives for event classes rather than event instances (with concrete numbers, etc). - """ + ''' sanitized_event = event[:120] sanitized_event = re.sub(r'(\d*\.\d+|\d+(\.\d*)?)', 'X', sanitized_event) sanitized_event = re.sub(r'((\[|\()\s*)?X\s*(,\s*X)+(\s*(\]|\)))?', 'L', sanitized_event) @@ -123,7 +123,7 @@ def sanitize_logevent(event: str) -> str: @contextmanager def legacy_logger(logger): - """ Context manager that makes the given logger the logger for legacy log entries. """ + ''' Context manager that makes the given logger the logger for legacy log entries. 
''' LogstashHandler.legacy_logger = logger try: yield @@ -132,14 +132,14 @@ def legacy_logger(logger): class LogstashHandler(logstash.TCPLogstashHandler): - """ + ''' A log handler that emits records to logstash. It also filters logs for being structlog entries. All other entries are diverted to a global `legacy_logger`. This legacy logger is supposed to be a structlog logger that turns legacy records into structlog entries with reasonable binds depending on the current execution context (e.g. parsing/normalizing, etc.). If no legacy logger is set, they get emitted as usual (e.g. non nomad logs, celery, dbs, etc.) - """ + ''' legacy_logger = None @@ -349,15 +349,15 @@ def configure_logging(): def create_uuid() -> str: - """ Returns a web-save base64 encoded random uuid (type 4). """ + ''' Returns a web-save base64 encoded random uuid (type 4). ''' return base64.b64encode(uuid.uuid4().bytes, altchars=b'-_').decode('utf-8')[0:-2] def get_logger(name, **kwargs): - """ + ''' Returns a structlog logger that is already attached with a logstash handler. Use additional *kwargs* to pre-bind some values to all events. - """ + ''' if name.startswith('nomad.'): name = '.'.join(name.split('.')[:2]) @@ -367,14 +367,14 @@ def get_logger(name, **kwargs): @contextmanager def lnr(logger, event, **kwargs): - """ + ''' A context manager that Logs aNd Raises all exceptions with the given logger. Arguments: logger: The logger that should be used for logging exceptions. event: the log message **kwargs: additional properties for the structured log - """ + ''' try: yield except HTTPException as e: @@ -387,7 +387,7 @@ def lnr(logger, event, **kwargs): @contextmanager def timer(logger, event, method='info', **kwargs): - """ + ''' A context manager that takes execution time and produces a log entry with said time. Arguments: @@ -399,7 +399,7 @@ def timer(logger, event, method='info', **kwargs): Returns: The method yields a dictionary that can be used to add further log data. - """ + ''' start = time.time() try: @@ -441,15 +441,15 @@ def to_tuple(self, *args): def chunks(list, n): - """ Chunks up the given list into parts of size n. """ + ''' Chunks up the given list into parts of size n. ''' for i in range(0, len(list), n): yield list[i:i + n] class POPO(dict): - """ + ''' A dict subclass that uses attributes as key/value pairs. - """ + ''' def __init__(self, **kwargs): super().__init__(**kwargs) @@ -470,10 +470,10 @@ class POPO(dict): class SleepTimeBackoff: - """ + ''' Provides increasingly larger sleeps. Useful when observing long running processes with unknown runtime. - """ + ''' def __init__(self, start_time: float = 0.1, max_time: float = 5): self.current_time = start_time @@ -517,10 +517,10 @@ class ETA: def common_prefix(paths): - """ + ''' Computes the longest common file path prefix (with respect to '/' separated segments). Returns empty string is ne common prefix exists. - """ + ''' common_prefix = None for path in paths: diff --git a/tests/__init__.py b/tests/__init__.py index e48f987ae9f076668b44484fd972c9599a585b40..05d19b7d8a11b9f922d7200eb9155c74f5dc3b9d 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -""" +''' The nomad@FAIRDI tests are based on the pytest library. Pytest uses *fixtures* to modularize setup and teardown of mocks, infrastructure, and other context objects. 
The following depicts the used hierarchy of fixtures: @@ -20,7 +20,7 @@ The following depicts the used hierarchy of fixtures: .. image:: test_fixtures.png Otherwise the test submodules follow the names of the nomad code modules. -""" +''' from nomad import config diff --git a/tests/app/resource.py b/tests/app/resource.py index 877031f9df65f1d5c7e5bf12b4549f8d7df9a216..7215b4bee77b5a8f06723e6f49118213aa381726 100644 --- a/tests/app/resource.py +++ b/tests/app/resource.py @@ -12,10 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. -""" +''' API endpoints that cause various scenarios to test general API aspects like logging, error handling, etc. -""" +''' from flask_restplus import Resource diff --git a/tests/app/test_api.py b/tests/app/test_api.py index 041825488e1c32842c85a683c3e8c912075ce409..5d37a087470ff03c05b11d6e891e6885a1d7128b 100644 --- a/tests/app/test_api.py +++ b/tests/app/test_api.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from typing import Any +from typing import Any, Iterable import pytest import time import json @@ -30,7 +30,7 @@ from nomad.app.api.auth import generate_upload_token from nomad import search, parsing, files, config, utils, infrastructure from nomad.files import UploadFiles, PublicUploadFiles from nomad.processing import Upload, Calc, SUCCESS -from nomad.datamodel import UploadWithMetadata, CalcWithMetadata, User, Dataset +from nomad.datamodel import EntryMetadata, User, Dataset from tests.conftest import create_auth_headers, clear_elastic, create_test_structure from tests.test_files import example_file, example_file_mainfile, example_file_contents @@ -56,12 +56,11 @@ def test_user_signature_token(api, test_user_auth): return json.loads(rv.data)['signature_token'] -def get_upload_with_metadata(upload: dict) -> UploadWithMetadata: - """ Create a :class:`UploadWithMetadata` from a API upload json record. """ - return UploadWithMetadata( - upload_id=upload['upload_id'], calcs=[ - CalcWithMetadata(domain='dft', calc_id=calc['calc_id'], mainfile=calc['mainfile']) - for calc in upload['calcs']['results']]) +def get_upload_entries_metadata(upload: dict) -> Iterable[EntryMetadata]: + ''' Create an iterable of :class:`EntryMetadata` from an API upload JSON record.
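+
+    Illustrative use, mirroring the tests in this change::
+
+        entries = get_upload_entries_metadata(upload)
+        assert_search_upload(entries, additional_keys=['atoms', 'dft.system'])
+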
''' + return [ + EntryMetadata(domain='dft', calc_id=entry['calc_id'], mainfile=entry['mainfile']) + for entry in upload['calcs']['results']] def assert_zip_file(rv, files: int = -1, basename: bool = None): @@ -233,16 +232,14 @@ class TestUploads: upload = self.assert_upload(rv.data) assert len(upload['calcs']['results']) == 1 - upload_with_metadata = get_upload_with_metadata(upload) - assert_upload_files(upload_with_metadata, files.StagingUploadFiles) - assert_search_upload(upload_with_metadata, additional_keys=['atoms', 'dft.system']) + entries = get_upload_entries_metadata(upload) + assert_upload_files(upload_id, entries, files.StagingUploadFiles) + assert_search_upload(entries, additional_keys=['atoms', 'dft.system']) def assert_published(self, api, test_user_auth, upload_id, proc_infra, metadata={}): rv = api.get('/uploads/%s' % upload_id, headers=test_user_auth) upload = self.assert_upload(rv.data) - upload_with_metadata = get_upload_with_metadata(upload) - rv = api.post( '/uploads/%s' % upload_id, headers=test_user_auth, @@ -263,10 +260,22 @@ class TestUploads: assert upload_proc is not None assert upload_proc.published is True assert upload_proc.embargo_length == min(36, metadata.get('embargo_length', 36)) - upload_with_metadata = upload_proc.to_upload_with_metadata() + entries = upload_proc.entries_metadata() - assert_upload_files(upload_with_metadata, files.PublicUploadFiles, published=True) - assert_search_upload(upload_with_metadata, additional_keys=additional_keys, published=True) + for entry in entries: + for key, transform in { + 'comment': lambda e: e.comment, + 'with_embargo': lambda e: e.with_embargo, + 'references': lambda e: e.references, + 'coauthors': lambda e: [u.user_id for u in e.coauthors], + '_uploader': lambda e: e.uploader.user_id, + '_pid': lambda e: e.pid, + 'external_id': lambda e: e.external_id}.items(): + if key in metadata: + assert transform(entry) == metadata[key], key + + assert_upload_files(upload_id, entries, files.PublicUploadFiles, published=True) + assert_search_upload(entries, additional_keys=additional_keys, published=True) def block_until_completed(self, api, upload_id: str, test_user_auth): while True: @@ -504,6 +513,7 @@ class TestUploads: today = datetime.datetime.utcnow().date() +today_datetime = datetime.datetime(*today.timetuple()[:6]) class UploadFilesBasedTests: @@ -590,9 +600,9 @@ class UploadFilesBasedTests: calc_specs = 'r' if restricted else 'p' Upload.create(user=test_user, upload_id='test_upload') if in_staging: - _, upload_files = create_staging_upload('test_upload', calc_specs=calc_specs) + _, _, upload_files = create_staging_upload('test_upload', calc_specs=calc_specs) else: - _, upload_files = create_public_upload('test_upload', calc_specs=calc_specs) + _, _, upload_files = create_public_upload('test_upload', calc_specs=calc_specs) yield 'test_upload', authorized, auth_headers @@ -697,34 +707,35 @@ class TestRepo(): dataset_id='ds_id', name='ds_name', user_id=test_user.user_id, doi='ds_doi') example_dataset.m_x('me').create() - calc_with_metadata = CalcWithMetadata( - domain='dft', upload_id='example_upload_id', calc_id='0', upload_time=today) - calc_with_metadata.files = ['test/mainfile.txt'] - calc_with_metadata.apply_domain_metadata(normalized) + entry_metadata = EntryMetadata( + domain='dft', upload_id='example_upload_id', calc_id='0', upload_time=today_datetime) + entry_metadata.files = ['test/mainfile.txt'] + entry_metadata.apply_domain_metadata(normalized) - 
calc_with_metadata.update(datasets=[example_dataset.dataset_id]) + entry_metadata.m_update(datasets=[example_dataset.dataset_id]) - calc_with_metadata.update( + entry_metadata.m_update( calc_id='1', uploader=test_user.user_id, published=True, with_embargo=False) - search.Entry.from_calc_with_metadata(calc_with_metadata).save(refresh=True) + search.create_entry(entry_metadata).save(refresh=True) - calc_with_metadata.update( + entry_metadata.m_update( calc_id='2', uploader=other_test_user.user_id, published=True, - with_embargo=False, pid=2, upload_time=today - datetime.timedelta(days=5), + with_embargo=False, pid=2, upload_time=today_datetime - datetime.timedelta(days=5), external_id='external_2') - calc_with_metadata.update( - atoms=['Fe'], comment='this is a specific word', formula='AAA', basis_set='zzz') - search.Entry.from_calc_with_metadata(calc_with_metadata).save(refresh=True) + entry_metadata.m_update( + atoms=['Fe'], comment='this is a specific word', formula='AAA') + entry_metadata.dft.basis_set = 'zzz' + search.create_entry(entry_metadata).save(refresh=True) - calc_with_metadata.update( + entry_metadata.m_update( calc_id='3', uploader=other_test_user.user_id, published=False, with_embargo=False, pid=3, external_id='external_3') - search.Entry.from_calc_with_metadata(calc_with_metadata).save(refresh=True) + search.create_entry(entry_metadata).save(refresh=True) - calc_with_metadata.update( + entry_metadata.m_update( calc_id='4', uploader=other_test_user.user_id, published=True, with_embargo=True, pid=4, external_id='external_4') - search.Entry.from_calc_with_metadata(calc_with_metadata).save(refresh=True) + search.create_entry(entry_metadata).save(refresh=True) yield @@ -780,28 +791,27 @@ class TestRepo(): assert rv.status_code == 404 def test_search_datasets(self, api, example_elastic_calcs, no_warn, other_test_user_auth): - rv = api.get('/repo/?owner=all&datasets=true', headers=other_test_user_auth) + rv = api.get('/repo/?owner=all&group_datasets=true', headers=other_test_user_auth) data = self.assert_search(rv, 4) datasets = data.get('datasets', None) assert datasets is not None values = datasets['values'] assert values['ds_id']['total'] == 4 - assert values['ds_id']['examples'][0]['datasets'][0]['id'] == 'ds_id' + assert values['ds_id']['examples'][0]['datasets'][0]['dataset_id'] == 'ds_id' assert 'after' in datasets assert 'datasets' in data['statistics']['total']['all'] assert data['statistics']['total']['all']['datasets'] > 0 def test_search_uploads(self, api, example_elastic_calcs, no_warn, other_test_user_auth): - rv = api.get('/repo/?owner=all&uploads=true', headers=other_test_user_auth) + rv = api.get('/repo/?owner=all&group_uploads=true', headers=other_test_user_auth) data = self.assert_search(rv, 4) uploads = data.get('uploads', None) assert uploads is not None values = uploads['values'] - # the 4 uploads have "example upload id", but 3 have newer upload time. Therefore, - # only 3 calc will be in the last (and therefore used) bucket of 'example_upload_id'. 
- assert values['example_upload_id']['total'] == 3 + + assert values['example_upload_id']['total'] == 4 assert values['example_upload_id']['examples'][0]['upload_id'] == 'example_upload_id' assert 'after' in uploads assert 'uploads' in data['statistics']['total']['all'] @@ -930,10 +940,10 @@ class TestRepo(): def test_search_aggregation_metrics(self, api, example_elastic_calcs, no_warn, metrics): rv = api.get('/repo/?%s' % urlencode({ 'metrics': metrics, - 'statistics': True, - 'dft.groups': True, - 'datasets': True, - 'uploads': True}, doseq=True)) + 'group_statistics': True, + 'group_dft.groups': True, + 'group_datasets': True, + 'group_uploads': True}, doseq=True)) assert rv.status_code == 200 data = json.loads(rv.data) @@ -1169,10 +1179,10 @@ class TestEditRepo(): create_test_structure(meta_info, id, 2, 1, [], 0, metadata=metadata) entries = [ - dict(calc_id='1', upload_id='upload_1', user=test_user, published=True, embargo=False), - dict(calc_id='2', upload_id='upload_2', user=test_user, published=True, embargo=True), - dict(calc_id='3', upload_id='upload_2', user=test_user, published=False, embargo=False), - dict(calc_id='4', upload_id='upload_3', user=other_test_user, published=True, embargo=False)] + dict(calc_id='1', upload_id='upload_1', user=test_user, published=True, with_embargo=False), + dict(calc_id='2', upload_id='upload_2', user=test_user, published=True, with_embargo=True), + dict(calc_id='3', upload_id='upload_2', user=test_user, published=False, with_embargo=False), + dict(calc_id='4', upload_id='upload_3', user=other_test_user, published=True, with_embargo=False)] i = 0 for entry in entries: @@ -1253,6 +1263,7 @@ class TestEditRepo(): shared_with=[other_test_user.user_id]) rv = self.perform_edit(**edit_data, query=dict(upload_id='upload_1')) result = json.loads(rv.data) + assert rv.status_code == 200 actions = result.get('actions') for key in edit_data: assert key in actions @@ -1393,7 +1404,7 @@ def test_edit_lift_embargo(api, published, other_test_user_auth): } } })) - assert rv.status_code == 200 + assert rv.status_code == 200, rv.data assert not Calc.objects(calc_id=example_calc.calc_id).first().metadata['with_embargo'] Upload.get(published.upload_id).block_until_complete() @@ -1780,13 +1791,13 @@ class TestDataset: @pytest.fixture() def example_dataset_with_entry(self, mongo, elastic, example_datasets): - calc = CalcWithMetadata( + entry_metadata = EntryMetadata( domain='dft', calc_id='1', upload_id='1', published=True, with_embargo=False, datasets=['1']) Calc( calc_id='1', upload_id='1', create_time=datetime.datetime.now(), - metadata=calc.to_dict()).save() - search.Entry.from_calc_with_metadata(calc).save() + metadata=entry_metadata.m_to_dict()).save() + search.create_entry(entry_metadata).save() search.refresh() def test_delete_dataset(self, api, test_user_auth, example_dataset_with_entry): @@ -1818,12 +1829,12 @@ class TestDataset: assert rv.status_code == 400 def test_assign_doi_unpublished(self, api, test_user_auth, example_datasets): - calc = CalcWithMetadata( + entry_metadata = EntryMetadata( domain='dft', calc_id='1', upload_id='1', published=False, with_embargo=False, datasets=['1']) Calc( calc_id='1', upload_id='1', create_time=datetime.datetime.now(), - metadata=calc.to_dict()).save() + metadata=entry_metadata.m_to_dict()).save() rv = api.post('/datasets/ds1', headers=test_user_auth) assert rv.status_code == 400 diff --git a/tests/app/test_optimade.py b/tests/app/test_optimade.py index 
f63a7d7bd0d035e50ea9be6408aa8f9a52b921d0..c531bbf350f6aa2225b5200ce8b108ab1b819859 100644 --- a/tests/app/test_optimade.py +++ b/tests/app/test_optimade.py @@ -36,10 +36,10 @@ def test_get_entry(published: Upload): data = json.load(f) assert 'OptimadeEntry' in data search_result = search.SearchRequest().search_parameter('calc_id', calc_id).execute_paginated()['results'][0] - assert 'dft.optimade' in search.flat(search_result) + assert 'dft.optimade.chemical_formula_hill' in search.flat(search_result) -def test_no_optimade(meta_info, elastic, api): +def test_no_optimade(meta_info, mongo, elastic, api): create_test_structure(meta_info, 1, 2, 1, [], 0) create_test_structure(meta_info, 2, 2, 1, [], 0, optimade=False) search.refresh() diff --git a/tests/bravado_flask.py b/tests/bravado_flask.py index 2616a1bfe795d77392cfe407ee8920587dcb2b40..35c83c279aa641c461089659248b9acfd37d3cb7 100644 --- a/tests/bravado_flask.py +++ b/tests/bravado_flask.py @@ -25,7 +25,7 @@ class FlaskTestHttpClient(HttpClient): self._headers = headers def request(self, request_params, *args, **kwargs): - """ + ''' Taken from `bravado.http_client.HttpClient`. Args: @@ -40,7 +40,7 @@ class FlaskTestHttpClient(HttpClient): `bravado.http_future.HttpFuture`. Returns: `bravado_core.http_future.HttpFuture`: HTTP Future object - """ + ''' request_params.setdefault('headers', {}).update(self._headers) test_future = FlaskTestFutureAdapter(request_params, self._flask_client) @@ -48,7 +48,7 @@ class FlaskTestHttpClient(HttpClient): class FlaskTestFutureAdapter: - """ + ''' Mimics a :class:`concurrent.futures.Future` for the purposes of making it work with Bravado's :class:`bravado.http_future.HttpFuture` when simulating calls to a Falcon API. Those calls will be validated by Bravado. @@ -59,7 +59,7 @@ class FlaskTestFutureAdapter: falcon_api (`falcon.API`): API object to send the request to. response_encoding (str): Encoding that will be used to decode response's body. If set to None then the body won't be decoded. - """ + ''' def __init__(self, request_params, flask_client, response_encoding='utf-8'): self._flask_client = flask_client @@ -70,10 +70,10 @@ class FlaskTestFutureAdapter: self.connection_errors = None def result(self, **_): - """ + ''' Args: **_: Ignore all the keyword arguments (right now it's just timeout) passed by Bravado. - """ + ''' # Bravado will create the URL by appending request path to 'http://localhost' path = self._request_params['url'].replace('http://localhost', '') method = self._request_params.get('method') @@ -100,54 +100,54 @@ class FlaskTestFutureAdapter: class FlaskTestResponseAdapter(IncomingResponse): - """ + ''' Wraps a response from Falcon test client to provide a uniform interface expected by Bravado's :class:`bravado.http_future.HttpFuture`. Args: flask_response: Response to a call simulated with flask's test client. - """ + ''' def __init__(self, flask_response): self._response = flask_response @property def status_code(self): - """ + ''' Returns: int: HTTP status code - """ + ''' return self._response.status_code @property def text(self): - """ + ''' Returns: str: Textual representation of the response's body. - """ + ''' return self._response.data @property def reason(self): - """ + ''' Returns: str: Reason-phrase of the HTTP response (e.g. "OK", or "Not Found") - """ + ''' # status codes from Falcon look like this: "200 OK" return self._response.status[4:] @property def headers(self): - """ + ''' Returns: dict: Headers attached to the response. 
- """ + ''' return self._response.headers def json(self, **kwargs): - """ + ''' Args: **kwargs: This is a part of the interface, but we don't do anything with it. Returns: dict: JSON representation of the response's body. - """ + ''' return json.loads(self._response.data) diff --git a/tests/conftest.py b/tests/conftest.py index 9584b3dd8e23bac1dc6be5b091a1ab03a22d83b9..ff8713b2c870e656fcc40a3718e0f59e558b560f 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -35,7 +35,7 @@ from nomadcore.local_meta_info import loadJsonFile import nomad_meta_info from nomad import config, infrastructure, parsing, processing, app, search, utils -from nomad.datamodel import User, CalcWithMetadata +from nomad.datamodel import User, EntryMetadata from nomad.parsing import LocalBackend from tests import test_parsing, test_normalizing @@ -77,7 +77,7 @@ def raw_files_infra(): @pytest.fixture(scope='function') def raw_files(raw_files_infra): - """ Provides cleaned out files directory structure per function. Clears files after test. """ + ''' Provides cleaned out files directory structure per function. Clears files after test. ''' directories = [config.fs.staging, config.fs.public, config.fs.tmp] for directory in directories: if not os.path.exists(directory): @@ -123,10 +123,10 @@ def celery_config(): @pytest.fixture(scope='session') def purged_app(celery_session_app): - """ + ''' Purges all pending tasks of the celery app before test. This is necessary to remove tasks from the queue that might be 'left over' from prior tests. - """ + ''' celery_session_app.control.purge() yield celery_session_app @@ -140,7 +140,7 @@ def celery_inspect(purged_app): # 'bleeding' into successive tests. @pytest.fixture(scope='function') def worker(mongo, celery_session_worker, celery_inspect): - """ Provides a clean worker (no old tasks) per function. Waits for all tasks to be completed. """ + ''' Provides a clean worker (no old tasks) per function. Waits for all tasks to be completed. ''' yield # wait until there no more active tasks, to leave clean worker and queues for the next @@ -164,7 +164,7 @@ def mongo_infra(monkeysession): @pytest.fixture(scope='function') def mongo(mongo_infra): - """ Provides a cleaned mocked mongo per function. """ + ''' Provides a cleaned mocked mongo per function. ''' # Some test cases need to reset the database connection if infrastructure.mongo_client != mongo_infra: mongo_infra = infrastructure.mongo_client @@ -174,7 +174,7 @@ def mongo(mongo_infra): @pytest.fixture(scope='session') def elastic_infra(monkeysession): - """ Provides elastic infrastructure to the session """ + ''' Provides elastic infrastructure to the session ''' monkeysession.setattr('nomad.config.elastic.index_name', 'nomad_fairdi_test') try: return infrastructure.setup_elastic() @@ -199,7 +199,7 @@ def clear_elastic(elastic): @pytest.fixture(scope='function') def elastic(elastic_infra): - """ Provides a clean elastic per function. Clears elastic before test. """ + ''' Provides a clean elastic per function. Clears elastic before test. 
''' clear_elastic(elastic_infra) assert infrastructure.elastic_client is not None @@ -280,7 +280,7 @@ def keycloak(monkeypatch): @pytest.fixture(scope='function') def proc_infra(worker, elastic, mongo, raw_files): - """ Combines all fixtures necessary for processing (elastic, worker, files, mongo) """ + ''' Combines all fixtures necessary for processing (elastic, worker, files, mongo) ''' return dict(elastic=elastic) @@ -384,10 +384,10 @@ def with_warn(caplog): assert count > 0 -""" +''' Fixture for mocked SMTP server for testing. Based on https://gist.github.com/akheron/cf3863cdc424f08929e4cb7dc365ef23. -""" +''' RecordedMessage = namedtuple( 'RecordedMessage', @@ -527,31 +527,38 @@ def example_user_metadata(other_test_user, test_user) -> dict: } +@pytest.fixture(scope='module') +def internal_example_user_metadata(example_user_metadata) -> dict: + return { + key[1:] if key[0] == '_' else key: value + for key, value in example_user_metadata.items()} + + @pytest.fixture(scope='session') def parsed(example_mainfile: Tuple[str, str]) -> parsing.LocalBackend: - """ Provides a parsed calculation in the form of a LocalBackend. """ + ''' Provides a parsed calculation in the form of a LocalBackend. ''' parser, mainfile = example_mainfile return test_parsing.run_parser(parser, mainfile) @pytest.fixture(scope='session') def parsed_ems() -> parsing.LocalBackend: - """ Provides a parsed experiment in the form of a LocalBackend. """ + ''' Provides a parsed experiment in the form of a LocalBackend. ''' return test_parsing.run_parser('parsers/skeleton', 'tests/data/parsers/skeleton/example.metadata.json') @pytest.fixture(scope='session') def normalized(parsed: parsing.LocalBackend) -> parsing.LocalBackend: - """ Provides a normalized calculation in the form of a LocalBackend. """ + ''' Provides a normalized calculation in the form of a LocalBackend. ''' return test_normalizing.run_normalize(parsed) @pytest.fixture(scope='function') def uploaded(example_upload: str, raw_files) -> Tuple[str, str]: - """ + ''' Provides a uploaded with uploaded example file and gives the upload_id. Clears files after test. - """ + ''' example_upload_id = os.path.basename(example_upload).replace('.zip', '') return example_upload_id, example_upload @@ -565,9 +572,9 @@ def non_empty_uploaded(non_empty_example_upload: str, raw_files) -> Tuple[str, s @pytest.mark.timeout(config.tests.default_timeout) @pytest.fixture(scope='function') def processed(uploaded: Tuple[str, str], test_user: User, proc_infra) -> processing.Upload: - """ + ''' Provides a processed upload. Upload was uploaded with test_user. - """ + ''' return test_processing.run_processing(uploaded, test_user) @@ -586,19 +593,19 @@ def processeds(non_empty_example_upload: str, test_user: User, proc_infra) -> Li @pytest.mark.timeout(config.tests.default_timeout) @pytest.fixture(scope='function') def non_empty_processed(non_empty_uploaded: Tuple[str, str], test_user: User, proc_infra) -> processing.Upload: - """ + ''' Provides a processed upload. Upload was uploaded with test_user. - """ + ''' return test_processing.run_processing(non_empty_uploaded, test_user) @pytest.mark.timeout(config.tests.default_timeout) @pytest.fixture(scope='function') -def published(non_empty_processed: processing.Upload, example_user_metadata) -> processing.Upload: - """ +def published(non_empty_processed: processing.Upload, internal_example_user_metadata) -> processing.Upload: + ''' Provides a processed upload. Upload was uploaded with test_user. 
- """ - non_empty_processed.compress_and_set_metadata(example_user_metadata) + ''' + non_empty_processed.compress_and_set_metadata(internal_example_user_metadata) non_empty_processed.publish_upload() try: non_empty_processed.block_until_complete(interval=.01) @@ -611,9 +618,9 @@ def published(non_empty_processed: processing.Upload, example_user_metadata) -> @pytest.mark.timeout(config.tests.default_timeout) @pytest.fixture(scope='function') def published_wo_user_metadata(non_empty_processed: processing.Upload) -> processing.Upload: - """ + ''' Provides a processed upload. Upload was uploaded with test_user. - """ + ''' non_empty_processed.publish_upload() try: non_empty_processed.block_until_complete(interval=.01) @@ -625,7 +632,7 @@ def published_wo_user_metadata(non_empty_processed: processing.Upload) -> proces @pytest.fixture def reset_config(): - """ Fixture that resets configuration. """ + ''' Fixture that resets configuration. ''' service = config.service log_level = config.console_log_level yield None @@ -636,14 +643,14 @@ def reset_config(): @pytest.fixture def reset_infra(mongo, elastic): - """ Fixture that resets infrastructure after deleting db or search index. """ + ''' Fixture that resets infrastructure after deleting db or search index. ''' yield None def create_test_structure( meta_info, id: int, h: int, o: int, extra: List[str], periodicity: int, optimade: bool = True, metadata: dict = None): - """ Creates a calculation in Elastic and Mongodb with the given properties. + ''' Creates a calculation in Elastic and Mongodb with the given properties. Does require initialized :func:`elastic_infra` and :func:`mongo_infra`. @@ -656,7 +663,7 @@ def create_test_structure( periodicity: The number of dimensions to repeat the structure in optimade: A boolean. Iff true the entry will have optimade metadata. Default is True. metadata: Additional (user) metadata. - """ + ''' atom_labels = ['H' for i in range(0, h)] + ['O' for i in range(0, o)] + extra test_vector = np.array([0, 0, 0]) @@ -679,19 +686,19 @@ def create_test_structure( backend.closeSection('section_run', 0) backend = run_normalize(backend) - calc = CalcWithMetadata( + calc = EntryMetadata( domain='dft', upload_id='test_uload_id', calc_id='test_calc_id_%d' % id, mainfile='test_mainfile', published=True, with_embargo=False) calc.apply_domain_metadata(backend) if metadata is not None: - calc.update(**metadata) + calc.m_update(**metadata) if not optimade: - calc.optimade = None # type: ignore + calc.dft.optimade = None - proc_calc = processing.Calc.from_calc_with_metadata(calc) + proc_calc = processing.Calc.from_entry_metadata(calc) proc_calc.save() - search_entry = search.Entry.from_calc_with_metadata(calc) + search_entry = search.create_entry(calc) search_entry.save() assert processing.Calc.objects(calc_id__in=[calc.calc_id]).count() == 1 diff --git a/tests/data/parsers/octopus/stdout.txt b/tests/data/parsers/octopus/stdout.txt index 2b43895be9dd549f5fbadb074ab657f0b9ccf44a..94b5baabf3b5b659be5aa5f48b5100bcde7d7290 100644 --- a/tests/data/parsers/octopus/stdout.txt +++ b/tests/data/parsers/octopus/stdout.txt @@ -7,7 +7,7 @@ _.._ |0) ~ (0) | _.---'`__.-( (_. __.--'`_.. '.__.\ '--. \_.-' ,.--'` `""` ( ,.--'` ',__ /./; ;, '.__.'` __ - _`) ) .---.__.' / | |\ \__..--"" """--.,_ + _`) ) .---.__.' / | |\ \__..--"" '''--.,_ `---' .'.''-._.-'`_./ /\ '. \ _.-~~~````~~~-._`-.__.' | | .' _.-' | | \ \ '. `~---` \ \/ .' \ \ '. 
'-._) diff --git a/tests/processing/test_data.py b/tests/processing/test_data.py index 537aa2c84718c353c00790bd38824941a0a9f328..2a2f401d140c9514c09bcd3d0a2c8aa885fd52f0 100644 --- a/tests/processing/test_data.py +++ b/tests/processing/test_data.py @@ -129,9 +129,9 @@ def test_processing_with_large_dir(test_user, proc_infra): assert len(calc.warnings) == 1 -def test_publish(non_empty_processed: Upload, no_warn, example_user_metadata, monkeypatch): +def test_publish(non_empty_processed: Upload, no_warn, internal_example_user_metadata, monkeypatch): processed = non_empty_processed - processed.compress_and_set_metadata(example_user_metadata) + processed.compress_and_set_metadata(internal_example_user_metadata) additional_keys = ['with_embargo'] @@ -141,17 +141,17 @@ def test_publish(non_empty_processed: Upload, no_warn, example_user_metadata, mo except Exception: pass - upload = processed.to_upload_with_metadata(example_user_metadata) + entries = processed.entries_metadata(internal_example_user_metadata) - assert_upload_files(upload, PublicUploadFiles, published=True) - assert_search_upload(upload, additional_keys, published=True) + assert_upload_files(processed.upload_id, entries, PublicUploadFiles, published=True) + assert_search_upload(entries, additional_keys, published=True) - assert_processing(Upload.get(upload.upload_id, include_published=True), published=True) + assert_processing(Upload.get(processed.upload_id, include_published=True), published=True) -def test_republish(non_empty_processed: Upload, no_warn, example_user_metadata, monkeypatch): +def test_republish(non_empty_processed: Upload, no_warn, internal_example_user_metadata, monkeypatch): processed = non_empty_processed - processed.compress_and_set_metadata(example_user_metadata) + processed.compress_and_set_metadata(internal_example_user_metadata) additional_keys = ['with_embargo'] @@ -162,20 +162,20 @@ def test_republish(non_empty_processed: Upload, no_warn, example_user_metadata, processed.publish_upload() processed.block_until_complete(interval=.01) - upload = processed.to_upload_with_metadata(example_user_metadata) + entries = processed.entries_metadata(internal_example_user_metadata) - assert_upload_files(upload, PublicUploadFiles, published=True) - assert_search_upload(upload, additional_keys, published=True) + assert_upload_files(processed.upload_id, entries, PublicUploadFiles, published=True) + assert_search_upload(entries, additional_keys, published=True) def test_publish_failed( - non_empty_uploaded: Tuple[str, str], example_user_metadata, test_user, + non_empty_uploaded: Tuple[str, str], internal_example_user_metadata, test_user, monkeypatch, proc_infra): mock_failure(Calc, 'parsing', monkeypatch) processed = run_processing(non_empty_uploaded, test_user) - processed.compress_and_set_metadata(example_user_metadata) + processed.compress_and_set_metadata(internal_example_user_metadata) additional_keys = ['with_embargo'] @@ -185,9 +185,9 @@ def test_publish_failed( except Exception: pass - upload = processed.to_upload_with_metadata(example_user_metadata) + entries = processed.entries_metadata(internal_example_user_metadata) - assert_search_upload(upload, additional_keys, published=True, processed=False) + assert_search_upload(entries, additional_keys, published=True, processed=False) @pytest.mark.timeout(config.tests.default_timeout) @@ -211,7 +211,7 @@ def test_process_non_existing(proc_infra, test_user, with_error): @pytest.mark.timeout(config.tests.default_timeout) @pytest.mark.parametrize('with_failure', [None, 
'before', 'after', 'not-matched']) -def test_re_processing(published: Upload, example_user_metadata, monkeypatch, with_failure): +def test_re_processing(published: Upload, internal_example_user_metadata, monkeypatch, with_failure): if with_failure == 'not-matched': monkeypatch.setattr('nomad.config.reprocess_unmatched', False) @@ -249,7 +249,7 @@ def test_re_processing(published: Upload, example_user_metadata, monkeypatch, wi shutil.copyfile( raw_files, published.upload_files.join_file('raw-restricted.plain.zip').os_path) - upload = published.to_upload_with_metadata(example_user_metadata) + entries = published.entries_metadata(internal_example_user_metadata) # reprocess monkeypatch.setattr('nomad.config.version', 're_process_test_version') @@ -292,10 +292,10 @@ def test_re_processing(published: Upload, example_user_metadata, monkeypatch, wi assert old_log_lines != new_log_lines # assert maintained user metadata (mongo+es) - assert_upload_files(upload, PublicUploadFiles, published=True) - assert_search_upload(upload, published=True) + assert_upload_files(published.upload_id, entries, PublicUploadFiles, published=True) + assert_search_upload(entries, published=True) if with_failure not in ['after', 'not-matched']: - assert_processing(Upload.get(upload.upload_id, include_published=True), published=True) + assert_processing(Upload.get(published.upload_id, include_published=True), published=True) # assert changed calc metadata (mongo) if with_failure not in ['after', 'not-matched']: @@ -306,7 +306,7 @@ def test_re_processing(published: Upload, example_user_metadata, monkeypatch, wi @pytest.mark.timeout(config.tests.default_timeout) @pytest.mark.parametrize('with_failure', [None, 'before', 'after']) -def test_re_pack(published: Upload, example_user_metadata, monkeypatch, with_failure): +def test_re_pack(published: Upload, monkeypatch, with_failure): upload_id = published.upload_id calc = Calc.objects(upload_id=upload_id).first() assert calc.metadata['with_embargo'] @@ -403,6 +403,6 @@ def test_ems_data(proc_infra, test_user): assert upload.total_calcs == 1 assert len(upload.calcs) == 1 - upload_with_metadata = upload.to_upload_with_metadata() - assert_upload_files(upload_with_metadata, StagingUploadFiles, published=False) - assert_search_upload(upload_with_metadata, additional_keys, published=False) + entries = upload.entries_metadata() + assert_upload_files(upload.upload_id, entries, StagingUploadFiles, published=False) + assert_search_upload(entries, additional_keys, published=False) diff --git a/tests/test_client.py b/tests/test_client.py index c2ee35226d5f19368b162abfec3342e2abe745c7..354bca0f0cd0318be179021f26f97be865896764 100644 --- a/tests/test_client.py +++ b/tests/test_client.py @@ -15,7 +15,7 @@ import time from nomad.processing import SUCCESS -from nomad.datamodel import CalcWithMetadata +from nomad.datamodel import EntryMetadata from tests.test_files import example_file from tests.test_search import create_entry @@ -37,8 +37,8 @@ def test_upload(bravado, proc_infra, no_warn): def test_get_repo_calc(bravado, proc_infra, raw_files): - create_entry(CalcWithMetadata( - domain='dft', calc_id=0, upload_id='test_upload', published=True, with_embargo=False)) + create_entry(EntryMetadata( + domain='dft', calc_id='0', upload_id='test_upload', published=True, with_embargo=False)) repo = bravado.repo.get_repo_calc(upload_id='test_upload', calc_id='0').response().result assert repo is not None assert repo['calc_id'] is not None diff --git a/tests/test_datamodel.py b/tests/test_datamodel.py index 
f00ea36b5b0e5e6cc6bd81a2f9d30e0996dfdabd..2125b99d895cf60d334556763a0ae40ae168e056 100644 --- a/tests/test_datamodel.py +++ b/tests/test_datamodel.py @@ -12,9 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. -""" +''' A generator for random test calculations. -""" +''' import random from essential_generators import DocumentGenerator @@ -65,49 +65,50 @@ def _gen_ref(): return random.choice(references) -def generate_calc(pid: int = 0, calc_id: str = None, upload_id: str = None) -> datamodel.CalcWithMetadata: +def generate_calc(pid: int = 0, calc_id: str = None, upload_id: str = None) -> datamodel.EntryMetadata: random.seed(pid) - self = datamodel.DFTCalcWithMetadata() - - self.upload_id = upload_id if upload_id is not None else utils.create_uuid() - self.calc_id = calc_id if calc_id is not None else utils.create_uuid() - - self.upload_time = datetime.datetime.utcnow() - self.calc_hash = utils.create_uuid() - self.pid = pid - self.mainfile = random.choice(filepaths) - self.files = list([self.mainfile] + random.choices(filepaths, k=random.choice(low_numbers_for_files))) - self.uploader = _gen_user() - - self.with_embargo = random.choice([True, False]) - self.published = True - self.coauthors = list(_gen_user() for _ in range(0, random.choice(low_numbers_for_refs_and_datasets))) - self.shared_with = list(_gen_user() for _ in range(0, random.choice(low_numbers_for_refs_and_datasets))) - self.comment = random.choice(comments) - self.references = list(_gen_ref() for _ in range(0, random.choice(low_numbers_for_refs_and_datasets))) - self.datasets = list( + entry = datamodel.EntryMetadata() + + entry.upload_id = upload_id if upload_id is not None else utils.create_uuid() + entry.calc_id = calc_id if calc_id is not None else utils.create_uuid() + + entry.upload_time = datetime.datetime.utcnow() + entry.calc_hash = utils.create_uuid() + entry.pid = pid + entry.mainfile = random.choice(filepaths) + entry.files = list([entry.mainfile] + random.choices(filepaths, k=random.choice(low_numbers_for_files))) + entry.uploader = _gen_user() + + entry.with_embargo = random.choice([True, False]) + entry.published = True + entry.coauthors = list(_gen_user() for _ in range(0, random.choice(low_numbers_for_refs_and_datasets))) + entry.shared_with = list(_gen_user() for _ in range(0, random.choice(low_numbers_for_refs_and_datasets))) + entry.comment = random.choice(comments) + entry.references = list(_gen_ref() for _ in range(0, random.choice(low_numbers_for_refs_and_datasets))) + entry.datasets = list( _gen_dataset() for _ in range(0, random.choice(low_numbers_for_refs_and_datasets))) - self.atoms = list(random.choices(chemical_symbols[1:], k=random.choice(low_numbers_for_atoms))) - self.formula = ''.join('%s%d' % (atom, random.choice(low_numbers_for_atoms)) for atom in self.atoms) - self.formula = self.formula.replace('1', '') + entry.atoms = list(random.choices(chemical_symbols[1:], k=random.choice(low_numbers_for_atoms))) + entry.formula = ''.join('%s%d' % (atom, random.choice(low_numbers_for_atoms)) for atom in entry.atoms) + entry.formula = entry.formula.replace('1', '') - self.basis_set = random.choice(basis_sets) - self.xc_functional = random.choice(xc_functionals) - self.system = random.choice(systems) - self.crystal_system = random.choice(crystal_systems) + dft_metadata = entry.m_create(datamodel.DFTMetadata) + dft_metadata.basis_set = random.choice(basis_sets) + dft_metadata.xc_functional = random.choice(xc_functionals) + dft_metadata.system = 
random.choice(systems) + dft_metadata.crystal_system = random.choice(crystal_systems) spacegroup = random.randint(1, 225) - self.spacegroup = str(spacegroup) - self.spacegroup_symbol = Spacegroup(spacegroup).symbol - self.code_name = random.choice(codes) - self.code_version = '1.0.0' + dft_metadata.spacegroup = str(spacegroup) + dft_metadata.spacegroup_symbol = Spacegroup(spacegroup).symbol + dft_metadata.code_name = random.choice(codes) + dft_metadata.code_version = '1.0.0' - self.n_total_energies = random.choice(range(0, 5)) - self.geometries = ['%d' % random.randint(1, 500), '%d' % random.randint(1, 500)] + dft_metadata.n_total_energies = random.choice(range(0, 5)) + dft_metadata.geometries = ['%d' % random.randint(1, 500), '%d' % random.randint(1, 500)] - return self + return entry if __name__ == '__main__': @@ -130,7 +131,6 @@ if __name__ == '__main__': for calcs_per_upload in utils.chunks(range(0, n_calcs), int(n_calcs / n_uploads)): upload_id = utils.create_uuid() - upload = datamodel.UploadWithMetadata(upload_id=upload_id) upload_files = files.StagingUploadFiles( upload_id=upload_id, create=True, is_authorized=lambda: True) @@ -150,7 +150,7 @@ if __name__ == '__main__': with upload_files.archive_log_file(calc.calc_id, 'wt') as f: f.write('this is a generated test file') - search_entry = search.Entry.from_calc_with_metadata(calc) + search_entry = search.Entry.from_entry_metadata(calc) search_entry.n_total_energies = random.choice(low_numbers_for_total_energies) search_entry.n_geometries = low_numbers_for_geometries for _ in range(0, random.choice(search_entry.n_geometries)): @@ -160,11 +160,9 @@ if __name__ == '__main__': pid += 1 calcs.append(calc) - upload.calcs = calcs - bulk( infrastructure.elastic_client, [entry.to_dict(include_meta=True) for entry in search_entries]) - upload_files.pack(upload) + upload_files.pack(calcs) upload_files.delete() diff --git a/tests/test_files.py b/tests/test_files.py index 62d071f4d82e847a3029788250dbf48ba0242cce..ce9d0cffa8952f9f7f294ec7ed517d86d1368a8b 100644 --- a/tests/test_files.py +++ b/tests/test_files.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
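The generate_calc() rewrite above is the heart of this refactoring: domain-specific quantities move from flat attributes on DFTCalcWithMetadata into a DFTMetadata sub-section of the new EntryMetadata class, created with m_create(). A minimal sketch of that construction pattern, using only names that appear in this diff; the concrete values (mainfile path, code name) are placeholders, not part of the patch:

    from nomad import datamodel, utils

    # general entry metadata, as generate_calc() now builds it
    entry = datamodel.EntryMetadata()
    entry.upload_id = utils.create_uuid()
    entry.calc_id = utils.create_uuid()
    entry.mainfile = 'examples/template.json'  # placeholder path

    # domain data no longer lives on flat attributes but in a sub-section
    dft = entry.m_create(datamodel.DFTMetadata)
    dft.code_name = 'VASP'      # placeholder value
    dft.code_version = '1.0.0'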
-from typing import Generator, Any, Dict, Tuple +from typing import Generator, Any, Dict, Tuple, Iterable import os import os.path import shutil @@ -22,8 +22,7 @@ import itertools import zipfile import re -from nomad import config -from nomad.datamodel import UploadWithMetadata, CalcWithMetadata +from nomad import config, datamodel from nomad.files import DirectoryObject, PathObject from nomad.files import StagingUploadFiles, PublicUploadFiles, UploadFiles, Restricted, \ ArchiveBasedStagingUploadFiles @@ -31,10 +30,10 @@ from nomad.files import StagingUploadFiles, PublicUploadFiles, UploadFiles, Rest from tests.utils import assert_exception -CalcWithFiles = Tuple[CalcWithMetadata, str] -UploadWithFiles = Tuple[UploadWithMetadata, UploadFiles] -StagingUploadWithFiles = Tuple[UploadWithMetadata, StagingUploadFiles] -PublicUploadWithFiles = Tuple[UploadWithMetadata, PublicUploadFiles] +CalcWithFiles = Tuple[datamodel.EntryMetadata, str] +UploadWithFiles = Tuple[str, Iterable[datamodel.EntryMetadata], UploadFiles] +StagingUploadWithFiles = Tuple[str, Iterable[datamodel.EntryMetadata], StagingUploadFiles] +PublicUploadWithFiles = Tuple[str, Iterable[datamodel.EntryMetadata], PublicUploadFiles] # example_file uses an artificial parser for faster test execution, can also be # changed to examples_vasp.zip for using vasp parser @@ -56,7 +55,7 @@ example_data = dict(test_key='test_value') @pytest.fixture(scope='function', autouse=True) def raw_files_on_all_tests(raw_files): - """ Autouse fixture to apply raw_files to all tests. """ + ''' Autouse fixture to apply raw_files to all tests. ''' pass @@ -125,9 +124,9 @@ example_calc_id = example_calc['calc_id'] def generate_example_calc( calc_id: int, with_mainfile_prefix: bool, subdirectory: str = None, **kwargs) -> CalcWithFiles: - """ Generate an example calc with :class:`CalcWithMetadata` and rawfile. """ + ''' Generate an example calc with :class:`EntryMetadata` and rawfile. 
''' - example_calc = CalcWithMetadata(domain='dft', calc_id=str(calc_id)) + example_calc = datamodel.EntryMetadata(domain='dft', calc_id=str(calc_id)) if with_mainfile_prefix: mainfile = '%d.template.json' % calc_id @@ -138,7 +137,7 @@ def generate_example_calc( mainfile = os.path.join(subdirectory, mainfile) example_calc.mainfile = mainfile - example_calc.update(**kwargs) + example_calc.m_update(**kwargs) example_file = os.path.join(config.fs.tmp, 'example.zip') example_calc.files = [] @@ -209,8 +208,8 @@ class UploadFilesContract(UploadFilesFixtures): assert UploadFiles.get(empty_test_upload.upload_id).__class__ == empty_test_upload.__class__ def test_rawfile(self, test_upload: UploadWithFiles): - upload, upload_files = test_upload - for calc in upload.calcs: + _, entries, upload_files = test_upload + for calc in entries: try: for file_path in calc.files: with upload_files.raw_file(file_path) as f: @@ -222,8 +221,8 @@ class UploadFilesContract(UploadFilesFixtures): assert calc.with_embargo def test_rawfile_size(self, test_upload: UploadWithFiles): - upload, upload_files = test_upload - for calc in upload.calcs: + _, entries, upload_files = test_upload + for calc in entries: try: for file_path in calc.files: assert upload_files.raw_file_size(file_path) > 0 @@ -235,13 +234,13 @@ class UploadFilesContract(UploadFilesFixtures): @pytest.mark.parametrize('prefix', [None, 'examples']) def test_raw_file_manifest(self, test_upload: UploadWithFiles, prefix: str): - _, upload_files = test_upload + _, _, upload_files = test_upload raw_files = list(upload_files.raw_file_manifest(path_prefix=prefix)) assert_example_files(raw_files) @pytest.mark.parametrize('prefix', [None, 'examples_template']) def test_raw_file_list(self, test_upload: UploadWithFiles, prefix: str): - _, upload_files = test_upload + _, _, upload_files = test_upload raw_files = list(upload_files.raw_file_list(directory=prefix)) if prefix is None: assert len(raw_files) == 0 @@ -256,8 +255,8 @@ class UploadFilesContract(UploadFilesFixtures): @pytest.mark.parametrize('test_logs', [True, False]) def test_archive(self, test_upload: UploadWithFiles, test_logs: bool): - upload, upload_files = test_upload - calcs = upload.calcs_dict + _, entries, upload_files = test_upload + calcs_dict = {entry.calc_id: entry for entry in entries} try: if test_logs: with upload_files.archive_log_file(example_calc_id, 'rt') as f: @@ -267,26 +266,26 @@ class UploadFilesContract(UploadFilesFixtures): assert json.load(f) == json.loads(example_archive_contents) if not upload_files._is_authorized(): - assert not calcs.get(example_calc_id).with_embargo + assert not calcs_dict.get(example_calc_id).with_embargo except Restricted: assert not upload_files._is_authorized() - assert calcs.get(example_calc_id).with_embargo + assert calcs_dict.get(example_calc_id).with_embargo def test_archive_size(self, test_upload: UploadWithFiles): - upload, upload_files = test_upload - calcs = upload.calcs_dict + _, entries, upload_files = test_upload + calcs_dict = {entry.calc_id: entry for entry in entries} try: assert upload_files.archive_file_size(example_calc_id) > 0 if not upload_files._is_authorized(): - assert not calcs.get(example_calc_id).with_embargo + assert not calcs_dict.get(example_calc_id).with_embargo except Restricted: assert not upload_files._is_authorized() - assert calcs.get(example_calc_id).with_embargo + assert calcs_dict.get(example_calc_id).with_embargo def create_staging_upload(upload_id: str, calc_specs: str) -> StagingUploadWithFiles: - """ + ''' Create an 
upload according to given spec. Additional arguments are given to the StagingUploadFiles contstructor. @@ -297,9 +296,8 @@ def create_staging_upload(upload_id: str, calc_specs: str) -> StagingUploadWithF The calcs will be copies of calcs in `example_file`. First calc is at top level, following calcs will be put under 1/, 2/, etc. All calcs with capital `P`/`R` will be put in the same directory under multi/. - """ + ''' upload_files = StagingUploadFiles(upload_id, create=True, is_authorized=lambda: True) - upload = UploadWithMetadata(upload_id=upload_id) calcs = [] prefix = 0 @@ -327,8 +325,7 @@ def create_staging_upload(upload_id: str, calc_specs: str) -> StagingUploadWithF prefix += 1 assert len(calcs) == len(calc_specs) - upload.calcs = calcs - return upload, upload_files + return upload_id, calcs, upload_files class TestStagingUploadFiles(UploadFilesContract): @@ -353,27 +350,27 @@ class TestStagingUploadFiles(UploadFilesContract): assert len(content) > 0 def test_write_archive(self, test_upload: StagingUploadWithFiles): - _, upload_files = test_upload + _, _, upload_files = test_upload assert json.load(upload_files.archive_file(example_calc_id, 'rt')) == json.loads(example_archive_contents) def test_calc_id(self, test_upload: StagingUploadWithFiles): - _, upload_files = test_upload + _, _, upload_files = test_upload assert upload_files.calc_id(example_file_mainfile) is not None def test_pack(self, test_upload: StagingUploadWithFiles): - upload, upload_files = test_upload - upload_files.pack(upload) + _, entries, upload_files = test_upload + upload_files.pack(entries) @pytest.mark.parametrize('with_mainfile', [True, False]) def test_calc_files(self, test_upload: StagingUploadWithFiles, with_mainfile): - upload, upload_files = test_upload - for calc in upload.calcs: + _, entries, upload_files = test_upload + for calc in entries: mainfile = calc.mainfile calc_files = upload_files.calc_files(mainfile, with_mainfile=with_mainfile) assert_example_files(calc_files, with_mainfile=with_mainfile) def test_delete(self, test_upload: StagingUploadWithFiles): - _, upload_files = test_upload + _, _, upload_files = test_upload upload_files.delete() assert not upload_files.exists() @@ -396,17 +393,17 @@ class TestArchiveBasedStagingUploadFiles(UploadFilesFixtures): def create_public_upload( upload_id: str, calc_specs: str, **kwargs) -> PublicUploadWithFiles: - upload, upload_files = create_staging_upload(upload_id, calc_specs) - upload_files.pack(upload) + _, entries, upload_files = create_staging_upload(upload_id, calc_specs) + upload_files.pack(entries) upload_files.delete() - return upload, PublicUploadFiles(upload_id, **kwargs) + return upload_id, entries, PublicUploadFiles(upload_id, **kwargs) class TestPublicUploadFiles(UploadFilesContract): @pytest.fixture(scope='function') def empty_test_upload(self, test_upload_id: str) -> UploadFiles: - _, upload_files = create_public_upload( + _, _, upload_files = create_public_upload( test_upload_id, calc_specs='', is_authorized=lambda: True) return upload_files @@ -415,13 +412,13 @@ class TestPublicUploadFiles(UploadFilesContract): ['r', 'rr', 'pr', 'rp', 'p', 'pp', 'RP', 'RR', 'PP'], [True, False])) def test_upload(self, request, test_upload_id: str) -> PublicUploadWithFiles: calc_specs, protected = request.param - upload, upload_files = create_staging_upload(test_upload_id, calc_specs=calc_specs) - upload_files.pack(upload) + _, entries, upload_files = create_staging_upload(test_upload_id, calc_specs=calc_specs) + upload_files.pack(entries) 
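The fixture changes above replace the old UploadWithMetadata wrapper with a plain (upload_id, entries, upload_files) tuple, and pack() and assert_upload_files() now receive the EntryMetadata objects directly. A condensed sketch of that flow, reusing the helpers defined in tests/test_files.py in this diff; calc_specs='pr' is just one of the parametrized values used above:

    from nomad.files import PublicUploadFiles
    from tests.test_files import create_staging_upload, assert_upload_files

    # stage two example calcs ('p' = public, 'r' = restricted), publish, then verify
    upload_id, entries, staging_files = create_staging_upload('test_upload_id', calc_specs='pr')
    staging_files.pack(entries)   # pack() takes the entry metadata, not an upload object
    staging_files.delete()
    assert_upload_files(upload_id, entries, PublicUploadFiles)  # new (id, entries, cls) signature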
upload_files.delete() - return upload, PublicUploadFiles(test_upload_id, is_authorized=lambda: not protected) + return test_upload_id, entries, PublicUploadFiles(test_upload_id, is_authorized=lambda: not protected) def test_to_staging_upload_files(self, test_upload): - upload, upload_files = test_upload + _, entries, upload_files = test_upload assert upload_files.to_staging_upload_files() is None staging_upload_files = upload_files.to_staging_upload_files(create=True) assert staging_upload_files is not None @@ -438,7 +435,7 @@ class TestPublicUploadFiles(UploadFilesContract): with open(f, 'wt') as fh: fh.write('') - staging_upload_files.pack(upload) + staging_upload_files.pack(entries) staging_upload_files.delete() # We do a very simple check. We made all files empty, those that are rezipped @@ -453,19 +450,20 @@ class TestPublicUploadFiles(UploadFilesContract): assert upload_files.to_staging_upload_files() is None def test_repack(self, test_upload): - upload, upload_files = test_upload - for calc in upload.calcs: + upload_id, entries, upload_files = test_upload + for calc in entries: calc.with_embargo = False - upload_files.re_pack(upload) - assert_upload_files(upload, PublicUploadFiles, with_embargo=False) + upload_files.re_pack(entries) + assert_upload_files(upload_id, entries, PublicUploadFiles, with_embargo=False) assert len(os.listdir(upload_files.os_path)) == 8 with assert_exception(KeyError): StagingUploadFiles(upload_files.upload_id) def assert_upload_files( - upload: UploadWithMetadata, cls, no_archive: bool = False, **kwargs): - """ + upload_id: str, entries: Iterable[datamodel.EntryMetadata], cls, + no_archive: bool = False, **kwargs): + ''' Asserts the files aspect of uploaded data after processing or publishing Arguments: @@ -473,13 +471,13 @@ def assert_upload_files( cls: The :class:`UploadFiles` subclass that this upload should have n_calcs: The number of expected calcs in the upload **kwargs: Key, value pairs that each calc metadata should have - """ - upload_files = UploadFiles.get(upload.upload_id, is_authorized=lambda: True) + ''' + upload_files = UploadFiles.get(upload_id, is_authorized=lambda: True) assert upload_files is not None assert isinstance(upload_files, cls) - upload_files = UploadFiles.get(upload.upload_id) - for calc in upload.calcs: + upload_files = UploadFiles.get(upload_id) + for calc in entries: try: with upload_files.raw_file(calc.mainfile) as f: f.read() diff --git a/tests/test_metainfo.py b/tests/test_metainfo.py index 32adbc749b3a764af2f73d6ea1f9f21318e8fb85..4bb544c277065a16a604cf0a200f7b6cd42a4df4 100644 --- a/tests/test_metainfo.py +++ b/tests/test_metainfo.py @@ -45,7 +45,7 @@ def assert_section_instance(section: MSection): class TestM3: - """ Test for meta-info definition that are used to define other definitions. """ + ''' Test for meta-info definition that are used to define other definitions. ''' def test_section(self): assert Section.m_def == Section.m_def.m_def @@ -84,7 +84,7 @@ class TestM3: class TestPureReflection: - """ Test for using meta-info instances without knowing/using the respective definitions. """ + ''' Test for using meta-info instances without knowing/using the respective definitions. 
''' def test_instantiation(self): test_section_def = Section(name='TestSection') @@ -98,19 +98,19 @@ class TestPureReflection: class MaterialDefining(MCategory): - """Quantities that add to what constitutes a different material.""" + '''Quantities that add to what constitutes a different material.''' pass class TestM2: - """ Test for meta-info definitions. """ + ''' Test for meta-info definitions. ''' def test_basics(self): assert_section_def(Run.m_def) assert_section_def(System.m_def) def test_default_section_def(self): - """ A section class without an explicit section def must set a default section def. """ + ''' A section class without an explicit section def must set a default section def. ''' assert Run.m_def is not None assert Run.m_def.name == 'Run' @@ -231,9 +231,12 @@ class TestM2: def test_qualified_name(self): assert System.m_def.qualified_name() == 'nomad.metainfo.example.System' + def test_derived_virtual(self): + assert System.n_atoms.virtual + class TestM1: - """ Test for meta-info instances. """ + ''' Test for meta-info instances. ''' def test_run(self): class Run(MSection): @@ -257,6 +260,30 @@ class TestM1: assert_section_instance(system) + def test_set_none(self): + run = Run() + run.code_name = 'test' + assert run.code_name is not None + + run.code_name = None + assert run.code_name is None + + def test_set_subsection(self): + run = Run() + first = Parsing() + run.parsing = first + assert first.m_parent == run + assert run.parsing == first + + second = Parsing() + run.parsing = second + assert first.m_parent is None + assert second.m_parent == run + assert run.parsing == second + + run.parsing = None + assert run.parsing is None + def test_defaults(self): assert len(System().periodic_dimensions) == 3 assert System().atom_labels is None @@ -333,6 +360,7 @@ class TestM1: def example_data(self): run = Run() run.code_name = 'test code name' + run.m_create(Parsing) system: System = run.m_create(System) system.atom_labels = ['H', 'H', 'O'] system.atom_positions = np.array([[1.2e-10, 0, 0], [0, 1.2e-10, 0], [0, 0, 1.2e-10]]) @@ -356,6 +384,15 @@ class TestM1: self.assert_example_data(new_example_data) + def test_to_dict_defaults(self, example_data): + dct = example_data.m_to_dict() + assert 'nomad_version' not in dct['parsing'] + assert 'n_atoms' not in dct['systems'][0] + + dct = example_data.m_to_dict(include_defaults=True) + assert 'nomad_version' in dct['parsing'] + assert 'n_atoms' not in dct['systems'][0] + def test_derived(self): system = System() @@ -412,6 +449,17 @@ class TestM1: assert len(resource.all(System)) == 2 + def test_mapping(self): + run = Run() + run.m_create(Parsing).parser_name = 'test' + system = run.m_create(System) + system.atom_labels = ['H', 'O'] + + assert run.systems[0].atom_labels == ['H', 'O'] + assert run['systems.0.atom_labels'] == ['H', 'O'] + assert run['systems/0/atom_labels'] == ['H', 'O'] + assert run['parsing.parser_name'] == 'test' + class TestEnvironment: diff --git a/tests/test_normalizing.py b/tests/test_normalizing.py index 9354d0aedcde713283484f3cbd54001e5a3a7fe9..8e32d6b0e7de47ffe95bce52f10a893e64da531c 100644 --- a/tests/test_normalizing.py +++ b/tests/test_normalizing.py @@ -50,36 +50,36 @@ vasp_parser_dos = ( glucose_atom_labels = ( 'parsers/template', 'tests/data/normalizers/glucose_atom_labels.json') -symmetry_keys = ['spacegroup', 'spacegroup_symbol', 'crystal_system'] +symmetry_keys = ['dft.spacegroup', 'dft.spacegroup_symbol', 'dft.crystal_system'] calc_metadata_keys = [ - 'code_name', 'code_version', 'basis_set', 
'xc_functional', 'system', 'formula'] + symmetry_keys + 'dft.code_name', 'dft.code_version', 'dft.basis_set', 'dft.xc_functional', 'dft.system', 'formula'] + symmetry_keys parser_exceptions = { - 'parsers/wien2k': ['xc_functional'], + 'parsers/wien2k': ['dft.xc_functional'], 'parsers/nwchem': symmetry_keys, 'parsers/bigdft': symmetry_keys, 'parsers/gaussian': symmetry_keys, - 'parsers/abinit': ['formula', 'system'] + symmetry_keys, - 'parsers/dl-poly': ['formula', 'basis_set', 'xc_functional', 'system'] + symmetry_keys, - 'parsers/lib-atoms': ['basis_set', 'xc_functional'], + 'parsers/abinit': ['formula', 'dft.system'] + symmetry_keys, + 'parsers/dl-poly': ['formula', 'dft.basis_set', 'dft.xc_functional', 'dft.system'] + symmetry_keys, + 'parsers/lib-atoms': ['dft.basis_set', 'dft.xc_functional'], 'parsers/orca': symmetry_keys, 'parsers/octopus': symmetry_keys, - 'parsers/phonopy': ['basis_set', 'xc_functional'], + 'parsers/phonopy': ['dft.basis_set', 'dft.xc_functional'], 'parsers/gpaw2': symmetry_keys, - 'parsers/gamess': ['formula', 'system'] + symmetry_keys, - 'parsers/gulp': ['formula', 'xc_functional', 'system', 'basis_set'] + symmetry_keys, + 'parsers/gamess': ['formula', 'dft.system', 'dft.xc_functional'] + symmetry_keys, + 'parsers/gulp': ['formula', 'dft.xc_functional', 'dft.system', 'dft.basis_set'] + symmetry_keys, 'parsers/turbomole': symmetry_keys, - 'parsers/elastic': ['basis_set', 'xc_functional', 'system'] + symmetry_keys, - 'parsers/dmol': ['system'] + symmetry_keys, + 'parsers/elastic': ['dft.basis_set', 'dft.xc_functional', 'dft.system'] + symmetry_keys, + 'parsers/dmol': ['dft.system'] + symmetry_keys, 'parser/molcas': symmetry_keys, - 'parsers/band': ['system'] + symmetry_keys, - 'parsers/qbox': ['xc_functional'], - 'parser/onetep': ['formula', 'basis_set', 'xc_functional', 'system'] + symmetry_keys + 'parsers/band': ['dft.system'] + symmetry_keys, + 'parsers/qbox': ['dft.xc_functional'], + 'parser/onetep': ['formula', 'dft.basis_set', 'dft.xc_functional', 'dft.system'] + symmetry_keys } -""" +''' Keys that the normalizer for certain parsers might not produce. In an ideal world this map would be empty. 
-""" +''' def run_normalize(backend: LocalBackend) -> LocalBackend: @@ -209,17 +209,17 @@ def test_template_example_normalizer(parsed_template_example, no_warn, caplog): def assert_normalized(backend: LocalBackend): - metadata = datamodel.DFTCalcWithMetadata() + metadata = datamodel.EntryMetadata(domain='dft') metadata.apply_domain_metadata(backend) assert metadata.formula is not None - assert metadata.code_name is not None - assert metadata.code_version is not None - assert metadata.basis_set is not None - assert metadata.xc_functional is not None - assert metadata.system is not None - assert metadata.crystal_system is not None + assert metadata.dft.code_name is not None + assert metadata.dft.code_version is not None + assert metadata.dft.basis_set is not None + assert metadata.dft.xc_functional is not None + assert metadata.dft.system is not None + assert metadata.dft.crystal_system is not None assert len(metadata.atoms) is not None - assert metadata.spacegroup is not None + assert metadata.dft.spacegroup is not None exceptions = parser_exceptions.get(backend.get_value('parser_name'), []) @@ -228,7 +228,7 @@ def assert_normalized(backend: LocalBackend): for key in calc_metadata_keys: if key not in exceptions: - assert getattr(metadata, key) != config.services.unavailable_value + assert metadata[key] != config.services.unavailable_value def test_normalizer(normalized_example: LocalBackend): @@ -236,7 +236,7 @@ def test_normalizer(normalized_example: LocalBackend): def test_normalizer_faulty_matid(caplog): - """ Runs normalizer on an example w/ bools for atom pos. Should force matid error.""" + ''' Runs normalizer on an example w/ bools for atom pos. Should force matid error.''' # assert isinstance(backend, LocalBackend) backend = parse_file(boolean_positions) run_normalize(backend) @@ -245,26 +245,26 @@ def test_normalizer_faulty_matid(caplog): def test_normalizer_single_string_atom_labels(caplog): - """ + ''' Runs normalizer on ['Br1SiSiK'] expects error. Should replace the label with 'X' and the numbers of postitions should not match the labels. - """ + ''' backend = parse_file(single_string_atom_labels) run_normalize(backend) assert_log(caplog, 'ERROR', 'len of atom position does not match number of atoms') def test_normalizer_unknown_atom_label(caplog, no_warn): - """ Runs normalizer on ['Br','Si','Si','Za'], for normalizeation Za will be replaced, + ''' Runs normalizer on ['Br','Si','Si','Za'], for normalizeation Za will be replaced, but stays int the labels. - """ + ''' backend = parse_file(unknown_atom_label) run_normalize(backend) assert backend.get_value('atom_labels')[3] == 'Za' def test_symmetry_classification_fcc(): - """Runs normalizer where lattice vectors should give fcc symmetry.""" + '''Runs normalizer where lattice vectors should give fcc symmetry.''' backend = parse_file(fcc_symmetry) backend = run_normalize(backend) expected_crystal_system = 'cubic' @@ -297,9 +297,9 @@ def test_system_classification(atom, molecule, one_d, two_d, surface, bulk): def test_representative_systems(single_point, molecular_dynamics, geometry_optimization, phonon): - """Checks that the representative systems are correctly identified and + '''Checks that the representative systems are correctly identified and processed by SystemNormalizer. - """ + ''' def check_representative_frames(backend): # For systems with multiple frames the first and two last should be processed. 
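As the updated assert_normalized() above shows, domain quantities are now reached through the dft sub-section, and the same values can be addressed with the prefixed keys listed in calc_metadata_keys. A short sketch of that access pattern; backend stands for a parsed and normalized LocalBackend as produced by run_normalize() above:

    from nomad import datamodel

    def summarize_dft(backend):
        # populate entry metadata from the processed backend, as assert_normalized() does
        metadata = datamodel.EntryMetadata(domain='dft')
        metadata.apply_domain_metadata(backend)

        # attribute access on the sub-section and prefixed item access are equivalent
        assert metadata.dft.code_name == metadata['dft.code_name']
        return metadata.formula, metadata.dft.spacegroup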
try: @@ -343,9 +343,9 @@ def test_reduced_chemical_formula(): def test_vasp_incar_system(): - """ + ''' Ensure we can test an incar value in the VASP example - """ + ''' backend = parse_file(vasp_parser) backend = run_normalize(backend) expected_value = 'SrTiO3' # material's formula in vasp.xml @@ -359,8 +359,8 @@ def test_vasp_incar_system(): def test_aflow_prototypes(): - """Tests that some basis structures are matched with the correct AFLOW prototypes - """ + '''Tests that some basis structures are matched with the correct AFLOW prototypes + ''' # No prototype info for non-bulk structures backend = run_normalize_for_structure(ase.build.molecule("H2O")) assert len(backend["section_prototype"]) == 0 @@ -422,9 +422,9 @@ def test_aflow_prototypes(): def test_springer_normalizer(): - """ + ''' Ensure the Springer normalizer works well with the VASP example. - """ + ''' backend = parse_file(vasp_parser) backend = run_normalize(backend) @@ -442,9 +442,9 @@ def test_springer_normalizer(): def test_dos_normalizer(): - """ + ''' Ensure the DOS normalizer acted on the DOS values. We take a VASP example. - """ + ''' backend = parse_file(vasp_parser_dos) backend = run_normalize(backend) diff --git a/tests/test_parsing.py b/tests/test_parsing.py index 65b65c1001f3e9fd5b96e771c01de8034a277047..63cd5d0a353c86fc174daf48987c1ce4231d1bae 100644 --- a/tests/test_parsing.py +++ b/tests/test_parsing.py @@ -132,7 +132,7 @@ class TestLocalBackend(object): assert backend.get_sections('section_symmetry', 2) == [1] def test_section_override(self, backend, no_warn): - """ Test whether we can overwrite values already in the backend.""" + ''' Test whether we can overwrite values already in the backend.''' expected_value = ['Cl', 'Zn'] backend.openSection('section_run') backend.openSection('section_system') @@ -328,7 +328,7 @@ def assert_parser_result(backend, error=False): def assert_parser_dir_unchanged(previous_wd, current_wd): - """Assert working directory has not been changed from parser.""" + '''Assert working directory has not been changed from parser.''' assert previous_wd == current_wd diff --git a/tests/test_search.py b/tests/test_search.py index b51f05c777aeeb89026a2a4e0f81f5595a0dabdb..cb8ef43873649b3274fe324da74a36484024f621 100644 --- a/tests/test_search.py +++ b/tests/test_search.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
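All of the normalizer tests above share the same two-step flow before any assertion. A compact restatement of it; parse_file, vasp_parser, run_normalize and assert_normalized are the helpers referenced in tests/test_normalizing.py, and the module that defines parse_file is not visible in this excerpt, so treat that import as assumed:

    # helpers as used in tests/test_normalizing.py above
    backend = parse_file(vasp_parser)   # parse the VASP example into a LocalBackend
    backend = run_normalize(backend)    # run the normalizers (symmetry, springer, DOS, ...)
    assert_normalized(backend)          # the EntryMetadata-based checks defined above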
-from typing import List +from typing import List, Iterable from elasticsearch_dsl import Q import pytest @@ -25,36 +25,39 @@ def test_init_mapping(elastic): def test_index_skeleton_calc(elastic): - calc_with_metadata = datamodel.CalcWithMetadata( - domain='dft', upload_id='test_upload', calc_id='test_calc') + entry_metadata = datamodel.EntryMetadata( + domain='dft', upload_id='test_upload', calc_id='test_calc', + mainfile='test/mainfile', files=['test/file1', 'test/file2']) - create_entry(calc_with_metadata) + create_entry(entry_metadata) def test_index_normalized_calc(elastic, normalized: parsing.LocalBackend): - calc_with_metadata = datamodel.CalcWithMetadata( + entry_metadata = datamodel.EntryMetadata( domain='dft', upload_id='test upload id', calc_id='test id') - calc_with_metadata.apply_domain_metadata(normalized) + entry_metadata.apply_domain_metadata(normalized) - entry = search.flat(create_entry(calc_with_metadata).to_dict()) + search_entry = create_entry(entry_metadata) + entry = search.flat(search_entry.to_dict()) assert 'calc_id' in entry assert 'atoms' in entry assert 'dft.code_name' in entry + assert 'dft.optimade.elements_ratios' in entry def test_index_normalized_calc_with_metadata( - elastic, normalized: parsing.LocalBackend, example_user_metadata: dict): - - calc_with_metadata = datamodel.CalcWithMetadata( + elastic, normalized: parsing.LocalBackend, internal_example_user_metadata: dict): + entry_metadata = datamodel.EntryMetadata( domain='dft', upload_id='test upload id', calc_id='test id') - calc_with_metadata.apply_domain_metadata(normalized) - calc_with_metadata.apply_user_metadata(example_user_metadata) + entry_metadata.apply_domain_metadata(normalized) + internal_example_user_metadata.pop('embargo_length') # is for uploads only + entry_metadata.apply_user_metadata(internal_example_user_metadata) - entry = create_entry(calc_with_metadata) + entry = create_entry(entry_metadata) - assert getattr(entry, 'with_embargo') == example_user_metadata['with_embargo'] - assert getattr(entry, 'comment') == example_user_metadata['comment'] + assert getattr(entry, 'with_embargo') == internal_example_user_metadata['with_embargo'] + assert getattr(entry, 'comment') == internal_example_user_metadata['comment'] def test_index_upload(elastic, processed: processing.Upload): @@ -63,10 +66,10 @@ def test_index_upload(elastic, processed: processing.Upload): @pytest.fixture() def example_search_data(elastic, normalized: parsing.LocalBackend): - calc_with_metadata = datamodel.CalcWithMetadata( + entry_metadata = datamodel.EntryMetadata( domain='dft', upload_id='test upload id', calc_id='test id') - calc_with_metadata.apply_domain_metadata(normalized) - create_entry(calc_with_metadata) + entry_metadata.apply_domain_metadata(normalized) + create_entry(entry_metadata) refresh_index() return normalized @@ -74,10 +77,10 @@ def example_search_data(elastic, normalized: parsing.LocalBackend): @pytest.fixture() def example_ems_search_data(elastic, parsed_ems: parsing.LocalBackend): - calc_with_metadata = datamodel.CalcWithMetadata( + entry_metadata = datamodel.EntryMetadata( domain='ems', upload_id='test upload id', calc_id='test id') - calc_with_metadata.apply_domain_metadata(parsed_ems) - create_entry(calc_with_metadata) + entry_metadata.apply_domain_metadata(parsed_ems) + create_entry(entry_metadata) refresh_index() return parsed_ems @@ -200,15 +203,15 @@ def test_search_quantity( elastic, normalized: parsing.LocalBackend, test_user: datamodel.User, other_test_user: datamodel.User, order_by: str): 
- calc_with_metadata = datamodel.CalcWithMetadata( + entry_metadata = datamodel.EntryMetadata( domain='dft', upload_id='test upload id', calc_id='test id') - calc_with_metadata.apply_domain_metadata(normalized) - calc_with_metadata.uploader = test_user.user_id - create_entry(calc_with_metadata) + entry_metadata.apply_domain_metadata(normalized) + entry_metadata.uploader = test_user.user_id + create_entry(entry_metadata) - calc_with_metadata.calc_id = 'other test id' - calc_with_metadata.uploader = other_test_user.user_id - create_entry(calc_with_metadata) + entry_metadata.calc_id = 'other test id' + entry_metadata.uploader = other_test_user.user_id + create_entry(entry_metadata) refresh_index() request = SearchRequest(domain='dft').quantity( @@ -228,10 +231,10 @@ def refresh_index(): infrastructure.elastic_client.indices.refresh(index=config.elastic.index_name) -def create_entry(calc_with_metadata: datamodel.CalcWithMetadata): - entry = search.Entry.from_calc_with_metadata(calc_with_metadata) +def create_entry(entry_metadata: datamodel.EntryMetadata): + entry = search.create_entry(entry_metadata) entry.save() - assert_entry(calc_with_metadata.calc_id) + assert_entry(entry_metadata.calc_id) return entry @@ -246,11 +249,13 @@ def assert_entry(calc_id): assert results[0]['calc_id'] == calc_id -def assert_search_upload(upload: datamodel.UploadWithMetadata, additional_keys: List[str] = [], **kwargs): +def assert_search_upload( + upload_entries: Iterable[datamodel.EntryMetadata], + additional_keys: List[str] = [], **kwargs): keys = ['calc_id', 'upload_id', 'mainfile', 'calc_hash'] refresh_index() search_results = Entry.search().query('match_all')[0:10] - assert search_results.count() == len(list(upload.calcs)) + assert search_results.count() == len(list(upload_entries)) if search_results.count() > 0: for hit in search_results: hit = search.flat(hit.to_dict()) @@ -287,7 +292,7 @@ if __name__ == '__main__': def gen_data(): for pid in range(0, n): calc = generate_calc(pid) - calc = Entry.from_calc_with_metadata(calc) + calc = Entry.from_entry_metadata(calc) yield calc.to_dict(include_meta=True) bulk(infrastructure.elastic_client, gen_data()) diff --git a/tests/utils.py b/tests/utils.py index 67f8340fab0142e4b93a0ce48a260677e38091d0..194da6e4e0a79110c6fcafaa580b158631f10ed6 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -""" Methods to help with testing of nomad@FAIRDI.""" +''' Methods to help with testing of nomad@FAIRDI.''' from typing import Type import json @@ -21,7 +21,7 @@ from logging import LogRecord def assert_log(caplog, level: str, event_part: str) -> LogRecord: - """ + ''' Assert whether a log message exists in the logs of the tests at a certain level. Parameters @@ -35,7 +35,7 @@ def assert_log(caplog, level: str, event_part: str) -> LogRecord: The error message we're after. We search the logs matching level if they contain this string. - """ + ''' record = None for record in caplog.get_records(when='call'): if record.levelname == level: @@ -50,10 +50,10 @@ def assert_log(caplog, level: str, event_part: str) -> LogRecord: @contextmanager def assert_exception(exception_cls: Type = Exception): - """ + ''' A context manager that can be used to assert that the given exception is thrown within the respective ``with``clause. - """ + ''' has_exception = False try: yield
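The create_entry() helper above pins down the new indexing path: search.create_entry() builds the elasticsearch document from an EntryMetadata instance and save() writes it to the test index. A minimal sketch under the same assumptions as these tests (a running elastic fixture); the field values mirror the skeleton-calc test above:

    from nomad import datamodel, search

    entry_metadata = datamodel.EntryMetadata(
        domain='dft', upload_id='test_upload', calc_id='test_calc',
        mainfile='test/mainfile', files=['test/file1', 'test/file2'])

    # replaces the former search.Entry.from_calc_with_metadata() construction
    search_entry = search.create_entry(entry_metadata)
    search_entry.save()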