datamodel.py 24.6 KB
Newer Older
Markus Scheidgen's avatar
Markus Scheidgen committed
1
2
3
4
#
# Copyright The NOMAD Authors.
#
# This file is part of NOMAD. See https://nomad-lab.eu for further info.
5
6
7
8
9
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
Markus Scheidgen's avatar
Markus Scheidgen committed
10
#     http://www.apache.org/licenses/LICENSE-2.0
11
12
#
# Unless required by applicable law or agreed to in writing, software
Markus Scheidgen's avatar
Markus Scheidgen committed
13
# distributed under the License is distributed on an "AS IS" BASIS,
14
15
16
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
Markus Scheidgen's avatar
Markus Scheidgen committed
17
#
18

19
20
''' All generic entry metadata and related classes. '''

21
from typing import Any
22
from cachetools import cached, TTLCache
23
24
from elasticsearch_dsl import Keyword, Text, analyzer, tokenizer
import ase.data
25
26

from nomad import metainfo, config
27
28
from nomad.metainfo.search_extension import Search
from nomad.metainfo.elastic_extension import ElasticDocument
29
from nomad.metainfo.mongoengine_extension import Mongo, MongoDocument
30
from nomad.datamodel.metainfo.common_dft import FastAccess
31
from nomad.metainfo.pydantic_extension import PydanticModel
32

33
34
from .dft import DFTMetadata
from .ems import EMSMetadata
35
from .qcms import QCMSMetadata
36
37
38
39
40
41

# This is usually defined automatically when the first metainfo definition is evaluated, but
# since the following imports already require the m_package, that would be too late.
m_package = metainfo.Package()

from .encyclopedia import EncyclopediaMetadata  # noqa
42
from .metainfo.common_dft import Run, Workflow  # noqa
43
from .metainfo.common_experimental import Experiment  # noqa
44
from .metainfo.common_qcms import QuantumCMS  # noqa
45
46
47
48
49
50
51
52
53
54
55
56


def _only_atoms(atoms):
    '''
    Returns the given chemical symbols as one string, concatenated in the order of
    ascending atomic number.
    '''
    symbol_to_number = ase.data.atomic_numbers
    ordered_numbers = sorted([symbol_to_number[symbol] for symbol in atoms])
    return ''.join(ase.data.chemical_symbols[number] for number in ordered_numbers)


# Custom elasticsearch analyzer for path-like values (it is used for the mainfile and
# files search quantities in this module). It is built on a tokenizer of type
# 'pattern' that uses '/' as its pattern.
path_analyzer = analyzer(
    'path_analyzer',
    tokenizer=tokenizer('path_tokenizer', 'pattern', pattern='/'))

57

58
59
60
61
62
63
64
class Author(metainfo.MSection):
    ''' A person that is author of data in NOMAD or referenced by NOMAD. '''

    # The full name is derived from the first and last name. Name parts that are not
    # set (None) are left out; formatting them with '%s' would otherwise put the
    # literal string 'None' into the derived (and searchable) name.
    name = metainfo.Quantity(
        type=str,
        derived=lambda user: ' '.join(
            str(part) for part in (user.first_name, user.last_name)
            if part is not None).strip(),
        a_search=Search(mapping=Text(fields={'keyword': Keyword()})))

    first_name = metainfo.Quantity(type=metainfo.Capitalized)
    last_name = metainfo.Quantity(type=metainfo.Capitalized)
    email = metainfo.Quantity(
        type=str,
        a_elastic=dict(mapping=Keyword),  # TODO remove?
        a_search=Search())

    affiliation = metainfo.Quantity(type=str)
    affiliation_address = metainfo.Quantity(type=str)


class User(Author):
    ''' A NOMAD user.

    Typically a NOMAD user has a NOMAD account. The user related data is managed by
    the NOMAD keycloak user-management system. Users are used to denote uploaders,
    authors, people to share embargoed data with, and owners of datasets.

    Args:
        user_id: The unique, persistent keycloak UUID
        username: The unique, persistent, user chosen username
        first_name: The user's first name (including all other given names)
        last_name: The user's last name
        affiliation: The name of the company and institutes the user identifies with
        affiliation_address: The address of the given affiliation
        created: The time the account was created
        repo_user_id: The id that was used to identify this user in the NOMAD CoE Repository
        is_admin: Bool that indicates whether this user is the admin user
        is_oasis_admin: Bool flag, ``False`` by default
    '''

    m_def = metainfo.Section(a_pydantic=PydanticModel())

    user_id = metainfo.Quantity(type=str, a_search=Search())
    username = metainfo.Quantity(type=str)
    created = metainfo.Quantity(type=metainfo.Datetime)

    repo_user_id = metainfo.Quantity(
        type=str, description='Optional, legacy user id from the old NOMAD CoE repository.')

    # derived by comparing the user's id with the configured admin user id
    is_admin = metainfo.Quantity(
        type=bool,
        derived=lambda user: user.user_id == config.services.admin_user_id)

    is_oasis_admin = metainfo.Quantity(type=bool, default=False)

    @staticmethod
    @cached(cache=TTLCache(maxsize=2048, ttl=24 * 3600))
    def get(*args, **kwargs) -> 'User':
        '''
        Retrieves a user via keycloak's ``get_user``; all arguments are passed through.
        Results are kept in a TTL cache (at most 2048 entries, 24 hours time to live).
        '''
        # imported here and not at module level
        from nomad import infrastructure
        keycloak = infrastructure.keycloak
        return keycloak.get_user(*args, **kwargs)  # type: ignore


121
122
123
124
125
126
127
128
129
class UserReference(metainfo.Reference):
    '''
    A metainfo reference type for User sections that uses plain user_ids as values.
    The user_ids are resolved to User objects automatically; this happens lazily when
    the value is read.
    '''

    def __init__(self):
        super().__init__(User.m_def)

    def resolve(self, proxy: metainfo.MProxy) -> metainfo.MSection:
        # the proxy value is used as the user_id to look the user up
        user_id = proxy.m_proxy_value
        return User.get(user_id=user_id)

    def serialize(self, section: metainfo.MSection, quantity_def: metainfo.Quantity, value: Any) -> Any:
        # a user is serialized as its user_id only
        user = value
        return user.user_id


user_reference = UserReference()


140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
class AuthorReference(metainfo.Reference):
    '''
    A metainfo reference type for Author sections. Values are either user_ids, which
    are resolved to User objects, or dicts with author data, which are turned into
    Author objects.
    '''

    def __init__(self):
        super().__init__(Author.m_def)

    def resolve(self, proxy: metainfo.MProxy) -> metainfo.MSection:
        raw_value = proxy.m_proxy_value

        # a string value is treated as user_id
        if isinstance(raw_value, str):
            return User.get(user_id=raw_value)

        # a dict value is treated as author data
        if isinstance(raw_value, dict):
            return Author.m_from_dict(raw_value)

        raise metainfo.MetainfoReferenceError()

    def serialize(self, section: metainfo.MSection, quantity_def: metainfo.Quantity, value: Any) -> Any:
        # User has to be checked first, because User is a subclass of Author
        if isinstance(value, User):
            return value.user_id

        if isinstance(value, Author):
            return value.m_to_dict()

        raise metainfo.MetainfoReferenceError()


author_reference = AuthorReference()


171
class Dataset(metainfo.MSection):
    ''' A Dataset is attached to one or many entries to form a set of data.

    Args:
        dataset_id: The unique identifier for this dataset as a string. It should be
            a randomly generated UUID, similar to other nomad ids.
        name: The human readable name of the dataset as string. The dataset name must be
            unique for the user.
        user_id: The unique user_id of the owner and creator of this dataset. The owner
            must not change after creation.
        doi: The optional Document Object Identifier (DOI) associated with this dataset.
            Nomad can register DOIs that link back to the respective representation of
            the dataset in the nomad UI. This quantity holds the string representation of
            this DOI. There is only one per dataset. The DOI is just the DOI name, not its
            full URL, e.g. "10.17172/nomad/2019.10.29-1".
        pid: The original NOMAD CoE Repository dataset PID. Old DOIs still reference
            datasets based on this id. Is not used for new datasets.
        created: The date when the dataset was first created.
        modified: The date when the dataset was last modified. An owned dataset can only
            be extended after a DOI was assigned. A foreign dataset cannot be changed
            once a DOI was assigned.
        dataset_type: The type determines if a dataset is owned, i.e. was created by
            the uploader/owner of the contained entries; or if a dataset is foreign,
            i.e. it was created by someone not necessarily related to the entries.
    '''
    m_def = metainfo.Section(a_mongo=MongoDocument(), a_pydantic=PydanticModel())

    # the dataset_id is the mongo primary key, most other quantities are mongo indexed
    dataset_id = metainfo.Quantity(
        type=str, a_mongo=Mongo(primary_key=True), a_search=Search())

    name = metainfo.Quantity(
        type=str, a_mongo=Mongo(index=True), a_search=Search())

    user_id = metainfo.Quantity(type=str, a_mongo=Mongo(index=True))

    doi = metainfo.Quantity(
        type=str, a_mongo=Mongo(index=True), a_search=Search())

    pid = metainfo.Quantity(type=str, a_mongo=Mongo(index=True))

    created = metainfo.Quantity(
        type=metainfo.Datetime, a_mongo=Mongo(index=True), a_search=Search())

    modified = metainfo.Quantity(
        type=metainfo.Datetime, a_mongo=Mongo(index=True), a_search=Search())

    dataset_type = metainfo.Quantity(
        type=metainfo.MEnum('owned', 'foreign'), a_mongo=Mongo(index=True), a_search=Search())
228
229


230
231
232
233
234
235
236
237
238
class DatasetReference(metainfo.Reference):
    '''
    A metainfo reference type for Dataset sections that uses plain dataset_ids as
    values. The dataset_ids are resolved to Dataset objects automatically; this happens
    lazily when the value is read.
    '''

    def __init__(self):
        super().__init__(Dataset.m_def)

    def resolve(self, proxy: metainfo.MProxy) -> metainfo.MSection:
        # the proxy value is used as the dataset_id for the mongo lookup
        dataset_id = proxy.m_proxy_value
        return Dataset.m_def.a_mongo.get(dataset_id=dataset_id)

    def serialize(self, section: metainfo.MSection, quantity_def: metainfo.Quantity, value: Any) -> Any:
        # anything but a proxy is expected to provide the dataset_id itself
        if not isinstance(value, metainfo.MProxy):
            return value.dataset_id

        # for a proxy, the proxy value is returned as is
        return value.m_proxy_value
247
248
249
250
251


dataset_reference = DatasetReference()


252
253
254
255
class UserProvidableMetadata(metainfo.MCategory):
    ''' Category for NOMAD entry metadata quantities that can be determined by the user, e.g. via nomad.yaml. '''


256
class EditableUserMetadata(metainfo.MCategory):
    ''' Category for NOMAD entry metadata quantities that the user can edit after publish. '''

    # this category is itself part of the UserProvidableMetadata category
    m_def = metainfo.Category(categories=[UserProvidableMetadata])
259
260


261
262
263
264
265
class OasisMetadata(metainfo.MCategory):
    ''' Category for NOMAD entry metadata quantities that can be provided by an OASIS. '''
    # this category is itself part of the EditableUserMetadata category
    m_def = metainfo.Category(categories=[EditableUserMetadata])


266
267
class MongoMetadata(metainfo.MCategory):
    ''' Category for NOMAD entry quantities that are stored in mongodb and not necessarily in the archive. '''


class DomainMetadata(metainfo.MCategory):
    ''' Category for NOMAD entry quantities that are determined by the uploaded data. '''


276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
def derive_origin(entry):
    '''
    Derives the origin of the given entry: the string form of its ``external_db`` if
    one is set, otherwise the name of its uploader, or ``None`` if there is no
    uploader either.
    '''
    external_db = entry.external_db
    if external_db is None:
        uploader = entry.uploader
        return uploader.name if uploader else None

    return str(external_db)


def derive_authors(entry):
    '''
    Derives the authors of the given entry as a new list: the co-authors, preceded by
    the uploader if there is one and the entry has no ``external_db``.
    '''
    uploader_is_author = entry.uploader is not None and entry.external_db is None
    leading_authors = [entry.uploader] if uploader_is_author else []
    return leading_authors + entry.coauthors


293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
class EntryMetadata(metainfo.MSection):
    '''
    The metadata of a NOMAD entry.

    Attributes:
        upload_id: The ``upload_id`` of the calculations upload (random UUID).
        calc_id: The unique mainfile based calculation id.
        calc_hash: The raw file content based checksum/hash of this calculation.
        pid: The unique persistent id of this calculation.
        mainfile: The upload relative mainfile path.
        domain: Must be the key for a registered domain. This determines which actual
            subclass is instantiated.

        files: A list of all files, relative to upload.
        upload_time: The time when the calc was uploaded.
        uploader: An object describing the uploading user, has at least ``user_id``
        processed: Boolean indicating if this calc was successfully processed and archive
            data and calc metadata is available.
        last_processing: A datetime with the time of the last successful processing.
        nomad_version: A string that describes the version of the nomad software that was
            used to do the last successful processing.

        comment: An arbitrary string with user provided information about the entry.
        references: A list of URLs for resources that are related to the entry.
        coauthors: Ids of all co-authors (excl. the uploader) of this entry. Co-authors are
            shown as authors of this entry alongside its uploader.
        shared_with: Ids of all users that this entry is shared with. These users can find,
            see, and download all data for this entry, even if it is in staging or
            has an embargo.
        with_embargo: Entries with embargo are only visible to the uploader, the admin
            user, and users the entry is shared with (see shared_with).
        datasets: Ids of all datasets that this entry appears in
    '''
    # entries are indexed in elasticsearch with their calc_id as document id
    m_def = metainfo.Section(
        a_elastic=ElasticDocument(index_name=config.elastic.index_name, id=lambda x: x.calc_id))

    upload_id = metainfo.Quantity(
        type=str,
        description='The persistent and globally unique identifier for the upload of the entry',
        a_search=Search(
            many_or='append', group='uploads_grouped', metric_name='uploads', metric='cardinality'))

    calc_id = metainfo.Quantity(
        type=str,
        description='A persistent and globally unique identifier for the entry',
        categories=[OasisMetadata],
        a_search=Search(many_or='append'))

    calc_hash = metainfo.Quantity(
        type=str,
        description='A raw file content based checksum/hash',
        categories=[MongoMetadata],
        a_search=Search(
            many_or='append', metric_name='unique_entries', metric='cardinality'))

    mainfile = metainfo.Quantity(
        type=str,
        description='The path to the mainfile from the root directory of the uploaded files',
        a_search=[
            Search(
                description='Search within the mainfile path.',
                mapping=Text(multi=True, analyzer=path_analyzer, fields={'keyword': Keyword()}),
                many_or='append', search_field='mainfile.keyword'),
            Search(
                description='Search for the exact mainfile.',
                many_and='append', name='mainfile_path', search_field='mainfile.keyword')])

    files = metainfo.Quantity(
        type=str, shape=['0..*'],
        description='''
        The paths to the files within the upload that belong to this entry.
        All files within the same directory as the entry's mainfile are considered the
        auxiliary files that belong to the entry.
        ''',
        a_search=[
            Search(
                description='Search within the paths.', name='path',
                mapping=Text(
                    multi=True, analyzer=path_analyzer, fields={'keyword': Keyword()})
            ),
            Search(
                description='Search for exact paths.',
                many_or='append', name='files', search_field='files.keyword')])

    pid = metainfo.Quantity(
        type=str,
        description='''
        The unique, sequentially enumerated, integer PID that was used in the legacy
        NOMAD CoE. It allows to resolve URLs of the old NOMAD CoE Repository.''',
        categories=[MongoMetadata],
        a_search=Search(many_or='append'))

    raw_id = metainfo.Quantity(
        type=str,
        description='''
        The code specific identifier extracted from the entrie's raw files if such an
        identifier is supported by the underlying code
        ''',
        categories=[MongoMetadata, UserProvidableMetadata],
        a_search=Search(many_or='append'))

    domain = metainfo.Quantity(
        type=metainfo.MEnum('dft', 'ems', 'qcms'),
        description='The material science domain',
        categories=[MongoMetadata, UserProvidableMetadata],
        a_search=Search())

    published = metainfo.Quantity(
        type=bool, default=False,
        description='Indicates if the entry is published',
        categories=[MongoMetadata, OasisMetadata],
        a_search=Search())

    processed = metainfo.Quantity(
        type=bool, default=False,
        description='Indicates that the entry is successfully processed.',
        categories=[MongoMetadata],
        a_search=Search())

    last_processing = metainfo.Quantity(
        type=metainfo.Datetime,
        description='The datetime of the last processing',
        categories=[MongoMetadata],
        a_search=Search())

    processing_errors = metainfo.Quantity(
        type=str, shape=['*'], description='Errors that occured during processing',
        a_search=Search(many_and='append'))

    nomad_version = metainfo.Quantity(
        type=str,
        description='The NOMAD version used for the last processing',
        categories=[MongoMetadata],
        a_search=Search(many_or='append'))

    nomad_commit = metainfo.Quantity(
        type=str,
        description='The NOMAD commit used for the last processing',
        categories=[MongoMetadata],
        a_search=Search(many_or='append'))

    parser_name = metainfo.Quantity(
        type=str,
        description='The NOMAD parser used for the last processing',
        a_search=Search(many_or='append'))

    comment = metainfo.Quantity(
        type=str, categories=[MongoMetadata, EditableUserMetadata],
        description='A user provided comment for this entry',
        a_search=Search(mapping=Text()))

    references = metainfo.Quantity(
        type=str, shape=['0..*'], categories=[MongoMetadata, EditableUserMetadata],
        description='User provided references (URLs) for this entry',
        a_search=Search())

    external_db = metainfo.Quantity(
        type=metainfo.MEnum('EELSDB', 'Materials Project', 'AFLOW', 'OQMD'), categories=[MongoMetadata, UserProvidableMetadata],
        description='The repository or external database where the original data resides',
        a_search=Search())

    # The two search quantities target the uploader's name and the uploader's user_id
    # respectively (see their search_field); the descriptions state exactly that.
    uploader = metainfo.Quantity(
        type=user_reference, categories=[MongoMetadata],
        description='The uploader of the entry',
        a_flask=dict(admin_only=True, verify=User),
        a_search=[
            Search(
                description='The full name of the uploader for exact searches',
                metric_name='uploaders', metric='cardinality',
                many_or='append', search_field='uploader.name.keyword',
                statistic_size=10,
                statistic_order='_count'),
            Search(
                name='uploader_id', search_field='uploader.user_id',
                description='The user id of the uploader',)
        ])

    origin = metainfo.Quantity(
        type=str,
        description='''
            A short human readable description of the entries origin. Usually it is the
            handle of an external database/repository or the name of the uploader.
        ''',
        derived=derive_origin,
        a_search=Search(statistic_size=10, statistic_order='_count'))

    coauthors = metainfo.Quantity(
        type=author_reference, shape=['0..*'], default=[], categories=[MongoMetadata, EditableUserMetadata],
        description='A user provided list of co-authors',
        a_flask=dict(verify=User))

    authors = metainfo.Quantity(
        type=author_reference, shape=['0..*'],
        description='All authors (uploader and co-authors)',
        derived=derive_authors,
        a_search=Search(
            description='The full name of the authors for exact searches',
            metric='cardinality',
            many_or='append', search_field='authors.name.keyword'))

    shared_with = metainfo.Quantity(
        type=user_reference, shape=['0..*'], default=[], categories=[MongoMetadata, EditableUserMetadata],
        description='A user provided list of userts to share the entry with',
        a_flask=dict(verify=User))

    owners = metainfo.Quantity(
        type=user_reference, shape=['0..*'],
        description='All owner (uploader and shared with users)',
        derived=lambda entry: ([entry.uploader] if entry.uploader is not None else []) + entry.shared_with,
        a_search=Search(
            description='The full name of the owners for exact searches',
            many_or='append', search_field='owners.name.keyword'))

    license = metainfo.Quantity(
        type=str,
        description='''
            A short license description (e.g. CC BY 4.0), that refers to the
            license of this entry.
        ''',
        default='CC BY 4.0',
        categories=[MongoMetadata, EditableUserMetadata])

    with_embargo = metainfo.Quantity(
        type=bool, default=False, categories=[MongoMetadata, EditableUserMetadata],
        description='Indicated if this entry is under an embargo',
        a_search=Search())

    upload_time = metainfo.Quantity(
        type=metainfo.Datetime, categories=[MongoMetadata, OasisMetadata],
        description='The date and time this entry was uploaded to nomad',
        a_flask=dict(admin_only=True),
        a_search=Search(order_default=True))

    upload_name = metainfo.Quantity(
        type=str, categories=[MongoMetadata],
        description='The user provided upload name',
        a_search=Search(many_or='append'))

    # The first search quantity targets the dataset names, the second one (dataset_id)
    # the dataset ids (see their search_field).
    datasets = metainfo.Quantity(
        type=dataset_reference, shape=['0..*'], default=[],
        categories=[MongoMetadata, EditableUserMetadata],
        description='A list of user curated datasets this entry belongs to.',
        a_flask=dict(verify=Dataset),
        a_search=[
            Search(
                search_field='datasets.name', many_or='append',
                description='A list of user curated datasets this entry belongs to for exact name search'),
            Search(
                name='dataset_id', search_field='datasets.dataset_id', many_or='append',
                group='datasets_grouped',
                metric='cardinality', metric_name='datasets',
                description='A list of user curated datasets this entry belongs to for exact id search')])

    external_id = metainfo.Quantity(
        type=str, categories=[MongoMetadata, UserProvidableMetadata],
        description='''
        A user provided external id. Usually the id for an entry in an external database
        where the data was imported from.''',
        a_search=Search(many_or='split'))

    last_edit = metainfo.Quantity(
        type=metainfo.Datetime, categories=[MongoMetadata, OasisMetadata],
        description='The date and time the user metadata was edited last',
        a_search=Search())

    formula = metainfo.Quantity(
        type=str, categories=[DomainMetadata],
        description='A (reduced) chemical formula',
        a_search=Search())

    atoms = metainfo.Quantity(
        type=str, shape=['n_atoms'], default=[], categories=[DomainMetadata],
        description='The atom labels of all atoms of the entry\'s material',
        a_search=Search(
            many_and='append', statistic_size=len(ase.data.chemical_symbols)))

    only_atoms = metainfo.Quantity(
        type=str, categories=[DomainMetadata],
        description='The atom labels concatenated in order-number order',
        derived=lambda entry: _only_atoms(entry.atoms),
        a_search=Search(many_and='append', derived=_only_atoms))

    n_atoms = metainfo.Quantity(
        type=int, categories=[DomainMetadata], default=0,
        description='The number of atoms in the entry\'s material',
        a_search=Search())

    # One sub section per domain; the sub section names equal the values of the
    # ``domain`` quantity, which is what apply_domain_metadata relies on for its lookup.
    ems = metainfo.SubSection(sub_section=EMSMetadata, a_search=Search())
    dft = metainfo.SubSection(sub_section=DFTMetadata, a_search=Search(), categories=[FastAccess])
    qcms = metainfo.SubSection(sub_section=QCMSMetadata, a_search=Search())
    encyclopedia = metainfo.SubSection(sub_section=EncyclopediaMetadata, categories=[FastAccess], a_search=Search())
582
583
584
585
586

    def apply_user_metadata(self, metadata: dict) -> None:
        '''
        Applies a user provided metadata dict to this calc.

        Arguments:
            metadata: The dict with the metadata to apply. Its items are passed as
                keyword arguments to ``m_update``.
        '''
        self.m_update(**metadata)

Markus Scheidgen's avatar
Markus Scheidgen committed
587
    def apply_domain_metadata(self, archive):
        '''
        Used to apply metadata that is related to the domain. The sub section that is
        named like this entry's ``domain`` is looked up (and created if it does not
        exist yet); the call is then delegated to that domain section's own
        ``apply_domain_metadata``.

        Arguments:
            archive: Passed on unchanged to the domain section's ``apply_domain_metadata``.

        Raises:
            AssertionError: If no domain is set or if there is no sub section for the
                domain.
        '''
        assert self.domain is not None, 'all entries must have a domain'

        domain_sub_section_def = self.m_def.all_sub_sections.get(self.domain)
        # The lookup yields None if there is no sub section for the domain. This has to
        # be checked before the definition is dereferenced, otherwise an unknown domain
        # fails with an AttributeError instead of the intended assertion message.
        assert domain_sub_section_def is not None, 'unknown domain %s' % self.domain

        domain_section_def = domain_sub_section_def.sub_section
        assert domain_section_def is not None, 'unknown domain %s' % self.domain

        # add domain section if not already there
        domain_section = self.m_get_sub_section(domain_sub_section_def, -1)
        if domain_section is None:
            domain_section = self.m_create(domain_section_def.section_cls)

        domain_section.apply_domain_metadata(archive)
600
601
602
603


class EntryArchive(metainfo.MSection):
    # Sub sections for the different kinds of entry data; only section_run repeats.
    section_run = metainfo.SubSection(sub_section=Run, repeats=True)
    section_experiment = metainfo.SubSection(sub_section=Experiment)
    section_quantum_cms = metainfo.SubSection(sub_section=QuantumCMS)

    # The workflow and the entry metadata are both in the FastAccess category.
    section_workflow = metainfo.SubSection(
        sub_section=Workflow, categories=[FastAccess])
    section_metadata = metainfo.SubSection(
        sub_section=EntryMetadata, categories=[FastAccess])

    processing_logs = metainfo.Quantity(
        type=Any,
        shape=['0..*'],
        description='The processing logs for this entry as a list of structlog entries.')
613
614
615
616
617


# Preemptively create the elasticsearch document definition, which populates metrics and
# search quantities in the search_extension. The attribute is only accessed for this
# side effect; its value is deliberately not used here.
EntryMetadata.m_def.a_elastic.document