Commit 9aea5db2 authored by Markus Scheidgen
Browse files

Added support for non user authors, external dbs. Added an eels crawler and...

Added support for non-user authors and external dbs. Added an eels crawler and updated the converter. #404 #330
parent c23881c2
......@@ -189,3 +189,6 @@
[submodule "dependencies/nomad-dos-fingerprints"]
path = dependencies/nomad-dos-fingerprints
url = https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-dos-fingerprints.git
[submodule "dependencies/parsers/eelsdb"]
path = dependencies/parsers/eelsdb
url = https://gitlab.mpcdf.mpg.de/nomad-lab/eelsdb.git
Subproject commit ed482b9956088163db9370dd5eee9b184c47870a
Subproject commit c6bf6c2ca01bff208e424b531b8c214ab709b9b8
Subproject commit 136ab4f5c17e340009a14df8dbb72dc101acb1b1
Subproject commit b42d4aae3fe431ed4afbbc738f5e19cec12634bd
Subproject commit 0d67544ea06713b0cf333eeee8d6ef6929a47f5e
Subproject commit d4083cdadd4b34b9d99cbeed579c56c09aa7606b
Subproject commit 0533ef8393da2501226d60f4db9c0d7be32dbf81
Subproject commit 3054fcd44013c78b55a784328b99c16ffd32710c
Subproject commit 715042d11d6beec998d33b4ea89f0d25fe5d7009
......@@ -230,12 +230,12 @@ const originLabels = {
function UsersVisualization() {
const {setStatistics} = useContext(searchContext)
useEffect(() => {
setStatistics(['uploader'])
setStatistics(['origin'])
// eslint-disable-next-line
}, [])
return <div>
<UploadsHistogram tooltips initialScale={0.5} />
<QuantityHistogram quantity="uploader" title="Uploader/origin" valueLabels={originLabels}/>
<QuantityHistogram quantity="origin" title="Uploader/origin" valueLabels={originLabels}/>
</div>
}
......
......@@ -276,7 +276,6 @@ export default function SearchContext({initialRequest, initialQuery, query, chil
setGroups: setGroups,
setDomain: setDomain,
setOwner: setOwner,
setStatisticsToRefresh: () => null, // TODO remove
setStatistics: setStatistics,
setDateHistogram: setDateHistogram,
update: runRequest
......
......@@ -20,17 +20,12 @@ class UploadersList extends React.Component {
static contextType = searchContext
componentDidMount() {
const {setStatisticsToRefresh} = this.context
setStatisticsToRefresh('uploader')
}
render() {
const {state: {usedMetric}} = this.context
return (
<Grid>
<Quantity quantity="uploader" title="Uploaders" scale={1} metric={usedMetric} />
<Quantity quantity="origin" title="Uploaders/origin" scale={1} metric={usedMetric} />
</Grid>
)
}
......
......@@ -93,7 +93,7 @@ export default function UploadsHistogram({title = 'Uploads over time', initialSc
const height = 250
const marginRight = 32
const marginTop = 16
const marginBottom = 16
const marginBottom = 17 // 16 misses a pixel in safari
const y = scalePow().range([height - marginBottom, marginTop]).exponent(scale)
const max = d3.max(data, d => d.value) || 0
......
......@@ -45,15 +45,6 @@ def fix_time(data, keys):
data[key] = datetime.datetime.utcfromtimestamp(time)
def tarnsform_user_id(source_user_id):
    '''
    Looks up the target user for a user id taken from the source data.

    Arguments:
        source_user_id: The user id from the source data. It is converted to ``str``
            before it is looked up in ``datamodel.User.repo_users()``.

    Returns: The ``user_id`` of the matching target user.

    Raises:
        KeyError: If no target user is registered for the given id. The offending id
            is passed as the exception argument.
    '''
    target_user = datamodel.User.repo_users().get(str(source_user_id))
    if target_user is None:
        __logger.error('user does not exist in target', source_user_id=source_user_id)
        # Carry the id in the exception; a bare KeyError tells the caller nothing
        # about which user could not be mapped.
        raise KeyError(source_user_id)

    return target_user.user_id
def transform_dataset(source_dataset):
pid = str(source_dataset['id'])
target_dataset = _Dataset.objects(pid=pid).first()
......@@ -103,31 +94,6 @@ def v0Dot7(upload_data):
return upload_data
def v0Dot6(upload_data):
    '''
    Migrates v0.6.x upload data to v0.7.x upload data. The given ``upload_data`` is
    modified in place and is also returned.
    '''
    def to_target_user_ids(users):
        # every user dict is replaced by the id that tarnsform_user_id yields for it
        return [tarnsform_user_id(user['id']) for user in users]

    upload_dict = json.loads(upload_data.upload)
    upload_dict['user_id'] = tarnsform_user_id(upload_dict['user_id'])
    upload_data.upload = json.dumps(upload_dict)

    for index, serialized_calc in enumerate(upload_data.calcs):
        calc = json.loads(serialized_calc)
        calc_metadata = calc['metadata']

        # users
        calc_metadata['uploader'] = tarnsform_user_id(calc_metadata['uploader']['id'])
        calc_metadata['coauthors'] = to_target_user_ids(calc_metadata['coauthors'])
        calc_metadata['shared_with'] = to_target_user_ids(calc_metadata['shared_with'])

        # datasets
        calc_metadata['datasets'] = list(
            map(transform_dataset, calc_metadata['datasets']))

        # references
        calc_metadata['references'] = list(
            map(transform_reference, calc_metadata['references']))

        # write the migrated calc back into its original slot
        upload_data.calcs[index] = json.dumps(calc)

    return upload_data
class Mapping:
def __init__(self, mapping: str):
......@@ -210,8 +176,6 @@ def mirror(
migration_func = None
if migration is not None:
if migration == 'v0.6.x':
migration_func = v0Dot6
if migration == 'v0.7.x':
migration_func = v0Dot7
else:
......
......@@ -14,7 +14,7 @@
''' All generic entry metadata and related classes. '''
from typing import Dict, Any
from typing import Any
from cachetools import cached, TTLCache
from elasticsearch_dsl import Keyword, Text, analyzer, tokenizer
import ase.data
......@@ -47,7 +47,25 @@ path_analyzer = analyzer(
tokenizer=tokenizer('path_tokenizer', 'pattern', pattern='/'))
class User(metainfo.MSection):
class Author(metainfo.MSection):
    ''' A person that is an author of data in NOMAD or is referenced by NOMAD. '''

    # Derived quantity: first and last name joined by a single space, with
    # surrounding whitespace stripped.
    # NOTE(review): '%s' formatting renders an unset (None) name part as the text
    # 'None' -- confirm that both name parts are always set before this is derived.
    name = metainfo.Quantity(
        type=str,
        derived=lambda user: ('%s %s' % (user.first_name, user.last_name)).strip(),
        a_search=Search(mapping=Text(fields={'keyword': Keyword()})))

    first_name = metainfo.Quantity(type=str)
    last_name = metainfo.Quantity(type=str)
    email = metainfo.Quantity(
        type=str,
        a_elastic=dict(mapping=Keyword), # TODO remove?
        a_search=Search())

    affiliation = metainfo.Quantity(type=str)
    affiliation_address = metainfo.Quantity(type=str)
class User(Author):
''' A NOMAD user.
Typically a NOMAD user has a NOMAD account. The user related data is managed by
......@@ -70,23 +88,14 @@ class User(metainfo.MSection):
type=str,
a_search=Search())
name = metainfo.Quantity(
type=str,
derived=lambda user: ('%s %s' % (user.first_name, user.last_name)).strip(),
a_search=Search(mapping=Text(fields={'keyword': Keyword()})))
username = metainfo.Quantity(type=str)
first_name = metainfo.Quantity(type=str)
last_name = metainfo.Quantity(type=str)
email = metainfo.Quantity(
created = metainfo.Quantity(type=metainfo.Datetime)
repo_user_id = metainfo.Quantity(
type=str,
a_elastic=dict(mapping=Keyword), # TODO remove?
a_search=Search())
description='Optional, legacy user id from the old NOMAD CoE repository.')
username = metainfo.Quantity(type=str)
affiliation = metainfo.Quantity(type=str)
affiliation_address = metainfo.Quantity(type=str)
created = metainfo.Quantity(type=metainfo.Datetime)
repo_user_id = metainfo.Quantity(type=str)
is_admin = metainfo.Quantity(
type=bool, derived=lambda user: user.user_id == config.services.admin_user_id)
......@@ -96,16 +105,6 @@ class User(metainfo.MSection):
from nomad import infrastructure
return infrastructure.keycloak.get_user(*args, **kwargs) # type: ignore
@staticmethod
@cached(cache=TTLCache(maxsize=1, ttl=24 * 3600))
def repo_users() -> Dict[str, 'User']:
from nomad import infrastructure
return {
str(user.repo_user_id): user
for user in infrastructure.keycloak.search_user()
if user.repo_user_id is not None
}
class UserReference(metainfo.Reference):
'''
......@@ -117,7 +116,7 @@ class UserReference(metainfo.Reference):
super().__init__(User.m_def)
def resolve(self, proxy: metainfo.MProxy) -> metainfo.MSection:
return User.get(user_id=proxy.m_proxy_url)
return User.get(user_id=proxy.m_proxy_value)
def serialize(self, section: metainfo.MSection, quantity_def: metainfo.Quantity, value: Any) -> Any:
return value.user_id
......@@ -126,6 +125,37 @@ class UserReference(metainfo.Reference):
user_reference = UserReference()
class AuthorReference(metainfo.Reference):
    '''
    A metainfo reference type for authors. Its values are either user_ids (strings)
    or plain author data (dicts). String values are resolved via ``User.get``, dict
    values are turned into ``Author`` objects.
    '''

    def __init__(self):
        super().__init__(Author.m_def)

    def resolve(self, proxy: metainfo.MProxy) -> metainfo.MSection:
        ''' Resolves the proxy value: str via ``User.get``, dict via ``Author.m_from_dict``. '''
        reference = proxy.m_proxy_value

        if isinstance(reference, dict):
            return Author.m_from_dict(reference)

        if isinstance(reference, str):
            return User.get(user_id=reference)

        # neither a user_id nor author data
        raise metainfo.MetainfoReferenceError()

    def serialize(self, section: metainfo.MSection, quantity_def: metainfo.Quantity, value: Any) -> Any:
        ''' Serializes users to their user_id and all other authors to a dict. '''
        # User has to be tested before Author
        if isinstance(value, User):
            return value.user_id

        if isinstance(value, Author):
            return value.m_to_dict()

        raise metainfo.MetainfoReferenceError()
author_reference = AuthorReference()
class Dataset(metainfo.MSection):
''' A Dataset is attached to one or many entries to form a set of data.
......@@ -181,11 +211,11 @@ class DatasetReference(metainfo.Reference):
super().__init__(Dataset.m_def)
def resolve(self, proxy: metainfo.MProxy) -> metainfo.MSection:
return Dataset.m_def.a_mongo.get(dataset_id=proxy.m_proxy_url)
return Dataset.m_def.a_mongo.get(dataset_id=proxy.m_proxy_value)
def serialize(self, section: metainfo.MSection, quantity_def: metainfo.Quantity, value: Any) -> Any:
if isinstance(value, metainfo.MProxy):
return value.m_proxy_url
return value.m_proxy_value
else:
return value.dataset_id
......@@ -207,6 +237,23 @@ class DomainMetadata(metainfo.MCategory):
pass
def derive_origin(entry):
    '''
    Derives the origin of an entry: the external database (as string) if the entry
    has one, otherwise the name of the uploader, or None if there is neither.
    '''
    database = entry.external_db
    if database is not None:
        return str(database)

    uploader = entry.uploader
    return uploader.name if uploader else None
def derive_authors(entry):
    '''
    Derives the list of authors of an entry: the co-authors, preceded by the
    uploader if there is one and the entry does not stem from an external database.
    '''
    uploader_counts = entry.uploader is not None and entry.external_db is None
    leading_authors = [entry.uploader] if uploader_counts else []
    return leading_authors + entry.coauthors
class EntryMetadata(metainfo.MSection):
'''
Attributes:
......@@ -347,6 +394,11 @@ class EntryMetadata(metainfo.MSection):
description='User provided references (URLs).',
a_search=Search())
external_db = metainfo.Quantity(
type=metainfo.MEnum('EELSDB'), categories=[MongoMetadata],
description='The repository or external database where the original entry resides.',
a_search=Search())
uploader = metainfo.Quantity(
type=user_reference, categories=[MongoMetadata],
description='The uploader of the entry',
......@@ -362,15 +414,24 @@ class EntryMetadata(metainfo.MSection):
name='uploader_id', search_field='uploader.user_id')
])
origin = metainfo.Quantity(
type=str,
description='''
A short human readable description of the entries origin. Usually it is the
handle of an external database/repository or the name of the uploader.
''',
derived=derive_origin,
a_search=Search())
coauthors = metainfo.Quantity(
type=user_reference, shape=['0..*'], default=[], categories=[MongoMetadata, EditableUserMetadata],
type=author_reference, shape=['0..*'], default=[], categories=[MongoMetadata, EditableUserMetadata],
description='A user provided list of co-authors.',
a_flask=dict(verify=User))
authors = metainfo.Quantity(
type=user_reference, shape=['0..*'],
type=author_reference, shape=['0..*'],
description='All authors (uploader and co-authors).',
derived=lambda entry: ([entry.uploader] if entry.uploader is not None else []) + entry.coauthors,
derived=derive_authors,
a_search=Search(
description='Search authors with exact names.',
metric='cardinality',
......
......@@ -582,7 +582,7 @@ class Properties(MSection):
description="""
Reference to an electronic band structure.
""",
a_search=Search(value=lambda section: section.electronic_band_structure.m_proxy_url if section.electronic_band_structure is not None else None, mapping=Keyword())
a_search=Search(value=lambda section: section.electronic_band_structure.m_proxy_value if section.electronic_band_structure is not None else None, mapping=Keyword())
)
electronic_dos = Quantity(
type=Reference(section_dos.m_def),
......@@ -590,7 +590,7 @@ class Properties(MSection):
description="""
Reference to an electronic density of states.
""",
a_search=Search(value=lambda section: section.electronic_dos.m_proxy_url if section.electronic_dos is not None else None, mapping=Keyword())
a_search=Search(value=lambda section: section.electronic_dos.m_proxy_value if section.electronic_dos is not None else None, mapping=Keyword())
)
phonon_band_structure = Quantity(
type=Reference(section_k_band.m_def),
......@@ -598,7 +598,7 @@ class Properties(MSection):
description="""
Reference to a phonon band structure.
""",
a_search=Search(value=lambda section: section.phonon_band_structure.m_proxy_url if section.phonon_band_structure is not None else None, mapping=Keyword())
a_search=Search(value=lambda section: section.phonon_band_structure.m_proxy_value if section.phonon_band_structure is not None else None, mapping=Keyword())
)
phonon_dos = Quantity(
type=Reference(section_dos.m_def),
......@@ -606,7 +606,7 @@ class Properties(MSection):
description="""
Reference to a phonon density of states.
""",
a_search=Search(value=lambda section: section.phonon_dos.m_proxy_url if section.phonon_dos is not None else None, mapping=Keyword())
a_search=Search(value=lambda section: section.phonon_dos.m_proxy_value if section.phonon_dos is not None else None, mapping=Keyword())
)
thermodynamical_properties = Quantity(
type=Reference(section_thermodynamical_properties.m_def),
......@@ -614,7 +614,7 @@ class Properties(MSection):
description="""
Reference to a section containing thermodynamical properties.
""",
a_search=Search(value=lambda section: section.thermodynamical_properties.m_proxy_url if section.thermodynamical_properties is not None else None, mapping=Keyword())
a_search=Search(value=lambda section: section.thermodynamical_properties.m_proxy_value if section.thermodynamical_properties is not None else None, mapping=Keyword())
)
......
......@@ -2,7 +2,7 @@ import numpy as np # pylint: disable=unused-import
import typing # pylint: disable=unused-import
from nomad.metainfo import ( # pylint: disable=unused-import
MSection, MCategory, Category, Package, Quantity, Section, SubSection, SectionProxy,
Reference
Reference, Datetime, JSON
)
from nomad.metainfo.legacy import LegacyDefinition
......@@ -53,6 +53,12 @@ class section_experiment(MSection):
''',
a_legacy=LegacyDefinition(name='experiment_facility_name'))
experiment_publish_time = Quantity(
type=Datetime,
description='''
The datetime when this experiment was published.
''')
experiment_time = Quantity(
type=np.dtype(np.int64),
shape=[],
......@@ -71,11 +77,27 @@ class section_experiment(MSection):
''',
a_legacy=LegacyDefinition(name='experiment_end_time'))
raw_metadata = Quantity(
type=JSON,
description='''
The whole or partial metadata in its original source JSON format.
''')
section_data = SubSection(
sub_section=SectionProxy('section_data'),
repeats=True,
a_legacy=LegacyDefinition(name='section_data'))
section_method = SubSection(
sub_section=SectionProxy('section_method'),
repeats=True,
a_legacy=LegacyDefinition(name='section_method'))
section_sample = SubSection(
sub_section=SectionProxy('section_sample'),
repeats=True,
a_legacy=LegacyDefinition(name='section_sample'))
class section_data(MSection):
'''
......@@ -108,5 +130,153 @@ class section_data(MSection):
''',
a_legacy=LegacyDefinition(name='data_preview_url'))
entry_repository_url = Quantity(
type=str,
shape=[],
description='''
An URL to the entry on the repository, where the data is stored.
''',
a_legacy=LegacyDefinition(name='entry_repository_url'))
class section_method(MSection):
    '''
    This section contains information about the applied experimental method.
    '''

    # Every definition in this section carries a LegacyDefinition with the same name
    # as the quantity itself.
    m_def = Section(validate=False, a_legacy=LegacyDefinition(name='section_method'))

    # All quantities below are scalar strings (shape=[]).
    experiment_method_name = Quantity(
        type=str,
        shape=[],
        description='''
        Full name of the experimental method in use
        ''',
        a_legacy=LegacyDefinition(name='experiment_method_name'))

    experiment_method_abbreviation = Quantity(
        type=str,
        shape=[],
        description='''
        Abbreviated name (i.e. acronym) of the experimental method
        ''',
        a_legacy=LegacyDefinition(name='experiment_method_abbreviation'))

    equipment_description = Quantity(
        type=str,
        shape=[],
        description='''
        Name or model of the equipment (e.g. in full or an acronym).
        ''',
        a_legacy=LegacyDefinition(name='equipment_description'))

    probing_method = Quantity(
        type=str,
        shape=[],
        description='''
        The probing method used
        ''',
        a_legacy=LegacyDefinition(name='probing_method'))
class section_sample(MSection):
    '''
    The section for all sample related (meta)data that was used in the experiment.
    '''

    # Every definition in this section carries a LegacyDefinition with the same name
    # as the quantity itself.
    m_def = Section(validate=False, a_legacy=LegacyDefinition(name='section_sample'))

    # NOTE(review): sample_description and sample_id are strings but declare
    # unit='dimensionless' -- confirm that a unit is intended on str quantities.
    sample_description = Quantity(
        type=str,
        shape=[],
        unit='dimensionless',
        description='''
        Description of the sample used in the experiment.
        ''',
        a_legacy=LegacyDefinition(name='sample_description'))

    sample_id = Quantity(
        type=str,
        shape=[],
        unit='dimensionless',
        description='''
        Identification number or signatures of the sample used.
        ''',
        a_legacy=LegacyDefinition(name='sample_id'))

    sample_state = Quantity(
        type=str,
        shape=[],
        description='''
        The physical state of the sample.
        ''',
        a_legacy=LegacyDefinition(name='sample_state'))

    sample_chemical_formula = Quantity(
        type=str,
        shape=[],
        description='''
        The chemical formula that describes the sample
        ''',
        a_legacy=LegacyDefinition(name='sample_chemical_formula'))

    sample_chemical_name = Quantity(
        type=str,
        shape=[],
        description='''
        The chemical name that describes the sample
        ''',
        a_legacy=LegacyDefinition(name='sample_chemical_name'))

    # The only non-scalar quantity of this section (shape=['n']). Its description
    # used to be a verbatim copy of the sample_chemical_name description.
    sample_atom_labels = Quantity(
        type=str,
        shape=['n'],
        description='''
        The labels of the atoms that the sample consists of
        ''',
        a_legacy=LegacyDefinition(name='sample_atom_labels'))

    number_of_elements = Quantity(
        type=int,
        shape=[],
        description='''
        Number of distinct chemical elements in the sample.
        ''',
        a_legacy=LegacyDefinition(name='number_of_elements'))

    sample_space_group = Quantity(
        type=np.dtype(np.int32),
        shape=[],
        unit='dimensionless',
        description='''
        Space group of the sample compound (if crystalline).
        ''',
        a_legacy=LegacyDefinition(name='sample_space_group'))

    sample_temperature = Quantity(
        type=np.dtype(np.float64),
        shape=[],
        unit='kelvin',
        description='''
        The temperature of the sample during the experiment in K.
        ''',
        a_legacy=LegacyDefinition(name='sample_temperature'))

    sample_microstructure = Quantity(
        type=str,
        shape=[],
        description='''
        The sample microstructure
        ''',
        a_legacy=LegacyDefinition(name='sample_microstructure'))

    sample_constituents = Quantity(
        type=str,
        shape=[],
        description='''
        The constituents
        ''',
        a_legacy=LegacyDefinition(name='sample_constituents'))
m_package.__init_metainfo__()
import numpy as np # pylint: disable=unused-import
import typing # pylint: disable=unused-import
from nomad.metainfo import ( # pylint: disable=unused-import
MSection, MCategory, Category, Package, Quantity, Section, SubSection, SectionProxy,
Reference
)
from nomad.metainfo.legacy import LegacyDefinition
from nomad.datamodel.metainfo import general_experimental
m_package = Package(
name='general_experimental_data_nomadmetainfo_json',