Commit f7a48fa1 authored by David Sikter's avatar David Sikter
Browse files

Renaming Upload.user_id and EntryMetadata.uploader to main_author

parent e8c332ad
......@@ -160,18 +160,20 @@ belong to one *material* based on the simulated system.
(even among different nomad installations)
### Uploads
- An upload contains related calculations in the form of raw code input and output files
- Uploaders are encouraged to upload all relevant files
- The directory structure of an upload might be used to relate calculations to each other
- An upload contains a directory structure of *raw files*, which are parsed to produce *archive entries*.
- Uploads have a unique, randomly chosen `upload_id` (UUID)
- The `uploader` is the user that created the upload (always set, and cannot be changed).
- Currently, uploads can be provided as `.zip` or `.tar.gz` files.
- Uploaders are encouraged to put all files that could be relevant in the same upload
- If a file is encountered that can be parsed, this file will be the *mainfile* of the resulting entry.
- After parsing a mainfile, an *archive file* containing the parser result/archive entry is created and stored within the upload (separated from the raw files).
- The directory structure of an upload is used to relate different entries and files within the upload to each other. Files in the same directory as a mainfile are considered to be auxiliary files belonging to this entry. (Note, if there are two mainfiles in the same directory, they will therefore be considered aux files of each other)
- The `main_author` is set at creation to the user that created the upload. The field cannot be modified after that (unless by an admin user).
- When an upload is created, it is first created in the *staging area*. Here, the upload metadata can be edited, raw files can be uploaded or deleted, etc.
- You can upload uncompressed or compressed (`.zip` or `.tar.gz`) files to an upload when it is in staging. Compressed files will be unpacked automatically.
### Entries (Calculations, Code runs)
- There are confusing names. Internally, in the nomad source code, the term `calc` is used.
An entry represents a single set of input/output used and produced by an individual run of a
DFT code. If nomad is applied to other domains, e.g. experimental material science, entries might represent
experiments or other entities.
DFT code. If nomad is applied to other domains, e.g. experimental material science, entries might represent experiments or other entities.
- An entry (calculation) has a unique `calc_id` that is based on the upload's id and the `mainfile`
- The `mainfile` is an upload-relative path to the main output file.
- Each calculation, when published, gets a unique `pid`. Pids are ascending integers. For
......@@ -18,7 +18,7 @@ from nomad.client import Auth, upload_file
nomad_url = config.client.url
user = 'youruser'
password = 'yourpassword'
uploader_id = None
main_author = None
# create an auth object
......@@ -27,7 +27,7 @@ auth = Auth(user=user, password=password)
def upload(
path: str, local_path: bool = False, metadata_path: str = None,
publish_directly: bool = False, uploader_id: str = None):
publish_directly: bool = False, main_author: str = None):
path: The file path to the upload file.
......@@ -53,8 +53,8 @@ def upload(
if publish_directly:
kwargs['publish_directly'] = True
if uploader_id is not None:
kwargs['uploader_id'] = uploader_id
if main_author is not None:
kwargs['main_author'] = main_author
upload_id = upload_file(path, auth, local_path=local_path)
if upload_id is None:
......@@ -77,4 +77,4 @@ if __name__ == '__main__':
for path in paths:
path, metadata_path=metadata_path, local_path=True, publish_directly=True,
......@@ -172,9 +172,9 @@ class FAQ extends React.Component {
### How can I share credit with my co authors?
When you edit entries, you can mark other NOMAD users as *co-authors*. You
(the uploader) and the *co-authors* will comprise the respective authors list
displayed for each entry.
When you edit the metadata, you can mark other NOMAD users as *co-authors* of
an upload or an entry. The main author and the co-authors will comprise the
respective authors list displayed for each entry.
### I want to upload data from a code that is not yet supported?
......@@ -809,10 +809,10 @@ class EditUserMetadataDialogUnstyled extends React.Component {
comment: example.comment || '',
references: example.references || [],
entry_coauthors: (example.authors || [])
.filter(user => user.user_id !== example.uploader.user_id)
.filter(user => user.user_id !== example.main_author.user_id)
.map(user => user.user_id),
reviewers: (example.owners || [])
.filter(user => user.user_id !== example.uploader.user_id)
.filter(user => user.user_id !== example.main_author.user_id)
.map(user => user.user_id),
datasets: (example.datasets || []).map(ds => ds.dataset_name)
......@@ -953,7 +953,7 @@ class EditUserMetadataDialogUnstyled extends React.Component {
const { classes, buttonProps, total, user, example, disabled, title } = this.props
const { open, actions, verified, submitting, success, message } = this.state
const dialogEnabled = user && example.uploader && example.uploader.user_id === user.sub && !disabled
const dialogEnabled = user && example.main_author && example.main_author.user_id === user.sub && !disabled
const submitEnabled = Object.keys(actions).length && !submitting && verified
const editDataToActions = editData => {
......@@ -142,7 +142,7 @@ export function Published(props) {
const {entry} = props
if (entry.published) {
if (entry.with_embargo) {
if (user && entry.uploader.user_id === user.sub) {
if (user && entry.main_author.user_id === user.sub) {
if (entry.owners.length === 1) {
return <Tooltip title="published with embargo by you and only accessible by you">
<UploaderIcon color="error" />
......@@ -98,7 +98,7 @@ test('correctly renders metadata and all properties', async () => {
const references = screen.getByTitle('User provided references (URLs) for this entry')
const authors = screen.getByTitle('All authors (uploader and co-authors)')
const authors = screen.getByTitle('All authors (main author and co-authors)')
const datasets = screen.getByTitle('A list of user curated datasets this entry belongs to.')
......@@ -193,7 +193,7 @@ export default function RawFiles({data, entryId}) {
const filterPotcar = useCallback((file) => {
if (file.substring(file.lastIndexOf('/')).includes('POTCAR') && !file.endsWith('.stripped')) {
return user && data.uploader.user_id === user.sub
return user && data.main_author.user_id === user.sub
} else {
return true
......@@ -103,13 +103,13 @@ class Mapping():
self.g.add((dataset, DCT.license, URIRef('')))
self.g.add((dataset, DCT.language, URIRef('')))
self.g.add((dataset, DCT.publisher, self.map_user(entry['uploader']['user_id'])))
self.g.add((dataset, DCT.publisher, self.map_user(entry['main_author']['user_id'])))
for author in entry['authors']:
self.g.add((dataset, DCT.creator, self.map_user(author['user_id'])))
except (KeyError, AttributeError):
self.g.add((dataset, DCAT.contactPoint, self.map_contact(entry['uploader']['user_id'])))
self.g.add((dataset, DCAT.contactPoint, self.map_contact(entry['main_author']['user_id'])))
self.g.add((dataset, DCAT.distribution, self.map_distribution(entry, 'api')))
self.g.add((dataset, DCAT.distribution, self.map_distribution(entry, 'json')))
......@@ -1185,7 +1185,7 @@ async def post_entry_metadata_edit(
verify = data.verify
data.success = True
mongo_update = {}
uploader_ids = None
main_author_ids = None
has_error = False
removed_datasets = None
......@@ -1204,7 +1204,7 @@ async def post_entry_metadata_edit(
# TODO this does not work. Because the quantities are not in EditableUserMetadata
# they are also not in the model and ignored by fastapi. This probably
# also did not work in the old API.
if action_quantity_name in ['uploader', 'upload_create_time']:
if action_quantity_name in ['main_author', 'upload_create_time']:
if not user.is_admin():
raise HTTPException(
......@@ -1249,13 +1249,13 @@ async def post_entry_metadata_edit(
action.message = 'User does not exist'
if uploader_ids is None:
uploader_ids = get_quantity_values(
quantity='uploader.user_id', owner=Owner.user, query=data.query, user_id=user.user_id)
if action_value in uploader_ids:
if main_author_ids is None:
main_author_ids = get_quantity_values(
quantity='main_author.user_id', owner=Owner.user, query=data.query, user_id=user.user_id)
if action_value in main_author_ids:
action.success = False
has_error = True
action.message = 'This user is already an uploader of one entry in the query'
action.message = 'This user is already the main author of an entry in the query'
elif verify_reference == datamodel.Dataset:
......@@ -51,8 +51,8 @@ class UploadMetadata(BaseModel):
A user-firendly name of the upload. Does not need to be unique'''))
embargo_length: Optional[int] = Field(None, description=strip('''
The embargo length in months (max 36).'''))
uploader: Optional[str] = Field(None, description=strip('''
The uploader (owner) of the upload. **Note! Can only be updated by admin users.**'''))
main_author: Optional[str] = Field(None, description=strip('''
The main author of the upload. **Note! Can only be updated by admin users.**'''))
upload_create_time: Optional[datetime] = Field(None, description=strip('''
The time of the creation of the upload. **Note! Can only be updated by admin users.**'''))
......@@ -363,7 +363,7 @@ async def get_uploads(
# Build query
query_kwargs: Dict[str, Any] = {}
if query.upload_id:
......@@ -844,7 +844,7 @@ async def post_upload(
if not user.is_admin:
# Check upload limit
if _query_mongodb(user_id=str(user.user_id), publish_time=None).count() >= # type: ignore
if _query_mongodb(main_author=str(user.user_id), publish_time=None).count() >= # type: ignore
raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=strip('''
Limit of unpublished uploads exceeded for user.'''))
......@@ -869,7 +869,7 @@ async def post_upload(
upload = Upload.create(
user=checked_upload_metadata.uploader or user,
main_author=checked_upload_metadata.main_author or user,
upload_create_time=checked_upload_metadata.upload_create_time or datetime.utcnow(),
embargo_length=checked_upload_metadata.embargo_length or 0,
......@@ -1381,7 +1381,7 @@ def _get_upload_with_read_access(upload_id: str, user: User, include_others: boo
raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=strip('''
The specified upload_id was not found.'''))
upload = mongodb_query.first()
if user and (user.is_admin or upload.user_id == str(user.user_id)):
if user and (user.is_admin or upload.main_author == str(user.user_id)):
# Ok, it exists and belongs to user, or we have an admin user
return upload
elif include_others:
......@@ -1413,7 +1413,7 @@ def _get_upload_with_write_access(
raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=strip('''
The specified upload_id was not found.'''))
upload = mongodb_query.first()
if upload.user_id != str(user.user_id) and not user.is_admin:
if upload.main_author != str(user.user_id) and not user.is_admin:
raise HTTPException(status_code=status.HTTP_401_UNAUTHORIZED, detail=strip('''
You do not have write access to the specified upload.'''))
if upload.published:
......@@ -1463,7 +1463,7 @@ def _check_upload_metadata(
current_embargo_length: the current embargo_length of the upload.
if not is_admin:
for field in ('uploader', 'upload_create_time'):
for field in ('main_author', 'upload_create_time'):
if getattr(metadata, field) is not None:
raise HTTPException(
......@@ -1474,15 +1474,15 @@ def _check_upload_metadata(
raise HTTPException(
detail=f'Upload is published, changing {field} requires admin.')
if metadata.uploader is not None:
if metadata.main_author is not None:
uploader = datamodel.User.get(user_id=metadata.uploader)
main_author = datamodel.User.get(user_id=metadata.main_author)
except KeyError:
raise HTTPException(
detail='`uploader` is not a valid user id.')
detail='`main_author` is not a valid user id.')
uploader = None
main_author = None
if metadata.embargo_length is not None:
if not 0 <= metadata.embargo_length <= 36:
raise HTTPException(
......@@ -1501,6 +1501,6 @@ def _check_upload_metadata(
upload_metadata: datamodel.UploadMetadata = datamodel.UploadMetadata()
upload_metadata.upload_name = metadata.upload_name
upload_metadata.embargo_length = metadata.embargo_length
upload_metadata.uploader = uploader
upload_metadata.main_author = main_author
upload_metadata.upload_create_time = metadata.upload_create_time
return upload_metadata
......@@ -252,7 +252,7 @@ def ls(ctx, uploads, calculations, ids, json):
row = [
......@@ -295,7 +295,7 @@ def chown(ctx, username, uploads):
print('%d uploads selected, changing owner ...' % uploads.count())
user = datamodel.User.get(username=username)
upload_metadata = datamodel.UploadMetadata(uploader=user)
upload_metadata = datamodel.UploadMetadata(main_author=user)
for upload in uploads:
......@@ -188,9 +188,9 @@ class DbUpdater:
servers = ['LIB%d_LIB' % n for n in range(1, 10)] + ['ICSD_WEB']
paths = [s for s in servers if s in self.root_url]
paths = paths if paths else servers
# uploader: Stefano Curtarolo
# main_author: Stefano Curtarolo
query = dict(
self.nomad_files = []
......@@ -290,19 +290,19 @@ def update_parser_readmes(parser):
@dev.command(help='Adds a few pieces of data to NOMAD.')
@click.option('--username', '-u', type=str, help='The uploader username.')
@click.option('--username', '-u', type=str, help='The main author username.')
def example_data(username: str):
from nomad import infrastructure, utils
from tests.utils import ExampleData
user = infrastructure.keycloak.get_user(username=username)
if user is None:
main_author = infrastructure.keycloak.get_user(username=username)
if main_author is None:
print(f'The user {username} does not exist.')
data = ExampleData(uploader=user)
data = ExampleData(main_author=main_author)
# one upload with two calc published with embargo, one shared
upload_id = utils.create_uuid()
......@@ -91,8 +91,8 @@ class User(Author):
''' A NOMAD user.
Typically a NOMAD user has a NOMAD account. The user related data is managed by
NOMAD keycloak user-management system. Users are used to denote uploaders, authors,
people to shared data with embargo with, and owners of datasets.
NOMAD keycloak user-management system. Users are used to denote authors,
reviewers, and owners of datasets.
user_id: The unique, persistent keycloak UUID
......@@ -210,7 +210,7 @@ class Dataset(metainfo.MSection):
can only be extended after a DOI was assigned. A foreign dataset cannot be changed
once a DOI was assigned.
dataset_type: The type determined if a dataset is owned, i.e. was created by
the uploader/owner of the contained entries; or if a dataset is foreign,
the authors of the contained entries; or if a dataset is foreign,
i.e. it was created by someone not necessarily related to the entries.
m_def = metainfo.Section(a_mongo=MongoDocument(), a_pydantic=PydanticModel())
......@@ -315,17 +315,19 @@ def derive_origin(entry: 'EntryMetadata') -> str:
if entry.external_db is not None:
return str(entry.external_db)
if entry.uploader:
if entry.main_author:
return None
def derive_authors(entry: 'EntryMetadata') -> List[User]:
uploaders: List[User] = []
if entry.uploader is not None and entry.external_db is None:
uploaders = [entry.uploader]
return uploaders + entry.entry_coauthors
authors: List[User] = []
if entry.main_author is not None and entry.external_db is None:
if entry.entry_coauthors:
return authors
class UploadMetadata(metainfo.MSection):
......@@ -339,7 +341,7 @@ class UploadMetadata(metainfo.MSection):
upload_create_time = metainfo.Quantity(
description='The date and time when the upload was created')
uploader = metainfo.Quantity(
main_author = metainfo.Quantity(
description='The creator of the upload')
embargo_length = metainfo.Quantity(
......@@ -369,15 +371,16 @@ class EntryMetadata(metainfo.MSection):
nomad_commit: The NOMAD commit used for the last processing.
comment: An arbitrary string with user provided information about the entry.
references: A list of URLs for resources that are related to the entry.
uploader: Id of the uploader of this entry.
reviewers: Ids of users who can review the upload which this entry belongs to. Like the
uploader, reviewers can find, see, and download all data from the upload, even
if it is in staging or has an embargo.
entry_coauthors: Ids of all co-authors (excl. the uploader) specified on the entry level,
rather than on the upload level. They are shown as authors of this entry alongside
its uploader.
with_embargo: Entries with embargo are only visible to the uploader, the admin
user, and users registered as reviewers of the uplod (see reviewers).
main_author: Id of the main author of this entry.
reviewers: Ids of users who can review the upload which this entry belongs to. Like
the main author and the upload coauthors, reviewers can find, see, and download
all data from the upload and all its entries, even if it is in staging or has
an embargo.
entry_coauthors: Ids of all co-authors (excl. the main author and upload coauthors)
specified on the entry level, rather than on the upload level. They are shown
as authors of this entry alongside its main author and upload coauthors.
with_embargo: Entries with embargo are only visible to the main author, the upload
coauthors, and the upload reviewers (and the admin user).
upload_create_time: The time that the upload was created
entry_create_time: The time that the entry was created
publish_time: The time when the upload was published
......@@ -491,16 +494,16 @@ class EntryMetadata(metainfo.MSection):
description='The repository or external database where the original data resides',
uploader = metainfo.Quantity(
type=user_reference, categories=[], # TODO: fix categories after refactoring
description='The uploader of the entry',
main_author = metainfo.Quantity(
type=user_reference, categories=[MongoUploadMetadata],
description='The main author of the entry',
origin = metainfo.Quantity(
A short human readable description of the entries origin. Usually it is the
handle of an external database/repository or the name of the uploader.
handle of an external database/repository or the name of the main author.
......@@ -514,7 +517,7 @@ class EntryMetadata(metainfo.MSection):
authors = metainfo.Quantity(
type=author_reference, shape=['0..*'],
description='All authors (uploader and co-authors)',
description='All authors (main author and co-authors)',
a_elasticsearch=Elasticsearch(material_entry_type, metrics=dict(n_authors='cardinality')))
......@@ -527,8 +530,8 @@ class EntryMetadata(metainfo.MSection):
owners = metainfo.Quantity(
type=user_reference, shape=['0..*'],
description='All owner (uploader and shared with users)',
derived=lambda entry: ([entry.uploader] if entry.uploader is not None else []) + entry.reviewers,
description='All viewers (main author, upload coauthors, and reviewers)',
derived=lambda entry: ([entry.main_author] if entry.main_author is not None else []) + entry.reviewers,
license = metainfo.Quantity(
......@@ -320,7 +320,6 @@ class Calc(Proc):
# Upload metadata
for field in _mongo_upload_metadata:
setattr(entry_metadata, field, getattr(upload, field))
entry_metadata.uploader = upload.user_id # TODO: Refactor and treat like the other fields
# Entry metadata
for field in _mongo_entry_metadata:
setattr(entry_metadata, field, getattr(self, field))
......@@ -792,7 +791,7 @@ class Upload(Proc):
upload_id: The upload id generated by the database or the uploaded NOMAD deployment.
upload_name: Optional user provided upload name.
upload_create_time: Datetime of creation of the upload.
user_id: The id of the user that created this upload.
main_author: The id of the main author of this upload (normally its creator).
reviewers: A user provided list of reviewers. Reviewers can see the whole upload,
also if it is unpublished or embargoed.
publish_time: Datetime when the upload was initially published on this NOMAD deployment.
......@@ -810,7 +809,7 @@ class Upload(Proc):
upload_id = StringField(primary_key=True)
upload_name = StringField(default=None)
upload_create_time = DateTimeField(required=True)
user_id = StringField(required=True)
main_author = StringField(required=True)
reviewers = ListField(StringField(), default=None)
last_update = DateTimeField()
publish_time = DateTimeField()
......@@ -828,7 +827,7 @@ class Upload(Proc):
meta: Any = {
'strict': False,
'indexes': [
'user_id', 'process_status', 'upload_create_time', 'publish_time'
'main_author', 'process_status', 'upload_create_time', 'publish_time'
......@@ -863,11 +862,11 @@ class Upload(Proc):
def user_uploads(cls, user: datamodel.User, **kwargs) -> Sequence['Upload']:
''' Returns all uploads for the given user. Kwargs are passed to mongo query. '''
return cls.objects(user_id=str(user.user_id), **kwargs)
return cls.objects(main_author=str(user.user_id), **kwargs)
def uploader(self) -> datamodel.User:
return datamodel.User.get(self.user_id)
def main_author_user(self) -> datamodel.User:
return datamodel.User.get(self.main_author)
def published(self) -> bool:
......@@ -879,31 +878,29 @@ class Upload(Proc):
def get_logger(self, **kwargs):
logger = super().get_logger()
user = self.uploader
user_name = '%s %s' % (user.first_name, user.last_name)
# We are not using 'user_id' because logstash (?) will filter these entries ?!
main_author_user = self.main_author_user
main_author_name = '%s %s' % (main_author_user.first_name, main_author_user.last_name)
# We are not using 'main_author' because logstash (?) will filter these entries ?!
logger = logger.bind(
upload_id=self.upload_id, upload_name=self.upload_name, user_name=user_name,
user=user.user_id, **kwargs)
upload_id=self.upload_id, upload_name=self.upload_name, main_author_name=main_author_name,
main_author=self.main_author, **kwargs)
return logger
def create(cls, **kwargs) -> 'Upload':
def create(cls, main_author: datamodel.User = None, **kwargs) -> 'Upload':
Creates a new upload for the given user, a user given upload_name is optional.
Creates a new upload for the given main_author, a user given upload_name is optional.
It will populate the record with a signed url and pending :class:`UploadProc`.
The upload will be already saved to the database.
user: The user that created the upload.
main_author: The main author of the upload.
# use kwargs to keep compatibility with super method
user: datamodel.User = kwargs['user']
assert main_author is not None, 'No `main_author` provided.'
if 'upload_id' not in kwargs:
self = super().create(**kwargs)
return self
......@@ -916,19 +913,19 @@ class Upload(Proc):
process to import the bundle data.
bundle_info = bundle.bundle_info
keys_exist(bundle_info, ('upload_id', 'upload.user_id'), 'Missing key in bundle_info.json: {key}')
keys_exist(bundle_info, ('upload_id', 'upload.main_author'), 'Missing key in bundle_info.json: {key}')
upload_id = bundle_info['upload_id']
user_id = bundle_info['upload']['user_id']
main_author = bundle_info['upload']['main_author']
assert False, f'Upload with id {upload_id} already exists'
except KeyError:
upload_user = datamodel.User.get(user_id=user_id)
assert upload_user is not None, f'Invalid user_id: {user_id}'
main_author_user = datamodel.User.get(user_id=main_author)
assert main_author_user is not None, f'Invalid main_author: {main_author}'
return Upload.create(
def delete(self):
''' Deletes this upload process state entry and its calcs. '''
......@@ -1359,7 +1356,7 @@ class Upload(Proc):
logger = self.get_logger()
# send email about process finish
if not self.publish_directly:
user = self.uploader
user = self.main_author_user
name = '%s %s' % (user.first_name, user.last_name)
message = '\n'.join([
'Dear %s,' % name,
......@@ -1540,8 +1537,8 @@ class Upload(Proc):
need_to_repack = True
need_to_reindex = True
self.embargo_length = upload_metadata.embargo_length
if upload_metadata.uploader is not None:
self.user_id = upload_metadata.uploader.user_id
if upload_metadata.main_author is not None:
self.main_author = upload_metadata.main_author.user_id
need_to_reindex = True
if upload_metadata.upload_create_time is not None:
self.upload_create_time = upload_metadata.upload_create_time
......@@ -1685,7 +1682,7 @@ class Upload(Proc):
'upload._id', 'upload.user_id',
'upload._id', 'upload.main_author',
'upload.upload_create_time', 'upload.process_status', 'upload.license',
......@@ -1719,11 +1716,11 @@ class Upload(Proc):
if published:
assert bundle_info['entries'], 'Upload published but no entries in bundle_info.json'
# Check user references