Commit f782e11e authored by Markus Scheidgen's avatar Markus Scheidgen
Browse files

Merge branch 'new-edit-api' into 'v1.0.0'

New api for editing metadata

See merge request !457
parents 39b7e362 08253e66
Pipeline #115421 passed with stages
in 32 minutes and 6 seconds
......@@ -377,7 +377,7 @@ class Api {
}
}
})
return this.post('entries/edit', edit)
return this.post('entries/edit_v0', edit)
}
}
......
......@@ -30,13 +30,14 @@ from pydantic import ( # pylint: disable=unused-import
validator,
root_validator,
)
from pydantic.main import create_model
import datetime
import numpy as np
import re
import fnmatch
import json
from nomad import datamodel # pylint: disable=unused-import
from nomad import datamodel, metainfo # pylint: disable=unused-import
from nomad.utils import strip
from nomad.metainfo import Datetime, MEnum
from nomad.metainfo.elasticsearch_extension import DocumentType, material_entry_type, material_type
......@@ -51,6 +52,44 @@ Value = Union[StrictInt, StrictFloat, StrictBool, str, datetime.datetime]
ComparableValue = Union[StrictInt, StrictFloat, str, datetime.datetime]
class Owner(str, enum.Enum):
    '''
    The `owner` allows you to limit the scope of the search based on entry ownership.
    This is useful, if you only want to search among all publicly downloadable
    entries, or only among your own entries, etc.

    These are the possible owner values and their meaning:

    * `all`: Consider all entries.
    * `public` (default): Consider all entries that can be publicly downloaded,
        i.e. only published entries without embargo
    * `user`: Only consider entries that belong to you.
    * `shared`: Only consider entries that belong to you or are shared with you.
    * `visible`: Consider all entries that are visible to you. This includes
        entries with embargo or unpublished entries that belong to you or are
        shared with you.
    * `staging`: Only search through unpublished entries.
    '''

    # There seems to be a slight bug in fast API. When it creates the example in OpenAPI
    # it will ignore any given default or example and simply take the first enum value.
    # Therefore, we put public first, which is the most default and safe in most contexts.
    public = 'public'
    all_ = 'all'  # trailing underscore in the member name; the value is the plain string 'all'
    visible = 'visible'
    shared = 'shared'
    user = 'user'
    staging = 'staging'
    # NOTE(review): `admin` is not described in the docstring above; presumably it is
    # reserved for admin users -- confirm and document.
    admin = 'admin'
class Direction(str, enum.Enum):
    '''
    Order direction, either ascending (`asc`) or descending (`desc`)
    '''

    # The class mixes in `str`, so each member is itself a string equal to its value.
    asc = 'asc'
    desc = 'desc'
class HTTPExceptionModel(BaseModel):
detail: str
......@@ -156,36 +195,6 @@ Not.update_forward_refs()
Nested.update_forward_refs()
class Owner(str, enum.Enum):
    '''
    The `owner` allows you to limit the scope of the search based on entry ownership.
    This is useful, if you only want to search among all publicly downloadable
    entries, or only among your own entries, etc.

    These are the possible owner values and their meaning:

    * `all`: Consider all entries.
    * `public` (default): Consider all entries that can be publicly downloaded,
        i.e. only published entries without embargo
    * `user`: Only consider entries that belong to you.
    * `shared`: Only consider entries that belong to you or are shared with you.
    * `visible`: Consider all entries that are visible to you. This includes
        entries with embargo or unpublished entries that belong to you or are
        shared with you.
    * `staging`: Only search through unpublished entries.
    '''

    # There seems to be a slight bug in fast API. When it creates the example in OpenAPI
    # it will ignore any given default or example and simply take the first enum value.
    # Therefore, we put public first, which is the most default and safe in most contexts.
    public = 'public'
    all_ = 'all'  # trailing underscore in the member name; the value is the plain string 'all'
    visible = 'visible'
    shared = 'shared'
    user = 'user'
    staging = 'staging'
    # NOTE(review): `admin` is not described in the docstring above; presumably it is
    # reserved for admin users -- confirm and document.
    admin = 'admin'
class WithQuery(BaseModel):
owner: Optional[Owner] = Body('public')
query: Optional[Query] = Body(
......@@ -424,14 +433,6 @@ class QueryParameters:
return WithQuery(query=query, owner=owner)
class Direction(str, enum.Enum):
    '''
    Order direction, either ascending (`asc`) or descending (`desc`)
    '''

    # The class mixes in `str`, so each member is itself a string equal to its value.
    asc = 'asc'
    desc = 'desc'
class MetadataRequired(BaseModel):
''' Defines which metadata quantities are included or excluded in the response. '''
......@@ -1051,6 +1052,52 @@ class Metadata(WithQueryAndPagination):
'''))
class MetadataEditListAction(BaseModel):
    '''
    Defines an action to perform on a list quantity. This enables users to add and remove values.
    '''
    # NOTE(review): `op` is declared as a plain `str`; this model itself does not restrict
    # it to `set`/`add`/`remove` -- presumably that is validated by the consumer; confirm.
    op: str = Field(description=strip('''
        Defines the type of operation (either `set`, `add` or `remove`)'''))
    # Accepts either a single string or a list of strings.
    values: Union[str, List[str]] = Field(description=strip('''
        The value or values to set/add/remove (string or list of strings)'''))
# Build the pydantic model `MetadataEditActions` dynamically: one optional field per
# quantity definition found in the `EditableUserMetadata` category.
_metadata_edit_actions_fields = {}
for quantity in datamodel.EditableUserMetadata.m_def.definitions:
    if not quantity.is_scalar:
        # Non-scalar quantities accept a single value, a list of values, or an explicit
        # list action (set/add/remove).
        pydantic_type = Union[str, List[str], MetadataEditListAction]
    elif quantity.type in (str, int, float, bool):
        # Primitive scalar types are used as they are.
        pydantic_type = quantity.type
    else:
        # Every other scalar type is accepted as a string.
        pydantic_type = str

    # Quantities annotated with the admin auth level get a warning in their description.
    description = (
        '**NOTE:** Only editable by admin user'
        if getattr(quantity, 'a_auth_level', None) == datamodel.AuthLevel.admin
        else None)

    _metadata_edit_actions_fields[quantity.name] = (
        Optional[pydantic_type], Field(description=description))

MetadataEditActions = create_model('MetadataEditActions', **_metadata_edit_actions_fields)  # type: ignore
class MetadataEditRequest(WithQuery):
    '''
    Defines a request to edit metadata.

    Extends `WithQuery` with the metadata to apply (`metadata`), optional
    per-entry metadata (`entries`, keyed by the kind of key named in `entries_key`) and a
    `verify_only` flag.
    '''
    metadata: Optional[MetadataEditActions] = Field(  # type: ignore
        description=strip('''
            Metadata to set, on the upload and/or selected entries.'''))
    # The descriptions below refer to the fields by their actual names (`entries_key`,
    # `entries`); they previously mentioned names that do not exist on this model.
    entries: Optional[Dict[str, MetadataEditActions]] = Field(  # type: ignore
        description=strip('''
            An optional dictionary, specifying metadata to set on individual entries. The field
            `entries_key` defines which type of key is used in the dictionary to identify
            the entries. Note, only quantities defined on the entry level can be set using this method.'''))
    entries_key: Optional[str] = Field(
        default='calc_id', description=strip('''
            Defines which type of key is used in `entries`. Default is `calc_id`.'''))
    verify_only: Optional[bool] = Field(
        default=False, description=strip('''
            Do not execute the request, just verifies it and provides detailed feedback on
            encountered errors etc.'''))
class Files(BaseModel):
''' Configures the download of files. '''
compress: Optional[bool] = Field(
......
......@@ -48,8 +48,9 @@ from ..utils import (
create_download_stream_zipped, create_download_stream_raw_file,
DownloadItem, create_responses)
from ..models import (
Aggregation, Pagination, PaginationResponse, MetadataPagination, TermsAggregation, WithQuery, WithQueryAndPagination, MetadataRequired,
MetadataResponse, Metadata, Files, Query, User, Owner,
Aggregation, Pagination, PaginationResponse, MetadataPagination, TermsAggregation,
WithQuery, WithQueryAndPagination, MetadataRequired, MetadataResponse, Metadata,
MetadataEditRequest, Files, Query, User, Owner,
QueryParameters, metadata_required_parameters, files_parameters, metadata_pagination_parameters,
HTTPExceptionModel)
......@@ -280,6 +281,18 @@ _bad_path_response = status.HTTP_404_NOT_FOUND, {
'model': HTTPExceptionModel,
'description': strip('File or directory not found.')}
# Error response declarations for the edit endpoint. Each one is a
# (HTTP status code, response spec) pair; all three are passed to `create_responses`
# in the route declaration of `post_entries_edit`.

# 400: the edit request could not be executed.
_bad_edit_request = status.HTTP_400_BAD_REQUEST, {
    'model': HTTPExceptionModel,
    'description': strip('Edit request could not be executed.')}

# 401: the user lacks the permissions required for the edit.
_bad_edit_request_authorization = status.HTTP_401_UNAUTHORIZED, {
    'model': HTTPExceptionModel,
    'description': strip('Not enough permissions to execute edit request.')}

# 404: the request did not match any entries.
_bad_edit_request_empty_query = status.HTTP_404_NOT_FOUND, {
    'model': HTTPExceptionModel,
    'description': strip('No matching entries found.')}
_raw_download_response = 200, {
'content': {'application/zip': {}},
'description': strip('''
......@@ -1161,7 +1174,7 @@ _editable_quantities = {
@router.post(
'/edit',
'/edit_v0',
tags=[metadata_tag],
summary='Edit the user metadata of a set of entries',
response_model=EntryMetadataEditResponse,
......@@ -1328,3 +1341,39 @@ async def post_entry_metadata_edit(
datamodel.Dataset.m_def.a_mongo.objects(dataset_id=dataset).delete()
return data
@router.post(
    '/edit',
    tags=[metadata_tag],
    summary='Edit the user metadata of a set of entries',
    response_model=MetadataEditRequest,
    response_model_exclude_unset=True,
    response_model_exclude_none=True,
    responses=create_responses(
        _bad_edit_request, _bad_edit_request_authorization, _bad_edit_request_empty_query))
async def post_entries_edit(
        request: Request,
        data: MetadataEditRequest,
        user: User = Depends(create_user_dependency(required=True))):
    '''
    Updates the metadata of the specified entries.

    **Note:**
      - Only admins can edit some of the fields.
      - Only entry level attributes (like `comment`, `references` etc.) can be set using
        this endpoint; upload level attributes (like `upload_name`, `coauthors`, embargo
        settings, etc) need to be set through the endpoint **uploads/upload_id/edit**.
      - If the upload is published, the only operation permitted using this endpoint is to
        edit the entries in datasets that were created by the current user.
    '''
    # `data` is not referenced below: the raw JSON body is forwarded to the handler
    # instead of the parsed model. NOTE(review): declaring `data` presumably makes FastAPI
    # validate and document the body against `MetadataEditRequest` -- confirm.
    edit_request_json = await request.json()
    try:
        # `upload_id=None`: this endpoint is not bound to a single upload.
        verified_json = proc.MetadataEditRequestHandler.edit_metadata(
            edit_request_json=edit_request_json, upload_id=None, user=user)
        return verified_json
    except RequestValidationError:
        raise  # A problem which we have handled explicitly. Fastapi does json conversion.
    except Exception as e:
        # The upload is processing or some kind of unexpected error has occurred.
        # Chain the original exception so that the cause is kept in tracebacks/logs.
        raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=str(e)) from e
......@@ -25,23 +25,28 @@ from fastapi import (
APIRouter, Request, File, UploadFile, status, Depends, Path, Query as FastApiQuery,
HTTPException)
from fastapi.responses import StreamingResponse
from fastapi.exceptions import RequestValidationError
from nomad import utils, config, files, datamodel
from nomad.files import UploadFiles, StagingUploadFiles, UploadBundle, is_safe_relative_path
from nomad.processing import Upload, Calc, ProcessAlreadyRunning, ProcessStatus
from nomad.processing import Upload, Calc, ProcessAlreadyRunning, ProcessStatus, MetadataEditRequestHandler
from nomad.utils import strip
from nomad.search import search
from .auth import create_user_dependency, generate_upload_token
from ..models import (
BaseModel, MetadataPagination, User, Direction, Pagination, PaginationResponse, HTTPExceptionModel,
Files, files_parameters, WithQuery)
MetadataPagination, User, Direction, Pagination, PaginationResponse, HTTPExceptionModel,
Files, files_parameters, WithQuery, MetadataEditRequest)
from ..utils import (
parameter_dependency_from_model, create_responses, DownloadItem,
create_download_stream_zipped, create_download_stream_raw_file, create_stream_from_string)
router = APIRouter()

# Tag names referenced by the `tags=[...]` argument of the route declarations in this module.
default_tag = 'uploads'
metadata_tag = 'uploads/metadata'
raw_tag = 'uploads/raw'
action_tag = 'uploads/action'
bundle_tag = 'uploads/bundle'

# Module-level logger, named after this module.
logger = utils.get_logger(__name__)
......@@ -369,7 +374,7 @@ async def get_command_examples(user: User = Depends(create_user_dependency(requi
@router.get(
'', tags=[default_tag],
'', tags=[metadata_tag],
summary='List uploads of authenticated user.',
response_model=UploadProcDataQueryResponse,
responses=create_responses(_not_authorized, _bad_pagination),
......@@ -430,7 +435,7 @@ async def get_uploads(
@router.get(
'/{upload_id}', tags=[default_tag],
'/{upload_id}', tags=[metadata_tag],
summary='Get a specific upload',
response_model=UploadProcDataResponse,
responses=create_responses(_upload_not_found, _not_authorized_to_upload),
......@@ -453,7 +458,7 @@ async def get_upload(
@router.get(
'/{upload_id}/entries', tags=[default_tag],
'/{upload_id}/entries', tags=[metadata_tag],
summary='Get the entries of the specific upload as a list',
response_model=EntryProcDataQueryResponse,
responses=create_responses(_upload_not_found, _not_authorized_to_upload, _bad_pagination),
......@@ -515,7 +520,7 @@ async def get_upload_entries(
@router.get(
'/{upload_id}/entries/{entry_id}', tags=[default_tag],
'/{upload_id}/entries/{entry_id}', tags=[metadata_tag],
summary='Get a specific entry for a specific upload',
response_model=EntryProcDataResponse,
responses=create_responses(_entry_not_found, _not_authorized_to_entry),
......@@ -552,7 +557,7 @@ async def get_upload_entry(
@router.get(
'/{upload_id}/raw/{path:path}', tags=[default_tag],
'/{upload_id}/raw/{path:path}', tags=[raw_tag],
summary='Get the raw files and folders for a given upload and path.',
response_class=StreamingResponse,
responses=create_responses(
......@@ -684,7 +689,7 @@ async def get_upload_raw_path(
@router.put(
'/{upload_id}/raw/{path:path}', tags=[default_tag],
'/{upload_id}/raw/{path:path}', tags=[raw_tag],
summary='Put (add or replace) files to an upload at the specified path.',
response_class=StreamingResponse,
responses=create_responses(
......@@ -764,7 +769,7 @@ async def put_upload_raw_path(
@router.delete(
'/{upload_id}/raw/{path:path}', tags=[default_tag],
'/{upload_id}/raw/{path:path}', tags=[raw_tag],
summary='Delete file or folder located at the specified path in the specified upload.',
response_model=UploadProcDataResponse,
responses=create_responses(_upload_not_found, _not_authorized_to_upload, _bad_request),
......@@ -919,7 +924,7 @@ async def post_upload(
@router.put(
'/{upload_id}/metadata', tags=[default_tag],
'/{upload_id}/metadata', tags=[metadata_tag],
summary='Updates the metadata of the specified upload.',
response_model=UploadProcDataResponse,
responses=create_responses(_upload_not_found, _not_authorized_to_upload, _bad_request),
......@@ -950,6 +955,47 @@ async def put_upload_metadata(
return UploadProcDataResponse(upload_id=upload_id, data=_upload_to_pydantic(upload))
@router.post(
    '/{upload_id}/edit', tags=[metadata_tag],
    summary='Updates the metadata of the specified upload.',
    response_model=MetadataEditRequest,
    responses=create_responses(_upload_not_found, _not_authorized_to_upload, _bad_request),
    response_model_exclude_unset=True,
    response_model_exclude_none=True)
async def post_upload_edit(
        request: Request,
        data: MetadataEditRequest,
        upload_id: str = Path(..., description='The unique id of the upload.'),
        user: User = Depends(create_user_dependency(required=True))):
    '''
    Updates the metadata of the specified upload and entries. An optional `query` can be
    specified to select only some of the entries of the upload (the query results are
    automatically restricted to the specified upload).

    **Note:**
      - Only admins can edit some of the fields.
      - The embargo of a published upload is lifted by setting the `embargo_length` attribute
        to 0.
      - If the upload is published, the only operations permitted using this endpoint are to
        lift the embargo, i.e. set `embargo_length` to 0, and to edit the entries in datasets
        that were created by the current user.
      - If a query is specified, it is not possible to edit upload level metadata (like
        `upload_name`, `coauthors`, etc.), as the purpose of queries is to select only a
        subset of the upload entries to edit, but changing upload level metadata would affect
        **all** entries of the upload.
    '''
    # `data` is not referenced below: the raw JSON body is forwarded to the handler
    # instead of the parsed model. NOTE(review): declaring `data` presumably makes FastAPI
    # validate and document the body against `MetadataEditRequest` -- confirm.
    edit_request_json = await request.json()
    try:
        verified_json = MetadataEditRequestHandler.edit_metadata(
            edit_request_json=edit_request_json, upload_id=upload_id, user=user)
        return verified_json
    except RequestValidationError:
        raise  # A problem which we have handled explicitly. Fastapi does json conversion.
    except Exception as e:
        # The upload is processing or some kind of unexpected error has occurred.
        # Chain the original exception so that the cause is kept in tracebacks/logs.
        raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=str(e)) from e
@router.delete(
'/{upload_id}', tags=[default_tag],
summary='Delete an upload',
......@@ -988,7 +1034,7 @@ async def delete_upload(
@router.post(
'/{upload_id}/action/publish', tags=[default_tag],
'/{upload_id}/action/publish', tags=[action_tag],
summary='Publish an upload',
response_model=UploadProcDataResponse,
responses=create_responses(_upload_not_found, _not_authorized_to_upload, _bad_request),
......@@ -1071,7 +1117,7 @@ async def post_upload_action_publish(
@router.post(
'/{upload_id}/action/process', tags=[default_tag],
'/{upload_id}/action/process', tags=[action_tag],
summary='Manually triggers processing of an upload.',
response_model=UploadProcDataResponse,
responses=create_responses(_upload_not_found, _not_authorized_to_upload, _bad_request),
......@@ -1098,8 +1144,43 @@ async def post_upload_action_process(
data=_upload_to_pydantic(upload))
@router.post(
    '/{upload_id}/action/lift-embargo', tags=[action_tag],
    summary='Lifts the embargo of an upload.',
    response_model=UploadProcDataResponse,
    responses=create_responses(_upload_not_found, _not_authorized_to_upload, _bad_request),
    response_model_exclude_unset=True,
    response_model_exclude_none=True)
async def post_upload_action_lift_embargo(
        upload_id: str = Path(
            ...,
            description='The unique id of the upload to lift the embargo for.'),
        user: User = Depends(create_user_dependency(required=True))):
    '''
    Lifts the embargo of an upload.

    The upload must be published and must currently have an embargo; otherwise the
    request is rejected with a 400 response.
    '''
    # Published uploads are explicitly allowed here, and admin rights are not required for them.
    upload = _get_upload_with_write_access(
        upload_id, user, include_published=True, published_requires_admin=False)
    _check_upload_not_processing(upload)

    if not upload.published:
        raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=strip('''
            Upload is not published, no embargo to lift.'''))
    if not upload.with_embargo:
        raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=strip('''
            Upload has no embargo.'''))

    # Lift the embargo using MetadataEditRequestHandler.edit_metadata
    try:
        MetadataEditRequestHandler.edit_metadata(
            edit_request_json={'metadata': {'embargo_length': 0}}, upload_id=upload_id, user=user)
        # Reload the upload before building the response, now that the edit has been made.
        upload.reload()
        return UploadProcDataResponse(
            upload_id=upload_id,
            data=_upload_to_pydantic(upload))
    except Exception as e:
        # Should only happen if the upload just started processing or something unexpected happens.
        # Chain the original exception so that the cause is kept in tracebacks/logs.
        raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=str(e)) from e
@router.get(
'/bundle/{upload_id}', tags=[default_tag],
'/{upload_id}/bundle', tags=[bundle_tag],
summary='Gets an *upload bundle* for the specified upload.',
response_class=StreamingResponse,
responses=create_responses(
......@@ -1145,7 +1226,7 @@ async def get_upload_bundle(
@router.post(
'/bundle', tags=[default_tag],
'/bundle', tags=[bundle_tag],
summary='Posts an *upload bundle* to this NOMAD deployment.',
response_model=UploadProcDataResponse,
responses=create_responses(_not_authorized, _bad_request),
......@@ -1430,9 +1511,10 @@ def _get_upload_with_write_access(
raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=strip('''
The specified upload_id was not found.'''))
upload = mongodb_query.first()
if upload.main_author != str(user.user_id) and not user.is_admin:
raise HTTPException(status_code=status.HTTP_401_UNAUTHORIZED, detail=strip('''
You do not have write access to the specified upload.'''))
if not user.is_admin and upload.main_author != str(user.user_id):
if not upload.coauthors or str(user.user_id) not in upload.coauthors:
raise HTTPException(status_code=status.HTTP_401_UNAUTHORIZED, detail=strip('''
You do not have write access to the specified upload.'''))
if upload.published:
if not include_published:
raise HTTPException(status_code=status.HTTP_401_UNAUTHORIZED, detail=strip('''
......
......@@ -95,7 +95,9 @@ def lift_embargo(dry, parallel):
upload.upload_id, upload.publish_time, embargo_length))
if not dry:
upload.set_upload_metadata({'embargo_length': 0})
upload.edit_upload_metadata(
edit_request_json=dict(metadata={'embargo_length': 0}),
user_id=config.services.admin_user_id)
return
......
......@@ -295,9 +295,10 @@ def chown(ctx, username, uploads):
print('%d uploads selected, changing owner ...' % uploads.count())
user = datamodel.User.get(username=username)
upload_metadata = datamodel.UploadMetadata(main_author=user)
for upload in uploads:
upload.set_upload_metadata(upload_metadata.m_to_dict())
upload.edit_upload_metadata(
edit_request_json=dict(metadata={'main_author': user.user_id}),
user_id=config.services.admin_user_id)
@uploads.command(help='Reset the processing state.')
......
......@@ -180,7 +180,7 @@ def integrationtests(auth: api.Auth, skip_parsers: bool, skip_publish: bool, ski
'datasets': [{'value': dataset}]}
response = api.post(
'entries/edit',
'entries/edit_v0',
data=json.dumps(dict(actions=actions, **query_request_params)),
auth=auth)
assert response.status_code == 200, response.text
......
......@@ -77,7 +77,7 @@ import sys
from nomad.metainfo import Environment
from .datamodel import (
Dataset, User, Author, EditableUserMetadata, UserProvidableMetadata,
Dataset, User, Author, EditableUserMetadata, UserProvidableMetadata, AuthLevel,
UploadMetadata, MongoUploadMetadata, MongoEntryMetadata, MongoSystemMetadata,
EntryMetadata, EntryArchive)
from .optimade import OptimadeEntry, Species
......
......@@ -19,6 +19,7 @@
''' All generic entry metadata and related classes. '''
from typing import List, Any
from enum import Enum
from cachetools import cached, TTLCache
from elasticsearch_dsl import analyzer, tokenizer
......@@ -40,6 +41,20 @@ from .metainfo.workflow import Workflow # noqa
from .metainfo.common_experimental import Measurement # noqa
class AuthLevel(int, Enum):
    '''
    Used to decorate fields with the authorization level required to edit them (using `a_auth_level`).

    * `none`: No authorization required
    * `coauthor`: You must be at least a coauthor of the upload to edit the field.
    * `main_author`: You must be the main author of the upload to edit the field.
    * `admin`: You must be admin to edit the field.
    '''
    # The class mixes in `int`, so members can be compared numerically; the values
    # increase with the privilege required (none < coauthor < main_author < admin).
    none = 0
    coauthor = 1
    main_author = 2
    admin = 3
# Analyzer named 'path_analyzer', built on a 'pattern' tokenizer with the pattern '/'.
# NOTE(review): presumably this splits path strings into their '/'-separated segments --
# confirm against the Elasticsearch pattern tokenizer documentation.
path_analyzer = analyzer(
    'path_analyzer',
    tokenizer=tokenizer('path_tokenizer', 'pattern', pattern='/'))
......@@ -404,13 +419,14 @@ class EntryMetadata(metainfo.MSection):
a_elasticsearch=Elasticsearch(material_entry_type, metrics=dict(n_uploads='cardinality')))
upload_name = metainfo.Quantity(
type=str, categories=[MongoUploadMetadata],
type=str, categories=[MongoUploadMetadata, EditableUserMetadata],
description='The user provided upload name',
a_elasticsearch=Elasticsearch())
upload_create_time = metainfo.Quantity(
type=metainfo.Datetime, categories=[MongoUploadMetadata],
type=metainfo.Datetime, categories=[MongoUploadMetadata, EditableUserMetadata],
description='The date and time when the upload was created in nomad',
a_auth_level=AuthLevel.admin,
a_elasticsearch=Elasticsearch(material_entry_type))
calc_id = metainfo.Quantity(
......@@ -421,17 +437,19 @@ class EntryMetadata(metainfo.MSection):
a_elasticsearch=Elasticsearch(material_entry_type, metrics=dict(n_entries='cardinality')))
calc_hash = metainfo.Quantity(
# Note: This attribute is not stored in ES
type=str,
description='A raw file content based checksum/hash',
categories=[MongoEntryMetadata])
entry_create_time = metainfo.Quantity(
type=metainfo.Datetime, categories=[MongoEntryMetadata, MongoSystemMetadata],
type=metainfo.Datetime, categories=[MongoEntryMetadata, MongoSystemMetadata, EditableUserMetadata],
description='The date and time when the entry was created in nomad',
a_flask=dict(admin_only=True),
a_auth_level=AuthLevel.admin,
a_elasticsearch=Elasticsearch(material_entry_type))
last_edit_time = metainfo.Quantity(
# Note: This attribute is not stored in ES
type=metainfo.Datetime, categories=[MongoEntryMetadata],
description='The date and time the user metadata was last edited.')
......@@ -473,7 +491,7 @@ class EntryMetadata(metainfo.MSection):
a_elasticsearch=Elasticsearch(entry_type))
external_id = metainfo.Quantity(
type=str, categories=[MongoEntryMetadata, UserProvidableMetadata],
type=str, categories=[MongoEntryMetadata, UserProvidableMetadata, EditableUserMetadata],
description='''
A user provided external id. Usually the id for an entry in an external database
where the data was imported from.
......@@ -487,9 +505,9 @@ class EntryMetadata(metainfo.MSection):
a_elasticsearch=Elasticsearch(material_entry_type))