Commit a85cf18a authored by David Sikter's avatar David Sikter
Browse files

Adding support for queries

parent 952694ad
......@@ -962,6 +962,7 @@ async def put_upload_metadata(
response_model_exclude_unset=True,
response_model_exclude_none=True)
async def post_upload_edit(
request: Request,
data: MetadataEditRequest,
upload_id: str = Path(..., description='The unique id of the upload.'),
user: User = Depends(create_user_dependency(required=True))):
......@@ -982,8 +983,9 @@ async def post_upload_edit(
subset of the upload entries to edit, but changing upload level metadata would affect
all entries of the upload.
'''
edit_request_json = await request.json()
response, status_code = MetadataEditRequestHandler.edit_metadata(
edit_request=data, upload_id=upload_id, user=user)
edit_request_json=edit_request_json, upload_id=upload_id, user=user)
if status_code != status.HTTP_200_OK and not data.verify_only:
raise HTTPException(status_code=status_code, detail=response.error)
return response
......
......@@ -96,7 +96,7 @@ def lift_embargo(dry, parallel):
if not dry:
upload.edit_upload_metadata(
edit_request=dict(metadata={'embargo_length': 0}),
edit_request_json=dict(metadata={'embargo_length': 0}),
user_id=config.services.admin_user_id)
return
......
......@@ -297,7 +297,7 @@ def chown(ctx, username, uploads):
user = datamodel.User.get(username=username)
for upload in uploads:
upload.edit_upload_metadata(
edit_request=dict(metadata={'main_author': user.user_id}),
edit_request_json=dict(metadata={'main_author': user.user_id}),
user_id=config.services.admin_user_id)
......
......@@ -34,6 +34,7 @@ from mongoengine import (
from pymongo import UpdateOne
from structlog import wrap_logger
from contextlib import contextmanager
import copy
import os.path
from datetime import datetime, timedelta
import hashlib
......@@ -56,7 +57,9 @@ from nomad.datamodel import (
from nomad.archive import (
write_partial_archive_to_mongo, delete_partial_archives_from_mongo)
from nomad.datamodel.encyclopedia import EncyclopediaMetadata
from nomad.app.v1.models import MetadataEditRequest, MetadataEditRequestResponse
from nomad.app.v1.models import (
MetadataEditRequest, MetadataEditRequestResponse, And,
Aggregation, TermsAggregation, MetadataPagination, MetadataRequired)
from nomad.search import update_metadata as es_update_metadata
section_metadata = datamodel.EntryArchive.metadata.name
......@@ -137,24 +140,26 @@ def generate_entry_id(upload_id: str, mainfile: str) -> str:
class MetadataEditRequestHandler:
'''
Class for handling a request to edit metadata. The request may originate
either from metadata files in the raw directory or from a :class:`MetadataEditRequest`.
If the edit request is limited to a specific upload, `upload_id` should be specified
(only when this is the case can upload level metadata be edited).
Class for handling a request to edit metadata. The request may originate either from
metadata files in the raw directory or from a json dictionary complying with the
:class:`MetadataEditRequest` format. If the edit request is limited to a specific upload,
`upload_id` should be specified (only when this is the case can upload level metadata be edited).
'''
@classmethod
def edit_metadata(
cls, edit_request: MetadataEditRequest, upload_id: str,
cls, edit_request_json: Dict[str, Any], upload_id: str,
user: datamodel.User) -> Tuple[MetadataEditRequestResponse, int]:
'''
Method to verify and execute a request to edit metadata from a certain user. Optionally,
the request could be restricted to a single upload by specifying `upload_id`.
If `edit_request.verify_only` is True, only verification is carried out (i.e. nothing
is actually updated). To just run the verification should be quick in comparison to
actually executing the request (which may take some time and requires one or more
@process to finish). If `edit_request.verify_only` is False and the request passes
the verification step, we will send it for execution, by initiating the
@process :func:`edit_upload_metadata` for each affected upload.
Method to verify and execute a generic request to edit metadata from a certain user.
The request is specified as a json dictionary. Optionally, the request could be restricted
to a single upload by specifying `upload_id` (this is necessary when editing upload
level attributes). If `edit_request_json` has `verify_only` set to True, only
verification is carried out (i.e. nothing is actually updated). Running just the
verification should be quick in comparison to actually executing the request (which
may take some time and requires one or more @process to finish). If the request passes
the verification step and `verify_only` is not set to True, we will send the request
for execution, by initiating the @process :func:`edit_upload_metadata` for each affected
upload.
The method returns a :class:`MetadataEditRequestResponse` with feedback about how it
went, and an HTTP-style status code (int, 200 if successful, otherwise an error code).
......@@ -163,14 +168,12 @@ class MetadataEditRequestHandler:
'''
logger = utils.get_logger('nomad.processing.edit_metadata')
handler = MetadataEditRequestHandler(
logger, user, edit_request=edit_request, upload_id=upload_id)
logger, user, edit_request_json=edit_request_json, upload_id=upload_id)
# Validate the request
handler.validate_request()
# Create response
if not handler.error and not edit_request.verify_only:
# Try to execute for all affected uploads
request_dict = edit_request.dict()
# First check if any of the affected uploads are processing
if not handler.error and not edit_request_json.get('verify_only'):
# Check if any of the affected uploads are processing
for upload in handler.affected_uploads:
upload.reload()
if upload.process_running:
......@@ -179,7 +182,7 @@ class MetadataEditRequestHandler:
# Looks good, try to trigger processing
for upload in handler.affected_uploads:
try:
upload.edit_upload_metadata(request_dict, user.user_id) # Trigger the process
upload.edit_upload_metadata(edit_request_json, user.user_id) # Trigger the process
except Exception as e:
handler._fatal_error(f'Failed to start process for upload {upload.upload_id}: {e}')
return handler.create_request_response(), 400
......@@ -187,16 +190,16 @@ class MetadataEditRequestHandler:
def __init__(
self, logger, user: datamodel.User,
edit_request: MetadataEditRequest = None,
edit_request_json: Dict[str, Any] = None,
upload_files: StagingUploadFiles = None,
upload_id: str = None):
# Initialization
assert user, 'Must specify `user`'
assert (edit_request is None) != (upload_files is None), (
assert (edit_request_json is None) != (upload_files is None), (
'Must specify either `edit_request` or `upload_files`')
self.logger = logger
self.user = user
self.edit_request = edit_request
self.edit_request_json = edit_request_json
self.upload_files = upload_files
self.upload_id = upload_id
......@@ -211,6 +214,7 @@ class MetadataEditRequestHandler:
self.root_metadata: Dict[str, Any] = None # The metadata specified at the top/root level
# Specific to the MetadataEditRequest case
self.edit_request: MetadataEditRequest = None
self.affected_uploads: List['Upload'] = None # A MetadataEditRequest may involve multiple uploads
self.entries_metadata: Dict[str, Dict[str, Any]] = {} # Metadata specified for individual entries
......@@ -220,8 +224,10 @@ class MetadataEditRequestHandler:
def validate_request(self):
''' Validates the provided :class:`MetadataEditRequest` '''
try:
if not self.edit_request:
return self._fatal_error('No `edit_request`')
self.edit_request = MetadataEditRequest(**self.edit_request_json)
except Exception as e:
return self._fatal_error(f'Failed to parse request json: {e}')
try:
if not self.upload_id and not self.edit_request.query:
return self._fatal_error('Must specify `query`')
if self.edit_request.entries and not self.edit_request.entries_key:
......@@ -246,7 +252,10 @@ class MetadataEditRequestHandler:
if can_edit_upload_fields and self.edit_request.metadata:
embargo_length = self.edit_request.metadata.embargo_length # type: ignore
self.affected_uploads = self._find_request_uploads()
try:
self.affected_uploads = self._find_request_uploads()
except Exception as e:
return self._fatal_error('Could not evaluate query: ' + str(e))
if not self.affected_uploads:
return self._fatal_error('No matching uploads/entries found', 404)
for upload in self.affected_uploads:
......@@ -541,17 +550,22 @@ class MetadataEditRequestHandler:
query = self.edit_request.query
if upload_id and query:
# Restrict query to the specified upload
assert False, 'query not yet supported' # TODO
return 'todo'
return And(**{'and': [{'upload_id': upload_id}, query]})
return query
def _find_request_uploads(self) -> List['Upload']:
'''
Returns a list of :class:`Upload`s matching the edit request
'''
''' Returns a list of :class:`Upload`s matching the edit request. '''
query = self._restricted_request_query(self.upload_id)
if query:
assert False, 'query not supported yet'
# Perform the search, aggregating by upload_id
search_response = search.search(
user_id=self.user.user_id,
owner=self.edit_request.owner,
query=query,
aggregations=dict(agg=Aggregation(terms=TermsAggregation(quantity='upload_id'))),
pagination=MetadataPagination(page_size=0))
terms = search_response.aggregations['agg'].terms # pylint: disable=no-member
return [Upload.get(bucket.value) for bucket in terms.data] # type: ignore
elif self.upload_id:
# Request just specifies an upload_id, no query
try:
......@@ -565,22 +579,29 @@ class MetadataEditRequestHandler:
query = self._restricted_request_query(upload.upload_id)
if query:
# We have a query. Execute it to get the entries.
assert False, 'query not yet supported' # TODO
search_result = search.search_iterator(
user_id=self.user.user_id,
owner=self.edit_request.owner,
query=query,
required=MetadataRequired(include=['calc_id']))
for result in search_result:
yield Calc.get(result['calc_id'])
else:
# We have no query. Return all entries for the upload
return Calc.objects(upload_id=upload.upload_id)
for entry in Calc.objects(upload_id=upload.upload_id):
yield entry
def create_request_response(self) -> MetadataEditRequestResponse:
''' Creates a :class:`MetadataEditRequestResponse` with the validation results. '''
verified_dict = self.edit_request.dict()
# Overwrite input values with verified values when possible
# Create response by updating the request json with the verified values when possible
verified_json = copy.deepcopy(self.edit_request_json)
if self.root_metadata:
verified_dict['metadata'].update(self.root_metadata)
verified_json['metadata'].update(self.root_metadata)
if self.entries_metadata:
for key, entry_metadata in verified_dict['entries'].items():
for key, entry_metadata in verified_json['entries'].items():
entry_metadata.update(self.entries_metadata.get(key, {}))
# Create response
response = MetadataEditRequestResponse(**verified_dict)
response = MetadataEditRequestResponse(**verified_json)
response.error = self.error
response.feedback = self.feedback
return response
......@@ -1995,21 +2016,21 @@ class Upload(Proc):
search.update_metadata(entries_metadata, update_materials=True, refresh=True)
@process
def edit_upload_metadata(self, edit_request: Dict[str, Any], user_id: str):
def edit_upload_metadata(self, edit_request_json: Dict[str, Any], user_id: str):
'''
A @process that executes a metadata edit request, restricted to a specific upload,
on behalf of the provided user. The `edit_request` should be a dictionary of the same
format as specified by the pydantic model :class:`MetadataEditRequest`, but using
primitive data types (i.e. dict, list, str, etc) so it can be sent via rabbitmq.
on behalf of the provided user. The `edit_request_json` should be a json dict of the
format specified by the pydantic model :class:`MetadataEditRequest` (we need to use
primitive data types, i.e. the json format, to be able to pass the request to a
rabbitmq task).
'''
logger = self.get_logger()
user = datamodel.User.get(user_id=user_id)
assert not edit_request.get('verify_only'), 'Request has verify_only'
edit_request_obj = MetadataEditRequest(**edit_request)
assert not edit_request_json.get('verify_only'), 'Request has verify_only'
# Validate the request (the @process could have been invoked directly, without previous validation)
handler = MetadataEditRequestHandler(
logger, user, edit_request=edit_request_obj, upload_id=self.upload_id)
logger, user, edit_request_json=edit_request_json, upload_id=self.upload_id)
handler.validate_request()
assert not handler.error, handler.error
......
......@@ -32,7 +32,7 @@ update the v1 materials index according to the performed changes. TODO this is o
partially implemented.
'''
from typing import Union, List, Iterable, Any, cast, Dict, Generator
from typing import Union, List, Iterable, Any, cast, Dict, Iterator, Generator
import json
import elasticsearch
from elasticsearch.exceptions import TransportError, RequestError
......@@ -1110,6 +1110,35 @@ def search(
return result
def search_iterator(
        owner: str = 'public',
        query: Union[Query, EsQuery] = None,
        order_by: str = 'calc_id',
        required: MetadataRequired = None,
        aggregations: Dict[str, Aggregation] = None,
        user_id: str = None,
        index: Index = entry_index) -> Iterator[Dict[str, Any]]:
    '''
    Works like :func:`search`, but returns an iterator for iterating over the results.
    Consequently, you cannot specify `pagination`, only `order_by`.

    The results are fetched lazily by calling :func:`search` repeatedly with a page
    size of 100, passing the `next_page_after_value` of each response as the
    `page_after_value` of the next request. Iteration stops when a response has no
    `next_page_after_value` or contains no data.

    Arguments:
        owner: Passed on to :func:`search`.
        query: Passed on to :func:`search`.
        order_by: The quantity used to order (and page through) the results.
        required: Passed on to :func:`search`.
        aggregations: Passed on to :func:`search` for every page. Defaults to no
            aggregations (an empty dict).
        user_id: Passed on to :func:`search`.
        index: Passed on to :func:`search`.
    '''
    # Use a fresh dict per call instead of a shared mutable default argument.
    if aggregations is None:
        aggregations = {}

    page_after_value = None
    while True:
        response = search(
            owner=owner, query=query,
            pagination=MetadataPagination(
                page_size=100, page_after_value=page_after_value, order_by=order_by),
            required=required, aggregations=aggregations, user_id=user_id, index=index)

        page_after_value = response.pagination.next_page_after_value

        for result in response.data:
            yield result

        # No further page announced, or an empty page: we are done.
        if page_after_value is None or len(response.data) == 0:
            break
def _index(entries, **kwargs):
index_entries(entries, **kwargs)
......
......@@ -32,7 +32,6 @@ from tests.test_search import assert_search_upload
from tests.processing.test_edit_metadata import (
assert_metadata_edited, all_coauthor_metadata, all_admin_metadata)
from tests.app.v1.routers.common import assert_response
from nomad.app.v1.models import MetadataEditRequest
from nomad import config, files, infrastructure
from nomad.processing import Upload, Calc, ProcessStatus
from nomad.files import UploadFiles, StagingUploadFiles, PublicUploadFiles
......@@ -1109,7 +1108,27 @@ def test_put_upload_metadata(
pytest.param(
'test_user', 'id_empty_w', dict(
metadata=dict(upload_name='test_name')),
id='empty-upload-ok')])
id='empty-upload-ok'),
pytest.param(
'test_user', 'id_unpublished_w', dict(
query={'and': [{'upload_create_time:gt': '2021-01-01'}, {'published': False}]},
owner='user',
metadata=dict(comment='a test comment')),
id='query-ok'),
pytest.param(
'test_user', 'id_unpublished_w', dict(
query={'and': [{'upload_create_time:gt': '2021-01-01'}, {'published': False}]},
owner='user',
metadata=dict(upload_name='a test name'),
expected_status_code=400),
id='query-cannot-edit-upload-data'),
pytest.param(
'test_user', 'id_unpublished_w', dict(
query={'upload_create_time:lt': '2021-01-01'},
owner='user',
metadata=dict(comment='a test comment'),
expected_status_code=404),
id='query-no-results')])
def test_post_upload_edit(
client, proc_infra, example_data_writeable, a_dataset, test_auth_dict, test_users_dict,
user, upload_id, kwargs):
......@@ -1121,6 +1140,7 @@ def test_post_upload_edit(
user_auth, _token = test_auth_dict[user]
user = test_users_dict.get(user)
query = kwargs.get('query')
owner = kwargs.get('owner')
metadata = kwargs.get('metadata')
entries = kwargs.get('entries')
entries_key = kwargs.get('entries_key')
......@@ -1136,11 +1156,12 @@ def test_post_upload_edit(
upload.coauthors = [user.user_id]
upload.save()
mer = MetadataEditRequest(
query=query, metadata=metadata, entries=entries, entries_key=entries_key, verify_only=verify_only)
edit_request_json = dict(
query=query, owner=owner, metadata=metadata, entries=entries, entries_key=entries_key,
verify_only=verify_only)
url = f'uploads/{upload_id}/edit'
edit_start = datetime.utcnow().isoformat()[0:22]
response = client.post(url, headers=user_auth, json=mer.dict())
response = client.post(url, headers=user_auth, json=edit_request_json)
assert_response(response, expected_status_code)
if expected_status_code == 200:
assert_metadata_edited(
......
......@@ -20,9 +20,8 @@ from datetime import datetime
from nomad import datamodel, metainfo
from nomad.processing import Upload, MetadataEditRequestHandler
from nomad.processing.data import _editable_metadata
from nomad.processing.data import _editable_metadata, _mongo_upload_metadata
from nomad.search import search
from nomad.app.v1.models import MetadataEditRequest
all_coauthor_metadata = dict(
......@@ -37,6 +36,12 @@ all_coauthor_metadata = dict(
reviewers=['lhofstadter'],
datasets=['a dataset'])
all_coauthor_upload_metadata = {
k: v for k, v in all_coauthor_metadata.items() if k in _mongo_upload_metadata}
all_coauthor_entry_metadata = {
k: v for k, v in all_coauthor_metadata.items() if k not in _mongo_upload_metadata}
all_admin_metadata = dict(
# Every attribute which only admins can set
upload_create_time='2021-05-04T11:00:00',
......@@ -50,6 +55,7 @@ def assert_edit_request(user, **kwargs):
# Extract test parameters (lots of defaults)
upload_id = kwargs.get('upload_id', 'id_unpublished_w')
query = kwargs.get('query')
owner = kwargs.get('owner')
metadata = kwargs.get('metadata')
entries = kwargs.get('entries')
entries_key = kwargs.get('entries_key', 'calc_id')
......@@ -59,9 +65,11 @@ def assert_edit_request(user, **kwargs):
affected_upload_ids = kwargs.get('affected_upload_ids', [upload_id])
expected_metadata = kwargs.get('expected_metadata', metadata)
# Perform edit request
mer = MetadataEditRequest(query=query, metadata=metadata, entries=entries, verify=verify_only)
edit_request_json = dict(
query=query, owner=owner, metadata=metadata, entries=entries, entries_key=entries_key,
verify=verify_only)
edit_start = datetime.utcnow().isoformat()[0:22]
_response, status_code = MetadataEditRequestHandler.edit_metadata(mer, upload_id, user)
_response, status_code = MetadataEditRequestHandler.edit_metadata(edit_request_json, upload_id, user)
# Validate result
assert status_code == expected_status_code, 'Wrong status code returned'
if status_code == 200 and not verify_only:
......@@ -198,8 +206,31 @@ def convert_to_comparable_value_single(quantity, value, format, user):
upload_id='id_published_w',
metadata=dict(embargo_length=0)),
id='lift-embargo'),
])
def test_edit_metadata(proc_infra, example_data_writeable, a_dataset, test_users_dict, kwargs):
pytest.param(
dict(
query={'and': [{'upload_create_time:gt': '2021-01-01'}, {'published': False}]},
owner='user',
upload_id=None,
metadata=dict(comment='new comment'),
affected_upload_ids=['id_unpublished_w']),
id='query-ok'),
pytest.param(
dict(
query={'upload_create_time:lt': '2021-01-01'},
owner='user',
upload_id=None,
metadata=dict(comment='new comment'),
expected_status_code=404),
id='query-no-results'),
pytest.param(
dict(
query={'upload_create_time:gt': '2021-01-01'},
owner='user',
upload_id=None,
metadata=dict(comment='new comment'),
expected_status_code=401),
id='query-contains-published')])
def test_edit_metadata(proc_infra, purged_app, example_data_writeable, a_dataset, test_users_dict, kwargs):
kwargs['user'] = test_users_dict[kwargs.get('user', 'test_user')]
assert_edit_request(**kwargs)
......@@ -230,3 +261,20 @@ def test_admin_quantities(proc_infra, example_data_writeable, test_user, other_t
for k, v in all_admin_metadata.items():
assert_edit_request(
user=test_user, upload_id='id_unpublished_w', metadata={k: v}, expected_status_code=401)
def test_query_cannot_set_upload_attributes(proc_infra, example_data_writeable, a_dataset, test_user):
    '''
    Checks that an edit request which specifies a `query` is rejected (status code 400)
    when it tries to set an upload level attribute, both with and without an `upload_id`,
    while the same kind of request setting only entry level attributes goes through.
    '''
    unpublished_query = {'and': [{'upload_create_time:gt': '2021-01-01'}, {'published': False}]}
    # Arguments shared by every edit request issued in this test
    common_kwargs = dict(user=test_user, query=unpublished_query, owner='user')

    # Upload level attributes + query: must always fail, whether or not upload_id is given
    rejected_cases = (
        (quantity, value, target_upload_id)
        for quantity, value in all_coauthor_upload_metadata.items()
        for target_upload_id in (None, 'id_unpublished_w'))
    for quantity, value, target_upload_id in rejected_cases:
        assert_edit_request(
            upload_id=target_upload_id,
            metadata={quantity: value},
            expected_status_code=400,
            **common_kwargs)

    # Entry level attributes + query: must always succeed
    assert_edit_request(
        upload_id=None,
        metadata=all_coauthor_entry_metadata,
        affected_upload_ids=['id_unpublished_w'],
        **common_kwargs)
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment