class ArchiveChangeAction(Enum):
    """The kind of modification an :class:`ArchiveChange` applies."""

    upsert = 'upsert'
    remove = 'remove'


class ArchiveChange(BaseModel):
    """A single modification of an archive file.

    ``path`` is a ``/``-separated path into the archive; integer segments
    index items of repeated sub-sections. ``new_value`` is ignored for
    ``remove`` actions.
    """

    # '/'-separated archive path, e.g. 'data/sub/0/name'
    path: str
    # The value to set at `path` (only used for the `upsert` action).
    new_value: Any
    # Defaults to upsert, i.e. create-or-overwrite at `path`.
    action: ArchiveChangeAction = ArchiveChangeAction.upsert


class EntryEdit(BaseModel):
    """Request body for editing an entry's archive mainfile."""

    # Changes are applied in reverse list order by the edit endpoint.
    changes: List[ArchiveChange]


class EntryEditResponse(EntryEdit):
    """Response body for an entry edit: the applied changes plus the entry id."""

    entry_id: str
def _to_key(path_segment: str):
    """Convert a path segment into a list index (int) if possible, else keep it
    as a (str) dict key."""
    try:
        return int(path_segment)
    except ValueError:
        return path_segment


def _apply_archive_change(archive_data: dict, change: 'ArchiveChange') -> None:
    """Apply a single change to the in-memory archive data.

    Walks ``change.path`` ('/'-separated; integer segments address items of
    repeated sub-sections), creating intermediate dicts/lists as needed, and
    finally upserts ``change.new_value`` at — or removes — the last segment.

    TODO this only covers the most basic case:
    - no checks yet, we simply assume that the raw file and the changes
      agree on the schema
    - no handling of concurrent changes yet
    """
    path = change.path.split('/')
    # Resolve the final key up front: the loop below only visits path[:-1],
    # so a single-segment path must not depend on loop-local variables.
    last_key = _to_key(path[-1])
    section_data = archive_data

    for path_index, path_segment in enumerate(path[:-1]):
        # Usually all keys are str and indicate either a quantity or
        # a single sub-section. If the next segment is an integer, we
        # know that the current segment is a repeated sub-section.
        next_key = _to_key(path[path_index + 1])
        key = _to_key(path_segment)
        next_value = [] if isinstance(next_key, int) else {}

        if isinstance(section_data, list):
            # A previously removed item leaves a None hole; materialize the
            # container before descending into it.
            if section_data[key] is None:
                section_data[key] = next_value
            section_data = section_data[key]
        else:
            section_data = section_data.setdefault(key, next_value)

        # If this is a list addressed by an index, we might need to fill some
        # holes before we can update the value.
        if isinstance(section_data, list) and isinstance(next_key, int):
            if len(section_data) <= next_key:
                section_data.extend([None] * (next_key - len(section_data) + 1))

    if change.action == ArchiveChangeAction.remove:
        del section_data[last_key]
    else:
        section_data[last_key] = change.new_value


@router.post(
    '/{entry_id}/edit',
    tags=[raw_tag],
    summary='Edit a raw mainfile in archive format.',
    response_model=EntryEditResponse,
    response_model_exclude_unset=True,
    response_model_exclude_none=True,
    responses=create_responses(
        _bad_id_response, _bad_edit_request, _bad_edit_request_authorization
    ),
)
async def post_entry_edit(
    data: EntryEdit,
    entry_id: str = Path(
        ...,
        description='The unique entry id of the entry to edit.',
    ),
    user: User = Depends(create_user_dependency()),
):
    """Apply the given changes to an unpublished entry's archive mainfile and
    reprocess the changed file.

    Responds 404 if the entry does not exist or is not visible, 401 if the
    user is neither admin nor writer, 400 if the entry is published or its
    mainfile is not in archive (json/yaml) format.
    """
    # owner=all_ so that we can distinguish "does not exist / not visible"
    # (404) from "visible but no write access" (401) below.
    response = perform_search(
        owner=Owner.all_,
        query={'entry_id': entry_id},
        required=MetadataRequired(
            include=['writers', 'writer_groups', 'mainfile', 'upload_id', 'published']
        ),
        user_id=user.user_id if user is not None else None,
    )

    if response.pagination.total == 0:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail='The entry with the given id does not exist or is not visible to you.',
        )

    entry_data = response.data[0]
    writers = [writer['user_id'] for writer in entry_data.get('writers', [])]
    writer_groups = entry_data.get('writer_groups', [])
    # A user may write either directly or through membership in a writer group.
    is_writer = user.user_id in writers or not set(
        get_group_ids(user.user_id)
    ).isdisjoint(writer_groups)

    if not (user.is_admin or is_writer):
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail='Not enough permissions to execute edit request.',
        )

    if entry_data.get('published', False):
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail='Editing is only allowed for non published entries.',
        )

    mainfile = entry_data.get('mainfile')
    upload_id = entry_data.get('upload_id')
    upload = Upload.get(upload_id)
    context = ServerContext(upload)

    archive_data: dict = None
    with context.raw_file(mainfile, 'rt') as f:
        if mainfile.endswith('.archive.json'):
            archive_data = json.load(f)
        elif mainfile.endswith('.archive.yaml') or mainfile.endswith('.archive.yml'):
            # SafeLoader: mainfile content is user supplied and must not
            # instantiate arbitrary Python objects.
            archive_data = yaml.load(f, Loader=yaml.SafeLoader)
        else:
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail='The entry mainfile is not in archive format.',
            )

    # NOTE(review): changes are applied in reverse list order — presumably so
    # later-listed changes act as a base for earlier-listed ones; confirm
    # against the API contract.
    for change in reversed(data.changes):
        _apply_archive_change(archive_data, change)

    with context.raw_file(mainfile, 'wt') as f:
        if mainfile.endswith('.json'):
            json.dump(archive_data, f)
        else:
            yaml.dump(archive_data, f, default_flow_style=False, sort_keys=False)

    # Re-process only this file so the modified archive is parsed and indexed.
    reprocess_settings = Reprocess(
        index_individual_entries=True, reprocess_existing_entries=True
    )
    upload.put_file_and_process_local(
        os.path.join(context.raw_path(), mainfile),
        os.path.dirname(mainfile),
        reprocess_settings=reprocess_settings,
    )

    return {'entry_id': entry_id, 'changes': data.changes}
import json
import pytest

from nomad.datamodel.datamodel import EntryArchive, EntryMetadata
from nomad.datamodel.metainfo.basesections import BaseSection
from nomad.utils.exampledata import ExampleData
from tests.test_files import create_test_upload_files


@pytest.mark.parametrize(
    'edit, result, user',
    [
        pytest.param(
            {'changes': [{'path': 'data/name', 'new_value': 'NewName'}]},
            {'data': {'name': 'NewName'}},
            'user1',
            id='quantity',
        ),
        pytest.param(
            {'changes': [{'path': 'data/sub', 'new_value': {'name': 'NewName'}}]},
            {'data': {'name': 'TestName', 'sub': {'name': 'NewName'}}},
            'user1',
            id='sub-section',
        ),
        pytest.param(
            {'changes': [{'path': 'data/sub/0', 'new_value': {'name': 'NewName'}}]},
            {'data': {'name': 'TestName', 'sub': [{'name': 'NewName'}]}},
            'user1',
            id='repeated-sub-section',
        ),
        pytest.param(
            {'changes': [{'path': 'data/sub/name', 'new_value': 'NewName'}]},
            {'data': {'name': 'TestName', 'sub': {'name': 'NewName'}}},
            'user1',
            id='missing-sub-section',
        ),
        pytest.param(
            {'changes': [{'path': 'data/sub/0/name', 'new_value': 'NewName'}]},
            {'data': {'name': 'TestName', 'sub': [{'name': 'NewName'}]}},
            'user1',
            id='missing-repeated-sub-section',
        ),
        pytest.param(
            {'changes': [{'path': 'data/name', 'action': 'remove'}]},
            {'data': {}},
            'user1',
            id='remove-quantity',
        ),
        pytest.param(
            {
                'changes': [
                    {'path': 'data/sub', 'action': 'remove'},
                    {'path': 'data/sub/name', 'new_value': 'NewName'},
                ]
            },
            {
                'data': {
                    'name': 'TestName',
                }
            },
            'user1',
            id='remove-sub-section',
        ),
        pytest.param(
            {
                'changes': [
                    {'path': 'data/sub/1', 'action': 'remove'},
                    {'path': 'data/sub/1/name', 'new_value': 'NewName'},
                ]
            },
            {'data': {'name': 'TestName', 'sub': [None]}},
            'user1',
            id='remove-repeated-sub-section',
        ),
    ],
)
def test_post_entry_edit(
    edit,
    result,
    user,
    client,
    auth_headers,
    users_dict,
    elastic_function,
    mongo_function,
    raw_files_function,
):
    # Set up an unpublished upload with a single archive-format mainfile.
    mainfile = 'mainfile.archive.json'
    example_data = ExampleData(main_author=users_dict[user])
    example_data.create_upload(upload_id='upload_id', published=False)
    example_data.create_entry(
        entry_id='entry_id', upload_id='upload_id', mainfile=mainfile
    )
    example_data.save(with_files=False)

    # Write the raw mainfile by hand so we control its exact contents.
    upload_files = create_test_upload_files('upload_id', published=False, archives=[])
    initial_archive = EntryArchive(
        metadata=EntryMetadata(
            entry_id='entry_id',
            mainfile=mainfile,
        ),
        data=BaseSection(name='TestName'),
    )
    with upload_files.raw_file(mainfile, 'wt') as raw_file:
        json.dump(initial_archive.m_to_dict(), raw_file)

    # Perform the edit via the API.
    response = client.post(
        'entries/entry_id/edit', headers=auth_headers[user], json=edit
    )
    assert response.status_code == 200, response.text

    # Re-read the mainfile and compare its 'data' section (minus m_def) to
    # the expected result. json.dumps makes the comparison order-sensitive.
    with upload_files.raw_file(mainfile, 'rt') as raw_file:
        archive_data = json.load(raw_file)

    edited_data = {
        key: value for key, value in archive_data['data'].items() if key != 'm_def'
    }
    assert json.dumps(edited_data) == json.dumps(result['data'])