Commit ed0188a4 authored by Markus Scheidgen's avatar Markus Scheidgen
Browse files

Merge branch 'partial-archive-v1' into v0.10.1

parents 9c1ab45f e6763731
Pipeline #97694 passed with stages
in 23 minutes and 24 seconds
......@@ -884,74 +884,81 @@ files_parameters = parameter_dependency_from_model(
ArchiveRequired = Union[str, Dict[str, Any]]
class EntriesArchive(WithQueryAndPagination):
required: Optional[ArchiveRequired] = Body(
'*',
embed=True,
description=strip('''
The `required` part allows you to specify what parts of the requested archives
should be returned. The NOMAD Archive is a hierarchical data format and
you can *require* certain branches (i.e. *sections*) in the hierarchy.
By specifing certain sections with specific contents or all contents (via `"*"`),
you can determine what sections and what quantities should be returned.
The default is everything: `"*"`.
For example to specify that you are only interested in the `section_metadata`
use:
```
{
"section_run": "*"
}
```
Or to only get the `energy_total` from each individual calculations, use:
```
{
"section_run": {
"section_single_configuration_calculation": {
"energy_total": "*"
}
_archive_required_field = Body(
'*',
embed=True,
description=strip('''
The `required` part allows you to specify what parts of the requested archives
should be returned. The NOMAD Archive is a hierarchical data format and
you can *require* certain branches (i.e. *sections*) in the hierarchy.
By specifing certain sections with specific contents or all contents (via `"*"`),
you can determine what sections and what quantities should be returned.
The default is everything: `"*"`.
For example to specify that you are only interested in the `section_metadata`
use:
```
{
"section_metadata": "*"
}
```
Or to only get the `energy_total` from each individual calculations, use:
```
{
"section_run": {
"section_single_configuration_calculation": {
"energy_total": "*"
}
}
```
You can also request certain parts of a list, e.g. the last calculation:
```
{
"section_run": {
"section_single_configuration_calculation[-1]": "*"
}
}
```
You can also request certain parts of a list, e.g. the last calculation:
```
{
"section_run": {
"section_single_configuration_calculation[-1]": "*"
}
```
These required specifications are also very useful to get workflow results.
This works because we can use references (e.g. workflow to final result calculation)
and the API will resolve these references and return the respective data.
For example just the total energy value and reduced formula from the resulting
calculation:
```
{
'section_workflow': {
'calculation_result_ref': {
'energy_total': '*',
'single_configuration_calculation_to_system_ref': {
'chemical_composition_reduced': '*'
}
}
```
These required specifications are also very useful to get workflow results.
This works because we can use references (e.g. workflow to final result calculation)
and the API will resolve these references and return the respective data.
For example just the total energy value and reduced formula from the resulting
calculation:
```
{
'section_workflow': {
'calculation_result_ref': {
'energy_total': '*',
'single_configuration_calculation_to_system_ref': {
'chemical_composition_reduced': '*'
}
}
}
```
'''),
example={
'section_run': {
'section_single_configuration_calculation[-1]': {
'energy_total': '*'
},
'section_system[-1]': '*'
}
```
'''),
example={
'section_run': {
'section_single_configuration_calculation[-1]': {
'energy_total': '*'
},
'section_metadata': '*'
})
'section_system[-1]': '*'
},
'section_metadata': '*'
})
class EntriesArchive(WithQueryAndPagination):
    # Specification of which archive sections/quantities to return.
    # Defaults to '*' (everything); shares the field definition (default,
    # description, example) with EntryArchiveRequest via _archive_required_field.
    required: Optional[ArchiveRequired] = _archive_required_field
class EntryArchiveRequest(BaseModel):
    # Request body for the single-entry archive query endpoint.
    # Defaults to '*' (the full archive) via the shared _archive_required_field.
    required: Optional[ArchiveRequired] = _archive_required_field
class EntriesArchiveDownload(WithQuery):
......@@ -1046,7 +1053,7 @@ class EntryArchive(BaseModel):
calc_id: str = Field(None)
upload_id: str = Field(None)
parser_name: str = Field(None)
archive: Any = Field(None)
archive: Dict[str, Any] = Field(None)
class EntriesArchiveResponse(EntriesArchive):
......@@ -1054,9 +1061,9 @@ class EntriesArchiveResponse(EntriesArchive):
data: List[EntryArchive] = Field(None)
class EntryArchiveResponse(BaseModel):
class EntryArchiveResponse(EntryArchiveRequest):
entry_id: str = Field(...)
data: Dict[str, Any]
data: EntryArchive = Field(None)
class SearchResponse(EntriesMetadataResponse):
......
......@@ -41,7 +41,7 @@ from ..models import (
entry_pagination_parameters, files_parameters, User, Owner, HTTPExceptionModel, EntriesRaw,
EntriesRawResponse, EntriesRawDownload, EntryRaw, EntryRawFile, EntryRawResponse,
EntriesArchiveDownload, EntryArchiveResponse, EntriesArchive, EntriesArchiveResponse,
ArchiveRequired)
ArchiveRequired, EntryArchiveRequest)
router = APIRouter()
......@@ -455,6 +455,7 @@ def _read_archive(entry_metadata, uploads, required):
with upload_files.read_archive(calc_id) as archive:
return {
'calc_id': calc_id,
'upload_id': upload_id,
'parser_name': entry_metadata['parser_name'],
'archive': query_archive(archive, {calc_id: required})[calc_id]
}
......@@ -529,6 +530,7 @@ def _answer_entries_archive_request(
owner=search_response.owner,
query=search_response.query,
pagination=search_response.pagination,
required=required,
data=list(response_data.values()))
......@@ -878,20 +880,13 @@ async def get_entry_raw_download_file(
detail='The requested file does not exist.')
@router.get(
'/{entry_id}/archive',
tags=[archive_tag],
summary='Get the archive for an entry by its id',
response_model=EntryArchiveResponse,
response_model_exclude_unset=True,
response_model_exclude_none=True,
responses=create_responses(_bad_id_response))
async def get_entry_archive(
entry_id: str = Path(..., description='The unique entry id of the entry to retrieve raw data from.'),
user: User = Depends(get_optional_user)):
'''
Returns the full archive for the given `entry_id`.
'''
def _answer_entry_archive_request(entry_id: str, required: ArchiveRequired, user: User):
try:
required_with_references = compute_required_with_referenced(required)
except KeyError as e:
raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=(
'The required specification contains an unknown quantity or section: %s' % str(e)))
query = dict(calc_id=entry_id)
response = perform_search(
owner=Owner.visible, query=query,
......@@ -903,17 +898,79 @@ async def get_entry_archive(
status_code=status.HTTP_404_NOT_FOUND,
detail='The entry with the given id does not exist or is not visible to you.')
entry_metadata = response.data[0]
if required_with_references is not None:
# We can produce all the required archive data from the partial archives stored
# in mongodb.
partial_archives = cast(dict, read_partial_archives_from_mongo([entry_id], as_dict=True))
uploads = _Uploads()
try:
try:
archive_data = _read_archive(response.data[0], uploads, required='*')
except KeyError:
raise HTTPException(
status_code=status.HTTP_404_NOT_FOUND,
detail='The entry with the given id does exist, but it has no archive.')
archive_data = None
if required_with_references is not None:
try:
partial_archive = partial_archives[entry_id]
archive_data = filter_archive(required, partial_archive, transform=lambda e: e)
except KeyError:
# the partial archive might not exist, e.g. due to processing problems
pass
except ArchiveQueryError as e:
detail = 'The required specification could not be understood: %s' % str(e)
raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=detail)
if archive_data is None:
try:
archive_data = _read_archive(entry_metadata, uploads, required=required)['archive']
except KeyError:
raise HTTPException(
status_code=status.HTTP_404_NOT_FOUND,
detail='The entry with the given id does exist, but it has no archive.')
return {
'entry_id': entry_id,
'data': archive_data['archive']}
'required': required,
'data': {
'calc_id': entry_id,
'upload_id': entry_metadata['upload_id'],
'parser_name': entry_metadata['parser_name'],
'archive': archive_data
}}
finally:
uploads.close()
@router.get(
    '/{entry_id}/archive',
    tags=[archive_tag],
    summary='Get the archive for an entry by its id',
    response_model=EntryArchiveResponse,
    response_model_exclude_unset=True,
    response_model_exclude_none=True,
    responses=create_responses(_bad_id_response))
async def get_entry_archive(
        entry_id: str = Path(..., description='The unique entry id of the entry to retrieve raw data from.'),
        user: User = Depends(get_optional_user)):
    '''
    Returns the full archive for the given `entry_id`.
    '''
    # Delegate to the implementation shared with post_entry_archive_query;
    # required='*' requests the complete, unfiltered archive.
    return _answer_entry_archive_request(entry_id=entry_id, required='*', user=user)
@router.post(
    '/{entry_id}/archive/query',
    tags=[archive_tag],
    summary='Get the archive for an entry by its id',
    response_model=EntryArchiveResponse,
    response_model_exclude_unset=True,
    response_model_exclude_none=True,
    responses=create_responses(_bad_id_response, _bad_archive_required_response))
async def post_entry_archive_query(
        data: EntryArchiveRequest, user: User = Depends(get_optional_user),
        entry_id: str = Path(..., description='The unique entry id of the entry to retrieve raw data from.')):
    '''
    Returns a partial archive for the given `entry_id` based on the `required` specified
    in the body.
    '''
    # Same shared implementation as get_entry_archive, but the caller controls
    # which sections/quantities are returned via `data.required` (default '*').
    return _answer_entry_archive_request(entry_id=entry_id, required=data.required, user=user)
......@@ -478,9 +478,13 @@ def assert_archive_zip_file(response, entries: int = -1, compressed: bool = Fals
def assert_archive_response(response_json, required=None):
for key in ['entry_id', 'data']:
for key in ['entry_id', 'required', 'data']:
assert key in response_json
assert_archive(response_json['data'], required=required)
if required is not None:
assert required == response_json['required']
for key in ['calc_id', 'upload_id', 'parser_name', 'archive']:
assert key in response_json['data']
assert_archive(response_json['data']['archive'], required=required)
def assert_archive(archive, required=None):
......@@ -757,8 +761,8 @@ def test_entries_archive(client, data, required, status_code):
@pytest.mark.parametrize('entry_id, status_code', [
pytest.param('id_01', 200, id='id'),
pytest.param('id_02', 404, id='404'),
pytest.param('doesnotexist', 404, id='404')])
pytest.param('id_02', 404, id='404-not-visible'),
pytest.param('doesnotexist', 404, id='404-does-not-exist')])
def test_entry_archive(client, data, entry_id, status_code):
response = client.get('entries/%s/archive' % entry_id)
assert_response(response, status_code)
......@@ -766,6 +770,23 @@ def test_entry_archive(client, data, entry_id, status_code):
assert_archive_response(response.json())
@pytest.mark.parametrize('entry_id, required, status_code', [
    pytest.param('id_01', '*', 200, id='full'),
    pytest.param('id_02', '*', 404, id='404'),
    pytest.param('id_01', {'section_metadata': '*'}, 200, id='partial'),
    pytest.param('id_01', {'section_run': {'section_system[NOTANINT]': '*'}}, 400, id='bad-required-1'),
    pytest.param('id_01', {'section_metadata': {'owners[NOTANINT]': '*'}}, 400, id='bad-required-2'),
    pytest.param('id_01', {'DOESNOTEXIST': '*'}, 400, id='bad-required-3')
])
def test_entry_archive_query(client, data, entry_id, required, status_code):
    # Exercise the archive query endpoint with a `required` spec in the body.
    payload = {'required': required}
    response = client.post('entries/%s/archive/query' % entry_id, json=payload)
    assert_response(response, status_code)
    if status_code != 200:
        return
    # A successful response must echo `required` and contain the filtered archive.
    assert_archive_response(response.json(), required=required)
def perform_entries_owner_test(
client, test_user_auth, other_test_user_auth, admin_user_auth,
owner, user, status_code, total, http_method, test_method):
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment