nomad-lab / nomad-FAIR

Commit ed0188a4, authored Apr 06, 2021 by Markus Scheidgen

Merge branch 'partial-archive-v1' into v0.10.1

Parents: 9c1ab45f, e6763731
Pipeline #97694 passed with stages in 23 minutes and 24 seconds
Changes: 3 files
nomad/app/v1/models.py
@@ -884,74 +884,81 @@ files_parameters = parameter_dependency_from_model(
 ArchiveRequired = Union[str, Dict[str, Any]]


-class EntriesArchive(WithQueryAndPagination):
-    required: Optional[ArchiveRequired] = Body(
+_archive_required_field = Body(
     '*',
     embed=True,
     description=strip('''
        The `required` part allows you to specify what parts of the requested archives
        should be returned. The NOMAD Archive is a hierarchical data format and
        you can *require* certain branches (i.e. *sections*) in the hierarchy.
        By specifing certain sections with specific contents or all contents (via `"*"`),
        you can determine what sections and what quantities should be returned.
        The default is everything: `"*"`.

        For example to specify that you are only interested in the `section_metadata`
        use:

        ```
        {
-           "section_run": "*"
+           "section_metadata": "*"
        }
        ```

        Or to only get the `energy_total` from each individual calculations, use:
        ```
        {
            "section_run": {
                "section_single_configuration_calculation": {
                    "energy_total": "*"
                }
            }
        }
        ```

        You can also request certain parts of a list, e.g. the last calculation:
        ```
        {
            "section_run": {
                "section_single_configuration_calculation[-1]": "*"
            }
        }
        ```

        These required specifications are also very useful to get workflow results.
        This works because we can use references (e.g. workflow to final result calculation)
        and the API will resolve these references and return the respective data.
        For example just the total energy value and reduced formula from the resulting
        calculation:

        ```
        {
            'section_workflow': {
                'calculation_result_ref': {
                    'energy_total': '*',
                    'single_configuration_calculation_to_system_ref': {
                        'chemical_composition_reduced': '*'
                    }
                }
            }
        }
        ```
    '''),
     example={
         'section_run': {
             'section_single_configuration_calculation[-1]': {
                 'energy_total': '*'
             },
             'section_system[-1]': '*'
         },
         'section_metadata': '*'
     })
+
+
+class EntriesArchive(WithQueryAndPagination):
+    required: Optional[ArchiveRequired] = _archive_required_field
+
+
+class EntryArchiveRequest(BaseModel):
+    required: Optional[ArchiveRequired] = _archive_required_field


 class EntriesArchiveDownload(WithQuery):
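For orientation (not part of the diff): since the shared field is declared with `Body('*', embed=True)`, FastAPI expects the `required` specification nested under a `required` key in the JSON request body, with `"*"` as the default. A minimal sketch of a body that `EntryArchiveRequest` would accept, reusing the example from the field's docstring:

```python
# Illustrative only: the `required` spec is embedded under a "required" key.
request_body = {
    'required': {
        'section_run': {
            'section_single_configuration_calculation[-1]': {'energy_total': '*'},
            'section_system[-1]': '*'
        },
        'section_metadata': '*'
    }
}
```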
@@ -1046,7 +1053,7 @@ class EntryArchive(BaseModel):
     calc_id: str = Field(None)
     upload_id: str = Field(None)
     parser_name: str = Field(None)
-    archive: Any = Field(None)
+    archive: Dict[str, Any] = Field(None)


 class EntriesArchiveResponse(EntriesArchive):
@@ -1054,9 +1061,9 @@ class EntriesArchiveResponse(EntriesArchive):
     data: List[EntryArchive] = Field(None)


-class EntryArchiveResponse(BaseModel):
+class EntryArchiveResponse(EntryArchiveRequest):
     entry_id: str = Field(...)
-    data: Dict[str, Any]
+    data: EntryArchive = Field(None)


 class SearchResponse(EntriesMetadataResponse):
nomad/app/v1/routers/entries.py
@@ -41,7 +41,7 @@ from ..models import (
     entry_pagination_parameters, files_parameters, User, Owner, HTTPExceptionModel,
     EntriesRaw, EntriesRawResponse, EntriesRawDownload,
     EntryRaw, EntryRawFile, EntryRawResponse,
     EntriesArchiveDownload, EntryArchiveResponse, EntriesArchive, EntriesArchiveResponse,
-    ArchiveRequired)
+    ArchiveRequired, EntryArchiveRequest)


 router = APIRouter()
@@ -455,6 +455,7 @@ def _read_archive(entry_metadata, uploads, required):
     with upload_files.read_archive(calc_id) as archive:
         return {
             'calc_id': calc_id,
             'upload_id': upload_id,
+            'parser_name': entry_metadata['parser_name'],
             'archive': query_archive(archive, {calc_id: required})[calc_id]
         }
@@ -529,6 +530,7 @@ def _answer_entries_archive_request(
         owner=search_response.owner,
         query=search_response.query,
         pagination=search_response.pagination,
+        required=required,
         data=list(response_data.values()))
@@ -878,20 +880,13 @@ async def get_entry_raw_download_file(
             detail='The requested file does not exist.')


-@router.get(
-    '/{entry_id}/archive',
-    tags=[archive_tag],
-    summary='Get the archive for an entry by its id',
-    response_model=EntryArchiveResponse,
-    response_model_exclude_unset=True,
-    response_model_exclude_none=True,
-    responses=create_responses(_bad_id_response))
-async def get_entry_archive(
-        entry_id: str = Path(..., description='The unique entry id of the entry to retrieve raw data from.'),
-        user: User = Depends(get_optional_user)):
-    '''
-    Returns the full archive for the given `entry_id`.
-    '''
+def _answer_entry_archive_request(entry_id: str, required: ArchiveRequired, user: User):
+    try:
+        required_with_references = compute_required_with_referenced(required)
+    except KeyError as e:
+        raise HTTPException(
+            status_code=status.HTTP_400_BAD_REQUEST,
+            detail=(
+                'The required specification contains an unknown quantity or section: %s' % str(e)))

     query = dict(calc_id=entry_id)
     response = perform_search(
         owner=Owner.visible, query=query,
@@ -903,17 +898,79 @@ async def get_entry_archive(
             status_code=status.HTTP_404_NOT_FOUND,
             detail='The entry with the given id does not exist or is not visible to you.')

+    entry_metadata = response.data[0]
+
+    if required_with_references is not None:
+        # We can produce all the required archive data from the partial archives stored
+        # in mongodb.
+        partial_archives = cast(
+            dict, read_partial_archives_from_mongo([entry_id], as_dict=True))
+
     uploads = _Uploads()
     try:
-        try:
-            archive_data = _read_archive(response.data[0], uploads, required='*')
-        except KeyError:
-            raise HTTPException(
-                status_code=status.HTTP_404_NOT_FOUND,
-                detail='The entry with the given id does exist, but it has no archive.')
+        archive_data = None
+        if required_with_references is not None:
+            try:
+                partial_archive = partial_archives[entry_id]
+                archive_data = filter_archive(
+                    required, partial_archive, transform=lambda e: e)
+            except KeyError:
+                # the partial archive might not exist, e.g. due to processing problems
+                pass
+            except ArchiveQueryError as e:
+                detail = 'The required specification could not be understood: %s' % str(e)
+                raise HTTPException(
+                    status_code=status.HTTP_400_BAD_REQUEST, detail=detail)
+
+        if archive_data is None:
+            try:
+                archive_data = _read_archive(
+                    entry_metadata, uploads, required=required)['archive']
+            except KeyError:
+                raise HTTPException(
+                    status_code=status.HTTP_404_NOT_FOUND,
+                    detail='The entry with the given id does exist, but it has no archive.')

         return {
             'entry_id': entry_id,
-            'data': archive_data['archive']}
+            'required': required,
+            'data': {
+                'calc_id': entry_id,
+                'upload_id': entry_metadata['upload_id'],
+                'parser_name': entry_metadata['parser_name'],
+                'archive': archive_data}}
     finally:
         uploads.close()
+
+
+@router.get(
+    '/{entry_id}/archive',
+    tags=[archive_tag],
+    summary='Get the archive for an entry by its id',
+    response_model=EntryArchiveResponse,
+    response_model_exclude_unset=True,
+    response_model_exclude_none=True,
+    responses=create_responses(_bad_id_response))
+async def get_entry_archive(
+        entry_id: str = Path(..., description='The unique entry id of the entry to retrieve raw data from.'),
+        user: User = Depends(get_optional_user)):
+    '''
+    Returns the full archive for the given `entry_id`.
+    '''
+    return _answer_entry_archive_request(entry_id=entry_id, required='*', user=user)
+
+
+@router.post(
+    '/{entry_id}/archive/query',
+    tags=[archive_tag],
+    summary='Get the archive for an entry by its id',
+    response_model=EntryArchiveResponse,
+    response_model_exclude_unset=True,
+    response_model_exclude_none=True,
+    responses=create_responses(_bad_id_response, _bad_archive_required_response))
+async def post_entry_archive_query(
+        data: EntryArchiveRequest, user: User = Depends(get_optional_user),
+        entry_id: str = Path(..., description='The unique entry id of the entry to retrieve raw data from.')):
+    '''
+    Returns a partial archive for the given `entry_id` based on the `required` specified
+    in the body.
+    '''
+    return _answer_entry_archive_request(entry_id=entry_id, required=data.required, user=user)
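For illustration only (not part of this commit), the two routes above could be exercised from a client roughly as follows; the base URL, API prefix, and entry id are assumptions, and the response layout (`data.archive`) follows the new `EntryArchiveResponse` model.

```python
# Sketch of client usage for GET /entries/{entry_id}/archive and
# POST /entries/{entry_id}/archive/query; base_url and entry_id are placeholders.
import requests

base_url = 'https://nomad-lab.eu/prod/rae/api/v1'  # assumed deployment URL
entry_id = 'some_entry_id'                         # placeholder

# Full archive via the unchanged GET route.
full = requests.get(f'{base_url}/entries/{entry_id}/archive')
full.raise_for_status()

# Partial archive via the new POST route with a `required` specification.
partial = requests.post(
    f'{base_url}/entries/{entry_id}/archive/query',
    json={'required': {'section_metadata': '*'}})
partial.raise_for_status()
archive = partial.json()['data']['archive']
```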
tests/app/v1/routers/test_entries.py
@@ -478,9 +478,13 @@ def assert_archive_zip_file(response, entries: int = -1, compressed: bool = Fals
 def assert_archive_response(response_json, required=None):
-    for key in ['entry_id', 'data']:
+    for key in ['entry_id', 'required', 'data']:
         assert key in response_json
-    assert_archive(response_json['data'], required=required)
+    if required is not None:
+        assert required == response_json['required']
+    for key in ['calc_id', 'upload_id', 'parser_name', 'archive']:
+        assert key in response_json['data']
+    assert_archive(response_json['data']['archive'], required=required)


 def assert_archive(archive, required=None):
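For reference (illustrative values only, not from the diff), a response that satisfies these updated assertions now nests the archive under `data`:

```python
# Rough shape of a successful entry-archive response as asserted above;
# all values are placeholders.
response_json = {
    'entry_id': 'id_01',
    'required': '*',
    'data': {
        'calc_id': 'id_01',
        'upload_id': 'upload_1',
        'parser_name': 'parsers/vasp',
        'archive': {'section_metadata': ..., 'section_run': ...}
    }
}
```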
@@ -757,8 +761,8 @@ def test_entries_archive(client, data, required, status_code):
 @pytest.mark.parametrize('entry_id, status_code', [
     pytest.param('id_01', 200, id='id'),
-    pytest.param('id_02', 404, id='404'),
-    pytest.param('doesnotexist', 404, id='404')])
+    pytest.param('id_02', 404, id='404-not-visible'),
+    pytest.param('doesnotexist', 404, id='404-does-not-exist')])
 def test_entry_archive(client, data, entry_id, status_code):
     response = client.get('entries/%s/archive' % entry_id)
     assert_response(response, status_code)
@@ -766,6 +770,23 @@ def test_entry_archive(client, data, entry_id, status_code):
         assert_archive_response(response.json())


+@pytest.mark.parametrize('entry_id, required, status_code', [
+    pytest.param('id_01', '*', 200, id='full'),
+    pytest.param('id_02', '*', 404, id='404'),
+    pytest.param('id_01', {'section_metadata': '*'}, 200, id='partial'),
+    pytest.param('id_01', {'section_run': {'section_system[NOTANINT]': '*'}}, 400, id='bad-required-1'),
+    pytest.param('id_01', {'section_metadata': {'owners[NOTANINT]': '*'}}, 400, id='bad-required-2'),
+    pytest.param('id_01', {'DOESNOTEXIST': '*'}, 400, id='bad-required-3')
+])
+def test_entry_archive_query(client, data, entry_id, required, status_code):
+    response = client.post('entries/%s/archive/query' % entry_id, json={'required': required})
+    assert_response(response, status_code)
+    if status_code == 200:
+        assert_archive_response(response.json(), required=required)
+
+
 def perform_entries_owner_test(
         client, test_user_auth, other_test_user_auth, admin_user_auth,
         owner, user, status_code, total, http_method, test_method):