nomad-lab / nomad-FAIR · Commit cfacafb6

Merge branch 'refactor-raw-api' into 'v1.0.0'

Refactor raw api

See merge request !539

Authored Jan 25, 2022 by Markus Scheidgen
Parents: 7d0e80e4, 3fd1558c
Pipeline #120579 passed with stages in 32 minutes and 15 seconds
Changes: 13 files
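In short: the endpoints returning raw-file *metadata* move from `/raw` to `/rawdir` (`POST /entries/rawdir/query`, `GET /entries/rawdir`, `GET /entries/{entry_id}/rawdir`), while the endpoints that stream the files themselves drop their `/download` suffix and take over the plain `/raw` paths. Models and handlers are renamed to match (`EntriesRawResponse` → `EntriesRawDirResponse`, `_create_entry_raw` → `_create_entry_rawdir`, and so on), uploads gain a paginated `GET /uploads/{upload_id}/rawdir/{path}` endpoint for browsing directory metadata, and the GUI and DCAT mapping are updated to the new paths.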
docs/api.md

@@ -256,7 +256,7 @@ are:
 - `entries/query` - Query entries for metadata
 - `entries/archive/query` - Query entries for archive data
-- `entries/{entry-id}/raw/download` - Download raw data for a specific entry
+- `entries/{entry-id}/raw` - Download raw data for a specific entry
 - `uploads/{upload-id}/raw/path/to/file` - Download a specific file of an upload

 ## Common concepts

@@ -392,7 +392,7 @@ files in one big zip-file. Here, you might want to use a program like *curl* to
 directly from the shell:

 ```
-curl "{{ nomad_url() }}/v1/entries/raw/download?results.material.elements=Ti&results.material.elements=O" -o download.zip
+curl "{{ nomad_url() }}/v1/entries/raw?results.material.elements=Ti&results.material.elements=O" -o download.zip
 ```

 ## Access archives
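For orientation, a minimal Python sketch of the renamed search endpoints. The base URL, the use of `requests`, and the response handling are illustrative assumptions; the query values mirror the curl example above.

```python
import requests

base = 'https://nomad-lab.eu/prod/rae/api/v1'  # assumed installation URL

# Stream a zip of matching entries' raw files (formerly GET /entries/raw/download).
with requests.get(
        f'{base}/entries/raw',
        params={'results.material.elements': ['Ti', 'O']}, stream=True) as response:
    with open('download.zip', 'wb') as zip_file:
        for chunk in response.iter_content(chunk_size=64 * 1024):
            zip_file.write(chunk)

# Query raw-file *metadata* pages (formerly POST /entries/raw/query).
response = requests.post(f'{base}/entries/rawdir/query', json={
    'query': {'results.material.elements': ['Ti', 'O']},
    'pagination': {'page_size': 10}})
for entry in response.json()['data']:
    print(entry['entry_id'], len(entry['files']))
```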
gui/src/components/api.js

@@ -206,7 +206,7 @@ class Api {
     this.onStartLoading()
     const auth = await this.authHeaders()
     try {
-      const entry = await this.axios.get(`/entries/${entryId}/raw`, auth)
+      const entry = await this.axios.get(`/entries/${entryId}/rawdir`, auth)
       return entry.data
     } catch (errors) {
       handleApiError(errors)
gui/src/components/entry/RawFiles.js

@@ -153,7 +153,7 @@ export default function RawFiles({data, entryId}) {
     setShownFile(file)
     setFileContents(null)
-    api.get(`/entries/${entryId}/raw/download/${file.split('/').reverse()[0]}`,
+    api.get(`/entries/${entryId}/raw/${file.split('/').reverse()[0]}`,
       {length: 16 * 1024, decompress: true},
       {transformResponse: []})
       .then(contents => setFileContents({

@@ -174,7 +174,7 @@ export default function RawFiles({data, entryId}) {
     if (fileContents.contents.length < (page + 1) * 16 * 1024) {
-      api.get(`/entries/${entryId}/raw/download/${shownFile.split('/').reverse()[0]}`,
+      api.get(`/entries/${entryId}/raw/${shownFile.split('/').reverse()[0]}`,
         {offset: page * 16 * 1024, length: 16 * 1024, decompress: true},
         {transformResponse: []})
         .then(contents => {

@@ -220,14 +220,14 @@ export default function RawFiles({data, entryId}) {
     let downloadUrl
     if (selectedFiles.length === 1) {
       // download the individual file
-      downloadUrl = `entries/${entryId}/raw/download/${file(selectedFiles[0])}`
+      downloadUrl = `entries/${entryId}/raw/${file(selectedFiles[0])}`
     } else if (selectedFiles.length === availableFiles.length) {
       // use an endpoint that downloads all files of the entry
-      downloadUrl = `entries/${entryId}/raw/download`
+      downloadUrl = `entries/${entryId}/raw`
     } else if (selectedFiles.length > 0) {
       // download specific files
       const query = selectedFiles.map(file).map(f => `include_files=${encodeURIComponent(f)}`).join('&')
-      downloadUrl = `entries/${entryId}/raw/download?${query}`
+      downloadUrl = `entries/${entryId}/raw?${query}`
     }
     return (
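The 16 kB paging above can be reproduced outside the GUI. A sketch in Python, assuming the `offset`, `length`, and `decompress` options the GUI passes map to query parameters of the renamed `/entries/{entry_id}/raw/{path}` endpoint; the id and file name are placeholders:

```python
import requests

base = 'https://nomad-lab.eu/prod/rae/api/v1'  # assumed installation URL
url = f'{base}/entries/SOME_ENTRY_ID/raw/SOME_FILE_NAME'  # placeholders

# Read the file in 16 kB pages via offset/length, mirroring the GUI logic above.
page_size = 16 * 1024
contents, page = '', 0
while True:
    chunk = requests.get(url, params={
        'offset': page * page_size, 'length': page_size, 'decompress': True}).text
    contents += chunk
    if len(chunk) < page_size:
        break  # a short (or empty) page means the end of the file
    page += 1
```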
gui/src/components/uploads/FilesBrowser.js

@@ -146,13 +146,13 @@ export default function FilesBrower({uploadId, disabled}) {
   const fetchData = useMemo(() => (path, open) => {
     async function fetchData() {
-      const results = await api.get(`/uploads/${uploadId}/raw/${path}`)
+      const results = await api.get(`/uploads/${uploadId}/rawdir/${path}?page_size=500`)
       allData.current[path] = {open: open, ...results}
       const resultsByPath = {}
-      results.content
+      results.directory_metadata.content
         .filter(item => item.is_file)
         .forEach(item => {
           resultsByPath[`${path}/${item.name}`] = item

@@ -212,7 +212,7 @@ export default function FilesBrower({uploadId, disabled}) {
       key: path,
       hasChildren: !is_file,
       open: data?.open,
-      children: data?.content?.map(mapContent),
+      children: data?.directory_metadata?.content?.map(mapContent),
       onToggle: is_file ? null : () => handleToggle(path),
       // TODO
       // info: !is_file && data?.content?.length === 0 && <Typography variant="caption">
nomad/app/flask/dcat/mapping.py

@@ -190,7 +190,7 @@ class Mapping():
         dist = BNode()
         self.g.add((dist, RDF.type, DCAT.Distribution))
         self.g.add((dist, DCT.title, Literal(get_optional_entry_prop(entry, 'formula') + '_raw')))
-        self.g.add((dist, DCAT.accessURL, URIRef(f'https://nomad-lab.eu/prod/rae/api/v1/entries/{entry["entry_id"]}/raw/download')))
+        self.g.add((dist, DCAT.accessURL, URIRef(f'https://nomad-lab.eu/prod/rae/api/v1/entries/{entry["entry_id"]}/raw')))
         self.g.add((dist, DCAT.packageFormat, URIRef('https://www.iana.org/assignments/media-types/application/zip')))

         return dist
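A sketch of the triples this mapping now emits, built with rdflib as the code above does; the entry dict and the `DCT` namespace binding are illustrative assumptions:

```python
from rdflib import BNode, Graph, Literal, Namespace, URIRef
from rdflib.namespace import DCAT, RDF

DCT = Namespace('http://purl.org/dc/terms/')
entry = {'entry_id': 'SOME_ENTRY_ID', 'formula': 'TiO2'}  # illustrative entry

g = Graph()
dist = BNode()
g.add((dist, RDF.type, DCAT.Distribution))
g.add((dist, DCT.title, Literal(entry['formula'] + '_raw')))
# accessURL now points at the renamed download endpoint (no /download suffix).
g.add((dist, DCAT.accessURL, URIRef(
    f'https://nomad-lab.eu/prod/rae/api/v1/entries/{entry["entry_id"]}/raw')))
print(g.serialize(format='turtle'))
```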
nomad/app/v1/models.py

@@ -569,7 +569,7 @@ class Pagination(BaseModel):
     @validator('page_offset')
     def validate_page_offset(cls, page_offset, values):  # pylint: disable=no-self-argument
         if page_offset is not None:
-            assert page_offset >= 0, 'page must be >= 1'
+            assert page_offset >= 0, 'page_offset must be >= 1'
         return page_offset

     @root_validator(skip_on_failure=True)
nomad/app/v1/routers/entries.py

@@ -180,11 +180,11 @@ class EntriesArchiveDownload(WithQuery):
     files: Optional[Files] = Body(None)


-class EntriesRaw(WithQuery):
+class EntriesRawDir(WithQuery):
     pagination: Optional[MetadataPagination] = Body(None)


-class EntriesRawDownload(WithQuery):
+class EntriesRaw(WithQuery):
     files: Optional[Files] = Body(
         None,
         example={

@@ -192,26 +192,26 @@ class EntriesRawDownload(WithQuery):
         })


-class EntryRawFile(BaseModel):
+class EntryRawDirFile(BaseModel):
     path: str = Field(None)
     size: int = Field(None)


-class EntryRaw(BaseModel):
+class EntryRawDir(BaseModel):
     entry_id: str = Field(None)
     upload_id: str = Field(None)
     mainfile: str = Field(None)
-    files: List[EntryRawFile] = Field(None)
+    files: List[EntryRawDirFile] = Field(None)


-class EntriesRawResponse(EntriesRaw):
+class EntriesRawDirResponse(EntriesRawDir):
     pagination: PaginationResponse = Field(None)  # type: ignore
-    data: List[EntryRaw] = Field(None)
+    data: List[EntryRawDir] = Field(None)


-class EntryRawResponse(BaseModel):
+class EntryRawDirResponse(BaseModel):
     entry_id: str = Field(...)
-    data: EntryRaw = Field(...)
+    data: EntryRawDir = Field(...)


 class EntryArchive(BaseModel):

@@ -296,14 +296,14 @@ _bad_edit_request_empty_query = status.HTTP_404_NOT_FOUND, {
     'model': HTTPExceptionModel,
     'description': strip('No matching entries found.')}

-_raw_download_response = 200, {
+_raw_response = 200, {
     'content': {'application/zip': {}},
     'description': strip('''
         A zip file with the requested raw files. The file is streamed.
         The content length is not known in advance.''')}

-_raw_download_file_response = 200, {
+_raw_file_response = 200, {
     'content': {'application/octet-stream': {}},
     'description': strip('''
         A byte stream with raw file contents. The content length is not known in advance.

@@ -462,7 +462,7 @@ class _Uploads():
             self._upload_files.close()


-def _create_entry_raw(entry_metadata: Dict[str, Any], uploads: _Uploads):
+def _create_entry_rawdir(entry_metadata: Dict[str, Any], uploads: _Uploads):
     entry_id = entry_metadata['entry_id']
     upload_id = entry_metadata['upload_id']
     mainfile = entry_metadata['mainfile']

@@ -472,12 +472,12 @@ def _create_entry_raw(entry_metadata: Dict[str, Any], uploads: _Uploads):
     files = []
     for path_info in upload_files.raw_directory_list(mainfile_dir, files_only=True):
-        files.append(EntryRawFile(path=path_info.path, size=path_info.size))
+        files.append(EntryRawDirFile(path=path_info.path, size=path_info.size))

-    return EntryRaw(entry_id=entry_id, upload_id=upload_id, mainfile=mainfile, files=files)
+    return EntryRawDir(entry_id=entry_id, upload_id=upload_id, mainfile=mainfile, files=files)


-def _answer_entries_raw_request(
+def _answer_entries_rawdir_request(
         owner: Owner, query: Query, pagination: MetadataPagination, user: User):

     if owner == Owner.all_:

@@ -495,19 +495,19 @@ def _answer_entries_raw_request(
     uploads = _Uploads()
     try:
         response_data = [
-            _create_entry_raw(entry_metadata, uploads)
+            _create_entry_rawdir(entry_metadata, uploads)
             for entry_metadata in search_response.data]
     finally:
         uploads.close()

-    return EntriesRawResponse(
+    return EntriesRawDirResponse(
         owner=search_response.owner,
         query=search_response.query,
         pagination=search_response.pagination,
         data=response_data)


-def _answer_entries_raw_download_request(owner: Owner, query: Query, files: Files, user: User):
+def _answer_entries_raw_request(owner: Owner, query: Query, files: Files, user: User):
     if owner == Owner.all_:
         raise HTTPException(status_code=status.HTTP_401_UNAUTHORIZED, detail=strip('''
             The owner=all is not allowed for this operation as it will search for entries

@@ -558,16 +558,15 @@ def _answer_entries_raw_download_request(owner: Owner, query: Query, files: File
         raise

-_entries_raw_query_docstring = strip('''
+_entries_rawdir_query_docstring = strip('''
     Will perform a search and return a *page* of raw file metadata for entries fulfilling
     the query. This allows you to get a complete list of all rawfiles with their full
     path in their respective upload and their sizes. The first returned files for each
     entry, is their respective *mainfile*.

-    Each entry on NOMAD represents a set of raw files. These are the input and output
-    files (as well as additional auxiliary files) in their original form, i.e. as
-    provided by the uploader. More specifically, an entry represents a code-run identified
-    by a certain *mainfile*. This is usually the main output file of the code. All other
+    Each entry on NOMAD has a set of raw files. These are the files in their original form,
+    i.e. as provided by the uploader. More specifically, an entry has a *mainfile*, identified as
+    parseable. For CMS entries, the mainfile is usually the main output file of the code. All other
     files in the same directory are considered the entries *auxiliary* no matter their role
     or if they were actually parsed by NOMAD.

@@ -576,50 +575,49 @@ _entries_raw_query_docstring = strip('''
 @router.post(
-    '/raw/query',
+    '/rawdir/query',
     tags=[raw_tag],
     summary='Search entries and get their raw files metadata',
-    description=_entries_raw_query_docstring,
-    response_model=EntriesRawResponse,
+    description=_entries_rawdir_query_docstring,
+    response_model=EntriesRawDirResponse,
     responses=create_responses(_bad_owner_response),
     response_model_exclude_unset=True,
     response_model_exclude_none=True)
-async def post_entries_raw_query(
-        request: Request, data: EntriesRaw, user: User = Depends(create_user_dependency())):
+async def post_entries_rawdir_query(
+        request: Request, data: EntriesRawDir, user: User = Depends(create_user_dependency())):

-    return _answer_entries_raw_request(
+    return _answer_entries_rawdir_request(
         owner=data.owner, query=data.query, pagination=data.pagination, user=user)


 @router.get(
-    '/raw',
+    '/rawdir',
     tags=[raw_tag],
-    summary='Search entries and get raw their raw files metadata',
-    description=_entries_raw_query_docstring,
-    response_model=EntriesRawResponse,
+    summary='Search entries and get their raw files metadata',
+    description=_entries_rawdir_query_docstring,
+    response_model=EntriesRawDirResponse,
     response_model_exclude_unset=True,
     response_model_exclude_none=True,
     responses=create_responses(_bad_owner_response))
-async def get_entries_raw(
+async def get_entries_rawdir(
         request: Request,
         with_query: WithQuery = Depends(query_parameters),
         pagination: MetadataPagination = Depends(metadata_pagination_parameters),
         user: User = Depends(create_user_dependency())):

-    res = _answer_entries_raw_request(
+    res = _answer_entries_rawdir_request(
         owner=with_query.owner, query=with_query.query, pagination=pagination, user=user)
     res.pagination.populate_urls(request)
     return res


-_entries_raw_download_query_docstring = strip('''
-    This operation will perform a search and stream a .zip file with raw input and output
-    files of the found entries.
+_entries_raw_query_docstring = strip('''
+    This operation will perform a search and stream a .zip file with the raw files of the
+    found entries.

-    Each entry on NOMAD represents a set of raw files. These are the input and output
-    files (as well as additional auxiliary files) in their original form, i.e. as
-    provided by the uploader. More specifically, an entry represents a code-run identified
-    by a certain *mainfile*. This is usually the main output file of the code. All other
+    Each entry on NOMAD has a set of raw files. These are the files in their original form,
+    i.e. as provided by the uploader. More specifically, an entry has a *mainfile*, identified as
+    parseable. For CMS entries, the mainfile is usually the main output file of the code. All other
     files in the same directory are considered the entries *auxiliary* no matter their role
     or if they were actually parsed by NOMAD.

@@ -633,32 +631,32 @@ _entries_raw_download_query_docstring = strip('''
 @router.post(
-    '/raw/download/query',
+    '/raw/query',
     tags=[raw_tag],
     summary='Search entries and download their raw files',
-    description=_entries_raw_download_query_docstring,
+    description=_entries_raw_query_docstring,
     response_class=StreamingResponse,
-    responses=create_responses(_raw_download_response, _bad_owner_response))
-async def post_entries_raw_download_query(
-        data: EntriesRawDownload, user: User = Depends(create_user_dependency())):
+    responses=create_responses(_raw_response, _bad_owner_response))
+async def post_entries_raw_query(
+        data: EntriesRaw, user: User = Depends(create_user_dependency())):

-    return _answer_entries_raw_download_request(
+    return _answer_entries_raw_request(
         owner=data.owner, query=data.query, files=data.files, user=user)


 @router.get(
-    '/raw/download',
+    '/raw',
     tags=[raw_tag],
     summary='Search entries and download their raw files',
-    description=_entries_raw_download_query_docstring,
+    description=_entries_raw_query_docstring,
     response_class=StreamingResponse,
-    responses=create_responses(_raw_download_response, _bad_owner_response))
-async def get_entries_raw_download(
+    responses=create_responses(_raw_response, _bad_owner_response))
+async def get_entries_raw(
         with_query: WithQuery = Depends(query_parameters),
         files: Files = Depends(files_parameters),
         user: User = Depends(create_user_dependency(signature_token_auth_allowed=True))):

-    return _answer_entries_raw_download_request(
+    return _answer_entries_raw_request(
         owner=with_query.owner, query=with_query.query, files=files, user=user)

@@ -913,14 +911,14 @@ async def get_entry_metadata(
 @router.get(
-    '/{entry_id}/raw',
+    '/{entry_id}/rawdir',
     tags=[raw_tag],
     summary='Get the raw files metadata for an entry by its id',
-    response_model=EntryRawResponse,
+    response_model=EntryRawDirResponse,
     responses=create_responses(_bad_id_response),
     response_model_exclude_unset=True,
     response_model_exclude_none=True)
-async def get_entry_raw(
+async def get_entry_rawdir(
         entry_id: str = Path(..., description='The unique entry id of the entry to retrieve raw data from.'),
         user: User = Depends(create_user_dependency())):
     '''

@@ -940,18 +938,18 @@ async def get_entry_raw(
     uploads = _Uploads()
     try:
-        return EntryRawResponse(entry_id=entry_id, data=_create_entry_raw(response.data[0], uploads))
+        return EntryRawDirResponse(entry_id=entry_id, data=_create_entry_rawdir(response.data[0], uploads))
     finally:
         uploads.close()


 @router.get(
-    '/{entry_id}/raw/download',
+    '/{entry_id}/raw',
     tags=[raw_tag],
     summary='Get the raw data of an entry by its id',
     response_class=StreamingResponse,
-    responses=create_responses(_bad_id_response, _raw_download_response))
-async def get_entry_raw_download(
+    responses=create_responses(_bad_id_response, _raw_response))
+async def get_entry_raw(
         entry_id: str = Path(..., description='The unique entry id of the entry to retrieve raw data from.'),
         files: Files = Depends(files_parameters),
         user: User = Depends(create_user_dependency(signature_token_auth_allowed=True))):

@@ -969,16 +967,16 @@ async def get_entry_raw_download(
         raise HTTPException(
             status_code=status.HTTP_404_NOT_FOUND,
             detail='The entry with the given id does not exist or is not visible to you.')

-    return _answer_entries_raw_download_request(owner=Owner.visible, query=query, files=files, user=user)
+    return _answer_entries_raw_request(owner=Owner.visible, query=query, files=files, user=user)


 @router.get(
-    '/{entry_id}/raw/download/{path}',
+    '/{entry_id}/raw/{path}',
     tags=[raw_tag],
     summary='Get the raw data of an entry by its id',
     response_class=StreamingResponse,
-    responses=create_responses(_bad_id_response, _bad_path_response, _raw_download_file_response))
-async def get_entry_raw_download_file(
+    responses=create_responses(_bad_id_response, _bad_path_response, _raw_file_response))
+async def get_entry_raw_file(
         entry_id: str = Path(..., description='The unique entry id of the entry to retrieve raw data from.'),
         path: str = Path(..., description='A relative path to a file based on the directory of the entry\'s mainfile.'),
         offset: Optional[int] = QueryParameter(

@@ -1077,7 +1075,7 @@ def answer_entry_archive_request(query: Dict[str, Any], required: ArchiveRequire
     response_model_exclude_none=True,
     responses=create_responses(_bad_id_response))
 async def get_entry_archive(
-        entry_id: str = Path(..., description='The unique entry id of the entry to retrieve raw data from.'),
+        entry_id: str = Path(..., description='The unique entry id of the entry to retrieve archive data from.'),
         user: User = Depends(create_user_dependency())):
     '''
     Returns the full archive for the given `entry_id`.

@@ -1091,7 +1089,7 @@ async def get_entry_archive(
     summary='Get the archive for an entry by its id as plain archive json',
     responses=create_responses(_bad_id_response, _archive_download_response))
 async def get_entry_archive_download(
-        entry_id: str = Path(..., description='The unique entry id of the entry to retrieve raw data from.'),
+        entry_id: str = Path(..., description='The unique entry id of the entry to retrieve archive data from.'),
         user: User = Depends(create_user_dependency(signature_token_auth_allowed=True))):
     '''
     Returns the full archive for the given `entry_id`.

@@ -1110,7 +1108,7 @@ async def get_entry_archive_download(
     responses=create_responses(_bad_id_response, _bad_archive_required_response))
 async def post_entry_archive_query(
         data: EntryArchiveRequest, user: User = Depends(create_user_dependency()),
-        entry_id: str = Path(..., description='The unique entry id of the entry to retrieve raw data from.')):
+        entry_id: str = Path(..., description='The unique entry id of the entry to retrieve archive data from.')):
     '''
     Returns a partial archive for the given `entry_id` based on the `required` specified
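Taken together, the per-entry endpoints after the rename can be used like this (a sketch; the entry id is a placeholder, and taking the basename mirrors the GUI code above, since the file path is relative to the mainfile's directory):

```python
import os.path
import requests

base = 'https://nomad-lab.eu/prod/rae/api/v1'  # assumed installation URL
entry_id = 'SOME_ENTRY_ID'  # placeholder

# Raw file metadata (formerly GET /entries/{entry_id}/raw).
rawdir = requests.get(f'{base}/entries/{entry_id}/rawdir').json()
mainfile = rawdir['data']['mainfile']

# Stream one file; the path is relative to the mainfile's directory
# (formerly GET /entries/{entry_id}/raw/download/{path}).
mainfile_contents = requests.get(
    f'{base}/entries/{entry_id}/raw/{os.path.basename(mainfile)}').text

# Zip of all the entry's raw files (formerly GET /entries/{entry_id}/raw/download).
zip_bytes = requests.get(f'{base}/entries/{entry_id}/raw').content
```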
nomad/app/v1/routers/uploads.py

@@ -29,7 +29,7 @@ from fastapi.responses import StreamingResponse
 from fastapi.exceptions import RequestValidationError

 from nomad import utils, config, files
-from nomad.files import UploadFiles, StagingUploadFiles, UploadBundle, is_safe_relative_path
+from nomad.files import StagingUploadFiles, UploadBundle, is_safe_relative_path
 from nomad.processing import Upload, Entry, ProcessAlreadyRunning, ProcessStatus, MetadataEditRequestHandler
 from nomad.utils import strip
 from nomad.search import search

@@ -229,19 +229,53 @@ class EntryProcDataQueryResponse(BaseModel):
     '''))


-class DirectoryListLine(BaseModel):
-    name: str = Field()
-    is_file: bool = Field()
-    size: int = Field()
-    access: str = Field()
+class RawDirPagination(Pagination):
+    @validator('order_by')
+    def validate_order_by(cls, order_by):  # pylint: disable=no-self-argument
+        assert not order_by, 'Cannot specify `order_by` for rawdir calls'
+        return None
+
+    @validator('page_after_value')
+    def validate_page_after_value(cls, page_after_value, values):  # pylint: disable=no-self-argument
+        # Validation handled elsewhere
+        return page_after_value
+
+
+rawdir_pagination_parameters = parameter_dependency_from_model(
+    'rawdir_pagination_parameters', RawDirPagination, exclude=['order', 'order_by'])
+
+
+class RawDirFileMetadata(BaseModel):
+    ''' Metadata about a file '''
+    name: str = Field()
+    size: Optional[int] = Field()
+    entry_id: Optional[str] = Field(description=strip('''
+        If this is a mainfile: the ID of the corresponding entry.'''))
+    parser_name: Optional[str] = Field(description=strip('''
+        If this is a mainfile: the name of the matched parser.'''))
+
+
+class RawDirElementMetadata(RawDirFileMetadata):
+    ''' Metadata about an directory *element*, i.e. a file or a directory '''
+    is_file: bool = Field()
+
+
+class RawDirDirectoryMetadata(BaseModel):
+    ''' Metadata about a directory '''
+    name: str = Field()
+    size: Optional[int] = Field()
+    content: List[RawDirElementMetadata] = Field(example=[
+        {'name': 'a_directory', 'is_file': False, 'size': 456},
+        {'name': 'a_file.json', 'is_file': True, 'size': 123, 'entry_id': 'XYZ', 'parser_name': 'parsers/vasp'}])


-class DirectoryListResponse(BaseModel):
+class RawDirResponse(BaseModel):
     path: str = Field(example='The/requested/path')
-    content: List[DirectoryListLine] = Field(example=[
-        {'name': 'a_directory', 'is_file': False, 'size': 456, 'access': 'public'},
-        {'name': 'a_file.json', 'is_file': True, 'size': 123, 'access': 'restricted'}])
+    access: str = Field()
+    file_metadata: Optional[RawDirFileMetadata] = Field()
+    directory_metadata: Optional[RawDirDirectoryMetadata] = Field()
+    pagination: Optional[PaginationResponse] = Field()


 class UploadCommandExamplesResponse(BaseModel):

@@ -307,18 +341,12 @@ _upload_response = 200, {
     `Accept = application/json`, otherwise a plain text information string.''')}

 _raw_path_response = 200, {
-    'model': DirectoryListResponse,
     'content': {
-        'application/json': {},
-        'text/html': {'example': '<html defining a list of directory content>'},
         'application/octet-stream': {'example': 'file data'},
         'application/zip': {'example': '<zipped file or directory content>'}},
     'description': strip('''
         If `path` denotes a file: a stream with the file content, zipped if `compress = true`.
-        If `path` denotes a directory, and `compress = true`, the directory content, zipped.
-        If `path` denotes a directory, and `compress = false`, a list of the directory
-        content, either encoded as json or html, depending on the request headers (json if
-        `Accept = application/json`, html otherwise).''')}
+        If `path` denotes a directory, and `compress = true`, the directory content, zipped.''')}

 _upload_bundle_response = 200, {
     'content': {

@@ -545,6 +573,100 @@ async def get_upload_entry(
     return EntryProcDataResponse(entry_id=entry_id, data=data)


+@router.get(
+    '/{upload_id}/rawdir/{path:path}', tags=[raw_tag],
+    summary='Get the raw files and folders metadata for a given upload and path.',
+    response_model=RawDirResponse,
+    responses=create_responses(_upload_or_path_not_found, _not_authorized_to_upload, _bad_request),
+    response_model_exclude_unset=True,
+    response_model_exclude_none=True)
+async def get_upload_rawdir_path(
+        request: Request,
+        upload_id: str = Path(..., description='The unique id of the upload.'),
+        path: str = Path(..., description='The path within the upload raw files.'),
+        pagination: RawDirPagination = Depends(rawdir_pagination_parameters),
+        include_entry_info: bool = FastApiQuery(False, description=strip('''
+            If the fields `entry_id` and `parser_name` should be populated for all
+            encountered mainfiles.''')),
+        user: User = Depends(create_user_dependency(required=False, signature_token_auth_allowed=True))):
+    '''
+    For the upload specified by `upload_id`, gets the raw file or directory metadata
+    located at the given `path`. The response will either contain a `file_metadata` or
+    `directory_metadata` key. For files, basic data about the file is returned, such as its
+    name and size. For directories, the response includes a list of elements
+    (files and folders) in the directory. For directories, the result is paginated.
+    '''
+    # Get upload
+    upload = _get_upload_with_read_access(upload_id, user, include_others=True)
+    try:
+        # Get upload files
+        upload_files = upload.upload_files
+        if not upload_files.raw_path_exists(path):
+            raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=strip('''
+                Not found. Invalid path?'''))
+        response = RawDirResponse(
+            path=path.rstrip('/'),
+            access='unpublished' if not upload.published else (
+                'embargoed' if upload.embargo_length else 'public'))
+        if upload_files.raw_path_is_file(path):
+            response.file_metadata = RawDirFileMetadata(
+                name=os.path.basename(path),
+                size=upload_files.raw_file_size(path))
+            if include_entry_info:
+                entry: Entry = Entry.objects(upload_id=upload_id, mainfile=path).first()
+                if entry:
+                    response.file_metadata.entry_id = entry.entry_id
+                    response.file_metadata.parser_name = entry.parser_name
+        else:
+            start = pagination.get_simple_index()
+            end = start + pagination.page_size
+            directory_list = upload_files.raw_directory_list(path)
+            upload_files.close()
+            content = []
+            path_to_element: Dict[str, RawDirElementMetadata] = {}
+            total = 0
+            total_size = 0
+            for i, path_info in enumerate(directory_list):
+                total += 1
+                total_size += path_info.size
+                if start <= i < end:
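Finally, a sketch of calling the new upload rawdir endpoint; the upload id and path are placeholders, and the printed fields follow the `RawDirResponse` model above:

```python
import requests

base = 'https://nomad-lab.eu/prod/rae/api/v1'  # assumed installation URL
response = requests.get(
    f'{base}/uploads/SOME_UPLOAD_ID/rawdir/some/path',  # placeholders
    params={'page_size': 10, 'include_entry_info': True})
rawdir = response.json()

print(rawdir['access'])  # 'public', 'embargoed', or 'unpublished'
if 'directory_metadata' in rawdir:
    # A directory: list its (paginated) elements, as the GUI FilesBrowser does.
    for element in rawdir['directory_metadata']['content']:
        kind = 'file' if element['is_file'] else 'dir'
        print(kind, element['name'], element.get('size'))
else:
    # A single file: only its basic metadata is returned.
    print('file', rawdir['file_metadata']['name'], rawdir['file_metadata']['size'])
```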