nomad-lab / nomad-FAIR · commit a6456e2d

authored Jul 19, 2019 by Markus Scheidgen

Improved the auxfile handling. Some fixes to the rawfile API. Completes the implementation of #171.

parent 5b668898
Pipeline #52158 passed with stages in 29 minutes and 5 seconds
9 changed files
gui/src/components/uploads/Upload.js

@@ -354,7 +354,20 @@ class Upload extends React.Component {
     const renderRow = (calc, index) => {
       const {mainfile, upload_id, calc_id, parser, tasks, current_task, tasks_status, errors} = calc
-      const color = tasks_status === 'FAILURE' ? 'error' : 'default'
+      let tooltip_html = null
+      let color = tasks_status === 'FAILURE' ? 'error' : 'default'
+      if (tasks_status === 'FAILURE') {
+        tooltip_html = `Calculation processing failed with errors: ${errors.join(', ')}`
+        color = 'error'
+      }
+      if (calc.errors.length > 0) {
+        color = 'error'
+        tooltip_html = `Calculation processed with errors: ${calc.errors.join(', ')}`
+      }
+      if (calc.warnings.length > 0) {
+        color = 'error'
+        tooltip_html = `Calculation processed with warnings: ${calc.warnings.join(', ')}`
+      }
       const processed = tasks_status === 'FAILURE' || tasks_status === 'SUCCESS'
       const row = (
         <TableRow key={index} hover={processed}
@@ -391,10 +404,9 @@ class Upload extends React.Component {
         </TableRow>
       )
-      if (tasks_status === 'FAILURE') {
-        const error_html = `Calculation processing failed with errors: ${errors.join(', ')}`
+      if (tooltip_html) {
         return (
-          <Tooltip key={calc_id} title={error_html}>
+          <Tooltip key={calc_id} title={tooltip_html}>
             {row}
           </Tooltip>
         )
nomad/api/raw.py

@@ -100,7 +100,8 @@ class FileView:
         return self.f.read(size)
 
 
-def get_raw_file_from_upload_path(upload_files, upload_filepath, authorization_predicate):
+def get_raw_file_from_upload_path(
+        upload_files, upload_filepath, authorization_predicate, mainfile: str = None):
     """
     Helper method used by func:`RawFileFromUploadPathResource.get` and
     func:`RawFileFromCalcPathResource.get`.
@@ -148,11 +149,14 @@ def get_raw_file_from_upload_path(upload_files, upload_filepath, authorization_p
         directory_files = upload_files.raw_file_list(upload_filepath)
         if len(directory_files) == 0:
             abort(404, message='There is nothing to be found at %s.' % upload_filepath)
 
+        contents = sorted(
+            [dict(name=name, size=size) for name, size in directory_files],
+            key=lambda x: '' if x['name'] == mainfile else x['name'])
+        # if mainfile is not None:
+        #     contents = [mainfile] + [content for content in contents if content['name'] != mainfile]
         return {
             'upload_id': upload_files.upload_id,
             'directory': upload_filepath,
-            'contents': [
-                dict(name=name, size=size)
-                for name, size in directory_files]
+            'contents': contents
         }, 200
@@ -252,7 +256,9 @@ class RawFileFromCalcPathResource(Resource):
             abort(404, message='The calc with id %s is not part of the upload with id %s.' % (calc_id, upload_id))
 
         upload_filepath = os.path.join(os.path.dirname(calc.mainfile), calc_filepath)
-        return get_raw_file_from_upload_path(upload_files, upload_filepath, authorization_predicate)
+        return get_raw_file_from_upload_path(
+            upload_files, upload_filepath, authorization_predicate,
+            mainfile=os.path.basename(calc.mainfile))
 
 
 @ns.route('/calc/<string:upload_id>/<string:calc_id>/')
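The detail worth pausing on in this hunk is the sort key: mapping the mainfile's name to the empty string makes it sort before every other entry, so clients of the raw API see the calculation's mainfile first in a directory listing. A minimal, self-contained sketch of that ordering trick (the file names and sizes are invented for illustration):

    # Sketch of the mainfile-first ordering used above; sample data is made up.
    directory_files = [('aux_b.dat', 10), ('INCAR', 200), ('aux_a.dat', 5)]
    mainfile = 'INCAR'

    contents = sorted(
        [dict(name=name, size=size) for name, size in directory_files],
        # the mainfile maps to '', which sorts before any non-empty name
        key=lambda x: '' if x['name'] == mainfile else x['name'])

    print([entry['name'] for entry in contents])
    # ['INCAR', 'aux_a.dat', 'aux_b.dat']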
nomad/config.py

@@ -176,7 +176,7 @@ commit = gitinfo.commit
 release = 'devel'
 domain = 'DFT'
 service = 'unknown nomad service'
-auxfile_cutoff = 30
+auxfile_cutoff = 100
 
 console_log_level = logging.WARNING
nomad/datamodel/base.py

@@ -85,7 +85,6 @@ class CalcWithMetadata():
         # basic upload and processing related metadata
         self.upload_time: datetime.datetime = None
         self.files: List[str] = None
-        self.file_sizes: List[int] = None
         self.uploader: utils.POPO = None
         self.processed: bool = False
         self.last_processing: datetime.datetime = None
nomad/files.py

@@ -468,9 +468,11 @@ class StagingUploadFiles(UploadFiles):
             if not calc.with_embargo:
                 mainfile = calc.mainfile
                 assert mainfile is not None
-                for filepath in self.calc_files(mainfile):
-                    if not always_restricted(filepath):
-                        public_files[filepath] = None
+                # the mainfile might already have been added as an auxfile of another calc
+                if mainfile not in public_files:
+                    for filepath in self.calc_files(mainfile, with_cutoff=False):
+                        if not always_restricted(filepath):
+                            public_files[filepath] = None
 
         # 1.2 remove the non-public mainfiles that have been added as auxfiles of public mainfiles
         for calc in upload.calcs:
             if calc.with_embargo:
@@ -542,7 +544,7 @@ class StagingUploadFiles(UploadFiles):
         return results
 
-    def calc_files(self, mainfile: str, with_mainfile: bool = True) -> Iterable[str]:
+    def calc_files(self, mainfile: str, with_mainfile: bool = True, with_cutoff: bool = True) -> Iterable[str]:
         """
         Returns all the auxfiles and mainfile for a given mainfile. This implements
         nomad's logic about what is part of a calculation and what not. The mainfile
@@ -559,16 +561,21 @@ class StagingUploadFiles(UploadFiles):
         calc_dir = os.path.dirname(mainfile_object.os_path)
         calc_relative_dir = calc_dir[len(self._raw_dir.os_path) + 1:]
 
-        ls = os.listdir(calc_dir)
-        if len(ls) > config.auxfile_cutoff:
-            # If there are too many of them, it is probably just a directory with lots of
-            # calculations. In this case it does not make any sense to provide thousands of
-            # aux files.
-            return [mainfile] if with_mainfile else []
-
-        aux_files = sorted(
-            os.path.join(calc_relative_dir, path) for path in ls
-            if os.path.isfile(os.path.join(calc_dir, path)) and path != mainfile_basename)
+        file_count = 0
+        aux_files: List[str] = []
+        for filename in os.listdir(calc_dir):
+            if filename != mainfile_basename and os.path.isfile(os.path.join(calc_dir, filename)):
+                aux_files.append(os.path.join(calc_relative_dir, filename))
+                file_count += 1
+                if with_cutoff and file_count > config.auxfile_cutoff:
+                    # If there are too many of them, it is probably just a directory with lots of
+                    # calculations. In this case it does not make any sense to provide thousands of
+                    # aux files.
+                    break
+
+        aux_files = sorted(aux_files)
 
         if with_mainfile:
             return [mainfile] + aux_files
         else:
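The shape of the new loop is the point of this hunk: instead of listing the directory and bailing out wholesale when it is too large, it collects aux files one by one and stops once the cutoff is exceeded, so a calculation in an oversized directory still gets a bounded file list rather than none. A standalone sketch of that pattern, assuming a plain directory path and a stand-in cutoff constant (the real method resolves paths relative to the upload's raw directory):

    import os
    from typing import List

    AUXFILE_CUTOFF = 100  # stand-in for config.auxfile_cutoff

    def bounded_aux_files(calc_dir: str, mainfile_basename: str,
                          with_cutoff: bool = True) -> List[str]:
        # Collect sibling files of the mainfile, but stop early once the cutoff
        # is exceeded: a huge directory is likely many calculations, not auxfiles.
        aux_files: List[str] = []
        file_count = 0
        for filename in os.listdir(calc_dir):
            if filename != mainfile_basename and os.path.isfile(os.path.join(calc_dir, filename)):
                aux_files.append(filename)
                file_count += 1
                if with_cutoff and file_count > AUXFILE_CUTOFF:
                    break
        return sorted(aux_files)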
nomad/processing/data.py

@@ -164,9 +164,13 @@ class Calc(Proc):
         calc_with_metadata.nomad_commit = config.commit
         calc_with_metadata.last_processing = datetime.now()
         calc_with_metadata.files = self.upload_files.calc_files(self.mainfile)
-        self.preprocess_files(calc_with_metadata.files)
         self.metadata = calc_with_metadata.to_dict()
 
+        if len(calc_with_metadata.files) >= config.auxfile_cutoff:
+            self.warning(
+                'This calc has many aux files in its directory. '
+                'Have you placed many calculations in the same directory?')
+
         self.parsing()
         self.normalizing()
         self.archiving()
@@ -215,36 +219,6 @@ class Calc(Proc):
         self.upload.reload()
         self.upload.check_join()
 
-    def preprocess_files(self, filepaths):
-        for path in filepaths:
-            if os.path.basename(path) == 'POTCAR':
-                # create checksum
-                hash = hashlib.sha224()
-                with open(self.upload_files.raw_file_object(path).os_path, 'rb') as f:
-                    for line in f.readlines():
-                        hash.update(line)
-
-                checksum = hash.hexdigest()
-
-                # create stripped POTCAR
-                stripped_path = path + '.stripped'
-                with open(self.upload_files.raw_file_object(stripped_path).os_path, 'wt') as f:
-                    f.write('Stripped POTCAR file. Checksum of original file (sha224): %s\n' % checksum)
-                os.system(
-                    '''
-                        awk < %s >> %s '
-                        BEGIN { dump=1 }
-                        /End of Dataset/ { dump=1 }
-                        dump==1 { print }
-                        /END of PSCTR/ { dump=0 }'
-                    ''' % (
-                        self.upload_files.raw_file_object(path).os_path,
-                        self.upload_files.raw_file_object(stripped_path).os_path))
-
-                filepaths.append(stripped_path)
-
-        return filepaths
-
     @task
     def parsing(self):
         context = dict(parser=self.parser, step=self.parser)
@@ -652,6 +626,35 @@ class Upload(Proc):
             self.fail('bad .zip/.tar file', log_level=logging.INFO)
             return
 
+    def _preprocess_files(self, path):
+        """
+        Some files need preprocessing. Currently we need to add a stripped POTCAR version
+        and always restrict/embargo the original.
+        """
+        if os.path.basename(path) == 'POTCAR':
+            # create checksum
+            hash = hashlib.sha224()
+            with open(self.staging_upload_files.raw_file_object(path).os_path, 'rb') as orig_f:
+                for line in orig_f.readlines():
+                    hash.update(line)
+
+            checksum = hash.hexdigest()
+
+            # create stripped POTCAR
+            stripped_path = path + '.stripped'
+            with open(self.staging_upload_files.raw_file_object(stripped_path).os_path, 'wt') as stripped_f:
+                stripped_f.write('Stripped POTCAR file. Checksum of original file (sha224): %s\n' % checksum)
+            os.system(
+                '''
+                    awk < %s >> %s '
+                    BEGIN { dump=1 }
+                    /End of Dataset/ { dump=1 }
+                    dump==1 { print }
+                    /END of PSCTR/ { dump=0 }'
+                ''' % (
+                    self.staging_upload_files.raw_file_object(path).os_path,
+                    self.staging_upload_files.raw_file_object(stripped_path).os_path))
+
     def match_mainfiles(self) -> Generator[Tuple[str, object], None, None]:
         """
         Generator function that matches all files in the upload to all parsers to
@@ -663,6 +666,7 @@ class Upload(Proc):
         directories_with_match: Dict[str, str] = dict()
         upload_files = self.staging_upload_files
         for filename in upload_files.raw_file_manifest():
+            self._preprocess_files(filename)
             try:
                 parser = match_parser(filename, upload_files)
                 if parser is not None:
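The awk program embedded above does the actual stripping: it prints the header up to and including the 'END of PSCTR' line, suppresses the pseudopotential data that follows, and re-enables printing at each 'End of Dataset' marker. A pure-Python rendering of that filter, offered only to clarify what the awk rules do (it is not part of the commit):

    from typing import Iterable, Iterator

    def strip_potcar_lines(lines: Iterable[str]) -> Iterator[str]:
        # Mirrors the awk rules in order: BEGIN { dump=1 };
        # /End of Dataset/ { dump=1 }; dump==1 { print }; /END of PSCTR/ { dump=0 }.
        dump = True
        for line in lines:
            if 'End of Dataset' in line:
                dump = True          # a dataset boundary re-enables output
            if dump:
                yield line           # header and boundary lines are kept
            if 'END of PSCTR' in line:
                dump = False         # suppress the potential data that follows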
tests/data/proc/examples_large_dir.zip (new file, mode 100644)

Binary file added.
tests/processing/test_data.py

@@ -109,6 +109,14 @@ def test_processing(processed, no_warn, mails, monkeypatch):
     assert re.search(r'Processing completed', mails.messages[0].data.decode('utf-8')) is not None
 
 
+def test_processing_with_large_dir(test_user, proc_infra):
+    upload_path = 'tests/data/proc/examples_large_dir.zip'
+    upload_id = upload_path[:-4]
+    upload = run_processing((upload_id, upload_path), test_user)
+    for calc in upload.calcs:
+        assert len(calc.warnings) == 1
+
+
 def test_publish(non_empty_processed: Upload, no_warn, example_user_metadata, with_publish_to_coe_repo, monkeypatch):
     processed = non_empty_processed
     processed.compress_and_set_metadata(example_user_metadata)
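The new test depends on the added examples_large_dir.zip fixture: an archive whose directory holds more files than auxfile_cutoff, so every calc processed from it should carry exactly one "many aux files" warning. A hypothetical sketch of how such a fixture could be generated; the file names and counts are invented, since the fixture's actual contents are not shown in the commit:

    import zipfile

    def make_large_dir_zip(path: str = 'examples_large_dir.zip', n_aux: int = 150) -> None:
        # Hypothetical fixture generator: one mainfile plus more sibling files
        # than config.auxfile_cutoff (100 after this commit) in a single directory.
        with zipfile.ZipFile(path, 'w') as zf:
            zf.writestr('examples/mainfile.json', '{}')  # invented mainfile name
            for i in range(n_aux):
                zf.writestr('examples/aux_%03d.dat' % i, 'auxiliary data\n')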
tests/test_api.py

@@ -460,6 +460,7 @@ class TestUploads:
         # assert rv.status_code == 400
 
     def test_potcar(self, client, proc_infra, test_user_auth):
+        # only the owner and the people the upload is shared with may download the original POTCAR file
         example_file = 'tests/data/proc/examples_potcar.zip'
         rv = client.put('/uploads/?local_path=%s' % example_file, headers=test_user_auth)