Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
nomad-lab
nomad-FAIR
Commits
d63b17d1
Commit
d63b17d1
authored
Apr 29, 2019
by
Markus Scheidgen
Browse files
Added POTCAR restriction and stripping
#150
.
parent
cfc3fba7
Changes
6
Hide whitespace changes
Inline
Side-by-side
nomad/files.py
View file @
d63b17d1
...
...
@@ -66,6 +66,15 @@ from nomad.datamodel import UploadWithMetadata
user_metadata_filename
=
'user_metadata.pickle'
def always_restricted(path: str) -> bool:
    """
    Used to put general restrictions on files, e.g. due to licensing issues. Will be
    called during packing and while accessing public files.

    Arguments:
        path: The path of the file to check. Only its basename is considered.

    Returns:
        True if the basename of the path is exactly ``POTCAR``, False otherwise.
    """
    # Always answer with a real bool. Non-matching paths used to fall through and
    # produce an implicit None.
    return os.path.basename(path) == 'POTCAR'
class
PathObject
:
"""
Object storage-like abstraction for paths in general.
...
...
@@ -403,7 +412,8 @@ class StagingUploadFiles(UploadFiles):
mainfile
=
calc
.
mainfile
assert
mainfile
is
not
None
for
filepath
in
self
.
calc_files
(
mainfile
):
public_files
[
filepath
]
=
None
if
not
always_restricted
(
filepath
):
public_files
[
filepath
]
=
None
# 1.2 remove the non public mainfiles that have been added as auxfiles of public mainfiles
for
calc
in
upload
.
calcs
:
if
calc
.
with_embargo
:
...
...
@@ -569,9 +579,9 @@ class PublicUploadFiles(UploadFiles):
try
:
zip_file
=
self
.
join_file
(
'%s-%s.%s.zip'
%
(
prefix
,
access
,
ext
))
with
ZipFile
(
zip_file
.
os_path
)
as
zf
:
f
=
zf
.
open
(
path
,
'r'
,
**
kwargs
)
if
access
==
'restricted'
and
not
self
.
_is_authorized
():
if
(
access
==
'restricted'
or
always_restricted
(
path
))
and
not
self
.
_is_authorized
():
raise
Restricted
f
=
zf
.
open
(
path
,
'r'
,
**
kwargs
)
if
't'
in
mode
:
return
io
.
TextIOWrapper
(
f
)
else
:
...
...
nomad/migration.py
View file @
d63b17d1
...
...
@@ -498,6 +498,16 @@ class SourceCalc(Document):
source_calc
=
SourceCalc
(
pid
=
calc
.
pid
)
source_calc
.
upload
=
segments
[
0
]
source_calc
.
mainfile
=
os
.
path
.
join
(
*
segments
[
1
:])
# this is taken from metadata.location and has inconsistent directory prefix,
# but is more accurate than taking the first file as mainfile, which
# also is sometimes not the actual mainfile.
if
calc
.
mainfile
is
not
None
:
calc_mainfile
=
os
.
path
.
basename
(
calc
.
mainfile
)
if
calc_mainfile
!=
os
.
path
.
basename
(
source_calc
.
mainfile
):
source_calc
.
mainfile
=
os
.
path
.
join
(
os
.
path
.
dirname
(
source_calc
.
mainfile
),
calc_mainfile
)
if
with_metadata
:
source_calc
.
metadata
=
calc
.
to_calc_with_metadata
().
__dict__
source_calcs
.
append
(
source_calc
)
...
...
@@ -773,7 +783,6 @@ class NomadCOEMigration:
logger
=
self
.
logger
.
bind
(
package_id
=
package
.
package_id
,
source_upload_id
=
package
.
upload_id
)
if
package
.
migration_version
is
not
None
and
package
.
migration_version
>=
self
.
migration_version
:
if
only_republish
:
self
.
republish_package
(
package
)
...
...
nomad/processing/data.py
View file @
d63b17d1
...
...
@@ -32,6 +32,7 @@ from contextlib import contextmanager
import
os.path
from
datetime
import
datetime
from
pymongo
import
UpdateOne
import
hashlib
from
nomad
import
utils
,
coe_repo
,
config
,
infrastructure
,
search
,
datamodel
from
nomad.files
import
PathObject
,
UploadFiles
,
ExtractError
,
ArchiveBasedStagingUploadFiles
,
PublicUploadFiles
,
StagingUploadFiles
...
...
@@ -163,6 +164,7 @@ class Calc(Proc):
calc_with_metadata
.
nomad_commit
=
config
.
commit
calc_with_metadata
.
last_processing
=
datetime
.
now
()
calc_with_metadata
.
files
=
self
.
upload_files
.
calc_files
(
self
.
mainfile
)
self
.
preprocess_files
(
calc_with_metadata
.
files
)
self
.
metadata
=
calc_with_metadata
.
to_dict
()
self
.
parsing
()
...
...
@@ -213,6 +215,36 @@ class Calc(Proc):
self
.
upload
.
reload
()
self
.
upload
.
check_join
()
def preprocess_files(self, filepaths):
    """
    Creates a stripped companion file for every ``POTCAR`` among the given files.

    For each path whose basename is exactly ``POTCAR``, a ``<path>.stripped`` file is
    written. It starts with a header line that carries the sha224 checksum of the
    original file, followed by the lines of the original, except for those between
    an ``END of PSCTR`` line and the next ``End of Dataset`` line (both of these
    marker lines are kept).

    Arguments:
        filepaths: A mutable list of raw file paths. Each path is resolved to a file
            system path via ``self.upload_files.raw_file_object(path).os_path``.
            The list is extended in place with the paths of the stripped files.

    Returns:
        The same ``filepaths`` list object, including the added stripped paths.
    """
    # Iterate over a snapshot, because the list itself is extended while processing.
    for path in list(filepaths):
        if os.path.basename(path) != 'POTCAR':
            continue

        source_os_path = self.upload_files.raw_file_object(path).os_path
        stripped_path = path + '.stripped'
        stripped_os_path = self.upload_files.raw_file_object(stripped_path).os_path

        # create checksum, reading in chunks to not hold the whole file in memory
        checksum = hashlib.sha224()
        with open(source_os_path, 'rb') as source:
            for chunk in iter(lambda: source.read(65536), b''):
                checksum.update(chunk)

        header = 'Stripped POTCAR file. Checksum of original file (sha224): %s\n' % (
            checksum.hexdigest())

        # Create the stripped POTCAR. This is done in Python instead of piping the
        # file through awk in a shell, so that paths with spaces or quotes can
        # neither break nor inject into a shell command. The filter is the same:
        # dumping starts enabled, is (re-)enabled by an 'End of Dataset' line, and
        # is disabled after an 'END of PSCTR' line has been written.
        dump = True
        with open(source_os_path, 'rb') as source, open(stripped_os_path, 'wb') as stripped:
            stripped.write(header.encode('ascii'))
            for line in source:
                if b'End of Dataset' in line:
                    dump = True
                if dump:
                    # every written line is newline terminated, even a final
                    # line without a trailing newline
                    stripped.write(line if line.endswith(b'\n') else line + b'\n')
                if b'END of PSCTR' in line:
                    dump = False

        # do not list the stripped file twice, e.g. if it was already part of the files
        if stripped_path not in filepaths:
            filepaths.append(stripped_path)

    return filepaths
@
task
def
parsing
(
self
):
context
=
dict
(
parser
=
self
.
parser
,
step
=
self
.
parser
)
...
...
tests/data/migration/example_source_db.sql
View file @
d63b17d1
...
...
@@ -40,8 +40,8 @@ INSERT INTO public.tags VALUES(2, 7);
INSERT
INTO
public
.
tags
VALUES
(
1
,
8
);
INSERT
INTO
public
.
tags
VALUES
(
2
,
8
);
INSERT
INTO
public
.
metadata
VALUES
(
1
,
NULL
,
NULL
,
NULL
,
NULL
,
'BrKSi2'
,
'2019-01-01 12:00:00'
,
NULL
,
decode
(
'["$EXTRACTED/upload/1/template.json"]'
,
'escape'
),
1
,
NULL
);
INSERT
INTO
public
.
metadata
VALUES
(
1
,
NULL
,
NULL
,
NULL
,
NULL
,
'BrKSi2'
,
'2015-01-01 13:00:00'
,
NULL
,
decode
(
'["$EXTRACTED/upload/2/
template.json
"]'
,
'escape'
),
2
,
NULL
);
INSERT
INTO
public
.
metadata
VALUES
(
1
,
'different/prefix/template.json'
,
NULL
,
NULL
,
NULL
,
'BrKSi2'
,
'2019-01-01 12:00:00'
,
NULL
,
decode
(
'["$EXTRACTED/upload/1/template.json"]'
,
'escape'
),
1
,
NULL
);
INSERT
INTO
public
.
metadata
VALUES
(
1
,
'different/prefix/template.json'
,
NULL
,
NULL
,
NULL
,
'BrKSi2'
,
'2015-01-01 13:00:00'
,
NULL
,
decode
(
'["$EXTRACTED/upload/2/
wrong_mainfile
"]'
,
'escape'
),
2
,
NULL
);
INSERT
INTO
public
.
spacegroups
VALUES
(
1
,
123
);
INSERT
INTO
public
.
spacegroups
VALUES
(
2
,
123
);
INSERT
INTO
public
.
user_metadata
VALUES
(
1
,
0
,
'label1'
);
...
...
tests/data/proc/examples_potcar.zip
0 → 100644
View file @
d63b17d1
File added
tests/test_api.py
View file @
d63b17d1
...
...
@@ -445,6 +445,21 @@ class TestUploads:
# content_type='application/json')
# assert rv.status_code == 400
def test_potcar(self, client, proc_infra, test_user_auth):
    """
    Uploads and publishes the POTCAR example and checks raw file access: the POTCAR
    itself answers 401 without credentials and 200 with them, while the stripped
    POTCAR answers 200 without credentials.
    """
    # upload, process, and publish the example
    example_file = 'tests/data/proc/examples_potcar.zip'
    put_response = client.put(
        '/uploads/?local_path=%s' % example_file, headers=test_user_auth)
    upload_id = self.assert_upload(put_response.data)['upload_id']
    self.assert_processing(client, test_user_auth, upload_id)
    self.assert_published(
        client, test_user_auth, upload_id, proc_infra, with_coe_repo=True)

    # the original POTCAR without and with credentials
    potcar_url = '/raw/%s/examples_potcar/POTCAR' % upload_id
    anonymous_response = client.get(potcar_url)
    assert anonymous_response.status_code == 401
    authorized_response = client.get(potcar_url, headers=test_user_auth)
    assert authorized_response.status_code == 200

    # the stripped POTCAR without credentials
    stripped_response = client.get('/raw/%s/examples_potcar/POTCAR.stripped' % upload_id)
    assert stripped_response.status_code == 200
class
UploadFilesBasedTests
:
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment