Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
nomad-lab
nomad-FAIR
Commits
435efe1f
Commit
435efe1f
authored
Jan 12, 2019
by
Markus Scheidgen
Browse files
Replaced calc_hash with calc_id.
parent
14cb7dee
Changes
21
Hide whitespace changes
Inline
Side-by-side
nomad/api/app.py
View file @
435efe1f
...
...
@@ -95,7 +95,7 @@ def with_logger(func):
args
=
inspect
.
getcallargs
(
wrapper
,
*
args
,
**
kwargs
)
logger_args
=
{
k
:
v
for
k
,
v
in
args
.
items
()
if
k
in
[
'upload_id'
,
'calc_
hash
'
]}
if
k
in
[
'upload_id'
,
'calc_
id
'
]}
logger
=
utils
.
get_logger
(
__name__
,
**
logger_args
)
args
.
update
(
logger
=
logger
)
try
:
...
...
nomad/api/archive.py
View file @
435efe1f
...
...
@@ -42,28 +42,28 @@ class ArchiveCalcLogResource(Resource):
@
api
.
response
(
401
,
'Not authorized to access the data.'
)
@
api
.
response
(
200
,
'Archive data send'
,
headers
=
{
'Content-Type'
:
'application/plain'
})
@
login_if_available
def
get
(
self
,
upload_id
,
calc_
hash
):
def
get
(
self
,
upload_id
,
calc_
id
):
"""
Get calculation processing log.
Calcs are references via *upload_id*, *calc_
hash
* pairs.
Calcs are references via *upload_id*, *calc_
id
* pairs.
"""
archive_id
=
'%s/%s'
%
(
upload_id
,
calc_
hash
)
archive_id
=
'%s/%s'
%
(
upload_id
,
calc_
id
)
upload_files
=
UploadFiles
.
get
(
upload_id
,
is_authorized
=
create_authorization_predicate
(
upload_id
,
calc_
hash
))
upload_id
,
is_authorized
=
create_authorization_predicate
(
upload_id
,
calc_
id
))
if
upload_files
is
None
:
abort
(
404
,
message
=
'Upload %s does not exist.'
%
upload_id
)
try
:
return
send_file
(
upload_files
.
archive_log_file
(
calc_
hash
,
'rt'
),
upload_files
.
archive_log_file
(
calc_
id
,
'rt'
),
mimetype
=
'text/plain'
,
as_attachment
=
True
,
attachment_filename
=
'%s.log'
%
archive_id
)
except
Restricted
:
abort
(
401
,
message
=
'Not authorized to access %s/%s.'
%
(
upload_id
,
calc_
hash
))
abort
(
401
,
message
=
'Not authorized to access %s/%s.'
%
(
upload_id
,
calc_
id
))
except
KeyError
:
abort
(
404
,
message
=
'Calculation %s does not exist.'
%
archive_id
)
...
...
@@ -75,28 +75,28 @@ class ArchiveCalcResource(Resource):
@
api
.
response
(
401
,
'Not authorized to access the data.'
)
@
api
.
response
(
200
,
'Archive data send'
)
@
login_if_available
def
get
(
self
,
upload_id
,
calc_
hash
):
def
get
(
self
,
upload_id
,
calc_
id
):
"""
Get calculation data in archive form.
Calcs are references via *upload_id*, *calc_
hash
* pairs.
Calcs are references via *upload_id*, *calc_
id
* pairs.
"""
archive_id
=
'%s/%s'
%
(
upload_id
,
calc_
hash
)
archive_id
=
'%s/%s'
%
(
upload_id
,
calc_
id
)
upload_file
=
UploadFiles
.
get
(
upload_id
,
is_authorized
=
create_authorization_predicate
(
upload_id
,
calc_
hash
))
upload_id
,
is_authorized
=
create_authorization_predicate
(
upload_id
,
calc_
id
))
if
upload_file
is
None
:
abort
(
404
,
message
=
'Archive %s does not exist.'
%
upload_id
)
try
:
return
send_file
(
upload_file
.
archive_file
(
calc_
hash
,
'rt'
),
upload_file
.
archive_file
(
calc_
id
,
'rt'
),
mimetype
=
'application/json'
,
as_attachment
=
True
,
attachment_filename
=
'%s.json'
%
archive_id
)
except
Restricted
:
abort
(
401
,
message
=
'Not authorized to access %s/%s.'
%
(
upload_id
,
calc_
hash
))
abort
(
401
,
message
=
'Not authorized to access %s/%s.'
%
(
upload_id
,
calc_
id
))
except
KeyError
:
abort
(
404
,
message
=
'Calculation %s does not exist.'
%
archive_id
)
...
...
nomad/api/auth.py
View file @
435efe1f
...
...
@@ -149,7 +149,7 @@ class TokenResource(Resource):
'there is no token for you.'
)
def
create_authorization_predicate
(
upload_id
,
calc_
hash
=
None
):
def
create_authorization_predicate
(
upload_id
,
calc_
id
=
None
):
"""
Returns a predicate that determines if the logged in user has the authorization
to access the given upload and calculation.
...
...
@@ -171,7 +171,7 @@ def create_authorization_predicate(upload_id, calc_hash=None):
# There are no db entries for the given resource
if
files
.
UploadFiles
.
get
(
upload_id
)
is
not
None
:
logger
=
utils
.
get_logger
(
__name__
,
upload_id
=
upload_id
,
calc_
hash
=
calc_
hash
)
logger
=
utils
.
get_logger
(
__name__
,
upload_id
=
upload_id
,
calc_
id
=
calc_
id
)
logger
.
error
(
'Upload files without respective db entry'
)
raise
KeyError
...
...
nomad/api/common.py
View file @
435efe1f
...
...
@@ -45,10 +45,10 @@ pagination_request_parser.add_argument(
def
calc_route
(
ns
,
prefix
:
str
=
''
):
""" A resource decorator for /<upload>/<calc> based routes. """
def
decorator
(
func
):
ns
.
route
(
'%s/<string:upload_id>/<string:calc_
hash
>'
%
prefix
)(
ns
.
route
(
'%s/<string:upload_id>/<string:calc_
id
>'
%
prefix
)(
api
.
doc
(
params
=
{
'upload_id'
:
'The unique id for the requested upload.'
,
'calc_
hash
'
:
'The
upload
unique
hash
for the requested calculation.'
'calc_
id
'
:
'The unique
id
for the requested calculation.'
})(
func
)
)
return
decorator
nomad/api/raw.py
View file @
435efe1f
...
...
@@ -40,7 +40,7 @@ raw_file_from_path_parser.add_argument(**raw_file_compress_argument)
@
ns
.
route
(
'/<string:upload_id>/<path:path>'
)
@
api
.
doc
(
params
=
{
'upload_id'
:
'The unique
hash
for the requested upload.'
,
'upload_id'
:
'The unique
id
for the requested upload.'
,
'path'
:
'The path to a file or directory.'
})
@
api
.
header
(
'Content-Type'
,
'application/gz'
)
...
...
@@ -65,7 +65,7 @@ class RawFileFromPathResource(Resource):
upload_files
=
UploadFiles
.
get
(
upload_id
,
create_authorization_predicate
(
upload_id
))
if
upload_files
is
None
:
abort
(
404
,
message
=
'The upload with
hash
%s does not exist.'
%
upload_id
)
abort
(
404
,
message
=
'The upload with
id
%s does not exist.'
%
upload_id
)
if
upload_filepath
[
-
1
:]
==
'*'
:
upload_filepath
=
upload_filepath
[
0
:
-
1
]
...
...
@@ -108,7 +108,7 @@ raw_files_request_parser.add_argument(
@
ns
.
route
(
'/<string:upload_id>'
)
@
api
.
doc
(
params
=
{
'upload_id'
:
'The unique
hash
for the requested upload.'
'upload_id'
:
'The unique
id
for the requested upload.'
})
class
RawFilesResource
(
Resource
):
@
api
.
doc
(
'get_files'
)
...
...
@@ -154,7 +154,7 @@ def respond_to_get_raw_files(upload_id, files, compress=False):
upload_files
=
UploadFiles
.
get
(
upload_id
,
create_authorization_predicate
(
upload_id
))
if
upload_files
is
None
:
abort
(
404
,
message
=
'The upload with
hash
%s does not exist.'
%
upload_id
)
abort
(
404
,
message
=
'The upload with
id
%s does not exist.'
%
upload_id
)
def
generator
():
""" Stream a zip file with all files using zipstream. """
...
...
nomad/api/repo.py
View file @
435efe1f
...
...
@@ -35,19 +35,19 @@ class RepoCalcResource(Resource):
@
api
.
response
(
404
,
'The upload or calculation does not exist'
)
@
api
.
response
(
200
,
'Metadata send'
)
@
api
.
doc
(
'get_repo_calc'
)
def
get
(
self
,
upload_id
,
calc_
hash
):
def
get
(
self
,
upload_id
,
calc_
id
):
"""
Get calculation metadata in repository form.
Repository metadata only entails the quantities shown in the repository.
This is basically the elastic search index entry for the
requested calculations. Calcs are references via *upload_id*, *calc_
hash
*
requested calculations. Calcs are references via *upload_id*, *calc_
id
*
pairs.
"""
try
:
return
RepoCalc
.
get
(
id
=
'%s/%s'
%
(
upload_id
,
calc_
hash
)).
json_dict
,
200
return
RepoCalc
.
get
(
id
=
'%s/%s'
%
(
upload_id
,
calc_
id
)).
json_dict
,
200
except
NotFoundError
:
abort
(
404
,
message
=
'There is no calculation for %s/%s'
%
(
upload_id
,
calc_
hash
))
abort
(
404
,
message
=
'There is no calculation for %s/%s'
%
(
upload_id
,
calc_
id
))
except
Exception
as
e
:
abort
(
500
,
message
=
str
(
e
))
...
...
nomad/client.py
View file @
435efe1f
...
...
@@ -130,7 +130,7 @@ class CalcProcReproduction:
(parsing, normalizing) with the locally installed parsers and normalizers.
The use-case is error/warning reproduction. Use ELK to identify errors, use
the upload, archive ids
/hashes
to given by ELK, and reproduce and fix the error
the upload and archive ids given by ELK to reproduce and fix the error
in your development environment.
This is a class of :class:`UploadFile` the downloaded raw data will be treated as
...
...
@@ -142,7 +142,7 @@ class CalcProcReproduction:
override: Set to true to override any existing local calculation data.
"""
def
__init__
(
self
,
archive_id
:
str
,
override
:
bool
=
False
)
->
None
:
self
.
calc_
hash
=
utils
.
archive
.
calc_
hash
(
archive_id
)
self
.
calc_
id
=
utils
.
archive
.
calc_
id
(
archive_id
)
self
.
upload_id
=
utils
.
archive
.
upload_id
(
archive_id
)
self
.
mainfile
=
None
self
.
parser
=
None
...
...
@@ -170,10 +170,10 @@ class CalcProcReproduction:
self
.
logger
.
info
(
'Extracting calc data.'
)
self
.
upload_files
.
extract
()
# find mainfile matching calc_
hash
# find mainfile matching calc_
id
self
.
mainfile
=
next
(
filename
for
filename
in
self
.
upload_files
.
raw_file_manifest
()
if
self
.
upload_files
.
calc_
hash
(
filename
)
==
self
.
calc_
hash
)
if
self
.
upload_files
.
calc_
id
(
filename
)
==
self
.
calc_
id
)
assert
self
.
mainfile
is
not
None
,
'The mainfile could not be found.'
self
.
logger
=
self
.
logger
.
bind
(
mainfile
=
self
.
mainfile
)
...
...
nomad/coe_repo/calc.py
View file @
435efe1f
...
...
@@ -28,7 +28,7 @@ from .base import Base, calc_citation_association, ownership, co_authorship, sha
class
Calc
(
Base
,
datamodel
.
Calc
):
# type: ignore
__tablename__
=
'calculations'
calc_id
=
Column
(
Integer
,
primary_key
=
True
,
autoincrement
=
True
)
coe_
calc_id
=
Column
(
'calc_id'
,
Integer
,
primary_key
=
True
,
autoincrement
=
True
)
origin_id
=
Column
(
Integer
,
ForeignKey
(
'uploads.upload_id'
))
upload
=
relationship
(
'Upload'
)
checksum
=
Column
(
String
)
...
...
@@ -43,14 +43,14 @@ class Calc(Base, datamodel.Calc): # type: ignore
parents
=
relationship
(
'Calc'
,
secondary
=
calc_dataset_containment
,
primaryjoin
=
calc_dataset_containment
.
c
.
children_calc_id
==
calc_id
,
secondaryjoin
=
calc_dataset_containment
.
c
.
parent_calc_id
==
calc_id
,
primaryjoin
=
calc_dataset_containment
.
c
.
children_calc_id
==
coe_
calc_id
,
secondaryjoin
=
calc_dataset_containment
.
c
.
parent_calc_id
==
coe_
calc_id
,
backref
=
'children'
)
@
classmethod
def
load_from
(
cls
,
obj
):
repo_db
=
infrastructure
.
repository_db
return
repo_db
.
query
(
Calc
).
filter_by
(
calc_id
=
int
(
obj
.
pid
)).
first
()
return
repo_db
.
query
(
Calc
).
filter_by
(
coe_
calc_id
=
int
(
obj
.
pid
)).
first
()
@
property
def
mainfile
(
self
)
->
str
:
...
...
@@ -58,14 +58,14 @@ class Calc(Base, datamodel.Calc): # type: ignore
@
property
def
pid
(
self
):
return
self
.
calc_id
return
self
.
coe_
calc_id
@
property
def
comment
(
self
)
->
str
:
return
self
.
user_meta_data
.
label
@
property
def
calc_
hash
(
self
)
->
str
:
def
calc_
id
(
self
)
->
str
:
return
self
.
checksum
@
property
...
...
@@ -92,19 +92,19 @@ class Calc(Base, datamodel.Calc): # type: ignore
@
property
def
all_datasets
(
self
)
->
List
[
'DataSet'
]:
assert
self
.
calc_id
is
not
None
assert
self
.
coe_
calc_id
is
not
None
repo_db
=
infrastructure
.
repository_db
query
=
repo_db
.
query
(
literal
(
self
.
calc_id
).
label
(
'calc_id'
)).
cte
(
recursive
=
True
)
query
=
repo_db
.
query
(
literal
(
self
.
coe_
calc_id
).
label
(
'
coe_
calc_id'
)).
cte
(
recursive
=
True
)
right
=
aliased
(
query
)
left
=
aliased
(
CalcSet
)
query
=
query
.
union_all
(
repo_db
.
query
(
left
.
parent_calc_id
).
join
(
right
,
right
.
c
.
calc_id
==
left
.
children_calc_id
))
right
,
right
.
c
.
coe_
calc_id
==
left
.
children_calc_id
))
query
=
repo_db
.
query
(
query
)
dataset_calc_ids
=
list
(
r
[
0
]
for
r
in
query
if
not
r
[
0
]
==
self
.
calc_id
)
dataset_calc_ids
=
list
(
r
[
0
]
for
r
in
query
if
not
r
[
0
]
==
self
.
coe_
calc_id
)
if
len
(
dataset_calc_ids
)
>
0
:
return
[
DataSet
(
dataset_calc
)
for
dataset_calc
in
repo_db
.
query
(
Calc
).
filter
(
Calc
.
calc_id
.
in_
(
dataset_calc_ids
))]
for
dataset_calc
in
repo_db
.
query
(
Calc
).
filter
(
Calc
.
coe_
calc_id
.
in_
(
dataset_calc_ids
))]
else
:
return
[]
...
...
@@ -132,7 +132,7 @@ class DataSet:
@
property
def
id
(
self
):
return
self
.
_dataset_calc
.
calc_id
return
self
.
_dataset_calc
.
coe_
calc_id
@
property
def
dois
(
self
)
->
List
[
Citation
]:
...
...
nomad/coe_repo/upload.py
View file @
435efe1f
...
...
@@ -100,17 +100,17 @@ class Upload(Base, datamodel.Upload): # type: ignore
@
classmethod
def
load_from
(
cls
,
obj
):
return
Upload
.
from_upload_id
(
obj
.
upload_id
)
return
Upload
.
from_upload_id
(
str
(
obj
.
upload_id
)
)
@
staticmethod
def
from_upload_id
(
upload_id
)
->
'Upload'
:
def
from_upload_id
(
upload_id
:
str
)
->
'Upload'
:
repo_db
=
infrastructure
.
repository_db
uploads
=
repo_db
.
query
(
Upload
).
filter_by
(
upload_name
=
upload_id
)
assert
uploads
.
count
()
<=
1
,
'Upload
hash
/name must be unique'
assert
uploads
.
count
()
<=
1
,
'Upload
id
/name must be unique'
return
uploads
.
first
()
@
property
def
upload_id
(
self
):
def
upload_id
(
self
)
->
str
:
return
self
.
upload_name
@
property
...
...
@@ -163,7 +163,7 @@ class Upload(Base, datamodel.Upload): # type: ignore
if
has_calcs
:
# empty upload case
repo_db
.
commit
()
result
=
coe_upload
.
upload_id
result
=
coe_upload
.
coe_
upload_id
else
:
repo_db
.
rollback
()
except
Exception
as
e
:
...
...
@@ -181,8 +181,8 @@ class Upload(Base, datamodel.Upload): # type: ignore
# table based properties
coe_calc
=
Calc
(
calc_id
=
calc_meta_data
.
get
(
'_pid'
,
None
),
checksum
=
calc_meta_data
.
get
(
'_checksum'
,
calc
.
calc_
hash
),
coe_
calc_id
=
calc_meta_data
.
get
(
'_pid'
,
None
),
checksum
=
calc_meta_data
.
get
(
'_checksum'
,
calc
.
calc_
id
),
upload
=
self
)
repo_db
.
add
(
coe_calc
)
...
...
@@ -242,7 +242,7 @@ class Upload(Base, datamodel.Upload): # type: ignore
# datasets
for
dataset_id
in
calc_meta_data
.
get
(
'datasets'
,
[]):
dataset
=
CalcSet
(
parent_calc_id
=
dataset_id
,
children_calc_id
=
coe_calc
.
calc_id
)
dataset
=
CalcSet
(
parent_calc_id
=
dataset_id
,
children_calc_id
=
coe_calc
.
coe_
calc_id
)
repo_db
.
add
(
dataset
)
# references
...
...
nomad/datamodel.py
View file @
435efe1f
...
...
@@ -50,7 +50,7 @@ class Calc(Entity):
Attributes:
pid: The persistent id (pid) for the calculation
mainfile: The mainfile path relative to upload root
calc_
hash
: A unique
hash
/checksum that describes unique calculations
calc_
id
: A unique
id
/checksum that describes unique calculations
upload: The upload object that this calculation belongs to.
"""
@
property
...
...
@@ -62,7 +62,7 @@ class Calc(Entity):
raise
NotImplementedError
@
property
def
calc_
hash
(
self
)
->
str
:
def
calc_
id
(
self
)
->
str
:
raise
NotImplementedError
@
property
...
...
nomad/files.py
View file @
435efe1f
...
...
@@ -46,6 +46,7 @@ import shutil
from
zipfile
import
ZipFile
,
BadZipFile
,
is_zipfile
from
bagit
import
make_bag
import
hashlib
import
base64
import
io
from
nomad
import
config
,
utils
...
...
@@ -140,11 +141,11 @@ class Metadata(metaclass=ABCMeta):
pass
def
insert
(
self
,
calc
:
dict
)
->
None
:
""" Insert a calc, using
hash
as key. """
""" Insert a calc, using
calc_id
as key. """
raise
NotImplementedError
()
def
update
(
self
,
calc_
hash
:
str
,
updates
:
dict
)
->
dict
:
""" Updating a calc, using
hash
as key and running dict update with the given data. """
def
update
(
self
,
calc_
id
:
str
,
updates
:
dict
)
->
dict
:
""" Updating a calc, using
calc_id
as key and running dict update with the given data. """
raise
NotImplementedError
()
def
get
(
self
,
calc_id
:
str
)
->
dict
:
...
...
@@ -181,16 +182,16 @@ class StagingMetadata(Metadata):
pass
def
insert
(
self
,
calc
:
dict
)
->
None
:
id
=
calc
[
'
hash
'
]
id
=
calc
[
'
calc_id
'
]
path
=
self
.
_dir
.
join_file
(
'%s.json'
%
id
)
assert
not
path
.
exists
()
with
open
(
path
.
os_path
,
'wt'
)
as
f
:
ujson
.
dump
(
calc
,
f
)
def
update
(
self
,
calc_
hash
:
str
,
updates
:
dict
)
->
dict
:
metadata
=
self
.
get
(
calc_
hash
)
def
update
(
self
,
calc_
id
:
str
,
updates
:
dict
)
->
dict
:
metadata
=
self
.
get
(
calc_
id
)
metadata
.
update
(
updates
)
path
=
self
.
_dir
.
join_file
(
'%s.json'
%
calc_
hash
)
path
=
self
.
_dir
.
join_file
(
'%s.json'
%
calc_
id
)
with
open
(
path
.
os_path
,
'wt'
)
as
f
:
ujson
.
dump
(
metadata
,
f
)
return
metadata
...
...
@@ -263,24 +264,24 @@ class PublicMetadata(Metadata):
def
insert
(
self
,
calc
:
dict
)
->
None
:
assert
self
.
data
is
not
None
,
"Metadata is not open."
id
=
calc
[
'
hash
'
]
id
=
calc
[
'
calc_id
'
]
assert
id
not
in
self
.
data
self
.
data
[
id
]
=
calc
self
.
_modified
=
True
def
update
(
self
,
calc_
hash
:
str
,
updates
:
dict
)
->
dict
:
def
update
(
self
,
calc_
id
:
str
,
updates
:
dict
)
->
dict
:
assert
self
.
data
is
not
None
,
"Metadata is not open."
if
calc_
hash
not
in
self
.
data
:
if
calc_
id
not
in
self
.
data
:
raise
KeyError
()
self
.
data
[
calc_
hash
].
update
(
updates
)
self
.
data
[
calc_
id
].
update
(
updates
)
self
.
_modified
=
True
return
self
.
data
[
calc_
hash
]
return
self
.
data
[
calc_
id
]
def
get
(
self
,
calc_
hash
:
str
)
->
dict
:
def
get
(
self
,
calc_
id
:
str
)
->
dict
:
assert
self
.
data
is
not
None
,
"Metadata is not open."
return
self
.
data
[
calc_
hash
]
return
self
.
data
[
calc_
id
]
def
__iter__
(
self
)
->
Iterator
[
dict
]:
assert
self
.
data
is
not
None
,
"Metadata is not open."
...
...
@@ -349,24 +350,24 @@ class UploadFiles(DirectoryObject, metaclass=ABCMeta):
"""
raise
NotImplementedError
()
def
archive_file
(
self
,
calc_
hash
:
str
,
*
args
,
**
kwargs
)
->
IO
:
def
archive_file
(
self
,
calc_
id
:
str
,
*
args
,
**
kwargs
)
->
IO
:
"""
Opens an archive file and returns a file-like object. Additional args, kwargs are
delegated to the respective `open` call.
Arguments:
calc_
hash
: The
hash
identifying the calculation.
calc_
id
: The
id
identifying the calculation.
Raises:
KeyError: If the calc does not exist.
Restricted: If the file is restricted and upload access evaluated to False.
"""
raise
NotImplementedError
()
def
archive_log_file
(
self
,
calc_
hash
:
str
,
*
args
,
**
kwargs
)
->
IO
:
def
archive_log_file
(
self
,
calc_
id
:
str
,
*
args
,
**
kwargs
)
->
IO
:
"""
Opens an archive log file and returns a file-like object. Additional args, kwargs are
delegated to the respective `open` call.
Arguments:
calc_
hash
: The
hash
identifying the calculation.
calc_
id
: The
id
identifying the calculation.
Raises:
KeyError: If the calc does not exist.
Restricted: If the file is restricted and upload access evaluated to False.
...
...
@@ -409,21 +410,21 @@ class StagingUploadFiles(UploadFiles):
def
raw_file_object
(
self
,
file_path
:
str
)
->
PathObject
:
return
self
.
_raw_dir
.
join_file
(
file_path
)
def
archive_file
(
self
,
calc_
hash
:
str
,
*
args
,
**
kwargs
)
->
IO
:
def
archive_file
(
self
,
calc_
id
:
str
,
*
args
,
**
kwargs
)
->
IO
:
if
not
self
.
_is_authorized
():
raise
Restricted
return
self
.
_file
(
self
.
archive_file_object
(
calc_
hash
),
*
args
,
**
kwargs
)
return
self
.
_file
(
self
.
archive_file_object
(
calc_
id
),
*
args
,
**
kwargs
)
def
archive_log_file
(
self
,
calc_
hash
:
str
,
*
args
,
**
kwargs
)
->
IO
:
def
archive_log_file
(
self
,
calc_
id
:
str
,
*
args
,
**
kwargs
)
->
IO
:
if
not
self
.
_is_authorized
():
raise
Restricted
return
self
.
_file
(
self
.
archive_log_file_object
(
calc_
hash
),
*
args
,
**
kwargs
)
return
self
.
_file
(
self
.
archive_log_file_object
(
calc_
id
),
*
args
,
**
kwargs
)
def
archive_file_object
(
self
,
calc_
hash
:
str
)
->
PathObject
:
return
self
.
_archive_dir
.
join_file
(
'%s.%s'
%
(
calc_
hash
,
self
.
_archive_ext
))
def
archive_file_object
(
self
,
calc_
id
:
str
)
->
PathObject
:
return
self
.
_archive_dir
.
join_file
(
'%s.%s'
%
(
calc_
id
,
self
.
_archive_ext
))
def
archive_log_file_object
(
self
,
calc_
hash
:
str
)
->
PathObject
:
return
self
.
_archive_dir
.
join_file
(
'%s.log'
%
calc_
hash
)
def
archive_log_file_object
(
self
,
calc_
id
:
str
)
->
PathObject
:
return
self
.
_archive_dir
.
join_file
(
'%s.log'
%
calc_
id
)
def
add_rawfiles
(
self
,
path
:
str
,
move
:
bool
=
False
,
prefix
:
str
=
None
)
->
None
:
"""
...
...
@@ -519,10 +520,10 @@ class StagingUploadFiles(UploadFiles):
for
calc
in
self
.
metadata
:
archive_zip
=
archive_restricted_zip
if
calc
.
get
(
'restricted'
,
False
)
else
archive_public_zip
archive_filename
=
'%s.%s'
%
(
calc
[
'
hash
'
],
self
.
_archive_ext
)
archive_filename
=
'%s.%s'
%
(
calc
[
'
calc_id
'
],
self
.
_archive_ext
)
archive_zip
.
write
(
self
.
_archive_dir
.
join_file
(
archive_filename
).
os_path
,
archive_filename
)
archive_log_filename
=
'%s.%s'
%
(
calc
[
'
hash
'
],
'log'
)
archive_log_filename
=
'%s.%s'
%
(
calc
[
'
calc_id
'
],
'log'
)
log_file
=
self
.
_archive_dir
.
join_file
(
archive_log_filename
)
if
log_file
.
exists
():
archive_zip
.
write
(
log_file
.
os_path
,
archive_log_filename
)
...
...
@@ -567,13 +568,34 @@ class StagingUploadFiles(UploadFiles):
os
.
path
.
join
(
calc_relative_dir
,
path
)
for
path
in
os
.
listdir
(
calc_dir
)
if
os
.
path
.
isfile
(
os
.
path
.
join
(
calc_dir
,
path
))
and
(
with_mainfile
or
path
!=
mainfile
))
def _websave_hash(self, hash: bytes, length: int = 0) -> str:
    """
    Encodes raw hash bytes as URL-safe base64 text.

    Arguments:
        hash: The raw digest bytes to encode.
        length: If greater than 0, the encoded text is cut to this many leading
            characters. Otherwise the whole encoding without its last two
            characters is returned.

    Returns:
        The (possibly shortened) base64 string, using ``-`` and ``_`` in place
        of ``+`` and ``/``.
    """
    # Same alphabet as b64encode(..., altchars=b'-_').
    encoded = base64.urlsafe_b64encode(hash)
    if length > 0:
        # Honour the requested length instead of a fixed 28 characters.
        shortened = encoded[:length]
    else:
        # Drops the trailing two characters. These are the '==' padding when the
        # input size is 1 modulo 3, e.g. a 64 byte SHA-512 digest.
        # NOTE(review): for other input sizes this removes data characters.
        shortened = encoded[:-2]
    return shortened.decode('utf-8')
def calc_id(self, mainfile: str) -> str:
    """
    Derives the id of a calc from this upload's id and the calc's mainfile path.

    The id is a SHA-512 digest over the UTF-8 encoded ``upload_id`` followed by
    the UTF-8 encoded ``mainfile``, passed through :func:`_websave_hash` with
    ``utils.default_hash_len``. The mainfile is only used as a string here; the
    file itself is not opened.

    Arguments:
        mainfile: The mainfile path relative to the upload that identifies the calc in the folder structure.

    Returns:
        The calc id
    """
    # Seeding the constructor is equivalent to an initial update() call.
    digester = hashlib.sha512(self.upload_id.encode('utf-8'))
    digester.update(mainfile.encode('utf-8'))
    raw_digest = digester.digest()
    return self._websave_hash(raw_digest, utils.default_hash_len)
def
calc_hash
(
self
,
mainfile
:
str
)
->
str
:
"""
Calculates a hash for the given calc.
Calculates a hash for the given calc
based on file contents and aux file contents
.
Arguments:
mainfile: The mainfile path relative to the upload that identifies the calc in the folder structure.
Returns:
The calc hash
The calc
ulated
hash
Raises:
KeyError: If the mainfile does not exist.
"""
...
...
@@ -583,7 +605,7 @@ class StagingUploadFiles(UploadFiles):
for
data
in
iter
(
lambda
:
f
.
read
(
65536
),
b
''
):
hash
.
update
(
data
)
return
utils
.
websave_hash
(
hash
.
digest
(),
utils
.
default_hash_len
)
return
self
.
_
websave_hash
(
hash
.
digest
(),
utils
.
default_hash_len
)
class
ArchiveBasedStagingUploadFiles
(
StagingUploadFiles
):
...
...
@@ -675,11 +697,11 @@ class PublicUploadFiles(UploadFiles):
except
FileNotFoundError
:
pass
def
archive_file
(
self
,
calc_
hash
:
str
,
*
args
,
**
kwargs
)
->
IO
:
return
self
.
_file
(
'archive'
,
self
.
_archive_ext
,
'%s.%s'
%
(
calc_
hash
,
self
.
_archive_ext
),
*
args
,
**
kwargs
)
def
archive_file
(
self
,
calc_
id
:
str
,
*
args
,
**
kwargs
)
->
IO
:
return
self
.
_file
(
'archive'
,
self
.
_archive_ext
,
'%s.%s'
%
(
calc_
id
,
self
.
_archive_ext
),
*
args
,
**
kwargs
)
def
archive_log_file
(
self
,
calc_
hash
:
str
,
*
args
,
**
kwargs
)
->
IO
:
return
self
.
_file
(
'archive'
,
self
.
_archive_ext
,
'%s.log'
%
calc_
hash
,
*
args
,
**
kwargs
)
def
archive_log_file
(
self
,
calc_
id
:
str
,
*
args
,
**
kwargs
)
->
IO
:
return
self
.
_file
(
'archive'
,
self
.
_archive_ext
,
'%s.log'
%
calc_
id
,
*
args
,
**
kwargs
)
def
repack
(
self
)
->
None
:
"""
...
...
nomad/normalizing/repository.py
View file @
435efe1f
...
...
@@ -29,7 +29,7 @@ class RepositoryNormalizer(Normalizer):
b
.
openNonOverlappingSection
(
'section_repository_info'
)
b
.
openNonOverlappingSection
(
'section_repository_parserdata'
)
b
.
addValue
(
'repository_checksum'
,
utils
.
archive
.
calc_
hash
(
b
.
get_value
(
'archive_id'
,
0
)))
b
.
addValue
(
'repository_checksum'
,
utils
.
archive
.
calc_
id
(
b
.
get_value
(
'archive_id'
,
0
)))
b
.
addValue
(
'repository_chemical_formula'
,
b
.
get_value
(
'chemical_composition_bulk_reduced'
,
0
))
b
.
addValue
(
'repository_parser_id'
,
b
.
get_value
(
'parser_name'
,
0
))
atoms
=
b
.
get_value
(
'atom_labels'
,
0
)
...
...
nomad/processing/data.py
View file @
435efe1f
...
...
@@ -52,7 +52,7 @@ class Calc(Proc, datamodel.Calc):
while parsing, including ``program_name``, ``program_version``, etc.
Attributes:
archive_id: the
hash based archive id of the calc
archive_id: the
full id of the calc, composed of upload_id and calc_id
parser: the name of the parser used to process this calc
upload_id: the id of the upload used to create this calculation
mainfile: the mainfile (including path in upload) that was used to create this calc
...
...
@@ -85,8 +85,8 @@ class Calc(Proc, datamodel.Calc):
return
self
.
upload_files
.
raw_file_object
(
self
.
mainfile
)
@
property
def
calc_
hash
(
self
)
->
str
:
return
utils
.
archive
.
calc_
hash
(
self
.
archive_id
)
def
calc_
id
(
self
)
->
str
:
return
utils
.
archive
.
calc_
id
(
self
.
archive_id
)
@
property
def
upload
(
self
)
->
'Upload'
:
...
...
@@ -103,7 +103,7 @@ class Calc(Proc, datamodel.Calc):
def
get_logger
(
self
,
**
kwargs
):
logger
=
super
().
get_logger
()
logger
=
logger
.
bind
(
upload_id
=
self
.
upload_id
,
mainfile
=
self
.
mainfile
,
calc_
hash
=
self
.
calc_
hash
,
upload_id
=
self
.
upload_id
,
mainfile
=
self
.
mainfile
,
calc_
id
=
self
.
calc_
id
,
archive_id
=
self
.
archive_id
,
**
kwargs
)
return
logger
...
...
@@ -116,7 +116,7 @@ class Calc(Proc, datamodel.Calc):
logger
=
self
.
get_logger
(
**
kwargs
)
if
self
.
_calc_proc_logwriter
is
None
:
self
.
_calc_proc_logwriter_ctx
=
self
.
upload_files
.
archive_log_file
(
self
.
calc_
hash
,
'wt'
)
self
.
_calc_proc_logwriter_ctx
=
self
.
upload_files
.
archive_log_file
(
self
.
calc_
id
,
'wt'
)
self
.
_calc_proc_logwriter
=
self
.
_calc_proc_logwriter_ctx
.
__enter__
()
# pylint: disable=E1101
def
save_to_calc_log
(
logger
,
method_name
,
event_dict
):
...
...
@@ -235,7 +235,7 @@ class Calc(Proc, datamodel.Calc):
def
archiving
(
self
):