nomad-lab / nomad-FAIR · Commits

Commit 14cb7dee
authored Jan 12, 2019 by Markus Scheidgen

Removed upload_hash and replaced by upload_id.

parent e63f46b1
Changes: 18 files
nomad/api/app.py

@@ -95,7 +95,7 @@ def with_logger(func):
         args = inspect.getcallargs(wrapper, *args, **kwargs)
         logger_args = {
             k: v for k, v in args.items()
-            if k in ['upload_id', 'upload_hash', 'calc_hash']}
+            if k in ['upload_id', 'calc_hash']}
         logger = utils.get_logger(__name__, **logger_args)
         args.update(logger=logger)
         try:
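The decorator above injects a logger whose context is built from the decorated function's call arguments; this commit simply drops upload_hash from the allowed keys. A minimal, runnable sketch of the technique using only the standard library (the LoggerAdapter stands in for the project's utils.get_logger, which is not shown here):

    import functools
    import inspect
    import logging

    def with_logger(func):
        """Inject a 'logger' kwarg pre-bound with selected call arguments."""
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            # Resolve all positional/keyword arguments to their parameter names.
            call_args = inspect.getcallargs(func, *args, **kwargs)
            logger_args = {
                k: v for k, v in call_args.items()
                if k in ['upload_id', 'calc_hash']}
            call_args['logger'] = logging.LoggerAdapter(
                logging.getLogger(func.__module__), logger_args)
            return func(**call_args)
        return wrapper

    @with_logger
    def process(upload_id, calc_hash, logger=None):
        logger.info('processing %s/%s', upload_id, calc_hash)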
nomad/api/archive.py

@@ -42,19 +42,19 @@ class ArchiveCalcLogResource(Resource):
     @api.response(401, 'Not authorized to access the data.')
     @api.response(200, 'Archive data send', headers={'Content-Type': 'application/plain'})
     @login_if_available
-    def get(self, upload_hash, calc_hash):
+    def get(self, upload_id, calc_hash):
         """
         Get calculation processing log.

-        Calcs are references via *upload_hash*, *calc_hash* pairs.
+        Calcs are references via *upload_id*, *calc_hash* pairs.
         """
-        archive_id = '%s/%s' % (upload_hash, calc_hash)
+        archive_id = '%s/%s' % (upload_id, calc_hash)

         upload_files = UploadFiles.get(
-            upload_hash, is_authorized=create_authorization_predicate(upload_hash, calc_hash))
+            upload_id, is_authorized=create_authorization_predicate(upload_id, calc_hash))

         if upload_files is None:
-            abort(404, message='Archive %s does not exist.' % upload_hash)
+            abort(404, message='Upload %s does not exist.' % upload_id)

         try:
             return send_file(

@@ -63,7 +63,7 @@ class ArchiveCalcLogResource(Resource):
                 as_attachment=True, attachment_filename='%s.log' % archive_id)
         except Restricted:
-            abort(401, message='Not authorized to access %s/%s.' % (upload_hash, calc_hash))
+            abort(401, message='Not authorized to access %s/%s.' % (upload_id, calc_hash))
         except KeyError:
             abort(404, message='Calculation %s does not exist.' % archive_id)

@@ -75,19 +75,19 @@ class ArchiveCalcResource(Resource):
     @api.response(401, 'Not authorized to access the data.')
     @api.response(200, 'Archive data send')
     @login_if_available
-    def get(self, upload_hash, calc_hash):
+    def get(self, upload_id, calc_hash):
         """
         Get calculation data in archive form.

-        Calcs are references via *upload_hash*, *calc_hash* pairs.
+        Calcs are references via *upload_id*, *calc_hash* pairs.
         """
-        archive_id = '%s/%s' % (upload_hash, calc_hash)
+        archive_id = '%s/%s' % (upload_id, calc_hash)

         upload_file = UploadFiles.get(
-            upload_hash, is_authorized=create_authorization_predicate(upload_hash, calc_hash))
+            upload_id, is_authorized=create_authorization_predicate(upload_id, calc_hash))

         if upload_file is None:
-            abort(404, message='Archive %s does not exist.' % upload_hash)
+            abort(404, message='Archive %s does not exist.' % upload_id)

         try:
             return send_file(

@@ -96,7 +96,7 @@ class ArchiveCalcResource(Resource):
                 as_attachment=True, attachment_filename='%s.json' % archive_id)
         except Restricted:
-            abort(401, message='Not authorized to access %s/%s.' % (upload_hash, calc_hash))
+            abort(401, message='Not authorized to access %s/%s.' % (upload_id, calc_hash))
         except KeyError:
             abort(404, message='Calculation %s does not exist.' % archive_id)
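Both resources repeat the same mapping from domain errors to HTTP responses (Restricted → 401, KeyError → 404). A hypothetical helper, not part of this commit, that captures the pattern with the same flask-restplus abort the handlers already use:

    from contextlib import contextmanager
    from flask_restplus import abort

    class Restricted(Exception):
        """Raised when data exists but the requester may not read it."""

    @contextmanager
    def http_errors(resource_id):
        # Translate the domain exceptions used above into HTTP aborts.
        try:
            yield
        except Restricted:
            abort(401, message='Not authorized to access %s.' % resource_id)
        except KeyError:
            abort(404, message='%s does not exist.' % resource_id)

With such a helper, each handler body could shrink to a bare `with http_errors(archive_id): return send_file(...)`.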
nomad/api/auth.py

@@ -149,7 +149,7 @@ class TokenResource(Resource):
             'there is no token for you.')


-def create_authorization_predicate(upload_hash, calc_hash=None):
+def create_authorization_predicate(upload_id, calc_hash=None):
     """
     Returns a predicate that determines if the logged in user has the authorization
     to access the given upload and calculation.

@@ -160,18 +160,18 @@ def create_authorization_predicate(upload_hash, calc_hash=None):
             return False

         # look in repository
-        upload = coe_repo.Upload.from_upload_hash(upload_hash)
+        upload = coe_repo.Upload.from_upload_id(upload_id)
         if upload is not None:
             return upload.user_id == g.user.user_id

         # look in staging
-        staging_upload = processing.Upload.get(upload_hash)
+        staging_upload = processing.Upload.get(upload_id)
         if staging_upload is not None:
             return str(g.user.user_id) == str(staging_upload.user_id)

         # There are no db entries for the given resource
-        if files.UploadFiles.get(upload_hash) is not None:
-            logger = utils.get_logger(__name__, upload_hash=upload_hash, calc_hash=calc_hash)
+        if files.UploadFiles.get(upload_id) is not None:
+            logger = utils.get_logger(__name__, upload_id=upload_id, calc_hash=calc_hash)
             logger.error('Upload files without respective db entry')

         raise KeyError
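create_authorization_predicate returns a closure, so callers like UploadFiles.get can defer the check until the data is actually touched. A simplified, self-contained sketch of that shape (the dict lookup stands in for the repository and staging queries shown above):

    from typing import Callable, Optional

    def create_authorization_predicate(
            upload_id: str, calc_hash: Optional[str] = None) -> Callable[[], bool]:
        owners = {'upload-1': 'user-42'}   # stand-in for the repository db
        current_user_id = 'user-42'        # stand-in for flask.g.user.user_id

        def is_authorized() -> bool:
            owner = owners.get(upload_id)
            if owner is None:
                # Upload files without a db entry, as in the error branch above.
                raise KeyError
            return owner == current_user_id

        return is_authorized

    assert create_authorization_predicate('upload-1')() is True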
nomad/api/common.py

@@ -45,10 +45,10 @@ pagination_request_parser.add_argument(

 def calc_route(ns, prefix: str = ''):
     """ A resource decorator for /<upload>/<calc> based routes. """
     def decorator(func):
-        ns.route('%s/<string:upload_hash>/<string:calc_hash>' % prefix)(
+        ns.route('%s/<string:upload_id>/<string:calc_hash>' % prefix)(
             api.doc(params={
-                'upload_hash': 'The unique hash for the requested upload.',
-                'calc_hash': 'The unique hash for the requested calculation.'
+                'upload_id': 'The unique id for the requested upload.',
+                'calc_hash': 'The upload unique hash for the requested calculation.'
             })(func)
         )
     return decorator
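A runnable sketch of how such a route decorator is used, assuming flask-restplus (EchoResource and the namespace are illustrative; unlike the project's version, this sketch returns the class from the decorator so the module-level name stays bound):

    from flask import Flask
    from flask_restplus import Api, Resource

    app = Flask(__name__)
    api = Api(app)
    ns = api.namespace('repo')

    def calc_route(ns, prefix: str = ''):
        """ A resource decorator for /<upload>/<calc> based routes. """
        def decorator(cls):
            ns.route('%s/<string:upload_id>/<string:calc_hash>' % prefix)(
                api.doc(params={
                    'upload_id': 'The unique id for the requested upload.',
                    'calc_hash': 'The upload unique hash for the requested calculation.'
                })(cls))
            return cls
        return decorator

    @calc_route(ns)
    class EchoResource(Resource):
        def get(self, upload_id, calc_hash):
            return {'upload_id': upload_id, 'calc_hash': calc_hash}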
nomad/api/raw.py

@@ -38,9 +38,9 @@ raw_file_from_path_parser = api.parser()
 raw_file_from_path_parser.add_argument(**raw_file_compress_argument)


-@ns.route('/<string:upload_hash>/<path:path>')
+@ns.route('/<string:upload_id>/<path:path>')
 @api.doc(params={
-    'upload_hash': 'The unique hash for the requested upload.',
+    'upload_id': 'The unique hash for the requested upload.',
     'path': 'The path to a file or directory.'
 })
 @api.header('Content-Type', 'application/gz')

@@ -51,7 +51,7 @@ class RawFileFromPathResource(Resource):
     @api.response(200, 'File(s) send', headers={'Content-Type': 'application/gz'})
     @api.expect(raw_file_from_path_parser, validate=True)
     @login_if_available
-    def get(self, upload_hash: str, path: str):
+    def get(self, upload_id: str, path: str):
         """
         Get a single raw calculation file or whole directory from a given upload.

@@ -63,9 +63,9 @@ class RawFileFromPathResource(Resource):
         upload_filepath = path

         upload_files = UploadFiles.get(
-            upload_hash, create_authorization_predicate(upload_hash))
+            upload_id, create_authorization_predicate(upload_id))
         if upload_files is None:
-            abort(404, message='The upload with hash %s does not exist.' % upload_hash)
+            abort(404, message='The upload with hash %s does not exist.' % upload_id)

         if upload_filepath[-1:] == '*':
             upload_filepath = upload_filepath[0:-1]

@@ -74,7 +74,7 @@ class RawFileFromPathResource(Resource):
                 abort(404, message='There are no files for %s.' % upload_filepath)
             else:
                 compress = request.args.get('compress', None) is not None
-                return respond_to_get_raw_files(upload_hash, files, compress)
+                return respond_to_get_raw_files(upload_id, files, compress)

         try:
             return send_file(

@@ -83,7 +83,7 @@ class RawFileFromPathResource(Resource):
                 as_attachment=True, attachment_filename=os.path.basename(upload_filepath))
         except Restricted:
-            abort(401, message='Not authorized to access upload %s.' % upload_hash)
+            abort(401, message='Not authorized to access upload %s.' % upload_id)
         except KeyError:
             files = list(file for file in upload_files.raw_file_manifest(upload_filepath))
             if len(files) == 0:

@@ -106,9 +106,9 @@ raw_files_request_parser.add_argument(
     'files', required=True, type=str, help='Comma separated list of files to download.', location='args')


-@ns.route('/<string:upload_hash>')
+@ns.route('/<string:upload_id>')
 @api.doc(params={
-    'upload_hash': 'The unique hash for the requested upload.'
+    'upload_id': 'The unique hash for the requested upload.'
 })
 class RawFilesResource(Resource):
     @api.doc('get_files')

@@ -116,7 +116,7 @@ class RawFilesResource(Resource):
     @api.response(200, 'File(s) send', headers={'Content-Type': 'application/gz'})
     @api.expect(raw_files_request_model, validate=True)
     @login_if_available
-    def post(self, upload_hash):
+    def post(self, upload_id):
         """
         Download multiple raw calculation files in a .zip file.
         Zip files are streamed; instead of 401 errors, the zip file will just not contain

@@ -126,14 +126,14 @@ class RawFilesResource(Resource):
         compress = json_data.get('compress', False)
         files = [file.strip() for file in json_data['files']]

-        return respond_to_get_raw_files(upload_hash, files, compress)
+        return respond_to_get_raw_files(upload_id, files, compress)

     @api.doc('get_files_alternate')
     @api.response(404, 'The upload or path does not exist')
     @api.response(200, 'File(s) send', headers={'Content-Type': 'application/gz'})
     @api.expect(raw_files_request_parser, validate=True)
     @login_if_available
-    def get(self, upload_hash):
+    def get(self, upload_id):
         """
         Download multiple raw calculation files.
         Download multiple raw calculation files in a .zip file.

@@ -147,14 +147,14 @@ class RawFilesResource(Resource):
             abort(400, message="No files argument given.")
         files = [file.strip() for file in files_str.split(',')]

-        return respond_to_get_raw_files(upload_hash, files, compress)
+        return respond_to_get_raw_files(upload_id, files, compress)


-def respond_to_get_raw_files(upload_hash, files, compress=False):
+def respond_to_get_raw_files(upload_id, files, compress=False):
     upload_files = UploadFiles.get(
-        upload_hash, create_authorization_predicate(upload_hash))
+        upload_id, create_authorization_predicate(upload_id))
     if upload_files is None:
-        abort(404, message='The upload with hash %s does not exist.' % upload_hash)
+        abort(404, message='The upload with hash %s does not exist.' % upload_id)

     def generator():
         """ Stream a zip file with all files using zipstream. """

@@ -188,5 +188,5 @@ def respond_to_get_raw_files(upload_hash, files, compress=False):
             yield chunk

     response = Response(stream_with_context(generator()), mimetype='application/zip')
-    response.headers['Content-Disposition'] = 'attachment; filename={}'.format('%s.zip' % upload_hash)
+    response.headers['Content-Disposition'] = 'attachment; filename={}'.format('%s.zip' % upload_id)
     return response
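respond_to_get_raw_files streams the zip rather than materializing it, which is why unauthorized files can only be silently skipped instead of producing a 401. A minimal sketch of the streaming technique, assuming the python-zipstream package (the route and file path are illustrative):

    import zipstream
    from flask import Flask, Response, stream_with_context

    app = Flask(__name__)

    @app.route('/raw/<string:upload_id>')
    def get_files(upload_id):
        def generator():
            # ZIP_DEFLATED compresses; ZIP_STORED only packages.
            zip_file = zipstream.ZipFile(mode='w', compression=zipstream.ZIP_STORED)
            zip_file.write('/tmp/example.txt', arcname='example.txt')  # illustrative
            for chunk in zip_file:  # files are read lazily while streaming
                yield chunk

        response = Response(stream_with_context(generator()), mimetype='application/zip')
        response.headers['Content-Disposition'] = (
            'attachment; filename={}'.format('%s.zip' % upload_id))
        return response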
nomad/api/repo.py

@@ -35,19 +35,19 @@ class RepoCalcResource(Resource):
     @api.response(404, 'The upload or calculation does not exist')
     @api.response(200, 'Metadata send')
     @api.doc('get_repo_calc')
-    def get(self, upload_hash, calc_hash):
+    def get(self, upload_id, calc_hash):
         """
         Get calculation metadata in repository form.

         Repository metadata only entails the quanties shown in the repository.
         This is basically the elastic search index entry for the
-        requested calculations. Calcs are references via *upload_hash*, *calc_hash*
+        requested calculations. Calcs are references via *upload_id*, *calc_hash*
         pairs.
         """
         try:
-            return RepoCalc.get(id='%s/%s' % (upload_hash, calc_hash)).json_dict, 200
+            return RepoCalc.get(id='%s/%s' % (upload_id, calc_hash)).json_dict, 200
         except NotFoundError:
-            abort(404, message='There is no calculation for %s/%s' % (upload_hash, calc_hash))
+            abort(404, message='There is no calculation for %s/%s' % (upload_id, calc_hash))
         except Exception as e:
             abort(500, message=str(e))
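The repository entry is read straight from the Elasticsearch index by its composite document id. A sketch of that lookup, assuming elasticsearch-dsl ≥ 6 (the document class and index name here are illustrative, not the project's):

    from elasticsearch.exceptions import NotFoundError
    from elasticsearch_dsl import Document, Keyword

    class RepoCalc(Document):
        upload_id = Keyword()
        calc_hash = Keyword()

        class Index:
            name = 'calcs'  # illustrative index name

    def fetch(upload_id: str, calc_hash: str):
        try:
            # Document ids use the same '<upload_id>/<calc_hash>' composite.
            return RepoCalc.get(id='%s/%s' % (upload_id, calc_hash))
        except NotFoundError:
            return None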
nomad/api/upload.py

@@ -55,12 +55,7 @@ upload_model = api.inherit('UploadProcessing', proc_model, {
         description='The name of the upload. This can be provided during upload '
                     'using the name query parameter.'),
     'upload_id': fields.String(
-        description='The unique id for the upload. Its a random uuid and '
-                    'and used within nomad as long as no upload_hash is available.'),
-    'upload_hash': fields.String(
-        description='The unique upload hash. It is based on the uploaded content and '
-                    'used within nomad to identify uploads.'),
+        description='The unique id for the upload.'),
     'additional_metadata': fields.Arbitrary,
     'local_path': fields.String,
     'upload_time': fields.DateTime(dt_format='iso8601'),
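For reference, the shape of such a flask-restplus response model on its own (a trimmed, illustrative model rather than the project's full UploadProcessing):

    from flask_restplus import Api, fields

    api = Api()
    upload_model = api.model('Upload', {
        'name': fields.String(description='The name of the upload.'),
        'upload_id': fields.String(description='The unique id for the upload.'),
        'upload_time': fields.DateTime(dt_format='iso8601'),
    })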
nomad/client.py

@@ -143,7 +143,7 @@ class CalcProcReproduction:
     """
     def __init__(self, archive_id: str, override: bool = False) -> None:
         self.calc_hash = utils.archive.calc_hash(archive_id)
-        self.upload_hash = utils.archive.upload_hash(archive_id)
+        self.upload_id = utils.archive.upload_id(archive_id)
         self.mainfile = None
         self.parser = None
         self.logger = utils.get_logger(__name__, archive_id=archive_id)

@@ -156,7 +156,7 @@ class CalcProcReproduction:
             # download with request, since bravado does not support streaming
             # TODO currently only downloads mainfile
             self.logger.info('Downloading calc.')
-            req = requests.get('%s/raw/%s/%s' % (api_base, self.upload_hash, os.path.dirname(self.mainfile)), stream=True)
+            req = requests.get('%s/raw/%s/%s' % (api_base, self.upload_id, os.path.dirname(self.mainfile)), stream=True)
             with open(local_path, 'wb') as f:
                 for chunk in req.iter_content(chunk_size=1024):
                     f.write(chunk)
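The comment in the hunk explains the choice: bravado (the swagger client) cannot stream, so raw files are fetched with requests in streaming mode. The pattern on its own (the URL is illustrative):

    import requests

    def download(url: str, local_path: str) -> None:
        """Stream a large file to disk in 1 KiB chunks instead of buffering it."""
        req = requests.get(url, stream=True)
        req.raise_for_status()
        with open(local_path, 'wb') as f:
            for chunk in req.iter_content(chunk_size=1024):
                f.write(chunk)

    # e.g. download('http://localhost/nomad/api/raw/some-upload-id/some/path', '/tmp/raw')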
nomad/coe_repo/upload.py

@@ -89,7 +89,7 @@ class UploadMetaData:
 class Upload(Base, datamodel.Upload):  # type: ignore
     __tablename__ = 'uploads'

-    upload_id = Column(Integer, primary_key=True, autoincrement=True)
+    coe_upload_id = Column('upload_id', Integer, primary_key=True, autoincrement=True)
     upload_name = Column(String)
     user_id = Column(Integer, ForeignKey('users.user_id'))
     is_processed = Column(Boolean)

@@ -100,17 +100,17 @@ class Upload(Base, datamodel.Upload):  # type: ignore
     @classmethod
     def load_from(cls, obj):
-        return Upload.from_upload_hash(obj.upload_hash)
+        return Upload.from_upload_id(obj.upload_id)

     @staticmethod
-    def from_upload_hash(upload_hash) -> 'Upload':
+    def from_upload_id(upload_id) -> 'Upload':
         repo_db = infrastructure.repository_db
-        uploads = repo_db.query(Upload).filter_by(upload_name=upload_hash)
+        uploads = repo_db.query(Upload).filter_by(upload_name=upload_id)

         assert uploads.count() <= 1, 'Upload hash/name must be unique'
         return uploads.first()

     @property
-    def upload_hash(self):
+    def upload_id(self):
         return self.upload_name

     @property

@@ -140,17 +140,14 @@ class Upload(Base, datamodel.Upload):  # type: ignore
         repo_db = infrastructure.repository_db
         repo_db.begin()

-        logger = utils.get_logger(
-            __name__,
-            upload_id=upload.upload_id, upload_hash=upload.upload_hash)
+        logger = utils.get_logger(__name__, upload_id=upload.upload_id)

         result = None
         try:
             # create upload
             coe_upload = Upload(
-                upload_name=upload.upload_hash,
+                upload_name=upload.upload_id,
                 created=meta_data.get('_upload_time', upload.upload_time),
                 user=upload.uploader,
                 is_processed=True)
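The first hunk is the subtle part of this file: the database column keeps its SQL name upload_id, but the mapped attribute becomes coe_upload_id, freeing the upload_id name for a property that returns upload_name. SQLAlchemy supports this by passing the column name as the first Column argument. A self-contained sketch of the technique:

    from sqlalchemy import Column, Integer, String
    from sqlalchemy.ext.declarative import declarative_base

    Base = declarative_base()

    class Upload(Base):
        __tablename__ = 'uploads'

        # Python attribute and SQL column name differ on purpose.
        coe_upload_id = Column('upload_id', Integer, primary_key=True, autoincrement=True)
        upload_name = Column(String)

        @property
        def upload_id(self):
            # The public upload id is now the name, not the db primary key.
            return self.upload_name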
nomad/datamodel.py

@@ -76,7 +76,6 @@ class Upload(Entity):
     Attributes:
         upload_id(str): The unique random id that each upload has
-        upload_hash(str): The hash/checksum that describes unique uploads
         upload_time(datatime): The upload time
         uploader(repo.User): The user that uploaded this upload
         calcs(Iterable[Calc]): An iterable over the calculations of this upload

@@ -85,10 +84,6 @@ class Upload(Entity):
     def upload_id(self) -> str:
         return '<not assigned>'

-    @property
-    def upload_hash(self) -> str:
-        raise NotImplementedError
-
     @property
     def upload_time(self) -> Type[datetime.datetime]:
         raise NotImplementedError
nomad/files.py

@@ -585,10 +585,6 @@ class StagingUploadFiles(UploadFiles):
         return utils.websave_hash(hash.digest(), utils.default_hash_len)

-    def upload_hash(self) -> str:
-        """ Returns: A hash for the whole upload. It is only available if upload *is_bag*. """
-        pass
-

 class ArchiveBasedStagingUploadFiles(StagingUploadFiles):
     """
nomad/processing/data.py

@@ -100,15 +100,10 @@ class Calc(Proc, datamodel.Calc):
             self._upload_files = ArchiveBasedStagingUploadFiles(
                 self.upload_id, is_authorized=lambda: True, local_path=self.upload.local_path)
         return self._upload_files

-    @property
-    def upload_hash(self):
-        return utils.archive.upload_hash(self.archive_id)
-
     def get_logger(self, **kwargs):
         logger = super().get_logger()
         logger = logger.bind(
-            upload_id=self.upload_id, mainfile=self.mainfile,
-            upload_hash=self.upload_hash, calc_hash=self.calc_hash,
+            upload_id=self.upload_id, mainfile=self.mainfile, calc_hash=self.calc_hash,
             archive_id=self.archive_id, **kwargs)
         return logger

@@ -240,7 +235,7 @@ class Calc(Proc, datamodel.Calc):
     def archiving(self):
         logger = self.get_logger()

-        upload_hash, calc_hash = self.archive_id.split('/')
+        _, calc_hash = self.archive_id.split('/')
         additional = dict(
             mainfile=self.mainfile,
             upload_time=self.upload.upload_time,

@@ -254,7 +249,6 @@ class Calc(Proc, datamodel.Calc):
         repo_calc = RepoCalc.create_from_backend(
             self._parser_backend,
             additional=additional,
-            upload_hash=upload_hash,
             calc_hash=calc_hash,
             upload_id=self.upload_id)
         repo_calc.persist()

@@ -292,7 +286,6 @@ class Upload(Chord, datamodel.Upload):
         upload_id: the upload id generated by the database
         is_private: true if the upload and its derivitaves are only visible to the uploader
         upload_time: the timestamp when the system realised the upload
-        upload_hash: the hash of the uploaded file
         user_id: the id of the user that created this upload
     """
     id_field = 'upload_id'

@@ -306,7 +299,6 @@ class Upload(Chord, datamodel.Upload):
     is_private = BooleanField(default=False)
     upload_time = DateTimeField()
-    upload_hash = StringField(default=None)
     user_id = StringField(required=True)

@@ -316,7 +308,7 @@ class Upload(Chord, datamodel.Upload):
     meta: Any = {
         'indexes': [
-            'upload_hash', 'user_id', 'status'
+            'user_id', 'status'
         ]
     }

@@ -413,17 +405,10 @@ class Upload(Chord, datamodel.Upload):
                     logger, 'upload extracted', step='extracting',
                     upload_size=self.upload_files.size):
                 self.upload_files.extract()
-        except KeyError as e:
+        except KeyError:
             self.fail('process request for non existing upload', level=logging.ERROR)
             return

-        # create and save a hash for the upload
-        try:
-            self.upload_hash = self.upload_id  # TODO self.upload_file.upload_hash()
-        except Exception as e:
-            self.fail('could not create upload hash', e)
-            return
-
         # check if the file was already uploaded and processed before
         if self.to(RepoUpload).exists():
             self.fail('The same file was already uploaded and processed.', level=logging.INFO)

@@ -463,7 +448,7 @@ class Upload(Chord, datamodel.Upload):
         total_calcs = 0
         for filename, parser in self.match_mainfiles():
             calc = Calc.create(
-                archive_id='%s/%s' % (self.upload_hash, utils.hash(filename)),
+                archive_id='%s/%s' % (self.upload_id, utils.hash(filename)),
                 mainfile=filename, parser=parser.name,
                 upload_id=self.upload_id)
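get_logger builds on structlog-style bound loggers: bind() returns a new logger carrying extra key/value context, which is why dropping upload_hash is a one-line change. A minimal sketch, assuming structlog (the bound values are illustrative):

    import structlog

    logger = structlog.get_logger(__name__)

    # bind() is non-destructive: it returns a new logger with added context.
    calc_logger = logger.bind(
        upload_id='some-upload-id', mainfile='vasprun.xml',
        calc_hash='abc123', archive_id='some-upload-id/abc123')
    calc_logger.info('calc processing started')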
nomad/repo.py

@@ -45,13 +45,12 @@ class AlreadyExists(Exception): pass

 class RepoUpload(datamodel.Entity):
-    def __init__(self, upload_id, upload_hash):
+    def __init__(self, upload_id):
         self.upload_id = upload_id
-        self.upload_hash = upload_hash

     @classmethod
     def load_from(cls, obj):
-        return RepoUpload(obj.upload_id, obj.upload_hash)
+        return RepoUpload(obj.upload_id)

     @property
     def calcs(self):

@@ -67,7 +66,7 @@ class RepoUpload(datamodel.Entity):
         """ Returns true if there are already calcs from the given upload. """
         # TODO this is deprecated and should be varyfied via repository files
         search = Search(using=infrastructure.elastic_client, index=config.elastic.index_name) \
-            .query('match', upload_hash=self.upload_hash) \
+            .query('match', upload_id=self.upload_id) \
             .execute()

         return len(search) > 0

@@ -91,7 +90,6 @@ class RepoCalc(ElasticDocument, datamodel.Entity):
     calc_hash = Keyword()
     mainfile = Keyword()
-    upload_hash = Keyword()
     upload_id = Keyword()
     upload_time = Date()

@@ -116,38 +114,37 @@ class RepoCalc(ElasticDocument, datamodel.Entity):
     @property
     def upload(self):
-        return RepoUpload(self.upload_id, self.upload_hash)
+        return RepoUpload(self.upload_id)

     @property
     def archive_id(self) -> str:
         """ The unique id for this calculation. """
-        return '%s/%s' % (self.upload_hash, self.calc_hash)
+        return '%s/%s' % (self.upload_id, self.calc_hash)

     @classmethod
     def create_from_backend(
             cls, backend: LocalBackend, additional: Dict[str, Any],
-            upload_id: str, upload_hash: str, calc_hash: str) -> 'RepoCalc':
+            upload_id: str, calc_hash: str) -> 'RepoCalc':
         """
         Create a new calculation instance in elastic search. The data from the given backend
-        will be used. Additional meta-data can be given as *kwargs*. ``upload_id``,
-        ``upload_hash``, and ``calc_hash`` are mandatory.
+        will be used. Additional meta-data can be given as *kwargs*.
+        ``upload_id`` and ``calc_hash`` are mandatory.

         Arguments:
             backend: The parsing/normalizing backend that contains the calculation data.
             additional: Additional arguments not stored in the backend. E.g. ``user_id``,
                 ``staging``, ``restricted``
-            upload_hash: The upload hash of the originating upload.
+            upload_id: The upload id of the originating upload.
             calc_hash: The upload unique hash for this calculation.

         Returns:
             The created instance.
         """
-        assert upload_hash is not None and calc_hash is not None and upload_id is not None
-        additional.update(dict(upload_hash=upload_hash, calc_hash=calc_hash, upload_id=upload_id))
+        assert calc_hash is not None and upload_id is not None
+        additional.update(dict(calc_hash=calc_hash, upload_id=upload_id))

         # prepare the entry with all necessary properties from the backend
-        calc = cls(meta=dict(id='%s/%s' % (upload_hash, calc_hash)))
+        calc = cls(meta=dict(id='%s/%s' % (upload_id, calc_hash)))

         for property in cls._doc_type.mapping:
             mapped_property = key_mappings.get(property, property)

@@ -165,7 +162,7 @@ class RepoCalc(ElasticDocument, datamodel.Entity):
                     program_name = 'unknown'
                 logger.warning(
                     'Missing property value', property=mapped_property, upload_id=upload_id,
-                    upload_hash=upload_hash, calc_hash=calc_hash, code=program_name)
+                    calc_hash=calc_hash, code=program_name)
                 continue

             setattr(calc, property, value)
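upload_exists now matches on upload_id in the index. The same query in a standalone form, assuming elasticsearch-dsl (the client and index name are illustrative):

    from elasticsearch import Elasticsearch
    from elasticsearch_dsl import Search

    client = Elasticsearch()  # illustrative: a local default cluster

    def upload_exists(upload_id: str) -> bool:
        """True if the index already holds calcs for the given upload."""
        search = Search(using=client, index='calcs') \
            .query('match', upload_id=upload_id) \
            .execute()
        return len(search) > 0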
nomad/utils.py

@@ -98,7 +98,7 @@ class LogstashFormatter(logstash.formatter.LogstashFormatterBase):
                 if key in ('event', 'stack_info', 'id', 'timestamp'):
                     continue
-                elif key in ('upload_hash', 'archive_id', 'upload_id', 'calc_hash', 'mainfile',
+                elif key in ('archive_id', 'upload_id', 'calc_hash', 'mainfile',
                              'service', 'release'):
                     key = 'nomad.%s' % key
                 else:

@@ -258,8 +258,8 @@ def timer(logger, event, method='info', **kwargs):
 class archive:
     @staticmethod
-    def create(upload_hash: str, calc_hash: str) -> str:
-        return '%s/%s' % (upload_hash, calc_hash)
+    def create(upload_id: str, calc_hash: str) -> str:
+        return '%s/%s' % (upload_id, calc_hash)

     @staticmethod
     def items(archive_id: str) -> List[str]:

@@ -274,5 +274,5 @@ class archive:
         return archive.item(archive_id, 1)

     @staticmethod
-    def upload_hash(archive_id: str) -> str:
+    def upload_id(archive_id: str) -> str:
         return archive.item(archive_id, 0)
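Reconstructed as a whole, the archive helper now reads as below; the items and item bodies are not shown in the diff and are an assumption inferred from their call sites here and in nomad/processing/data.py:

    from typing import List

    class archive:
        @staticmethod
        def create(upload_id: str, calc_hash: str) -> str:
            return '%s/%s' % (upload_id, calc_hash)

        @staticmethod
        def items(archive_id: str) -> List[str]:
            return archive_id.split('/')  # assumption: a plain split on '/'

        @staticmethod
        def item(archive_id: str, index: int) -> str:
            return archive.items(archive_id)[index]  # assumption

        @staticmethod
        def calc_hash(archive_id: str) -> str:
            return archive.item(archive_id, 1)

        @staticmethod
        def upload_id(archive_id: str) -> str:
            return archive.item(archive_id, 0)

    assert archive.upload_id(archive.create('up-1', 'h-2')) == 'up-1'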
tests/conftest.py

@@ -144,26 +144,22 @@ def admin_user(repository_db):

 @pytest.fixture(scope='function')
 def mocksearch(monkeypatch):
-    uploads_by_hash = {}
     uploads_by_id = {}
     by_archive_id = {}

     def persist(calc):
-        uploads_by_hash.setdefault(calc.upload_hash, []).append(calc)
         uploads_by_id.setdefault(calc.upload_id, []).append(calc)
         by_archive_id[calc.archive_id] = calc

     def upload_exists(self):
-        return self.upload_hash in uploads_by_hash
+        return self.upload_id in uploads_by_id

     def upload_delete(self):
         upload_id = self.upload_id
         if upload_id in uploads_by_id:
             for calc in uploads_by_id[upload_id]:
                 del(by_archive_id[calc.archive_id])
-            upload_hash = uploads_by_id[upload_id][0].upload_hash
             del(uploads_by_id[upload_id])
-            del(uploads_by_hash[upload_hash])

     @property
     def upload_calcs(self):
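The fixture replaces the real index with in-memory dicts keyed by upload_id. A self-contained sketch of the pattern (the fake class and test are illustrative; the real fixture installs closures like these over RepoCalc and RepoUpload via monkeypatch.setattr):

    import pytest

    class FakeIndex:
        """In-memory stand-in for the search index, keyed by upload_id."""
        def __init__(self):
            self.uploads_by_id = {}

        def persist(self, calc):
            self.uploads_by_id.setdefault(calc['upload_id'], []).append(calc)

        def upload_exists(self, upload_id):
            return upload_id in self.uploads_by_id

    @pytest.fixture(scope='function')
    def mocksearch(monkeypatch):
        # Illustrative: the real fixture patches the production classes here.
        return FakeIndex()

    def test_persist(mocksearch):
        mocksearch.persist({'upload_id': 'up-1'})
        assert mocksearch.upload_exists('up-1')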
tests/processing/test_data.py

@@ -85,7 +85,7 @@ def processed_upload(uploaded_id, test_user, worker, no_warn) -> Upload:
 def assert_processing(upload: Upload, mocksearch=None):
     assert upload.completed
     assert upload.current_task == 'cleanup'
-    assert upload.upload_hash is not None
+    assert upload.upload_id is not None