nomad-lab / nomad-FAIR

Commit 6eb62cd8, authored Dec 09, 2019 by Markus Scheidgen

Dataset edit related checks and improvements.

Parent: e26b46a4
Pipeline #65183 passed with stages in 19 minutes and 12 seconds
Changes: 7 files
gui/src/components/search/EntryList.js
@@ -301,7 +301,7 @@ export class EntryListUnstyled extends React.Component {
          {moreActions}
        </React.Fragment>

    const selectActions = createActions({query: selectQuery, buttonProps: {color: 'secondary'}})
-   const allActions = createActions({query: query}, actions)
+   const allActions = actions

    return (
      <div className={classes.root}>
gui/src/components/search/SearchContext.js
@@ -21,7 +21,11 @@ class SearchContext extends React.Component {
  }

  static emptyResponse = {
-   statistics: {}
+   statistics: {
+     total: {
+       all: {}
+     }
+   }
  }

  static type = React.createContext()
nomad/app/api/dataset.py
@@ -16,7 +16,7 @@ from flask import request, g
 from flask_restplus import Resource, fields, abort
 import re

-from nomad import utils
+from nomad import utils, processing as proc
 from nomad.app.utils import with_logger
 from nomad.datamodel import Dataset
 from nomad.metainfo.flask_restplus import generate_flask_restplus_model
@@ -136,6 +136,15 @@ class DatasetResource(Resource):
        if result.doi is not None:
            abort(400, 'Dataset with name %s already has a DOI' % name)

+       # check if the DOI can be created
+       published_values = proc.Calc.objects(metadata__datasets=result.dataset_id).distinct(
+           'metadata.published')
+       if False in published_values:
+           abort(400, 'Dataset must not contain non published entries.')
+
+       if True not in published_values:
+           abort(400, 'Dataset must not be empty.')
+
        # set the DOI
        doi = DOI.create(title='NOMAD dataset: %s' % result.name, user=g.user)
        doi.create_draft()
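The two new aborts interpret the distinct values of metadata.published over the dataset's entries: any False means the dataset still contains unpublished entries, and a result without True means no entry matched at all. A minimal sketch of that decision logic (the helper name is hypothetical and not part of this commit):

    def can_assign_doi(published_values):
        # published_values is what distinct('metadata.published') returns,
        # e.g. [True], [True, False], [False], or [] for an empty dataset
        if False in published_values:
            return False, 'Dataset must not contain non published entries.'
        if True not in published_values:
            return False, 'Dataset must not be empty.'
        return True, None

    assert can_assign_doi([True]) == (True, None)
    assert can_assign_doi([]) == (False, 'Dataset must not be empty.')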
@@ -161,7 +170,7 @@ class DatasetResource(Resource):
    @authenticate(required=True)
    @with_logger
    def delete(self, name: str, logger):
-       """ Assign a DOI to the dataset. """
+       """ Delete the dataset. """
        try:
            result = Dataset.m_def.m_x('me').get(user_id=g.user.user_id, name=name)
        except KeyError:
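From a client's point of view, assigning a DOI remains a POST to the dataset resource; this commit only adds 400 responses for empty or partially unpublished datasets. A hedged sketch of such a call with the requests library; the base URL and auth headers are placeholders, not taken from this commit:

    import requests

    def assign_doi(base_url, dataset_name, auth_headers):
        # base_url and auth_headers are deployment-specific placeholders
        rv = requests.post('%s/datasets/%s' % (base_url, dataset_name), headers=auth_headers)
        if rv.status_code == 400:
            # e.g. 'Dataset must not be empty.' or
            # 'Dataset must not contain non published entries.'
            raise RuntimeError(rv.json().get('message', 'could not assign DOI'))
        rv.raise_for_status()
        return rv.json()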
nomad/app/api/repo.py
@@ -20,6 +20,7 @@ meta-data.
 from typing import List, Dict, Any
 from flask_restplus import Resource, abort, fields
 from flask import request, g
 from elasticsearch_dsl import Q
 from elasticsearch.exceptions import NotFoundError
 import elasticsearch.helpers
@@ -344,43 +345,48 @@ repo_edit_model = api.model('RepoEdit', {
        quantity.name: repo_edit_action_field(quantity)
        for quantity in UserMetadata.m_def.all_quantities.values()
    }), skip_none=True,
-       description='Each action specifies a single value (even for multi valued quantities).')
+       description='Each action specifies a single value (even for multi valued quantities).'),
+   'success': fields.Boolean(description='If the overall edit can/could be done. Only in API response.'),
+   'message': fields.String(description='A message that details the overall edit result. Only in API response.')
})


def edit(parsed_query: Dict[str, Any], logger, mongo_update: Dict[str, Any] = None, re_index=True) -> List[str]:
    # get all calculations that have to change
+   with utils.timer(logger, 'edit query executed'):
        search_request = search.SearchRequest()
        add_query(search_request, parsed_query)
        upload_ids = set()
        calc_ids = []
        for hit in search_request.execute_scan():
            calc_ids.append(hit['calc_id'])
            upload_ids.add(hit['upload_id'])

    # perform the update on the mongo db
+   with utils.timer(logger, 'edit mongo update executed', size=len(calc_ids)):
        if mongo_update is not None:
            n_updated = proc.Calc.objects(calc_id__in=calc_ids).update(multi=True, **mongo_update)
            if n_updated != len(calc_ids):
                logger.error('edit repo did not update all entries', payload=mongo_update)

    # re-index the affected entries in elastic search
+   with utils.timer(logger, 'edit elastic update executed', size=len(calc_ids)):
        if re_index:
            def elastic_updates():
                for calc in proc.Calc.objects(calc_id__in=calc_ids):
                    entry = search.Entry.from_calc_with_metadata(
                        datamodel.CalcWithMetadata(**calc['metadata']))
                    entry = entry.to_dict(include_meta=True)
                    entry['_op_type'] = 'index'
                    yield entry

            _, failed = elasticsearch.helpers.bulk(
                infrastructure.elastic_client, elastic_updates(), stats_only=True)
            search.refresh()
            if failed > 0:
                logger.error(
                    'edit repo with failed elastic updates',
                    payload=mongo_update, nfailed=len(failed))

    return list(upload_ids)
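The three with utils.timer(logger, ...) blocks only add timing around steps that already existed. utils.timer itself is not part of this diff; the sketch below is an assumption of how such a logging timer context manager can look in general, not NOMAD's actual implementation:

    import time
    from contextlib import contextmanager

    @contextmanager
    def timer(logger, event, **kwargs):
        # illustrative only: log the wall-clock time spent in the wrapped block
        start = time.time()
        try:
            yield
        finally:
            logger.info('%s took %.3fs %s', event, time.time() - start, kwargs)

Used as with timer(logger, 'edit query executed'):, the wrapped code runs unchanged and a single log line with the elapsed time is emitted when the block exits.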
@@ -431,91 +437,127 @@ class EditRepoCalcsResource(Resource):
        parsed_query['owner'] = owner

        # checking the edit actions and preparing a mongo update on the fly
        json_data['success'] = True
        mongo_update = {}
        uploader_ids = None
        lift_embargo = False
+       removed_datasets = None

+       with utils.timer(logger, 'edit verified'):
            for action_quantity_name, quantity_actions in actions.items():
                quantity = UserMetadata.m_def.all_quantities.get(action_quantity_name)
                if quantity is None:
                    abort(400, 'Unknown quantity %s' % action_quantity_name)

                quantity_flask = quantity.m_x('flask', {})
                if quantity_flask.get('admin_only', False):
                    if not g.user.is_admin():
                        abort(404, 'Only the admin user can set %s' % quantity.name)

                if isinstance(quantity_actions, list) == quantity.is_scalar:
                    abort(400, 'Wrong shape for quantity %s' % action_quantity_name)

                if not isinstance(quantity_actions, list):
                    quantity_actions = [quantity_actions]

                flask_verify = quantity_flask.get('verify', None)
                mongo_key = 'metadata__%s' % quantity.name
                has_error = False
                for action in quantity_actions:
                    action['success'] = True
                    action['message'] = None
                    action_value = action.get('value')
                    action_value = action_value if action_value is None else action_value.strip()

                    if action_value is None:
                        mongo_value = None

                    elif action_value == '':
                        mongo_value = None

                    elif flask_verify == datamodel.User:
                        try:
                            mongo_value = User.get(user_id=action_value).user_id
                        except KeyError:
                            action['success'] = False
                            has_error = True
                            action['message'] = 'User does not exist'
                            continue

                        if uploader_ids is None:
                            uploader_ids = get_uploader_ids(parsed_query)
                        if action_value in uploader_ids:
                            action['success'] = False
                            has_error = True
                            action['message'] = 'This user is already an uploader of one entry in the query'
                            continue

                    elif flask_verify == datamodel.Dataset:
                        try:
                            mongo_value = Dataset.m_def.m_x('me').get(
                                user_id=g.user.user_id, name=action_value).dataset_id
                        except KeyError:
                            action['message'] = 'Dataset does not exist and will be created'
+                           mongo_value = None
                            if not verify:
                                dataset = Dataset(
                                    dataset_id=utils.create_uuid(), user_id=g.user.user_id,
                                    name=action_value)
                                dataset.m_x('me').create()
                                mongo_value = dataset.dataset_id

                    elif action_quantity_name == 'with_embargo':
                        # ignore the actual value ... just lift the embargo
                        mongo_value = False
                        lift_embargo = True

+                       # check if necessary
+                       search_request = search.SearchRequest()
+                       add_query(search_request, parsed_query)
+                       search_request.q = search_request.q & Q('term', with_embargo=True)
+                       if search_request.execute()['total'] == 0:
+                           action['success'] = False
+                           has_error = True
+                           action['message'] = 'There is no embargo to lift'
+                           continue

                    else:
                        mongo_value = action_value

                    if len(quantity.shape) == 0:
                        mongo_update[mongo_key] = mongo_value
                    else:
                        mongo_values = mongo_update.setdefault(mongo_key, [])
                        if mongo_value is not None:
                            if mongo_value in mongo_values:
                                action['success'] = False
                                has_error = True
                                action['message'] = 'Duplicate values are not allowed'
                                continue
                            mongo_values.append(mongo_value)

                if len(quantity_actions) == 0 and len(quantity.shape) > 0:
                    mongo_update[mongo_key] = []

+               if action_quantity_name == 'datasets':
+                   # check if datasets edit is allowed and if datasets have to be removed
+                   search_request = search.SearchRequest()
+                   add_query(search_request, parsed_query)
+                   search_request.quantity(name='dataset_id')
+                   old_datasets = list(
+                       search_request.execute()['quantities']['dataset_id']['values'].keys())
+
+                   removed_datasets = []
+                   for dataset_id in old_datasets:
+                       if dataset_id not in mongo_update.get(mongo_key, []):
+                           removed_datasets.append(dataset_id)
+
+                   doi_ds = Dataset.m_def.m_x('me').objects(
+                       dataset_id__in=removed_datasets, doi__ne=None).first()
+                   if doi_ds is not None:
+                       json_data['success'] = False
+                       json_data['message'] = json_data.get('message', '') + \
+                           'Edit would remove entries from a dataset with DOI (%s) ' % doi_ds.name
+                       has_error = True

        # stop here, if client just wants to verify its actions
        if verify:
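The per-action bookkeeping above is what clients of POST /repo/edit see: each action carries its own success and message, while the success and message fields added to repo_edit_model summarize the whole edit. An illustrative request/response pair for the new "no embargo to lift" case; the request body matches the test further down, and the response excerpt only lists the fields the new check sets:

    request_body = {
        'actions': {
            'with_embargo': {'value': 'lift'}
        }
    }

    # if none of the matched entries is under embargo, the API now answers 400
    # and marks the action as failed:
    response_excerpt = {
        'actions': {
            'with_embargo': {
                'success': False,
                'message': 'There is no embargo to lift'
            }
        }
    }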
@@ -534,6 +576,10 @@ class EditRepoCalcsResource(Resource):
upload
=
proc
.
Upload
.
get
(
upload_id
)
upload
.
re_pack
()
# remove old datasets
if
removed_datasets
is
not
None
:
Dataset
.
m_def
.
m_x
(
'me'
).
objects
(
dataset_id__in
=
removed_datasets
).
delete
()
return
json_data
,
200
...
...
nomad/processing/data.py
@@ -74,7 +74,9 @@ class Calc(Proc):
            ('upload_id', 'parser'),
            ('upload_id', 'tasks_status'),
            ('upload_id', 'process_status'),
-           ('upload_id', 'metadata.nomad_version')
+           ('upload_id', 'metadata.nomad_version'),
+           'metadata.published',
+           'metadata.datasets'
        ]
    }
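The hunk above extends the index list in the Calc document's mongoengine meta. In mongoengine, tuples in that list declare compound indexes and plain strings declare single-field indexes; the class below is a hypothetical stand-in used only to illustrate the syntax, not NOMAD's actual Calc:

    from mongoengine import Document, StringField, DictField

    class ExampleCalc(Document):
        upload_id = StringField()
        metadata = DictField()

        meta = {
            'indexes': [
                ('upload_id', 'metadata.nomad_version'),  # tuple: compound index
                'metadata.published',                     # string: single-field index
                'metadata.datasets',
            ]
        }

The two new single-field indexes back the queries this commit introduces, for example the publication check in dataset.py that filters on metadata__datasets and reads metadata.published.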
tests/app/test_api.py
@@ -36,6 +36,7 @@ from tests.test_files import example_file, example_file_mainfile, example_file_c
 from tests.test_files import create_staging_upload, create_public_upload, assert_upload_files
 from tests.test_search import assert_search_upload
 from tests.processing import test_data as test_processing
+from tests.utils import assert_exception
 from tests.app.test_app import BlueprintClient
@@ -1189,6 +1190,28 @@ class TestEditRepo():
        self.assert_edit(rv, quantity='datasets', success=True, message=False)
        assert self.mongo(1, datasets=[self.example_dataset.dataset_id])

+   def test_edit_ds_remove_doi(self):
+       rv = self.perform_edit(
+           datasets=[self.example_dataset.name], query=dict(upload_id='upload_1'))
+       assert rv.status_code == 200
+
+       rv = self.api.post('/datasets/%s' % self.example_dataset.name, headers=self.test_user_auth)
+       assert rv.status_code == 200
+
+       rv = self.perform_edit(datasets=[], query=dict(upload_id='upload_1'))
+       assert rv.status_code == 400
+       data = json.loads(rv.data)
+       assert not data['success']
+       assert self.example_dataset.name in data['message']
+       assert Dataset.m_def.m_x('me').get(dataset_id=self.example_dataset.dataset_id) is not None
+
+   def test_edit_ds_remove(self):
+       rv = self.perform_edit(
+           datasets=[self.example_dataset.name], query=dict(upload_id='upload_1'))
+       assert rv.status_code == 200
+       rv = self.perform_edit(datasets=[], query=dict(upload_id='upload_1'))
+       assert rv.status_code == 200
+       with assert_exception(KeyError):
+           assert Dataset.m_def.m_x('me').get(dataset_id=self.example_dataset.dataset_id) is None
+
    def test_edit_ds_user_namespace(self, test_user):
        assert Dataset.m_def.m_x('me').objects(
            name=self.other_example_dataset.name).first() is not None
@@ -1246,6 +1269,24 @@ def test_edit_lift_embargo(api, published, other_test_user_auth):
        f.read()


+@pytest.mark.timeout(config.tests.default_timeout)
+def test_edit_lift_embargo_unnecessary(api, published_wo_user_metadata, other_test_user_auth):
+   example_calc = Calc.objects(upload_id=published_wo_user_metadata.upload_id).first()
+   assert not example_calc.metadata['with_embargo']
+
+   rv = api.post(
+       '/repo/edit', headers=other_test_user_auth, content_type='application/json',
+       data=json.dumps({
+           'actions': {
+               'with_embargo': {
+                   'value': 'lift'
+               }
+           }
+       }))
+   assert rv.status_code == 400
+   data = json.loads(rv.data)
+   assert not data['actions']['with_embargo']['success']
+
+
class TestRaw(UploadFilesBasedTests):

    def assert_zip_file(self, rv, files: int = -1, basename: bool = None):
@@ -1607,7 +1648,20 @@ class TestDataset:
        self.assert_dataset(data, name='ds1', doi=True)
        self.assert_dataset_entry(api, '1', True, True, headers=test_user_auth)

-   def test_resolve_doi(self, api, example_dataset_with_entry):
+   def test_assign_doi_empty(self, api, test_user_auth, example_datasets):
+       rv = api.post('/datasets/ds1', headers=test_user_auth)
+       assert rv.status_code == 400
+
+   def test_assign_doi_unpublished(self, api, test_user_auth, example_datasets):
+       calc = CalcWithMetadata(
+           calc_id='1', upload_id='1', published=False, with_embargo=False, datasets=['1'])
+       Calc(
+           calc_id='1', upload_id='1', create_time=datetime.datetime.now(),
+           metadata=calc.to_dict()).save()
+
+       rv = api.post('/datasets/ds1', headers=test_user_auth)
+       assert rv.status_code == 400
+
+   def test_resolve_doi(self, api, example_datasets):
        rv = api.get('/datasets/doi/test_doi')
        assert rv.status_code == 200
        data = json.loads(rv.data)
tests/conftest.py
@@ -581,6 +581,21 @@ def published(non_empty_processed: processing.Upload, example_user_metadata) ->
    return non_empty_processed


+@pytest.mark.timeout(config.tests.default_timeout)
+@pytest.fixture(scope='function')
+def published_wo_user_metadata(non_empty_processed: processing.Upload) -> processing.Upload:
+   """
+   Provides a processed upload. Upload was uploaded with test_user.
+   """
+   non_empty_processed.publish_upload()
+   try:
+       non_empty_processed.block_until_complete(interval=.01)
+   except Exception:
+       pass
+
+   return non_empty_processed
+
+
@pytest.fixture
def reset_config():
    """ Fixture that resets the log-level after test. """