nomad-lab / nomad-FAIR · Commit 14329873

Commit 14329873, authored Feb 27, 2020 by Markus Scheidgen

Merge branch 'v0.7.10' into v0.8.0

Parents: 2b83f245, 22b09bb2
Changes: 63 files
gui/src/components/search/EntryList.js

@@ -12,6 +12,7 @@ import EditUserMetadataDialog from '../EditUserMetadataDialog'
 import DownloadButton from '../DownloadButton'
 import PublishedIcon from '@material-ui/icons/Public'
 import PrivateIcon from '@material-ui/icons/AccountCircle'
+import { withApi } from '../api'

 export function Published(props) {
   const { entry } = props

@@ -271,21 +272,33 @@ export class EntryListUnstyled extends React.Component {
           </Quantity>
         </div>
       </div>
       <div className={classes.entryDetailsActions}>
-        <Button color="primary" onClick={event => this.handleViewEntryPage(event, row)}>
-          Show raw files and archive
-        </Button>
+        {this.showEntryActions(row) &&
+          <Button color="primary" onClick={event => this.handleViewEntryPage(event, row)}>
+            Show raw files and archive
+          </Button>
+        }
       </div>
     </div>)
   }

+  showEntryActions(row) {
+    const { user } = this.props
+    if (row.with_embargo && !(user && row.owners.find(owner => owner.user_id === user.sub))) {
+      return false
+    } else {
+      return !this.props.showEntryActions || this.props.showEntryActions(row)
+    }
+  }
+
   handleViewEntryPage(event, row) {
     event.stopPropagation()
     this.props.history.push(`/entry/id/${row.upload_id}/${row.calc_id}`)
   }

   renderEntryActions(row, selected) {
-    if (!this.props.showEntryActions || this.props.showEntryActions(row)) {
+    if (this.showEntryActions(row)) {
       return <Tooltip title="Show raw files and archive">
         <IconButton style={selected ? {color: 'white'} : null} onClick={event => this.handleViewEntryPage(event, row)}>
           <DetailsIcon />

@@ -310,8 +323,7 @@ export class EntryListUnstyled extends React.Component {
     }

     const defaultSelectedColumns = this.props.selectedColumns || [
-      ...domain.defaultSearchResultColumns,
-      'authors']
+      ...domain.defaultSearchResultColumns, 'authors']

     const pagination = <TablePagination
       count={totalNumber}

@@ -365,6 +377,6 @@ export class EntryListUnstyled extends React.Component {
   }
 }

-const EntryList = compose(withRouter, withDomain, withStyles(EntryListUnstyled.styles))(EntryListUnstyled)
+const EntryList = compose(withRouter, withDomain, withApi(false), withStyles(EntryListUnstyled.styles))(EntryListUnstyled)

 export default EntryList
gui/src/components/search/Search.js

@@ -371,6 +371,7 @@ class VisualizationSelect extends React.Component {
 class OwnerSelect extends React.Component {
   static ownerLabel = {
     all: 'All entries',
+    visible: 'Include your private entries',
     public: 'Only public entries',
     user: 'Only your entries',
     staging: 'Staging area only'

@@ -378,7 +379,8 @@ class OwnerSelect extends React.Component {
   static ownerTooltips = {
     all: 'This will show all entries in the database.',
-    public: 'Do not show entries that are only visible to you.',
+    visible: 'Do also show entries that are only visible to you.',
+    public: 'Do not show entries with embargo.',
     user: 'Do only show entries visible to you.',
     staging: 'Will only show entries that you uploaded, but not yet published.'
   }
gui/src/components/search/SearchPage.js

@@ -75,7 +75,7 @@ class SearchPage extends React.Component {
     const { classes, user, location, update } = this.props

     let query = {
-      owner: 'all'
+      owner: 'public'
     }

     if (location && location.search) {
       query = {

@@ -91,7 +91,7 @@ class SearchPage extends React.Component {
       <SearchContext
         update={update}
         initialQuery={query}
-        ownerTypes={['all', 'public'].filter(key => user || withoutLogin.indexOf(key) !== -1)}
+        ownerTypes={['public', 'visible'].filter(key => user || withoutLogin.indexOf(key) !== -1)}
       >
         <Search visualization="elements" tabs={['entries', 'groups', 'datasets']} />
       </SearchContext>
gui/src/components/uploads/Upload.js

@@ -50,12 +50,16 @@ class PublishConfirmDialog extends React.Component {
     area into the public NOMAD. This step is final. All public data will be made available under the Creative
     Commons Attribution license ([CC BY 4.0](https://creativecommons.org/licenses/by/4.0/)).

-    If you wish, you can put an embargo on your data. Embargoed data is not
-    visible to others (unless explicitly shared), but you can already create
-    datasets and assign DOIs for data with embargo, e.g. to put it into your
-    unpublished paper. The embargo will last up to 36 months. Afterwards, your
-    data will be made publicly available. You can also lift the embargo on
-    entries at any time. This functionality is part of editing entries.
+    If you wish, you can put an embargo on your data. Embargoed data is
+    visible to and findable by others. This makes some metadata (e.g.
+    chemical formula, system type, spacegroup, etc.) public, but the raw-file
+    and archive contents remain hidden (except to you, and users you explicitly
+    share the data with).
+    You can already create datasets and assign DOIs for data with embargo, e.g.
+    to put it into your unpublished paper.
+    The embargo will last up to 36 months. Afterwards, your data will be made publicly
+    available. You can also lift the embargo on entries at any time.
+    This functionality is part of editing entries.
     `}
     </Markdown>
     <FormControl style={{width: '100%', marginTop: 24}}>
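The changed embargo semantics can be exercised directly against the API: metadata of embargoed entries is findable via search, while raw-file access stays restricted. A minimal sketch, assuming a NOMAD API under base_url; the exact parameter and route names are assumptions based on the endpoints touched in this merge:

    # Hedged sketch: embargoed entries are findable, their files are not.
    import requests

    base_url = 'https://nomad-lab.eu/prod/rae/api'  # assumption: any NOMAD API root

    # owner=all includes embargoed entries; their metadata shows up in results.
    results = requests.get(
        '%s/repo/' % base_url,
        params={'owner': 'all', 'with_embargo': 'true', 'per_page': 1}).json()
    print(results['pagination']['total'], 'embargoed entries are findable')

    # Raw-file access for such an entry should be denied without authorization.
    entry = results['results'][0]
    raw = requests.get('%s/raw/calc/%s/%s/' % (
        base_url, entry['upload_id'], entry['calc_id']))
    print('unauthenticated raw access:', raw.status_code)  # expect 401/404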
gui/src/components/uploads/UploadPage.js

@@ -65,8 +65,9 @@
 If you press publish, a dialog will appear that allows you to set an
 *embargo* or publish your data as *Open Access* right away. The *embargo* allows you to share
 data with selected users, create a DOI for your data, and later publish the data.
 The *embargo* might last up to 36 months before data becomes public automatically.
-During an *embargo* the data (and datasets created from this data) are only visible to you
-and users you share the data with (i.e. users you added under *share with* when editing entries).
+During an *embargo* the data (and datasets created from this data) are already visible and
+findable, but only you and users you share the data with (i.e. users you added under
+*share with* when editing entries) can view and download the raw-data and archive.

 #### Processing errors
nomad/app/api/archive.py

@@ -35,8 +35,8 @@ from nomad.archive import query_archive
 from .auth import authenticate, create_authorization_predicate
 from .api import api
-from .common import calc_route, streamed_zipfile, search_model, \
-    add_search_parameters, apply_search_parameters, query_model
+from .common import calc_route, streamed_zipfile, search_model, add_search_parameters, \
+    apply_search_parameters, query_model

 ns = api.namespace('archive', ...

@@ -212,7 +212,7 @@ class ArchiveDownloadResource(Resource):
             generator(), zipfile_name='nomad_archive.zip', compress=compress)

-_archive_query_model = api.inherit('Archive Calculations', search_model, {
+_archive_query_model = api.inherit('Archive Search', search_model, {
     'query': fields.Nested(query_model, description='The query used to find the requested entries.'),
     'query_schema': fields.Raw(description='The query schema that defines what archive data to retrieve.')
 })

@@ -287,18 +287,18 @@ class ArchiveQueryResource(Resource):
         data = []
         calcs = results['results']
         archive_files = None
-        cur_upload_id = None
+        current_upload_id = None
         for entry in calcs:
             upload_id = entry['upload_id']
             calc_id = entry['calc_id']
-            if archive_files is None or cur_upload_id != upload_id:
+            if archive_files is None or current_upload_id != upload_id:
                 upload_files = UploadFiles.get(upload_id, create_authorization_predicate(upload_id))

                 if upload_files is None:
                     return []

                 archive_files = upload_files.archive_file_msgs()
-                cur_upload_id = upload_id
+                current_upload_id = upload_id

             if entry['with_embargo']:
                 archive_file = archive_files[1]
nomad/app/api/auth.py

@@ -249,6 +249,9 @@ class UsersResource(Resource):
     @api.expect(user_model, validate=True)
     def put(self):
         """ Invite a new user. """
+        if config.keycloak.oasis:
+            abort(400, 'User invite does not work in this NOMAD OASIS')
+
         json_data = request.get_json()
         try:
             user = datamodel.User.m_from_dict(json_data)

@@ -314,7 +317,14 @@ def create_authorization_predicate(upload_id, calc_id=None):
         # look in mongo
         try:
             upload = processing.Upload.get(upload_id)
-            return g.user.user_id == upload.user_id
+            if g.user.user_id == upload.user_id:
+                return True
+
+            try:
+                calc = processing.Calc.get(calc_id)
+            except KeyError:
+                return False
+
+            return g.user.user_id in calc.metadata.get('shared_with', [])

         except KeyError as e:
             logger = utils.get_logger(__name__, upload_id=upload_id, calc_id=calc_id)
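The widened predicate means raw-file and archive routes can now authorize calc-level shares, not just the uploader. A standalone sketch of the decision logic; all names and data below are hypothetical placeholders, not the NOMAD implementation:

    # Sketch: the uploader always passes; otherwise consult the calc's shared_with list.
    def is_authorized(user_id, upload, calc_metadata):
        if user_id == upload['user_id']:
            return True  # the uploader owns everything in the upload
        if calc_metadata is None:
            return False  # no calc context, owner-only as before
        return user_id in calc_metadata.get('shared_with', [])

    upload = {'user_id': 'alice'}
    calc = {'shared_with': ['bob']}
    assert is_authorized('alice', upload, calc)
    assert is_authorized('bob', upload, calc)       # newly possible via shared_with
    assert not is_authorized('carol', upload, calc)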
nomad/app/api/common.py

@@ -70,15 +70,24 @@ search_model_fields = {
     'results': fields.List(fields.Raw(allow_null=True, skip_none=True), description=(
         'A list of search results. Each result is a dict with quantity names as key and '
         'values as values'), allow_null=True, skip_none=True),
-    'owner': fields.String(description='The group the calculations belong to.', allow_null=True, skip_none=True),
-    'from_time': fields.Raw(description='The minimum entry time.', allow_null=True, skip_none=True),
-    'until_time': fields.Raw(description='The maximum entry time.', allow_null=True, skip_none=True),
-}
+    'python': fields.String(description=(
+        'A string of python code snippet which can be executed to reproduce the api result.')),
+    'curl': fields.String(description=(
+        'A string of curl command which can be executed to reproduce the api result.'))}

 search_model = api.model('Search', search_model_fields)

-query_model = api.model('Query', {
-    quantity.name: fields.Raw(description=quantity.description)
-    for quantity in search.quantities.values()})
+query_model_fields = {
+    quantity.name: fields.Raw(description=quantity.description)
+    for quantity in search.quantities.values()}
+
+query_model_fields.update(**{
+    'owner': fields.String(description='The group the calculations belong to.', allow_null=True, skip_none=True),
+    'from_time': fields.Raw(description='The minimum entry time.', allow_null=True, skip_none=True),
+    'until_time': fields.Raw(description='The maximum entry time.', allow_null=True, skip_none=True)
+})
+
+query_model = api.model('Query', query_model_fields)

 def add_pagination_parameters(request_parser):

@@ -111,7 +120,7 @@ def add_search_parameters(request_parser):
     # more search parameters
     request_parser.add_argument(
         'owner', type=str,
-        help='Specify which calcs to return: ``all``, ``public``, ``user``, ``staging``, default is ``all``')
+        help='Specify which calcs to return: ``visible``, ``public``, ``all``, ``user``, ``staging``, default is ``visible``')
     request_parser.add_argument(
         'from_time', type=lambda x: rfc3339DateTime.parse(x),
         help='A yyyy-MM-ddTHH:mm:ss (RFC3339) minimum entry time (e.g. upload time)')

@@ -133,7 +142,7 @@ def apply_search_parameters(search_request: search.SearchRequest, args: Dict[str
     args = {key: value for key, value in args.items() if value is not None}

     # owner
-    owner = args.get('owner', 'all')
+    owner = args.get('owner', 'visible')
     try:
         search_request.owner(
             owner, ...
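With the default owner now 'visible' instead of 'all', anonymous searches behave like 'public' searches, and logged-in users additionally see their own private entries. A hedged sketch of the five owner values against the repo endpoint; base_url and token are assumptions:

    import requests

    base_url = 'https://nomad-lab.eu/prod/rae/api'  # assumption
    token = None  # set a bearer token to exercise 'visible'/'user'/'staging'

    headers = {'Authorization': 'Bearer %s' % token} if token else {}
    for owner in ['visible', 'public', 'all', 'user', 'staging']:
        response = requests.get(
            '%s/repo/' % base_url, params={'owner': owner}, headers=headers)
        # visible: public entries plus your own private ones (the new default)
        # public:  excludes entries with embargo
        # all:     includes embargoed entries' metadata
        # user/staging: your entries only, require authentication
        print(owner, response.status_code)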
nomad/app/api/dataset.py

@@ -104,7 +104,7 @@ class DatasetListResource(Resource):
         return Dataset(dataset_id=dataset_id, **data).m_x('me').create(), 200

-@ns.route('/<string:name>')
+@ns.route('/<path:name>')
 @api.doc(params=dict(name='The name of the requested dataset.'))
 class DatasetResource(Resource):
     @api.doc('get_dataset')
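The converter change matters because Werkzeug's <string:...> stops at the first slash, while <path:...> also matches '/', so dataset names that contain slashes now resolve. A minimal Flask sketch; the routes here are hypothetical, not NOMAD's:

    from flask import Flask

    app = Flask(__name__)

    @app.route('/string/<string:name>')
    def as_string(name):
        return name

    @app.route('/path/<path:name>')
    def as_path(name):
        return name

    with app.test_client() as client:
        assert client.get('/string/a/b').status_code == 404   # '/' not matched
        assert client.get('/path/a/b').get_data(as_text=True) == 'a/b'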
nomad/app/api/info.py

@@ -59,7 +59,8 @@ info_model = api.model('Info', {
     'domain': fields.Nested(model=domain_model),
     'version': fields.String,
     'release': fields.String,
-    'git': fields.Nested(model=git_info_model)
+    'git': fields.Nested(model=git_info_model),
+    'oasis': fields.Boolean
 })

@@ -95,5 +96,6 @@ class InfoResource(Resource):
                 'version': gitinfo.version,
                 'commit': gitinfo.commit,
                 'log': gitinfo.log
-            }
+            },
+            'oasis': config.keycloak.oasis
         }, 200
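Clients can use the new flag to detect an OASIS installation, e.g. before offering user invites (which the auth.py change above rejects on an OASIS). A sketch, assuming a NOMAD API under base_url:

    import requests

    base_url = 'https://nomad-lab.eu/prod/rae/api'  # assumption
    info = requests.get('%s/info/' % base_url).json()
    if info.get('oasis', False):
        print('NOMAD OASIS: user invites are disabled')
    else:
        print('central NOMAD installation')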
nomad/app/api/mirror.py

@@ -103,7 +103,9 @@ class MirrorUploadResource(Resource):
                 datasets[dataset] = _Dataset._get_collection().find_one(dict(_id=dataset))
                 doi = datasets[dataset].get('doi', None)
                 if doi is not None:
-                    dois[doi] = DOI._get_collection().find_one(dict(_id=doi))
+                    doi_obj = DOI._get_collection().find_one(dict(_id=doi))
+                    if doi_obj is not None:
+                        dois[doi] = doi_obj

         return {
             'upload_id': upload_id,
nomad/app/api/raw.py

@@ -272,14 +272,16 @@ class RawFileFromCalcPathResource(Resource):
         path = urllib.parse.unquote(path)

         calc_filepath = path if path is not None else ''
-        authorization_predicate = create_authorization_predicate(upload_id)
+        authorization_predicate = create_authorization_predicate(upload_id, calc_id=calc_id)
         upload_files = UploadFiles.get(upload_id, authorization_predicate)
         if upload_files is None:
             abort(404, message='The upload with id %s does not exist.' % upload_id)

-        calc = Calc.get(calc_id)
-        if calc is None:
+        try:
+            calc = Calc.get(calc_id)
+        except KeyError:
             abort(404, message='The calc with id %s does not exist.' % calc_id)

         if calc.upload_id != upload_id:
             abort(404, message='The calc with id %s is not part of the upload with id %s.' % (calc_id, upload_id))

@@ -317,6 +319,8 @@ _raw_files_request_model = api.model('RawFilesRequest', {
 _raw_files_request_parser = api.parser()
 _raw_files_request_parser.add_argument(
     'files', required=True, type=str, help='Comma separated list of files to download.', location='args')
+_raw_files_request_parser.add_argument(
+    'prefix', type=str, help='A common prefix that is prepended to all files', location='args')
 _raw_files_request_parser.add_argument(**raw_file_strip_argument)
 _raw_files_request_parser.add_argument(**raw_file_compress_argument)

@@ -356,13 +360,19 @@ class RawFilesResource(Resource):
         any files that the user is not authorized to access.
         """
         args = _raw_files_request_parser.parse_args()

         files_str = args.get('files')
-        compress = args.get('compress', False)
-        strip = args.get('strip', False)
         if files_str is None:
             abort(400, message="No files argument given.")

-        files = [file.strip() for file in files_str.split(',')]
+        prefix = args.get('prefix')
+        if prefix is not None:
+            files = [os.path.join(prefix, file.strip()) for file in files_str.split(',')]
+        else:
+            files = [file.strip() for file in files_str.split(',')]
+
+        compress = args.get('compress', False)
+        strip = args.get('strip', False)

         return respond_to_get_raw_files(upload_id, files, compress=compress, strip=strip)

@@ -455,8 +465,7 @@ class RawFileQueryResource(Resource):
                     if upload_files is not None:
                         upload_files.close_zipfile_cache()

-                    upload_files = UploadFiles.get(
-                        upload_id, create_authorization_predicate(upload_id))
+                    upload_files = UploadFiles.get(upload_id)

                     if upload_files is None:
                         logger.error('upload files do not exist', upload_id=upload_id)

@@ -467,6 +476,8 @@ class RawFileQueryResource(Resource):
                 def open_file(upload_filename):
                     return upload_files.raw_file(upload_filename, 'rb')

+                upload_files._is_authorized = create_authorization_predicate(
+                    upload_id=upload_id, calc_id=entry['calc_id'])
+
                 directory = os.path.dirname(mainfile)
                 directory_w_upload = os.path.join(upload_files.upload_id, directory)
                 if directory_w_upload not in directories:
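The new prefix argument saves repeating a common directory in every entry of the files list; the server joins the prefix with each (stripped) name. A hedged usage sketch, assuming the raw files route lives at /raw/<upload_id> and using a hypothetical upload_id and file names:

    import requests

    base_url = 'https://nomad-lab.eu/prod/rae/api'  # assumption
    upload_id = 'some_upload_id'                    # hypothetical

    # Without prefix: every path spelled out in full.
    requests.get('%s/raw/%s' % (base_url, upload_id),
                 params={'files': 'run1/INCAR,run1/OUTCAR'})

    # With prefix: equivalent request, common directory passed once.
    requests.get('%s/raw/%s' % (base_url, upload_id),
                 params={'files': 'INCAR,OUTCAR', 'prefix': 'run1'})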
nomad/app/api/repo.py

@@ -98,11 +98,7 @@ _repo_calcs_model_fields = {
         'A dict with all statistics. Each statistic is dictionary with a metrics dict as '
         'value and quantity value as key. The possible metrics are code runs(calcs), %s. '
         'There is a pseudo quantity "total" with a single value "all" that contains the '
-        ' metrics over all results. ' % ', '.join(datamodel.Domain.instance.metrics_names))),
-    'python': fields.String(description=(
-        'A string of python code snippet which can be executed to reproduce the api result.')),
-    'curl': fields.String(description=(
-        'A string of curl command which can be executed to reproduce the api result.')),
+        ' metrics over all results. ' % ', '.join(datamodel.Domain.instance.metrics_names)))
 }

 for group_name, (group_quantity, _) in search.groups.items():
     _repo_calcs_model_fields[group_name] = fields.Nested(api.model('RepoDatasets', {
nomad/cli/admin/admin.py

@@ -12,8 +12,10 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

+from typing import Callable, List
 import click
+import datetime
 from elasticsearch_dsl import Q
 import elasticsearch.helpers
 import sys
 import io

@@ -21,6 +23,7 @@ import re
 import uuid
 import json
 from io import StringIO
+import threading
 import numpy as np
 import requests

@@ -35,6 +38,72 @@ from nomad.cli.cli import cli
 from nomad import config

+def __run_processing(
+        uploads, parallel: int, process: Callable[[proc.Upload], None], label: str):
+    if isinstance(uploads, (tuple, list)):
+        uploads_count = len(uploads)
+    else:
+        uploads_count = uploads.count()
+        uploads = list(uploads)  # copy the whole mongo query set to avoid cursor timeouts
+
+    cv = threading.Condition()
+    threads: List[threading.Thread] = []
+    state = dict(
+        completed_count=0,
+        skipped_count=0,
+        available_threads_count=parallel)
+
+    logger = utils.get_logger(__name__)
+    print('%d uploads selected, %s ...' % (uploads_count, label))
+
+    def process_upload(upload: proc.Upload):
+        logger.info('%s started' % label, upload_id=upload.upload_id)
+
+        completed = False
+        if upload.process_running:
+            logger.warn(
+                'cannot trigger %s, since the upload is already/still processing' % label,
+                current_process=upload.current_process,
+                current_task=upload.current_task,
+                upload_id=upload.upload_id)
+        else:
+            upload.reset()
+            process(upload)
+            upload.block_until_complete(interval=.5)
+
+            if upload.tasks_status == proc.FAILURE:
+                logger.info('%s with failure' % label, upload_id=upload.upload_id)
+
+            completed = True
+            logger.info('%s complete' % label, upload_id=upload.upload_id)
+
+        with cv:
+            state['completed_count'] += 1 if completed else 0
+            state['skipped_count'] += 1 if not completed else 0
+            state['available_threads_count'] += 1
+            print(
+                '   %s %s and skipped %s of %s uploads' % (
+                    label, state['completed_count'], state['skipped_count'], uploads_count))
+            cv.notify()
+
+    for upload in uploads:
+        with cv:
+            cv.wait_for(lambda: state['available_threads_count'] > 0)
+            state['available_threads_count'] -= 1
+            thread = threading.Thread(target=lambda: process_upload(upload))
+            threads.append(thread)
+            thread.start()
+
+    for thread in threads:
+        thread.join()
+
 @cli.group(help='''The nomad admin commands to do nasty stuff directly on the databases.
 Remember: With great power comes great responsibility!''')
 @click.pass_context
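__run_processing throttles work with a condition variable: a counter of free slots guards thread creation, and each worker returns its slot and notifies the main loop when done. A self-contained sketch of the same pattern, generic rather than NOMAD-specific:

    import threading
    import time

    parallel = 2
    cv = threading.Condition()
    state = dict(available=parallel, done=0)
    threads = []

    def work(item):
        time.sleep(0.1)  # stand-in for processing one upload
        with cv:
            state['available'] += 1
            state['done'] += 1
            cv.notify()

    for item in range(5):
        with cv:
            cv.wait_for(lambda: state['available'] > 0)  # block until a slot frees
            state['available'] -= 1
            thread = threading.Thread(target=work, args=(item,))
            threads.append(thread)
            thread.start()

    for thread in threads:
        thread.join()
    print('processed %d items, at most %d in flight' % (state['done'], parallel))

Passing the item through args= (rather than a lambda closing over the loop variable, as the merged code does) sidesteps Python's late binding of closure variables.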
@@ -57,6 +126,46 @@ def reset(remove, i_am_really_sure):
     infrastructure.reset(remove)

+@admin.command(help='Check and lift embargo of data with expired embargo period.')
+@click.option('--dry', is_flag=True, help='Do not lift the embargo, just show what needs to be done.')
+@click.option('--parallel', default=1, type=int, help='Use the given amount of parallel processes. Default is 1.')
+def lift_embargo(dry, parallel):
+    infrastructure.setup_logging()
+    infrastructure.setup_mongo()
+    infrastructure.setup_elastic()
+
+    request = search.SearchRequest()
+    request.q = Q('term', with_embargo=True) & Q('term', published=True)
+    request.quantity('upload_id', 1000)
+    result = request.execute()
+
+    uploads_to_repack = []
+    for upload_id in result['quantities']['upload_id']['values']:
+        upload = proc.Upload.get(upload_id)
+        embargo_length = upload.embargo_length
+        if embargo_length is None:
+            embargo_length = 36
+            upload.embargo_length = 36
+
+        if upload.upload_time + datetime.timedelta(days=int(embargo_length * 365 / 12)) < datetime.datetime.now():
+            print('need to lift the embargo of %s (upload_time=%s, embargo=%d)' % (
+                upload.upload_id, upload.upload_time, embargo_length))
+
+            if not dry:
+                proc.Calc._get_collection().update_many(
+                    {'upload_id': upload_id},
+                    {'$set': {'metadata.with_embargo': False}})
+                uploads_to_repack.append(upload)
+                upload.save()
+
+                upload_with_metadata = upload.to_upload_with_metadata()
+                calcs = upload_with_metadata.calcs
+                search.index_all(calcs)
+
+    if not dry:
+        __run_processing(uploads_to_repack, parallel, lambda upload: upload.re_pack(), 're-packing')
+
 @admin.command(help='(Re-)index all calcs.')
 @click.option('--threads', type=int, default=1, help='Number of threads to use.')
 @click.option('--dry', is_flag=True, help='Do not index, just compute entries.')
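lift_embargo stores the embargo in months and checks expiry with day resolution via int(months * 365 / 12), so the 36 month default becomes 1095 days. A small sketch of the arithmetic; upload_time is a hypothetical value:

    import datetime

    embargo_length = 36                              # months, the default
    days = int(embargo_length * 365 / 12)            # -> 1095
    upload_time = datetime.datetime(2016, 1, 1)      # hypothetical

    expired = upload_time + datetime.timedelta(days=days) < datetime.datetime.now()
    print('%d months = %d days, expired: %s' % (embargo_length, days, expired))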
nomad/cli/admin/uploads.py

@@ -12,18 +12,18 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-from typing import List, Callable
+from typing import List
 import click
 from tabulate import tabulate
 from mongoengine import Q
+from elasticsearch_dsl import Q as ESQ
 from pymongo import UpdateOne
 import threading
 import elasticsearch_dsl as es
+import json

-from nomad import processing as proc, config, infrastructure, utils, search, files, datamodel
-from .admin import admin
-from nomad.archive import write_archive
+from nomad import processing as proc, config, infrastructure, utils, search, files, datamodel, archive
+from .admin import admin, __run_processing

 @admin.group(help='Upload related commands')

@@ -70,6 +70,15 @@ def uploads(ctx, user: str, staging: bool, processing: bool, outdated: bool, cod
 def query_uploads(ctx, uploads):
+    try:
+        json_query = json.loads(' '.join(uploads))
+        request = search.SearchRequest()
+        request.q = ESQ(json_query)
+        request.quantity('upload_id', size=10000)
+        uploads = list(request.execute()['quantities']['upload_id']['values'])
+    except Exception:
+        pass
+
     query = ctx.obj.query
     if len(uploads) > 0:
         query &= Q(upload_id__in=uploads)
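query_uploads now accepts a whole Elasticsearch query as the UPLOADS argument: if the space-joined arguments parse as JSON, the query is executed and the matching upload ids replace the argument list. A sketch of the query shape; the query itself is a hypothetical example and the exact CLI subcommand is left open:

    import json

    # e.g. all published uploads that still carry an embargo
    query = {'bool': {'must': [
        {'term': {'published': True}},
        {'term': {'with_embargo': True}}]}}

    # passed on the command line roughly as:
    #   nomad admin uploads <command> '<json query>'
    print(json.dumps(query))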
@@ -236,7 +245,7 @@ def rm(ctx, uploads, skip_es, skip_mongo, skip_files):
 @uploads.command(help='Create msgpack file for upload')
 @click.argument('UPLOADS', nargs=-1)
 @click.pass_context
-def msgpacked(ctx, uploads):
+def msgpack(ctx, uploads):
     _, uploads = query_uploads(ctx, uploads)

     for upload in uploads:

@@ -250,74 +259,11 @@ def msgpacked(ctx, uploads):
             yield (calc_id, json.load(f))

         for access in ['public', 'restricted']:
             zf = upload_files.open_zip_file('archive', access, upload_files._archive_ext)
             archive_name = zf.filename.replace('.zip', '.msg')
             names = [name for name in zf.namelist() if name.endswith('json')]
             print('writing msgpack archive %s' % archive_name)
-            write_archive(archive_name, len(names), iterator(zf, names))
+            archive.write_archive(archive_name, len(names), iterator(zf, names))
-
-def __run_processing(
-        ctx, uploads, parallel: int, process: Callable[[proc.Upload], None], label: str):
-    _, uploads = query_uploads(ctx, uploads)
-    uploads_count = uploads.count()
-    uploads = list(uploads)  # copy the whole mongo query set to avoid cursor timeouts
-
-    cv = threading.Condition()
-    threads: List[threading.Thread] = []
-    state = dict(
-        completed_count=0,
-        skipped_count=0,
-        available_threads_count=parallel)
-
-    logger = utils.get_logger(__name__)
-    print('%d uploads selected, %s ...' % (uploads_count, label))
-
-    def process_upload(upload: proc.Upload):
-        logger.info('%s started' % label, upload_id=upload.upload_id)
-
-        completed = False
-        if upload.process_running: