Commit 14329873 authored by Markus Scheidgen

Merge branch 'v0.7.10' into v0.8.0

parents 2b83f245 22b09bb2
......@@ -12,6 +12,7 @@ import EditUserMetadataDialog from '../EditUserMetadataDialog'
import DownloadButton from '../DownloadButton'
import PublishedIcon from '@material-ui/icons/Public'
import PrivateIcon from '@material-ui/icons/AccountCircle'
import { withApi } from '../api'
export function Published(props) {
const {entry} = props
......@@ -271,21 +272,33 @@ export class EntryListUnstyled extends React.Component {
</Quantity>
</div>
</div>
<div className={classes.entryDetailsActions}>
<Button color="primary" onClick={event => this.handleViewEntryPage(event, row)}>
Show raw files and archive
</Button>
{this.showEntryActions(row) &&
<Button color="primary" onClick={event => this.handleViewEntryPage(event, row)}>
Show raw files and archive
</Button>
}
</div>
</div>)
}
showEntryActions(row) {
const { user } = this.props
if (row.with_embargo && !(user && row.owners.find(owner => owner.user_id === user.sub))) {
return false
} else {
return !this.props.showEntryActions || this.props.showEntryActions(row)
}
}
handleViewEntryPage(event, row) {
event.stopPropagation()
this.props.history.push(`/entry/id/${row.upload_id}/${row.calc_id}`)
}
renderEntryActions(row, selected) {
if (!this.props.showEntryActions || this.props.showEntryActions(row)) {
if (this.showEntryActions(row)) {
return <Tooltip title="Show raw files and archive">
<IconButton style={selected ? {color: 'white'} : null} onClick={event => this.handleViewEntryPage(event, row)}>
<DetailsIcon />
......@@ -310,8 +323,7 @@ export class EntryListUnstyled extends React.Component {
}
const defaultSelectedColumns = this.props.selectedColumns || [
...domain.defaultSearchResultColumns,
'authors']
...domain.defaultSearchResultColumns, 'authors']
const pagination = <TablePagination
count={totalNumber}
......@@ -365,6 +377,6 @@ export class EntryListUnstyled extends React.Component {
}
}
const EntryList = compose(withRouter, withDomain, withStyles(EntryListUnstyled.styles))(EntryListUnstyled)
const EntryList = compose(withRouter, withDomain, withApi(false), withStyles(EntryListUnstyled.styles))(EntryListUnstyled)
export default EntryList
......@@ -371,6 +371,7 @@ class VisualizationSelect extends React.Component {
class OwnerSelect extends React.Component {
static ownerLabel = {
all: 'All entries',
visible: 'Include your private entries',
public: 'Only public entries',
user: 'Only your entries',
staging: 'Staging area only'
......@@ -378,7 +379,8 @@ class OwnerSelect extends React.Component {
static ownerTooltips = {
all: 'This will show all entries in the database.',
public: 'Do not show entries that are only visible to you.',
visible: 'Do also show entries that are only visible to you.',
public: 'Do not show entries with embargo.',
user: 'Only show entries visible to you.',
staging: 'Will only show entries that you uploaded, but not yet published.'
}
......
......@@ -75,7 +75,7 @@ class SearchPage extends React.Component {
const { classes, user, location, update } = this.props
let query = {
owner: 'all'
owner: 'public'
}
if (location && location.search) {
query = {
......@@ -91,7 +91,7 @@ class SearchPage extends React.Component {
<SearchContext
update={update}
initialQuery={query}
ownerTypes={['all', 'public'].filter(key => user || withoutLogin.indexOf(key) !== -1)}
ownerTypes={['public', 'visible'].filter(key => user || withoutLogin.indexOf(key) !== -1)}
>
<Search visualization="elements" tabs={['entries', 'groups', 'datasets']} />
</SearchContext>
......
......@@ -50,12 +50,16 @@ class PublishConfirmDialog extends React.Component {
area into the public NOMAD. This step is final. All public data will be made available under the Creative
Commons Attribution license ([CC BY 4.0](https://creativecommons.org/licenses/by/4.0/)).
If you wish, you can put an embargo on your data. Embargoed data is not
visible to others (unless explicitly shared), but you can already create
datasets and assign DOIs for data with embargo, e.g. to put it into your
unpublished paper. The embargo will last up to 36 months. Afterwards, your
data will be made publicly available. You can also lift the embargo on
entries at any time. This functionality is part of editing entries.
If you wish, you can put an embargo on your data. Embargoed data is
visible to and findable by others. This makes some metadata (e.g.
chemical formula, system type, spacegroup, etc.) public, but the raw-file
and archive contents remain hidden (except to you, and users you explicitly
share the data with).
You can already create datasets and assign DOIs for data with embargo, e.g.
to put it into your unpublished paper.
The embargo will last up to 36 months. Afterwards, your data will be made publicly
available. You can also lift the embargo on entries at any time.
This functionality is part of editing entries.
`}</Markdown>
<FormControl style={{width: '100%', marginTop: 24}}>
......
......@@ -65,8 +65,9 @@ If you press publish, a dialog will appear that allows you to set an
*embargo* or publish your data as *Open Access* right away. The *embargo* allows you to share
data with selected users, create a DOI for your data, and later publish the data.
The *embargo* might last up to 36 months before data becomes public automatically.
During an *embargo* the data (and datasets created from this data) are only visible to you
and users you share the data with (i.e. users you added under *share with* when editing entries).
During an *embargo* the data (and datasets created from this data) are already visible and
findable, but only you and users you share the data with (i.e. users you added under
*share with* when editing entries) can view and download the raw data and archive.
#### Processing errors
......
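For reference, a minimal sketch of the expiration arithmetic behind the 36-month limit, matching the month-to-days conversion that the new `lift_embargo` admin command uses further down in this diff (the helper name is illustrative):

```python
import datetime

def embargo_expired(upload_time: datetime.datetime, embargo_length: int = 36) -> bool:
    # embargo_length is given in months; a month is approximated as 365/12 days,
    # as in the lift_embargo command below
    expiration = upload_time + datetime.timedelta(days=int(embargo_length * 365 / 12))
    return expiration < datetime.datetime.now()
```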
......@@ -35,8 +35,8 @@ from nomad.archive import query_archive
from .auth import authenticate, create_authorization_predicate
from .api import api
from .common import calc_route, streamed_zipfile, search_model,\
add_search_parameters, apply_search_parameters, query_model
from .common import calc_route, streamed_zipfile, search_model, add_search_parameters, apply_search_parameters, query_model
ns = api.namespace(
'archive',
......@@ -212,7 +212,7 @@ class ArchiveDownloadResource(Resource):
generator(), zipfile_name='nomad_archive.zip', compress=compress)
_archive_query_model = api.inherit('ArchiveCalculations', search_model, {
_archive_query_model = api.inherit('ArchiveSearch', search_model, {
'query': fields.Nested(query_model, description='The query used to find the requested entries.'),
'query_schema': fields.Raw(description='The query schema that defines what archive data to retrieve.')
})
......@@ -287,18 +287,18 @@ class ArchiveQueryResource(Resource):
data = []
calcs = results['results']
archive_files = None
cur_upload_id = None
current_upload_id = None
for entry in calcs:
upload_id = entry['upload_id']
calc_id = entry['calc_id']
if archive_files is None or cur_upload_id != upload_id:
if archive_files is None or current_upload_id != upload_id:
upload_files = UploadFiles.get(upload_id, create_authorization_predicate(upload_id))
if upload_files is None:
return []
archive_files = upload_files.archive_file_msgs()
cur_upload_id = upload_id
current_upload_id = upload_id
if entry['with_embargo']:
archive_file = archive_files[1]
......
......@@ -249,6 +249,9 @@ class UsersResource(Resource):
@api.expect(user_model, validate=True)
def put(self):
""" Invite a new user. """
if config.keycloak.oasis:
abort(400, 'User invite does not work in this NOMAD OASIS')
json_data = request.get_json()
try:
user = datamodel.User.m_from_dict(json_data)
......@@ -314,7 +317,14 @@ def create_authorization_predicate(upload_id, calc_id=None):
# look in mongo
try:
upload = processing.Upload.get(upload_id)
return g.user.user_id == upload.user_id
if g.user.user_id == upload.user_id:
return True
try:
calc = processing.Calc.get(calc_id)
except KeyError:
return False
return g.user.user_id in calc.metadata.get('shared_with', [])
except KeyError as e:
logger = utils.get_logger(__name__, upload_id=upload_id, calc_id=calc_id)
......
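A usage sketch of the extended authorization predicate: besides the upload owner, users listed in a calc's `shared_with` metadata now pass the check. The import paths and ids below are assumptions/placeholders; the `UploadFiles.get` call mirrors the raw-file resources elsewhere in this diff.

```python
from flask_restplus import abort

from nomad.files import UploadFiles  # assumed import path
from nomad.app.api.auth import create_authorization_predicate  # assumed import path

upload_id, calc_id = '<upload id>', '<calc id>'  # placeholders

# access is granted if the requester owns the upload or appears in the
# calc's shared_with metadata (see the predicate change above)
predicate = create_authorization_predicate(upload_id, calc_id=calc_id)
upload_files = UploadFiles.get(upload_id, predicate)
if upload_files is None:
    abort(404, message='The upload with id %s does not exist.' % upload_id)
```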
......@@ -70,15 +70,24 @@ search_model_fields = {
'results': fields.List(fields.Raw(allow_null=True, skip_none=True), description=(
'A list of search results. Each result is a dict with quantity names as keys and '
'values as values'), allow_null=True, skip_none=True),
'owner': fields.String(description='The group the calculations belong to.', allow_null=True, skip_none=True),
'from_time': fields.Raw(description='The minimum entry time.', allow_null=True, skip_none=True),
'until_time': fields.Raw(description='The maximum entry time.', allow_null=True, skip_none=True),
}
'python': fields.String(description=(
'A python code snippet that can be executed to reproduce the API result.')),
'curl': fields.String(description=(
'A curl command that can be executed to reproduce the API result.'))}
search_model = api.model('Search', search_model_fields)
query_model = api.model('Query', {
query_model_fields = {
quantity.name: fields.Raw(description=quantity.description)
for quantity in search.quantities.values()})
for quantity in search.quantities.values()}
query_model_fields.update(** {
'owner': fields.String(description='The group the calculations belong to.', allow_null=True, skip_none=True),
'from_time': fields.Raw(description='The minimum entry time.', allow_null=True, skip_none=True),
'until_time': fields.Raw(description='The maximum entry time.', allow_null=True, skip_none=True)
})
query_model = api.model('Query', query_model_fields)
def add_pagination_parameters(request_parser):
......@@ -111,7 +120,7 @@ def add_search_parameters(request_parser):
# more search parameters
request_parser.add_argument(
'owner', type=str,
help='Specify which calcs to return: ``all``, ``public``, ``user``, ``staging``, default is ``all``')
help='Specify which calcs to return: ``visible``, ``public``, ``all``, ``user``, ``staging``, default is ``visible``')
request_parser.add_argument(
'from_time', type=lambda x: rfc3339DateTime.parse(x),
help='A yyyy-MM-ddTHH:mm:ss (RFC3339) minimum entry time (e.g. upload time)')
......@@ -133,7 +142,7 @@ def apply_search_parameters(search_request: search.SearchRequest, args: Dict[str
args = {key: value for key, value in args.items() if value is not None}
# owner
owner = args.get('owner', 'all')
owner = args.get('owner', 'visible')
try:
search_request.owner(
owner,
......
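A hedged client-side illustration of the new `visible` default; the host and endpoint path are placeholders/assumptions, only the `owner` values come from the parser documentation above.

```python
import requests

base_url = 'https://<nomad host>/api'  # placeholder

# without a login, owner=public is the natural choice
public = requests.get(base_url + '/repo/', params={'owner': 'public'})

# with a bearer token, owner=visible (the new default) also includes
# your own embargoed entries
visible = requests.get(
    base_url + '/repo/', params={'owner': 'visible'},
    headers={'Authorization': 'Bearer <access token>'})  # placeholder token
```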
......@@ -104,7 +104,7 @@ class DatasetListResource(Resource):
return Dataset(dataset_id=dataset_id, **data).m_x('me').create(), 200
@ns.route('/<string:name>')
@ns.route('/<path:name>')
@api.doc(params=dict(name='The name of the requested dataset.'))
class DatasetResource(Resource):
@api.doc('get_dataset')
......
......@@ -59,7 +59,8 @@ info_model = api.model('Info', {
'domain': fields.Nested(model=domain_model),
'version': fields.String,
'release': fields.String,
'git': fields.Nested(model=git_info_model)
'git': fields.Nested(model=git_info_model),
'oasis': fields.Boolean
})
......@@ -95,5 +96,6 @@ class InfoResource(Resource):
'version': gitinfo.version,
'commit': gitinfo.commit,
'log': gitinfo.log
}
},
'oasis': config.keycloak.oasis
}, 200
......@@ -103,7 +103,9 @@ class MirrorUploadResource(Resource):
datasets[dataset] = _Dataset._get_collection().find_one(dict(_id=dataset))
doi = datasets[dataset].get('doi', None)
if doi is not None:
dois[doi] = DOI._get_collection().find_one(dict(_id=doi))
doi_obj = DOI._get_collection().find_one(dict(_id=doi))
if doi_obj is not None:
dois[doi] = doi_obj
return {
'upload_id': upload_id,
......
......@@ -272,14 +272,16 @@ class RawFileFromCalcPathResource(Resource):
path = urllib.parse.unquote(path)
calc_filepath = path if path is not None else ''
authorization_predicate = create_authorization_predicate(upload_id)
authorization_predicate = create_authorization_predicate(upload_id, calc_id=calc_id)
upload_files = UploadFiles.get(upload_id, authorization_predicate)
if upload_files is None:
abort(404, message='The upload with id %s does not exist.' % upload_id)
calc = Calc.get(calc_id)
if calc is None:
try:
calc = Calc.get(calc_id)
except KeyError:
abort(404, message='The calc with id %s does not exist.' % calc_id)
if calc.upload_id != upload_id:
abort(404, message='The calc with id %s is not part of the upload with id %s.' % (calc_id, upload_id))
......@@ -317,6 +319,8 @@ _raw_files_request_model = api.model('RawFilesRequest', {
_raw_files_request_parser = api.parser()
_raw_files_request_parser.add_argument(
'files', required=True, type=str, help='Comma separated list of files to download.', location='args')
_raw_files_request_parser.add_argument(
'prefix', type=str, help='A common prefix that is prepended to all files', location='args')
_raw_files_request_parser.add_argument(**raw_file_strip_argument)
_raw_files_request_parser.add_argument(**raw_file_compress_argument)
......@@ -356,13 +360,19 @@ class RawFilesResource(Resource):
any files that the user is not authorized to access.
"""
args = _raw_files_request_parser.parse_args()
files_str = args.get('files')
compress = args.get('compress', False)
strip = args.get('strip', False)
files_str = args.get('files')
if files_str is None:
abort(400, message="No files argument given.")
files = [file.strip() for file in files_str.split(',')]
prefix = args.get('prefix')
if prefix is not None:
files = [os.path.join(prefix, file.strip()) for file in files_str.split(',')]
else:
files = [file.strip() for file in files_str.split(',')]
compress = args.get('compress', False)
strip = args.get('strip', False)
return respond_to_get_raw_files(upload_id, files, compress=compress, strip=strip)
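A hedged client-side example of the new `prefix` argument; the route and file names are placeholders, but the joining behaviour matches the `os.path.join` call in the handler above.

```python
import requests

# files are given relative to the prefix; the server joins the prefix and
# each file name before looking them up
response = requests.get(
    'https://<nomad host>/api/raw/<upload id>',  # placeholder route
    params={'prefix': 'some/sub/dir', 'files': 'INCAR,OUTCAR', 'compress': 'true'})
with open('raw_files.zip', 'wb') as f:
    f.write(response.content)
```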
......@@ -455,8 +465,7 @@ class RawFileQueryResource(Resource):
if upload_files is not None:
upload_files.close_zipfile_cache()
upload_files = UploadFiles.get(
upload_id, create_authorization_predicate(upload_id))
upload_files = UploadFiles.get(upload_id)
if upload_files is None:
logger.error('upload files do not exist', upload_id=upload_id)
......@@ -467,6 +476,8 @@ class RawFileQueryResource(Resource):
def open_file(upload_filename):
return upload_files.raw_file(upload_filename, 'rb')
upload_files._is_authorized = create_authorization_predicate(
upload_id=upload_id, calc_id=entry['calc_id'])
directory = os.path.dirname(mainfile)
directory_w_upload = os.path.join(upload_files.upload_id, directory)
if directory_w_upload not in directories:
......
......@@ -98,11 +98,7 @@ _repo_calcs_model_fields = {
'A dict with all statistics. Each statistic is dictionary with a metrics dict as '
'value and quantity value as key. The possible metrics are code runs(calcs), %s. '
'There is a pseudo quantity "total" with a single value "all" that contains the '
' metrics over all results. ' % ', '.join(datamodel.Domain.instance.metrics_names))),
'python': fields.String(description=(
'A string of python code snippet which can be executed to reproduce the api result.')),
'curl': fields.String(description=(
'A string of curl command which can be executed to reproduce the api result.')),
' metrics over all results. ' % ', '.join(datamodel.Domain.instance.metrics_names)))
}
for group_name, (group_quantity, _) in search.groups.items():
_repo_calcs_model_fields[group_name] = fields.Nested(api.model('RepoDatasets', {
......
......@@ -12,8 +12,10 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import Callable, List
import click
import datetime
from elasticsearch_dsl import Q
import elasticsearch.helpers
import sys
import io
......@@ -21,6 +23,7 @@ import re
import uuid
import json
from io import StringIO
import threading
import numpy as np
import requests
......@@ -35,6 +38,72 @@ from nomad.cli.cli import cli
from nomad import config
def __run_processing(
uploads, parallel: int, process: Callable[[proc.Upload], None], label: str):
if isinstance(uploads, (tuple, list)):
uploads_count = len(uploads)
else:
uploads_count = uploads.count()
uploads = list(uploads) # copy the whole mongo query set to avoid cursor timeouts
cv = threading.Condition()
threads: List[threading.Thread] = []
state = dict(
completed_count=0,
skipped_count=0,
available_threads_count=parallel)
logger = utils.get_logger(__name__)
print('%d uploads selected, %s ...' % (uploads_count, label))
def process_upload(upload: proc.Upload):
logger.info('%s started' % label, upload_id=upload.upload_id)
completed = False
if upload.process_running:
logger.warn(
'cannot trigger %s, since the upload is already/still processing' % label,
current_process=upload.current_process,
current_task=upload.current_task, upload_id=upload.upload_id)
else:
upload.reset()
process(upload)
upload.block_until_complete(interval=.5)
if upload.tasks_status == proc.FAILURE:
logger.info('%s with failure' % label, upload_id=upload.upload_id)
completed = True
logger.info('%s complete' % label, upload_id=upload.upload_id)
with cv:
state['completed_count'] += 1 if completed else 0
state['skipped_count'] += 1 if not completed else 0
state['available_threads_count'] += 1
print(
' %s %s and skipped %s of %s uploads' %
(label, state['completed_count'], state['skipped_count'], uploads_count))
cv.notify()
for upload in uploads:
with cv:
cv.wait_for(lambda: state['available_threads_count'] > 0)
state['available_threads_count'] -= 1
thread = threading.Thread(target=lambda: process_upload(upload))
threads.append(thread)
thread.start()
for thread in threads:
thread.join()
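The helper throttles concurrency with a condition variable: each worker thread returns its slot and notifies the main loop when it finishes. A short usage sketch, matching the call the `lift_embargo` command below makes (the arguments are illustrative):

```python
# re-pack the selected uploads with at most four concurrent threads
__run_processing(
    uploads_to_repack, 4, lambda upload: upload.re_pack(), 're-packing')
```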
@cli.group(help='''The nomad admin commands to do nasty stuff directly on the databases.
Remember: With great power comes great responsibility!''')
@click.pass_context
......@@ -57,6 +126,46 @@ def reset(remove, i_am_really_sure):
infrastructure.reset(remove)
@admin.command(help='Check and lift embargo of data with expired embargo period.')
@click.option('--dry', is_flag=True, help='Do not lift the embargo, just show what needs to be done.')
@click.option('--parallel', default=1, type=int, help='Use the given amount of parallel processes. Default is 1.')
def lift_embargo(dry, parallel):
infrastructure.setup_logging()
infrastructure.setup_mongo()
infrastructure.setup_elastic()
request = search.SearchRequest()
request.q = Q('term', with_embargo=True) & Q('term', published=True)
request.quantity('upload_id', 1000)
result = request.execute()
uploads_to_repack = []
for upload_id in result['quantities']['upload_id']['values']:
upload = proc.Upload.get(upload_id)
embargo_length = upload.embargo_length
if embargo_length is None:
embargo_length = 36
upload.embargo_length = 36
if upload.upload_time + datetime.timedelta(days=int(embargo_length * 365 / 12)) < datetime.datetime.now():
print('need to lift the embargo of %s (upload_time=%s, embargo=%d)' % (
upload.upload_id, upload.upload_time, embargo_length))
if not dry:
proc.Calc._get_collection().update_many(
{'upload_id': upload_id},
{'$set': {'metadata.with_embargo': False}})
uploads_to_repack.append(upload)
upload.save()
upload_with_metadata = upload.to_upload_with_metadata()
calcs = upload_with_metadata.calcs
search.index_all(calcs)
if not dry:
__run_processing(uploads_to_repack, parallel, lambda upload: upload.re_pack(), 're-packing')
@admin.command(help='(Re-)index all calcs.')
@click.option('--threads', type=int, default=1, help='Number of threads to use.')
@click.option('--dry', is_flag=True, help='Do not index, just compute entries.')
......
......@@ -12,18 +12,18 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import List, Callable
from typing import List
import click
from tabulate import tabulate
from mongoengine import Q
from elasticsearch_dsl import Q as ESQ
from pymongo import UpdateOne
import threading
import elasticsearch_dsl as es
import json
from nomad import processing as proc, config, infrastructure, utils, search, files, datamodel
from .admin import admin
from nomad.archive import write_archive
from nomad import processing as proc, config, infrastructure, utils, search, files, datamodel, archive
from .admin import admin, __run_processing
@admin.group(help='Upload related commands')
......@@ -70,6 +70,15 @@ def uploads(ctx, user: str, staging: bool, processing: bool, outdated: bool, cod
def query_uploads(ctx, uploads):
try:
json_query = json.loads(' '.join(uploads))
request = search.SearchRequest()
request.q = ESQ(json_query)
request.quantity('upload_id', size=10000)
uploads = list(request.execute()['quantities']['upload_id']['values'])
except Exception:
pass
query = ctx.obj.query
if len(uploads) > 0:
query &= Q(upload_id__in=uploads)
......@@ -236,7 +245,7 @@ def rm(ctx, uploads, skip_es, skip_mongo, skip_files):
@uploads.command(help='Create msgpack file for upload')
@click.argument('UPLOADS', nargs=-1)
@click.pass_context
def msgpacked(ctx, uploads):
def msgpack(ctx, uploads):
_, uploads = query_uploads(ctx, uploads)
for upload in uploads:
......@@ -250,74 +259,11 @@ def msgpacked(ctx, uploads):
yield (calc_id, json.load(f))
for access in ['public', 'restricted']:
zf = upload_files.open_zip_file('archive', access, upload_files._archive_ext)
archive_name = zf.filename.replace('.zip', '.msg')
names = [name for name in zf.namelist() if name.endswith('json')]
print('writing msgpack archive %s' % archive_name)
write_archive(archive_name, len(names), iterator(zf, names))
def __run_processing(
ctx, uploads, parallel: int, process: Callable[[proc.Upload], None], label: str):
_, uploads = query_uploads(ctx, uploads)
uploads_count = uploads.count()
uploads = list(uploads) # copy the whole mongo query set to avoid cursor timeouts
cv = threading.Condition()
threads: List[threading.Thread] = []
state = dict(
completed_count=0,
skipped_count=0,
available_threads_count=parallel)
logger = utils.get_logger(__name__)
print('%d uploads selected, %s ...' % (uploads_count, label))
def process_upload(upload: proc.Upload):
logger.info('%s started' % label, upload_id=upload.upload_id)
completed = False
if upload.process_running: