diff --git a/examples/external_project_parallel_upload/example-2.tar.gz b/examples/external_project_parallel_upload/example-2.tar.gz
deleted file mode 100644
index 445ef22a4be2329ca5ee112e4fbebd385cd64731..0000000000000000000000000000000000000000
Binary files a/examples/external_project_parallel_upload/example-2.tar.gz and /dev/null differ
diff --git a/examples/external_project_parallel_upload/example-2.tar.gz b/examples/external_project_parallel_upload/example-2.tar.gz
new file mode 120000
index 0000000000000000000000000000000000000000..4dd08447f7bb8005100a239d32a738f3972fd83b
--- /dev/null
+++ b/examples/external_project_parallel_upload/example-2.tar.gz
@@ -0,0 +1 @@
+example-1.tar.gz
\ No newline at end of file
diff --git a/examples/external_project_parallel_upload/example-3.tar.gz b/examples/external_project_parallel_upload/example-3.tar.gz
deleted file mode 100644
index 17f4eb89cfbac709c0bb77a5df36ed6e500f2cfc..0000000000000000000000000000000000000000
Binary files a/examples/external_project_parallel_upload/example-3.tar.gz and /dev/null differ
diff --git a/examples/external_project_parallel_upload/example-3.tar.gz b/examples/external_project_parallel_upload/example-3.tar.gz
new file mode 120000
index 0000000000000000000000000000000000000000..4dd08447f7bb8005100a239d32a738f3972fd83b
--- /dev/null
+++ b/examples/external_project_parallel_upload/example-3.tar.gz
@@ -0,0 +1 @@
+example-1.tar.gz
\ No newline at end of file
diff --git a/examples/external_project_parallel_upload/upload.py b/examples/external_project_parallel_upload/upload.py
index 836a112d340c23019a1f4f31cb2f2499664b6a2f..a7cade9e252a88f36a0c6937dcdcc2e499d66b5d 100644
--- a/examples/external_project_parallel_upload/upload.py
+++ b/examples/external_project_parallel_upload/upload.py
@@ -16,6 +16,7 @@ import tarfile
 import io
 import zipfile
 import zipstream
+import uuid

 # config
 nomad_url = 'http://labdev-nomad.esc.rzg.mpg.de/fairdi/nomad/mp/api'
@@ -126,7 +127,7 @@ def upload_next_data(sources: Iterator[Tuple[str, str, str]], upload_name='next
             yield chunk

     # stream .zip to nomad
-    response = requests.put(url=url, headers={'X-Token': token}, data=content())
+    response = requests.put(url=url, headers={'X-Token': token, 'Content-type': 'application/octet-stream'}, data=content())

     if response.status_code != 200:
         raise Exception('nomad return status %d' % response.status_code)
diff --git a/examples/files.py b/examples/files.py
new file mode 100644
index 0000000000000000000000000000000000000000..05c642efb390dd342c8808310ca5d8256c8ed35e
--- /dev/null
+++ b/examples/files.py
@@ -0,0 +1,20 @@
+from nomad import infrastructure, files, processing as proc
+
+infrastructure.setup_logging()
+infrastructure.setup_mongo()
+
+upload_id = 'NvVyk3gATxCJW6dWS4cRWw'
+upload = proc.Upload.get(upload_id)
+upload_with_metadata = upload.to_upload_with_metadata()
+
+upload_files = files.PublicUploadFiles(upload_id)
+upload_files.repack(upload_with_metadata)
+
+# try:
+#     public_upload_files = files.PublicUploadFiles(upload_id)
+#     public_upload_files.delete()
+# except Exception:
+#     pass
+
+# staging_upload_files = files.StagingUploadFiles(upload_id)
+# staging_upload_files.pack(upload_with_metadata)
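The upload.py hunk above pins the Content-type of the streamed PUT request to application/octet-stream while the request body stays a generator, so the .zip produced by zipstream is sent chunk by chunk and never materialized in memory. A minimal sketch of that streaming pattern, assuming a prepared zipstream.ZipFile and a valid token (URL and header names are taken from the script above):

    import requests
    import zipstream

    def stream_zip_to_nomad(url: str, token: str, zip_stream: zipstream.ZipFile):
        # yield the zip content chunk by chunk; requests sends a generator
        # body as a chunked transfer, so the archive never sits in memory
        def content():
            for chunk in zip_stream:
                if len(chunk) != 0:
                    yield chunk

        response = requests.put(
            url=url,
            headers={'X-Token': token, 'Content-type': 'application/octet-stream'},
            data=content())

        if response.status_code != 200:
            raise Exception('nomad returned status %d' % response.status_code)
        return response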
diff --git a/gui/src/components/ApiDialogButton.js b/gui/src/components/ApiDialogButton.js
index e72e0e3449aac0eee843a0e404b2cb9c2c9cfa88..2a3d5fb4bd55f7628107fbe0168ca19b55035919 100644
--- a/gui/src/components/ApiDialogButton.js
+++ b/gui/src/components/ApiDialogButton.js
@@ -56,10 +56,10 @@ class ApiDialogUnstyled extends React.Component {
         }
       </DialogContent>
       <DialogActions>
-        <Button onClick={this.handleToggleRaw} color="primary">
+        <Button onClick={this.handleToggleRaw}>
           {showRaw ? 'show tree' : 'show raw JSON'}
         </Button>
-        <Button onClick={onClose} color="primary">
+        <Button onClick={onClose}>
           Close
         </Button>
       </DialogActions>
diff --git a/gui/src/components/EditUserMetadataDialog.js b/gui/src/components/EditUserMetadataDialog.js
index 069b3278b941ca560fe4c8b99b13af6e8154d443..74b1eddde0719ad8ab530132b2ec80f06a50951a 100644
--- a/gui/src/components/EditUserMetadataDialog.js
+++ b/gui/src/components/EditUserMetadataDialog.js
@@ -7,7 +7,7 @@ import DialogContent from '@material-ui/core/DialogContent'
 import DialogContentText from '@material-ui/core/DialogContentText'
 import DialogTitle from '@material-ui/core/DialogTitle'
 import PropTypes from 'prop-types'
-import { IconButton, Tooltip, withStyles, Paper, MenuItem, Popper, CircularProgress } from '@material-ui/core'
+import { IconButton, Tooltip, withStyles, Paper, MenuItem, Popper, CircularProgress, FormGroup, Checkbox, FormLabel } from '@material-ui/core'
 import EditIcon from '@material-ui/icons/Edit'
 import AddIcon from '@material-ui/icons/Add'
 import RemoveIcon from '@material-ui/icons/Delete'
@@ -348,7 +348,7 @@ class DatasetInputUnstyled extends React.Component {
       shouldRenderSuggestions={() => true}
       margin={margin}
       label={usedLabel}
-      placeholder={`Type the dataset's name`}
+      placeholder="Type the dataset's name"
     />
   }
 }
@@ -398,6 +398,7 @@ class ReferenceInput extends React.Component {
       onChange={this.handleChange.bind(this)}
       error={value === undefined}
       label={value === undefined ? 'A reference must be a valid url' : label}
+      placeholder="Enter a URL to a related resource"
     />
   }
 }
@@ -648,7 +649,7 @@ class InviteUserDialogUnstyled extends React.Component {
         {input('affiliation', 'Affiliation')}
       </DialogContent>
       <DialogActions>
-        <Button onClick={this.handleClose.bind(this)} color="primary" disabled={submitting}>
+        <Button onClick={this.handleClose.bind(this)} disabled={submitting}>
           Cancel
         </Button>
         <div className={classes.submitWrapper}>
@@ -665,6 +666,47 @@ class InviteUserDialogUnstyled extends React.Component {

 const InviteUserDialog = compose(withApi(true, false), withStyles(InviteUserDialogUnstyled.styles))(InviteUserDialogUnstyled)

+class UserMetadataFieldUnstyled extends React.PureComponent {
+  static propTypes = {
+    classes: PropTypes.object.isRequired,
+    children: PropTypes.node,
+    modified: PropTypes.bool,
+    onChange: PropTypes.func.isRequired
+  }
+
+  static styles = theme => ({
+    root: {
+      flexWrap: 'nowrap',
+      alignItems: 'flex-start',
+      marginTop: theme.spacing.unit * 2
+    },
+    container: {
+      width: '100%'
+    },
+    checkbox: {
+      marginLeft: -theme.spacing.unit * 2,
+      marginRight: theme.spacing.unit,
+      marginTop: theme.spacing.unit
+    }
+  })
+
+  render() {
+    const {children, classes, modified, onChange} = this.props
+    return <FormGroup row className={classes.root}>
+      <Checkbox
+        classes={{root: classes.checkbox}}
+        checked={modified}
+        onChange={(event, checked) => onChange(checked)}
+      />
+      <div className={classes.container}>
+        {children}
+      </div>
+    </FormGroup>
+  }
+}
+
+const UserMetadataField = withStyles(UserMetadataFieldUnstyled.styles)(UserMetadataFieldUnstyled)
+
 class EditUserMetadataDialogUnstyled extends React.Component {
   static propTypes = {
     classes: PropTypes.object.isRequired,
@@ -694,6 +736,9 @@ class EditUserMetadataDialogUnstyled extends React.Component {
       left: '50%',
       marginTop: -12,
       marginLeft: -12
+    },
+    liftEmbargoLabel: {
+      marginTop: theme.spacing.unit * 3
     }
   })
@@ -710,7 +755,7 @@ class EditUserMetadataDialogUnstyled extends React.Component {
       coauthors: [],
       shared_with: [],
       datasets: [],
-      with_embargo: true
+      with_embargo: 'lift'
     }
     this.unmounted = false
   }
@@ -720,13 +765,7 @@
     actions: {},
     isVerifying: false,
    verified: true,
-    submitting: false,
-    testCoauthors: [],
-    testUser: {
-      message: null,
-      success: true,
-      value: null
-    }
+    submitting: false
   }

   componentWillUnmount() {
@@ -746,8 +785,7 @@
         shared_with: (example.owners || [])
           .filter(user => user.user_id !== example.uploader.user_id)
           .map(user => user.user_id),
-        datasets: (example.datasets || []).map(ds => ds.name),
-        with_embargo: example.with_embargo
+        datasets: (example.datasets || []).map(ds => ds.name)
       }
     }

@@ -882,8 +920,16 @@
     const dialogEnabled = user && example.uploader && example.uploader.user_id === user.sub && !disabled
     const submitEnabled = Object.keys(actions).length && !submitting && verified

+    const editDataToActions = editData => {
+      if (Array.isArray(editData)) {
+        return editData.map(value => ({value: value}))
+      } else {
+        return {value: editData}
+      }
+    }
+
     const listTextInputProps = (key, verify) => {
-      const values = actions[key] ? actions[key] : this.editData[key].map(value => ({value: value}))
+      const values = actions[key] ? actions[key] : editDataToActions(this.editData[key])
       return {
         values: values,
@@ -897,6 +943,21 @@
       }
     }

+    const metadataFieldProps = (key, verify) => ({
+      modified: Boolean(actions[key]),
+      onChange: checked => {
+        if (checked) {
+          this.setState({actions: {...actions, [key]: editDataToActions(this.editData[key])}}, () => {
+            if (verify) {
+              this.verify()
+            }
+          })
+        } else {
+          this.setState({actions: {...actions, [key]: undefined}})
+        }
+      }
+    })
+
     return (
       <React.Fragment>
         <IconButton {...(buttonProps || {})} onClick={this.handleButtonClick} disabled={!dialogEnabled}>
@@ -911,58 +972,63 @@
           <DialogContentText>
             You are editing {total} {total === 1 ? 'entry' : 'entries'}. {total > 1
               ? 'The fields are pre-filled with data from the first entry for.' : ''
-            } Only the fields that you change will be updated.
-            Be aware that all references, co-authors, shared_with, or datasets count as
-            one field.
+            } Only the checked fields will be updated.
+            The fields references, co-authors, shared with users,
+            and datasets can have many values. Changing one value will apply all values.
           </DialogContentText>
-          <ActionInput component={TextField}
-            label="Comment"
-            value={actions.comment !== undefined ? actions.comment : {value: this.editData.comment}}
-            onChange={value => this.setState({actions: {...actions, comment: value}})}
-            margin="normal"
-            multiline rows="4"
-            fullWidth
-            placeholder="Add a comment"
-            InputLabelProps={{ shrink: true }}
-          />
-          <ListTextInput
-            component={ReferenceInput}
-            {...listTextInputProps('references', true)}
-            label="References"
-          />
-          <ListTextInput
-            component={UserInput}
-            {...listTextInputProps('coauthors', true)}
-            label="Co-author"
-          />
-          <ListTextInput
-            component={UserInput}
-            {...listTextInputProps('shared_with', true)}
-            label="Shared with"
-          />
-          <ListTextInput
-            component={DatasetInput}
-            {...listTextInputProps('datasets', true)}
-            label="Datasets"
-          />
+          <UserMetadataField {...metadataFieldProps('comment')}>
+            <ActionInput component={TextField}
+              label="Comment"
+              value={actions.comment !== undefined ? actions.comment : {value: this.editData.comment}}
+              onChange={value => this.setState({actions: {...actions, comment: value}})}
+              margin="normal"
+              multiline
+              rowsMax="10"
+              fullWidth
+              placeholder="Add a comment"
+              InputLabelProps={{ shrink: true }}
+            />
+          </UserMetadataField>
+          <UserMetadataField {...metadataFieldProps('references', true)}>
+            <ListTextInput
+              component={ReferenceInput}
+              {...listTextInputProps('references', true)}
+              label="References"
+            />
+          </UserMetadataField>
+          <UserMetadataField {...metadataFieldProps('coauthors', true)}>
+            <ListTextInput
+              component={UserInput}
+              {...listTextInputProps('coauthors', true)}
+              label="Co-author"
+            />
+          </UserMetadataField>
+          <UserMetadataField {...metadataFieldProps('shared_with', true)}>
+            <ListTextInput
+              component={UserInput}
+              {...listTextInputProps('shared_with', true)}
+              label="Shared with"
+            />
+          </UserMetadataField>
+          <UserMetadataField {...metadataFieldProps('datasets', true)}>
+            <ListTextInput
+              component={DatasetInput}
+              {...listTextInputProps('datasets', true)}
+              label="Datasets"
+            />
+          </UserMetadataField>
+          <UserMetadataField classes={{container: classes.liftEmbargoLabel}} {...metadataFieldProps('with_embargo', true)}>
+            <FormLabel>Lift embargo</FormLabel>
+          </UserMetadataField>
         </DialogContent>
-        {Object.keys(actions).length
-          ? <DialogContent>
-            <DialogContentText>
-              The following fields will be updated with the given values: <i>
-                {Object.keys(actions).map(action => action).join(', ')}</i>.
-              Updating many entries might take a few seconds.
-            </DialogContentText>
-          </DialogContent>
-          : ''}
         <DialogActions>
           <InviteUserDialog />
           <span style={{flexGrow: 1}} />
-          <Button onClick={this.handleClose} color="primary" disabled={submitting}>
+          <Button onClick={this.handleClose} disabled={submitting}>
            Cancel
          </Button>
          <div className={classes.submitWrapper}>
-            <Button onClick={this.handleSubmit} color="primary" disabled={!submitEnabled}>
+            <Button onClick={this.handleSubmit} disabled={!submitEnabled} color="primary">
              Submit
            </Button>
            {submitting && <CircularProgress size={24} className={classes.submitProgress} />}
diff --git a/gui/src/components/entry/ArchiveEntryView.js b/gui/src/components/entry/ArchiveEntryView.js
index c075bc057f2530a84b35446c9c5fb49e41ae1ba7..abd9a24c09f33446ffc9ed9d1aa2d4c99ef863ad 100644
--- a/gui/src/components/entry/ArchiveEntryView.js
+++ b/gui/src/components/entry/ArchiveEntryView.js
@@ -66,7 +66,7 @@ class MetainfoDialogUnstyled extends React.PureComponent {
         )}
         data={metaInfoData ? metaInfoData.miJson : {}} title="Metainfo JSON" />
-      <Button color="primary" onClick={onClose}>
+      <Button onClick={onClose}>
         Close
       </Button>
     </DialogActions>
diff --git a/gui/src/components/entry/ArchiveLogView.js b/gui/src/components/entry/ArchiveLogView.js
index 36e6cc25d46bb1c8e76bccf383fdf3d0dc84dd18..cae410c5a1e321c00669ef37d19aefe5229a165e 100644
--- a/gui/src/components/entry/ArchiveLogView.js
+++ b/gui/src/components/entry/ArchiveLogView.js
@@ -85,7 +85,7 @@ class ArchiveLogView extends React.Component {
       <Download
         classes={{root: classes.downloadFab}}
         tooltip="download logfile"
-        component={Fab} className={classes.downloadFab} color="primary" size="medium"
+        component={Fab} className={classes.downloadFab} color="secondary" size="medium"
         url={`archive/logs/${uploadId}/${calcId}`}
         fileName={`${calcId}.log`}
       >
         <DownloadIcon />
diff --git a/gui/src/components/uploads/ConfirmDialog.js b/gui/src/components/uploads/ConfirmDialog.js
index 6eccd8bfeda894a51e833a790c047129d5d4ca7c..1d14345bfed8fca3c9f127be20e2eed68d6bc8e6 100644
--- a/gui/src/components/uploads/ConfirmDialog.js
+++ b/gui/src/components/uploads/ConfirmDialog.js
@@ -51,7 +51,7 @@ class ConfirmDialog extends React.Component {
         </FormGroup>
       </DialogContent>
       <DialogActions>
-        <Button onClick={onClose} color="primary">
+        <Button onClick={onClose}>
           Cancel
         </Button>
         <Button onClick={() => onPublish(withEmbargo)} color="primary" autoFocus>
diff --git a/nomad/app/api/repo.py b/nomad/app/api/repo.py
index ecd87a9ca65f17984c8801ce296bdee3cb5a0661..78a8bc269e2f8129b72d491d8347424a3983bd6b 100644
--- a/nomad/app/api/repo.py
+++ b/nomad/app/api/repo.py
@@ -348,11 +348,15 @@ repo_edit_model = api.model('RepoEdit', {
 })


-def edit(parsed_query: Dict[str, Any], logger, mongo_update: Dict[str, Any] = None, re_index=True):
+def edit(parsed_query: Dict[str, Any], logger, mongo_update: Dict[str, Any] = None, re_index=True) -> List[str]:
     # get all calculations that have to change
     search_request = search.SearchRequest()
     add_query(search_request, parsed_query)
-    calc_ids = list(hit['calc_id'] for hit in search_request.execute_scan())
+    upload_ids = set()
+    calc_ids = []
+    for hit in search_request.execute_scan():
+        calc_ids.append(hit['calc_id'])
+        upload_ids.add(hit['upload_id'])

     # perform the update on the mongo db
     if mongo_update is not None:
@@ -378,6 +382,8 @@ def edit(parsed_query: Dict[str, Any], logger, mongo_update: Dict[str, Any] = No
             'edit repo with failed elastic updates',
             payload=mongo_update, nfailed=len(failed))

+    return list(upload_ids)
+

 def get_uploader_ids(query):
     """ Get all the uploader from the query, to check coauthers and shared_with for uploaders.
""" @@ -427,6 +433,7 @@ class EditRepoCalcsResource(Resource): # checking the edit actions and preparing a mongo update on the fly mongo_update = {} uploader_ids = None + lift_embargo = False for action_quantity_name, quantity_actions in actions.items(): quantity = UserMetadata.m_def.all_quantities.get(action_quantity_name) if quantity is None: @@ -437,9 +444,6 @@ class EditRepoCalcsResource(Resource): if not g.user.is_admin(): abort(404, 'Only the admin user can set %s' % quantity.name) - if quantity.name == 'Embargo': - abort(400, 'Cannot raise an embargo, you can only lift the embargo') - if isinstance(quantity_actions, list) == quantity.is_scalar: abort(400, 'Wrong shape for quantity %s' % action_quantity_name) @@ -491,7 +495,10 @@ class EditRepoCalcsResource(Resource): name=action_value) dataset.m_x('me').create() mongo_value = dataset.dataset_id - + elif action_quantity_name == 'with_embargo': + # ignore the actual value ... just lift the embargo + mongo_value = False + lift_embargo = True else: mongo_value = action_value @@ -519,7 +526,13 @@ class EditRepoCalcsResource(Resource): return json_data, 400 # perform the change - edit(parsed_query, logger, mongo_update, True) + upload_ids = edit(parsed_query, logger, mongo_update, True) + + # lift embargo + if lift_embargo: + for upload_id in upload_ids: + upload = proc.Upload.get(upload_id) + upload.re_pack() return json_data, 200 @@ -643,7 +656,6 @@ class RepoQuantitiesResource(Resource): quantities = args.get('quantities', []) size = args.get('size', 5) - print('A ', quantities) try: assert size >= 0 except AssertionError: diff --git a/nomad/cli/admin/uploads.py b/nomad/cli/admin/uploads.py index bb808b447295f618171e7d9f169e54f4fdcb4225..deffdd7a5f2a1f4ad59993d1b832bbe5d1d12c21 100644 --- a/nomad/cli/admin/uploads.py +++ b/nomad/cli/admin/uploads.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from typing import List +from typing import List, Callable import click from tabulate import tabulate from mongoengine import Q @@ -204,11 +204,8 @@ def rm(ctx, uploads, skip_es, skip_mongo, skip_files): upload.delete() -@uploads.command(help='Reprocess selected uploads.') -@click.argument('UPLOADS', nargs=-1) -@click.option('--parallel', default=1, type=int, help='Use the given amount of parallel processes. Default is 1.') -@click.pass_context -def re_process(ctx, uploads, parallel: int): +def __run_processing( + ctx, uploads, parallel: int, process: Callable[[proc.Upload], None], label: str): _, uploads = query_uploads(ctx, uploads) uploads_count = uploads.count() uploads = list(uploads) # copy the whole mongo query set to avoid cursor timeouts @@ -223,29 +220,29 @@ def re_process(ctx, uploads, parallel: int): logger = utils.get_logger(__name__) - print('%d uploads selected, re-processing ...' % uploads_count) + print('%d uploads selected, %s ...' 
diff --git a/nomad/cli/admin/uploads.py b/nomad/cli/admin/uploads.py
index bb808b447295f618171e7d9f169e54f4fdcb4225..deffdd7a5f2a1f4ad59993d1b832bbe5d1d12c21 100644
--- a/nomad/cli/admin/uploads.py
+++ b/nomad/cli/admin/uploads.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-from typing import List
+from typing import List, Callable
 import click
 from tabulate import tabulate
 from mongoengine import Q
@@ -204,11 +204,8 @@ def rm(ctx, uploads, skip_es, skip_mongo, skip_files):
         upload.delete()


-@uploads.command(help='Reprocess selected uploads.')
-@click.argument('UPLOADS', nargs=-1)
-@click.option('--parallel', default=1, type=int, help='Use the given amount of parallel processes. Default is 1.')
-@click.pass_context
-def re_process(ctx, uploads, parallel: int):
+def __run_processing(
+        ctx, uploads, parallel: int, process: Callable[[proc.Upload], None], label: str):
     _, uploads = query_uploads(ctx, uploads)
     uploads_count = uploads.count()
     uploads = list(uploads)  # copy the whole mongo query set to avoid cursor timeouts
@@ -223,29 +220,29 @@

     logger = utils.get_logger(__name__)

-    print('%d uploads selected, re-processing ...' % uploads_count)
+    print('%d uploads selected, %s ...' % (uploads_count, label))

-    def re_process_upload(upload: proc.Upload):
-        logger.info('re-processing started', upload_id=upload.upload_id)
+    def process_upload(upload: proc.Upload):
+        logger.info('%s started' % label, upload_id=upload.upload_id)

         completed = False
         if upload.process_running:
             logger.warn(
-                'cannot trigger re-process, since the upload is already/still processing',
+                'cannot trigger %s, since the upload is already/still processing' % label,
                 current_process=upload.current_process,
                 current_task=upload.current_task,
                 upload_id=upload.upload_id)

         else:
             upload.reset()
-            upload.re_process_upload()
+            process(upload)
             upload.block_until_complete(interval=.5)

             if upload.tasks_status == proc.FAILURE:
-                logger.info('re-processing with failure', upload_id=upload.upload_id)
+                logger.info('%s with failure' % label, upload_id=upload.upload_id)

             completed = True

-            logger.info('re-processing complete', upload_id=upload.upload_id)
+            logger.info('%s complete' % label, upload_id=upload.upload_id)

         with cv:
             state['completed_count'] += 1 if completed else 0
@@ -253,8 +250,8 @@
             state['available_threads_count'] += 1

             print(
-                '    re-processed %s and skipped %s of %s uploads' %
-                (state['completed_count'], state['skipped_count'], uploads_count))
+                '    %s %s and skipped %s of %s uploads' %
+                (label, state['completed_count'], state['skipped_count'], uploads_count))

             cv.notify()

@@ -262,7 +259,7 @@
         with cv:
             cv.wait_for(lambda: state['available_threads_count'] > 0)
             state['available_threads_count'] -= 1
-            thread = threading.Thread(target=lambda: re_process_upload(upload))
+            thread = threading.Thread(target=lambda: process_upload(upload))
         threads.append(thread)
         thread.start()

@@ -270,6 +267,22 @@
         thread.join()


+@uploads.command(help='Reprocess selected uploads.')
+@click.argument('UPLOADS', nargs=-1)
+@click.option('--parallel', default=1, type=int, help='Use the given amount of parallel processes. Default is 1.')
+@click.pass_context
+def re_process(ctx, uploads, parallel: int):
+    __run_processing(ctx, uploads, parallel, lambda upload: upload.re_process_upload(), 're-processing')
+
+
+@uploads.command(help='Repack selected uploads.')
+@click.argument('UPLOADS', nargs=-1)
+@click.option('--parallel', default=1, type=int, help='Use the given amount of parallel processes. Default is 1.')
+@click.pass_context
+def re_pack(ctx, uploads, parallel: int):
+    __run_processing(ctx, uploads, parallel, lambda upload: upload.re_pack(), 're-packing')
+
+
 @uploads.command(help='Attempt to abort the processing of uploads.')
 @click.argument('UPLOADS', nargs=-1)
 @click.option('--calcs', is_flag=True, help='Only stop calculation processing.')
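The extracted __run_processing helper caps concurrency with a threading.Condition around a shared slot counter instead of a thread pool, so each upload still gets its own thread but at most --parallel of them run at once. The same pattern in isolation (a sketch with a generic work callable, not the CLI code itself):

    import threading

    def run_bounded(items, parallel, work):
        # shared state guarded by the condition variable
        cv = threading.Condition()
        state = {'available_threads_count': parallel}
        threads = []

        def worker(item):
            try:
                work(item)
            finally:
                with cv:
                    state['available_threads_count'] += 1
                    cv.notify()  # wake the dispatcher waiting for a free slot

        for item in items:
            with cv:
                # block until one of the `parallel` slots is free
                cv.wait_for(lambda: state['available_threads_count'] > 0)
                state['available_threads_count'] -= 1
                thread = threading.Thread(target=worker, args=(item,))
            threads.append(thread)
            thread.start()

        for thread in threads:
            thread.join()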
diff --git a/nomad/files.py b/nomad/files.py
index 92bdf7cd4a9b8e00bb2749243e02b351a7e8b270..863487ba4f65bb0423750972feb5f605e756bca6 100644
--- a/nomad/files.py
+++ b/nomad/files.py
@@ -368,7 +368,9 @@ class StagingUploadFiles(UploadFiles):
     def archive_log_file_object(self, calc_id: str) -> PathObject:
         return self._archive_dir.join_file('%s.log' % calc_id)

-    def add_rawfiles(self, path: str, move: bool = False, prefix: str = None, force_archive: bool = False) -> None:
+    def add_rawfiles(
+            self, path: str, move: bool = False, prefix: str = None,
+            force_archive: bool = False, target_dir: DirectoryObject = None) -> None:
         """
         Add rawfiles to the upload. The given file will be copied, moved, or extracted.
@@ -378,11 +380,12 @@ class StagingUploadFiles(UploadFiles):
             prefix: Optional path prefix for the added files.
             force_archive: Expect the file to be a zip or other support archive file.
                 Usually those files are only extracted if they can be extracted and copied instead.
+            target_dir: Overwrite the used directory to extract to. Default is the raw directory of this upload.
         """
         assert not self.is_frozen
         assert os.path.exists(path)
         self._size += os.stat(path).st_size
-        target_dir = self._raw_dir
+        target_dir = self._raw_dir if target_dir is None else target_dir
         if prefix is not None:
             target_dir = target_dir.join_dir(prefix, create=True)
         ext = os.path.splitext(path)[1]
@@ -424,7 +427,7 @@ class StagingUploadFiles(UploadFiles):

     def pack(
             self, upload: UploadWithMetadata, target_dir: DirectoryObject = None,
-            skip_raw: bool = False) -> None:
+            skip_raw: bool = False, skip_archive: bool = False) -> None:
         """
         Replaces the staging upload data with a public upload record by packing all data
         into files. It is only available if upload *is_bag*.
@@ -435,6 +438,7 @@ class StagingUploadFiles(UploadFiles):
             target_dir: optional DirectoryObject to override where to put the files. Default
                 is the corresponding public upload files directory.
             skip_raw: determine to not pack the raw data, only archive and user metadata
+            skip_archive: determine to not pack the archive data, only raw and user metadata
         """
         self.logger.info('started to pack upload')
@@ -464,6 +468,17 @@ class StagingUploadFiles(UploadFiles):
             return zipfile.ZipFile(file.os_path, mode='w')

         # zip archives
+        if not skip_archive:
+            self._pack_archive_files(upload, create_zipfile)
+            self.logger.info('packed archives')
+
+        # zip raw files
+        if not skip_raw:
+            self._pack_raw_files(upload, create_zipfile)
+
+            self.logger.info('packed raw files')
+
+    def _pack_archive_files(self, upload: UploadWithMetadata, create_zipfile):
         archive_public_zip = create_zipfile('archive', 'public', self._archive_ext)
         archive_restricted_zip = create_zipfile('archive', 'restricted', self._archive_ext)
@@ -488,12 +503,7 @@ class StagingUploadFiles(UploadFiles):
         archive_restricted_zip.close()
         archive_public_zip.close()

-        self.logger.info('packed archives')
-
-        if skip_raw:
-            return
-
-        # zip raw files
+    def _pack_raw_files(self, upload: UploadWithMetadata, create_zipfile):
         raw_public_zip = create_zipfile('raw', 'public', 'plain')
         raw_restricted_zip = create_zipfile('raw', 'restricted', 'plain')
@@ -533,8 +543,6 @@ class StagingUploadFiles(UploadFiles):
         raw_restricted_zip.close()
         raw_public_zip.close()

-        self.logger.info('packed raw files')
-
     def raw_file_manifest(self, path_prefix: str = None) -> Generator[str, None, None]:
         upload_prefix_len = len(self._raw_dir.os_path) + 1
         for root, _, files in os.walk(self._raw_dir.os_path):
@@ -664,7 +672,9 @@ class ArchiveBasedStagingUploadFiles(StagingUploadFiles):
         assert next(self.raw_file_manifest(), None) is None, 'can only extract once'
         super().add_rawfiles(self.upload_path, force_archive=True)

-    def add_rawfiles(self, path: str, move: bool = False, prefix: str = None, force_archive: bool = False) -> None:
+    def add_rawfiles(
+            self, path: str, move: bool = False, prefix: str = None,
+            force_archive: bool = False, target_dir: DirectoryObject = None) -> None:
         assert False, 'do not add_rawfiles to a %s' % self.__class__.__name__

@@ -681,14 +691,19 @@ class PublicUploadFilesBasedStagingUploadFiles(StagingUploadFiles):
         super().__init__(public_upload_files.upload_id, *args, **kwargs)
         self.public_upload_files = public_upload_files

-    def extract(self) -> None:
+    def extract(self, include_archive: bool = False) -> None:
         assert next(self.raw_file_manifest(), None) is None, 'can only extract once'
         for access in ['public', 'restricted']:
             super().add_rawfiles(
                 self.public_upload_files.get_zip_file('raw', access, 'plain').os_path,
                 force_archive=True)

+            if include_archive:
+                super().add_rawfiles(
+                    self.public_upload_files.get_zip_file('archive', access, self._archive_ext).os_path,
+                    force_archive=True, target_dir=self._archive_dir)
+
-    def add_rawfiles(self, path: str, move: bool = False, prefix: str = None, force_archive: bool = False) -> None:
+    def add_rawfiles(self, *args, **kwargs) -> None:
         assert False, 'do not add_rawfiles to a %s' % self.__class__.__name__

     def pack(self, upload: UploadWithMetadata, *args, **kwargs) -> None:
@@ -744,15 +759,20 @@ class PublicUploadFiles(UploadFiles):

         raise KeyError()

-    def to_staging_upload_files(self, create: bool = False) -> 'StagingUploadFiles':
+    def to_staging_upload_files(self, create: bool = False, **kwargs) -> 'StagingUploadFiles':
+        exists = False
         try:
             staging_upload_files = PublicUploadFilesBasedStagingUploadFiles(self)
+            exists = True
         except KeyError:
             if not create:
                 return None

             staging_upload_files = PublicUploadFilesBasedStagingUploadFiles(self, create=True)
-            staging_upload_files.extract()
+            staging_upload_files.extract(**kwargs)
+
+        if exists and create:
+            raise FileExistsError('Staging upload already exists')

         return staging_upload_files

@@ -819,13 +839,53 @@ class PublicUploadFiles(UploadFiles):

     def archive_log_file(self, calc_id: str, *args, **kwargs) -> IO:
         return self._file('archive', self._archive_ext, '%s.log' % calc_id, *args, **kwargs)

-    def repack(self) -> None:
+    def re_pack(
+            self, upload: UploadWithMetadata, skip_raw: bool = False,
+            skip_archive: bool = False) -> None:
         """
         Replaces the existing public/restricted data file pairs with new ones, based
         on current restricted information in the metadata. Should be used after updating
         the restrictions on calculations. This is potentially a long running operation.
""" - raise NotImplementedError() + # compute a list of files to repack + files = [] + kinds = [] + if not skip_archive: + kinds.append(('archive', self._archive_ext)) + if not skip_raw: + kinds.append(('raw', 'plain')) + for kind, ext in kinds: + for prefix in ['public', 'restricted']: + files.append(( + self.join_file('%s-%s.%s.repacked.zip' % (kind, prefix, ext)), + self.join_file('%s-%s.%s.zip' % (kind, prefix, ext)))) + + # check if there already is a running repack + for repacked_file, _ in files: + if repacked_file.exists(): + raise FileExistsError('Repacked files already exist') + + # create staging files + staging_upload = self.to_staging_upload_files(create=True, include_archive=True) + + def create_zipfile(kind: str, prefix: str, ext: str) -> zipfile.ZipFile: + file = self.join_file('%s-%s.%s.repacked.zip' % (kind, prefix, ext)) + return zipfile.ZipFile(file.os_path, mode='w') + + # perform the repacking + try: + if not skip_archive: + staging_upload._pack_archive_files(upload, create_zipfile) + if not skip_raw: + staging_upload._pack_raw_files(upload, create_zipfile) + finally: + staging_upload.delete() + + # replace the original files with the repacked ones + for repacked_file, public_file in files: + shutil.move( + repacked_file.os_path, + public_file.os_path) @contextmanager def zipfile_cache(self): diff --git a/nomad/normalizing/data/.gitignore b/nomad/normalizing/data/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..bdd4acd5650f1df7853b8e848379055292ca5afd --- /dev/null +++ b/nomad/normalizing/data/.gitignore @@ -0,0 +1 @@ +SM_all08.db \ No newline at end of file diff --git a/nomad/parsing/backend.py b/nomad/parsing/backend.py index 1f1da99d678bec77888ecc0c364026bdb2fb4fa0..eea1fd47c0edf2fb3d6aeb8aed73ecda2f87de93 100644 --- a/nomad/parsing/backend.py +++ b/nomad/parsing/backend.py @@ -184,8 +184,8 @@ class AbstractParserBackend(metaclass=ABCMeta): pass @abstractmethod - def get_sections(self, meta_name: str) -> List[int]: - """ Return all gIndices for existing sections of the given meta_name. """ + def get_sections(self, meta_name: str, g_index: int = -1) -> List[int]: + """ Return all gIndices for existing sections of the given meta_name and parent section index. """ pass @abstractmethod @@ -513,9 +513,11 @@ class LocalBackend(LegacyParserBackend, metaclass=DelegatingMeta): return section[meta_name] - def get_sections(self, meta_name): + def get_sections(self, meta_name, g_index=-1): sections = self._delegate.results[meta_name] - return [section.gIndex for section in sections] + return [ + section.gIndex for section in sections + if g_index == -1 or section.parents[0].gIndex == g_index] def _write( self, json_writer: JSONStreamWriter, value: Any, diff --git a/nomad/parsing/metainfo.py b/nomad/parsing/metainfo.py index 0186122e1f6fb44e8fe17f613446c153c5e47e0d..618b1bd8638b4d0100ac4cdfd2a4295e1c5edb1a 100644 --- a/nomad/parsing/metainfo.py +++ b/nomad/parsing/metainfo.py @@ -238,10 +238,12 @@ class MetainfoBackend(LegacyParserBackend): raise NotImplementedError( 'This method does not make sense in the context of the new metainfo.') - def get_sections(self, meta_name: str) -> List[int]: - """ Return all gIndices for existing sections of the given meta_name. """ + def get_sections(self, meta_name: str, g_index: int = -1) -> List[int]: + """ Return all gIndices for existing sections of the given meta_name and parent index. 
""" section_def = self.env.resolve_definition(meta_name, Section) - return [section.m_parent_index for section in self.resource.all(section_def.section_cls)] + return [ + section.m_parent_index for section in self.resource.all(section_def.section_cls) + if g_index == -1 or section.m_parent.m_parent_index == g_index] def get_value(self, meta_name: str, g_index=-1) -> Any: """ diff --git a/nomad/processing/data.py b/nomad/processing/data.py index 6c5e2b5b1428254347e98bb84dfe8964e445f40b..39f6950d8de7c670550ccde0396534f9e3ffcd64 100644 --- a/nomad/processing/data.py +++ b/nomad/processing/data.py @@ -672,6 +672,21 @@ class Upload(Proc): # the packing and removing of the staging upload files, will be trigged by # the 'cleanup' task after processing all calcs + @process + def re_pack(self): + """ A *process* that repacks the raw and archive data based on the current embargo data. """ + assert self.published + + # mock the steps of actual processing + self._continue_with('uploading') + self._continue_with('extracting') + self._continue_with('parse_all') + self._continue_with('cleanup') + + self.upload_files.re_pack(self.to_upload_with_metadata()) + self.joined = True + self._complete() + @process def process_upload(self): """ A *process* that performs the initial upload processing. """ @@ -874,7 +889,7 @@ class Upload(Proc): logger, 'reprocessed staged upload packed', step='delete staged', upload_size=self.upload_files.size): - staging_upload_files.pack(self.to_upload_with_metadata()) + staging_upload_files.pack(self.to_upload_with_metadata(), skip_raw=True) with utils.timer( logger, 'reprocessed staged upload deleted', step='delete staged', diff --git a/ops/helm/nomad/templates/gui-deployment.yml b/ops/helm/nomad/templates/gui-deployment.yml index 3c18d78dac167e8a718d3b5bdfd79d6a88b6bcd6..270f46c9a9626507be00e3643255b8555be5aafd 100644 --- a/ops/helm/nomad/templates/gui-deployment.yml +++ b/ops/helm/nomad/templates/gui-deployment.yml @@ -65,6 +65,14 @@ data: proxy_pass http://{{ include "nomad.fullname" . }}-app:8000; proxy_connect_timeout {{ .Values.proxy.timeout }}; } + + location {{ .Values.proxy.external.path }}/api/mirror { + proxy_buffering off; + proxy_set_header Host $host; + proxy_pass_request_headers on; + proxy_pass http://{{ include "nomad.fullname" . 
diff --git a/ops/helm/nomad/templates/gui-deployment.yml b/ops/helm/nomad/templates/gui-deployment.yml
index 3c18d78dac167e8a718d3b5bdfd79d6a88b6bcd6..270f46c9a9626507be00e3643255b8555be5aafd 100644
--- a/ops/helm/nomad/templates/gui-deployment.yml
+++ b/ops/helm/nomad/templates/gui-deployment.yml
@@ -65,6 +65,14 @@ data:
             proxy_pass http://{{ include "nomad.fullname" . }}-app:8000;
             proxy_connect_timeout {{ .Values.proxy.timeout }};
         }
+
+        location {{ .Values.proxy.external.path }}/api/mirror {
+            proxy_buffering off;
+            proxy_set_header Host $host;
+            proxy_pass_request_headers on;
+            proxy_pass http://{{ include "nomad.fullname" . }}-app:8000;
+            proxy_connect_timeout {{ .Values.proxy.timeout }};
+        }
     }
   env.js: |
     window.nomadEnv = {
diff --git a/tests/app/test_api.py b/tests/app/test_api.py
index 19d31f892ee489b87ffa1ab94ecca8f4398689fe..856afb3545c81e79a583a97ae11b74369fdf4128 100644
--- a/tests/app/test_api.py
+++ b/tests/app/test_api.py
@@ -1224,6 +1224,28 @@ class TestEditRepo():
         assert rv.status_code != 200


+@pytest.mark.timeout(config.tests.default_timeout)
+def test_edit_lift_embargo(api, published, other_test_user_auth):
+    example_calc = Calc.objects(upload_id=published.upload_id).first()
+    assert example_calc.metadata['with_embargo']
+    rv = api.post(
+        '/repo/edit', headers=other_test_user_auth, content_type='application/json',
+        data=json.dumps({
+            'actions': {
+                'with_embargo': {
+                    'value': 'lift'
+                }
+            }
+        }))
+    assert rv.status_code == 200
+    assert not Calc.objects(calc_id=example_calc.calc_id).first().metadata['with_embargo']
+
+    Upload.get(published.upload_id).block_until_complete()
+    # should not raise Restricted anymore
+    with files.UploadFiles.get(published.upload_id).archive_file(example_calc.calc_id) as f:
+        f.read()
+
+
 class TestRaw(UploadFilesBasedTests):

     def assert_zip_file(self, rv, files: int = -1, basename: bool = None):
diff --git a/tests/data/proc/examples_template/1.aux b/tests/data/proc/examples_template/1.aux
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..d95f3ad14dee633a758d2e331151e950dd13e4ed 100644
--- a/tests/data/proc/examples_template/1.aux
+++ b/tests/data/proc/examples_template/1.aux
@@ -0,0 +1 @@
+content
diff --git a/tests/data/proc/examples_template/2.aux b/tests/data/proc/examples_template/2.aux
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..d95f3ad14dee633a758d2e331151e950dd13e4ed 100644
--- a/tests/data/proc/examples_template/2.aux
+++ b/tests/data/proc/examples_template/2.aux
@@ -0,0 +1 @@
+content
diff --git a/tests/data/proc/examples_template/3.aux b/tests/data/proc/examples_template/3.aux
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..d95f3ad14dee633a758d2e331151e950dd13e4ed 100644
--- a/tests/data/proc/examples_template/3.aux
+++ b/tests/data/proc/examples_template/3.aux
@@ -0,0 +1 @@
+content
diff --git a/tests/data/proc/examples_template/4.aux b/tests/data/proc/examples_template/4.aux
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..d95f3ad14dee633a758d2e331151e950dd13e4ed 100644
--- a/tests/data/proc/examples_template/4.aux
+++ b/tests/data/proc/examples_template/4.aux
@@ -0,0 +1 @@
+content
diff --git a/tests/processing/test_data.py b/tests/processing/test_data.py
index aa63125748af9740bb2e1baffef1a3a2ce4d985b..aa9dc679ac28c2eb38dae44e3d18eb8cb74ea501 100644
--- a/tests/processing/test_data.py
+++ b/tests/processing/test_data.py
@@ -282,6 +282,30 @@ def test_re_processing(published: Upload, example_user_metadata, monkeypatch, wi
     assert first_calc.metadata['atoms'][0] == 'Br'


+@pytest.mark.timeout(config.tests.default_timeout)
+@pytest.mark.parametrize('with_failure', [None, 'before', 'after'])
+def test_re_pack(published: Upload, example_user_metadata, monkeypatch, with_failure):
+    upload_id = published.upload_id
+    calc = Calc.objects(upload_id=upload_id).first()
+    assert calc.metadata['with_embargo']
+    calc.metadata['with_embargo'] = False
+    calc.save()
+
+    published.re_pack()
+    try:
+        published.block_until_complete(interval=.01)
+    except Exception:
+        pass
+
+    upload_files = PublicUploadFiles(upload_id)
+    for raw_file in upload_files.raw_file_manifest():
+        with upload_files.raw_file(raw_file) as f:
+            f.read()
+    for calc in Calc.objects(upload_id=upload_id):
+        with upload_files.archive_file(calc.calc_id) as f:
+            f.read()
+
+
 def mock_failure(cls, task, monkeypatch):
     def mock(self):
         raise Exception('fail for test')
diff --git a/tests/test_cli.py b/tests/test_cli.py
index b4df409d183f0d088ec4609f24bdfc5560d04e9b..e3c69a621185c64832997dce1a815abfec184da3 100644
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -18,7 +18,7 @@ import click.testing
 import json
 import mongoengine

-from nomad import utils, search, processing as proc
+from nomad import utils, search, processing as proc, files
 from nomad.cli import cli
 from nomad.processing import Upload, Calc

@@ -147,6 +147,27 @@ class TestAdminUploads:
         calc.reload()
         assert calc.metadata['nomad_version'] == 'test_version'

+    def test_re_pack(self, published, monkeypatch):
+        upload_id = published.upload_id
+        calc = Calc.objects(upload_id=upload_id).first()
+        assert calc.metadata['with_embargo']
+        calc.metadata['with_embargo'] = False
+        calc.save()
+
+        result = click.testing.CliRunner().invoke(
+            cli, ['admin', 'uploads', 're-pack', '--parallel', '2', upload_id], catch_exceptions=False, obj=utils.POPO())
+
+        assert result.exit_code == 0
+        assert 're-pack' in result.stdout
+        calc.reload()
+        upload_files = files.PublicUploadFiles(upload_id)
+        for raw_file in upload_files.raw_file_manifest():
+            with upload_files.raw_file(raw_file) as f:
+                f.read()
+        for calc in Calc.objects(upload_id=upload_id):
+            with upload_files.archive_file(calc.calc_id) as f:
+                f.read()
+

 @pytest.mark.usefixtures('reset_config')
 class TestClient:
diff --git a/tests/test_files.py b/tests/test_files.py
index 4e898b13ad2afe513bd9180849edaf48d9818e59..42e3c04ad36de91b66fa1b4d9a479bb96f942b39 100644
--- a/tests/test_files.py
+++ b/tests/test_files.py
@@ -28,6 +28,8 @@ from nomad.files import DirectoryObject, PathObject
 from nomad.files import StagingUploadFiles, PublicUploadFiles, UploadFiles, Restricted, \
     ArchiveBasedStagingUploadFiles

+from tests.utils import assert_exception
+
 CalcWithFiles = Tuple[CalcWithMetadata, str]
 UploadWithFiles = Tuple[UploadWithMetadata, UploadFiles]

@@ -138,6 +140,7 @@ def generate_example_calc(
     example_calc.update(**kwargs)

     example_file = os.path.join(config.fs.tmp, 'example.zip')
+    example_calc.files = []
     with zipfile.ZipFile(example_file, 'w', zipfile.ZIP_DEFLATED) as zf:
         for filepath in example_file_contents:
             filename = os.path.basename(filepath)
@@ -147,6 +150,7 @@ def generate_example_calc(
             if subdirectory is not None:
                 arcname = os.path.join(subdirectory, arcname)

+            example_calc.files.append(arcname)
             zf.write(os.path.join(example_directory, filename), arcname)

     return example_calc, example_file
@@ -207,10 +211,11 @@ class UploadFilesContract(UploadFilesFixtures):
         upload, upload_files = test_upload
         for calc in upload.calcs:
             try:
-                with upload_files.raw_file(calc.mainfile) as f:
-                    assert len(f.read()) > 0
-                if not upload_files._is_authorized():
-                    assert not calc.with_embargo
+                for file_path in calc.files:
+                    with upload_files.raw_file(file_path) as f:
+                        assert len(f.read()) > 0
+                    if not upload_files._is_authorized():
+                        assert not calc.with_embargo
             except Restricted:
                 assert not upload_files._is_authorized()
                 assert calc.with_embargo
@@ -231,7 +236,7 @@ class UploadFilesContract(UploadFilesFixtures):
         assert '1.aux' in list(path for path, _ in raw_files)
         for file, size in raw_files:
             if file.endswith('.aux'):
-                assert size == 0
+                assert size == 8
             else:
                 assert size > 0
         assert_example_files([os.path.join(prefix, path) for path, _ in raw_files])
@@ -422,6 +427,16 @@ class TestPublicUploadFiles(UploadFilesContract):
         assert upload_files.to_staging_upload_files() is None

+    def test_repack(self, test_upload):
+        upload, upload_files = test_upload
+        for calc in upload.calcs:
+            calc.with_embargo = False
+        upload_files.re_pack(upload)
+        assert_upload_files(upload, PublicUploadFiles, with_embargo=False)
+        assert len(os.listdir(upload_files.os_path)) == 4
+        with assert_exception(KeyError):
+            StagingUploadFiles(upload_files.upload_id)
+

 def assert_upload_files(
         upload: UploadWithMetadata, cls, no_archive: bool = False, **kwargs):
diff --git a/tests/test_parsing.py b/tests/test_parsing.py
index e5effa71a833c51b9a3b33dc2a0d8f98c1436bf8..081f898a01d34c92afb45a8cbfdea2d631af6f81 100644
--- a/tests/test_parsing.py
+++ b/tests/test_parsing.py
@@ -108,6 +108,28 @@ class TestLocalBackend(object):
         for i in range(0, 3):
             assert backend.get_value('program_name', i) == 't%d' % i

+    def test_sub_section(self, backend, no_warn):
+        backend.openSection('section_run')
+
+        backend.openNonOverlappingSection('section_system')
+        assert backend.openSection('section_symmetry') == 0
+        backend.closeSection('section_symmetry', 0)
+        backend.closeNonOverlappingSection('section_system')
+
+        backend.openNonOverlappingSection('section_system')
+        backend.closeNonOverlappingSection('section_system')
+
+        backend.openNonOverlappingSection('section_system')
+        assert backend.openSection('section_symmetry') == 1
+        backend.closeSection('section_symmetry', 1)
+        backend.closeNonOverlappingSection('section_system')
+
+        assert backend.get_sections('section_system') == [0, 1, 2]
+        assert backend.get_sections('section_symmetry') == [0, 1]
+        assert backend.get_sections('section_symmetry', 0) == [0]
+        assert backend.get_sections('section_symmetry', 1) == []
+        assert backend.get_sections('section_symmetry', 2) == [1]
+
     def test_section_override(self, backend, no_warn):
         """ Test whether we can overwrite values already in the backend."""
         expected_value = ['Cl', 'Zn']