diff --git a/.gitignore b/.gitignore index e7f55c1f12c3508138af36f0478c9fc4393048c0..1b0c8e5da37ea85af82a2627925000ef74bbe228 100644 --- a/.gitignore +++ b/.gitignore @@ -23,4 +23,4 @@ target/ vscode/ nomad.yaml gunicorn.log.conf -gunicorn.conf \ No newline at end of file +gunicorn.conf diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 097e178ac5d00ce37c0f20c843f39b16921abc29..189dc975f6277e964370e78f416d044d55b7f0db 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -91,7 +91,7 @@ tests: NOMAD_ELASTIC_HOST: elastic NOMAD_MONGO_HOST: mongo NOMAD_KEYCLOAK_PASSWORD: ${CI_KEYCLOAK_ADMIN_PASSWORD} - NOMAD_SPRINGER_DB_PATH: /nomad/fairdi/db/data/springer.db + NOMAD_NORMALIZE_SPRINGER_DB_PATH: /nomad/fairdi/db/data/springer.msg script: - cd /app - ls /builds diff --git a/docs/cli.rst b/docs/cli.rst new file mode 100644 index 0000000000000000000000000000000000000000..2872bcc6304505ab0edaf36d0cdc4893967bde1b --- /dev/null +++ b/docs/cli.rst @@ -0,0 +1,16 @@ +Command Line Interface (CLI) +---------------------------- +The :code:`nomad` python package comes with a command line interface (CLI) that +can be accessed after installation by simply running the :code:`nomad` command +in your terminal. The CLI provides a hierarchy of commands by using the `click +package <https://click.palletsprojects.com/>`_. + +This documentation describes how the CLI can be used to manage a NOMAD +installation. For common use cases see :ref:`cli_use_cases`. For a full +reference of the CLI commands see :ref:`cli_ref`. + +.. toctree:: + :maxdepth: 2 + + cli_use_cases.rst + cli_ref.rst diff --git a/docs/cli_ref.rst b/docs/cli_ref.rst new file mode 100644 index 0000000000000000000000000000000000000000..fea6229e0bd7c287837ceaa87299d336ee245042 --- /dev/null +++ b/docs/cli_ref.rst @@ -0,0 +1,17 @@ +.. _cli_ref: + +CLI Reference +************* + +Client CLI commands +"""""""""""""""""""""""""""""""""""""""" +.. click:: nomad.cli.client.client:client + :prog: nomad client + :show-nested: + +Admin CLI commands +"""""""""""""""""""""""""""""""""""""""" +.. click:: nomad.cli.admin.admin:admin + :prog: nomad admin + :show-nested: + diff --git a/docs/cli_use_cases.rst b/docs/cli_use_cases.rst new file mode 100644 index 0000000000000000000000000000000000000000..01595c37628ae4418a33b69826baa1a1e3ea5993 --- /dev/null +++ b/docs/cli_use_cases.rst @@ -0,0 +1,71 @@ +.. _cli_use_cases: + +Use cases +********* + +Mirroring data between production environments +"""""""""""""""""""""""""""""""""""""""""""""" +Sometimes you may wish to transfer data between separate deployments of the +NOMAD infrastructure. This use case covers the situation when the deployments +are up and running and both have access to the underlying file storage, part of +which is mounted inside each container under :code:`.volumes/fs`. + +With both the source and target deployment running, you can use the +:ref:`cli_ref:mirror` command to transfer the data from source to target. The +mirror will copy everything: i.e. the raw data, archive data and associated +metadata in the database. + +The data to be mirrored is specified by using a query API path. For example, to +mirror an upload from the source deployment to the target deployment, you would use +the following CLI command inside the target deployment: + +.. code-block:: sh + + nomad client -n <api_url> -u <username> -w <password> mirror <query_json> --source-mapping <target_docker_path>:<shared_path> + +Here is a breakdown of the different arguments: + + * :code:`-n <url>`: URL of the API endpoint in the source deployment. This API will + be queried to fetch the data to be mirrored. E.g. + http://repository.nomad-coe.eu/api + * :code:`-u <username>`: Your username that is used for authentication in the API call. + * :code:`-w <password>`: Your password that is used for authentication in the API call. + * :code:`mirror <query>`: Your query as a JSON dictionary. See the documentation for + available keywords. E.g. :code:`'{"upload_id": "<upload_id>"}'` + * :code:`--source-mapping <mapping>`: The deployments use a separate folder to store + the archive and raw data. To correctly find the data that should be + mirrored, the absolute path on the filesystem that is shared between the + deployments needs to be provided. E.g. *.volumes/fs:/nomad/fairdi/prod/fs*. + The first part of this mapping indicates a docker volume path + (*.volumes/fs* in this example) that should be mapped to the second + filepath on the shared filesystem (*/nomad/fairdi/prod/fs* in this example). +
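Since the query is passed through a shell, quoting matters: the JSON itself uses double quotes, so the whole argument is best wrapped in single quotes. A minimal sketch of producing a valid query argument in Python (the upload id is a made-up example):

```python
import json

# any repo API query keyword works here; the id is hypothetical
query = {'upload_id': 'N9Jqc1y-Sz-tJsvPkC5wg'}
print(json.dumps(query))  # -> {"upload_id": "N9Jqc1y-Sz-tJsvPkC5wg"}
```

The printed string, single-quoted, is what the :code:`<query_json>` argument in the command above expects.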
+Updating the AFLOW prototype information +"""""""""""""""""""""""""""""""""""""""" +NOMAD uses the `AFLOW prototype library +<http://www.aflowlib.org/CrystalDatabase/>`_ to link bulk crystal entries with +prototypical structures based on their symmetry. The +:ref:`cli_ref:prototypes-update` subcommand can be used to update this +database from the online information provided by AFLOW. The command produces a +prototype dataset as a python module. + +The dataset should be recreated if the AFLOW dataset has been updated or if the +symmetry matching routine used within NOMAD is updated (e.g. the symmetry +tolerance is modified). To produce a new dataset run the following command: + +.. code-block:: sh + + nomad admin ops prototypes-update <module_path> + +Here is a breakdown of the different arguments: + + * :code:`<module_path>`: Path of the python module in which the data should + be stored. If the file does not exist it will be created. The prototype + data used by NOMAD is under the path: + *nomad/normalizing/data/aflow_prototypes.py* + +The command also provides a :code:`--matches-only` flag for updating only the +dataset entries that are used for matching the prototypes. This means that the +online information from AFLOW is not queried. This makes the process faster, +e.g. when you only want to update the matches after modifying the +symmetry routines. diff --git a/docs/conf.py b/docs/conf.py index 27301cfe5d51ceb7f65fc786ac7d887f17234055..6a7bcea5e8c4333a37bec3198c37c98e802f787b 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -45,6 +45,8 @@ extensions = [ 'sphinx.ext.coverage', 'sphinx.ext.ifconfig', 'sphinx.ext.napoleon', + 'sphinx.ext.autosectionlabel', + 'sphinx_click.ext', 'sphinxcontrib.httpdomain', 'sphinxcontrib.autohttp.flask', 'sphinxcontrib.autohttp.flaskqref', @@ -52,6 +54,9 @@ extensions = [ 'm2r' ] + +# Prefix the automatically generated labels with the document name +autosectionlabel_prefix_document = True + # Add any paths that contain templates here, relative to this directory. templates_path = ['.templates'] diff --git a/docs/depl_docker.rst b/docs/depl_docker.rst new file mode 100644 index 0000000000000000000000000000000000000000..8595e4871e1c437ab09bbde767a0ad97dcf1169d --- /dev/null +++ b/docs/depl_docker.rst @@ -0,0 +1 @@ +..
mdinclude:: ../ops/docker-compose/nomad/README.md diff --git a/docs/depl_helm.rst b/docs/depl_helm.rst new file mode 100644 index 0000000000000000000000000000000000000000..ba31c0baf9a0944f3db0eaa406e3db75fae061a2 --- /dev/null +++ b/docs/depl_helm.rst @@ -0,0 +1 @@ +.. mdinclude:: ../ops/helm/nomad/README.md diff --git a/docs/depl_images.rst b/docs/depl_images.rst new file mode 100644 index 0000000000000000000000000000000000000000..a9375f37843c06b60b1557c854478d74328e18c5 --- /dev/null +++ b/docs/depl_images.rst @@ -0,0 +1 @@ +.. mdinclude:: ../ops/containers/README.md diff --git a/docs/oasis.rst b/docs/oasis.rst new file mode 100644 index 0000000000000000000000000000000000000000..3e31876359d638c475a58973dbfbd27a3c7dd32b --- /dev/null +++ b/docs/oasis.rst @@ -0,0 +1 @@ +.. mdinclude:: ../ops/docker-compose/nomad-oasis/README.md diff --git a/docs/ops.rst b/docs/ops.rst index ccf961e90f801e0ed2f853072efe816d6f366ed5..d64e2c4b061f249ef218b9b17e58625101467888 100644 --- a/docs/ops.rst +++ b/docs/ops.rst @@ -1,9 +1,13 @@ Operating NOMAD -=============== +############### .. mdinclude:: ../ops/README.md -.. mdinclude:: ../ops/docker-compose/nomad/README.md -.. mdinclude:: ../ops/helm/nomad/README.md -.. mdinclude:: ../ops/containers/README.md -.. mdinclude:: ../ops/docker-compose/nomad-oasis/README.md +.. toctree:: + :maxdepth: 2 + + depl_docker + depl_helm + depl_images + cli + oasis diff --git a/gui/src/components/dft/DFTSearchAggregations.js b/gui/src/components/dft/DFTSearchAggregations.js index f2d0dbee1e6f10020bc7ba500a7703e0130768f8..a281d29324c7206def154886153f43cad1573188 100644 --- a/gui/src/components/dft/DFTSearchAggregations.js +++ b/gui/src/components/dft/DFTSearchAggregations.js @@ -36,13 +36,15 @@ class DFTSearchAggregations extends React.Component { <Grid item xs={4}> <Quantity quantity="dft.code_name" title="Code" scale={0.25} metric={usedMetric} /> </Grid> - <Grid item xs={4}> - <Quantity quantity="dft.system" title="System type" scale={0.25} metric={usedMetric} /> - <Quantity quantity="dft.crystal_system" title="Crystal system" scale={1} metric={usedMetric} /> - </Grid> <Grid item xs={4}> <Quantity quantity="dft.basis_set" title="Basis set" scale={0.25} metric={usedMetric} /> <Quantity quantity="dft.xc_functional" title="XC functionals" scale={0.5} metric={usedMetric} /> + <Quantity quantity="dft.compound_type" title="Compound type" scale={1} metric={usedMetric} /> + </Grid> + <Grid item xs={4}> + <Quantity quantity="dft.system" title="System type" scale={0.25} metric={usedMetric} /> + <Quantity quantity="dft.crystal_system" title="Crystal system" scale={1} metric={usedMetric} /> + <Quantity quantity="dft.labels_springer_compound_class" title="Springer compound class" scale={1} metric={usedMetric} /> </Grid> </Grid> ) diff --git a/gui/src/components/dft/DFTSearchByPropertyAggregations.js b/gui/src/components/dft/DFTSearchByPropertyAggregations.js new file mode 100644 index 0000000000000000000000000000000000000000..4ebc08fa3209543329d7255e5bc674fe80a68c66 --- /dev/null +++ b/gui/src/components/dft/DFTSearchByPropertyAggregations.js @@ -0,0 +1,53 @@ +import React from 'react' +import PropTypes from 'prop-types' +import { Grid } from '@material-ui/core' +import { Quantity } from '../search/QuantityHistogram' +import SearchContext from '../search/SearchContext' +import { withApi } from '../api' + +class DFTSearchByPropertyAggregations extends React.Component { + static propTypes = { + info: PropTypes.object + } + + static contextType = SearchContext.type + + render() { + const 
{info} = this.props + const {state: {response: {statistics}, usedMetric}} = this.context + + if (statistics.code_name && info) { + // filter based on known codes, since elastic search might return 0 aggregations on + // obsolete code names + const filteredCodeNames = {} + const defaultValue = { + code_runs: 0 + } + defaultValue[usedMetric] = 0 + info.codes.forEach(key => { + filteredCodeNames[key] = statistics.code_name[key] || defaultValue + }) + statistics.code_name = filteredCodeNames + } + + return ( + <Grid container spacing={24}> + <Grid item xs={4}> + <Quantity quantity="dft.quantities_energy" title="Energy" scale={1} metric={usedMetric} /> + <Quantity quantity="dft.quantities_forces" title="Forces" scale={1} metric={usedMetric} /> + <Quantity quantity="dft.quantities_electronic" title="Electronic" scale={1} metric={usedMetric} /> + </Grid> + <Grid item xs={4}> + <Quantity quantity="dft.quantities_magnetic" title="Magnetic" scale={1} metric={usedMetric} /> + <Quantity quantity="dft.quantities_vibrational" title="Vibrational" scale={1} metric={usedMetric} /> + <Quantity quantity="dft.quantities_optical" title="Optical" scale={1} metric={usedMetric} /> + </Grid> + <Grid item xs={4}> + <Quantity quantity="dft.labels_springer_classification" title="Springer classification" scale={1} metric={usedMetric} /> + </Grid> + </Grid> + ) + } +} + +export default withApi(false, false)(DFTSearchByPropertyAggregations) diff --git a/gui/src/components/domains.js b/gui/src/components/domains.js index 5c3e6c631d2b94d2aa14e30fb33003f0cea0fdf4..8520a02891b92470f38662a2e51c0ff13cdcbaf3 100644 --- a/gui/src/components/domains.js +++ b/gui/src/components/domains.js @@ -5,6 +5,7 @@ import DFTEntryCards from './dft/DFTEntryCards' import EMSSearchAggregations from './ems/EMSSearchAggregations' import EMSEntryOverview from './ems/EMSEntryOverview' import EMSEntryCards from './ems/EMSEntryCards' +import DFTSearchByPropertyAggregations from './dft/DFTSearchByPropertyAggregations' export const domains = ({ dft: { @@ -23,6 +24,10 @@ export const domains = ({ * onChange (callback to propagate searchValue changes). */ SearchAggregations: DFTSearchAggregations, + /** + * A component that is used to render the search aggregations by property. + */ + SearchByPropertyAggregations: DFTSearchByPropertyAggregations, /** * Metrics are used to show values for aggregations. 
Each metric has a key (used * for API calls), a label (used in the select form), and result string (to show diff --git a/gui/src/components/search/QuantityHistogram.js b/gui/src/components/search/QuantityHistogram.js index 4b1b433597b6d2a464d0ec00eb6ecff778df4a7e..b6e430da015273efb0a0dcc25a110c4330245f14 100644 --- a/gui/src/components/search/QuantityHistogram.js +++ b/gui/src/components/search/QuantityHistogram.js @@ -9,6 +9,39 @@ import SearchContext from '../search/SearchContext' const unprocessed_label = 'not processed' const unavailable_label = 'unavailable' +const _mapping = { + 'energy_total': 'Total energy', + 'energy_total_T0': 'Total energy (0K)', + 'energy_free': 'Free energy', + 'energy_electrostatic': 'Electrostatic', + 'energy_X': 'Exchange', + 'energy_XC': 'Exchange-correlation', + 'energy_sum_eigenvalues': 'Band energy', + 'dos_values': 'DOS', + 'eigenvalues_values': 'Eigenvalues', + 'volumetric_data_values': 'Volumetric data', + 'electronic_kinetic_energy': 'Kinetic energy', + 'total_charge': 'Charge', + 'atom_forces_free': 'Free atomic forces', + 'atom_forces_raw': 'Raw atomic forces', + 'atom_forces_T0': 'Atomic forces (0K)', + 'atom_forces': 'Atomic forces', + 'stress_tensor': 'Stress tensor', + 'thermodynamical_property_heat_capacity_C_v': 'Heat capacity', + 'vibrational_free_energy_at_constant_volume': 'Free energy (const=V)', + 'band_energies': 'Band energies', + 'spin_S2': 'Spin momentum operator', + 'excitation_energies': 'Excitation energies', + 'oscillator_strengths': 'Oscillator strengths', + 'transition_dipole_moments': 'Transition dipole moments'} + +function map_key (name) { + if (name in _mapping) { + return _mapping[name] + } + return name +} + class QuantityHistogramUnstyled extends React.Component { static propTypes = { classes: PropTypes.object.isRequired, @@ -69,7 +102,7 @@ class QuantityHistogramUnstyled extends React.Component { const data = Object.keys(this.props.data) .map(key => ({ - name: key, + name: map_key(key), value: this.props.data[key][this.props.metric] })) diff --git a/gui/src/components/search/Search.js b/gui/src/components/search/Search.js index 00684af68f7c204cbaf985dcecc70a1ef0a6ef6d..287238d92bc025582903b39df0d66b42aee28884 100644 --- a/gui/src/components/search/Search.js +++ b/gui/src/components/search/Search.js @@ -16,6 +16,7 @@ import UploadList from './UploadsList' import GroupList from './GroupList' import ApiDialogButton from '../ApiDialogButton' import SearchIcon from '@material-ui/icons/Search' +import UploadsChart from './UploadsChart' class Search extends React.Component { static tabs = { @@ -95,6 +96,16 @@ class Search extends React.Component { render: props => <DomainVisualization {...props}/>, label: 'Meta data', description: 'Shows histograms on key metadata' + }, + 'property': { + render: props => <PropertyVisualization {...props}/>, + label: 'Properties', + description: 'Shows histograms on key properties' + }, + 'users': { + render: props => <UsersVisualization {...props}/>, + label: 'Users', + description: 'Show statistics on user metadata' } } @@ -215,6 +226,44 @@ class DomainVisualization extends React.Component { } } +class PropertyVisualization extends React.Component { + static propTypes = { + open: PropTypes.bool + } + + static contextType = SearchContext.type + + render() { + const {domain} = this.context.state + const {open} = this.props + + return <KeepState visible={open} render={() => + <domain.SearchByPropertyAggregations /> + }/> + } +} + +class UsersVisualization extends React.Component { + static 
propTypes = { + open: PropTypes.bool + } + + static contextType = SearchContext.type + + render () { + const {domain} = this.context.state + const {open} = this.props + + return <KeepState visible={open} render={() => + <Card> + <CardContent> + <UploadsChart metricsDefinitions={domain.searchMetrics}/> + </CardContent> + </Card> + }/> + } +} + class ElementsVisualization extends React.Component { static propTypes = { open: PropTypes.bool diff --git a/gui/src/components/search/SearchContext.js b/gui/src/components/search/SearchContext.js index d189ae718d7a7d2e2fb38f79840ab341c072f84c..3ef869c8ef521c237ded865add7a2a3b012efc07 100644 --- a/gui/src/components/search/SearchContext.js +++ b/gui/src/components/search/SearchContext.js @@ -50,7 +50,8 @@ class SearchContext extends React.Component { order_by: 'upload_time', order: -1, page: 1, - per_page: 10 + per_page: 10, + date_histogram: true }, metric: this.defaultMetric, usedMetric: this.defaultMetric, diff --git a/gui/src/components/search/UploadsChart.js b/gui/src/components/search/UploadsChart.js new file mode 100644 index 0000000000000000000000000000000000000000..ca8df54c47d7dd0b0233e81925a79275215de8f3 --- /dev/null +++ b/gui/src/components/search/UploadsChart.js @@ -0,0 +1,434 @@ +import React from 'react' +import PropTypes from 'prop-types' +import { withStyles, Select, MenuItem } from '@material-ui/core' +import Grid from '@material-ui/core/Grid' +import TextField from '@material-ui/core/TextField' +import * as d3 from 'd3' +import { scaleBand, scalePow } from 'd3-scale' +import { nomadSecondaryColor } from '../../config.js' +import SearchContext from './SearchContext' +import { compose } from 'recompose' +import { withApi } from '../api' +import { Quantity } from './QuantityHistogram' + +class UploadsHistogramUnstyled extends React.Component { + static propTypes = { + classes: PropTypes.object.isRequired, + height: PropTypes.number.isRequired, + data: PropTypes.object, + interval: PropTypes.string, + metric: PropTypes.string.isRequired, + metricsDefinitions: PropTypes.object.isRequired, + onChanged: PropTypes.func.isRequired, + defaultScale: PropTypes.number + } + + static styles = theme => ({ + root: {}, + content: { + paddingTop: 10 + } + }) + + constructor(props) { + super(props) + this.state = { + scalePower: this.props.defaultScale || 1.0, + interval: this.props.interval || '1M', + time: null, + from_time: 0, + until_time: 0 + } + + this.container = React.createRef() + this.svgEl = React.createRef() + } + + startDate = '2013-01-01' + + scales = [ + { + label: 'Linear', + value: 1.0 + }, + { + label: '1/2', + value: 0.5 + }, + { + label: '1/4', + value: 0.25 + }, + { + label: '1/8', + value: 0.125 + } + ] + + intervals = [ + { + label: 'Yearly', + value: '1y', + number: 31536000000 + }, + { + label: 'Monthly', + value: '1M', + number: 2678400000 + }, + { + label: 'Daily', + value: '1d', + number: 86400000 + }, + { + label: 'Hourly', + value: '1h', + number: 3600000 + }, + { + label: 'Minute', + value: '1m', + number: 60000 + }, + { + label: 'Second', + value: '1s', + number: 1000 + } + ] + + timeInterval = Object.assign({}, ...this.intervals.map(e => ({[e.value]: e.number}))) + + componentDidMount() { + const from_time = new Date(this.startDate).getTime() + const until_time = new Date().getTime() + this.handleTimeChange(from_time, 'from_time', 'all') + this.handleTimeChange(until_time, 'until_time', 'all') + } + + componentDidUpdate() { + this.updateChart() + } + + handleQueryChange() { + const interval = this.state.interval
const from_time = new Date(this.state.from_time) + const until_time = new Date(this.state.until_time) + this.props.onChanged(from_time.toISOString(), until_time.toISOString(), interval) + } + + handleIntervalChange(newInterval) { + // TODO: add a refresh button so directly updating interval is not necessary + this.state.interval = newInterval + //this.setState({interval: newInterval}) + this.handleQueryChange() + } + + handleTimeChange(newTime, key, target) { + let date + if (!newTime) { + date = key === 'from_time' ? new Date(this.startDate) : new Date() + } else { + date = new Date(newTime) + } + if (target === 'state' || target === 'all') { + key === 'from_time' ? this.setState({from_time: date.getTime()}) : this.setState({until_time: date.getTime()}) + } + if (target === 'picker' || target === 'all') { + document.getElementById(key).value = date.toISOString().substring(0, 10) + } + } + + handleItemClicked(item) { + const selected = item.time + if (selected === this.state.time) { + this.props.onChanged(null, null, null) + } else { + const deltaT = this.timeInterval[this.state.interval] + this.handleTimeChange(selected, 'from_time', 'all') + this.handleTimeChange(selected + deltaT, 'until_time', 'all') + this.handleQueryChange() + } + } + + resolveDate (name) { + const date = new Date(parseInt(name, 10)) + const year = date.toLocaleDateString(undefined, {year: 'numeric'}) + const month = date.toLocaleDateString(undefined, {month: 'short'}) + const day = date.toLocaleDateString(undefined, {day: 'numeric'}) + const hour = date.toLocaleTimeString(undefined, {hour: 'numeric'}) + const min = date.toLocaleTimeString(undefined, {minute: 'numeric'}) + const sec = date.toLocaleTimeString(undefined, {second: 'numeric'}) + + const intervals = { + '1y': year, + '1M': month, + '1d': day, + '1h': hour, + '1m': min, + '1s': sec + } + + return intervals[this.state.interval] + } + + hover (svg, bar) { + const textOffset = 25 + + const tooltip = svg.append('g') + .attr('class', 'tooltip') + .style('display', 'none') + + const hoverBox = tooltip.append('rect') + .attr('width', 10) + .attr('height', 20) + .attr('fill', 'white') + .style('opacity', 0.0) + + const text = tooltip.append('text') + .attr('x', textOffset) + .attr('dy', '1.2em') + .style('text-anchor', 'start') + .attr('font-size', '12px') + + bar + .on('mouseover', () => { + tooltip.style('display', null) + let { width } = text.node().getBBox() + hoverBox.attr('width', `${ width + textOffset }px`) + }) + .on('mouseout', () => tooltip.style('display', 'none')) + .on('mousemove', function(d) { + let xPosition = d3.mouse(this)[0] - 15 + let yPosition = d3.mouse(this)[1] - 25 + + tooltip.attr('transform', `translate( ${ xPosition }, ${ yPosition })`) + tooltip.attr('data-html', 'true') + tooltip.select('text').text( new Date(d.time).toISOString() + ': ' + d.value ) + }) + } + + updateChart () { + let data = [] + if (!
this.props.data) { + return + } else { + data = Object.keys(this.props.data).map(key => ({ + time: parseInt(key, 10), + name: this.resolveDate(key), + value: this.props.data[key][this.props.metric] + }))} + + + data.sort((a, b) => d3.ascending(a.time, b.time)) + if (data.length > 0) { + this.handleTimeChange(this.state.from_time, 'from_time', 'picker') + this.handleTimeChange(this.state.until_time, 'until_time', 'picker') + } + + const scalePower = this.state.scalePower + const width = this.container.current.offsetWidth + const height = this.props.height + const margin = Math.round(0.1*height) + + const x = scaleBand().rangeRound([margin, width]).padding(0.1) + const y = scalePow().range([height-margin, margin]).exponent(scalePower) + + const max = d3.max(data, d => d.value) || 0 + x.domain(data.map(d => d.name)) + y.domain([0, max]) + + let svg = d3.select(this.svgEl.current) + svg.attr('width', width) + svg.attr('height', height) + + const xAxis = d3.axisBottom(x) + svg.select('.xaxis').remove() + svg.append('g') + .attr('transform', `translate(0,${height-margin})`) + .attr('class', 'xaxis') + .call(xAxis) + + svg.select('.xlabel').remove() + svg.append('text') + .attr('class', 'xlabel') + .attr("x", width) + .attr("y", height-4) + .attr('dy', ".35em") + .attr('font-size', '12px') + .style('text-anchor', 'end') + + const yAxis = d3.axisLeft(y) + svg.select('.yaxis').remove() + svg.append('g') + .attr('transform', `translate(${margin}, 0)`) + .attr('class', 'yaxis') + .call(yAxis) + + const {label, shortLabel} = this.props.metricsDefinitions[this.props.metric] + svg.select('.ylabel').remove() + svg.append('text') + .attr('class','ylabel') + .attr('x', 0) + .attr('y', 0) + .attr('dy', "1em") + .attr('text-anchor', 'start') + .attr('font-size', '12px') + .text(`${shortLabel ? 
shortLabel : label}`) + + let withData = svg + .selectAll('.bar').remove().exit() + .data(data) + + let item = withData.enter() + .append('g') + + item + .append('rect') + .attr('class', 'bar') + .attr('x', d => x(d.name)) + .attr('y', d => y(d.value)) + .attr('width', x.bandwidth()) + .attr('height', d => y(0) - y(d.value)) + .style('fill', nomadSecondaryColor.light) + + item + .style('cursor', 'pointer') + .on('click', d => this.handleItemClicked(d)) + + svg.select('.tooltip').remove() + svg.call(this.hover, item) + } + + render () { + return ( + <div> + <Grid container justify='space-around' spacing={24}> + <Grid item xs={2}> + <Select + margin='none' + id='scales' + value={this.state.scalePower} + onChange={(event) => this.setState({scalePower: event.target.value})} + label='scale' + > {this.scales.map(item => ( + <MenuItem + value={item.value} + key={item.label}> {item.label} + </MenuItem>))} + </Select> + </Grid> + <Grid item xs={3}> + <TextField + id='from_time' + label="from time" + type="date" + onChange={(event) => this.handleTimeChange(event.target.value, 'from_time', 'state')} + InputLabelProps={{ + shrink: true, + }} + /> + </Grid> + <Grid item xs={3}> + <Select + id='interval' + value={this.state.interval} + onChange={(event) => this.handleIntervalChange(event.target.value)} + label='interval' + > {this.intervals.map(item => ( + <MenuItem value={item.value} key={item.value}> + {item.label} + </MenuItem>))} + </Select> + </Grid> + <Grid item xs={3}> + <TextField + id='until_time' + label="until time" + type="date" + onChange={(event) => this.handleTimeChange(event.target.value, 'until_time', 'state')} + InputLabelProps={{ + shrink: true, + }} + /> + </Grid> + </Grid> + <div ref={this.container}> + <svg ref={this.svgEl}></svg> + </div> + </div> + ) + } +} + +export const UploadsHistogram = withStyles(UploadsHistogramUnstyled.styles)(UploadsHistogramUnstyled) + +class UploadersListUnstyled extends React.Component { + + static propTypes = { + classes: PropTypes.object.isRequired + } + + static styles = theme => ({ + root: { + marginTop: theme.spacing.unit * 2 + } + }) + + static contextType = SearchContext.type + + render () { + const {state: {usedMetric}} = this.context + + return ( + <Grid> + <Quantity quantity="uploader" title="Top Uploaders" scale={1} metric={usedMetric} /> + </Grid> + ) + } +} + +export const UploadersList = withStyles(UploadersListUnstyled.styles)(UploadersListUnstyled) + +class UploadsChart extends React.Component { + static propTypes = { + classes: PropTypes.object.isRequired, + metricsDefinitions: PropTypes.object.isRequired + } + static styles = theme => ({ + root: { + marginTop: theme.spacing.unit + } + }) + + static contextType = SearchContext.type + + render() { + const {classes, metricsDefinitions, ...props} = this.props + const {state: {response, usedMetric, query}, setQuery} = this.context + + return ( + <Grid container spacing={24}> + <Grid item xs={12}> + <UploadsHistogram + classes={{root: classes.root}} + height={250} + defaultScale={1} + data={response.statistics.date_histogram} + metric={usedMetric} + metricsDefinitions={metricsDefinitions} + interval={'1M'} + onChanged={(from_time, until_time, interval) => setQuery({...query, from_time: from_time, until_time: until_time, interval: interval})} + {...props} /> + </Grid> + <Grid item xs={12}> + <UploadersList /> + </Grid> + </Grid> + ) + } +} + +export default compose(withApi(false, false), withStyles(UploadsChart.styles))(UploadsChart) \ No newline at end of file
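The next diff adds admin-only mirror endpoints for downloading an upload's raw and archive files and the user list. A hedged sketch of how a mirror client might fetch an archive file (the base URL, token handling, and output file name are assumptions; the route and `prefix` parameter come from the diff below):

```python
import requests

# admin authentication is required by the endpoint; token acquisition is deployment-specific
resp = requests.get(
    'http://localhost/nomad/api/mirror/files/<upload_id>',
    params={'prefix': 'archive'},
    headers={'Authorization': 'Bearer <admin_access_token>'})
resp.raise_for_status()
with open('archive.msg', 'wb') as f:
    f.write(resp.content)
```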
diff --git a/nomad/app/api/mirror.py b/nomad/app/api/mirror.py index eb2fcee0ce8343135c8cdbeeea18af98e53f939b..4f8ed4b2562c5dd61cfbc7c6ed1763723b5fbfff 100644 --- a/nomad/app/api/mirror.py +++ b/nomad/app/api/mirror.py @@ -16,12 +16,14 @@ The mirror API of the nomad@FAIRDI APIs. Allows to export upload metadata. ''' -from flask import request +from flask import request, send_file from flask_restplus import Resource, abort, fields from nomad import processing as proc from nomad.datamodel import Dataset from nomad.doi import DOI +from nomad.files import PublicUploadFiles +from nomad import infrastructure from .api import api from .auth import authenticate @@ -115,3 +117,83 @@ class MirrorUploadResource(Resource): 'dois': dois, 'upload_files_path': upload.upload_files.os_path }, 200 + + +_mirror_files_parser = api.parser() +_mirror_files_parser.add_argument( + 'prefix', type=str, help='File type to download: archive or raw', location='args') + + +@upload_route(ns, '/files') +class MirrorFilesResource(Resource): + @api.doc('download_files_mirror') + @api.expect(_mirror_files_parser, validate=True) + @api.response(400, 'Invalid requests, e.g. wrong owner type or bad search parameters') + @api.response(404, 'The upload or calculation does not exist') + @authenticate(admin_only=True) + def get(self, upload_id): + ''' + Download archive and raw files for mirrors + ''' + try: + args = request.args + prefix = args.get('prefix') + assert prefix in ('archive', 'raw', 'legacy-archive') + + except Exception: + abort(400, message='bad parameter types') + + try: + upload_files = PublicUploadFiles(upload_id) + + if prefix == 'raw': + ext = 'plain' + ending = 'zip' + + elif prefix == 'archive': + ext = 'msg' + ending = 'msg' + + elif prefix == 'legacy-archive': + ext = 'json' + ending = 'zip' + + else: + abort(400, message='Unsupported prefix.') + + fileobj = upload_files._file_object(prefix, 'public', ext, ending) + if not fileobj.exists(): + raise KeyError + + return send_file( + open(fileobj.os_path, 'rb'), + mimetype='application/zip', + as_attachment=True, + cache_timeout=0, + attachment_filename=fileobj.os_path) + + except KeyError: + abort(404, message='Upload %s does not exist' % upload_id) + + +@ns.route('/users') +class MirrorUsersResource(Resource): + @api.doc('download_users_mirror') + @api.response(400, 'Unsuccessful userlist query') + @authenticate(admin_only=True) + def get(self): + ''' + Download user list for mirrors + ''' + try: + users = infrastructure.keycloak.search_user(query='') + result = dict() + for user in users: + credentials = user.m_to_dict() + credentials.pop('email', None) + result[user.username] = credentials + + return result, 200 + + except Exception: + abort(400, message='Failed to fetch users') diff --git a/nomad/app/api/repo.py b/nomad/app/api/repo.py index 18f5937b022191a4f6b1596262f805a9f08b1463..93ebdebdd2548ed46364ed840a135819661fbb6e 100644 --- a/nomad/app/api/repo.py +++ b/nomad/app/api/repo.py @@ -79,6 +79,8 @@ add_scroll_parameters(_search_request_parser) add_search_parameters(_search_request_parser) _search_request_parser.add_argument( 'date_histogram', type=bool, help='Add an additional aggregation over the upload time') +_search_request_parser.add_argument( + 'interval', type=str, help='Interval to use for upload time aggregation.') _search_request_parser.add_argument( 'metrics', type=str, action='append', help=( 'Metrics to aggregate over all quantities and their values as comma separated list. 
' @@ -168,6 +170,7 @@ class RepoCalcsResource(Resource): order_by = args.get('order_by', 'upload_time') date_histogram = args.get('date_histogram', False) + interval = args.get('interval', '1M') metrics: List[str] = request.args.getlist('metrics') with_statistics = args.get('statistics', False) or \ @@ -178,7 +181,7 @@ class RepoCalcsResource(Resource): search_request = search.SearchRequest() apply_search_parameters(search_request, args) if date_histogram: - search_request.date_histogram() + search_request.date_histogram(interval=interval) try: assert page >= 1 diff --git a/nomad/archive.py b/nomad/archive.py index 0b2d478f407a619b66e2c5fffb1a85aa587924b6..8cc94567b61508727743ffd3515681e4d1c4e6f6 100644 --- a/nomad/archive.py +++ b/nomad/archive.py @@ -362,22 +362,32 @@ class ArchiveReader(ArchiveObject): key = adjust_uuid_size(key) if self.use_blocked_toc and self.toc_entry is None: + if self._n_toc == 0: + raise KeyError(key) + positions = self._toc.get(key) # TODO use hash algo instead of binary search if positions is None: r_start = 0 r_end = self._n_toc - while positions is None and len(self._toc) < self._n_toc: - i_block = r_start + math.floor((r_end - r_start) / 2) + i_block = None + while r_start <= r_end: + new_i_block = r_start + math.floor((r_end - r_start) / 2) + if i_block == new_i_block: + break + else: + i_block = new_i_block + first, last = self._load_toc_block(i_block) + if key < first: r_end = i_block - 1 elif key > last: r_start = i_block + 1 else: - positions = self._toc.get(key) break + positions = self._toc.get(key) if positions is None: raise KeyError(key) @@ -501,7 +511,7 @@ def read_archive(file_or_path: str, **kwargs) -> ArchiveReader: return ArchiveReader(file_or_path, **kwargs) -def query_archive(f_or_archive_reader: Union[ArchiveReader, BytesIO], query_dict: dict): +def query_archive(f_or_archive_reader: Union[str, ArchiveReader, BytesIO], query_dict: dict): def _load_data(query_dict: Dict[str, Any], archive_item: ArchiveObject, main_section: bool = False): if not isinstance(query_dict, dict): @@ -517,17 +527,18 @@ def query_archive(f_or_archive_reader: Union[ArchiveReader, BytesIO], query_dict key = key.strip() # process array indices - match = re.match(r'([_a-bA-Z0-9]+)\[([0-9]+|:)\]', key) + match = re.match(r'(\w+)\[([-?0-9:]+)\]', key) if match: archive_key = match.group(1) index_str = match.group(2) - match = re.match(r'([0-9]*):([0-9]*)', index_str) + match = re.match(r'([-?0-9]*):([-?0-9]*)', index_str) if match: index = ( 0 if match.group(1) == '' else int(match.group(1)), None if match.group(2) == '' else int(match.group(2))) else: index = int(index_str) # type: ignore + key = archive_key else: archive_key = key index = None @@ -536,7 +547,6 @@ def query_archive(f_or_archive_reader: Union[ArchiveReader, BytesIO], query_dict archive_key = key.split('[')[0] if main_section: archive_key = adjust_uuid_size(key) - try: if index is None: res[key] = _load_data(val, archive_item[archive_key]) @@ -553,7 +563,7 @@ def query_archive(f_or_archive_reader: Union[ArchiveReader, BytesIO], query_dict if isinstance(f_or_archive_reader, ArchiveReader): return _load_data(query_dict, f_or_archive_reader, True) - elif isinstance(f_or_archive_reader, BytesIO): + elif isinstance(f_or_archive_reader, (BytesIO, str)): with ArchiveReader(f_or_archive_reader) as archive: return _load_data(query_dict, archive, True) diff --git a/nomad/cli/admin/admin.py b/nomad/cli/admin/admin.py index 54861ec2bb6f92f8dc4592434a10b0cd2641d983..2e43b9db53d104662e1dbe029acb09ea9f5e6ea4 100644 --- 
a/nomad/cli/admin/admin.py +++ b/nomad/cli/admin/admin.py @@ -33,9 +33,10 @@ from bs4 import BeautifulSoup from matid import SymmetryAnalyzer from nomad import processing as proc, search, datamodel, infrastructure, utils, config -from nomad.normalizing.structure import get_normalized_wyckoff +from nomad.normalizing.aflow_prototypes import get_normalized_wyckoff from nomad.cli.cli import cli from nomad import config +from nomad.normalizing.springer import update_springer_data def __run_processing( @@ -529,3 +530,10 @@ def prototypes_update(ctx, filepath, matches_only): # Write data file to the specified path write_prototype_data_file(aflow_prototypes, filepath) + + +@admin.command(help='Updates the springer database in nomad.config.normalize.springer_db_path.') +@click.option('--max-n-query', default=10, type=int, help='Number of unsuccessful springer requests before returning an error. Default is 10.') +@click.option('--retry-time', default=120, type=int, help='Time in seconds to retry after unsuccessful request. Default is 120.') +def springer_update(max_n_query, retry_time): + update_springer_data(max_n_query, retry_time) diff --git a/nomad/config.py b/nomad/config.py index e195cc62533207246b56cf19864386983df28c6b..c945732beb89ad963e79e7c9b206af7edf08fcd5 100644 --- a/nomad/config.py +++ b/nomad/config.py @@ -190,7 +190,9 @@ normalize = NomadConfig( # The distance tolerance between atoms for grouping them into the same # cluster. Used in detecting system type. cluster_threshold=3.1, -) + springer_db_path=os.path.join( + os.path.dirname(os.path.abspath(__file__)), + 'normalizing/data/springer.msg')) client = NomadConfig( user='leonard.hofstadter@nomad-fairdi.tests.de', @@ -220,10 +222,6 @@ use_empty_parsers = False reprocess_unmatched = True -springer_db_relative_path = 'normalizing/data/SM_all08.db' -springer_db_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), springer_db_relative_path) - - def normalize_loglevel(value, default_level=logging.INFO): plain_value = value if plain_value is None: diff --git a/nomad/datamodel/datamodel.py b/nomad/datamodel/datamodel.py index 69d2695423685f2872a58b919acf3ebf8be90c77..d979e989185a70629184c6ed9896101726266390 100644 --- a/nomad/datamodel/datamodel.py +++ b/nomad/datamodel/datamodel.py @@ -349,7 +349,8 @@ class EntryMetadata(metainfo.MSection): Search( description='Search uploader with exact names.', metric_name='uploaders', metric='cardinality', - many_or='append', search_field='uploader.name.keyword'), + many_or='append', search_field='uploader.name.keyword', + default_statistic=True, statistic_size=10), Search( name='uploader_id', search_field='uploader.user_id') ])
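The dft.py diff below derives a `compound_type` from the number of distinct elements in an entry. A small self-contained sketch of that binning rule, mirroring the one-liner added to `apply_domain_metadata`:

```python
compound_types = [
    'unary', 'binary', 'ternary', 'quaternary', 'quinary',
    'sexinary', 'septenary', 'octanary', 'nonary', 'decinary']

def compound_type(atoms):
    # atoms: the entry's distinct, normalized element labels
    return compound_types[len(atoms) - 1] if len(atoms) <= 10 else '>decinary'

assert compound_type(['O', 'Sr', 'Ti']) == 'ternary'
```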
diff --git a/nomad/datamodel/dft.py b/nomad/datamodel/dft.py index 1a557bc95af6ed98c9f4a8c52a595fa10aeacc77..a1982e09d7f162867bacaa98811b95f361f4af78 100644 --- a/nomad/datamodel/dft.py +++ b/nomad/datamodel/dft.py @@ -43,6 +43,58 @@ basis_sets = { 'planewaves': 'plane waves' } +compound_types = [ + 'unary', + 'binary', + 'ternary', + 'quaternary', + 'quinary', + 'sexinary', + 'septenary', + 'octanary', + 'nonary', + 'decinary' +] + +_energy_quantities = [ + 'energy_total', + 'energy_total_T0', + 'energy_free', + 'energy_electrostatic', + 'energy_X', + 'energy_XC', + 'energy_sum_eigenvalues'] + +_electronic_quantities = [ + 'dos_values', + 'eigenvalues_values', + 'volumetric_data_values', + 'electronic_kinetic_energy', + 'total_charge', + 'atomic_multipole_values'] + +_forces_quantities = [ + 'atom_forces_free', + 'atom_forces_raw', + 'atom_forces_T0', + 'atom_forces', + 'stress_tensor'] + +_vibrational_quantities = [ + 'thermodynamical_property_heat_capacity_C_v', + 'vibrational_free_energy_at_constant_volume', + 'band_energies'] + +_magnetic_quantities = [ + 'spin_S2' +] + +_optical_quantities = [ + 'excitation_energies', + 'oscillator_strengths', + 'transition_dipole_moments' +] + version_re = re.compile(r'(\d+(\.\d+(\.\d+)?)?)') @@ -105,6 +157,12 @@ class DFTMetadata(MSection): description='The system type of the simulated system.', a_search=Search(default_statistic=True)) + compound_type = Quantity( + type=str, default='not processed', + description='The compound type of the simulated system.', + a_search=Search(statistic_size=11, default_statistic=True) + ) + crystal_system = Quantity( type=str, default='not processed', description='The crystal system type of the simulated system.', @@ -153,6 +211,36 @@ a_search=Search( metric_name='distinct_quantities', metric='cardinality', many_and='append')) + quantities_energy = Quantity( + type=str, shape=['0..*'], + description='Energy-related quantities.', + a_search=Search(many_and='append', default_statistic=True)) + + quantities_electronic = Quantity( + type=str, shape=['0..*'], + description='Electronic structure-related quantities.', + a_search=Search(many_and='append', default_statistic=True)) + + quantities_forces = Quantity( + type=str, shape=['0..*'], + description='Forces-related quantities.', + a_search=Search(many_and='append', default_statistic=True)) + + quantities_vibrational = Quantity( + type=str, shape=['0..*'], + description='Vibrational-related quantities.', + a_search=Search(many_and='append', default_statistic=True)) + + quantities_magnetic = Quantity( + type=str, shape=['0..*'], + description='Magnetic-related quantities.', + a_search=Search(many_and='append', default_statistic=True)) + + quantities_optical = Quantity( + type=str, shape=['0..*'], + description='Optical-related quantities.', + a_search=Search(many_and='append', default_statistic=True)) + geometries = Quantity( type=str, shape=['0..*'], description='Hashes for each simulated geometry', @@ -168,6 +256,16 @@ description='The labels taken from AFLOW prototypes and springer.', a_search='labels') + labels_springer_compound_class = Quantity( + type=str, shape=['0..*'], + description='Springer compound classification.', + a_search=Search(many_and='append', default_statistic=True, statistic_size=15)) + + labels_springer_classification = Quantity( + type=str, shape=['0..*'], + description='Springer classification by property.', + a_search=Search(many_and='append', default_statistic=True, statistic_size=15)) + optimade = SubSection( sub_section=OptimadeEntry, description='Metadata used for the optimade API.', @@ -199,6 +297,7 @@ atoms = list(set(normalized_atom_labels(set(atoms)))) atoms.sort() entry.atoms = atoms + self.compound_type = compound_types[len(atoms) - 1] if len(atoms) <= 10 else '>decinary' self.crystal_system = get_optional_backend_value( backend, 'crystal_system', 'section_symmetry', logger=logger) @@ -229,6 +328,13 @@ # metrics and quantities quantities = set() geometries = set() + quantities_energy = set() + quantities_electronic = set() + quantities_forces = set() + quantities_vibrational = set() + quantities_magnetic = set() + quantities_optical = set() + n_quantities = 0 n_calculations = 0 n_total_energies = 0 @@ -243,6 +349,19 @@ quantities.add(property_name) n_quantities += 1 + if property_name in _energy_quantities: + quantities_energy.add(property_name) + elif property_name in _electronic_quantities: + quantities_electronic.add(property_name) + elif property_name in _forces_quantities: + quantities_forces.add(property_name) + elif property_name in _vibrational_quantities: + quantities_vibrational.add(property_name) + elif property_name in _magnetic_quantities: + quantities_magnetic.add(property_name) + elif property_name in _optical_quantities: + quantities_optical.add(property_name) + if property_name == 'energy_total': n_total_energies += 1 @@ -257,6 +376,12 @@ self.quantities = list(quantities) self.geometries = list(geometries) + self.quantities_energy = list(quantities_energy) + self.quantities_electronic = list(quantities_electronic) + self.quantities_forces = list(quantities_forces) + self.quantities_vibrational = list(quantities_vibrational) + self.quantities_magnetic = list(quantities_magnetic) + self.quantities_optical = list(quantities_optical) self.n_quantities = n_quantities self.n_calculations = n_calculations self.n_total_energies = n_total_energies @@ -273,6 +398,8 @@ self.labels.append(Label(label=compound, type='compound_class', source='springer')) for classification in classifications: self.labels.append(Label(label=classification, type='classification', source='springer')) + self.labels_springer_compound_class = list(compounds) + self.labels_springer_classification = list(classifications) aflow_id = get_optional_backend_value(backend, 'prototype_aflow_id', 'section_prototype') aflow_label = get_optional_backend_value(backend, 'prototype_label', 'section_prototype')
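The grouped quantities and Springer labels above become searchable like any other entry metadata. A hedged sketch of querying them over the repo API (parameter names from the Search annotations above, values from the API tests further below; the base URL is deployment-specific):

```python
import requests

resp = requests.get(
    'http://localhost/nomad/api/repo/',
    params={'dft.quantities_energy': 'energy_total', 'dft.compound_type': 'ternary'})
results = resp.json()['results']  # matching entries, assuming the usual repo response layout
```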
diff --git a/nomad/normalizing/structure.py b/nomad/normalizing/aflow_prototypes.py similarity index 100% rename from nomad/normalizing/structure.py rename to nomad/normalizing/aflow_prototypes.py diff --git a/nomad/normalizing/data/.gitignore b/nomad/normalizing/data/.gitignore index 753400d2b6ef6154a2465ba04e6021aec7835c50..51f3bf9b73210dbccdb0f4b4c869dd6d2a5e91ca 100644 --- a/nomad/normalizing/data/.gitignore +++ b/nomad/normalizing/data/.gitignore @@ -1,2 +1,2 @@ SM_all08.db -springer.msg \ No newline at end of file +springer.msg diff --git a/nomad/normalizing/data/springer_msgpack.py b/nomad/normalizing/springer.py similarity index 83% rename from nomad/normalizing/data/springer_msgpack.py rename to nomad/normalizing/springer.py index 9a9bfdda45998ee9a24a7da09cec2d2744ad77f8..5a547c68ec8cae939f85a7a5644606582cf97639 100644 --- a/nomad/normalizing/data/springer_msgpack.py +++ b/nomad/normalizing/springer.py @@ -17,10 +17,6 @@ Generates and queries a msgpack database of springer-related quantities downloaded from http://materials.springer.com. The database is structured as space_group_number : normalized_formula : springer_id : entry -The msgpack file can be queried using ArchiveFileDB. - -The html parser was taken from a collection of scripts from FHI without further testing. 
''' import requests @@ -28,12 +24,12 @@ import re from bs4 import BeautifulSoup from typing import Dict, List, Any from time import sleep -import os +import os.path from nomad.archive import query_archive, write_archive, ArchiveReader +from nomad import config - -DB_NAME = '.springer.msg' +_DB_PATH = config.normalize.springer_db_path required_items = { 'Alphabetic Formula:': 'alphabetic_formula', @@ -41,6 +37,7 @@ 'Compound Class(es):': 'compound_classes', 'Dataset ID': 'id', 'Space Group:': 'space_group_number', + 'Phase Label(s):': 'phase_labels' } spaces_re = re.compile(r'\s+') @@ -118,11 +115,15 @@ def parse(htmltext: str) -> Dict[str, str]: results['compound_classes'] = [x for x in results['compound_classes'] if x != '–'] normalized_formula = None - if 'alphabetic_formula' in results: - try: - normalized_formula = normalize_formula(results['alphabetic_formula']) - except Exception: - pass + for formula_type in ['alphabetic_formula', 'phase_labels']: + formula = results.get(formula_type, None) + if formula: + try: + normalized_formula = normalize_formula(formula) + break + except Exception: + pass + results['normalized_formula'] = normalized_formula return results @@ -140,7 +141,7 @@ def _merge_dict(dict0: Dict[str, Any], dict1: Dict[str, Any]) -> Dict[str, Any]: return dict0 -def _download(path: str, max_n_query: int = 10) -> str: +def _download(path: str, max_n_query: int = 10, retry_time: int = 120) -> str: n_query = 0 while True: response = requests.get(path) @@ -149,7 +150,7 @@ if n_query > max_n_query: break n_query += 1 - sleep(120) + sleep(retry_time) if response.status_code != 200: response.raise_for_status() @@ -157,7 +158,7 @@ return response.text -def download_springer_data(max_n_query: int = 10): +def update_springer_data(max_n_query: int = 10, retry_time: int = 120): ''' Downloads the springer quantities related to a structure from springer and updates the database. 
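The renamed module keeps the msgpack layout described in its docstring (space group → normalized formula → springer id → entry). A minimal sketch of reading it directly, mirroring the `query_archive` call in `query_springer_data` below; the space group and formula values are illustrative:

```python
from nomad import config
from nomad.archive import query_archive

# toc: space_group_number -> normalized_formula -> springer_id -> entry
entries = query_archive(
    config.normalize.springer_db_path,
    {'221': {'O3SrTi': '*'}})
```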
@@ -165,11 +166,11 @@ # load database # querying database with unavailable dataset leads to error, # get toc keys first by making an empty query - archive = ArchiveReader(DB_NAME) + archive = ArchiveReader(_DB_PATH) _ = archive._load_toc_block(0) archive_keys = archive._toc.keys() - sp_data = query_archive(DB_NAME, {spg: '*' for spg in archive_keys}) + sp_data = query_archive(_DB_PATH, {spg: '*' for spg in archive_keys}) sp_ids: List[str] = [] for spg in sp_data: @@ -181,19 +182,23 @@ page = 1 while True: # check springer database for new entries by comparing with local database - root = 'https://materials.springer.com/search?searchTerm=&pageNumber=%d&datasourceFacet=sm_isp&substanceId=' % page - req_text = _download(root, max_n_query) + root = 'http://materials.springer.com/search?searchTerm=&pageNumber=%d&datasourceFacet=sm_isp&substanceId=' % page + req_text = _download(root, max_n_query, retry_time) if 'Sorry,' in req_text: break paths = search_re.findall(req_text) + + if len(paths) == 0: + break + for path in paths: sp_id = os.path.basename(path) if sp_id in sp_ids: continue path = 'http://materials.springer.com%s' % path - req_text = _download(path, max_n_query) + req_text = _download(path, max_n_query, retry_time) try: data = parse(req_text) except Exception: @@ -203,10 +208,12 @@ normalized_formula = data.get('normalized_formula', None) if space_group_number is None or normalized_formula is None: continue aformula = data.get('alphabetic_formula', None) + if aformula is None: + aformula = data.get('phase_labels', None) compound = data.get('compound_classes', None) classification = data.get('classification', None) + entry = dict( aformula=aformula, url=path, compound=compound, classification=classification) @@ -215,14 +222,12 @@ page += 1 - write_archive(DB_NAME, len(sp_data), sp_data.items(), entry_toc_depth=1) + write_archive(_DB_PATH, len(sp_data), sp_data.items(), entry_toc_depth=1) def query_springer_data(normalized_formula: str, space_group_number: int) -> Dict[str, Any]: - ''' - Queries a msgpack database for springer-related quantities. - ''' - entries = query_archive(DB_NAME, {str(space_group_number): {normalized_formula: '*'}}) + ''' Queries a msgpack database for springer-related quantities. ''' + entries = query_archive(_DB_PATH, {str(space_group_number): {normalized_formula: '*'}}) db_dict = {} entries = entries.get(str(space_group_number), {}).get(normalized_formula, {}) diff --git a/nomad/normalizing/system.py b/nomad/normalizing/system.py index b8e8ce01b6ac1dc6fe3ec6028bba2928d5e15bec..46203ec0c45d04e143550b32b60ad59cb0aa2c4d 100644 --- a/nomad/normalizing/system.py +++ b/nomad/normalizing/system.py @@ -19,16 +19,14 @@ from ase import Atoms import numpy as np import json import re -import os -import sqlite3 - from matid import SymmetryAnalyzer, Classifier from matid.classifications import Class0D, Atom, Class1D, Material2D, Surface, Class3D -from nomad.normalizing import structure from nomad import utils, config -from nomad.normalizing.normalizer import SystemBasedNormalizer -from nomad.normalizing.data.springer_msgpack import query_springer_data + +from . 
import aflow_prototypes +from .normalizer import SystemBasedNormalizer +from .springer import query_springer_data # use a regular expression to check atom labels; expression is build from list of # all labels sorted desc to find Br and not B when searching for Br. @@ -36,28 +34,6 @@ atom_label_re = re.compile('|'.join( sorted(ase.data.chemical_symbols, key=lambda x: len(x), reverse=True))) -springer_db_connection = None - - -def open_springer_database(): - ''' - Create a global connection to the Springer database in a way that - each worker opens the database just once. - ''' - global springer_db_connection - if springer_db_connection is None: - # filepath definition in 'nomad-FAIR/nomad/config.py' - db_file = config.springer_db_path - if not os.path.exists(db_file): - utils.get_logger(__name__).error('Springer database not found') - return None - springer_db_connection = sqlite3.connect(db_file, check_same_thread=False, uri=True) - # we lift the thread check because we share the connection among workers - # 'uri=True': open a database in read-only mode - - return springer_db_connection - - def normalized_atom_labels(atom_labels): ''' Normalizes the given atom labels: they either are labels right away, or contain @@ -395,94 +371,43 @@ class SystemNormalizer(SystemBasedNormalizer): self._backend.closeSection('section_symmetry', symmetry_gid) - def springer_classification(self, atoms, space_group_number, database='sqlite'): - # SPRINGER NORMALIZER + def springer_classification(self, atoms, space_group_number): normalized_formula = formula_normalizer(atoms) - # - if database == 'sqlite': - springer_db_connection = open_springer_database() - if springer_db_connection is None: - return - - cur = springer_db_connection.cursor() - - # SQL QUERY - # (this replaces the four queries done in the old 'classify4me_SM_normalizer.py') - cur.execute(''' - SELECT - entry.entry_id, - entry.alphabetic_formula, - GROUP_CONCAT(DISTINCT compound_classes.compound_class_name), - GROUP_CONCAT(DISTINCT classification.classification_name) - FROM entry - LEFT JOIN entry_compound_class as ecc ON ecc.entry_nr = entry.entry_nr - LEFT JOIN compound_classes ON ecc.compound_class_nr = compound_classes.compound_class_nr - LEFT JOIN entry_classification as ec ON ec.entry_nr = entry.entry_nr - LEFT JOIN classification ON ec.classification_nr = classification.classification_nr - LEFT JOIN entry_reference as er ON er.entry_nr = entry.entry_nr - LEFT JOIN reference ON reference.reference_nr = er.entry_nr - WHERE entry.normalized_formula = ( %r ) and entry.space_group_number = '%d' - GROUP BY entry.entry_id; - ''' % (normalized_formula, space_group_number)) - - results = cur.fetchall() - # 'results' is a list of tuples, i.e. 
'[(a,b,c,d), ..., (a,b,c,d)]' - # All SQL queries done - - # Storing 'results' in a dictionary - dbdict = {} - for ituple in results: - # 'spr' means 'springer' - spr_id = ituple[0] - spr_aformula = ituple[1] # alphabetical formula - spr_url = 'http://materials.springer.com/isp/crystallographic/docs/' + spr_id - spr_compound = ituple[2].split(',') # split to convert string to list - spr_classification = ituple[3].split(',') - # - spr_compound.sort() - spr_classification.sort() - # - dbdict[spr_id] = { - 'spr_id': spr_id, - 'spr_aformula': spr_aformula, - 'spr_url': spr_url, - 'spr_compound': spr_compound, - 'spr_classification': spr_classification} - - elif database == 'msgpack': - dbdict = query_springer_data(normalized_formula, space_group_number) - - # ============= - - # SPRINGER's METAINFO UPDATE - # LAYOUT: Five sections under 'section_springer_material' for each material ID: - # id, alphabetical formula, url, compound_class, clasification. - # As per Markus/Luca's emails, we don't expose Springer bib references (Springer's paywall) - for material in dbdict.values(): + springer_data = query_springer_data(normalized_formula, space_group_number) + + for material in springer_data.values(): self._backend.openNonOverlappingSection('section_springer_material') self._backend.addValue('springer_id', material['spr_id']) self._backend.addValue('springer_alphabetical_formula', material['spr_aformula']) self._backend.addValue('springer_url', material['spr_url']) - self._backend.addArrayValues('springer_compound_class', material['spr_compound']) - self._backend.addArrayValues('springer_classification', material['spr_classification']) + + compound_classes = material['spr_compound'] + if compound_classes is None: + compound_classes = [] + self._backend.addArrayValues('springer_compound_class', compound_classes) + + classifications = material['spr_classification'] + if classifications is None: + classifications = [] + self._backend.addArrayValues('springer_classification', classifications) self._backend.closeNonOverlappingSection('section_springer_material') # Check the 'springer_classification' and 'springer_compound_class' information # found is the same for all springer_id's - dkeys = list(dbdict.keys()) - if len(dkeys) != 0: - class_0 = dbdict[dkeys[0]]['spr_classification'] - comp_0 = dbdict[spr_id]['spr_compound'] + springer_data_keys = list(springer_data.keys()) + if len(springer_data_keys) != 0: + class_0 = springer_data[springer_data_keys[0]]['spr_classification'] + comp_0 = springer_data[springer_data_keys[0]]['spr_compound'] # compare 'class_0' and 'comp_0' against the rest - for ii in range(1, len(dkeys)): - class_test = (class_0 == dbdict[dkeys[ii]]['spr_classification']) - comp_test = (comp_0 == dbdict[dkeys[ii]]['spr_compound']) + for ii in range(1, len(springer_data_keys)): + class_test = (class_0 == springer_data[springer_data_keys[ii]]['spr_classification']) + comp_test = (comp_0 == springer_data[springer_data_keys[ii]]['spr_compound']) if (class_test or comp_test) is False: - self.logger.warning('Mismatch in Springer classification or compounds') + self.logger.info('Mismatch in Springer classification or compounds') def prototypes(self, atom_species: np.array, wyckoffs: np.array, spg_number: int) -> None: '''Tries to match the material to an entry in the AFLOW prototype data. @@ -493,8 +418,8 @@ class SystemNormalizer(SystemBasedNormalizer): wyckoff_letters: Array of Wyckoff letters as strings. spg_number: Space group number. 
''' - norm_wyckoff = structure.get_normalized_wyckoff(atom_species, wyckoffs) - protoDict = structure.search_aflow_prototype(spg_number, norm_wyckoff) + norm_wyckoff = aflow_prototypes.get_normalized_wyckoff(atom_species, wyckoffs) + protoDict = aflow_prototypes.search_aflow_prototype(spg_number, norm_wyckoff) if protoDict is not None: aflow_prototype_id = protoDict["aflow_prototype_id"] aflow_prototype_url = protoDict["aflow_prototype_url"] diff --git a/nomad/search.py b/nomad/search.py index f9ac993acb6f6f06e706efaebb07eaa7e7baa740..6edaa537a9ebc5efba4a2acd4b64f820d7c9fd37 100644 --- a/nomad/search.py +++ b/nomad/search.py @@ -337,11 +337,11 @@ class SearchRequest: 'metric:%s' % metric_quantity.metric_name, A(metric_quantity.metric, field=field)) - def date_histogram(self, metrics_to_use: List[str] = []): + def date_histogram(self, metrics_to_use: List[str] = [], interval: str = '1M'): ''' Adds a date histogram on the given metrics to the statistics part. ''' - histogram = A('date_histogram', field='upload_time', interval='1M', format='yyyy-MM-dd') + histogram = A('date_histogram', field='upload_time', interval=interval, format='yyyy-MM-dd') self._add_metrics(self._search.aggs.bucket('statistics:date_histogram', histogram), metrics_to_use) return self diff --git a/ops/README.md b/ops/README.md index 621afd8b99c48e109e8e1efbd3b1e3aee20ec78a..824aa6c3f142517e3e19b5ef0d20caf7ec478404 100644 --- a/ops/README.md +++ b/ops/README.md @@ -1,5 +1,3 @@ -## Overview - Read the [introduction](./introduction.html) and [setup](./setup.html) for input on the different nomad services. This is about how to deploy and operate these services. diff --git a/ops/helm/nomad/templates/nomad-configmap.yml b/ops/helm/nomad/templates/nomad-configmap.yml index 2cac45e2ee9f2071ba37c119f0dd236a69725dc5..9a7620c08148b717101497822751562229e834ef 100644 --- a/ops/helm/nomad/templates/nomad-configmap.yml +++ b/ops/helm/nomad/templates/nomad-configmap.yml @@ -59,7 +59,8 @@ data: realm_name: "{{ .Values.keycloak.realmName }}" username: "{{ .Values.keycloak.username }}" client_id: "{{ .Values.keycloak.clientId }}" - springer_db_path: "{{ .Values.springerDbPath }}" + normalize: + springer_db_path: "{{ .Values.springerDbPath }}" datacite: enabled: {{ .Values.datacite.enabled }} prefix: "{{ .Values.datacite.prefix }}" diff --git a/ops/helm/nomad/values.yaml b/ops/helm/nomad/values.yaml index 39d115ca67ee608914cbf7b348df738100080c91..5d5451559425a6d715c30fcb5ac2962cb587b589 100644 --- a/ops/helm/nomad/values.yaml +++ b/ops/helm/nomad/values.yaml @@ -152,7 +152,7 @@ volumes: # The domain configuration, currently there is dft and ems domain: dft -springerDbPath: /nomad/fairdi/db/data/springer.db +springerDbPath: /nomad/fairdi/db/data/springer.msg # Will reprocess calculations with their old matched parser, even if they do not # match this parser anymore diff --git a/requirements.txt b/requirements.txt index 730fd29568158828e2c273531a6b6a3b3ce67a31..2e9ec4d26a0e719253842abf5feb010cd719bca2 100644 --- a/requirements.txt +++ b/requirements.txt @@ -60,6 +60,7 @@ python-json-logger setuptools sphinx sphinxcontrib.httpdomain +sphinx-click sphinx_rtd_theme gitpython mypy==0.730
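The `interval` parameter added to `date_histogram` above is exercised by the API tests below. A hedged sketch of the resulting request (base URL and response layout assumed from the surrounding code and tests):

```python
import requests

resp = requests.get(
    'http://localhost/nomad/api/repo/',
    params={'date_histogram': 'true', 'interval': '1d', 'metrics': 'dft.quantities'})
# one bucket per day with the requested metrics, as asserted in the tests below
buckets = resp.json()['statistics']['date_histogram']
```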
diff --git a/tests/app/test_api.py b/tests/app/test_api.py
index 5790b17ab6344d8df7e4fb6543f14f39d2d3249f..6c367efaf35d5aea794612ec6ca79af3e020033d 100644
--- a/tests/app/test_api.py
+++ b/tests/app/test_api.py
@@ -24,6 +24,7 @@ import os.path
 from urllib.parse import urlencode
 import base64
 import itertools
+from hashlib import md5

 from nomad.app.common import rfc3339DateTime
 from nomad.app.api.auth import generate_upload_token
@@ -975,12 +976,19 @@ class TestRepo():
             assert 'values' in data[group]
             # assert len(data[group]['values']) == data['statistics']['total']['all'][group]

-    def test_search_date_histogram(self, api, example_elastic_calcs, no_warn):
-        rv = api.get('/repo/?date_histogram=true&metrics=dft.total_energies')
+    @pytest.mark.parametrize('query, nbuckets', [
+        (dict(interval='1M', metrics='dft.total_energies'), 1),
+        (dict(interval='1d', metrics='dft.quantities'), 6),
+        (dict(interval='1y', from_time='2019-03-20T12:43:54.566414'), 1),
+        (dict(until_time='2010-03-20T12:43:54.566414'), 0),
+        (dict(interval='1m', from_time='2020-02-20T12:43:54.566414', metrics='dft.calculations'), 7201)
+    ])
+    def test_search_date_histogram(self, api, example_elastic_calcs, no_warn, query, nbuckets):
+        rv = api.get('/repo/?date_histogram=true&%s' % urlencode(query))
         assert rv.status_code == 200
         data = json.loads(rv.data)
         histogram = data.get('statistics').get('date_histogram')
-        assert len(histogram) > 0
+        assert len(histogram) == nbuckets

     @pytest.mark.parametrize('n_results, page, per_page', [(2, 1, 5), (1, 1, 1), (0, 2, 3)])
     def test_search_pagination(self, api, example_elastic_calcs, no_warn, n_results, page, per_page):
@@ -1043,7 +1051,10 @@ class TestRepo():
         (1, 'atoms', 'Fe'),
         (1, 'authors', 'Leonard Hofstadter'),
         (2, 'files', 'test/mainfile.txt'),
-        (0, 'dft.quantities', 'dos')
+        (0, 'dft.quantities', 'dos'),
+        (2, 'dft.quantities_energy', 'energy_total'),
+        (2, 'dft.compound_type', 'ternary'),
+        (0, 'dft.labels_springer_compound_class', 'intermetallic')
     ])
     def test_quantity_search(self, api, example_elastic_calcs, no_warn, test_user_auth, calcs, quantity, value):
         rv = api.get('/repo/quantity/%s' % quantity, headers=test_user_auth)
@@ -1724,6 +1735,26 @@ class TestMirror:
         else:
             assert 'dois' not in data

+    # TODO
+    # - parametrize to also check raw
+    # - compute the hex digest reference
+    def test_files(self, api, published, admin_user_auth, no_warn):
+        url = '/mirror/files/%s?prefix=archive' % published.upload_id
+        rv = api.get(url, headers=admin_user_auth)
+        assert rv.status_code == 200
+        assert rv.data is not None
+        assert md5(rv.data).hexdigest() == 'a50a980a4f1bd9892e95410936a36cdf'
+
+    def test_users(self, api, published, admin_user_auth, no_warn):
+        url = '/mirror/users'
+        rv = api.get(url, headers=admin_user_auth)
+        assert rv.status_code == 200
+        users = json.loads(rv.data)
+        assert users is not None
+        assert len(users) == 3
+        for user in users:
+            assert 'email' not in user
+

 class TestDataset:
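The parametrized date-histogram test above corresponds to the following hypothetical client-side call; the base URL is a placeholder and authentication is omitted:

    # Hedged sketch mirroring the parametrized test; http://localhost/api is
    # a placeholder for a real deployment's API endpoint.
    from urllib.parse import urlencode
    import requests

    query = dict(date_histogram='true', interval='1d', metrics='dft.quantities')
    rv = requests.get('http://localhost/api/repo/?%s' % urlencode(query))
    histogram = rv.json()['statistics']['date_histogram']
    print(len(histogram))  # number of buckets, 6 for the test data above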
diff --git a/tests/test_archive.py b/tests/test_archive.py
index 35855761206a280b0ae25cde8c7cd3853fd42134..1331ba41f3bdf29199c1925ac283031d9ca973ee 100644
--- a/tests/test_archive.py
+++ b/tests/test_archive.py
@@ -200,15 +200,15 @@ def test_read_archive_multi(example_uuid, example_entry, use_blocked_toc):

 def test_query():
     payload = {
-        'calc1': {
-            'secA': {
-                'subsecA1': [{'propA1a': 1.0}]
+        'c1': {
+            's1': {
+                'ss1': [{'p1': 1.0, 'p2': 'x'}, {'p1': 1.5, 'p2': 'y'}]
             },
-            'secB': {'propB1a': ['a', 'b']}
+            's2': {'p1': ['a', 'b']}
         },
-        'calc2': {
-            'secA': {'subsecA1': [{'propA1a': 2.0}]},
-            'secB': {'propB1a': ['c', 'd']}
+        'c2': {
+            's1': {'ss1': [{'p1': 2.0}]},
+            's2': {'p1': ['c', 'd']}
         }
     }
@@ -217,10 +217,22 @@ def test_query():
     packed_archive = f.getbuffer()
     f = BytesIO(packed_archive)

-    assert query_archive(f, {'calc1': '*'}) == {'calc1': payload['calc1']}
-    assert query_archive(f, {'calc2': {'secA': {'subsecA1[0]': '*'}}}) == {'calc2': {'secA': {'subsecA1[0]': [{'propA1a': 2.0}]}}}
-    # TODO
-    # test [:][-1][0:1] ...
+    assert query_archive(f, {'c1': '*'}) == {'c1': payload['c1']}
+    assert query_archive(f, {'c1': '*', 'c2': {'s1': '*'}}) == {'c1': payload['c1'], 'c2': {'s1': payload['c2']['s1']}}
+    assert query_archive(f, {'c2': {'s1': {'ss1[0]': '*'}}}) == {'c2': {'s1': {'ss1': payload['c2']['s1']['ss1'][0]}}}
+    assert query_archive(f, {'c1': {'s1': {'ss1[1:]': '*'}}}) == {'c1': {'s1': {'ss1': payload['c1']['s1']['ss1'][1:]}}}
+    assert query_archive(f, {'c1': {'s1': {'ss1[:2]': '*'}}}) == {'c1': {'s1': {'ss1': payload['c1']['s1']['ss1'][:2]}}}
+    assert query_archive(f, {'c1': {'s1': {'ss1[0:2]': '*'}}}) == {'c1': {'s1': {'ss1': payload['c1']['s1']['ss1'][0:2]}}}
+    assert query_archive(f, {'c1': {'s1': {'ss1[-2]': '*'}}}) == {'c1': {'s1': {'ss1': payload['c1']['s1']['ss1'][-2]}}}
+    assert query_archive(f, {'c1': {'s1': {'ss1[:-1]': '*'}}}) == {'c1': {'s1': {'ss1': payload['c1']['s1']['ss1'][:-1]}}}
+    assert query_archive(f, {'c1': {'s1': {'ss1[1:-1]': '*'}}}) == {'c1': {'s1': {'ss1': payload['c1']['s1']['ss1'][1:-1]}}}
+    assert query_archive(f, {'c2': {'s1': {'ss1[-3:-1]': '*'}}}) == {'c2': {'s1': {'ss1': payload['c2']['s1']['ss1'][-3:-1]}}}
+
+
+def test_read_springer():
+    springer = read_archive(config.normalize.springer_db_path)
+    with pytest.raises(KeyError):
+        springer['doesnotexist']


 if __name__ == '__main__':
diff --git a/tests/test_normalizing.py b/tests/test_normalizing.py
index 3f8e78bb0b2989f3719083c0f56b269daf739217..b662e69f4b7af3310f814abaee2bf58e9f62058f 100644
--- a/tests/test_normalizing.py
+++ b/tests/test_normalizing.py
@@ -438,16 +438,18 @@ def test_springer_normalizer():
     backend = parse_file(vasp_parser)
     backend = run_normalize(backend)

-    backend_value = backend.get_value('springer_id', 89)
-    expected_value = 'sd_1932539'
+    gindex = 0
+
+    backend_value = backend.get_value('springer_id', gindex)
+    expected_value = 'sd_0305232'
     assert expected_value == backend_value

-    backend_value = backend.get_value('springer_alphabetical_formula', 89)
+    backend_value = backend.get_value('springer_alphabetical_formula', gindex)
     expected_value = 'O3SrTi'
     assert expected_value == backend_value

-    backend_value = backend.get_value('springer_url', 89)
-    expected_value = 'http://materials.springer.com/isp/crystallographic/docs/sd_1932539'
+    backend_value = backend.get_value('springer_url', gindex)
+    expected_value = 'http://materials.springer.com/isp/crystallographic/docs/sd_0305232'
     assert expected_value == backend_value
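The switch from springer.db to the msgpack-based springer.msg means the Springer database is now read through `nomad.archive.read_archive`, as exercised by `test_read_springer` above. A short sketch of a lookup; the id `sd_0305232` is the one asserted in `test_springer_normalizer`:

    # Hedged sketch grounded in the tests above: entries are keyed by
    # Springer id, and unknown ids raise KeyError.
    from nomad import config
    from nomad.archive import read_archive

    springer = read_archive(config.normalize.springer_db_path)
    try:
        entry = springer['sd_0305232']
    except KeyError:
        entry = None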