diff --git a/gui/src/components/APIs.js b/gui/src/components/APIs.js index 1cd91fc2b7f55e2a4a3fcef16228159908b557e4..4427d6a7d8bad73525085ddc932f761803e42c35 100644 --- a/gui/src/components/APIs.js +++ b/gui/src/components/APIs.js @@ -40,7 +40,7 @@ export default function About() { and functions programatically. For all APIs, we offer dashboards that let you use each API interactively, right in your browser. - ## NOMAD's new (Version 1) API + ## NOMAD's API - [API dashboard](${apiBase}/v1/extensions/docs) - [API documentation](${apiBase}/v1/extensions/redoc) @@ -50,19 +50,16 @@ export default function About() { At some point, it will replace it entirely. For new users, we recommend to start using this API. API Dashboard and documentation contain a tutorial on how to get started. - ## NOMAD's main (Version 0) API - - - [API dashboard](${apiBase}/) - - This is NOMAD main REST API. This API the main interface to NOMAD and it also used - by this web-page to provide all functions. Therefore, everything you do here, can - also be done by using this API. - There is a [tutorial on how to use the API with plain Python](${appBase}/docs/api_tutorial.html). Another [tutorial covers how to install and use NOMAD's Python client library](${appBase}/docs/archive_tutorial.html). The [NOMAD Analytics Toolkit](https://nomad-lab.eu/AIToolkit) allows to use this without installation and directly on NOMAD servers. + You can still use NOMAD's old REST API. The data it provides might miss the most + recent contributions: + + - [v0 API dashboard](https://nomad-lab.eu/prod/rae/api/) + ## OPTIMADE - [OPTIMADE API overview page](${appBase}/optimade/) diff --git a/gui/src/components/About.js b/gui/src/components/About.js index 673b50725728cd04621665339ec165ea8f84f8f6..445f527e8a771c61a607d2f637b8cecc7e752536 100644 --- a/gui/src/components/About.js +++ b/gui/src/components/About.js @@ -21,7 +21,6 @@ import PropTypes from 'prop-types' import Markdown from './Markdown' import { appBase, debug, consent, aitoolkitEnabled, encyclopediaEnabled } from '../config' import packageJson from '../../package.json' -import { domainData } from './domainData' import { Grid, Card, CardContent, Typography, makeStyles, Link, Dialog, DialogTitle, DialogContent, DialogActions, Button } from '@material-ui/core' import { Link as RouterLink, useHistory } from 'react-router-dom' import tutorials from '../toolkitMetadata' @@ -354,10 +353,8 @@ export default function About() { ${debug ? ` ### Material science data and domains Originally NOMAD was build for DFT calculations and data from the respective - community code. By NOMAD supports multiple materials science domains: - - ${info && info.domains.map(domain => domainData[domain.name]).map(domain => `- ${domain.name}: ${domain.about}`).join('\n')} - ` : ''} + community codes. But NOMAD is now extended to support multiple materials science domains, + such as experiments, synthesis, and computational methods at different scales.` : ''} ${debug ? ` ### Log management with Elastic stack @@ -381,13 +378,12 @@ export default function About() { ### About this version - version (API): \`${info ? info.version : 'loading'}/${info ? info.git.commit : 'loading'}\` - version (GUI): \`${packageJson.version}/${packageJson.commit}\` - - domains: ${info ? Object.keys(info.domains).map(domain => info.domains[domain].name).join(', ') : 'loading'} - git: \`${info ? info.git.ref : 'loading'}; ${info ? info.git.version : 'loading'}\` - last commit message: *${info ? info.git.log : 'loading'}* - supported codes: ${info ? info.codes.map(code => code.code_name).join(', ') : 'loading'} - parsers: ${info ? info.parsers.join(', ') : 'loading'} - normalizers: ${info ? info.normalizers.join(', ') : 'loading'} - `}</Markdown> + `}</Markdown> </Grid> </Grid> </div> diff --git a/gui/src/components/domainData.js b/gui/src/components/domainData.js deleted file mode 100644 index 7bebe89f162311cd0c83ca22aa079591cdf2b5c8..0000000000000000000000000000000000000000 --- a/gui/src/components/domainData.js +++ /dev/null @@ -1,207 +0,0 @@ -/* - * Copyright The NOMAD Authors. - * - * This file is part of NOMAD. See https://nomad-lab.eu for further info. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -import React from 'react' -import { Link, Typography } from '@material-ui/core' -import { amber } from '@material-ui/core/colors' - -/* eslint-disable react/display-name */ - -export const domainData = ({ - dft: { - name: 'Computational data', - label: 'Computational materials science data', - key: 'dft', - about: 'This include data from many computational materials science codes', - disclaimer: <Typography> - First time users can find an introduction to NOMAD, tutorials, and videos <Link href="https://nomad-lab.eu/services/repo-arch" target="nomad-lab">here</Link>. - </Typography>, - entryLabel: 'entry', - entryLabelPlural: 'entries', - entryHeader: 'DFT Calculation', - entryTitle: data => - data.dft && data.dft.code_name - ? data.dft.code_name.charAt(0).toUpperCase() + data.dft.code_name.slice(1) + ' run' - : 'Code run', - searchPlaceholder: 'enter atoms, codes, functionals, or other quantity values', - searchMetrics: { - code_runs: { - label: 'Entries', - tooltip: 'The statistics will show the number of database entry. Each set of input/output files that represents a code run is an entry.', - renderResultString: count => (<span><b>{count.toLocaleString()}</b> entr{count === 1 ? 'y' : 'ies'}</span>) - }, - unique_entries: { - label: 'Unique', - tooltip: 'Counts duplicates only once.', - renderResultString: count => (<span> and <b>{count.toLocaleString()}</b> unique entr{count === 1 ? 'y' : 'ies'}</span>) - }, - // total_energies: { - // label: 'Total energy calculations', - // tooltip: 'Aggregates the number of total energy calculations as each entry can contain many calculations.', - // renderResultString: count => (<span> with <b>{count.toLocaleString()}</b> total energy calculation{count === 1 ? '' : 's'}</span>) - // }, - 'dft.calculations': { - label: 'Single configuration calculations', - shortLabel: 'SCC', - tooltip: 'Aggregates the number of single configuration calculations (e.g. total energy calculations) as each entry can contain many calculations.', - renderResultString: count => (<span> with <b>{count.toLocaleString()}</b> single configuration calculation{count === 1 ? '' : 's'}</span>) - }, - // The unique_geometries search aggregates unique geometries based on 10^8 hashes. - // This takes to long in elastic search for a reasonable user experience. - // Therefore, we only support geometries without uniqueness check - 'dft.unique_geometries': { - label: 'Unique geometries', - shortLabel: 'Geometries', - tooltip: 'Aggregates the number of simulated system geometries in all entries.', - renderResultString: count => (<span> that simulate <b>{count.toLocaleString()}</b> unique geometrie{count === 1 ? '' : 's'}</span>) - }, - 'encyclopedia.material.materials': { - label: 'Materials', - tooltip: 'Shows statistics in terms of materials.', - renderResultString: count => (<span> of <b>{count.toLocaleString()}</b> material{count === 1 ? '' : 's'}</span>) - }, - datasets: { - label: 'Datasets', - tooltip: 'Shows statistics in terms of datasets that entries belong to.', - renderResultString: count => (<span> curated in <b>{count.toLocaleString()}</b> dataset{count === 1 ? '' : 's'}</span>) - } - }, - defaultSearchMetric: 'code_runs', - additionalSearchKeys: { - raw_id: {}, - external_id: {}, - upload_id: {}, - calc_id: {}, - paths: {}, - pid: {}, - mainfile: {}, - calc_hash: {}, - formula: {}, - optimade: {}, - quantities: {}, - 'dft.spacegroup': {}, - 'dft.spacegroup_symbol': {}, - 'dft.labels': {}, - upload_name: {} - }, - /** - * An dict where each object represents a column. Possible keys are label, render. - * Default render - */ - searchResultColumns: { - 'formula': { - label: 'Formula', - supportsSort: true - }, - 'dft.code_name': { - label: 'Code', - supportsSort: true - }, - 'dft.basis_set': { - label: 'Basis set', - supportsSort: true - }, - 'dft.xc_functional': { - label: 'XC functionals', - supportsSort: true - }, - 'dft.system': { - label: 'System', - supportsSort: true - }, - 'dft.crystal_system': { - label: 'Crystal system', - supportsSort: true - }, - 'dft.spacegroup_symbol': { - label: 'Spacegroup', - supportsSort: true - }, - 'dft.spacegroup': { - label: 'Spacegroup (number)', - supportsSort: true - } - }, - defaultSearchResultColumns: ['formula', 'dft.code_name', 'dft.system', 'dft.crystal_system'], - searchTabs: ['entries', 'materials', 'datasets', 'groups', 'uploads'] - }, - ems: { - name: 'Experimental data', - key: 'ems', - label: 'Experimental data (beta)', - about: 'This includes first metadata from materials science experiments. This aspect of NOMAD is still in development and mnight change frequently.', - disclaimer: <Typography style={{color: amber[700]}}> - This aspect of NOMAD is still under development. The offered functionality and displayed data - might change frequently, is not necessarely reviewed by NOMAD, and might contain - errors. Some of the information is taken verbatim from external sources. - </Typography>, - entryLabel: 'entry', - entryLabelPlural: 'entries', - entryTitle: () => 'Experiment', - searchPlaceholder: 'enter atoms, experimental methods, or other quantity values', - /** - * Metrics are used to show values for aggregations. Each metric has a key (used - * for API calls), a label (used in the select form), and result string (to show - * the overall amount in search results). - */ - searchMetrics: { - code_runs: { - label: 'Experiments', - tooltip: 'Statistics will show the number of entires; usually each entry represents a single experiment.', - renderResultString: count => (<span><b>{count}</b> entries</span>) - }, - datasets: { - label: 'Datasets', - tooltip: 'Shows statistics in terms of datasets that entries belong to.', - renderResultString: count => (<span> curated in <b>{count}</b> datasets</span>) - } - }, - defaultSearchMetric: 'code_runs', - /** - * An dict where each object represents a column. Possible keys are label, render. - * Default render - */ - searchResultColumns: { - 'formula': { - label: 'Formula', - supportsSort: true - }, - 'ems.chemical': { - label: 'Material name' - }, - 'ems.method': { - label: 'Method', - supportsSort: true - }, - 'ems.data_type': { - label: 'Data', - supportsSort: true - }, - 'ems.origin_time': { - label: 'Date', - supportsSort: true, - render: entry => (entry.ems && entry.ems.origin_time && new Date(entry.ems.origin_time).toLocaleDateString()) || 'unavailable' - }, - 'ems.repository_url': { - label: 'Source', - render: entry => <Link target="external" href={entry.ems.entry_repository_url}>{entry.ems.repository_url}</Link> - } - }, - defaultSearchResultColumns: ['formula', 'ems.chemical', 'ems.method', 'ems.data_type', 'ems.origin_time', 'ems.repository_url'], - searchTabs: ['entries', 'datasets', 'uploads'] - } -}) diff --git a/gui/src/components/search/EntryList.js b/gui/src/components/search/EntryList.js index 4c1bcd88215cc750319211c1b23e3defb0a0d545..11891e953d87790e802f50bcb00c8ba5b664d168 100644 --- a/gui/src/components/search/EntryList.js +++ b/gui/src/components/search/EntryList.js @@ -30,7 +30,6 @@ import PublicIcon from '@material-ui/icons/Public' import UploaderIcon from '@material-ui/icons/AccountCircle' import SharedIcon from '@material-ui/icons/SupervisedUserCircle' import PrivateIcon from '@material-ui/icons/VisibilityOff' -import { domainData } from '../domainData' import { authorList, nameList } from '../../utils' import EntryDetails from '../entry/EntryDetails' import { EntryButton } from '../nav/Routes' @@ -92,12 +91,11 @@ export class EntryListUnstyled extends React.Component { per_page: PropTypes.number.isRequired, editable: PropTypes.bool, editUserMetadataDialogProps: PropTypes.object, - columns: PropTypes.object, + columns: PropTypes.object.isRequired, title: PropTypes.string, actions: PropTypes.element, showEntryActions: PropTypes.func, - selectedColumns: PropTypes.arrayOf(PropTypes.string), - domain: PropTypes.object, + selectedColumns: PropTypes.arrayOf(PropTypes.string).isRequired, user: PropTypes.object, showAccessColumn: PropTypes.bool, entryPagePathPrefix: PropTypes.string, @@ -263,7 +261,6 @@ export class EntryListUnstyled extends React.Component { renderEntryDetails(row) { const { classes } = this.props - const domain = (row.domain && domainData[row.domain]) || domainData.dft return (<div className={classes.entryDetails} style={{width: this.props.entryDetailsWidth}}> <div className={classes.entryDetailsContents} > @@ -301,11 +298,11 @@ export class EntryListUnstyled extends React.Component { <div className={classes.entryDetailsRow} style={{paddingRight: 0}}> <Quantity column > {/* <Quantity quantity="pid" label='PID' placeholder="not yet assigned" noWrap data={row} withClipboard /> */} - <Quantity quantity="calc_id" label={`${domain ? domain.entryLabel : 'entry'} id`} noWrap withClipboard data={row} /> - <Quantity quantity="raw_id" label={`raw id`} noWrap withClipboard data={row} /> - <Quantity quantity="external_id" label={`external id`} noWrap withClipboard data={row} /> - <Quantity quantity='mainfile' noWrap ellipsisFront data={row} withClipboard /> - <Quantity quantity="upload_id" label='upload id' data={row} noWrap withClipboard> + <Quantity quantity="calc_id" label="entry id" noWrap withClipboard data={row} /> + <Quantity quantity="raw_id" label="raw id" noWrap withClipboard data={row} /> + <Quantity quantity="external_id" label="external id" noWrap withClipboard data={row} /> + <Quantity quantity="mainfile" noWrap ellipsisFront data={row} withClipboard /> + <Quantity quantity="upload_id" label="upload id" data={row} noWrap withClipboard> <Typography style={{flexGrow: 1}}> <Link component={RouterLink} to={`/uploads/${row.upload_id}`}>{row.upload_id}</Link> </Typography> @@ -349,26 +346,15 @@ export class EntryListUnstyled extends React.Component { } render() { - const { classes, data, order, order_by, page, per_page, domain, editable, title, query, actions, user, showAccessColumn, ...rest } = this.props + const { classes, data, order, order_by, page, per_page, editable, title, query, actions, user, showAccessColumn, ...rest } = this.props const { selected } = this.state const results = data?.results || data?.data || [] const total = data?.pagination.total const totalNumber = total || 0 - const columns = this.props.columns || { - ...domain.searchResultColumns, - ...EntryListUnstyled.defaultColumns - } - - let selectedColumns = this.props.selectedColumns - if (!selectedColumns) { - selectedColumns = [...domain.defaultSearchResultColumns] - if (user !== undefined || showAccessColumn) { - selectedColumns.push('published') - } - selectedColumns.push('authors') - } + const columns = this.props.columns + const selectedColumns = this.props.selectedColumns const pagination = <TablePagination rowsPerPageOptions={[5, 10, 25, 50, 100]} @@ -399,7 +385,7 @@ export class EntryListUnstyled extends React.Component { return ( <div className={classes.root}> <DataTable - entityLabels={domain ? [domain.entryLabel, domain.entryLabelPlural] : ['entry', 'entries']} + entityLabels={['entry', 'entries']} selectActions={selectActions} id={row => row.entry_id} total={total} diff --git a/gui/src/components/search/results/SearchResultsEntries.js b/gui/src/components/search/results/SearchResultsEntries.js index 2857b20f22e94dd423a8aaeeb4e85b54c872be76..28725989fdeec92b71921efdaa1c8558d23e55e5 100644 --- a/gui/src/components/search/results/SearchResultsEntries.js +++ b/gui/src/components/search/results/SearchResultsEntries.js @@ -27,7 +27,6 @@ import EditUserMetadataDialog from '../../EditUserMetadataDialog' import SharedIcon from '@material-ui/icons/SupervisedUserCircle' import PrivateIcon from '@material-ui/icons/VisibilityOff' import DownloadButton from '../../DownloadButton' -import { domainData } from '../../domainData' import EntryDetails from '../../entry/EntryDetails' import { authorList, nameList } from '../../../utils' import NewDataTable from '../../NewDataTable' @@ -293,8 +292,6 @@ const SearchResultsEntries = React.memo(({ }, [showEntryActions]) const renderEntryDetails = useCallback((row) => { - const domain = (row.domain && domainData[row.domain]) || domainData.dft - return (<div className={styles.entryDetails}> <div className={styles.entryDetailsContents}> <div className={styles.entryDetailsRow}> @@ -331,11 +328,11 @@ const SearchResultsEntries = React.memo(({ <div className={styles.entryDetailsRow} style={{maxWidth: '33%', paddingRight: 0}}> <Quantity column > {/* <Quantity quantity="pid" label='PID' placeholder="not yet assigned" noWrap data={row} withClipboard /> */} - <Quantity quantity="calc_id" label={`${domain ? domain.entryLabel : 'entry'} id`} noWrap withClipboard data={row} /> - <Quantity quantity="raw_id" label={`raw id`} noWrap withClipboard data={row} /> - <Quantity quantity="external_id" label={`external id`} noWrap withClipboard data={row} /> - <Quantity quantity='mainfile' noWrap ellipsisFront data={row} withClipboard /> - <Quantity quantity="upload_id" label='upload id' data={row} noWrap withClipboard> + <Quantity quantity="calc_id" label="entry id" noWrap withClipboard data={row} /> + <Quantity quantity="raw_id" label="raw id" noWrap withClipboard data={row} /> + <Quantity quantity="external_id" label="external id" noWrap withClipboard data={row} /> + <Quantity quantity="mainfile" noWrap ellipsisFront data={row} withClipboard /> + <Quantity quantity="upload_id" label="upload id" data={row} noWrap withClipboard> <Typography style={{flexGrow: 1}}> <Link component={RouterLink} to={`/uploads/${row.upload_id}`}>{row.upload_id}</Link> </Typography> diff --git a/nomad/app/flask/dcat/mapping.py b/nomad/app/flask/dcat/mapping.py index fe4e1ab7bf666f75d7573a3dfd1e4aa39829dbc9..4294068f9a7dd04cd6aeae5ea1f35a6ec0cf3a2d 100644 --- a/nomad/app/flask/dcat/mapping.py +++ b/nomad/app/flask/dcat/mapping.py @@ -32,7 +32,12 @@ HYDRA = Namespace('http://www.w3.org/ns/hydra/core#') def get_optional_entry_prop(entry, name): try: - return entry[name] + value = entry + segments = name.split('.') + for segment in segments: + value = value[segment] + + return value except (KeyError, AttributeError): return 'unavailable' @@ -86,7 +91,7 @@ class Mapping(): self.g.add((dataset, DCT.identifier, Literal(entry['entry_id']))) self.g.add((dataset, DCT.issued, Literal(entry['upload_time']))) self.g.add((dataset, DCT.modified, Literal(entry['last_processing']))) - self.g.add((dataset, DCT.title, Literal(get_optional_entry_prop(entry, 'formula')))) + self.g.add((dataset, DCT.title, Literal(get_optional_entry_prop(entry, 'results.material.chemical_formula_descriptive')))) self.g.add((dataset, DCT.description, Literal(get_optional_entry_prop(entry, 'comment')))) if slim: diff --git a/nomad/app/main.py b/nomad/app/main.py index 9aed3c89363d4a62ea1bb213672b6c818d3b7957..7c9263638c5ab4dbef12e87829921469f42e8aca 100644 --- a/nomad/app/main.py +++ b/nomad/app/main.py @@ -96,7 +96,6 @@ async def http_exception_handler(request, exc): <h2>info</h2> {'<br/>'.join(f'{key}: {value}' for key, value in config.meta.items())} <h2>apis</h2> - <a href="{app_base}/api">NOMAD API v0</a><br/> <a href="{app_base}/api/v1/extensions/docs">NOMAD API v1</a><br/> <a href="{app_base}/optimade/v1/extensions/docs">Optimade API</a><br/> <a href="{app_base}/dcat">DCAT API</a><br/> @@ -109,10 +108,6 @@ async def http_exception_handler(request, exc): 'info': { 'app': config.meta, 'apis': { - 'v0': { - 'root': f'{app_base}/api', - 'dashboard': f'{app_base}/api', - }, 'v1': { 'root': f'{app_base}/api/v1', 'dashboard': f'{app_base}/api/v1/extensions/docs', diff --git a/nomad/app/optimade/elasticsearch.py b/nomad/app/optimade/elasticsearch.py index d8dbea06b7513f3a2262fafde66ffd0a7b1c51a1..4d8941d472927ab092273ecc389eb1f00d89cd37 100644 --- a/nomad/app/optimade/elasticsearch.py +++ b/nomad/app/optimade/elasticsearch.py @@ -114,7 +114,7 @@ class StructureCollection(EntryCollection): super().__init__( resource_cls=NomadStructureResource, resource_mapper=StructureMapper, - transformer=get_transformer(nomad_properties='dft', without_prefix=False)) + transformer=get_transformer(without_prefix=False)) self.parser = LarkParser(version=(1, 0, 0), variant="default") diff --git a/nomad/app/optimade/filterparser.py b/nomad/app/optimade/filterparser.py index 4cfc5a680b899b5c2d4d12622a2ea7c679ed0add..f666cd074c530e1e12c65dbf2a0c98f5364a0e47 100644 --- a/nomad/app/optimade/filterparser.py +++ b/nomad/app/optimade/filterparser.py @@ -36,7 +36,7 @@ class FilterException(Exception): @cached(cache={}) -def _get_transformer(nomad_properties, without_prefix): +def _get_transformer(without_prefix): from nomad.datamodel import OptimadeEntry quantities: Dict[str, Quantity] = { q.name: Quantity( @@ -55,35 +55,34 @@ def _get_transformer(nomad_properties, without_prefix): quantities['elements'].nested_quantity = quantities['elements_ratios'] quantities['elements_ratios'].nested_quantity = quantities['elements_ratios'] - if nomad_properties is not None: - for name, search_quantity in provider_specific_fields().items(): - names = ['_nmd_' + name] - if without_prefix: - names.append(name) + for name, search_quantity in provider_specific_fields().items(): + names = ['_nmd_' + name] + if without_prefix: + names.append(name) - for name in names: - if name not in quantities: - quantities[name] = Quantity( - name, - es_field=search_quantity.search_field, - elastic_mapping_type=search_quantity.mapping['type']) + for name in names: + if name not in quantities: + quantities[name] = Quantity( + name, + es_field=search_quantity.search_field, + elastic_mapping_type=search_quantity.mapping['type']) return ElasticTransformer(quantities=quantities.values()) -def parse_filter(filter_str: str, nomad_properties='dft', without_prefix=False) -> Q: +def parse_filter(filter_str: str, without_prefix=False) -> Q: ''' Parses the given optimade filter str and returns a suitable elastic search query. Arguments: filter_str: Can be direct user input with no prior processing. - nomad_properties: Also include the nomad proprietary properties of the given domain. + nomad_properties: Also include the nomad proprietary properties. without_prefix: Do not prefix the nomad proprietary properties with _nmd_. Raises: FilterException: If the given str cannot be parsed, or if there are any semantic errors in the given expression. ''' - transformer = _get_transformer(nomad_properties, without_prefix) + transformer = _get_transformer(without_prefix) try: parse_tree = _parser.parse(filter_str) diff --git a/nomad/app/v1/routers/info.py b/nomad/app/v1/routers/info.py index d08ce2fdd4ef39c3190da00b9b15be69dffd2db4..8f834b37140284e9c5cdaf35a7b6e34cf227d0fe 100644 --- a/nomad/app/v1/routers/info.py +++ b/nomad/app/v1/routers/info.py @@ -26,7 +26,7 @@ from fastapi.routing import APIRouter from pydantic.fields import Field from pydantic.main import BaseModel -from nomad import config, normalizing, datamodel, gitinfo +from nomad import config, normalizing, gitinfo from nomad.utils import strip from nomad.search import search from nomad.parsing import parsers, MatchingParser @@ -48,11 +48,6 @@ class MetainfoModel(BaseModel): data.''')) -class DomainModel(BaseModel): - name: str - metainfo: MetainfoModel - - class GitInfoModel(BaseModel): ref: str version: str @@ -79,7 +74,6 @@ class InfoModel(BaseModel): metainfo_packages: List[str] codes: List[CodeInfoModel] normalizers: List[str] - domains: List[DomainModel] statistics: StatisticsModel = Field(None, description='General NOMAD statistics') search_quantities: dict version: str @@ -130,16 +124,6 @@ async def get_info(): 'codes': codes, 'normalizers': [normalizer.__name__ for normalizer in normalizing.normalizers], 'statistics': statistics(), - 'domains': [ - { - 'name': domain_name, - 'metainfo': { - 'all_package': domain['metainfo_all_package'], - 'root_section': domain['root_section'] - } - } - for domain_name, domain in datamodel.domains.items() - ], 'search_quantities': { s.qualified_name: { 'name': s.qualified_name, diff --git a/nomad/cli/client/__init__.py b/nomad/cli/client/__init__.py index 3a57199e29e7960ef8c3ecf410834f190698c302..a524ace65264073aba3786116dd40ca9c85353cb 100644 --- a/nomad/cli/client/__init__.py +++ b/nomad/cli/client/__init__.py @@ -149,7 +149,7 @@ def local(ctx, entry_id, show_archive, show_metadata, skip_normalizers, not_stri if show_metadata: metadata = entry_archive.metadata - metadata.apply_domain_metadata(entry_archive) + metadata.apply_archvie_metadata(entry_archive) json.dump(metadata.m_to_dict(), sys.stdout, indent=4) diff --git a/nomad/cli/parse.py b/nomad/cli/parse.py index 4d1c39875cd7aee2bb285f1a147ef4b8d8c361d9..2e9469fc452f2ddf4dec7c3ffc75914571ccc813 100644 --- a/nomad/cli/parse.py +++ b/nomad/cli/parse.py @@ -40,12 +40,12 @@ def _parse(mainfile, show_archive, show_metadata, skip_normalizers, not_strict, if not skip_normalizers: normalize_all(entry_archive) - entry_archive.metadata.apply_domain_metadata(entry_archive) + entry_archive.metadata.apply_archvie_metadata(entry_archive) if show_archive: json.dump(entry_archive.m_to_dict(), sys.stdout, indent=2) if show_metadata: metadata = entry_archive.metadata - metadata.apply_domain_metadata(entry_archive) + metadata.apply_archvie_metadata(entry_archive) json.dump(metadata.m_to_dict(), sys.stdout, indent=4) diff --git a/nomad/datamodel/__init__.py b/nomad/datamodel/__init__.py index 7c905a1c0f4862c5426901a1e544fe5877ea21c3..b93a7170b600647df942c63c439c936b1ec3bce8 100644 --- a/nomad/datamodel/__init__.py +++ b/nomad/datamodel/__init__.py @@ -71,12 +71,6 @@ The datamodel supports different *domains*. This means that most domain metadata entry/calculation is stored in domain-specific sub sections of the :class:`EntryMetadata` section. We currently have the following domain specific metadata classes/sections: -.. autoclass:: nomad.datamodel.dft.DFTMetadata - :members: - -.. autoclass:: nomad.datamodel.ems.EMSMetadata - :members: - .. autoclass:: nomad.datamodel.OptimadeEntry :members: @@ -87,8 +81,6 @@ import sys from nomad.metainfo import Environment -from .dft import DFTMetadata -from .ems import EMSMetadata from .datamodel import ( Dataset, User, Author, EditableUserMetadata, UserProvidableMetadata, OasisMetadata, UploadMetadata, MongoMetadata, EntryMetadata, EntryArchive) @@ -97,22 +89,5 @@ from .metainfo import m_env from .results import Results m_env.m_add_sub_section(Environment.packages, sys.modules['nomad.datamodel.datamodel'].m_package) # type: ignore -m_env.m_add_sub_section(Environment.packages, sys.modules['nomad.datamodel.dft'].m_package) # type: ignore -m_env.m_add_sub_section(Environment.packages, sys.modules['nomad.datamodel.ems'].m_package) # type: ignore m_env.m_add_sub_section(Environment.packages, sys.modules['nomad.datamodel.encyclopedia'].m_package) # type: ignore m_env.m_add_sub_section(Environment.packages, sys.modules['nomad.datamodel.optimade'].m_package) # type: ignore - -domains = { - 'dft': { - 'metadata': DFTMetadata, - 'metainfo_all_package': 'common', - 'root_section': 'run' - }, - 'ems': { - 'metadata': EMSMetadata, - 'metainfo_all_package': 'common_experimental', - 'root_section': 'section_measurement' - } -} - -root_sections = [domain['root_section'] for domain in domains.values()] + ['section_entry_info', 'OptimadeEntry', 'Workflow'] diff --git a/nomad/datamodel/datamodel.py b/nomad/datamodel/datamodel.py index 803a9b39f91c606e8bf94e6840eac475ee7f8685..a356ed81710ce902943930824727b5599dbf4900 100644 --- a/nomad/datamodel/datamodel.py +++ b/nomad/datamodel/datamodel.py @@ -29,16 +29,13 @@ from nomad.datamodel.metainfo.common import FastAccess from nomad.metainfo.pydantic_extension import PydanticModel from nomad.metainfo.elasticsearch_extension import Elasticsearch, material_entry_type, entry_type -from .dft import DFTMetadata -from .ems import EMSMetadata -from .optimade import OptimadeEntry - # This is usually defined automatically when the first metainfo definition is evaluated, but # due to the next imports requireing the m_package already, this would be too late. m_package = metainfo.Package() from .results import Results # noqa from .encyclopedia import EncyclopediaMetadata # noqa +from .optimade import OptimadeEntry # noqa from .metainfo.simulation.run import Run # noqa from .metainfo.workflow import Workflow # noqa from .metainfo.common_experimental import Measurement # noqa @@ -357,8 +354,6 @@ class EntryMetadata(metainfo.MSection): calc_hash: The raw file content based checksum/hash of this calculation. pid: The unique persistent id of this calculation. mainfile: The upload relative mainfile path. - domain: Must be the key for a registered domain. This determines which actual - subclass is instantiated. files: A list of all files, relative to upload. processed: Boolean indicating if this calc was successfully processed and archive @@ -562,23 +557,6 @@ class EntryMetadata(metainfo.MSection): type=metainfo.Datetime, categories=[MongoMetadata, OasisMetadata], description='The date and time the user metadata was edited last') - formula = metainfo.Quantity( - type=str, categories=[DomainMetadata], - description='A (reduced) chemical formula') - - atoms = metainfo.Quantity( - type=str, shape=['n_atoms'], default=[], categories=[DomainMetadata], - description='The atom labels of all atoms of the entry\'s material') - - only_atoms = metainfo.Quantity( - type=str, categories=[DomainMetadata], - description='The atom labels concatenated in order-number order', - derived=lambda entry: _only_atoms(entry.atoms)) - - n_atoms = metainfo.Quantity( - type=int, categories=[DomainMetadata], default=0, - description='The number of atoms in the entry\'s material') - optimade = metainfo.SubSection( sub_section=OptimadeEntry, description='Metadata used for the optimade API.', @@ -593,29 +571,13 @@ class EntryMetadata(metainfo.MSection): description='All quantities that are used by this entry.', a_elasticsearch=QuantitySearch()) - ems = metainfo.SubSection(sub_section=EMSMetadata) - dft = metainfo.SubSection(sub_section=DFTMetadata, categories=[FastAccess]) encyclopedia = metainfo.SubSection(sub_section=EncyclopediaMetadata, categories=[FastAccess]) def apply_user_metadata(self, metadata: dict): ''' Applies a user provided metadata dict to this calc. ''' self.m_update(**metadata) - def apply_domain_metadata(self, archive): - ''' Used to apply metadata that is related to the domain. ''' - assert self.domain is not None, 'all entries must have a domain' - domain_sub_section_def = self.m_def.all_sub_sections.get(self.domain) - if domain_sub_section_def is not None: - domain_section_def = domain_sub_section_def.sub_section - assert domain_section_def is not None, 'unknown domain %s' % self.domain - - # add domain section if not already there - domain_section = self.m_get_sub_section(domain_sub_section_def, -1) - if domain_section is None: - domain_section = self.m_create(domain_section_def.section_cls) - - domain_section.apply_domain_metadata(archive) - + def apply_archvie_metadata(self, archive): quantities = set() n_quantities = 0 diff --git a/nomad/datamodel/dft.py b/nomad/datamodel/dft.py deleted file mode 100644 index d9c02f3bb8c8b13e7918ab93da82d232598a6ed7..0000000000000000000000000000000000000000 --- a/nomad/datamodel/dft.py +++ /dev/null @@ -1,545 +0,0 @@ -# -# Copyright The NOMAD Authors. -# -# This file is part of NOMAD. See https://nomad-lab.eu for further info. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -''' -DFT specific metadata -''' - -import re - -from nomad import config, utils -from nomad.metainfo import MSection, Section, Quantity, MEnum, SubSection - -from .metainfo.workflow import Workflow -from .metainfo.common import FastAccess -from .metainfo.simulation.run import Run -from .metainfo.simulation.method import Functional -from .metainfo.simulation.calculation import Energy - - -xc_treatments = { - 'gga': 'GGA', - 'hf_': 'HF', - 'oep': 'OEP', - 'hyb': 'hybrid', - 'mgg': 'meta-GGA', - 'vdw': 'vdW', - 'lda': 'LDA', -} -''' https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-meta-info/wikis/metainfo/XC-functional ''' - -basis_sets = { - 'gaussians': 'gaussians', - 'realspacegrid': 'real-space grid', - 'planewaves': 'plane waves' -} - -compound_types = [ - 'unary', - 'binary', - 'ternary', - 'quaternary', - 'quinary', - 'sexinary', - 'septenary', - 'octanary', - 'nonary', - 'decinary' -] - -_electronic_quantities = [ - 'electronic_band_structure', - 'electronic_dos', - 'eigenvalues_values', -] - -_mechanical_quantities = [ - 'stress_tensor' -] - -_thermal_quantities = [ - 'heat_capacity_c_v', - 'helmholtz_free_energy', - 'phonon_band_structure', - 'phonon_dos', -] - -_magnetic_quantities = [ - 'spin_S2' -] - -_optical_quantities = [ - 'oscillator_strengths', - 'transition_dipole_moments' -] - -_searchable_quantities = set(_electronic_quantities + _mechanical_quantities + _thermal_quantities + _magnetic_quantities + _optical_quantities) - -version_re = re.compile(r'(\d+(\.\d+(\.\d+)?)?)') - - -def map_functional_name_to_xc_treatment(name): - if name == config.services.unavailable_value: - return name - - return xc_treatments.get(name[:3].lower(), config.services.unavailable_value) - - -def map_basis_set_to_basis_set_label(name): - key = name.replace('_', '').replace('-', '').replace(' ', '').lower() - return basis_sets.get(key, name) - - -def simplify_version(version): - match = version_re.search(version) - if match is None: - return version - else: - return match.group(0) - - -def valid_array(array): - """Checks if the given variable is a non-empty array. - """ - return array is not None and len(array) > 0 - - -class Label(MSection): - ''' - Label that further classify a structure. - - Attributes: - label: The label as a string - type: The type of the label - source: The source that this label was taken from. - - ''' - label = Quantity(type=str) - - type = Quantity(type=MEnum( - 'compound_class', 'classification', 'prototype', 'prototype_id')) - - source = Quantity( - type=MEnum('springer', 'aflow_prototype_library')) - - -class DFTMetadata(MSection): - m_def = Section(a_domain='dft') - - basis_set = Quantity( - type=str, default='not processed', - description='The used basis set functions.') - - xc_functional = Quantity( - type=str, default='not processed', - description='The libXC based xc functional classification used in the simulation.') - - xc_functional_names = Quantity( - type=str, default=[], shape=['*'], - description='The list of libXC functional names that where used in this entry.') - - system = Quantity( - type=str, default='not processed', - description='The system type of the simulated system.') - - compound_type = Quantity( - type=str, default='not processed', - description='The compound type of the simulated system.' - ) - - crystal_system = Quantity( - type=str, default='not processed', - description='The crystal system type of the simulated system.') - - spacegroup = Quantity( - type=int, default=-1, - description='The spacegroup of the simulated system as number.') - - spacegroup_symbol = Quantity( - type=str, default='not processed', - description='The spacegroup as international short symbol.') - - code_name = Quantity( - type=str, default='not processed', - description='The name of the used code.') # in import the parser module is added codes here as statistic_values - - code_version = Quantity( - type=str, default='not processed', - description='The version of the used code.') - - n_geometries = Quantity( - type=int, default=0, description='Number of unique geometries.') - - n_calculations = Quantity( - type=int, default=0, - description='Number of single configuration calculation sections') - - n_total_energies = Quantity( - type=int, default=0, description='Number of total energy calculations') - - n_quantities = Quantity( - type=int, default=0, description='Number of metainfo quantities parsed from the entry.') - - quantities = Quantity( - type=str, shape=['0..*'], - description='All quantities that are used by this entry.') - - searchable_quantities = Quantity( - type=str, shape=['0..*'], - description='All quantities with existence filters in the search GUI.') - - geometries = Quantity( - type=str, shape=['0..*'], - description='Hashes for each simulated geometry') - - group_hash = Quantity( - type=str, - description='Hashes that describe unique geometries simulated by this code run.') - - labels = SubSection( - sub_section=Label, repeats=True, categories=[FastAccess], - description='The labels taken from AFLOW prototypes and springer.') - - labels_springer_compound_class = Quantity( - type=str, shape=['0..*'], - description='Springer compund classification.') - - labels_springer_classification = Quantity( - type=str, shape=['0..*'], - description='Springer classification by property.') - - workflow = Quantity(type=Workflow) - - def code_name_from_parser(self): - entry = self.m_parent - if entry.parser_name is not None: - from nomad.parsing.parsers import parser_dict - parser = parser_dict.get(entry.parser_name) - if hasattr(parser, 'code_name'): - return parser.code_name - return config.services.unavailable_value - - def update_group_hash(self): - user_id = None - uploader = self.m_parent.uploader - if uploader is not None: - user_id = uploader.user_id - self.group_hash = utils.hash( - self.m_parent.formula, - self.spacegroup, - self.basis_set, - self.xc_functional, - self.code_name, - self.code_version, - self.m_parent.with_embargo, - user_id) - - def apply_domain_metadata(self, entry_archive): - from nomad.normalizing.system import normalized_atom_labels - entry = self.m_parent - - logger = utils.get_logger(__name__).bind( - upload_id=entry.upload_id, calc_id=entry.calc_id, mainfile=entry.mainfile) - - self.code_name = self.code_name_from_parser() - - if entry_archive is None: - return - - section_run = entry_archive.run - if not section_run: - logger.warn('no section run found') - return - section_run = section_run[0] - - # default values - self.system = config.services.unavailable_value - self.crystal_system = config.services.unavailable_value - self.spacegroup_symbol = config.services.unavailable_value - self.basis_set = config.services.unavailable_value - self.xc_functional = config.services.unavailable_value - - section_system = None - for section in section_run.system: - if section.is_representative: - section_system = section - break - - # code and code specific ids - try: - code_name = section_run.program.name - if code_name: - self.code_name = code_name - else: - raise KeyError - except KeyError as e: - logger.warn('archive without program_name', exc_info=e) - - try: - version = section_run.program.version - if version: - self.code_version = simplify_version(version) - else: - raise KeyError - except KeyError: - self.code_version = config.services.unavailable_value - - def get_value(value): - return value if value else config.services.unavailable_value - - raw_id = section_run.raw_id - if raw_id is not None: - entry.raw_id = raw_id - - # metadata (system, method, chemistry) - atom_labels = section_system.atoms.labels if section_system else [] - atoms = atom_labels if atom_labels else [] - entry.n_atoms = len(atoms) - atoms = list(set(normalized_atom_labels(set(atoms)))) - atoms.sort() - entry.atoms = atoms - self.compound_type = compound_types[len(atoms) - 1] if len(atoms) <= 10 else '>decinary' - - self.system = config.services.unavailable_value - self.crystal_system = config.services.unavailable_value - self.spacegroup_symbol = config.services.unavailable_value - - section_symmetry = None - if section_system and len(section_system.symmetry) > 0: - section_symmetry = section_system.symmetry[0] - self.crystal_system = get_value(section_symmetry.crystal_system) - spacegroup = section_symmetry.space_group_number - self.spacegroup = 0 if not spacegroup else int(spacegroup) - self.spacegroup_symbol = get_value(section_symmetry.international_short_symbol) - - if section_run.method and section_run.method[0].basis_set: - program_basis_set_type = section_run.method[0].basis_set[0].type - if program_basis_set_type: - self.basis_set = map_basis_set_to_basis_set_label(program_basis_set_type) - - if section_system: - self.system = get_value(section_system.type) - if section_system.chemical_composition_reduced is not None: - entry.formula = get_value(section_system.chemical_composition_reduced) - - # metrics and quantities - quantities = set() - searchable_quantities = set() - geometries = set() - xc_functionals = set() - xc_functional = None - - n_quantities = 0 - n_calculations = 0 - n_total_energies = 0 - n_geometries = 0 - - for section, property_def, _ in entry_archive.m_traverse(): - property_name = property_def.name - quantities.add(property_name) - n_quantities += 1 - - if property_name in _searchable_quantities: - searchable_quantities.add(property_name) - - if property_def == Functional.name: - xc_functional = getattr(section, property_name) - if xc_functional: - xc_functionals.add(xc_functional) - - if property_def == Energy.total: - n_total_energies += 1 - - if property_name == 'configuration_raw_gid': - geometries.add(section.m_get(property_def)) - - if property_name == Run.calculation: - n_calculations += 1 - - if property_def == Run.system: - n_geometries += 1 - - # Special handling for electronic/vibrational DOS and band structure: - # these cannot currently be distinguished through the presence of a - # single metainfo. - searchable_quantities.discard("electronic_dos") - searchable_quantities.discard("electronic_band_structure") - searchable_quantities.discard("phonon_dos") - searchable_quantities.discard("phonon_band_structure") - if self.band_structure_electronic(entry_archive): - searchable_quantities.add("electronic_band_structure") - if self.band_structure_phonon(entry_archive): - searchable_quantities.add("phonon_band_structure") - if self.dos_electronic(entry_archive): - searchable_quantities.add("electronic_dos") - if self.dos_phonon(entry_archive): - searchable_quantities.add("phonon_dos") - - self.xc_functional_names = sorted(xc_functionals) - if len(self.xc_functional_names) > 0: - self.xc_functional = map_functional_name_to_xc_treatment( - get_value(self.xc_functional_names[0])) - else: - self.xc_functional = config.services.unavailable_value - - self.quantities = list(quantities) - self.geometries = list(geometries) - self.searchable_quantities = list(searchable_quantities) - self.n_quantities = n_quantities - self.n_calculations = n_calculations - self.n_total_energies = n_total_energies - self.n_geometries = n_geometries - - # grouping - self.update_group_hash() - - # labels - compounds = set() - classifications = set() - if section_system: - for section in section_system.springer_material: - compounds.update(section.compound_class) - classifications.update(section.classification) - - for compound in compounds: - self.labels.append(Label(label=compound, type='compound_class', source='springer')) - for classification in classifications: - self.labels.append(Label(label=classification, type='classification', source='springer')) - self.labels_springer_compound_class = list(compounds) - self.labels_springer_classification = list(classifications) - - aflow_id, aflow_label = None, None - section_prototype = section_system.prototype if section_system else [] - if section_prototype: - aflow_id = get_value(section_prototype[0].aflow_id) - aflow_label = get_value(section_prototype[0].label) - - if aflow_id is not None and aflow_label is not None: - self.labels.append(Label(label=aflow_label, type='prototype', source='aflow_prototype_library')) - self.labels.append(Label(label=aflow_id, type='prototype_id', source='aflow_prototype_library')) - - if entry_archive.workflow: - self.workflow = entry_archive.workflow[-1] - - def band_structure_electronic(self, entry_archive): - """Returns whether a valid electronic band structure can be found. In - the case of multiple valid band structures, only the latest one is - considered. - - Band structure is reported only under the following conditions: - - There is a non-empty array of band_k_points. - - There is a non-empty array of band_energies. - - The reported band_structure_kind is not "vibrational". - """ - path = ["run", "calculation", "band_structure_electronic"] - valid = False - for bs in self.traverse_reversed(entry_archive, path): - valid = True - for segment in bs.segment: - energies = segment.energies - k_points = segment.kpoints - if not valid_array(energies) or not valid_array(k_points): - valid = False - break - if valid: - break - return valid - - def dos_electronic(self, entry_archive): - """Returns whether a valid electronic DOS can be found. In the case of - multiple valid DOSes, only the latest one is reported. - - DOS is reported only under the following conditions: - - There is a non-empty array of dos_values_normalized. - - There is a non-empty array of dos_energies. - - The reported dos_kind is not "vibrational". - """ - path = ["run", "calculation", "dos_electronic"] - for dos in self.traverse_reversed(entry_archive, path): - energies = dos.energies - values = dos.total[-1].value - if valid_array(energies) and valid_array(values): - return True - - return False - - def band_structure_phonon(self, entry_archive): - """Returns whether a valid phonon band structure can be found. In the - case of multiple valid band structures, only the latest one is - considered. - - Band structure is reported only under the following conditions: - - There is a non-empty array of band_k_points. - - There is a non-empty array of band_energies. - - The reported band_structure_kind is "vibrational". - """ - path = ["run", "calculation", "band_structure_phonon"] - valid = False - for bs in self.traverse_reversed(entry_archive, path): - valid = True - for segment in bs.segment: - energies = segment.energies - k_points = segment.kpoints - if not valid_array(energies) or not valid_array(k_points): - valid = False - break - if valid: - break - - return valid - - def dos_phonon(self, entry_archive): - """Returns whether a valid phonon dos can be found. In the case of - multiple valid data sources, only the latest one is reported. - - DOS is reported only under the following conditions: - - There is a non-empty array of dos_values_normalized. - - There is a non-empty array of dos_energies. - - The reported dos_kind is "vibrational". - """ - path = ["run", "calculation", "dos_phonon"] - for dos in self.traverse_reversed(entry_archive, path): - energies = dos.energies - values = dos.total[-1].value - if valid_array(energies) and valid_array(values): - return True - - return False - - def traverse_reversed(self, entry_archive, path): - """Traverses the given metainfo path in reverse order. Useful in - finding the latest reported section or value. - """ - def traverse(root, path, i): - sections = getattr(root, path[i]) - if isinstance(sections, list): - for section in reversed(sections): - if i == len(path) - 1: - yield section - else: - for s in traverse(section, path, i + 1): - yield s - else: - if i == len(path) - 1: - yield sections - else: - for s in traverse(sections, path, i + 1): - yield s - for t in traverse(entry_archive, path, 0): - if t is not None: - yield t diff --git a/nomad/datamodel/ems.py b/nomad/datamodel/ems.py deleted file mode 100644 index 6a548d9f9357629cae2f87af4ea40ffe2048a30a..0000000000000000000000000000000000000000 --- a/nomad/datamodel/ems.py +++ /dev/null @@ -1,140 +0,0 @@ -# -# Copyright The NOMAD Authors. -# -# This file is part of NOMAD. See https://nomad-lab.eu for further info. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -''' -Experimental material science specific metadata -''' -from nomad import config -from nomad.metainfo import Quantity, MSection, Section, Datetime - - -def _unavailable(value): - if value is None: - return config.services.unavailable_value - - return value - - -class EMSMetadata(MSection): - m_def = Section(a_domain='ems') - - # sample quantities - chemical = Quantity(type=str) - sample_constituents = Quantity(type=str) - sample_microstructure = Quantity(type=str) - - # general metadata - experiment_summary = Quantity(type=str) - origin_time = Quantity(type=Datetime) - experiment_location = Quantity(type=str) - - # method - method = Quantity(type=str) - data_type = Quantity(type=str) - probing_method = Quantity(type=str) - - # origin metadata - repository_name = Quantity(type=str) - repository_url = Quantity(type=str) - entry_repository_url = Quantity(type=str) - preview_url = Quantity(type=str) - - # TODO move to more a general metadata section - quantities = Quantity(type=str, shape=['0..*'], default=[]) - group_hash = Quantity(type=str) - - def apply_domain_metadata(self, entry_archive): - from nomad import utils - - if entry_archive is None: - return - - entry = self.m_parent - - root_section = entry_archive.section_measurement[0] - - sample = root_section.section_metadata.section_sample - entry.formula = config.services.unavailable_value - if sample: - # TODO deal with multiple materials - material = sample.section_material[0] if len(sample.section_material) > 0 else None - if material: - entry.formula = _unavailable(material.formula) - atoms = material.elements - - if atoms is None: - entry.atoms = [] - else: - if hasattr(atoms, 'tolist'): - atoms = atoms.tolist() - entry.n_atoms = len(atoms) - - atoms = list(set(atoms)) - atoms.sort() - entry.atoms = atoms - - if material.name: - self.chemical = _unavailable(material.name) - else: - self.chemical = _unavailable(material.formula) - - self.sample_microstructure = _unavailable(sample.sample_microstructure) - self.sample_constituents = _unavailable(sample.sample_constituents) - - self.experiment_summary = root_section.section_metadata.section_experiment.notes - location = root_section.section_metadata.section_experiment.experiment_location - if location is not None: - location_str = ', '.join([ - getattr(location, prop) - for prop in ['facility', 'institution', 'address'] - if getattr(location, prop) is not None]) - self.experiment_location = location_str - - if root_section.section_metadata.section_experiment.experiment_start_time: - self.origin_time = root_section.section_metadata.section_experiment.experiment_start_time - elif root_section.section_metadata.section_experiment.experiment_publish_time: - self.origin_time = root_section.section_metadata.section_experiment.experiment_publish_time - else: - self.origin_time = self.m_parent.upload_time - - # self.data_type = _unavailable(root_section.section_method.data_type) - self.method = _unavailable(root_section.section_metadata.section_experiment.method_name) - # self.probing_method = _unavailable(root_section.section_method.probing_method) - - self.repository_name = _unavailable(root_section.section_metadata.section_origin.repository_name) - self.repository_url = root_section.section_metadata.section_origin.repository_url - self.preview_url = root_section.section_metadata.section_origin.preview_url - self.entry_repository_url = root_section.section_metadata.section_origin.entry_repository_url - - self.group_hash = utils.hash( - entry.formula, - self.method, - self.experiment_location, - entry.with_embargo, - entry.uploader) - - quantities = set() - - quantities.add(root_section.m_def.name) - for _, property_def, _ in root_section.m_traverse(): - quantities.add(property_def.name) - - self.quantities = list(quantities) - - if self.m_parent.references is None: - self.m_parent.references = [self.entry_repository_url] diff --git a/nomad/normalizing/__init__.py b/nomad/normalizing/__init__.py index 8d561661c25f5cb7698cbd9e8068c140a6f943a7..8b6fbf9fcd95884c37b326e4c5c44e3248edf537 100644 --- a/nomad/normalizing/__init__.py +++ b/nomad/normalizing/__init__.py @@ -41,7 +41,6 @@ from typing import List, Any, Iterable, Type from .system import SystemNormalizer from .optimade import OptimadeNormalizer -from .fhiaims import FhiAimsBaseNormalizer from .dos import DosNormalizer from .normalizer import Normalizer from .band_structure import BandStructureNormalizer @@ -52,7 +51,6 @@ from .results import ResultsNormalizer normalizers: Iterable[Type[Normalizer]] = [ SystemNormalizer, OptimadeNormalizer, - # FhiAimsBaseNormalizer, DosNormalizer, BandStructureNormalizer, WorkflowNormalizer, diff --git a/nomad/normalizing/fhiaims.py b/nomad/normalizing/fhiaims.py deleted file mode 100644 index eea99e3e4546d1a825a77f24b71ae1d1f5466161..0000000000000000000000000000000000000000 --- a/nomad/normalizing/fhiaims.py +++ /dev/null @@ -1,125 +0,0 @@ -# -# Copyright The NOMAD Authors. -# -# This file is part of NOMAD. See https://nomad-lab.eu for further info. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -import os.path -import json -import numpy as np - -from nomad.normalizing.normalizer import Normalizer - -controlIn_basis_set = 'x_fhi_aims_section_controlIn_basis_set' -controlIn_basis_func = 'x_fhi_aims_section_controlIn_basis_func' -controlIn_nucleus = 'x_fhi_aims_controlIn_nucleus' - -pure_types_json = dict() - -for pure_types_str in ['light', 'really_tight', 'tight']: - with open(os.path.join(os.path.dirname(__file__), 'data', pure_types_str + '.json')) as f: - json_data = json.load(f) - section_method = json_data['sections']['section_run-0']['sections']['section_method-0'] - pure_types_json[pure_types_str] = section_method[controlIn_basis_set] - - -class FhiAimsBaseNormalizer(Normalizer): - - # Finds out if val is in the array - def compare_val_list(self, val, list): - if val in list: - return 0 - return 1 - - # Comparison between two dicts. - # List structure: - def compare_dict_dict(self, d1, d2): - sum2 = np.zeros(len(d2)) - - # Loop over the size of dict2 - for k in np.arange(0, len(d2)): - # Lopp over the elements of each dict2 - for idx, val in d1.items(): - # Excludes the keys that are always different. - if (idx not in ["gIndex", "references", "uri"]): - try: - if (val != d2[k][idx]): - sum2[k] = sum2[k] + 1 - except KeyError: # this exception case arises if the cut off potential is not a number - continue - if (min(sum2) == 0): - return 0 - else: - return 1 # sum(sum2) - - def compare_to_defaults(self, dict2_default, dict1): - # first compare the integration grid - false_hits_integration_grid = 0 - false_hits_basis = 0 - - for key in dict1: - if key not in ['gIndex', 'uri', controlIn_basis_func]: - if np.size(dict1[key]) == 1: - if(dict1[key] != dict2_default[key]): - false_hits_integration_grid += 1 - false_hits_integration_grid += abs(np.size(dict1[key]) - np.size(dict2_default[key])) - if np.size(dict1[key]) > 1: - for i in dict1[key]: - false_hits_integration_grid += self.compare_val_list(i, dict2_default[key]) - false_hits_integration_grid += abs(np.size(dict1[key]) - np.size(dict2_default[key])) - - elif (key == controlIn_basis_func): - for i in np.arange(0, len(dict1[key])): - false_hits_basis += self.compare_dict_dict( - dict1[key][i], dict2_default[key]) - false_hits_basis += abs(len(dict1[key]) - len(dict2_default[key])) - - return [false_hits_integration_grid, false_hits_basis] - - def normalize(self, logger=None) -> None: - super().normalize(logger) - if not self.section_run or self.section_run.program_name != 'FHI-aims': - return - - for method in self.section_run.section_method: - to_compare = getattr(method, controlIn_basis_set, None) - if to_compare is None: - # not fhi aims data - continue - - matrix_hits_int = dict.fromkeys(pure_types_json, 0) - matrix_hits_basis = dict.fromkeys(pure_types_json, 0) - - for index, data in enumerate(to_compare): - atom_index = int(data[controlIn_nucleus]) - for key, val in pure_types_json.items(): - results = self.compare_to_defaults(val[atom_index], to_compare[index]) - matrix_hits_int[key] += results[0] - matrix_hits_basis[key] += results[1] - - # matrix_hits[key]=matrix_hits[key]+CompareToDefaults(val[AtomIndex],to_compare[i]) - - closest_base_int = min(matrix_hits_int, key=matrix_hits_int.get) - if (matrix_hits_basis[min(matrix_hits_basis, key=matrix_hits_basis.get)] == 0): - closest_base_base = '' - else: - closest_base_base = '+' - - if (matrix_hits_int[closest_base_int] == 0): - method.basis_set = closest_base_int + closest_base_base - elif(matrix_hits_int[closest_base_int] <= 5): - method.basis_set = '~' + closest_base_int + closest_base_base - elif(matrix_hits_int[closest_base_int] > 5): - method.basis_set = 'custom-' + closest_base_int diff --git a/nomad/processing/data.py b/nomad/processing/data.py index 6a10564facd1a861b4ad26c0ed8cd1c9c1b060f2..99973f07b0b1b32df1a095a8686604059fa4e314 100644 --- a/nomad/processing/data.py +++ b/nomad/processing/data.py @@ -498,7 +498,7 @@ class Calc(Proc): 'could not apply entry metadata to entry', exc_info=e) try: - self._entry_metadata.apply_domain_metadata(self._parser_results) + self._entry_metadata.apply_archvie_metadata(self._parser_results) except Exception as e: self.get_logger().error( 'could not apply domain metadata to entry', exc_info=e) @@ -603,9 +603,6 @@ class Calc(Proc): # new timestamp and method details taken from the referenced # archive. self._entry_metadata.last_processing = datetime.utcnow() - self._entry_metadata.dft.xc_functional = ref_archive.metadata.dft.xc_functional - self._entry_metadata.dft.basis_set = ref_archive.metadata.dft.basis_set - self._entry_metadata.dft.update_group_hash() self._entry_metadata.encyclopedia.status = EncyclopediaMetadata.status.type.success except Exception as e: logger.error("Could not retrieve method information for phonon calculation.", exc_info=e) @@ -666,7 +663,7 @@ class Calc(Proc): self.set_process_step('archiving') logger = self.get_logger() - self._entry_metadata.apply_domain_metadata(self._parser_results) + self._entry_metadata.apply_archvie_metadata(self._parser_results) self._entry_metadata.processed = True if self.upload.publish_directly: diff --git a/nomad/search.py b/nomad/search.py index 500a2ad35fe85ae18ddd5ecf302f33740e3edd1b..7376eac261703e636de350ed11851195ac406888 100644 --- a/nomad/search.py +++ b/nomad/search.py @@ -478,8 +478,7 @@ def validate_api_query( value = str(value) from nomad.app.optimade import filterparser try: - return filterparser.parse_filter( - value, nomad_properties='dft', without_prefix=True) + return filterparser.parse_filter(value, without_prefix=True) except filterparser.FilterException as e: raise QueryValidationError( diff --git a/ops/helm/nomad/deployments/prod-beta-values.yaml b/ops/helm/nomad/deployments/prod-beta-values.yaml index da831e8a3ee1a50804cf39b01228ea06c1c9a236..14264485e30cbaa5c64a732388ad07af9319fff4 100644 --- a/ops/helm/nomad/deployments/prod-beta-values.yaml +++ b/ops/helm/nomad/deployments/prod-beta-values.yaml @@ -41,7 +41,7 @@ mongo: logstash: host: logstash.elk.svc.cluster.local -dbname: fairdi_nomad_prod_v1 +dbname: nomad_prod_v1 uploadurl: 'http://nomad-lab.eu/prod/rae/beta/api/uploads' diff --git a/ops/helm/nomad/deployments/prod-test-values.yaml b/ops/helm/nomad/deployments/prod-test-values.yaml index 018f608b83aa552722a7ad7e3b0489e74e562155..8007eea1aea48780f03230e9741cded5273589bf 100644 --- a/ops/helm/nomad/deployments/prod-test-values.yaml +++ b/ops/helm/nomad/deployments/prod-test-values.yaml @@ -41,7 +41,7 @@ mongo: logstash: host: logstash.elk.svc.cluster.local -dbname: fairdi_nomad_test_v1 +dbname: nomad_test_v1 uploadurl: 'http://nomad-lab.eu/prod/rae/test/api/uploads' diff --git a/ops/helm/nomad/templates/nomad-configmap.yml b/ops/helm/nomad/templates/nomad-configmap.yml index 3360af940082b6e58becf0dfd6a5b3a5351ce792..f13467eaa139491981fb0c62037d18bcb979e59c 100644 --- a/ops/helm/nomad/templates/nomad-configmap.yml +++ b/ops/helm/nomad/templates/nomad-configmap.yml @@ -48,8 +48,7 @@ data: elastic: host: "{{ .Values.elastic.host }}" port: {{ .Values.elastic.port }} - index_name: "{{ .Values.dbname }}_entries" - materials_index_name: "{{ .Values.dbname }}_materials_v0" + entries_index: "{{ .Values.dbname }}_entries_v1" materials_index: "{{ .Values.dbname }}_materials_v1" mongo: host: "{{ .Values.mongo.host }}" diff --git a/tests/app/flask/test_dcat.py b/tests/app/flask/test_dcat.py index 78bdbb1cc094332ab543d23a026039665f78f03c..f5878fde7afc06a7009fe759d09885fbc8d2a466 100644 --- a/tests/app/flask/test_dcat.py +++ b/tests/app/flask/test_dcat.py @@ -20,6 +20,7 @@ import pytest from datetime import datetime from nomad.app.flask.dcat.mapping import Mapping +from nomad.datamodel.results import Material, Results from tests.utils import ExampleData @@ -53,11 +54,11 @@ def data(test_user, other_test_user, elastic_infra): uploader=test_user, coauthors=[other_test_user], comment='this is a calculation comment', - formula='H20', published=True) data = ExampleData() - data.create_entry(**example_attrs) + archive = data.create_entry(**example_attrs) + archive.m_create(Results).m_create(Material).chemical_formula_descriptive = 'H2O' for i in range(1, 11): example_attrs.update( diff --git a/tests/app/v1/routers/test_info.py b/tests/app/v1/routers/test_info.py index 73007bb34e40aef157b7376b6b72435863c48f57..24035f424a7d90c8d9d22974d414c222215cee25 100644 --- a/tests/app/v1/routers/test_info.py +++ b/tests/app/v1/routers/test_info.py @@ -26,7 +26,6 @@ def test_info(client, elastic): assert 'parsers' in data assert 'statistics' in data assert len(data['parsers']) >= len(data['codes']) - assert len(data['domains']) >= 1 assert rv.status_code == 200 rv = client.get('info') diff --git a/tests/normalizing/test_system.py b/tests/normalizing/test_system.py index 8ffac7138dfafbbbbaa5e2b4efcbc11ab6bb225d..b3242ae7f3f881f4a23ccffc8fa88874e2d3f1e0 100644 --- a/tests/normalizing/test_system.py +++ b/tests/normalizing/test_system.py @@ -120,7 +120,7 @@ def assert_normalized(entry_archive: datamodel.EntryArchive): metadata = entry_archive.metadata results = entry_archive.results - metadata.apply_domain_metadata(entry_archive) + metadata.apply_archvie_metadata(entry_archive) parser_name = metadata.parser_name exceptions = parser_exceptions.get(parser_name, []) diff --git a/tests/processing/test_data.py b/tests/processing/test_data.py index 545379959659d4099466183f5b3ed45607487771..299861f613a823dcb597a285a2dbb0b058582bf9 100644 --- a/tests/processing/test_data.py +++ b/tests/processing/test_data.py @@ -16,6 +16,7 @@ # limitations under the License. # +from nomad.datamodel.datamodel import EntryArchive from typing import Generator, Tuple import pytest from datetime import datetime @@ -128,9 +129,6 @@ def assert_processing(upload: Upload, published: bool = False, process='process_ # check some (domain) metadata assert entry_metadata.quantities assert len(entry_metadata.quantities) > 0 - - assert entry_metadata.n_atoms > 0 - assert len(entry_metadata.atoms) > 0 assert len(entry_metadata.processing_errors) == 0 assert upload.get_calc(calc.calc_id) is not None @@ -382,12 +380,14 @@ def test_re_processing(published: Upload, internal_example_user_metadata, monkey # assert changed archive files if with_failure == 'after': - with published.upload_files.read_archive(first_calc.calc_id) as archive: - assert list(archive[first_calc.calc_id].keys()) == ['processing_logs', 'metadata'] + with published.upload_files.read_archive(first_calc.calc_id) as archive_reader: + assert list(archive_reader[first_calc.calc_id].keys()) == ['processing_logs', 'metadata'] + archive = EntryArchive.m_from_dict(archive_reader[first_calc.calc_id].to_dict()) else: - with published.upload_files.read_archive(first_calc.calc_id) as archive: - assert len(archive[first_calc.calc_id]) > 2 # contains more then logs and metadata + with published.upload_files.read_archive(first_calc.calc_id) as archive_reader: + assert len(archive_reader[first_calc.calc_id]) > 2 # contains more then logs and metadata + archive = EntryArchive.m_from_dict(archive_reader[first_calc.calc_id].to_dict()) # assert maintained user metadata (mongo+es) assert_upload_files(published.upload_id, entries, PublicUploadFiles, published=True) @@ -395,14 +395,13 @@ def test_re_processing(published: Upload, internal_example_user_metadata, monkey if with_failure not in ['after', 'not-matched']: assert_processing(Upload.get(published.upload_id, include_published=True), published=True) - # assert changed calc metadata (mongo) - entry_metadata = first_calc.full_entry_metadata(published.upload_files) + # assert changed calc data if with_failure not in ['after', 'not-matched']: - assert entry_metadata.atoms[0] == 'H' + assert archive.results.material.elements[0] == 'H' elif with_failure == 'not-matched': - assert entry_metadata.atoms[0] == 'Si' + assert archive.results.material.elements[0] == 'Si' else: - assert entry_metadata.atoms == [] + assert archive.results is None @pytest.mark.parametrize('publish,old_staging', [ @@ -542,8 +541,6 @@ def test_process_failure(monkeypatch, uploaded, function, proc_infra, test_user, with upload.upload_files.read_archive(calc.calc_id) as archive: calc_archive = archive[calc.calc_id] assert 'metadata' in calc_archive - assert calc_archive['metadata']['dft']['code_name'] not in [ - config.services.unavailable_value, config.services.not_processed_value] if function != 'cleanup': assert len(calc_archive['metadata']['processing_errors']) > 0 assert 'processing_logs' in calc_archive diff --git a/tests/test_datamodel.py b/tests/test_datamodel.py index b27c972d357285d68ee51f80fc145e6485b360b1..7a64c26763bce6a23e6427db51fc7fc5296ba38a 100644 --- a/tests/test_datamodel.py +++ b/tests/test_datamodel.py @@ -23,8 +23,6 @@ A generator for random test calculations. import random from essential_generators import DocumentGenerator import datetime -from ase.data import chemical_symbols -from ase.spacegroup import Spacegroup from nomad import datamodel, utils from nomad.parsing.parsers import parser_dict @@ -95,24 +93,6 @@ def generate_calc(pid: int = 0, calc_id: str = None, upload_id: str = None, with _gen_dataset() for _ in range(0, random.choice(low_numbers_for_refs_and_datasets))) - entry.atoms = list(random.choices(chemical_symbols[1:], k=random.choice(low_numbers_for_atoms))) - entry.formula = ''.join('%s%d' % (atom, random.choice(low_numbers_for_atoms)) for atom in entry.atoms) - entry.formula = entry.formula.replace('1', '') - - dft_metadata = entry.m_create(datamodel.DFTMetadata) - dft_metadata.basis_set = random.choice(basis_sets) - dft_metadata.xc_functional = random.choice(xc_functionals) - dft_metadata.system = random.choice(systems) - dft_metadata.crystal_system = random.choice(crystal_systems) - spacegroup = random.randint(1, 225) - dft_metadata.spacegroup = str(spacegroup) - dft_metadata.spacegroup_symbol = Spacegroup(spacegroup).symbol - dft_metadata.code_name = random.choice(codes) - dft_metadata.code_version = '1.0.0' - - dft_metadata.n_total_energies = random.choice(range(0, 5)) - dft_metadata.geometries = ['%d' % random.randint(1, 500), '%d' % random.randint(1, 500)] - return entry diff --git a/tests/utils.py b/tests/utils.py index b003f7be58f97a5d893aa2e180dc4f439f1f6fd7..462a6c433c08aeb40862cafc3e81430bd8fb6094 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -27,7 +27,7 @@ import zipfile import os.path from nomad import search, files -from nomad.datamodel import EntryMetadata, EntryArchive, DFTMetadata, Results +from nomad.datamodel import EntryMetadata, EntryArchive, Results from nomad.datamodel.metainfo.simulation.run import Run, Program from nomad.datamodel.metainfo.simulation.system import System, Atoms from tests.normalizing.conftest import run_normalize @@ -260,7 +260,6 @@ class ExampleData: material_id: str = None, mainfile: str = None, results: Union[Results, dict] = None, - dft: Union[DFTMetadata, dict] = None, archive: dict = None, **kwargs) -> EntryArchive: if entry_id is None: @@ -307,26 +306,6 @@ class ExampleData: entry_metadata.m_update(**self.entry_defaults) entry_metadata.m_update(**kwargs) - # create v0 default data - if entry_archive.metadata.dft is None: - if dft is None: - dft = { - 'xc_functional': 'GGA', - 'code_name': 'VASP', - 'n_calculations': 1, - 'atoms': ['H', 'O'], - 'n_atoms': 2 - } - if isinstance(dft, dict): - for key in ['atoms', 'n_atoms']: - if key in dft: - setattr(entry_metadata, key, dft.pop(key)) - section_dft = DFTMetadata.m_from_dict(dft) - else: - section_dft = dft - assert isinstance(section_dft, DFTMetadata) - entry_metadata.m_add_sub_section(EntryMetadata.dft, section_dft) - # create v1 default data if entry_archive.results is None: if results is None: @@ -426,7 +405,7 @@ class ExampleData: run_normalize(archive) entry_metadata = archive.metadata entry_metadata.domain = 'dft' - entry_metadata.apply_domain_metadata(archive) + entry_metadata.apply_archvie_metadata(archive) if not optimade: entry_metadata.optimade = None