diff --git a/dependencies/parsers/nexus b/dependencies/parsers/nexus index 78b721318cb824e404234702c0aef19a7d8b6dba..4a01429f1b8a3e23e93af85de03f63bd024c0bb0 160000 --- a/dependencies/parsers/nexus +++ b/dependencies/parsers/nexus @@ -1 +1 @@ -Subproject commit 78b721318cb824e404234702c0aef19a7d8b6dba +Subproject commit 4a01429f1b8a3e23e93af85de03f63bd024c0bb0 diff --git a/examples/data/custom-schema/full-storage-quantities.archive.yaml b/examples/data/custom-schema/full-storage-quantities.archive.yaml new file mode 100644 index 0000000000000000000000000000000000000000..09d6d121bf7683d33cb4fcac87580f079613f5a0 --- /dev/null +++ b/examples/data/custom-schema/full-storage-quantities.archive.yaml @@ -0,0 +1,58 @@ +definitions: + sections: + MySection: + base_section: nomad.datamodel.EntryData + attributes: + - + name: section_attr_1 + type: str + - + name: section_attr_2 + type: float + quantities: + my_quantity: + type: str + attributes: + - + name: quantity_attr_1 + type: str + - + name: quantity_attr_2 + type: float + MY_variable_quantity: + type: np.float64 + variable: true + shape: [3,3] + dimensionality: '[length]' + unit: 'm' + attributes: + - + name: quantity_attr_1 + type: str + - + name: quantity_attr_2 + type: float + +data: + m_def: MySection + m_attributes: + section_attr_1: "value" + section_attr_2: 0 + MY_variable_quantity: + foo_variable_quantity: + m_value: [[1,1,1], [0,0,0], [-1,-1,-1]] + m_unit: 'mm' + m_attributes: + quantity_attr_1: "foo" + quantity_attr_2: 0 + bar_variable_quantity: + m_value: [[1,1,1], [0,0,0], [-1,-1,-1]] + m_unit: 'cm' + m_attributes: + quantity_attr_1: "bar" + quantity_attr_2: 0 + my_quantity: + my_quantity: + m_value: 'value' + m_attributes: + quantity_attr_2: 0.0 \ No newline at end of file diff --git a/gui/package.json b/gui/package.json index 175e05a5e0ee13108c296d94fbf11439cf33b5b6..d3b8f41183a56d98787affa64c442ce53925892b 100644 --- a/gui/package.json +++ b/gui/package.json @@ -145,7 +145,7 @@ }, "resolutions": { "//": 
"See https://github.com/facebook/create-react-app/issues/11773 and https://github.com/jsdom/jsdom/issues/3419", - "react-error-overlay": "6.0.11", + "react-error-overlay": "6.0.9", "tough-cookie": "4.0.0" } } diff --git a/gui/src/components/archive/ArchiveBrowser.js b/gui/src/components/archive/ArchiveBrowser.js index 529409ae3db044d67bea749d51af195a77c77193..cafe285bc329b81c18ab21a33742aeeabbf8cc0d 100644 --- a/gui/src/components/archive/ArchiveBrowser.js +++ b/gui/src/components/archive/ArchiveBrowser.js @@ -30,7 +30,8 @@ import Autocomplete from '@material-ui/lab/Autocomplete' import Browser, { Item, Content, Compartment, Adaptor, formatSubSectionName, laneContext, useLane, browserContext, ItemChip } from './Browser' import { RawFileAdaptor } from './FileBrowser' import { - isEditable, PackageMDef, QuantityMDef, removeSubSection, SectionMDef, SubSectionMDef, + AttributeMDef, + isEditable, PackageMDef, QuantityMDef, quantityUsesFullStorage, removeSubSection, SectionMDef, SubSectionMDef, useMetainfo, getMetainfoFromDefinition, getUrlFromDefinition } from './metainfo' import { ArchiveTitle, metainfoAdaptorFactory, DefinitionLabel } from './MetainfoBrowser' @@ -471,6 +472,10 @@ class ArchiveAdaptor extends Adaptor { return new QuantityAdaptor(baseUrl, obj, def) } + if (def.m_def === AttributeMDef) { + return new AttributeAdaptor(baseUrl, obj, def) + } + throw new Error('not implemented') } @@ -486,8 +491,11 @@ class ArchiveAdaptor extends Adaptor { class SectionAdaptor extends ArchiveAdaptor { async itemAdaptor(key) { const [name, index] = key.split(':') - const property = this.def._properties[name] - const value = this.obj[name] || property?.default + const property = this.def._properties[name] || (name === 'm_attributes' && this.def.attributes.find(attr => attr.name === index)) + let value = this.obj[name] || property?.default + if (property.m_def === QuantityMDef && quantityUsesFullStorage(property)) { + value = value[index] + } if (!property) { return 
super.itemAdaptor(key) } else if (property.m_def === SubSectionMDef) { @@ -546,6 +554,8 @@ class SectionAdaptor extends ArchiveAdaptor { } } return this.adaptorFactory(this.parsedBaseUrl, value, property) + } else if (property.m_def === AttributeMDef) { + return this.adaptorFactory(this.parsedBaseUrl, this.obj?.m_attributes[index], property) } else { throw new Error('Unknown metainfo meta definition') } @@ -567,8 +577,28 @@ class UnresolvedReferenceAdaptor extends ArchiveAdaptor { } class QuantityAdaptor extends ArchiveAdaptor { + async itemAdaptor(key) { + const attribute = this.def?.attributes?.find(attr => attr.name === key) + if (attribute) { + const value = this.obj?.m_attributes?.[key] + return await this.adaptorFactory(this.parsedBaseUrl, value, attribute) + } + + return super.itemAdaptor(key) + } + render() { - return + if (quantityUsesFullStorage(this.def)) { + return + } else { + return + } + } +} + +class AttributeAdaptor extends ArchiveAdaptor { + render() { + return } } @@ -586,22 +616,26 @@ function QuantityItemPreview({value, def}) { } if (def.shape.length > 0) { const dimensions = [] - let current = value - for (let i = 0; i < def.shape.length; i++) { - dimensions.push(current.length) - current = current[0] - } - let typeLabel - if (def.type.type_kind === 'python') { - typeLabel = 'list' - } else { - if (dimensions.length === 1) { - typeLabel = 'vector' - } else if (dimensions.length === 2) { - typeLabel = 'matrix' + let typeLabel = 'unknown' + try { + let current = value + for (let i = 0; i < def.shape.length; i++) { + dimensions.push(current.length) + current = current[0] + } + if (def.type.type_kind === 'python') { + typeLabel = 'list' } else { - typeLabel = 'tensor' + if (dimensions.length === 1) { + typeLabel = 'vector' + } else if (dimensions.length === 2) { + typeLabel = 'matrix' + } else { + typeLabel = 'tensor' + } } + } catch (e) { + console.error('Quantity shape did not fit quantity value.', e) } return @@ -631,19 +665,24 @@ 
QuantityItemPreview.propTypes = ({ def: PropTypes.object.isRequired }) -const QuantityValue = React.memo(function QuantityValue({value, def}) { +const QuantityValue = React.memo(function QuantityValue({value, def, ...more}) { const units = useUnits() const getRenderValue = useCallback(value => { let finalValue = (def.type.type_data === 'nomad.metainfo.metainfo._Datetime' ? formatTimestamp(value) : value) let finalUnit if (def.unit) { - const a = new Q(finalValue, def.unit).toSystem(units) - finalValue = a.value() - finalUnit = a.label() + const systemUnitQ = new Q(finalValue, def.unit).toSystem(units) + finalValue = systemUnitQ.value() + finalUnit = systemUnitQ.label() + if (more.unit) { + const customUnitQ = systemUnitQ.to(more.unit) + finalValue = customUnitQ.value() + finalUnit = customUnitQ.label() + } } return [finalValue, finalUnit] - }, [def, units]) + }, [def, more, units]) const isMathValue = def.type.type_kind === 'numpy' if (isMathValue) { @@ -687,7 +726,8 @@ const QuantityValue = React.memo(function QuantityValue({value, def}) { }) QuantityValue.propTypes = ({ value: PropTypes.any, - def: PropTypes.object.isRequired + def: PropTypes.object.isRequired, + unit: PropTypes.string }) const InheritingSections = React.memo(function InheritingSections({def, section, lane}) { @@ -797,20 +837,15 @@ function Section({section, def, parentRelation, sectionIsEditable, sectionIsInEl }, [navEntryId, setShowJson, sectionIsEditable, parentRelation, lane, history, handleArchiveChanged, section]) - const renderQuantity = useCallback(quantityDef => { - const key = quantityDef.name - const value = section[key] || quantityDef.default + const renderQuantityItem = useCallback((key, quantityName, quantityDef, value, disabled) => { + const itemKey = quantityName ? 
`${key}:${quantityName}` : key const isDefault = value && !section[key] - const disabled = value === undefined - if (!disabled && quantityDef.type.type_kind === 'reference' && quantityDef.shape.length === 1) { - return - } return ( - + - {quantityDef.name} + {quantityName || quantityDef.name} {!disabled &&  =  @@ -826,6 +861,25 @@ function Section({section, def, parentRelation, sectionIsEditable, sectionIsInEl ) }, [section]) + const renderQuantity = useCallback(quantityDef => { + const key = quantityDef.name + const value = section[key] || quantityDef.default + const disabled = value === undefined + if (!disabled && quantityDef.type.type_kind === 'reference' && quantityDef.shape.length === 1) { + return + } + if (quantityUsesFullStorage(quantityDef)) { + const storage = section[quantityDef.name] || {} + return + {Object.keys(storage).map(quantityName => + renderQuantityItem(key, quantityName, quantityDef, storage[quantityName]?.m_value, disabled) + )} + + } else { + return renderQuantityItem(key, null, quantityDef, value, disabled) + } + }, [section, renderQuantityItem]) + if (!section) { console.error('section is not available') return null @@ -877,6 +931,7 @@ function Section({section, def, parentRelation, sectionIsEditable, sectionIsInEl {def.m_annotations?.plot && } } else { + const attributes = section?.m_attributes || {} contents = {subSectionCompartment} @@ -886,6 +941,11 @@ function Section({section, def, parentRelation, sectionIsEditable, sectionIsInEl .map(renderQuantity) } + {Object.keys(attributes).length > 0 && + {Object.keys(attributes).map(key => ( + {key} + ))} + } {def.m_annotations?.plot && } } @@ -922,7 +982,12 @@ function SubSection({subSectionDef, section, editable}) { sectionDef._properties[key] && sectionDef._properties[key].m_def === QuantityMDef )) } - const labelQuantity = itemLabelKey && sectionDef._properties[itemLabelKey] + let labelQuantity = itemLabelKey && sectionDef._properties[itemLabelKey] + if (labelQuantity && 
quantityUsesFullStorage(labelQuantity)) { + // We do not yet support label quantities that use full storage + labelQuantity = undefined + itemLabelKey = undefined + } const getItemLabel = item => { if (labelQuantity) { const value = item[itemLabelKey] @@ -1146,7 +1211,22 @@ SectionPlots.propTypes = { section: PropTypes.object } -function Quantity({value, def}) { +function FullStorageQuantity({value, def}) { + const attributes = value.m_attributes || {} + return + {Object.keys(attributes).length > 0 && + {Object.keys(attributes).map(key => ( + {key} + ))} + } + +} +FullStorageQuantity.propTypes = ({ + value: PropTypes.any, + def: PropTypes.object.isRequired +}) + +function Quantity({value, def, unit, children}) { const {prev} = useLane() return @@ -1164,12 +1244,36 @@ function Quantity({value, def}) { + {children} } Quantity.propTypes = ({ + value: PropTypes.any, + def: PropTypes.object.isRequired, + unit: PropTypes.string, + children: PropTypes.oneOfType([ + PropTypes.arrayOf(PropTypes.node), + PropTypes.node + ]) +}) + +function Attribute({value, def}) { + return + + + + + + +} +Attribute.propTypes = ({ value: PropTypes.any, def: PropTypes.object.isRequired }) diff --git a/gui/src/components/archive/Browser.js b/gui/src/components/archive/Browser.js index 749d785c092f647decf290db2174262beaa3b6c1..ad3761722ccce38940abbd400103b663ff4ee79a 100644 --- a/gui/src/components/archive/Browser.js +++ b/gui/src/components/archive/Browser.js @@ -539,20 +539,27 @@ export function Content(props) { return } -export function Compartment({title, children, color}) { +export function Compartment({title, children, color, startCollapsed}) { + const [collapsed, setCollapsed] = useState(startCollapsed) + const handleClick = useCallback(() => { + setCollapsed(value => !value) + }, [setCollapsed]) if (!React.Children.count(children)) { return null } + return - + {title && {title}} + {collapsed && } - {children} + {(!collapsed) && children} } Compartment.propTypes = ({ title: 
PropTypes.string, color: PropTypes.string, + startCollapsed: PropTypes.bool, children: PropTypes.oneOfType([ PropTypes.arrayOf(PropTypes.node), PropTypes.node diff --git a/gui/src/components/archive/MetainfoBrowser.js b/gui/src/components/archive/MetainfoBrowser.js index 2dbd9a76496158548ab234718f9385b94254adce..825be68d09ae2e61d504ce8dc3f1894934acece8 100644 --- a/gui/src/components/archive/MetainfoBrowser.js +++ b/gui/src/components/archive/MetainfoBrowser.js @@ -19,9 +19,9 @@ import React, { useMemo, useEffect, useRef, useLayoutEffect, useContext, useStat import PropTypes from 'prop-types' import { useRecoilValue, useRecoilState, atom } from 'recoil' import { configState } from './ArchiveBrowser' -import Browser, { Item, Content, Compartment, Adaptor, laneContext, formatSubSectionName, Title } from './Browser' +import Browser, { Item, Content, Compartment, Adaptor, laneContext, formatSubSectionName, Title, ItemChip } from './Browser' import { Typography, Box, makeStyles, FormGroup, TextField, Button, Link } from '@material-ui/core' -import { vicinityGraph, SubSectionMDef, SectionMDef, QuantityMDef, CategoryMDef, useGlobalMetainfo, PackageMDef, getMetainfoFromDefinition } from './metainfo' +import { vicinityGraph, SubSectionMDef, SectionMDef, QuantityMDef, CategoryMDef, useGlobalMetainfo, PackageMDef, AttributeMDef, getMetainfoFromDefinition } from './metainfo' import * as d3 from 'd3' import blue from '@material-ui/core/colors/blue' import teal from '@material-ui/core/colors/teal' @@ -172,6 +172,8 @@ export async function metainfoAdaptorFactory(def) { return new CategoryDefAdaptor(def) } else if (def.m_def === PackageMDef) { return new PackageDefAdaptor(def) + } else if (def.m_def === AttributeMDef) { + return new AttributeDefAdaptor(def) } else { throw new Error('Unknown metainfo definition type') } @@ -363,6 +365,11 @@ export class SectionDefAdaptor extends MetainfoAdaptor { return metainfoAdaptorFactory(property) } + const attribute = 
this.def.attributes?.find(attr => attr.name === key) + if (attribute) { + return metainfoAdaptorFactory(attribute) + } + return super.itemAdaptor(key) } render() { @@ -382,6 +389,10 @@ class SubSectionDefAdaptor extends MetainfoAdaptor { this.sectionDefAdaptor.cleanup() } async itemAdaptor(key) { + const attributeDef = this.def.attributes?.find(def => def.name === key) + if (attributeDef) { + return metainfoAdaptorFactory(attributeDef) + } return this.sectionDefAdaptor.itemAdaptor(key) } render() { @@ -390,6 +401,15 @@ class SubSectionDefAdaptor extends MetainfoAdaptor { } class QuantityDefAdaptor extends MetainfoAdaptor { + itemAdaptor(key) { + const attributeDef = this.def.attributes.find(def => def.name === key) + if (attributeDef) { + return metainfoAdaptorFactory(attributeDef) + } + + return super.itemAdaptor(key) + } + render() { return } @@ -404,6 +424,12 @@ class CategoryDefAdaptor extends MetainfoAdaptor { } } +class AttributeDefAdaptor extends MetainfoAdaptor { + render() { + return + } +} + function SectionDefContent({def, inheritingSections}) { const config = useRecoilValue(configState) const metainfoConfig = useRecoilValue(metainfoConfigState) @@ -448,7 +474,7 @@ function SectionDefContent({def, inheritingSections}) { } {inheritingSections.length > 0 && - + {inheritingSections.map((inheritingSection, index) => { const key = `_inheritingSectionDef@${inheritingSection._qualifiedName}` const categories = inheritingSection.categories @@ -475,7 +501,10 @@ function SectionDefContent({def, inheritingSections}) { {formatSubSectionName(subSectionDef.more?.label || subSectionDef.name)} - {subSectionDef.repeats &&  (repeats)} + + {subSectionDef.repeats && } + {subSectionDef._overwritten && } + {subSectionDef._inherited && } }) @@ -494,6 +523,8 @@ function SectionDefContent({def, inheritingSections}) { {quantityDef.more?.label || quantityDef.name} + {quantityDef._overwritten && } + {quantityDef._inherited && } }) @@ -518,6 +549,8 @@ function 
SectionDefContent({def, inheritingSections}) { } } + + } SectionDefContent.propTypes = ({ @@ -529,7 +562,6 @@ function SectionDef({def, inheritingSections}) { return - } SectionDef.propTypes = ({ @@ -543,8 +575,9 @@ function SubSectionDef({def, inheritingSections}) { - + + } @@ -569,7 +602,7 @@ function DefinitionProperties({def, children}) { {children} {def.aliases?.length && aliases: {def.aliases.map(a => `"${a}"`).join(', ')}} {def.deprecated && deprecated: {def.deprecated}} - {Object.keys(def.more).map((moreKey, i) => ( + {Object.keys(def.more || {}).map((moreKey, i) => ( {moreKey}: {String(def.more[moreKey])} ))} {hasSearchAnnotations > 0 && search keys: { @@ -600,12 +633,17 @@ function QuantityDef({def}) { shape:  [{def.shape.join(', ')}] + {def.derived && repeats: true} {def.unit && unit: {def.unit}} + {def.dimensionality && + dimensionality: {def.dimensionality}} {def.default && default: {String(def.default)}} - {def.derived && derived} + {def.derived && derived: true} + {def.variable && variable: true} + } @@ -613,6 +651,47 @@ QuantityDef.propTypes = ({ def: PropTypes.object }) +function AttributeDef({def}) { + return + + + + type:  + {Array.isArray(def.type.type_data) ? 
def.type.type_data.join(', ') : def.type.type_data}  + {def.type.type_kind !== 'data' && `(${def.type.type_kind})`} + + {def.shape && + shape:  + [{def.shape.join(', ')}] + } + {def.default && + default: {String(def.default)}} + + + + +} +AttributeDef.propTypes = ({ + def: PropTypes.object +}) + +function Attributes({def}) { + if (!def.attributes?.length) { + return null + } + + return + {def.attributes.map((attributeDef, index) => { + return + {attributeDef.more?.label || attributeDef.name} + + })} + +} +Attributes.propTypes = ({ + def: PropTypes.object +}) + function DefinitionDocs({def}) { return {def.description && !def.extends_base_section && @@ -731,7 +810,8 @@ const definitionLabels = { [SectionMDef]: 'section', [QuantityMDef]: 'quantity', [SubSectionMDef]: 'sub section', - [CategoryMDef]: 'category' + [CategoryMDef]: 'category', + [AttributeMDef]: 'attribute' } export function ArchiveTitle({def, isDefinition, data, kindLabel, useName, actions}) { diff --git a/gui/src/components/archive/metainfo.js b/gui/src/components/archive/metainfo.js index 3d947b09f7b03b7c5c2a445538dd2b38487d74da..acbc32b84dad227717684f18ef0599427b8a0283 100644 --- a/gui/src/components/archive/metainfo.js +++ b/gui/src/components/archive/metainfo.js @@ -160,6 +160,11 @@ export const SectionMDef = 'nomad.metainfo.metainfo.Section' export const QuantityMDef = 'nomad.metainfo.metainfo.Quantity' export const SubSectionMDef = 'nomad.metainfo.metainfo.SubSection' export const CategoryMDef = 'nomad.metainfo.metainfo.Category' +export const AttributeMDef = 'nomad.metainfo.metainfo.Attribute' + +export function quantityUsesFullStorage(def) { + return def.repeats || def.variable || def.attributes?.length +} /** * Represents and manages schema data. 
@@ -368,23 +373,30 @@ export class Metainfo { async _getAllProperties(sectionDef) { const results = {} - function addProperties(sectionDef) { - sectionDef.quantities.forEach( - property => { - property.m_def = QuantityMDef - results[property.name] = property - } - ) - sectionDef.sub_sections.forEach( - property => { - property.m_def = SubSectionMDef - results[property.name] = property + function createAddProperties(inherited) { + return (sectionDef) => { + function createAddProperty(m_def) { + return (property) => { + const propertyToAdd = inherited ? {} : property + if (inherited) { + Object.assign(propertyToAdd, property) + propertyToAdd._inherited = true + } else { + if (results[property.name]) { + propertyToAdd._overwritten = true + } + } + property.m_def = m_def + results[property.name] = propertyToAdd + } } - ) + sectionDef.quantities.forEach(createAddProperty(QuantityMDef)) + sectionDef.sub_sections.forEach(createAddProperty(SubSectionMDef)) + } } sectionDef = await this._initSection(sectionDef) - sectionDef._allBaseSections.forEach(addProperties) - addProperties(sectionDef) + sectionDef._allBaseSections.forEach(createAddProperties(true)) + createAddProperties(false)(sectionDef) return Object.keys(results).map(key => results[key]) } @@ -432,6 +444,10 @@ export class Metainfo { property._parentIndex = index property._qualifiedName = `${sectionDef._qualifiedName}.${property.name}` property._package = pkg + property._parent = sectionDef + for (const attribute of (property?.attributes || [])) { + attribute._parent = property + } await this._addDef(property) if (property.m_def === QuantityMDef) { property.shape = property.shape || [] @@ -448,6 +464,9 @@ export class Metainfo { property._section = sectionDef } } + for (const attribute of (sectionDef?.attributes || [])) { + attribute._parent = sectionDef + } for (const def of sectionDef.quantities) { await addNewProperty(def, 'quantities', index) } diff --git a/gui/yarn.lock b/gui/yarn.lock index 
71546883f153c72287372fcaf81ee9c8a4a22a3f..a386a8023992bcceec35eeac10e110cfeca6bc19 100644 --- a/gui/yarn.lock +++ b/gui/yarn.lock @@ -13313,10 +13313,10 @@ react-error-boundary@3.1.4: dependencies: "@babel/runtime" "^7.12.5" -react-error-overlay@6.0.11, react-error-overlay@^6.0.9: - version "6.0.11" - resolved "https://registry.yarnpkg.com/react-error-overlay/-/react-error-overlay-6.0.11.tgz#92835de5841c5cf08ba00ddd2d677b6d17ff9adb" - integrity sha512-/6UZ2qgEyH2aqzYZgQPxEnz33NJ2gNsnHA2o5+o4wW9bLM/JYQitNP9xPhsXwC08hMMovfGe/8retsdDsczPRg== +react-error-overlay@6.0.9, react-error-overlay@^6.0.9: + version "6.0.9" + resolved "https://registry.yarnpkg.com/react-error-overlay/-/react-error-overlay-6.0.9.tgz#3c743010c9359608c375ecd6bc76f35d93995b0a" + integrity sha512-nQTTcUu+ATDbrSD1BZHr5kgSD4oF8OFjxun8uAaL8RwPBacGBNPf/yAuVVdx17N8XNzRDMrZ9XcKZHCjPW+9ew== react-event-listener@^0.6.0: version "0.6.6" diff --git a/nomad/app/optimade/common.py b/nomad/app/optimade/common.py index 64548ea11999833ace945d4f1199a17d2b786762..76833abe716b5f54719c6ea04434f0f307c26595 100644 --- a/nomad/app/optimade/common.py +++ b/nomad/app/optimade/common.py @@ -23,8 +23,7 @@ from nomad.metainfo.metainfo import ( Reference, Datetime, MEnum, - _types_int, - _types_float + MTypes ) from nomad.metainfo.elasticsearch_extension import SearchQuantity, entry_type @@ -42,9 +41,9 @@ def create_provider_field(name, definition): type = 'boolean' elif definition.type == Datetime: type = 'timestamp' - elif definition.type in _types_float: + elif definition.type in MTypes.float: type = 'float' - elif definition.type in _types_int: + elif definition.type in MTypes.int: type = 'integer' else: raise NotImplementedError( diff --git a/nomad/cli/dev.py b/nomad/cli/dev.py index 481426327a04fe779a391ebf1f9dc19171585913..9b8ffa2057430c1161a5cef28bd251c00b34e70b 100644 --- a/nomad/cli/dev.py +++ b/nomad/cli/dev.py @@ -86,7 +86,7 @@ def _all_metainfo_packages(): entry_type.create_mapping(EntryArchive.m_def) # TODO this 
is otherwise not imported and will add nexus to the Package.registry - from nexusparser.metainfo import nexus # pylint: disable=unused-import + from nomad.metainfo import nexus # pylint: disable=unused-import # TODO we call __init_metainfo__() for all packages where this has been forgotten # by the package author. Ideally this would not be necessary and we fix the diff --git a/nomad/metainfo/metainfo.py b/nomad/metainfo/metainfo.py index 02c94fec45549858bb32d15340a0496dd754c33c..6c2f44f91703eab764a2a2c6aee773ef0c239692 100644 --- a/nomad/metainfo/metainfo.py +++ b/nomad/metainfo/metainfo.py @@ -16,116 +16,43 @@ # limitations under the License. # -import hashlib +import base64 +import importlib +import inspect import itertools -from difflib import SequenceMatcher -from typing import Type, TypeVar, Union, Tuple, Iterable, List, Any, Dict, Set, \ - Callable as TypingCallable, cast, Optional -from dataclasses import dataclass -from collections.abc import Iterable as IterableABC, Sequence +import json +import re import sys +from collections.abc import Iterable as IterableABC from functools import reduce -import inspect -import re -import json +from typing import ( + Any, Callable as TypingCallable, Dict, Iterable, List, Optional, Set, Tuple, Type, TypeVar, Union, cast) + +import docstring_parser +import jmespath import numpy as np import pandas as pd import pint -import aniso8601 -from datetime import datetime, date -import pytz -import docstring_parser -import jmespath -import base64 -import importlib -import email.utils -from urllib.parse import urlsplit, urlunsplit, SplitResult from nomad.config import process +from nomad.metainfo.metainfo_utility import ( + Annotation, DefinitionAnnotation, MEnum, MQuantity, MRegEx, MSubSectionList, MTypes, ReferenceURL, + SectionAnnotation, _delta_symbols, check_dimensionality, check_unit, convert_to, default_hash, dict_to_named_list, + normalize_datetime, resolve_variadic_name, retrieve_attribute, split_python_definition, to_dict, 
to_numpy, + to_section_def, validate_shape, validate_url) from nomad.units import ureg as units -m_package: 'Package' = None +m_package: Optional['Package'] = None is_bootstrapping = True Elasticsearch = TypeVar('Elasticsearch') MSectionBound = TypeVar('MSectionBound', bound='MSection') SectionDefOrCls = Union['Section', 'SectionProxy', Type['MSection']] T = TypeVar('T') -_hash_method = 'sha1' # choose from hashlib.algorithms_guaranteed -reserved_name_re = re.compile(r'^(m_|a_|_+).*$') - -_primitive_types = { - str: lambda v: None if v is None else str(v), - # TODO it is more complicated than that, because bytes cannot be naturally serialized to JSON - # bytes: lambda v: None if v is None else bytes(v), - int: int, - float: lambda v: None if v is None else float(v), - bool: bool, - np.bool_: bool} - -primitive_type_aliases = {'string': str, 'boolean': bool} - -_primitive_type_names = { - primitive_type.__name__: primitive_type for primitive_type in _primitive_types} - -_primitive_type_names.update(primitive_type_aliases) - -_types_int_numpy = {np.int8, np.int16, np.int32, np.int64, np.uint8, np.uint16, np.uint32, np.uint64} -_types_int_python = {int} -_types_int = _types_int_python | _types_int_numpy -_types_float_numpy = {np.float16, np.float32, np.float64} -_types_float_python = {float} -_types_float = _types_float_python | _types_float_numpy -_types_num_numpy = _types_int_numpy | _types_float_numpy -_types_num_python = _types_int_python | _types_float_python -_types_num = _types_num_python | _types_num_numpy -_types_str_numpy = {np.str_} -_types_bool_numpy = {np.bool_} -_types_numpy = _types_num_numpy | _types_str_numpy | _types_bool_numpy -_delta_symbols = {'delta_', 'Δ'} - -validElnTypes = { - 'str': ['str', 'string'], - 'bool': ['bool', 'boolean'], - 'number': [x.__name__ for x in _types_num_python] + [f'np.{x.__name__}' for x in _types_num_numpy], - 'datetime': ['Datetime'], - 'enum': ['{type_kind: Enum, type_data: [Operator, Responsible_person]}'], - 
'user': ['User'], - 'author': ['Author'], - 'reference': [''] -} - -validElnComponents = { - 'str': ['StringEditQuantity', 'FileEditQuantity', 'RichTextEditQuantity', 'EnumEditQuantity'], - 'bool': ['BoolEditQuantity'], - 'number': ['NumberEditQuantity', 'SliderEditQuantity'], - 'datetime': ['DateTimeEditQuantity'], - 'enum': ['EnumEditQuantity', 'AutocompleteEditQuantity', 'RadioEnumEditQuantity'], - 'user': ['UserEditQuantity'], - 'author': ['AuthorEditQuantity'], - 'reference': ['ReferenceEditQuantity'] -} _unset_value = '__UNSET__' - -def _default_hash(): - return hashlib.new(_hash_method) - - -def _split_python_definition(definition_with_id: str) -> Tuple[list, Union[str, None]]: - ''' - Split a Python type name into names and an optional id. - - Example: - mypackage.mysection@myid ==> (['mypackage', 'mysection'], 'myid') - mypackage.mysection ==> (['mypackage', 'mysection'], None) - ''' - if '@' not in definition_with_id: - return definition_with_id.split('.'), None - - definition_names, definition_id = definition_with_id.split('@') - return definition_names.split('.'), definition_id +_HASH_OBJ = Type['hashlib._Hash'] # type: ignore def _check_definition_id(target_id, tgt_section: MSectionBound) -> MSectionBound: @@ -137,17 +64,6 @@ def _check_definition_id(target_id, tgt_section: MSectionBound) -> MSectionBound return tgt_section -def to_section_def(section_def: SectionDefOrCls): - ''' - Resolves duck-typing for values that are section definitions or section classes to - section definition. 
- ''' - if isinstance(section_def, type): - return section_def.m_def # type: ignore - - return section_def - - # Make pylint believe all bootstrap quantities are actual properties even though # we have to initialize them to None due to bootstrapping _placeholder_quantity: 'Quantity' = property() # type: ignore @@ -172,81 +88,6 @@ class MetainfoReferenceError(MetainfoError): pass -# Metainfo quantity data types - -class MEnum(Sequence): - ''' - Allows to define str types with values limited to a pre-set list of possible values. - - The allowed values can be provided as a list of strings, the keys of which will be identical to values. - Alternatively, they can be provided as key-value pairs. - - Example: - some_variable = MEnum(['a', 'b', 'c']) - some_variable = MEnum(a='a', b='b', c='c') - - The values are stored in __dict__ and can be accessed as attributes: - some_variable.a # gives 'a' - - For description of each possible value, it can be organized into a dictionary. - - Example: - some_variable = MEnum(['a', 'b', 'c'], m_descriptions={'a': 'first', 'b': 'second', 'c': 'third'}) - ''' - - def __init__(self, *args, **kwargs): - # Supports one big list in place of args - if len(args) == 1 and isinstance(args[0], list): - args = args[0] - - self._descriptions: Dict[str, str] = {} - if 'm_descriptions' in kwargs: - self._descriptions = kwargs.pop('m_descriptions') - - # If non-named arguments are given, the default is to have them placed - # into a dictionary with their string value as both the enum name and - # the value. 
- for arg in args: - if arg in kwargs: - raise ValueError(f"Duplicate value '{arg}' provided for enum") - kwargs[arg] = arg - - self._list = list(kwargs.values()) - self._values = set(kwargs.values()) # For allowing constant time member check - - for enum_value in self._values: - if not isinstance(enum_value, str): - raise TypeError(f'MEnum value {enum_value} is not a string.') - - self.__dict__.update(kwargs) - - def set_description(self, value: str, description: str): - if value not in self._values: - raise ValueError(f'{value} is not a value of this MEnum') - self._descriptions[value] = description - - def get_description(self, value: str) -> str: - if value not in self._values: - raise ValueError(f'{value} is not a value of this MEnum') - return self._descriptions.get(value, '') - - def get_all_descriptions(self) -> Dict[str, str]: - return self._descriptions - - def get_all_values(self) -> set: - return self._values - - # no need to implement __getattr__ as all attributes are stored in the __dict__ - # def __getattr__(self, attr): - # pass - - def __getitem__(self, index): - return self._list[index] - - def __len__(self): - return len(self._list) - - class MProxy: ''' A placeholder object that acts as reference to a value that is not yet resolved. @@ -374,7 +215,7 @@ class SectionProxy(MProxy): if '.' 
in self.m_proxy_value: # Try to interpret as python class name - python_name, definition_id = _split_python_definition(self.m_proxy_value) + python_name, definition_id = split_python_definition(self.m_proxy_value) package_name = '.'.join(python_name[:-1]) section_name = python_name[-1] @@ -396,7 +237,7 @@ class SectionProxy(MProxy): if not self.m_proxy_section or self.m_proxy_resolved: return self.m_proxy_resolved - python_name, definition_id = _split_python_definition(self.m_proxy_value) + python_name, definition_id = split_python_definition(self.m_proxy_value) current = self.m_proxy_section for name in python_name: current = self._resolve_name(name, current) @@ -426,6 +267,7 @@ class DataType: section differently from how users might set/get them, and it allows to have non-serializable values that are transformed on de-/serialization. ''' + def set_normalize(self, section: 'MSection', quantity_def: 'Quantity', value: Any) -> Any: ''' Transforms the given value before it is set and checks its type. 
''' return value @@ -444,8 +286,6 @@ class DataType: class _Dimension(DataType): - range_re = re.compile(r'(\d)\.\.(\d|\*)') - def set_normalize(self, section, quantity_def: 'Quantity', value): if isinstance(value, int): return value @@ -453,7 +293,7 @@ class _Dimension(DataType): if isinstance(value, str): if value.isidentifier(): return value - if re.match(_Dimension.range_re, value): + if re.match(MRegEx.index_range, value): return value if isinstance(value, Section): @@ -468,63 +308,12 @@ class _Dimension(DataType): # that is later evaluated in the parser return value - raise TypeError('%s is not a valid dimension' % str(value)) - - @staticmethod - def check_dimension(section, dimension, length): - if isinstance(dimension, int): - return dimension == length - if isinstance(dimension, str): - if dimension.isidentifier(): - return dimension == getattr(section, dimension) - - m = re.match(_Dimension.range_re, dimension) - start = int(m.group(1)) - end = -1 if m.group(2) == '*' else int(m.group(2)) - return start <= length and (end == -1 or length <= end) + raise TypeError(f'{str(value)} is not a valid dimension') class _Unit(DataType): - @staticmethod - def check_dimensionality(quantity_def, unit: Optional[pint.Unit]) -> None: - if quantity_def is None or unit is None: - return - - dimensionality = getattr(quantity_def, 'dimensionality', None) - - if dimensionality is None: # not set, do not validate - return - - if dimensionality in ('dimensionless', '1') and unit.dimensionless: # dimensionless - return - - if dimensionality == 'transformation': - # todo: check transformation dimensionality - return - - if units.Quantity(1 * unit).check(dimensionality): # dimensional - return - - raise TypeError(f'Dimensionality {dimensionality} is not met by unit {unit}') - - @staticmethod - def check_unit(unit: Union[str, pint.Unit]) -> None: - '''Check that the unit is valid. 
- ''' - if isinstance(unit, str): - unit_str = unit - elif isinstance(unit, pint.unit._Unit): - unit_str = str(unit) - else: - raise TypeError('Units must be given as str or pint Unit instances.') - - # Explicitly providing a Pint delta-unit is not currently allowed. - # Implicit conversions are fine as MathJS on the frontend supports them. - if any(x in unit_str for x in _delta_symbols): - raise TypeError('Explicit Pint "delta"-units are not yet supported.') - def set_normalize(self, section, quantity_def: 'Quantity', value): - _Unit.check_unit(value) + check_unit(value) if isinstance(value, str): value = units.parse_units(value) @@ -535,20 +324,23 @@ class _Unit(DataType): elif not isinstance(value, pint.Unit): raise TypeError('Units must be given as str or pint Unit instances.') - _Unit.check_dimensionality(quantity_def, value) + check_dimensionality(quantity_def, value) return value def serialize(self, section, quantity_def: 'Quantity', value): + if quantity_def.flexible_unit: + return None + value = value.__str__() # The delta prefixes are not serialized: only implicit deltas are # allowed currently. 
return reduce(lambda a, b: a.replace(b, ''), _delta_symbols, value) def deserialize(self, section, quantity_def: 'Quantity', value): - _Unit.check_unit(value) + check_unit(value) value = units.parse_units(value) - _Unit.check_dimensionality(quantity_def, value) + check_dimensionality(quantity_def, value) return value @@ -574,7 +366,7 @@ class _QuantityType(DataType): ''' def set_normalize(self, section, quantity_def, value): - if value in _primitive_types: + if value in MTypes.primitive: return value if isinstance(value, MEnum): @@ -583,7 +375,7 @@ class _QuantityType(DataType): if isinstance(value, np.dtype): value = value.type # we normalise all np.dtype to basic np.number types - if value in _types_numpy: + if value in MTypes.numpy: return value if isinstance(value, Section): @@ -620,7 +412,7 @@ class _QuantityType(DataType): raise MetainfoError(f'Type {value} of {quantity_def} is not a valid metainfo quantity type') def serialize(self, section, quantity_def, value): - if value in _primitive_types: + if value in MTypes.primitive: return dict(type_kind='python', type_data=value.__name__) if isinstance(value, MEnum): @@ -632,7 +424,7 @@ class _QuantityType(DataType): if isinstance(value, np.dtype): value = value.type # serialise follows the same logic to use basic np.number only - if value in _types_numpy: + if value in MTypes.numpy: return dict(type_kind='numpy', type_data=str(value.__name__)) if isinstance(value, Reference): @@ -672,7 +464,7 @@ class _QuantityType(DataType): type_kind, type_data = value['type_kind'], value.get('type_data') if type_kind == 'python': - return _primitive_type_names[type_data] + return MTypes.primitive_name[type_data] if type_kind == 'Enum': return MEnum(*type_data) reference = Reference.deserialize_type(type_kind, type_data, section) @@ -697,12 +489,11 @@ class _QuantityType(DataType): return np.dtype(type_data).type except Exception: raise MetainfoError(f'{type_data} is not a valid numpy type.') - if type_kind in ['numpy', 
'custom']: - raise NotImplementedError() + raise MetainfoError(f'{type_kind} is not a valid quantity type kind.') - if value in _primitive_type_names: - return _primitive_type_names[value] + if value in MTypes.primitive_name: + return MTypes.primitive_name[value] if isinstance(value, str): if value.startswith('np.') or value.startswith('numpy.'): @@ -724,22 +515,6 @@ class _QuantityType(DataType): return super().deserialize(section, quantity_def, value) -@dataclass -class ReferenceURL: - fragment: str - archive_url: str - url_parts: SplitResult - - def __init__(self, url: str): - if '#' not in url: - url = f'#{url}' - - self.url_parts = urlsplit(url) - archive_url = urlunsplit(self.url_parts[0:4] + ('',)) - self.archive_url = None if archive_url is None else archive_url - self.fragment = self.url_parts.fragment - - class Reference(DataType): ''' Datatype used for quantities that use other sections as values. @@ -748,11 +523,11 @@ class Reference(DataType): The behavior in this DataType class uses URLs to serialize references. In memory, the actual referenced section instance (or respective MProxy instances) are used as values. - During de-serialization, MProxy instances that autoresolve on usage, will be used. + During de-serialization, MProxy instances that auto-resolve on usage, will be used. The reference datatype will also accept MProxy instances or URL strings as values when set in Python and replace the value with the resolved section instance. - Sub-classes might exchange URLs with a different string serialization, e.g. Python + Subclasses might exchange URLs with a different string serialization, e.g. Python qualified names. Arguments: @@ -761,7 +536,7 @@ class Reference(DataType): sections that inherit from the given section can also be used as values. 
''' - def __init__(self, section_def: SectionDefOrCls): + def __init__(self, section_def: Optional[SectionDefOrCls]): self._target_section_def = to_section_def(section_def) @property @@ -858,11 +633,6 @@ class Reference(DataType): # TODO has to deal with URLs, Python qualified names, and Metainfo references class _SectionReference(Reference): - # matches for example - # Python package/module name: nomad.metainfo.section - # Python name + 40 digits id: nomad.metainfo.section@1a2b3c... - value_re = re.compile(r'^\w*(\.\w*)*(@\w{40})?$') - def __init__(self): super().__init__(None) @@ -892,7 +662,7 @@ class _SectionReference(Reference): else: first_segment, remaining_fragment = split_fragment[0], None - resolved: MSection = None + resolved: Optional[MSection] = None for content in definitions.m_contents(): if isinstance(content, Definition) and content.name == first_segment: if remaining_fragment: @@ -907,7 +677,7 @@ class _SectionReference(Reference): return super().resolve_fragment(context_section, fragment_with_id) def set_normalize(self, section: 'MSection', quantity_def: 'Quantity', value: Any) -> Any: - if isinstance(value, str) and _SectionReference.value_re.match(value): + if isinstance(value, str) and MRegEx.python_definition.match(value): return SectionProxy(value, m_proxy_section=section, m_proxy_type=quantity_def.type) return super().set_normalize(section, quantity_def, value) @@ -931,10 +701,10 @@ class _SectionReference(Reference): def deserialize(self, section: 'MSection', quantity_def: 'Quantity', value: Any) -> Any: proxy_type = quantity_def.type if quantity_def else SectionReference - if isinstance(value, str) and _SectionReference.value_re.match(value): + if isinstance(value, str) and MRegEx.python_definition.match(value): # First assume it's a python name and try to resolve it. if '.' 
in value: - python_name, definition_id = _split_python_definition(value) + python_name, definition_id = split_python_definition(value) package_name = '.'.join(python_name[:-1]) section_name = python_name[-1] @@ -985,12 +755,12 @@ class QuantityReference(Reference): return getattr(section, self.target_quantity_def.name) def serialize(self, section: 'MSection', quantity_def: 'Quantity', value: Any) -> Any: - section_path = super().serialize(section, quantity_def, value) - return f'{section_path}/{self.target_quantity_def.name}' + target_path = super().serialize(section, quantity_def, value) + return f'{target_path}/{self.target_quantity_def.name}' def deserialize(self, section: 'MSection', quantity_def: 'Quantity', value: Any) -> Any: - section_path = value.rsplit('/', 1)[0] - return MProxy(section_path, m_proxy_section=section, m_proxy_type=quantity_def.type) + target_path = value.rsplit('/', 1)[0] + return MProxy(target_path, m_proxy_section=section, m_proxy_type=quantity_def.type) class _File(DataType): @@ -1007,117 +777,25 @@ class _File(DataType): class _URL(DataType): - _url_regex = re.compile( - r'^(?:http|ftp)s?://' - r'(?:(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+(?:[A-Z]{2,6}\.?|[A-Z0-9-]{2,}\.?)|' - r'localhost|' - r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})' - r'(?::\d+)?' 
- r'(?:/?|[/?]\S+)$', re.IGNORECASE) - - @staticmethod - def _validate_url(url_str: str) -> Optional[str]: - if url_str is None: - return None - - if not isinstance(url_str, str): - raise TypeError('Links need to be given as URL strings') - if re.match(_URL._url_regex, url_str) is None: - raise ValueError('The given URL is not valid') - - return url_str - def set_normalize(self, section: 'MSection', quantity_def: 'Quantity', value: Any) -> Any: - return _URL._validate_url(value) + return validate_url(value) def serialize(self, section: 'MSection', quantity_def: 'Quantity', value: Any) -> Any: - return _URL._validate_url(value) + return validate_url(value) def deserialize(self, section: 'MSection', quantity_def: 'Quantity', value: Any) -> Any: - return _URL._validate_url(value) + return validate_url(value) class _Datetime(DataType): - @staticmethod - def _parse(datetime_str: str) -> datetime: - # removing trailing spaces and replacing the potential white space between date and time with char 'T' - if datetime_str[0].isdigit(): - datetime_str = datetime_str.strip().replace(' ', 'T') - - try: - return aniso8601.parse_datetime(datetime_str) - except ValueError: - pass - - try: - date = aniso8601.parse_date(datetime_str) - if isinstance(date, datetime): - return date - except ValueError: - pass - - try: - return email.utils.parsedate_to_datetime(datetime_str) - except Exception: - pass - - try: - return datetime.strptime(datetime_str, '%Y-%m-%d %H:%M:%S.%f') - except ValueError: - pass - - try: - return datetime.strptime(datetime_str, '%Y-%m-%d %H:%M:%S') - except ValueError: - pass - - try: - return datetime.strptime(datetime_str, '%Y-%m-%d') - except ValueError: - pass - - try: - return datetime.fromisoformat(datetime_str) - except ValueError: - pass - - raise TypeError(f'Invalid date literal {datetime_str}') - - @staticmethod - def _convert(value) -> Optional[datetime]: - if value is None: - return None - - if isinstance(value, str): - value = _Datetime._parse(value) 
- - elif isinstance(value, (int, float)): - value = datetime.fromtimestamp(value) - - elif isinstance(value, pint.Quantity): - value = datetime.fromtimestamp(value.magnitude) - - elif not isinstance(value, datetime) and isinstance(value, date): - value = datetime.combine(value, datetime.min.time()) - - if not isinstance(value, datetime): - raise TypeError(f'{value} is not a datetime.') - - if value.tzinfo is None: - value = value.replace(tzinfo=pytz.utc) - else: - value = value.astimezone(pytz.utc) - - return value - def set_normalize(self, section: 'MSection', quantity_def: 'Quantity', value: Any) -> Any: - return self._convert(value) + return normalize_datetime(value) def serialize(self, section: 'MSection', quantity_def: 'Quantity', value: Any) -> Any: return None if value is None else value.isoformat() def deserialize(self, section: 'MSection', quantity_def: 'Quantity', value: Any) -> Any: - return self._convert(value) + return normalize_datetime(value) class _JSON(DataType): @@ -1163,7 +841,7 @@ class MObjectMeta(type): def __new__(self, cls_name, bases, dct): do_init = dct.get('do_init', None) if do_init is not None: - del(dct['do_init']) + del dct['do_init'] else: do_init = True @@ -1182,7 +860,7 @@ This can either be : - the name of the section - the section definition itself -- the definition of a sub section +- the definition of a subsection - or the section definition Python class ''' @@ -1194,18 +872,15 @@ def constraint(warning): f = warning warning = False - def decorator(f): - setattr(f, 'm_constraint', True) - setattr(f, 'm_warning', warning) - return f + def decorator(_f): + setattr(_f, 'm_constraint', True) + setattr(_f, 'm_warning', warning) + return _f - if f is None: - return decorator - else: - return decorator(f) + return decorator if f is None else decorator(f) -class Context(): +class Context: ''' The root of a metainfo section hierarchy can have a Context. 
Contexts allow to customize the resolution of references based on how and in what context a metainfo-based @@ -1223,7 +898,7 @@ class Context(): def create_reference(self, section: 'MSection', quantity_def: 'Quantity', value: 'MSection') -> str: ''' Returns a reference for the given target section (value) based on the given context. - Allows sub-classes to build references across resources, if necessary. + Allows subclasses to build references across resources, if necessary. Raises: MetainfoReferenceError ''' @@ -1312,29 +987,29 @@ class MSection(metaclass=MObjectMeta): # TODO find a way to make this a subclas .. automethod:: m_add_sub_section .. automethod:: m_remove_sub_section - There are some specific attributes for section instances that are sub-sections of - another section. While sub-sections are directly accessible from the containing - section by using the Python property that represents the sub-section (e.g. - `run.section_system`), there is also a way to navigate from the sub-section to + There are some specific attributes for section instances that are subsections of + another section. While subsections are directly accessible from the containing + section by using the Python property that represents the subsection (e.g. + `run.section_system`), there is also a way to navigate from the subsection to the containing section (`parent section`) using these Python properties: Attributes: m_parent: - If this section is a sub-section, this references the parent section instance. + If this section is a subsection, this references the parent section instance. m_parent_sub_section: - If this section is a sub-section, this is the :class:`SubSection` that defines + If this section is a subsection, this is the :class:`SubSection` that defines this relationship. m_parent_index: - For repeatable sections, parent keep a list of sub-sections. This is the index - of this section in the respective parent sub-section list. 
+ For repeatable sections, parent keep a list of subsections. This is the index + of this section in the respective parent subsection list. m_context: The :class:`MContext` that manages this (root-)section. Often some general tasks have to be performed on a whole tree of sections without knowing about the definitions in advance. The following methods allow to access - sub-sections reflectively. + subsections reflectively. .. automethod:: m_traverse .. automethod:: m_all_contents @@ -1442,10 +1117,7 @@ class MSection(metaclass=MObjectMeta): # TODO find a way to make this a subclas if attr.description is not None: description = inspect.cleandoc(attr.description) description = description.strip() - description = re.sub( - r'\(https?://[^\)]*\)', - lambda m: re.sub(r'\n', '', m.group(0)), - description) + description = re.sub(r'\(https?://[^)]*\)', lambda m: re.sub(r'\n', '', m.group(0)), description) attr.description = description attr.__doc__ = attr.description @@ -1455,6 +1127,9 @@ class MSection(metaclass=MObjectMeta): # TODO find a way to make this a subclas m_def.m_add_sub_section(Section.sub_sections, attr) else: raise NotImplementedError('Unknown property kind.') + elif isinstance(attr, Attribute): + attr.name = name + m_def.m_add_sub_section(Section.attributes, attr) if inspect.isclass(attr): inner_section_def = getattr(attr, 'm_def', None) @@ -1481,8 +1156,10 @@ class MSection(metaclass=MObjectMeta): # TODO find a way to make this a subclas for event_handler in base_section.event_handlers: event_handlers.add(event_handler) - if len(constraints) > 0: m_def.constraints = list(constraints) - if len(event_handlers) > 0: m_def.event_handlers = list(event_handlers) + if len(constraints) > 0: + m_def.constraints = list(sorted(constraints)) + if len(event_handlers) > 0: + m_def.event_handlers = list(sorted(event_handlers)) # add section cls' section to the module's package module_name = cls.__module__ @@ -1490,12 +1167,12 @@ class MSection(metaclass=MObjectMeta): # 
TODO find a way to make this a subclas pkg.m_add_sub_section(Package.section_definitions, cls.m_def) # apply_google_docstrings - # Parses the google doc string of the given class and properly updates the + # Parses the Google doc string of the given class and properly updates the # definition descriptions. - # This allows to document quantities and sub-sections with 'Args:' in the section + # This allows to document quantities and subsections with 'Args:' in the section # class. It will remove the 'Args' section from the section definition and will - # set the respective pieces to the quantity and sub-section descriptions. + # set the respective pieces to the quantity and subsection descriptions. docstring = cls.__doc__ if docstring is not None: parsed_docstring = docstring_parser.parse(docstring) @@ -1523,45 +1200,6 @@ class MSection(metaclass=MObjectMeta): # TODO find a way to make this a subclas for content in m_def.m_all_contents(depth_first=True, include_self=True): cast(Definition, content).__init_metainfo__() - @staticmethod - def __resolve_variable_name( - definitions: Dict[str, 'Definition'], name: str) -> Optional['Definition']: - ''' - For properties with variadic names, it is necessary to check all possible definitions - in the schema to find the unique and correct definition that matches the naming pattern. - - In the schema defines a property with the name 'FOO_bar', implying the prefix 'FOO' is - merely a placeholder, the actual name in the data can be anything, such as 'a_bar' or 'b_bar'. - - This method checks each definition name by replacing the placeholder with '.*' and then check if - the property name matches the pattern. If it does, it returns the corresponding definition. - - For example, the definition name 'FOO_bar' will be replaced by '.*_bar', which further matches - 'a_bar', 'aa_bar', etc. - - In case of multiple quantities with identical template/variadic patterns, the string similarity - is used to determine which to be used. 
- ''' - candidates: list = [] - for definition in set(definitions.values()): - if not definition.variable: - continue - - name_re = re.sub(r'(^_*)[A-Z_]+(_|$)', r'\1.*\2', definition.name) - if re.match(name_re, name): - candidates.append(definition) - - if len(candidates) == 0: - raise MetainfoError(f'Cannot find a proper definition for name {name}') - - if len(candidates) == 1: - return candidates[0] - - # multiple matches, check similarity - similarity: list = [SequenceMatcher(None, v.name.upper(), name.upper()).ratio() for v in candidates] - - return candidates[similarity.index(max(similarity))] - def __setattr__(self, name, value): if self.m_def is None: return super().__setattr__(name, value) @@ -1570,11 +1208,10 @@ class MSection(metaclass=MObjectMeta): # TODO find a way to make this a subclas if alias_pool is not None and name in alias_pool: name = alias_pool[name].name - elif self.m_def.has_variable_names and not reserved_name_re.match(name): - resolved_variable_name = self.__resolve_variable_name(self.m_def.all_properties, name) - if resolved_variable_name: - self.m_set_attribute(resolved_variable_name, 'm_source_name', name) # type: ignore - name = resolved_variable_name.name + elif self.m_def.has_variable_names and not MRegEx.reserved_name.match(name): + resolved_name = resolve_variadic_name(self.m_def.all_properties, name) + if resolved_name: + name = resolved_name.name return super().__setattr__(name, value) @@ -1587,58 +1224,34 @@ class MSection(metaclass=MObjectMeta): # TODO find a way to make this a subclas return getattr(self, self.m_def.all_aliases[name].name) if self.m_def.has_variable_names: - resolved_variable_name = self.__resolve_variable_name(self.m_def.all_properties, name) - if resolved_variable_name: - return getattr(self, resolved_variable_name.name) + m_definition: Definition = resolve_variadic_name(self.m_def.all_properties, name) + if m_definition: + if not isinstance(m_definition, Quantity) or not m_definition.use_full_storage: + 
return getattr(self, m_definition.name) - raise AttributeError(name) + m_storage: dict = self.__dict__.get(m_definition.name, None) + if m_storage is None: + return None - def __check_np(self, definition: 'Definition', value: np.ndarray) -> np.ndarray: - # TODO this feels expensive, first check, then possible convert very often? - # if quantity_ref.type != value.dtype: - # raise MetainfoError( - # 'Quantity dtype %s and value dtype %s do not match.' % - # (quantity_ref.type, value.dtype)) + m_quantity = m_storage.get(name, None) + if m_quantity is None: + return None - return value + if m_quantity.value is not None: + if m_quantity.unit is not None: + return units.Quantity(m_quantity.value, m_quantity.unit) - def __normalize_value(self, value_type, definition: 'Definition', value: Any) -> Any: - if isinstance(value_type, DataType): - return value_type.set_normalize(self, None, value) # type: ignore + return m_quantity.value - if isinstance(value_type, MEnum): - if value not in cast(MEnum, value_type).get_all_values(): - raise TypeError(f'The value {value} is not an enum value for {definition}.') - return value - - if value_type == Any: - return value - - if value_type == str and type(value) == np.str_: - return str(value) - - if value_type == bool and type(value) == np.bool_: - return bool(value) - - if value_type == int and type(value) == np.float_: - return int(value) - - if value_type in _primitive_types and type(value) != value_type: - try: - return _primitive_types[value_type](value) # type: ignore - except ValueError as e: - raise TypeError(e) - - if value is not None and type(value) != value_type: - raise TypeError(f'The value {value} with for {definition} is not of type {value_type}.') - - return value + raise AttributeError(name) def __set_normalize(self, quantity_def: 'Quantity', value: Any) -> Any: - if isinstance(quantity_def.type, DataType): - return quantity_def.type.set_normalize(self, quantity_def, value) + target_type = quantity_def.type + + if 
isinstance(target_type, DataType): + return target_type.set_normalize(self, quantity_def, value) - if isinstance(quantity_def.type, Section): + if isinstance(target_type, Section): if isinstance(value, MProxy): return value @@ -1646,126 +1259,185 @@ class MSection(metaclass=MObjectMeta): # TODO find a way to make this a subclas raise TypeError( f'The value {value} for reference quantity {quantity_def} is not a section instance.') - if not value.m_follows(quantity_def.type): + if not value.m_follows(target_type): raise TypeError( - f'The value {value} for quantity {quantity_def} does not follow {quantity_def.type}') + f'The value {value} for quantity {quantity_def} does not follow {target_type}') return value - return self.__normalize_value(quantity_def.type, quantity_def, value) + if isinstance(target_type, DataType): + return target_type.set_normalize(self, None, value) # type: ignore - def __to_np(self, np_type, shape, unit, definition: 'Definition', value): - _Unit.check_dimensionality(definition, unit) + if isinstance(target_type, MEnum): + if value not in cast(MEnum, target_type).get_all_values(): + raise TypeError(f'The value {value} is not an enum value for {quantity_def}.') + return value - if isinstance(value, pint.Quantity): - if unit is None: - raise MetainfoError(f'The quantity {definition} does not have a unit, but value {value} does.') + if target_type == Any: + return value - if type(value.magnitude) == np.ndarray and np_type != value.dtype: - value = value.astype(np_type) + if target_type == str and type(value) == np.str_: + return str(value) - value = value.to(unit).magnitude + if target_type == bool and type(value) == np.bool_: + return bool(value) - if isinstance(value, pd.DataFrame): - try: - value = value.to_numpy() - except AttributeError: - raise AttributeError( - f'Could not convert value {value} of type pandas.Dataframe to a numpy array') + if target_type == int and type(value) == np.float_: + return int(value) - if type(value) != 
np.ndarray: - if len(shape) > 0: - try: - value = np.asarray(value) - except TypeError: - raise TypeError(f'Could not convert value {value} of {definition} to a numpy array') - elif type(value) != np_type: + if type(value) != target_type: + if target_type in MTypes.primitive: try: - value = np_type(value) - except TypeError: - raise TypeError(f'Could not convert value {value} of {definition} to a numpy scalar') + return MTypes.primitive[target_type](value) # type: ignore + except ValueError as e: + raise TypeError(e) + + if value is not None: + raise TypeError(f'The value {value} for {quantity_def} is not of type {target_type}.') - return self.__check_np(definition, value) + return value - def m_set(self, quantity_def: 'Quantity', value: Any, add_new: bool = False) -> None: + def m_set(self, quantity_def: 'Quantity', value: Any) -> None: ''' Set the given value for the given quantity. ''' self.m_mod_count += 1 if quantity_def.derived is not None: raise MetainfoError(f'The quantity {quantity_def} is derived and cannot be set.') + item_name: str = quantity_def.name + if value is None: # This implements the implicit "unset" semantics of assigned None as a value - self.__dict__.pop(quantity_def.name, None) + to_remove = self.__dict__.pop(item_name, None) + # if full storage is used, also need to clear quantities created for convenient access + if quantity_def.use_full_storage and to_remove: + # self.__dict__[full_name] is guaranteed to be a 'dict[str, MQuantity]' + for key in to_remove.keys(): + self.__dict__.pop(key, None) return - if quantity_def.type in _types_numpy or isinstance(quantity_def.type, pd.DataFrame): - value = self.__to_np( - quantity_def.type, - quantity_def.shape, - quantity_def.unit, - quantity_def, - value) - else: - dimensions = len(quantity_def.shape) - if dimensions == 0: - value = self.__set_normalize(quantity_def, value) + if not quantity_def.use_full_storage: + # handles the non-repeating and no attribute case, store the value directly under 
the name + if quantity_def.type in MTypes.numpy or isinstance(quantity_def.type, pd.DataFrame): + value = to_numpy(quantity_def.type, quantity_def.shape, quantity_def.unit, quantity_def, value) + else: + dimensions = len(quantity_def.shape) + if dimensions == 0: + value = self.__set_normalize(quantity_def, value) + if value == _unset_value: + return - if value == _unset_value: - return + elif dimensions == 1: + if type(value) == str or not isinstance(value, IterableABC): + raise TypeError( + f'The shape of {quantity_def} requires an iterable value, but {value} is not iterable.') - elif dimensions == 1: - if type(value) == str or not isinstance(value, IterableABC): - raise TypeError( - f'The shape of {quantity_def} requires an iterable value, but {value} is not iterable.') + value = [v for v in list( + self.__set_normalize(quantity_def, item) for item in value) if v != _unset_value] - list_value = list() - for item in value: - item_value = self.__set_normalize(quantity_def, item) - if item_value == _unset_value: - continue - list_value.append(item_value) - value = list_value + else: + raise MetainfoError( + f'Only numpy arrays and dtypes can be used for higher dimensional quantities: {quantity_def}') - def __check_shape(shape): - if not isinstance(shape, str) or shape == '*': - return + self.__dict__[item_name] = value + else: + # it is a repeating quantity w/o attributes + # the actual value/name/unit would be wrapped into 'MQuantity' + # check if there is an existing item + m_quantity: MQuantity + m_attribute: dict = {} + if isinstance(value, MQuantity): + m_quantity = value + if not quantity_def.variable: + if not m_quantity.name: + m_quantity.name = item_name + elif m_quantity.name != item_name: + raise MetainfoError(f"The name of {value} must match definition name {item_name}") + else: + if not m_quantity.name: + raise MetainfoError(f"The name must be provided for variadic quantity {item_name}") - bound_match = re.match(r"(\d+)\.\.(\d+|\*)", shape) - if 
bound_match: - low_bound = bound_match.group(1) - upper_bound = bound_match.group(2) - if len(value) < int(low_bound): - raise MetainfoError(f'At least {low_bound} elements required.') - if upper_bound != '*' and len(value) >= int(upper_bound): - raise MetainfoError(f'At most {upper_bound} elements required.') + # swap to add attributes via the setter to allow validation + m_attribute = m_quantity.attributes + m_quantity.attributes = {} + elif not quantity_def.variable: + try: + m_quantity = self.__dict__[item_name][item_name] + if isinstance(value, pint.Quantity): + m_quantity.value = value.m + m_quantity.unit = value.u else: - try: - bound = int(shape) - if len(value) != bound: - raise MetainfoError(f'Exact {bound} elements required.') - except ValueError: - pass - # todo: need further work as some tests do not follow the shape rule - # __check_shape(quantity_def.shape[0]) + m_quantity.value = value + except KeyError: + m_quantity = MQuantity(item_name, value) else: - raise MetainfoError( - f'Only numpy arrays and dtypes can be used for higher dimensional quantities: {quantity_def}') + raise MetainfoError("Variadic quantities only accept raw values wrapped in 'MQuantity'") + + if not validate_shape(self, quantity_def, m_quantity.value): + raise MetainfoError(f"The shape of {m_quantity} does not match {quantity_def.shape}") + + # todo validate values + if quantity_def.unit is None: + # no prescribed unit, need to check dimensionality, no need to convert + check_dimensionality(quantity_def, m_quantity.unit) + else: + try: + m_quantity.value = convert_to(m_quantity.value, m_quantity.unit, quantity_def.unit) + except (ValueError, TypeError): + raise MetainfoError(f'Could not convert {m_quantity.unit} to {quantity_def.unit}') + m_quantity.unit = quantity_def.unit + + if quantity_def.type in MTypes.numpy or isinstance(quantity_def.type, pd.DataFrame): + m_quantity.value = to_numpy( + quantity_def.type, quantity_def.shape, quantity_def.unit, quantity_def, 
m_quantity.value) + else: + dimensions = len(quantity_def.shape) + if dimensions == 0: + m_quantity.value = self.__set_normalize(quantity_def, m_quantity.value) + if m_quantity.value == _unset_value: + return + + elif dimensions == 1: + if type(m_quantity.value) == str or not isinstance(m_quantity.value, IterableABC): + raise TypeError( + f'The shape of {quantity_def} requires an iterable value, ' + f'but {m_quantity.value} is not iterable.') - # account for variable name - if add_new: - self.m_def.quantities.append(quantity_def) - quantity_def.init_metainfo() + m_quantity.value = [v for v in list( + self.__set_normalize(quantity_def, item) for item in m_quantity.value) if v != _unset_value] + + else: + raise MetainfoError( + f'Only numpy arrays and dtypes can be used for higher dimensional quantities: {quantity_def}') + + # store under variable name with suffix + if item_name in self.__dict__: + self.__dict__[item_name][m_quantity.name] = m_quantity + else: + self.__dict__[item_name] = {m_quantity.name: m_quantity} - self.__dict__[quantity_def.name] = value + for k, v in m_attribute.items(): + self.m_set_quantity_attribute(m_quantity.name, k, v) for handler in self.m_def.event_handlers: if handler.__name__.startswith('on_set'): handler(self, quantity_def, value) - def m_get(self, quantity_def: 'Quantity') -> Any: + def m_get(self, quantity_def: 'Quantity', full: bool = False) -> Any: ''' Retrieve the given value for the given quantity. ''' - return quantity_def.__get__(self, Quantity) + if not full: + return quantity_def.__get__(self, Quantity) + + return self.__dict__[quantity_def.name] + + def m_get_quantity_definition(self, quantity_name: str, hint: Optional[str] = None): + ''' + Get the definition of the quantity with the target name. + + An optional hint string can be provided. The hint should be the name of one of attributes + defined in the target quantity. 
+ ''' + return resolve_variadic_name(self.m_def.all_quantities, quantity_name, hint) def m_is_set(self, quantity_def: 'Quantity') -> bool: ''' True if the given quantity is set. ''' @@ -1805,20 +1477,17 @@ class MSection(metaclass=MObjectMeta): # TODO find a way to make this a subclas sub_section.m_parent_index = -1 def m_add_sub_section(self, sub_section_def: 'SubSection', sub_section: 'MSection', index: int = -1) -> None: - ''' Adds the given section instance as a sub section of the given sub section definition. ''' + ''' Adds the given section instance as a subsection of the given subsection definition. ''' sub_section_name = sub_section_def.name if sub_section_def.repeats: - sub_section_lst = self._get_sub_sections(sub_section_def) - if index == -1: - sub_section_lst.append(sub_section) - else: - raise NotImplementedError('You can only append sub sections.') + if index != -1: + raise NotImplementedError('You can only append subsections.') + sub_section_lst = self._get_sub_sections(sub_section_def) + sub_section_lst.append(sub_section) if sub_section_lst.__class__ != MSubSectionList: - self._on_add_sub_section( - sub_section_def, sub_section, len(sub_section_lst) - 1) - + self._on_add_sub_section(sub_section_def, sub_section, len(sub_section_lst) - 1) else: old_sub_section = self.__dict__.get(sub_section_name, None) self.__dict__[sub_section_name] = sub_section @@ -1828,138 +1497,184 @@ class MSection(metaclass=MObjectMeta): # TODO find a way to make this a subclas self._on_remove_sub_section(sub_section_def, old_sub_section) def m_remove_sub_section(self, sub_section_def: 'SubSection', index: int) -> None: - ''' Removes the exiting section for a non repeatable sub section ''' + ''' Removes the exiting section for a non-repeatable subsection ''' self.m_mod_count += 1 + if sub_section_def.name not in self.__dict__: + return + if sub_section_def.repeats: sub_section = self.__dict__[sub_section_def.name][index] - del(self.__dict__[sub_section_def.name][index]) - - 
elif sub_section_def.name in self.__dict__: + del self.__dict__[sub_section_def.name][index] + else: sub_section = self.__dict__[sub_section_def.name] - del(self.__dict__[sub_section_def.name]) - self._on_remove_sub_section(sub_section_def, sub_section) + del self.__dict__[sub_section_def.name] - def m_get_sub_section(self, sub_section_def: 'SubSection', index: Any) -> 'MSection': - ''' Retrieves a single sub section of the given sub section definition. ''' - if sub_section_def.repeats: - if isinstance(index, int): - return self.__dict__[sub_section_def.name][index] - elif isinstance(index, str): - try: - sub_sections: List['MSection'] = [section for section in self.__dict__[sub_section_def.name] if index == section.name] - if len(sub_sections) > 1: - raise MetainfoReferenceError( - f'multiple sections with this section id were found.') - if len(sub_sections) == 1: - return sub_sections[0] - except KeyError: - raise MetainfoReferenceError( - f'{index} is not a valid subsection.') - return None + self._on_remove_sub_section(sub_section_def, sub_section) - else: + def m_get_sub_section(self, sub_section_def: 'SubSection', index: Any) -> Optional['MSection']: + ''' Retrieves a single subsection of the given subsection definition. 
''' + if not sub_section_def.repeats: return self.__dict__.get(sub_section_def.name, None) + if isinstance(index, int): + return self.__dict__[sub_section_def.name][index] + + if isinstance(index, str): + try: + sub_sections: List['MSection'] = [ + section for section in self.__dict__[sub_section_def.name] if index == section.name] + if len(sub_sections) > 1: + raise MetainfoReferenceError(f'multiple sections with this section id were found.') + if len(sub_sections) == 1: + return sub_sections[0] + except KeyError: + raise MetainfoReferenceError(f'{index} is not a valid subsection.') + + return None + def m_get_sub_sections(self, sub_section_def: 'SubSection') -> List['MSection']: - ''' Retrieves all sub sections of the given sub section definition. ''' + ''' Retrieves all subsections of the given subsection definition. ''' if sub_section_def.repeats: return self._get_sub_sections(sub_section_def) - else: - try: - return [self.__dict__[sub_section_def.name]] - except KeyError: - return [] + + try: + return [self.__dict__[sub_section_def.name]] + except KeyError: + return [] def m_sub_section_count(self, sub_section_def: 'SubSection') -> int: - ''' Returns the number of sub sections for the given sub section definition. ''' + ''' Returns the number of subsections for the given subsection definition. 
''' try: value = self.__dict__[sub_section_def.name] - if sub_section_def.repeats: - return len(value) - else: - return 1 + return len(value) if sub_section_def.repeats else 1 except KeyError: return 0 - def _get_attribute(self, property: Union[str, 'Property'], attr_name: str) -> 'Attribute': - property_as_property = None - - if isinstance(property, str): - property_as_property = self.m_def.all_properties.get(property) - if not property_as_property: - property_as_property = self.__resolve_variable_name( - self.m_def.all_properties, property) - elif isinstance(property, Property): - property_as_property = self.m_def.all_properties.get(property.name) - - if property_as_property is None: - raise ValueError('The given property is not a property.') - - attribute = property_as_property.all_attributes.get(attr_name) - if attribute is None: - attribute = self.__resolve_variable_name(property_as_property.all_attributes, attr_name) - if attribute is None: - raise ValueError( - 'The given attribute name must be a name for an attribute of the given property.') - - return attribute - - def m_set_attribute(self, tgt_property: Union[str, 'Property'], attr_name: str, attr_value: Any): - attribute = self._get_attribute(tgt_property, attr_name) - key = attribute.section_key - if attribute.type in _types_numpy: - attr_value = self.__to_np(attribute.type, [], None, attribute, attr_value) - attr_value = self.__normalize_value(attribute.type, attribute, attr_value) + def m_set_section_attribute(self, name: str, value: Any) -> None: + ''' + Set attribute for the current section. + ''' + self.__set_attribute(None, name, value) + + def m_set_quantity_attribute(self, quantity_def: Union[str, 'Quantity'], name: str, value: Any) -> None: + ''' + Set attribute for the given quantity. 
+ ''' + self.__set_attribute(quantity_def, name, value) + + def __set_attribute(self, tgt_property: Union[Optional[str], 'Definition'], attr_name: str, attr_value: Any): + ''' + Set attribute for current section for a quantity of the current section. + + For attributes of the current section, use None as the target property. + For attributes of a quantity, use the quantity name/definition as the target property. + + Both the quantity name and the attribute name can be variadic. + + Arguments: + tgt_property: The name or definition of the quantity to set the attribute for, can be None. + attr_name: The name of the attribute to set. + attr_value: The value of the attribute to set. + ''' + tgt_name: Optional[str] = tgt_property.name if isinstance(tgt_property, Definition) else tgt_property + + tgt_def, tgt_attr = retrieve_attribute(self.m_def, tgt_name, attr_name) + + if tgt_attr.type in MTypes.numpy: + attr_value = to_numpy(tgt_attr.type, [], None, tgt_attr, attr_value) else: - dimension = len(attribute.shape) + dimension = len(tgt_attr.shape) if dimension == 0: - attr_value = self.__normalize_value(attribute.type, attribute, attr_value) + attr_value = self.__set_normalize(tgt_attr, attr_value) elif dimension == 1: if type(attr_value) == str or not isinstance(attr_value, IterableABC): - raise TypeError( - f'The shape requires an iterable value, but {attr_value} is not iterable.') - - attr_value = list(self.__normalize_value(attribute.type, attribute, item) for item in attr_value) + raise TypeError(f'The shape requires an iterable value, but {attr_value} is not.') + attr_value = list(self.__set_normalize(tgt_attr, item) for item in attr_value) else: - raise MetainfoError( - f'Only numpy arrays and dtypes can be used for higher dimensional quantities: {attribute}') + raise MetainfoError(f'Only numpy arrays can be used for higher dimensional quantities: {tgt_attr}.') + + if not validate_shape(self, tgt_attr, attr_value): + raise MetainfoError(f'Invalid shape for 
attribute: {tgt_attr}.') + + if isinstance(tgt_def, Quantity) and tgt_def.use_full_storage: + m_storage: Optional[dict] = self.__dict__.get(tgt_def.name, None) + m_quantity: Optional[MQuantity] = m_storage.get(tgt_property, None) if m_storage else None + if m_quantity is None: + m_quantity = MQuantity(tgt_name, None) + self.m_set(tgt_def, m_quantity) + m_quantity.m_set_attribute(attr_name, attr_value) + elif tgt_property is None: + # indicating that the attribute is for the current section + if 'm_attributes' not in self.__dict__: + self.__dict__['m_attributes'] = {} + self.__dict__['m_attributes'][attr_name] = attr_value + + def m_get_section_attribute(self, name: str) -> Any: + ''' + Get attribute for the current section. + ''' + return self.__get_attribute(None, name) - self.__dict__[key] = attr_value + def m_get_quantity_attribute(self, quantity_def: str, name: str) -> Any: + ''' + Get attribute for the given quantity. + ''' + return self.__get_attribute(quantity_def, name) - def m_get_attribute(self, tgt_property: Union[str, 'Property'], attr_name: str): - attribute = self._get_attribute(tgt_property, attr_name) - key = attribute.section_key - return self.__dict__.get(key) + def __get_attribute(self, tgt_property: Optional[str], attr_name: str): + ''' + Get the attribute of a quantity of the current section, or of the current section itself. 
+ ''' + tgt_def: Definition = tgt_property if tgt_property is None else retrieve_attribute( + self.m_def, tgt_property, attr_name)[0] + + # section attributes + if tgt_def is None: + if 'm_attributes' not in self.__dict__: + return None + + return self.__dict__['m_attributes'].get(attr_name, None) + + # quantity attributes + m_storage: Optional[dict] = self.__dict__.get(tgt_def.name, None) + if m_storage is None: + return None + + m_quantity: Optional[MQuantity] = m_storage.get(tgt_property, None) + if m_quantity is None: + return None + + return m_quantity.attributes.get(attr_name, None) def m_create( self, section_cls: Type[MSectionBound], sub_section_def: 'SubSection' = None, **kwargs) -> MSectionBound: ''' Creates a section instance and adds it to this section provided there is a - corresponding sub section. + corresponding subsection. Args: - section_cls: The section class for the sub-section to create - sub_section_def: If there are multiple sub-sections for the given class, - this must be used to explicitly state the sub-section definition. + section_cls: The section class for the subsection to create + sub_section_def: If there are multiple subsections for the given class, + this must be used to explicitly state the subsection definition. 
''' section_def = section_cls.m_def sub_section_defs = self.m_def.all_sub_sections_by_section.get(section_def, []) n_sub_section_defs = len(sub_section_defs) if n_sub_section_defs == 0: - raise TypeError(f'There is no sub section to hold a {section_def} in {self.m_def}.') + raise TypeError(f'There is no subsection to hold a {section_def} in {self.m_def}.') if n_sub_section_defs > 1 and sub_section_def is None: raise MetainfoError( - f'There are multiple sub section to hold a {section_def} in {self.m_def}, ' - f'but no sub-section was explicitly given.') + f'There are multiple subsection to hold a {section_def} in {self.m_def}, ' + f'but no subsection was explicitly given.') if sub_section_def is not None and sub_section_def not in sub_section_defs: raise MetainfoError( - f'The given sub-section class {section_cls} does not ' - f'match the given sub-section definition {sub_section_def}.') + f'The given subsection class {section_cls} does not ' + f'match the given subsection definition {sub_section_def}.') if sub_section_def is None: sub_section_def = sub_section_defs[0] @@ -1970,7 +1685,7 @@ class MSection(metaclass=MObjectMeta): # TODO find a way to make this a subclas return cast(MSectionBound, sub_section) def m_update(self, m_ignore_additional_keys: bool = False, **kwargs): - ''' Updates all quantities and sub-sections with the given arguments. ''' + ''' Updates all quantities and subsections with the given arguments. 
''' self.m_mod_count += 1 for name, value in kwargs.items(): @@ -1986,7 +1701,7 @@ class MSection(metaclass=MObjectMeta): # TODO find a way to make this a subclas for item in value: self.m_add_sub_section(prop, item) else: - raise TypeError(f'Sub section {prop.name} repeats, but no list was given') + raise TypeError(f'Subsection {prop.name} repeats, but no list was given') else: self.m_add_sub_section(prop, value) @@ -2025,8 +1740,8 @@ class MSection(metaclass=MObjectMeta): # TODO find a way to make this a subclas with_meta: Include information about the section definition, the sections position in its parent, and annotations. For Definition instances this information will be included regardless; the section definition will - always be included if the sub section definition references a base section - and the concrete sub section is derived from this base section. + always be included if the subsection definition references a base section + and the concrete subsection is derived from this base section. with_out_meta: Exclude information `with_meta` information, even from Definition instances. with_root_def: Include the m_def for the top-level section. This allows to @@ -2039,25 +1754,25 @@ class MSection(metaclass=MObjectMeta): # TODO find a way to make this a subclas Treat references as the sections and values they represent. References must not create circles; there is no check and danger of endless looping. categories: A list of category classes or category definitions that is used - to filter the included quantities and sub sections. Only applied to - properties of this section, not on sub-sections. Is overwritten + to filter the included quantities and subsections. Only applied to + properties of this section, not on subsections. Is overwritten by partial. - include: A function that determines if a property (quantity or sub-section) will + include: A function that determines if a property (quantity or subsection) will be included in the results. 
It takes the property definition and the current section as arguments. The function returns true for including and false for - excluding the property. Include is applied recursively on sub-sections. + excluding the property. Include is applied recursively on subsections. Overrides categories. - exclude: A function that determines if a property (quantity or sub-section) will + exclude: A function that determines if a property (quantity or subsection) will be excluded from the results. It takes the property definition and the current section as arguments. The function returns true for excluding and false for - including the property. Exclude is applied recursively on sub-sections. + including the property. Exclude is applied recursively on subsections. Overrides categories. transform: A function that determines serialized quantity values. It takes the quantity definition, current section, the default serialized value and the metainfo path with respect to the - document root as arguments. Depending where this is used, you + document root as arguments. Depending on where this is used, you might have to ensure that the result is JSON-serializable. By - default values are serialized to JSON according to the quantity + default, values are serialized to JSON according to the quantity type. 
''' if isinstance(self, Definition) and not with_out_meta: @@ -2085,7 +1800,6 @@ class MSection(metaclass=MObjectMeta): # TODO find a way to make this a subclas return False kwargs['exclude'] = exclude - else: category_defs: List[Category] = [] for category in categories: @@ -2094,25 +1808,25 @@ class MSection(metaclass=MObjectMeta): # TODO find a way to make this a subclas elif isinstance(category, Category): category_defs.append(category) else: - raise TypeError('%s is not a category' % category) + raise TypeError(f'{category} is not a category') def exclude(prop, section): # pylint: disable=function-redefined - return not any( - prop in category.get_all_definitions() - for category in category_defs) + return not any(prop in v.get_all_definitions() for v in category_defs) - def serialize_quantity(quantity, is_set, is_derived, path): + def serialize_quantity(quantity, is_set, is_derived, path, target_value=None): quantity_type = quantity.type - serialize: TypingCallable[[Any], Any] = str if resolve_references and isinstance(quantity_type, QuantityReference): quantity_type = quantity_type.target_quantity_def.type - is_ref = False + serialize: TypingCallable[[Any], Any] + + # define serialization functions for all valid data types + is_reference = False if isinstance(quantity_type, Reference): - is_ref = True + is_reference = True - def reference_serialize(value, path_override): + def serialize_reference(value, path_override): if resolve_references: assert not isinstance(quantity_type, QuantityReference) value = value.m_resolved() @@ -2121,41 +1835,34 @@ class MSection(metaclass=MObjectMeta): # TODO find a way to make this a subclas ref_kwargs["transform"] = lambda q, s, v, p: kwargs["transform"](q, s, v, path_override) return value.m_to_dict(**ref_kwargs) - elif isinstance(value, MProxy): + if isinstance(value, MProxy): if value.m_proxy_resolved is not None: return quantity_type.serialize(self, quantity, value) - else: - return 
quantity_type.serialize_proxy_value(value) + + return quantity_type.serialize_proxy_value(value) return quantity_type.serialize(self, quantity, value) - serialize = reference_serialize + serialize = serialize_reference elif isinstance(quantity_type, DataType): - def data_type_serialize(value): + def serialize_data_type(value): return quantity_type.serialize(self, quantity, value) - serialize = data_type_serialize + serialize = serialize_data_type - elif quantity_type in _primitive_types: - serialize = _primitive_types[quantity_type] + elif quantity_type in MTypes.primitive: - elif quantity_type in _types_numpy: - is_scalar = quantity.is_scalar + serialize = MTypes.primitive[quantity_type] - def serialize_dtype(value): - if isinstance(value, np.ndarray): - if is_scalar: - self.m_warning('numpy quantity has wrong shape', quantity=str(quantity)) - - return value.tolist() + elif quantity_type in MTypes.numpy: - else: - if not is_scalar: - self.m_warning('numpy quantity has wrong shape', quantity=str(quantity)) + def serialize_dtype(value): + if not (isinstance(value, np.ndarray) ^ quantity.is_scalar): + self.m_warning('numpy quantity has wrong shape', quantity=str(quantity)) - return value.item() + return value.tolist() if isinstance(value, np.ndarray) else value.item() serialize = serialize_dtype @@ -2166,16 +1873,15 @@ class MSection(metaclass=MObjectMeta): # TODO find a way to make this a subclas serialize = serialize_enum elif quantity_type == Any: - def _serialize(value: Any): + def serialize_any(value: Any): if type(value) not in [str, int, float, bool, np.bool_, list, dict, type(None)]: raise MetainfoError( - 'Only python primitives are allowed for Any typed non ' - 'virtual quantities: %s of quantity %s in section %s' % - (value, quantity, self)) + f'Only python primitives are allowed for Any typed non-virtual ' + f'quantities: {value} of quantity {quantity} in section {self}') return value - serialize = _serialize + serialize = serialize_any else: raise 
MetainfoError( @@ -2185,59 +1891,61 @@ class MSection(metaclass=MObjectMeta): # TODO find a way to make this a subclas if resolve_references and isinstance(quantity_type, QuantityReference): serialize_before_reference_resolution = serialize - def serialize_reference(value: Any): + def serialize_reference_v2(value: Any): value = getattr(value.m_resolved(), quantity_type.target_quantity_def.name) return serialize_before_reference_resolution(value) - serialize = serialize_reference + serialize = serialize_reference_v2 - if is_set: - value = self.__dict__[quantity.name] - elif is_derived: - try: - value = quantity.derived(self) - except Exception: - value = quantity.default - else: - value = quantity.default + # get the value to be serialized + # explicitly assigning the target value overrides the value from the section + if target_value is None: + if is_set: + target_value = self.__dict__[quantity.name] + elif is_derived: + try: + target_value = quantity.derived(self) + except Exception: + target_value = quantity.default + else: + target_value = quantity.default if transform is not None: serialize_before_transform = serialize def serialize_and_transform(value: Any, path_override=None): - if not is_ref: + if not is_reference: return transform(quantity, self, serialize_before_transform(value), path_override) return transform(quantity, self, serialize_before_transform(value, path_override), path_override) serialize = serialize_and_transform - if quantity_type in _types_numpy: - return serialize(value) + # serialization starts here + if quantity_type in MTypes.numpy: + return serialize(target_value) - elif len(quantity.shape) == 0: - if is_ref: - return serialize(value, path) - else: - return serialize(value) - elif len(quantity.shape) == 1: - if is_ref: - return [serialize(item, f"{path}/{index}") for index, item in enumerate(value)] - else: - return [serialize(item) for item in value] - else: - raise NotImplementedError(f'Higher shapes ({quantity.shape}) not supported: 
{quantity}') + if len(quantity.shape) == 0: + return serialize(target_value, path) if is_reference else serialize(target_value) + + if len(quantity.shape) == 1: + if not is_reference: + return [serialize(item) for item in target_value] + + return [serialize(item, f"{path}/{index}") for index, item in enumerate(target_value)] + + raise NotImplementedError(f'Higher shapes ({quantity.shape}) not supported: {quantity}') def serialize_attribute(attribute: 'Attribute', value: Any) -> Any: if isinstance(attribute.type, DataType): return attribute.type.serialize(self, None, value) - if attribute.type in _primitive_types: + if attribute.type in MTypes.primitive: if len(attribute.shape) == 0: - return _primitive_types[attribute.type](value) # type: ignore + return MTypes.primitive[attribute.type](value) # type: ignore - return [_primitive_types[attribute.type](v) for v in value] # type: ignore + return [MTypes.primitive[attribute.type](v) for v in value] # type: ignore if isinstance(attribute.type, MEnum): return str(value) @@ -2247,6 +1955,30 @@ class MSection(metaclass=MObjectMeta): # TODO find a way to make this a subclas return value + def collect_attributes(attr_map: dict, all_attr: dict): + result: dict = {} + for attr_key, attr_value in attr_map.items(): + attr_def = resolve_variadic_name(all_attr, attr_key) + result[attr_key] = serialize_attribute(attr_def, attr_value) + return result + + def serialize_full_quantity(quantity_def: 'Quantity', values: Dict[str, MQuantity]): + result: dict = {} + for m_quantity in values.values(): + m_result: dict = { + 'm_value': serialize_quantity(quantity_def, True, False, None, m_quantity.value)} + if m_quantity.unit: + m_result['m_unit'] = str(m_quantity.unit) + if m_quantity.original_unit: + m_result['m_original_unit'] = str(m_quantity.original_unit) + if m_quantity.attributes: + a_result: dict = collect_attributes(m_quantity.attributes, quantity_def.all_attributes) + if a_result: + m_result['m_attributes'] = a_result + 
result[m_quantity.name] = m_result + + return result + def serialize_annotation(annotation): if isinstance(annotation, Annotation): return annotation.m_to_dict() @@ -2303,9 +2035,9 @@ class MSection(metaclass=MObjectMeta): # TODO find a way to make this a subclas if with_def_id: yield 'm_def_id', self.m_def.definition_id elif self.m_parent and self.m_parent_sub_section.sub_section != self.m_def: - # The sub section definition's section def is different from our + # The subsection definition's section def is different from our # own section def. We are probably a specialized derived section - # from the base section that was used in the sub section def. To allow + # from the base section that was used in the subsection def. To allow # clients to recognize the concrete section def, we force the export # of the section def. yield 'm_def', m_def_reference() @@ -2330,17 +2062,20 @@ class MSection(metaclass=MObjectMeta): # TODO find a way to make this a subclas if not include_defaults or not quantity.m_is_set(Quantity.default): continue - yield name, serialize_quantity(quantity, is_set, False, path) - - for attribute in quantity.all_attributes.values(): - key = attribute.section_key - if key in self.__dict__: - yield key, serialize_attribute(attribute, self.__dict__[key]) + if not quantity.use_full_storage: + yield name, serialize_quantity(quantity, is_set, False, path) + else: + yield name, serialize_full_quantity(quantity, self.__dict__[quantity.name]) except ValueError as e: raise ValueError(f'Value error ({str(e)}) for {quantity}') - # sub sections + # section attributes + if 'm_attributes' in self.__dict__: + yield 'm_attributes', collect_attributes( + self.__dict__['m_attributes'], self.m_def.all_attributes) + + # subsections for name, sub_section_def in self.m_def.all_sub_sections.items(): if exclude(sub_section_def, self): continue @@ -2358,14 +2093,37 @@ class MSection(metaclass=MObjectMeta): # TODO find a way to make this a subclas is_set = True yield name, 
sub_section.m_to_dict(**kwargs) - if is_set: - for attribute in sub_section_def.all_attributes.values(): - key = attribute.section_key - if key in self.__dict__: - yield key, serialize_attribute(attribute, self.__dict__[key]) + # attributes are disabled for subsections + # if is_set: + # yield from collect_attributes(sub_section_def.all_attributes) return {key: value for key, value in items()} + @staticmethod + def __deserialize(section: 'MSection', quantity_def: 'Quantity', quantity_value: Any): + tgt_type = quantity_def.type + + if tgt_type in MTypes.numpy: + if not isinstance(quantity_value, list): + return tgt_type(quantity_value) + + return np.asarray(quantity_value).astype(tgt_type) + + if isinstance(tgt_type, DataType): + def __type_specific_deserialize(v): + return tgt_type.deserialize(section, quantity_def, v) + + dimensions = len(quantity_def.shape) + + if dimensions == 0: + return __type_specific_deserialize(quantity_value) + if dimensions == 1: + return list(__type_specific_deserialize(item) for item in quantity_value) + + raise MetainfoError('Only numpy quantities can have more than 1 dimension.') + + return quantity_value + def m_update_from_dict(self, dct: Dict[str, Any]) -> None: ''' Updates this section with the serialized data from the given dict, e.g. 
data @@ -2382,17 +2140,14 @@ class MSection(metaclass=MObjectMeta): # TODO find a way to make this a subclas if isinstance(property_def, SubSection): sub_section_def = property_def sub_section_value = dct.get(name) + sub_section_cls = sub_section_def.sub_section.section_cls if sub_section_def.repeats: for sub_section_dct in sub_section_value: - if sub_section_dct is None: - sub_section = None - else: - sub_section = sub_section_def.sub_section.section_cls.m_from_dict( - sub_section_dct, m_parent=self, m_context=m_context) + sub_section = None if sub_section_dct is None else sub_section_cls.m_from_dict( + sub_section_dct, m_parent=self, m_context=m_context) section.m_add_sub_section(sub_section_def, sub_section) - else: - sub_section = sub_section_def.sub_section.section_cls.m_from_dict( + sub_section = sub_section_cls.m_from_dict( sub_section_value, m_parent=self, m_context=m_context) section.m_add_sub_section(sub_section_def, sub_section) @@ -2400,34 +2155,36 @@ class MSection(metaclass=MObjectMeta): # TODO find a way to make this a subclas quantity_def = property_def quantity_value = dct[name] - if quantity_def.type in _types_numpy: - quantity_value = np.asarray(quantity_value) - - if isinstance(quantity_def.type, DataType): - dimensions = len(quantity_def.shape) - if dimensions == 0: - quantity_value = quantity_def.type.deserialize( - section, quantity_def, quantity_value) - elif dimensions == 1: - quantity_value = list( - quantity_def.type.deserialize(section, quantity_def, item) - for item in quantity_value) - else: - raise MetainfoError( - 'Only numpy quantities can have more than 1 dimension.') + if quantity_def.use_full_storage: + if not isinstance(quantity_value, dict): + raise MetainfoError('Full storage quantity must be a dict') - section.__dict__[property_def.name] = quantity_value # type: ignore + for each_name, each_quantity in quantity_value.items(): + try: + m_value = self.__deserialize(section, quantity_def, each_quantity['m_value']) + except 
KeyError: + raise MetainfoError(f'Set full storage quantity {property_def} must have a value') + m_quantity = MQuantity(each_name, m_value) + if 'm_unit' in each_quantity: + m_quantity.unit = units.parse_units(each_quantity['m_unit']) + if 'm_original_unit' in each_quantity: + m_quantity.original_unit = units.parse_units(each_quantity['m_original_unit']) + if 'm_attributes' in each_quantity: + m_quantity.attributes = each_quantity['m_attributes'] + + section.m_set(quantity_def, m_quantity) + else: + section.__dict__[property_def.name] = self.__deserialize(section, quantity_def, quantity_value) - for attribute in property_def.all_attributes.values(): - key = attribute.section_key - if key in dct: - section.m_set_attribute(property_def, attribute.name, dct[key]) + if 'm_attributes' in dct: + for attr_key, attr_value in dct['m_attributes'].items(): + section.m_set_section_attribute(attr_key, attr_value) @classmethod def m_from_dict(cls: Type[MSectionBound], data: Dict[str, Any], **kwargs) -> MSectionBound: ''' Creates a section from the given serializable data dictionary. - This is the 'opposite' of :func:`m_to_dict`. It takes a deserialized dict, e.g + This is the 'opposite' of :func:`m_to_dict`. It takes a deserialized dict, e.g. loaded from JSON, and turns it into a proper section, i.e. instance of the given section class. ''' @@ -2435,11 +2192,11 @@ class MSection(metaclass=MObjectMeta): # TODO find a way to make this a subclas @staticmethod def from_dict( - dct: Dict[str, Any], - cls: Type[MSectionBound] = None, - m_parent: 'MSection' = None, - m_context: 'Context' = None, - **kwargs + dct: Dict[str, Any], + cls: Type[MSectionBound] = None, + m_parent: 'MSection' = None, + m_context: 'Context' = None, + **kwargs ) -> MSectionBound: ''' Creates a section from the given serializable data dictionary. 
@@ -2469,7 +2226,7 @@ class MSection(metaclass=MObjectMeta): # TODO find a way to make this a subclas m_def_proxy.m_proxy_context = m_context cls = m_def_proxy.section_cls - # if 'm_def_id' exist, check if id matches + # if 'm_def_id' exists, check if id matches # in case of mismatch, retrieve the Package and use the corresponding section definition if 'm_def_id' in dct: if cls is None or cls.m_def is None or dct['m_def_id'] != cls.m_def.definition_id: @@ -2489,11 +2246,11 @@ class MSection(metaclass=MObjectMeta): # TODO find a way to make this a subclas section.m_parent = m_parent if 'm_annotations' in dct: - if isinstance(dct['m_annotations'], dict): - section.m_annotations.update(dct['m_annotations']) - else: + m_annotations = dct['m_annotations'] + if not isinstance(m_annotations, dict): raise MetainfoError( - f'The provided m_annotations is of a wrong type. {type(dct["m_annotations"]).__name__} was provided.') + f'The provided m_annotations is of a wrong type. {type(m_annotations).__name__} was provided.') + section.m_annotations.update(m_annotations) section.m_update_from_dict(dct) return section @@ -2506,7 +2263,7 @@ class MSection(metaclass=MObjectMeta): # TODO find a way to make this a subclas self, depth_first: bool = False, include_self: bool = False, stop: TypingCallable[['MSection'], bool] = None) -> Iterable['MSection']: ''' - Returns an iterable over all sub and sub subs sections. + Returns an iterable over all sub and sub subsections. 
Arguments: depth_first: A boolean indicating that children should be returned before @@ -2677,19 +2434,18 @@ class MSection(metaclass=MObjectMeta): # TODO find a way to make this a subclas section = section.m_get_sub_section(prop_def, index) except Exception: raise MetainfoReferenceError( - f'Could not resolve {path}, there is no sub section for ' + f'Could not resolve {path}, there is no subsection for ' f'{prop_name} at {index}') else: section = section.m_get_sub_section(prop_def, -1) if section is None: raise MetainfoReferenceError( - f'Could not resolve {path}, there is no sub section {prop_name}') + f'Could not resolve {path}, there is no subsection {prop_name}') elif isinstance(prop_def, Quantity): if len(path_stack) > 0: - raise MetainfoReferenceError( - f'Could not resolve {path}, no property {prop_name}') + raise MetainfoReferenceError(f'Could not resolve {path}, no property {prop_name}') if not section.m_is_set(prop_def): raise MetainfoReferenceError( @@ -2708,7 +2464,7 @@ class MSection(metaclass=MObjectMeta): # TODO find a way to make this a subclas case the annotation is returned, regardless of its type. In the second case, all names and list for names are iterated and all annotations of the given class are returned. - default: The default, if no annotation is found. None is the default default. + default: The default, if no annotation is found. None is the default `default`. as_list: Returns a list, no matter how many annotations have been found. 
''' if isinstance(key, str): @@ -2739,28 +2495,6 @@ class MSection(metaclass=MObjectMeta): # TODO find a way to make this a subclas raise TypeError('Key must be str or annotation class.') - def __validate_shape(self, quantity_def: 'Quantity', value): - if quantity_def == Quantity.default: - return True - - quantity_shape = quantity_def.shape - - if type(value) == np.ndarray: - value_shape = value.shape - elif isinstance(value, list) and not isinstance(value, MEnum): - value_shape = [len(value)] - else: - value_shape = [] - - if len(value_shape) != len(quantity_shape): - return False - - for i in range(0, len(value_shape)): - if not _Dimension.check_dimension(self, quantity_shape[i], value_shape[i]): - return False - - return True - def m_validate(self) -> Tuple[List[str], List[str]]: ''' Evaluates all constraints and shapes of this section and returns a list of errors. ''' errors: List[str] = [] @@ -2783,10 +2517,9 @@ class MSection(metaclass=MObjectMeta): # TODO find a way to make this a subclas errors.append(error_str) for quantity in self.m_def.all_quantities.values(): - if self.m_is_set(quantity) and not quantity.derived: - if not self.__validate_shape(quantity, self.m_get(quantity)): - errors.append( - f'The shape of quantity {quantity} does not match its value.') + if self.m_is_set(quantity) and not quantity.derived and quantity != Quantity.default: + if not validate_shape(self, quantity, self.m_get(quantity)): + errors.append(f'The shape of quantity {quantity} does not match its value.') return errors, warnings @@ -2847,20 +2580,17 @@ class MSection(metaclass=MObjectMeta): # TODO find a way to make this a subclas # name = self.m_get(name_quantity_def) try: name = self.__dict__['name'] - main = '%s:%s' % (name, m_section_name) + main = f'{name}:{m_section_name}' except KeyError: main = m_section_name more = '' - props = [ - prop - for prop in self.m_def.all_properties - if prop in self.__dict__] + props = [prop for prop in self.m_def.all_properties if prop in 
self.__dict__] if len(props) > 10: - more = ', +%d more properties' % (len(props) - 10) + more = f', +{len(props) - 10:d} more properties' - return '%s(%s%s)' % (main, ', '.join(props[0:10]), more) + return f'{main}({", ".join(props[0:10])}{more})' def __getitem__(self, key): try: @@ -2898,80 +2628,11 @@ class MSection(metaclass=MObjectMeta): # TODO find a way to make this a subclas metainfo_section.m_xpath('sccs[?energy_total < `1.0E-23`].system') ''' - def to_dict(entries): - if not isinstance(entries, list): - try: - entries = entries.m_to_dict() - except Exception: - pass - return entries - else: - return [to_dict(entry) for entry in entries] - - result = jmespath.search(expression, self) - return to_dict(result) - -# TODO implement and test the NotImplementErrors -class MSubSectionList(list): - def __init__(self, section: 'MSection', sub_section_def: 'SubSection'): - self.section = section - self.sub_section_def = sub_section_def - super().__init__() - - def __setitem__(self, key, value): - raise NotImplementedError('You can only append sub-sections.') - - def __delitem__(self, key): - old_value = self[key] - list.__delitem__(self, key) - for index in range(key, len(self)): - self[index].m_parent_index = index - - self.section._on_remove_sub_section(self.sub_section_def, old_value) - - def __setslice__(self, i, j, sequence): - raise NotImplementedError('You can only append sub-sections.') - - def __delslice__(self, i, j): - raise NotImplementedError('You can only append sub-sections.') - - def append(self, value): - list.append(self, value) - if value is not None: - self.section._on_add_sub_section(self.sub_section_def, value, len(self) - 1) - - def pop(self): - raise NotImplementedError('You can only append sub-sections.') - - def extend(self, newvalue): - start_index = len(self) - list.extend(self, newvalue) - for index, value in enumerate(newvalue): - self.section._on_add_sub_section( - self.sub_section_def, value, start_index + index) - - def insert(self, 
i, element): - raise NotImplementedError('You can only append sub-sections.') - - def remove(self, element): - raise NotImplementedError('You can only append sub-sections.') - - def reverse(self): - raise NotImplementedError('You can only append sub-sections.') - - def sort(self, cmpfunc=None): - raise NotImplementedError('You can only append sub-sections.') - - def clear(self): - old_values = list(self) - list.clear(self) - for old_value in old_values: - self.section._on_remove_sub_section(self.sub_section_def, old_value) + return to_dict(jmespath.search(expression, self)) class MCategory(metaclass=MObjectMeta): - m_def: 'Category' = None @classmethod @@ -3000,7 +2661,7 @@ class Definition(MSection): ''' :class:`Definition` is the common base class for all metainfo definitions. - All metainfo `definitions` (sections, quantities, sub-sections, packages, ...) share + All metainfo `definitions` (sections, quantities, subsections, packages, ...) share some common properties. Attributes: @@ -3016,22 +2677,22 @@ class Definition(MSection): definitions* (i.e. section definitions are represented by Python classes), lower case `snake_case` identifier for variables that hold *sections*, and for *properties* (i.e. fields in a Python class) we typically use lower - case `snake_case` identifier. Sub-sections are often prefixed with ``section_`` - to clearly separate sub-sections from quantities. + case `snake_case` identifier. Subsections are often prefixed with ``section_`` + to clearly separate subsections from quantities. Generally, you do not have to set this attribute manually, it will be derived from Python identifiers automatically. - description: The description can be an arbitrary human readable text that explains + description: The description can be an arbitrary human-readable text that explains what a definition is about. For section definitions you do not have to set this manually as it will be derived from the classes doc string. 
Quantity and - sub-section descriptions can also be taken from the containing section class' + subsection descriptions can also be taken from the containing section class' doc-string ``Attributes:`` section. links: Each definition can be accompanied by a list of URLs. These should point to resources that further explain the definition. - aliases: A list of alternative names. For quantities and sub-sections these + aliases: A list of alternative names. For quantities and subsections these can be used to access the respective property with a different name from its containing section. @@ -3059,6 +2720,14 @@ class Definition(MSection): more: A dictionary that contains additional definition properties that are not part of the metainfo. Those can be passed as additional kwargs to definition constructors. The values must be JSON serializable. + + attributes: + The attributes that can further qualify property values. + + all_attributes: + A virtual convenient property that provides all attributes as a dictionary + from attribute name to attribute. This includes meta attributes (starting with m_) + that are defined for all properties of the same kind (sub_section or quantity). 
''' name: 'Quantity' = _placeholder_quantity @@ -3070,8 +2739,12 @@ class Definition(MSection): variable: 'Quantity' = _placeholder_quantity more: 'Quantity' = _placeholder_quantity + attributes: 'SubSection' = None # type: ignore + + all_attributes: 'Quantity' = _placeholder_quantity + # store the hash object generated - _cached_hash: 'hashlib._Hash' = None # type: ignore + _cached_hash: _HASH_OBJ = None # type: ignore def __init__(self, *args, **kwargs): if is_bootstrapping: @@ -3089,7 +2762,9 @@ class Definition(MSection): more[key] = value super().__init__(*args, **new_kwargs) - self.more = more + self.more = more # type: ignore + + self._cached_hash = None # type: ignore def __init_metainfo__(self): ''' @@ -3161,14 +2836,19 @@ class Definition(MSection): return seed - def _hash(self, regenerate=False) -> 'hashlib._Hash': + def _hash(self, regenerate=False) -> _HASH_OBJ: ''' Generates a hash object based on the unique representation of the definition. ''' if self._cached_hash is None or regenerate: - self._cached_hash = _default_hash() + self._cached_hash = default_hash() self._cached_hash.update(self._hash_seed().encode('utf-8')) + if self.attributes: + for item in self.attributes: # pylint: disable=not-an-iterable + if id(self) != id(item): + self._cached_hash.update(item._hash(regenerate).digest()) + return self._cached_hash @property @@ -3183,7 +2863,7 @@ class Definition(MSection): class Attribute(Definition): ''' - Attributes can be used to qualify all properties (sub sections and quantities) + Attributes can be used to qualify all properties (subsections and quantities) with simple scalar values. 
Attributes: @@ -3195,19 +2875,14 @@ class Attribute(Definition): shape: 'Quantity' = _placeholder_quantity def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - - @property - def section_key(self): - return f'{cast(Property, self.m_parent).name}@{self.name}' + super(Attribute, self).__init__(*args, **kwargs) @constraint(warning=False) - def type_is_primitive(self): - value = self.type - if value in _primitive_types or value in _types_num: + def is_primitive(self): + if self.type in MTypes.primitive or self.type in MTypes.num: return - if isinstance(value, (MEnum, np.dtype, _Datetime)): + if isinstance(self.type, (MEnum, np.dtype, _Datetime)): return assert False, 'Attributes must have primitive type.' @@ -3219,32 +2894,16 @@ class Attribute(Definition): type_id['type_data'].sort() seed += json.dumps(type_id) for dim in self.shape: - seed += dim if isinstance(dim, str) else str(dim) + seed += str(dim) return seed class Property(Definition): ''' - A common base-class for section properties: sub sections and quantities. - - Attributes: - attributes: - The attributes that can further qualify property values. - all_attributes: - A virtual convenient property that provides all attributes as a dictionary - from attribute name to attribute. This includes meta attributes (starting with m_) - that are defined for all properties of the same kind (sub_section or quantity). - An example for a meta attribute is the quantity attribute `m_source_unit`, - which can be set on all quantities without defining it on each quantity - separately. + A common base-class for section properties: subsections and quantities. 
''' - m_attributes: List['Attribute'] = [] - - all_attributes: 'Quantity' = _placeholder_quantity - attributes: 'SubSection' = None - - def get_from_dict(self, data: Dict[str, Any], default_value: Any = None) -> Tuple[str, Any]: + def get_from_dict(self, data: Dict[str, Any], default_value: Any = None) -> Tuple[Optional[str], Any]: ''' Attempts to read the property from a dict. Returns the used alias and value as tuple. @@ -3254,7 +2913,7 @@ class Property(Definition): return name, data[name] return None, default_value - def get_base_property(self) -> 'Property': + def get_base_property(self) -> Optional['Property']: ''' Retrieve a potential overwritten property from a base-class. ''' @@ -3269,15 +2928,6 @@ class Property(Definition): return None - def _hash_seed(self) -> str: - seed = super(Property, self)._hash_seed() - - if self.attributes: - for item in self.attributes: # pylint: disable=not-an-iterable - seed += item._hash_seed() - - return seed - class Quantity(Property): ''' @@ -3386,7 +3036,7 @@ class Quantity(Property): virtual: A boolean that determines if this quantity is virtual. Virtual quantities can - be get/set like regular quantities, but their values are not (de-)serialized, + be got/set like regular quantities, but their values are not (de-)serialized, hence never permanently stored. 
''' @@ -3398,7 +3048,11 @@ class Quantity(Property): derived: 'Quantity' = _placeholder_quantity cached: 'Quantity' = _placeholder_quantity virtual: 'Quantity' = _placeholder_quantity + is_scalar: 'Quantity' = _placeholder_quantity + repeats: 'Quantity' = _placeholder_quantity + use_full_storage: 'Quantity' = _placeholder_quantity + flexible_unit: 'Quantity' = _placeholder_quantity # TODO derived_from = Quantity(type=Quantity, shape=['0..*']) def __init__(self, *args, **kwargs): @@ -3408,14 +3062,14 @@ class Quantity(Property): super().__init_metainfo__() if self.derived is not None: - self.virtual = True + self.virtual = True # type: ignore # replace the quantity implementation with an optimized version for the most # primitive quantities if applicable - is_primitive = not self.derived + is_primitive = not self.derived and not self.use_full_storage is_primitive = is_primitive and len(self.shape) <= 1 is_primitive = is_primitive and self.type in [str, bool, float, int] - is_primitive = is_primitive and self.type not in _types_num_numpy + is_primitive = is_primitive and self.type not in MTypes.num_numpy if is_primitive: self._default = self.default self._name = self.name @@ -3423,12 +3077,19 @@ class Quantity(Property): self._list = len(self.shape) == 1 self.__class__ = PrimitiveQuantity - _Unit.check_dimensionality(self, self.unit) + check_dimensionality(self, self.unit) def __get__(self, obj, cls): try: value = obj.__dict__[self.name] - + # appears to be a quantity using full storage + # cannot use .use_full_storage as this is not set yet + if isinstance(value, dict) and self.name in value: + m_quantity = value[self.name] + if m_quantity.unit: + value = units.Quantity(m_quantity.value, m_quantity.unit) + else: + value = m_quantity.value except KeyError: if self.derived is not None: try: @@ -3438,10 +3099,10 @@ class Quantity(Property): cached[0] = obj.m_mod_count cached[1] = self.derived(obj) # pylint: disable=not-callable return cached[1] - else: - return 
self.derived(obj) # pylint: disable=not-callable + + return self.derived(obj) # pylint: disable=not-callable except Exception as e: - raise DeriveError('Could not derive value for %s: %s' % (self, str(e))) + raise DeriveError(f'Could not derive value for {self}: {str(e)}') value = self.default @@ -3464,8 +3125,12 @@ class Quantity(Property): raise MetainfoError( 'Only numpy arrays and dtypes can be used for higher dimensional quantities.') - if self.unit is not None and self.type in _types_num: - value = value * self.unit + # no need to append unit if it is already a quantity from full storage + if isinstance(value, units.Quantity): + return value + + if self.unit is not None and self.type in MTypes.num: + return value * self.unit return value @@ -3513,7 +3178,7 @@ class Quantity(Property): @constraint(warning=True) def higher_shapes_require_dtype(self): if len(self.shape) > 1: - assert self.type in _types_numpy, \ + assert self.type in MTypes.numpy, \ f'Higher dimensional quantities ({self}) need a dtype and will be treated as numpy arrays.' def _hash_seed(self) -> str: @@ -3597,6 +3262,7 @@ class DirectQuantity(Quantity): class PrimitiveQuantity(Quantity): ''' An optimized replacement for Quantity suitable for primitive properties. ''' + def __get__(self, obj, cls): try: value = obj.__dict__[self._name] @@ -3604,8 +3270,8 @@ class PrimitiveQuantity(Quantity): value = self._default except AttributeError: return self - if value is not None and self.unit is not None and self.type in _types_num: - value = value * self.unit + if value is not None and self.unit is not None and self.type in MTypes.num: + return value * self.unit # type: ignore return value def __set__(self, obj, value): @@ -3618,16 +3284,13 @@ class PrimitiveQuantity(Quantity): # Handle pint quantities. Conversion is done automatically between # units. Notice that currently converting from float to int or vice # versa is not allowed for primitive types. 
- if isinstance(value, pint.quantity._Quantity): + if isinstance(value, pint.Quantity): if self.unit is None: - raise TypeError( - f'The quantity {self} does not have a unit, but value {value} has.' - ) - if self.type in _types_int: + raise TypeError(f'The quantity {self} does not have a unit, but value {value} has.') + if self.type in MTypes.int: raise TypeError( f'Cannot save data with unit conversion into the quantity {self} ' - 'with integer data type due to possible precision loss.' - ) + 'with integer data type due to possible precision loss.') value = value.to(self.unit).magnitude if self._list: @@ -3635,8 +3298,7 @@ class PrimitiveQuantity(Quantity): if hasattr(value, 'tolist'): value = value.tolist() else: - raise TypeError( - f'The value {value} for quantity {self} has not shape {self.shape}') + raise TypeError(f'The value {value} for quantity {self} has not shape {self.shape}') if any(v is not None and type(v) != self._type for v in value): raise TypeError( @@ -3654,22 +3316,22 @@ class PrimitiveQuantity(Quantity): class SubSection(Property): ''' - Like quantities, sub-sections are defined in a `section class` as attributes - of this class. An like quantities, each sub-section definition becomes a property of - the corresponding `section definition` (parent). A sub-section definition references - another `section definition` as the sub-section (child). As a consequence, parent - `section instances` can contain child `section instances` as sub-sections. + Like quantities, subsections are defined in a `section class` as attributes + of this class. Unlike quantities, each subsection definition becomes a property of + the corresponding `section definition` (parent). A subsection definition references + another `section definition` as the subsection (child). As a consequence, parent + `section instances` can contain child `section instances` as subsections. 
- Contrary to the old NOMAD metainfo, we distinguish between sub-section the section - and sub-section the property. This allows to use on child `section definition` as - sub-section of many different parent `section definitions`. + Contrary to the old NOMAD metainfo, we distinguish between subsection the section + and subsection the property. This allows to use on child `section definition` as + subsection of many parent `section definitions`. Attributes: sub_section: A :class:`Section` or Python class object for a `section class`. This will be the child `section definition`. The defining section the child `section definition`. - repeats: A boolean that determines whether this sub-section can appear multiple + repeats: A boolean that determines whether this subsection can appear multiple times in the parent section. ''' @@ -3702,7 +3364,7 @@ class SubSection(Property): if value is not None: raise NotImplementedError( - 'Cannot set a repeating sub section directly, modify the list, e.a. via append.') + 'Cannot set a repeating subsection directly, modify the list, e.a. 
via append.') obj.m_get_sub_sections(self).clear() @@ -3713,25 +3375,24 @@ class SubSection(Property): obj.m_add_sub_section(self, value) def __delete__(self, obj): - raise NotImplementedError('Deleting sub sections is not supported.') + raise NotImplementedError('Deleting subsections is not supported.') @constraint(warning=False) def has_sub_section(self): assert self.sub_section is not None, \ - 'Each sub section must define the section that is used as sub section via the "sub_section" quantity' + 'Each subsection must define the section that is used as subsection via the "sub_section" quantity' try: assert not isinstance(self.sub_section.m_resolved(), MProxy), 'Cannot resolve "sub_section"' except MetainfoReferenceError as e: assert False, f'Cannot resolve "sub_section": {str(e)}' - def _hash(self, regenerate=False) -> 'hashlib._Hash': + def _hash(self, regenerate=False) -> _HASH_OBJ: if self._cached_hash is not None and not regenerate: return self._cached_hash - base_id = f'{super(SubSection, self)._hash_seed()}{"T" if self.repeats else "F"}' + self._cached_hash = super(SubSection, self)._hash(regenerate) - self._cached_hash = _default_hash() - self._cached_hash.update(base_id.encode('utf-8')) + self._cached_hash.update(('T' if self.repeats else 'F').encode('utf-8')) for item in itertools.chain( self.sub_section.quantities, @@ -3773,20 +3434,20 @@ class Section(Definition): Will be automatically set from the `section class`. sub_sections: - The sub-section definitions of this section definition as list of :class:`SubSection`. + The subsection definitions of this section definition as list of :class:`SubSection`. Will be automatically set from the `section class`. base_sections: A list of `section definitions` (:class:`Section`). By default this definition will - inherit all quantity and sub section definitions from the given section definitions. + inherit all quantity and subsection definitions from the given section definitions. 
This behavior might be altered with ``extends_base_section``. If there are no base sections to define, you have to use :class:`MSection`. - The Metainfo supports two inheritance mechanism. By default it behaves like regular + The Metainfo supports two inheritance mechanism. By default, it behaves like regular Python inheritance and the class inherits all its base classes' properties. The other mode (enabled via ``extends_base_section=True``), will - add all sub-class properties to the base-class. This is used throughout the NOMAD metainfo + add all subclass properties to the base-class. This is used throughout the NOMAD metainfo to add code-specific metadata to common section definitions. Here is an example: .. code-block:: python @@ -3799,18 +3460,18 @@ class Section(Definition): x_vasp_some_incar_parameter = Quantity(str) method = Method() - methid.x_vasp_same_incar_parameter = 'value' + method.x_vasp_same_incar_parameter = 'value' In this example, the section class ``VASPMethod`` defines a section definition that inherits from section definition ``Method``. The quantity `x_vasp_some_incar_parameter` will be added to `Method` and can be used in regular `Method` instances. - The following :class:`Section` attributes maniputlate the inheritance semantics: + The following :class:`Section` attributes manipulate the inheritance semantics: Attributes: extends_base_section: If True, this definition must have exactly one ``base_sections``. - Instead of inheriting properties, the quantity and sub-section definitions + Instead of inheriting properties, the quantity and subsection definitions of this section will be added to the base section. This allows to add further properties to an existing section definition. @@ -3825,11 +3486,11 @@ class Section(Definition): inheriting_sections: A list of `section definitions` (:class:`Section`). These are those sections - that inherit (i.e. are sub classes) of this section. + that inherit (i.e. are subclasses) of this section. 
- Besides defining quantities and sub-sections, a section definition can also provide - constraints that are used to validate a section and its quantities and sub-sections. + Besides defining quantities and subsections, a section definition can also provide + constraints that are used to validate a section and its quantities and subsections. Constraints allow to define more specific data structures beyond types and shapes. But constraints are not enforced automatically, sections have to be explicitly validated in order to evaluate constraints. @@ -3853,7 +3514,7 @@ class Section(Definition): Constraints are rules that a section must fulfil to be valid. This allows to implement semantic checks that go behind mere type or shape checks. This quantity takes the names of constraints as string. Constraints have to be implemented as methods - with the :func:`constraint` decorator. They can raise :class:`ConstraintVialated` + with the :func:`constraint` decorator. They can raise :class:`ConstraintViolated` or an AssertionError to indicate that the constraint is not fulfilled for the ``self`` section. This quantity will be set automatically from all constraint methods in the respective section class. To run validation of a section use :py:meth:`MSection.m_validate`. @@ -3876,7 +3537,7 @@ class Section(Definition): A helper attribute that gives direct and indirect inheriting sections. all_properties: - A helper attribute that gives all properties (sub section and quantity) definitions + A helper attribute that gives all properties (subsection and quantity) definitions including inherited properties and properties from extending sections as a dictionary with names and definitions. @@ -3886,12 +3547,12 @@ class Section(Definition): to :class:`Quantity`. 
all_sub_sections: - A helper attribute that gives all sub-section definition including inherited ones + A helper attribute that gives all subsection definition including inherited ones and ones from extending sections as a dictionary that maps names (strings) to :class:`SubSection`. all_sub_sections_by_section: - A helper attribute that gives all sub-section definition including inherited ones + A helper attribute that gives all subsection definition including inherited ones and ones from extending sections as a dictionary that maps section classes (i.e. Python class objects) to lists of :class:`SubSection`. @@ -3905,7 +3566,7 @@ class Section(Definition): their aliases by name. path: Shortest path from a root section to this section. This is not the path - in the metainfo schema (`m_path`) but a archive path in potential data. + in the metainfo schema (`m_path`) but an archive path in potential data. event_handlers: Event handler are functions that get called when the section data is changed. @@ -3948,7 +3609,7 @@ class Section(Definition): path: 'Quantity' = _placeholder_quantity def __init__(self, *args, validate: bool = True, **kwargs): - self._section_cls: Type[MSection] = None + self._section_cls: Type[MSection] = None # type: ignore super().__init__(*args, **kwargs) self.validate = validate @@ -3967,9 +3628,7 @@ class Section(Definition): # Create a section class if this does not exist. This happens if the section # is not created through a class definition. - attrs = { - prop.name: prop - for prop in self.quantities + self.sub_sections} + attrs = {prop.name: prop for prop in itertools.chain(self.quantities, self.sub_sections)} for name, inner_section_def in self.all_inner_section_definitions.items(): attrs[name] = inner_section_def.section_cls @@ -3991,11 +3650,11 @@ class Section(Definition): base_sections_count = len(self.base_sections) if base_sections_count == 0: raise MetainfoError( - 'Section %s extend the base section, but has no base section.' 
% self) + f'Section {self} extend the base section, but has no base section.') if base_sections_count > 1: raise MetainfoError( - 'Section %s extend the base section, but has more than one base section' % self) + f'Section {self} extend the base section, but has more than one base section.') base_section = self.base_sections[0] for name, attr in self.section_cls.__dict__.items(): @@ -4015,7 +3674,7 @@ class Section(Definition): for base_section in self.all_base_sections: inherited_properties.update(**base_section.all_properties) - for property in self.quantities + self.sub_sections: + for property in itertools.chain(self.quantities, self.sub_sections): inherited_property = inherited_properties.get(property.name) if inherited_property is None: continue @@ -4028,93 +3687,72 @@ class Section(Definition): def unique_names(self): names: Set[str] = set() for base in self.extending_sections: - for quantity in base.quantities + base.sub_sections: + for quantity in itertools.chain(base.quantities, base.sub_sections): for alias in quantity.aliases: names.add(alias) names.add(quantity.name) - for def_list in [self.quantities, self.sub_sections]: - for definition in def_list: - assert definition.name not in names, \ - f'All names in a section must be unique. ' \ - f'Name {definition.name} of {definition} in {definition.m_parent} already exists in {self}.' - names.add(definition.name) - for alias in definition.aliases: - assert alias not in names, \ - f'All names (incl. aliases) in a section must be unique. ' \ - f'Alias {alias} of {definition} in {definition.m_parent} already exists in {self}.' - names.add(alias) + for definition in itertools.chain(self.quantities, self.sub_sections): + assert definition.name not in names, \ + f'All names in a section must be unique. ' \ + f'Name {definition.name} of {definition} in {definition.m_parent} already exists in {self}.' 
+ names.add(definition.name) + for alias in definition.aliases: + assert alias not in names, \ + f'All names (incl. aliases) in a section must be unique. ' \ + f'Alias {alias} of {definition} in {definition.m_parent} already exists in {self}.' + names.add(alias) @constraint def compatible_eln_annotation(self): def assert_component(component_name, quantity_name, quantity_type, accepted_components): assert component_name in accepted_components, \ - 'The component `%s` is not compatible with the quantity `%s` of the type `%s`. Accepted components: %s.' \ - % (component_name, quantity_name, quantity_type, ', '.join(accepted_components)) - - for def_list in [self.quantities, self.sub_sections]: - for definition in def_list: - if definition.m_annotations and 'eln' in definition.m_annotations \ - and definition.m_annotations['eln'] and 'component' in definition.m_annotations['eln']: - component = definition.m_annotations['eln']['component'] - if component: - if isinstance(definition.type, type): - if definition.type.__name__ == 'str': - assert_component( - component, definition.name, definition.type.__name__, - validElnComponents['str'] - ) - elif definition.type.__name__ == 'bool': - assert_component( - component, definition.name, definition.type.__name__, validElnComponents['bool'] - ) - elif definition.type in _types_num_python: - assert_component( - component, definition.name, definition.type.__name__, - validElnComponents['number'] - ) - elif definition.type in _types_num_numpy: - assert_component( - component, definition.name, f'np.{definition.type.__name__}', - validElnComponents['number'] - ) - elif definition.type.__name__ == 'User': - assert_component( - component, definition.name, definition.type.__name__, - validElnComponents['user'] - ) - elif definition.type.__name__ == 'Author': - assert_component( - component, definition.name, definition.type.__name__, - validElnComponents['author'] - ) - elif isinstance(definition.type, _Datetime): - assert_component( - 
component, definition.name, type(definition.type).__name__, - validElnComponents['datetime'] - ) - elif isinstance(definition.type, MEnum): - assert_component( - component, definition.name, type(definition.type).__name__, - validElnComponents['enum'] - ) - elif isinstance(definition.type, Reference): - target_class = definition.type.target_section_def.section_cls - if target_class.__name__ == 'User': - assert_component( - component, definition.name, target_class.__name__, - validElnComponents['user'] - ) - elif target_class.__name__ == 'Author': - assert_component( - component, definition.name, target_class.__name__, - validElnComponents['author'] - ) - else: - assert_component( - component, definition.name, type(definition.type).__name__, - validElnComponents['reference'] - ) + f'The component `{component_name}` is not compatible with the quantity `{quantity_name}` ' \ + f'of the type `{quantity_type}`. Accepted components: {", ".join(accepted_components)}.' + + for definition in itertools.chain(self.quantities, self.sub_sections): + if not definition.m_annotations or 'eln' not in definition.m_annotations or not \ + definition.m_annotations['eln'] or 'component' not in definition.m_annotations['eln']: + continue + component = definition.m_annotations['eln']['component'] + if not component: + continue + if isinstance(definition.type, type): + if definition.type.__name__ == 'str': + assert_component( + component, definition.name, definition.type.__name__, MTypes.eln_component['str']) + elif definition.type.__name__ == 'bool': + assert_component( + component, definition.name, definition.type.__name__, MTypes.eln_component['bool']) + elif definition.type in MTypes.num_python: + assert_component( + component, definition.name, definition.type.__name__, MTypes.eln_component['number']) + elif definition.type in MTypes.num_numpy: + assert_component( + component, definition.name, f'np.{definition.type.__name__}', MTypes.eln_component['number']) + elif 
definition.type.__name__ == 'User': + assert_component( + component, definition.name, definition.type.__name__, MTypes.eln_component['user']) + elif definition.type.__name__ == 'Author': + assert_component( + component, definition.name, definition.type.__name__, MTypes.eln_component['author']) + elif isinstance(definition.type, _Datetime): + assert_component( + component, definition.name, type(definition.type).__name__, MTypes.eln_component['datetime']) + elif isinstance(definition.type, MEnum): + assert_component( + component, definition.name, type(definition.type).__name__, MTypes.eln_component['enum']) + elif isinstance(definition.type, Reference): + target_class = definition.type.target_section_def.section_cls + if target_class.__name__ == 'User': + assert_component( + component, definition.name, target_class.__name__, MTypes.eln_component['user']) + elif target_class.__name__ == 'Author': + assert_component( + component, definition.name, target_class.__name__, MTypes.eln_component['author']) + else: + assert_component( + component, definition.name, type(definition.type).__name__, MTypes.eln_component['reference']) @constraint def resolved_base_sections(self): @@ -4154,7 +3792,7 @@ class Section(Definition): return super(Section, cls).m_from_dict(data, **kwargs) - def _hash(self, regenerate=False) -> 'hashlib._Hash': + def _hash(self, regenerate=False) -> _HASH_OBJ: if self._cached_hash is not None and not regenerate: return self._cached_hash @@ -4172,19 +3810,6 @@ class Section(Definition): return self._cached_hash -def dict_to_named_list(data): - if not isinstance(data, dict): - return data - - results = [] - for key, value in data.items(): - if value is None: - value = {} - value.update(dict(name=key)) - results.append(value) - return results - - class Package(Definition): ''' Packages organize metainfo definitions alongside Python modules @@ -4305,7 +3930,7 @@ class Package(Definition): if archive.metadata.entry_name is None and self.name and self.name != 
'*': archive.metadata.entry_name = self.name - def _hash(self, regenerate=False) -> 'hashlib._Hash': + def _hash(self, regenerate=False) -> _HASH_OBJ: if self._cached_hash is not None and not regenerate: return self._cached_hash @@ -4356,37 +3981,6 @@ class Category(Definition): return definitions -class Annotation: - ''' Base class for annotations. ''' - - def m_to_dict(self): - ''' - Returns a JSON serializable representation that is used for exporting the - annotation to JSON. - ''' - return str(self.__class__.__name__) - - -class DefinitionAnnotation(Annotation): - ''' Base class for annotations for definitions. ''' - - def __init__(self): - self.definition: Definition = None - - def init_annotation(self, definition: Definition): - self.definition = definition - - -class SectionAnnotation(DefinitionAnnotation): - ''' - Special annotation class for section definition that allows to auto add annotations - to section instances. - ''' - - def new(self, section) -> Dict[str, Any]: - return {} - - Section.m_def = Section(name='Section') Section.m_def.m_def = Section.m_def Section.m_def._section_cls = Section @@ -4399,6 +3993,9 @@ SubSection.m_def = Section(name='SubSection') Category.m_def = Section(name='Category') Package.m_def = Section(name='Package') +Attribute.type = DirectQuantity(type=QuantityType, name='type') +Attribute.shape = DirectQuantity(type=Dimension, shape=['0..*'], name='shape', default=[]) + Definition.name = DirectQuantity(type=str, name='name') Definition.description = Quantity(type=str, name='description') Definition.links = Quantity(type=str, shape=['0..*'], name='links') @@ -4408,6 +4005,19 @@ Definition.deprecated = Quantity(type=str, name='deprecated') Definition.aliases = Quantity(type=str, shape=['0..*'], default=[], name='aliases') Definition.variable = Quantity(type=bool, name='variable', default=False) Definition.more = Quantity(type=JSON, name='more', default={}) +Definition.attributes = SubSection(sub_section=Attribute.m_def, 
name='attributes', repeats=True) + + +@derived(cached=True, virtual=True) # Virtual has to be set manually, due to bootstrapping hen-egg problems +def all_attributes(self: Property) -> Dict[str, Attribute]: + result: Dict[str, Attribute] = {} + for definition in self.attributes: + result[definition.name] = definition + + return result + + +Definition.all_attributes = all_attributes Section.quantities = SubSection( sub_section=Quantity.m_def, name='quantities', repeats=True) @@ -4480,7 +4090,7 @@ def all_inheriting_sections(self) -> List[Section]: def all_properties(self) -> Dict[str, Union[SubSection, Quantity]]: result: Dict[str, Union[SubSection, Quantity]] = dict() for section in self.inherited_sections: - for definition in section.quantities + section.sub_sections: + for definition in itertools.chain(section.quantities, section.sub_sections): result[definition.name] = definition return result @@ -4517,7 +4127,7 @@ def all_sub_sections_by_section(self) -> Dict[Section, List[SubSection]]: def all_aliases(self) -> Dict[str, Union[SubSection, Quantity]]: result: Dict[str, Union[SubSection, Quantity]] = dict() for section in self.inherited_sections: - for definition in section.quantities + section.sub_sections: + for definition in itertools.chain(section.quantities, section.sub_sections): for alias in definition.aliases: result[alias] = definition result[definition.name] = definition @@ -4540,20 +4150,14 @@ def all_inner_section_definitions(self) -> Dict[str, Section]: @derived(cached=True) def has_variable_names(self) -> bool: - for property in self.all_properties.values(): - if property.variable: - return True - return False + return any(value.variable for value in self.all_properties.values()) @derived(cached=True) def section_path(self) -> str: used_in_sub_sections: List[SubSection] = SubSection._used_sections.get(self, []) # type: ignore if len(used_in_sub_sections) == 0: - if self.name == 'EntryArchive': - return None - else: - return '__no_archive_path__' + 
return None if self.name == 'EntryArchive' else '__no_archive_path__' if len(used_in_sub_sections) > 1: return '__ambiguous__' @@ -4582,28 +4186,6 @@ Section.all_inner_section_definitions = all_inner_section_definitions Section.has_variable_names = has_variable_names Section.path = section_path -Attribute.type = DirectQuantity(type=QuantityType, name='type') -Attribute.shape = DirectQuantity(type=Dimension, shape=['0..*'], name='shape', default=[]) - - -@derived(cached=True, virtual=True) # Virtual has to be set manually, due to bootstrapping hen-egg problems -def all_attributes(self: Property) -> Dict[str, Attribute]: - result: Dict[str, Attribute] = {} - for definition in self.attributes: - result[definition.name] = definition - - for section_def in self.m_def.all_base_sections + [self.m_def]: - m_attributes = getattr(section_def.section_cls, 'm_attributes', []) - for m_attribute in m_attributes: - m_attribute = m_attribute.m_copy(parent=self) - result[m_attribute.name] = m_attribute - return result - - -Property.attributes = SubSection( - sub_section=Attribute.m_def, name='attributes', repeats=True) -Property.all_attributes = all_attributes - SubSection.repeats = Quantity(type=bool, name='repeats', default=False) SubSection.sub_section = Quantity( @@ -4620,6 +4202,11 @@ Quantity.derived = DirectQuantity(type=Callable, default=None, name='derived', v Quantity.virtual = DirectQuantity(type=bool, default=False, name='virtual') Quantity.is_scalar = Quantity( type=bool, name='is_scalar', derived=lambda quantity: len(quantity.shape) == 0) +Quantity.use_full_storage = Quantity( + type=bool, name='use_full_storage', + derived=lambda quantity: quantity.repeats or quantity.variable or len(quantity.attributes) > 0) +Quantity.flexible_unit = Quantity(type=bool, name='flexible_unit', default=False) +Quantity.repeats = Quantity(type=bool, name='repeats', default=False) Quantity.cached = Quantity(type=bool, name='cached', default=False) Package.section_definitions = 
SubSection( @@ -4633,13 +4220,13 @@ Package.category_definitions = SubSection( @derived(cached=True) def all_definitions(self): - all_definitions: Dict[str, Definition] = dict() + result: Dict[str, Definition] = dict() for sub_section_def in [Package.section_definitions, Package.category_definitions]: for definition in self.m_get_sub_sections(sub_section_def): - all_definitions[definition.name] = definition + result[definition.name] = definition for alias in definition.aliases: - all_definitions[alias] = definition - return all_definitions + result[alias] = definition + return result @derived(cached=True) @@ -4648,7 +4235,7 @@ def dependencies(self): All packages which have definitions that definitions from this package need. Being 'needed' includes categories, base sections, and referenced definitions. ''' - dependencies: Set[Package] = set() + result = set() for content in self.m_all_contents(): to_add = None if isinstance(content, Definition): @@ -4671,19 +4258,16 @@ def dependencies(self): more_dependencies.append(to_add) while len(more_dependencies) > 0: dependency = more_dependencies.pop() - if dependency not in dependencies: - dependencies.add(dependency) + if dependency not in result: + result.add(dependency) more_dependencies.extend(dependency.dependencies) - return dependencies + return result Package.all_definitions = all_definitions Package.dependencies = dependencies -Property.m_attributes.append(Attribute(name='m_source_name', type=str)) -Quantity.m_attributes.append(Attribute(name='m_source_unit', type=Unit)) - is_bootstrapping = False Definition.__init_cls__() @@ -4742,7 +4326,8 @@ class Environment(MSection): defs = self.resolve_definitions(name, section_cls, filter=filter) if len(defs) == 1: return defs[0] - elif len(defs) > 1: - raise KeyError('Could not uniquely identify %s, candidates are %s' % (name, defs)) - else: - raise KeyError('Could not resolve %s' % name) + + if len(defs) > 1: + raise KeyError(f'Could not uniquely identify {name}, 
candidates are {defs}') + + raise KeyError(f'Could not resolve {name}') diff --git a/nomad/metainfo/metainfo_utility.py b/nomad/metainfo/metainfo_utility.py new file mode 100644 index 0000000000000000000000000000000000000000..2bc14fb6b2705ec84e03257f0cc715fddc924978 --- /dev/null +++ b/nomad/metainfo/metainfo_utility.py @@ -0,0 +1,759 @@ +# +# Copyright The NOMAD Authors. +# +# This file is part of NOMAD. See https://nomad-lab.eu for further info. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import email.utils +import hashlib +import re +from dataclasses import dataclass +from datetime import datetime, date +from difflib import SequenceMatcher +from typing import Sequence, Dict, Any, Optional, Union, Tuple +from urllib.parse import SplitResult, urlsplit, urlunsplit + +import aniso8601 +import numpy as np +import pandas as pd +import pint +import pytz + +from nomad.units import ureg + +__hash_method = 'sha1' # choose from hashlib.algorithms_guaranteed + +_storage_suffix = '' + +_delta_symbols = {'delta_', 'Δ'} + + +@dataclass +class MRegEx: + # matches the range of indices, e.g., 1..3, 0..* + index_range = re.compile(r'(\d)\.\.(\d|\*)') + # matches the reserved name + reserved_name = re.compile(r'^(m_|a_|_+).*$') + # matches for example + # Python package/module name: nomad.metainfo.section + # Python name + 40 digits id: nomad.metainfo.section@1a2b3c... 
+ python_definition = re.compile(r'^\w*(\.\w*)*(@\w{40})?$') + # matches url + url = re.compile( + r'^(?:http|ftp)s?://' + r'(?:(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+(?:[A-Z]{2,6}\.?|[A-Z0-9-]{2,}\.?)|' + r'localhost|' + r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})' + r'(?::\d+)?' + r'(?:/?|[/?]\S+)$', re.IGNORECASE) + + +@dataclass +class MTypes: + # todo: account for bytes which cannot be naturally serialized to JSON + primitive = { + str: lambda v: None if v is None else str(v), + int: lambda v: None if v is None else int(v), + float: lambda v: None if v is None else float(v), + bool: lambda v: None if v is None else bool(v), + np.bool_: lambda v: None if v is None else bool(v)} + + primitive_name = {**{v.__name__: v for v in primitive}, 'string': str, 'boolean': bool} + + int_numpy = {np.int8, np.int16, np.int32, np.int64, np.uint8, np.uint16, np.uint32, np.uint64} + int_python = {int} + int = int_python | int_numpy + float_numpy = {np.float16, np.float32, np.float64} + float_python = {float} + float = float_python | float_numpy + num_numpy = int_numpy | float_numpy + num_python = int_python | float_python + num = num_python | num_numpy + str_numpy = {np.str_} + bool_numpy = {np.bool_} + numpy = num_numpy | str_numpy | bool_numpy + + eln = { + 'str': ['str', 'string'], + 'bool': ['bool', 'boolean'], + 'number': [x.__name__ for x in num_python] + [f'np.{x.__name__}' for x in num_numpy], + 'datetime': ['Datetime'], + 'enum': ['{type_kind: Enum, type_data: [Operator, Responsible_person]}'], + 'user': ['User'], + 'author': ['Author'], + 'reference': [''] + } + + eln_component = { + 'str': ['StringEditQuantity', 'FileEditQuantity', 'RichTextEditQuantity', 'EnumEditQuantity'], + 'bool': ['BoolEditQuantity'], + 'number': ['NumberEditQuantity', 'SliderEditQuantity'], + 'datetime': ['DateTimeEditQuantity'], + 'enum': ['EnumEditQuantity', 'AutocompleteEditQuantity', 'RadioEnumEditQuantity'], + 'user': ['UserEditQuantity'], + 'author': ['AuthorEditQuantity'], + 'reference': 
['ReferenceEditQuantity'] + } + + +class MEnum(Sequence): + ''' + Allows to define string types with values limited to a pre-set list of possible values. + + The allowed values can be provided as a list of strings, the keys of which will be identical to values. + Alternatively, they can be provided as key-value pairs. + + For example: + some_variable = MEnum(['a', 'b', 'c']) + some_variable = MEnum(a='a', b='b', c='c') + + The values are stored in __dict__ and can be accessed as attributes: + some_variable.a # gives 'a' + + For description of each possible value, it can be organized into a dictionary. + + For example: + some_variable = MEnum(['a', 'b', 'c'], m_descriptions={'a': 'first', 'b': 'second', 'c': 'third'}) + ''' + + def __init__(self, *args, **kwargs): + # Supports one big list in place of args + if len(args) == 1 and isinstance(args[0], list): + args = args[0] + + self._descriptions: Dict[str, str] = {} + if 'm_descriptions' in kwargs: + self._descriptions = kwargs.pop('m_descriptions') + + # If non-named arguments are given, the default is to have them placed + # into a dictionary with their string value as both the enum name and + # the value. 
+ for arg in args: + if arg in kwargs: + raise ValueError(f"Duplicate value '{arg}' provided for enum") + kwargs[arg] = arg + + self._list = list(kwargs.values()) + self._values = set(kwargs.values()) # For allowing constant time member check + + for enum_value in self._values: + if not isinstance(enum_value, str): + raise TypeError(f'MEnum value {enum_value} is not a string.') + + self.__dict__.update(kwargs) + + def set_description(self, value: str, description: str): + if value not in self._values: + raise ValueError(f'{value} is not a value of this MEnum') + self._descriptions[value] = description + + def get_description(self, value: str) -> str: + if value not in self._values: + raise ValueError(f'{value} is not a value of this MEnum') + return self._descriptions.get(value, '') + + def get_all_descriptions(self) -> Dict[str, str]: + return self._descriptions + + def get_all_values(self) -> set: + return self._values + + # no need to implement __getattr__ as all attributes are stored in the __dict__ + # def __getattr__(self, attr): + # pass + + def __getitem__(self, index): + return self._list[index] + + def __len__(self): + return len(self._list) + + +class MQuantity: + ''' + A simple wrapper to represent complex quantities that may have multiple values, + additional attributes, and more. + ''' + name: str = None + value: Any = None + unit: Optional[pint.Unit] = None + original_unit: Optional[pint.Unit] = None + attributes: dict = None + + def __init__( + self, + in_name: Optional[str], + in_value: Any, + in_unit: Optional[pint.Unit] = None, + in_attributes: Optional[dict] = None): + ''' + The validation of value/unit/attribute is performed at 'MSection' level. 
+ ''' + self.name = in_name + if self.name: + assert isinstance(self.name, str), 'Name must be a string' + + self.unit = None + if isinstance(in_value, pint.Quantity): + self.value = in_value.m # magnitude + self.unit = in_value.u # unit + assert in_unit is None, f'Unit is already defined in the value {in_value}' + else: + # the input argument is not a pint quantity + # the unit is set to None + self.value = in_value + if isinstance(in_unit, pint.Unit): + self.unit = in_unit + elif isinstance(in_unit, str): + self.unit = ureg.parse_units(in_unit) + + self.original_unit = self.unit + + self.attributes: dict = {} + if in_attributes is not None: + self.attributes.update(**in_attributes) + self.__dict__.update(**in_attributes) + + @staticmethod + def wrap(in_value: Any, in_name: Optional[str] = None): + ''' + Syntax sugar to wrap a value into a MQuantity. The name is optional. + + This would be useful for non-variadic primitive quantities with additional attributes. + ''' + return MQuantity(in_name, in_value) + + def __repr__(self): + return self.name if self.name else 'Unnamed quantity' + + def m_set_attribute(self, name, value): + ''' + Validation is done outside this container + ''' + self.attributes[name] = value + + +class MSubSectionList(list): + def __init__(self, section, sub_section_def): + self.section = section + self.sub_section_def = sub_section_def + super().__init__() + + def __setitem__(self, key, value): + raise NotImplementedError('You can only append subsections.') + + def __delitem__(self, key): + old_value = self[key] + list.__delitem__(self, key) + for index in range(key, len(self)): + self[index].m_parent_index = index + + # noinspection PyProtectedMember + self.section._on_remove_sub_section(self.sub_section_def, old_value) + + def __setslice__(self, i, j, sequence): + raise NotImplementedError('You can only append subsections.') + + def __delslice__(self, i, j): + raise NotImplementedError('You can only append subsections.') + + def 
append(self, value): + list.append(self, value) + if value is not None: + # noinspection PyProtectedMember + self.section._on_add_sub_section(self.sub_section_def, value, len(self) - 1) + + def pop(self, index=...): + raise NotImplementedError('You can only append subsections.') + + def extend(self, new_value): + start_index = len(self) + list.extend(self, new_value) + for index, value in enumerate(new_value): + # noinspection PyProtectedMember + self.section._on_add_sub_section(self.sub_section_def, value, start_index + index) + + def insert(self, i, element): + raise NotImplementedError('You can only append subsections.') + + def remove(self, element): + raise NotImplementedError('You can only append subsections.') + + def reverse(self): + raise NotImplementedError('You can only append subsections.') + + def sort(self, *, key=..., reverse=...): + raise NotImplementedError('You can only append subsections.') + + def clear(self): + old_values = list(self) + list.clear(self) + for old_value in old_values: + # noinspection PyProtectedMember + self.section._on_remove_sub_section(self.sub_section_def, old_value) + + +@dataclass +class ReferenceURL: + fragment: str + archive_url: str + url_parts: SplitResult + + def __init__(self, url: str): + if '#' not in url: + url = f'#{url}' + + self.url_parts = urlsplit(url) + archive_url = urlunsplit(self.url_parts[0:4] + ('',)) + self.archive_url = None if archive_url is None else archive_url + self.fragment = self.url_parts.fragment + + +class Annotation: + ''' Base class for annotations. ''' + + def m_to_dict(self): + ''' + Returns a JSON serializable representation that is used for exporting the + annotation to JSON. + ''' + return str(self.__class__.__name__) + + +class DefinitionAnnotation(Annotation): + ''' Base class for annotations for definitions. 
''' + + def __init__(self): + self.definition = None + + def init_annotation(self, definition): + self.definition = definition + + +class SectionAnnotation(DefinitionAnnotation): + ''' + Special annotation class for section definition that allows to auto add annotations + to section instances. + ''' + + def new(self, section) -> Dict[str, Any]: + return {} + + +def to_dict(entries): + if isinstance(entries, list): + return [to_dict(entry) for entry in entries] + + # noinspection PyBroadException + try: + entries = entries.m_to_dict() + except Exception: + pass + + return entries + + +def convert_to(from_magnitude, from_unit: Optional[ureg.Unit], to_unit: Optional[ureg.Unit]): + ''' + Convert a magnitude from one unit to another. + + Arguments: + from_magnitude: the magnitude to be converted + from_unit: the unit of the magnitude + to_unit: the unit to convert to + + Return: + the converted magnitude + ''' + + if to_unit is None: + return from_magnitude + + from_quantity: ureg.Quantity = from_magnitude * from_unit + + return from_quantity.to(to_unit).m + + +def __similarity_match(candidates: list, name: str): + ''' + Use similarity to find the best match for a name. + ''' + similarity: list = [SequenceMatcher(None, v.name.upper(), name.upper()).ratio() for v in candidates] + + return candidates[similarity.index(max(similarity))] + + +def resolve_variadic_name(definitions: dict, name: str, hint: Optional[str] = None): + ''' + For properties with variadic names, it is necessary to check all possible definitions + in the schema to find the unique and correct definition that matches the naming pattern. + + In the schema defines a property with the name 'FOO_bar', implying the prefix 'FOO' is + merely a placeholder, the actual name in the data can be anything, such as 'a_bar' or 'b_bar'. + + This method checks each definition name by replacing the placeholder with '.*' and then check if + the property name matches the pattern. 
If it does, it returns the corresponding definition. + + For example, the definition name 'FOO_bar' will be replaced by '.*_bar', which further matches + 'a_bar', 'aa_bar', etc. + + In case of multiple quantities with identical template/variadic patterns, the following strategy + is used: + 1. Check all quantities and collect all qualified quantities that match the naming pattern + in a candidate list. + 2. Use the optionally provided hint string, which shall be one of attribute names of the desired + quantity. Check all candidates if this attribute exists. The existence of a hint attribute + prioritize this quantity, and it will be put into a prioritized list. + 3. If the prioritized candidate list contains multiple matches, use name similarity determine + which to be used. + 4. If no hint is provided, or no candidate has the hint attribute, check all quantities in the + first candidate list and use name similarity to determine which to be used. + + ''' + + # check the exact name match + if name in definitions: + return definitions[name] + + # check naming pattern match + candidates: list = [] + for definition in set(definitions.values()): + if not definition.variable: + continue + + name_pattern = re.sub(r'^([a-z0-9_]*)[A-Z0-9]+([a-z0-9_]*)$', r'\1[a-z0-9]+\2', definition.name) + if re.match(name_pattern, name): + candidates.append(definition) + + if len(candidates) == 0: + raise ValueError(f'Cannot find a proper definition for name {name}') + + if len(candidates) == 1: + return candidates[0] + + hinted_candidates: list = [] + if hint is not None: + for definition in candidates: + try: + if resolve_variadic_name(definition.all_attributes, hint): + hinted_candidates.append(definition) + except ValueError: + pass + + if len(hinted_candidates) == 1: + return hinted_candidates[0] + + # multiple matches, check similarity + if len(hinted_candidates) > 1: + return __similarity_match(hinted_candidates, name) + + return __similarity_match(candidates, name) + + +def 
retrieve_attribute(section, definition: Optional[str], attr_name: str) -> tuple: + ''' + Retrieve the attribute of a definition by its name. + In the case of variadic/template name, the name is also resolved by checking naming pattern. + ''' + + # find the section or quantity where attribute is defined + tgt_def = section if definition is None else resolve_variadic_name( + section.all_quantities, definition, attr_name) + if tgt_def is None: + raise ValueError(f'Cannot find the definition by the given {definition}') + + # find the corresponding attribute + tgt_attr = resolve_variadic_name(tgt_def.all_attributes, attr_name) + if tgt_attr is None: + raise ValueError('The given attribute name is not found in the given property.') + + return tgt_def, tgt_attr + + +def validate_allowable_unit( + dimensionality: Optional[str], + allowable_list: Union[str, list, pint.Unit, pint.Quantity]) -> bool: + ''' + For a given list of units, e.g., ['m', 'cm', 'mm'], and a target NX unit token such as 'NX_LENGTH', + this function checks the compatibility of the target unit with the list of units. + + Returns: + True if ALL units are compatible with the unit token (dimensionality). + False if at least one unit cannot be represented by the unit token (dimensionality). + ''' + if not dimensionality: + return True + + if isinstance(allowable_list, str): + if dimensionality in ('1', 'dimensionless'): + return ureg.Quantity(1, allowable_list).dimensionless + + try: + return ureg.Quantity(1, allowable_list).check(dimensionality) + except KeyError: + return False + + if isinstance(allowable_list, (pint.Unit, pint.Quantity)): + if dimensionality in ('1', 'dimensionless'): + return allowable_list.dimensionless + + return allowable_list.dimensionality == dimensionality + + for unit in allowable_list: + if not validate_allowable_unit(dimensionality, unit): + return False + + return True + + +def default_hash(): + ''' + Returns a hash object using the designated hash algorithm. 
+ ''' + return hashlib.new(__hash_method) + + +def split_python_definition(definition_with_id: str) -> Tuple[list, Optional[str]]: + ''' + Split a Python type name into names and an optional id. + + Example: + my_package.my_section@my_id ==> (['my_package', 'my_section'], 'my_id') + + my_package.my_section ==> (['my_package', 'my_section'], None) + ''' + if '@' not in definition_with_id: + return definition_with_id.split('.'), None + + definition_names, definition_id = definition_with_id.split('@') + return definition_names.split('.'), definition_id + + +def check_dimensionality(quantity_def, unit: Optional[pint.Unit]) -> None: + if quantity_def is None or unit is None: + return + + dimensionality = getattr(quantity_def, 'dimensionality', None) + + if dimensionality is None: # not set, do not validate + return + + if dimensionality in ('dimensionless', '1') and unit.dimensionless: # dimensionless + return + + if dimensionality == 'transformation': + # todo: check transformation dimensionality + return + + if ureg.Quantity(1 * unit).check(dimensionality): # dimensional + return + + raise TypeError(f'Dimensionality {dimensionality} is not met by unit {unit}') + + +def check_unit(unit: Union[str, pint.Unit]) -> None: + '''Check that the unit is valid. + ''' + if isinstance(unit, str): + unit_str = unit + elif isinstance(unit, pint.Unit): + unit_str = str(unit) + else: + raise TypeError('Units must be given as str or pint Unit instances.') + + # Explicitly providing a Pint delta-unit is not currently allowed. + # Implicit conversions are fine as MathJS on the frontend supports them. + if any(x in unit_str for x in _delta_symbols): + raise TypeError('Explicit Pint "delta"-units are not yet supported.') + + +def to_section_def(section_def): + ''' + Resolves duck-typing for values that are section definitions or section classes to + section definition. 
+ ''' + return section_def.m_def if isinstance(section_def, type) else section_def # type: ignore + + +def to_numpy(np_type, shape: list, unit: Optional[pint.Unit], definition, value: Any): + check_dimensionality(definition, unit) + + if isinstance(value, pint.Quantity): + # if flexible unit is set, do not check unit in the definition + # it will be handled specially + # the stored unit would not be serialized + flexible_unit = getattr(definition, 'flexible_unit', False) + + if not flexible_unit and unit is None: + raise TypeError(f'The quantity {definition} does not have a unit, but value {value} does.') + + if type(value.magnitude) == np.ndarray and np_type != value.dtype: + value = value.astype(np_type) + + if not flexible_unit: + value = value.to(unit).magnitude + else: + value = value.magnitude + + if isinstance(value, pd.DataFrame): + try: + value = value.to_numpy() + except AttributeError: + raise AttributeError( + f'Could not convert value {value} of type pandas.Dataframe to a numpy array') + + if type(value) != np.ndarray: + if len(shape) > 0: + try: + value = np.asarray(value) + except TypeError: + raise TypeError(f'Could not convert value {value} of {definition} to a numpy array') + elif type(value) != np_type: + try: + value = np_type(value) + except TypeError: + raise TypeError(f'Could not convert value {value} of {definition} to a numpy scalar') + + return value + + +def __validate_shape(section, dimension: Union[str, int], length: int) -> bool: + if isinstance(dimension, int): + return dimension == length + + if not isinstance(dimension, str): + raise TypeError(f'Invalid dimension type {type(dimension)}') + + if dimension.isidentifier(): + return dimension == getattr(section, dimension) + + m = re.match(MRegEx.index_range, dimension) + start = int(m.group(1)) + end = -1 if m.group(2) == '*' else int(m.group(2)) + return start <= length and (end == -1 or length <= end) + + +def validate_shape(section, quantity_def, value: Any) -> bool: + 
quantity_shape: list = quantity_def.shape + + if type(value) == np.ndarray: + value_shape = value.shape + elif isinstance(value, list) and not isinstance(value, MEnum): + value_shape = [len(value)] + else: + value_shape = [] + + if len(value_shape) != len(quantity_shape): + return False + + return all(__validate_shape(section, x, y) for x, y in zip(quantity_shape, value_shape)) + + +def dict_to_named_list(data) -> list: + if not isinstance(data, dict): + return data + + results: list = [] + for key, value in data.items(): + if value is None: + value = {} + value.update(dict(name=key)) + results.append(value) + return results + + +def validate_url(url_str: str) -> Optional[str]: + if url_str is None: + return None + + if not isinstance(url_str, str): + raise TypeError('Links need to be given as URL strings') + if re.match(MRegEx.url, url_str) is None: + raise ValueError('The given URL is not valid') + + return url_str + + +def __parse_datetime(datetime_str: str) -> datetime: + # removing trailing spaces and replacing the potential white space between date and time with char "T" + if datetime_str[0].isdigit(): + datetime_str = datetime_str.strip().replace(' ', 'T') + + try: + return aniso8601.parse_datetime(datetime_str) + except ValueError: + pass + + try: + date_value = aniso8601.parse_date(datetime_str) + if isinstance(date_value, datetime): + return date_value + except ValueError: + pass + + # noinspection PyBroadException + try: + return email.utils.parsedate_to_datetime(datetime_str) + except Exception: + pass + + try: + return datetime.strptime(datetime_str, '%Y-%m-%d %H:%M:%S.%f') + except ValueError: + pass + + try: + return datetime.strptime(datetime_str, '%Y-%m-%d %H:%M:%S') + except ValueError: + pass + + try: + return datetime.strptime(datetime_str, '%Y-%m-%d') + except ValueError: + pass + + try: + return datetime.fromisoformat(datetime_str) + except ValueError: + pass + + raise TypeError(f'Invalid date literal {datetime_str}') + + +def 
normalize_datetime(value) -> Optional[datetime]: + if value is None: + return None + + if isinstance(value, str): + value = __parse_datetime(value) + + elif isinstance(value, (int, float)): + value = datetime.fromtimestamp(value) + + elif isinstance(value, pint.Quantity): + value = datetime.fromtimestamp(value.magnitude) + + elif not isinstance(value, datetime) and isinstance(value, date): + value = datetime.combine(value, datetime.min.time()) + + if not isinstance(value, datetime): + raise TypeError(f'{value} is not a datetime.') + + if value.tzinfo is None: + value = value.replace(tzinfo=pytz.utc) + else: + value = value.astimezone(pytz.utc) + + return value diff --git a/nomad/metainfo/nexus.py b/nomad/metainfo/nexus.py new file mode 100644 index 0000000000000000000000000000000000000000..95db827737aec98e445f3943c5591d40aa456640 --- /dev/null +++ b/nomad/metainfo/nexus.py @@ -0,0 +1,624 @@ +# +# Copyright The NOMAD Authors. +# +# This file is part of NOMAD. See https://nomad-lab.eu for further info. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +import os +import os.path +import re +import sys +# noinspection PyPep8Naming +import xml.etree.ElementTree as ET +from typing import Dict, List, Optional, Union + +import numpy as np +from toposort import toposort_flatten + +from nexusparser.tools import nexus +from nomad.datamodel import EntryArchive +from nomad.metainfo import ( + Attribute, Bytes, Datetime, Definition, MEnum, Package, Property, Quantity, Section, SubSection) +from nomad.utils import get_logger, strip + +# __URL_REGEXP from +# https://stackoverflow.com/questions/3809401/what-is-a-good-regular-expression-to-match-a-url +__URL_REGEXP = re.compile( + r'(?i)\b((?:https?://|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}/)' + r'(?:[^\s()<>]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\))+' + r'(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:\'".,<>?«»“”‘’]))') +# noinspection HttpUrlsUsage +__XML_NAMESPACES = {'nx': 'http://definition.nexusformat.org/nxdl/3.1'} + +# TO DO the validation still show some problems. Most notably there are a few higher +# dimensional fields with non number types, which the metainfo does not support + +__section_definitions: Dict[str, Section] = dict() + +__logger = get_logger(__name__) + +VALIDATE = False + +__XML_PARENT_MAP: Dict[ET.Element, ET.Element] +__NX_DOC_BASE = 'https://manual.nexusformat.org/classes' +__NX_TYPES = { # Primitive Types, 'ISO8601' is the only type not defined here + 'NX_COMPLEX': np.float64, + 'NX_FLOAT': np.float64, + 'NX_CHAR': str, + 'NX_BOOLEAN': bool, + 'NX_INT': np.int64, + 'NX_UINT': np.uint64, + 'NX_NUMBER': np.float64, + 'NX_POSINT': np.uint64, + 'NX_BINARY': Bytes, + 'NX_DATE_TIME': Datetime +} + + +class NXUnitSet: + ''' + maps from `NX_` token to dimensionality + None -> disable dimensionality check + '1' -> dimensionless quantities + 'transformation' -> Specially handled in metainfo + ''' + mapping: dict = { + 'NX_ANGLE': '[angle]', + 'NX_ANY': None, + 'NX_AREA': '[area]', + 'NX_CHARGE': '[charge]', + 'NX_COUNT': '1', + 'NX_CROSS_SECTION': 
'[area]', + 'NX_CURRENT': '[current]', + 'NX_DIMENSIONLESS': '1', + 'NX_EMITTANCE': '[length] * [angle]', + 'NX_ENERGY': '[energy]', + 'NX_FLUX': '1 / [time] / [area]', + 'NX_FREQUENCY': '[frequency]', + 'NX_LENGTH': '[length]', + 'NX_MASS': '[mass]', + 'NX_MASS_DENSITY': '[mass] / [volume]', + 'NX_MOLECULAR_WEIGHT': '[mass] / [substance]', + 'NX_PERIOD': '[time]', + 'NX_PER_AREA': '1 / [area]', + 'NX_PER_LENGTH': '1 / [length]', + 'NX_POWER': '[power]', + 'NX_PRESSURE': '[pressure]', + 'NX_PULSES': '1', + 'NX_SCATTERING_LENGTH_DENSITY': '1 / [area]', + 'NX_SOLID_ANGLE': '[angle] * [angle]', + 'NX_TEMPERATURE': '[temperature]', + 'NX_TIME': '[time]', + 'NX_TIME_OF_FLIGHT': '[time]', + 'NX_TRANSFORMATION': 'transformation', + 'NX_UNITLESS': '1', + 'NX_VOLTAGE': '[energy] / [current] / [time]', + 'NX_VOLUME': '[volume]', + 'NX_WAVELENGTH': '[length]', + 'NX_WAVENUMBER': '1 / [length]' + } + + @staticmethod + def normalise(value: str) -> str: + ''' + Normalise the given token + ''' + value = value.upper() + if not value.startswith('NX_'): + value = 'NX_' + value + return value + + @staticmethod + def is_nx_token(value: str) -> bool: + ''' + Check if a given token is one of NX tokens + ''' + return NXUnitSet.normalise(value) in NXUnitSet.mapping.keys() + + +def __to_camel_case(snake_str: str, upper: bool = False) -> str: + ''' + Take as input a snake case variable and return a camel case one + ''' + components = snake_str.split('_') + + if upper: + return ''.join(x.capitalize() for x in components) + + return components[0] + ''.join(x.capitalize() for x in components[1:]) + + +def __to_root(xml_node: ET.Element) -> ET.Element: + ''' + get the root element + ''' + elem = xml_node + while True: + parent = __XML_PARENT_MAP.get(elem) + if parent is None: + break + elem = parent + + return elem + + +def __if_base(xml_node: ET.Element) -> bool: + ''' + retrieves the category from the root element + ''' + return __to_root(xml_node).get('category') == 'base' + + +def 
__if_repeats(name: str, max_occurs: str) -> bool: + repeats = any(char.isupper() for char in name) or max_occurs == 'unbounded' + + if max_occurs.isdigit(): + repeats = repeats or int(max_occurs) > 1 + + return repeats + + +def __if_template(name: Optional[str]) -> bool: + return name is None or name.lower() != name + + +def __get_documentation_url( + xml_node: ET.Element, nx_type: Optional[str]) -> Optional[str]: + ''' + Get documentation url + ''' + if nx_type is None: + return None + + anchor_segments = [] + if nx_type != 'class': + anchor_segments.append(nx_type) + + while True: + nx_type = xml_node.get('type') + if nx_type: + nx_type = nx_type.replace('NX', '') + segment = xml_node.get('name', nx_type) # type: ignore + anchor_segments.append(segment.replace('_', '-')) + + xml_parent = xml_node + xml_node = __XML_PARENT_MAP.get(xml_node) + if xml_node is None: + break + + nx_package = xml_parent.get('nxdl_base').split('/')[-1] + anchor = "-".join([name.lower() for name in reversed(anchor_segments)]) + return f'{__NX_DOC_BASE}/{nx_package}/{anchor_segments[-1]}.html#{anchor}' + + +def __to_section(name: str, **kwargs) -> Section: + ''' + Returns the 'existing' metainfo section for a given top-level nexus base-class name. + + This function ensures that sections for these base-classes are only created one. + This allows to access the metainfo section even before it is generated from the base + class nexus definition. 
+ ''' + if name in __section_definitions: + section = __section_definitions[name] + section.more.update(**kwargs) + return section + + section = Section(validate=VALIDATE, name=name, **kwargs) + + __section_definitions[name] = section + + return section + + +def __get_enumeration(xml_node: ET.Element) -> Optional[MEnum]: + ''' + Get the enumeration field from xml node + ''' + enumeration = xml_node.find('nx:enumeration', __XML_NAMESPACES) + if enumeration is None: + return None + + items = enumeration.findall('nx:item', __XML_NAMESPACES) + + return MEnum([value.attrib['value'] for value in items]) + + +def __add_common_properties(xml_node: ET.Element, definition: Definition): + ''' + Adds general metainfo definition properties (e.g., deprecated, docs, optional, ...) + from the given nexus XML node to the given metainfo definition. + ''' + xml_attrs = xml_node.attrib + + # Read properties from potential base section. Those are not inherited, but we + # duplicate them for a nicer presentation + if isinstance(definition, Section) and definition.base_sections: + base_section = definition.base_sections[0] + if base_section.description: + definition.description = base_section.description + if base_section.deprecated: + definition.deprecated = base_section.deprecated + if base_section.more: + definition.more.update(**base_section.more) + + links = [] + doc_url = __get_documentation_url(xml_node, definition.more.get('nx_kind')) + if doc_url: + links.append(doc_url) + + doc = xml_node.find('nx:doc', __XML_NAMESPACES) + if doc is not None and doc.text is not None: + definition.description = strip(doc.text) + links.extend([match[0] for match in __URL_REGEXP.findall(definition.description)]) + + if links: + definition.links = links + + for key, value in xml_attrs.items(): + if key == 'deprecated': + definition.deprecated = value + continue + if 'nxdl_base' in key or 'schemaLocation' in key: + continue + definition.more['nx_' + key] = value + + if 'optional' not in xml_attrs: + 
definition.more['nx_optional'] = __if_base(xml_node) + + +def __create_attributes(xml_node: ET.Element, definition: Union[Section, Property]): + ''' + Add all attributes in the given nexus XML node to the given + Quantity or SubSection using the Attribute class (new mechanism). + + todo: account for more attributes of attribute, e.g., default, minOccurs + ''' + for attribute in xml_node.findall('nx:attribute', __XML_NAMESPACES): + name = attribute.get('name') + + nx_enum = __get_enumeration(attribute) + if nx_enum: + nx_type = nx_enum + nx_shape: List[str] = [] + else: + nx_type = __NX_TYPES[attribute.get('type', 'NX_CHAR')] # type: ignore + has_bound = False + has_bound |= 'minOccurs' in attribute.attrib + has_bound |= 'maxOccurs' in attribute.attrib + if has_bound: + nx_min_occurs = attribute.get('minOccurs', '0') # type: ignore + nx_max_occurs = attribute.get('maxOccurs', '*') # type: ignore + if nx_max_occurs == 'unbounded': + nx_max_occurs = '*' + nx_shape = [f'{nx_min_occurs}..{nx_max_occurs}'] + else: + nx_shape = [] + + # check if the attribute exist + # if yes then modify directly + # if not create a new one and append to the list + for m_attribute in definition.attributes: + if m_attribute.name == name: + m_attribute.shape = nx_shape + m_attribute.type = nx_type + + for name, value in attribute.items(): + m_attribute.more[f'nx_{name}'] = value + + break + else: + m_attribute = Attribute( + name=name, variable=__if_template(name), shape=nx_shape, type=nx_type) + + for name, value in attribute.items(): + m_attribute.more[f'nx_{name}'] = value + + definition.attributes.append(m_attribute) + + +def __create_field(xml_node: ET.Element, container: Section) -> Quantity: + ''' + Creates a metainfo quantity from the nexus field given as xml node. 
+ ''' + xml_attrs = xml_node.attrib + + # name + assert 'name' in xml_attrs, 'Expecting name to be present' + name = xml_attrs['name'] + + # type + nx_type = xml_attrs.get('type', 'NX_CHAR') + if nx_type not in __NX_TYPES: + raise NotImplementedError(f'Type {nx_type} is not supported for the moment for {name}.') + + # enumeration + enum_type = __get_enumeration(xml_node) + + # dimensionality + nx_dimensionality = xml_attrs.get('units', None) + if nx_dimensionality: + if nx_dimensionality not in NXUnitSet.mapping: + raise NotImplementedError(f'Unit {nx_dimensionality} is not supported for {name}.') + dimensionality = NXUnitSet.mapping[nx_dimensionality] + else: + dimensionality = None + + # shape + shape: list = [] + dimensions = xml_node.find('nx:dimensions', __XML_NAMESPACES) + if dimensions is not None: + for dimension in dimensions.findall('nx:dim', __XML_NAMESPACES): + dimension_value: str = dimension.attrib.get('value', '0..*') + if dimension_value.isdigit(): + shape.append(int(dimension_value)) + elif dimension_value == 'n': + shape.append('0..*') + else: + shape.append(dimension_value) + + value_quantity: Quantity = None # type: ignore + + # copy from base to inherit from it + if container.base_sections is not None: + base_quantity: Quantity = container.base_sections[0].all_quantities.get(name) + if base_quantity: + value_quantity = base_quantity.m_copy(deep=True) + + # create quantity + if value_quantity is None: + value_quantity = Quantity(name=name, flexible_unit=True) + + value_quantity.variable = __if_template(name) + + # check parent type compatibility + parent_type = getattr(value_quantity, 'type', None) + if not isinstance(parent_type, MEnum): + # if parent type is not MEnum then overwrite whatever given + value_quantity.type = enum_type if enum_type else __NX_TYPES[nx_type] + elif enum_type: + # only when derived type is also MEnum to allow overwriting + value_quantity.type = enum_type + + value_quantity.dimensionality = dimensionality + 
value_quantity.shape = shape + value_quantity.more.update(dict(nx_kind='field', nx_type=nx_type)) + + __add_common_properties(xml_node, value_quantity) + __create_attributes(xml_node, value_quantity) + + container.quantities.append(value_quantity) + + return value_quantity + + +def __create_group(xml_node: ET.Element, root_section: Section): + ''' + Adds all properties that can be generated from the given nexus group XML node to + the given (empty) metainfo section definition. + ''' + __create_attributes(xml_node, root_section) + + for group in xml_node.findall('nx:group', __XML_NAMESPACES): + xml_attrs = group.attrib + + assert 'type' in xml_attrs, 'Expecting type to be present' + nx_type = xml_attrs['type'] + + nx_name = xml_attrs.get('name', nx_type) + group_section = Section(validate=VALIDATE, nx_kind='group', name=nx_name) + + __attach_base_section(group_section, root_section, __to_section(nx_type)) + __copy_base_attributes(group_section, nx_type) + __add_common_properties(group, group_section) + + nx_name = xml_attrs.get('name', nx_type.replace('NX', '').upper()) + group_subsection = SubSection( + section_def=group_section, + nx_kind='group', + name=nx_name, + repeats=__if_repeats(nx_name, xml_attrs.get('maxOccurs', '0')), + variable=__if_template(nx_name)) + + root_section.inner_section_definitions.append(group_section) + + root_section.sub_sections.append(group_subsection) + + __create_group(group, group_section) + + for field in xml_node.findall('nx:field', __XML_NAMESPACES): + __create_field(field, root_section) + + +def __attach_base_section(section: Section, container: Section, default: Section): + ''' + Potentially adds a base section to the given section, if the given container has + a base-section with a suitable base. 
+ ''' + base_section = container.all_inner_section_definitions.get(section.name) + if base_section: + assert base_section.nx_kind == section.nx_kind, 'Base section has wrong kind' + else: + base_section = default + + section.base_sections = [base_section] + + +def __copy_base_attributes(destination: Section, source_name: str): + ''' + Copy attributes from base subsection to derived subsection. + + Attributes are stored in `SubSection.attributes` list. They are not inherited + thus need to be manually copied. + ''' + source: Section = __section_definitions.get(source_name) + + if not source or not destination or source is destination: + return + + for m_attribute in source.attributes: + destination.attributes.append(Attribute( + name=m_attribute.name, + type=m_attribute.type, + shape=m_attribute.shape, + variable=m_attribute.variable, + **m_attribute.more)) + + +def __create_class_section(xml_node: ET.Element) -> Section: + ''' + Creates a metainfo section from the top-level nexus definition given as xml node. 
+ ''' + xml_attrs = xml_node.attrib + assert 'name' in xml_attrs, 'Expecting name to be present' + assert 'type' in xml_attrs, 'Expecting type to be present' + assert 'category' in xml_attrs, 'Expecting category to be present' + + nx_name = xml_attrs['name'] + nx_type = xml_attrs['type'] + nx_category = xml_attrs['category'] + + class_section: Section = __to_section( + nx_name, nx_kind=nx_type, nx_category=nx_category) + + if 'extends' in xml_attrs: + base_section = __to_section(xml_attrs['extends']) + class_section.base_sections = [base_section] + __copy_base_attributes(class_section, base_section.name) + + __add_common_properties(xml_node, class_section) + + __create_group(xml_node, class_section) + + return class_section + + +def __sort_nxdl_files(paths): + ''' + Sort all definitions based on dependencies + ''' + + name_node_map = {} + name_dependency_map = {} + for path in paths: + for nxdl_file in os.listdir(path): + if not nxdl_file.endswith('.nxdl.xml'): + continue + xml_node = ET.parse(os.path.join(path, nxdl_file)).getroot() + xml_node.set('nxdl_base', path) + assert xml_node.get('type') == 'group', 'definition is not a group' + xml_name = xml_node.get('name') + name_node_map[xml_name] = xml_node + dependency_list = [] + if 'extends' in xml_node.attrib: + dependency_list.append(xml_node.get('extends')) + for child in xml_node.iter(): + if child.tag.endswith('group') and child.get('type') != xml_name: + dependency_list.append(child.get('type')) + name_dependency_map[xml_name] = set(dependency_list) + + # manually remove deprecated circular dependency + name_dependency_map['NXgeometry'].remove('NXorientation') + name_dependency_map['NXgeometry'].remove('NXtranslation') + + sorted_nodes = toposort_flatten(name_dependency_map) + validated_names = [] + for node in sorted_nodes: + if node in name_node_map: + validated_names.append(name_node_map[node]) + else: + parent_nodes = [] + for name, dependencies in name_dependency_map.items(): + if node in dependencies: 
+ parent_nodes.append(name) + __logger.error('Missing dependency (incorrect group type).', target_name=node, used_by=parent_nodes) + + return validated_names + + +def __add_section_from_nxdl(xml_node: ET.Element) -> Optional[Section]: + ''' + Creates a metainfo section from a nxdl file. + ''' + try: + global __XML_PARENT_MAP # pylint: disable=global-statement + __XML_PARENT_MAP = { + child: parent for parent in xml_node.iter() for child in parent} + + return __create_class_section(xml_node) + + except NotImplementedError as err: + __logger.error('Fail to generate metainfo.', target_name=xml_node.attrib['name'], exe_info=str(err)) + return None + + +def __create_package_from_nxdl_directories(nexus_section: Section) -> Package: + ''' + Creates a metainfo package from the given nexus directory. Will generate the + respective metainfo definitions from all the nxdl files in that directory. + ''' + package = Package(name='nexus') + + folder_list = ('base_classes', 'contributed_definitions', 'applications') + paths = [os.path.join( + nexus.get_nexus_definitions_path(), folder) for folder in folder_list] + + for nxdl_file in __sort_nxdl_files(paths): + section = __add_section_from_nxdl(nxdl_file) + if section is None: + continue + package.section_definitions.append(section) + nexus_section.sub_sections.append( + SubSection(section_def=section, name=section.name)) + + return package + + +nexus_metainfo_package: Optional[Package] = None # pylint: disable=C0103 + + +def init_nexus_metainfo(): + ''' + Initializes the metainfo package for the nexus definitions. + ''' + global nexus_metainfo_package # pylint: disable=global-statement + + if nexus_metainfo_package is not None: + return + + # We take the application definitions and create a common parent section that allows + # to include nexus in an EntryArchive. 
+ nexus_section = Section(validate=VALIDATE, name='NeXus') + + nexus_metainfo_package = __create_package_from_nxdl_directories(nexus_section) + + EntryArchive.nexus = SubSection(name='nexus', section_def=nexus_section) + EntryArchive.nexus.init_metainfo() + EntryArchive.m_def.sub_sections.append(EntryArchive.nexus) + + nexus_metainfo_package.section_definitions.append(nexus_section) + + # We need to initialize the metainfo definitions. This is usually done automatically, + # when the metainfo schema is defined though MSection Python classes. + nexus_metainfo_package.init_metainfo() + + # We skip the Python code generation for now and offer Python classes as variables + # TO DO not necessary right now, could also be done case-by-case by the nexus parser + python_module = sys.modules[__name__] + for section in nexus_metainfo_package.section_definitions: # pylint: disable=E1133 + setattr(python_module, section.name, section.section_cls) + + +init_nexus_metainfo() diff --git a/nomad/metainfo/nx_unit.py b/nomad/metainfo/nx_unit.py deleted file mode 100644 index 697e81ae33b10d8db6d28247b543988d19933adb..0000000000000000000000000000000000000000 --- a/nomad/metainfo/nx_unit.py +++ /dev/null @@ -1,58 +0,0 @@ -# -# Copyright The NOMAD Authors. -# -# This file is part of NOMAD. See https://nomad-lab.eu for further info. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# - -from typing import Union - -import pint - -from nomad.units import ureg - - -def validate_allowable_list( - dimensionality: Union[str, None], allowable_list: Union[str, list, pint.Unit, pint.Quantity]) -> bool: - ''' - For a given list of units, e.g., ['m', 'cm', 'mm'], and a target NX unit token such as 'NX_LENGTH', - this function check the compatibility of the target unit with the list of units. - - Returns: - True if ALL units are compatible with the unit token (dimensionality). - False if at least one unit cannot be represented by the unit token (dimensionality). - ''' - if not dimensionality: - return True - - if isinstance(allowable_list, str): - if dimensionality in ('1', 'dimensionless'): - return ureg.Quantity(1, allowable_list).dimensionless - - try: - return ureg.Quantity(1, allowable_list).check(dimensionality) - except KeyError: - return False - - if isinstance(allowable_list, (pint.Unit, pint.Quantity)): - if dimensionality == ('1', 'dimensionless'): - return allowable_list.dimensionless - - return allowable_list.dimensionality == dimensionality - - for unit in allowable_list: - if not validate_allowable_list(dimensionality, unit): - return False - - return True diff --git a/nomad/parsing/metadata.yaml b/nomad/parsing/metadata.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3b918850da99a71a1357b54d376f7e656eca829d --- /dev/null +++ b/nomad/parsing/metadata.yaml @@ -0,0 +1,64 @@ +codeName: nexus +codeLabel: NeXus +codeCategory: Database manager +status: production +codeLabelStyle: 'capitals: N, X.' +codeUrl: https://www.nexusformat.org/ +parserDirName: dependencies/parsers/nexus/ +parserGitUrl: https://github.com/nomad-coe/nomad-parser-nexus.git +preamble: | + This is not a real parser, but an example template on how to write one. You can fork this repository to create actual parsers. + + ## Get started + + You should create a virtual environment. 
This is optional, but highly recommended as
+    the required nomad-lab pypi package requires many dependencies with specific versions
+    that might conflict with other libraries that you have installed. This was tested
+    with Python 3.7.
+
+    ```
+    pip install virtualenv
+    virtualenv -p `which python3` .pyenv
+    source .pyenv/bin/activate
+    ```
+
+    Simply install our pypi package with pip:
+    ```
+    pip install --upgrade pip
+    pip install nomad-lab
+    ```
+
+    Clone this project (or fork and then clone the fork). Go into the cloned directory and
+    directly run the parser from there:
+    ```
+    git clone https://github.com/nomad-coe/nomad-parser-nexus.git parser-nexus
+    cd parser-nexus
+    python -m nexusparser tests/data/nexus.out
+    ```
+
+    There is also a basic test framework written in [pytest](https://docs.pytest.org/en/stable/).
+    Install the remaining dev dependencies and run the tests with:
+    ```
+    pip install -r requirements.txt
+    pytest -sv tests
+    ```
+
+    ## Next steps
+
+    Our documentation provides several resources that might be interesting:
+    - [How to write a parser](https://nomad-lab.eu/prod/rae/docs/parser.html). Provides
+      a more detailed tutorial on how to write a parser.
+    - [Introduction to the NOMAD Metainfo](https://nomad-lab.eu/prod/rae/docs/metainfo.html).
+      This explains how the NOMAD data schema can be extended and used within your parser.
+
+    To get your parser included in NOMAD or ask further questions, you can:
+    - Use our forums at [matsci.org](https://matsci.org/c/nomad/32)
+    - Open an issue on the [nexus-parser GitHub project](https://github.com/nomad-coe/nomad-parser-nexus/issues)
+    - Write to [support@nomad-lab.eu](mailto:support@nomad-lab.eu)
+
+    **Note!** The rest of this README.md is the usual text that applies to all NOMAD parsers. 
+ +tableOfFiles: | + |Input Filename| Description| + |--- | --- | + |`nexus.out` | **Mainfile** in NEXUS specific plain-text | diff --git a/nomad/parsing/nexus.py b/nomad/parsing/nexus.py new file mode 100644 index 0000000000000000000000000000000000000000..56a6de189c56d09632bd466617169bc5ba2128ca --- /dev/null +++ b/nomad/parsing/nexus.py @@ -0,0 +1,249 @@ +# +# Copyright The NOMAD Authors. +# +# This file is part of NOMAD. See https://nomad-lab.eu for further info. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +import os.path +import xml.etree.ElementTree as ET +from typing import Optional + +import numpy as np + +from nexusparser.tools import nexus as read_nexus +from nomad.datamodel import EntryArchive +from nomad.metainfo import MSection, nexus +from nomad.metainfo.metainfo_utility import MQuantity, resolve_variadic_name +from nomad.parsing import MatchingParser +from nomad.units import ureg +from nomad.utils import get_logger + + +def _to_group_name(nx_node: ET.Element): + ''' + Normalise the given group name + ''' + return nx_node.attrib.get('name', nx_node.attrib['type'][2:].upper()) + + +# noinspection SpellCheckingInspection +def _to_section( + hdf_name: Optional[str], nx_def: str, nx_node: Optional[ET.Element], + current: MSection) -> MSection: + ''' + Args: + hdf_name : name of the hdf group/field/attribute (None for definition) + nx_def : application definition + nx_node : node in the nxdl.xml + current : current section in which the new entry needs to be picked up from + + Note that if the new element did not exist, it will be created + + Returns: + tuple: the new subsection + + The strict mapping is available between metainfo and nexus: + Group <-> SubSection + Field <-> Quantity + Attribute <-> SubSection.Attribute or Quantity.Attribute + + If the given nxdl_node is a Group, return the corresponding Section. + If the given nxdl_node is a Field, return the Section contains it. + If the given nxdl_node is an Attribute, return the associated Section or the + Section contains the associated Quantity. 
+ ''' + + if hdf_name is None: + nomad_def_name = nx_def + elif nx_node.tag.endswith('group'): + # it is a new group + nomad_def_name = _to_group_name(nx_node) + else: + # no need to change section for quantities and attributes + return current + + # for groups, get the definition from the package + new_def = current.m_def.all_sub_sections[nomad_def_name] + + new_section: MSection = None # type:ignore + + for section in current.m_get_sub_sections(new_def): + if hdf_name is None or getattr(section, 'nx_name', None) == hdf_name: + new_section = section + break + + if new_section is None: + current.m_create(new_def.section_def.section_cls) + new_section = current.m_get_sub_section(new_def, -1) + new_section.__dict__['nx_name'] = hdf_name + + return new_section + + +def _get_value(hdf_node): + ''' + Get value from hdl5 node + ''' + + hdf_value = hdf_node[...] + if str(hdf_value.dtype) == 'bool': + val = bool(hdf_value) + elif hdf_value.dtype.kind in 'iufc': + val = hdf_value + else: + try: + val = str(hdf_value.astype(str)) + except UnicodeDecodeError: + val = str(hdf_node[()].decode()) + return val + + +class NexusParser(MatchingParser): + ''' + NexusParser doc + ''' + + def __init__(self): + super().__init__( + metadata_path=os.path.join(os.path.dirname(__file__), 'metadata.yaml'), + mainfile_mime_re=r'(application/.*)|(text/.*)', + mainfile_name_re=r'.*\.nxs', + supported_compressions=['gz', 'bz2', 'xz'] + ) + self.archive: Optional[EntryArchive] = None + self.nx_root = None + self._logger = None + + def _populate_data(self, depth: int, nx_path: list, nx_def: str, hdf_node, current: MSection): + ''' + Populate attributes and fields + ''' + + if depth < len(nx_path): + # it is an attribute of either field or group + nx_attr = nx_path[depth] + nx_parent: ET.Element = nx_path[depth - 1] + + if isinstance(nx_attr, str): + if nx_attr != 'units': + # no need to handle units here + # as all quantities have flexible units + print(nx_attr) + else: + # get the name of parent 
(either field or group) + # which will be used to set attribute + # this is required by the syntax of metainfo mechanism + # due to variadic/template quantity names + parent_type = nx_parent.get('type').replace('NX', '').upper() + parent_name = nx_parent.get('name', parent_type) # type: ignore + + attr_name = nx_attr.get('name') + # by default, we assume it is a 1D array + attr_value = hdf_node.attrs[attr_name] + if not isinstance(attr_value, str): + attr_value = [value for value in attr_value] + if len(attr_value) == 1: + attr_value = attr_value[0] + + current = _to_section(attr_name, nx_def, nx_attr, current) + + try: + if nx_parent.tag.endswith('group'): + current.m_set_section_attribute(attr_name, attr_value) + else: + current.m_set_quantity_attribute(parent_name, attr_name, attr_value) + except Exception as e: + self._logger.warning('Error while setting attribute.', target_name=attr_name, exe_info=str(e)) + else: + # it is a field + field = _get_value(hdf_node) + + # need to remove + if hdf_node[...].dtype.kind in 'iufc' and isinstance(field, np.ndarray) and field.size > 1: + field = np.array([np.mean(field), np.var(field), np.min(field), np.max(field)]) + + # get the corresponding field name + field_name = nx_path[-1].get('name') + metainfo_def = resolve_variadic_name(current.m_def.all_properties, field_name) + + # check if unit is given + unit = hdf_node.attrs.get('units', None) + + pint_unit: Optional[ureg.Unit] = None + + if unit: + try: + if unit != 'counts': + pint_unit = ureg.parse_units(unit) + else: + pint_unit = ureg.parse_units('1') + field = ureg.Quantity(field, pint_unit) + except ValueError: + pass + + if metainfo_def.use_full_storage: + field = MQuantity.wrap(field, hdf_node.name.split('/')[-1]) + elif metainfo_def.unit is None and pint_unit is not None: + metainfo_def.unit = pint_unit + + # may need to check if the given unit is in the allowable list + + try: + current.m_set(metainfo_def, field) + except Exception as e: + 
self._logger.warning('Error while setting field.', target_name=field_name, exe_info=str(e)) + + def __nexus_populate(self, params: dict, attr=None): # pylint: disable=W0613 + ''' + Walks through name_list and generate nxdl nodes + (hdf_info, nx_def, nx_path, val, logger) = params + ''' + + hdf_info: dict = params['hdf_info'] + nx_def: str = params['nxdef'] + nx_path: list = params['nxdl_path'] + + hdf_path: str = hdf_info['hdf_path'] + hdf_node = hdf_info['hdf_node'] + + if nx_path is None: + return + + current: MSection = _to_section(None, nx_def, None, self.nx_root) + depth: int = 1 + for name in hdf_path.split('/')[1:]: + nx_node = nx_path[depth] if depth < len(nx_path) else name + current = _to_section(name, nx_def, nx_node, current) + depth += 1 + + self._populate_data(depth, nx_path, nx_def, hdf_node, current) + + def parse(self, mainfile: str, archive: EntryArchive, logger=None, child_archives=None): + self.archive = archive + self.archive.m_create(nexus.NeXus) # type: ignore # pylint: disable=no-member + self.nx_root = self.archive.nexus + self._logger = logger if logger else get_logger(__name__) + + nexus_helper = read_nexus.HandleNexus(logger, [mainfile]) + nexus_helper.process_nexus_master_file(self.__nexus_populate) + + if archive.metadata is None: + return + + app_def: str = '' + for var in dir(archive.nexus): + if getattr(archive.nexus, var, None) is not None: + app_def = var + + archive.metadata.entry_type = app_def diff --git a/nomad/parsing/parsers.py b/nomad/parsing/parsers.py index 654623fe3bd3f294bda5b6d9fa72ca90c4ad82f0..f3e4310c1b8b6c7d85339e5ee12d9f5ecb797311 100644 --- a/nomad/parsing/parsers.py +++ b/nomad/parsing/parsers.py @@ -29,7 +29,7 @@ from nomad.datamodel.context import Context, ClientContext from .parser import MissingParser, BrokenParser, Parser, ArchiveParser, MatchingParserInterface from .artificial import EmptyParser, GenerateRandomParser, TemplateParser, ChaosParser from .tabular import TabularDataParser -from 
nexusparser.parser import NexusParser +from .nexus import NexusParser try: @@ -642,7 +642,10 @@ for parser in parsers: code_name != 'currupted mainfile' and \ code_name != 'Template': code_names.append(code_name) - code_metadata[code_name] = parser.metadata.dict() + if parser.metadata: + code_metadata[code_name] = parser.metadata.dict() + else: + code_metadata[code_name] = {} code_names = sorted(set(code_names), key=lambda code_name: code_name.lower()) results.Simulation.program_name.a_elasticsearch[0].values = code_names + [ config.services.unavailable_value, config.services.not_processed_value] diff --git a/requirements.txt b/requirements.txt index b763636cd8fcb8f4a44f84ed7226c296d25e61c3..6f6713aefdaa7139b9968c8c5ec50cc905ebf096 100644 --- a/requirements.txt +++ b/requirements.txt @@ -91,6 +91,7 @@ oauthenticator==14.2.0 validators==0.18.2 aiofiles==0.8.0 joblib==1.1.0 +toposort==1.7 # [dev] markupsafe==2.0.1 diff --git a/tests/data/parsers/nexus/201805_WSe2_arpes.nxs b/tests/data/parsers/nexus/201805_WSe2_arpes.nxs new file mode 100755 index 0000000000000000000000000000000000000000..69eba9d4b66073f5e0bd40aee0c803d7b09fb633 Binary files /dev/null and b/tests/data/parsers/nexus/201805_WSe2_arpes.nxs differ diff --git a/tests/metainfo/test_attributes.py b/tests/metainfo/test_attributes.py index 61fb5bf9771b5ef8ad81a8c8534ebbd631c98c8c..969a057a597c42ed7faeb9659713c25c5d7dbae6 100644 --- a/tests/metainfo/test_attributes.py +++ b/tests/metainfo/test_attributes.py @@ -22,9 +22,9 @@ import pytest import numpy as np import pytz -from nomad.metainfo import MSection, Quantity, Attribute, MEnum, Reference, Datetime, Property -from nomad.metainfo.metainfo import SubSection, MetainfoError -from nomad.metainfo.nx_unit import validate_allowable_list +from nomad.metainfo import MSection, Quantity, Attribute, MEnum, Reference, Datetime +from nomad.metainfo.metainfo import MQuantity, Definition +from nomad.metainfo.metainfo_utility import validate_allowable_unit from nomad.units 
import ureg @@ -43,44 +43,28 @@ def test_attributes(type, errors, value): Attribute(name='my_quantity_attribute', type=type) ] ) - my_section = SubSection( - section=Quantity.m_def, - attributes=[ - Attribute(name='my_section_attribute', type=type) - ] - ) - assert Property.all_attributes.derived is not None - assert len(MySection.m_def.m_all_validate()[0]) == errors * 2 + assert Definition.all_attributes.derived is not None + assert len(MySection.m_def.m_all_validate()[0]) == errors assert MySection.my_quantity.attributes[0].name == 'my_quantity_attribute' assert MySection.my_quantity.attributes[0].type == type - assert MySection.my_section.attributes[0].name == 'my_section_attribute' - assert MySection.my_section.attributes[0].type == type if errors > 0: return section = MySection() - attributes = [ - (MySection.my_quantity, 'my_quantity_attribute'), - (MySection.my_section, 'my_section_attribute') - ] - for property, attribute in attributes: - assert section.m_get_attribute(property, attribute) is None - section.m_set_attribute(property, attribute, value) - assert section.m_get_attribute(property, attribute) == value - json_data = section.m_to_dict() - assert json_data == {} + assert section.m_get_quantity_attribute('my_quantity', 'my_quantity_attribute') is None + section.m_set_quantity_attribute('my_quantity', 'my_quantity_attribute', value) + assert section.m_get_quantity_attribute('my_quantity', 'my_quantity_attribute') == value section.my_quantity = 'test' - section.my_section = MySection.my_quantity json_data = section.m_to_dict() section = MySection.m_from_dict(json_data) - for property, attribute in attributes: - assert section.m_get_attribute(property, attribute) == value + + assert section.m_get_quantity_attribute('my_quantity', 'my_quantity_attribute') == value @pytest.mark.parametrize('name,value', [ @@ -89,20 +73,21 @@ def test_attributes(type, errors, value): ]) def test_m_attributes(name, value): class MySection(MSection): - my_quantity = 
Quantity(type=float) + my_quantity = Quantity( + type=float, + attributes=[ + Attribute(name='m_source_unit', type=str) + ]) section = MySection(my_quantity=1) if name: - section.m_set_attribute('my_quantity', name, value) + section.m_set_quantity_attribute('my_quantity', name, value) json_data = section.m_to_dict() section = MySection.m_from_dict(json_data) if name: - assert section.m_get_attribute('my_quantity', name) == value - else: - for key in json_data.keys(): - assert '@' not in key + assert section.m_get_quantity_attribute('my_quantity', name) == value def test_variable_name(): @@ -116,20 +101,17 @@ def test_variable_name(): section = MySection() - section.MY_quantity = 'v1' - section.m_set_attribute('MY_quantity', 'MY_attribute', 'v1') + section.MY_quantity = MQuantity.wrap('v1', 'MY_quantity') + section.m_set_quantity_attribute('MY_quantity', 'MY_attribute', 'v1') assert section.MY_quantity == 'v1' - assert section.m_get_attribute('MY_quantity', 'MY_attribute') == 'v1' + assert section.m_get_quantity_attribute('MY_quantity', 'MY_attribute') == 'v1' - section.test_quantity = 'v2' - section.m_set_attribute('test_quantity', 'test_attribute', 'v2') - assert section.MY_quantity == 'v2' - assert section.m_get_attribute('MY_quantity', 'm_source_name') == 'test_quantity' - assert section.m_get_attribute('MY_quantity', 'MY_attribute') == 'v2' + section.test_quantity = MQuantity.wrap('v2', 'test_quantity') + section.m_set_quantity_attribute('test_quantity', 'test_attribute', 'v2') assert section.test_quantity == 'v2' - assert section.m_get_attribute('test_quantity', 'test_attribute') == 'v2' + assert section.m_get_quantity_attribute('test_quantity', 'test_attribute') == 'v2' - with pytest.raises(MetainfoError): + with pytest.raises(ValueError): section.completely_off = 'v1' @@ -138,8 +120,8 @@ def test_variable_name(): pytest.param('[length]', ['m/m/m', '1/m'], False, id='length_false'), pytest.param('dimensionless', ['1', 'm/m', 'kg*m/s/s/m^2/MPa'], True, 
id='dimensionless_true') ]) -def test_nx_unit_compatibility(token, units, result): - assert validate_allowable_list(token, units) == result +def test_unit_compatibility(token, units, result): + assert validate_allowable_unit(token, units) == result if result: class MySection(MSection): @@ -160,3 +142,30 @@ def test_nx_unit_compatibility(token, units, result): section = MySection() for u in units: section.numerical = 1 * ureg.parse_units(u) + + +def test_repeating_quantity(): + class MySection(MSection): + TEST_repeat = Quantity(repeats=True, type=float, unit='m') + TEST_nonrepeat = Quantity(variable=True, type=float) + + my_section = MySection() + + my_section.TEST_repeat = MQuantity.wrap(ureg.Quantity(1., 'cm')) + + assert my_section.TEST_repeat.m == 0.01 # pylint: disable=E1101 + + my_section.TEST_repeat = None + + assert my_section.TEST_repeat is None + + with pytest.raises(ValueError): + _ = my_section.instance_repeat + + my_section.instance_nonrepeat = MQuantity('instance_nonrepeat', 1.23) + + assert my_section.instance_nonrepeat == 1.23 + + my_section.instance_nonrepeat = None + + _ = my_section.instance_nonrepeat diff --git a/tests/metainfo/test_full_storage_quantity.py b/tests/metainfo/test_full_storage_quantity.py new file mode 100644 index 0000000000000000000000000000000000000000..53c61c165a2e71f2b4fb1579b439b696c31eb1e3 --- /dev/null +++ b/tests/metainfo/test_full_storage_quantity.py @@ -0,0 +1,105 @@ +# +# Copyright The NOMAD Authors. +# +# This file is part of NOMAD. See https://nomad-lab.eu for further info. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# +import pytest + +from nomad.metainfo import MSection, Quantity, Attribute, SubSection, MetainfoError, Section +from nomad.metainfo.metainfo_utility import MQuantity +from nomad.units import ureg + + +class SectionA(MSection): + plain = Quantity(type=float) + full = Quantity(type=float, unit='m', dimensionality='[length]', attributes=[Attribute(name='gender', type=str)]) + VARIABLE_game = Quantity(type=str, variable=True, attributes=[ + Attribute(name='year', type=int), + Attribute(name='aka', type=str, shape=['1..3']), + ]) + a_attribute = Attribute(type=str) + + +class SectionB(MSection): + out_plain = Quantity(type=int) + b_attribute = Attribute(type=str) + subsection = SubSection( + section=SectionA.m_def + ) + + +def test_full_storage_quantity(): + a_section = SectionA() + b_section = SectionB() + + a_section.plain = 1. + assert a_section.plain == 1. + + # wrong dimensionality + with pytest.raises(MetainfoError): + a_section.full = ureg.Quantity('2*s') + + a_section.full = ureg.Quantity('2*cm') + assert a_section.full == ureg.Quantity('0.02*m') + + # for variadic quantity, it is not allowed to set the value directly + with pytest.raises(MetainfoError): + a_section.gta5_game = 'gta5' + + # need to wrap it in a MQuantity with the correct name + a_section.gta5_game = MQuantity.wrap('gta5', 'gta5_game') + assert a_section.gta5_game == 'gta5' + + # possible to use variadic name to set the value + a_section.VARIABLE_game = MQuantity.wrap('gta3', 'gta3_game') + assert a_section.gta3_game == 'gta3' + + # wrong type but implicitly convertible + a_section.m_set_quantity_attribute('full', 'gender', 0) + assert a_section.m_get_quantity_attribute('full', 'gender') == '0' + + a_section.m_set_quantity_attribute('full', 'gender', 'Male') + assert a_section.m_get_quantity_attribute('full', 'gender') == 'Male' + + a_section.m_set_quantity_attribute('gta5_game', 'year', 
2013) + assert a_section.m_get_quantity_attribute('gta5_game', 'year') == 2013 + + a_section.m_set_quantity_attribute('gta3_game', 'year', 2001) + assert a_section.m_get_quantity_attribute('gta3_game', 'year') == 2001 + + # shape error + with pytest.raises(MetainfoError): + a_section.m_set_quantity_attribute('gta3_game', 'aka', ['rockstar games', 'gta', 'gta 3', 'GTA3']) + + a_section.m_set_quantity_attribute('gta3_game', 'aka', ['rockstar games', 'gta', 'gta 3']) + a_section.m_set_section_attribute('a_attribute', 'easy') + + assert a_section.m_get_quantity_attribute('gta3_game', 'aka') == ['rockstar games', 'gta', 'gta 3'] + assert a_section.m_get_section_attribute('a_attribute') == 'easy' + + b_section.subsection = a_section + + json = a_section.m_to_dict(with_out_meta=True) + + assert json == SectionA.m_from_dict(json).m_to_dict(with_out_meta=True) + + json = b_section.m_to_dict(with_out_meta=True) + assert json == SectionB.m_from_dict(json).m_to_dict(with_out_meta=True) + + json = a_section.m_def.m_to_dict(with_out_meta=True) + assert json == Section.m_from_dict(json).m_to_dict(with_out_meta=True) + + json = b_section.m_def.m_to_dict(with_out_meta=True) + assert json == Section.m_from_dict(json).m_to_dict(with_out_meta=True) diff --git a/tests/metainfo/test_quantities.py b/tests/metainfo/test_quantities.py index 78bf120d75ef15b7933b1159060ee49f05802578..71141d055c74468d8946cab8e3269f35e3b9cadd 100644 --- a/tests/metainfo/test_quantities.py +++ b/tests/metainfo/test_quantities.py @@ -31,8 +31,7 @@ from nomad.metainfo.metainfo import ( Capitalized, Bytes, URL, - _types_float, - _types_int, + MTypes ) @@ -107,11 +106,14 @@ def test_normalization_string(def_type, orig_value, normalized_value): @pytest.mark.parametrize( 'def_type, unit, shape, input, output, valid', - [pytest.param(x, None, [], 1, 1, True, id=f'0D type without unit: {x.__name__}') for x in _types_int] - + [pytest.param(x, None, [], 1.0, 1.0, True, id=f'0D type without unit: {x.__name__}') for x in 
_types_float] - + [pytest.param(x, 'm', [], 100 * units('cm'), 1 * units('m'), True, id=f'0D type with unit: {x.__name__}') for x in _types_int - {int}] - + [pytest.param(int, 'm', [], 100 * units('m'), 100 * units('m'), False, id="precision loss: 0D int to int with unit")] - + [pytest.param(x, 'm', [], 100.0 * units('cm'), 1.0 * units('m'), True, id=f'0D type with unit: {x.__name__}') for x in _types_float] + [pytest.param(x, None, [], 1, 1, True, id=f'0D type without unit: {x.__name__}') for x in MTypes.int] + + [pytest.param(x, None, [], 1.0, 1.0, True, id=f'0D type without unit: {x.__name__}') for x in MTypes.float] + + [pytest.param(x, 'm', [], 100 * units('cm'), 1 * units('m'), True, id=f'0D type with unit: {x.__name__}') for x in + MTypes.int - {int}] + + [pytest.param(int, 'm', [], 100 * units('m'), 100 * units('m'), False, + id="precision loss: 0D int to int with unit")] + + [pytest.param(x, 'm', [], 100.0 * units('cm'), 1.0 * units('m'), True, id=f'0D type with unit: {x.__name__}') for + x in MTypes.float] ) def test_normalization_number(def_type, unit, shape, input, output, valid): '''Numeric quantities with a unit should always return a full pint.Quantity diff --git a/tests/metainfo/test_yaml_schema.py b/tests/metainfo/test_yaml_schema.py index 8ce002fcaf74c488c140ac31ff26453771526638..078d17315305f832ff7168c1237acc95df4c9799 100644 --- a/tests/metainfo/test_yaml_schema.py +++ b/tests/metainfo/test_yaml_schema.py @@ -3,7 +3,7 @@ import pytest import yaml from nomad.datamodel.data import UserReference, AuthorReference -from nomad.metainfo.metainfo import validElnComponents, validElnTypes, primitive_type_aliases +from nomad.metainfo.metainfo import MTypes from nomad.utils import strip from nomad.metainfo import Package, MSection, Quantity, Reference, SubSection, Section, MProxy, MetainfoError @@ -228,8 +228,8 @@ def test_sub_section_tree(): assert yaml.m_to_dict() == reference.m_to_dict() -@pytest.mark.parametrize("eln_type", validElnTypes.keys()) 
-@pytest.mark.parametrize("eln_component", sum(validElnComponents.values(), [])) +@pytest.mark.parametrize("eln_type", MTypes.eln.keys()) +@pytest.mark.parametrize("eln_component", sum(MTypes.eln_component.values(), [])) def test_datatype_component_annotations(eln_type, eln_component): base_schema = ''' m_def: 'nomad.metainfo.metainfo.Package' @@ -251,27 +251,28 @@ def test_datatype_component_annotations(eln_type, eln_component): component: eln_component ''' - for quantity_type in validElnTypes[eln_type]: + for quantity_type in MTypes.eln[eln_type]: if eln_type == 'reference': yaml_schema = base_schema.replace("quantity_type", "'#/Sample'").replace("eln_component", eln_component) else: yaml_schema = base_schema.replace("quantity_type", quantity_type).replace("eln_component", eln_component) - if eln_component not in validElnComponents[eln_type]: + if eln_component not in MTypes.eln_component[eln_type]: with pytest.raises(Exception) as exception: package = yaml_to_package(yaml_schema) type_name = quantity_type if eln_type == 'number' or eln_type == 'datetime' or eln_type == 'enum' or eln_type == 'reference': - process = next(filter(lambda section: section['name'] == 'Process', package['section_definitions']), None) + process = next(filter(lambda section: section['name'] == 'Process', package['section_definitions']), + None) quantity = process['quantities'][0] if type(quantity.type).__name__ != 'type': type_name = type(quantity.type).__name__ - if type_name in primitive_type_aliases.keys(): - type_name = primitive_type_aliases[type_name].__name__ + if type_name in MTypes.primitive_name: + type_name = MTypes.primitive_name[type_name].__name__ package.__init_metainfo__() assert isinstance(exception.value, MetainfoError) assert exception.value.args[0] == 'One constraint was violated: The component `%s` is not compatible with the quantity `%s` of the type `%s`. 
Accepted components: %s (there are 0 more violations)' \ - % (eln_component, 'quantity_name', type_name, ', '.join(validElnComponents[eln_type])) + % (eln_component, 'quantity_name', type_name, ', '.join(MTypes.eln_component[eln_type])) yaml_schema_user_author = strip(''' diff --git a/tests/normalizing/test_system.py b/tests/normalizing/test_system.py index 008d5766f10750501ce19d131213a02b4a70e588..8208ed4741248186d5cec37efe9f7a18808eaa4c 100644 --- a/tests/normalizing/test_system.py +++ b/tests/normalizing/test_system.py @@ -124,6 +124,10 @@ def test_template_example_normalizer(parsed_template_example, no_warn, caplog): def assert_normalized(entry_archive: datamodel.EntryArchive): metadata = entry_archive.metadata + + if metadata.parser_name == 'parsers/nexus': + return + results = entry_archive.results metadata.apply_archvie_metadata(entry_archive) parser_name = metadata.parser_name diff --git a/tests/parsing/test_nexus.py b/tests/parsing/test_nexus.py new file mode 100644 index 0000000000000000000000000000000000000000..7d445d738240efeaace343289e78123aa9383a59 --- /dev/null +++ b/tests/parsing/test_nexus.py @@ -0,0 +1,105 @@ +"""This is a code that performs several tests on nexus tool + +""" +# +# Copyright The NOMAD Authors. +# +# This file is part of NOMAD. See https://nomad-lab.eu for further info. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +from typing import Any + +import pytest + +from nomad.datamodel import EntryArchive +from nomad.metainfo import Section +from nomad.metainfo.nexus import nexus_metainfo_package +from nomad.parsing.nexus import NexusParser +from nomad.units import ureg +from nomad.utils import get_logger + + +@pytest.mark.parametrize('path,value', [ + pytest.param('name', 'nexus'), + pytest.param('NXobject.name', 'NXobject'), + pytest.param('NXentry.nx_kind', 'group'), + pytest.param('NXentry.NXdata', '*'), + pytest.param('NXdetector.real_time', '*'), + pytest.param('NXentry.NXdata.nx_optional', True), + pytest.param('NXentry.DATA.section_def.nx_kind', 'group'), + pytest.param('NXentry.DATA.section_def.nx_optional', True), + pytest.param('NXentry.DATA.section_def.name', 'NXdata'), + pytest.param('NXdetector.real_time.name', 'real_time'), + pytest.param('NXdetector.real_time.nx_type', 'NX_NUMBER'), + pytest.param('NXdetector.real_time.nx_units', 'NX_TIME'), + pytest.param('NXarpes.NXentry.NXdata.nx_optional', False), + pytest.param('NXentry.nx_category', 'base'), + pytest.param('NXapm.nx_category', 'application') +]) +def test_assert_nexus_metainfo(path: str, value: Any): + ''' + Test the existence of nexus metainfo + ''' + current = nexus_metainfo_package + for name in path.split('.'): + for content in current.m_contents(): + if getattr(content, 'name', None) == name: + current = content # type: ignore + break + + else: + current = getattr(current, name, None) + + if current is None: + assert False, f'{path} does not exist' + + if value == '*': + assert current is not None, f'{path} does not exist' + elif value is None: + assert current is None, f'{path} does exist' + else: + assert current == value, f'{path} has wrong value' + + if isinstance(current, Section): + assert current.nx_kind is not None + for base_section in current.all_base_sections: + assert base_section.nx_kind == current.nx_kind + + +def test_nexus_example(): + archive = EntryArchive() + + example_data = 
'tests/data/parsers/nexus/201805_WSe2_arpes.nxs' + NexusParser().parse(example_data, archive, get_logger(__name__)) + assert archive.nexus.NXarpes.ENTRY[0].SAMPLE[0].pressure == ureg.Quantity('3.27e-10*millibar') + + instrument = archive.nexus.NXarpes.ENTRY[0].INSTRUMENT[0] + + assert instrument.monochromator.energy == ureg.Quantity('36.49699020385742*electron_volt') + assert instrument.analyser.entrance_slit_size == ureg.Quantity('750 micrometer') + # good ENUM - x-ray + assert instrument.SOURCE[0].probe == 'x-ray' + # wrong inherited ENUM - Burst + assert instrument.SOURCE[0].mode is None + # wrong inherited ENUM for extended field - 'Free Electron Laser' + assert instrument.SOURCE[0].type is None + + data = archive.nexus.NXarpes.ENTRY[0].DATA[0] + assert data.angles is not None + # assert data.delays is not None + assert data.energies is not None + assert data.angles.check("1/Å") + # assert data.delays.check("fs") + assert data.energies.check("eV") diff --git a/tests/parsing/test_parsing.py b/tests/parsing/test_parsing.py index a4004249069feac7fecd215d7e2fbc996a626413..1eb9c34a9dd0a4f4ff572c2656a7aa9d87fbebc7 100644 --- a/tests/parsing/test_parsing.py +++ b/tests/parsing/test_parsing.py @@ -76,7 +76,8 @@ parser_examples = [ ('parsers/asr', 'tests/data/parsers/asr/archive_ccdc26c4f32546c5a00ad03a093b73dc.json'), ('parsers/psi4', 'tests/data/parsers/psi4/adc1/output.ref'), ('parsers/yambo', 'tests/data/parsers/yambo/hBN/r-10b_1Ry_HF_and_locXC_gw0_em1d_ppa'), - ('parsers/archive', 'tests/data/parsers/archive.json') + ('parsers/archive', 'tests/data/parsers/archive.json'), + ('parsers/nexus', 'tests/data/parsers/nexus/201805_WSe2_arpes.nxs') ] # We need to remove some cases with external mainfiles, which might not exist @@ -87,7 +88,7 @@ for parser, mainfile in parser_examples: fixed_parser_examples.append((parser, mainfile)) parser_examples = fixed_parser_examples -correct_num_output_files = 123 +correct_num_output_files = 124 def create_reference(data, pretty):