Commit 59b66f16 authored by Markus Scheidgen
Browse files

Allow generating and using metainfo 2 Python modules.

parent b9d81a09
Pipeline #70865 failed with stages
in 34 minutes and 57 seconds
......@@ -42,3 +42,12 @@ def qa(skip_tests: bool, exitfirst: bool):
ret_code += os.system('python -m mypy --ignore-missing-imports --follow-imports=silent --no-strict-optional nomad tests')
sys.exit(ret_code)
@dev.command(help='Generates source-code for the new metainfo from .json files of the old.')
@click.argument('package', nargs=1)
def legacy_metainfo(package):
    '''
    Converts the given legacy metainfo package and writes the generated Python
    metainfo code for it.

    Arguments:
        package: The single command line argument; it is handed to ``convert`` as is.
    '''
    # imported locally, so the metainfo machinery is only loaded when the command runs
    from nomad.metainfo import legacy

    legacy.generate_metainfo_code(legacy.convert(package))
......@@ -119,6 +119,11 @@ def _parse(
if metainfo:
def backend_factory(env, logger):
    # Creates the backend from the given legacy environment, which is converted
    # with ``convert`` on every call.
    # NOTE(review): the commented-out lines below sketch an alternative that would use
    # the ``m_env`` of vaspparser.metainfo instead of converting; confirm whether they
    # are still needed.
    # from vaspparser.metainfo import m_env
    # from nomad.metainfo import Section
    # m_env.resolve_definition('section_basis_set_atom_centered', Section)
    # return MetainfoBackend(m_env, logger=logger)
    return MetainfoBackend(convert(env), logger=logger)
kwargs.update(backend_factory=backend_factory)
......
import sys
from nomad.metainfo import Environment
from nomad.metainfo.legacy import LegacyMetainfoEnvironment
import nomad.datamodel.metainfo.common
import nomad.datamodel.metainfo.public
import nomad.datamodel.metainfo.general
# The environment that collects the metainfo packages of the modules imported above.
m_env = LegacyMetainfoEnvironment()
# Each module provides its package as ``m_package``. The module objects are looked up
# in sys.modules by their full dotted name.
# NOTE(review): the ``type: ignore`` presumably silences the type checker for the
# dynamically accessed ``m_package`` attribute; confirm.
m_env.m_add_sub_section(Environment.packages, sys.modules['nomad.datamodel.metainfo.common'].m_package) # type: ignore
m_env.m_add_sub_section(Environment.packages, sys.modules['nomad.datamodel.metainfo.public'].m_package) # type: ignore
m_env.m_add_sub_section(Environment.packages, sys.modules['nomad.datamodel.metainfo.general'].m_package) # type: ignore
This diff is collapsed.
import numpy as np # pylint: disable=unused-import
import typing # pylint: disable=unused-import
from nomad.metainfo import ( # pylint: disable=unused-import
MSection, MCategory, Category, Package, Quantity, Section, SubSection, SectionProxy,
Reference
)
# The metainfo package object of this module; ``__init_metainfo__`` is called on it at
# the end of the module.
# NOTE(review): the description is the literal string 'None', presumably a missing
# description that was rendered as text by the code template; confirm whether an
# actual None (or no description) was intended.
m_package = Package(name='general', description='None')
# Section definition with one quantity per piece of general entry information and a
# repeating sub section for archive processing information.
# The contents of the multi-line string literals are kept flush left so that the
# strings stay exactly as they are in this file.
class section_entry_info(MSection):
    '''
General information about this entry that is independent from its domain, field, or
used parser
'''

    m_def = Section(validate=False)

    # scalar (shape=[]) 64 bit integer
    entry_upload_time = Quantity(
        type=np.dtype(np.int64),
        shape=[],
        description='''
Upload datetime, given as total number of seconds is the elapsed since the unix
epoch (1 January 1970)
''')

    entry_uploader_name = Quantity(
        type=str,
        shape=[],
        description='''
Name of the uploader, given as lastname, firstname.
''')

    entry_uploader_id = Quantity(
        type=str,
        shape=[],
        description='''
The id of the uploader.
''')

    upload_id = Quantity(
        type=str,
        shape=[],
        description='''
Nomad upload id
''')

    calc_id = Quantity(
        type=str,
        shape=[],
        description='''
Nomad calc id.
''')

    calc_hash = Quantity(
        type=str,
        shape=[],
        description='''
Calculation hash based on raw file contents.
''')

    mainfile = Quantity(
        type=str,
        shape=[],
        description='''
Path to the main file within the upload.
''')

    parser_name = Quantity(
        type=str,
        shape=[],
        description='''
Name of the parser used to extract this information.
''')

    # the shape names the quantity ``number_of_files`` that is defined below
    filepaths = Quantity(
        type=str,
        shape=['number_of_files'],
        description='''
Filepaths of files that belong to this entry, i.e. files in the same directory.
Filepaths are relative to the upload.
''')

    number_of_files = Quantity(
        type=int,
        shape=[],
        description='''
Number of files that belong to this entry.
''')

    # The sub section class is only defined further down in this module, hence it is
    # referred to by name through a SectionProxy.
    section_archive_processing_info = SubSection(
        sub_section=SectionProxy('section_archive_processing_info'),
        repeats=True)
# Section definition that section_entry_info uses as a repeating sub section.
# The contents of the multi-line string literals are kept flush left so that the
# strings stay exactly as they are in this file.
class section_archive_processing_info(MSection):
    '''
Information about the used archive processing steps and their execution.
'''

    m_def = Section(validate=False)

    archive_processor_name = Quantity(
        type=str,
        shape=[],
        description='''
Name of the applied archive processing program.
''')

    archive_processor_error = Quantity(
        type=str,
        shape=[],
        description='''
The main error during execution of the archive processing program that failed the
program.
''')

    number_of_archive_processor_warnings = Quantity(
        type=int,
        shape=[],
        description='''
Number of warnings during execution of the archive processing program.
''')

    # the shape names the quantity ``number_of_archive_processor_warnings`` defined above
    archive_processor_warnings = Quantity(
        type=str,
        shape=['number_of_archive_processor_warnings'],
        description='''
Warnings during execution of the archive processing program.
''')

    archive_processor_status = Quantity(
        type=str,
        shape=[],
        description='''
Status returned by archive processing program.
''')
# Called last, after all section classes of this module are defined.
# NOTE(review): presumably this is where the package picks up its definitions and the
# SectionProxy references by name get resolved; confirm against Package.
m_package.__init_metainfo__()
This diff is collapsed.
......@@ -167,7 +167,7 @@ class ElasticDocument(SectionAnnotation):
annotation.mapping = Date(**kwargs)
elif isinstance(quantity.type, Reference):
inner_document = ElasticDocument.create_document(
quantity.type.target_section_def, inner_doc=True,
cast(Section, quantity.type.target_section_def), inner_doc=True,
prefix=annotation.field)
annotation.mapping = Object(inner_document)
elif isinstance(quantity.type, MEnum):
......
from typing import cast, Dict, List, Union, Any, Set, Iterable
from typing import cast, Dict, List, Union, Any, Set, Iterable, Tuple
import numpy as np
from pint.errors import UndefinedUnitError
import os.path
......@@ -12,7 +12,7 @@ import nomad_meta_info
from nomad import utils
from nomad.metainfo import (
Definition, SubSection, Package, Quantity, Category, Section, Reference, units,
Environment, MEnum)
Environment, MEnum, MProxy)
logger = utils.get_logger(__name__)
......@@ -23,11 +23,37 @@ _ignored_packages = [
'repository.nomadmetainfo.json']
def python_package_mapping(metainfo_package_name: str) -> Tuple[str, str]:
    '''
    Compute the python module for the given metainfo package name.

    The part of the name before the first underscore decides where the module lives:
    ``common``, ``general``, and ``public`` packages belong to
    ``nomad.datamodel.metainfo``, everything else is mapped into the parser project
    that is named after this prefix.

    Arguments:
        metainfo_package_name: The name of the metainfo package, e.g. ``vasp_incars``.

    Returns:
        A tuple ``(python_package_name, path)``. The first item is the dotted python
        module name, the second item is the path of the module file within the nomad
        git project.
    '''
    prefix = metainfo_package_name.partition('_')[0]

    if prefix in ('common', 'general', 'public'):
        directory = 'nomad/datamodel/metainfo'
        python_package_prefix = 'nomad.datamodel.metainfo'
    else:
        directory = 'dependencies/parsers/%s/%sparser/metainfo' % (prefix, prefix)
        python_package_prefix = '%sparser.metainfo' % prefix

    python_package_name = '%s.%s' % (python_package_prefix, metainfo_package_name)
    path = '%s/%s.py' % (directory, metainfo_package_name)

    return python_package_name, path
class LegacyMetainfoEnvironment(Environment):
'''
A metainfo environment with functions to create a legacy metainfo version of
the environment.
'''
legacy_package_name = Quantity(type=str)
def legacy_info(self, definition: Definition, *args, **kwargs) -> InfoKindEl:
''' Creates a legacy metainfo objects for the given definition. '''
super_names: List[str] = list()
......@@ -62,6 +88,11 @@ class LegacyMetainfoEnvironment(Environment):
dtype_str = 'C'
elif isinstance(definition.type, Reference):
dtype_str = 'r'
if isinstance(definition.type.target_section_def, MProxy):
proxy = definition.type.target_section_def
proxy.m_proxy_section = definition
proxy.m_proxy_quantity = Quantity.type
definition.type.target_section_def = proxy.m_proxy_resolve()
result['referencedSections'] = [definition.type.target_section_def.name]
elif isinstance(definition.type, MEnum):
dtype_str = 'C'
......@@ -136,6 +167,7 @@ class EnvironmentConversion:
def create_env(self) -> LegacyMetainfoEnvironment:
env = LegacyMetainfoEnvironment()
env.legacy_package_name = self.legacy_env.name.replace('.nomadmetainfo.json', '').replace('.', '_')
for package_conv in self.package_conversions.values():
package = package_conv.package
errors, warnings = package.m_all_validate()
......@@ -149,7 +181,6 @@ class EnvironmentConversion:
(warnings[0], len(warnings) - 1, package))
env.m_add_sub_section(Environment.packages, package)
package.init_metainfo()
return env
def __fix_legacy_super_names(self):
......@@ -206,7 +237,9 @@ class PackageConversion:
self.env_conversion = env_conversion
self.legacy_defs: List[InfoKindEl] = []
self.package = Package(name=name)
python_package, python_path = python_package_mapping(name)
self.package = Package(name=name, a_python=(python_package, python_path))
self.quantities: Dict[str, Quantity] = {}
self.logger = logger.bind(package=name)
......@@ -356,20 +389,18 @@ def convert(metainfo_path: str) -> LegacyMetainfoEnvironment:
return EnvironmentConversion(metainfo_path).create_env()
def generate_metainfo_code(metainfo_env: Environment, directory: str = None):
def generate_metainfo_code(metainfo_env: LegacyMetainfoEnvironment):
'''
Generates python code with metainfo definitions for all packages in the given
environement
Arguments:
env: The metainfo environment.
directory: An optional directory path. The directory must exist. Default
is the working directory.
python_package_path: An optional directory path. The directory must exist. Default
is the working directory. The path will be used to form the module prefix
for generated Python modules.
'''
if directory is None:
directory = '.'
def format_description(description, indent=0, width=90):
paragraphs = [paragraph.strip() for paragraph in description.split('\n')]
......@@ -383,13 +414,19 @@ def generate_metainfo_code(metainfo_env: Environment, directory: str = None):
format_paragraph(p, i == 0)
for i, p in enumerate(paragraphs) if p != ''])
def format_type(mi_type):
def format_type(pkg, mi_type):
if type(mi_type) == np.dtype:
return 'np.dtype(np.%s)' % mi_type
if mi_type in [int, float, str, bool]:
return mi_type.__name__
if isinstance(mi_type, Reference):
return "SectionProxy('%s')" % mi_type.target_section_def.name
if pkg == mi_type.target_section_def.m_parent:
return "Reference(SectionProxy('%s'))" % mi_type.target_section_def.name
else:
python_pkg, _ = mi_type.target_section_def.m_parent.a_python
return '%s.%s' % (python_pkg.split('.')[-1], mi_type.target_section_def.name)
else:
return str(mi_type)
......@@ -401,10 +438,16 @@ def generate_metainfo_code(metainfo_env: Environment, directory: str = None):
if pkg == definition.m_parent:
return definition.name
else:
return definition.qualified_name()
python_pkg, _ = definition.m_parent.a_python
return '%s.%s' % (python_pkg.split('.')[-1], definition.name)
return ', '.join([format_definition_ref(definition) for definition in definitions])
def fromat_package_import(pkg):
    '''
    Renders the ``from ... import ...`` statement for the python module of the
    given package, based on the dotted module name in ``pkg.a_python``.
    '''
    dotted_name, _ = pkg.a_python
    # everything before the last dot is the parent package, the rest is the module
    parent, _, module = dotted_name.rpartition('.')
    return 'from %s import %s' % (parent, module)
env = JinjaEnvironment(
loader=PackageLoader('nomad.metainfo', 'templates'),
autoescape=select_autoescape(['python']))
......@@ -412,31 +455,26 @@ def generate_metainfo_code(metainfo_env: Environment, directory: str = None):
format_description=format_description,
format_type=format_type,
format_unit=format_unit,
format_definition_refs=format_definition_refs)
format_definition_refs=format_definition_refs,
fromat_package_import=fromat_package_import)
for package in metainfo_env.packages:
file_name = package.name
with open(os.path.join(directory, '%s.py' % file_name), 'wt') as f:
_, path = package.a_python
if not os.path.exists(os.path.dirname(path)):
os.makedirs(os.path.dirname(path))
with open(path, 'wt') as f:
code = env.get_template('package.j2').render(pkg=package)
code = '\n'.join([
line.rstrip() if line.strip() != '' else ''
for line in code.split('\n')])
f.write(code)
_, path = python_package_mapping(metainfo_env.legacy_package_name)
with open(os.path.join(os.path.dirname(path), '__init__.py'), 'wt') as f:
# if __name__ == '__main__':
# output = 'output'
# env = convert('vasp.nomadmetainfo.json')
# assert env.resolve_definition('x_vasp_incar_EFIELD_PEAD', Quantity) is not None
# assert 'x_vasp_incar_EFIELD_PEAD' in env.legacy_info_env()
# generate_metainfo_code(env, output)
# from output import public
# import json
# run = public.section_run()
# system = run.m_create(public.section_system)
# system.atom_labels = ['H', 'H', 'O']
# print(json.dumps(run.m_to_dict(with_meta=True), indent=2))
code = env.get_template('environment.j2').render(env=metainfo_env)
code = '\n'.join([
line.rstrip() if line.strip() != '' else ''
for line in code.split('\n')])
f.write(code)
......@@ -95,15 +95,17 @@ class MProxy():
url: The reference represented as an URL string.
'''
def __init__(self, m_proxy_url: str, m_proxy_section: 'MSection', m_proxy_quantity: 'Quantity'):
def __init__(
self, m_proxy_url: str, m_proxy_section: 'MSection' = None,
m_proxy_quantity: 'Quantity' = None):
self.m_proxy_url = m_proxy_url
self.m_proxy_section = m_proxy_section
self.m_proxy_resolved = None
self.m_reference_type = m_proxy_quantity.type
self.m_proxy_quantity = m_proxy_quantity
def m_proxy_resolve(self):
if self.m_proxy_section and not self.m_proxy_resolved:
self.m_proxy_resolved = self.m_reference_type.resolve(self)
if self.m_proxy_section and self.m_proxy_quantity and not self.m_proxy_resolved:
self.m_proxy_resolved = self.m_proxy_quantity.type.resolve(self)
if self.m_proxy_resolved is not None and isinstance(self, MProxy):
setattr(self, '__class__', self.m_proxy_resolved.__class__)
......@@ -112,12 +114,28 @@ class MProxy():
return self.m_proxy_resolved
def __getattr__(self, key):
if self.m_proxy_resolve():
if self.m_proxy_resolve() is not None:
return getattr(self.m_proxy_resolved, key)
raise ReferenceError('could not resolve %s' % self.m_proxy_url)
class SectionProxy(MProxy):
    '''
    A proxy that is resolved by looking up ``m_proxy_url`` as a name in the
    definitions of the package that contains ``m_proxy_section``.
    '''

    # NOTE(review): the indentation of this method is reconstructed; in particular it is
    # assumed that the ReferenceError is only raised after a resolve attempt. Confirm.
    def m_proxy_resolve(self):
        '''
        Returns the resolved definition. Raises ReferenceError if a resolve was
        attempted and no definition was found.
        '''
        if self.m_proxy_section and not self.m_proxy_resolved:
            # walk up the m_parent chain until a Package (or the end of the chain) is reached
            root = self.m_proxy_section
            while root is not None and not isinstance(root, Package):
                root = root.m_parent

            if isinstance(root, Package):
                self.m_proxy_resolved = root.all_definitions.get(self.m_proxy_url)

            if self.m_proxy_resolved is None:
                raise ReferenceError('could not resolve %s' % self.m_proxy_url)

        return self.m_proxy_resolved
class DataType:
'''
Allows to define custom data types that can be used in the meta-info.
......@@ -298,9 +316,7 @@ class _QuantityType(DataType):
class Reference(DataType):
''' Datatype used for reference quantities. '''
def __init__(self, section_def: 'Section'):
if not isinstance(section_def, Section):
raise MetainfoError('%s is not a section definition.' % section_def)
def __init__(self, section_def: Union['Section', 'SectionProxy']):
self.target_section_def = section_def
def resolve(self, proxy) -> 'MSection':
......@@ -311,6 +327,12 @@ class Reference(DataType):
return proxy.m_proxy_section.m_resolve(proxy.m_proxy_url)
def set_normalize(self, section: 'MSection', quantity_def: 'Quantity', value: Any) -> Any:
if isinstance(self.target_section_def, MProxy):
proxy = self.target_section_def
proxy.m_proxy_section = section
proxy.m_proxy_quantity = quantity_def
self.target_section_def = proxy.m_proxy_resolve()
if self.target_section_def.m_follows(Definition.m_def):
# special case used in metainfo definitions, where we reference metainfo definitions
# using their Python class. E.g. referencing a section definition using its
......@@ -325,13 +347,14 @@ class Reference(DataType):
if isinstance(value, MProxy):
value.m_proxy_section = section
value.m_proxy_quantity = quantity_def
return value
if not isinstance(value, MSection):
raise TypeError(
'The value %s is not a section and can not be used as a reference.' % value)
if not value.m_follows(self.target_section_def):
if not value.m_follows(self.target_section_def): # type: ignore
raise TypeError(
'%s is not a %s and therefore an invalid value of %s.' %
(value, self.target_section_def, quantity_def))
......@@ -2398,13 +2421,3 @@ class Environment(MSection):
raise KeyError('Could not uniquely identify %s' % name)
else:
raise KeyError('Could not resolve %s' % name)
class SectionProxy(MProxy):
    '''
    A proxy that is resolved by looking up ``m_proxy_url`` as a name in the
    definitions of the package that is the root of ``m_proxy_section``.
    '''

    def m_proxy_resolve(self):
        '''
        Tries the lookup by name first and then delegates to ``MProxy.m_proxy_resolve``.

        Returns:
            Whatever the base implementation returns. The result of the base
            implementation was discarded before, so this method always returned None.
        '''
        if self.m_proxy_section and not self.m_proxy_resolved:
            root = self.m_proxy_section.m_root()
            if isinstance(root, Package):
                self.m_proxy_resolved = root.all_definitions.get(self.m_proxy_url)

        # hand the result of the base implementation through to the caller
        return super().m_proxy_resolve()
import sys
from nomad.metainfo import Environment
from nomad.metainfo.legacy import LegacyMetainfoEnvironment
{%- for package in env.packages %}
import {{ package.a_python[0] }}
{%- endfor %}
m_env = LegacyMetainfoEnvironment()
{%- for package in env.packages %}
m_env.m_add_sub_section(Environment.packages, sys.modules['{{ package.a_python[0] }}'].m_package) # type: ignore
{%- endfor %}
import numpy as np # pylint: disable=unused-import
import typing # pylint: disable=unused-import
from nomad.metainfo import ( # pylint: disable=unused-import
MSection, MCategory, Category, Package, Quantity, Section, SubSection, SectionProxy
MSection, MCategory, Category, Package, Quantity, Section, SubSection, SectionProxy,
Reference
)
{% for dependency in pkg.dependencies %}
from . import {{dependency.name}}
{{ fromat_package_import(dependency) }}
{%- endfor %}
m_package = Package(name='{{ pkg.name }}', description='{{ pkg.description }}')
......@@ -33,7 +34,7 @@ class {{ section.name }}({%- if section.extends_base_section -%}{{ format_defini
m_def = Section(validate=False{%- if section.extends_base_section -%}, extends_base_section=True{%- endif -%})
{% for quantity in section.quantities %}
{{ quantity.name }} = Quantity(
type={{ format_type(quantity.type) }},
type={{ format_type(pkg, quantity.type) }},
shape={{ quantity.shape }}
{%- if quantity.unit is not none -%},
unit={{ format_unit(quantity.unit) }}
......
......@@ -318,4 +318,4 @@ class MetainfoBackend(LegacyParserBackend):
pass
def pwarn(self, msg):
pass
self.logger.warning(msg)
......@@ -407,6 +407,10 @@ def timer(logger, event, method='info', **kwargs):
finally:
stop = time.time()
if logger is None:
print(event, stop - start)
return
logger_method = getattr(logger, 'info', None)
if logger_method is not None:
logger_method(event, exec_time=stop - start, **kwargs)
......
......@@ -21,7 +21,7 @@ from nomad.metainfo.metainfo import (
MSection, MCategory, Section, Quantity, SubSection, Definition, Package, DeriveError,
MetainfoError, Environment, MResource, Datetime, units, Annotation, SectionAnnotation,
DefinitionAnnotation, Reference, MProxy, derived)
from nomad.metainfo.legacy import LegacyMetainfoEnvironment, convert
from nomad.metainfo.legacy import LegacyMetainfoEnvironment, convert, python_package_mapping
from nomad.parsing.metainfo import MetainfoBackend
......@@ -60,6 +60,7 @@ def legacy_example():
@pytest.fixture(scope='session')
def legacy_env(legacy_example):
env = InfoKindEnv()
env.name = 'test.nomadmetainfo.json'
for definition in legacy_example.get('metaInfos'):
env.addInfoKindEl(InfoKindEl(
description='test_description', package='test_package', **definition))
......@@ -71,6 +72,24 @@ def env(legacy_env):
return convert(legacy_env)
@pytest.mark.parametrize('package,path,name', [
    ('vasp', 'dependencies/parsers/vasp/vaspparser/metainfo/vasp.py', 'vaspparser.metainfo.vasp'),
    ('common', 'nomad/datamodel/metainfo/common.py', 'nomad.datamodel.metainfo.common'),
    (
        'vasp_incars',
        'dependencies/parsers/vasp/vaspparser/metainfo/vasp_incars.py',
        'vaspparser.metainfo.vasp_incars')
])
def test_package_mapping(package, path, name):
    ''' Checks that a metainfo package name maps to the expected (module name, path). '''
    expected = (name, path)
    assert python_package_mapping(package) == expected
def test_environment(env: LegacyMetainfoEnvironment, no_warn):
assert env.packages[0].name == 'test_package'
assert 'section_system' in env.packages[0].all_definitions
......
......@@ -111,6 +111,25 @@ def generate_calc(pid: int = 0, calc_id: str = None, upload_id: str = None) -> d
return entry
def test_common_metainfo():
    ''' Creates a run with one system and reads the atom labels back through the run. '''
    from nomad.datamodel.metainfo.public import section_run, section_system

    labels = ['H', 'H', 'O']

    run = section_run()
    system = run.m_create(section_system)
    # assign a copy, so the comparison below is against an independent list
    system.atom_labels = list(labels)

    assert run.section_system[0].atom_labels == labels