Commit e1677782 authored by Markus Scheidgen's avatar Markus Scheidgen
Browse files

Removed legacy parsing and backend.

parent 96d7fe6b
Pipeline #102879 passed with stages
in 25 minutes and 12 seconds
[submodule "dependencies/python_common"]
path = dependencies/python_common
url = https://gitlab.mpcdf.mpg.de/nomad-lab/python-common.git
branch = nomad-fair
[submodule "dependencies/parsers/vasp"]
path = dependencies/parsers/vasp
url = https://github.com/nomad-coe/nomad-parser-vasp.git
......
Subproject commit 0bb9e8468d947dcdeb19ca519d0b99d62121865a
Subproject commit 51b0ccd3656355b1c33687d910ed9f982c9e297c
Subproject commit 1280a139435859fcf340a1393bf3771657624607
Subproject commit 80be588238cfed55b480067f272fbe15cd630906
Subproject commit 02c12ef1f72f23956febc463e69042e7902d2765
Subproject commit 09c9fbf47391814bbe95d76b7468f86eb8c361fb
Subproject commit 725217b401a130b34260f29334e353c9548cd1fa
This diff is collapsed.
......@@ -24,9 +24,7 @@ The archive API of the nomad@FAIRDI APIs. This API is about serving processed
from flask_restplus import abort, Resource
import importlib
from nomad.metainfo.legacy import python_package_mapping, LegacyMetainfoEnvironment
from nomad.metainfo import Package
from nomad.parsing.parsers import parsers
from .api import api
......@@ -44,7 +42,7 @@ class AllMetainfoResource(Resource):
'''
Returns all metainfo packages.
'''
_ = [parser for parser in parsers]
from nomad.parsing.parsers import parsers # pylint: disable=unused-import
return {
key: value.m_to_dict()
for key, value in Package.registry.items()}
......@@ -57,25 +55,16 @@ class MetainfoResource(Resource):
@api.response(200, 'Metainfo send')
def get(self, metainfo_package_name):
'''
Get a JSON representation of the NOMAD Metainfo.
You can get the metainfo for 'common', and parser/code metainfo packages.
Parser/code packages contain the necessary definitions that the respective
parser/code might use. 'Common' contains all non specific general definitions.
Other required packages might also be returned, e.g. a parser might organize its
definitions in multiple packages.
Get a JSON representation of a NOMAD Metainfo package. The package name is
the qualified Python name of the respective module that contains the definitions.
Examples are `nomad.datamodel.metainfo.common_dft` or `vaspparser.metainfo`.
If the desired package depends on other packages, these will also be contained in
the results.
'''
package = metainfo_package_name
if package.endswith('.json'):
package = package[:-5]
try:
try:
python_module = importlib.import_module(package)
except ImportError:
python_package_name, _ = python_package_mapping(package)
python_module = importlib.import_module(python_package_name)
python_module = importlib.import_module(package)
metainfo_package = getattr(python_module, 'm_package')
except (ImportError, KeyError, FileNotFoundError, AttributeError):
abort(404, message='Metainfo package %s does not exist.' % package)
......@@ -86,29 +75,3 @@ class MetainfoResource(Resource):
result[dependency.name] = dependency.m_to_dict()
return result
@ns.route('/legacy/<string:metainfo_package_name>')
class LegacyMetainfoResource(Resource):
    @api.doc('get_legacy_metainfo')
    @api.response(404, 'Package (e.g. code, parser, converter) does not exist')
    @api.response(200, 'Metainfo send')
    def get(self, metainfo_package_name):
        '''
        Get a JSON representation of the NOMAD Metainfo in its old legacy JSON format.

        You can get the metainfo for 'common', and parser/code metainfo packages.
        Parser/code packages contain the necessary definitions that the respective
        parser/code might use. 'Common' contains all non specific general definitions.
        Other required packages might also be returned, e.g. a parser might organize its
        definitions in multiple packages.
        '''
        # Any failure to locate or import the package is reported as "not found".
        try:
            legacy_env = LegacyMetainfoEnvironment.from_legacy_package_path(
                metainfo_package_name)
        except (ImportError, KeyError, FileNotFoundError, AttributeError):
            abort(404, message='Metainfo package %s does not exist.' % metainfo_package_name)

        # The loaded ``m_env`` might be a plain environment without legacy support.
        if not isinstance(legacy_env, LegacyMetainfoEnvironment):
            abort(404, message='Metainfo package %s is not a legacy package.' % metainfo_package_name)

        return legacy_env.to_legacy_dict(legacy_env.packages)
......@@ -48,8 +48,6 @@ lazy_import.lazy_module('nomad.metainfo')
lazy_import.lazy_module('nomad.atomutils')
lazy_import.lazy_module('nomad.processing')
lazy_import.lazy_module('nomad.client')
lazy_import.lazy_module('nomadcore')
lazy_import.lazy_module('nomadcore.simple_parser')
from . import dev, admin, parse, client # noqa
from .cli import run_cli, cli # noqa
......@@ -87,6 +87,11 @@ def metainfo_undecorated():
nomad.datamodel.optimade.m_package.__init_metainfo__() # pylint: disable=no-member
nomad.datamodel.encyclopedia.m_package.__init_metainfo__()
# TODO similar to before, due to lazy loading, we need to explicitly access parsers
# to actually import all parsers and indirectly all metainfo packages
from nomad.parsing import parsers
parsers.parsers
export = Environment()
for package in Package.registry.values():
export.m_add_sub_section(Environment.packages, package)
......@@ -130,14 +135,12 @@ def parser_metadata():
import json
import yaml
from nomad.parsing import LegacyParser, FairdiParser
from nomad.parsing import Parser
from nomad.parsing.parsers import parser_dict
parsers_metadata = {}
for parser in parser_dict.values():
if isinstance(parser, LegacyParser):
parser_class = parser.parser_class
elif isinstance(parser, FairdiParser):
if isinstance(parser, Parser):
parser_class = parser.__class__
else:
continue
......
......@@ -24,8 +24,6 @@ import sys
from nomad import utils, parsing, normalizing, datamodel
import nomadcore
from .cli import cli
......@@ -113,11 +111,7 @@ def normalize_all(entry_archive, logger=None):
@click.option('--skip-normalizers', is_flag=True, default=False, help='Do not run the normalizer.')
@click.option('--not-strict', is_flag=True, help='Do also match artificial parsers.')
@click.option('--parser', help='Skip matching and use the provided parser')
@click.option('--annotate', is_flag=True, help='Sub-matcher based parsers will create a .annotate file.')
def _parse(
mainfile, show_archive, show_metadata, skip_normalizers, not_strict, parser,
annotate):
nomadcore.simple_parser.annotate = annotate
def _parse(mainfile, show_archive, show_metadata, skip_normalizers, not_strict, parser):
kwargs = dict(strict=not not_strict, parser_name=parser)
entry_archive = parse(mainfile, **kwargs)
......
......@@ -18,8 +18,7 @@
import sys
from nomad.metainfo import Environment
from nomad.metainfo.legacy import LegacyMetainfoEnvironment
import nomad.datamodel.metainfo.common_dft
m_env = LegacyMetainfoEnvironment()
m_env = Environment()
m_env.m_add_sub_section(Environment.packages, sys.modules['nomad.datamodel.metainfo.common_dft'].m_package) # type: ignore
......@@ -9,7 +9,7 @@ from nomad.metainfo.search_extension import Search
m_package = Package(
name='nomad.datamodel.metainfo.public_old',
name='nomad.datamodel.metainfo.public',
description='None',
a_legacy=LegacyDefinition(name='public.nomadmetainfo.json'))
......
This diff is collapsed.
This diff is collapsed.
......@@ -24,7 +24,7 @@ definitions.
import numpy as np
from nomad import utils
from nomad.metainfo import Definition, SubSection, Package, Quantity, Section, Reference, MEnum
from nomad.metainfo import Definition, Package, Reference, MEnum
logger = utils.get_logger(__name__)
......@@ -120,7 +120,7 @@ def generate_metainfo_code(metainfo_pkg: Package, python_package_path: str):
format_aliases=format_aliases)
with open(python_package_path, 'wt') as f:
code = env.get_template('package_new.j2').render(pkg=metainfo_pkg)
code = env.get_template('package.j2').render(pkg=metainfo_pkg)
code = '\n'.join([
line.rstrip() if line.strip() != '' else ''
for line in code.split('\n')])
......@@ -128,51 +128,7 @@ def generate_metainfo_code(metainfo_pkg: Package, python_package_path: str):
if __name__ == '__main__':
# Simple use case that merges old common/public defs
import json
from nomad.metainfo import Category
from nomad.datamodel.metainfo.public_old import m_package
from nomad.datamodel.metainfo.common_old import m_package as common_pkg
for section in common_pkg.section_definitions: # pylint: disable=not-an-iterable
if section.extends_base_section:
base_section = section.base_sections[0]
for name, attr in section.section_cls.__dict__.items():
if isinstance(attr, Quantity):
base_section.m_add_sub_section(Section.quantities, attr.m_copy(deep=True))
elif isinstance(attr, SubSection):
base_section.m_add_sub_section(Section.sub_sections, attr.m_copy(deep=True))
else:
m_package.m_add_sub_section(Package.section_definitions, section)
for category in common_pkg.category_definitions: # pylint: disable=not-an-iterable
m_package.m_add_sub_section(Package.category_definitions, category)
for definition in m_package.section_definitions + m_package.category_definitions:
old_name = definition.name
new_name = ''.join([item[0].title() + item[1:] for item in old_name.split('_')])
if new_name.startswith('Section'):
new_name = new_name[7:]
if new_name != old_name:
definition.aliases = [old_name]
definition.name = new_name
unused_category = m_package.m_create(Category)
unused_category.name = 'Unused'
unused_category.description = 'This metainfo definition is not used by NOMAD data.'
with open('local/metainfostats.json', 'rt') as f:
stats = json.load(f)
unused = []
for (definition, _, _) in m_package.m_traverse():
if isinstance(definition, (SubSection, Quantity)):
if definition.name not in stats:
unused.append(definition)
for definition in unused:
if unused_category not in definition.categories:
definition.categories += [unused_category]
# Simple use case that re-generates the common_dft package
from nomad.datamodel.metainfo.common_dft import m_package
generate_metainfo_code(m_package, 'nomad/datamodel/metainfo/common_dft.py')
......@@ -22,25 +22,7 @@ new nomad@fairdi infrastructure. This covers aspects like the new metainfo, a un
wrapper for parsers, parser logging, and a parser backend.
'''
from typing import cast, Dict, List, Any, Tuple, Type
import numpy as np
import os.path
import importlib
from nomadcore.local_meta_info import InfoKindEl, InfoKindEnv
from nomad import utils
from nomad.metainfo import (
Definition, SubSection, Package, Quantity, Category, Section, Reference,
Environment, MEnum, MSection, DefinitionAnnotation, MetainfoError, MSectionBound)
logger = utils.get_logger(__name__)
_ignored_packages = [
'meta_types.nomadmetainfo.json',
'repository.nomadmetainfo.json']
from nomad.metainfo import DefinitionAnnotation, Environment
class LegacyDefinition(DefinitionAnnotation):
......@@ -49,229 +31,4 @@ class LegacyDefinition(DefinitionAnnotation):
self.name = name
def def_name(definition):
    '''
    Returns the name stored in the ``a_legacy`` annotation of the given definition,
    or the definition's own ``name`` if no such annotation (or annotation name) is
    available.
    '''
    try:
        legacy_annotation = definition.a_legacy
        resolved_name = legacy_annotation.name
    except AttributeError:
        # no legacy annotation (or it has no name): use the regular name
        resolved_name = definition.name

    return resolved_name
def normalize_name(name: str):
    ''' Replaces every ``.`` and ``-`` in the given name with ``_``. '''
    # single pass character mapping instead of two chained replace calls
    return name.translate(str.maketrans({'.': '_', '-': '_'}))
def normalized_package_name(name: str):
    '''
    Turns a legacy metainfo file name into a name without ``.`` and ``-``: all
    occurrences of '.nomadmetainfo.json' are removed and the remainder is passed
    through :func:`normalize_name`.
    '''
    without_extension = name.replace('.nomadmetainfo.json', '')
    return normalize_name(without_extension)
def python_package_mapping(metainfo_package_name: str) -> Tuple[str, str]:
    '''
    Computes where the Python code for a metainfo package lives.

    Arguments:
        metainfo_package_name: The metainfo package name, with or without the
            '.nomadmetainfo.json' extension.

    Returns:
        A tuple ``(python_package_name, path)`` with the qualified Python module
        name and the path of the respective ``.py`` file relative to the nomad
        git project.
    '''
    # the part before the first '.' decides between core and parser packages
    stem = metainfo_package_name.replace('.nomadmetainfo.json', '')
    prefix = stem.partition('.')[0]
    module_name = normalized_package_name(metainfo_package_name)

    if prefix in ('common', 'general', 'public', 'dft', 'ems'):
        directory = 'nomad/datamodel/metainfo'
        python_package_name = 'nomad.datamodel.metainfo.%s' % module_name
    else:
        # parser projects use '-' in their directory name and no separator at all
        # in their python package name
        parser_dir = prefix.replace('_', '-')
        compact_prefix = prefix.replace('_', '')
        directory = 'dependencies/parsers/%s/%sparser/metainfo' % (parser_dir, compact_prefix)
        python_package_name = '%sparser.metainfo.%s' % (compact_prefix, module_name)

    return python_package_name, '%s/%s.py' % (directory, module_name)
class LegacyMetainfoEnvironment(Environment):
    '''
    A metainfo environment that can additionally express its definitions in the
    legacy metainfo format, i.e. as ``InfoKindEl``/``InfoKindEnv`` objects or as
    dictionaries that follow the ``*.nomadmetainfo.json`` layout.
    '''

    @staticmethod
    def from_legacy_package_path(path):
        '''
        Imports the Python package that belongs to the given legacy metainfo package
        path and returns its ``m_env`` attribute.

        Arguments:
            path: A path or file name of a legacy package. Only the base name is
                used; the endings '.nomadmetainfo.json' and '.json' are removed.
        '''
        package = os.path.basename(path)
        for suffix in ('.nomadmetainfo.json', '.json'):
            if package.endswith(suffix):
                package = package[:-len(suffix)]

        module_name, _ = python_package_mapping(package)
        # ``m_env`` is read from the parent package of the mapped module
        parent_module_name = module_name.rpartition('.')[0]
        return getattr(importlib.import_module(parent_module_name), 'm_env')

    legacy_package_name = Quantity(type=str)

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # both caches are filled on first use
        self.__section_to_sub_section_name = None
        self.__legacy_names = None

    def from_legacy_name(self, name: str, section_cls: Type[MSectionBound]) -> MSectionBound:
        '''
        Returns the definition with the given legacy name and definition class, or
        None if there is none.

        Raises:
            MetainfoError: If two definitions of the same class share a legacy name.
        '''
        if self.__legacy_names is None:
            index = self.__legacy_names = {}
            for definition in self.m_all_contents():
                try:
                    if isinstance(definition, Section) and definition.extends_base_section:
                        continue

                    legacy_name = definition.a_legacy.name
                    key = (legacy_name, definition.m_def.section_cls)
                    if key in index:
                        raise MetainfoError('Legacy name %s is not globally unique' % legacy_name)
                    index[key] = definition
                except AttributeError:
                    # contents that lack one of the accessed attributes are skipped
                    pass

        return self.__legacy_names.get((name, section_cls))

    @property
    def section_to_sub_section_name(self) -> Dict[str, str]:
        ''' A cached mapping from section names to the names of their sub sections. '''
        if self.__section_to_sub_section_name is None:
            mapping = self.__section_to_sub_section_name = {}
            for definition in self.m_all_contents():
                if definition.m_def == SubSection.m_def:
                    mapping[definition.sub_section.name] = definition.name

        return self.__section_to_sub_section_name

    def legacy_info(self, definition: Definition, *args, **kwargs) -> InfoKindEl:
        '''
        Creates the legacy ``InfoKindEl`` for the given section, quantity, or category
        definition. Additional arguments are passed on to ``InfoKindEl``.
        '''
        super_names: List[str] = []
        info: Dict[str, Any] = {
            'name': def_name(definition),
            'description': definition.description,
            'superNames': super_names}

        # categories always come first in the super names
        super_names.extend(def_name(category) for category in definition.categories)

        if isinstance(definition, Section):
            sub_section_name = self.section_to_sub_section_name.get(
                definition.name, definition.name)
            info['kindStr'] = 'type_section'
            info['repeats'] = any(
                sub_section.repeats
                for sub_section in self.resolve_definitions(sub_section_name, SubSection))

            # the parents of all matching sub sections become super names
            for sub_section in self.resolve_definitions(sub_section_name, SubSection):
                super_names.append(def_name(sub_section.m_parent_as(Definition)))

        elif isinstance(definition, Quantity):
            info['kindStr'] = 'type_document_content'
            info['shape'] = definition.shape

            quantity_type = definition.type
            # the order of these checks is kept as is, since the comparisons can overlap
            if quantity_type == int:
                dtype_str = 'i'
            elif quantity_type == float:
                dtype_str = 'f'
            elif quantity_type == bool:
                dtype_str = 'b'
            elif quantity_type == str:
                dtype_str = 'C'
            elif isinstance(quantity_type, Reference):
                dtype_str = 'r'
                info['referencedSections'] = [
                    def_name(quantity_type.target_section_def.m_resolved())]
            elif isinstance(quantity_type, MEnum):
                dtype_str = 'C'
            elif isinstance(quantity_type, np.dtype):
                dtype_str = quantity_type.name[0]
            elif quantity_type == Any:
                dtype_str = 'D'
            else:
                # unsupported types are not rejected, their str form is used instead
                dtype_str = str(quantity_type)

            info['dtypeStr'] = dtype_str
            if definition.unit is not None:
                info['units'] = str(definition.unit)
            super_names.append(def_name(definition.m_parent_as(Definition)))

        elif isinstance(definition, Category):
            info['kindStr'] = 'type_abstract_document_content'

        # walk up the containment hierarchy to the enclosing package
        container = cast(MSection, definition)
        while not isinstance(container, Package):
            container = container.m_parent
        info['package'] = container.name

        return InfoKindEl(*args, **info, **kwargs)

    def legacy_info_env(self, packages: List[Package] = None, *args, **kwargs) -> InfoKindEnv:
        '''
        Creates a legacy ``InfoKindEnv`` with the definitions of the given packages.

        Arguments:
            packages: The packages to include. All packages of this environment are
                used if None.
        '''
        selected = self.packages if packages is None else packages

        legacy_env = InfoKindEnv(*args, **kwargs)
        for package in selected:
            for definition in package.all_definitions.values():
                is_section = isinstance(definition, Section)
                # sections that only extend a base section get no element of their own
                if not is_section or not definition.extends_base_section:
                    legacy_env.addInfoKindEl(self.legacy_info(definition))

                if is_section:
                    for quantity in definition.quantities:
                        legacy_env.addInfoKindEl(self.legacy_info(quantity))

        return legacy_env

    def to_legacy_dict(
            self, packages: List[Package] = None, description: str = None,
            *args, **kwargs) -> Dict[str, Any]:
        '''
        Creates a dictionary that can be serialized to a legacy metainfo definition file
        (*.nomadmetainfo.json).

        Arguments:
            packages: The definitions of these packages are added as actual
                definitions, all other packages of this environment are added as
                dependencies.
            description: The description for the legacy file. If None, the description
                of the first included package with a description is used.
        '''
        included = [] if packages is None else packages

        definitions = []
        dependencies = []
        for package in self.packages:
            if package not in included:
                dependencies.append(package)
                continue

            if description is None:
                description = package.description

            for definition in package.all_definitions.values():
                is_section = isinstance(definition, Section)
                # sections that only extend a base section get no entry of their own
                if not is_section or not definition.extends_base_section:
                    definitions.append(self.legacy_info(definition).toDict())

                if is_section:
                    for quantity in definition.quantities:
                        definitions.append(self.legacy_info(quantity).toDict())

        return {
            'type': 'nomad_meta_info_1_0',
            'description': description,
            'dependencies': [
                {'relativePath': def_name(dependency)}
                for dependency in dependencies],
            'metaInfos': definitions
        }
LegacyMetainfoEnvironment = Environment
import sys
from nomad.metainfo import Environment
from nomad.metainfo.legacy import LegacyMetainfoEnvironment
{%- for package in env.packages %}
import {{ package.a_legacy.python_module }}
{%- endfor %}
m_env = LegacyMetainfoEnvironment()
{%- for package in env.packages %}
m_env.m_add_sub_section(Environment.packages, sys.modules['{{ package.a_legacy.python_module }}'].m_package) # type: ignore
{%- endfor %}
......@@ -2,19 +2,19 @@ import numpy as np # pylint: disable=unused-import
import typing # pylint: disable=unused-import
from nomad.metainfo import ( # pylint: disable=unused-import
MSection, MCategory, Category, Package, Quantity, Section, SubSection, SectionProxy,
Reference
)
Reference, MEnum)
from nomad.metainfo.legacy import LegacyDefinition
from nomad.metainfo.search_extension import Search
{% for dependency in pkg.dependencies %}
{{ fromat_package_import(dependency) }}
{%- endfor %}
m_package = Package(
name='{{ pkg.name }}',
description='{{ pkg.description }}',
{% if pkg.a_legacy is defined -%}
description='{{ pkg.description }}'
{%- if pkg.a_legacy is defined %},
a_legacy=LegacyDefinition(name='{{pkg.a_legacy.name}}')
{% endif -%})
{%- endif %})
{% for category in order_categories(pkg.category_definitions) %}
class {{ category.name }}(MCategory):
......@@ -25,12 +25,15 @@ class {{ category.name }}(MCategory):
{%- endif %}
m_def = Category(
{% if category.categories | length > 0 -%}
{%- if category.aliases | length > 0 %}
aliases=['{{ category.aliases[0] }}'],
{%- endif -%}
{%- if category.categories | length > 0 %}
categories=[{{ format_definition_refs(pkg, category.categories) }}],
{% endif -%}
{% if category.a_legacy is defined -%}
{%- endif -%}
{%- if category.a_legacy is defined %}
a_legacy=LegacyDefinition(name='{{category.a_legacy.name}}')
{% endif -%})
{%- endif %})
{% endfor -%}
{% for section in pkg.section_definitions %}
......@@ -42,11 +45,14 @@ class {{ section.name }}({%- if section.extends_base_section -%}{{ format_defini
'''
{% endif %}
m_def = Section(
{%- if section.aliases | length > 0 %}
aliases=['{{ section.aliases[0] }}'],
{%- endif %}
validate=False{%- if section.extends_base_section -%},
extends_base_section=True{%- endif -%},
{% if section.a_legacy is defined -%}
a_legacy=LegacyDefinition(name='{{section.a_legacy.name}}')
{% endif -%})
{%- endif -%})
{% for quantity in section.quantities %}
{{ quantity.name }} = Quantity(
type={{ format_type(pkg, quantity.type) }},
......@@ -61,9 +67,12 @@ class {{ section.name }}({%- if section.extends_base_section -%}{{ format_defini
{%- if quantity.categories | length > 0 -%},
categories=[{{ format_definition_refs(pkg, quantity.categories) }}]
{%- endif -%},
{% if quantity.a_legacy is defined -%}
{%- if quantity.a_search is defined %}
a_search=Search(),
{%- endif -%}
{%- if quantity.a_legacy is defined %}
a_legacy=LegacyDefinition(name='{{quantity.a_legacy.name}}')