legacy.py 10.3 KB
Newer Older
Markus Scheidgen's avatar
Markus Scheidgen committed
1
2
3
4
#
# Copyright The NOMAD Authors.
#
# This file is part of NOMAD. See https://nomad-lab.eu for further info.
5
6
7
8
9
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
Markus Scheidgen's avatar
Markus Scheidgen committed
10
#     http://www.apache.org/licenses/LICENSE-2.0
11
12
#
# Unless required by applicable law or agreed to in writing, software
Markus Scheidgen's avatar
Markus Scheidgen committed
13
# distributed under the License is distributed on an "AS IS" BASIS,
14
15
16
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
Markus Scheidgen's avatar
Markus Scheidgen committed
17
#
18
19
20
21
22
23

'''
This module contains functionality to use old 'legacy' NOMAD CoE parsers with the
new nomad@fairdi infrastructure. This covers aspects like the new metainfo, a unifying
wrapper for parsers, parser logging, and a parser backend.
'''
24

25
from typing import cast, Dict, List, Any, Tuple, Type
26
import numpy as np
27
import os.path
28
import importlib
29

30

31
from nomadcore.local_meta_info import InfoKindEl, InfoKindEnv
32
33

from nomad import utils
34
from nomad.metainfo import (
35
    Definition, SubSection, Package, Quantity, Category, Section, Reference,
36
    Environment, MEnum, MSection, DefinitionAnnotation, MetainfoError, MSectionBound)
37

38
# Module-level logger, named after this module.
logger = utils.get_logger(__name__)
39
40


41
42
43
# File names of legacy metainfo packages that get special treatment.
# NOTE(review): the list is not referenced in this part of the module; presumably
# these packages are skipped when legacy packages are processed -- confirm at the
# place of use.
_ignored_packages = [
    'meta_types.nomadmetainfo.json',
    'repository.nomadmetainfo.json']
44
45


46
47
48
49
50
51
class LegacyDefinition(DefinitionAnnotation):
    '''
    A definition annotation that stores the name a definition has in the legacy
    metainfo. :func:`def_name` reads it through ``definition.a_legacy.name``.
    '''

    def __init__(self, name: str) -> None:
        # The legacy metainfo name of the annotated definition.
        self.name = name


52
53
54
55
56
57
58
def def_name(definition):
    '''
    Returns the legacy name of the given definition, i.e. the name stored in its
    ``a_legacy`` annotation. Falls back to the regular definition name, if no
    legacy name can be read from the definition.
    '''
    try:
        legacy_annotation = definition.a_legacy
        resolved_name = legacy_annotation.name
    except AttributeError:
        # no (usable) legacy annotation, use the plain metainfo name
        resolved_name = definition.name

    return resolved_name


59
60
61
62
63
64
65
66
67
68
69
70
71
def normalize_name(name: str):
    ''' Returns the given name with all dots and dashes replaced by underscores. '''
    underscore_table = str.maketrans('.-', '__')
    return name.translate(underscore_table)


def normalized_package_name(name: str):
    '''
    Turns the file name of a legacy metainfo package into a proper (python)
    identifier: every '.nomadmetainfo.json' is removed and the remaining name
    is passed through :func:`normalize_name`.
    '''
    return normalize_name(name.replace('.nomadmetainfo.json', ''))


72
73
74
def python_package_mapping(metainfo_package_name: str) -> Tuple[str, str]:
    '''
    Computes the python package that holds the definitions of the given legacy
    metainfo package.

    Packages whose first name segment is one of 'common', 'general', 'public',
    'dft', or 'ems' are mapped into ``nomad.datamodel.metainfo``. All other packages
    are mapped into the metainfo package of the parser named by that first segment.

    Returns:
        A tuple with the python package name and a file path. The file path denotes
        the file for this package within the nomad git project.
    '''
    stripped_name = metainfo_package_name.replace('.nomadmetainfo.json', '')
    first_segment = stripped_name.partition('.')[0]
    module_name = normalized_package_name(metainfo_package_name)

    if first_segment in ['common', 'general', 'public', 'dft', 'ems']:
        qualified_name = f'nomad.datamodel.metainfo.{module_name}'
        return qualified_name, f'nomad/datamodel/metainfo/{module_name}.py'

    # parser projects use dashes in their directory name and no separator at all
    # in the name of their python package
    parser_dir = first_segment.replace('_', '-')
    parser_name = first_segment.replace('_', '')
    qualified_name = f'{parser_name}parser.metainfo.{module_name}'
    directory = f'dependencies/parsers/{parser_dir}/{parser_name}parser/metainfo'

    return qualified_name, f'{directory}/{module_name}.py'


97
class LegacyMetainfoEnvironment(Environment):
    '''
    A metainfo environment with functions to create a legacy metainfo version of
    the environment.
    '''

    @staticmethod
    def from_legacy_package_path(path):
        '''
        Returns the environment that belongs to the given legacy metainfo package file.

        The file name of the given path, without its '.nomadmetainfo.json' and/or
        '.json' extension, is mapped to a python module with
        :func:`python_package_mapping`. The parent package of this module is imported
        and its ``m_env`` attribute is returned.

        Arguments:
            path: Path to (or just the file name of) the legacy metainfo package file.
        '''
        package = os.path.basename(path)
        for suffix in ('.nomadmetainfo.json', '.json'):
            if package.endswith(suffix):
                package = package[:-len(suffix)]

        python_package_name, _ = python_package_mapping(package)
        # The mapping names the module of the package itself, m_env is taken from
        # the python package one level above.
        parent_package_name = python_package_name.rpartition('.')[0]
        python_module = importlib.import_module(parent_package_name)

        return python_module.m_env

    legacy_package_name = Quantity(type=str)

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Both lookup tables are created lazily on first use.
        self.__section_to_sub_section_name = None
        self.__legacy_names = None

    def from_legacy_name(self, name: str, section_cls: Type[MSectionBound]) -> MSectionBound:
        '''
        Returns the definition of the given globally unique legacy metainfo name.

        Arguments:
            name: The legacy name of the definition.
            section_cls: The section class of the kind of definition that is looked for.

        Returns:
            The definition, or None if there is no definition with that legacy name
            and section class.

        Raises:
            MetainfoError: If two definitions of the same kind share a legacy name.
        '''
        if self.__legacy_names is None:
            # The index is built in a local variable and only stored on success.
            # Otherwise a non unique name would leave a half filled index behind
            # that subsequent calls would silently use.
            legacy_names: Dict[Tuple[str, Any], Any] = dict()
            for definition in self.m_all_contents():
                try:
                    if isinstance(definition, Section) and definition.extends_base_section:
                        continue
                    legacy_name = definition.a_legacy.name
                    key = (legacy_name, definition.m_def.section_cls)
                except AttributeError:
                    # definitions without legacy annotation are not part of the index
                    continue

                if key in legacy_names:
                    raise MetainfoError('Legacy name %s is not globally unique' % legacy_name)
                legacy_names[key] = definition

            self.__legacy_names = legacy_names

        return self.__legacy_names.get((name, section_cls))

    @property
    def section_to_sub_section_name(self) -> Dict[str, str]:
        '''
        A mapping from the names of sections to the names of the sub sections that
        use them. The mapping covers all sub section definitions in this environment
        and is computed once on first access.
        '''
        if self.__section_to_sub_section_name is None:
            self.__section_to_sub_section_name = {
                definition.sub_section.name: definition.name
                for definition in self.m_all_contents()
                if definition.m_def == SubSection.m_def}

        return self.__section_to_sub_section_name

    def legacy_info(self, definition: Definition, *args, **kwargs) -> InfoKindEl:
        '''
        Creates a legacy metainfo object for the given definition.

        Sections become 'type_section', quantities 'type_document_content', and
        categories 'type_abstract_document_content'. The super names comprise the
        categories of the definition, plus the parents of the respective sub sections
        (sections) or the parent section (quantities).

        Arguments:
            definition: The section, quantity, or category definition.
            *args, **kwargs: Are passed on to the :class:`InfoKindEl` constructor.
        '''
        super_names: List[str] = [def_name(category) for category in definition.categories]
        result: Dict[str, Any] = dict(
            name=def_name(definition),
            description=definition.description,
            superNames=super_names)

        if isinstance(definition, Section):
            sub_section_name = self.section_to_sub_section_name.get(definition.name, definition.name)
            # resolved once, the sub sections are needed for both repeats and super names
            sub_sections = list(self.resolve_definitions(sub_section_name, SubSection))

            result['kindStr'] = 'type_section'
            result['repeats'] = any(sub_section.repeats for sub_section in sub_sections)
            super_names.extend(
                def_name(sub_section.m_parent_as(Definition))
                for sub_section in sub_sections)

        elif isinstance(definition, Quantity):
            result['kindStr'] = 'type_document_content'
            result['shape'] = definition.shape

            # The order of the checks and the use of == matter here: numpy dtypes
            # compare equal to the respective builtin types.
            dtype_str = None
            if definition.type == int:
                dtype_str = 'i'
            elif definition.type == float:
                dtype_str = 'f'
            elif definition.type == bool:
                dtype_str = 'b'
            elif definition.type == str:
                dtype_str = 'C'
            elif isinstance(definition.type, Reference):
                dtype_str = 'r'
                result['referencedSections'] = [
                    def_name(definition.type.target_section_def.m_resolved())]
            elif isinstance(definition.type, MEnum):
                dtype_str = 'C'
            elif isinstance(definition.type, np.dtype):
                dtype_str = definition.type.name[0]
            elif definition.type == Any:
                dtype_str = 'D'
            else:
                # Unknown types are not rejected, their string representation is
                # used as legacy dtype instead.
                dtype_str = str(definition.type)

            result['dtypeStr'] = dtype_str
            if definition.unit is not None:
                result['units'] = str(definition.unit)
            super_names.append(def_name(definition.m_parent_as(Definition)))

        elif isinstance(definition, Category):
            result['kindStr'] = 'type_abstract_document_content'

        # walk up the containment hierarchy to the package that holds the definition
        package = cast(MSection, definition)
        while not isinstance(package, Package):
            package = package.m_parent

        result['package'] = package.name

        return InfoKindEl(*args, **result, **kwargs)

    def _legacy_infos(self, package: Package):
        '''
        Yields the legacy metainfo objects for all definitions of the given package:
        all definitions but sections that extend a base section, and the quantities
        of all sections (including those of sections that extend a base section).
        '''
        for definition in package.all_definitions.values():
            is_section = isinstance(definition, Section)
            if not (is_section and definition.extends_base_section):
                yield self.legacy_info(definition)

            if is_section:
                for quantity in definition.quantities:
                    yield self.legacy_info(quantity)

    def legacy_info_env(self, packages: List[Package] = None, *args, **kwargs) -> InfoKindEnv:
        '''
        Creates a legacy metainfo environment with all definitions from the given packages.

        Arguments:
            packages: The packages to take the definitions from. All packages of this
                environment are used, if None is given.
            *args, **kwargs: Are passed on to the :class:`InfoKindEnv` constructor.
        '''
        if packages is None:
            packages = self.packages

        env = InfoKindEnv(*args, **kwargs)
        for package in packages:
            for info in self._legacy_infos(package):
                env.addInfoKindEl(info)

        return env

    def to_legacy_dict(
            self, packages: List[Package] = None, description: str = None,
            *args, **kwargs) -> Dict[str, Any]:
        '''
        Creates a dictionary that can be serialized to a legacy metainfo definition file
        (*.nomadmetainfo.json).

        Arguments:
            packages: Will add all definitions of these packages as actual definitions,
                all other packages of this environment will be added as dependencies.
            description: The description for the legacy file. If None, the description
                of the first given package that has one will be used.
            *args, **kwargs: Accepted, but not used.
        '''
        if packages is None:
            packages = []

        definitions = []
        dependencies = []
        for package in self.packages:
            if package not in packages:
                dependencies.append(package)
                continue

            if description is None:
                description = package.description

            definitions.extend(info.toDict() for info in self._legacy_infos(package))

        return {
            'type': 'nomad_meta_info_1_0',
            'description': description,
            'dependencies': [
                {'relativePath': def_name(dependency)}
                for dependency in dependencies],
            'metaInfos': definitions
        }