#! /usr/bin/env python
# -*- coding: utf-8 -*-

"""Provides functions for creating a python object representing a CP2K input
structure.

Creates preparsed versions of the cp2k_input.xmls and pickles them (python
version of serialization). The pickle files can then be easily reused without
doing the xml parsing again.

The actual calculation input contents can later be added to this object. Then
the object can be queried for the results, or the default values defined by the
cp2k_input.xml.
"""
from __future__ import print_function

from future import standard_library
standard_library.install_aliases()

import json
import logging
import pickle

try:
    import xml.etree.cElementTree as ET
except ImportError:
    # The cElementTree alias was removed in Python 3.9; the plain module
    # uses the accelerated implementation automatically.
    import xml.etree.ElementTree as ET

from cp2kparser.generic.inputparsing import *
logger = logging


#===============================================================================
28
def generate_object_tree(xml_file, for_metainfo=False):
    """Parse a cp2k_input.xml document into a tree of section objects.

    Args:
        xml_file: File name or open file object; handed to ``ET.parse``.
        for_metainfo: Forwarded to ``recursive_tree_generation``. When true,
            the DESCRIPTION texts found in the XML are stored as well.

    Returns:
        The object returned by ``recursive_tree_generation`` for the root
        element of the document.
    """
    root = ET.parse(xml_file).getroot()

    # No top-level section is pruned here at the moment. Candidates that
    # could be left out later if the tree needs to shrink: ATOM, DEBUG,
    # EXT_RESTART, FARMING, OPTIMIZE_BASIS, OPTIMIZE_INPUT, SWARM, TEST.

    # Hand the recursion its own fresh name stack so that section paths
    # built during one parse can never leak into the next one.
    return recursive_tree_generation(root, for_metainfo, name_stack=[])


#===============================================================================
52
def recursive_tree_generation(xml_element, for_metainfo=False, name_stack=None, ignore=True):
    """Build a Section (with everything below it) from one XML element.

    Args:
        xml_element: The root element of cp2k_input.xml or one of its
            SECTION elements.
        for_metainfo: When true, the DESCRIPTION texts of the section, its
            section parameter, its default keyword and its keywords are
            copied onto the generated objects.
        name_stack: Names of the enclosing sections, outermost first. The
            name of this section is appended to the list that is passed in.
            Defaults to a new empty list for every call.
        ignore: When true, sections named EACH or PRINT are dropped unless
            their full path is one of the explicitly kept ones. Below a kept
            PRINT section the filtering is switched off.

    Returns:
        The Section built for ``xml_element``, or None when the section was
        filtered out.
    """
    # A list literal as the default would be shared between calls and keep
    # growing, which corrupts the section paths of every later parse.
    if name_stack is None:
        name_stack = []

    # The root element has no NAME child; it gets a fixed name instead.
    name_element = xml_element.find("NAME")
    section_name = "CP2K_INPUT" if name_element is None else name_element.text
    section = Section(section_name)
    name_stack.append(section_name)

    # Ignore most sections that control the print settings
    if ignore and section_name in ("EACH", "PRINT"):
        path = "/".join(name_stack)
        if path not in ("CP2K_INPUT/FORCE_EVAL/PRINT", "CP2K_INPUT/MOTION/PRINT"):
            return None
        print("KEPT {}".format(path))
        ignore = False

    if for_metainfo:
        _copy_description(xml_element, section)

    # Section parameters. Compare against None explicitly: an Element
    # without children is falsy, so a plain truth test could skip it.
    parameter = xml_element.find("SECTION_PARAMETERS")
    if parameter is not None:
        parameter_object = SectionParameters(
            _child_text(parameter, "DEFAULT_VALUE"),
            _child_text(parameter, "LONE_KEYWORD_VALUE"),
        )
        section.section_parameter = parameter_object
        _copy_data_type(parameter, parameter_object)
        if for_metainfo:
            _copy_description(parameter, parameter_object)

    # Default keyword
    default_keyword_element = xml_element.find("DEFAULT_KEYWORD")
    if default_keyword_element is not None:
        default_keyword_object = DefaultKeyword()
        _copy_data_type(default_keyword_element, default_keyword_object)
        if for_metainfo:
            _copy_description(default_keyword_element, default_keyword_object)
        section.default_keyword = default_keyword_object

    # Keywords
    for keyword in xml_element.findall("KEYWORD"):
        default_name = None
        aliases = []
        for name in keyword.findall("NAME"):
            if name.get("type") == "default":
                default_name = name.text
            else:
                aliases.append(name.text)

        # Without a default name there is nothing to register the keyword
        # under; skip it instead of failing on the startswith() below.
        if default_name is None:
            logger.warning(
                "Skipping a keyword without a default name in section %s",
                section_name,
            )
            continue

        # Ignore hidden keywords
        if default_name.startswith("__"):
            continue

        # Save the default keyword name
        section.default_keyword_names.append(default_name)

        keyword_object = Keyword(
            default_name,
            _child_text(keyword, "DEFAULT_VALUE"),
            _child_text(keyword, "DEFAULT_UNIT"),
            default_name,
        )

        # The same object is reachable through the default name and through
        # every alias.
        section.keywords[default_name].append(keyword_object)
        for alias in aliases:
            section.keywords[alias].append(keyword_object)

        _copy_data_type(keyword, keyword_object)
        if for_metainfo:
            _copy_description(keyword, keyword_object)

    # Sections. Each child works on its own copy of the name stack so that
    # siblings do not see each other's names.
    for sub_section_element in xml_element.findall("SECTION"):
        sub_section = recursive_tree_generation(
            sub_section_element, for_metainfo, list(name_stack), ignore
        )
        if sub_section is not None:
            section.sections[sub_section.name].append(sub_section)

    return section


def _child_text(element, tag):
    """Return the text of the first direct child called tag, or None."""
    child = element.find(tag)
    return None if child is None else child.text


def _copy_data_type(element, target):
    """Copy the DATA_TYPE kind and its N_VAR text from element to target."""
    data_type = element.find("DATA_TYPE")
    if data_type is None:
        return
    target.data_type = data_type.get("kind")
    data_dim = data_type.find("N_VAR")
    if data_dim is not None:
        target.data_dimension = data_dim.text


def _copy_description(element, target):
    """Copy the DESCRIPTION text of element to target, when there is one."""
    description = element.find("DESCRIPTION")
    if description is not None:
        target.description = description.text

201
202
203
204
205
206

#===============================================================================
def generate_input_metainfos(object_tree):
    """Write the metainfo of the whole input tree to input_metainfo.json.

    The root section of ``object_tree`` is renamed to CP2K_INPUT and given a
    fixed description in place, so the passed object is modified. The output
    file is created in the current working directory.

    Args:
        object_tree: Object whose ``root_section`` attribute holds the root
            of the section tree.
    """
    parent = Section("dummy")
    root_section = object_tree.root_section
    root_section.name = "CP2K_INPUT"
    root_section.description = (
        "This section contains the explicitly stated keywords, default "
        "keywords, and section parameters in the CP2K input file. Only some of "
        "the sections that control printing (PRINT, EACH) are supported, "
        "because including all of them would double the size of this metadata "
        "without adding much useful information. The hidden input keywords "
        "starting with a double underscore are not included."
    )

    container = []
    generate_metainfo_recursively(root_section, parent, container, [])

    with open("input_metainfo.json", "w") as f:
        json.dump(container, f, indent=2, separators=(',', ': '))


#===============================================================================
def generate_metainfo_recursively(obj, parent, container, name_stack):
    """Append the metainfo of obj and of everything below it to container.

    For a Section the entries of its subsections, keywords, section
    parameter and default keyword are appended first and the entry of the
    section itself last. Any other object yields a single entry.

    Args:
        obj: A Section, or an input object that belongs to ``parent``.
        parent: The object that contains ``obj``.
        container: List that receives the generated dictionaries.
        name_stack: Names of the sections enclosing ``obj``. The name of a
            Section is pushed while it is processed and popped afterwards.
    """
    if not isinstance(obj, Section):
        container.append(generate_input_object_metainfo_json(obj, parent, name_stack))
        return

    name_stack.append(obj.name)
    # Built before descending, while the stack ends with this section.
    section_json = generate_section_metainfo_json(obj, parent, name_stack)

    for subsections in obj.sections.values():
        generate_metainfo_recursively(subsections[0], obj, container, name_stack)

    # The tree builder stores one keyword object under its default name and
    # under every alias; emit each object only once so that the output does
    # not contain identical duplicate entries.
    emitted = set()
    for keywords in obj.keywords.values():
        keyword = keywords[0]
        if id(keyword) in emitted:
            continue
        emitted.add(id(keyword))
        generate_metainfo_recursively(keyword, obj, container, name_stack)

    if obj.section_parameter is not None:
        generate_metainfo_recursively(obj.section_parameter, obj, container, name_stack)
    if obj.default_keyword is not None:
        generate_metainfo_recursively(obj.default_keyword, obj, container, name_stack)

    name_stack.pop()
    container.append(section_json)


#===============================================================================
def generate_input_object_metainfo_json(child, parent, name_stack):
    """Build the metainfo dictionary of a keyword-like input object.

    Args:
        child: Object with ``name``, ``description`` and ``data_type``
            attributes.
        parent: Unused; kept so that the signature matches
            ``generate_section_metainfo_json``.
        name_stack: Names of the sections enclosing ``child``, outermost
            first.

    Returns:
        dict with the keys name, superNames, description, shape and dtypeStr.
    """
    path = ".".join(name_stack)

    # Fall back to a generic text when the description is missing, empty or
    # consists of whitespace only.
    description = child.description
    if description is None or not description.strip():
        description = "Settings for {}".format(child.name)

    # Every data kind is currently stored as a string ("C").
    mapping = {
        "keyword": "C",
        "logical": "C",
        "string": "C",
        "integer": "C",
        "word": "C",
        "real": "C",
    }

    return {
        "name": "x_cp2k_{}.{}".format(path, child.name),
        "superNames": ["x_cp2k_section_{}".format(path)],
        "description": description,
        # The data dimension is not used yet; every value is declared scalar.
        "shape": [],
        # A missing or unlisted data kind falls back to "C" like all the
        # listed ones, instead of aborting the generation with a KeyError.
        "dtypeStr": mapping.get(child.data_type, "C"),
    }


#===============================================================================
def generate_section_metainfo_json(child, parent, name_stack):
    """Build the metainfo dictionary of a section.

    Args:
        child: Object with ``name`` and ``description`` attributes.
        parent: Unused; kept so that the signature matches
            ``generate_input_object_metainfo_json``.
        name_stack: Names of the enclosing sections, outermost first, with
            the name of ``child`` itself as the last entry.

    Returns:
        dict with the keys name, kindStr, superNames and description.
    """
    name = ".".join(name_stack)
    # The parent path is the stack without the section's own name. For a
    # stack with a single entry it is empty, so the super name is then just
    # the bare prefix.
    path = ".".join(name_stack[:-1])

    # Fall back to a generic text when the description is missing, empty or
    # consists of whitespace only.
    description = child.description
    if description is None or not description.strip():
        description = "Settings for {}".format(child.name)

    return {
        "name": "x_cp2k_section_{}".format(name),
        "kindStr": "type_section",
        "superNames": ["x_cp2k_section_{}".format(path)],
        "description": description,
    }


289
290
291
#===============================================================================
# Run main function by default
if __name__ == "__main__":

    # xml to pickle
    xml_path = "../versions/cp2k262/input_data/cp2k_input.xml"
    pickle_path = "../versions/cp2k262/input_data/cp2k_input_tree.pickle"

    # Binary mode lets the XML parser apply the encoding declared in the
    # document; the context managers make sure both files get closed.
    with open(xml_path, "rb") as xml_file:
        object_tree = CP2KInput(generate_object_tree(xml_file))

    # Protocol 2 is the newest pickle protocol that Python 2 can still read.
    with open(pickle_path, "wb") as fh:
        pickle.dump(object_tree, fh, protocol=2)

    # Metainfo generation. Enable the lines below to write
    # input_metainfo.json from the same XML file.
    # with open(xml_path, "rb") as xml_file:
    #     object_tree = CP2KInput(generate_object_tree(xml_file, for_metainfo=True))
    # generate_input_metainfos(object_tree)