xmlpreparser.py 10.8 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
#! /usr/bin/env python
# -*- coding: utf-8 -*-

"""Provides functions for creating a python object representing a CP2K input
structure.

Creates preparsed versions of the cp2k_input.xml files and pickles them
(pickling is Python's serialization mechanism). The pickle files can then be
easily reused without parsing the XML again.

The actual calculation input contents can later be added to this object. The
object can then be queried for the results, or for the default values defined
by cp2k_input.xml.
"""

import xml.etree.cElementTree as ET
import logging
18
import json
19
20
21
22
23
24
import cPickle as pickle
from cp2kparser.generic.inputparsing import *
logger = logging


#===============================================================================
25
def generate_object_tree(xml_file, for_metainfo=False):
    """Parse a CP2K input definition XML and build the matching object tree.

    Args:
        xml_file: File name or open file object that is handed to ET.parse().
        for_metainfo: Passed on to recursive_tree_generation(); when True the
            DESCRIPTION texts found in the XML are stored in the tree as well.

    Returns:
        The object returned by recursive_tree_generation() for the XML root.
    """
    document_root = ET.parse(xml_file).getroot()

    # No top-level sections are filtered out at the moment. Candidates that
    # were considered uninteresting and could be removed from the root here:
    # ATOM, DEBUG, EXT_RESTART, FARMING, OPTIMIZE_BASIS, OPTIMIZE_INPUT,
    # SWARM, TEST.

    # Recursively generate the tree
    return recursive_tree_generation(document_root, for_metainfo)


#===============================================================================
49
def recursive_tree_generation(xml_element, for_metainfo=False, name_stack=None, ignore=True):
    """Recursively convert a section element of cp2k_input.xml into a Section.

    Args:
        xml_element: XML element of the section to convert. An element without
            a NAME child is treated as the root and is named "CP2K_INPUT".
        for_metainfo: If True, the DESCRIPTION texts of the section, its
            section parameter, its default keyword and its keywords are stored
            on the created objects.
        name_stack: List with the names of the enclosing sections, outermost
            first. The name of this section is appended to it. Defaults to a
            new, empty list.
        ignore: If True, sections named EACH or PRINT are skipped unless their
            full path is one of the explicitly kept print sections. Below a
            kept print section nothing is skipped anymore.

    Returns:
        The Section created for xml_element, or None if the section was
        ignored.
    """
    # Create the list per call. A list used as the default value would be
    # shared between calls and keep growing, which corrupts the section paths
    # that are compared against the kept print sections below.
    if name_stack is None:
        name_stack = []

    # Make new section object for the root
    section_name_element = xml_element.find("NAME")
    if section_name_element is not None:
        section_name = section_name_element.text
    else:
        section_name = "CP2K_INPUT"
    section = Section(section_name)
    name_stack.append(section_name)

    # Ignore most sections that control the print settings
    if ignore and section_name in ("EACH", "PRINT"):
        kept_print_settings = (
            "CP2K_INPUT/FORCE_EVAL/PRINT",
            "CP2K_INPUT/MOTION/PRINT",
        )
        path = "/".join(name_stack)
        if path not in kept_print_settings:
            return None
        # Parenthesized so that the same text is printed on Python 2 and 3.
        print("KEPT {}".format(path))
        ignore = False

    if for_metainfo:
        _copy_description(section, xml_element)

    # Section parameters. Compare against None explicitly: the truth value of
    # an Element only tells whether it has children, not whether it was found.
    parameter = xml_element.find("SECTION_PARAMETERS")
    if parameter is not None:
        parameter_object = SectionParameters(
            _child_text(parameter, "DEFAULT_VALUE"),
            _child_text(parameter, "LONE_KEYWORD_VALUE"),
        )
        section.section_parameter = parameter_object
        _copy_data_type(parameter_object, parameter)
        if for_metainfo:
            _copy_description(parameter_object, parameter)

    # Default keyword
    default_keyword_element = xml_element.find("DEFAULT_KEYWORD")
    if default_keyword_element is not None:
        default_keyword_object = DefaultKeyword()
        _copy_data_type(default_keyword_object, default_keyword_element)
        if for_metainfo:
            _copy_description(default_keyword_object, default_keyword_element)
        section.default_keyword = default_keyword_object

    # Keywords
    for keyword in xml_element.findall("KEYWORD"):
        default_name = None
        aliases = []
        for name in keyword.findall("NAME"):
            if name.get("type") == "default":
                default_name = name.text
            else:
                aliases.append(name.text)

        # Without a default name there is nothing to register the keyword
        # under, so skip it instead of failing on the check below.
        if default_name is None:
            logger.warning(
                "Skipping a KEYWORD without a default NAME in section %s",
                section_name
            )
            continue

        # Ignore hidden keywords
        if default_name.startswith("__"):
            continue

        # Save the default keyword name
        section.default_keyword_names.append(default_name)

        keyword_object = Keyword(
            default_name,
            _child_text(keyword, "DEFAULT_VALUE"),
            _child_text(keyword, "DEFAULT_UNIT"),
            default_name
        )

        # The same object is stored under the default name and every alias
        section.keywords[default_name].append(keyword_object)
        for alias in aliases:
            section.keywords[alias].append(keyword_object)

        _copy_data_type(keyword_object, keyword)
        if for_metainfo:
            _copy_description(keyword_object, keyword)

    # Sections. Every subsection gets its own copy of the name stack so that
    # siblings do not see each other's names.
    for sub_section_element in xml_element.findall("SECTION"):
        sub_section = recursive_tree_generation(
            sub_section_element, for_metainfo, list(name_stack), ignore)
        if sub_section is not None:
            section.sections[sub_section.name].append(sub_section)

    # Return section
    return section


def _child_text(xml_element, tag):
    """Return the text of the first child called tag, or None if there is none."""
    child = xml_element.find(tag)
    if child is None:
        return None
    return child.text


def _copy_data_type(target, xml_element):
    """Store the DATA_TYPE kind and the N_VAR dimension of xml_element on target."""
    data_type = xml_element.find("DATA_TYPE")
    if data_type is None:
        return
    target.data_type = data_type.get("kind")
    data_dim = data_type.find("N_VAR")
    if data_dim is not None:
        target.data_dimension = data_dim.text


def _copy_description(target, xml_element):
    """Store the DESCRIPTION text of xml_element on target, if it has one."""
    description = xml_element.find("DESCRIPTION")
    if description is not None:
        target.description = description.text

198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236

#===============================================================================
def generate_input_metainfos(object_tree):
    """Write the metainfo entries of the whole input tree to a JSON file.

    The root section of the tree is renamed to "CP2K_INPUT" and gets a fixed
    description. The entries collected by generate_metainfo_recursively() are
    written to "input_metainfo.json" in the current working directory.

    Args:
        object_tree: Object whose root_section attribute is the root Section
            of the input tree. The root section is modified in place.
    """
    # Placeholder that is passed as the parent of the root section
    dummy_parent = Section("dummy")

    top = object_tree.root_section
    top.name = "CP2K_INPUT"
    top.description = (
        "The section containing all information that is explicitly stated in "
        "the CP2K input file. The sections that control printing (PRINT, EACH) "
        "and the hidden input keywords starting with a double underscore are "
        "not included."
    )

    entries = []
    generate_metainfo_recursively(top, dummy_parent, entries, [])

    with open("input_metainfo.json", "w") as output:
        serialized = json.dumps(entries, indent=2, separators=(',', ': '))
        output.write(serialized)


#===============================================================================
def generate_metainfo_recursively(obj, parent, container, name_stack):
    """Collect the metainfo entries of obj and everything below it.

    Args:
        obj: A Section, or one of the objects stored in a section (keyword,
            section parameter or default keyword).
        parent: The object that contains obj. It is passed on to the functions
            that create the entries.
        container: List to which the created entries are appended. The entry
            of a section is appended after the entries of its contents.
        name_stack: Names of the sections enclosing obj. While a section is
            processed its own name is pushed on the stack; it is popped again
            before the function returns.
    """
    # Anything that is not a section is a leaf and yields a single entry
    if not isinstance(obj, Section):
        container.append(
            generate_input_object_metainfo_json(obj, parent, name_stack))
        return

    name_stack.append(obj.name)
    section_entry = generate_section_metainfo_json(obj, parent, name_stack)

    # Subsections first, then keywords. Only the first object stored under
    # each name is visited.
    for attribute in ("sections", "keywords"):
        for stored in getattr(obj, attribute).itervalues():
            generate_metainfo_recursively(stored[0], obj, container, name_stack)

    # Then the optional section parameter and default keyword
    for attribute in ("section_parameter", "default_keyword"):
        member = getattr(obj, attribute)
        if member is not None:
            generate_metainfo_recursively(member, obj, container, name_stack)

    name_stack.pop()
    container.append(section_entry)


#===============================================================================
def generate_input_object_metainfo_json(child, parent, name_stack):
    """Create the metainfo entry of a single non-section input object.

    Args:
        child: Object with the attributes name, description and data_type.
        parent: Not used.
        name_stack: Names of the sections enclosing child, outermost first.

    Returns:
        Dictionary with the keys "name", "superNames", "description", "shape"
        and "dtypeStr".

    Raises:
        KeyError: If child.data_type is not one of the known data types.
    """
    section_path = ".".join(name_stack)
    entry_name = "x_cp2k_{}.{}".format(section_path, child.name)
    super_names = ["x_cp2k_section_{}".format(section_path)]

    # Fall back to a generic text if the description is missing or blank
    text = child.description
    if text is None or text.isspace():
        text = "Settings for {}".format(child.name)

    # Every data type known from the xml is currently stored as "C"
    known_types = ("keyword", "logical", "string", "integer", "word", "real")
    dtype_of = dict.fromkeys(known_types, "C")

    return {
        "name": entry_name,
        "superNames": super_names,
        "description": text,
        # The shape is always empty; child.data_dimension is not used here
        "shape": [],
        "dtypeStr": dtype_of[child.data_type],
    }


#===============================================================================
def generate_section_metainfo_json(child, parent, name_stack):
    """Create the metainfo entry of a section.

    Args:
        child: Section-like object with the attributes name and description.
        parent: Not used.
        name_stack: Names of the enclosing sections, outermost first, with the
            name of the section itself as the last item.

    Returns:
        Dictionary with the keys "name", "kindStr", "superNames" and
        "description".
    """
    own_path = ".".join(name_stack)
    # Everything but the last item is the path of the enclosing section
    enclosing_path = ".".join(name_stack[:-1])

    # Fall back to a generic text if the description is missing or blank
    text = child.description
    if text is None or text.isspace():
        text = "Settings for {}".format(child.name)

    return {
        "name": "x_cp2k_section_{}".format(own_path),
        "kindStr": "type_section",
        "superNames": ["x_cp2k_section_{}".format(enclosing_path)],
        "description": text,
    }


286
287
288
#===============================================================================
# Run main function by default
# Run main function by default
if __name__ == "__main__":

    # xml to pickle. The context managers close both files even if the
    # parsing or the pickling fails, so the pickle is always flushed to disk
    # before the script moves on.
    with open("../versions/cp2k262/input_data/cp2k_input.xml", 'r') as xml_file:
        object_tree = CP2KInput(generate_object_tree(xml_file))
    file_name = "../versions/cp2k262/input_data/cp2k_input_tree.pickle"
    with open(file_name, "wb") as fh:
        pickle.dump(object_tree, fh, protocol=2)

    # Metainfo generation (disabled by default)
    # with open("../versions/cp2k262/input_data/cp2k_input.xml", 'r') as xml_file:
    #     object_tree = CP2KInput(generate_object_tree(xml_file, for_metainfo=True))
    # generate_input_metainfos(object_tree)