xmlpreparser.py 10.7 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
#! /usr/bin/env python
# -*- coding: utf-8 -*-

"""Provides functions for creating a python object representing a CP2K input
structure.

Creates preparsed versions of the cp2k_input.xmls and pickles them (python
version of serialization). The pickle files can then be easily reused without
doing the xml parsing again.

The actual calculation input contents can later be added to this object. Then
the object can be queried for the results, or the default values defined by the
cp2k_input.xml.
"""

import xml.etree.cElementTree as ET
import logging
18
import json
19
20
21
22
23
24
import cPickle as pickle
from cp2kparser.generic.inputparsing import *
logger = logging


#===============================================================================
25
def generate_object_tree(xml_file, for_metainfo=False):
    """Parse a CP2K input description XML and build the section tree from it.

    Args:
        xml_file: The XML source handed directly to ``ET.parse`` (a file name
            or an open file object).
        for_metainfo: Forwarded to ``recursive_tree_generation``. When True,
            the DESCRIPTION texts found in the XML are stored in the tree.

    Returns:
        The object returned by ``recursive_tree_generation`` for the root
        element of the XML document.
    """
    root = ET.parse(xml_file).getroot()

    # No top-level sections are filtered out at the moment. If some of them
    # turn out to be uninteresting they can be removed from ``root`` here
    # before the tree is generated. Earlier candidates were: ATOM, DEBUG,
    # EXT_RESTART, FARMING, OPTIMIZE_BASIS, OPTIMIZE_INPUT, SWARM, TEST.

    # Recursively generate the tree. A fresh name stack is passed explicitly
    # so that repeated calls never share path state with each other.
    return recursive_tree_generation(root, for_metainfo, [])


#===============================================================================
49
# Section names that only control print settings and are normally dropped
_IGNORED_SECTION_NAMES = ("EACH", "PRINT")

# Full paths of the print sections that are kept despite the rule above
_KEPT_PRINT_SETTINGS = (
    "CP2K_INPUT/FORCE_EVAL/PRINT",
    "CP2K_INPUT/MOTION/PRINT",
)


def _child_text(xml_element, tag):
    """Return the text of the first direct child called *tag*, or None."""
    child = xml_element.find(tag)
    if child is None:
        return None
    return child.text


def _read_data_type(xml_element, target):
    """Copy the DATA_TYPE kind and its N_VAR dimension onto *target*."""
    data_type = xml_element.find("DATA_TYPE")
    if data_type is None:
        return
    target.data_type = data_type.get("kind")
    data_dim = data_type.find("N_VAR")
    if data_dim is not None:
        target.data_dimension = data_dim.text


def _read_description(xml_element, target):
    """Copy the DESCRIPTION text onto *target* if the element is present."""
    description = xml_element.find("DESCRIPTION")
    if description is not None:
        target.description = description.text


def recursive_tree_generation(xml_element, for_metainfo=False, name_stack=None):
    """Build a Section object for *xml_element* and all of its subsections.

    Args:
        xml_element: The XML element describing a section. If it has no NAME
            child, the section is named "CP2K_INPUT".
        for_metainfo: When True, the DESCRIPTION texts of the section, its
            section parameter, its default keyword and its keywords are
            stored on the generated objects.
        name_stack: List with the names of the enclosing sections. The name
            of this section is appended to the given list. When omitted, a
            new empty list is used for every call.

    Returns:
        The generated Section, or None if the section is an EACH or PRINT
        section whose path is not listed in the kept print settings.
    """
    # A shared mutable default would leak path entries between separate
    # top-level calls, so the list is created here instead.
    if name_stack is None:
        name_stack = []

    # Make new section object for the root
    name_element = xml_element.find("NAME")
    section_name = "CP2K_INPUT" if name_element is None else name_element.text
    section = Section(section_name)
    name_stack.append(section_name)

    # Ignore most sections that control the print settings
    if section_name in _IGNORED_SECTION_NAMES:
        path = "/".join(name_stack)
        if path not in _KEPT_PRINT_SETTINGS:
            return None
        # Parenthesised single argument: same output with the Python 2
        # print statement and the Python 3 print function.
        print("KEPT {}".format(path))

    if for_metainfo:
        _read_description(xml_element, section)

    # Section parameters. The check is for presence: the truth value of an
    # Element only tells whether it has children.
    parameter = xml_element.find("SECTION_PARAMETERS")
    if parameter is not None:
        parameter_object = SectionParameters(
            _child_text(parameter, "DEFAULT_VALUE"),
            _child_text(parameter, "LONE_KEYWORD_VALUE"),
        )
        section.section_parameter = parameter_object
        _read_data_type(parameter, parameter_object)
        if for_metainfo:
            _read_description(parameter, parameter_object)

    # Default keyword
    default_keyword_element = xml_element.find("DEFAULT_KEYWORD")
    if default_keyword_element is not None:
        default_keyword_object = DefaultKeyword()
        _read_data_type(default_keyword_element, default_keyword_object)
        if for_metainfo:
            _read_description(default_keyword_element, default_keyword_object)
        section.default_keyword = default_keyword_object

    # Keywords
    for keyword in xml_element.findall("KEYWORD"):

        # Separate the default name from the aliases
        default_name = None
        aliases = []
        for name in keyword.findall("NAME"):
            if name.get("type") == "default":
                default_name = name.text
            else:
                aliases.append(name.text)

        # A keyword without a usable default name cannot be stored. Skip it
        # with a warning instead of failing on the name checks below.
        if default_name is None:
            logging.getLogger(__name__).warning(
                "Skipping a keyword without a default name in section '%s'.",
                "/".join(name_stack)
            )
            continue

        # Ignore hidden keywords
        if default_name.startswith("__"):
            continue

        # Save the default keyword name
        section.default_keyword_names.append(default_name)

        keyword_object = Keyword(
            default_name,
            _child_text(keyword, "DEFAULT_VALUE"),
            _child_text(keyword, "DEFAULT_UNIT"),
            default_name
        )

        # The same object is made available under the default name and under
        # every alias.
        section.keywords[default_name].append(keyword_object)
        for alias in aliases:
            section.keywords[alias].append(keyword_object)

        _read_data_type(keyword, keyword_object)
        if for_metainfo:
            _read_description(keyword, keyword_object)

    # Sections. Every subsection gets its own copy of the name stack so that
    # siblings do not see each other's names.
    for sub_section_element in xml_element.findall("SECTION"):
        sub_section = recursive_tree_generation(
            sub_section_element, for_metainfo, list(name_stack)
        )
        if sub_section is not None:
            section.sections[sub_section.name].append(sub_section)

    # Return section
    return section

196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234

#===============================================================================
def generate_input_metainfos(object_tree):
    """Write the metainfo definitions of a whole input tree to a JSON file.

    The root section of *object_tree* is renamed to "CP2K_INPUT" and given a
    fixed description. The metainfo entries collected from the tree are then
    written to "input_metainfo.json" in the current working directory.

    Args:
        object_tree: Object whose ``root_section`` attribute holds the root
            section of the input tree.
    """
    # Placeholder that is handed over as the parent of the root section
    dummy_parent = Section("dummy")

    top_section = object_tree.root_section
    top_section.name = "CP2K_INPUT"
    top_section.description = "The section containing all information that is explicitly stated in the CP2K input file. The sections that control printing (PRINT, EACH) and the hidden input keywords starting with a double underscore are not included."

    # The recursion fills this list with one entry per visited object
    metainfos = []
    generate_metainfo_recursively(top_section, dummy_parent, metainfos, [])

    with open("input_metainfo.json", "w") as output_file:
        serialized = json.dumps(metainfos, indent=2, separators=(',', ': '))
        output_file.write(serialized)


#===============================================================================
def generate_metainfo_recursively(obj, parent, container, name_stack):
    """Collect the metainfo entries of *obj* and everything below it.

    For a Section the entries of its subsections, keywords, section parameter
    and default keyword are collected first and the entry of the section
    itself is appended last. Any other object yields a single entry.

    Args:
        obj: A Section or an input object stored inside a section.
        parent: The object that contains *obj*. It is passed on to the
            functions that build the individual entries.
        container: List to which the generated entries are appended.
        name_stack: List with the names of the enclosing sections. The name
            of a section is pushed while its contents are processed and
            popped afterwards.
    """
    if not isinstance(obj, Section):
        container.append(
            generate_input_object_metainfo_json(obj, parent, name_stack)
        )
        return

    name_stack.append(obj.name)
    section_metainfo = generate_section_metainfo_json(obj, parent, name_stack)

    # Only the first object stored under each name is used
    for children in obj.sections.itervalues():
        generate_metainfo_recursively(children[0], obj, container, name_stack)

    # A keyword may be stored under several names (default name and aliases).
    # Emit every keyword object only once so that the output does not
    # contain identical duplicate entries.
    emitted = set()
    for children in obj.keywords.itervalues():
        keyword = children[0]
        if id(keyword) in emitted:
            continue
        emitted.add(id(keyword))
        generate_metainfo_recursively(keyword, obj, container, name_stack)

    if obj.section_parameter is not None:
        generate_metainfo_recursively(obj.section_parameter, obj, container, name_stack)
    if obj.default_keyword is not None:
        generate_metainfo_recursively(obj.default_keyword, obj, container, name_stack)

    name_stack.pop()
    container.append(section_metainfo)


#===============================================================================
def generate_input_object_metainfo_json(child, parent, name_stack):
    """Build the metainfo entry of an input object that is not a section.

    Args:
        child: Object providing the attributes ``name``, ``description`` and
            ``data_type``.
        parent: The object containing *child*. Currently not used.
        name_stack: List with the names of the enclosing sections.

    Returns:
        A dictionary with the keys "name", "superNames", "description",
        "shape" and "dtypeStr".

    Raises:
        KeyError: If ``child.data_type`` is not one of the known kinds.
    """
    path = ".".join(name_stack)
    json_obj = {}
    json_obj["name"] = "x_cp2k_{}.{}".format(path, child.name)
    json_obj["superNames"] = ["x_cp2k_section_{}".format(path)]

    # Description. A missing, empty or whitespace-only description is
    # replaced by a generic text.
    description = child.description
    if description is None or not description.strip():
        description = "Settings for {}".format(child.name)
    json_obj["description"] = description

    # Shape. Every input object is reported as a scalar.
    json_obj["shape"] = []

    # Determine data type according to xml info. All known kinds are
    # currently reported with the same type string.
    mapping = {
        "keyword": "C",
        "logical": "C",
        "string": "C",
        "integer": "C",
        "word": "C",
        "real": "C",
    }
    json_obj["dtypeStr"] = mapping[child.data_type]
    return json_obj


#===============================================================================
def generate_section_metainfo_json(child, parent, name_stack):
    """Build the metainfo entry of a section.

    Args:
        child: Object providing the attributes ``name`` and ``description``.
        parent: The object containing *child*. Currently not used.
        name_stack: List with the names of the enclosing sections. The last
            item belongs to the section itself.

    Returns:
        A dictionary with the keys "name", "kindStr", "superNames" and
        "description".
    """
    name = ".".join(name_stack)
    path = ".".join(name_stack[:-1])
    json_obj = {}

    json_obj["name"] = "x_cp2k_section_{}".format(name)
    json_obj["kindStr"] = "type_section"
    # NOTE(review): when name_stack holds a single item, path is empty and
    # the super name becomes "x_cp2k_section_" - confirm that this is the
    # intended parent of the root section.
    json_obj["superNames"] = ["x_cp2k_section_{}".format(path)]

    # A missing, empty or whitespace-only description is replaced by a
    # generic text.
    description = child.description
    if description is None or not description.strip():
        description = "Settings for {}".format(child.name)
    json_obj["description"] = description
    return json_obj


284
285
286
#===============================================================================
# Run main function by default
if __name__ == "__main__":

    # xml to pickle (currently disabled)
    # xml_file = open("../versions/cp2k262/input_data/cp2k_input.xml", 'r')
    # object_tree = CP2KInput(generate_object_tree(xml_file))
    # file_name = "../versions/cp2k262/input_data/cp2k_input_tree.pickle"
    # fh = open(file_name, "wb")
    # pickle.dump(object_tree, fh, protocol=2)

    # Metainfo generation. The XML file is closed as soon as the tree has
    # been built from it.
    with open("../versions/cp2k262/input_data/cp2k_input.xml", 'r') as xml_file:
        object_tree = CP2KInput(generate_object_tree(xml_file, for_metainfo=True))
    generate_input_metainfos(object_tree)