xmlpreparser.py 11 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
#! /usr/bin/env python
# -*- coding: utf-8 -*-

"""Provides functions for creating a python object representing a CP2K input
structure.

Creates preparsed versions of the cp2k_input.xmls and pickles them (python
version of serialization). The pickle files can then be easily reused without
doing the xml parsing again.

The actual calculation input contents can later be added to this object. Then
the object can be queried for the results, or the default values defined by the
cp2k_input.xml.
"""
15
from __future__ import print_function
16
17
18

import xml.etree.cElementTree as ET
import logging
19
import json
20
21
22
23
24
25
import cPickle as pickle
from cp2kparser.generic.inputparsing import *
logger = logging


#===============================================================================
26
def generate_object_tree(xml_file, for_metainfo=False):
    """Parse a CP2K input description and build the matching object tree.

    Args:
        xml_file: The XML source handed to ``ET.parse``.
        for_metainfo: Forwarded unchanged to recursive_tree_generation.

    Returns:
        Whatever recursive_tree_generation returns for the root element of
        the parsed document.
    """
    document_root = ET.parse(xml_file).getroot()

    # No top-level section is filtered out here at the moment. Filtering of
    # currently uninteresting sections (ATOM, DEBUG, EXT_RESTART, FARMING,
    # OPTIMIZE_BASIS, OPTIMIZE_INPUT, SWARM, TEST) by their NAME was once
    # sketched at this point and can be added later if need be.

    # Recursively generate the tree
    return recursive_tree_generation(document_root, for_metainfo)


#===============================================================================
50
def recursive_tree_generation(xml_element, for_metainfo=False, name_stack=None, ignore=True):
    """Recursively convert a section element of the input XML into a Section.

    Args:
        xml_element: XML element of the section to convert. An element
            without a NAME child is given the name "CP2K_INPUT".
        for_metainfo: If True, the DESCRIPTION texts of the section, its
            section parameter, its default keyword and its keywords are
            stored on the generated objects as well.
        name_stack: List with the names of the enclosing sections, outermost
            first. The name of this section is appended to it. A new empty
            list is used when nothing is given.
        ignore: If True, sections called EACH or PRINT are skipped unless
            their full path is one of the explicitly kept print sections.

    Returns:
        The generated Section, or None if the section was skipped.
    """
    # A list literal as default value would be created only once and shared by
    # all calls, so the names appended by one top-level call would still be
    # there in the next one and corrupt the section paths built below.
    if name_stack is None:
        name_stack = []

    # Make new section object for the root
    section_name_element = xml_element.find("NAME")
    if section_name_element is not None:
        section_name = section_name_element.text
    else:
        section_name = "CP2K_INPUT"
    section = Section(section_name)

    name_stack.append(section_name)

    # Ignore most sections that control the print settings
    if ignore and section_name in ("EACH", "PRINT"):
        kept_print_settings = (
            "CP2K_INPUT/FORCE_EVAL/PRINT",
            "CP2K_INPUT/MOTION/PRINT",
        )
        path = "/".join(name_stack)
        if path not in kept_print_settings:
            return None
        print("KEPT {}".format(path))
        # Nothing below a kept print section is filtered any more
        ignore = False

    if for_metainfo:
        # Descriptions
        _copy_description(xml_element, section)

    # Section parameters. The element is compared against None on purpose: the
    # truth value of an element only tells whether it has children, not
    # whether it was found.
    parameter = xml_element.find("SECTION_PARAMETERS")
    if parameter is not None:
        parameter_object = SectionParameters(
            _child_text(parameter, "DEFAULT_VALUE"),
            _child_text(parameter, "LONE_KEYWORD_VALUE"),
        )
        section.section_parameter = parameter_object
        _copy_data_type(parameter, parameter_object)
        if for_metainfo:
            _copy_description(parameter, parameter_object)

    # Default keyword
    default_keyword_element = xml_element.find("DEFAULT_KEYWORD")
    if default_keyword_element is not None:
        default_keyword_object = DefaultKeyword()
        _copy_data_type(default_keyword_element, default_keyword_object)
        if for_metainfo:
            _copy_description(default_keyword_element, default_keyword_object)
        section.default_keyword = default_keyword_object

    # Keywords
    for keyword in xml_element.findall("KEYWORD"):
        # A keyword has one NAME of type "default"; all other names are aliases
        default_name = None
        aliases = []
        for name in keyword.findall("NAME"):
            if name.get("type") == "default":
                default_name = name.text
            else:
                aliases.append(name.text)

        # Ignore hidden keywords
        if default_name.startswith("__"):
            continue

        # Save the default keyword name
        section.default_keyword_names.append(default_name)

        keyword_object = Keyword(
            default_name,
            _child_text(keyword, "DEFAULT_VALUE"),
            _child_text(keyword, "DEFAULT_UNIT"),
            default_name,
        )

        # The same object is registered under the default name and every alias
        section.keywords[default_name].append(keyword_object)
        for alias in aliases:
            section.keywords[alias].append(keyword_object)

        _copy_data_type(keyword, keyword_object)
        if for_metainfo:
            _copy_description(keyword, keyword_object)

    # Sections
    for sub_section_element in xml_element.findall("SECTION"):
        # Each child gets its own copy of the stack, because it appends its
        # own name and siblings must not see each other's names.
        sub_section = recursive_tree_generation(
            sub_section_element, for_metainfo, list(name_stack), ignore)
        if sub_section is not None:
            section.sections[sub_section.name].append(sub_section)

    # Return section
    return section


#===============================================================================
def _child_text(element, tag):
    """Return the text of the first child of element called tag, or None if
    there is no such child."""
    child = element.find(tag)
    return None if child is None else child.text


#===============================================================================
def _copy_data_type(element, target):
    """Store the kind and the N_VAR text of the DATA_TYPE child of element as
    target.data_type and target.data_dimension. Attributes for which the XML
    has no information are left untouched."""
    data_type = element.find("DATA_TYPE")
    if data_type is None:
        return
    target.data_type = data_type.get("kind")

    # Data dimension
    data_dim = data_type.find("N_VAR")
    if data_dim is not None:
        target.data_dimension = data_dim.text


#===============================================================================
def _copy_description(element, target):
    """Store the text of the DESCRIPTION child of element as
    target.description. Nothing is changed if there is no such child."""
    description = element.find("DESCRIPTION")
    if description is not None:
        target.description = description.text

199
200
201
202
203
204

#===============================================================================
def generate_input_metainfos(object_tree):
    """Write the metainfo entries of a whole input tree to input_metainfo.json.

    The root section of the tree is renamed to CP2K_INPUT and given a fixed
    description before the entries are collected. The file is written to the
    current working directory as indented JSON.

    Args:
        object_tree: Object whose root_section attribute is the root section
            of the input tree.
    """
    placeholder_parent = Section("dummy")
    top_section = object_tree.root_section
    top_section.name = "CP2K_INPUT"
    top_section.description = "This section contains the explicitly stated keywords, default keywords, and section parameters in the CP2K input file. Only some of the sections that control printing (PRINT, EACH) are supported, because including all of them would double the size of this metadata without adding much useful information. The hidden input keywords starting with a double underscore are not included."

    # Collect one entry per visited object, starting with an empty name stack
    entries = []
    generate_metainfo_recursively(top_section, placeholder_parent, entries, [])

    with open("input_metainfo.json", "w") as output:
        serialized = json.dumps(entries, indent=2, separators=(',', ': '))
        output.write(serialized)


#===============================================================================
def generate_metainfo_recursively(obj, parent, container, name_stack):
    """Append the metainfo entry of obj and of everything inside it to
    container.

    Args:
        obj: A Section, or any other input object found inside a section.
        parent: The object that contains obj.
        container: List that receives the generated entries.
        name_stack: Names of the sections enclosing obj. While a section is
            processed its own name is pushed onto the stack and removed again
            afterwards.
    """
    # Anything that is not a section is a leaf and yields exactly one entry
    if not isinstance(obj, Section):
        container.append(generate_input_object_metainfo_json(obj, parent, name_stack))
        return

    name_stack.append(obj.name)
    section_entry = generate_section_metainfo_json(obj, parent, name_stack)

    # Only the first object stored under each name is visited
    for group in (obj.sections, obj.keywords):
        for variants in group.itervalues():
            generate_metainfo_recursively(variants[0], obj, container, name_stack)

    for single in (obj.section_parameter, obj.default_keyword):
        if single is not None:
            generate_metainfo_recursively(single, obj, container, name_stack)

    name_stack.pop()

    # The entry of a section is added after the entries of its contents
    container.append(section_entry)


#===============================================================================
def generate_input_object_metainfo_json(child, parent, name_stack):
    """Create the metainfo entry of an input object that is not a section.

    Args:
        child: Object providing the attributes name, description and
            data_type.
        parent: Not used.
        name_stack: Names of the sections enclosing child, outermost first.

    Returns:
        A dictionary with the keys name, superNames, description, shape and
        dtypeStr.

    Raises:
        KeyError: If child.data_type is not one of the known data types.
    """
    path = ".".join(name_stack)

    # Description. Fall back to a generic text when there is nothing usable.
    # The stripped text is tested because str.isspace() is False for an empty
    # string, which is just as uninformative as a whitespace-only one.
    description = child.description
    if description is None or not description.strip():
        description = "Settings for {}".format(child.name)

    # Determine data type according to xml info. All known kinds currently
    # map to the same dtypeStr.
    mapping = {
        "keyword": "C",
        "logical": "C",
        "string": "C",
        "integer": "C",
        "word": "C",
        "real": "C",
    }

    return {
        "name": "x_cp2k_{}.{}".format(path, child.name),
        "superNames": ["x_cp2k_section_{}".format(path)],
        "description": description,
        # The data dimension of the object is not translated into a shape
        # yet, every entry gets an empty shape.
        "shape": [],
        "dtypeStr": mapping[child.data_type],
    }


#===============================================================================
def generate_section_metainfo_json(child, parent, name_stack):
    """Create the metainfo entry of a section.

    Args:
        child: Object providing the attributes name and description.
        parent: Not used.
        name_stack: Names of the enclosing sections, outermost first, with the
            name of the section itself as the last item.

    Returns:
        A dictionary with the keys name, kindStr, superNames and description.
    """
    name = ".".join(name_stack)
    # Everything but the last item is the path of the enclosing section. It is
    # empty for a section without enclosing sections, which leaves only the
    # bare prefix as super name.
    path = ".".join(name_stack[:-1])

    # Fall back to a generic text when there is nothing usable. The stripped
    # text is tested because str.isspace() is False for an empty string, which
    # is just as uninformative as a whitespace-only one.
    description = child.description
    if description is None or not description.strip():
        description = "Settings for {}".format(child.name)

    return {
        "name": "x_cp2k_section_{}".format(name),
        "kindStr": "type_section",
        "superNames": ["x_cp2k_section_{}".format(path)],
        "description": description,
    }


287
288
289
#===============================================================================
# Run main function by default
if __name__ == "__main__":

    # xml to pickle. Both files are opened in context managers so that they
    # are closed, and the pickle is completely written to disk, as soon as
    # they are no longer needed, also when one of the steps raises.
    with open("../versions/cp2k262/input_data/cp2k_input.xml", 'r') as xml_file:
        object_tree = CP2KInput(generate_object_tree(xml_file))
    file_name = "../versions/cp2k262/input_data/cp2k_input_tree.pickle"
    with open(file_name, "wb") as fh:
        pickle.dump(object_tree, fh, protocol=2)

    # Metainfo generation. Disabled by default, enable the lines below to
    # write input_metainfo.json instead.
    # with open("../versions/cp2k262/input_data/cp2k_input.xml", 'r') as xml_file:
    #     object_tree = CP2KInput(generate_object_tree(xml_file, for_metainfo=True))
    # generate_input_metainfos(object_tree)