# inputparsing.py
1
import numpy as np
2
3
import logging
from collections import defaultdict
4
logger = logging.getLogger("nomad")
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42


#===============================================================================
class CP2KInput(object):
    """The contents of a CP2K simulation including default values and default
    units from the version-specific xml file.

    The input is stored as a tree of section objects reachable from
    ``root_section``. Sections and keywords are addressed with "/"-separated
    paths of the form "SECTION/SUBSECTION/KEYWORD". The accessors in this
    class log a message and return None instead of raising when a path cannot
    be resolved.
    """

    def __init__(self, root_section):
        """
        Args:
            root_section: The topmost section object of the input tree. It
                must provide get_subsection() and get_keyword_object().
        """
        self.root_section = root_section

    @staticmethod
    def decode_cp2k_unit(unit):
        """Given a CP2K unit name, decode it as Pint unit definition.

        Args:
            unit: The CP2K unit name as a string.

        Returns:
            The corresponding Pint unit name, or None (after logging an
            error) if the CP2K unit is not known.
        """
        unit_map = {
            # Length
            "bohr": "bohr",
            "m": "meter",
            "pm": "picometer",
            "nm": "nanometer",
            "angstrom": "angstrom",

            # Angle
            "rad": "radian",
            "deg": "degree",

            # Energy
            "Ry": "rydberg",
        }
        pint_unit = unit_map.get(unit)
        if pint_unit:
            return pint_unit
        logger.error("Unknown CP2K unit definition '{}'.".format(unit))
        return None

    @staticmethod
    def _split_keyword_path(path):
        """Split a keyword path into (section_path, keyword_name).

        A path without any "/" yields an empty section path, which the
        callers interpret as the root section.
        """
        section_path, _, keyword_name = path.rpartition("/")
        return section_path, keyword_name

    def set_parameter(self, path, value):
        """Set the section parameter value of the section at the given path.

        A warning is logged and nothing is changed if the section does not
        exist or if it has no section parameter.
        """
        parameter, section = self.get_parameter_and_section(path)
        if section is None:
            message = "The CP2K input does not contain a section {}".format(path)
            logger.warning(message)
            # Without a section there cannot be a parameter either, so do not
            # emit the misleading "no SECTION_PARAMETER" warning as well.
            return
        if parameter is None:
            message = "The CP2K input section {} does not contain a SECTION_PARAMETER".format(path)
            logger.warning(message)
            return
        parameter.value = value

    def set_keyword(self, path, value):
        """Set the value of the keyword at the given path.

        If the section exists but has no keyword with the given name, the
        keyword name and value are appended as a new line to the default
        keyword of the section, when the section has one.
        """
        keyword, section = self.get_keyword_and_section(path)

        # If keyword found, put data in there
        if keyword and section:
            keyword.value = value
        # Keyword not found in the input tree, assuming it is a default keyword
        elif section is not None:
            section_path, keyword_name = self._split_keyword_path(path)
            if section.default_keyword is not None:
                section.default_keyword.value += keyword_name + " " + value + "\n"
            else:
                message = "The CP2K input does not contain the keyword {}, and there is no default keyword for the section {}".format(path, section_path)
                logger.warning(message)

    def get_section(self, path):
        """Return the section at the given "/"-separated path.

        Returns:
            The section object, or None (after logging a warning) if any part
            of the path cannot be resolved.
        """
        section = self.root_section
        for part in path.split("/"):
            section = section.get_subsection(part)
            if not section:
                message = "The CP2K input does not contain the section {}".format(path)
                logger.warning(message)
                return None
        return section

    def get_keyword_and_section(self, path):
        """Resolve a keyword path into the keyword and its parent section.

        Returns:
            A tuple (keyword, section). Both are None if the section does not
            exist. The keyword alone is None if the section exists but has no
            matching keyword.
        """
        section_path, keyword_name = self._split_keyword_path(path)

        # A bare keyword name (no "/") is looked up directly in the root
        # section instead of failing while splitting the path.
        if section_path:
            section = self.get_section(section_path)
        else:
            section = self.root_section

        if section is None:
            # Report the section part of the path, not the keyword path.
            message = "The CP2K input does not contain the section {}".format(section_path)
            logger.warning(message)
            return (None, None)

        keyword = section.get_keyword_object(keyword_name)
        if keyword:
            return (keyword, section)
        return (None, section)

    def get_keyword_value_formatted(self, path):
        """Return the value of the keyword at the given path as returned by
        the keyword's get_value_formatted(), or None if the keyword is not
        found.
        """
        keyword, section = self.get_keyword_and_section(path)
        if keyword:
            return keyword.get_value_formatted()
        return None

    def get_keyword_value(self, path):
        """Return the value of the keyword at the given path as returned by
        the keyword's get_value(), or None if the keyword is not found.
        """
        keyword, section = self.get_keyword_and_section(path)
        if keyword:
            return keyword.get_value()
        return None

    def get_default_keyword(self, path):
        """Return the value stored in the default keyword of the section at
        the given path.

        Returns:
            The default keyword value, or None if the section does not exist
            or has no default keyword.
        """
        section = self.get_section(path)
        if section is None or section.default_keyword is None:
            return None
        return section.default_keyword.value

    def set_section_accessed(self, path):
        """Mark the section at the given path as accessed. Logs a warning if
        the section does not exist.
        """
        section = self.get_section(path)
        if section:
            section.accessed = True
        else:
            message = "The CP2K input does not contain the section {}".format(path)
            logger.warning(message)

    def get_keyword_default(self, path):
        """Return the default value of the keyword at the given path, or None
        if the keyword is not found.
        """
        keyword, section = self.get_keyword_and_section(path)
        if keyword:
            return keyword.default_value
        return None

    def get_default_unit(self, path):
        """Return the default unit of the keyword at the given path, or None
        if the keyword is not found.
        """
        keyword, section = self.get_keyword_and_section(path)
        if keyword:
            return keyword.default_unit
        return None

    def get_unit(self, path):
        """Return the unit of the keyword at the given path as returned by
        the keyword's get_unit(), or None if the keyword is not found.
        """
        keyword, section = self.get_keyword_and_section(path)
        if keyword:
            return keyword.get_unit()
        return None

    def get_parameter_and_section(self, path):
        """Resolve a section path into its section parameter and the section.

        Returns:
            A tuple (parameter, section). Both are None if the section does
            not exist. The parameter alone is None if the section has no
            section parameter.
        """
        section = self.get_section(path)
        if section is None:
            return (None, None)
        if section.section_parameter is not None:
            return (section.section_parameter, section)
        return (None, section)

    def get_parameter(self, path):
        """Return the section parameter value of the section at the given
        path.

        If no value has been set but the section has been marked as accessed,
        the lone keyword value of the section parameter is returned instead.
        Returns None in all other cases.
        """
        parameter, section = self.get_parameter_and_section(path)
        if parameter:
            if parameter.value:
                return parameter.value
            elif section and section.accessed:
                # The fallback is stored in the 'lone_keyword_value' slot of
                # SectionParameters; there is no 'lone_value' attribute on it.
                return parameter.lone_keyword_value
        return None


#===============================================================================
156
157
class InputObject(object):
    """Common base for the data elements stored in a CP2K input tree.

    Every element carries a name together with its current value, default
    value, description, data type and data dimension. Only the name is given
    at construction time; the other attributes start out as None.
    """
    __slots__ = ['name', 'value', 'default_value', 'description', 'data_type', 'data_dimension']

    def __init__(self, name):
        self.name = name
        # Everything except the name is filled in later by whoever builds
        # the input tree.
        for attribute in ('value', 'description', 'data_type', 'data_dimension', 'default_value'):
            setattr(self, attribute, None)

169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184

#===============================================================================
class Keyword(InputObject):
    """Information about a keyword in a CP2K calculation.

    In addition to the attributes of InputObject, a keyword stores the unit
    decoded from its value ('unit'), the value with the unit definition
    removed ('value_no_unit'), the default unit and the default name.
    """
    __slots__ = ['unit', 'value_no_unit', 'default_unit', 'default_name']

    # Conversion applied to the raw string for each supported data type name.
    # Logical values are returned as strings.
    _CONVERTERS = {
        "integer": int,
        "real": float,
        "word": str,
        "keyword": str,
        "string": str,
        "logical": str,
    }

    def __init__(self, name, default_value,  default_unit, default_name):
        super(Keyword, self).__init__(name)
        self.unit = None
        self.value_no_unit = None
        self.default_unit = default_unit
        self.default_value = default_value
        self.default_name = default_name

    def get_value(self):
        """Returns the unformatted value of this keyword. This is exactly what
        was set by the user in the input as a string.
        """
        return self.value

    def get_value_formatted(self):
        """Returns the value stored in this keyword by removing the possible
        unit definition and formatting the string into the correct data type.

        If no value has been set, the default value is used instead.

        Returns:
            For a data dimension of 1 the converted scalar, otherwise a numpy
            array of the converted entries. None is returned if there is
            neither a value nor a default value, if the data type is unknown
            or if the conversion fails (the last two are logged as errors).
        """
        # Decode the unit and the value if not done before
        if self.default_unit and not self.value_no_unit:
            self.decode_cp2k_unit_and_value()

        if self.value_no_unit is not None:
            proper_value = self.value_no_unit
        else:
            proper_value = self.value
        if proper_value is None:
            proper_value = self.default_value
        if proper_value is None:
            # Neither a value nor a default is available: nothing to format.
            return None

        dim = int(self.data_dimension)
        splitted = proper_value.split()
        if len(splitted) != dim:
            # Only reported; the conversion below is still attempted.
            logger.error("The dimensions of the CP2K input parameter {} do not match the specification in the XML file.".format(self.name))

        converter = self._CONVERTERS.get(self.data_type)
        if converter is None:
            logger.error("Unknown data type '{}'".format(self.data_type))
            return None

        try:
            if dim == 1:
                return converter(proper_value)
            return np.array([converter(x) for x in splitted])
        except (TypeError, ValueError):
            # int() and float() raise ValueError for strings they cannot
            # parse, so both exception types have to be handled here.
            logger.error("The CP2K input parameter {} could not be converted to the type specified in the XML file.".format(self.name))
            return None

    def determine_value_and_unit(self):
        """Returns the raw value of this keyword with a possible unit
        definition removed.

        If the keyword has a default unit, the value is decoded first (if not
        done before) and the value without the unit is returned. Otherwise
        the value is returned as it was set. The unit itself is available
        through get_unit().
        """
        if self.default_unit:
            if not self.value_no_unit:
                self.decode_cp2k_unit_and_value()
            return self.value_no_unit
        else:
            return self.value

    def get_unit(self):
        """Returns the Pint unit name of this keyword, or None (after logging
        an error) if the keyword has no default unit.
        """
        # Decode the unit and the value if not done before
        if self.default_unit:
            if not self.unit:
                self.decode_cp2k_unit_and_value()
            return self.unit
        else:
            logger.error("The keyword '{}' does not have a unit.".format(self.default_name))
            return None

    def decode_cp2k_unit_and_value(self):
        """Splits the raw value into a unit and a value without the unit.

        A value may start with a unit definition in square brackets, e.g.
        "[angstrom] 1.0". In that case the given unit is decoded and the rest
        of the string is stored as the value. Without an explicit unit
        definition the default unit, if any, is used. The results are stored
        in 'unit' and 'value_no_unit'.
        """
        if self.value is None:
            # Nothing was set by the user. Only the unit can be resolved, and
            # 'value_no_unit' stays None so that the caller can fall back to
            # the default value.
            if self.default_unit:
                self.unit = CP2KInput.decode_cp2k_unit(self.default_unit)
            return

        splitted = self.value.split(None, 1)
        first = splitted[0] if splitted else ""
        if first.startswith('[') and first.endswith(']'):
            # Decode the unit that was actually given in the input instead of
            # the default unit.
            unit_definition = first[1:-1]
            self.unit = CP2KInput.decode_cp2k_unit(unit_definition)
            # A unit definition without a following value leaves an empty
            # value instead of raising an IndexError.
            self.value_no_unit = splitted[1] if len(splitted) > 1 else ""
        elif self.default_unit:
            logger.debug("No special unit definition found, returning default unit.")
            self.unit = CP2KInput.decode_cp2k_unit(self.default_unit)
            self.value_no_unit = self.value
        else:
            logger.debug("The value has no unit, returning bare value.")
            self.value_no_unit = self.value


#===============================================================================
296
class Section(object):
    """An input section in a CP2K calculation.

    A section holds its keywords and subsections in dictionaries that map a
    name to a list of objects, together with an optional default keyword and
    an optional section parameter.
    """
    __slots__ = ['accessed', 'name', 'keywords', 'default_keyword_names', 'default_keyword', 'section_parameter', 'sections', 'description']

    def __init__(self, name):
        self.accessed = False
        self.name = name
        self.keywords = defaultdict(list)
        self.default_keyword_names = []
        self.default_keyword = None
        self.section_parameter = None
        self.sections = defaultdict(list)
        self.description = None

    def get_keyword_object(self, name):
        """Returns the keyword object stored under the given name.

        Returns:
            The keyword object if exactly one is stored under the name. None
            if there is no entry for the name, or (after logging an error) if
            there is more than one.
        """
        # .get() is used so that a failed lookup does not create an empty
        # entry in the defaultdict.
        keyword = self.keywords.get(name)
        if keyword:
            if len(keyword) == 1:
                return keyword[0]
            else:
                logger.error("The keyword '{}' in '{}' does not exist or has too many entries.".format(name, self.name))
        return None

    def get_keyword_value_formatted(self, name):
        """Returns the keyword value formatted to the correct shape and type,
        and returns the default value if nothing was specified.

        Returns None if the keyword object cannot be resolved.
        """
        keyword_object = self.get_keyword_object(name)
        if keyword_object is not None:
            value = keyword_object.get_value_formatted()
            return value
        return None

    def get_keyword_value(self, name):
        """Returns the keyword value as a raw string as specified by the user.

        Returns None if the keyword object cannot be resolved.
        """
        keyword_object = self.get_keyword_object(name)
        if keyword_object is not None:
            return keyword_object.get_value()
        return None

    def get_subsection(self, name):
        """Returns the subsection stored under the given name.

        Returns:
            The subsection if exactly one is stored under the name. Otherwise
            an error is logged and None is returned.
        """
        subsection = self.sections.get(name)
        if subsection:
            if len(subsection) == 1:
                return subsection[0]
            else:
                logger.error("The subsection '{}' in '{}' has too many entries.".format(name, self.name))
        else:
            logger.error("The subsection '{}' in '{}' does not exist.".format(name, self.name))
        return None

    def get_section_parameter(self):
        """Get the section parameter, or if not specified the lone keyword
        value.

        Returns:
            The value in upper case, or None if the section has no section
            parameter or if neither a value nor a lone keyword value is
            available.
        """
        if self.section_parameter is None:
            return None
        value = self.section_parameter.value
        if value is None:
            value = self.section_parameter.lone_keyword_value
        if value is None:
            # Neither an explicit nor a lone value: nothing to upper-case.
            return None
        return value.upper()

355
356

#===============================================================================
357
class SectionParameters(InputObject):
    """The parameter attached to a section in a CP2K calculation.

    A section parameter is the short value that may follow a section name
    directly, e.g. in "&PRINT ON" the section parameter is ON. Besides the
    default value, the lone keyword value is stored as well.
    """
    __slots__ = ['lone_keyword_value']

    def __init__(self, default_value, lone_keyword_value):
        # All section parameters share the same fixed name.
        super(SectionParameters, self).__init__("SECTION_PARAMETERS")
        self.default_value, self.lone_keyword_value = default_value, lone_keyword_value


#===============================================================================
class DefaultKeyword(InputObject):
    """The default keyword of a section in the CP2K input.

    Unlike other input objects, the value starts out as an empty string
    instead of None.
    """
    __slots__ = ['lone_value']

    def __init__(self):
        # All default keywords share the same fixed name.
        super(DefaultKeyword, self).__init__("DEFAULT_KEYWORD")
        self.value = ""
        self.lone_value = None