inputparsing.py 15.3 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
# Copyright 2015-2018 Lauri Himanen, Fawzi Mohamed, Ankit Kariryaa
# 
#   Licensed under the Apache License, Version 2.0 (the "License");
#   you may not use this file except in compliance with the License.
#   You may obtain a copy of the License at
# 
#     http://www.apache.org/licenses/LICENSE-2.0
# 
#   Unless required by applicable law or agreed to in writing, software
#   distributed under the License is distributed on an "AS IS" BASIS,
#   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#   See the License for the specific language governing permissions and
#   limitations under the License.

15
16
from builtins import str
from builtins import object
17
import numpy as np
18
19
import logging
from collections import defaultdict
20

21
# Module-level logger; every message emitted in this file goes to the logger
# registered under the name "nomad".
logger = logging.getLogger("nomad")
22
23
# Name prefixes for CP2K input sections and CP2K input data respectively.
# NOTE(review): presumably prepended to section/keyword names when building
# metainfo names -- they are not used in the visible part of this file, so
# confirm against the callers.
metainfo_section_prefix = "x_cp2k_section_input_"
metainfo_data_prefix = "x_cp2k_input_"
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60


class CP2KInput(object):
    """The contents of a CP2K simulation including default values and default
    units from the version-specific xml file.

    All ``path`` arguments are "/"-separated lists of section names that are
    resolved starting from ``root_section``. For the keyword related methods
    the last path component is the keyword name and everything before it is
    the section path.
    """

    # Translation table from the unit names accepted in a CP2K input file to
    # the corresponding Pint unit names.
    _UNIT_NAMES = {
        # Length
        "bohr": "bohr",
        "m": "meter",
        "pm": "picometer",
        "nm": "nanometer",
        "angstrom": "angstrom",

        # Angle
        "rad": "radian",
        "deg": "degree",

        # Energy
        "Ry": "rydberg",
    }

    def __init__(self, root_section):
        self.root_section = root_section

    @staticmethod
    def decode_cp2k_unit(unit):
        """Given a CP2K unit name, decode it as Pint unit definition.

        Args:
            unit: The unit name as written in the CP2K input.

        Returns:
            The Pint unit name, or None (after logging an error) if the unit
            is not known.
        """
        pint_unit = CP2KInput._UNIT_NAMES.get(unit)
        if pint_unit:
            return pint_unit
        logger.error("Unknown CP2K unit definition '{}'.".format(unit))
        return None

    def set_parameter(self, path, value):
        """Sets the value of the section parameter of the section found at
        the given path. Logs a warning and does nothing if the section or its
        section parameter does not exist.
        """
        parameter, section = self.get_parameter_and_section(path)
        if section is None:
            message = "The CP2K input does not contain a section {}".format(path)
            logger.warning(message)
            # Without a section there cannot be a section parameter either,
            # so do not emit the second (misleading) warning below.
            return
        if parameter is None:
            message = "The CP2K input section {} does not contain a SECTION_PARAMETER".format(path)
            logger.warning(message)
            return
        parameter.value = value

    def set_keyword(self, path, value, full):
        """Stores a keyword value read from the input.

        Args:
            path: Path of the keyword (section path + "/" + keyword name).
            value: The value that is stored if the keyword is known.
            full: The complete input line. If the keyword is not known to the
                section, this line is appended (followed by a newline) to the
                value of the default keyword of the section.
        """
        keyword, section = self.get_keyword_and_section(path)
        if section is None:
            # The lookup has already logged the missing section.
            return

        # If keyword found, put data in there
        if keyword:
            keyword.value = value
        # Keyword not found in the input tree, assuming it is a default keyword
        elif section.default_keyword is not None:
            section.default_keyword.value += full + "\n"
        else:
            section_path = path.rsplit("/", 1)[0]
            message = "The CP2K input does not contain the keyword {}, and there is no default keyword for the section {}".format(path, section_path)
            logger.warning(message)

    def get_section(self, path):
        """Returns the section found at the given path, or None (after
        logging a warning) if any part of the path cannot be resolved.
        """
        section = self.root_section
        for part in path.split("/"):
            section = section.get_subsection(part)
            if not section:
                message = "The CP2K input does not contain the section {}".format(path)
                logger.warning(message)
                return None
        return section

    def get_section_list(self, path):
        """Returns whatever ``get_subsection_list`` of the parent section
        gives for the last path component, i.e. all sections with that name.
        Returns None (after logging a warning) if a parent section is missing.
        """
        split_path = path.split("/")
        last_section = split_path.pop()
        section = self.root_section
        for part in split_path:
            section = section.get_subsection(part)
            if not section:
                message = "The CP2K input does not contain the section {}".format(path)
                logger.warning(message)
                return None
        return section.get_subsection_list(last_section)

    def get_keyword_and_section(self, path):
        """Resolves a keyword path.

        Returns:
            A tuple (keyword, section). Both are None if the section cannot be
            found, and only the keyword is None if the section exists but
            does not hold exactly one keyword with the given name.
        """
        split_path = path.rsplit("/", 1)
        if len(split_path) != 2:
            # A keyword always lives inside a section, so a path without a
            # separator cannot be resolved. Report it instead of failing with
            # an IndexError.
            message = "The CP2K input keyword path '{}' does not contain a section.".format(path)
            logger.warning(message)
            return (None, None)
        section_path, keyword_name = split_path

        section = self.get_section(section_path)
        if section is None:
            message = "The CP2K input does not contain the section {}".format(section_path)
            logger.warning(message)
            return (None, None)

        keyword = section.get_keyword_object(keyword_name)
        if keyword:
            return (keyword, section)
        return (None, section)

    def get_keyword(self, path, raw=False, allow_default=True):
        """Returns the value of the keyword at the given path by calling its
        ``get_value(raw, allow_default)``, or None if the keyword is not found.
        """
        keyword, section = self.get_keyword_and_section(path)
        if keyword:
            return keyword.get_value(raw, allow_default)
        return None

    def set_section_accessed(self, path):
        """Marks the section at the given path as accessed. Logs a warning if
        the section does not exist.
        """
        section = self.get_section(path)
        if section:
            section.accessed = True
        else:
            message = "The CP2K input does not contain the section {}".format(path)
            logger.warning(message)

    def get_default_unit(self, path):
        """Returns the ``default_unit`` attribute of the keyword at the given
        path, or None if the keyword is not found.
        """
        keyword, section = self.get_keyword_and_section(path)
        if keyword:
            return keyword.default_unit
        return None

    def get_unit(self, path):
        """Returns the result of ``get_unit()`` of the keyword at the given
        path, or None if the keyword is not found.
        """
        keyword, section = self.get_keyword_and_section(path)
        if keyword:
            return keyword.get_unit()
        return None

    def get_parameter_and_section(self, path):
        """Resolves the section parameter of the section at the given path.

        As a side effect the ``accessed`` flag of the section is copied to the
        section parameter.

        Returns:
            A tuple (parameter, section). Both are None if the section is not
            found, and only the parameter is None if the section has no
            section parameter.
        """
        section = self.get_section(path)
        if section is None:
            return (None, None)
        parameter = section.section_parameter
        if parameter is None:
            return (None, section)
        parameter.accessed = section.accessed
        return (parameter, section)

    def get_parameter(self, path):
        """Returns the section parameter value of the section at the given
        path.

        The explicitly set value is returned if there is one. Otherwise, if
        the section has been accessed, the lone keyword value of the section
        parameter is returned. In all other cases None is returned.
        """
        parameter, section = self.get_parameter_and_section(path)
        if parameter:
            if parameter.value:
                return parameter.value
            elif section and section.accessed:
                # SectionParameters stores this as 'lone_keyword_value'; it is
                # a slotted class without a 'lone_value' attribute.
                return parameter.lone_keyword_value
        return None


170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
class Section(object):
    """An input section in a CP2K calculation.

    Keywords and subsections are stored in dictionaries that map a name to a
    list of objects, because the same name may appear more than once.
    """
    __slots__ = ['accessed', 'name', 'keywords', 'default_keyword_names', 'default_keyword', 'section_parameter', 'sections', 'description']

    def __init__(self, name):
        self.accessed = False
        self.name = name
        self.keywords = defaultdict(list)
        self.default_keyword_names = []
        self.default_keyword = None
        self.section_parameter = None
        self.sections = defaultdict(list)
        self.description = None

    def get_keyword_object(self, name):
        """Returns the keyword object stored under the given name.

        Returns None if there is no such keyword (silently) or if the name
        has more than one entry (after logging an error).
        """
        # .get() is used on purpose: indexing the defaultdict would create an
        # empty entry for every name that is looked up.
        candidates = self.keywords.get(name)
        if not candidates:
            return None
        if len(candidates) == 1:
            return candidates[0]
        logger.error("The keyword '{}' in '{}' does not exist or has too many entries.".format(name, self.name))
        return None

    def get_keyword(self, name, raw=False, allow_default=True):
        """Returns the keyword value for the given name.

        Args:
            name: The name of the keyword
            raw: Boolean indicating if the raw value (not modified in any way)
                should be returned.
            allow_default: Boolean indicating if it is allowed to return the
                default value if no actual value was set by the user in the
                input.

        Returns:
            The result of ``get_value(raw, allow_default)`` of the keyword, or
            None if the keyword cannot be resolved to a single object.
        """
        keyword_object = self.get_keyword_object(name)
        if keyword_object is None:
            # Previously this failed with an AttributeError on None.
            return None
        return keyword_object.get_value(raw, allow_default)

    def get_subsection(self, name):
        """Returns the subsection with the given name. Logs an error and
        returns None if it does not exist or if there is more than one.
        """
        candidates = self.sections.get(name)
        if not candidates:
            logger.error("The subsection '{}' in '{}' does not exist, knowing sections {} and keywords {} .".format(name, self.name, self.sections.keys(), self.keywords.keys()))
            return None
        if len(candidates) == 1:
            return candidates[0]
        logger.error("The subsection '{}' in '{}' has too many entries.".format(name, self.name))
        return None

    def get_subsection_list(self, name):
        """Returns the list of all subsections stored under the given name, or
        None if the name is unknown.
        """
        return self.sections.get(name)

    def get_section_parameter(self):
        """Get the section parameter, or if not specified the lone keyword
        value.

        Returns:
            The value in upper case, or None if the section has no section
            parameter or if neither of the two values is set.
        """
        parameter = self.section_parameter
        if parameter is None:
            return None
        value = parameter.value
        if value is None:
            value = parameter.lone_keyword_value
        if value is None:
            # Nothing to upper-case; avoids an AttributeError on None.
            return None
        return value.upper()


231
232
class InputObject(object):
    """Base class for all kind of data elements in the CP2K input.

    Every attribute starts as None except for the name. The subclasses in
    this file assign ``default_value`` (and in one case ``value``) in their
    own constructors after calling this one.
    """
    __slots__ = ['name', 'value', 'default_value', 'description', 'data_type', 'data_dimension']

    def __init__(self, name):
        self.name = name
        self.value = None
        self.description = None
        self.data_type = None
        self.data_dimension = None
        self.default_value = None

244
245
246
247
248
249
250
251
252
253
254
255
256
257

class Keyword(InputObject):
    """Information about a keyword in a CP2K calculation.
    """
    __slots__ = ['unit', 'value_no_unit', 'default_unit', 'default_name']

    # Maps the data type names to the function that converts the string found
    # in the input into a Python value. Logical values are returned as
    # strings, not as booleans.
    _CONVERTERS = {
        "integer": int,
        "real": float,
        "word": str,
        "keyword": str,
        "string": str,
        "logical": str,
    }

    def __init__(self, name, default_value, default_unit, default_name):
        super(Keyword, self).__init__(name)
        self.unit = None
        self.value_no_unit = None
        self.default_unit = default_unit
        self.default_value = default_value
        self.default_name = default_name

    def get_value(self, raw=False, allow_default=True):
        """Returns the value of this keyword.

        Args:
            raw: If True, the stored string is returned unmodified. Otherwise
                the formatted value is returned.
            allow_default: Passed on to the formatting routine.
        """
        if raw:
            return self._get_value_raw()
        return self._get_value_formatted(allow_default)

    def _get_value_raw(self):
        """Returns the unformatted value of this keyword. This is exactly what
        was set by the user in the input as a string.
        """
        return self.value

    def _get_value_formatted(self, allow_default=False):
        """Returns the value stored in this keyword by removing the possible
        unit definition and formatting the string into the correct data type.

        If no actual value was set by the user, the default value is used.
        NOTE(review): 'allow_default' is currently not consulted (the check
        was commented out in the original code), so the default value is
        always used as a fallback. Confirm whether this is intended.

        Returns:
            A single converted value if the data dimension is 1, otherwise a
            numpy array with one converted entry per whitespace separated
            token. None if there is no value at all, if the data type is
            unknown or if the conversion fails (the last two are logged).
        """
        # Decode the unit and the value if not done before
        if self.default_unit is not None and self.value_no_unit is None:
            self.decode_cp2k_unit_and_value()

        proper_value = self.value_no_unit
        if proper_value is None:
            proper_value = self.value
        if proper_value is None:
            proper_value = self.default_value
        if proper_value is None:
            return None

        dim = int(self.data_dimension)
        splitted = proper_value.split()
        if len(splitted) != dim:
            # Only reported; the conversion below is still attempted.
            logger.error("The dimensions of the CP2K input parameter {} do not match the specification in the XML file.".format(self.name))

        converter = self._CONVERTERS.get(self.data_type)
        if converter is None:
            logger.error("Unknown data type '{}'".format(self.data_type))
            return None

        try:
            if dim == 1:
                return converter(proper_value)
            return np.array([converter(x) for x in splitted])
        except (TypeError, ValueError):
            # int() and float() raise ValueError for malformed text, which the
            # original 'except TypeError' did not catch.
            logger.error("The CP2K input parameter {} could not be converted to the type specified in the XML file.".format(self.name))
            return None

    def determine_value_and_unit(self):
        """If the units of this value can be changed (i.e. the keyword has a
        default unit), returns the value with the unit definition removed.
        Otherwise returns the stored value as it is. Only the value is
        returned; the decoded unit is stored in 'self.unit'.
        """
        if self.default_unit is None:
            return self.value
        if self.value_no_unit is None:
            self.decode_cp2k_unit_and_value()
        return self.value_no_unit

    def get_unit(self):
        """Returns the unit of this keyword.

        The unit decoded from the value is preferred. If there is none, the
        default unit is returned exactly as stored, i.e. without decoding it.
        If there is no default unit either, an error is logged and None is
        returned.
        """
        if self.unit is None:
            self.decode_cp2k_unit_and_value()
        if self.unit is not None:
            return self.unit
        if self.default_unit is not None:
            return self.default_unit

        logger.error("The keyword '{}' does not have a unit.".format(self.default_name))
        return None

    def decode_cp2k_unit_and_value(self):
        """Splits the stored value into a unit and a bare value.

        A value may start with a unit definition in square brackets, e.g.
        "[angstrom] 1.0". If it does, the unit is decoded into 'self.unit' and
        the rest is stored in 'self.value_no_unit'. Otherwise the whole value
        is stored in 'self.value_no_unit' and, if a default unit exists, the
        decoded default unit is stored in 'self.unit'. Does nothing if no
        value has been set.
        """
        if self.value is None:
            return

        splitted = self.value.split(None, 1)
        # An empty or whitespace-only value has no tokens at all.
        unit_definition = splitted[0] if splitted else ""
        if unit_definition.startswith('[') and unit_definition.endswith(']'):
            self.unit = CP2KInput.decode_cp2k_unit(unit_definition[1:-1])
            # A unit definition that is not followed by any value leaves an
            # empty value instead of raising an IndexError.
            self.value_no_unit = splitted[1] if len(splitted) > 1 else ""
        elif self.default_unit:
            logger.debug("No special unit definition found, returning default unit.")
            self.unit = CP2KInput.decode_cp2k_unit(self.default_unit)
            self.value_no_unit = self.value
        else:
            logger.debug("The value has no unit, returning bare value.")
            self.value_no_unit = self.value
383
384


385
class SectionParameters(InputObject):
    """Section parameters in a CP2K calculation.

    Section parameters are the short values that can be added right after a
    section name, e.g. &PRINT ON, where ON is the section parameter.
    """
    __slots__ = ['lone_keyword_value', 'accessed']

    def __init__(self, default_value, lone_keyword_value):
        super(SectionParameters, self).__init__("SECTION_PARAMETERS")
        self.default_value = default_value
        self.lone_keyword_value = lone_keyword_value
        # Stays None until a caller assigns the accessed state of the section.
        self.accessed = None

    def get_value(self):
        """Returns the value for this section parameter. Uses the user given
        value primarily. If no value was given, returns the lone keyword value
        when the parameter is marked as accessed, and the default value
        otherwise.
        """
        if self.value is not None:
            return self.value
        if self.accessed:
            return self.lone_keyword_value
        return self.default_value
411
412
413
414
415


class DefaultKeyword(InputObject):
    """Default keyword in the CP2K input.
    """
    __slots__ = ['lone_value']

    def __init__(self):
        super(DefaultKeyword, self).__init__("DEFAULT_KEYWORD")
        self.lone_value = None
        # Starts as an empty string (not None) so that text can be appended
        # to it with '+='.
        self.value = ""