parser.py 7.09 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
# Copyright 2015-2018 Lauri Himanen, Fawzi Mohamed, Ankit Kariryaa
# 
#   Licensed under the Apache License, Version 2.0 (the "License");
#   you may not use this file except in compliance with the License.
#   You may obtain a copy of the License at
# 
#     http://www.apache.org/licenses/LICENSE-2.0
# 
#   Unless required by applicable law or agreed to in writing, software
#   distributed under the License is distributed on an "AS IS" BASIS,
#   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#   See the License for the specific language governing permissions and
#   limitations under the License.

15
16
from builtins import next
from builtins import range
17
18
19
import os
import re
import logging
20
import importlib
21
from nomadcore.baseclasses import ParserInterface
22
23
24
25

# Needs to be imported in order for the importlib calls to work in python 2.7
import cp2kparser.versions.cp2k262.singlepointparser

26
logger = logging.getLogger("nomad")
27
28
29
30
31


class CP2KParser(ParserInterface):
    """This class handles the initial setup before any parsing can happen. It
    determines which version of CP2K was used to generate the output and then
    sets up a correct main parser.

    After the implementation has been setup, you can parse files with
    parse().
    """

    def __init__(self, metainfo_to_keep=None, backend=None, default_units=None, metainfo_units=None, debug=False, log_level=logging.ERROR, store=True):
        """Forwards all arguments unchanged to the ParserInterface
        constructor.
        """
        super(CP2KParser, self).__init__(metainfo_to_keep, backend, default_units, metainfo_units, debug, log_level, store)

    def setup_version(self):
        """Setups the version by looking at the output file and the version
        specified in it.

        The first lines of the main file are scanned for the CP2K version
        string and the run type. The file service is then pointed at the
        directory of the main file, and the main parser is selected with
        setup_main_parser().

        Raises:
            RuntimeError: If the version string or the run type cannot be
                found within the scanned lines of the main file.
        """
        # Search for the CP2K version specification and the RUN_TYPE for the
        # calculation. The correct and optimized parser is initialized based on
        # this information.
        regex_version = re.compile(r" CP2K\| version string:\s+CP2K version ([\d\.]+)")
        regex_run_type = re.compile(r"\s+GLOBAL\| Run type\s+(.+)")

        # Only the beginning of the file is inspected.
        n_lines = 100
        version_id = None
        run_type = None
        with open(self.parser_context.main_file, 'r') as outputfile:
            for _ in range(n_lines):
                try:
                    line = next(outputfile)
                except StopIteration:
                    # The file is shorter than n_lines.
                    break
                result_version = regex_version.match(line)
                result_run_type = regex_run_type.match(line)
                if result_version:
                    # E.g. '2.6.2' becomes the string '262'.
                    version_id = result_version.group(1).replace('.', '')
                if result_run_type:
                    # Strip surrounding whitespace so that trailing blanks do
                    # not break the run type lookup in setup_main_parser().
                    run_type = result_run_type.group(1).strip()

        # logger.error is used instead of logger.exception because there is
        # no active exception to attach a traceback from at this point.
        if version_id is None:
            msg = "Could not find a version specification from the given main file."
            logger.error(msg)
            raise RuntimeError(msg)
        if run_type is None:
            msg = "Could not find a run type specification from the given main file."
            logger.error(msg)
            raise RuntimeError(msg)

        # Setup the root folder to the fileservice that is used to access files
        dirpath, filename = os.path.split(self.parser_context.main_file)
        dirpath = os.path.abspath(dirpath)
        self.parser_context.file_service.setup_root_folder(dirpath)
        self.parser_context.file_service.set_file_id(filename, "output")

        # Setup the correct main parser based on the version id. If no match
        # for the version is found, use the main parser for CP2K 2.6.2
        self.setup_main_parser({"version_id": version_id, "run_type": run_type})

    @staticmethod
    def get_mainfile_regex():
        """Returns a regular expression string that matches the five
        consecutive 'PROGRAM STARTED ...' / 'PROGRAM PROCESS ID' banner lines.
        """
        # The regex escapes live in raw literals so that they are not treated
        # as (invalid) string escapes. The line breaks are kept as separate
        # non-raw "\n" literals so that the resulting string still contains
        # real newline characters.
        regex_str = (
            r"  \*\*\*\* \*\*\*\* \*\*\*\*\*\*  \*\*  PROGRAM STARTED AT\s.*" "\n"
            r" \*\*\*\*\* \*\* \*\*\*  \*\*\* \*\*   PROGRAM STARTED ON\s*.*" "\n"
            r" \*\*    \*\*\*\*   \*\*\*\*\*\*    PROGRAM STARTED BY .*" "\n"
            r" \*\*\*\*\* \*\*    \*\* \*\* \*\*   PROGRAM PROCESS ID .*" "\n"
            r"  \*\*\*\* \*\*  \*\*\*\*\*\*\*  \*\*  PROGRAM STARTED IN .*" "\n"
        )
        return regex_str

    def get_metainfo_filename(self):
        """Returns the name of the metainfo file used by this parser."""
        return "cp2k.nomadmetainfo.json"

    def get_parser_info(self):
        """Returns a dictionary with the name and version of this parser."""
        return {'name': 'cp2k-parser', 'version': '1.0'}

    def setup_main_parser(self, version_dictionary):
        """
        Setups a main parser class for this calculation. The main class can be
        different for each version and run type.

        Args:
            version_dictionary: A dictionary with the following keys:

                version_id: A string representing the CP2K version. The
                    version number is originally a string of the form
                    '2.6.2', but here the dots have been removed so that the
                    numbers are concatenated, e.g. '262'.
                run_type: A string that identifies the RUN_TYPE for the
                    calculation. All the possible run types can be found in
                    the CP2K reference manual.

        Returns:
            Nothing. The selected parser class is instantiated with the parser
            context and stored in self.main_parser.

        Raises:
            KeyError: If no parser is registered for the run type.
            ImportError: If neither the version specific module nor the
                CP2K 2.6.2 fallback module can be imported.
            AttributeError: If the imported module does not define the
                expected parser class.
        """
        run_type = version_dictionary["run_type"]
        version_id = version_dictionary["version_id"]

        # Search for a RUN_TYPE specific parser
        parser_map = {
            "ENERGY": "SinglePointParser",
            "ENERGY_FORCE": "SinglePointParser",
            "WAVEFUNCTION_OPTIMIZATION": "SinglePointParser",
            "WFN_OPT": "SinglePointParser",
            "GEO_OPT": "GeoOptParser",
            "GEOMETRY_OPTIMIZATION": "GeoOptParser",
            "MD": "MDParser",
            "MOLECULAR_DYNAMICS": "MDParser",
        }
        try:
            parser = parser_map[run_type]
        except KeyError:
            logger.exception(
                "A parser corresponding to the run_type '{}' could not be found."
                .format(run_type)
            )
            raise

        # The version id contains only digits, so it can directly be mapped
        # into a package name.
        base = "cp2kparser.versions.cp2k{}.{}".format(version_id, parser.lower())
        try:
            parser_module = importlib.import_module(base)
        except ImportError:
            logger.warning(
                "Could not find a parser for version '{}' and run type '{}'. "
                "Trying to default to the base implementation for CP2K 2.6.2"
                .format(version_id, run_type)
            )
            base = "cp2kparser.versions.cp2k262.{}".format(parser.lower())
            try:
                parser_module = importlib.import_module(base)
            except ImportError:
                logger.exception(
                    "Tried to default to the CP2K 2.6.2 implementation but "
                    "could not find the correct modules for run_type '{}'."
                    .format(run_type)
                )
                raise

        # The class name is the mapped parser name prefixed with 'CP2K'.
        class_name = "CP2K{}".format(parser)
        try:
            parser_class = getattr(parser_module, class_name)
        except AttributeError:
            logger.exception(
                "A parser class '{}' could not be found in the module '{}'."
                .format(class_name, parser_module.__name__)
            )
            raise

        self.main_parser = parser_class(self.parser_context)