parser.py 6.47 KB
Newer Older
1
2
from builtins import next
from builtins import range
3
4
5
import os
import re
import logging
6
import importlib
7
from nomadcore.baseclasses import ParserInterface
8
9
10
11

# Needs to be imported in order for the importlib calls to work in python 2.7
import cp2kparser.versions.cp2k262.singlepointparser

12
# Module-level logger used by this file; it is obtained under the name "nomad".
logger = logging.getLogger("nomad")
13
14
15
16
17


class CP2KParser(ParserInterface):
    """This class handles the initial setup before any parsing can happen. It
    determines which version of CP2K was used to generate the output, and
    which RUN_TYPE the calculation had, and then sets up a correct main
    parser.

    After the implementation has been setup, you can parse files with
    parse().
    """
    def __init__(self, metainfo_to_keep=None, backend=None, default_units=None,
                 metainfo_units=None, debug=False, log_level=logging.ERROR,
                 store=True):
        """Forwards all arguments unchanged to the ParserInterface
        constructor.
        """
        super(CP2KParser, self).__init__(
            metainfo_to_keep, backend, default_units, metainfo_units, debug,
            log_level, store
        )

    def setup_version(self):
        """Setups the version by looking at the output file and the version
        specified in it.

        The first 100 lines of the main file are searched for the CP2K
        version string and for the run type. Both are then handed over to
        setup_main_parser() which selects the actual parser implementation.

        Raises:
            RuntimeError: If either the version string or the run type could
                not be found from the beginning of the main file.
        """
        # Search for the CP2K version specification and the RUN_TYPE for the
        # calculation. The correct and optimized parser is initialized based on
        # this information.
        regex_version = re.compile(r" CP2K\| version string:\s+CP2K version ([\d\.]+)")
        regex_run_type = re.compile(r"\s+GLOBAL\| Run type\s+(.+)")
        n_lines = 100
        version_id = None
        run_type = None
        with open(self.parser_context.main_file, 'r') as outputfile:
            # zip() stops when either the line budget or the file is
            # exhausted, so at most n_lines lines are ever read.
            for _, line in zip(range(n_lines), outputfile):
                result_version = regex_version.match(line)
                result_run_type = regex_run_type.match(line)
                if result_version:
                    # '2.6.2' -> '262', which is used as a package name suffix
                    version_id = result_version.group(1).replace('.', '')
                if result_run_type:
                    # Strip trailing whitespace so that the value can be used
                    # directly as a lookup key in setup_main_parser()
                    run_type = result_run_type.group(1).strip()

        # These checks are not inside an exception handler, so logger.error is
        # used instead of logger.exception (there is no traceback to attach).
        if version_id is None:
            msg = "Could not find a version specification from the given main file."
            logger.error(msg)
            raise RuntimeError(msg)
        if run_type is None:
            msg = "Could not find a run type specification from the given main file."
            logger.error(msg)
            raise RuntimeError(msg)

        # Setup the root folder to the fileservice that is used to access files
        dirpath, filename = os.path.split(self.parser_context.main_file)
        dirpath = os.path.abspath(dirpath)
        self.parser_context.file_service.setup_root_folder(dirpath)
        self.parser_context.file_service.set_file_id(filename, "output")

        # Setup the correct main parser based on the version id. If no match
        # for the version is found, use the main parser for CP2K 2.6.2
        self.setup_main_parser({"version_id": version_id, "run_type": run_type})

    @staticmethod
    def get_mainfile_regex():
        """Returns a regular expression string that matches the five
        'PROGRAM STARTED/PROCESS ...' banner lines of a CP2K output file.
        """
        # Raw literals keep the regex escapes (\*, \s) intact without relying
        # on invalid string escape sequences. The line breaks are added as
        # separate non-raw literals so that the returned string still contains
        # real newline characters.
        regex_str = (
            r"  \*\*\*\* \*\*\*\* \*\*\*\*\*\*  \*\*  PROGRAM STARTED AT\s.*" "\n"
            r" \*\*\*\*\* \*\* \*\*\*  \*\*\* \*\*   PROGRAM STARTED ON\s*.*" "\n"
            r" \*\*    \*\*\*\*   \*\*\*\*\*\*    PROGRAM STARTED BY .*" "\n"
            r" \*\*\*\*\* \*\*    \*\* \*\* \*\*   PROGRAM PROCESS ID .*" "\n"
            r"  \*\*\*\* \*\*  \*\*\*\*\*\*\*  \*\*  PROGRAM STARTED IN .*" "\n"
        )
        return regex_str

    def get_metainfo_filename(self):
        """Returns the name of the metainfo file used by this parser."""
        return "cp2k.nomadmetainfo.json"

    def get_parser_info(self):
        """Returns a dictionary with the name and version of this parser."""
        return {'name': 'cp2k-parser', 'version': '1.0'}

    def setup_main_parser(self, version_dictionary):
        """
        Setups a main parser class for this calculation. The main class can be
        different for each version and run type.

        The parser module is looked up from the package
        'cp2kparser.versions.cp2k<version_id>'. If no module exists for the
        given version, the implementation for CP2K 2.6.2 is used instead.

        Args:
            version_dictionary: A dictionary with the following keys:

                version_id: The CP2K version with the dots removed. The
                    version number is originally a string of the form
                    '2.6.2', but here the numbers are just concatenated
                    into '262'.
                run_type: A string that identifies the RUN_TYPE for the
                    calculation. All the possible run types can be found in
                    the CP2K reference manual.

        Returns:
            None. The instantiated main parser is stored in self.main_parser.

        Raises:
            KeyError: If there is no parser for the given run type.
            ImportError: If neither the version specific module nor the
                CP2K 2.6.2 default module could be imported.
            AttributeError: If the imported module does not contain the
                expected parser class.
        """
        run_type = version_dictionary["run_type"]
        version_id = version_dictionary["version_id"]

        # Search for a RUN_TYPE specific parser
        parser_map = {
            "ENERGY": "SinglePointParser",
            "ENERGY_FORCE": "SinglePointParser",
            "WAVEFUNCTION_OPTIMIZATION": "SinglePointParser",
            "WFN_OPT": "SinglePointParser",
            "GEO_OPT": "GeoOptParser",
            "GEOMETRY_OPTIMIZATION": "GeoOptParser",
            "MD": "MDParser",
            "MOLECULAR_DYNAMICS": "MDParser",
        }
        try:
            parser = parser_map[run_type]
        except KeyError:
            logger.exception(
                "A parser corresponding to the run_type '{}' could not be found."
                .format(run_type)
            )
            raise

        # Currently the version id is a pure integer, so it can directly be mapped
        # into a package name.
        base = "cp2kparser.versions.cp2k{}.{}".format(version_id, parser.lower())
        try:
            parser_module = importlib.import_module(base)
        except ImportError:
            logger.warning(
                "Could not find a parser for version '{}' and run type '{}'. "
                "Trying to default to the base implementation for CP2K 2.6.2"
                .format(version_id, run_type)
            )
            base = "cp2kparser.versions.cp2k262.{}".format(parser.lower())
            try:
                parser_module = importlib.import_module(base)
            except ImportError:
                logger.exception(
                    "Tried to default to the CP2K 2.6.2 implementation but "
                    "could not find the correct modules for run_type '{}'."
                    .format(run_type)
                )
                raise

        # The class name is resolved before the lookup so that it is available
        # for the error message if the lookup fails.
        class_name = "CP2K{}".format(parser)
        try:
            parser_class = getattr(parser_module, class_name)
        except AttributeError:
            logger.exception(
                "A parser class '{}' could not be found in the module '{}'."
                .format(class_name, parser_module.__name__)
            )
            raise

        self.main_parser = parser_class(self.parser_context)