parser.py 6.41 KB
Newer Older
1
2
from builtins import next
from builtins import range
3
4
5
import os
import re
import logging
6
import importlib
7
from nomadcore.baseclasses import ParserInterface
8
9
10
11

# Needs to be imported in order for the importlib calls to work in python 2.7
import cp2kparser.versions.cp2k262.singlepointparser

12
logger = logging.getLogger("nomad")
13
14
15
16
17


class CP2KParser(ParserInterface):
    """Entry point that selects and sets up the correct CP2K main parser.

    The class inspects the beginning of the CP2K output file to find out which
    CP2K version produced it and which RUN_TYPE was used, and then
    instantiates a matching main parser implementation.

    After the implementation has been setup, you can parse files with
    parse().
    """

    def __init__(self, metainfo_to_keep=None, backend=None, default_units=None, metainfo_units=None, debug=False, log_level=logging.ERROR, store=True):
        """Forward all arguments unchanged to the ParserInterface constructor."""
        super(CP2KParser, self).__init__(metainfo_to_keep, backend, default_units, metainfo_units, debug, log_level, store)

    def setup_version(self):
        """Setups the version by looking at the output file and the version
        specified in it.

        At most the first 50 lines of the main file are scanned for the CP2K
        version string and the run type. The file service is then pointed to
        the directory of the main file and the main parser is set up.

        Raises:
            RuntimeError: If the version specification or the run type cannot
                be found within the scanned lines.
        """
        # Search for the CP2K version specification and the RUN_TYPE for the
        # calculation. The correct and optimized parser is initialized based on
        # this information.
        regex_version = re.compile(r" CP2K\| version string:\s+CP2K version ([\d\.]+)")
        regex_run_type = re.compile(r"\s+GLOBAL\| Run type\s+(.+)")
        n_lines = 50
        version_id = None
        run_type = None
        with open(self.parser_context.main_file, 'r') as outputfile:
            # Iterate the file object instead of calling next() a fixed number
            # of times, so that files shorter than n_lines do not raise
            # StopIteration.
            for i_line, line in enumerate(outputfile):
                if i_line >= n_lines:
                    break
                result_version = regex_version.match(line)
                result_run_type = regex_run_type.match(line)
                if result_version:
                    # '2.6.2' -> '262'
                    version_id = result_version.group(1).replace('.', '')
                if result_run_type:
                    # Strip so that trailing whitespace cannot break the
                    # run type lookup later on.
                    run_type = result_run_type.group(1).strip()

        # No exception is active here, so logger.error is used instead of
        # logger.exception (which would attach an empty traceback).
        if version_id is None:
            msg = "Could not find a version specification from the given main file."
            logger.error(msg)
            raise RuntimeError(msg)
        if run_type is None:
            msg = "Could not find a run type specification from the given main file."
            logger.error(msg)
            raise RuntimeError(msg)

        # Setup the root folder to the fileservice that is used to access files
        dirpath, filename = os.path.split(self.parser_context.main_file)
        dirpath = os.path.abspath(dirpath)
        self.parser_context.file_service.setup_root_folder(dirpath)
        self.parser_context.file_service.set_file_id(filename, "output")

        # Setup the correct main parser based on the version id. If no match
        # for the version is found, use the main parser for CP2K 2.6.2
        self.setup_main_parser({"version_id": version_id, "run_type": run_type})

    @staticmethod
    def get_mainfile_regex():
        """Return a regular expression string that matches the five-line
        'PROGRAM STARTED ...' banner of a CP2K output file.
        """
        # Raw strings keep the regex escapes (\*, \s) intact without relying
        # on invalid string escape sequences. The line breaks are added as
        # separate normal literals so that the resulting string still
        # contains real newline characters.
        regex_str = (
            r"  \*\*\*\* \*\*\*\* \*\*\*\*\*\*  \*\*  PROGRAM STARTED AT\s.*" "\n"
            r" \*\*\*\*\* \*\* \*\*\*  \*\*\* \*\*   PROGRAM STARTED ON\s*.*" "\n"
            r" \*\*    \*\*\*\*   \*\*\*\*\*\*    PROGRAM STARTED BY .*" "\n"
            r" \*\*\*\*\* \*\*    \*\* \*\* \*\*   PROGRAM PROCESS ID .*" "\n"
            r"  \*\*\*\* \*\*  \*\*\*\*\*\*\*  \*\*  PROGRAM STARTED IN .*" "\n"
        )
        return regex_str

    def get_metainfo_filename(self):
        """Return the name of the metainfo file used by this parser."""
        return "cp2k.nomadmetainfo.json"

    def get_parser_info(self):
        """Return a dictionary with the name and version of this parser."""
        return {'name': 'cp2k-parser', 'version': '1.0'}

    def setup_main_parser(self, version_dictionary):
        """
        Setups a main parser class for this calculation. The main class can be
        different for each version and run type.

        Args:
            version_dictionary: A dictionary with the following keys:

                version_id: A string representing the CP2K version. The
                    version number is originally a string of the form
                    '2.6.2', but here the dots are removed and the numbers
                    are concatenated, e.g. '262'.
                run_type: A string that identifies the RUN_TYPE for the
                    calculation. All the possible run types can be found in
                    the CP2K reference manual.

        Returns:
            None. The instantiated main parser is stored in self.main_parser.

        Raises:
            KeyError: If no parser is registered for the given run type.
            ImportError: If neither the version specific module nor the
                CP2K 2.6.2 fallback module can be imported.
            AttributeError: If the imported module does not contain the
                expected parser class.
        """
        run_type = version_dictionary["run_type"]
        version_id = version_dictionary["version_id"]

        # Search for a RUN_TYPE specific parser
        parser_map = {
            "ENERGY": "SinglePointParser",
            "ENERGY_FORCE": "SinglePointParser",
            "WAVEFUNCTION_OPTIMIZATION": "SinglePointParser",
            "WFN_OPT": "SinglePointParser",
            "GEO_OPT": "GeoOptParser",
            "GEOMETRY_OPTIMIZATION": "GeoOptParser",
            "MD": "MDParser",
            "MOLECULAR_DYNAMICS": "MDParser",
        }
        try:
            parser = parser_map[run_type]
        except KeyError:
            logger.exception(
                "A parser corresponding to the run_type '{}' could not be found."
                .format(run_type)
            )
            raise

        # The version id consists only of digits, so it can directly be mapped
        # into a package name.
        base = "cp2kparser.versions.cp2k{}.{}".format(version_id, parser.lower())
        try:
            parser_module = importlib.import_module(base)
        except ImportError:
            logger.warning(
                "Could not find a parser for version '{}' and run type '{}'. "
                "Trying to default to the base implementation for CP2K 2.6.2"
                .format(version_id, run_type)
            )
            base = "cp2kparser.versions.cp2k262.{}".format(parser.lower())
            try:
                parser_module = importlib.import_module(base)
            except ImportError:
                logger.exception(
                    "Tried to default to the CP2K 2.6.2 implementation but "
                    "could not find the correct modules for run_type '{}'."
                    .format(run_type)
                )
                raise

        # The class name is built up front so that it can be reported in the
        # log message if the lookup fails.
        class_name = "CP2K{}".format(parser)
        try:
            parser_class = getattr(parser_module, class_name)
        except AttributeError:
            logger.exception(
                "A parser class '{}' could not be found in the module '{}'."
                .format(class_name, base)
            )
            raise

        self.main_parser = parser_class(self.parser_context.main_file, self.parser_context)