parser.py 6.13 KB
Newer Older
1
2
from builtins import next
from builtins import range
3
4
5
import os
import re
import logging
6
import importlib
7
from nomadcore.baseclasses import ParserInterface
8
logger = logging.getLogger("nomad")
9
10
11
12
13
14


#===============================================================================
class CP2KParser(ParserInterface):
    """Handles the initial setup before any parsing can happen.

    The class determines which version of CP2K was used to generate the
    output and which run type was requested, and then sets up a matching main
    parser.

    After the implementation has been setup, you can parse the files with
    parse().
    """

    def __init__(self, main_file, metainfo_to_keep=None, backend=None, default_units=None, metainfo_units=None, debug=False, log_level=logging.ERROR, store=True):
        """Forwards every argument unchanged to the ParserInterface
        constructor.
        """
        super(CP2KParser, self).__init__(main_file, metainfo_to_keep, backend, default_units, metainfo_units, debug, log_level, store)

    def setup_version(self):
        """Setups the version by looking at the output file and the version
        specified in it.

        At most the first 50 lines of the main file are scanned for the CP2K
        version string and the run type. The folder of the main file is then
        registered as the root folder of the file service and the main parser
        is selected with setup_main_parser().

        Raises:
            RuntimeError: If the version or the run type could not be found
                within the scanned lines.
        """
        # Search for the CP2K version specification and the RUN_TYPE for the
        # calculation. The correct and optimized parser is initialized based on
        # this information.
        regex_version = re.compile(r" CP2K\| version string:\s+CP2K version ([\d\.]+)")
        regex_run_type = re.compile(r"\s+GLOBAL\| Run type\s+(.+)")
        n_lines = 50
        version_id = None
        run_type = None
        with open(self.parser_context.main_file, 'r') as outputfile:
            # Iterate over the file itself instead of calling next() a fixed
            # number of times, so that files shorter than n_lines simply end
            # the scan instead of raising StopIteration.
            for i_line, line in enumerate(outputfile):
                if i_line >= n_lines:
                    break
                result_version = regex_version.match(line)
                result_run_type = regex_run_type.match(line)
                if result_version:
                    version_id = result_version.group(1).replace('.', '')
                if result_run_type:
                    # Surrounding whitespace would break the dictionary lookup
                    # done in setup_main_parser().
                    run_type = result_run_type.group(1).strip()
        if version_id is None:
            msg = "Could not find a version specification from the given main file."
            # No exception is being handled here, so logger.exception() would
            # attach a meaningless empty traceback.
            logger.error(msg)
            raise RuntimeError(msg)
        if run_type is None:
            msg = "Could not find a run type specification from the given main file."
            logger.error(msg)
            raise RuntimeError(msg)

        # Setup the root folder to the fileservice that is used to access files
        dirpath, filename = os.path.split(self.parser_context.main_file)
        dirpath = os.path.abspath(dirpath)
        self.parser_context.file_service.setup_root_folder(dirpath)
        self.parser_context.file_service.set_file_id(filename, "output")

        # Setup the correct main parser based on the version id. If no match
        # for the version is found, use the main parser for CP2K 2.6.2
        self.setup_main_parser({"version_id": version_id, "run_type": run_type})

    @staticmethod
    def get_mainfile_regex():
        """Returns the regular expression string that matches the five
        'PROGRAM ...' banner lines of a CP2K output file.
        """
        # Raw strings keep the regex escapes (\* and \s) intact without relying
        # on Python passing unknown escape sequences through. The newline is
        # appended as a real newline character so that the returned string is
        # identical to the one produced before.
        banner_lines = (
            r"  \*\*\*\* \*\*\*\* \*\*\*\*\*\*  \*\*  PROGRAM STARTED AT\s.*",
            r" \*\*\*\*\* \*\* \*\*\*  \*\*\* \*\*   PROGRAM STARTED ON\s*.*",
            r" \*\*    \*\*\*\*   \*\*\*\*\*\*    PROGRAM STARTED BY .*",
            r" \*\*\*\*\* \*\*    \*\* \*\* \*\*   PROGRAM PROCESS ID .*",
            r"  \*\*\*\* \*\*  \*\*\*\*\*\*\*  \*\*  PROGRAM STARTED IN .*",
        )
        regex_str = "".join(banner_line + "\n" for banner_line in banner_lines)
        return regex_str

    def get_metainfo_filename(self):
        """Returns the name of the metainfo file used by this parser."""
        return "cp2k.nomadmetainfo.json"

    def get_parser_info(self):
        """Returns a dictionary with the name and the version of this parser."""
        return {'name': 'cp2k-parser', 'version': '1.0'}

    def setup_main_parser(self, version_dictionary):
        """
        Setups a main parser class for this calculation. The main class can be
        different for each version and run type.

        Args:
            version_dictionary: A dictionary with the following keys:

                version_id: A string of digits representing the CP2K version.
                    The version number is originally a string of the form
                    '2.6.2', but here the numbers are just concatenated into
                    '262'.
                run_type: A string that identifies the RUN_TYPE for the
                    calculation. All the possible run types can be found in
                    the CP2K reference manual.

        The instantiated main parser is stored in self.main_parser.

        Raises:
            KeyError: If there is no parser for the given run type.
            ImportError: If neither the version specific module nor the
                CP2K 2.6.2 fallback module can be imported.
            AttributeError: If the imported module does not contain the
                expected parser class.
        """
        run_type = version_dictionary["run_type"]
        version_id = version_dictionary["version_id"]

        # Search for a RUN_TYPE specific parser
        parser_map = {
            "ENERGY": "SinglePointParser",
            "ENERGY_FORCE": "SinglePointParser",
            "WAVEFUNCTION_OPTIMIZATION": "SinglePointParser",
            "WFN_OPT": "SinglePointParser",
            "GEO_OPT": "GeoOptParser",
            "GEOMETRY_OPTIMIZATION": "GeoOptParser",
            "MD": "MDParser",
            "MOLECULAR_DYNAMICS": "MDParser",
        }
        try:
            parser = parser_map[run_type]
        except KeyError:
            logger.exception("A parser corresponding to the run_type '{}' could not be found.".format(run_type))
            raise

        # The version id consists only of digits, so it can directly be mapped
        # into a package name.
        base = "cp2kparser.versions.cp2k{}.{}".format(version_id, parser.lower())
        parser_module = None
        try:
            parser_module = importlib.import_module(base)
        except ImportError:
            logger.warning("Could not find a parser for version '{}' and run type '{}'. Trying to default to the base implementation for CP2K 2.6.2".format(version_id, run_type))
            base = "cp2kparser.versions.cp2k262.{}".format(parser.lower())
            try:
                parser_module = importlib.import_module(base)
            except ImportError:
                logger.exception("Tried to default to the CP2K 2.6.2 implementation but could not find the correct modules for run_type '{}'.".format(run_type))
                raise

        # Report the name that was actually looked up; the class object itself
        # is not available when the lookup fails.
        class_name = "CP2K{}".format(parser)
        try:
            parser_class = getattr(parser_module, class_name)
        except AttributeError:
            logger.exception("A parser class '{}' could not be found in the module '{}'.".format(class_name, parser_module))
            raise

        self.main_parser = parser_class(self.parser_context.main_file, self.parser_context)