parser.py 5.62 KB
Newer Older
1
2
from builtins import next
from builtins import range
3
4
5
import os
import re
import logging
6
import importlib
7
from nomadcore.baseclasses import ParserInterface
8
logger = logging.getLogger("nomad")
9
10
11
12
13
14


#===============================================================================
class CP2KParser(ParserInterface):
    """This class handles the initial setup before any parsing can happen. It
    determines which version of CP2K was used to generate the output and then
    sets up a correct main parser.

    After the implementation has been setup, you can parse the files with
    parse().
    """

    def __init__(self, main_file, metainfo_to_keep=None, backend=None, default_units=None, metainfo_units=None, debug=True, store=True):
        """Forward all arguments unchanged to the ParserInterface constructor.

        Args:
            main_file: Path to the main CP2K output file.
            metainfo_to_keep, backend, default_units, metainfo_units, debug,
            store: Passed through as-is; their meaning is defined by
                ParserInterface.
        """
        super(CP2KParser, self).__init__(main_file, metainfo_to_keep, backend, default_units, metainfo_units, debug, store)

    def setup_version(self):
        """Setups the version by looking at the output file and the version
        specified in it.

        The first lines of the main file are scanned for the CP2K version
        string and the run type. Both values are then handed to
        setup_main_parser().

        Raises:
            RuntimeError: If the version or the run type could not be found
                within the scanned header lines.
        """
        # Search for the CP2K version specification and the RUN_TYPE for the
        # calculation. The correct and optimized parser is initialized based on
        # this information.
        regex_version = re.compile(r" CP2K\| version string:\s+CP2K version ([\d\.]+)")
        regex_run_type = re.compile(r"\s+GLOBAL\| Run type\s+(.+)")
        n_lines = 50
        version_id = None
        run_type = None
        with open(self.parser_context.main_file, 'r') as outputfile:
            # Iterate over the file itself instead of calling next() a fixed
            # number of times: a file shorter than n_lines must not end in an
            # uncaught StopIteration.
            for i_line, line in enumerate(outputfile):
                if i_line >= n_lines:
                    break
                result_version = regex_version.match(line)
                result_run_type = regex_run_type.match(line)
                if result_version:
                    # '2.6.2' -> '262'
                    version_id = result_version.group(1).replace('.', '')
                if result_run_type:
                    # Strip so that trailing whitespace cannot break the
                    # run type lookup later on.
                    run_type = result_run_type.group(1).strip()
        if version_id is None:
            msg = "Could not find a version specification from the given main file."
            # No exception is active here, so use error() instead of
            # exception() to avoid logging an empty traceback.
            logger.error(msg)
            raise RuntimeError(msg)
        if run_type is None:
            msg = "Could not find a run type specification from the given main file."
            logger.error(msg)
            raise RuntimeError(msg)

        # Setup the root folder to the fileservice that is used to access files
        dirpath, filename = os.path.split(self.parser_context.main_file)
        dirpath = os.path.abspath(dirpath)
        self.parser_context.file_service.setup_root_folder(dirpath)
        self.parser_context.file_service.set_file_id(filename, "output")

        # Setup the correct main parser based on the version id. If no match
        # for the version is found, use the main parser for CP2K 2.6.2
        self.setup_main_parser({"version_id": version_id, "run_type": run_type})

    def get_metainfo_filename(self):
        """Return the name of the metainfo file used by this parser."""
        return "cp2k.nomadmetainfo.json"

    def get_parser_info(self):
        """Return a dictionary with the name and version of this parser."""
        return {'name': 'cp2k-parser', 'version': '1.0'}

    def setup_main_parser(self, version_dictionary):
        """
        Setups a main parser class for this calculation. The main class can be
        different for each version and run type. The instantiated parser is
        stored in self.main_parser; nothing is returned.

        Args:
            version_dictionary: A dictionary with the following keys:
                "version_id": A string of digits representing the CP2K
                    version. The version number is originally a string of
                    the form '2.6.2', but here the dots are removed so that
                    it becomes '262'.
                "run_type": A string that identifies the RUN_TYPE for the
                    calculation. All the possible run types can be found in
                    the CP2K reference manual.

        Raises:
            KeyError: If there is no parser for the given run type.
            ImportError: If neither the version specific module nor the
                default CP2K 2.6.2 module could be imported.
            AttributeError: If the imported module does not contain the
                expected parser class.
        """
        run_type = version_dictionary["run_type"]
        version_id = version_dictionary["version_id"]

        # Search for a RUN_TYPE specific parser
        parser_map = {
            "ENERGY": "SinglePointParser",
            "ENERGY_FORCE": "SinglePointParser",
            "WAVEFUNCTION_OPTIMIZATION": "SinglePointParser",
            "WFN_OPT": "SinglePointParser",
            "GEO_OPT": "GeoOptParser",
            "GEOMETRY_OPTIMIZATION": "GeoOptParser",
            "MD": "MDParser",
            "MOLECULAR_DYNAMICS": "MDParser",
        }
        try:
            parser = parser_map[run_type]
        except KeyError:
            logger.exception("A parser corresponding to the run_type '{}' could not be found.".format(run_type))
            raise

        # The version id consists of digits only, so it can directly be mapped
        # into a package name.
        module_name = parser.lower()
        class_name = "CP2K{}".format(parser)
        base = "cp2kparser.versions.cp2k{}.{}".format(version_id, module_name)
        try:
            parser_module = importlib.import_module(base)
        except ImportError:
            logger.warning("Could not find a parser for version '{}' and run type '{}'. Trying to default to the base implementation for CP2K 2.6.2".format(version_id, run_type))
            base = "cp2kparser.versions.cp2k262.{}".format(module_name)
            try:
                parser_module = importlib.import_module(base)
            except ImportError:
                logger.exception("Tried to default to the CP2K 2.6.2 implementation but could not find the correct modules for run_type '{}'.".format(run_type))
                raise
        try:
            parser_class = getattr(parser_module, class_name)
        except AttributeError:
            # Report the name that was looked up and the module that was
            # searched.
            logger.exception("A parser class '{}' could not be found in the module '{}'.".format(class_name, parser_module))
            raise

        self.main_parser = parser_class(self.parser_context.main_file, self.parser_context)