diff --git a/parser/parser-cp2k/cp2kparser/generic/inputparsing.py b/parser/parser-cp2k/cp2kparser/generic/inputparsing.py
index e7bb96899fd6056fc67d73da6ba8f93ccb3f0778..8e2c5d220904a69c801a55576080027bfe4d4797 100644
--- a/parser/parser-cp2k/cp2kparser/generic/inputparsing.py
+++ b/parser/parser-cp2k/cp2kparser/generic/inputparsing.py
@@ -1,3 +1,4 @@
+import numpy as np
 import logging
 from collections import defaultdict
 logger = logging.getLogger("nomad")
@@ -39,17 +40,30 @@ class CP2KInput(object):
 
     def set_parameter(self, path, value):
+        """Set the section parameter of the section at path, if there is one."""
         parameter, section = self.get_parameter_and_section(path)
-        parameter.value = value
+        # Without a section there is no parameter either, so warn only once
+        if section is None:
+            logger.warning("The CP2K input does not contain a section {}".format(path))
+        elif parameter is None:
+            logger.warning("The CP2K input section {} does not contain a SECTION_PARAMETER".format(path))
+        else:
+            parameter.value = value
 
     def set_keyword(self, path, value):
         keyword, section = self.get_keyword_and_section(path)
+        # If keyword found, put data in there
         if keyword and section:
             keyword.value = value
+        # Keyword not found in the input tree, assuming it is a default keyword
         elif section is not None:
-            # print "Saving default keyword at path '{}'".format(path)
             split_path = path.rsplit("/", 1)
             keyword = split_path[1]
-            section.default_keyword += keyword + " " + value + "\n"
+            if section.default_keyword is not None:
+                # print "Saving default keyword at path '{}'".format(path)
+                section.default_keyword.value += keyword + " " + value + "\n"
+            else:
+                message = "The CP2K input does not contain the keyword {}, and there is no default keyword for the section {}".format(path, split_path[0])
+                logger.warning(message)
 
     def get_section(self, path):
         split_path = path.split("/")
@@ -57,7 +71,8 @@ class CP2KInput(object):
         for part in split_path:
             section = section.get_subsection(part)
             if not section:
-                print "Error in getting section at path '{}'.".format(path)
+                message = "The CP2K input does not contain the section {}".format(path)
+                logger.warning(message)
                 return None
         return section
 
@@ -66,12 +81,17 @@ class CP2KInput(object):
         keyword = split_path[1]
         section_path = split_path[0]
         section = self.get_section(section_path)
+
+        if section is None:
+            message = "The CP2K input does not contain the section {}".format(path)
+            logger.warning(message)
+            return (None, None)
+
         keyword = section.get_keyword(keyword)
         if keyword and section:
             return (keyword, section)
-        elif section:
+        else:
             return (None, section)
-        return (None, None)
 
     def get_keyword(self, path):
         """Returns the keyword that is specified by the given path.
@@ -83,15 +103,18 @@ class CP2KInput(object):
             if keyword.value is not None:
                 return keyword.get_value()
             else:
-                # if section.accessed:
                 return keyword.default_value
 
     def get_default_keyword(self, path):
-        return self.get_section(path).default_keyword
+        return self.get_section(path).default_keyword.value
 
     def set_section_accessed(self, path):
         section = self.get_section(path)
-        section.accessed = True
+        if section:
+            section.accessed = True
+        else:
+            message = "The CP2K input does not contain the section {}".format(path)
+            logger.warning(message)
 
     def get_keyword_default(self, path):
         keyword, section = self.get_keyword_and_section(path)
@@ -110,8 +133,13 @@ class CP2KInput(object):
 
     def get_parameter_and_section(self, path):
         section = self.get_section(path)
-        parameter = section.section_parameter
-        return (parameter, section)
+        if section is None:
+            return (None, None)
+        if section.section_parameter is not None:
+            parameter = section.section_parameter
+            return (parameter, section)
+        else:
+            return (None, section)
 
     def get_parameter(self, path):
         parameter, section = self.get_parameter_and_section(path)
@@ -123,18 +151,68 @@ class CP2KInput(object):
 
 
 #===============================================================================
-class Keyword(object):
-    """Information about a keyword in a CP2K calculation.
+class InputObject(object):
+    """Base class for all kinds of data elements in the CP2K input.
     """
-    __slots__ = ['value', 'unit', 'value_no_unit', 'default_name', 'default_value', 'default_unit']
+    __slots__ = ['name', 'value', 'default_value', 'description', 'data_type', 'data_dimension']
 
-    def __init__(self, default_name, default_value, default_unit_value):
+    def __init__(self, name):
+        self.name = name
         self.value = None
+        self.description = None
+        self.data_type = None
+        self.data_dimension = None
+        self.default_value = None
+
+
+#===============================================================================
+class Keyword(InputObject):
+    """Information about a keyword in a CP2K calculation.
+    """
+    __slots__ = ['unit', 'value_no_unit', 'default_unit', 'default_name']
+
+    def __init__(self, name, default_value,  default_unit, default_name):
+        super(Keyword, self).__init__(name)
         self.unit = None
         self.value_no_unit = None
-        self.default_name = default_name
+        self.default_unit = default_unit
         self.default_value = default_value
-        self.default_unit = default_unit_value
+        self.default_name = default_name
+
+    def get_formatted_value(self):
+        """Convert the raw string in self.value into its typed representation.
+
+        The tokens are cast according to self.data_type and returned as a numpy
+        array, or as a bare scalar when there is exactly one token. Returns None
+        (after logging an error) for an unknown type or a failed conversion.
+        """
+        tokens = self.value.split()
+        # A missing dimension or -1 (variable length) cannot be checked
+        dim = -1 if self.data_dimension is None else int(self.data_dimension)
+        if dim != -1 and len(tokens) != dim:
+            logger.error("The dimensions of the CP2K input parameter {} do not match the specification in the XML file.".format(self.name))
+
+        # Spellings that count as a true logical; anything else is false
+        truthy = ("T", ".T.", "TRUE", ".TRUE.", "Y", "YES", "ON", "1")
+        converters = {
+            "integer": int,
+            "real": float,
+            "logical": lambda x: x.upper() in truthy,
+        }
+        if self.data_type in ("word", "keyword", "string"):
+            returned = np.array(tokens)
+        elif self.data_type in converters:
+            try:
+                returned = np.array([converters[self.data_type](x) for x in tokens])
+            except (TypeError, ValueError):
+                logger.error("The CP2K input parameter {} could not be converted to the type specified in the XML file.".format(self.name))
+                return
+        else:
+            logger.error("Unknown data type '{}'".format(self.data_type))
+            return
+
+        # A single value is handed back as a scalar instead of a 1-element array
+        return returned[0] if len(returned) == 1 else returned
 
     def get_value(self):
         """If the units of this value can be changed, return a value and the
@@ -174,18 +252,20 @@ class Keyword(object):
 
 
 #===============================================================================
-class Section(object):
+class Section(InputObject):
     """An input section in a CP2K calculation.
     """
-    __slots__ = ['accessed', 'name', 'keywords', 'default_keyword', 'section_parameter', 'sections']
+    __slots__ = ['accessed', 'name', 'keywords', 'default_keyword_names', 'default_keyword', 'section_parameter', 'sections', 'description']
 
     def __init__(self, name):
         self.accessed = False
         self.name = name
         self.keywords = defaultdict(list)
-        self.default_keyword = ""
+        self.default_keyword_names = []
+        self.default_keyword = None
         self.section_parameter = None
         self.sections = defaultdict(list)
+        self.description = None
 
     def get_keyword(self, name):
         keyword = self.keywords.get(name)
@@ -207,15 +287,26 @@ class Section(object):
 
 
 #===============================================================================
-class SectionParameters(object):
+class SectionParameters(InputObject):
     """Section parameters in a CP2K calculation.
 
     Section parameters are the short values that can be added right after a
     section name, e.g. &PRINT ON, where ON is the section parameter.
     """
-    __slots__ = ['value', 'default_value', 'lone_value']
+    __slots__ = ['lone_keyword_value']
 
-    def __init__(self, default_value, lone_value):
-        self.value = None
+    def __init__(self, default_value, lone_keyword_value):
+        super(SectionParameters, self).__init__("SECTION_PARAMETERS")
         self.default_value = default_value
-        self.lone_value = lone_value
+        self.lone_keyword_value = lone_keyword_value
+
+
+#===============================================================================
+class DefaultKeyword(InputObject):
+    """Holds the section contents that match none of the named keywords.
+
+    The value starts as an empty string instead of None.
+    """
+    def __init__(self):
+        super(DefaultKeyword, self).__init__("DEFAULT_KEYWORD")
+        self.value, self.lone_value = "", None
diff --git a/parser/parser-cp2k/cp2kparser/tools/xmlpreparser.py b/parser/parser-cp2k/cp2kparser/tools/xmlpreparser.py
index b75b79494a3365c3f411f75cd0dd4b26444ba9b3..a33a026d7d6a4318dc95e8f5b19c919582a01595 100644
--- a/parser/parser-cp2k/cp2kparser/tools/xmlpreparser.py
+++ b/parser/parser-cp2k/cp2kparser/tools/xmlpreparser.py
@@ -15,21 +15,38 @@ cp2k_input.xml.
 
 import xml.etree.cElementTree as ET
 import logging
+import json
 import cPickle as pickle
 from cp2kparser.generic.inputparsing import *
 logger = logging
 
 
 #===============================================================================
-def generate_object_tree(xml_file):
+def generate_object_tree(xml_file, for_metainfo=False):
 
     xml_element = ET.parse(xml_file)
-    object_tree = recursive_tree_generation(xml_element)
+
+    # Leave out certain currently uninteresting parts of the input. These can be
+    # added later if need be.
+    root = xml_element.getroot()
+    # ignored = ["ATOM", "DEBUG", "EXT_RESTART", "FARMING", "OPTIMIZE_BASIS", "OPTIMIZE_INPUT", "SWARM", "TEST"]
+    # removed = []
+    # for child in root:
+        # name = child.find("NAME")
+        # if name is not None:
+            # name_string = name.text
+            # if name_string in ignored:
+                # removed.append(child)
+    # for child in removed:
+        # root.remove(child)
+
+    # Recursively generate the tree
+    object_tree = recursive_tree_generation(root, for_metainfo)
     return object_tree
 
 
 #===============================================================================
-def recursive_tree_generation(xml_element):
+def recursive_tree_generation(xml_element, for_metainfo=False, name_stack=[]):
 
     # Make new section object for the root
     section_name_element = xml_element.find("NAME")
@@ -39,6 +56,17 @@ def recursive_tree_generation(xml_element):
         section_name = "CP2K_INPUT"
     section = Section(section_name)
 
+    # Ignore sections that control the print settings
+    ignored = ["EACH", "PRINT"]
+    if section_name in ignored:
+        return
+
+    if for_metainfo:
+        # Descriptions
+        description = xml_element.find("DESCRIPTION")
+        if description is not None:
+            section.description = description.text
+
     # Section parameters
     parameter = xml_element.find("SECTION_PARAMETERS")
     if parameter:
@@ -53,9 +81,51 @@ def recursive_tree_generation(xml_element):
         parameter_object = SectionParameters(sp_default_value, sp_lone_value)
         section.section_parameter = parameter_object
 
+        # Data type
+        data_type = parameter.find("DATA_TYPE")
+        if data_type is not None:
+            data_type_kind = data_type.get("kind")
+            parameter_object.data_type = data_type_kind
+
+            # Data dimension
+            data_dim = data_type.find("N_VAR")
+            if data_dim is not None:
+                parameter_object.data_dimension = data_dim.text
+
+        if for_metainfo:
+            # Description
+            section_param_description = parameter.find("DESCRIPTION")
+            if section_param_description is not None:
+                parameter_object.description = section_param_description.text
+
+    # Default keyword
+    default_keyword_element = xml_element.find("DEFAULT_KEYWORD")
+    if default_keyword_element is not None:
+        default_keyword_object = DefaultKeyword()
+
+        # Data type
+        data_type = default_keyword_element.find("DATA_TYPE")
+        if data_type is not None:
+            data_type_kind = data_type.get("kind")
+            default_keyword_object.data_type = data_type_kind
+
+            # Data dimension
+            data_dim = data_type.find("N_VAR")
+            if data_dim is not None:
+                default_keyword_object.data_dimension = data_dim.text
+
+        if for_metainfo:
+            # Description
+            description = default_keyword_element.find("DESCRIPTION")
+            if description is not None:
+                default_keyword_object.description = description.text
+
+        section.default_keyword = default_keyword_object
+
     # Keywords
     for keyword in xml_element.findall("KEYWORD"):
         keyword_names = keyword.findall("NAME")
+
         default_name = None
         aliases = []
         for name in keyword_names:
@@ -64,6 +134,14 @@ def recursive_tree_generation(xml_element):
                 default_name = name.text
             else:
                 aliases.append(name.text)
+
+        # Ignore hidden keywords
+        if default_name.startswith("__"):
+            continue
+
+        # Save the default keyword name
+        section.default_keyword_names.append(default_name)
+
         default_keyword_element = keyword.find("DEFAULT_VALUE")
         default_keyword_value = None
         if default_keyword_element is not None:
@@ -74,24 +152,136 @@ def recursive_tree_generation(xml_element):
         if default_unit_element is not None:
             default_unit_value = default_unit_element.text
 
-        keyword_object = Keyword(default_name, default_keyword_value, default_unit_value)
+        keyword_object = Keyword(default_name, default_keyword_value, default_unit_value, default_name)
         section.keywords[default_name].append(keyword_object)
         for alias in aliases:
             section.keywords[alias].append(keyword_object)
 
+        # Data type
+        data_type = keyword.find("DATA_TYPE")
+        if data_type is not None:
+            data_type_kind = data_type.get("kind")
+            keyword_object.data_type = data_type_kind
+
+            # Data dimension
+            data_dim = data_type.find("N_VAR")
+            if data_dim is not None:
+                keyword_object.data_dimension = data_dim.text
+
+        if for_metainfo:
+            # Description
+            keyword_description = keyword.find("DESCRIPTION")
+            if keyword_description is not None:
+                keyword_object.description = keyword_description.text
+
     # Sections
     for sub_section_element in xml_element.findall("SECTION"):
-        sub_section = recursive_tree_generation(sub_section_element)
-        section.sections[sub_section.name].append(sub_section)
+        sub_section = recursive_tree_generation(sub_section_element, for_metainfo)
+        if sub_section is not None:
+            section.sections[sub_section.name].append(sub_section)
 
     # Return section
     return section
 
+
+#===============================================================================
+def generate_input_metainfos(object_tree):
+    """Write the metainfo of the whole input tree to input_metainfo.json."""
+    root = object_tree.root_section
+    root.name = "CP2K_INPUT"
+    root.description = "The section containing all information that is explicitly stated in the CP2K input file. The sections that control printing (PRINT, EACH) and the hidden input keywords starting with a double underscore are not included."
+    definitions = []
+    # The root has no real parent section, so a placeholder is passed instead
+    generate_metainfo_recursively(root, Section("dummy"), definitions, [])
+    with open("input_metainfo.json", "w") as fout:
+        fout.write(json.dumps(definitions, indent=2, separators=(',', ': ')))
+
+
+#===============================================================================
+def generate_metainfo_recursively(obj, parent, container, name_stack):
+    """Append the metainfo of obj, and of everything below it, to container.
+
+    Keywords are looked up by default name, because obj.keywords holds an
+    extra entry per alias and would yield aliased keywords more than once.
+    """
+    if not isinstance(obj, Section):
+        container.append(generate_input_object_metainfo_json(obj, parent, name_stack))
+        return
+    name_stack.append(obj.name)
+    metainfo = generate_section_metainfo_json(obj, parent, name_stack)
+    children = [subsections[0] for subsections in obj.sections.itervalues()]
+    children += [obj.keywords[name][0] for name in obj.default_keyword_names]
+    children += [x for x in (obj.section_parameter, obj.default_keyword) if x is not None]
+    for child in children:
+        generate_metainfo_recursively(child, obj, container, name_stack)
+    name_stack.pop()
+    container.append(metainfo)
+
+
+#===============================================================================
+def generate_input_object_metainfo_json(child, parent, name_stack):
+    """Build the metainfo dictionary of a keyword-like input object.
+
+    Args:
+        child: Object providing name, description, data_type and
+            data_dimension.
+        parent: Not used by this function.
+        name_stack: Names of the enclosing sections, outermost first.
+
+    Returns:
+        Dictionary with the keys name, superNames, description, shape and
+        dtypeStr.
+    """
+    # Type names used in the XML mapped to the dtypeStr codes
+    type_codes = {"integer": "i", "real": "f"}
+    type_codes.update(dict.fromkeys(["keyword", "logical", "string", "word"], "C"))
+    section_path = ".".join(name_stack)
+
+    info = {}
+    info["name"] = "x_cp2k_{}.{}".format(section_path, child.name)
+    info["superNames"] = ["x_cp2k_{}".format(section_path)]
+    info["description"] = "" if child.description is None else child.description
+
+    # A single value is a scalar and gets an empty shape
+    n_values = int(child.data_dimension)
+    if n_values == 1:
+        info["shape"] = []
+    else:
+        # A dimension of -1 is written out as the symbolic length "n"
+        info["shape"] = ["n" if n_values == -1 else n_values]
+    info["dtypeStr"] = type_codes[child.data_type]
+    return info
+
+
+#===============================================================================
+def generate_section_metainfo_json(child, parent, name_stack):
+    """Build the metainfo dictionary of a section.
+
+    The last entry of name_stack is the section itself; the entries before it
+    form the name of its super section. The parent argument is not used.
+    """
+    info = {}
+    info["name"] = "x_cp2k_{}".format(".".join(name_stack))
+    info["kindStr"] = "type_section"
+    # NOTE(review): a top-level section gets the bare super name "x_cp2k_" --
+    # confirm that this is intended.
+    info["superNames"] = ["x_cp2k_{}".format(".".join(name_stack[:-1]))]
+    info["description"] = "" if child.description is None else child.description
+    return info
+
+
 #===============================================================================
 # Run main function by default
 if __name__ == "__main__":
+
+    # xml to pickle
     xml_file = open("../versions/cp2k262/input_data/cp2k_input.xml", 'r')
     object_tree = CP2KInput(generate_object_tree(xml_file))
     file_name = "../versions/cp2k262/input_data/cp2k_input_tree.pickle"
     fh = open(file_name, "wb")
     pickle.dump(object_tree, fh, protocol=2)
+
+    # Metainfo generation
+    # xml_file = open("../versions/cp2k262/input_data/cp2k_input.xml", 'r')
+    # object_tree = CP2KInput(generate_object_tree(xml_file, for_metainfo=True))
+    # generate_input_metainfos(object_tree)
diff --git a/parser/parser-cp2k/cp2kparser/versions/cp2k262/commonmatcher.py b/parser/parser-cp2k/cp2kparser/versions/cp2k262/commonmatcher.py
index cdbe11958c42e466fdfd6c72d21031f0f1405a93..419fa69c14e0cce38e1d87d1aec480b187b5577c 100644
--- a/parser/parser-cp2k/cp2kparser/versions/cp2k262/commonmatcher.py
+++ b/parser/parser-cp2k/cp2kparser/versions/cp2k262/commonmatcher.py
@@ -95,6 +95,13 @@ class CommonMatcher(object):
 
     #===========================================================================
     # Section close triggers
+    def onClose_section_run(self, backend, gIndex, section):
+        """Called when section_run is closed, i.e. at the end of parsing.
+        Pushes the values that do not depend on the contents of the
+        calculation; currently only program_basis_set_type.
+        """
+        backend.addValue("program_basis_set_type", "gaussian")
+
     def onClose_section_method(self, backend, gIndex, section):
         """When all the functional definitions have been gathered, matches them
         with the nomad correspondents and combines into one single string which
diff --git a/parser/parser-cp2k/cp2kparser/versions/cp2k262/input_data/cp2k_input_tree.pickle b/parser/parser-cp2k/cp2kparser/versions/cp2k262/input_data/cp2k_input_tree.pickle
index d21fe61ea02c59423d41c202a946c569b1565ce4..0cc86b1228da78bc7b1b3caf5117bec280518257 100644
Binary files a/parser/parser-cp2k/cp2kparser/versions/cp2k262/input_data/cp2k_input_tree.pickle and b/parser/parser-cp2k/cp2kparser/versions/cp2k262/input_data/cp2k_input_tree.pickle differ
diff --git a/parser/parser-cp2k/cp2kparser/versions/cp2k262/inputparser.py b/parser/parser-cp2k/cp2kparser/versions/cp2k262/inputparser.py
index 48dec90ca4812f7b3d930d39f531f1b72264d926..9ed4cc057a4b279f0fb3e216a6a7791af40af331 100644
--- a/parser/parser-cp2k/cp2kparser/versions/cp2k262/inputparser.py
+++ b/parser/parser-cp2k/cp2kparser/versions/cp2k262/inputparser.py
@@ -27,15 +27,35 @@ class CP2KInputParser(BasicParser):
     some point.
     """
     def __init__(self, file_path, parser_context):
+        """
+        Attributes:
+            input_tree: The input structure for this version of CP2K. The
+                structure is already present, in this module it will be filled with
+                data found from the input file.
+            input_lines: List of preprocessed lines in the input. Here all the
+                variables have been stated explicitly and the additional input files have
+                been merged.
+        """
         super(CP2KInputParser, self).__init__(file_path, parser_context)
-        self.root_section = None
         self.input_tree = None
+        self.input_lines = None
+        self.force_file_name = None
 
     def parse(self):
 
+        #=======================================================================
+        # Preprocess to spell out variables and to include stuff from other
+        # files
+        self.preprocess_input()
+
+        #=======================================================================
         # Gather the information from the input file
         self.fill_input_tree(self.file_path)
 
+        #=======================================================================
+        # Parse everything in the input to cp2k specific metadata
+        self.fill_metadata()
+
         #=======================================================================
         # Parse the used XC_functionals and their parameters
         xc = self.input_tree.get_section("FORCE_EVAL/DFT/XC/XC_FUNCTIONAL")
@@ -120,8 +140,9 @@ class CP2KInputParser(BasicParser):
 
         #=======================================================================
         # Single point force file name
-        force_file = self.input_tree.get_keyword("FORCE_EVAL/PRINT/FORCES/FILENAME")
-        if force_file != "__STD_OUT__":
+        # force_file = self.input_tree.get_keyword("FORCE_EVAL/PRINT/FORCES/FILENAME")
+        force_file = self.force_file_name
+        if force_file is not None and force_file != "__STD_OUT__":
             force_file_path = self.normalize_cp2k_path(force_file, "xyz")
             self.file_service.set_file_id(force_file_path, "force_file_single_point")
 
@@ -181,44 +202,102 @@ class CP2KInputParser(BasicParser):
 
         self.setup_version(self.parser_context.version_id)
         section_stack = []
+        self.input_tree.root_section.accessed = True
+
+        for line in self.input_lines:
+            line = line.split('!', 1)[0].strip()
+
+            # Skip empty lines
+            if len(line) == 0:
+                continue
+
+            # Section ends
+            if line.upper().startswith('&END'):
+                section_stack.pop()
+            # Section starts
+            elif line[0] == '&':
+                parts = line.split(' ', 1)
+                name = parts[0][1:].upper()
+                section_stack.append(name)
+
+                # Form the path
+                path = ""
+                for index, item in enumerate(section_stack):
+                    if index != 0:
+                        path += '/'
+                    path += item
+
+                # Mark the section as accessed.
+                self.input_tree.set_section_accessed(path)
+
+                # Save the section parameters
+                if len(parts) > 1:
+                    self.input_tree.set_parameter(path, parts[1].strip().upper())
+
+            # Ignore variables and includes that might still be here for some
+            # reason
+            elif line.upper().startswith('@'):
+                continue
+
+            # Contents (keywords, default keywords)
+            else:
+                split = line.split(' ', 1)
+                keyword_name = split[0].upper()
+                keyword_value = split[1]
+                self.input_tree.set_keyword(path + "/" + keyword_name, keyword_value)
+
+                # Here we store some exceptional print settings that are
+                # important to the parsing. These don't exist in the input tree
+                # because they take much space and are not really important
+                # otherwise.
+                if path == "FORCE_EVAL/PRINT/FORCES":
+                    if keyword_name == "FILENAME":
+                        self.force_file_name = keyword_value
+
+    def fill_metadata(self):
+        """Pushes the accessed parts of the input tree to the backend,
+        starting the recursion from the root section with an empty path.
+        """
+        root = self.input_tree.root_section
+        self.fill_metadata_recursively(root, [])
+
+    def fill_metadata_recursively(self, section, name_stack):
+        """Recursively goes through the input sections and pushes everything to the
+        backend.
+        """
+        if not section.accessed:
+            return
 
-        with open(file_path) as inp:
-            for line in inp:
-                line = line.split('!', 1)[0].strip()
+        name_stack.append(section.name)
+        path = "x_cp2k_{}".format(".".join(name_stack))
 
-                # Skip empty lines
-                if len(line) == 0:
-                    continue
+        gid = self.backend.openSection(path)
 
-                # Section ends
-                if line.upper().startswith('&END'):
-                    section_stack.pop()
-                # Section starts
-                elif line[0] == '&':
-                    parts = line.split(' ', 1)
-                    name = parts[0][1:].upper()
-                    section_stack.append(name)
-
-                    # Form the path
-                    path = ""
-                    for index, item in enumerate(section_stack):
-                        if index != 0:
-                            path += '/'
-                        path += item
-
-                    # Mark the section as accessed.
-                    self.input_tree.set_section_accessed(path)
-
-                    # Save the section parameters
-                    if len(parts) > 1:
-                        self.input_tree.set_parameter(path, parts[1].strip().upper())
-
-                # Contents (keywords, default keywords)
-                else:
-                    split = line.split(' ', 1)
-                    keyword_name = split[0].upper()
-                    keyword_value = split[1]
-                    self.input_tree.set_keyword(path + "/" + keyword_name, keyword_value)
+        # Keywords
+        for default_name in section.default_keyword_names:
+            keywords = section.keywords.get(default_name)
+            for keyword in keywords:
+                if keyword.value is not None:
+                    formatted_value = keyword.get_formatted_value()
+                    if formatted_value is not None:
+                        self.backend.addValue("{}.{}".format(path, keyword.default_name), formatted_value)
+
+        # Section parameter
+        if section.section_parameter is not None:
+            self.backend.addValue("{}.SECTION_PARAMETERS".format(path), section.section_parameter.value)
+
+        # Default keyword
+        if section.default_keyword is not None:
+            self.backend.addValue("{}.DEFAULT_KEYWORD".format(path), section.default_keyword.value)
+
+        # Subsections
+        for name, subsections in section.sections.iteritems():
+            for subsection in subsections:
+                self.fill_metadata_recursively(subsection, name_stack)
+
+        self.backend.closeSection(path, gid)
+
+        name_stack.pop()
 
     def setup_version(self, version_number):
         """ The pickle file which contains preparsed data from the
@@ -228,3 +307,87 @@ class CP2KInputParser(BasicParser):
         pickle_path = os.path.dirname(__file__) + "/input_data/cp2k_input_tree.pickle".format(version_number)
         input_tree_pickle_file = open(pickle_path, 'rb')
         self.input_tree = pickle.load(input_tree_pickle_file)
+
+    def preprocess_input(self):
+        """Preprocess the input file and store the resulting lines in
+        self.input_lines.
+
+        The stripped lines of self.file_path go through three passes:
+
+        1. Each @INCLUDE line is replaced by the stripped lines of the file it
+           names, resolved relative to the directory of the main input file.
+           Included files are not themselves scanned for @INCLUDE. If the
+           file cannot be found a warning is logged and the directive line is
+           left where it was.
+        2. Each @SET line is removed and its variable definition is recorded.
+           A later definition of the same name overrides an earlier one.
+        3. Variable references are replaced by the recorded values. Both the
+           '$' sigil and the '@' sigil are accepted, with or without braces.
+           References to undefined variables are logged and left untouched.
+        """
+        # Read the input file into memory. It shouldn't be that big so we can
+        # do this easily
+        with open(self.file_path, "r") as f:
+            input_lines = [line.strip() for line in f]
+
+        # Merge include files to input. A new list is built instead of
+        # splicing into a copy, so there is no running index that could get
+        # out of step when a directive is skipped.
+        basedir = os.path.dirname(self.file_path)
+        extended_input = []
+        for line in input_lines:
+            if not line.upper().startswith("@INCLUDE"):
+                extended_input.append(line)
+                continue
+            split = line.split(None, 1)
+            if len(split) < 2:
+                logger.warning("Found an @INCLUDE directive without a file path in the CP2K input file. Continuing without it.")
+                extended_input.append(line)
+                continue
+            filepath = os.path.abspath(os.path.join(basedir, split[1]))
+            if not os.path.isfile(filepath):
+                logger.warning("Could not find the include file '{}' stated in the CP2K input file. Continuing without it.".format(filepath))
+                extended_input.append(line)
+                continue
+            # The directive line itself is replaced by the included content
+            with open(filepath, "r") as includef:
+                extended_input.extend([included.strip() for included in includef])
+
+        # Gather the variable definitions and drop the @SET lines
+        variables = {}
+        input_set_removed = []
+        for line in extended_input:
+            if not line.upper().startswith("@SET"):
+                input_set_removed.append(line)
+                continue
+            components = line.split(None, 2)
+            if len(components) < 3:
+                logger.warning("Ignoring the malformed @SET directive '{}' in the CP2K input file.".format(line))
+                continue
+            name, value = components[1], components[2]
+            variables[name] = value
+            logger.debug("Variable '{}' found with value '{}'".format(name, value))
+
+        # Place the variables. CP2K references variables as ${NAME} or $NAME;
+        # the '@' forms this method already recognised are matched as well for
+        # backward compatibility.
+        compiled = re.compile(r"[$@]\{(\w+)\}|[$@](\w+)")
+        reserved = ("include", "set", "if", "endif")
+
+        def substitute(match):
+            """Return the replacement text for one variable reference."""
+            reference = match.group(0)
+            name = match.group(1) or match.group(2)
+            # Preprocessor directives such as @IF are not variables
+            if reference.startswith("@") and name.lower() in reserved:
+                return reference
+            value = variables.get(name)
+            if value is None:
+                logger.error("Value for variable '{}' not set.".format(name))
+                return reference
+            return value
+
+        # re.sub runs every replacement against the unmodified line, so no
+        # manual offset bookkeeping is needed when a line holds several
+        # references whose names and values differ in length.
+        self.input_lines = [compiled.sub(substitute, line) for line in input_set_removed]
diff --git a/test/unittests/cp2k_2.6.2/run_tests.py b/test/unittests/cp2k_2.6.2/run_tests.py
index d3d5a37cda31e545efb00c633d68b9e92ba3f732..12ac759d72de1744de2eadf2d4735aa76b1d8ad6 100644
--- a/test/unittests/cp2k_2.6.2/run_tests.py
+++ b/test/unittests/cp2k_2.6.2/run_tests.py
@@ -18,6 +18,13 @@ from cp2kparser import CP2KParser
 from nomadcore.unit_conversion.unit_conversion import convert_unit
 
 # Setup the logger so that it doesn't spam messages during tests
+# Root logger: DEBUG level, records prefixed with file, level, function and
+# line number. The "nomad" logger below is then limited to CRITICAL.
+logging.basicConfig(
+    level=logging.DEBUG,
+    format=('%(filename)s: %(levelname)s: %(funcName)s(): '
+            '%(lineno)d:\t%(message)s'),
+)
 logger = logging.getLogger("nomad")
 logger.setLevel(logging.CRITICAL)
 
@@ -41,8 +48,11 @@ def get_results(folder, metainfo_to_keep=None):
 
 
 #===============================================================================
-def get_result(folder, metaname):
-    results = get_results(folder, metaname)
+def get_result(folder, metaname, optimize=True):
+    # With optimize set, metaname is handed to get_results as metainfo_to_keep;
+    # otherwise None, the default of get_results, is passed.
+    metainfo_to_keep = metaname if optimize else None
+    results = get_results(folder, metainfo_to_keep)
     result = results[metaname]
     return result
 
@@ -63,6 +73,18 @@ class TestErrors(unittest.TestCase):
     def test_unknown_version(self):
         get_result("errors/unknown_version", "XC_functional")
 
+    def test_unknown_input_keyword(self):
+        # No assertion: the test passes as long as get_result does not raise
+        get_result("errors/unknown_input_keyword", "XC_functional")
+
+    def test_unknown_input_section(self):
+        # No assertion: the test passes as long as get_result does not raise
+        get_result("errors/unknown_input_section", "XC_functional")
+
+    def test_unknown_input_section_parameter(self):
+        # No assertion: the test passes as long as get_result does not raise
+        get_result("errors/unknown_input_section_parameter", "XC_functional")
+
 
 #===============================================================================
 class TestXCFunctional(unittest.TestCase):
@@ -125,8 +144,6 @@ class TestForceFiles(unittest.TestCase):
 
     def test_single_point(self):
 
-        # results = get_results("force_file/single_point")
-        # print results._results
         result = get_result("force_file/single_point", "atom_forces")
         expected_result = convert_unit(
             np.array([
@@ -253,6 +270,7 @@ class TestEnergyForce(unittest.TestCase):
     @classmethod
     def setUpClass(cls):
         cls.results = get_results("energy_force", "section_run")
+        # cls.results.print_summary()
 
     def test_energy_total_scf_iteration(self):
         energy_total = self.results["energy_total_scf_iteration"]
@@ -404,6 +422,35 @@ class TestEnergyForce(unittest.TestCase):
         expected_result = convert_unit(7.77641697E+00, "GPa")
         self.assertTrue(np.array_equal(result, expected_result))
 
+    def test_program_basis_set_type(self):
+        result = self.results["program_basis_set_type"]
+        self.assertEqual(result, "gaussian")
+
+
+#===============================================================================
+class TestPreprocessor(unittest.TestCase):
+    """Tests for the preprocessing of the CP2K input file.
+
+    Each test calls get_result with optimize=False, so the parsed metainfo is
+    not restricted to the requested name.
+    """
+
+    def test_include(self):
+        # NOTE(review): presumably PRINT_LEVEL is set in an included file - confirm
+        result = get_result("input_preprocessing/include", "x_cp2k_CP2K_INPUT.GLOBAL.PRINT_LEVEL", optimize=False)
+        self.assertEqual(result, "LOW")
+
+    def test_variable(self):
+        # NOTE(review): presumably PROJECT_NAME is given through a variable - confirm
+        result = get_result("input_preprocessing/variable", "x_cp2k_CP2K_INPUT.GLOBAL.PROJECT_NAME", optimize=False)
+        self.assertEqual(result, "variable_test")
+
+    def test_variable_multiple(self):
+        # NOTE(review): presumably CUTOFF is built from several variables - confirm
+        result = get_result("input_preprocessing/variable_multiple", "x_cp2k_CP2K_INPUT.FORCE_EVAL.DFT.MGRID.CUTOFF", optimize=False)
+        self.assertEqual(result, 50)
+
+
 #===============================================================================
 if __name__ == '__main__':
     pass
@@ -419,5 +458,6 @@ if __name__ == '__main__':
     suites.append(unittest.TestLoader().loadTestsFromTestCase(TestConfigurationPeriodicDimensions))
     suites.append(unittest.TestLoader().loadTestsFromTestCase(TestSCFConvergence))
     suites.append(unittest.TestLoader().loadTestsFromTestCase(TestForceFiles))
+    suites.append(unittest.TestLoader().loadTestsFromTestCase(TestPreprocessor))
     alltests = unittest.TestSuite(suites)
     unittest.TextTestRunner(verbosity=0).run(alltests)