From d443a581a055ec6057fc59da831eefe6bfd8fce0 Mon Sep 17 00:00:00 2001
From: "Himanen, Lauri (himanel1)" <lauri.himanen@aalto.fi>
Date: Sat, 16 Jan 2016 23:00:21 +0200
Subject: [PATCH] Added nomadtoolkit package for local analysis, refactoring of
 the cp2kparser code.

---
 .gitmodules                                   |   8 +-
 cp2kparser/README.md => README.md             |  46 +-------
 cp2kparser/cp2kparser/__init__.py             |  14 +--
 cp2kparser/cp2kparser/generics/__init__.py    |   1 -
 cp2kparser/cp2kparser/generics/parser.py      |  64 -----------
 .../csvparsing.py}                            |   2 +-
 ...kimplementations.py => implementations.py} |  19 ++--
 .../{cp2kinputparsers.py => inputparsing.py}  |   0
 ...{cp2koutputparsers.py => outputparsing.py} |   4 +-
 .../{cp2kparser.py => parsing.py}             |  28 ++++-
 cp2kparser/cp2kparser/metainfo                |   1 -
 .../cp2kparser/{engines => old}/__init__.py   |   0
 .../{engines => old}/csvengine/tests.py       |   0
 .../{engines => old}/csvengine/xyz/10.xyz     |   0
 .../{engines => old}/csvengine/xyz/100.xyz    |   0
 .../{engines => old}/csvengine/xyz/1000.xyz   | Bin
 .../{engines => old}/csvengine/xyz/10000.xyz  | Bin
 .../{engines => old}/regexengine.py           |   0
 .../cp2kparser/{engines => old}/xmlengine.py  |   0
 .../cp2kparser}/utils/__init__.py             |   0
 .../{generics => utils}/logconfig.py          |   0
 cp2kparser/cp2kparser/utils/parser.py         | 106 ++++++++++++++++++
 .../parserimplementation.py                   |   8 +-
 .../cp2kparser/{generics => utils}/testing.py |   0
 nomadanalysis/README.md                       |   1 -
 nomadanalysis/nomadanalysis.egg-info/PKG-INFO |  10 --
 .../nomadanalysis.egg-info/SOURCES.txt        |   9 --
 .../dependency_links.txt                      |   1 -
 .../nomadanalysis.egg-info/not-zip-safe       |   1 -
 .../nomadanalysis.egg-info/requires.txt       |   2 -
 .../nomadanalysis.egg-info/top_level.txt      |   1 -
 nomadanalysis/nomadanalysis/__init__.py       |   7 --
 .../nomadanalysis/examples/1_basics.py        |  11 --
 nomadtoolkit/README.md                        |  27 +++++
 nomadtoolkit/nomadtoolkit/__init__.py         |   1 +
 .../nomadtoolkit/analysis.py                  |   2 +-
 nomadtoolkit/nomadtoolkit/config.py           |  28 +++++
 .../nomadtoolkit/examples/1_basics.py         |  14 +++
 .../nomadtoolkit}/local_backend.py            |   6 +-
 nomadtoolkit/nomadtoolkit/utils/__init__.py   |   0
 .../nomadtoolkit}/utils/log.py                |   0
 {nomadanalysis => nomadtoolkit}/setup.py      |  12 +-
 nomadtoolkit/submodules/nomad-meta-info       |   1 +
 nomadtoolkit/submodules/python-common         |   1 +
 44 files changed, 243 insertions(+), 193 deletions(-)
 rename cp2kparser/README.md => README.md (68%)
 delete mode 100644 cp2kparser/cp2kparser/generics/__init__.py
 delete mode 100644 cp2kparser/cp2kparser/generics/parser.py
 rename cp2kparser/cp2kparser/{engines/csvengine.py => implementation/csvparsing.py} (99%)
 rename cp2kparser/cp2kparser/implementation/{cp2kimplementations.py => implementations.py} (97%)
 rename cp2kparser/cp2kparser/implementation/{cp2kinputparsers.py => inputparsing.py} (100%)
 rename cp2kparser/cp2kparser/implementation/{cp2koutputparsers.py => outputparsing.py} (99%)
 rename cp2kparser/cp2kparser/implementation/{cp2kparser.py => parsing.py} (71%)
 delete mode 160000 cp2kparser/cp2kparser/metainfo
 rename cp2kparser/cp2kparser/{engines => old}/__init__.py (100%)
 rename cp2kparser/cp2kparser/{engines => old}/csvengine/tests.py (100%)
 rename cp2kparser/cp2kparser/{engines => old}/csvengine/xyz/10.xyz (100%)
 rename cp2kparser/cp2kparser/{engines => old}/csvengine/xyz/100.xyz (100%)
 rename cp2kparser/cp2kparser/{engines => old}/csvengine/xyz/1000.xyz (100%)
 rename cp2kparser/cp2kparser/{engines => old}/csvengine/xyz/10000.xyz (100%)
 rename cp2kparser/cp2kparser/{engines => old}/regexengine.py (100%)
 rename cp2kparser/cp2kparser/{engines => old}/xmlengine.py (100%)
 rename {nomadanalysis/nomadanalysis => cp2kparser/cp2kparser}/utils/__init__.py (100%)
 rename cp2kparser/cp2kparser/{generics => utils}/logconfig.py (100%)
 create mode 100644 cp2kparser/cp2kparser/utils/parser.py
 rename cp2kparser/cp2kparser/{generics => utils}/parserimplementation.py (99%)
 rename cp2kparser/cp2kparser/{generics => utils}/testing.py (100%)
 delete mode 100644 nomadanalysis/README.md
 delete mode 100644 nomadanalysis/nomadanalysis.egg-info/PKG-INFO
 delete mode 100644 nomadanalysis/nomadanalysis.egg-info/SOURCES.txt
 delete mode 100644 nomadanalysis/nomadanalysis.egg-info/dependency_links.txt
 delete mode 100644 nomadanalysis/nomadanalysis.egg-info/not-zip-safe
 delete mode 100644 nomadanalysis/nomadanalysis.egg-info/requires.txt
 delete mode 100644 nomadanalysis/nomadanalysis.egg-info/top_level.txt
 delete mode 100644 nomadanalysis/nomadanalysis/__init__.py
 delete mode 100644 nomadanalysis/nomadanalysis/examples/1_basics.py
 create mode 100644 nomadtoolkit/README.md
 create mode 100644 nomadtoolkit/nomadtoolkit/__init__.py
 rename nomadanalysis/nomadanalysis/analyzer.py => nomadtoolkit/nomadtoolkit/analysis.py (96%)
 create mode 100644 nomadtoolkit/nomadtoolkit/config.py
 create mode 100644 nomadtoolkit/nomadtoolkit/examples/1_basics.py
 rename {nomadanalysis/nomadanalysis => nomadtoolkit/nomadtoolkit}/local_backend.py (91%)
 create mode 100644 nomadtoolkit/nomadtoolkit/utils/__init__.py
 rename {nomadanalysis/nomadanalysis => nomadtoolkit/nomadtoolkit}/utils/log.py (100%)
 rename {nomadanalysis => nomadtoolkit}/setup.py (58%)
 create mode 160000 nomadtoolkit/submodules/nomad-meta-info
 create mode 160000 nomadtoolkit/submodules/python-common

diff --git a/.gitmodules b/.gitmodules
index 59f7d05..0ace117 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -1,3 +1,7 @@
-[submodule "cp2kparser/cp2kparser/metainfo"]
-	path = cp2kparser/cp2kparser/metainfo
+
+[submodule "nomadtoolkit/submodules/nomad-meta-info"]
+	path = nomadtoolkit/submodules/nomad-meta-info
 	url = git@gitlab.mpcdf.mpg.de:nomad-lab/nomad-meta-info.git
+[submodule "nomadtoolkit/submodules/python-common"]
+	path = nomadtoolkit/submodules/python-common
+	url = git@gitlab.mpcdf.mpg.de:nomad-lab/python-common.git
diff --git a/cp2kparser/README.md b/README.md
similarity index 68%
rename from cp2kparser/README.md
rename to README.md
index 2ed3ac7..d4c8b07 100644
--- a/cp2kparser/README.md
+++ b/README.md
@@ -30,49 +30,9 @@
     ```
 
 # Structure
-Currently the python package is divided into three subpackages:
-- Engines: Classes for parsing different type of files
-- Generics: Generic utility classes and base classes
-- Implementation: The classes that actually define the parser functionality.
-
-## Engines
-Basically all the "engines", that is the modules that parse certain type of
-files, are reusable in other parsers. They could be put into a common
-repository where other developers can improve and extend them. One should also
-write tests for the engines that would validate their behaviour and ease the
-performance analysis.
-
-The engine classes work also as interfaces. You can change the engine behaviour
-while maintaining the same API in the parsers. For example one might improve
-the performance of an engine but if the function calls remain the same no other
-code has to be changed.
-
-Currently implemented engines that could be reused (not tested properly yet):
-- AtomsEngine: For reading various atomic coordinate files. Currently uses ASE
-  to read the files.
-- RegexEngine: For parsing text files with regular expressions. Uses the re2
-library if available (falls back to default python regex implementation if
-re2 not found).
-- CSVEngine: For parsing CSV-like files. Has a very
-flexible nature as you can specify comments, column delimiters, column
-indices and the patterns used to separate different configurations.
-- XMLEngine: For parsing XML files using XPath syntax.
-
-## Generics
-In the generics folder there is a module called nomadparser.py that defines a
-class called NomadParser. This acts as a base class for the cp2k parser defined
-in the implementation folder.
-
-The NomadParser class defines the interface which is eventually used by e.g.
-the scala code (will be modified later to conform to the common interface).
-This class is also responsible for some common tasks that are present in all
-parsers:
-
-- Unit conversion
-- JSON encoding
-- Caching
-- Time measurement for performance analysis
-- Providing file contents, sizes and handles
+Currently the python package is divided into the following subpackages:
+- utils: Generic utility classes and base classes
+- implementation: The classes that actually define the parser functionality.
 
 # Tools and Methods
 
diff --git a/cp2kparser/cp2kparser/__init__.py b/cp2kparser/cp2kparser/__init__.py
index 8c2d19d..21526fc 100644
--- a/cp2kparser/cp2kparser/__init__.py
+++ b/cp2kparser/cp2kparser/__init__.py
@@ -1,12 +1,2 @@
-from .implementation.cp2kparser import CP2KParser
-# import cp2kparser.generics.logconfig
-# import cp2kparser.implementation.cp2kparser
-
-# Import classes for easier access. NOTE: Typically the __init__ file is empty.
-# This signifies python that the user can import anything from the
-# subdirectiories. Now that there are imports in the init file, you have to
-# explicitly state all imports that should be available.
-# import generics
-# import implementation
-# import engines
-# from implementation.cp2kparser import CP2KParser
+import cp2kparser.utils.logconfig
+from cp2kparser.implementation.parsing import CP2KParser
diff --git a/cp2kparser/cp2kparser/generics/__init__.py b/cp2kparser/cp2kparser/generics/__init__.py
deleted file mode 100644
index ee3ecd2..0000000
--- a/cp2kparser/cp2kparser/generics/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-#! /usr/bin/env python
diff --git a/cp2kparser/cp2kparser/generics/parser.py b/cp2kparser/cp2kparser/generics/parser.py
deleted file mode 100644
index 0396ac9..0000000
--- a/cp2kparser/cp2kparser/generics/parser.py
+++ /dev/null
@@ -1,64 +0,0 @@
-import os
-import logging
-from abc import ABCMeta, abstractmethod
-from nomadanalysis.local_backend import LocalBackend
-logger = logging.getLogger(__name__)
-
-
-#===============================================================================
-class Parser(object):
-    """
-    """
-    __metaclass__ = ABCMeta
-
-    def __init__(self, dirpath=None, files=None, metainfo_to_keep=None, backend=None):
-        """
-        """
-        self.parser_context = ParserContext()
-        self.parser_context.backend = backend
-        self.parser_context.files = files
-        self.parser_context.backend = backend
-        self.parser_context.metainfo_to_keep = metainfo_to_keep
-        self.implementation = None
-
-        # If directory provided, the interesting files are first identified
-        if dirpath:
-            files = self.search_path(dirpath)
-            self.parser_context.files = files
-
-        # If no backend provided, create one with default metainfos
-        if not backend:
-            metainfo_path = "/home/lauri/Dropbox/nomad-dev/nomad-meta-info/meta_info/nomad_meta_info/cp2k.nomadmetainfo.json"
-            metainfoenv, warnings = loadJsonFile(metainfo_path)
-            backend = LocalBackend(metainfoenv)
-            self.parser_context.backend = LocalBackend()
-
-    @abstractmethod
-    def setup(self):
-        """Deduce the version of the software that was used and setup a correct
-        implementation. The implementations should subclass NomadParser.
-
-        Returns:
-            A NomadParser object that is ready to do the parsing.
-        """
-        pass
-
-    def search_path(self, dirpath):
-        """Searches the given path for files that are of interest to this
-        parser. Returns them as a list of path strings.
-        """
-        files = []
-        for filename in os.listdir(dirpath):
-            files.append(os.path.join(dirpath, filename))
-        return files
-
-
-#===============================================================================
-class ParserContext(object):
-    """Contains everything needed to instantiate a parser implementation.
-    """
-    def __init__(self, files=None, metainfo_to_keep=None, backend=None, version_id=None):
-        self.files = files
-        self.version_id = version_id
-        self.metainfo_to_keep = metainfo_to_keep
-        self.backend = backend
diff --git a/cp2kparser/cp2kparser/engines/csvengine.py b/cp2kparser/cp2kparser/implementation/csvparsing.py
similarity index 99%
rename from cp2kparser/cp2kparser/engines/csvengine.py
rename to cp2kparser/cp2kparser/implementation/csvparsing.py
index 7435327..324d08c 100644
--- a/cp2kparser/cp2kparser/engines/csvengine.py
+++ b/cp2kparser/cp2kparser/implementation/csvparsing.py
@@ -17,7 +17,7 @@ else:
 
 
 #===============================================================================
-class CSVEngine(object):
+class CSVParser(object):
     """Used to parse out freeform CSV-like content.
     Currently only can parse floating point information.
 
diff --git a/cp2kparser/cp2kparser/implementation/cp2kimplementations.py b/cp2kparser/cp2kparser/implementation/implementations.py
similarity index 97%
rename from cp2kparser/cp2kparser/implementation/cp2kimplementations.py
rename to cp2kparser/cp2kparser/implementation/implementations.py
index b738f01..2684756 100644
--- a/cp2kparser/cp2kparser/implementation/cp2kimplementations.py
+++ b/cp2kparser/cp2kparser/implementation/implementations.py
@@ -1,13 +1,12 @@
 import re
 import os
 import logging
-from ..engines.csvengine import CSVEngine
-from ..implementation.cp2kinputparsers import CP2KInputParser
-from ..implementation.cp2kinputenginedata.input_tree import CP2KInput
-from ..implementation.cp2koutputparsers import *
-from ..generics.parserimplementation import ParserImplementation
+from cp2kparser.implementation.csvparsing import CSVParser
+from cp2kparser.implementation.inputparsing import CP2KInputParser
+from cp2kparser.implementation.cp2kinputenginedata.input_tree import CP2KInput
+from cp2kparser.implementation.outputparsing import *
+from cp2kparser.utils.parserimplementation import ParserImplementation
 from nomadcore.coordinate_reader import CoordinateReader
-from nomadcore.unit_conversion.unit_conversion import convert_unit
 logger = logging.getLogger(__name__)
 
 
@@ -22,11 +21,11 @@ class CP2KImplementation262(ParserImplementation):
 
         # Initialize the parsing tools. The input and output parsers need to
         # know the version id.
-        self.csvengine = CSVEngine(self)
+        self.csvengine = CSVParser(self)
         self.atomsengine = CoordinateReader()
         self.inputparser = CP2KInputParser()
         self.inputparser.setup_version(self.version_id)
-        self.outputparser = globals()["CP2KOutputParser{}".format(self.version_id)](self, self.metainfos)
+        self.outputparser = globals()["CP2KOutputParser{}".format(self.version_id)](self, self.metainfo_to_keep)
         self.input_tree = None
         self.extended_input = None
 
@@ -273,11 +272,9 @@ class CP2KImplementation262(ParserImplementation):
         # Use the SimpleMatcher to extract most of the results
         parserInfo = {"name": "cp2k-parser", "version": "1.0"}
         outputfilename = self.get_file_handle("output").name
-        backend = self.backend
-        metainfos = self.metainfos
         outputstructure = self.outputparser.outputstructure
         cachingLevelForMetaName = self.outputparser.cachingLevelForMetaName
-        self.parse_file(outputfilename, outputstructure, metainfos, backend, parserInfo, cachingLevelForMetaName, superContext=self.outputparser)
+        self.parse_file(outputfilename, outputstructure, parserInfo, cachingLevelForMetaName, superContext=self.outputparser)
 
         # Then extract the things that cannot be extracted by the SimpleMatcher
 
diff --git a/cp2kparser/cp2kparser/implementation/cp2kinputparsers.py b/cp2kparser/cp2kparser/implementation/inputparsing.py
similarity index 100%
rename from cp2kparser/cp2kparser/implementation/cp2kinputparsers.py
rename to cp2kparser/cp2kparser/implementation/inputparsing.py
diff --git a/cp2kparser/cp2kparser/implementation/cp2koutputparsers.py b/cp2kparser/cp2kparser/implementation/outputparsing.py
similarity index 99%
rename from cp2kparser/cp2kparser/implementation/cp2koutputparsers.py
rename to cp2kparser/cp2kparser/implementation/outputparsing.py
index 3b599b3..b9b5d31 100644
--- a/cp2kparser/cp2kparser/implementation/cp2koutputparsers.py
+++ b/cp2kparser/cp2kparser/implementation/outputparsing.py
@@ -76,7 +76,7 @@ class CP2KOutputParser262(object):
                                     startReStr=" FUNCTIONAL\|",
                                     forwardMatch=True,
                                     sections=["section_method", "cp2k_section_functionals"],
-                                    otherMetaInfo=["XC_functional"],
+                                    otherMetaInfo=["XC_functional_scf"],
                                     subMatchers=[
                                         SM(
                                             repeats=True,
@@ -244,7 +244,7 @@ class CP2KOutputParser262(object):
         functionals = "_".join(sorted(functionals))
 
         # Push the functional string into the backend
-        backend.addValue('XC_functional', functionals)
+        backend.addValue('XC_functional_scf', functionals)
 
     def onClose_cp2k_section_atom_position(self, backend, gIndex, section):
         """Get the initial atomic positions from cp2kparser.
diff --git a/cp2kparser/cp2kparser/implementation/cp2kparser.py b/cp2kparser/cp2kparser/implementation/parsing.py
similarity index 71%
rename from cp2kparser/cp2kparser/implementation/cp2kparser.py
rename to cp2kparser/cp2kparser/implementation/parsing.py
index 48fa28b..8f88d1a 100644
--- a/cp2kparser/cp2kparser/implementation/cp2kparser.py
+++ b/cp2kparser/cp2kparser/implementation/parsing.py
@@ -1,7 +1,7 @@
 import re
 import logging
-from ..generics.parser import Parser
-from ..implementation.cp2kimplementations import *
+from cp2kparser.utils.parser import Parser
+from cp2kparser.implementation.implementations import *
 logger = logging.getLogger(__name__)
 
 
@@ -17,8 +17,9 @@ class CP2KParser(Parser):
     After the implementation has been setup, you can parse the files with
     parse().
     """
-    def __init__(self, dirpath=None, files=None, metainfo_path=None, backend=None):
-        Parser.__init__(self, dirpath, files, metainfo_path, backend)
+
+    def __init__(self, contents=None, metainfo_to_keep=None, backend=None):
+        Parser.__init__(self, contents, metainfo_to_keep, backend)
 
     def setup(self):
         """Setups the version by looking at the output file and the version
@@ -57,6 +58,25 @@ class CP2KParser(Parser):
             self.parser_context.version_id = "262"
             self.implementation = globals()["CP2KImplementation262"](self.parser_context)
 
+    def search_parseable_files(self, files):
+        """Searches the given path for files that are of interest to this
+        parser. Returns them as a list of path strings.
+        """
+        return files
+
     def parse(self):
         self.setup()
         self.implementation.parse()
+
+    def get_metainfo_filename(self):
+        """This function should return the name of the metainfo file that is
+        specific for this parser. This name is used by the Analyzer class in
+        the nomadtoolkit.
+        """
+        return "cp2k.nomadmetainfo.json"
+
+
+#===============================================================================
+# This is what gets run when the scala layer calls for this parser
+if __name__ == "__main__":
+    print "Moi"
diff --git a/cp2kparser/cp2kparser/metainfo b/cp2kparser/cp2kparser/metainfo
deleted file mode 160000
index 163501e..0000000
--- a/cp2kparser/cp2kparser/metainfo
+++ /dev/null
@@ -1 +0,0 @@
-Subproject commit 163501eabba0fa385f28edcb55aa577de96e7624
diff --git a/cp2kparser/cp2kparser/engines/__init__.py b/cp2kparser/cp2kparser/old/__init__.py
similarity index 100%
rename from cp2kparser/cp2kparser/engines/__init__.py
rename to cp2kparser/cp2kparser/old/__init__.py
diff --git a/cp2kparser/cp2kparser/engines/csvengine/tests.py b/cp2kparser/cp2kparser/old/csvengine/tests.py
similarity index 100%
rename from cp2kparser/cp2kparser/engines/csvengine/tests.py
rename to cp2kparser/cp2kparser/old/csvengine/tests.py
diff --git a/cp2kparser/cp2kparser/engines/csvengine/xyz/10.xyz b/cp2kparser/cp2kparser/old/csvengine/xyz/10.xyz
similarity index 100%
rename from cp2kparser/cp2kparser/engines/csvengine/xyz/10.xyz
rename to cp2kparser/cp2kparser/old/csvengine/xyz/10.xyz
diff --git a/cp2kparser/cp2kparser/engines/csvengine/xyz/100.xyz b/cp2kparser/cp2kparser/old/csvengine/xyz/100.xyz
similarity index 100%
rename from cp2kparser/cp2kparser/engines/csvengine/xyz/100.xyz
rename to cp2kparser/cp2kparser/old/csvengine/xyz/100.xyz
diff --git a/cp2kparser/cp2kparser/engines/csvengine/xyz/1000.xyz b/cp2kparser/cp2kparser/old/csvengine/xyz/1000.xyz
similarity index 100%
rename from cp2kparser/cp2kparser/engines/csvengine/xyz/1000.xyz
rename to cp2kparser/cp2kparser/old/csvengine/xyz/1000.xyz
diff --git a/cp2kparser/cp2kparser/engines/csvengine/xyz/10000.xyz b/cp2kparser/cp2kparser/old/csvengine/xyz/10000.xyz
similarity index 100%
rename from cp2kparser/cp2kparser/engines/csvengine/xyz/10000.xyz
rename to cp2kparser/cp2kparser/old/csvengine/xyz/10000.xyz
diff --git a/cp2kparser/cp2kparser/engines/regexengine.py b/cp2kparser/cp2kparser/old/regexengine.py
similarity index 100%
rename from cp2kparser/cp2kparser/engines/regexengine.py
rename to cp2kparser/cp2kparser/old/regexengine.py
diff --git a/cp2kparser/cp2kparser/engines/xmlengine.py b/cp2kparser/cp2kparser/old/xmlengine.py
similarity index 100%
rename from cp2kparser/cp2kparser/engines/xmlengine.py
rename to cp2kparser/cp2kparser/old/xmlengine.py
diff --git a/nomadanalysis/nomadanalysis/utils/__init__.py b/cp2kparser/cp2kparser/utils/__init__.py
similarity index 100%
rename from nomadanalysis/nomadanalysis/utils/__init__.py
rename to cp2kparser/cp2kparser/utils/__init__.py
diff --git a/cp2kparser/cp2kparser/generics/logconfig.py b/cp2kparser/cp2kparser/utils/logconfig.py
similarity index 100%
rename from cp2kparser/cp2kparser/generics/logconfig.py
rename to cp2kparser/cp2kparser/utils/logconfig.py
diff --git a/cp2kparser/cp2kparser/utils/parser.py b/cp2kparser/cp2kparser/utils/parser.py
new file mode 100644
index 0000000..7e3a097
--- /dev/null
+++ b/cp2kparser/cp2kparser/utils/parser.py
@@ -0,0 +1,106 @@
+import os
+import logging
+from abc import ABCMeta, abstractmethod
+import nomadtoolkit.config
+from nomadcore.local_meta_info import loadJsonFile
+from nomadtoolkit.local_backend import LocalBackend
+logger = logging.getLogger(__name__)
+
+
+#===============================================================================
+class Parser(object):
+    """
+    Attributes:
+        self.implementation: an object that actually does the parsing and is
+            setup by this class based on the given contents.
+    """
+    __metaclass__ = ABCMeta
+    parser_name = None
+
+    def __init__(self, contents, metainfo_to_keep=None, backend=None):
+        """
+        Args:
+            contents: list of absolute filepaths as strings
+            metainfo_to_keep: list of metainfo names to parse as strings.
+            backend: the backend where the parsing results are outputted
+        """
+        self.parser_context = ParserContext()
+        self.parser_context.backend = backend
+        self.parser_context.metainfo_to_keep = metainfo_to_keep
+        self.implementation = None
+
+        # If single path provided, make it into a list
+        if isinstance(contents, basestring):
+            contents = [contents]
+
+        # Figure out all the files from the contents
+        files = set()
+        for content in contents:
+            if os.path.isdir(content):
+                dir_files = set()
+                for filename in os.listdir(content):
+                    dir_files.add(os.path.join(content, filename))
+                files |= dir_files
+            elif os.path.isfile(content):
+                files.add(content)
+            else:
+                logger.error("The string '{}' is not a valid path.".format(content))
+
+        # Filter the files leaving only the parseable ones. Each parser can
+        # specify which files are of interest or to include them all.
+        self.parser_context.files = self.search_parseable_files(files)
+
+        # If no backend provided, create Local one with default metainfos
+        if not backend:
+            metadir = nomadtoolkit.config.get_config("metaInfoPath")
+            default_metainfo_path = os.path.realpath(os.path.join(metadir, self.get_metainfo_filename()))
+            metainfoenv, warnings = loadJsonFile(default_metainfo_path)
+            backend = LocalBackend(metainfoenv)
+            self.parser_context.backend = backend
+
+    @abstractmethod
+    def setup(self):
+        """Deduce the version of the software that was used and setup a correct
+        implementation. The implementations should subclass
+        ParserImplementation and be stored to the 'implementation' attribute of
+        this class. You can give the parser_context wrapper object in the
+        parser implementation constructor to pass all the relevant data onto
+        the implementation.
+        """
+        pass
+
+    @abstractmethod
+    def search_parseable_files(self, files):
+        """From a list of filenames tries to guess which files are relevant to
+        the parsing process. Essentially filters the files before they are sent
+        to the parser implementation.
+        """
+        return files
+
+    @abstractmethod
+    def get_metainfo_filename(self):
+        """This function should return the name of the metainfo file that is
+        specific for this parser. This name is used by the Analyzer class in
+        the nomadtoolkit.
+        """
+        return None
+
+    @abstractmethod
+    def parse(self):
+        """Starts the actual parsing process outputting the results to the
+        backend.
+        """
+        self.setup()
+        if not self.implementation:
+            logger.error("No parser implementation has been setup.")
+
+
+#===============================================================================
+class ParserContext(object):
+    """Contains everything needed to instantiate a parser implementation.
+    """
+    def __init__(self, files=None, metainfo_to_keep=None, backend=None, version_id=None):
+        self.files = files
+        self.version_id = version_id
+        self.metainfo_to_keep = metainfo_to_keep
+        self.backend = backend
diff --git a/cp2kparser/cp2kparser/generics/parserimplementation.py b/cp2kparser/cp2kparser/utils/parserimplementation.py
similarity index 99%
rename from cp2kparser/cp2kparser/generics/parserimplementation.py
rename to cp2kparser/cp2kparser/utils/parserimplementation.py
index 61b90c4..ca2fb80 100644
--- a/cp2kparser/cp2kparser/generics/parserimplementation.py
+++ b/cp2kparser/cp2kparser/utils/parserimplementation.py
@@ -66,8 +66,6 @@ class ParserImplementation(object):
             self,
             fileToParse,
             mainFileDescription,
-            metainfos,
-            backend,
             parserInfo,
             cachingLevelForMetaName={},
             defaultDataCachingLevel=CachingLevel.ForwardAndCache,
@@ -79,8 +77,12 @@ class ParserImplementation(object):
         Args:
         Returns:
         """
+
+        metainfo_to_keep = self.metainfo_to_keep
+        backend = self.backend
+
         # Initialize the parser builder
-        parserBuilder = SimpleParserBuilder(mainFileDescription, backend.metaInfoEnv(), metainfos)
+        parserBuilder = SimpleParserBuilder(mainFileDescription, backend.metaInfoEnv(), metainfo_to_keep)
         if logger.isEnabledFor(logging.DEBUG):
             s = StringIO.StringIO()
             s.write("matchers:")
diff --git a/cp2kparser/cp2kparser/generics/testing.py b/cp2kparser/cp2kparser/utils/testing.py
similarity index 100%
rename from cp2kparser/cp2kparser/generics/testing.py
rename to cp2kparser/cp2kparser/utils/testing.py
diff --git a/nomadanalysis/README.md b/nomadanalysis/README.md
deleted file mode 100644
index bf317aa..0000000
--- a/nomadanalysis/README.md
+++ /dev/null
@@ -1 +0,0 @@
-# Nomad Analysis
diff --git a/nomadanalysis/nomadanalysis.egg-info/PKG-INFO b/nomadanalysis/nomadanalysis.egg-info/PKG-INFO
deleted file mode 100644
index 082b26c..0000000
--- a/nomadanalysis/nomadanalysis.egg-info/PKG-INFO
+++ /dev/null
@@ -1,10 +0,0 @@
-Metadata-Version: 1.0
-Name: nomadanalysis
-Version: 0.1
-Summary: Tools for analysing calculation results parsed by NOMAD parsers.
-Home-page: UNKNOWN
-Author: Lauri Himanen
-Author-email: lauri.himanen@gmail.com
-License: GPL3
-Description: UNKNOWN
-Platform: UNKNOWN
diff --git a/nomadanalysis/nomadanalysis.egg-info/SOURCES.txt b/nomadanalysis/nomadanalysis.egg-info/SOURCES.txt
deleted file mode 100644
index e5e018a..0000000
--- a/nomadanalysis/nomadanalysis.egg-info/SOURCES.txt
+++ /dev/null
@@ -1,9 +0,0 @@
-setup.py
-nomadanalysis/__init__.py
-nomadanalysis/analyzer.py
-nomadanalysis.egg-info/PKG-INFO
-nomadanalysis.egg-info/SOURCES.txt
-nomadanalysis.egg-info/dependency_links.txt
-nomadanalysis.egg-info/not-zip-safe
-nomadanalysis.egg-info/requires.txt
-nomadanalysis.egg-info/top_level.txt
\ No newline at end of file
diff --git a/nomadanalysis/nomadanalysis.egg-info/dependency_links.txt b/nomadanalysis/nomadanalysis.egg-info/dependency_links.txt
deleted file mode 100644
index 8b13789..0000000
--- a/nomadanalysis/nomadanalysis.egg-info/dependency_links.txt
+++ /dev/null
@@ -1 +0,0 @@
-
diff --git a/nomadanalysis/nomadanalysis.egg-info/not-zip-safe b/nomadanalysis/nomadanalysis.egg-info/not-zip-safe
deleted file mode 100644
index 8b13789..0000000
--- a/nomadanalysis/nomadanalysis.egg-info/not-zip-safe
+++ /dev/null
@@ -1 +0,0 @@
-
diff --git a/nomadanalysis/nomadanalysis.egg-info/requires.txt b/nomadanalysis/nomadanalysis.egg-info/requires.txt
deleted file mode 100644
index e076fe3..0000000
--- a/nomadanalysis/nomadanalysis.egg-info/requires.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-pint
-numpy
\ No newline at end of file
diff --git a/nomadanalysis/nomadanalysis.egg-info/top_level.txt b/nomadanalysis/nomadanalysis.egg-info/top_level.txt
deleted file mode 100644
index 5f449b7..0000000
--- a/nomadanalysis/nomadanalysis.egg-info/top_level.txt
+++ /dev/null
@@ -1 +0,0 @@
-nomadanalysis
diff --git a/nomadanalysis/nomadanalysis/__init__.py b/nomadanalysis/nomadanalysis/__init__.py
deleted file mode 100644
index 786cde3..0000000
--- a/nomadanalysis/nomadanalysis/__init__.py
+++ /dev/null
@@ -1,7 +0,0 @@
-#! /usr/bin/env python
-
-# This will activate the logging utilities for nomadanalysis
-import utils.log
-
-# Import the common classes here for less typing
-from .analyzer import Analyzer
diff --git a/nomadanalysis/nomadanalysis/examples/1_basics.py b/nomadanalysis/nomadanalysis/examples/1_basics.py
deleted file mode 100644
index 1592424..0000000
--- a/nomadanalysis/nomadanalysis/examples/1_basics.py
+++ /dev/null
@@ -1,11 +0,0 @@
-from nomadanalysis import Analyzer
-from cp2kparser import CP2KParser
-
-# Initialize the parser you want to use
-parser = CP2KParser()
-parser.dirpath = "/home/lauri/Dropbox/nomad-dev/parser-cp2k/cp2kparser/cp2kparser/tests/cp2k_2.6.2/forces/outputfile/n"
-parser.metainto_to_keep = ["section_run"]
-
-# Initialize the analyzer
-analyzer = Analyzer(parser)
-results = analyzer.parse()
diff --git a/nomadtoolkit/README.md b/nomadtoolkit/README.md
new file mode 100644
index 0000000..6adc1a1
--- /dev/null
+++ b/nomadtoolkit/README.md
@@ -0,0 +1,27 @@
+# Nomad Toolkit
+This is a package that contains the necessary tools for running the nomad parsers
+locally. It contains the python-common and nomad-meta-info repositories as
+submodules for easier installation.
+
+This package does not contain any of the parsers themselves. You should
+download and install them separately. The parsers should have one main class
+that inherits from the 'Parser' base class and implements its interface.
+
+# Download
+Currently this package is contained inside the parser-cp2k repository because
+it is only used by it. If someone else wants to adopt these tools, this
+package could be separated into its own repository.
+
+Use git to copy this repository to your local machine. You will also have to
+recursively download the submodules. All this can be achieved with the command:
+
+```sh
+git clone --recursive git@gitlab.mpcdf.mpg.de:nomad-lab/parser-cp2k.git
+```
+
+# Installation
+To install this toolkit run the included nomadtoolkit/setup.py file as follows:
+
+```sh
+python setup.py develop --user
+```
diff --git a/nomadtoolkit/nomadtoolkit/__init__.py b/nomadtoolkit/nomadtoolkit/__init__.py
new file mode 100644
index 0000000..f1e7f55
--- /dev/null
+++ b/nomadtoolkit/nomadtoolkit/__init__.py
@@ -0,0 +1 @@
+from nomadtoolkit.analysis import Analyzer
diff --git a/nomadanalysis/nomadanalysis/analyzer.py b/nomadtoolkit/nomadtoolkit/analysis.py
similarity index 96%
rename from nomadanalysis/nomadanalysis/analyzer.py
rename to nomadtoolkit/nomadtoolkit/analysis.py
index de1336f..9219086 100644
--- a/nomadanalysis/nomadanalysis/analyzer.py
+++ b/nomadtoolkit/nomadtoolkit/analysis.py
@@ -2,7 +2,7 @@ import sys
 import logging
 from nomadcore.local_meta_info import loadJsonFile
 from nomadcore.parser_backend import JsonParseEventsWriterBackend
-from nomadanalysis.local_backend import LocalBackend
+from nomadtoolkit.local_backend import LocalBackend
 
 
 logger = logging.getLogger(__name__)
diff --git a/nomadtoolkit/nomadtoolkit/config.py b/nomadtoolkit/nomadtoolkit/config.py
new file mode 100644
index 0000000..ce75639
--- /dev/null
+++ b/nomadtoolkit/nomadtoolkit/config.py
@@ -0,0 +1,28 @@
+import os
+import json
+
+
+def open_config_file():
+    """Open config.json from this module's directory in "r+" mode."""
+    return open(os.path.join(os.path.dirname(__file__), "config.json"), "r+")
+
+
+def open_config_json():
+    """Return the parsed contents of config.json, closing the file afterwards."""
+    with open_config_file() as configfile:
+        return json.loads(configfile.read())
+
+
+def set_config(name, value):
+    """Store value under name in config.json, rewriting the whole file."""
+    with open_config_file() as configfile:  # closed (and flushed) on exit
+        jsonobject = json.loads(configfile.read())
+        jsonobject[name] = value
+        configfile.seek(0)  # overwrite from the start of the file
+        configfile.truncate()
+        configfile.write(json.dumps(jsonobject))
+
+
+def get_config(name):
+    """Return the config.json value stored under name (None when missing)."""
+    return open_config_json().get(name)
diff --git a/nomadtoolkit/nomadtoolkit/examples/1_basics.py b/nomadtoolkit/nomadtoolkit/examples/1_basics.py
new file mode 100644
index 0000000..77fb385
--- /dev/null
+++ b/nomadtoolkit/nomadtoolkit/examples/1_basics.py
@@ -0,0 +1,14 @@
+from nomadtoolkit import Analyzer
+from cp2kparser import CP2KParser
+
+# Initialize the parser you want to use. By default the parser uses the local
+# backend, which reads the metainfo files bundled with the nomadtoolkit
+# repository and returns the results as a python dictionary.
+# NOTE: the path below is machine-specific; point it to your own calculation.
+dirpaths = "/home/lauri/Dropbox/nomad-dev/parser-cp2k/cp2kparser/cp2kparser/tests/cp2k_2.6.2/forces/outputfile/n"
+parser = CP2KParser(contents=dirpaths)
+
+# Wrap the parser in an analyzer and run the parsing
+analyzer = Analyzer(parser)
+results = analyzer.parse()
+# print results
diff --git a/nomadanalysis/nomadanalysis/local_backend.py b/nomadtoolkit/nomadtoolkit/local_backend.py
similarity index 91%
rename from nomadanalysis/nomadanalysis/local_backend.py
rename to nomadtoolkit/nomadtoolkit/local_backend.py
index b8fa931..f5a63bf 100644
--- a/nomadanalysis/nomadanalysis/local_backend.py
+++ b/nomadtoolkit/nomadtoolkit/local_backend.py
@@ -24,7 +24,7 @@ class LocalBackend(object):
         gIndex should be unique (no reopening of a closed section)"""
         self.__lastIndex[metaName] = gIndex
         self.__openSections.add((metaName, gIndex))
-        self.__jsonOutput({"event":"openSection", "metaName":metaName, "gIndex":gIndex})
+        self.__jsonOutput({"event": "openSection", "metaName": metaName, "gIndex": gIndex})
 
     def __jsonOutput(self, dic):
         pass
@@ -50,10 +50,10 @@ class LocalBackend(object):
     def metaInfoEnv(self):
         return self.__metaInfoEnv
 
-    def startedParsingSession(self, mainFileUri, parserInfo, parsingStatus = None, parsingErrors = None):
+    def startedParsingSession(self, mainFileUri, parserInfo, parsingStatus=None, parsingErrors=None):
         pass
 
-    def finishedParsingSession(self, parsingStatus, parsingErrors, mainFileUri = None, parserInfo = None):
+    def finishedParsingSession(self, parsingStatus, parsingErrors, mainFileUri=None, parserInfo=None):
         pass
 
 
diff --git a/nomadtoolkit/nomadtoolkit/utils/__init__.py b/nomadtoolkit/nomadtoolkit/utils/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/nomadanalysis/nomadanalysis/utils/log.py b/nomadtoolkit/nomadtoolkit/utils/log.py
similarity index 100%
rename from nomadanalysis/nomadanalysis/utils/log.py
rename to nomadtoolkit/nomadtoolkit/utils/log.py
diff --git a/nomadanalysis/setup.py b/nomadtoolkit/setup.py
similarity index 58%
rename from nomadanalysis/setup.py
rename to nomadtoolkit/setup.py
index 60a18a9..311144a 100644
--- a/nomadanalysis/setup.py
+++ b/nomadtoolkit/setup.py
@@ -1,17 +1,18 @@
 from setuptools import setup
+import os
 
 
 #===============================================================================
 def main():
     # Start package setup
     setup(
-        name="nomadanalysis",
+        name="nomadtoolkit",
         version="0.1",
         description="Tools for analysing calculation results parsed by NOMAD parsers.",
         author="Lauri Himanen",
         author_email="lauri.himanen@gmail.com",
         license="GPL3",
-        packages=["nomadanalysis"],
+        packages=["nomadtoolkit"],
         install_requires=[
             'pint',
             'numpy',
@@ -21,4 +22,11 @@ def main():
 
 # Run main function by default
 if __name__ == "__main__":
+
+    # Install the toolkit package
     main()
+
+    # Store the path to the metainfo files in the config for later use
+    import nomadtoolkit.config
+    metapath = os.path.realpath(os.path.join(os.path.dirname(__file__), "submodules/nomad-meta-info/meta_info/nomad_meta_info"))
+    nomadtoolkit.config.set_config("metaInfoPath", metapath)
diff --git a/nomadtoolkit/submodules/nomad-meta-info b/nomadtoolkit/submodules/nomad-meta-info
new file mode 160000
index 0000000..c53dee7
--- /dev/null
+++ b/nomadtoolkit/submodules/nomad-meta-info
@@ -0,0 +1 @@
+Subproject commit c53dee7ab6a8d40b9b4c4c2d70262aec79fc05e5
diff --git a/nomadtoolkit/submodules/python-common b/nomadtoolkit/submodules/python-common
new file mode 160000
index 0000000..1ec174e
--- /dev/null
+++ b/nomadtoolkit/submodules/python-common
@@ -0,0 +1 @@
+Subproject commit 1ec174e5558f1ee46aca3a44a2205f3af86f23f8
-- 
GitLab