Restructured the folders and files to use a common project structure, updated readme.

c718e95a · Lauri Himanen · 0d024a53 · c718e95a · c718e95a · c718e95a
Commit c718e95a authored Oct 16, 2017 by Lauri Himanen
--- a/README.md
+++ b/README.md
@@ -6,14 +6,13 @@ This is the main repository of the [NOMAD](http://nomad-lab.eu) parser for
    from cp2kparser import CP2KParser
    import matplotlib.pyplot as mpl

-    # 1. Initialize a parser by giving a path to the CP2K output file and a list of
-    # default units
-    path = "path/to/main.file"
+    # 1. Initialize a parser with a set of default units.
    default_units = ["eV"]
-    parser = CP2KParser(path, default_units=default_units)
+    parser = CP2KParser(default_units=default_units)

-    # 2. Parse
-    results = parser.parse()
+    # 2. Parse a file
+    path = "path/to/main.file"
+    results = parser.parse(path)

    # 3. Query the results using the ids created specifically for NOMAD.
    scf_energies = results["energy_total_scf_iteration"]
@@ -22,7 +21,7 @@ This is the main repository of the [NOMAD](http://nomad-lab.eu) parser for
 ```

 # Installation
-The code is python>=2.7 and python>=3.4 compatible. First download and install
+The code is Python 2 and Python 3 compatible. First download and install
 the nomadcore package:

 ```sh
@@ -47,15 +46,9 @@ cd parser-cp2k
 pip install -e .
 ```

-# Advanced
+# Notes
+The parser is based on CP2K 2.6.2.

-The parser is designed to support multiple versions of CP2K with a [DRY](https://en.wikipedia.org/wiki/Don%27t_repeat_yourself)
-approach: The initial parser class is based on CP2K 2.6.2, and other versions
-will be subclassed from it. By sublassing, all the previous functionality will
-be preserved, new functionality can be easily created, and old functionality
-overridden only where necesssary.
-
-# Upload Folder Structure, File Naming and CP2K Settings
 The CP2K input setting
 [PRINT_LEVEL](https://manual.cp2k.org/trunk/CP2K_INPUT/GLOBAL.html#PRINT_LEVEL)
 controls the amount of detail that is output during the calculation. The
@@ -66,12 +59,6 @@ they are located very deep inside some folder structure or outside the folder
 where the output file is, the parser will not be able to locate them. For this
 reason it is recommended to keep the upload structure as flat as possible.

-## Testing
-The regression tests for this parser are located in
-**/cp2k/parser/parser-cp2k/cp2kparser/regtest**. You can run the tests by
-running the run_tests.py file in one of the version directories.
-
-## Notes for CP2K Developers
 Here is a list of features/fixes that would make the parsing of CP2K results
 easier:
 - The pdb trajectory output doesn't seem to conform to the actual standard as


--- a/parser/parser-cp2k/cp2kparser/parser.py
+++ b/parser/parser-cp2k/cp2kparser/parser.py
@@ -5,20 +5,23 @@ import re
 import logging
 import importlib
 from nomadcore.baseclasses import ParserInterface
+
+# Needs to be imported in order for the importlib calls to work in python 2.7
+import cp2kparser.versions.cp2k262.singlepointparser
+
 logger = logging.getLogger("nomad")


-#===============================================================================
 class CP2KParser(ParserInterface):
    """This class handles the initial setup before any parsing can happen. It
    determines which version of CP2K was used to generate the output and then
    sets up a correct main parser.

-    After the implementation has been setup, you can parse the files with
+    After the implementation has been set up, you can parse files with
    parse().
    """
-    def __init__(self, main_file, metainfo_to_keep=None, backend=None, default_units=None, metainfo_units=None, debug=False, log_level=logging.ERROR, store=True):
-        super(CP2KParser, self).__init__(main_file, metainfo_to_keep, backend, default_units, metainfo_units, debug, log_level, store)
+    def __init__(self, metainfo_to_keep=None, backend=None, default_units=None, metainfo_units=None, debug=False, log_level=logging.ERROR, store=True):
+        super(CP2KParser, self).__init__(metainfo_to_keep, backend, default_units, metainfo_units, debug, log_level, store)

    def setup_version(self):
        """Sets up the version by looking at the output file and the version
@@ -84,10 +87,11 @@ class CP2KParser(ParserInterface):

        Args:
            version_id: An integer representing the CP2K version. The version
-                number is originally a string the form '2.6.2', but here the numbers
-                are just concatenated into a single integer number 262.
-            run_type: A string that identifies the RUN_TYPE for the calculation.
-                All the possible run types can be found in the CP2K reference manual.
+                number is originally a string of the form '2.6.2', but here the
+                numbers are just concatenated into a single integer number 262.
+            run_type: A string that identifies the RUN_TYPE for the
+                calculation.  All the possible run types can be found in the
+                CP2K reference manual.

        Returns:
            A python class that should be instantiated later with the correct
@@ -110,7 +114,10 @@ class CP2KParser(ParserInterface):
        try:
            parser = parser_map[run_type]
        except KeyError:
-            logger.exception("A parser corresponding to the run_type '{}' could not be found.".format(run_type))
+            logger.exception(
+                "A parser corresponding to the run_type '{}' could not be found."
+                .format(run_type)
+            )
            raise

        # Currently the version id is a pure integer, so it can directly be mapped
@@ -118,20 +125,32 @@ class CP2KParser(ParserInterface):
        base = "cp2kparser.versions.cp2k{}.{}".format(version_id, parser.lower())
        parser_module = None
        parser_class = None
+
        try:
            parser_module = importlib.import_module(base)
        except ImportError:
-            logger.warning("Could not find a parser for version '{}' and run type '{}'. Trying to default to the base implementation for CP2K 2.6.2".format(version_id, run_type))
+            logger.warning(
+                "Could not find a parser for version '{}' and run type '{}'. "
+                "Trying to default to the base implementation for CP2K 2.6.2"
+                .format(version_id, run_type)
+            )
            base = "cp2kparser.versions.cp2k262.{}".format(parser.lower())
            try:
                parser_module = importlib.import_module(base)
            except ImportError:
-                logger.exception("Tried to default to the CP2K 2.6.2 implementation but could not find the correct modules for run_type '{}'.".format(run_type))
+                logger.exception(
+                    "Tried to default to the CP2K 2.6.2 implementation but "
+                    "could not find the correct modules for run_type '{}'."
+                    .format(run_type)
+                )
                raise
        try:
            parser_class = getattr(parser_module, "CP2K{}".format(parser))
        except AttributeError:
-            logger.exception("A parser class '{}' could not be found in the module '[]'.".format(parser_class, parser_module))
+            logger.exception(
+                "A parser class '{}' could not be found in the module '[]'."
+                .format(parser_class, parser_module)
+            )
            raise

        self.main_parser = parser_class(self.parser_context.main_file, self.parser_context)
--- a/parser/parser-cp2k/cp2kparser/tools/xmlpreparser.py
+++ b/parser/parser-cp2k/cp2kparser/tools/xmlpreparser.py
@@ -24,7 +24,6 @@ from cp2kparser.generic.inputparsing import Section, Keyword, DefaultKeyword, Se
 logger = logging


-#===============================================================================
 def generate_object_tree(xml_file, for_metainfo=False):

    xml_element = ET.parse(xml_file)
@@ -48,7 +47,6 @@ def generate_object_tree(xml_file, for_metainfo=False):
    return object_tree


-#===============================================================================
 def recursive_tree_generation(xml_element, for_metainfo=False, name_stack=[], ignore=True):

    # Make new section object for the root
@@ -199,7 +197,6 @@ def recursive_tree_generation(xml_element, for_metainfo=False, name_stack=[], ig
    return section


-#===============================================================================
 def generate_input_metainfos(object_tree):

    json_root = {
@@ -222,7 +219,6 @@ def generate_input_metainfos(object_tree):
        f.write(json.dumps(json_root, indent=2, separators=(',', ': ')))


-#===============================================================================
 def generate_metainfo_recursively(obj, parent, container, name_stack):

    json = None
@@ -245,7 +241,6 @@ def generate_metainfo_recursively(obj, parent, container, name_stack):
    container.append(json)


-#===============================================================================
 def generate_input_object_metainfo_json(child, parent, name_stack):
    path = ".".join(name_stack)
    # if path.startswith("."):
@@ -283,7 +278,6 @@ def generate_input_object_metainfo_json(child, parent, name_stack):
    return json_obj


-#===============================================================================
 def generate_section_metainfo_json(child, parent, name_stack):
    path = ".".join(name_stack[:-1])
    json_obj = {}
@@ -307,7 +301,6 @@ def generate_section_metainfo_json(child, parent, name_stack):
    return json_obj


-#===============================================================================
 # Run main function by default
 if __name__ == "__main__":



--- a/parser/parser-cp2k/cp2kparser/versions/cp2k262/commonparser.py
+++ b/parser/parser-cp2k/cp2kparser/versions/cp2k262/commonparser.py
@@ -12,7 +12,6 @@ from collections import defaultdict
 logger = logging.getLogger("nomad")


-#===============================================================================
 class CP2KCommonParser(CommonParser):
    """
    This class is used to store and instantiate common parts of the


--- a/parser/parser-cp2k/cp2kparser/versions/cp2k262/geooptparser.py
+++ b/parser/parser-cp2k/cp2kparser/versions/cp2k262/geooptparser.py
@@ -12,7 +12,6 @@ import logging
 logger = logging.getLogger("nomad")


-#===============================================================================
 class CP2KGeoOptParser(MainHierarchicalParser):
    """Used to parse the CP2K calculation with run types:
        -GEO_OPT/GEOMETRY_OPTIMIZATION


--- a/parser/parser-cp2k/cp2kparser/versions/cp2k262/inputparser.py
+++ b/parser/parser-cp2k/cp2kparser/versions/cp2k262/inputparser.py
@@ -11,7 +11,6 @@ from cp2kparser.generic.inputparsing import metainfo_data_prefix, metainfo_secti
 logger = logging.getLogger("nomad")


-#===============================================================================
 class CP2KInputParser(AbstractBaseParser):
    """Used to parse out a CP2K input file.



--- a/parser/parser-cp2k/cp2kparser/versions/cp2k262/mdparser.py
+++ b/parser/parser-cp2k/cp2kparser/versions/cp2k262/mdparser.py
@@ -13,7 +13,6 @@ import logging
 logger = logging.getLogger("nomad")


-#===============================================================================
 class CP2KMDParser(MainHierarchicalParser):
    """Used to parse the CP2K calculation with run types:
        -MD


--- a/parser/parser-cp2k/cp2kparser/versions/cp2k262/singlepointforceparser.py
+++ b/parser/parser-cp2k/cp2kparser/versions/cp2k262/singlepointforceparser.py
@@ -4,7 +4,6 @@ from nomadcore.baseclasses import AbstractBaseParser
 logger = logging.getLogger("nomad")


-#===============================================================================
 class CP2KSinglePointForceParser(AbstractBaseParser):
    """Used to parse out a force file printed out by a CP2K single point
    calculation. It is not exactly an XYZ file, so here we define separate


--- a/parser/parser-cp2k/cp2kparser/versions/cp2k262/singlepointparser.py
+++ b/parser/parser-cp2k/cp2kparser/versions/cp2k262/singlepointparser.py
 from __future__ import absolute_import
 from nomadcore.simple_parser import SimpleMatcher as SM
 from nomadcore.baseclasses import MainHierarchicalParser
-from .singlepointforceparser import CP2KSinglePointForceParser
+from cp2kparser.versions.cp2k262.singlepointforceparser import CP2KSinglePointForceParser
 from nomadcore.caching_backend import CachingLevel
-from .commonparser import CP2KCommonParser
+from cp2kparser.versions.cp2k262.commonparser import CP2KCommonParser
 import logging
 logger = logging.getLogger("nomad")


-#===============================================================================
 class CP2KSinglePointParser(MainHierarchicalParser):
    """The main parser class. Used to parse the CP2K calculation with run types:
        -ENERGY


--- a/regtest/README.md
+++ b/regtest/README.md
-# Unit tests
-This directory contains unit tests to evaluate the correctness of the parser in
-a systematic way. Ideally each parsed metainfo should have at least one unit
-test, and if the resulting values are predetermined, the available values
-should all be tested individually. Also certain scenarios that should produce a
-parsing error should be tested.
--- a/regtest/cp2k_2.6.2/callgraph.py
+++ b/regtest/cp2k_2.6.2/callgraph.py
-from pycallgraph import PyCallGraph
-from pycallgraph.output import GraphvizOutput
-from cp2kparser import CP2KParser
-
-with PyCallGraph(output=GraphvizOutput()):
-    filepath = "/home/lauri/Dropbox/nomad-dev/nomad-lab-base/parsers/cp2k/test/unittests/cp2k_2.6.2/energy_force/unittest.out"
-    parser = CP2KParser(filepath)
-    parser.parse()
--- a/regtest/cp2k_2.6.2/profiling.py
+++ b/regtest/cp2k_2.6.2/profiling.py
-import cProfile
-import pstats
-from run_tests import get_results
-
-
-def profile_energy_force():
-    """Used to profile the CPU usage in parsing RUN_TYPE ENERGY_FORCE.
-    """
-    profile = cProfile.Profile()
-    profile.run('get_results("energy_force", "section_run")')
-    stats = pstats.Stats(profile)
-    stats.strip_dirs()
-    stats.sort_stats("cumulative")
-    stats.print_stats(30)
-
-if __name__ == "__main__":
-    profile_energy_force()
--- a/regtest/cp2k_2.6.2/BASIS_SET
+++ b/regtest/cp2k_2.6.2/BASIS_SET
--- a/regtest/cp2k_2.6.2/GTH_POTENTIALS
+++ b/regtest/cp2k_2.6.2/GTH_POTENTIALS
--- a/regtest/cp2k_2.6.2/XC_functional/b3lyp/b3lyp.inp
+++ b/regtest/cp2k_2.6.2/XC_functional/b3lyp/b3lyp.inp
--- a/regtest/cp2k_2.6.2/XC_functional/b3lyp/unittest.out
+++ b/regtest/cp2k_2.6.2/XC_functional/b3lyp/unittest.out
--- a/regtest/cp2k_2.6.2/XC_functional/blyp/blyp.inp
+++ b/regtest/cp2k_2.6.2/XC_functional/blyp/blyp.inp
--- a/regtest/cp2k_2.6.2/XC_functional/blyp/unittest.out
+++ b/regtest/cp2k_2.6.2/XC_functional/blyp/unittest.out
--- a/regtest/cp2k_2.6.2/XC_functional/hcth120/hcth120.inp
+++ b/regtest/cp2k_2.6.2/XC_functional/hcth120/hcth120.inp
--- a/regtest/cp2k_2.6.2/XC_functional/hcth120/unittest.out
+++ b/regtest/cp2k_2.6.2/XC_functional/hcth120/unittest.out