Commit c718e95a authored by Lauri Himanen's avatar Lauri Himanen
Browse files

Restructured the folders and files to use a common project structure, updated readme.

parent 0d024a53
...@@ -6,14 +6,13 @@ This is the main repository of the [NOMAD](http://nomad-lab.eu) parser for ...@@ -6,14 +6,13 @@ This is the main repository of the [NOMAD](http://nomad-lab.eu) parser for
from cp2kparser import CP2KParser from cp2kparser import CP2KParser
import matplotlib.pyplot as mpl import matplotlib.pyplot as mpl
# 1. Initialize a parser by giving a path to the CP2K output file and a list of # 1. Initialize a parser with a set of default units.
# default units
path = "path/to/main.file"
default_units = ["eV"] default_units = ["eV"]
parser = CP2KParser(path, default_units=default_units) parser = CP2KParser(default_units=default_units)
# 2. Parse # 2. Parse a file
results = parser.parse() path = "path/to/main.file"
results = parser.parse(path)
# 3. Query the results using the ids created specifically for NOMAD. # 3. Query the results using the ids created specifically for NOMAD.
scf_energies = results["energy_total_scf_iteration"] scf_energies = results["energy_total_scf_iteration"]
...@@ -22,7 +21,7 @@ This is the main repository of the [NOMAD](http://nomad-lab.eu) parser for ...@@ -22,7 +21,7 @@ This is the main repository of the [NOMAD](http://nomad-lab.eu) parser for
``` ```
# Installation # Installation
The code is python>=2.7 and python>=3.4 compatible. First download and install The code is python 2 and python 3 compatible. First download and install
the nomadcore package: the nomadcore package:
```sh ```sh
...@@ -47,15 +46,9 @@ cd parser-cp2k ...@@ -47,15 +46,9 @@ cd parser-cp2k
pip install -e . pip install -e .
``` ```
# Advanced # Notes
The parser is based on CP2K 2.6.2.
The parser is designed to support multiple versions of CP2K with a [DRY](https://en.wikipedia.org/wiki/Don%27t_repeat_yourself)
approach: The initial parser class is based on CP2K 2.6.2, and other versions
will be subclassed from it. By subclassing, all the previous functionality will
be preserved, new functionality can be easily created, and old functionality
overridden only where necessary.
# Upload Folder Structure, File Naming and CP2K Settings
The CP2K input setting The CP2K input setting
[PRINT_LEVEL](https://manual.cp2k.org/trunk/CP2K_INPUT/GLOBAL.html#PRINT_LEVEL) [PRINT_LEVEL](https://manual.cp2k.org/trunk/CP2K_INPUT/GLOBAL.html#PRINT_LEVEL)
controls the amount of details that are outputted during the calculation. The controls the amount of details that are outputted during the calculation. The
...@@ -66,12 +59,6 @@ they are located very deep inside some folder structure or outside the folder ...@@ -66,12 +59,6 @@ they are located very deep inside some folder structure or outside the folder
where the output file is, the parser will not be able to locate them. For this where the output file is, the parser will not be able to locate them. For this
reason it is recommended to keep the upload structure as flat as possible. reason it is recommended to keep the upload structure as flat as possible.
## Testing
The regression tests for this parser are located in
**/cp2k/parser/parser-cp2k/cp2kparser/regtest**. You can run the tests by
running the run_tests.py file in one of the version directories.
## Notes for CP2K Developers
Here is a list of features/fixes that would make the parsing of CP2K results Here is a list of features/fixes that would make the parsing of CP2K results
easier: easier:
- The pdb trajectory output doesn't seem to conform to the actual standard as - The pdb trajectory output doesn't seem to conform to the actual standard as
......
...@@ -5,20 +5,23 @@ import re ...@@ -5,20 +5,23 @@ import re
import logging import logging
import importlib import importlib
from nomadcore.baseclasses import ParserInterface from nomadcore.baseclasses import ParserInterface
# Needs to be imported in order for the importlib calls to work in python 2.7
import cp2kparser.versions.cp2k262.singlepointparser
logger = logging.getLogger("nomad") logger = logging.getLogger("nomad")
#===============================================================================
class CP2KParser(ParserInterface): class CP2KParser(ParserInterface):
"""This class handles the initial setup before any parsing can happen. It """This class handles the initial setup before any parsing can happen. It
determines which version of CP2K was used to generate the output and then determines which version of CP2K was used to generate the output and then
sets up a correct main parser. sets up a correct main parser.
After the implementation has been setup, you can parse the files with After the implementation has been setup, you can parse files with
parse(). parse().
""" """
def __init__(self, main_file, metainfo_to_keep=None, backend=None, default_units=None, metainfo_units=None, debug=False, log_level=logging.ERROR, store=True): def __init__(self, metainfo_to_keep=None, backend=None, default_units=None, metainfo_units=None, debug=False, log_level=logging.ERROR, store=True):
super(CP2KParser, self).__init__(main_file, metainfo_to_keep, backend, default_units, metainfo_units, debug, log_level, store) super(CP2KParser, self).__init__(metainfo_to_keep, backend, default_units, metainfo_units, debug, log_level, store)
def setup_version(self): def setup_version(self):
"""Setups the version by looking at the output file and the version """Setups the version by looking at the output file and the version
...@@ -84,10 +87,11 @@ class CP2KParser(ParserInterface): ...@@ -84,10 +87,11 @@ class CP2KParser(ParserInterface):
Args: Args:
version_id: An integer representing the CP2K version. The version version_id: An integer representing the CP2K version. The version
number is originally a string the form '2.6.2', but here the numbers number is originally a string the form '2.6.2', but here the
are just concatenated into a single integer number 262. numbers are just concatenated into a single integer number 262.
run_type: A string that identifies the RUN_TYPE for the calculation. run_type: A string that identifies the RUN_TYPE for the
All the possible run types can be found in the CP2K reference manual. calculation. All the possible run types can be found in the
CP2K reference manual.
Returns: Returns:
A python class that should be instantiated later with the correct A python class that should be instantiated later with the correct
...@@ -110,7 +114,10 @@ class CP2KParser(ParserInterface): ...@@ -110,7 +114,10 @@ class CP2KParser(ParserInterface):
try: try:
parser = parser_map[run_type] parser = parser_map[run_type]
except KeyError: except KeyError:
logger.exception("A parser corresponding to the run_type '{}' could not be found.".format(run_type)) logger.exception(
"A parser corresponding to the run_type '{}' could not be found."
.format(run_type)
)
raise raise
# Currently the version id is a pure integer, so it can directly be mapped # Currently the version id is a pure integer, so it can directly be mapped
...@@ -118,20 +125,32 @@ class CP2KParser(ParserInterface): ...@@ -118,20 +125,32 @@ class CP2KParser(ParserInterface):
base = "cp2kparser.versions.cp2k{}.{}".format(version_id, parser.lower()) base = "cp2kparser.versions.cp2k{}.{}".format(version_id, parser.lower())
parser_module = None parser_module = None
parser_class = None parser_class = None
try: try:
parser_module = importlib.import_module(base) parser_module = importlib.import_module(base)
except ImportError: except ImportError:
logger.warning("Could not find a parser for version '{}' and run type '{}'. Trying to default to the base implementation for CP2K 2.6.2".format(version_id, run_type)) logger.warning(
"Could not find a parser for version '{}' and run type '{}'. "
"Trying to default to the base implementation for CP2K 2.6.2"
.format(version_id, run_type)
)
base = "cp2kparser.versions.cp2k262.{}".format(parser.lower()) base = "cp2kparser.versions.cp2k262.{}".format(parser.lower())
try: try:
parser_module = importlib.import_module(base) parser_module = importlib.import_module(base)
except ImportError: except ImportError:
logger.exception("Tried to default to the CP2K 2.6.2 implementation but could not find the correct modules for run_type '{}'.".format(run_type)) logger.exception(
"Tried to default to the CP2K 2.6.2 implementation but "
"could not find the correct modules for run_type '{}'."
.format(run_type)
)
raise raise
try: try:
parser_class = getattr(parser_module, "CP2K{}".format(parser)) parser_class = getattr(parser_module, "CP2K{}".format(parser))
except AttributeError: except AttributeError:
logger.exception("A parser class '{}' could not be found in the module '[]'.".format(parser_class, parser_module)) logger.exception(
"A parser class '{}' could not be found in the module '[]'."
.format(parser_class, parser_module)
)
raise raise
self.main_parser = parser_class(self.parser_context.main_file, self.parser_context) self.main_parser = parser_class(self.parser_context.main_file, self.parser_context)
...@@ -24,7 +24,6 @@ from cp2kparser.generic.inputparsing import Section, Keyword, DefaultKeyword, Se ...@@ -24,7 +24,6 @@ from cp2kparser.generic.inputparsing import Section, Keyword, DefaultKeyword, Se
logger = logging logger = logging
#===============================================================================
def generate_object_tree(xml_file, for_metainfo=False): def generate_object_tree(xml_file, for_metainfo=False):
xml_element = ET.parse(xml_file) xml_element = ET.parse(xml_file)
...@@ -48,7 +47,6 @@ def generate_object_tree(xml_file, for_metainfo=False): ...@@ -48,7 +47,6 @@ def generate_object_tree(xml_file, for_metainfo=False):
return object_tree return object_tree
#===============================================================================
def recursive_tree_generation(xml_element, for_metainfo=False, name_stack=[], ignore=True): def recursive_tree_generation(xml_element, for_metainfo=False, name_stack=[], ignore=True):
# Make new section object for the root # Make new section object for the root
...@@ -199,7 +197,6 @@ def recursive_tree_generation(xml_element, for_metainfo=False, name_stack=[], ig ...@@ -199,7 +197,6 @@ def recursive_tree_generation(xml_element, for_metainfo=False, name_stack=[], ig
return section return section
#===============================================================================
def generate_input_metainfos(object_tree): def generate_input_metainfos(object_tree):
json_root = { json_root = {
...@@ -222,7 +219,6 @@ def generate_input_metainfos(object_tree): ...@@ -222,7 +219,6 @@ def generate_input_metainfos(object_tree):
f.write(json.dumps(json_root, indent=2, separators=(',', ': '))) f.write(json.dumps(json_root, indent=2, separators=(',', ': ')))
#===============================================================================
def generate_metainfo_recursively(obj, parent, container, name_stack): def generate_metainfo_recursively(obj, parent, container, name_stack):
json = None json = None
...@@ -245,7 +241,6 @@ def generate_metainfo_recursively(obj, parent, container, name_stack): ...@@ -245,7 +241,6 @@ def generate_metainfo_recursively(obj, parent, container, name_stack):
container.append(json) container.append(json)
#===============================================================================
def generate_input_object_metainfo_json(child, parent, name_stack): def generate_input_object_metainfo_json(child, parent, name_stack):
path = ".".join(name_stack) path = ".".join(name_stack)
# if path.startswith("."): # if path.startswith("."):
...@@ -283,7 +278,6 @@ def generate_input_object_metainfo_json(child, parent, name_stack): ...@@ -283,7 +278,6 @@ def generate_input_object_metainfo_json(child, parent, name_stack):
return json_obj return json_obj
#===============================================================================
def generate_section_metainfo_json(child, parent, name_stack): def generate_section_metainfo_json(child, parent, name_stack):
path = ".".join(name_stack[:-1]) path = ".".join(name_stack[:-1])
json_obj = {} json_obj = {}
...@@ -307,7 +301,6 @@ def generate_section_metainfo_json(child, parent, name_stack): ...@@ -307,7 +301,6 @@ def generate_section_metainfo_json(child, parent, name_stack):
return json_obj return json_obj
#===============================================================================
# Run main function by default # Run main function by default
if __name__ == "__main__": if __name__ == "__main__":
......
...@@ -12,7 +12,6 @@ from collections import defaultdict ...@@ -12,7 +12,6 @@ from collections import defaultdict
logger = logging.getLogger("nomad") logger = logging.getLogger("nomad")
#===============================================================================
class CP2KCommonParser(CommonParser): class CP2KCommonParser(CommonParser):
""" """
This class is used to store and instantiate common parts of the This class is used to store and instantiate common parts of the
......
...@@ -12,7 +12,6 @@ import logging ...@@ -12,7 +12,6 @@ import logging
logger = logging.getLogger("nomad") logger = logging.getLogger("nomad")
#===============================================================================
class CP2KGeoOptParser(MainHierarchicalParser): class CP2KGeoOptParser(MainHierarchicalParser):
"""Used to parse the CP2K calculation with run types: """Used to parse the CP2K calculation with run types:
-GEO_OPT/GEOMETRY_OPTIMIZATION -GEO_OPT/GEOMETRY_OPTIMIZATION
......
...@@ -11,7 +11,6 @@ from cp2kparser.generic.inputparsing import metainfo_data_prefix, metainfo_secti ...@@ -11,7 +11,6 @@ from cp2kparser.generic.inputparsing import metainfo_data_prefix, metainfo_secti
logger = logging.getLogger("nomad") logger = logging.getLogger("nomad")
#===============================================================================
class CP2KInputParser(AbstractBaseParser): class CP2KInputParser(AbstractBaseParser):
"""Used to parse out a CP2K input file. """Used to parse out a CP2K input file.
......
...@@ -13,7 +13,6 @@ import logging ...@@ -13,7 +13,6 @@ import logging
logger = logging.getLogger("nomad") logger = logging.getLogger("nomad")
#===============================================================================
class CP2KMDParser(MainHierarchicalParser): class CP2KMDParser(MainHierarchicalParser):
"""Used to parse the CP2K calculation with run types: """Used to parse the CP2K calculation with run types:
-MD -MD
......
...@@ -4,7 +4,6 @@ from nomadcore.baseclasses import AbstractBaseParser ...@@ -4,7 +4,6 @@ from nomadcore.baseclasses import AbstractBaseParser
logger = logging.getLogger("nomad") logger = logging.getLogger("nomad")
#===============================================================================
class CP2KSinglePointForceParser(AbstractBaseParser): class CP2KSinglePointForceParser(AbstractBaseParser):
"""Used to parse out a force file printed out by a CP2K single point """Used to parse out a force file printed out by a CP2K single point
calculation. It is not exactly an ZYX file, so here we define separate calculation. It is not exactly an ZYX file, so here we define separate
......
from __future__ import absolute_import from __future__ import absolute_import
from nomadcore.simple_parser import SimpleMatcher as SM from nomadcore.simple_parser import SimpleMatcher as SM
from nomadcore.baseclasses import MainHierarchicalParser from nomadcore.baseclasses import MainHierarchicalParser
from .singlepointforceparser import CP2KSinglePointForceParser from cp2kparser.versions.cp2k262.singlepointforceparser import CP2KSinglePointForceParser
from nomadcore.caching_backend import CachingLevel from nomadcore.caching_backend import CachingLevel
from .commonparser import CP2KCommonParser from cp2kparser.versions.cp2k262.commonparser import CP2KCommonParser
import logging import logging
logger = logging.getLogger("nomad") logger = logging.getLogger("nomad")
#===============================================================================
class CP2KSinglePointParser(MainHierarchicalParser): class CP2KSinglePointParser(MainHierarchicalParser):
"""The main parser class. Used to parse the CP2K calculation with run types: """The main parser class. Used to parse the CP2K calculation with run types:
-ENERGY -ENERGY
......
# Unit tests
This directory contains unit tests to evaluate the correctness of the parser in
a systematic way. Ideally each parsed metainfo should have at least one unit
test, and if the resulting values are predetermined, the available values
should all be tested individually. Scenarios that are expected to produce a
parsing error should be tested as well.
from pycallgraph import PyCallGraph
from pycallgraph.output import GraphvizOutput
from cp2kparser import CP2KParser
# Trace one complete parse of a unit-test output file and hand the recorded
# calls to pycallgraph's GraphvizOutput backend.
with PyCallGraph(output=GraphvizOutput()):
    filepath = "/home/lauri/Dropbox/nomad-dev/nomad-lab-base/parsers/cp2k/test/unittests/cp2k_2.6.2/energy_force/unittest.out"
    # CP2KParser.__init__ no longer takes the main file as its first
    # positional argument (that slot is metainfo_to_keep); the file to parse
    # is passed to parse() instead.
    parser = CP2KParser()
    parser.parse(filepath)
import cProfile
import pstats
from run_tests import get_results
def profile_energy_force():
    """Profile the CPU usage of parsing RUN_TYPE ENERGY_FORCE.

    Runs ``get_results("energy_force", "section_run")`` under cProfile and
    prints the 30 entries with the highest cumulative time, with directory
    names stripped from the reported file paths.
    """
    profile = cProfile.Profile()
    # runcall() invokes the function object directly. Profile.run() would
    # instead exec a command string in the __main__ namespace, which only
    # resolves get_results when this file is executed as a script.
    profile.runcall(get_results, "energy_force", "section_run")
    stats = pstats.Stats(profile)
    stats.strip_dirs()
    stats.sort_stats("cumulative")
    stats.print_stats(30)


if __name__ == "__main__":
    profile_energy_force()
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment