Commit c718e95a authored by Lauri Himanen's avatar Lauri Himanen
Browse files

Restructured the folders and files to use a common project structure, updated readme.

parent 0d024a53
......@@ -6,14 +6,13 @@ This is the main repository of the [NOMAD](http://nomad-lab.eu) parser for
from cp2kparser import CP2KParser
import matplotlib.pyplot as mpl
# 1. Initialize a parser by giving a path to the CP2K output file and a list of
# default units
path = "path/to/main.file"
# 1. Initialize a parser with a set of default units.
default_units = ["eV"]
parser = CP2KParser(path, default_units=default_units)
parser = CP2KParser(default_units=default_units)
# 2. Parse
results = parser.parse()
# 2. Parse a file
path = "path/to/main.file"
results = parser.parse(path)
# 3. Query the results using the IDs created specifically for NOMAD.
scf_energies = results["energy_total_scf_iteration"]
......@@ -22,7 +21,7 @@ This is the main repository of the [NOMAD](http://nomad-lab.eu) parser for
```
# Installation
The code is python>=2.7 and python>=3.4 compatible. First download and install
The code is python 2 and python 3 compatible. First download and install
the nomadcore package:
```sh
......@@ -47,15 +46,9 @@ cd parser-cp2k
pip install -e .
```
# Advanced
# Notes
The parser is based on CP2K 2.6.2.
The parser is designed to support multiple versions of CP2K with a [DRY](https://en.wikipedia.org/wiki/Don%27t_repeat_yourself)
approach: The initial parser class is based on CP2K 2.6.2, and other versions
will be subclassed from it. By subclassing, all the previous functionality will
be preserved, new functionality can be easily created, and old functionality
overridden only where necessary.
# Upload Folder Structure, File Naming and CP2K Settings
The CP2K input setting
[PRINT_LEVEL](https://manual.cp2k.org/trunk/CP2K_INPUT/GLOBAL.html#PRINT_LEVEL)
controls the amount of detail that is output during the calculation. The
......@@ -66,12 +59,6 @@ they are located very deep inside some folder structure or outside the folder
where the output file is, the parser will not be able to locate them. For this
reason it is recommended to keep the upload structure as flat as possible.
## Testing
The regression tests for this parser are located in
**/cp2k/parser/parser-cp2k/cp2kparser/regtest**. You can run the tests by
running the run_tests.py file in one of the version directories.
## Notes for CP2K Developers
Here is a list of features/fixes that would make the parsing of CP2K results
easier:
- The pdb trajectory output doesn't seem to conform to the actual standard as
......
......@@ -5,20 +5,23 @@ import re
import logging
import importlib
from nomadcore.baseclasses import ParserInterface
# Needs to be imported in order for the importlib calls to work in python 2.7
import cp2kparser.versions.cp2k262.singlepointparser
logger = logging.getLogger("nomad")
#===============================================================================
class CP2KParser(ParserInterface):
"""This class handles the initial setup before any parsing can happen. It
determines which version of CP2K was used to generate the output and then
sets up a correct main parser.
After the implementation has been setup, you can parse the files with
After the implementation has been setup, you can parse files with
parse().
"""
def __init__(self, main_file, metainfo_to_keep=None, backend=None, default_units=None, metainfo_units=None, debug=False, log_level=logging.ERROR, store=True):
super(CP2KParser, self).__init__(main_file, metainfo_to_keep, backend, default_units, metainfo_units, debug, log_level, store)
def __init__(self, metainfo_to_keep=None, backend=None, default_units=None, metainfo_units=None, debug=False, log_level=logging.ERROR, store=True):
super(CP2KParser, self).__init__(metainfo_to_keep, backend, default_units, metainfo_units, debug, log_level, store)
def setup_version(self):
"""Setups the version by looking at the output file and the version
......@@ -84,10 +87,11 @@ class CP2KParser(ParserInterface):
Args:
version_id: An integer representing the CP2K version. The version
number is originally a string the form '2.6.2', but here the numbers
are just concatenated into a single integer number 262.
run_type: A string that identifies the RUN_TYPE for the calculation.
All the possible run types can be found in the CP2K reference manual.
number is originally a string the form '2.6.2', but here the
numbers are just concatenated into a single integer number 262.
run_type: A string that identifies the RUN_TYPE for the
calculation. All the possible run types can be found in the
CP2K reference manual.
Returns:
A python class that should be instantiated later with the correct
......@@ -110,7 +114,10 @@ class CP2KParser(ParserInterface):
try:
parser = parser_map[run_type]
except KeyError:
logger.exception("A parser corresponding to the run_type '{}' could not be found.".format(run_type))
logger.exception(
"A parser corresponding to the run_type '{}' could not be found."
.format(run_type)
)
raise
# Currently the version id is a pure integer, so it can directly be mapped
......@@ -118,20 +125,32 @@ class CP2KParser(ParserInterface):
base = "cp2kparser.versions.cp2k{}.{}".format(version_id, parser.lower())
parser_module = None
parser_class = None
try:
parser_module = importlib.import_module(base)
except ImportError:
logger.warning("Could not find a parser for version '{}' and run type '{}'. Trying to default to the base implementation for CP2K 2.6.2".format(version_id, run_type))
logger.warning(
"Could not find a parser for version '{}' and run type '{}'. "
"Trying to default to the base implementation for CP2K 2.6.2"
.format(version_id, run_type)
)
base = "cp2kparser.versions.cp2k262.{}".format(parser.lower())
try:
parser_module = importlib.import_module(base)
except ImportError:
logger.exception("Tried to default to the CP2K 2.6.2 implementation but could not find the correct modules for run_type '{}'.".format(run_type))
logger.exception(
"Tried to default to the CP2K 2.6.2 implementation but "
"could not find the correct modules for run_type '{}'."
.format(run_type)
)
raise
try:
parser_class = getattr(parser_module, "CP2K{}".format(parser))
except AttributeError:
logger.exception("A parser class '{}' could not be found in the module '[]'.".format(parser_class, parser_module))
logger.exception(
"A parser class '{}' could not be found in the module '[]'."
.format(parser_class, parser_module)
)
raise
self.main_parser = parser_class(self.parser_context.main_file, self.parser_context)
......@@ -24,7 +24,6 @@ from cp2kparser.generic.inputparsing import Section, Keyword, DefaultKeyword, Se
logger = logging
#===============================================================================
def generate_object_tree(xml_file, for_metainfo=False):
xml_element = ET.parse(xml_file)
......@@ -48,7 +47,6 @@ def generate_object_tree(xml_file, for_metainfo=False):
return object_tree
#===============================================================================
def recursive_tree_generation(xml_element, for_metainfo=False, name_stack=[], ignore=True):
# Make new section object for the root
......@@ -199,7 +197,6 @@ def recursive_tree_generation(xml_element, for_metainfo=False, name_stack=[], ig
return section
#===============================================================================
def generate_input_metainfos(object_tree):
json_root = {
......@@ -222,7 +219,6 @@ def generate_input_metainfos(object_tree):
f.write(json.dumps(json_root, indent=2, separators=(',', ': ')))
#===============================================================================
def generate_metainfo_recursively(obj, parent, container, name_stack):
json = None
......@@ -245,7 +241,6 @@ def generate_metainfo_recursively(obj, parent, container, name_stack):
container.append(json)
#===============================================================================
def generate_input_object_metainfo_json(child, parent, name_stack):
path = ".".join(name_stack)
# if path.startswith("."):
......@@ -283,7 +278,6 @@ def generate_input_object_metainfo_json(child, parent, name_stack):
return json_obj
#===============================================================================
def generate_section_metainfo_json(child, parent, name_stack):
path = ".".join(name_stack[:-1])
json_obj = {}
......@@ -307,7 +301,6 @@ def generate_section_metainfo_json(child, parent, name_stack):
return json_obj
#===============================================================================
# Run main function by default
if __name__ == "__main__":
......
......@@ -12,7 +12,6 @@ from collections import defaultdict
logger = logging.getLogger("nomad")
#===============================================================================
class CP2KCommonParser(CommonParser):
"""
This class is used to store and instantiate common parts of the
......
......@@ -12,7 +12,6 @@ import logging
logger = logging.getLogger("nomad")
#===============================================================================
class CP2KGeoOptParser(MainHierarchicalParser):
"""Used to parse the CP2K calculation with run types:
-GEO_OPT/GEOMETRY_OPTIMIZATION
......
......@@ -11,7 +11,6 @@ from cp2kparser.generic.inputparsing import metainfo_data_prefix, metainfo_secti
logger = logging.getLogger("nomad")
#===============================================================================
class CP2KInputParser(AbstractBaseParser):
"""Used to parse out a CP2K input file.
......
......@@ -13,7 +13,6 @@ import logging
logger = logging.getLogger("nomad")
#===============================================================================
class CP2KMDParser(MainHierarchicalParser):
"""Used to parse the CP2K calculation with run types:
-MD
......
......@@ -4,7 +4,6 @@ from nomadcore.baseclasses import AbstractBaseParser
logger = logging.getLogger("nomad")
#===============================================================================
class CP2KSinglePointForceParser(AbstractBaseParser):
"""Used to parse out a force file printed out by a CP2K single point
calculation. It is not exactly an ZYX file, so here we define separate
......
from __future__ import absolute_import
from nomadcore.simple_parser import SimpleMatcher as SM
from nomadcore.baseclasses import MainHierarchicalParser
from .singlepointforceparser import CP2KSinglePointForceParser
from cp2kparser.versions.cp2k262.singlepointforceparser import CP2KSinglePointForceParser
from nomadcore.caching_backend import CachingLevel
from .commonparser import CP2KCommonParser
from cp2kparser.versions.cp2k262.commonparser import CP2KCommonParser
import logging
logger = logging.getLogger("nomad")
#===============================================================================
class CP2KSinglePointParser(MainHierarchicalParser):
"""The main parser class. Used to parse the CP2K calculation with run types:
-ENERGY
......
# Unit tests
This directory contains unit tests to evaluate the correctness of the parser in
a systematic way. Ideally each parsed metainfo should have at least one unit
test, and if the resulting values are predetermined, the available values
should all be tested individually. Also certain scenarios that should produce a
parsing error should be tested.
# Generates a call graph of a single CP2K parsing run with pycallgraph. The
# graph is rendered through pycallgraph's Graphviz output.
from pycallgraph import PyCallGraph
from pycallgraph.output import GraphvizOutput
from cp2kparser import CP2KParser

# NOTE: machine-specific absolute path; point it at a local CP2K output file
# before running this script.
filepath = "/home/lauri/Dropbox/nomad-dev/nomad-lab-base/parsers/cp2k/test/unittests/cp2k_2.6.2/energy_force/unittest.out"

# Only the parser construction and the parsing itself are traced.
with PyCallGraph(output=GraphvizOutput()):
    # The main file is handed to parse(), not to the constructor: the
    # constructor's first positional parameter is metainfo_to_keep, so passing
    # the path there would silently misconfigure the parser.
    parser = CP2KParser()
    parser.parse(filepath)
import cProfile
import pstats
from run_tests import get_results
def profile_energy_force():
    """Profile the CPU usage of parsing a RUN_TYPE ENERGY_FORCE calculation.

    Runs ``get_results("energy_force", "section_run")`` under cProfile and
    prints the statistics, sorted by cumulative time and limited to the first
    30 entries, to standard output.
    """
    profile = cProfile.Profile()
    # runcall() profiles the function object directly. Profile.run() would
    # exec a command string inside the __main__ namespace, which only resolves
    # get_results when this file itself is the entry script.
    profile.runcall(get_results, "energy_force", "section_run")

    stats = pstats.Stats(profile)
    stats.strip_dirs()
    stats.sort_stats("cumulative")
    stats.print_stats(30)


# Run the profiling when the file is executed as a script.
if __name__ == "__main__":
    profile_energy_force()
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment