Commit 2560d3cb authored by Lauri Himanen's avatar Lauri Himanen

Broke the parsing of different CP2K run types into separate modules. The...

Broke the parsing of different CP2K run types into separate modules. The common functionality is now found in 'commonmatcher.py'. I think that this will make the code maintenance much easier than having everything in one huge file.
parent c45e41aa
......@@ -3,7 +3,7 @@ import re
import logging
from nomadcore.baseclasses import ParserInterface
from cp2kparser.versions.versionsetup import get_main_parser
logger = logging.getLogger(__name__)
logger = logging.getLogger("nomad")
#===============================================================================
......@@ -22,19 +22,31 @@ class CP2KParser(ParserInterface):
"""Setups the version by looking at the output file and the version
specified in it.
"""
# Search for the version specification and initialize a correct
# main parser for this version.
regex = re.compile(r" CP2K\| version string:\s+CP2K version ([\d\.]+)")
n_lines = 30
# Search for the CP2K version specification and the RUN_TYPE for the
# calculation. The correct and optimized parser is initialized based on
# this information.
regex_version = re.compile(r" CP2K\| version string:\s+CP2K version ([\d\.]+)")
regex_run_type = re.compile(r"\s+GLOBAL\| Run type\s+(.+)")
n_lines = 50
version_id = None
run_type = None
with open(self.parser_context.main_file, 'r') as outputfile:
for i_line in xrange(n_lines):
line = next(outputfile)
result = regex.match(line)
if result:
version_id = result.group(1).replace('.', '')
break
if not result:
logger.error("Could not find a version specification from the given main file.")
result_version = regex_version.match(line)
result_run_type = regex_run_type.match(line)
if result_version:
version_id = result_version.group(1).replace('.', '')
if result_run_type:
run_type = result_run_type.group(1)
if version_id is None:
msg = "Could not find a version specification from the given main file."
logger.exception(msg)
raise RuntimeError(msg)
if run_type is None:
msg = "Could not find a version specification from the given main file."
logger.exception(msg)
raise RuntimeError(msg)
# Setup the root folder to the fileservice that is used to access files
dirpath, filename = os.path.split(self.parser_context.main_file)
......@@ -44,7 +56,7 @@ class CP2KParser(ParserInterface):
# Setup the correct main parser based on the version id. If no match
# for the version is found, use the main parser for CP2K 2.6.2
self.main_parser = get_main_parser(version_id)(self.parser_context.main_file, self.parser_context)
self.main_parser = get_main_parser(version_id, run_type)(self.parser_context.main_file, self.parser_context)
def get_metainfo_filename(self):
    """Returns the name of the metainfo file used by this parser.

    Returns:
        The file name as a string.
    """
    metainfo_filename = "cp2k.nomadmetainfo.json"
    return metainfo_filename
......
import re
import numpy as np
from nomadcore.simple_parser import SimpleMatcher as SM
from inputparser import CP2KInputParser
import logging
from nomadcore.simple_parser import extractOnCloseTriggers
logger = logging.getLogger("nomad")
#===============================================================================
class CommonMatcher(object):
    """
    This class is used to store and instantiate common parts of the
    hierarchical SimpleMatcher structure used in the parsing of a CP2K
    output file.
    """
    def __init__(self, parser_context):
        """Stores the parser context and the regex fragments that are reused
        by the matchers.

        Args:
            parser_context: The context object of the parser. Its
                'file_service' attribute is stored for later file lookups.
        """
        self.parser_context = parser_context
        self.file_service = parser_context.file_service

        # Repeating regex definitions. Raw strings are used so that the
        # backslashes reach the regex engine without relying on Python
        # passing unknown escape sequences through unchanged.
        self.regex_f = r"-?\d+\.\d+(?:E(?:\+|-)\d+)?"  # Regex for a floating point value
        self.regex_i = r"-?\d+"  # Regex for an integer

    def adHoc_cp2k_section_cell(self):
        """Used to extract the cell information.

        Returns:
            A callback that takes the running parser as its only argument.
            The callback reads the next three lines from 'parser.fIn',
            extracts three floating point components from each of them and
            pushes the resulting 3x3 array to the backend as
            'simulation_cell' in angstroms.

        Raises:
            AttributeError: Raised by the returned callback if any of the
                three lines does not match the cell vector pattern.
        """
        # Compile the pattern once when the callback is created instead of on
        # every invocation of the callback.
        regex_string = r" CELL\| Vector \w \[angstrom\]:\s+({0})\s+({0})\s+({0})".format(self.regex_f)
        regex_compiled = re.compile(regex_string)

        def wrapper(parser):
            # Read the lines containing the cell vectors a, b and c
            lines = [parser.fIn.readline() for _ in range(3)]

            # Apply the regex to each line
            matches = [regex_compiled.match(line) for line in lines]

            # Convert the string results into a 3x3 numpy array, one cell
            # vector per row
            cell = np.array([[float(x) for x in match.groups()] for match in matches])

            # Push the results to the correct section
            parser.backend.addArrayValues("simulation_cell", cell, unit="angstrom")

        return wrapper

    def header(self):
        """SimpleMatcher for the header that is common to all run types.

        Returns:
            The root SimpleMatcher of the header with its submatchers.
        """
        return SM(r" DBCSR\| Multiplication driver",
            forwardMatch=True,
            subMatchers=[
                SM(r" DBCSR\| Multiplication driver",
                    sections=['cp2k_section_dbcsr'],
                ),
                # The dot before the milliseconds is escaped so that it only
                # matches a literal decimal point.
                SM(r" \*\*\*\* \*\*\*\* \*\*\*\*\*\* \*\* PROGRAM STARTED AT\s+(?P<cp2k_run_start_date>\d{4}-\d{2}-\d{2}) (?P<cp2k_run_start_time>\d{2}:\d{2}:\d{2}\.\d{3})",
                    sections=['cp2k_section_startinformation'],
                ),
                SM(r" CP2K\|",
                    sections=['cp2k_section_programinformation'],
                    forwardMatch=True,
                    subMatchers=[
                        SM(r" CP2K\| version string:\s+(?P<program_version>[\w\d\W\s]+)"),
                        SM(r" CP2K\| source code revision number:\s+svn:(?P<cp2k_svn_revision>\d+)"),
                    ]
                ),
                SM(r" CP2K\| Input file name\s+(?P<cp2k_input_filename>.+$)",
                    sections=['cp2k_section_filenames'],
                    subMatchers=[
                        SM(r" GLOBAL\| Basis set file name\s+(?P<cp2k_basis_set_filename>.+$)"),
                        SM(r" GLOBAL\| Geminal file name\s+(?P<cp2k_geminal_filename>.+$)"),
                        SM(r" GLOBAL\| Potential file name\s+(?P<cp2k_potential_filename>.+$)"),
                        SM(r" GLOBAL\| MM Potential file name\s+(?P<cp2k_mm_potential_filename>.+$)"),
                        SM(r" GLOBAL\| Coordinate file name\s+(?P<cp2k_coordinate_filename>.+$)"),
                    ]
                ),
                SM(r" CELL\|",
                    adHoc=self.adHoc_cp2k_section_cell(),
                    otherMetaInfo=["simulation_cell"]
                ),
                SM(r" DFT\|",
                    otherMetaInfo=["XC_functional", "self_interaction_correction_method"],
                    forwardMatch=True,
                    subMatchers=[
                        SM(r" DFT\| Multiplicity\s+(?P<target_multiplicity>{})".format(self.regex_i)),
                        SM(r" DFT\| Charge\s+(?P<total_charge>{})".format(self.regex_i)),
                        SM(r" DFT\| Self-interaction correction \(SIC\)\s+(?P<self_interaction_correction_method>[^\n]+)"),
                    ]
                ),
                SM(r" TOTAL NUMBERS AND MAXIMUM NUMBERS",
                    sections=["cp2k_section_total_numbers"],
                    subMatchers=[
                        SM(r"\s+- Atoms:\s+(?P<number_of_atoms>\d+)"),
                        SM(r"\s+- Shell sets:\s+(?P<cp2k_shell_sets>\d+)")
                    ]
                )
            ]
        )

    def onClose_section_method(self, backend, gIndex, section):
        """Translates the CP2K self-interaction correction string gathered in
        the section into the NOMAD correspondent and puts it into the backend.

        Args:
            backend: The backend whose 'superBackend' receives the value.
            gIndex: Index of the section being closed. Not used here.
            section: The closed section. The first value stored under
                'self_interaction_correction_method' is read from it.
        """
        # Transform the CP2K self-interaction correction string to the NOMAD
        # correspondent, and push directly to the superBackend to avoid caching
        sic_cp2k = section["self_interaction_correction_method"][0]
        sic_map = {
            "NO": "",
            "AD SIC": "SIC_AD",
            "Explicit Orbital SIC": "SIC_EXPLICIT_ORBITALS",
            "SPZ/MAURI SIC": "SIC_MAURI_SPZ",
            "US/MAURI SIC": "SIC_MAURI_US",
        }
        sic_nomad = sic_map.get(sic_cp2k)
        if sic_nomad is not None:
            backend.superBackend.addValue('self_interaction_correction_method', sic_nomad)
        else:
            logger.warning("Unknown self-interaction correction method used.")

    def onClose_cp2k_section_filenames(self, backend, gIndex, section):
        """Parses the CP2K input file if the file service can locate it.

        Args:
            backend: Not used here.
            gIndex: Index of the section being closed. Not used here.
            section: The closed section. The first value stored under
                'cp2k_input_filename' is used as the input file name.
        """
        # If the input file is available, parse it
        input_file = section["cp2k_input_filename"][0]
        filepath = self.file_service.get_absolute_path_to_file(input_file)
        if filepath is not None:
            input_parser = CP2KInputParser(filepath, self.parser_context)
            input_parser.parse()
        else:
            logger.warning("The input file of the calculation could not be found.")

    def getOnCloseTriggers(self):
        """
        Returns:
            A dictionary containing a section name as a key, and a list of
            trigger functions associated with closing that section.
        """
        # Each callback is wrapped into a single-item list
        return {
            attr: [callback]
            for attr, callback in extractOnCloseTriggers(self).items()
        }
"""Returns the main parser class based on the given version identifier. The
different versions are grouped into subpackages.
"""
import importlib
import logging
logger = logging.getLogger("nomad")
def get_main_parser(version_id):
#===============================================================================
def get_main_parser(version_id, run_type):
    """
    Setups a main parser class for this calculation. The main class can be
    different for each version and run type.

    Args:
        version_id: A string of digits representing the CP2K version. The
            version number is originally a string of the form '2.6.2', but
            here the numbers are just concatenated into a single string
            '262'.
        run_type: A string that identifies the RUN_TYPE for the calculation.
            All the possible run types can be found in the CP2K reference
            manual.

    Returns:
        A python class that should be instantiated later with the correct
        parameters.

    Raises:
        KeyError: If no parser is registered for the given run_type.
        ImportError: If neither the version specific module nor the default
            CP2K 2.6.2 module can be imported.
        AttributeError: If the imported module does not contain the expected
            parser class.
    """
    # Search for a RUN_TYPE specific parser
    parser_map = {
        "ENERGY": "SinglePointParser",
        "ENERGY_FORCE": "SinglePointParser",
    }
    try:
        parser = parser_map[run_type]
    except KeyError:
        logger.exception("A parser corresponding to the run_type '{}' could not be found.".format(run_type))
        raise

    # The module is named after the lowercased parser name, and the class
    # inside it carries the 'CP2K' prefix.
    module_name = parser.lower()
    class_name = "CP2K{}".format(parser)

    # Currently the version id consists purely of digits, so it can directly
    # be mapped into a package name.
    base = "cp2kparser.versions.cp2k{}.{}".format(version_id, module_name)
    try:
        parser_module = importlib.import_module(base)
    except ImportError:
        logger.warning("Could not find a parser for version '{}' and run type '{}'. Trying to default to the base implementation for CP2K 2.6.2".format(version_id, run_type))
        base = "cp2kparser.versions.cp2k262.{}".format(module_name)
        try:
            parser_module = importlib.import_module(base)
        except ImportError:
            logger.exception("Tried to default to the CP2K 2.6.2 implementation but could not find the correct modules for run_type '{}'.".format(run_type))
            raise

    try:
        parser_class = getattr(parser_module, class_name)
    except AttributeError:
        # Report the name that was looked up; the class itself is not
        # available at this point.
        logger.exception("A parser class '{}' could not be found in the module '{}'.".format(class_name, parser_module))
        raise
    return parser_class
NONBONDED NEIGHBOR LISTS IN angstrom (PROCESS 0)
Atom-A X Y Z Atom-B X Y Z Cell(i,j,k) Distance ONFO VDW-scale EI-scale
7 1.357674 -1.357674 -1.357674 1 0.000000 0.000000 0.000000 0 0 0 2.3516
5 -1.357674 1.357674 -1.357674 1 0.000000 0.000000 0.000000 0 0 0 2.3516
8 -1.357674 -1.357674 1.357674 1 0.000000 0.000000 0.000000 0 0 0 2.3516
6 1.357674 1.357674 1.357674 1 0.000000 0.000000 0.000000 0 0 0 2.3516
6 1.357674 1.357674 1.357674 2 0.000000 2.715349 2.715349 0 0 0 2.3516
6 1.357674 1.357674 1.357674 3 2.715349 2.715349 0.000000 0 0 0 2.3516
6 1.357674 1.357674 1.357674 4 2.715349 0.000000 2.715349 0 0 0 2.3516
7 1.357674 -1.357674 -1.357674 3 2.715349 2.715349 0.000000 0 -1 0 2.3516
5 -1.357674 1.357674 -1.357674 3 2.715349 2.715349 0.000000 -1 0 0 2.3516
8 -1.357674 -1.357674 1.357674 4 2.715349 0.000000 2.715349 -1 0 0 2.3516
2 0.000000 2.715349 2.715349 8 -1.357674 -1.357674 1.357674 0 1 0 2.3516
3 2.715349 2.715349 0.000000 8 -1.357674 -1.357674 1.357674 1 1 0 2.3516
4 2.715349 0.000000 2.715349 7 1.357674 -1.357674 -1.357674 0 0 1 2.3516
2 0.000000 2.715349 2.715349 5 -1.357674 1.357674 -1.357674 0 0 1 2.3516
4 2.715349 0.000000 2.715349 5 -1.357674 1.357674 -1.357674 1 0 1 2.3516
2 0.000000 2.715349 2.715349 7 1.357674 -1.357674 -1.357674 0 1 1 2.3516
Total number of neighbor interactions for process 0: 16
&GLOBAL
PROJECT Si_bulk8
RUN_TYPE ENERGY_FORCE
PRINT_LEVEL HIGH
&END GLOBAL
&FORCE_EVAL
METHOD Quickstep
&SUBSYS
&KIND Si
ELEMENT Si
BASIS_SET DZVP-GTH-PADE
POTENTIAL GTH-PADE-q4
&END KIND
&CELL
A 5.430697500 0.000000000 0.000000000
B 0.000000000 5.430697500 0.000000000
C 0.000000000 0.000000000 5.430697500
&END CELL
&COORD
Si 0.000000000 0.000000000 0.000000000
Si 0.000000000 2.715348700 2.715348700
Si 2.715348700 2.715348700 0.000000000
Si 2.715348700 0.000000000 2.715348700
Si 4.073023100 1.357674400 4.073023100
Si 1.357674400 1.357674400 1.357674400
Si 1.357674400 4.073023100 4.073023100
Si 4.073023100 4.073023100 1.357674400
&END COORD
&END SUBSYS
&DFT
BASIS_SET_FILE_NAME ../../BASIS_SET
POTENTIAL_FILE_NAME ../../GTH_POTENTIALS
&QS
EPS_DEFAULT 1.0E-10
&END QS
&MGRID
NGRIDS 4
CUTOFF 300
REL_CUTOFF 60
&END MGRID
&XC
&XC_FUNCTIONAL PADE
&END XC_FUNCTIONAL
&END XC
&SCF
SCF_GUESS ATOMIC
EPS_SCF 1.0E-7
MAX_SCF 300
&DIAGONALIZATION ON
ALGORITHM STANDARD
&END DIAGONALIZATION
&MIXING T
METHOD BROYDEN_MIXING
ALPHA 0.4
NBROYDEN 8
&END MIXING
&END SCF
&END DFT
&PRINT
&FORCES ON
&END FORCES
&END PRINT
&END FORCE_EVAL
This diff is collapsed.
&GLOBAL
RUN_TYPE ENERGY_FORCE
PROJECT_NAME si_bulk
PRINT_LEVEL MEDIUM
&END GLOBAL
&FORCE_EVAL
METHOD Quickstep
&DFT
POTENTIAL_FILE_NAME ../../GTH_POTENTIALS
BASIS_SET_FILE_NAME ../../BASIS_SET
&QS
EPS_DEFAULT 1e-3
&END QS
&SCF
MAX_SCF 100
EPS_SCF 1e-03
SCF_GUESS ATOMIC
&PRINT
&RESTART OFF
BACKUP_COPIES 0
&END RESTART
&END PRINT
&END SCF
&XC
&XC_FUNCTIONAL LDA
&END XC_FUNCTIONAL
&END XC
&MGRID
CUTOFF 50
NGRIDS 2
REL_CUTOFF 25
&END MGRID
&END DFT
&SUBSYS
&COORD
Si 0.0 0.0 0.0
&END COORD
&CELL
A 5.4306975 0.0 0.0
C 0.0 0.0 5.4306975
B 0.0 5.4306975 0.0
PERIODIC XYZ
&END CELL
&KIND Si
POTENTIAL GTH-PADE-q4
BASIS_SET DZVP-GTH-PADE
&END KIND
&END SUBSYS
&END FORCE_EVAL
&GLOBAL
PROJECT Si_bulk8
RUN_TYPE ENERGY_FORCE
PRINT_LEVEL HIGH
&END GLOBAL
&FORCE_EVAL
METHOD Quickstep
&SUBSYS
&KIND Si
ELEMENT Si
BASIS_SET DZVP-GTH-PADE
POTENTIAL GTH-PADE-q4
&END KIND
&CELL
A 5.430697500 0.000000000 0.000000000
B 0.000000000 5.430697500 0.000000000
C 0.000000000 0.000000000 5.430697500
&END CELL
&COORD
Si 0.000000000 0.000000000 0.000000000
Si 0.000000000 2.715348700 2.715348700
Si 2.715348700 2.715348700 0.000000000
Si 2.715348700 0.000000000 2.715348700
Si 4.073023100 1.357674400 4.073023100
Si 1.357674400 1.357674400 1.357674400
Si 1.357674400 4.073023100 4.073023100
Si 4.073023100 4.073023100 1.357674400
&END COORD
&END SUBSYS
&DFT
BASIS_SET_FILE_NAME ../BASIS_SET
POTENTIAL_FILE_NAME ../GTH_POTENTIALS
&QS
EPS_DEFAULT 1.0E-10
&END QS
&MGRID
NGRIDS 4
CUTOFF 300
REL_CUTOFF 60
&END MGRID
&XC
&XC_FUNCTIONAL PADE
&END XC_FUNCTIONAL
&END XC
&SCF
SCF_GUESS ATOMIC
EPS_SCF 1.0E-7
MAX_SCF 300
&DIAGONALIZATION ON
ALGORITHM STANDARD
&END DIAGONALIZATION
&MIXING T
METHOD BROYDEN_MIXING
ALPHA 0.4
NBROYDEN 8
&END MIXING
&END SCF
&END DFT
&PRINT
&FORCES ON
&END FORCES
&END PRINT
&END FORCE_EVAL
This diff is collapsed.
&GLOBAL
PROJECT Si_bulk8
RUN_TYPE ENERGY_FORCE
PRINT_LEVEL HIGH
&END GLOBAL
&FORCE_EVAL
METHOD Quickstep
&SUBSYS
&KIND Si
ELEMENT Si
BASIS_SET DZVP-GTH-PADE
POTENTIAL GTH-PADE-q4
&END KIND
&CELL
A 5.430697500 0.000000000 0.000000000
B 0.000000000 5.430697500 0.000000000
C 0.000000000 0.000000000 5.430697500
&END CELL
&COORD
Si 0.000000000 0.000000000 0.000000000
Si 0.000000000 2.715348700 2.715348700
Si 2.715348700 2.715348700 0.000000000
Si 2.715348700 0.000000000 2.715348700
Si 4.073023100 1.357674400 4.073023100
Si 1.357674400 1.357674400 1.357674400
Si 1.357674400 4.073023100 4.073023100
Si 4.073023100 4.073023100 1.357674400
&END COORD
&END SUBSYS
&DFT
BASIS_SET_FILE_NAME ../BASIS_SET
POTENTIAL_FILE_NAME ../GTH_POTENTIALS
&QS
EPS_DEFAULT 1.0E-10
&END QS
&MGRID
NGRIDS 4
CUTOFF 300
REL_CUTOFF 60
&END MGRID
&XC
&XC_FUNCTIONAL PADE
&END XC_FUNCTIONAL
&END XC
&SCF
SCF_GUESS ATOMIC
EPS_SCF 1.0E-7
MAX_SCF 300
&DIAGONALIZATION ON
ALGORITHM STANDARD
&END DIAGONALIZATION
&MIXING T
METHOD BROYDEN_MIXING
ALPHA 0.4
NBROYDEN 8
&END MIXING
&END SCF
&END DFT
&PRINT
&FORCES ON
&END FORCES
&END PRINT
&END FORCE_EVAL
This diff is collapsed.
......@@ -47,6 +47,23 @@ def get_result(folder, metaname):
return result
#===============================================================================
class TestErrors(unittest.TestCase):
    """Tests miscellaneous error situations which may occur during the
    parsing.
    """
    def test_no_file(self):
        # A missing main file is expected to raise an IOError
        with self.assertRaises(IOError):
            get_result("errors/no_file", "XC_functional")

    def test_invalid_file(self):
        # A main file without the expected content is expected to raise a
        # RuntimeError
        with self.assertRaises(RuntimeError):
            get_result("errors/invalid_file", "XC_functional")

    def test_invalid_run_type(self):
        # A run type without a registered parser is expected to raise a
        # KeyError
        with self.assertRaises(KeyError):
            get_result("errors/invalid_run_type", "XC_functional")

    def test_unknown_version(self):
        # No assertion is made on the result: the test passes as long as
        # the call does not raise.
        get_result("errors/unknown_version", "XC_functional")
#===============================================================================
class TestXCFunctional(unittest.TestCase):
"""Tests that the XC functionals can be properly parsed.
......@@ -85,6 +102,21 @@ class TestXCFunctional(unittest.TestCase):
self.assertEqual(xc, "1*GGA_C_PBE+1*GGA_X_PBE")
#===============================================================================
class TestSCFConvergence(unittest.TestCase):
    """Tests that the convergence status of a calculation is parsed
    correctly.
    """
    METANAME = "single_configuration_calculation_converged"

    def test_converged(self):
        # The converged fixture must yield a truthy convergence flag
        self.assertTrue(get_result("convergence/converged", self.METANAME))

    def test_non_converged(self):
        # The non-converged fixture must yield a falsy convergence flag
        self.assertFalse(get_result("convergence/non_converged", self.METANAME))
#===============================================================================
class TestForceFiles(unittest.TestCase):
"""Tests that different force files that can be output, can actually be
......@@ -287,6 +319,22 @@ class TestEnergyForce(unittest.TestCase):
charge = self.results["total_charge"]
self.assertEqual(charge, 0)
def test_single_configuration_calculation_converged(self):
    """Checks that the parsed convergence flag in the results is truthy."""
    self.assertTrue(self.results["single_configuration_calculation_converged"])
def test_scf_dft_number_of_iterations(self):
result = self.results["scf_dft_number_of_iterations"]