Commit fbbd0f41 authored by Lauri Himanen
Browse files

Added some part of the basis set information, added atomic kind information,...

Added some part of the basis set information, added atomic kind information, added other misc stuff.
parent 1070a2d1
......@@ -69,6 +69,7 @@ parser/parser-cp2k/cp2kparser/versions/**/input_data/*.html
parser/parser-cp2k/cp2kparser/tools/input_metainfo.json
test/unittests/BASIS_SET
test/unittests/GTH_POTENTIALS
test/unittests/cp2k_2.6.2/data
# Setuptools install folder
parser/parser-cp2k/cp2kparser.egg-info/
......@@ -76,6 +76,19 @@ class CP2KInput(object):
return None
return section
def get_section_list(self, path):
    """Look up the subsection list stored under the last component of path.

    The path is split on "/". Every component except the last is resolved
    one by one with ``get_subsection``, starting from ``self.root_section``.
    The last component is then handed to ``get_subsection_list`` of the
    section reached that way.

    Args:
        path: "/"-separated section path, e.g. "FORCE_EVAL/SUBSYS/KIND".

    Returns:
        Whatever ``get_subsection_list`` returns for the last component, or
        None (after logging a warning) when one of the intermediate
        sections resolves to a falsy value.
    """
    parent_names = path.split("/")
    # pop() both removes the leaf from the parent chain and hands it back.
    leaf_name = parent_names.pop()

    current = self.root_section
    for parent_name in parent_names:
        current = current.get_subsection(parent_name)
        if not current:
            logger.warning(
                "The CP2K input does not contain the section {}".format(path)
            )
            return None

    return current.get_subsection_list(leaf_name)
def get_keyword_and_section(self, path):
split_path = path.rsplit("/", 1)
keyword = split_path[1]
......@@ -350,6 +363,10 @@ class Section(object):
else:
logger.error("The subsection '{}' in '{}' does not exist.".format(name, self.name))
def get_subsection_list(self, name):
    """Return the entry stored under name in ``self.sections``.

    Args:
        name: key to look up in ``self.sections``.

    Returns:
        The stored value, or None when the key is absent (``dict.get``
        semantics; no error is logged here).
    """
    return self.sections.get(name)
def get_section_parameter(self):
"""Get the section parameter, or if not specified the lone keyword
value.
......
......@@ -29,6 +29,8 @@ class CommonMatcher(object):
self.regex_eol = "[^\n]+" # Regex for a single alphabetical letter
self.section_method_index = None
self.section_system_index = None
self.test_electronic_structure_method = "DFT"
self.basis_to_kind_mapping = []
#=======================================================================
# Cache levels
......@@ -38,6 +40,8 @@ class CommonMatcher(object):
'self_interaction_correction_method': CachingLevel.Cache,
'x_cp2k_section_programinformation': CachingLevel.ForwardAndCache,
'x_cp2k_section_quickstep_settings': CachingLevel.ForwardAndCache,
'x_cp2k_section_atomic_kind': CachingLevel.ForwardAndCache,
'x_cp2k_section_kind_basis_set': CachingLevel.ForwardAndCache,
}
#=======================================================================
......@@ -203,6 +207,51 @@ class CommonMatcher(object):
],
otherMetaInfo=["self_interaction_correction_method"],
),
SM( " DFT\+U\|",
adHoc=self.adHoc_dft_plus_u(),
),
SM( " QS\|",
forwardMatch=True,
subMatchers=[
SM( " QS\| Method:\s+{}".format(self.regex_word)),
SM( " QS\| Density plane wave grid type\s+{}".format(self.regex_eol)),
SM( " QS\| Number of grid levels:\s+{}".format(self.regex_i)),
SM( " QS\| Density cutoff \[a\.u\.\]:\s+{}".format(self.regex_f)),
SM( " QS\| Multi grid cutoff \[a\.u\.\]: 1\) grid level\s+{}".format(self.regex_f)),
SM( " QS\| 2\) grid level\s+{}".format(self.regex_f)),
SM( " QS\| 3\) grid level\s+{}".format(self.regex_f)),
SM( " QS\| 4\) grid level\s+{}".format(self.regex_f)),
SM( " QS\| Grid level progression factor:\s+{}".format(self.regex_f)),
SM( " QS\| Relative density cutoff \[a\.u\.\]:".format(self.regex_f)),
SM( " QS\| Consistent realspace mapping and integration"),
SM( " QS\| Interaction thresholds: eps_pgf_orb:\s+{}".format(self.regex_f)),
SM( " QS\| eps_filter_matrix:\s+{}".format(self.regex_f)),
SM( " QS\| eps_core_charge:\s+{}".format(self.regex_f)),
SM( " QS\| eps_rho_gspace:\s+{}".format(self.regex_f)),
SM( " QS\| eps_rho_rspace:\s+{}".format(self.regex_f)),
SM( " QS\| eps_gvg_rspace:\s+{}".format(self.regex_f)),
SM( " QS\| eps_ppl:\s+{}".format(self.regex_f)),
SM( " QS\| eps_ppnl:\s+{}".format(self.regex_f)),
],
),
SM( " ATOMIC KIND INFORMATION",
sections=["x_cp2k_section_atomic_kinds", "section_method_basis_set"],
subMatchers=[
SM( "\s+(?P<x_cp2k_kind_number>{0})\. Atomic kind: (?P<x_cp2k_kind_element_symbol>{1})\s+Number of atoms:\s+(?P<x_cp2k_kind_number_of_atoms>{1})".format(self.regex_i, self.regex_word),
repeats=True,
sections=["x_cp2k_section_atomic_kind", "x_cp2k_section_kind_basis_set"],
subMatchers=[
SM( " Orbital Basis Set\s+(?P<x_cp2k_kind_basis_set_name>{})".format(self.regex_word)),
SM( " Number of orbital shell sets:\s+(?P<x_cp2k_basis_set_number_of_orbital_shell_sets>{})".format(self.regex_i)),
SM( " Number of orbital shells:\s+(?P<x_cp2k_basis_set_number_of_orbital_shells>{})".format(self.regex_i)),
SM( " Number of primitive Cartesian functions:\s+(?P<x_cp2k_basis_set_number_of_primitive_cartesian_functions>{})".format(self.regex_i)),
SM( " Number of Cartesian basis functions:\s+(?P<x_cp2k_basis_set_number_of_cartesian_basis_functions>{})".format(self.regex_i)),
SM( " Number of spherical basis functions:\s+(?P<x_cp2k_basis_set_number_of_spherical_basis_functions>{})".format(self.regex_i)),
SM( " Norm type:\s+(?P<x_cp2k_basis_set_norm_type>{})".format(self.regex_i)),
]
)
]
),
SM( " Total number of",
forwardMatch=True,
sections=["x_cp2k_section_total_numbers"],
......@@ -245,7 +294,13 @@ class CommonMatcher(object):
SM( " max_diis:\s+{}".format(self.regex_i)),
SM( " eps_scf:\s+(?P<scf_threshold_energy_change>{})".format(self.regex_f)),
]
)
),
SM( " MP2\|",
adHoc=self.adHoc_mp2()
),
SM( " RI-RPA\|",
adHoc=self.adHoc_rpa()
),
]
)
......@@ -285,7 +340,32 @@ class CommonMatcher(object):
def onClose_x_cp2k_section_quickstep_settings(self, backend, gIndex, section):
    """Emit the basis set type and electronic structure method on close.

    Writes the constant "gaussian" as ``program_basis_set_type`` and then the
    current value of ``self.test_electronic_structure_method`` as
    ``electronic_structure_method``.

    Args:
        backend: object receiving the values through ``addValue``.
        gIndex: not used by this handler.
        section: not used by this handler.
    """
    backend.addValue("program_basis_set_type", "gaussian")
    method_name = self.test_electronic_structure_method
    backend.addValue("electronic_structure_method", method_name)
def onClose_section_method_basis_set(self, backend, gIndex, section):
    """Emit the basis set summary values when the section closes.

    Writes the constant kind "wavefunction", the number of entries in
    ``self.basis_to_kind_mapping`` and the mapping itself converted to a
    NumPy array.

    Args:
        backend: object receiving the values through ``addValue`` and
            ``addArrayValues``.
        gIndex: not used by this handler.
        section: not used by this handler.
    """
    mapping = self.basis_to_kind_mapping
    backend.addValue("method_basis_set_kind", "wavefunction")
    backend.addValue("number_of_basis_sets_atom_centered", len(mapping))
    backend.addArrayValues(
        "mapping_section_method_basis_set_atom_centered",
        np.array(mapping),
    )
def onClose_x_cp2k_section_atomic_kind(self, backend, gIndex, section):
    """Emit atom kind and basis set entries for one atomic kind.

    Opens ``section_method_atom_kind`` and ``section_basis_set_atom_centered``,
    fills them from the latest values cached in ``section``, records the
    [basis id, kind id] pair in ``self.basis_to_kind_mapping`` and closes
    both sections again (basis set first, then atom kind).

    Args:
        backend: object providing ``openSection``, ``addValue`` and
            ``closeSection``.
        gIndex: not used by this handler.
        section: object providing ``get_latest_value``.
    """
    kind_gid = backend.openSection("section_method_atom_kind")
    basis_gid = backend.openSection("section_basis_set_atom_centered")

    symbol = section.get_latest_value("x_cp2k_kind_element_symbol")
    number = section.get_latest_value("x_cp2k_kind_number")
    # The basis set name is addressed by a two-element path.
    basis_name = section.get_latest_value(
        ["x_cp2k_section_kind_basis_set", "x_cp2k_kind_basis_set_name"]
    )
    atomic_number = self.get_atomic_number(symbol)
    label = symbol + str(number)

    backend.addValue("method_atom_kind_atom_number", atomic_number)
    backend.addValue("method_atom_kind_label", label)
    backend.addValue("basis_set_atom_number", atomic_number)
    backend.addValue("basis_set_atom_centered_short_name", basis_name)

    # Add the reference based mapping between basis and atomic kind
    self.basis_to_kind_mapping.append([basis_gid, kind_gid])

    backend.closeSection("section_basis_set_atom_centered", basis_gid)
    backend.closeSection("section_method_atom_kind", kind_gid)
def onClose_x_cp2k_section_programinformation(self, backend, gIndex, section):
input_file = section.get_latest_value("x_cp2k_input_filename")
......@@ -401,12 +481,12 @@ class CommonMatcher(object):
"""
def wrapper(parser):
parser.fIn.readline()
eigenvalues = np.array([float(x) for x in parser.fIn.readline().split()][::-1])
eigenvalues = np.array([float(x) for x in parser.fIn.readline().split()])
parser.fIn.readline()
row1 = [float(x) for x in parser.fIn.readline().split()]
row2 = [float(x) for x in parser.fIn.readline().split()]
row3 = [float(x) for x in parser.fIn.readline().split()]
eigenvectors = np.fliplr(np.array([row1, row2, row3]))
eigenvectors = np.array([row1, row2, row3])
parser.backend.addArrayValues("x_cp2k_stress_tensor_eigenvalues", eigenvalues, unit="GPa")
parser.backend.addArrayValues("x_cp2k_stress_tensor_eigenvectors", eigenvectors)
return wrapper
......@@ -432,7 +512,7 @@ class CommonMatcher(object):
def wrapper(parser):
# Define the regex that extracts the information
regex_string = r"\s+\d+\s+\d+\s+(\w+)\s+\d+\s+({0})\s+({0})\s+({0})".format(self.regex_f)
regex_string = r"\s+\d+\s+(\d+)\s+(\w+)\s+\d+\s+({0})\s+({0})\s+({0})".format(self.regex_f)
regex_compiled = re.compile(regex_string)
match = True
......@@ -450,9 +530,9 @@ class CommonMatcher(object):
if result:
match = True
label = result.groups()[0]
label = result.groups()[1] + result.groups()[0]
labels.append(label)
coordinate = [float(x) for x in result.groups()[1:]]
coordinate = [float(x) for x in result.groups()[2:]]
coordinates.append(coordinate)
else:
match = False
......@@ -487,12 +567,28 @@ class CommonMatcher(object):
return wrapper
def adHoc_dft_plus_u(self):
    """Build a callback that marks the electronic structure method as DFT+U.

    Returns:
        A one-argument callable. When invoked it sets
        ``self.test_electronic_structure_method`` to "DFT+U"; its argument
        is ignored. Nothing is changed until the callable is invoked.
    """
    def mark_dft_plus_u(parser):
        self.test_electronic_structure_method = "DFT+U"

    return mark_dft_plus_u
def adHoc_mp2(self):
    """Build a callback that marks the electronic structure method as MP2.

    Returns:
        A one-argument callable. When invoked it sets
        ``self.test_electronic_structure_method`` to "MP2"; its argument is
        ignored. Nothing is changed until the callable is invoked.
    """
    def mark_mp2(parser):
        self.test_electronic_structure_method = "MP2"

    return mark_mp2
def adHoc_rpa(self):
    """Build a callback that marks the electronic structure method as RPA.

    Returns:
        A one-argument callable. When invoked it sets
        ``self.test_electronic_structure_method`` to "RPA"; its argument is
        ignored. Nothing is changed until the callable is invoked.
    """
    def mark_rpa(parser):
        self.test_electronic_structure_method = "RPA"

    return mark_rpa
def debug(self):
    """Build a callback that prints a marker line when it is invoked.

    Returns:
        A one-argument callable that prints "FOUND" once to standard output
        and returns None; its argument is ignored.
    """
    def wrapper(parser):
        # Function-call form only: the Python 2 print statement that used to
        # sit next to this line is a SyntaxError on Python 3 and duplicated
        # the output.
        print("FOUND")
    return wrapper
#===========================================================================
# MISC functions
def getOnCloseTriggers(self):
"""
Returns:
......@@ -503,3 +599,42 @@ class CommonMatcher(object):
for attr, callback in extractOnCloseTriggers(self).items():
onClose[attr] = [callback]
return onClose
def get_atomic_number(self, symbol):
    """Translate a chemical symbol into its atomic number.

    The position of a symbol in the table below is its atomic number;
    index 0 is the placeholder symbol 'X'.

    Args:
        symbol: atomic symbol as string, spelled exactly as in the table
            (the lookup is case sensitive).

    Returns:
        The atomic number (number of protons) for the given symbol.

    Raises:
        KeyError: if the symbol is not in the table.
    """
    symbols_by_z = (
        'X', 'H', 'He', 'Li', 'Be', 'B', 'C', 'N', 'O', 'F',             # 0-9
        'Ne', 'Na', 'Mg', 'Al', 'Si', 'P', 'S', 'Cl', 'Ar', 'K',         # 10-19
        'Ca', 'Sc', 'Ti', 'V', 'Cr', 'Mn', 'Fe', 'Co', 'Ni', 'Cu',       # 20-29
        'Zn', 'Ga', 'Ge', 'As', 'Se', 'Br', 'Kr', 'Rb', 'Sr', 'Y',       # 30-39
        'Zr', 'Nb', 'Mo', 'Tc', 'Ru', 'Rh', 'Pd', 'Ag', 'Cd', 'In',      # 40-49
        'Sn', 'Sb', 'Te', 'I', 'Xe', 'Cs', 'Ba', 'La', 'Ce', 'Pr',       # 50-59
        'Nd', 'Pm', 'Sm', 'Eu', 'Gd', 'Tb', 'Dy', 'Ho', 'Er', 'Tm',      # 60-69
        'Yb', 'Lu', 'Hf', 'Ta', 'W', 'Re', 'Os', 'Ir', 'Pt', 'Au',       # 70-79
        'Hg', 'Tl', 'Pb', 'Bi', 'Po', 'At', 'Rn', 'Fr', 'Ra', 'Ac',      # 80-89
        'Th', 'Pa', 'U', 'Np', 'Pu', 'Am', 'Cm', 'Bk', 'Cf', 'Es',       # 90-99
        'Fm', 'Md', 'No', 'Lr',                                          # 100-103
    )
    number_by_symbol = {
        element: index for index, element in enumerate(symbols_by_z)
    }
    return number_by_symbol[symbol]
......@@ -64,7 +64,7 @@ class CP2KInputParser(BasicParser):
self.cache_service.add("vel_add_last")
self.cache_service.add("each_geo_opt")
self.cache_service.add("traj_add_last")
self.cache_service.add("electronic_structure_method")
# self.cache_service.add("electronic_structure_method")
def parse(self):
......@@ -219,6 +219,23 @@ class CP2KInputParser(BasicParser):
#=======================================================================
# See if some more exotic calculation is requested (e.g. MP2, DFT+U, GW, RPA)
# Search for a WF_CORRELATION section
# correlation = self.input_tree.get_section("FORCE_EVAL/DFT/XC/WF_CORRELATION")
# method = "DFT"
# if correlation.accessed:
# method = correlation.get_keyword_value_raw("METHOD")
# if method != "NONE":
# # Can't really decide which method used (MP2, RPA, GW)
# method = None
# # Search for DFT+U settings
# kinds = self.input_tree.get_section_list("FORCE_EVAL/SUBSYS/KIND")
# for kind in kinds:
# dft_u = kind.get_subsection("DFT_PLUS_U")
# if dft_u.accessed:
# method = "DFT+U"
# self.cache_service["electronic_structure_method"] = method
#=======================================================================
# Stress tensor calculation method
......
3
i = 1, E = -17.1638827717
O 0.0000004141 0.0000001855 -0.1099295744
H 0.0000000178 -0.7406066558 0.4944715801
H 0.0000000190 0.7406068464 0.4944717369
3
i = 2, E = -17.1638839665
O 0.0000000169 0.0000001909 -0.1091323465
H 0.0000000562 -0.7415786058 0.4930021571
H 0.0000000591 0.7415788439 0.4930023132
&GLOBAL
PROJECT H2O-rks-diag
PRINT_LEVEL low
RUN_TYPE GEO_OPT
&END GLOBAL
&MOTION
&GEO_OPT
OPTIMIZER BFGS
MAX_ITER 2
&END GEO_OPT
&END MOTION
&FORCE_EVAL
METHOD Quickstep
&DFT
BASIS_SET_FILE_NAME ../../data/GTH_BASIS_SETS
POTENTIAL_FILE_NAME ../../data/GTH_POTENTIALS
PLUS_U_METHOD mulliken_charges
&MGRID
CUTOFF 200
&END MGRID
&PRINT
&DFT_CONTROL_PARAMETERS
&END DFT_CONTROL_PARAMETERS
&END PRINT
&QS
EPS_DEFAULT 1.0E-8
MAP_CONSISTENT
&END QS
&SCF
EPS_SCF 1.0E-5
MAX_SCF 30
&OT OFF
MINIMIZER cg
&END OT
&OUTER_SCF OFF
EPS_SCF 1.0E-5
MAX_SCF 10
&END OUTER_SCF
&PRINT
&RESTART
&EACH
MD 1
QS_SCF 30
&END
ADD_LAST NUMERIC
FILENAME =RESTART-rks-diag
BACKUP_COPIES 0
&END RESTART
&END PRINT
SCF_GUESS atomic
&END SCF
&XC
&XC_FUNCTIONAL PBE
&END XC_FUNCTIONAL
&END XC
&END DFT
&SUBSYS
&CELL
ABC 4.0 4.0 4.0
&END CELL
&COORD
O 0.000 0.000 -0.111
H 0.000 -0.744 0.495
H 0.000 0.744 0.495
&END COORD
&KIND H
BASIS_SET DZVP-GTH
POTENTIAL GTH-PBE-q1
&END KIND
&KIND O
BASIS_SET DZVP-GTH
POTENTIAL GTH-PBE-q6
&DFT_PLUS_U
L 1
U_MINUS_J [eV] 2.0
&END DFT_PLUS_U
&END KIND
&PRINT
&KINDS
&END KINDS
&END PRINT
&END SUBSYS
&END FORCE_EVAL
&GLOBAL
PROJECT H2O-01
PRINT_LEVEL MEDIUM
RUN_TYPE ENERGY
&END GLOBAL
&FORCE_EVAL
METHOD Quickstep
&DFT
BASIS_SET_FILE_NAME ../../data/EMSL_BASIS_SETS
POTENTIAL_FILE_NAME ../../data/POTENTIAL
&MGRID
CUTOFF 300
REL_CUTOFF 40
&END MGRID
&POISSON
PERIODIC NONE
POISSON_SOLVER MT
&END POISSON
&QS
METHOD GAPW
EPS_DEFAULT 1.0E-12
&END QS
&SCF
SCF_GUESS ATOMIC
EPS_SCF 1.0E-5
MAX_SCF 1000
ADDED_MOS 1000 1000
&END SCF
&XC
&XC_FUNCTIONAL NONE
&END XC_FUNCTIONAL
&HF
FRACTION 1.0000000
&SCREENING
EPS_SCHWARZ 1.0E-7
&END SCREENING
&END HF
&WF_CORRELATION
METHOD DIRECT_CANONICAL
MEMORY 100.
&END
&END XC
! UKS
&END DFT
&SUBSYS
&CELL
ABC 7.0 7.0 7.0
PERIODIC NONE
&END CELL
&COORD
O 0.000000 0.000000 -0.111000
H 0.000000 -0.744000 0.495000
H 0.000000 0.744000 0.495000
&END COORD
&KIND H
BASIS_SET aug-cc-pVDZ
POTENTIAL ALL
&END KIND
&KIND O
BASIS_SET aug-cc-pVDZ
POTENTIAL ALL
&END KIND
&TOPOLOGY
&CENTER_COORDINATES
&END
&END TOPOLOGY
&END SUBSYS
&END FORCE_EVAL
3
O 0.000000 0.000000 -0.111000
H 0.000000 -0.744000 0.495000
H 0.000000 0.744000 0.495000
&GLOBAL
PROJECT RI_RPA_H2O
PRINT_LEVEL MEDIUM
RUN_TYPE ENERGY
&TIMINGS
THRESHOLD 0.01
&END
&END GLOBAL
&FORCE_EVAL
METHOD Quickstep
&DFT
BASIS_SET_FILE_NAME ../../data/HFX_BASIS
POTENTIAL_FILE_NAME ../../data/GTH_POTENTIALS
&MGRID
CUTOFF 100
REL_CUTOFF 20
&END MGRID
&POISSON
PERIODIC NONE
POISSON_SOLVER WAVELET
&END POISSON
&QS
METHOD GPW
EPS_DEFAULT 1.0E-15
EPS_PGF_ORB 1.0E-30
&END QS
&SCF
SCF_GUESS ATOMIC
EPS_SCF 1.0E-7
MAX_SCF 100
&PRINT
&RESTART OFF
&END
&END
&END SCF
&XC
&XC_FUNCTIONAL PBE
&END XC_FUNCTIONAL
&WF_CORRELATION
METHOD RI_RPA_GPW
&WFC_GPW
CUTOFF 100
REL_CUTOFF 20
&END
&RI_RPA
RPA_NUM_QUAD_POINTS 40
&HF
FRACTION 1.0000000
&SCREENING
EPS_SCHWARZ 1.0E-8
SCREEN_ON_INITIAL_P FALSE
&END SCREENING
&END HF
&END RI_RPA
MEMORY 200.
NUMBER_PROC 1
&END
&END XC
&END DFT
&SUBSYS
&CELL
ABC [angstrom] 8.000 8.000 8.000
PERIODIC NONE
&END CELL
&KIND H
BASIS_SET DZVP-GTH
RI_AUX_BASIS_SET RI_DZVP-GTH
POTENTIAL GTH-PBE-q1
&END KIND
&KIND O
BASIS_SET DZVP-GTH
RI_AUX_BASIS_SET RI_DZVP-GTH
POTENTIAL GTH-PBE-q6
&END KIND
&TOPOLOGY
COORD_FILE_NAME H2O_gas.xyz
COORD_FILE_FORMAT xyz
&CENTER_COORDINATES
&END
&END TOPOLOGY
&END SUBSYS
&END FORCE_EVAL
NONBONDED NEIGHBOR LISTS IN angstrom (PROCESS 1)
Atom-A X Y Z Atom-B X Y Z Cell(i,j,k) Distance ONFO VDW-scale EI-scale
7 1.357674 -1.357674 -1.357674 1 0.000000 0.000000 0.000000 0 0 0 2.3516
5 -1.357674 1.357674 -1.357674 1 0.000000 0.000000 0.000000 0 0 0 2.3516
8 -1.357674 -1.357674 1.357674 1 0.000000 0.000000 0.000000 0 0 0 2.3516
6 1.357674 1.357674 1.357674 1 0.000000 0.000000 0.000000 0 0 0 2.3516
6 1.357674 1.357674 1.357674 2 0.000000 2.715349 2.715349 0 0 0 2.3516
6 1.357674 1.357674 1.357674 3 2.715349 2.715349 0.000000 0 0 0 2.3516
6 1.357674 1.357674 1.357674 4 2.715349 0.000000 2.715349 0 0 0 2.3516
7 1.357674 -1.357674 -1.357674 3 2.715349 2.715349 0.000000 0 -1 0 2.3516
5 -1.357674 1.357674 -1.357674 3 2.715349 2.715349 0.000000 -1 0 0 2.3516
8 -1.357674 -1.357674 1.357674 4 2.715349 0.000000 2.715349 -1 0 0 2.3516
2 0.000000 2.715349 2.715349 8 -1.357674 -1.357674 1.357674 0 1 0 2.3516
3 2.715349 2.715349 0.000000 8 -1.357674 -1.357674 1.357674 1 1 0 2.3516
4 2.715349 0.000000 2.715349 7 1.357674 -1.357674 -1.357674 0 0 1 2.3516
2 0.000000 2.715349 2.715349 5 -1.357674 1.357674 -1.357674 0 0 1 2.3516
4 2.715349 0.000000 2.715349 5 -1.357674 1.357674 -1.357674 1 0 1 2.3516
2 0.000000 2.715349 2.715349 7 1.357674 -1.357674 -1.357674 0 1 1 2.3516
Total number of neighbor interactions for process 1: 16