Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
nomad-lab
parser-cp2k
Commits
e9092f06
Commit
e9092f06
authored
Dec 02, 2015
by
Lauri Himanen
Browse files
Started doing the cell parsing, etc.
parent
8341d560
Changes
10
Hide whitespace changes
Inline
Side-by-side
cp2kparser/engines/atomsengine.py
View file @
e9092f06
import
ase.io
import
ase.io
import
logging
import
logging
import
MDAnalysis
logger
=
logging
.
getLogger
(
__name__
)
logger
=
logging
.
getLogger
(
__name__
)
...
@@ -9,39 +8,24 @@ class AtomsEngine(object):
...
@@ -9,39 +8,24 @@ class AtomsEngine(object):
"""Used to parse various different atomic coordinate files.
"""Used to parse various different atomic coordinate files.
See the dictionary 'formats' for all the supported formats and a brief
See the dictionary 'formats' for all the supported formats and a brief
explanation.
explanation. Reading is primarily done by ASE or MDAnalysis, but in some cases own
implementation is used. Returns all coordinates as numpy arrays.
Reading is primarily done by ASE or MDAnalysis, but in some cases own
implementation had to be made.
Returns all coordinates as numpy arrays.
"""
"""
formats
=
{
formats
=
{
"xyz"
:
"
"
,
"xyz"
:
"(.xyz): The XYZ file format.
"
,
"cif"
:
"(.cif): Crystallographic Information File"
,
"cif"
:
"(.cif): Crystallographic Information File"
,
"pdb
-cp2k"
:
"(.pdb): Protein Data Bank
file written by CP2K, the format is a bit peculiar so a custom implementation is used
"
,
"pdb
"
:
"(.pdb): Protein Data Bank"
,
"pdb"
:
"(.pdb): Protein Data Bank
"
,
#"dcd": "(.dcd): Binary trajectory file format used by CHARMM, NAMD, and X-PLOR.
",
}
}
def
__init__
(
self
,
parser
):
"""
Args:
cp2k_parser: Instance of a NomadParser or its subclass. Allows
access to e.g. unified file reading methods.
"""
self
.
parser
=
parser
def
determine_tool
(
self
,
format
):
def
determine_tool
(
self
,
format
):
"""Determines which tool to use for extracting trajectories in the
"""Determines which tool to use for extracting trajectories in the
given format.
given format.
"""
"""
ASE
=
"ASE"
custom
=
"custom"
formats
=
{
formats
=
{
"xyz"
:
ASE
,
"xyz"
:
"ASE"
,
"cif"
:
ASE
,
"cif"
:
"ASE"
,
"pdb-cp2k"
:
custom
,
"pdb"
:
"ASE"
,
"pdb"
:
ASE
,
}
}
result
=
formats
.
get
(
format
)
result
=
formats
.
get
(
format
)
if
result
:
if
result
:
...
@@ -59,15 +43,15 @@ class AtomsEngine(object):
...
@@ -59,15 +43,15 @@ class AtomsEngine(object):
else
:
else
:
return
True
return
True
def
n_atoms
(
self
,
contents
,
format
):
def
n_atoms
(
self
,
file_handle
,
format
):
"""Read the first configuration of the coordinate file to extract the
"""Read the first configuration of the coordinate file to extract the
number of atoms in it.
number of atoms in it.
"""
"""
iterator
=
self
.
iread
(
contents
,
format
)
iterator
=
self
.
iread
(
file_handle
,
format
)
pos
=
iterator
.
next
()
pos
=
iterator
.
next
()
return
pos
.
shape
[
0
]
return
pos
.
shape
[
0
]
def
iread
(
self
,
contents
,
format
,
index
=
0
):
def
iread
(
self
,
file_handle
,
format
,
index
=
0
):
"""Returns an iterator that goes through the given trajectory file one
"""Returns an iterator that goes through the given trajectory file one
configuration at a time. Good for e.g. streaming the contents to disc as the
configuration at a time. Good for e.g. streaming the contents to disc as the
whole file doesn't have to be loaded into memory.
whole file doesn't have to be loaded into memory.
...
@@ -76,22 +60,35 @@ class AtomsEngine(object):
...
@@ -76,22 +60,35 @@ class AtomsEngine(object):
if
not
self
.
check_format_support
(
format
):
if
not
self
.
check_format_support
(
format
):
return
return
if
file_handle
is
None
:
print
"NONE"
tool
=
self
.
determine_tool
(
format
)
tool
=
self
.
determine_tool
(
format
)
if
tool
==
"ASE"
:
return
self
.
ase_iread
(
file_handle
,
format
,
index
)
elif
tool
==
"custom"
:
return
self
.
custom_iread
(
file_handle
,
format
,
index
)
elif
tool
==
"MDAnalysis"
:
return
self
.
mdanalysis_iread
(
file_handle
,
format
,
index
)
def
ase_iread
(
self
,
file_handle
,
format
,
index
):
"""
"""
# After reading the ASE source code, it seems that the ASE iread does
# After reading the ASE source code, it seems that the ASE iread does
# actually read the entire file into memory and then yields the
# actually read the entire file into memory and then yields the
# configurations from it. Should be checked at some point.
# configurations from it. Should be checked at some point.
if
tool
==
"ASE"
:
def
ase_generator
(
iterator
):
iterator
=
ase
.
io
.
iread
(
contents
,
format
=
format
)
"""Used to wrap an iterator returned by ase.io.iread so that it returns
return
self
.
ase_wrapper
(
iterator
)
the positions instead of the ase.Atoms object.
elif
tool
==
"custom"
:
"""
if
format
==
"pdb-cp2k"
:
for
value
in
iterator
:
iterator
=
self
.
parser
.
csvengine
.
iread
(
contents
,
columns
=
[
3
,
4
,
5
],
comments
=
[
"TITLE"
,
"AUTHOR"
,
"REMARK"
,
"CRYST"
],
separator
=
"END"
)
yield
value
.
get_positions
()
return
iterator
iterator
=
ase
.
io
.
iread
(
file_handle
,
format
=
format
)
return
ase_generator
(
iterator
)
def
ase_wrapper
(
self
,
iterator
):
def
custom_iread
(
self
,
file_handle
,
format
,
index
):
"""Used to wrap an iterator returned by ase.io.iread so that it returns
"""
the positions instead of the ase.Atoms object.
"""
"""
for
value
in
iterator
:
pass
yield
value
.
get_positions
()
cp2kparser/engines/cp2kinputengine.py
View file @
e9092f06
...
@@ -64,6 +64,9 @@ class CP2KInputEngine(object):
...
@@ -64,6 +64,9 @@ class CP2KInputEngine(object):
path
+=
'/'
path
+=
'/'
path
+=
item
path
+=
item
# Mark the section as accessed.
self
.
input_tree
.
set_section_accessed
(
path
)
# Save the section parameters
# Save the section parameters
if
len
(
parts
)
>
1
:
if
len
(
parts
)
>
1
:
self
.
input_tree
.
set_parameter
(
path
,
parts
[
1
].
strip
())
self
.
input_tree
.
set_parameter
(
path
,
parts
[
1
].
strip
())
...
...
cp2kparser/engines/cp2kinputenginedata/cp2k_262/cp2k_input_tree.pickle
View file @
e9092f06
No preview for this file type
cp2kparser/engines/cp2kinputenginedata/input_tree.py
View file @
e9092f06
...
@@ -5,114 +5,141 @@ because the pickling of these classes is wrong if they are defined in the same
...
@@ -5,114 +5,141 @@ because the pickling of these classes is wrong if they are defined in the same
file which is run in console (module will be then __main__).
file which is run in console (module will be then __main__).
"""
"""
from
collections
import
defaultdict
from
collections
import
defaultdict
import
logging
logger
=
logging
.
getLogger
(
__name__
)
#===============================================================================
#===============================================================================
class
Keyword
(
object
):
class
Root
(
object
):
"""Information about a keyword in a CP2K calculation.
"""
def
__init__
(
self
,
default_name
,
default_value
):
self
.
value
=
None
self
.
default_name
=
default_name
self
.
default_value
=
default_value
def
__init__
(
self
,
root_section
):
self
.
root_section
=
root_section
#===============================================================================
def
set_parameter
(
self
,
path
,
value
):
class
Section
(
object
):
parameter
,
section
=
self
.
get_parameter_and_section
(
path
)
"""An input section in a CP2K calculation.
parameter
.
value
=
value
"""
def
__init__
(
self
,
name
):
def
set_keyword
(
self
,
path
,
value
):
self
.
name
=
name
keyword
,
section
=
self
.
get_keyword_and_section
(
path
)
self
.
keywords
=
defaultdict
(
list
)
if
keyword
and
section
:
self
.
default_keyword
=
""
keyword
.
value
=
value
self
.
parameter
=
None
elif
section
is
not
None
:
self
.
sections
=
defaultdict
(
list
)
# print "Saving default keyword at path '{}'".format(path)
split_path
=
path
.
rsplit
(
"/"
,
1
)
keyword
=
split_path
[
1
]
section
.
default_keyword
+=
keyword
+
" "
+
value
+
"
\n
"
def
get_section
(
self
,
path
):
def
get_section
(
self
,
path
):
split_path
=
path
.
split
(
"/"
)
split_path
=
path
.
split
(
"/"
)
section
=
self
section
=
self
.
root_section
for
part
in
split_path
:
for
part
in
split_path
:
section
=
section
.
sections
.
get
(
part
)
section
=
section
.
get_subsection
(
part
)
if
section
:
if
not
section
:
if
len
(
section
)
==
1
:
print
"Error in getting section at path '{}'."
.
format
(
path
)
section
=
section
[
0
]
else
:
# print "The subsection '{}' is repeated. Not yet supported.".format(path)
return
None
else
:
# print "Subsection '{}' does not exist in section '{}'".format(path, self.name)
return
None
return
None
return
section
return
section
def
get_keyword_
object
(
self
,
path
):
def
get_keyword_
and_section
(
self
,
path
):
split_path
=
path
.
rsplit
(
"/"
,
1
)
split_path
=
path
.
rsplit
(
"/"
,
1
)
keyword
=
split_path
[
1
]
keyword
=
split_path
[
1
]
section_path
=
split_path
[
0
]
section_path
=
split_path
[
0
]
section
=
self
.
get_section
(
section_path
)
section
=
self
.
get_section
(
section_path
)
keyword
=
section
.
keyword
s
.
get
(
keyword
)
keyword
=
section
.
get_
keyword
(
keyword
)
if
keyword
:
if
keyword
and
section
:
if
len
(
keyword
)
==
1
:
return
(
keyword
,
section
)
return
keyword
[
0
]
elif
section
:
# print "The keyword in '{}' does not exist or has too many entries.".format(path
)
return
(
None
,
section
)
return
None
return
(
None
,
None
)
def
get_keyword
(
self
,
path
):
def
get_keyword
(
self
,
path
):
"""Returns the keyword that is specified by the given path.
"""Returns the keyword that is specified by the given path.
If the keyword has no value set, returns the default value defined in
If the keyword has no value set, returns the default value defined in
the XML.
the XML.
"""
"""
keyword
=
self
.
get_keyword_
object
(
path
)
keyword
,
section
=
self
.
get_keyword_
and_section
(
path
)
if
keyword
:
if
keyword
:
if
keyword
.
value
is
not
None
:
if
keyword
.
value
is
not
None
:
return
keyword
.
value
return
keyword
.
value
else
:
else
:
return
keyword
.
default_value
if
section
.
accessed
:
return
keyword
.
default_value
def
get_default_keyword
(
self
,
path
):
def
get_default_keyword
(
self
,
path
):
return
self
.
get_section
(
path
).
default_keyword
return
self
.
get_section
(
path
).
default_keyword
def
set_keyword
(
self
,
path
,
value
):
def
set_section_accessed
(
self
,
path
):
keyword
=
self
.
get_keyword_object
(
path
)
section
=
self
.
get_section
(
path
)
if
keyword
:
section
.
accessed
=
True
keyword
.
value
=
value
else
:
# print "Saving default keyword at path '{}'".format(path)
split_path
=
path
.
rsplit
(
"/"
,
1
)
keyword
=
split_path
[
1
]
section_path
=
split_path
[
0
]
section
=
self
.
get_section
(
section_path
)
section
.
default_keyword
+=
keyword
+
" "
+
value
+
"
\n
"
def
get_keyword_default
(
self
,
path
):
def
get_keyword_default
(
self
,
path
):
keyword
=
self
.
get_keyword_
object
(
path
)
keyword
,
section
=
self
.
get_keyword_
and_section
(
path
)
if
keyword
:
if
keyword
:
return
keyword
.
default_value
return
keyword
.
default_value
def
get_parameter_
object
(
self
,
path
):
def
get_parameter_
and_section
(
self
,
path
):
section
=
self
.
get_section
(
path
)
section
=
self
.
get_section
(
path
)
parameter
=
section
.
parameter
parameter
=
section
.
parameter
if
parameter
:
return
(
parameter
,
section
)
return
parameter
else
:
print
"The section parameters object '{}' could not be found."
.
format
(
path
)
def
get_parameter
(
self
,
path
):
def
get_parameter
(
self
,
path
):
parameter
=
self
.
get_parameter_object
(
path
)
parameter
,
section
=
self
.
get_parameter_and_section
(
path
)
return
parameter
.
value
if
parameter
:
if
parameter
.
value
:
return
parameter
.
value
elif
section
and
section
.
accessed
:
return
parameter
.
lone_value
def
set_parameter
(
self
,
path
,
value
):
parameter
=
self
.
get_parameter_object
(
path
)
parameter
.
value
=
value
def
get_parameter_lone
(
self
,
path
):
# def get_parameter_lone(self, path):
parameter
=
self
.
get_parameter_object
(
path
)
# parameter = self.get_parameter_object(path)
return
parameter
.
lone_value
# return parameter.lone_value
# def get_parameter_default(self, path):
# parameter = self.get_parameter_object(path)
# return parameter.default_value
#===============================================================================
class
Keyword
(
object
):
"""Information about a keyword in a CP2K calculation.
"""
def
__init__
(
self
,
default_name
,
default_value
):
self
.
value
=
None
self
.
default_name
=
default_name
self
.
default_value
=
default_value
def
get_parameter_default
(
self
,
path
):
#===============================================================================
parameter
=
self
.
get_parameter_object
(
path
)
class
Section
(
object
):
return
parameter
.
default_value
"""An input section in a CP2K calculation.
"""
def
__init__
(
self
,
name
):
self
.
accessed
=
False
self
.
name
=
name
self
.
keywords
=
defaultdict
(
list
)
self
.
default_keyword
=
""
self
.
parameter
=
None
self
.
sections
=
defaultdict
(
list
)
def
get_keyword
(
self
,
name
):
keyword
=
self
.
keywords
.
get
(
name
)
if
keyword
:
if
len
(
keyword
)
==
1
:
return
keyword
[
0
]
else
:
logger
.
error
(
"The keyword '{}' in '{}' does not exist or has too many entries."
.
format
(
name
,
self
.
name
))
def
get_subsection
(
self
,
name
):
subsection
=
self
.
sections
.
get
(
name
)
if
subsection
:
if
len
(
subsection
)
==
1
:
return
subsection
[
0
]
else
:
logger
.
error
(
"The subsection '{}' in '{}' has too many entries."
.
format
(
name
,
self
.
name
))
else
:
logger
.
error
(
"The subsection '{}' in '{}' does not exist."
.
format
(
name
,
self
.
name
))
#===============================================================================
#===============================================================================
...
...
cp2kparser/engines/cp2kinputenginedata/xmlpreparser.py
View file @
e9092f06
...
@@ -85,7 +85,7 @@ def recursive_tree_generation(xml_element):
...
@@ -85,7 +85,7 @@ def recursive_tree_generation(xml_element):
# Run main function by default
# Run main function by default
if
__name__
==
"__main__"
:
if
__name__
==
"__main__"
:
xml_file
=
open
(
"./cp2k_262/cp2k_input.xml"
,
'r'
)
xml_file
=
open
(
"./cp2k_262/cp2k_input.xml"
,
'r'
)
object_tree
=
generate_object_tree
(
xml_file
)
object_tree
=
Root
(
generate_object_tree
(
xml_file
)
)
file_name
=
"./cp2k_262/cp2k_input_tree.pickle"
file_name
=
"./cp2k_262/cp2k_input_tree.pickle"
fh
=
open
(
file_name
,
"wb"
)
fh
=
open
(
file_name
,
"wb"
)
pickle
.
dump
(
object_tree
,
fh
,
protocol
=
2
)
pickle
.
dump
(
object_tree
,
fh
,
protocol
=
2
)
cp2kparser/engines/csvengine.py
View file @
e9092f06
...
@@ -88,9 +88,7 @@ class CSVEngine(object):
...
@@ -88,9 +88,7 @@ class CSVEngine(object):
# Start iterating
# Start iterating
configuration
=
[]
configuration
=
[]
print
contents
.
name
for
line
in
contents
:
# This actually reads line by line and only keeps the current line in memory
for
line
in
contents
:
# This actually reads line by line and only keeps the current line in memory
print
line
# If separator encountered, yield the stored configuration
# If separator encountered, yield the stored configuration
if
is_separator
(
line
):
if
is_separator
(
line
):
...
...
cp2kparser/generics/nomadparser.py
View file @
e9092f06
...
@@ -82,6 +82,7 @@ class NomadParser(object):
...
@@ -82,6 +82,7 @@ class NomadParser(object):
self
.
metainfo_to_keep
=
None
self
.
metainfo_to_keep
=
None
self
.
metainfo_to_skip
=
None
self
.
metainfo_to_skip
=
None
self
.
file_ids
=
{}
self
.
file_ids
=
{}
self
.
results
=
{}
self
.
filepaths_wo_id
=
None
self
.
filepaths_wo_id
=
None
self
.
test_mode
=
test_mode
self
.
test_mode
=
test_mode
self
.
backend
=
JsonParseEventsWriterBackend
(
None
,
stream
)
self
.
backend
=
JsonParseEventsWriterBackend
(
None
,
stream
)
...
@@ -178,17 +179,23 @@ class NomadParser(object):
...
@@ -178,17 +179,23 @@ class NomadParser(object):
Checks through the list given by get_supported_quantities and also
Checks through the list given by get_supported_quantities and also
checks the metainfoToSkip parameter given in the JSON input.
checks the metainfoToSkip parameter given in the JSON input.
"""
"""
if
name
not
in
self
.
metainfos
:
logger
.
error
(
"The metaname '{}' was not declared on the metainfo file defined in the JSON input."
.
format
(
name
))
return
False
if
name
not
in
self
.
get_supported_quantities
():
if
name
not
in
self
.
get_supported_quantities
():
logger
.
error
(
"The metaname '{}' is not available in this parser version."
.
format
(
name
))
return
False
return
False
if
name
in
self
.
metainfo_to_skip
:
if
name
in
self
.
metainfo_to_skip
:
logger
.
error
(
"The metaname '{}' cannot be calculated as it is in the list 'metaInfoToSkip'."
.
format
(
name
))
logger
.
error
(
"The metaname '{}' cannot be calculated as it is in the list 'metaInfoToSkip'."
.
format
(
name
))
return
False
return
False
return
True
return
True
def
parse
(
self
):
"""Start parsing the contents.
"""
# Determine which values in metainfo are parseable
metainfos
=
self
.
metainfos
.
itervalues
()
for
metainfo
in
metainfos
:
name
=
metainfo
[
"name"
]
if
self
.
check_quantity_availability
(
name
):
self
.
parse_quantity
(
name
)
def
parse_quantity
(
self
,
name
):
def
parse_quantity
(
self
,
name
):
"""Given a unique quantity id (=metaInfo name) which is supported by
"""Given a unique quantity id (=metaInfo name) which is supported by
the parser, parses the corresponding quantity (if available), converts
the parser, parses the corresponding quantity (if available), converts
...
@@ -202,7 +209,8 @@ class NomadParser(object):
...
@@ -202,7 +209,8 @@ class NomadParser(object):
if
not
available
:
if
not
available
:
return
return
result
=
self
.
start_parsing
(
name
)
# Get the result by parsing or from cache
result
=
self
.
get_result_object
(
name
)
if
result
is
not
None
:
if
result
is
not
None
:
if
isinstance
(
result
,
Result
):
if
isinstance
(
result
,
Result
):
...
@@ -215,10 +223,10 @@ class NomadParser(object):
...
@@ -215,10 +223,10 @@ class NomadParser(object):
self
.
result_saver
(
result
)
self
.
result_saver
(
result
)
# In test mode just return the values directly
# In test mode just return the values directly
else
:
else
:
if
result
.
value
is
not
None
:
if
result
.
value
is
not
None
:
if
result
.
value_iterable
is
None
:
if
result
.
value_iterable
is
None
:
return
result
.
value
return
result
.
value
if
result
.
value_iterable
is
not
None
:
el
if
result
.
value_iterable
is
not
None
:
values
=
[]
values
=
[]
for
value
in
result
.
value_iterable
:
for
value
in
result
.
value_iterable
:
values
.
append
(
value
)
values
.
append
(
value
)
...
@@ -226,6 +234,15 @@ class NomadParser(object):
...
@@ -226,6 +234,15 @@ class NomadParser(object):
if
values
.
size
!=
0
:
if
values
.
size
!=
0
:
return
values
return
values
def
get_result_object
(
self
,
name
):
# Check cache
result
=
self
.
results
.
get
(
name
)
if
result
is
None
:
result
=
self
.
start_parsing
(
name
)
if
result
.
cache
:
self
.
results
[
name
]
=
result
return
result
def
result_saver
(
self
,
result
):
def
result_saver
(
self
,
result
):
"""Given a result object, saves the results to the backend.
"""Given a result object, saves the results to the backend.
...
@@ -382,9 +399,22 @@ class Result(object):
...
@@ -382,9 +399,22 @@ class Result(object):
The repeatable values can also be given as generator functions. With
The repeatable values can also be given as generator functions. With
generators you can easily push results from a big data file piece by piece
generators you can easily push results from a big data file piece by piece
to the backend without loading the entire file into memory.
to the backend without loading the entire file into memory.
Attributes:
cache: Boolean indicating whether the result should be cached in memory.
name: The name of the metainfo corresponding to this result
value: The value of the result. Used for storing single results.
value_iterable: Iterable object containing multiple results.
unit: Unit of the result. Use the Pint units from UnitRegistry. e.g.
unit = ureg.newton. Used to automatically convert to SI.
dtypestr: The datatype string specified in metainfo.
shape: The expected shape of the result specified in metainfo.
repeats: A boolean indicating if this value can repeat. Specified in
metainfo.
"""
"""
def
__init__
(
self
,
meta_name
=
""
):
def
__init__
(
self
):
self
.
name
=
None
self
.
name
=
None
self
.
value
=
None
self
.
value
=
None
self
.
value_iterable
=
None
self
.
value_iterable
=
None
...
@@ -394,6 +424,7 @@ class Result(object):
...
@@ -394,6 +424,7 @@ class Result(object):
self
.
dtypestr
=
None
self
.
dtypestr
=
None
self
.
repeats
=
None
self
.
repeats
=
None
self
.
shape
=
None
self
.
shape
=
None
self
.
cache
=
False
#===============================================================================
#===============================================================================
...
...
cp2kparser/implementation/autoparser.py
View file @
e9092f06
...
@@ -14,6 +14,7 @@ def scan_path_for_files(path):
...
@@ -14,6 +14,7 @@ def scan_path_for_files(path):
".xyz"
,
".xyz"
,
".cif"
,
".cif"
,
".pdb"
,
".pdb"
,
".dcd"
,
}
}
files
=
{}
files
=
{}
for
filename
in
os
.
listdir
(
path
):
for
filename
in
os
.
listdir
(
path
):
...
...
cp2kparser/implementation/parser.py
View file @
e9092f06
...
@@ -33,7 +33,7 @@ class CP2KParser(NomadParser):
...
@@ -33,7 +33,7 @@ class CP2KParser(NomadParser):
self
.
regexengine
=
RegexEngine
(
self
)
self
.
regexengine
=
RegexEngine
(
self
)
self
.
xmlengine
=
XMLEngine
(
self
)
self
.
xmlengine
=
XMLEngine
(
self
)
self
.
inputengine
=
CP2KInputEngine
()
self
.
inputengine
=
CP2KInputEngine
()
self
.
atomsengine
=
AtomsEngine
(
self
)