Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
P
parser-quantum-espresso
Manage
Activity
Members
Code
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Deploy
Releases
Model registry
Analyze
Contributor analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
This is an archived project. Repository and other project resources are read-only.
Show more breadcrumbs
nomad-lab
parser-quantum-espresso
Commits
db458ca5
Commit
db458ca5
authored
8 years ago
by
Henning Glawe
Browse files
Options
Downloads
Patches
Plain Diff
add generic fortran90 namelist parser
parent
c4a8f259
No related branches found
No related tags found
No related merge requests found
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
parser/parser-quantum-espresso/FortranNamelistParser.py
+289
-0
289 additions, 0 deletions
parser/parser-quantum-espresso/FortranNamelistParser.py
with
289 additions
and
0 deletions
parser/parser-quantum-espresso/FortranNamelistParser.py
0 → 100644
+
289
−
0
View file @
db458ca5
import
setup_paths
import
re
import
sys
import
os
import
logging
from
nomadcore.match_highlighter
import
ANSI
LOGGER = logging.getLogger(__name__)

# Regex for _valid_ Fortran numeric output (integers and reals), plus the
# run of '*' that stands for a number too wide for its output field.
#
# Capture groups, in order (match_to_float relies on this numbering):
#   1  mantissa sign (may be empty)
#   2  mantissa written without a decimal point
#   3  mantissa, whole part (digits before the dot, may be empty)
#   4  mantissa, fractional part (digits after the dot, may be empty)
#   5  exponent/precision character, one of e, E, d, D
#   6  exponent sign (may be empty)
#   7  exponent digits (may be empty)
#   8  run of '*' (number-too-wide-for-field marker)
RE_f = (
    r"(?:" +  # outer alternative, between numbers and number-too-wide-for-field markers
    r"([+-]?)(?:" +  # MANTISSA, SIGN (group 1, optional), followed by alternatives
    '|'.join([  # MANTISSA
        # MANTISSA without a decimal point, group 2.
        # The look-ahead must reject a following digit as well as a dot:
        # with only (?!\.) the greedy \d+ backtracks to a shorter digit run,
        # so "12.5" would match as the integer "1" and leave "2.5" behind.
        r"(\d+(?![\d.]))",
        r"(\d*)" + (  # MANTISSA, WHOLE part (group 3)
            # we need negative look-ahead/look-behind assertions around the
            # decimal point as there is too much optional stuff around
            r"(?<![^\d\s+-])" +  # char preceding the dot must be nothing but number, whitespace, or sign
            r"\." +
            r"(?![^eEdD\d\s,])" +  # char succeeding the dot must be nothing but number, exponential/precision char, comma or whitespace
            r"(\d*)"  # MANTISSA, FRACTIONAL part (group 4), separated by dot
        ),
    ]) +
    r")(?:" + (  # EXPONENT part (optional)
        r"([eEdD])" +  # PRECISION (group 5)
        r"([+-]?)(\d*)"  # EXPONENT SIGN (group 6), VALUE (group 7)
    ) +
    ")?" +  # make precision/exponent part optional
    r"|(\*+))"  # outer alternative, between numbers and number-too-wide markers (group 8)
)

cRE_f = re.compile(RE_f)
def match_to_float(m, group_offset=0):
    """Convert a match of the Fortran number regex into a Python float.

    Args:
        m: regex match object containing the eight number capture groups
            (sign, integer mantissa, whole part, fractional part,
            precision character, exponent sign, exponent digits,
            '*' marker), in that order.
        group_offset: index of the group directly preceding the first of
            those eight groups; 0 when the number regex is used on its own.

    Returns:
        Tuple ``(value, dtype)``.  ``value`` is always a Python float.
        ``dtype`` is ``'i'`` for a mantissa without decimal point and
        without exponent, and ``'f'`` otherwise.  A '*' marker yields
        ``(nan, 'f')``.
    """
    # slot 0 holds the complete match so slots 1..8 line up with the
    # capture-group numbers of the number regex
    fields = [m.group(0)]
    fields.extend(m.group(group_offset + idx) for idx in range(1, 9))
    LOGGER.debug("g: %s", str(fields))
    (sign, int_mantissa, whole, fraction,
     precision, exp_sign, exp_digits, overflow) = fields[1:]
    if overflow is not None:
        # field overflow marker: there is no usable number
        pyfloat_str = 'nan'
        dtype = 'f'
    else:
        if int_mantissa is not None:
            pyfloat_str = sign + int_mantissa
            dtype = 'i'
        else:
            # Fortran permits '.5' and '5.'; pad the missing side with a zero
            pyfloat_str = sign + (whole or '0') + '.' + (fraction or '0')
            dtype = 'f'
        if precision is not None:
            # every exponent character (e/E/d/D) is rewritten as 'e';
            # an empty exponent is read as zero
            pyfloat_str += 'e' + exp_sign + (exp_digits or '0')
            dtype = 'f'
    LOGGER.debug("pyfloat_str: %s", pyfloat_str)
    return (float(pyfloat_str), dtype)
# Maps a quote character to the compiled pattern matching its escaped
# (doubled) form inside a string delimited by that character.
RE_unescape = dict([
    ('"', re.compile(r'""')),
    ("'", re.compile(r"''")),
])


def unquote_string(value):
    """Strip the delimiters from a quoted string and undo quote doubling.

    The first character of ``value`` selects the quote style; the first and
    last characters are dropped and every doubled quote of that style in
    the remainder is collapsed into a single one.

    Raises:
        IndexError: if ``value`` is empty.
        KeyError: if ``value`` starts with neither ``'`` nor ``"``.
    """
    quote_char = value[0]
    unescape = RE_unescape[quote_char]
    inner = value[1:-1]
    return unescape.sub(quote_char, inner)
# quoted strings (single- or double-quoted, without embedded quotes)
cRE_string_quoted = re.compile(r"(?:'[^']*'|\"[^\"]*\")")

# comment: optional leading whitespace, '!', rest of the line
cRE_comment = re.compile(r"\s*!.*")

RE_identifier = r"[a-zA-Z]\w*"  # fortran identifier

# beginning of namelist group: '&' followed by the group name (group 1)
cRE_start_group = re.compile(r'\s*&(%s)' % RE_identifier)

# end of namelist group
cRE_end_group = re.compile(r'\s*/')

# left-hand side of an assignment: target name, optional parenthesised
# subscript, and the '=' sign with surrounding whitespace
cRE_start_assignment = re.compile(
    r'\s*(?P<target>%s)(?:\(\s*(?P<subscript>[^\)]*?)\s*\))?\s*=\s*'
    % RE_identifier
)

# one assigned value; exactly one of the named groups is set on a match
cRE_assigned_value = re.compile(
    r'\s*(?:' + '|'.join((
        # integers and floats
        r'(?P<num>%s)' % RE_f,
        # complex numbers
        r'\(\s*(?P<cnum_r>%s)\s*,\s*(?P<cnum_i>%s)\s*\)' % (RE_f, RE_f),
        # true-value bool
        r'(?P<bool_t>\.t(?:rue)?\.)',
        # false-value bool
        r'(?P<bool_f>\.f(?:alse)?\.)',
        # single-quoted string, closed, allowing for escaped quotes ('')
        r"(?P<str_s>'[^']*(?:[^']|'')*'(?!'))",
        # double-quoted string, closed, allowing for escaped quotes ("")
        r'(?P<str_d>"[^"]*(?:[^"]|"")*"(?!"))',
        # single-quoted string, not closed
        r"(?P<str_s_nc>'[^']*(?:[^']|'')*)",
        # double-quoted string, not closed
        r'(?P<str_d_nc>"[^"]*(?:[^"]|"")*)',
        # comment
        r'(?P<comment>!.*)',
    )) + ')',
    re.IGNORECASE
)

# single-quoted string, closing (remainder of a string opened on an earlier line)
cRE_str_s_close = re.compile(r"([^']*(?:[^']|'')*'(?!'))")

# double-quoted string, closing (remainder of a string opened on an earlier line)
cRE_str_d_close = re.compile(r'([^"]*(?:[^"]|"")*"(?!"))')

# value separator
cRE_comma = re.compile(r'\s*,')
class FortranNamelistParser(object):
    """Parser for Fortran 90 Namelists.

    The file is processed line by line with a small state machine kept in
    ``self.state``:

        0  outside of any namelist group
        1  inside a group, before its first assignment
        2  inside the values part of an assignment
        3  inside a single-quoted string that continues on the next line
        4  inside a double-quoted string that continues on the next line

    Every consumed fragment is echoed to stdout wrapped in ANSI colour
    codes.  Parsed data is handed to the ``on*`` hook methods, which
    derived classes are expected to override.
    """

    def __init__(self, file_path):
        """Set up an idle parser for the namelist file at ``file_path``."""
        # initialised here but not touched by any method of this class
        self.input_tree = {}
        self.file_path = file_path
        self.state = 0
        # name of the currently open namelist group, if any
        self.nl_group = None
        # assignment currently being collected
        self.target = None
        self.target_subscript = None
        self.values = None
        self.types = None
        # number of values seen since the last comma (or since the '=');
        # a comma arriving while this is zero denotes a null value
        self.values_after_comma = 0

    def parse(self):
        """Read ``self.file_path`` and feed it to parse_line line by line."""
        with open(self.file_path, "r") as fIn:
            for line in fIn:
                # strip final newline if it exists
                if line.endswith('\n'):
                    line = line[:-1]
                self.parse_line(line)

    def parse_line(self, line):
        """Consume one line (without trailing newline) of namelist input.

        Fragments are matched one after another until the line is used up
        or nothing matches any more.  Unmatched trailing text is echoed
        inverted; if it is non-blank and a group is open, an error is
        logged as well.  A newline is written to stdout at the end.
        """
        pos = 0
        while pos < len(line):
            if self.state == 0:
                new_pos = self._parse_outside_group(line, pos)
            elif self.state == 3:
                new_pos = self._parse_multiline_string(line, pos, cRE_str_s_close)
            elif self.state == 4:
                new_pos = self._parse_multiline_string(line, pos, cRE_str_d_close)
            else:
                new_pos = self._parse_inside_group(line, pos)
            if new_pos is None:
                # nothing recognised at this position
                break
            pos = new_pos
        if pos < len(line):
            line_leftover = line[pos:]
            if self.state > 0 and line_leftover.strip():
                LOGGER.error("ERROR: leftover chars in line while inside namelist group")
                colour = ANSI.FG_BRIGHT_RED
            else:
                colour = ANSI.FG_BLUE
            sys.stdout.write(ANSI.BEGIN_INVERT + colour + line_leftover + ANSI.RESET)
        sys.stdout.write('\n')

    def _parse_outside_group(self, line, pos):
        """State 0: accept a group start or a comment; return new position or None."""
        m = cRE_start_group.match(line, pos)
        if m is not None:
            self.nl_group = m.group(1)
            sys.stdout.write(ANSI.FG_BRIGHT_YELLOW + m.group() + ANSI.RESET)
            self.state = 1
            self.onOpen_namelist_group(m.group(1))
            return m.end()
        # but comments may appear here
        m = cRE_comment.match(line, pos)
        if m is not None:
            sys.stdout.write(ANSI.FG_BLUE + m.group() + ANSI.RESET)
            self.onComment(m.group())
            return m.end()
        return None

    def _parse_multiline_string(self, line, pos, closing_re):
        """States 3/4: extend the open string value; return new position.

        ``closing_re`` is the pattern matching the remainder of the string
        up to and including its closing quote.
        """
        m = closing_re.match(line, pos)
        if m is None:
            # no closing quote on this line: the rest of it belongs to the string
            rest = line[pos:]
            sys.stdout.write(ANSI.FG_YELLOW + rest + ANSI.RESET)
            self.values[-1] += "\n" + rest
            return len(line)
        sys.stdout.write(ANSI.FG_YELLOW + m.group() + ANSI.RESET)
        self.values[-1] += "\n" + m.group(1)
        # the collected text still carries both quotes and doubled quotes
        self.values[-1] = unquote_string(self.values[-1])
        self.types[-1] = 'C'
        self.state = 2
        return m.end()

    def _parse_inside_group(self, line, pos):
        """States 1/2: accept group end, assignment start, value or comma.

        Returns the new position, or None if nothing matched.
        """
        # check for group-closing /
        m = cRE_end_group.match(line, pos)
        if m is not None:
            if self.target is not None:
                self.onClose_value_assignment(
                    self.target, self.target_subscript,
                    self.values, self.types)
            self.target = None
            self.target_subscript = None
            self.values = None
            self.types = None
            self.values_after_comma = 0
            self.onClose_namelist_group(self.nl_group)
            self.nl_group = None
            sys.stdout.write(
                ANSI.BEGIN_INVERT + ANSI.FG_BRIGHT_YELLOW + m.group() + ANSI.RESET)
            self.state = 0
            return m.end()
        # check for new assignment
        m = cRE_start_assignment.match(line, pos)
        if m is not None:
            if self.target is not None:
                # a new target implicitly finishes the previous assignment
                self.onClose_value_assignment(
                    self.target, self.target_subscript,
                    self.values, self.types)
            self.state = 2
            sys.stdout.write(ANSI.FG_GREEN + m.group() + ANSI.RESET)
            self.target = m.group('target')
            self.target_subscript = m.group('subscript')
            self.values = []
            self.types = []
            self.values_after_comma = 0
            self.onOpen_value_assignment(self.target, self.target_subscript)
            return m.end()
        if self.state >= 2:
            # we are inside the values-part of an assignment
            m = cRE_assigned_value.match(line, pos)
            if m is not None:
                if m.group('comment') is not None:
                    # a comment is not a value and must not be counted as one
                    sys.stdout.write(ANSI.FG_BLUE + m.group() + ANSI.RESET)
                    self.onComment(m.group())
                    return m.end()
                self._append_matched_value(m)
                self.values_after_comma += 1
                sys.stdout.write(ANSI.FG_YELLOW + m.group() + ANSI.RESET)
                return m.end()
            # special meaning of comma: may indicate Null values in array assignments
            m = cRE_comma.match(line, pos)
            if m is not None:
                if self.values_after_comma == 0:
                    self.values.append(None)
                    self.types.append(None)
                self.values_after_comma = 0
                sys.stdout.write(ANSI.FG_MAGENTA + m.group() + ANSI.RESET)
                return m.end()
        return None

    def _append_matched_value(self, m):
        """Store the (non-comment) value matched by cRE_assigned_value."""
        if m.group('num') is not None:
            # group_offset is the index of the enclosing named group; the
            # number's own sub-groups follow directly after it
            (value, dtype) = match_to_float(m, group_offset=1)
            self.values.append(value)
            self.types.append(dtype)
        elif m.group('cnum_r') is not None:
            (cnum_r, dtype) = match_to_float(m, group_offset=10)
            (cnum_i, dtype) = match_to_float(m, group_offset=19)
            self.values.append(complex(cnum_r, cnum_i))
            self.types.append('complex')
        elif m.group('bool_t') is not None:
            self.values.append(True)
            self.types.append('b')
        elif m.group('bool_f') is not None:
            self.values.append(False)
            self.types.append('b')
        elif m.group('str_s') is not None:
            self.values.append(unquote_string(m.group('str_s')))
            self.types.append('C')
        elif m.group('str_d') is not None:
            self.values.append(unquote_string(m.group('str_d')))
            self.types.append('C')
        elif m.group('str_s_nc') is not None:
            # non-closed single-quoted string: keep the raw text (including
            # the opening quote) until the closing quote shows up
            self.state = 3
            self.values.append(m.group('str_s_nc'))
            self.types.append('string_singlequoted')
        elif m.group('str_d_nc') is not None:
            # non-closed double-quoted string
            self.state = 4
            self.values.append(m.group('str_d_nc'))
            self.types.append('string_doublequoted')

    # Hooks to be overloaded in derived classes in order to do stuff
    def onComment(self, comment):
        """Called with the matched text of every comment; no-op by default."""
        pass

    def onOpen_namelist_group(self, groupname):
        """Called when a '&groupname' group start is read; no-op by default."""
        pass

    def onClose_namelist_group(self, groupname):
        """Called when the group-closing '/' is read; no-op by default."""
        pass

    def onOpen_value_assignment(self, target, subscript):
        """Called when the left-hand side of an assignment is read.

        ``subscript`` is the text between the parentheses, or None if the
        target carries no subscript.  No-op by default.
        """
        pass

    def onClose_value_assignment(self, target, subscript, values, dtypes):
        """Called when an assignment is complete.

        ``values`` and ``dtypes`` are parallel lists; a null value is
        represented by None in both.  The default implementation only logs
        the assignment (at error level).
        """
        if subscript is None:
            LOGGER.error("SET %s = %s (types: %s)", target, str(values), str(dtypes))
        else:
            LOGGER.error("SET %s(%s) = %s (types: %s)", target, subscript, str(values), str(dtypes))
if __name__ == "__main__":
    # script entry point: parse the namelist file named by the first
    # command-line argument
    parser = FortranNamelistParser(
        sys.argv[1]
    )
    parser.parse()
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment