Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
nomad-lab
nomad-FAIR
Commits
5787da77
Commit
5787da77
authored
Aug 16, 2019
by
Markus Scheidgen
Browse files
Added EmptyParser and strict parsing to deal with PID entries without actual parsers.
parent
c0187fbb
Pipeline
#53899
passed with stages
in 22 minutes and 5 seconds
Changes
7
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
nomad/cli/client/local.py
View file @
5787da77
...
...
@@ -123,12 +123,12 @@ class CalcProcReproduction:
def
__exit__
(
self
,
*
args
):
self
.
upload_files
.
delete
()
def
parse
(
self
,
parser_name
:
str
=
None
)
->
LocalBackend
:
def
parse
(
self
,
parser_name
:
str
=
None
,
**
kwargs
)
->
LocalBackend
:
"""
Run the given parser on the downloaded calculation. If no parser is given,
do parser matching and use the respective parser.
"""
return
parse
(
self
.
mainfile
,
self
.
upload_files
,
parser_name
=
parser_name
,
logger
=
self
.
logger
)
return
parse
(
self
.
mainfile
,
self
.
upload_files
,
parser_name
=
parser_name
,
logger
=
self
.
logger
,
**
kwargs
)
def
normalize
(
self
,
normalizer
:
Union
[
str
,
Callable
],
parser_backend
:
LocalBackend
=
None
):
"""
...
...
@@ -153,7 +153,8 @@ class CalcProcReproduction:
@
click
.
option
(
'--show-metadata'
,
is_flag
=
True
,
help
=
'Print the extracted repo metadata.'
)
@
click
.
option
(
'--mainfile'
,
default
=
None
,
type
=
str
,
help
=
'Use this mainfile (in case mainfile cannot be retrived via API.'
)
@
click
.
option
(
'--skip-normalizers'
,
is_flag
=
True
,
help
=
'Do not normalize.'
)
def
local
(
calc_id
,
show_backend
,
show_metadata
,
skip_normalizers
,
**
kwargs
):
@
click
.
option
(
'--not-strict'
,
is_flag
=
True
,
help
=
'Also match artificial parsers.'
)
def
local
(
calc_id
,
show_backend
,
show_metadata
,
skip_normalizers
,
not_strict
,
**
kwargs
):
utils
.
configure_logging
()
utils
.
get_logger
(
__name__
).
info
(
'Using %s'
%
config
.
client
.
url
)
...
...
@@ -162,7 +163,7 @@ def local(calc_id, show_backend, show_metadata, skip_normalizers, **kwargs):
print
(
'Data being saved to .volumes/fs/tmp/repro_'
'%s if not already there'
%
local
.
upload_id
)
backend
=
local
.
parse
()
backend
=
local
.
parse
(
strict
=
not
not_strict
)
if
not
skip_normalizers
:
local
.
normalize_all
(
parser_backend
=
backend
)
...
...
nomad/cli/parse.py
View file @
5787da77
...
...
@@ -5,7 +5,7 @@ import click
import
sys
from
nomad
import
config
,
utils
,
files
from
nomad.parsing
import
LocalBackend
,
parser_dict
,
match_parser
from
nomad.parsing
import
LocalBackend
,
parser_dict
,
match_parser
,
MatchingParser
from
nomad.normalizing
import
normalizers
from
nomad.datamodel
import
CalcWithMetadata
...
...
@@ -14,7 +14,7 @@ from .cli import cli
def
parse
(
mainfile
:
str
,
upload_files
:
Union
[
str
,
files
.
StagingUploadFiles
],
parser_name
:
str
=
None
,
logger
=
None
)
->
LocalBackend
:
parser_name
:
str
=
None
,
strict
:
bool
=
True
,
logger
=
None
)
->
LocalBackend
:
"""
Run the given parser on the downloaded calculation. If no parser is given,
do parser matching and use the respective parser.
...
...
@@ -24,7 +24,11 @@ def parse(
if
parser_name
is
not
None
:
parser
=
parser_dict
.
get
(
parser_name
)
else
:
parser
=
match_parser
(
mainfile
,
upload_files
)
parser
=
match_parser
(
mainfile
,
upload_files
,
strict
=
strict
)
if
isinstance
(
parser
,
MatchingParser
):
parser_name
=
parser
.
name
else
:
parser_name
=
parser
.
__class__
.
__name__
assert
parser
is
not
None
,
'there is not parser matching %s'
%
mainfile
logger
=
logger
.
bind
(
parser
=
parser
.
name
)
# type: ignore
...
...
@@ -45,7 +49,7 @@ def parse(
parser_backend
.
addValue
(
'calc_id'
,
config
.
services
.
unavailable_value
)
parser_backend
.
addValue
(
'calc_hash'
,
"no hash"
)
parser_backend
.
addValue
(
'mainfile'
,
mainfile
)
parser_backend
.
addValue
(
'parser_name'
,
parser
.
__class__
.
_
_name
__
)
parser_backend
.
addValue
(
'parser_name'
,
parser_name
)
parser_backend
.
closeNonOverlappingSection
(
'section_entry_info'
)
logger
.
info
(
'ran parser'
)
...
...
@@ -89,10 +93,11 @@ def normalize_all(parser_backend: LocalBackend = None, logger=None) -> LocalBack
@
click
.
option
(
'--show-backend'
,
is_flag
=
True
,
default
=
False
,
help
=
'Print the backend data.'
)
@
click
.
option
(
'--show-metadata'
,
is_flag
=
True
,
default
=
False
,
help
=
'Print the extracted repo metadata.'
)
@
click
.
option
(
'--skip-normalizers'
,
is_flag
=
True
,
default
=
False
,
help
=
'Do not run the normalizer.'
)
def
_parse
(
mainfile
,
show_backend
,
show_metadata
,
skip_normalizers
):
@
click
.
option
(
'--not-strict'
,
is_flag
=
True
,
help
=
'Do also match artificial parsers.'
)
def
_parse
(
mainfile
,
show_backend
,
show_metadata
,
skip_normalizers
,
not_strict
):
utils
.
configure_logging
()
backend
=
parse
(
mainfile
,
'.'
)
backend
=
parse
(
mainfile
,
'.'
,
strict
=
not
not_strict
)
if
not
skip_normalizers
:
normalize_all
(
backend
)
...
...
nomad/datamodel/dft.py
View file @
5787da77
...
...
@@ -110,7 +110,7 @@ class DFTCalcWithMetadata(CalcWithMetadata):
except
KeyError
:
self
.
code_version
=
config
.
services
.
unavailable_value
self
.
atoms
=
get_optional_backend_value
(
backend
,
'atom_labels'
,
'section_system'
,
logger
=
logger
)
self
.
atoms
=
get_optional_backend_value
(
backend
,
'atom_labels'
,
'section_system'
,
[],
logger
=
logger
)
if
hasattr
(
self
.
atoms
,
'tolist'
):
self
.
atoms
=
self
.
atoms
.
tolist
()
self
.
n_atoms
=
len
(
self
.
atoms
)
...
...
nomad/parsing/__init__.py
View file @
5787da77
...
...
@@ -68,7 +68,7 @@ from nomad import files, config
from
nomad.parsing.backend
import
AbstractParserBackend
,
LocalBackend
,
LegacyLocalBackend
,
JSONStreamWriter
,
BadContextURI
,
WrongContextState
from
nomad.parsing.parser
import
Parser
,
LegacyParser
,
VaspOutcarParser
,
BrokenParser
,
MissingParser
,
MatchingParser
from
nomad.parsing.artificial
import
TemplateParser
,
GenerateRandomParser
,
ChaosParser
from
nomad.parsing.artificial
import
TemplateParser
,
GenerateRandomParser
,
ChaosParser
,
EmptyParser
_compressions
=
{
...
...
@@ -77,7 +77,7 @@ _compressions = {
}
def
match_parser
(
mainfile
:
str
,
upload_files
:
Union
[
str
,
files
.
StagingUploadFiles
])
->
'Parser'
:
def
match_parser
(
mainfile
:
str
,
upload_files
:
Union
[
str
,
files
.
StagingUploadFiles
]
,
strict
=
True
)
->
'Parser'
:
"""
Performs parser matching. This means it take the given mainfile and potentially
opens it with the given callback and tries to identify a parser that can parse
...
...
@@ -90,6 +90,7 @@ def match_parser(mainfile: str, upload_files: Union[str, files.StagingUploadFile
mainfile: The upload relative path to the mainfile
upload_files: Either a :class:`files.StagingUploadFiles` object or a directory name.
Directory name + mainfile needs to point to the file.
strict: Only match strict parsers, e.g. no artificial parsers for missing or empty entries.
Returns: The parser, or None if no parser could be matched.
"""
...
...
@@ -106,10 +107,15 @@ def match_parser(mainfile: str, upload_files: Union[str, files.StagingUploadFile
mime_type
=
magic
.
from_buffer
(
buffer
,
mime
=
True
)
for
parser
in
parsers
:
if
parser
.
domain
==
config
.
domain
:
if
parser
.
is_mainfile
(
mainfile_path
,
mime_type
,
buffer
,
compression
):
# TODO: deal with multiple possible parser specs
return
parser
if
strict
and
(
isinstance
(
parser
,
MissingParser
)
or
isinstance
(
parser
,
EmptyParser
)):
continue
if
parser
.
domain
!=
config
.
domain
:
continue
if
parser
.
is_mainfile
(
mainfile_path
,
mime_type
,
buffer
,
compression
):
# TODO: deal with multiple possible parser specs
return
parser
return
None
...
...
@@ -402,28 +408,23 @@ parsers = [
parser_class_name
=
'onetepparser.OnetepParser'
,
mainfile_contents_re
=
r
'####### # # ####### ####### ####### ######'
),
# These are supposedly octopus files, but they do not look like octopus files at all
# TODO We have migrated the wrong octopus mainfiles .. this should be removed now
# MissingParser(
# name='parser/octopus', code_name='Octopus', domain='DFT',
# mainfile_name_re=r'(inp)|(.*/inp)'
# ),
# We already have crystal with mainfile_contents_re, but this one does not always properly match
LegacyParser
(
name
=
'parsers/crystal'
,
code_name
=
'Crystal'
,
parser_class_name
=
'crystalparser.CrystalParser'
,
# There are some entries with PIDs that have mainfiles which do not match what
# the actual parsers expect. We use the EmptyParser to produce placeholder entries
# to keep the PIDs. These parsers will not match for new, non migrated data.
EmptyParser
(
name
=
'missing/octopus'
,
code_name
=
'Octopus'
,
domain
=
'DFT'
,
mainfile_name_re
=
r
'(inp)|(.*/inp)'
),
EmptyParser
(
name
=
'missing/crystal'
,
code_name
=
'Crystal'
,
mainfile_name_re
=
r
'.*\.cryst\.out'
),
# We already have wien2k with mainfile_contents_re, but this one does not always properly match
LegacyParser
(
name
=
'parsers/wien2k'
,
code_name
=
'WIEN2k'
,
parser_class_name
=
'wien2kparser.Wien2kParser'
,
EmptyParser
(
name
=
'missing/wien2k'
,
code_name
=
'WIEN2k'
,
mainfile_name_re
=
r
'.*\.scf'
),
# We already have fhi-aims with mainfile_contents_re, but this one does not always properly match
LegacyParser
(
name
=
'parsers/fhi-aims'
,
code_name
=
'FHI-aims'
,
parser_class_name
=
'fhiaimsparser.FHIaimsParser'
,
EmptyParser
(
name
=
'missing/fhi-aims'
,
code_name
=
'FHI-aims'
,
domain
=
'DFT'
,
mainfile_name_re
=
r
'.*\.fhiaims'
),
BrokenParser
()
...
...
nomad/parsing/artificial.py
View file @
5787da77
...
...
@@ -31,27 +31,40 @@ from nomadcore.local_meta_info import loadJsonFile, InfoKindEl
import
nomad_meta_info
from
nomad.parsing.backend
import
LocalBackend
from
nomad.parsing.parser
import
Parser
from
nomad.parsing.parser
import
Parser
,
MatchingParser
file_dir
=
os
.
path
.
dirname
(
os
.
path
.
abspath
(
nomad_meta_info
.
__file__
))
meta_info_path
=
os
.
path
.
normpath
(
os
.
path
.
join
(
file_dir
,
'vasp.nomadmetainfo.json'
))
meta_info_env
,
_
=
loadJsonFile
(
filePath
=
meta_info_path
,
dependencyLoader
=
None
,
extraArgsHandling
=
InfoKindEl
.
ADD_EXTRA_ARGS
,
uri
=
None
)
class
ArtificalParser
(
Parser
):
""" Base class for artifical parsers based on VASP metainfo. """
def
__init__
(
self
):
super
().
__init__
()
# use vasp metainfo, not to really use it, but because it works
file_dir
=
os
.
path
.
dirname
(
os
.
path
.
abspath
(
nomad_meta_info
.
__file__
))
meta_info_path
=
os
.
path
.
normpath
(
os
.
path
.
join
(
file_dir
,
'vasp.nomadmetainfo.json'
))
self
.
meta_info_env
,
_
=
loadJsonFile
(
filePath
=
meta_info_path
,
dependencyLoader
=
None
,
extraArgsHandling
=
InfoKindEl
.
ADD_EXTRA_ARGS
,
uri
=
None
)
self
.
backend
=
None
def
init_backend
(
self
):
self
.
backend
=
LocalBackend
(
metaInfoEnv
=
self
.
meta_info_env
,
debug
=
False
)
self
.
backend
=
LocalBackend
(
metaInfoEnv
=
meta_info_env
,
debug
=
False
)
@
property
def
name
(
self
):
return
self
.
__class__
.
name
class
EmptyParser
(
MatchingParser
):
"""
Implementation that produces an empty code_run
"""
def
run
(
self
,
mainfile
:
str
,
logger
=
None
)
->
LocalBackend
:
backend
=
LocalBackend
(
metaInfoEnv
=
meta_info_env
,
debug
=
False
)
# type: ignore
backend
.
openSection
(
'section_run'
)
backend
.
addValue
(
'program_name'
,
self
.
code_name
)
backend
.
closeSection
(
'section_run'
,
0
)
return
backend
class
TemplateParser
(
ArtificalParser
):
"""
A parser that generates data based on a template given via the
...
...
@@ -85,7 +98,7 @@ class TemplateParser(ArtificalParser):
else
:
value
=
self
.
transform_value
(
key
,
value
)
if
isinstance
(
value
,
list
):
shape
=
self
.
meta_info_env
[
key
].
get
(
'shape'
)
shape
=
meta_info_env
[
key
].
get
(
'shape'
)
if
shape
is
None
or
len
(
shape
)
==
0
:
for
single_value
in
value
:
self
.
backend
.
addValue
(
key
,
single_value
,
index
)
...
...
nomad/processing/data.py
View file @
5787da77
...
...
@@ -660,10 +660,10 @@ class Upload(Proc):
calc
.
reset
(
worker_hostname
=
self
.
worker_hostname
)
parser
=
match_parser
(
calc
.
mainfile
,
staging_upload_files
)
parser
=
match_parser
(
calc
.
mainfile
,
staging_upload_files
,
strict
=
False
)
if
parser
is
None
:
logger
.
warn
(
'no parser matches during re-process, use old parser'
,
logger
.
error
(
'no parser matches during re-process, use
the
old parser'
,
calc_id
=
calc
.
calc_id
)
elif
calc
.
parser
!=
parser
.
name
:
calc
.
parser
=
parser
.
name
...
...
tests/test_normalizing.py
View file @
5787da77
...
...
@@ -112,11 +112,14 @@ def assert_normalized(backend: LocalBackend):
assert
metadata
.
xc_functional
is
not
None
assert
metadata
.
system
is
not
None
assert
metadata
.
crystal_system
is
not
None
assert
len
(
metadata
.
atoms
)
>
0
assert
len
(
metadata
.
atoms
)
is
not
None
assert
metadata
.
spacegroup
is
not
None
exceptions
=
parser_exceptions
.
get
(
backend
.
get_value
(
'parser_name'
),
[])
if
metadata
.
formula
!=
config
.
services
.
unavailable_value
:
assert
len
(
metadata
.
atoms
)
>
0
for
key
in
calc_metadata_keys
:
if
key
not
in
exceptions
:
assert
getattr
(
metadata
,
key
)
!=
config
.
services
.
unavailable_value
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment