nomad-lab / nomad-FAIR · Commits

Commit 553f50dd, authored Apr 01, 2020 by Markus Scheidgen

    Improved processing logs; fixed bugs around missing/changed parser names.

Parent: d754830b
Pipeline #71882 failed with stages in 31 minutes and 11 seconds
Changes: 9    Pipelines: 1
nomad/datamodel/datamodel.py

@@ -463,8 +463,7 @@ class EntryMetadata(metainfo.MSection):
         if domain_section is None:
             domain_section = self.m_create(domain_section_def.section_cls)
 
-        if backend is not None:
-            domain_section.apply_domain_metadata(backend)
+        domain_section.apply_domain_metadata(backend)
 
 
 class EntryArchive(metainfo.MSection):
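Note on this hunk: the None check moves from the caller into the domain implementations. EntryMetadata.apply_domain_metadata now forwards the backend unconditionally, and each domain section (see the dft.py and ems.py hunks below) decides what a missing backend means. A minimal sketch of the inverted contract, using hypothetical stand-in classes rather than the real metainfo API:

    class DomainMetadata:
        '''Hypothetical stand-in for a domain section such as DFTMetadata.'''

        def __init__(self):
            self.code_name = 'unavailable'

        def apply_domain_metadata(self, backend):
            # the callee, not the caller, now handles the missing backend
            if backend is None:
                return
            self.code_name = backend.get_value('program_name', 0)

    class StubBackend:
        '''Hypothetical backend exposing only get_value.'''

        def get_value(self, key, index):
            return {'program_name': 'VASP'}[key]

    section = DomainMetadata()
    section.apply_domain_metadata(None)           # now a safe no-op
    section.apply_domain_metadata(StubBackend())  # populates from parsed data
    assert section.code_name == 'VASP'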
nomad/datamodel/dft.py

@@ -278,6 +278,14 @@ class DFTMetadata(MSection):
         logger = utils.get_logger(__name__).bind(
             upload_id=entry.upload_id, calc_id=entry.calc_id, mainfile=entry.mainfile)
 
+        if backend is None:
+            if entry.parser_name is not None:
+                from nomad.parsing import parser_dict
+                parser = parser_dict.get(entry.parser_name)
+                if hasattr(parser, 'code_name'):
+                    self.code_name = parser.code_name
+            return
+
         # code and code specific ids
         self.code_name = backend.get_value('program_name', 0)
         try:
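Note on this hunk: with no backend (the entry failed processing or was never parsed), the code name is recovered from the parser registry instead of the parsed output. A standalone sketch of the fallback; the stub parser is illustrative, while parser_dict.get and the hasattr guard mirror the hunk above:

    class StubParser:
        '''Illustrative parser that carries a code_name, as EmptyParser does.'''
        code_name = 'Octopus'

    parser_dict = {'missing/octopus': StubParser()}

    def resolve_code_name(parser_name, backend):
        if backend is None:
            # .get tolerates removed or renamed parser names; the hasattr
            # guard tolerates parsers that declare no code_name
            parser = parser_dict.get(parser_name)
            if hasattr(parser, 'code_name'):
                return parser.code_name
            return None
        return backend.get_value('program_name', 0)

    assert resolve_code_name('missing/octopus', None) == 'Octopus'
    assert resolve_code_name('missing/unknown', None) is None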
nomad/datamodel/ems.py

@@ -51,6 +51,9 @@ class EMSMetadata(MSection):
     group_hash = Quantity(type=str, a_search=Search())
 
     def apply_domain_metadata(self, backend):
+        if backend is None:
+            return
+
         entry = self.m_parent
         logger = utils.get_logger(__name__).bind(
             upload_id=entry.upload_id, calc_id=entry.calc_id, mainfile=entry.mainfile)
nomad/normalizing/normalizer.py

@@ -126,8 +126,8 @@ class SystemBasedNormalizer(Normalizer, metaclass=ABCMeta):
         except KeyError as e:
             self.logger.error(
-                'Could not read all input data', normalizer=self.__class__.__name__,
-                section='section_system', g_index=g_index, key_error=str(e))
+                'could read a system property', normalizer=self.__class__.__name__,
+                section='section_system', g_index=g_index, key_error=str(e), exc_info=e)
             return False
         except Exception as e:
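Note on this hunk: besides rewording the message, the call now passes exc_info=e, so the handler records the full traceback with the structured event instead of only str(e). The same idea with the standard library (a sketch; NOMAD's utils.get_logger returns a structlog-style bound logger that accepts the keyword the same way):

    import logging

    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger('normalizer')

    try:
        {}['section_system']  # provoke the KeyError
    except KeyError as e:
        # exc_info=e attaches the traceback to the record rather than
        # reducing the failure to its string representation
        logger.error('could not read a system property: %s', e, exc_info=e)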
nomad/parsing/__init__.py

@@ -444,48 +444,57 @@ parsers = [
         mainfile_contents_re=r'Materials Studio DMol\^3'),
     LegacyParser(
-        name='parser/fleur', code_name='fleur', domain='dft',
+        name='parsers/fleur', code_name='fleur', domain='dft',
         parser_class_name='fleurparser.FleurParser',
         mainfile_contents_re=r'This output is generated by fleur.'),
     LegacyParser(
-        name='parser/molcas', code_name='MOLCAS', domain='dft',
+        name='parsers/molcas', code_name='MOLCAS', domain='dft',
         parser_class_name='molcasparser.MolcasParser',
         mainfile_contents_re=r'M O L C A S'),
     LegacyParser(
-        name='parser/onetep', code_name='ONETEP', domain='dft',
+        name='parsers/onetep', code_name='ONETEP', domain='dft',
         parser_class_name='onetepparser.OnetepParser',
         mainfile_contents_re=r'####### # # ####### ####### ####### ######')
 ]
 
+empty_parsers = [
+    EmptyParser(
+        name='missing/octopus', code_name='Octopus', domain='dft',
+        mainfile_name_re=r'(inp)|(.*/inp)'),
+    EmptyParser(
+        name='missing/crystal', code_name='Crystal', domain='dft',
+        mainfile_name_re=r'.*\.cryst\.out'),
+    EmptyParser(
+        name='missing/wien2k', code_name='WIEN2k', domain='dft',
+        mainfile_name_re=r'.*\.scf'),
+    EmptyParser(
+        name='missing/fhi-aims', code_name='FHI-aims', domain='dft',
+        mainfile_name_re=r'.*\.fhiaims')
+]
+
 if config.use_empty_parsers:
     # There are some entries with PIDs that have mainfiles which do not match what
     # the actual parsers expect. We use the EmptyParser to produce placeholder entries
     # to keep the PIDs. These parsers will not match for new, non migrated data.
-    parsers.extend([
-        EmptyParser(
-            name='missing/octopus', code_name='Octopus', domain='dft',
-            mainfile_name_re=r'(inp)|(.*/inp)'),
-        EmptyParser(
-            name='missing/crystal', code_name='Crystal',
-            mainfile_name_re=r'.*\.cryst\.out'),
-        EmptyParser(
-            name='missing/wien2k', code_name='WIEN2k',
-            mainfile_name_re=r'.*\.scf'),
-        EmptyParser(
-            name='missing/fhi-aims', code_name='FHI-aims', domain='dft',
-            mainfile_name_re=r'.*\.fhiaims')
-    ])
+    parsers.extend(empty_parsers)
 
 parsers.append(BrokenParser())
 
 ''' Instantiation and constructor based config of all parsers. '''
-parser_dict = {parser.name: parser for parser in parsers}  # type: ignore
+parser_dict = {parser.name: parser for parser in parsers + empty_parsers}  # type: ignore
 ''' A dict to access parsers by name. Usually 'parsers/<...>', e.g. 'parsers/vasp'. '''
+
+# renamed parsers
+parser_dict['parser/broken'] = parser_dict['parsers/broken']
+parser_dict['parser/fleur'] = parser_dict['parsers/fleur']
+parser_dict['parser/molcas'] = parser_dict['parsers/molcas']
+parser_dict['parser/octopus'] = parser_dict['parsers/octopus']
+parser_dict['parser/onetep'] = parser_dict['parsers/onetep']
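Note on this hunk: parser_dict is now built from parsers + empty_parsers, so the placeholder parsers stay addressable even when config.use_empty_parsers is off, and the 'parser/<...>' aliases keep entries stored under the pre-rename scheme resolvable. Usage, assuming the module as shown above:

    from nomad.parsing import parser_dict

    # the old and the new name resolve to the identical parser instance
    assert parser_dict['parser/fleur'] is parser_dict['parsers/fleur']

    # placeholders are in the dict regardless of config.use_empty_parsers
    assert parser_dict['missing/octopus'].code_name == 'Octopus'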
nomad/parsing/artificial.py

@@ -52,7 +52,7 @@ class EmptyParser(MatchingParser):
     Implementation that produces an empty code_run
     '''
     def run(self, mainfile: str, logger=None) -> Backend:
-        backend = Backend(metainfo='vasp')
+        backend = Backend(metainfo=self.code_name, domain=self.domain, logger=logger)
         backend.openSection('section_run')
         backend.addValue('program_name', self.code_name)
         backend.closeSection('section_run', 0)
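Note on this hunk: placeholder backends were previously always created against the 'vasp' metainfo; each EmptyParser now uses its own code's metainfo, domain, and logger. Assuming the run signature from the hunk and the get_value accessor seen in dft.py, usage would look like:

    from nomad.parsing import parser_dict

    parser = parser_dict['missing/crystal']
    backend = parser.run('some/upload/main.cryst.out')

    # the empty code_run carries the placeholder's code name, not 'vasp'
    assert backend.get_value('program_name', 0) == 'Crystal'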
nomad/parsing/parser.py

@@ -70,7 +70,7 @@ class BrokenParser(Parser):
     '''
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
-        self.name = 'parser/broken'
+        self.name = 'parsers/broken'
         self.code_name = 'currupted mainfile'
         self._patterns = [
             re.compile(r'^pid=[0-9]+'),  # some 'mainfile' contain list of log-kinda information with pids
nomad/processing/base.py

@@ -269,10 +269,15 @@ class Proc(Document, metaclass=ProcMetaclass):
         errors_str = "; ".join([str(error) for error in errors])
         Proc.log(logger, log_level, 'task failed', errors=errors_str)
 
+        self.on_fail()
+
         logger.info('process failed')
         self.save()
 
+    def on_fail(self):
+        pass
+
     def warning(self, *warnings, log_level=logging.WARNING, **kwargs):
         ''' Allows to save warnings. Takes strings or exceptions as args. '''
         assert self.process_running or self.tasks_running
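Note on this hunk: subclasses no longer override fail wholesale; Proc exposes an on_fail hook that runs after the failure is logged and before the state is saved (Calc overrides it in data.py below). A minimal sketch of the template-method pattern, with hypothetical classes:

    class Proc:
        '''Sketch of the base class: fail() drives, on_fail() is the hook.'''

        def fail(self, *errors):
            print('task failed:', '; '.join(str(e) for e in errors))
            self.on_fail()  # subclass hook
            print('process failed')

        def on_fail(self):
            pass  # default: nothing extra to do

    class Calc(Proc):
        def on_fail(self):
            # e.g. index a minimum set of metadata for the failed entry
            print('indexing minimal metadata')

    Calc().fail(KeyError('section_system'))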
nomad/processing/data.py

@@ -97,6 +97,7 @@ class Calc(Proc):
             ('upload_id', 'tasks_status'),
             ('upload_id', 'process_status'),
             ('upload_id', 'metadata.nomad_version'),
             'parser',
             'metadata.published',
+            'metadata.datasets'
             'metadata.pid'
@@ -139,7 +140,8 @@ class Calc(Proc):
         the archive.
         '''
         entry_metadata = datamodel.EntryMetadata()
-        entry_metadata.domain = parser_dict[self.parser].domain
+        if self.parser is not None:
+            entry_metadata.domain = parser_dict[self.parser].domain
         entry_metadata.upload_id = self.upload_id
         entry_metadata.calc_id = self.calc_id
         entry_metadata.mainfile = self.mainfile
@@ -260,10 +262,15 @@ class Calc(Proc):
             self.warnings = ['no matching parser found during re-processing']
         elif self.parser != parser.name:
-            self.parser = parser.name
-            logger.info(
-                'different parser matches during re-process, use new parser',
-                parser=parser.name)
+            if parser_dict[self.parser].name == parser.name:
+                # parser was just renamed
+                self.parser = parser.name
+            else:
+                self.parser = parser.name
+                logger.info(
+                    'different parser matches during re-process, use new parser',
+                    parser=parser.name)
 
         try:
             self._entry_metadata = self.user_metadata()
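Note on this hunk: a name mismatch during re-processing now distinguishes a genuine new match from a mere rename. When the stored name is an alias of the matched parser (via the renamed-parser entries in parser_dict), the name is updated silently; only a truly different parser is logged. A standalone sketch with stub objects, illustrative names only:

    class StubParser:
        def __init__(self, name):
            self.name = name

    fleur = StubParser('parsers/fleur')
    parser_dict = {'parsers/fleur': fleur, 'parser/fleur': fleur}  # old-name alias

    stored_name, matched = 'parser/fleur', fleur
    if stored_name != matched.name:
        if parser_dict[stored_name].name == matched.name:
            # parser was just renamed: adopt the new name without logging
            stored_name = matched.name
        else:
            stored_name = matched.name
            print('different parser matches during re-process, use new parser')

    assert stored_name == 'parsers/fleur'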
@@ -318,29 +325,19 @@ class Calc(Proc):
         except Exception as e:
             logger.error('could unload processing results', exc_info=e)
 
-    def fail(self, *errors, log_level=logging.ERROR, **kwargs):
+    def on_fail(self):
         # in case of failure, index a minimum set of metadata and mark
         # processing failure
         try:
+            if self.parser is not None:
+                try:
+                    parser = parser_dict[self.parser]
+                    if hasattr(parser, 'code_name'):
+                        self._entry_metadata.code_name = parser.code_name
+                except KeyError:
+                    # This only happens in re-processing. The parser was removed.
+                    # The old parser was probably only used to keep this entry matching
+                    # and in the system (retain its PID). With the current nomad this is
+                    # not parsable anyhow.
+                    self._entry_metadata.code_name = config.services.unavailable_value
+
             self._entry_metadata.processed = False
             self.apply_entry_metadata(self._entry_metadata)
             if self._parser_backend and self._parser_backend.resource:
                 backend = self._parser_backend
             else:
                 backend = None
             self._entry_metadata.apply_domain_metadata(backend)
             self._entry_metadata.a_elastic.index()
         except Exception as e:
             self.get_logger().error(
...
@@ -352,8 +349,6 @@ class Calc(Proc):
             self.get_logger().error(
                 'could not write archive after processing failure', exc_info=e)
 
-        super().fail(*errors, log_level=log_level, **kwargs)
-
     def on_process_complete(self, process_name):
         # the save might be necessary to correctly read the join condition from the db
         self.save()
@@ -470,6 +465,13 @@ class Calc(Proc):
             log_data.update(archive_size=archive_size)
 
     def write_archive(self, backend: Backend):
+        def filter_processing_logs(logs):
+            if len(logs) > 100:
+                return [
+                    log for log in logs
+                    if log.get('level') != 'DEBUG']
+            return logs
+
         if self._calc_proc_logs is None:
             self._calc_proc_logs = []
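Note on this hunk: filter_processing_logs keeps archives bounded. Once an entry has accumulated more than 100 processing log events, DEBUG records are dropped before the logs are written; shorter logs pass through unchanged. Standalone, the behavior is:

    def filter_processing_logs(logs):
        # drop DEBUG noise only when the log list is already large
        if len(logs) > 100:
            return [log for log in logs if log.get('level') != 'DEBUG']
        return logs

    logs = [{'level': 'DEBUG', 'event': 'normalizer step'}] * 150
    logs.append({'level': 'ERROR', 'event': 'task failed'})

    filtered = filter_processing_logs(logs)
    assert len(filtered) == 1 and filtered[0]['level'] == 'ERROR'

    short = logs[:50]
    assert filter_processing_logs(short) == short  # under the threshold: unchanged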
@@ -481,7 +483,7 @@ class Calc(Proc):
         if entry_archive.section_metadata is None:
             entry_archive.m_add_sub_section(
                 datamodel.EntryArchive.section_metadata, self._entry_metadata)
 
-        entry_archive.processing_logs = self._calc_proc_logs
+        entry_archive.processing_logs = filter_processing_logs(self._calc_proc_logs)
 
         try:
             return self.upload_files.write_archive(self.calc_id, entry_archive.m_to_dict())
@@ -492,7 +494,7 @@ class Calc(Proc):
             # most likely failed due to domain data, try to write metadata and processing logs
             entry_archive = datamodel.EntryArchive()
             entry_archive.m_add_sub_section(
                 datamodel.EntryArchive.section_metadata, self._entry_metadata)
-            entry_archive.processing_logs = self._calc_proc_logs
+            entry_archive.processing_logs = filter_processing_logs(self._calc_proc_logs)
             self.upload_files.write_archive(self.calc_id, entry_archive.m_to_dict())
             raise e