Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
nomad-lab
nomad-FAIR
Commits
9c86f599
Commit
9c86f599
authored
Jul 16, 2019
by
Markus Scheidgen
Browse files
Merge branch 'refactor' of gitlab.mpcdf.mpg.de:nomad-lab/nomad-FAIR into refactor
parents
496b37b7
38032188
Changes
9
Hide whitespace changes
Inline
Side-by-side
nomad/admin/__init__.py
View file @
9c86f599
...
...
@@ -23,4 +23,4 @@ from .__main__ import cli as cli_main
def
cli
():
cli_main
(
obj
=
POPO
())
cli_main
(
obj
=
POPO
())
# pylint: disable=E1120,E1123
nomad/admin/__main__.py
View file @
9c86f599
...
...
@@ -20,7 +20,7 @@ import shutil
from
tabulate
import
tabulate
from
elasticsearch_dsl
import
A
from
nomad
import
config
as
nomad_config
,
infrastructure
,
processing
,
utils
from
nomad
import
config
as
nomad_config
,
infrastructure
,
processing
from
nomad.search
import
Search
...
...
@@ -145,4 +145,4 @@ def clean(dry, skip_calcs, skip_fs, skip_es):
if
__name__
==
'__main__'
:
cli
(
obj
=
{})
# pylint: disable=E1120
cli
(
obj
=
{})
# pylint: disable=E1120
,E1123
nomad/client/__init__.py
View file @
9c86f599
...
...
@@ -16,6 +16,6 @@
Swagger/bravado based python client library for the API and various useful shell commands.
"""
from
.
import
local
,
migration
,
upload
,
integrationtests
from
.
import
local
,
migration
,
upload
,
integrationtests
,
parse
from
.__main__
import
cli
,
create_client
from
.upload
import
stream_upload_with_client
nomad/client/local.py
View file @
9c86f599
...
...
@@ -17,16 +17,16 @@ import os
import
io
import
requests
import
click
from
typing
import
Union
,
Callable
,
cast
from
typing
import
Union
,
Callable
import
sys
import
ujson
import
bravado.exception
from
nomad
import
config
,
utils
from
nomad.files
import
ArchiveBasedStagingUploadFiles
from
nomad.parsing
import
parser_dict
,
LocalBackend
,
match_parser
from
nomad.normalizing
import
normalizers
from
nomad.datamodel
import
CalcWithMetadata
from
nomad.parsing
import
LocalBackend
from
nomad.client.parse
import
parse
,
normalize
,
normalize_all
from
.__main__
import
cli
...
...
@@ -126,30 +126,7 @@ class CalcProcReproduction:
Run the given parser on the downloaded calculation. If no parser is given,
do parser matching and use the respective parser.
"""
if
parser_name
is
not
None
:
parser
=
parser_dict
.
get
(
parser_name
)
else
:
parser
=
match_parser
(
self
.
mainfile
,
self
.
upload_files
)
assert
parser
is
not
None
,
'there is not parser matching %s'
%
self
.
mainfile
self
.
logger
=
self
.
logger
.
bind
(
parser
=
parser
.
name
)
# type: ignore
self
.
logger
.
info
(
'identified parser'
)
parser_backend
=
parser
.
run
(
self
.
upload_files
.
raw_file_object
(
self
.
mainfile
).
os_path
,
logger
=
self
.
logger
)
if
not
parser_backend
.
status
[
0
]
==
'ParseSuccess'
:
self
.
logger
.
error
(
'parsing was not successful'
,
status
=
parser_backend
.
status
)
parser_backend
.
openNonOverlappingSection
(
'section_entry_info'
)
parser_backend
.
addValue
(
'upload_id'
,
self
.
upload_id
)
parser_backend
.
addValue
(
'calc_id'
,
self
.
calc_id
)
parser_backend
.
addValue
(
'calc_hash'
,
"no hash"
)
parser_backend
.
addValue
(
'mainfile'
,
self
.
mainfile
)
parser_backend
.
addValue
(
'parser_name'
,
parser
.
__class__
.
__name__
)
parser_backend
.
closeNonOverlappingSection
(
'section_entry_info'
)
self
.
logger
.
info
(
'ran parser'
)
return
parser_backend
return
parse
(
self
.
mainfile
,
self
.
upload_files
,
parser_name
=
parser_name
,
logger
=
self
.
logger
)
def
normalize
(
self
,
normalizer
:
Union
[
str
,
Callable
],
parser_backend
:
LocalBackend
=
None
):
"""
...
...
@@ -158,28 +135,13 @@ class CalcProcReproduction:
if
parser_backend
is
None
:
parser_backend
=
self
.
parse
()
if
isinstance
(
normalizer
,
str
):
normalizer
=
next
(
normalizer_instance
for
normalizer_instance
in
normalizers
if
normalizer_instance
.
__class__
.
__name__
==
normalizer
)
assert
normalizer
is
not
None
,
'there is no normalizer %s'
%
str
(
normalizer
)
normalizer_instance
=
cast
(
Callable
,
normalizer
)(
parser_backend
)
logger
=
self
.
logger
.
bind
(
normalizer
=
normalizer_instance
.
__class__
.
__name__
)
self
.
logger
.
info
(
'identified normalizer'
)
normalizer_instance
.
normalize
(
logger
=
logger
)
self
.
logger
.
info
(
'ran normalizer'
)
return
parser_backend
return
normalize
(
parser_backend
=
parser_backend
,
normalizer
=
normalizer
,
logger
=
self
.
logger
)
def
normalize_all
(
self
,
parser_backend
:
LocalBackend
=
None
):
"""
Parse the downloaded calculation and run the whole normalizer chain.
"""
for
normalizer
in
normalizers
:
parser_backend
=
self
.
normalize
(
normalizer
,
parser_backend
=
parser_backend
)
return
parser_backend
return
normalize_all
(
parser_backend
=
parser_backend
,
logger
=
self
.
logger
)
@
cli
.
command
(
help
=
'Run processing locally.'
)
...
...
nomad/client/parse.py
0 → 100644
View file @
9c86f599
from
typing
import
Union
,
Callable
,
cast
import
os.path
import
ujson
import
click
import
sys
from
nomad
import
config
,
utils
,
files
from
nomad.parsing
import
LocalBackend
,
parser_dict
,
match_parser
from
nomad.normalizing
import
normalizers
from
nomad.datamodel
import
CalcWithMetadata
from
.__main__
import
cli
def parse(
        mainfile: str, upload_files: Union[str, files.StagingUploadFiles],
        parser_name: str = None, logger=None) -> LocalBackend:
    """
    Run the given parser on the downloaded calculation. If no parser is given,
    do parser matching and use the respective parser.

    Arguments:
        mainfile: The path of the mainfile, relative to ``upload_files``.
        upload_files: Either a directory name (joined with ``mainfile`` to get the
            path that is parsed) or an upload files object that provides
            ``raw_file_object(mainfile).os_path``.
        parser_name: Key used to look up the parser in ``parser_dict``. If None,
            ``match_parser`` is used to find a parser.
        logger: The logger to use. Defaults to ``utils.get_logger(__name__)``.

    Returns: The backend returned by the parser's ``run``, with an additional
        ``section_entry_info`` section.

    Raises:
        AssertionError: If there is no parser with the given name, or no parser
            could be matched to the mainfile.
    """
    if logger is None:
        logger = utils.get_logger(__name__)

    if parser_name is not None:
        parser = parser_dict.get(parser_name)
        missing_parser_msg = 'there is no parser %s' % parser_name
    else:
        parser = match_parser(mainfile, upload_files)
        missing_parser_msg = 'there is not parser matching %s' % mainfile

    # Raised explicitly instead of using an assert statement: asserts are removed
    # when python runs with -O and the missing parser would only surface as an
    # AttributeError below. The exception type stays the same for callers.
    if parser is None:
        raise AssertionError(missing_parser_msg)

    logger = logger.bind(parser=parser.name)  # type: ignore
    logger.info('identified parser')

    if isinstance(upload_files, str):
        mainfile_path = os.path.join(upload_files, mainfile)
    else:
        mainfile_path = upload_files.raw_file_object(mainfile).os_path

    parser_backend = parser.run(mainfile_path, logger=logger)

    # An unsuccessful parse is only logged; the backend is still completed and returned.
    if parser_backend.status[0] != 'ParseSuccess':
        logger.error('parsing was not successful', status=parser_backend.status)

    # There is no upload or calc in this local context, so the ids are filled with
    # the configured placeholder value.
    parser_backend.openNonOverlappingSection('section_entry_info')
    parser_backend.addValue('upload_id', config.services.unavailable_value)
    parser_backend.addValue('calc_id', config.services.unavailable_value)
    parser_backend.addValue('calc_hash', "no hash")
    parser_backend.addValue('mainfile', mainfile)
    parser_backend.addValue('parser_name', parser.__class__.__name__)
    parser_backend.closeNonOverlappingSection('section_entry_info')

    logger.info('ran parser')
    return parser_backend
def normalize(
        normalizer: Union[str, Callable], parser_backend: LocalBackend = None,
        logger=None) -> LocalBackend:
    """
    Run a single normalizer on the given backend.

    Arguments:
        normalizer: Either the normalizer itself, i.e. a callable that takes the
            backend and returns an object with a ``normalize(logger=...)`` method,
            or the name of an entry in ``normalizers``.
        parser_backend: The backend that is handed to the normalizer.
        logger: The logger to use. Defaults to ``utils.get_logger(__name__)``.

    Returns: The given ``parser_backend``.

    Raises:
        AssertionError: If ``normalizer`` is None or a name that matches no entry
            in ``normalizers``.
    """
    if logger is None:
        logger = utils.get_logger(__name__)

    if isinstance(normalizer, str):
        # The entries of ``normalizers`` are called below to create the actual
        # normalizer instance, so for class entries the entry's own ``__name__`` is
        # what a caller passes as name (``entry.__class__.__name__`` would be the
        # metaclass name). The type name is still accepted for entries that are
        # instances. The ``None`` default turns a failed lookup into the intended
        # error below instead of a bare StopIteration.
        matched = next(
            (
                candidate for candidate in normalizers
                if normalizer in (
                    getattr(candidate, '__name__', None),
                    candidate.__class__.__name__)),
            None)
        if matched is None:
            raise AssertionError('there is no normalizer %s' % normalizer)
        normalizer = matched
    elif normalizer is None:
        # Raised explicitly so the check is not removed when python runs with -O.
        raise AssertionError('there is no normalizer %s' % str(normalizer))

    normalizer_instance = cast(Callable, normalizer)(parser_backend)
    logger = logger.bind(normalizer=normalizer_instance.__class__.__name__)
    logger.info('identified normalizer')

    normalizer_instance.normalize(logger=logger)
    logger.info('ran normalizer')
    return parser_backend
def normalize_all(parser_backend: LocalBackend = None, logger=None) -> LocalBackend:
    """
    Run the whole normalizer chain on the given backend.

    Every entry of ``normalizers`` is applied in order through :func:`normalize`;
    the backend returned by one step is passed on to the next one.

    Returns: The backend returned by the last step, or the given backend if
        ``normalizers`` has no entries.
    """
    current_backend = parser_backend
    for chain_entry in normalizers:
        current_backend = normalize(
            chain_entry, parser_backend=current_backend, logger=logger)

    return current_backend
@cli.command(help='Run parsing and normalizing locally.', name='parse')
@click.argument('MAINFILE', nargs=1, required=True, type=str)
@click.option(
    '--show-backend', is_flag=True, default=False,
    help='Print the backend data.')
@click.option(
    '--show-metadata', is_flag=True, default=False,
    help='Print the extracted repo metadata.')
@click.option(
    '--skip-normalizers', is_flag=True, default=False,
    help='Do not run the normalizer.')
def _parse(mainfile, show_backend, show_metadata, skip_normalizers):
    """
    Command line entry point: parses MAINFILE relative to the current directory,
    runs the normalizer chain unless it is skipped, and prints what was asked for
    to stdout.
    """
    utils.configure_logging()

    # '.' is passed as upload_files, i.e. the mainfile path is taken relative to
    # the current working directory.
    parser_backend = parse(mainfile, '.')
    if not skip_normalizers:
        normalize_all(parser_backend)

    if show_backend:
        parser_backend.write_json(sys.stdout, pretty=True)

    if show_metadata:
        calc_metadata = CalcWithMetadata()
        calc_metadata.apply_domain_metadata(parser_backend)
        ujson.dump(calc_metadata.to_dict(), sys.stdout, indent=4)
nomad/datamodel/dft.py
View file @
9c86f599
...
...
@@ -89,17 +89,22 @@ class DFTCalcWithMetadata(CalcWithMetadata):
super
().
__init__
(
**
kwargs
)
def
apply_domain_metadata
(
self
,
backend
):
from
nomad.normalizing.system
import
normalized_atom_labels
logger
=
utils
.
get_logger
(
__name__
).
bind
(
upload_id
=
self
.
upload_id
,
calc_id
=
self
.
calc_id
,
mainfile
=
self
.
mainfile
)
self
.
code_name
=
backend
.
get_value
(
'program_name'
,
0
)
self
.
code_version
=
simplify_version
(
backend
.
get_value
(
'program_version'
,
0
))
try
:
self
.
code_version
=
simplify_version
(
backend
.
get_value
(
'program_version'
,
0
))
except
KeyError
:
self
.
code_version
=
config
.
services
.
unavailable_value
self
.
atoms
=
get_optional_backend_value
(
backend
,
'atom_labels'
,
'section_system'
,
logger
=
logger
)
if
hasattr
(
self
.
atoms
,
'tolist'
):
self
.
atoms
=
self
.
atoms
.
tolist
()
self
.
n_atoms
=
len
(
self
.
atoms
)
self
.
atoms
=
list
(
set
(
self
.
atoms
))
self
.
atoms
=
list
(
set
(
normalized_atom_labels
(
set
(
self
.
atoms
))
))
self
.
atoms
.
sort
()
self
.
crystal_system
=
get_optional_backend_value
(
...
...
nomad/normalizing/system.py
View file @
9c86f599
...
...
@@ -16,6 +16,7 @@ from typing import Any
import
ase
import
numpy
as
np
import
json
import
re
from
matid
import
SymmetryAnalyzer
from
matid.geometry
import
get_dimensionality
...
...
@@ -24,6 +25,23 @@ from nomad import utils, config
from
nomad.normalizing.normalizer
import
SystemBasedNormalizer
# Atom labels are checked with a single regular expression: an alternation of all
# ase chemical symbols. The symbols are ordered longest first, so that Br (and not
# B) is found when searching for Br.
atom_label_re = re.compile(
    '|'.join(sorted(ase.data.chemical_symbols, key=len, reverse=True)))
def normalized_atom_labels(atom_labels):
    """
    Normalizes the given atom labels and returns them as a new list.

    For each label, the first chemical symbol found in it by ``atom_label_re`` is
    used. This covers labels that are plain symbols and labels that carry
    additional numbers (used to distinguish same species but different labels, see
    meta-info). Labels without any chemical symbol are replaced by the first entry
    of ``ase.data.chemical_symbols``, the ase placeholder 'X' for unknown elements.
    """
    placeholder = ase.data.chemical_symbols[0]

    def to_symbol(atom_label):
        found = re.search(atom_label_re, atom_label)
        return found.group(0) if found is not None else placeholder

    return [to_symbol(atom_label) for atom_label in atom_labels]
class
SystemNormalizer
(
SystemBasedNormalizer
):
"""
...
...
@@ -66,20 +84,25 @@ class SystemNormalizer(SystemBasedNormalizer):
# analyze atoms labels
atom_labels
=
get_value
(
'atom_labels'
,
nonp
=
True
)
if
atom_labels
is
not
None
:
atom_labels
=
normalized_atom_labels
(
atom_labels
)
atom_species
=
get_value
(
'atom_species'
,
nonp
=
True
)
if
atom_labels
is
None
and
atom_species
is
None
:
self
.
logger
.
error
(
'calculation has neither atom species nor labels'
)
return
# If there are no atom labels we create them from atom species data.
if
atom_labels
is
None
:
atom_labels
=
list
(
ase
.
data
.
chemical_symbols
[
species
]
for
species
in
atom_species
)
# At this point we should have atom labels. Check that each atom label in the atom
# labels list is a true atom label by checking if it is in the ASE list of atom labels.
if
not
all
(
label
in
ase
.
data
.
chemical_symbols
for
label
in
atom_labels
):
# Throw an error that the atom labels are poorly formated or there are unknown
# labels. Save first ten elemenets in logged error.
self
.
logger
.
error
(
'Atom labels cannot be recognized.'
,
atom_labels
=
atom_labels
[:
10
])
return
try
:
atom_labels
=
list
(
ase
.
data
.
chemical_symbols
[
species
]
for
species
in
atom_species
)
except
IndexError
:
self
.
logger
.
error
(
'calculation has atom species that are out of range'
)
return
self
.
_backend
.
addArrayValues
(
'atom_labels'
,
atom_labels
)
# At this point we should have atom labels.
try
:
atoms
=
ase
.
Atoms
(
symbols
=
atom_labels
)
chemical_symbols
=
list
(
atoms
.
get_chemical_symbols
())
...
...
@@ -91,8 +114,6 @@ class SystemNormalizer(SystemBasedNormalizer):
'cannot build ase atoms from atom labels'
,
atom_labels
=
atom_labels
[:
10
],
exc_info
=
e
,
error
=
str
(
e
))
raise
e
# Write labels. Rewrite if labels exist in backend already from parser.
self
.
_backend
.
addArrayValues
(
'atom_labels'
,
atom_labels
)
if
atom_species
is
None
:
atom_species
=
atoms
.
get_atomic_numbers
().
tolist
()
...
...
nomad/parsing/__init__.py
View file @
9c86f599
...
...
@@ -58,10 +58,11 @@ based on NOMAD-coe's *python-common* module.
:members:
"""
from
typing
import
Callable
,
IO
from
typing
import
Callable
,
IO
,
Union
import
magic
import
gzip
import
bz2
import
os.path
from
nomad
import
files
,
config
...
...
@@ -76,7 +77,7 @@ _compressions = {
}
def
match_parser
(
mainfile
:
str
,
upload_files
:
files
.
StagingUploadFiles
)
->
'Parser'
:
def
match_parser
(
mainfile
:
str
,
upload_files
:
Union
[
str
,
files
.
StagingUploadFiles
]
)
->
'Parser'
:
"""
Performs parser matching. This means it takes the given mainfile and potentially
opens it with the given callback and tries to identify a parser that can parse
...
...
@@ -87,15 +88,21 @@ def match_parser(mainfile: str, upload_files: files.StagingUploadFiles) -> 'Pars
Arguments:
mainfile: The upload relative path to the mainfile
open: A function that allows to open a stream to the file
upload_files: Either a :class:`files.StagingUploadFiles` object or a directory name.
Directory name + mainfile needs to point to the file.
Returns: The parser, or None if no parser could be matched.
"""
with
upload_files
.
raw_file
(
mainfile
,
'rb'
)
as
f
:
if
isinstance
(
upload_files
,
str
):
mainfile_path
=
os
.
path
.
join
(
upload_files
,
mainfile
)
else
:
mainfile_path
=
upload_files
.
raw_file_object
(
mainfile
).
os_path
with
open
(
mainfile_path
,
'rb'
)
as
f
:
compression
,
open_compressed
=
_compressions
.
get
(
f
.
read
(
3
),
(
None
,
open
))
mainfile_path
=
upload_files
.
raw_file_object
(
mainfile
).
os_path
with
open_compressed
(
mainfile_path
,
'rb'
)
as
f
:
buffer
=
f
.
read
(
2048
)
with
open_compressed
(
mainfile_path
,
'rb'
)
as
c
f
:
buffer
=
c
f
.
read
(
2048
)
mime_type
=
magic
.
from_buffer
(
buffer
,
mime
=
True
)
for
parser
in
parsers
:
...
...
tests/test_normalizing.py
View file @
9c86f599
...
...
@@ -131,17 +131,22 @@ def test_normalizer_faulty_matid(caplog):
def
test_normalizer_single_string_atom_labels
(
caplog
):
""" Runs normalizer on ['Br1SiSiK'] expects error that it is formatted wrong."""
"""
Runs normalizer on ['Br1SiSiK'], expects an error. Should replace the label with 'X' and
the number of positions should not match the labels.
"""
backend
=
parse_file
(
single_string_atom_labels
)
run_normalize
(
backend
)
assert_log
(
caplog
,
'ERROR'
,
'
Atom labels cannot be recognized.
'
)
assert_log
(
caplog
,
'ERROR'
,
'
len of atom position does not match number of atoms
'
)
def
test_normalizer_unknown_atom_label
(
caplog
):
""" Runs normalizer on ['Br','Si','Si','Za'], expects Za throws an error"""
def
test_normalizer_unknown_atom_label
(
caplog
,
no_warn
):
""" Runs normalizer on ['Br','Si','Si','Za'], for normalizeation Za will be replaced,
but stays int the labels.
"""
backend
=
parse_file
(
unknown_atom_label
)
run_normalize
(
backend
)
assert
_log
(
caplog
,
'ERROR'
,
'A
tom
labels
cannot be recognized.'
)
assert
backend
.
get_value
(
'a
tom
_
labels
'
)[
3
]
==
'Za'
def
test_symmetry_classification_fcc
():
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment