Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
nomad-lab
nomad-FAIR
Commits
c1dae8ee
Commit
c1dae8ee
authored
Nov 25, 2019
by
Cuauhtemoc Salazar
Browse files
Springer normalizer update. Pipeline issues clean up
parent
584a2f39
Changes
4
Hide whitespace changes
Inline
Side-by-side
nomad-meta-info
@
6cc658ff
Compare
c3bb8b69
...
6cc658ff
Subproject commit
c3bb8b69771a3cc87ca500f80a182380175e09cf
Subproject commit
6cc658ff3117dfccad9aace953d225385e59c5cf
nomad/config.py
View file @
c1dae8ee
...
...
@@ -206,6 +206,7 @@ max_upload_size = 32 * (1024 ** 3)
springer_db_relative_path
=
'normalizing/data/SM_all08.db'
springer_db_path
=
os
.
path
.
join
(
os
.
path
.
dirname
(
os
.
path
.
abspath
(
__file__
)),
springer_db_relative_path
)
def
normalize_loglevel
(
value
,
default_level
=
logging
.
INFO
):
plain_value
=
value
if
plain_value
is
None
:
...
...
nomad/normalizing/system.py
View file @
c1dae8ee
...
...
@@ -42,9 +42,10 @@ atom_label_re = re.compile('|'.join(
springer_db_connection
=
None
def
open_springer_database
():
"""
Create a global connection to the Springer database in a way that
Create a global connection to the Springer database in a way that
each worker opens the database just once.
"""
global
springer_db_connection
...
...
@@ -54,10 +55,11 @@ def open_springer_database():
if
not
os
.
path
.
exists
(
db_file
):
utils
.
get_logger
(
__name__
).
error
(
'Springer database not found'
)
return
None
springer_db_connection
=
sqlite3
.
connect
(
db_file
)
springer_db_connection
=
sqlite3
.
connect
(
db_file
)
return
springer_db_connection
def
normalized_atom_labels
(
atom_labels
):
"""
Normalizes the given atom labels: they either are labels right away, or contain
...
...
@@ -68,26 +70,26 @@ def normalized_atom_labels(atom_labels):
ase
.
data
.
chemical_symbols
[
0
]
if
match
is
None
else
match
.
group
(
0
)
for
match
in
[
re
.
search
(
atom_label_re
,
atom_label
)
for
atom_label
in
atom_labels
]]
def
formula_normalizer
(
atoms
):
"""
Reads the chemical symbols in ase.atoms and returns a normalized formula.
Formula normalization is on the basis of atom counting,
e.g., Tc -> Tc100, SZn -> S50Zn50, Co2Nb -> Co67Nb33
e.g., Tc -> Tc100, SZn -> S50Zn50, Co2Nb -> Co67Nb33
"""
#
#
chem_symb
=
atoms
.
get_chemical_symbols
()
atoms_counter
=
Counter
(
chem_symb
)
# dictionary
atoms_counter
=
Counter
(
chem_symb
)
# dictionary
atoms_total
=
sum
(
atoms_counter
.
values
())
atoms_normed
=
[]
for
key
in
atoms_counter
.
keys
():
norm
=
str
(
round
(
100
*
atoms_counter
[
key
]
/
atoms_total
))
atoms_normed
.
append
(
key
+
norm
)
norm
=
str
(
round
(
100
*
atoms_counter
[
key
]
/
atoms_total
))
atoms_normed
.
append
(
key
+
norm
)
#
atoms_normed
.
sort
()
return
''
.
join
(
atoms_normed
)
class
SystemNormalizer
(
SystemBasedNormalizer
):
...
...
@@ -170,7 +172,7 @@ class SystemNormalizer(SystemBasedNormalizer):
if
atom_species
is
None
:
atom_species
=
atoms
.
get_atomic_numbers
().
tolist
()
self
.
_backend
.
addArrayValues
(
'atom_species'
,
atom_species
)
else
:
else
:
if
not
isinstance
(
atom_species
,
list
):
atom_species
=
[
atom_species
]
if
atom_species
!=
atoms
.
get_atomic_numbers
().
tolist
():
...
...
@@ -385,31 +387,31 @@ class SystemNormalizer(SystemBasedNormalizer):
self
.
_backend
.
addArrayValues
(
'equivalent_atoms_original'
,
orig_equivalent_atoms
)
self
.
_backend
.
closeSection
(
'section_original_system'
,
orig_gid
)
self
.
_backend
.
closeSection
(
'section_symmetry'
,
symmetry_gid
)
self
.
springer_classification
(
atoms
,
space_group_number
)
# Springer Normalizer
self
.
springer_classification
(
atoms
,
space_group_number
)
# Springer Normalizer
self
.
prototypes
(
prim_num
,
prim_wyckoff
,
space_group_number
)
self
.
_backend
.
closeSection
(
'section_symmetry'
,
symmetry_gid
)
def
springer_classification
(
self
,
atoms
,
space_group_number
):
# SPRINGER NORMALIZER
normalized_formula
=
formula_normalizer
(
atoms
)
#
normalized_formula
=
formula_normalizer
(
atoms
)
#
springer_db_connection
=
open_springer_database
()
if
springer_db_connection
is
None
:
if
springer_db_connection
is
None
:
return
cur
=
springer_db_connection
.
cursor
()
cur
=
springer_db_connection
.
cursor
()
# SQL QUERY
# SQL QUERY
# (this replaces the four queries done in the old 'classify4me_SM_normalizer.py')
cur
.
execute
(
"""
cur
.
execute
(
"""
SELECT
entry.entry_id,
entry.alphabetic_formula,
GROUP_CONCAT(DISTINCT compound_classes.compound_class_name),
GROUP_CONCAT(DISTINCT classification.classification_name)
GROUP_CONCAT(DISTINCT classification.classification_name)
FROM entry
LEFT JOIN entry_compound_class as ecc ON ecc.entry_nr = entry.entry_nr
LEFT JOIN compound_classes ON ecc.compound_class_nr = compound_classes.compound_class_nr
...
...
@@ -422,57 +424,57 @@ class SystemNormalizer(SystemBasedNormalizer):
"""
%
(
normalized_formula
,
space_group_number
))
results
=
cur
.
fetchall
()
# All SQL queries done
dbdict
=
{}
for
ituple
in
results
:
for
ituple
in
results
:
for
item
in
ituple
:
# 'spr' means 'springer'
spr_id
=
ituple
[
0
]
spr_aformula
=
ituple
[
1
]
# alphabetical formula
spr_aformula
=
ituple
[
1
]
# alphabetical formula
spr_url
=
'http://materials.springer.com/isp/crystallographic/docs/'
+
spr_id
spr_compound
=
ituple
[
2
].
split
(
','
)
# convert string to list
spr_classification
=
ituple
[
3
].
split
(
','
)
#
#
spr_compound
.
sort
()
spr_classification
.
sort
()
dbdict
[
spr_id
]
=
{
'spr_id'
:
spr_id
,
'spr_aformula'
:
spr_aformula
,
'spr_url'
:
spr_url
,
'spr_compound'
:
spr_compound
,
'spr_classification'
:
spr_classification
}
dbdict
[
spr_id
]
=
{
'spr_id'
:
spr_id
,
'spr_aformula'
:
spr_aformula
,
'spr_url'
:
spr_url
,
'spr_compound'
:
spr_compound
,
'spr_classification'
:
spr_classification
}
# SPRINGER's METAINFO UPDATE
# LAYOUT: Five sections under 'section_springer_material' for each material ID:
# SPRINGER's METAINFO UPDATE
# LAYOUT: Five sections under 'section_springer_material' for each material ID:
# id, alphabetical formula, url, compound_class, classification.
# As per Markus/Luca's emails, we don't expose Springer bib references (Springer's paywall)
for
material
in
dbdict
.
values
():
self
.
_backend
.
openNonOverlappingSection
(
'section_springer_material'
)
for
material
in
dbdict
.
values
():
self
.
_backend
.
openNonOverlappingSection
(
'section_springer_material'
)
self
.
_backend
.
addValue
(
'springer_id'
,
material
[
'spr_id'
])
self
.
_backend
.
addValue
(
'springer_alphabetical_formula'
,
material
[
'spr_aformula'
])
self
.
_backend
.
addValue
(
'springer_alphabetical_formula'
,
material
[
'spr_aformula'
])
self
.
_backend
.
addValue
(
'springer_url'
,
material
[
'spr_url'
])
self
.
_backend
.
addArrayValues
(
'springer_compound_class'
,
material
[
'spr_compound'
])
self
.
_backend
.
addArrayValues
(
'springer_classification'
,
material
[
'spr_classification'
])
self
.
_backend
.
closeNonOverlappingSection
(
'section_springer_material'
)
self
.
_backend
.
closeNonOverlappingSection
(
'section_springer_material'
)
# CHECK if the springer_classification and springer_compound_class found for each springer_id match
# Check the 'springer_classification' and 'springer_compound_class' information
# found is the same for all springer_id's
dkeys
=
list
(
dbdict
.
keys
())
class_0
=
dbdict
[
dkeys
[
0
]][
'spr_classification'
]
comp_0
=
dbdict
[
spr_id
][
'spr_compound'
]
for
ii
in
range
(
1
,
len
(
dkeys
)):
class_test
=
class_0
==
dbdict
[
dkeys
[
ii
]][
'spr_classification'
]
comp_test
=
comp_0
==
dbdict
[
dkeys
[
ii
]][
'spr_compound'
]
if
(
class_test
or
comp_test
)
is
False
:
self
.
logger
.
warning
(
'Mismatch in Springer classification or compounds'
)
if
len
(
dkeys
)
!=
0
:
class_0
=
dbdict
[
dkeys
[
0
]][
'spr_classification'
]
comp_0
=
dbdict
[
spr_id
][
'spr_compound'
]
# compare 'class_0' and 'comp_0' against the rest
for
ii
in
range
(
1
,
len
(
dkeys
)):
class_test
=
(
class_0
==
dbdict
[
dkeys
[
ii
]][
'spr_classification'
])
comp_test
=
(
comp_0
==
dbdict
[
dkeys
[
ii
]][
'spr_compound'
])
if
(
class_test
or
comp_test
)
is
False
:
self
.
logger
.
warning
(
'Mismatch in Springer classification or compounds'
)
def
prototypes
(
self
,
atomSpecies
,
wyckoffs
,
spg_nr
):
try
:
norm_wyckoff
=
SystemNormalizer
.
get_normalized_wyckoff
(
atomSpecies
,
wyckoffs
)
...
...
tests/test_normalizing.py
View file @
c1dae8ee
...
...
@@ -205,31 +205,31 @@ def test_vasp_incar_system():
"""
backend
=
parse_file
(
vasp_parser
)
backend
=
run_normalize
(
backend
)
expected_value
=
'SrTiO3'
# material's formula in vasp.xml
expected_value
=
'SrTiO3'
# material's formula in vasp.xml
# backend_value = backend.get_value('x_vasp_unknown_incars') # OK
# backend_value = backend.get_value('x_vasp_atom_kind_refs') # OK
backend_value
=
backend
.
get_value
(
'x_vasp_incar_SYSTEM'
)
# OK
#backend_value = backend.get_value('x_vasp_unknown_incars') # OK
#backend_value = backend.get_value('x_vasp_atom_kind_refs') # OK
backend_value
=
backend
.
get_value
(
'x_vasp_incar_SYSTEM'
)
# OK
print
(
"backend_value: "
,
backend_value
)
assert
expected_value
==
backend_value
def
test_springer_normalizer
():
"""
Ensure the Springer normalizer works well with the VASP example.
Ensure the Springer normalizer works well with the VASP example.
"""
backend
=
parse_file
(
vasp_parser
)
backend
=
run_normalize
(
backend
)
backend_value
=
backend
.
get_value
(
'springer_url'
,
89
)
# with get_value('springer_id') fails.
expected_value
=
'http://materials.springer.com/isp/crystallographic/docs/sd_1932539'
print
(
"backend_value: "
,
backend_value
)
assert
expected_value
==
backend_value
backend_value
=
backend
.
get_value
(
'springer_id'
,
89
)
expected_value
=
'sd_1932539'
assert
expected_value
==
backend_value
# FIXME: search for ID,
# also check NON empty for the others
# avoid storing single-use variables
backend_value
=
backend
.
get_value
(
'springer_alphabetical_formula'
,
89
)
expected_value
=
'O3SrTi'
assert
expected_value
==
backend_value
# TODO: add test for prototypes
\ No newline at end of file
backend_value
=
backend
.
get_value
(
'springer_url'
,
89
)
expected_value
=
'http://materials.springer.com/isp/crystallographic/docs/sd_1932539'
assert
expected_value
==
backend_value
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment