Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
nomad-lab
nomad-FAIR
Commits
827e261a
Commit
827e261a
authored
Dec 20, 2019
by
Markus Scheidgen
Browse files
Merge remote-tracking branch 'origin/normalizer' into dev-merge.
parents
7b0a1bea
f87ba5e2
Changes
10
Expand all
Hide whitespace changes
Inline
Side-by-side
.gitlab-ci.yml
View file @
827e261a
...
...
@@ -92,8 +92,10 @@ tests:
NOMAD_MONGO_HOST
:
mongo
NOMAD_KEYCLOAK_CLIENT_SECRET
:
${CI_KEYCLOAK_TEST_CLIENT_SECRET}
NOMAD_KEYCLOAK_PASSWORD
:
${CI_KEYCLOAK_ADMIN_PASSWORD}
NOMAD_SPRINGER_DB_PATH
:
/nomad/fairdi/db/data/springer.db
script
:
-
cd /app
-
ls /builds
-
python -m pytest --cov=nomad -sv tests
except
:
refs
:
...
...
nomad/config.py
View file @
827e261a
...
...
@@ -207,6 +207,10 @@ max_upload_size = 32 * (1024 ** 3)
raw_file_strip_cutoff = 1000

# Default location of the Springer sqlite file: a path relative to the directory
# that contains this module, turned into an absolute path below.
# NOTE(review): the CI and Helm hunks of this commit set a springer db path to
# '/nomad/fairdi/db/data/springer.db'; presumably that overrides this default.
springer_db_relative_path = 'normalizing/data/SM_all08.db'
springer_db_path = os.path.join(
    os.path.dirname(os.path.abspath(__file__)), springer_db_relative_path)
def
normalize_loglevel
(
value
,
default_level
=
logging
.
INFO
):
plain_value
=
value
if
plain_value
is
None
:
...
...
nomad/normalizing/__init__.py
View file @
827e261a
...
...
@@ -33,14 +33,15 @@ There is one ABC for all normalizer:
from
typing
import
List
,
Any
,
Iterable
,
Type
from
.normalizer
import
Normalizer
from
.system
import
SystemNormalizer
from
.dos
import
DosNormalizer
from
.fhiaims
import
FhiAimsBaseNormalizer
from
.normalizer
import
Normalizer
from
.optimade
import
OptimadeNormalizer
from
.system
import
SystemNormalizer
# The normalizer classes provided by this package, collected in one list.
# NOTE(review): 'FhiAimsBaseNormalizer' shows up twice without a separating comma
# because this is a diff rendering: presumably the removed line (last element,
# no trailing comma) and the added line (followed by ', DosNormalizer') are both
# displayed. The committed list presumably contains it only once.
normalizers
:
Iterable
[
Type
[
Normalizer
]]
=
[
SystemNormalizer
,
OptimadeNormalizer
,
FhiAimsBaseNormalizer
FhiAimsBaseNormalizer
,
DosNormalizer
]
nomad/normalizing/data/.gitignore
View file @
827e261a
SM_all08.db
\ No newline at end of file
SM_all08.db
nomad/normalizing/dos.py
0 → 100644
View file @
827e261a
# Copyright 2018 Markus Scheidgen
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
.normalizer
import
Normalizer
import
numpy
as
np
class DosNormalizer(Normalizer):
    """
    Normalizer that adds a density of states normalized per atom and per volume.

    For every 'section_dos' that has 'dos_values', the values are divided by the
    number of atoms times the unit cell volume of the system referenced by the
    enclosing 'section_single_configuration_calculation'. The result is added as
    'dos_values_normalized' using the index of the DOS section.
    """

    def normalize(self, logger=None) -> None:
        """
        Compute 'dos_values_normalized' for all DOS sections found in the backend.

        Arguments:
            logger: Optional logger. If given, it is bound to this normalizer's
                class name and replaces ``self.logger``.
        """
        if logger is not None:
            self.logger = logger.bind(normalizer=self.__class__.__name__)

        # 'scc': single_configuration_calculation
        section_scc_indices = self._backend.get_sections(
            'section_single_configuration_calculation')

        for scc_index in section_scc_indices:
            section_dos_indices = self._backend.get_sections('section_dos', scc_index)

            for dos_index in section_dos_indices:
                try:
                    # a numpy.ndarray
                    dos = self._backend.get_value('dos_values', dos_index)
                except KeyError:
                    # section_dos without dos_values
                    continue

                system_index = self._backend.get_value(
                    'single_configuration_calculation_to_system_ref', scc_index)
                atom_positions = self._backend.get_value('atom_positions', system_index)
                lattice_vectors = self._backend.get_value('lattice_vectors', system_index)

                number_of_atoms = np.shape(atom_positions)[0]
                # The determinant is signed: it is negative for a left-handed set
                # of lattice vectors. A volume must not be negative, otherwise the
                # sign of the normalized DOS would flip.
                unit_cell_volume = np.abs(np.linalg.det(lattice_vectors))

                # Final quantities
                dos_normed = dos / (number_of_atoms * unit_cell_volume)

                # Add quantities to NOMAD's Metainfo
                self._backend.addArrayValues('dos_values_normalized', dos_normed, dos_index)
nomad/normalizing/system.py
View file @
827e261a
...
...
@@ -12,11 +12,14 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from
collections
import
Counter
from
typing
import
Any
import
ase
import
numpy
as
np
import
json
import
re
import
os
import
sqlite3
import
functools
import
fractions
...
...
@@ -28,13 +31,34 @@ from nomadcore.structure_types import structure_types_by_spacegroup as str_types
from
nomad
import
utils
,
config
from
nomad.normalizing.normalizer
import
SystemBasedNormalizer
# Regular expression that recognises chemical symbols inside atom labels. The
# alternation is built from all known symbols ordered by descending length, so
# that searching for e.g. 'Br' finds 'Br' and not just 'B'.
atom_label_re = re.compile(
    '|'.join(sorted(ase.data.chemical_symbols, key=len, reverse=True)))
springer_db_connection = None


def open_springer_database():
    """
    Create a global connection to the Springer database in a way that
    each worker opens the database just once.

    The database is opened in read-only mode and the connection is cached in the
    module-level ``springer_db_connection``.

    Returns:
        The shared sqlite3 connection, or None (after logging an error) if the
        file at ``config.springer_db_path`` does not exist.
    """
    import pathlib

    global springer_db_connection
    if springer_db_connection is None:
        # filepath definition in 'nomad-FAIR/nomad/config.py'
        db_file = config.springer_db_path
        if not os.path.exists(db_file):
            utils.get_logger(__name__).error('Springer database not found')
            return None

        # 'uri=True' on its own does not make the connection read-only: sqlite
        # only interprets query parameters such as 'mode=ro' if the name really
        # is a 'file:' URI. as_uri() needs an absolute path and percent-encodes
        # characters (e.g. '?' or '#') that are special in URIs.
        db_uri = pathlib.Path(os.path.abspath(db_file)).as_uri() + '?mode=ro'

        # we lift the thread check because we share the connection among workers
        springer_db_connection = sqlite3.connect(
            db_uri, check_same_thread=False, uri=True)

    return springer_db_connection
def
normalized_atom_labels
(
atom_labels
):
"""
Normalizes the given atom labels: they either are labels right away, or contain
...
...
@@ -46,6 +70,26 @@ def normalized_atom_labels(atom_labels):
for
match
in
[
re
.
search
(
atom_label_re
,
atom_label
)
for
atom_label
in
atom_labels
]]
def formula_normalizer(atoms):
    """
    Returns a normalized formula for the chemical symbols of the given ase.atoms.

    Every element symbol is followed by its share of all atoms in percent, rounded
    to an integer. The resulting 'symbol + percentage' strings are sorted and
    concatenated, e.g., Tc -> Tc100, SZn -> S50Zn50, Co2Nb -> Co67Nb33
    """
    symbol_counts = Counter(atoms.get_chemical_symbols())
    total = sum(symbol_counts.values())

    parts = [
        symbol + str(round(100 * count / total))
        for symbol, count in symbol_counts.items()]

    return ''.join(sorted(parts))
class
SystemNormalizer
(
SystemBasedNormalizer
):
"""
...
...
@@ -345,11 +389,97 @@ class SystemNormalizer(SystemBasedNormalizer):
self
.
_backend
.
addArrayValues
(
'wyckoff_letters_original'
,
orig_wyckoff
)
self
.
_backend
.
addArrayValues
(
'equivalent_atoms_original'
,
orig_equivalent_atoms
)
self
.
_backend
.
closeSection
(
'section_original_system'
,
orig_gid
)
self
.
_backend
.
closeSection
(
'section_symmetry'
,
symmetry_gid
)
self
.
springer_classification
(
atoms
,
space_group_number
)
# Springer Normalizer
self
.
prototypes
(
prim_num
,
prim_wyckoff
,
space_group_number
)
self
.
_backend
.
closeSection
(
'section_symmetry'
,
symmetry_gid
)
def springer_classification(self, atoms, space_group_number):
    """
    Looks up the given system in the Springer database and stores all matches.

    Entries are matched on the normalized formula of ``atoms`` (see
    ``formula_normalizer``) and on ``space_group_number``. For each matching
    entry one 'section_springer_material' is added with five quantities:
    id, alphabetical formula, url, compound classes, and classifications.
    A warning is logged if the matches disagree on compound classes or
    classifications. Does nothing if the database could not be opened.

    Arguments:
        atoms: The system; only ``get_chemical_symbols()`` is used (via
            ``formula_normalizer``).
        space_group_number: The space group number, anything ``int()`` accepts.
    """
    normalized_formula = formula_normalizer(atoms)

    springer_db_connection = open_springer_database()
    if springer_db_connection is None:
        return

    # One query for everything (this replaces the four queries done in the old
    # 'classify4me_SM_normalizer.py'). Values are bound as parameters instead of
    # being formatted into the SQL text. The space group number is bound as the
    # same decimal string that the former "'%d'" literal produced, and int()
    # also converts numpy integers, which sqlite3 cannot bind directly.
    # The former LEFT JOINs on entry_reference/reference are left out: none of
    # their columns were selected, and because of GROUP BY + DISTINCT they could
    # not change the result (we do not expose Springer references anyway).
    cur = springer_db_connection.cursor()
    try:
        cur.execute(
            """
            SELECT
                entry.entry_id,
                entry.alphabetic_formula,
                GROUP_CONCAT(DISTINCT compound_classes.compound_class_name),
                GROUP_CONCAT(DISTINCT classification.classification_name)
            FROM entry
            LEFT JOIN entry_compound_class as ecc ON ecc.entry_nr = entry.entry_nr
            LEFT JOIN compound_classes ON ecc.compound_class_nr = compound_classes.compound_class_nr
            LEFT JOIN entry_classification as ec ON ec.entry_nr = entry.entry_nr
            LEFT JOIN classification ON ec.classification_nr = classification.classification_nr
            WHERE entry.normalized_formula = ? and entry.space_group_number = ?
            GROUP BY entry.entry_id;
            """,
            (normalized_formula, str(int(space_group_number))))
        # 'results' is a list of tuples, i.e. '[(a,b,c,d), ..., (a,b,c,d)]'
        results = cur.fetchall()
    finally:
        cur.close()

    def split_sorted(concatenated):
        # GROUP_CONCAT yields NULL (None) if the LEFT JOIN found no rows
        if concatenated is None:
            return []
        return sorted(concatenated.split(','))

    # Storing 'results' in a dictionary keyed by the Springer id ('spr' means 'springer')
    dbdict = {}
    for spr_id, spr_aformula, compound_names, classification_names in results:
        dbdict[spr_id] = {
            'spr_id': spr_id,
            'spr_aformula': spr_aformula,  # alphabetical formula
            'spr_url': 'http://materials.springer.com/isp/crystallographic/docs/' + spr_id,
            'spr_compound': split_sorted(compound_names),
            'spr_classification': split_sorted(classification_names)
        }

    # =============
    # SPRINGER's METAINFO UPDATE
    # LAYOUT: Five quantities under 'section_springer_material' for each material ID:
    #     id, alphabetical formula, url, compound_class, classification.
    # As per Markus/Luca's emails, we don't expose Springer bib references (Springer's paywall)
    materials = list(dbdict.values())
    for material in materials:
        self._backend.openNonOverlappingSection('section_springer_material')

        self._backend.addValue('springer_id', material['spr_id'])
        self._backend.addValue('springer_alphabetical_formula', material['spr_aformula'])
        self._backend.addValue('springer_url', material['spr_url'])
        self._backend.addArrayValues('springer_compound_class', material['spr_compound'])
        self._backend.addArrayValues('springer_classification', material['spr_classification'])

        self._backend.closeNonOverlappingSection('section_springer_material')

    # Check that the 'springer_classification' and 'springer_compound_class'
    # information is the same for all springer ids. Every entry is compared with
    # the first one; a difference in either of the two lists is a mismatch. The
    # warning is logged once, no matter how many entries differ.
    if materials:
        first = materials[0]
        mismatch = any(
            other['spr_classification'] != first['spr_classification'] or
            other['spr_compound'] != first['spr_compound']
            for other in materials[1:])
        if mismatch:
            self.logger.warning('Mismatch in Springer classification or compounds')
def
prototypes
(
self
,
atomSpecies
,
wyckoffs
,
spg_nr
):
try
:
norm_wyckoff
=
SystemNormalizer
.
get_normalized_wyckoff
(
atomSpecies
,
wyckoffs
)
...
...
ops/helm/nomad/templates/nomad-configmap.yml
View file @
827e261a
...
...
@@ -60,3 +60,5 @@ data:
client_id: "{{ .Values.keycloak.clientId }}"
client_secret: "*"
password: "*"
springer_db_path: "{{ .Values.springerDbPath }}"
ops/helm/nomad/values.yaml
View file @
827e261a
...
...
@@ -150,3 +150,5 @@ volumes:
## Everything else
# The domain configuration, currently there is DFT and EMS
domain
:
DFT
springerDbPath
:
/nomad/fairdi/db/data/springer.db
tests/data/parsers/vasp/vasp_dos.xml
0 → 100644
View file @
827e261a
This diff is collapsed.
Click to expand it.
tests/test_normalizing.py
View file @
827e261a
...
...
@@ -13,6 +13,7 @@
# limitations under the License.
import
pytest
import
numpy
as
np
from
nomad
import
datamodel
,
config
from
nomad.parsing
import
LocalBackend
...
...
@@ -37,6 +38,12 @@ unknown_atom_label = (
# Test inputs as pairs of two strings; presumably (parser name, path of the file
# to parse), since the tests of this module pass them to parse_file().
fcc_symmetry = ('parsers/template', 'tests/data/normalizers/fcc_crystal_structure.json')

vasp_parser = ('parsers/vasp', 'tests/data/parsers/vasp/vasp.xml')
vasp_parser_dos = ('parsers/vasp', 'tests/data/parsers/vasp/vasp_dos.xml')

glucose_atom_labels = ('parsers/template', 'tests/data/normalizers/glucose_atom_labels.json')
...
...
@@ -194,3 +201,55 @@ def test_reduced_chemical_formula():
expected_red_chem_formula
=
'C6H12O6'
reduced_chemical_formula
=
backend
.
get_value
(
'chemical_composition_bulk_reduced'
)
assert
expected_red_chem_formula
==
reduced_chemical_formula
def test_vasp_incar_system():
    """
    Parse and normalize the VASP example, then check the INCAR 'SYSTEM' value.
    """
    normalized = run_normalize(parse_file(vasp_parser))

    # material's formula in vasp.xml
    expected_value = 'SrTiO3'
    backend_value = normalized.get_value('x_vasp_incar_SYSTEM')

    print("backend_value: ", backend_value)
    assert expected_value == backend_value
def test_springer_normalizer():
    """
    Ensure the Springer normalizer works well with the VASP example.

    Checks id, alphabetical formula and url read with index 89 from the
    normalized backend.
    """
    normalized = run_normalize(parse_file(vasp_parser))

    section_index = 89
    expectations = [
        ('springer_id', 'sd_1932539'),
        ('springer_alphabetical_formula', 'O3SrTi'),
        ('springer_url', 'http://materials.springer.com/isp/crystallographic/docs/sd_1932539'),
    ]

    for quantity, expected_value in expectations:
        backend_value = normalized.get_value(quantity, section_index)
        assert expected_value == backend_value
def test_dos_normalizer():
    """
    Ensure the DOS normalizer acted on the DOS values of the VASP DOS example.
    """
    normalized = run_normalize(parse_file(vasp_parser_dos))

    # 'dos_values_normalized' read with index 0; we check the last value of its first row
    dos_values_normalized = normalized.get_value('dos_values_normalized', 0)

    # Compare floats properly with numpy (delta tolerance involved)
    assert np.allclose(dos_values_normalized[0, -1], 1.7362195274239454e+47)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment