Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
nomad-lab
normalizer-prototypes
Commits
868d8321
Commit
868d8321
authored
Jul 02, 2017
by
Daria M. Tomecka
Browse files
adding corrected version that classifies the data properly
parent
a5b23e63
Changes
1
Hide whitespace changes
Inline
Side-by-side
normalizer/normalizer-prototypes/classify4me_prototypes.py
View file @
868d8321
#! /usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Includes a function that reads json files and classifies the structures
included on the basis of encyclopedia and the calculated in the preprocessor
space_group and normalized_wyckoff, and returns a dictionary with key - json
file name and value - classification_name (with labels in the same format as
Reads calculations and classifies their structures on the basis of prototypes
and the space_group and normalized_wyckoff, and adds labels to the calculations
- classification_name (with labels in the same format as
in the read_prototypes function).
"""
...
...
@@ -18,12 +17,15 @@ __date__ = "18/05/17"
import
sys
import
ase.io
from
ase.data
import
chemical_symbols
import
json
import
numpy
as
np
import
time
import
datetime
import
os
,
os
.
path
import
logging
import
functools
import
fractions
import
setup_paths
#from nomad_sim.wrappers import get_json_list
...
...
@@ -59,6 +61,7 @@ import logging
atomSpecies
=
None
#atomSpecies = atom_species
cell
=
None
def
get_normalized_wyckoff
(
atomic_number
,
wyckoff
):
"""Returns a normalized Wyckoff sequence for the given atomic numbers and
...
...
@@ -69,8 +72,10 @@ def get_normalized_wyckoff(atomic_number, wyckoff):
for
nr
in
atomic_number
:
atomCount
[
nr
]
=
atomCount
.
get
(
nr
,
0
)
+
1
wycDict
=
{}
#logging.error("atomic_number: %s, wyckoff: %s", atomic_number, wyckoff)
for
i
,
wk
in
enumerate
(
wyckoff
):
oldVal
=
wycDict
.
get
(
wk
,
{})
#print("i:",i, "wyckoff", wyckoff, "wk", wk)
nr
=
atomic_number
[
i
]
oldVal
[
nr
]
=
oldVal
.
get
(
nr
,
0
)
+
1
wycDict
[
wk
]
=
oldVal
...
...
@@ -110,26 +115,7 @@ def get_normalized_wyckoff(atomic_number, wyckoff):
if
c
!=
0
:
return
c
return
0
sortedAt
=
list
(
atomCount
.
keys
())
sortedAt
.
sort
(
key
=
functools
.
cmp_to_key
(
compareAtNr
))
standardAtomNames
=
{}
for
i
,
at
in
enumerate
(
sortedAt
):
standardAtomNames
[
at
]
=
(
"X_%d"
%
i
)
standardWyc
=
{}
for
wk
,
ats
in
wycDict
.
items
():
stdAts
=
{}
for
at
,
count
in
ats
.
items
():
stdAts
[
standardAtomNames
[
at
]]
=
count
standardWyc
[
wk
]
=
stdAts
if
standardWyc
:
if
(
c
!=
0
):
return
c
for
wk
in
sortedWyc
:
p
=
wycDict
[
wk
]
c
=
cmpp
(
p
.
get
(
at1
,
0
),
p
.
get
(
at2
,
0
))
if
c
!=
0
:
return
c
return
0
sortedAt
=
list
(
atomCount
.
keys
())
sortedAt
.
sort
(
key
=
functools
.
cmp_to_key
(
compareAtNr
))
standardAtomNames
=
{}
...
...
@@ -170,7 +156,7 @@ def get_structure_type(space_group, norm_wyckoff):
#if current_bravais_lattice == bravais_lattice \
# and current_wyckoffs == wyckoff_letters_compact: current_norm_wyckoffs = type_description.get('normalized_wyckoff')
current_norm_wyckoffs
=
type_description
.
get
(
"normalized_wyckoff"
)
current_norm_wyckoffs
=
type_description
.
get
(
"normalized_wyckoff
_spg
"
)
if
current_norm_wyckoffs
and
current_norm_wyckoffs
==
norm_wyckoff
:
structure_type_info
=
type_description
break
...
...
@@ -185,13 +171,13 @@ def get_structure_type(space_group, norm_wyckoff):
def
_structure_type_info
(
self
):
"""Known structure types"""
return
structure
.
get_structure_type
(
return
get_structure_type
(
self
.
space_group
,
self
.
normalized_wyckoff
)
def
toAtomNr
(
str
):
def
toAtomNr
(
str
ing
):
"returns the atom number of the given symbol"
baseStr
=
str
[:
3
].
title
()
baseStr
=
str
ing
[:
3
].
title
()
if
baseStr
.
startswith
(
"Uu"
)
and
baseStr
in
chemical_symbols
[
1
:]:
return
chemical_symbols
.
index
(
baseStr
)
if
baseStr
[:
2
]
in
chemical_symbols
[
1
:]:
...
...
@@ -201,6 +187,39 @@ def toAtomNr(str):
else
:
return
0
def dictToNarray(dictValue):
    """Rebuild a numpy array from its flattened dictionary form.

    Args:
        dictValue: mapping with a 'flatData' entry holding the values and a
            'shape' entry giving the shape the values are reshaped to.

    Returns:
        The 'flatData' values as a numpy array with shape dictValue['shape'].
    """
    flat_values = np.asarray(dictValue['flatData'])
    target_shape = dictValue['shape']
    return flat_values.reshape(target_shape)
def protoNormalizeWycoff(protoDict):
    """Recalculate the normalized wyckoff values for a prototype dictionary.

    Reads the 'lattice_vectors', 'atom_labels' and 'atom_positions' entries
    of protoDict, runs systemToSpg on them, and feeds the "wyckoffs" entry of
    the result together with the atom numbers to get_normalized_wyckoff.

    Args:
        protoDict: prototype description providing the three entries above.

    Returns:
        The value returned by get_normalized_wyckoff.
    """
    lattice = np.asarray(protoDict['lattice_vectors'])
    # Labels are mapped to atom numbers with toAtomNr.
    species = [toAtomNr(label) for label in protoDict['atom_labels']]
    positions = np.asarray(protoDict['atom_positions'])
    symmetry = systemToSpg(lattice, species, positions)
    return get_normalized_wyckoff(species, symmetry.get("wyckoffs"))
def updatePrototypesWyckoff(protos):
    """Store a freshly computed normalized wyckoff on every prototype.

    Each prototype dictionary is updated in place: the result of
    protoNormalizeWycoff is written to its 'normalized_wyckoff_spg' entry.

    Args:
        protos: mapping whose values are iterables of prototype dictionaries.
            The keys are not used here.

    A failure for one prototype is logged (with traceback) and does not stop
    the processing of the remaining ones.
    """
    for prototypes in protos.values():
        for protoDict in prototypes:
            try:
                protoDict['normalized_wyckoff_spg'] = protoNormalizeWycoff(protoDict)
            except Exception:
                # Best effort per prototype. Catch Exception rather than using
                # a bare except so that KeyboardInterrupt and SystemExit still
                # propagate instead of being logged and swallowed.
                logging.exception("Failed to compute normalized wyckoffs for %s", protoDict)
def systemToSpg(cell, atomSpecies, atomPos):
    """Use spglib to calculate the symmetry of the given system.

    Args:
        cell: square matrix describing the cell; it is inverted with
            np.linalg.inv, so it must be invertible.
        atomSpecies: per-atom species values handed to spglib unchanged.
        atomPos: atom positions; they are multiplied by the inverse of cell
            before being handed to spglib.

    Returns:
        Whatever spglib.get_symmetry_dataset returns for the system.
    """
    # NOTE(review): the 1.0e10 factor presumably converts metres to Angstrom;
    # the original author left this as an open question ("use m instead of
    # Angstrom?") - confirm against the callers.
    scaled_cell = cell * 1.0e10
    inverse_cell = np.linalg.inv(cell)
    relative_positions = np.dot(atomPos, inverse_cell)
    spg_input = (scaled_cell, relative_positions, atomSpecies)
    # 0.002 and -1 are passed positionally; presumably symprec and
    # angle_tolerance - verify against the installed spglib version.
    return spglib.get_symmetry_dataset(spg_input, 0.002, -1)
def
classify_by_norm_wyckoff
(
sectionSystem
):
try
:
...
...
@@ -216,7 +235,23 @@ def classify_by_norm_wyckoff(sectionSystem):
###
#atomic_number = atom_species
#as in the normalized version
cell
=
None
conf
=
sectionSystem
lab
=
conf
.
get
(
"atom_labels"
,
None
)
##periodicDirs = conf.get("configuration_periodic_dimensions", periodicDirs)
atomSpecies
=
[
toAtomNr
(
l
)
for
l
in
lab
[
0
][
'flatData'
]]
#print (atomSpecies)
newCell
=
conf
.
get
(
"simulation_cell"
)
if
newCell
:
cell
=
dictToNarray
(
newCell
)
symm
=
None
#print("***full:",cell)
#acell = cell.reshape(3,3)
atomPos
=
dictToNarray
(
conf
.
get
(
"atom_positions"
)[
0
])
symm
=
systemToSpg
(
cell
,
atomSpecies
,
atomPos
)
wyckoffs
=
symm
.
get
(
"wyckoffs"
)
spg_nr
=
symm
.
get
(
"number"
)
#norm_wyckoff = get_normalized_wyckoff
# preprocessor = list(results.values())
...
...
@@ -226,24 +261,13 @@ def classify_by_norm_wyckoff(sectionSystem):
# norm_wyckoff = preprocessor["normalized_wyckoff"]
###
conf
=
sectionSystem
lab
=
conf
.
get
(
"atom_labels"
,
None
)
periodicDirs
=
conf
.
get
(
"configuration_periodic_dimensions"
,
periodicDirs
)
atomSpecies
=
[
toAtomNr
(
l
)
for
l
in
lab
]
symm
=
None
symm
=
spglib
.
get_symmetry_dataset
(
filename
)
# sym = spglib.get_symmetry_dataset((number,wyckoff, pearsons_symbol)
wyckoffs
=
str
(
symm
.
get
(
"wyckoffs"
))
spg_nr
=
symm
.
get
(
"number"
)
# pearson - symm.get("xxx")
#space_group = symmetry_dataset["number"]
updatePrototypesWyckoff
(
str_types_by_spg
)
###
norm_wyckoff
=
get_normalized_wyckoff
(
atomSpecies
,
wyckoffs
)
protoDict
=
structure
.
get_structure_type
(
spg_nr
,
norm_wyckoff
)
protoDict
=
get_structure_type
(
spg_nr
,
norm_wyckoff
)
if
protoDict
is
None
:
proto
=
"%d-_"
%
spg_nr
...
...
@@ -255,7 +279,8 @@ def classify_by_norm_wyckoff(sectionSystem):
proto
=
'%d-%s-%s'
%
(
spg_nr
,
protoDict
.
get
(
"Prototype"
,
"-"
),
protoDict
.
get
(
"Pearsons Symbol"
,
"-"
))
return
proto
except
:
logging
.
exception
(
"failure while computing for %r"
,
json_file_name
)
#logging.exception("failure while computing for %r",json_file_name)
logging
.
exception
(
"failure while computing for that example"
)
return
None
...
...
@@ -280,9 +305,9 @@ def main():
break
label
=
classify_by_norm_wyckoff
(
sectSys
)
if
label
:
backend
.
openContext
(
sectSys
.
uri
)
backend
.
openContext
(
sectSys
[
'
uri
'
]
)
backend
.
addValue
(
"prototype_label"
,
label
)
backend
.
closeContext
(
sectSys
.
uri
)
backend
.
closeContext
(
sectSys
[
'
uri
'
]
)
# Script entry point: main() is called only when this file is executed
# directly, not when it is imported as a module.
if
__name__
==
'__main__'
:
main
()
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment