Commit 868d8321 authored by Daria M. Tomecka's avatar Daria M. Tomecka
Browse files

adding corrected version that classifies the data properly

parent a5b23e63
#! /usr/bin/env python3 #! /usr/bin/env python3
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
""" """
Includes function that reads json files and classifies the stuctures Reads calculations and classifies their structures on the basis of prototypes
included on the basis of encyclopedia and the calculated in the preprocessor and the space_group and normalized_wyckoff, and adds labels to the calculations
space_group and normalized_wyckoff, and returns a dictionary with key - json - classification_name (with labels in the same format as
file name and value - classification_name (with labels in the same format as
in the read_prototypes function). in the read_prototypes function).
""" """
...@@ -18,12 +17,15 @@ __date__ = "18/05/17" ...@@ -18,12 +17,15 @@ __date__ = "18/05/17"
import sys import sys
import ase.io import ase.io
from ase.data import chemical_symbols
import json import json
import numpy as np import numpy as np
import time import time
import datetime import datetime
import os, os.path import os, os.path
import logging import logging
import functools
import fractions
import setup_paths import setup_paths
#from nomad_sim.wrappers import get_json_list #from nomad_sim.wrappers import get_json_list
...@@ -59,6 +61,7 @@ import logging ...@@ -59,6 +61,7 @@ import logging
atomSpecies = None atomSpecies = None
#atomSpecies = atom_species #atomSpecies = atom_species
cell = None
def get_normalized_wyckoff(atomic_number, wyckoff): def get_normalized_wyckoff(atomic_number, wyckoff):
"""Returns a normalized Wyckoff sequence for the given atomic numbers and """Returns a normalized Wyckoff sequence for the given atomic numbers and
...@@ -69,8 +72,10 @@ def get_normalized_wyckoff(atomic_number, wyckoff): ...@@ -69,8 +72,10 @@ def get_normalized_wyckoff(atomic_number, wyckoff):
for nr in atomic_number: for nr in atomic_number:
atomCount[nr] = atomCount.get(nr, 0) + 1 atomCount[nr] = atomCount.get(nr, 0) + 1
wycDict = {} wycDict = {}
#logging.error("atomic_number: %s, wyckoff: %s", atomic_number, wyckoff)
for i, wk in enumerate(wyckoff): for i, wk in enumerate(wyckoff):
oldVal = wycDict.get(wk, {}) oldVal = wycDict.get(wk, {})
#print("i:",i, "wyckoff", wyckoff, "wk", wk)
nr = atomic_number[i] nr = atomic_number[i]
oldVal[nr] = oldVal.get(nr, 0) + 1 oldVal[nr] = oldVal.get(nr, 0) + 1
wycDict[wk] = oldVal wycDict[wk] = oldVal
...@@ -110,26 +115,7 @@ def get_normalized_wyckoff(atomic_number, wyckoff): ...@@ -110,26 +115,7 @@ def get_normalized_wyckoff(atomic_number, wyckoff):
if c != 0: if c != 0:
return c return c
return 0 return 0
sortedAt = list(atomCount.keys())
sortedAt.sort(key=functools.cmp_to_key(compareAtNr))
standardAtomNames = {}
for i, at in enumerate(sortedAt):
standardAtomNames[at] = ("X_%d" % i)
standardWyc = {}
for wk, ats in wycDict.items():
stdAts = {}
for at, count in ats.items():
stdAts[standardAtomNames[at]] = count
standardWyc[wk] = stdAts
if standardWyc:
if (c != 0):
return c
for wk in sortedWyc:
p = wycDict[wk]
c = cmpp(p.get(at1, 0), p.get(at2, 0))
if c != 0:
return c
return 0
sortedAt = list(atomCount.keys()) sortedAt = list(atomCount.keys())
sortedAt.sort(key=functools.cmp_to_key(compareAtNr)) sortedAt.sort(key=functools.cmp_to_key(compareAtNr))
standardAtomNames = {} standardAtomNames = {}
...@@ -170,7 +156,7 @@ def get_structure_type(space_group, norm_wyckoff): ...@@ -170,7 +156,7 @@ def get_structure_type(space_group, norm_wyckoff):
#if current_bravais_lattice == bravais_lattice \ #if current_bravais_lattice == bravais_lattice \
# and current_wyckoffs == wyckoff_letters_compact: current_norm_wyckoffs = type_description.get('normalized_wyckoff') # and current_wyckoffs == wyckoff_letters_compact: current_norm_wyckoffs = type_description.get('normalized_wyckoff')
current_norm_wyckoffs = type_description.get("normalized_wyckoff") current_norm_wyckoffs = type_description.get("normalized_wyckoff_spg")
if current_norm_wyckoffs and current_norm_wyckoffs == norm_wyckoff: if current_norm_wyckoffs and current_norm_wyckoffs == norm_wyckoff:
structure_type_info = type_description structure_type_info = type_description
break break
...@@ -185,13 +171,13 @@ def get_structure_type(space_group, norm_wyckoff): ...@@ -185,13 +171,13 @@ def get_structure_type(space_group, norm_wyckoff):
def _structure_type_info(self): def _structure_type_info(self):
"""Known structure types""" """Known structure types"""
return structure.get_structure_type( return get_structure_type(
self.space_group, self.space_group,
self.normalized_wyckoff) self.normalized_wyckoff)
def toAtomNr(str): def toAtomNr(string):
"returns the atom number of the given symbol" "returns the atom number of the given symbol"
baseStr = str[:3].title() baseStr = string[:3].title()
if baseStr.startswith("Uu") and baseStr in chemical_symbols[1:]: if baseStr.startswith("Uu") and baseStr in chemical_symbols[1:]:
return chemical_symbols.index(baseStr) return chemical_symbols.index(baseStr)
if baseStr[:2] in chemical_symbols[1:]: if baseStr[:2] in chemical_symbols[1:]:
...@@ -201,6 +187,39 @@ def toAtomNr(str): ...@@ -201,6 +187,39 @@ def toAtomNr(str):
else: else:
return 0 return 0
def dictToNarray(dictValue):
    """Rebuild a numpy array from a dictionary with flattened data and a shape.

    The values are read from ``dictValue['flatData']`` and reshaped to
    ``dictValue['shape']``.
    """
    flat_values = np.asarray(dictValue['flatData'])
    target_shape = dictValue['shape']
    return flat_values.reshape(target_shape)
def protoNormalizeWycoff(protoDict):
    """Recalculate the normalized Wyckoff value for one prototype dictionary.

    Reads 'lattice_vectors', 'atom_labels' and 'atom_positions' from
    protoDict, converts the labels to atomic numbers with toAtomNr, runs
    systemToSpg on the result, and returns what get_normalized_wyckoff
    produces for the atomic numbers and the "wyckoffs" entry of the
    symmetry dataset.
    """
    lattice = np.asarray(protoDict['lattice_vectors'])
    atomic_numbers = list(map(toAtomNr, protoDict['atom_labels']))
    positions = np.asarray(protoDict['atom_positions'])
    dataset = systemToSpg(lattice, atomic_numbers, positions)
    return get_normalized_wyckoff(atomic_numbers, dataset.get("wyckoffs"))
def updatePrototypesWyckoff(protos):
    """Store a recomputed normalized Wyckoff value on every prototype.

    protos is a mapping whose values are iterables of prototype
    dictionaries. Each dictionary is updated in place: the result of
    protoNormalizeWycoff is written under 'normalized_wyckoff_spg'.

    A prototype whose computation fails is logged with its traceback and
    left untouched, so one bad entry does not stop the remaining ones.
    Returns None.
    """
    # Only the prototype lists are needed; the mapping keys are not used.
    for prototypes in protos.values():
        for protoDict in prototypes:
            try:
                protoDict['normalized_wyckoff_spg'] = protoNormalizeWycoff(protoDict)
            except Exception:
                # Exception (not a bare except) so that KeyboardInterrupt and
                # SystemExit still stop the run instead of being logged away.
                logging.exception("Failed to compute normalized wyckoffs for %s", protoDict)
def systemToSpg(cell, atomSpecies, atomPos):
    """Use spglib to calculate the symmetry dataset of the given system.

    cell is multiplied by 1.0e10 before being handed to spglib, while the
    positions are multiplied by the inverse of the unscaled cell.
    NOTE(review): presumably cell and atomPos are in metres and the scaling
    converts to Angstrom (the original comment asks "use m instead of
    Angstrom?") -- confirm against the callers.

    Returns whatever spglib.get_symmetry_dataset returns for the tuple
    (scaled cell, transformed positions, atomSpecies), called with the
    extra positional arguments 0.002 and -1.
    """
    scaled_cell = cell * 1.0e10
    inverse_cell = np.linalg.inv(cell)
    transformed_positions = np.dot(atomPos, inverse_cell)
    structure = (scaled_cell, transformed_positions, atomSpecies)
    return spglib.get_symmetry_dataset(structure, 0.002, -1)
def classify_by_norm_wyckoff(sectionSystem): def classify_by_norm_wyckoff(sectionSystem):
try: try:
...@@ -216,7 +235,23 @@ def classify_by_norm_wyckoff(sectionSystem): ...@@ -216,7 +235,23 @@ def classify_by_norm_wyckoff(sectionSystem):
### ###
#atomic_number = atom_species #atomic_number = atom_species
#as in the normalized version #as in the normalized version
cell = None
conf = sectionSystem
lab = conf.get("atom_labels", None)
##periodicDirs = conf.get("configuration_periodic_dimensions", periodicDirs)
atomSpecies = [toAtomNr(l) for l in lab[0]['flatData']]
#print (atomSpecies)
newCell = conf.get("simulation_cell")
if newCell:
cell = dictToNarray(newCell)
symm = None
#print("***full:",cell)
#acell = cell.reshape(3,3)
atomPos = dictToNarray(conf.get("atom_positions")[0])
symm = systemToSpg(cell, atomSpecies, atomPos)
wyckoffs = symm.get("wyckoffs")
spg_nr = symm.get("number")
#norm_wyckoff = get_normalized_wyckoff #norm_wyckoff = get_normalized_wyckoff
# preprocessor = list(results.values()) # preprocessor = list(results.values())
...@@ -226,24 +261,13 @@ def classify_by_norm_wyckoff(sectionSystem): ...@@ -226,24 +261,13 @@ def classify_by_norm_wyckoff(sectionSystem):
# norm_wyckoff = preprocessor["normalized_wyckoff"] # norm_wyckoff = preprocessor["normalized_wyckoff"]
### ###
conf = sectionSystem
lab = conf.get("atom_labels", None)
periodicDirs = conf.get("configuration_periodic_dimensions", periodicDirs)
atomSpecies = [toAtomNr(l) for l in lab]
symm = None
symm = spglib.get_symmetry_dataset(filename)
# sym = spglib.get_symmetry_dataset((number,wyckoff, pearsons_symbol)
wyckoffs = str(symm.get("wyckoffs"))
spg_nr = symm.get("number")
# pearson - symm.get("xxx") # pearson - symm.get("xxx")
#space_group = symmetry_dataset["number"] #space_group = symmetry_dataset["number"]
updatePrototypesWyckoff(str_types_by_spg)
### ###
norm_wyckoff = get_normalized_wyckoff(atomSpecies,wyckoffs) norm_wyckoff = get_normalized_wyckoff(atomSpecies,wyckoffs)
protoDict = structure.get_structure_type(spg_nr, norm_wyckoff) protoDict = get_structure_type(spg_nr, norm_wyckoff)
if protoDict is None: if protoDict is None:
proto = "%d-_" % spg_nr proto = "%d-_" % spg_nr
...@@ -255,7 +279,8 @@ def classify_by_norm_wyckoff(sectionSystem): ...@@ -255,7 +279,8 @@ def classify_by_norm_wyckoff(sectionSystem):
proto = '%d-%s-%s' % (spg_nr, protoDict.get("Prototype","-"),protoDict.get("Pearsons Symbol","-")) proto = '%d-%s-%s' % (spg_nr, protoDict.get("Prototype","-"),protoDict.get("Pearsons Symbol","-"))
return proto return proto
except: except:
logging.exception("failure while computing for %r",json_file_name) #logging.exception("failure while computing for %r",json_file_name)
logging.exception("failure while computing for that example")
return None return None
...@@ -280,9 +305,9 @@ def main(): ...@@ -280,9 +305,9 @@ def main():
break break
label = classify_by_norm_wyckoff(sectSys) label = classify_by_norm_wyckoff(sectSys)
if label: if label:
backend.openContext(sectSys.uri) backend.openContext(sectSys['uri'])
backend.addValue("prototype_label", label) backend.addValue("prototype_label", label)
backend.closeContext(sectSys.uri) backend.closeContext(sectSys['uri'])
if __name__ == '__main__': if __name__ == '__main__':
main() main()
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment