Commit 9545b727 authored by Daria M. Tomecka's avatar Daria M. Tomecka
Browse files

Add temporary script for classification; it adds two more metadata fields from aflowlib to the calculation.

parent e490979e
#! /usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Reads calculation data and classifies their structures by prototypes
on the basis of the space_group and normalized_wyckoff, and adds labels to
the calculations - prototype_label (with labels in the same format as
in the read_prototypes function).
"""
from __future__ import absolute_import
__author__ = "Daria M. Tomecka and Fawzi Mohamed"
__copyright__ = "Copyright 2017, The NOMAD Project"
__maintainer__ = "Daria M. Tomecka"
__email__ = ";"
__date__ = "12/07/17"
import sys
from import chemical_symbols
import json
import numpy as np
import time
import datetime
import os, os.path
import logging
import functools
import fractions
import setup_paths
### new updated location for the prototypes
from nomadcore.structure_types import structure_types_by_spacegroup as str_types_by_spg
from nomadcore.parse_streamed_dicts import ParseStreamedDicts
from nomadcore.local_meta_info import loadJsonFile, InfoKindEl
from nomadcore.parser_backend import JsonParseEventsWriterBackend
#prototypes_file = os.path.normpath("/nomad-lab-base/analysis-tools/structural-similarity/python-modules/nomad_sim/")
import sys
import spglib
import logging
#LOGGER = logging.getLogger(__name__)
# Module-level placeholders. The functions visible in this file assign
# same-named locals without a `global` declaration, so they do not update
# these values.
atomSpecies = None
cell = None
def get_normalized_wyckoff(atomic_number, wyckoff):
    """Returns a normalized Wyckoff sequence for the given atomic numbers and
    wyckoff symbols.

    Species are replaced by placeholder names ``X_0``, ``X_1``, ... so that
    structures differing only in chemistry map to the same result. Species are
    ranked by decreasing total count, then by decreasing occupation of each
    Wyckoff letter (letters taken in sorted order); species that tie on all of
    these keep their first-seen order.

    Args:
        atomic_number: sequence of atomic numbers, one entry per atom.
        wyckoff: sequence of Wyckoff letters, parallel to ``atomic_number``.

    Returns:
        dict mapping each Wyckoff letter to ``{"X_<i>": count}``, with all
        counts divided by their greatest common divisor. Empty input gives
        an empty dict.
    """
    # Local import: fractions.gcd was removed in Python 3.9.
    from math import gcd

    # Total number of atoms per species.
    atomCount = {}
    for nr in atomic_number:
        atomCount[nr] = atomCount.get(nr, 0) + 1

    # Per Wyckoff letter: number of atoms of each species sitting on it.
    wycDict = {}
    for i, wk in enumerate(wyckoff):
        perSpecies = wycDict.setdefault(wk, {})
        nr = atomic_number[i]
        perSpecies[nr] = perSpecies.get(nr, 0) + 1

    sortedWyc = sorted(wycDict)

    def cmpp(a, b):
        # Reversed three-way compare: larger values sort first.
        return ((a < b) - (a > b))

    def compareAtNr(at1, at2):
        c = cmpp(atomCount[at1], atomCount[at2])
        if c != 0:
            return c
        for wk in sortedWyc:
            p = wycDict[wk]
            c = cmpp(p.get(at1, 0), p.get(at2, 0))
            if c != 0:
                return c
        return 0

    # Apply the comparator so the placeholder numbering does not depend on
    # the order in which species appear in the input.
    sortedAt = sorted(atomCount, key=functools.cmp_to_key(compareAtNr))
    standardAtomNames = {at: ("X_%d" % i) for i, at in enumerate(sortedAt)}

    standardWyc = {}
    for wk, ats in wycDict.items():
        standardWyc[wk] = {
            standardAtomNames[at]: count for at, count in ats.items()
        }

    if standardWyc:
        counts = [c for x in standardWyc.values() for c in x.values()]
        divisor = functools.reduce(gcd, counts)
        if divisor != 1:
            # Reduce to the smallest integer ratio.
            for occupations in standardWyc.values():
                for at, c in occupations.items():
                    occupations[at] = c // divisor
    return standardWyc
def get_structure_type(space_group, norm_wyckoff):
    """Returns the information on the prototype.

    Looks up the prototype descriptions registered for ``space_group`` in
    ``str_types_by_spg`` and compares their ``"normalized_wyckoff_spg"``
    entry with ``norm_wyckoff``.

    Args:
        space_group: key into ``str_types_by_spg``.
        norm_wyckoff: normalized Wyckoff dictionary to match.

    Returns:
        The matching prototype description, or None when nothing matches.
        If several descriptions match, the last one is returned.
    """
    matched = None
    for type_description in str_types_by_spg.get(space_group, []):
        candidate = type_description.get("normalized_wyckoff_spg")
        # Descriptions without a stored normalized form can never match.
        if candidate and candidate == norm_wyckoff:
            matched = type_description
    return matched if matched else None
# NOTE(review): this definition takes `self` although no enclosing class is
# visible here, and the call to get_structure_type() below is cut off: its
# arguments and closing parenthesis are missing from this view and must be
# restored before the file can be parsed.
def _structure_type_info(self):
"""Known structure types"""
return get_structure_type(
def toAtomNr(string):
    """Return the index in ``chemical_symbols`` of the element named by
    ``string``, or 0 when no symbol is recognised.

    Only the first three characters are inspected, after title-casing.
    """
    prefix = string[:3].title()
    # Index 0 of chemical_symbols is excluded from every membership test.
    known = chemical_symbols[1:]

    # Three-letter symbols beginning with "Uu" are tried as a whole first.
    if prefix.startswith("Uu") and prefix in known:
        return chemical_symbols.index(prefix)

    # Otherwise prefer a two-letter symbol over a one-letter symbol.
    for width in (2, 1):
        candidate = prefix[:width]
        if candidate in known:
            return chemical_symbols.index(candidate)
    return 0
def dictToNarray(dictValue):
    """Convert a dictionary holding flat data and a shape into an array.

    Args:
        dictValue: dict with the values under ``'flatData'`` and the target
            dimensions under ``'shape'``, or None.

    Returns:
        numpy array of the flat data reshaped to ``dictValue['shape']``;
        None when ``dictValue`` is None, so callers can test for a missing
        value after the conversion.
    """
    if dictValue is None:
        return None
    flat = dictValue['flatData']
    return np.reshape(np.asarray(flat), dictValue['shape'])
def protoNormalizeWycoff(protoDict):
    """Recompute the normalized Wyckoff dictionary of one prototype.

    Reads ``'lattice_vectors'``, ``'atom_labels'`` and ``'atom_positions'``
    from ``protoDict``, runs the symmetry analysis and normalizes the
    resulting Wyckoff letters.
    """
    lattice = np.asarray(protoDict['lattice_vectors'])
    species = [toAtomNr(label) for label in protoDict['atom_labels']]
    positions = np.asarray(protoDict['atom_positions'])
    dataset = systemToSpg(lattice, species, positions)
    return get_normalized_wyckoff(species, dataset.get("wyckoffs"))
def updatePrototypesWyckoff(protos):
    """Recompute ``'normalized_wyckoff_spg'`` for every prototype in place.

    Args:
        protos: dict whose values are lists of prototype dictionaries.

    A prototype whose normalization fails is logged and left unchanged so
    that the remaining prototypes are still processed.
    """
    for pts in protos.values():
        for protoDict in pts:
            try:
                wy = protoNormalizeWycoff(protoDict)
                protoDict['normalized_wyckoff_spg'] = wy
            except Exception:
                # Best effort: report the failing prototype and carry on.
                logging.exception("Failed to compute normalized wyckoffs for %s", json.dumps(protoDict))
def systemToSpg(cell, atomSpecies, atomPos):
    """Uses spglib to calculate the symmetry of the given system.

    Args:
        cell: 3x3 array of lattice vectors (rows).
        atomSpecies: sequence of atomic numbers, one per atom.
        atomPos: array of atom positions in the same length unit as ``cell``.

    Returns:
        The dataset returned by ``spglib.get_symmetry_dataset``.
    """
    # The lattice is scaled by 1e10 before it is handed to spglib
    # (presumably metres -> Angstrom; confirm against the caller's units).
    acell = cell * 1.0e10
    # spglib expects fractional coordinates: positions times inverse lattice.
    cellInv = np.linalg.inv(cell)
    fracPos = np.dot(atomPos, cellInv)
    symm = spglib.get_symmetry_dataset(
        (acell, fracPos, atomSpecies),
        0.002,  # symprec
        -1)     # angle_tolerance
    return symm
def classify_by_norm_wyckoff(sectionSystem):
    """Classify one system by space group and normalized Wyckoff positions.

    Args:
        sectionSystem: dict with ``"atom_labels"``, ``"simulation_cell"`` and
            ``"atom_positions"`` entries, each a dict holding ``'flatData'``
            (and, for the arrays, ``'shape'``).

    Returns:
        Tuple ``(prototype_label, aflow_prototype_id, aflow_prototype_url)``.
        When no known prototype matches, the label is ``"<spg>-_"`` and both
        aflow fields are ``"-"``. Returns None when required input is missing
        or the classification fails (the failure is logged).
    """
    try:
        lab = sectionSystem.get("atom_labels", None)
        if lab is None:
            return None
        atomSpecies = [toAtomNr(l) for l in lab['flatData']]

        newCell = sectionSystem.get("simulation_cell")
        if not newCell:
            return None
        cell = dictToNarray(newCell)

        # Check for missing positions before converting them.
        rawPos = sectionSystem.get("atom_positions")
        if rawPos is None:
            return None
        atomPos = dictToNarray(rawPos)

        symm = systemToSpg(cell, atomSpecies, atomPos)
        wyckoffs = symm.get("wyckoffs")
        spg_nr = symm.get("number")

        # Normalized Wyckoff positions recomputed from the symmetry analysis.
        norm_wyckoff = get_normalized_wyckoff(atomSpecies, wyckoffs)
        protoDict = get_structure_type(spg_nr, norm_wyckoff)

        if protoDict is None:
            # Unknown prototype: only the space group number is available.
            return "%d-_" % spg_nr, "-", "-"

        proto = '%d-%s-%s' % (spg_nr, protoDict.get("Prototype","-"),protoDict.get("Pearsons Symbol","-"))
        aflow_prototype_id = protoDict.get("aflow_prototype_id","-")
        aflow_prototype_url = protoDict.get("aflow_prototype_url","-")
        return proto, aflow_prototype_id, aflow_prototype_url
    except Exception:
        logging.exception("failure while computing for that example")
        return None
# Entry point: loads the meta info, reads dictionaries from stdin, classifies
# each one and writes "prototype_label" values through the JSON backend.
# NOTE(review): several statements of this function are missing from this
# view: the target of the `+\` continuation, the remaining arguments of
# loadJsonFile(...), the call that takes `parserInfo=...`, and the bodies of
# the `if` statements inside the loop. Restore them before running.
def main():
metapath = '../../../../nomad-meta-info/meta_info/nomad_meta_info/' +\
metaInfoPath = os.path.normpath(
os.path.join(os.path.dirname(os.path.abspath(__file__)), metapath))
metaInfoEnv, warns = loadJsonFile(filePath=metaInfoPath,
backend = JsonParseEventsWriterBackend(metaInfoEnv)
parserInfo = {'name':'PrototypesNormalizer', 'version': '1.0'})
# Dictionaries are read one at a time from standard input.
dictReader = ParseStreamedDicts(sys.stdin)
while True:
sectSys = dictReader.readNextDict()
# NOTE(review): no statement is visible under this None check; presumably
# the loop is meant to stop here - confirm.
if sectSys is None:
if "aflow_prototype_id" in sectSys:
if "aflow_prototype_url" in sectSys:
# NOTE(review): classify_by_norm_wyckoff returns a 3-tuple at L171, so the
# whole tuple is passed as "prototype_label" below - confirm this is intended.
label = classify_by_norm_wyckoff(sectSys)
if label:
backend.addValue("prototype_label", label)
# backend.addValue("prototype_aflow_id", aflow_prototype_id)
# backend.addValue("prototype_aflow_url", aflow_prototype_url)
# if "aflow_prototype_url" in sectSys:
# backend.addValue("prototype_aflow_url",sectSys["aflow_prototype_url"])
# NOTE(review): logging.exception is normally called from an `except` block;
# no enclosing try/except is visible here.
logging.exception("exception trying to calculate prototype for %s",sectSys)
backend.finishedParsingSession("ParseSuccess", None)
# Run the normalizer only when the file is executed as a script.
if __name__ == '__main__':
    main()
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment