Commit a6763bfd authored by Lauri Himanen

Added new cli command for updating the aflow prototype library. There is an issue with the zinc blende structure, to be investigated.
parent 3e8dc8a7
Pipeline #68556 failed with stages in 22 minutes and 31 seconds
@@ -16,10 +16,23 @@ import click
import datetime
import elasticsearch.helpers
import sys
import io
import re
import uuid
import json
from io import StringIO
import numpy as np
import requests
from ase import Atoms
import ase.io
from bs4 import BeautifulSoup
from matid import SymmetryAnalyzer
from nomad import processing as proc, search, datamodel, infrastructure, utils, config
from nomad.normalizing.structure import get_normalized_wyckoff
from nomad.cli.cli import cli
from nomad import config
@cli.group(help='''The nomad admin commands to do nasty stuff directly on the databases.
@@ -210,3 +223,203 @@ RewriteRule ^/NomadRepository-1.1/views/calculation.zul$ /{0}/gui/entry/pid/%1?
AllowEncodedSlashes On
'''.format(prefix, host, port))
def write_prototype_data_file(aflow_prototypes: dict, filepath) -> None:
"""Writes the prototype data file in a compressed format to a python
module.
Args:
aflow_prototypes: Dictionary containing the AFLOW prototype data.
filepath: Path of the python module to which the data is written.
"""
class NoIndent(object):
def __init__(self, value):
self.value = value
class NoIndentEncoder(json.JSONEncoder):
"""A custom JSON encoder that can pretty-print objects wrapped in the
NoIndent class.
"""
def __init__(self, *args, **kwargs):
super(NoIndentEncoder, self).__init__(*args, **kwargs)
self.kwargs = dict(kwargs)
del self.kwargs['indent']
self._replacement_map = {}
def default(self, o): # pylint: disable=E0202
if isinstance(o, NoIndent):
key = uuid.uuid4().hex
self._replacement_map[key] = json.dumps(o.value, **self.kwargs)
return "@@%s@@" % (key,)
else:
return super(NoIndentEncoder, self).default(o)
def encode(self, o):
result = super(NoIndentEncoder, self).encode(o)
for k, v in self._replacement_map.items():
result = result.replace('"@@%s@@"' % (k,), v)
return result
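# A minimal usage sketch (illustration only, not part of the original code):
# values wrapped in NoIndent are emitted on a single line while the rest of
# the document keeps the requested indentation, e.g.
#
#     json.dumps(
#         {"atom_positions": NoIndent([[0.0, 0.0, 0.0], [0.5, 0.5, 0.5]])},
#         indent=4, cls=NoIndentEncoder)
#
# keeps the nested position list on one line instead of printing every
# float on its own row.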
prototype_dict = aflow_prototypes["prototypes_by_spacegroup"]
for prototypes in prototype_dict.values():
for prototype in prototypes:
# Save the information back in a prettified form
prototype["atom_positions"] = NoIndent(prototype["atom_positions"])
prototype["atom_labels"] = NoIndent(prototype["atom_labels"])
prototype["lattice_vectors"] = NoIndent(prototype["lattice_vectors"])
try:
prototype["normalized_wyckoff_matid"] = NoIndent(prototype["normalized_wyckoff_matid"])
except KeyError:
pass
# Save the updated data
with io.open(filepath, "w", encoding="utf8") as f:
json_dump = json.dumps(aflow_prototypes, ensure_ascii=False, indent=4, sort_keys=True, cls=NoIndentEncoder)
json_dump = re.sub(r"\"(-?\d+(?:[\.,]\d+)?)\"", r'\1', json_dump) # Removes quotes around numbers
f.write("# -*- coding: utf-8 -*-\naflow_prototypes = {}\n".format(json_dump))
@ops.command(help='Updates the AFLOW prototype information using the latest online version and writes the results to a python module in the given FILEPATH.')
@click.argument('FILEPATH', nargs=1, type=str)
@click.option('--matches-only', is_flag=True, help='Only update the match information that depends on the symmetry analysis settings. Will not perform an online update.')
@click.pass_context
def prototypes_update(ctx, filepath, matches_only):
if matches_only:
from nomad.normalizing.data.aflow_prototypes import aflow_prototypes
else:
# The basic AFLOW prototype data is available in a Javascript file. Here we
# retrieve it and read only the prototype list from it.
prototypes_file_url = 'http://aflowlib.org/CrystalDatabase/js/table_sort.js'
r = requests.get(prototypes_file_url, allow_redirects=True)
datastring = r.content.decode("utf-8")
datastring = datastring.split('];')[0]
datastring = datastring.split('= [')[1]
data = json.loads('[' + datastring + ']')
newdictarray = []
n_prototypes = 0
n_missing = 0
for protodict in data:
n_prototypes += 1
newdict = {}
# Make prototype plaintext
prototype = BeautifulSoup(protodict["Prototype"], "html5lib").getText()
# Add to new dictionary
newdict['Notes'] = protodict['Notes']
newdict['Prototype'] = prototype
newdict['Space Group Symbol'] = protodict['Space Group Symbol']
newdict['Space Group Number'] = protodict['Space Group Number']
newdict['Pearsons Symbol'] = protodict['Pearson Symbol']
newdict['Strukturbericht Designation'] = protodict['Strukturbericht Designation']
newdict['aflow_prototype_id'] = protodict['AFLOW Prototype']
newdict['aflow_prototype_url'] = 'http://www.aflowlib.org/CrystalDatabase/' + protodict['href'][2:]
# Download the CIF or, failing that, the POSCAR file and build an ASE Atoms
# object from it to obtain the atom labels, positions and cell
cifurl = 'http://www.aflowlib.org/CrystalDatabase/CIF/' + protodict['href'][2:-5] + '.cif'
r = requests.get(cifurl, allow_redirects=True)
cif_str = r.content.decode("utf-8")
cif_file = StringIO()
cif_file.write(cif_str)
cif_file.seek(0)
try:
atoms = ase.io.read(cif_file, format='cif')
except Exception:
print("Error in getting prototype structure from CIF: {}", format(cifurl))
# Then try to get structure from POSCAR
try:
poscarurl = 'http://www.aflowlib.org/CrystalDatabase/POSCAR/' + protodict['href'][2:-5] + '.poscar'
r = requests.get(poscarurl, allow_redirects=True)
poscar_str = r.content.decode("utf-8")
poscar_file = StringIO()
poscar_file.write(poscar_str)
poscar_file.seek(0)
atoms = ase.io.read(poscar_file, format='vasp')
except Exception:
print("Error in getting prototype structure from POSCAR: {}".format(poscarurl))
print("Could not read prototype structure from CIF or POSCAR file for prototype: {}, {}, ".format(prototype, newdict['aflow_prototype_url']))
n_missing += 1
continue
atom_positions = atoms.get_positions()
atom_labels = atoms.get_chemical_symbols()
cell = atoms.get_cell()
newdict['lattice_vectors'] = cell.tolist()
newdict['atom_positions'] = atom_positions.tolist()
newdict['atom_labels'] = atom_labels
newdictarray.append(newdict)
print("Processed: {}".format(len(newdictarray)))
# Sort prototype dictionaries by spacegroup and make dictionary
structure_types_by_spacegroup = {}
for i_sg in range(1, 231):
protos_sg = []
for newdict in newdictarray:
if newdict['Space Group Number'] == i_sg:
protos_sg.append(newdict)
structure_types_by_spacegroup[i_sg] = protos_sg
# Wrap in a dictionary that can hold other data, e.g. the symmetry tolerance parameter.
aflow_prototypes = {
"prototypes_by_spacegroup": structure_types_by_spacegroup
}
print(
"Extracted latest AFLOW prototypes online. Total number of "
"successfully fetched prototypes: {}, missing: {}"
.format(n_prototypes, n_missing)
)
# Update matches
n_prototypes = 0
n_failed = 0
n_unmatched = 0
prototype_dict = aflow_prototypes["prototypes_by_spacegroup"]
for aflow_spg_number, prototypes in prototype_dict.items():
n_prototypes += len(prototypes)
for prototype in prototypes:
# Read prototype structure
pos = np.array(prototype["atom_positions"])
labels = prototype["atom_labels"]
cell = np.array(prototype["lattice_vectors"])
atoms = Atoms(
symbols=labels,
positions=pos,
cell=cell,
pbc=True
)
# Try to first see if the space group can be matched with the one in AFLOW
tolerance = config.normalize.symmetry_tolerance
try:
symm = SymmetryAnalyzer(atoms, tolerance)
spg_number = symm.get_space_group_number()
wyckoff_matid = symm.get_wyckoff_letters_conventional()
norm_system = symm.get_conventional_system()
except Exception:
n_failed += 1
else:
# If the space group is matched, add the MatID normalized Wyckoff
# letters to the data.
if spg_number == aflow_spg_number:
atomic_numbers = norm_system.get_atomic_numbers()
normalized_wyckoff_matid = get_normalized_wyckoff(atomic_numbers, wyckoff_matid)
prototype["normalized_wyckoff_matid"] = normalized_wyckoff_matid
else:
n_unmatched += 1
print(
"Updated matches in AFLOW prototype library. Total number of "
"prototypes: {}, unmatched: {}, failed: {}"
.format(n_prototypes, n_unmatched, n_failed)
)
aflow_prototypes["matid_symmetry_tolerance"] = tolerance
# Write data file to the specified path
write_prototype_data_file(aflow_prototypes, filepath)
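# Example invocations (hypothetical; the exact command path and the
# dash/underscore form of the name depend on how the click groups are registered):
#
#     nomad admin ops prototypes_update nomad/normalizing/data/aflow_prototypes.py
#     nomad admin ops prototypes_update --matches-only nomad/normalizing/data/aflow_prototypes.py
#
# The first form downloads the latest prototype list from aflowlib.org and
# re-runs the MatID matching, while --matches-only only refreshes the match
# information for the prototypes already stored in the module.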
@@ -11,17 +11,11 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import io
import re
import functools
import fractions
import json
import uuid
from typing import Dict
from matid import SymmetryAnalyzer
import numpy as np
from ase import Atoms
from nomad.normalizing.data.aflow_prototypes import aflow_prototypes
from nomad import config
@@ -136,100 +130,3 @@ def search_aflow_prototype(space_group: int, norm_wyckoff: dict) -> dict:
structure_type_info = type_description
break
return structure_type_info
def update_aflow_prototype_information(filepath: str) -> None:
"""Used to update the AFLOW prototype information. Creates a new python
module with updated symmetry tolerance parameter and the wyckoff positions
as detected by MatID.
This function is relatively heavy and should only be run if the symmetry
tolerance has been changed or the symmetry detection routine has been
updated.
Args:
filepath: Path to the python file in which the new symmetry information
will be written.
"""
class NoIndent(object):
def __init__(self, value):
self.value = value
class NoIndentEncoder(json.JSONEncoder):
"""A custom JSON encoder that can pretty-print objects wrapped in the
NoIndent class.
"""
def __init__(self, *args, **kwargs):
super(NoIndentEncoder, self).__init__(*args, **kwargs)
self.kwargs = dict(kwargs)
del self.kwargs['indent']
self._replacement_map = {}
def default(self, o): # pylint: disable=E0202
if isinstance(o, NoIndent):
key = uuid.uuid4().hex
self._replacement_map[key] = json.dumps(o.value, **self.kwargs)
return "@@%s@@" % (key,)
else:
return super(NoIndentEncoder, self).default(o)
def encode(self, o):
result = super(NoIndentEncoder, self).encode(o)
for k, v in self._replacement_map.items():
result = result.replace('"@@%s@@"' % (k,), v)
return result
n_prototypes = 0
n_failed = 0
n_unmatched = 0
prototype_dict = aflow_prototypes["prototypes_by_spacegroup"]
for aflow_spg_number, prototypes in prototype_dict.items():
n_prototypes += len(prototypes)
for prototype in prototypes:
# Read prototype structure
pos = np.array(prototype["atom_positions"]) * 1E10
labels = prototype["atom_labels"]
cell = np.array(prototype["lattice_vectors"]) * 1E10
atoms = Atoms(
symbols=labels,
positions=pos,
cell=cell,
pbc=True
)
# Try to first see if the space group can be matched with the one in AFLOW
tolerance = config.normalize.symmetry_tolerance
try:
symm = SymmetryAnalyzer(atoms, tolerance)
spg_number = symm.get_space_group_number()
wyckoff_matid = symm.get_wyckoff_letters_conventional()
norm_system = symm.get_conventional_system()
except Exception:
n_failed += 1
else:
# If the space group is matched, add the MatID normalized Wyckoff
# letters to the data.
if spg_number == aflow_spg_number:
atomic_numbers = norm_system.get_atomic_numbers()
normalized_wyckoff_matid = get_normalized_wyckoff(atomic_numbers, wyckoff_matid)
prototype["normalized_wyckoff_matid"] = NoIndent(normalized_wyckoff_matid)
else:
n_unmatched += 1
# Save the information back in a prettified form
prototype["atom_positions"] = NoIndent(prototype["atom_positions"])
prototype["atom_labels"] = NoIndent(prototype["atom_labels"])
prototype["lattice_vectors"] = NoIndent(prototype["lattice_vectors"])
try:
prototype["normalized_wyckoff"] = NoIndent(prototype["normalized_wyckoff"])
except KeyError:
pass
print(f"Updated AFLOW prototype library. Total number of prototypes: {n_prototypes}, unmatched: {n_unmatched}, failed: {n_failed}")
# Save the updated data
with io.open(filepath, "w", encoding="utf8") as f:
json_dump = json.dumps(aflow_prototypes, ensure_ascii=False, indent=4, sort_keys=True, cls=NoIndentEncoder)
json_dump = re.sub(r"\"(-?\d+(?:[\.,]\d+)?)\"", r'\1', json_dump) # Removes quotes around numbers
f.write("# -*- coding: utf-8 -*-\naflow_prototypes = {}\n".format(json_dump))
@@ -312,10 +312,9 @@ class SystemNormalizer(SystemBasedNormalizer):
None: The method should write symmetry variables
to the backend which is member of this class.
"""
# Try to use Matid's symmetry analyzer to anlyze the ASE object.
# TODO: dts, find out what the symmetry_tol does.
# Try to use Matid's symmetry analyzer to analyze the ASE object.
try:
symm = SymmetryAnalyzer(atoms, symmetry_tol=0.1)
symm = SymmetryAnalyzer(atoms, symmetry_tol=config.normalize.symmetry_tolerance)
space_group_number = symm.get_space_group_number()
@@ -355,6 +354,8 @@ class SystemNormalizer(SystemBasedNormalizer):
self.logger.error('matid symmetry analysis fails with exception', exc_info=e)
return
raise Exception(conv_wyckoff)
# Write data extracted from Matid symmetry analysis to the backend.
symmetry_gid = self._backend.openSection('section_symmetry')
# TODO: @dts, should we change the symmetry_method to MATID?