Commits (4)
import numpy as np
from bitarray import bitarray
from functools import partial
from .grid import Grid
from .similarity import tanimoto_similarity
# Conversion factor between Joule and eV: energies in Joule are divided by it,
# and DOS values per Joule are multiplied by it (see DOSFingerprint._convert_dos).
ELECTRON_CHARGE = 1.602176565e-19
class DOSFingerprint():
def __init__(self, stepsize = 0.05, similarity_function = tanimoto_similarity, **kwargs):
    """
    Create an empty fingerprint and register the function used to compare it with other fingerprints.

    Args:
        stepsize: stored as ``self.stepsize``.
        similarity_function: callable taking two fingerprints; stored via ``set_similarity_function``.
        **kwargs: keyword arguments forwarded to ``set_similarity_function`` together with ``similarity_function``.
    """
    # Fingerprint payload starts out empty.
    self.bins = ''
    self.indices = []
    self.stepsize = stepsize
    self.filling_factor = 0
    self.grid_id = None
    self.set_similarity_function(similarity_function, **kwargs)
def calculate(self, dos_energies, dos_values, grid_id = 'dg_cut:56:-2:7:(-10, 5)'):
energy, dos = self._convert_dos(dos_energies, dos_values)
def calculate(self, dos_energies, dos_values, grid_id = 'dg_cut:56:-2:7:(-10, 5)', unit_cell_volume = 1, n_atoms = 1):
energy, dos = self._convert_dos(dos_energies, dos_values, unit_cell_volume = unit_cell_volume, n_atoms = n_atoms)
raw_energies, raw_dos = self._integrate_to_bins(energy, dos)
grid = Grid().create(grid_id = grid_id)
self.grid_id = grid.get_grid_id()
......@@ -35,6 +38,15 @@ class DOSFingerprint():
self.filling_factor = fp_dict['filling_factor']
return self
def set_similarity_function(self, similarity_function, **kwargs):
    """
    Store ``similarity_function`` on this object as ``self.similarity_function``, with ``kwargs`` pre-bound as keyword arguments.
    """
    bound_function = partial(similarity_function, **kwargs)
    self.similarity_function = bound_function
def get_similarity(self, fingerprint):
    """
    Return the result of calling the stored similarity function with this object and ``fingerprint``.
    """
    compare = self.similarity_function
    return compare(self, fingerprint)
def get_similarities(self, list_of_fingerprints):
    """
    Apply the stored similarity function to this object and every entry of ``list_of_fingerprints``.

    Returns:
        A numpy array holding one result per entry, in input order.
    """
    scores = []
    for other in list_of_fingerprints:
        scores.append(self.similarity_function(self, other))
    return np.array(scores)
def _integrate_to_bins(self, xs, ys):
"""
Performs stepwise numerical integration of ``ys`` over the range of ``xs``. The stepsize of the generated histogram is controlled by DOSFingerprint().stepsize.
......@@ -49,15 +61,15 @@ class DOSFingerprint():
y_integ = np.array([np.trapz(y_interp[idx:idx + 2], x_interp[idx:idx + 2]) for idx in range(len(x_interp)-1)])
return x_interp[:-1], y_integ
def _convert_dos(self, energy, dos, unit_cell_volume = 1, n_atoms = 1):
    """
    Convert units of DOS from energy: Joule; dos: states/volume/Joule to eV and sum spin channels if they are present.

    Args:
        energy: sequence of energy values; each is divided by ``ELECTRON_CHARGE``.
        dos: iterable of channels, each a sequence of DOS values; the channels are added elementwise.
        unit_cell_volume: factor multiplied onto the summed DOS (default 1).
        n_atoms: factor multiplied onto the summed DOS (default 1).

    Returns:
        Tuple ``(energy, dos)`` of the converted energies and the summed, rescaled DOS.
    """
    # One vectorized division instead of a per-element Python loop.
    energy = np.asarray(energy) / ELECTRON_CHARGE
    dos_channels = [np.array(values) for values in dos]
    # Built-in sum adds the channel arrays elementwise.
    # NOTE(review): the DOS is multiplied by both unit_cell_volume and n_atoms; confirm
    # this matches the normalization of the incoming DOS values.
    dos = sum(dos_channels) * ELECTRON_CHARGE * unit_cell_volume * n_atoms
    return energy, dos
def _binary_bin(self, dos_value, grid_bins):
bin_dos = ''
for grid_bin in grid_bins:
......
......@@ -25,4 +25,8 @@ def tanimoto_similarity(fingerprint1, fingerprint2):
a = fp1.count()
b = fp2.count()
c = (fp1 & fp2).count()
return c / float(a + b - c)
\ No newline at end of file
try:
tc = c / float(a + b - c)
except ZeroDivisionError:
tc = 0
return tc
......@@ -4,7 +4,7 @@ with open("README.md", "r") as fh:
long_description = fh.read()
setuptools.setup(
name="nomadDOSfingerprints",
name="nomad_dos_fingerprints",
version="1.0",
author="Martin Kuban",
author_email="kuban@physik.hu-berlin.de",
......
......@@ -2,7 +2,7 @@ import pytest
import numpy as np
from nomad_dos_fingerprints import DOSFingerprint, tanimoto_similarity
from nomad_dos_fingerprints.DOSfingerprint import ELECTRON_CHARGE
from nomad_dos_fingerprints.DOSfingerprint import ELECTRON_CHARGE
def test_integrate_to_bins():
test_data_x = np.linspace(0, np.pi, num = 1000)
......
......@@ -4,9 +4,9 @@ import numpy as np
with open(os.path.join(os.path.dirname(__file__), 'fingerprint_generation_test_data.json'), 'r') as test_data_file:
test_data = json.load(test_data_file)
def test_fingerprint_values():
for fp, mid in test_data['fingerprints']:
raw_data = test_data[mid]
new_fingerprint = DOSFingerprint().calculate(raw_data['dos_energies'], raw_data['dos_values'])
......@@ -15,7 +15,7 @@ def test_fingerprint_values():
old_fingerprint.indices = json.loads(fp)['indices']
old_fingerprint.grid_id = new_fingerprint.grid_id
assert old_fingerprint.indices == new_fingerprint.indices
assert np.isclose(tanimoto_similarity(old_fingerprint, new_fingerprint),1, atol=5e-2)
assert np.isclose(old_fingerprint.get_similarity(new_fingerprint),1, atol=5e-2)
def test_materials_similarity():
......@@ -25,10 +25,7 @@ def test_materials_similarity():
raw_data = [test_data[mid] for mid in mids]
new_fingerprints = [DOSFingerprint().calculate(entry['dos_energies'], entry['dos_values']) for entry in raw_data]
matrix = []
for fp1 in new_fingerprints:
row = []
for fp2 in new_fingerprints:
row.append(tanimoto_similarity(fp1,fp2))
matrix.append(row)
for fp in new_fingerprints:
matrix.append(fp.get_similarities(new_fingerprints))
print(matrix - np.array(similarity_matrix))
assert np.isclose(similarity_matrix, matrix, atol = 5e-2).all()
\ No newline at end of file
assert np.isclose(similarity_matrix, matrix, atol = 5e-2).all()