Skip to content
Snippets Groups Projects
Commit ceec0ea0 authored by Martin Kuban's avatar Martin Kuban
Browse files

Changed back to the old version of fingerprint matching.

Added plotting.
parent 489f48fe
No related branches found
No related tags found
No related merge requests found
......@@ -3,4 +3,4 @@
*.code-workspace
run_tests.bat
.vscode
.coverage
\ No newline at end of file
.coverage*
\ No newline at end of file
......@@ -25,6 +25,16 @@ class DOSFingerprint():
def to_dict(self):
    """
    Returns the fingerprint data as a plain dictionary.

    The dictionary holds the attributes ``bins``, ``indices``, ``stepsize``, ``grid_id`` and ``filling_factor`` under keys of the same name.
    """
    return {
        'bins': self.bins,
        'indices': self.indices,
        'stepsize': self.stepsize,
        'grid_id': self.grid_id,
        'filling_factor': self.filling_factor,
    }
@classmethod
def from_dict(cls, fp_dict):
    """
    Alternate constructor: rebuilds a fingerprint from a dictionary.

    Args:
        fp_dict: mapping with the keys ``bins``, ``indices``, ``stepsize``, ``grid_id`` and ``filling_factor``.
    Returns:
        A new instance of the class the method is called on, with the five attributes taken from ``fp_dict``.
    Raises:
        KeyError: if one of the five keys is missing from ``fp_dict``.
    """
    # ``cls()`` instead of a hard-coded class name, so that subclasses get instances of their own type.
    fingerprint = cls()
    for key in ('bins', 'indices', 'stepsize', 'grid_id', 'filling_factor'):
        setattr(fingerprint, key, fp_dict[key])
    return fingerprint
def _integrate_to_bins(self, xs, ys):
"""
Performs stepwise numerical integration of ``ys`` over the range of ``xs``. The stepsize of the generated histogram is controlled by DOSFingerprint().stepsize.
......@@ -36,12 +46,6 @@ class DOSFingerprint():
x_interp = np.arange(xstart, xstop + self.stepsize, self.stepsize)
y_interp = np.interp(x_interp, xs, ys)
y_integ = []
"""
for idx in range(len(x_interp)-1):
print(x_interp[idx:idx+2], y_interp[idx:idx + 2])
print(np.trapz(y_interp[idx:idx + 1], x_interp[idx:idx + 1]))
y_integ.append(np.trapz(y_interp[idx:idx + 1], x_interp[idx:idx + 1]))
"""
y_integ = np.array([np.trapz(y_interp[idx:idx + 2], x_interp[idx:idx + 2]) for idx in range(len(x_interp)-1)])
return x_interp[:-1], y_integ
......@@ -69,7 +73,8 @@ class DOSFingerprint():
"""
grid_array = grid.grid()
# cut the energy and dos to grid size
energy, dos = np.transpose([(e,d) for e,d in zip(energy, dos) if (e >= grid_array[0][0] and e <= grid_array[-1][0])])
energy, dos = np.transpose([(e,d) for e,d in zip(energy, dos) if (e >= grid_array[0][0] and e <= grid_array[-1][0])])
# calculate fingerprint
bin_fp = ''
grid_index = 0
for idx, grid_e in enumerate(grid_array):
......
import matplotlib.pyplot as plt
from bitarray import bitarray
def plot_FP_in_grid(byte_fingerprint, grid, show = True, label = '', axes = None, **kwargs):
    """
    Draws a fingerprint as a bar plot on top of the grid it was calculated with.

    The hex string ``byte_fingerprint.bins`` is unpacked into bits, and the bits are consumed in order, one per value in each grid column.
    A bar is drawn for every set bit that is followed by an unset bit or that is the last bit of its column.

    Args:
        byte_fingerprint: object providing ``bins`` (hex string) and ``indices`` (start and stop index into the grid).
        grid: object whose ``grid()`` method returns a sequence of ``(x_position, values)`` items.
        show: if true, ``plt.show()`` is called after drawing.
        label: label passed on to ``bar``.
        axes: object with a ``bar`` method to draw on; if ``None``, ``plt.bar`` is used.
        **kwargs: passed on to ``bar``.
    """
    bin_fp = bitarray()
    bin_fp.frombytes(bytes.fromhex(byte_fingerprint.bins))
    grid_indices = byte_fingerprint.indices
    plotgrid = grid.grid()[grid_indices[0]:grid_indices[1]]
    x = []
    y = []
    all_width = []
    bit_position = 0
    for index, item in enumerate(plotgrid):
        if index < len(plotgrid) - 1:
            width = plotgrid[index + 1][0] - item[0]
        else:
            # the last column has no right neighbour, reuse the distance to the left one
            width = abs(item[0] - plotgrid[index - 1][0])
        last_idx = len(item[1]) - 1
        for idx, dos_value in enumerate(item[1]):
            # The end-of-column test comes first, so that the look-ahead is never
            # evaluated for the last bit of a column (it would read past the end
            # of the bitarray on the very last bit).
            if bin_fp[bit_position] == 1 and (idx == last_idx or bin_fp[bit_position + 1] == 0):
                x.append(item[0])
                y.append(dos_value)
                all_width.append(width)
            bit_position += 1
    if axes is None:
        plt.bar(x, y, width=all_width, align='edge', label = label, **kwargs)
    else:
        axes.bar(x, y, width=all_width, align='edge', label = label, **kwargs)
    if show:
        plt.show()
import numpy as np
from bitarray import bitarray
def tanimoto_similarity(fingerprint1, fingerprint2):
def match_fingerprints(fingerprint1, fingerprint2):
    """
    Cuts two fingerprints down to the range of grid indices that both of them cover.

    Args:
        fingerprint1, fingerprint2: objects providing ``bins`` (hex string), ``indices`` (start and stop grid index) and ``grid_id`` (string whose second ``:``-separated field is the number of bins per grid index).
    Returns:
        Two bitarrays, the cut bits of ``fingerprint1`` and of ``fingerprint2``.
    Raises:
        AssertionError: if the fingerprints have different ``grid_id``.
    """
    if fingerprint1.grid_id != fingerprint2.grid_id:
        raise AssertionError('Can not calculate similarity of fingerprints that have been calculated with different grids.')
    num_bins = int(fingerprint1.grid_id.split(':')[1])
    # Each bitarray is filled exactly once: ``frombytes`` appends, so combining this with the
    # zero-padding / mask based matching would leave the raw bits attached to the masked ones.
    fp1 = bitarray()
    fp2 = bitarray()
    fp1.frombytes(bytes.fromhex(fingerprint1.bins))
    fp2.frombytes(bytes.fromhex(fingerprint2.bins))
    start_index = max(fingerprint1.indices[0], fingerprint2.indices[0])
    stop_index = min(fingerprint1.indices[1], fingerprint2.indices[1])
    # number of bits to drop at the start (dsp) and at the end (dep) of each fingerprint
    dsp1 = (start_index - fingerprint1.indices[0]) * num_bins
    dsp2 = (start_index - fingerprint2.indices[0]) * num_bins
    dep1 = (fingerprint1.indices[1] - stop_index) * num_bins
    dep2 = (fingerprint2.indices[1] - stop_index) * num_bins
    # NOTE(review): the ``- 1`` also drops the last bit when nothing has to be cut at the end;
    # kept unchanged, confirm whether ``indices[1]`` is meant to be inclusive.
    fp1 = fp1[dsp1:len(fp1) - 1 - dep1]
    fp2 = fp2[dsp2:len(fp2) - 1 - dep2]
    return fp1, fp2
def tanimoto_similarity(fingerprint1, fingerprint2):
fp1, fp2 = match_fingerprints(fingerprint1, fingerprint2)
a = fp1.count()
b = fp2.count()
c = (fp1 & fp2).count()
......
import pytest
import numpy as np
from nomad_dos_fingerprints import DOSFingerprint
from nomad_dos_fingerprints import DOSFingerprint, tanimoto_similarity
from nomad_dos_fingerprints.DOSfingerprint import ELECTRON_CHARGE
def test_integrate_to_bins():
......@@ -29,3 +29,11 @@ def test_convert_dos():
x, y = fp._convert_dos(test_data_x* ELECTRON_CHARGE, [test_data_y/ ELECTRON_CHARGE])
assert np.isclose(x,test_data_x).all()
assert np.isclose(y, test_data_y).all()
def test_serialization():
    """A fingerprint rebuilt from its dictionary form must have a similarity of 1 with the original."""
    energies = np.linspace(0, np.pi, num = 1000)
    dos = [np.sin(energy) for energy in energies]
    scaled_energies = [energy * ELECTRON_CHARGE for energy in energies]
    scaled_dos = [[value / ELECTRON_CHARGE for value in dos]]
    original = DOSFingerprint(stepsize=0.001).calculate(scaled_energies, scaled_dos)
    serialized = original.to_dict()
    restored = DOSFingerprint().from_dict(serialized)
    assert tanimoto_similarity(original, restored) == 1
import pytest, os, json
from bitarray import bitarray
from nomad_dos_fingerprints import tanimoto_similarity, DOSFingerprint
from nomad_dos_fingerprints import tanimoto_similarity, DOSFingerprint, Grid
from nomad_dos_fingerprints.DOSfingerprint import ELECTRON_CHARGE
from nomad_dos_fingerprints.plotting import plot_FP_in_grid
import matplotlib.pyplot as plt
with open(os.path.join(os.path.dirname(__file__), 'fingerprint_generation_test_data.json'), 'r') as test_data_file:
test_data = json.load(test_data_file)
......@@ -25,8 +27,17 @@ def test_matching_of_spectra():
data = test_data["17661:2634879"]
cut_energies = []
cut_dos = []
cut_energies = [e for e,d in zip(data['dos_energies'], data['dos_values'][0]) if (e / ELECTRON_CHARGE > -4 and e / ELECTRON_CHARGE < 2)]
cut_dos = [d for e,d in zip(data['dos_energies'], data['dos_values'][0]) if (e / ELECTRON_CHARGE > -4 and e / ELECTRON_CHARGE < 2)]
cut_energies = [e for e,d in zip(data['dos_energies'], data['dos_values'][0]) if (e / ELECTRON_CHARGE > -7.3 and e / ELECTRON_CHARGE < 2)]
cut_dos = [d for e,d in zip(data['dos_energies'], data['dos_values'][0]) if (e / ELECTRON_CHARGE > -7.3 and e / ELECTRON_CHARGE < 2)]
plt.figure()
plt.plot([x / ELECTRON_CHARGE for x in data['dos_energies']], data['dos_values'][0])
plt.plot([x / ELECTRON_CHARGE for x in cut_energies], cut_dos)
fp = DOSFingerprint().calculate(data['dos_energies'], data['dos_values'])
cut_fp = DOSFingerprint().calculate(cut_energies, [cut_dos])
assert tanimoto_similarity(fp, cut_fp) == 1
\ No newline at end of file
grid = Grid().create(grid_id=fp.grid_id)
#print(grid.grid()[cut_fp.indices[0]][0])
plt.figure()
plot_FP_in_grid(fp, grid, show=False)
plot_FP_in_grid(cut_fp, grid, alpha = 0.5)
assert tanimoto_similarity(cut_fp, fp) == tanimoto_similarity(fp, cut_fp)
assert 1 - tanimoto_similarity(fp, cut_fp) < 1e-2
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment