Skip to content
Snippets Groups Projects
Commit 9f83c872 authored by Martin Kuban's avatar Martin Kuban
Browse files

tests passing

Reproduces the original similarity values to within an absolute tolerance of 0.05.
parent aca0104f
No related branches found
No related tags found
No related merge requests found
......@@ -12,14 +12,19 @@ class DOSFingerprint():
self.indices = []
self.stepsize = stepsize
self.filling_factor = 0
self.grid_id = None
def calculate(self, dos_energies, dos_values):
    """
    Compute the fingerprint for one density of states and store it on this instance.

    The input is passed through ``_convert_dos`` and ``_integrate_to_bins``; the
    binned result is then encoded by ``_calculate_bytes`` against a grid built by
    ``Grid().create()`` with its default arguments.

    Sets ``self.grid_id``, ``self.indices`` and ``self.bins``.

    Returns:
        This instance, so that construction and calculation can be chained.
    """
    converted_energies, converted_dos = self._convert_dos(dos_energies, dos_values)
    binned_energies, binned_dos = self._integrate_to_bins(converted_energies, converted_dos)
    default_grid = Grid().create()
    # Keep the grid identifier so fingerprints can later be checked for comparability.
    self.grid_id = default_grid.get_grid_id()
    encoded = self._calculate_bytes(binned_energies, binned_dos, default_grid)
    self.indices, self.bins = encoded
    return self
def to_dict(self):
    """Return the fingerprint's attributes as a plain dictionary."""
    return {
        'bins': self.bins,
        'indices': self.indices,
        'stepsize': self.stepsize,
        'grid_id': self.grid_id,
        'filling_factor': self.filling_factor,
    }
def _integrate_to_bins(self, xs, ys):
"""
Performs stepwise numerical integration of ``ys`` over the range of ``xs``. The stepsize of the generated histogram is controlled by DOSFingerprint().stepsize.
......
from .DOSfingerprint import DOSFingerprint
from .grid import Grid
\ No newline at end of file
from .grid import Grid
from .similarity import tanimoto_similarity
\ No newline at end of file
import numpy as np
from bitarray import bitarray
def tanimoto_similarity(fingerprint1, fingerprint2):
    """
    Calculate the Tanimoto coefficient ``c / (a + b - c)`` of two fingerprints.

    ``a`` and ``b`` are the numbers of set bits of each fingerprint and ``c`` the
    number of bits set in both, counted only where the two fingerprints overlap.

    Both arguments must provide ``grid_id`` (a ``':'``-separated string whose
    second field is read as the number of bins per grid index), ``indices``
    (only ``indices[0]`` is used here) and ``to_dict()`` returning at least the
    keys ``'bins'`` (hex string) and ``'indices'``.

    Returns:
        The similarity as a float. ``0.0`` is returned when the fingerprints do
        not overlap or when no bit is set in the overlapping region.

    Raises:
        AssertionError: if the fingerprints were calculated with different grids.
    """
    if fingerprint1.grid_id != fingerprint2.grid_id:
        raise AssertionError('Can not calculate similarity of fingerprints that have been calculated with different grids.')
    num_bins = int(fingerprint1.grid_id.split(':')[1])
    offset = abs(fingerprint1.indices[0] - fingerprint2.indices[0])
    # The fingerprint starting at the higher grid index comes first; it is the
    # one that gets zero-padded at the front to line up with the other.
    later, earlier = sorted(
        [fingerprint1.to_dict(), fingerprint2.to_dict()],
        key=lambda fp: fp['indices'][0],
        reverse=True,
    )
    # Width of the padding in bytes. The same width must be used for the mask
    # below, otherwise mask and fingerprints differ in length whenever
    # num_bins != 8.
    # NOTE(review): assumes offset * num_bins is a multiple of 8; the remainder
    # is truncated, as before.
    pad_bytes = offset * num_bins // 8
    later_bins = pad_bytes * '00' + later['bins']
    earlier_bins = earlier['bins']
    # Two hex characters per byte; compare only the common leading part.
    n_bytes = min(len(later_bins), len(earlier_bins)) // 2
    if n_bytes <= pad_bytes:
        # Nothing but padding is left: the fingerprints do not overlap.
        return 0.0
    n_hex = 2 * n_bytes
    bits_later = int.from_bytes(bytes.fromhex(later_bins[:n_hex]), 'big')
    bits_earlier = int.from_bytes(bytes.fromhex(earlier_bins[:n_hex]), 'big')
    # Clear the leading pad_bytes bytes (most significant in big-endian order),
    # i.e. the part of the earlier fingerprint that has no counterpart.
    mask = (1 << (8 * (n_bytes - pad_bytes))) - 1
    bits_later &= mask
    bits_earlier &= mask
    a = bin(bits_later).count('1')
    b = bin(bits_earlier).count('1')
    c = bin(bits_later & bits_earlier).count('1')
    union = a + b - c
    if union == 0:
        # No bit set in either fingerprint; avoid dividing by zero.
        return 0.0
    return c / float(union)
\ No newline at end of file
from nomad_dos_fingerprints import DOSFingerprint
from nomad_dos_fingerprints import DOSFingerprint, tanimoto_similarity
import pytest, os, json
import numpy as np
with open(os.path.join(os.path.dirname(__file__), 'fingerprint_generation_test_data.json'), 'r') as test_data_file:
test_data = json.load(test_data_file)
......@@ -9,5 +10,25 @@ def test_fingerprint_values():
for fp, mid in test_data['fingerprints']:
raw_data = test_data[mid]
new_fingerprint = DOSFingerprint().calculate(raw_data['dos_energies'], raw_data['dos_values'])
assert json.loads(fp)['indices'] == new_fingerprint.indices
assert json.loads(fp)['bins'] == new_fingerprint.bins
old_fingerprint = DOSFingerprint()
old_fingerprint.bins = json.loads(fp)['bins']
old_fingerprint.indices = json.loads(fp)['indices']
old_fingerprint.grid_id = new_fingerprint.grid_id
assert old_fingerprint.indices == new_fingerprint.indices
assert np.isclose(tanimoto_similarity(old_fingerprint, new_fingerprint),1, atol=5e-2)
def test_materials_similarity():
    """
    Recalculate all fingerprints from the raw test data and check that their
    pairwise Tanimoto similarities match the stored matrix within ``atol = 5e-2``.
    """
    reference = test_data['simat']
    recalculated = [
        DOSFingerprint().calculate(test_data[pair[1]]['dos_energies'], test_data[pair[1]]['dos_values'])
        for pair in test_data['fingerprints']
    ]
    matrix = [
        [tanimoto_similarity(row_fp, col_fp) for col_fp in recalculated]
        for row_fp in recalculated
    ]
    # Show the element-wise deviation from the reference for easier debugging.
    print(matrix - np.array(reference))
    assert np.isclose(reference, matrix, atol = 5e-2).all()
\ No newline at end of file
import pytest
from bitarray import bitarray
from nomad_dos_fingerprints import tanimoto_similarity, DOSFingerprint
def test_tanimoto():
    """
    Check the similarity of two hand-built fingerprints that share a grid but
    start at different grid indices, and of each fingerprint with itself.
    """
    # generate fp-type data and check if this can be realized with binary-strings only
    shared_grid_id = 'a:8:b'
    long_fp, short_fp = DOSFingerprint(), DOSFingerprint()
    long_fp.bins = bitarray('00000000111111110000000011111111').tobytes().hex()
    short_fp.bins = bitarray('1111111100000000').tobytes().hex()
    for fingerprint in (long_fp, short_fp):
        fingerprint.grid_id = shared_grid_id
    long_fp.indices = [0, 3]
    short_fp.indices = [1, 2]
    pairs = ((long_fp, short_fp), (long_fp, long_fp), (short_fp, short_fp))
    for left, right in pairs:
        assert tanimoto_similarity(left, right) == 1
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment