Commit c6e7e7a1 authored by Lucas Miranda

Updated notebook with empirically derived radius for latent neighborhood

parent bf9f0a8a
Pipeline #95859 canceled
@@ -211,10 +211,20 @@ class neighbor_cluster_purity(tf.keras.callbacks.Callback):
     """

     def __init__(
-        self, r, variational=True, validation_data=None, samples=10000, log_dir="."
+        self,
+        encoding_dim,
+        variational=True,
+        validation_data=None,
+        samples=10000,
+        log_dir=".",
     ):
         super().__init__()
-        self.r = r
+        self.enc = encoding_dim
+        self.r = (
+            -0.14220132706202965 * np.log2(validation_data.shape[0])
+            + 0.17189696892334544 * self.enc
+            + 1.6940295848037952
+        )  # Empirically derived from data. See examples/set_default_entropy_radius.ipynb for details
         self.variational = variational
         self.validation_data = validation_data
         self.samples = samples
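For context, the neighborhood radius is now a linear function of the latent dimensionality and the log2 of the validation-set size, with coefficients fitted in examples/set_default_entropy_radius.ipynb. A minimal sketch of how the expression evaluates; the sample count of 10000 and encoding dimension of 6 below are illustrative values, not taken from the commit:

import numpy as np

def empirical_entropy_radius(n_validation_samples: int, encoding_dim: int) -> float:
    # Same coefficients as in the constructor above
    return (
        -0.14220132706202965 * np.log2(n_validation_samples)
        + 0.17189696892334544 * encoding_dim
        + 1.6940295848037952
    )

print(empirical_entropy_radius(10000, 6))  # ~0.84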
@@ -249,6 +259,7 @@ class neighbor_cluster_purity(tf.keras.callbacks.Callback):
         encoding = encoder.predict(self.validation_data)
         groups = grouper.predict(self.validation_data)
         hard_groups = groups.argmax(axis=1)
+        max_groups = groups.max(axis=1)

         # compute pairwise distances on latent space
         pdist = pairwise_distances(encoding)
......@@ -259,7 +270,7 @@ class neighbor_cluster_purity(tf.keras.callbacks.Callback):
range(encoding.shape[0]), self.samples, replace=False
)
purity_vector = np.zeros(self.samples)
purity_weights = np.zeros(self.samples)
neighbor_number = np.zeros(self.samples)
for i, sample in enumerate(random_idxs):
@@ -270,7 +281,10 @@ class neighbor_cluster_purity(tf.keras.callbacks.Callback):
             neigh_entropy = entropy(z)

             purity_vector[i] = neigh_entropy
-            purity_weights[i] = np.sum(neighborhood)
+            neighbor_number[i] = np.sum(neighborhood)
+
+        # Compute weights multiplying neighbor number and target confidence
+        purity_weights = neighbor_number * max_groups

         writer = tf.summary.create_file_writer(self.log_dir)
         with writer.as_default():
@@ -279,6 +293,16 @@ class neighbor_cluster_purity(tf.keras.callbacks.Callback):
                 data=np.average(purity_vector, weights=purity_weights),
                 step=epoch,
             )
+            tf.summary.scalar(
+                "average_neighbors_in_radius",
+                data=np.average(neighbor_number),
+                step=epoch,
+            )
+            tf.summary.scalar(
+                "average_confidence_in_selected_cluster",
+                data=np.average(max_groups),
+                step=epoch,
+            )


 class uncorrelated_features_constraint(Constraint):
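Taken together, the callback now logs a neighborhood-entropy purity in which each sampled point's entropy is weighted by its neighbor count times the confidence of its assigned cluster, alongside the two new averages. Below is a self-contained sketch of that computation, assuming random stand-ins for the encoder output and the soft cluster assignments; it also iterates over all points and uses np.bincount to turn hard labels into counts, whereas the real callback subsamples self.samples points:

import numpy as np
from scipy.stats import entropy
from sklearn.metrics import pairwise_distances

rng = np.random.default_rng(0)
encoding = rng.normal(size=(500, 6))            # stand-in for encoder.predict(...)
groups = rng.dirichlet(np.ones(10), size=500)   # stand-in for grouper.predict(...)
hard_groups = groups.argmax(axis=1)
max_groups = groups.max(axis=1)
r = 0.84                                        # radius from the formula above

pdist = pairwise_distances(encoding)
purity_vector = np.zeros(encoding.shape[0])
neighbor_number = np.zeros(encoding.shape[0])
for i in range(encoding.shape[0]):
    neighborhood = pdist[i] < r
    # entropy of the hard cluster labels found within the radius
    purity_vector[i] = entropy(np.bincount(hard_groups[neighborhood]))
    neighbor_number[i] = neighborhood.sum()

# Large, confidently assigned neighborhoods contribute more to the metric
purity_weights = neighbor_number * max_groups
print(np.average(purity_vector, weights=purity_weights))
print(neighbor_number.mean(), max_groups.mean())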
@@ -399,7 +399,6 @@ else:
         cp=False,
         variational=variational,
         entropy_samples=entropy_samples,
-        entropy_radius=entropy_radius,
         phenotype_class=pheno_class,
         predictor=predictor,
         loss=loss,
@@ -76,7 +76,6 @@ def get_callbacks(
     reg_cat_clusters: bool = False,
     reg_cluster_variance: bool = False,
     entropy_samples: int = 15000,
-    entropy_radius: float = None,
     logparam: dict = None,
     outpath: str = ".",
 ) -> List[Union[Any]]:
@@ -113,13 +112,7 @@
     )

     entropy = deepof.model_utils.neighbor_cluster_purity(
-        r=(
-            entropy_radius
-            if entropy_radius is not None
-            else 0.15 * logparam["encoding"]
-            - 0.18  # equation derived empirically to keep neighbor number constant.
-            # See examples/set_default_entropy_radius.ipynb for details
-        ),
+        encoding_dim=logparam["encoding"],
         samples=entropy_samples,
         validation_data=X_val,
         log_dir=os.path.join(outpath, "metrics", run_ID),
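With this change, get_callbacks no longer computes or accepts a radius; callers only pass the encoding dimension, and the callback derives the radius from its validation data. A hedged example of constructing the callback directly under the new signature (the shapes, sample count, and log path are illustrative):

import numpy as np
import deepof.model_utils

X_val = np.random.normal(0, 1, [1500, 25, 6])   # illustrative validation tensor
purity_cb = deepof.model_utils.neighbor_cluster_purity(
    encoding_dim=6,
    variational=True,
    validation_data=X_val,
    samples=1000,
    log_dir="./metrics/example_run",
)
print(purity_cb.r)  # radius derived from X_val.shape[0] and encoding_dim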
@@ -270,7 +263,6 @@ def autoencoder_fitting(
     variational: bool,
     reg_cat_clusters: bool,
     reg_cluster_variance: bool,
-    entropy_radius: float,
     entropy_samples: int,
 ):
     """Implementation function for deepof.data.coordinates.deep_unsupervised_embedding"""
@@ -300,7 +292,6 @@
         phenotype_class=phenotype_class,
         predictor=predictor,
         loss=loss,
-        entropy_radius=entropy_radius,
         entropy_samples=entropy_samples,
         reg_cat_clusters=reg_cat_clusters,
         reg_cluster_variance=reg_cluster_variance,
@@ -572,9 +572,9 @@ def circular_arena_recognition(frame: np.array) -> np.array:
     ellipse_params = cv2.fitEllipse(cnts[0])
     center_coordinates = tuple([int(i) for i in ellipse_params[0]])
     axes_length = tuple([int(i) // 2 for i in ellipse_params[1]])
-    angle = ellipse_params[2]
+    ellipse_angle = ellipse_params[2]

-    return center_coordinates, axes_length, angle
+    return center_coordinates, axes_length, ellipse_angle


 def rolling_speed(
Source diff could not be displayed: it is too large.
@@ -270,7 +270,7 @@ def test_find_learning_rate():


 def test_neighbor_cluster_purity():
-    X = np.random.uniform(0, 10, [1500, 5, 6])
+    X = np.random.normal(0, 1, [1500, 25, 6])
     test_model = deepof.models.SEQ_2_SEQ_GMVAE()
     gmvaep = test_model.build(X.shape)[3]
@@ -279,6 +279,6 @@
         X,
         X,
         callbacks=deepof.model_utils.neighbor_cluster_purity(
-            r=0.5, validation_data=X, variational=True
+            encoding_dim=6, validation_data=X, variational=True
         ),
     )
@@ -67,7 +67,6 @@ def test_get_callbacks(
         True,
         True,
         None,
-        entropy_radius=0.5,
     )
     assert type(runID) == str
     assert type(tbc) == tf.keras.callbacks.TensorBoard
@@ -171,7 +170,6 @@ def test_tune_search(
         cp=False,
         reg_cat_clusters=True,
         reg_cluster_variance=True,
-        entropy_radius=0.75,
         entropy_samples=10,
         logparam=None,
     )