Commit c6e7e7a1 authored by lucas_miranda
Browse files

Updated notebook with empirically derived radius for latent neighborhood

parent bf9f0a8a
Pipeline #95859 canceled with stages
in 65 minutes and 15 seconds
......@@ -211,10 +211,20 @@ class neighbor_cluster_purity(tf.keras.callbacks.Callback):
"""
def __init__(
self, r, variational=True, validation_data=None, samples=10000, log_dir="."
self,
encoding_dim,
variational=True,
validation_data=None,
samples=10000,
log_dir=".",
):
super().__init__()
self.r = r
self.enc = encoding_dim
self.r = (
-0.14220132706202965 * np.log2(validation_data.shape[0])
+ 0.17189696892334544 * self.enc
+ 1.6940295848037952
) # Empirically derived from data. See examples/set_default_entropy_radius.ipynb for details
self.variational = variational
self.validation_data = validation_data
self.samples = samples
......@@ -249,6 +259,7 @@ class neighbor_cluster_purity(tf.keras.callbacks.Callback):
encoding = encoder.predict(self.validation_data)
groups = grouper.predict(self.validation_data)
hard_groups = groups.argmax(axis=1)
max_groups = groups.max(axis=1)
# compute pairwise distances on latent space
pdist = pairwise_distances(encoding)
......@@ -259,7 +270,7 @@ class neighbor_cluster_purity(tf.keras.callbacks.Callback):
range(encoding.shape[0]), self.samples, replace=False
)
purity_vector = np.zeros(self.samples)
purity_weights = np.zeros(self.samples)
neighbor_number = np.zeros(self.samples)
for i, sample in enumerate(random_idxs):
......@@ -270,7 +281,10 @@ class neighbor_cluster_purity(tf.keras.callbacks.Callback):
neigh_entropy = entropy(z)
purity_vector[i] = neigh_entropy
purity_weights[i] = np.sum(neighborhood)
neighbor_number[i] = np.sum(neighborhood)
# Compute weights multiplying neighbor number and target confidence
purity_weights = neighbor_number * max_groups
writer = tf.summary.create_file_writer(self.log_dir)
with writer.as_default():
......@@ -279,6 +293,16 @@ class neighbor_cluster_purity(tf.keras.callbacks.Callback):
data=np.average(purity_vector, weights=purity_weights),
step=epoch,
)
tf.summary.scalar(
"average_neighbors_in_radius",
data=np.average(neighbor_number),
step=epoch,
)
tf.summary.scalar(
"average_confidence_in_selected_cluster",
data=np.average(max_groups),
step=epoch,
)
class uncorrelated_features_constraint(Constraint):
......
......@@ -399,7 +399,6 @@ else:
cp=False,
variational=variational,
entropy_samples=entropy_samples,
entropy_radius=entropy_radius,
phenotype_class=pheno_class,
predictor=predictor,
loss=loss,
......
......@@ -76,7 +76,6 @@ def get_callbacks(
reg_cat_clusters: bool = False,
reg_cluster_variance: bool = False,
entropy_samples: int = 15000,
entropy_radius: float = None,
logparam: dict = None,
outpath: str = ".",
) -> List[Union[Any]]:
......@@ -113,13 +112,7 @@ def get_callbacks(
)
entropy = deepof.model_utils.neighbor_cluster_purity(
r=(
entropy_radius
if entropy_radius is not None
else 0.15 * logparam["encoding"]
- 0.18 # equation derived empirically to keep neighbor number constant.
# See examples/set_default_entropy_radius.ipynb for details
),
encoding_dim=logparam["encoding"],
samples=entropy_samples,
validation_data=X_val,
log_dir=os.path.join(outpath, "metrics", run_ID),
......@@ -270,7 +263,6 @@ def autoencoder_fitting(
variational: bool,
reg_cat_clusters: bool,
reg_cluster_variance: bool,
entropy_radius: float,
entropy_samples: int,
):
"""Implementation function for deepof.data.coordinates.deep_unsupervised_embedding"""
......@@ -300,7 +292,6 @@ def autoencoder_fitting(
phenotype_class=phenotype_class,
predictor=predictor,
loss=loss,
entropy_radius=entropy_radius,
entropy_samples=entropy_samples,
reg_cat_clusters=reg_cat_clusters,
reg_cluster_variance=reg_cluster_variance,
......
......@@ -572,9 +572,9 @@ def circular_arena_recognition(frame: np.array) -> np.array:
ellipse_params = cv2.fitEllipse(cnts[0])
center_coordinates = tuple([int(i) for i in ellipse_params[0]])
axes_length = tuple([int(i) // 2 for i in ellipse_params[1]])
angle = ellipse_params[2]
ellipse_angle = ellipse_params[2]
return center_coordinates, axes_length, angle
return center_coordinates, axes_length, ellipse_angle
def rolling_speed(
......
This diff is collapsed.
......@@ -270,7 +270,7 @@ def test_find_learning_rate():
def test_neighbor_cluster_purity():
X = np.random.uniform(0, 10, [1500, 5, 6])
X = np.random.normal(0, 1, [1500, 25, 6])
test_model = deepof.models.SEQ_2_SEQ_GMVAE()
gmvaep = test_model.build(X.shape)[3]
......@@ -279,6 +279,6 @@ def test_neighbor_cluster_purity():
X,
X,
callbacks=deepof.model_utils.neighbor_cluster_purity(
r=0.5, validation_data=X, variational=True
encoding_dim=6, validation_data=X, variational=True
),
)
......@@ -67,7 +67,6 @@ def test_get_callbacks(
True,
True,
None,
entropy_radius=0.5,
)
assert type(runID) == str
assert type(tbc) == tf.keras.callbacks.TensorBoard
......@@ -171,7 +170,6 @@ def test_tune_search(
cp=False,
reg_cat_clusters=True,
reg_cluster_variance=True,
entropy_radius=0.75,
entropy_samples=10,
logparam=None,
)
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment