Commit 20d84de8 authored by lucas_miranda's avatar lucas_miranda
Browse files

Added CustomStopper class to train_utils.py, to start early stopping only after annealing is over

parent ab55fbf2
Pipeline #89197 passed with stage
in 29 minutes and 38 seconds
......@@ -658,7 +658,9 @@ class SEQ_2_SEQ_GMVAE:
generator = Model_B3(generator)
generator = Model_D5(generator)
generator = Model_B4(generator)
generator = Dense(tfpl.IndependentNormal.params_size(input_shape[2:]))(generator)
generator = Dense(tfpl.IndependentNormal.params_size(input_shape[2:]))(
generator
)
x_decoded_mean = tfpl.IndependentNormal(
event_shape=input_shape[2:],
convert_to_tensor_fn=tfp.distributions.Distribution.mean,
......@@ -675,15 +677,6 @@ class SEQ_2_SEQ_GMVAE:
model_metrics = {"vae_reconstruction": ["mae", "mse"]}
loss_weights = [1.0]
# x_decoded_mean = TimeDistributed(Dense(input_shape[2]), name="vae_prediction")(
# generator
# )
#
# model_outs = [x_decoded_mean]
# model_losses = [Huber(delta=self.delta, reduction="sum_over_batch_size")]
# model_metrics = {"vae_reconstruction": ["mae", "mse"]}
# loss_weights = [1.0]
if self.predictor > 0:
# Define and instantiate predictor
predictor = Dense(
......
......@@ -104,8 +104,8 @@ def get_callbacks(
("P" if predictor > 0 and variational else ""),
("_Pheno" if phenotype_class > 0 else ""),
("_loss={}".format(loss) if variational else ""),
("_encoding={}".format(logparam["encoding"])),
("_k={}".format(logparam["k"])),
("_encoding={}".format(logparam["encoding"]) if logparam is not None else ""),
("_k={}".format(logparam["k"]) if logparam is not None else ""),
(datetime.now().strftime("%Y%m%d-%H%M%S")),
)
......
%% Cell type:code id: tags:
``` python
%load_ext autoreload
%autoreload 2
```
%% Cell type:code id: tags:
``` python
import os
os.chdir(os.path.dirname("../"))
```
%% Cell type:code id: tags:
``` python
import deepof.data
import deepof.models
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.preprocessing import StandardScaler, MinMaxScaler
import tensorflow as tf
import tqdm.notebook as tqdm
from ipywidgets import interact
```
%% Cell type:markdown id: tags:
# Retrieve phenotypes
%% Cell type:code id: tags:
``` python
flatten = lambda t: [item for sublist in t for item in sublist]
```
%% Cell type:code id: tags:
``` python
# Load first batch
dset11 = pd.ExcelFile(
"../../Desktop/deepof-data/tagged_videos/Individual_datasets/DLC_batch_1/DLC_single_CDR1_1/1.Openfield_data-part1/JB05.1-OF-SI-part1.xlsx"
)
dset12 = pd.ExcelFile(
"../../Desktop/deepof-data/tagged_videos/Individual_datasets/DLC_batch_1/DLC_single_CDR1_1/2.Openfielddata-part2/AnimalID's-JB05.1-part2.xlsx"
)
dset11 = pd.read_excel(dset11, "Tabelle2")
dset12 = pd.read_excel(dset12, "Tabelle2")
dset11.Test = dset11.Test.apply(lambda x: "Test {}_s1.1".format(x))
dset12.Test = dset12.Test.apply(lambda x: "Test {}_s1.2".format(x))
dset1 = {"CSDS":list(dset11.loc[dset11.Treatment.isin(["CTR+CSDS","NatCre+CSDS"]), "Test"]) +
list(dset12.loc[dset12.Treatment.isin(["CTR+CSDS","NatCre+CSDS"]), "Test"]),
"NS": list(dset11.loc[dset11.Treatment.isin(["CTR+nonstressed","NatCre+nonstressed"]), "Test"]) +
list(dset12.loc[dset12.Treatment.isin(["CTR+nonstressed","NatCre+nonstressed"]), "Test"]),}
dset1inv = {}
for i in flatten(list(dset1.values())):
if i in dset1["CSDS"]:
dset1inv[i] = "CSDS"
else:
dset1inv[i] = "NS"
assert len(dset1inv) == dset11.shape[0] + dset12.shape[0], "You missed some labels!"
```
%% Cell type:code id: tags:
``` python
# Load second batch
dset21 = pd.read_excel(
"../../Desktop/deepof-data/tagged_videos/Individual_datasets/DLC_batch_2/Part1/2_Single/stressproject22.04.2020genotypes-openfieldday1.xlsx"
)
dset22 = pd.read_excel(
"../../Desktop/deepof-data/tagged_videos/Individual_datasets/DLC_batch_2/Part2/2_Single/OpenFieldvideos-part2.xlsx"
)
dset21.Test = dset21.Test.apply(lambda x: "Test {}_s2.1".format(x))
dset22.Test = dset22.Test.apply(lambda x: "Test {}_s2.2".format(x))
dset2 = {"CSDS":list(dset21.loc[dset21.Treatment == "Stress", "Test"]) +
list(dset22.loc[dset22.Treatment == "Stressed", "Test"]),
"NS": list(dset21.loc[dset21.Treatment == "Nonstressed", "Test"]) +
list(dset22.loc[dset22.Treatment == "Nonstressed", "Test"])}
dset2inv = {}
for i in flatten(list(dset2.values())):
if i in dset2["CSDS"]:
dset2inv[i] = "CSDS"
else:
dset2inv[i] = "NS"
assert len(dset2inv) == dset21.shape[0] + dset22.shape[0], "You missed some labels!"
```
%% Cell type:code id: tags:
``` python
# Load third batch
dset31 = pd.read_excel(
"../../Desktop/deepof-data/tagged_videos/Individual_datasets/DLC_batch_3/1.Day2OF-SIpart1/JB05 2Female-ELS-OF-SIpart1.xlsx"
)
dset32 = pd.read_excel(
"../../Desktop/deepof-data/tagged_videos/Individual_datasets/DLC_batch_3/2.Day3OF-SIpart2/JB05 2FEMALE-ELS-OF-SIpart2.xlsx"
)
dset31.Test = dset31.Test.apply(lambda x: "Test {}_s3.1".format(x))
dset32.Test = dset32.Test.apply(lambda x: "Test {}_s3.2".format(x))
dset3 = {"CSDS":[],
"NS": list(dset31.loc[:, "Test"]) +
list(dset32.loc[:, "Test"])}
dset3inv = {}
for i in flatten(list(dset3.values())):
if i in dset3["CSDS"]:
dset3inv[i] = "CSDS"
else:
dset3inv[i] = "NS"
assert len(dset3inv) == dset31.shape[0] + dset32.shape[0], "You missed some labels!"
```
%% Cell type:code id: tags:
``` python
# Load fourth batch
dset41 = os.listdir("../../Desktop/deepof-data/tagged_videos/Individual_datasets/DLC_batch_4/JB05.4-OpenFieldvideos/")
# Remove empty video!
dset41 = [vid for vid in dset41 if "52" not in vid]
dset4 = {"CSDS":[],
"NS": [i[:-4]+"_s4" for i in dset41]}
dset4inv = {}
for i in flatten(list(dset4.values())):
if i in dset4["CSDS"]:
dset4inv[i] = "CSDS"
else:
dset4inv[i] = "NS"
assert len(dset4inv) == len(dset41), "You missed some labels!"
```
%% Cell type:code id: tags:
``` python
# Merge phenotype dicts and serialise!
aggregated_dset = {**dset1inv, **dset2inv, **dset3inv, **dset4inv}
```
%% Cell type:code id: tags:
``` python
from collections import Counter
print(Counter(aggregated_dset.values()))
print(115+52)
```
%%%% Output: stream
Counter({'NS': 115, 'CSDS': 52})
167
%% Cell type:markdown id: tags:
# Define and run project
%% Cell type:code id: tags:
``` python
%%time
deepof_main = deepof.data.project(path=os.path.join("..","..","Desktop","deepof_single_topview"),
deepof_main = deepof.data.project(path=os.path.join("..","..","Desktop","deepof-data","deepof_single_topview"),
smooth_alpha=0.99,
arena_dims=[380],
exclude_bodyparts=["Tail_1", "Tail_2", "Tail_tip", "Tail_base", "Spine_2"]
#exclude_bodyparts=["Tail_1", "Tail_2", "Tail_tip", "Tail_base", "Spine_2"]
#exp_conditions=dset2inv
)
```
%%%% Output: stream
CPU times: user 28.4 s, sys: 5 s, total: 33.4 s
Wall time: 10.8 s
CPU times: user 28.1 s, sys: 4.99 s, total: 33.1 s
Wall time: 7.5 s
%% Cell type:code id: tags:
``` python
%%time
deepof_main = deepof_main.run(verbose=True)
print(deepof_main)
```
%%%% Output: stream
Loading trajectories...
Smoothing trajectories...
Computing distances...
Computing angles...
Done!
deepof analysis of 167 videos
CPU times: user 23.5 s, sys: 1.76 s, total: 25.3 s
Wall time: 29.7 s
CPU times: user 46.6 s, sys: 5.09 s, total: 51.7 s
Wall time: 53 s
%% Cell type:code id: tags:
``` python
all_quality = pd.concat([tab for tab in deepof_main.get_quality().values()]).droplevel("scorer", axis=1)
```
%% Cell type:code id: tags:
``` python
all_quality.boxplot(rot=45)
plt.ylim(0.99985, 1.00001)
plt.show()
```
%%%% Output: display_data
![]()
%% Cell type:code id: tags:
``` python
@interact(quality_top=(0., 1., 0.01))
def low_quality_tags(quality_top):
pd.DataFrame(pd.melt(all_quality).groupby("bodyparts").value.apply(
lambda y: sum(y<quality_top) / len(y) * 100)
).sort_values(by="value", ascending=False).plot.bar(rot=45)
plt.xlabel("body part")
plt.ylabel("Tags with quality under {} (%)".format(quality_top))
plt.tight_layout()
plt.legend([])
plt.show()
```
%%%% Output: display_data
%% Cell type:markdown id: tags:
# Generate coords
%% Cell type:code id: tags:
``` python
%%time
deepof_coords = deepof_main.get_coords(center="Center", polar=False, speed=0, align="Spine_1", align_inplace=True, propagate_labels=False)
#deepof_dists = deepof_main.get_distances(propagate_labels=False)
#deepof_angles = deepof_main.get_angles(propagate_labels=False)
```
%%%% Output: stream
CPU times: user 49.3 s, sys: 510 ms, total: 49.8 s
Wall time: 49.5 s
CPU times: user 54.3 s, sys: 475 ms, total: 54.8 s
Wall time: 54.8 s
%% Cell type:markdown id: tags:
# Visualization
%% Cell type:code id: tags:
``` python
%%time
tf.keras.backend.clear_session()
print("Preprocessing training set...")
deepof_train = deepof_coords.preprocess(
window_size=11,
window_step=11,
conv_filter=None,
scale="minmax",
shuffle=True,
test_videos=0,
)[0]
print("Loading pre-trained model...")
encoder, _, grouper, gmvaep, = deepof.models.SEQ_2_SEQ_GMVAE(
encoder, decoder, grouper, gmvaep, = deepof.models.SEQ_2_SEQ_GMVAE(
loss="ELBO",
number_of_components=10,
compile_model=True,
kl_warmup_epochs=20,
montecarlo_kl=333,
neuron_control=True,
montecarlo_kl=10,
encoding=6,
mmd_warmup_epochs=0,
mmd_warmup_epochs=20,
predictor=0,
phenotype_prediction=0,
).build(deepof_train.shape)[:4]
gmvaep.load_weights("../../Desktop/deepof-data/trained_weights/GMVAE_loss=ELBO_encoding=6_run_1_final_weights.h5")
gmvaep.load_weights("../../Desktop/GMVAE_loss=ELBO_encoding=6_run_0_final_weights.h5")
```
%%%% Output: stream
Preprocessing training set...
Loading pre-trained model...
(227686, 11, 16)
%%%% Output: error
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<timed exec> in <module>
~/opt/anaconda3/envs/Machine_Learning/lib/python3.6/site-packages/tensorflow/python/keras/engine/training.py in load_weights(self, filepath, by_name, skip_mismatch, options)
2209 f, self.layers, skip_mismatch=skip_mismatch)
2210 else:
-> 2211 hdf5_format.load_weights_from_hdf5_group(f, self.layers)
2212
2213 def _updated_config(self):
~/opt/anaconda3/envs/Machine_Learning/lib/python3.6/site-packages/tensorflow/python/keras/saving/hdf5_format.py in load_weights_from_hdf5_group(f, layers)
704 str(len(symbolic_weights)) +
705 ' weights, but the saved weights have ' +
--> 706 str(len(weight_values)) + ' elements.')
707 weight_value_tuples += zip(symbolic_weights, weight_values)
708 K.batch_set_value(weight_value_tuples)
ValueError: Layer #12 (named "kl_divergence_layer" in the current model) was found to correspond to layer kl_divergence_layer in the save file. However the new layer kl_divergence_layer expects 3 weights, but the saved weights have 1 elements.
%% Cell type:code id: tags:
``` python
samples = 10000
all_clusters = grouper.predict(deepof_train[:samples])
all_encodings = encoder.predict(deepof_train[:samples])
```
%% Cell type:code id: tags:
``` python
video_key = np.random.choice(list(deepof_coords.keys()), 1)[0]
video_key
```
%%%% Output: execute_result
'Test 42_s12'
(227686, 11, 26)
CPU times: user 3.35 s, sys: 656 ms, total: 4.01 s
Wall time: 4.07 s
%% Cell type:code id: tags:
``` python
video_key = np.random.choice(list(deepof_coords.keys()), 1)[0]
video_input = deepof.data.table_dict({video_key:deepof_coords[video_key]}, typ="coords").preprocess(
window_size=11,
window_step=1,
conv_filter=None,
scale="minmax",
scale="standard",
shuffle=False,
test_videos=0,
)[0]
scaler = MinMaxScaler()
scaler.fit(deepof_coords[video_key])
scaler = StandardScaler()
scaler.fit(np.array(pd.concat(list(deepof_coords.values()))))
# Get reconstruction
video_pred = gmvaep.predict(video_input)[:, 6, :]
# Get encodings
video_clusters = grouper.predict(video_input)
video_encodings = encoder.predict(video_input)
# video_clusters = grouper.predict(video_input)
# video_encodings = encoder.predict(video_input)
scaled_video_pred = scaler.inverse_transform(video_pred)
scaled_video_input = scaler.inverse_transform(video_input[:, 6, :])
scaled_video_input = pd.DataFrame(scaled_video_input, columns=deepof_coords[video_key].columns)
scaled_video_pred = pd.DataFrame(scaled_video_pred, columns=deepof_coords[video_key].columns)
```
%% Cell type:code id: tags:
``` python
tf.reduce_mean( tf.keras.losses.mean_absolute_error(video_pred, video_input[:, 6, :]))
```
%%%% Output: execute_result
<tf.Tensor: shape=(), dtype=float64, numpy=0.6589403550173625>
<tf.Tensor: shape=(), dtype=float64, numpy=0.8289839462658529>
%% Cell type:code id: tags:
``` python
# Draft: function to produce a video with the animal in motion using cv2
import cv2
w = 400
h = 400
factor = 2.5
# Instantiate video
writer = cv2.VideoWriter()
writer.open(
"test_video.avi",
cv2.VideoWriter_fourcc(*"MJPG"),
24,
(int(w * factor), int(h * factor)),
True,
)
for frame in tqdm.tqdm(range(500)):
for frame in tqdm.tqdm(range(100)):
image = np.zeros((h, w, 3), np.uint8) + 30
for bpart in scaled_video_input.columns.levels[0]:
try:
pos = (
(-int(scaled_video_input[bpart].loc[frame, "x"]) + w // 2),
(-int(scaled_video_input[bpart].loc[frame, "y"]) + h // 2),
)
pos_pred = (
(-int(scaled_video_pred[bpart].loc[frame, "x"]) + w // 2),
(-int(scaled_video_pred[bpart].loc[frame, "y"]) + h // 2),
)
cv2.circle(image, pos, 2, (0, 0, 255), -1)
cv2.circle(image, pos_pred, 2, (0, 255, 0), -1)
except KeyError:
continue
# draw skeleton
def draw_line(start, end, df, col):
for bpart in end:
cv2.line(
image,
tuple(-df[start].loc[frame, :].astype(int) + w // 2),
tuple(-df[bpart].loc[frame, :].astype(int) + h // 2),
col,
1,
)
for df, col in zip([scaled_video_input, scaled_video_pred], [(0,0,255),(0,255,0)]):
draw_line("Nose", ["Left_ear", "Right_ear"], df, col)
draw_line("Spine_1", ["Left_ear", "Right_ear", "Left_fhip", "Right_fhip"], df, col)
#draw_line("Spine_2", ["Spine_1", "Tail_base", "Left_bhip", "Right_bhip"], df, col)
#draw_line("Tail_1", ["Tail_base", "Tail_2"], df, col)
#draw_line("Tail_tip", ["Tail_2"], df, col)
draw_line("Spine_2", ["Spine_1", "Tail_base", "Left_bhip", "Right_bhip"], df, col)
draw_line("Tail_1", ["Tail_base", "Tail_2"], df, col)
draw_line("Tail_tip", ["Tail_2"], df, col)
image = cv2.resize(image, (0, 0), fx=factor, fy=factor)
writer.write(image)
writer.release()
cv2.destroyAllWindows()
```
%%%% Output: display_data
%%%% Output: stream
%% Cell type:code id: tags:
``` python
samples = 100000
all_clusters = grouper.predict(deepof_train[:samples])
all_encodings = encoder.predict(deepof_train[:samples])
```
%% Cell type:code id: tags:
``` python
all_clusters
```
%% Cell type:code id: tags:
``` python
sns.scatterplot(all_encodings[:,0], all_encodings[:,1], hue=np.argmax(all_clusters,axis=1))
```
%% Cell type:code id: tags:
``` python
# import umap
# from sklearn.manifold import TSNE
# from sklearn.decomposition import PCA
# from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
# import plotly.express as px
```
%% Cell type:code id: tags:
``` python
def plot_encodings(data, samples, n, clusters, threshold, highlight=None):
reducer = LinearDiscriminantAnalysis(n_components=n)
clusters = clusters[:samples, :]
# filter = np.max(np.mean(clusters, axis=0), axis=1) > threshold
clusters = np.argmax(clusters, axis=1)#[filter]
rep = reducer.fit_transform(data[:samples], clusters)
if n == 2:
df = pd.DataFrame({"encoding-1":rep[:,0],"encoding-2":rep[:,1],"clusters":["A"+str(i) for i in clusters]})
enc = px.scatter(data_frame=df, x="encoding-1", y="encoding-2",
color="clusters", width=600, height=600,
color_discrete_sequence=px.colors.qualitative.T10)
#if highlight:
# ig.add_trace(go.Scatter(x=, y=)
elif n == 3:
df3d = pd.DataFrame({"encoding-1":rep[:,0],"encoding-2":rep[:,1],"encoding-3":rep[:,2],
"clusters":["A"+str(i) for i in clusters]})
enc = px.scatter_3d(data_frame=df3d, x="encoding-1", y="encoding-2", z="encoding-3",
color="clusters", width=600, height=600,
color_discrete_sequence=px.colors.qualitative.T10)
return enc
```
%% Cell type:code id: tags:
``` python
plot_encodings(all_encodings, 10000, 2, all_clusters, 1, 10)
```
%%%% Output: error
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
<ipython-input-53-299d75cbfe33> in <module>
----> 1 plot_encodings(all_encodings, 10000, 2, all_clusters, 1, 10)
<ipython-input-52-f274dd41a1f9> in plot_encodings(data, samples, n, clusters, threshold, highlight)
1 def plot_encodings(data, samples, n, clusters, threshold, highlight=None):
2
----> 3 reducer = LinearDiscriminantAnalysis(n_components=n)
4 clusters = clusters[:samples, :]
5
NameError: name 'LinearDiscriminantAnalysis' is not defined
%% Cell type:markdown id: tags:
# Preprocessing
%% Cell type:code id: tags: