Commit 3234508f authored by lucas_miranda

Reformatted files using the latest version of Black; fixed issues introduced by the seaborn update

parent 9db84390
Pipeline #88235 passed in 24 minutes and 22 seconds
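Most of the hunks below are mechanical reformatting rather than behavioural changes. A minimal sketch of what drives them, assuming Black >= 20.8b0 (the release that introduced the "magic trailing comma": a call that ends in a trailing comma is expanded to one argument per line, even when it would fit on a single line). `hp_int` is a hypothetical stand-in for the `hp.Int` hyperparameter call seen in the diff, used only to keep the sketch runnable; the seaborn-related changes are illustrated after the Python diffs further down.

``` python
def hp_int(name, min_value, max_value, step, default):
    """Hypothetical stand-in for kerastuner's hp.Int, so the sketch runs standalone."""
    return default


# Old formatting: earlier Black releases kept this on one line despite the trailing comma.
# conv_filters = hp_int("units_conv", min_value=32, max_value=256, step=32, default=256,)

# New formatting: the trailing comma makes recent Black expand the call,
# which is exactly the shape of the changes in the hunks below.
conv_filters = hp_int(
    "units_conv",
    min_value=32,
    max_value=256,
    step=32,
    default=256,
)
print(conv_filters)  # 256
```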
@@ -290,7 +290,11 @@ class project:
         scales = self.scales[:, 2:]
         distance_dict = {
-            key: deepof.utils.bpart_distance(tab, scales[i, 1], scales[i, 0],)
+            key: deepof.utils.bpart_distance(
+                tab,
+                scales[i, 1],
+                scales[i, 0],
+            )
             for i, (key, tab) in enumerate(tab_dict.items())
         }
@@ -825,7 +829,9 @@ class table_dict(dict):
         return heatmaps
     def get_training_set(
-        self, test_videos: int = 0, encode_labels: bool = True,
+        self,
+        test_videos: int = 0,
+        encode_labels: bool = True,
     ) -> Tuple[np.ndarray, list, Union[np.ndarray, list], list]:
         """Generates training and test sets as numpy.array objects for model training"""
@@ -30,18 +30,40 @@ class SEQ_2_SEQ_AE(HyperModel):
         """Retrieve hyperparameters to tune"""
         conv_filters = hp.Int(
-            "units_conv", min_value=32, max_value=256, step=32, default=256,
+            "units_conv",
+            min_value=32,
+            max_value=256,
+            step=32,
+            default=256,
         )
         lstm_units_1 = hp.Int(
-            "units_lstm", min_value=128, max_value=512, step=32, default=256,
+            "units_lstm",
+            min_value=128,
+            max_value=512,
+            step=32,
+            default=256,
         )
         dense_2 = hp.Int(
-            "units_dense2", min_value=32, max_value=256, step=32, default=64,
+            "units_dense2",
+            min_value=32,
+            max_value=256,
+            step=32,
+            default=64,
         )
         dropout_rate = hp.Float(
-            "dropout_rate", min_value=0.0, max_value=0.5, default=0.25, step=0.05,
+            "dropout_rate",
+            min_value=0.0,
+            max_value=0.5,
+            default=0.25,
+            step=0.05,
         )
-        encoding = hp.Int("encoding", min_value=16, max_value=64, step=8, default=24,)
+        encoding = hp.Int(
+            "encoding",
+            min_value=16,
+            max_value=64,
+            step=8,
+            default=24,
+        )
         return conv_filters, lstm_units_1, dense_2, dropout_rate, encoding
@@ -360,7 +360,9 @@ class KLDivergenceLayer(tfpl.KLDivergenceAddLoss):
         kl_batch = self._regularizer(distribution_a)
         self.add_loss(kl_batch, inputs=[distribution_a])
         self.add_metric(
-            kl_batch, aggregation="mean", name="kl_divergence",
+            kl_batch,
+            aggregation="mean",
+            name="kl_divergence",
         )
         # noinspection PyProtectedMember
         self.add_metric(self._regularizer._weight, aggregation="mean", name="kl_rate")
@@ -33,7 +33,9 @@ class SEQ_2_SEQ_AE:
     """ Simple sequence to sequence autoencoder implemented with tf.keras """
     def __init__(
-        self, architecture_hparams: Dict = {}, huber_delta: float = 1.0,
+        self,
+        architecture_hparams: Dict = {},
+        huber_delta: float = 1.0,
     ):
         self.hparams = self.get_hparams(architecture_hparams)
         self.CONV_filters = self.hparams["units_conv"]
@@ -118,13 +120,19 @@ class SEQ_2_SEQ_AE:
         # Decoder layers
         Model_D0 = deepof.model_utils.DenseTranspose(
-            Model_E5, activation="elu", output_dim=self.ENCODING,
+            Model_E5,
+            activation="elu",
+            output_dim=self.ENCODING,
         )
         Model_D1 = deepof.model_utils.DenseTranspose(
-            Model_E4, activation="elu", output_dim=self.DENSE_2,
+            Model_E4,
+            activation="elu",
+            output_dim=self.DENSE_2,
         )
         Model_D2 = deepof.model_utils.DenseTranspose(
-            Model_E3, activation="elu", output_dim=self.DENSE_1,
+            Model_E3,
+            activation="elu",
+            output_dim=self.DENSE_1,
         )
         Model_D3 = RepeatVector(input_shape[1])
         Model_D4 = Bidirectional(
@@ -161,7 +169,10 @@ class SEQ_2_SEQ_AE:
             Model_D5,
         )
-    def build(self, input_shape: tuple,) -> Tuple[Any, Any, Any]:
+    def build(
+        self,
+        input_shape: tuple,
+    ) -> Tuple[Any, Any, Any]:
         """Builds the tf.keras model"""
         (
@@ -213,7 +224,10 @@ class SEQ_2_SEQ_AE:
         model.compile(
             loss=Huber(delta=self.delta),
-            optimizer=Nadam(lr=self.learn_rate, clipvalue=0.5,),
+            optimizer=Nadam(
+                lr=self.learn_rate,
+                clipvalue=0.5,
+            ),
             metrics=["mae"],
         )
@@ -298,7 +312,10 @@ class SEQ_2_SEQ_GMVAE:
             ),
             components=[
                 tfd.Independent(
-                    tfd.Normal(loc=init_means[k], scale=1,),
+                    tfd.Normal(
+                        loc=init_means[k],
+                        scale=1,
+                    ),
                     reinterpreted_batch_ndims=1,
                 )
                 for k in range(self.number_of_components)
@@ -537,7 +554,10 @@ class SEQ_2_SEQ_GMVAE:
         encoder = BatchNormalization()(encoder)
         # encoding_shuffle = deepof.model_utils.MCDropout(self.DROPOUT_RATE)(encoder)
-        z_cat = Dense(self.number_of_components, activation="softmax",)(encoder)
+        z_cat = Dense(
+            self.number_of_components,
+            activation="softmax",
+        )(encoder)
         z_cat = deepof.model_utils.Entropy_regulariser(self.entropy_reg_weight)(z_cat)
         z_gauss = Dense(
             deepof.model_utils.tfpl.IndependentNormal.params_size(
@@ -553,12 +573,16 @@ class SEQ_2_SEQ_GMVAE:
         if self.overlap_loss:
             z_gauss = deepof.model_utils.Gaussian_mixture_overlap(
-                self.ENCODING, self.number_of_components, loss=self.overlap_loss,
+                self.ENCODING,
+                self.number_of_components,
+                loss=self.overlap_loss,
             )(z_gauss)
         z = deepof.model_utils.tfpl.DistributionLambda(
             lambda gauss: tfd.mixture.Mixture(
-                cat=tfd.categorical.Categorical(probs=gauss[0],),
+                cat=tfd.categorical.Categorical(
+                    probs=gauss[0],
+                ),
                 components=[
                     tfd.Independent(
                         tfd.Normal(
@@ -663,7 +687,11 @@ class SEQ_2_SEQ_GMVAE:
         grouper = Model(x, z_cat, name="Deep_Gaussian_Mixture_clustering")
         # noinspection PyUnboundLocalVariable
-        gmvaep = Model(inputs=x, outputs=model_outs, name="SEQ_2_SEQ_GMVAE",)
+        gmvaep = Model(
+            inputs=x,
+            outputs=model_outs,
+            name="SEQ_2_SEQ_GMVAE",
+        )
         # Build generator as a separate entity
         g = Input(shape=self.ENCODING)
@@ -682,7 +710,10 @@ class SEQ_2_SEQ_GMVAE:
         if self.compile:
             gmvaep.compile(
                 loss=model_losses,
-                optimizer=Nadam(lr=self.learn_rate, clipvalue=self.clipvalue,),
+                optimizer=Nadam(
+                    lr=self.learn_rate,
+                    clipvalue=self.clipvalue,
+                ),
                 metrics=model_metrics,
                 loss_weights=loss_weights,
             )
@@ -254,7 +254,8 @@ def following_path(
     )
     follow = np.all(
-        np.array([(dist_df.min(axis=1) < tol), right_orient1, right_orient2]), axis=0,
+        np.array([(dist_df.min(axis=1) < tol), right_orient1, right_orient2]),
+        axis=0,
     )
     return follow
@@ -289,9 +290,8 @@ def single_behaviour_analysis(
     for condition in beh_dict.keys():
         for ind in treatment_dict[condition]:
-            beh_dict[condition].append(
-                np.sum(behavioural_dict[ind][behaviour_name])
-                / len(behavioural_dict[ind][behaviour_name])
+            beh_dict[condition] += np.sum(behavioural_dict[ind][behaviour_name]) / len(
+                behavioural_dict[ind][behaviour_name]
             )
     return_list = [beh_dict]
@@ -301,7 +301,10 @@ def single_behaviour_analysis(
         fig, ax = plt.subplots(dpi=plot)
         sns.boxplot(
-            list(beh_dict.keys()), list(beh_dict.values()), orient="vertical", ax=ax
+            x=list(beh_dict.keys()),
+            y=list(beh_dict.values()),
+            orient="vertical",
+            ax=ax,
         )
         ax.set_title("{} across groups".format(behaviour_name))
@@ -614,11 +617,13 @@ def tag_rulebased_frames(
             write_on_frame("Nose-Tail", corners["downright"])
         if tag_dict["sidebyside"][fnum]:
             write_on_frame(
-                "Side-side", conditional_pos(),
+                "Side-side",
+                conditional_pos(),
             )
         if tag_dict["sidereside"][fnum]:
             write_on_frame(
-                "Side-Rside", conditional_pos(),
+                "Side-Rside",
+                conditional_pos(),
             )
         for _id, down_pos, up_pos in zipped_pos:
             if (
@@ -626,7 +631,9 @@ def tag_rulebased_frames(
                 and not tag_dict[_id + "_climbing"][fnum]
             ):
                 write_on_frame(
-                    "*f", (int(w * 0.3 / 10), int(h / 10)), conditional_col(),
+                    "*f",
+                    (int(w * 0.3 / 10), int(h / 10)),
+                    conditional_col(),
                 )
         for _id, down_pos, up_pos in zipped_pos:
@@ -310,7 +310,12 @@ if not tune:
     tf.keras.backend.clear_session()
     run_ID, tensorboard_callback, onecycle, cp_callback = get_callbacks(
-        X_train, batch_size, True, variational, predictor, loss,
+        X_train,
+        batch_size,
+        True,
+        variational,
+        predictor,
+        loss,
     )
     if not variational:
@@ -393,7 +398,10 @@ if not tune:
             epochs=250,
             batch_size=batch_size,
             verbose=1,
-            validation_data=(Xvals, yvals,),
+            validation_data=(
+                Xvals,
+                yvals,
+            ),
             callbacks=callbacks_,
         )
@@ -83,11 +83,14 @@ def get_callbacks(
     log_dir = os.path.abspath("logs/fit/{}".format(run_ID))
     tensorboard_callback = tf.keras.callbacks.TensorBoard(
-        log_dir=log_dir, histogram_freq=1, profile_batch=2,
+        log_dir=log_dir,
+        histogram_freq=1,
+        profile_batch=2,
     )
     onecycle = deepof.model_utils.one_cycle_scheduler(
-        X_train.shape[0] // batch_size * 250, max_rate=0.005,
+        X_train.shape[0] // batch_size * 250,
+        max_rate=0.005,
     )
     callbacks = [run_ID, tensorboard_callback, onecycle]
@@ -282,7 +282,8 @@ def align_trajectories(data: np.array, mode: str = "all") -> np.array:
     for frame in range(data.shape[0]):
         aligned_trajs[frame] = rotate(
-            data[frame].reshape([-1, 2], order="C"), angles[frame],
+            data[frame].reshape([-1, 2], order="C"),
+            angles[frame],
         ).reshape(data.shape[1:], order="C")
     if mode == "all" or mode == "none":
@@ -48,9 +48,9 @@ def plot_heatmap(
     for i, bpart in enumerate(bodyparts):
         heatmap = dframe[bpart]
         if len(bodyparts) > 1:
-            sns.kdeplot(heatmap.x, heatmap.y, cmap="jet", shade=True, alpha=1, ax=ax[i])
+            sns.kdeplot(heatmap.x, heatmap.y, cmap=None, shade=True, alpha=1, ax=ax[i])
         else:
-            sns.kdeplot(heatmap.x, heatmap.y, cmap="jet", shade=True, alpha=1, ax=ax)
+            sns.kdeplot(heatmap.x, heatmap.y, cmap=None, shade=True, alpha=1, ax=ax)
             ax = np.array([ax])
     [x.set_xlim(xlim) for x in ax]
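The seaborn-related hunks (the `sns.boxplot` call in `single_behaviour_analysis` above and the `sns.kdeplot` calls in `plot_heatmap` just above, where the hard-coded `cmap="jet"` is replaced by the default `cmap=None`) update call sites for a newer seaborn release. A minimal sketch of the boxplot change, assuming seaborn >= 0.11, where data is passed as keyword arguments rather than positionally; the labels and values below are made up for illustration:

``` python
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns

rng = np.random.default_rng(0)
groups = ["CSDS"] * 50 + ["NS"] * 50  # hypothetical condition labels
values = rng.normal(size=100)         # hypothetical per-animal scores

fig, ax = plt.subplots()
# Old style (positional data arguments), deprecated by the seaborn 0.11 API overhaul:
# sns.boxplot(groups, values, orient="vertical", ax=ax)
# New style, matching the updated call in single_behaviour_analysis:
sns.boxplot(x=groups, y=values, orient="vertical", ax=ax)
ax.set_title("hypothetical behaviour across groups")
plt.show()
```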
%% Cell type:code id: tags:
``` python
%load_ext autoreload
%autoreload 2
```
%% Cell type:code id: tags:
``` python
import os
os.chdir(os.path.dirname("../"))
```
%% Cell type:code id: tags:
``` python
import deepof.data
import deepof.models
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.preprocessing import StandardScaler
import tensorflow as tf
import tqdm.notebook as tqdm
from ipywidgets import interact
```
%% Cell type:markdown id: tags:
# Retrieve phenotypes
%% Cell type:code id: tags:
``` python
flatten = lambda t: [item for sublist in t for item in sublist]
```
%% Cell type:code id: tags:
``` python
# Load first batch
dset11 = pd.ExcelFile(
"../../Desktop/deepof-data/tagged_videos/Individual_datasets/DLC_batch_1/DLC_single_CDR1_1/1.Openfield_data-part1/JB05.1-OF-SI-part1.xlsx"
)
dset12 = pd.ExcelFile(
"../../Desktop/deepof-data/tagged_videos/Individual_datasets/DLC_batch_1/DLC_single_CDR1_1/2.Openfielddata-part2/AnimalID's-JB05.1-part2.xlsx"
)
dset11 = pd.read_excel(dset11, "Tabelle2")
dset12 = pd.read_excel(dset12, "Tabelle2")
dset11.Test = dset11.Test.apply(lambda x: "Test {}_s1.1".format(x))
dset12.Test = dset12.Test.apply(lambda x: "Test {}_s1.2".format(x))
dset1 = {"CSDS":list(dset11.loc[dset11.Treatment.isin(["CTR+CSDS","NatCre+CSDS"]), "Test"]) +
list(dset12.loc[dset12.Treatment.isin(["CTR+CSDS","NatCre+CSDS"]), "Test"]),
"NS": list(dset11.loc[dset11.Treatment.isin(["CTR+nonstressed","NatCre+nonstressed"]), "Test"]) +
list(dset12.loc[dset12.Treatment.isin(["CTR+nonstressed","NatCre+nonstressed"]), "Test"]),}
dset1inv = {}
for i in flatten(list(dset1.values())):
if i in dset1["CSDS"]:
dset1inv[i] = "CSDS"
else:
dset1inv[i] = "NS"
assert len(dset1inv) == dset11.shape[0] + dset12.shape[0], "You missed some labels!"
```
%% Cell type:code id: tags:
``` python
# Load second batch
dset21 = pd.read_excel(
"../../Desktop/deepof-data/tagged_videos/Individual_datasets/DLC_batch_2/Part1/2_Single/stressproject22.04.2020genotypes-openfieldday1.xlsx"
)
dset22 = pd.read_excel(
"../../Desktop/deepof-data/tagged_videos/Individual_datasets/DLC_batch_2/Part2/2_Single/OpenFieldvideos-part2.xlsx"
)
dset21.Test = dset21.Test.apply(lambda x: "Test {}_s2.1".format(x))
dset22.Test = dset22.Test.apply(lambda x: "Test {}_s2.2".format(x))
dset2 = {"CSDS":list(dset21.loc[dset21.Treatment == "Stress", "Test"]) +
list(dset22.loc[dset22.Treatment == "Stressed", "Test"]),
"NS": list(dset21.loc[dset21.Treatment == "Nonstressed", "Test"]) +
list(dset22.loc[dset22.Treatment == "Nonstressed", "Test"])}
dset2inv = {}
for i in flatten(list(dset2.values())):
if i in dset2["CSDS"]:
dset2inv[i] = "CSDS"
else:
dset2inv[i] = "NS"
assert len(dset2inv) == dset21.shape[0] + dset22.shape[0], "You missed some labels!"
```
%% Cell type:code id: tags:
``` python
# Load third batch
dset31 = pd.read_excel(
"../../Desktop/deepof-data/tagged_videos/Individual_datasets/DLC_batch_3/1.Day2OF-SIpart1/JB05 2Female-ELS-OF-SIpart1.xlsx"
)
dset32 = pd.read_excel(
"../../Desktop/deepof-data/tagged_videos/Individual_datasets/DLC_batch_3/2.Day3OF-SIpart2/JB05 2FEMALE-ELS-OF-SIpart2.xlsx"
)
dset31.Test = dset31.Test.apply(lambda x: "Test {}_s3.1".format(x))
dset32.Test = dset32.Test.apply(lambda x: "Test {}_s3.2".format(x))
dset3 = {"CSDS":[],
"NS": list(dset31.loc[:, "Test"]) +
list(dset32.loc[:, "Test"])}
dset3inv = {}
for i in flatten(list(dset3.values())):
if i in dset3["CSDS"]:
dset3inv[i] = "CSDS"
else:
dset3inv[i] = "NS"
assert len(dset3inv) == dset31.shape[0] + dset32.shape[0], "You missed some labels!"
```
%% Cell type:code id: tags:
``` python
# Load fourth batch
dset41 = os.listdir("../../Desktop/deepof-data/tagged_videos/Individual_datasets/DLC_batch_4/JB05.4-OpenFieldvideos/")
# Remove empty video!
dset41 = [vid for vid in dset41 if "52" not in vid]
dset4 = {"CSDS":[],
"NS": [i[:-4]+"_s4" for i in dset41]}
dset4inv = {}
for i in flatten(list(dset4.values())):
if i in dset4["CSDS"]:
dset4inv[i] = "CSDS"
else:
dset4inv[i] = "NS"
assert len(dset4inv) == len(dset41), "You missed some labels!"
```
%% Cell type:code id: tags:
``` python
# Merge phenotype dicts and serialise!
aggregated_dset = {**dset1inv, **dset2inv, **dset3inv, **dset4inv}
```
%% Cell type:code id: tags:
``` python
from collections import Counter
print(Counter(aggregated_dset.values()))
print(115+52)
```
%%%% Output: stream
Counter({'NS': 115, 'CSDS': 52})
167
%% Cell type:markdown id: tags:
# Define and run project
%% Cell type:code id: tags:
``` python
%%time
deepof_main = deepof.data.project(path=os.path.join("..","..","Desktop","deepof-data","tagged_videos","phenotest"),
deepof_main = deepof.data.project(path=os.path.join("..","..","Desktop","deepof_single_topview"),
smooth_alpha=0.99,
arena_dims=[380],
exp_conditions=dset2inv)
#exp_conditions=dset2inv
)
```
%%%% Output: stream
CPU times: user 10.3 s, sys: 1.94 s, total: 12.2 s
Wall time: 2.67 s
CPU times: user 27.2 s, sys: 4.86 s, total: 32 s
Wall time: 7.35 s
%% Cell type:code id: tags:
``` python
%%time
deepof_main = deepof_main.run(verbose=True)
print(deepof_main)
```
%%%% Output: stream
Loading trajectories...
Smoothing trajectories...
Computing distances...
Computing angles...
Done!
CPU times: user 10.3 s, sys: 729 ms, total: 11.1 s
Wall time: 11.1 s
CPU times: user 41.1 s, sys: 3.98 s, total: 45.1 s
Wall time: 46.2 s
%% Cell type:code id: tags:
``` python
all_quality = pd.concat([tab for tab in deepof_main.get_quality().values()]).droplevel("scorer", axis=1)
```
%% Cell type:code id: tags:
``` python
all_quality.boxplot(rot=45)
plt.ylim(0.99985, 1.00001)
plt.show()
```
%% Cell type:code id: tags:
``` python
@interact(quality_top=(0., 1., 0.01))
def low_quality_tags(quality_top):
pd.DataFrame(pd.melt(all_quality).groupby("bodyparts").value.apply(
lambda y: sum(y<quality_top) / len(y) * 100)
).sort_values(by="value", ascending=False).plot.bar(rot=45)
plt.xlabel("body part")
plt.ylabel("Tags with quality under {} (%)".format(quality_top))
plt.tight_layout()
plt.legend([])
plt.show()
```
%% Cell type:markdown id: tags:
# Generate coords
%% Cell type:code id: tags:
``` python
%%time
deepof_coords = deepof_main.get_coords(center="Center", polar=False, speed=0, align="Spine_1", align_inplace=True, propagate_labels=True)
deepof_dists = deepof_main.get_distances(propagate_labels=False)
deepof_angles = deepof_main.get_angles(propagate_labels=False)
```
%% Cell type:markdown id: tags:
# Visualization
%% Cell type:code id: tags:
``` python
dfencs = pd.read_hdf('../../Desktop/dash_data_1_20201120-141341.h5')
dfencs.cluster = dfencs.cluster.astype(str) + "a"
clust_occur = pd.read_hdf('../../Desktop/dash_data_2_20201120-141341.h5')
```
%% Cell type:code id: tags:
``` python
pal = sns.color_palette("tab10", n_colors=10)
plt.rcParams['figure.dpi'] = 100
fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2,2)
sns.barplot(data=clust_occur.loc[clust_occur.reset_index().epoch==1,:], x="cluster", y="count", ax=ax1,
palette=pal)
sns.barplot(data=clust_occur.loc[clust_occur.reset_index().epoch==2,:], x="cluster", y="count", ax=ax2,
palette=pal)
sns.scatterplot(data=dfencs.loc[dfencs.epoch==1,:], x="x", y="y", hue="cluster", legend=False, cmap="jet", ax=ax3, alpha=0.4,
palette=pal, size=1, edgecolor=None)
sns.scatterplot(data=dfencs.loc[dfencs.epoch==2,:], x="x", y="y", hue="cluster", legend=False, cmap="jet", ax=ax4, alpha=0.4,
palette=pal, size=1, edgecolor=None)