Commit b001609f authored by lucas_miranda

Added test_deepof for testing functions with pytest and hypothesis

parent e9f79f58
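
The test module named in the commit message is not shown in the diff below. As a rough, illustrative sketch of the pytest + hypothesis pattern it refers to (the helper import and its signature are assumptions, not a confirmed deepof API), such a property-based test could look like:

``` python
# Hypothetical sketch of a property-based test in test_deepof.py;
# smooth_mult_trajectory and its alpha parameter are assumed names.
from hypothesis import given, strategies as st
from hypothesis.extra.numpy import arrays

from source.utils import smooth_mult_trajectory  # assumed location


@given(
    arrays(
        dtype=float,
        shape=st.tuples(st.integers(10, 100), st.integers(2, 8)),
        elements=st.floats(-100, 100),
    )
)
def test_smoothing_preserves_shape(table):
    # Whatever smoothing does to the values, it should not change the shape
    smoothed = smooth_mult_trajectory(table, alpha=0.5)
    assert smoothed.shape == table.shape
```
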
@@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": 218,
"execution_count": 48,
"metadata": {},
"outputs": [
{
@@ -24,7 +24,7 @@
},
{
"cell_type": "code",
"execution_count": 219,
"execution_count": 49,
"metadata": {},
"outputs": [],
"source": [
@@ -40,7 +40,7 @@
},
{
"cell_type": "code",
"execution_count": 220,
"execution_count": 50,
"metadata": {
"tags": [
"parameters"
@@ -61,7 +61,7 @@
},
{
"cell_type": "code",
"execution_count": 221,
"execution_count": 51,
"metadata": {},
"outputs": [],
"source": [
@@ -71,7 +71,7 @@
},
{
"cell_type": "code",
"execution_count": 222,
"execution_count": 52,
"metadata": {},
"outputs": [
{
@@ -90,7 +90,7 @@
" 'Day2Test40DLC']"
]
},
"execution_count": 222,
"execution_count": 52,
"metadata": {},
"output_type": "execute_result"
}
@@ -101,7 +101,7 @@
},
{
"cell_type": "code",
"execution_count": 223,
"execution_count": 53,
"metadata": {},
"outputs": [],
"source": [
@@ -117,15 +117,15 @@
},
{
"cell_type": "code",
"execution_count": 224,
"execution_count": 54,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 2.73 s, sys: 809 ms, total: 3.54 s\n",
"Wall time: 1.37 s\n"
"CPU times: user 2.72 s, sys: 846 ms, total: 3.57 s\n",
"Wall time: 1.45 s\n"
]
}
],
@@ -148,15 +148,15 @@
},
{
"cell_type": "code",
"execution_count": 225,
"execution_count": 55,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 6.67 s, sys: 1.02 s, total: 7.69 s\n",
"Wall time: 1.65 s\n"
"CPU times: user 6.56 s, sys: 1.05 s, total: 7.62 s\n",
"Wall time: 1.73 s\n"
]
}
],
@@ -185,7 +185,7 @@
},
{
"cell_type": "code",
"execution_count": 226,
"execution_count": 56,
"metadata": {},
"outputs": [
{
@@ -198,8 +198,8 @@
"Computing angles...\n",
"Done!\n",
"Coordinates of 47 videos across 4 conditions\n",
"CPU times: user 5.22 s, sys: 714 ms, total: 5.93 s\n",
"Wall time: 6.67 s\n"
"CPU times: user 5.54 s, sys: 1.09 s, total: 6.63 s\n",
"Wall time: 13.1 s\n"
]
},
{
@@ -208,7 +208,7 @@
"source.preprocess.coordinates"
]
},
"execution_count": 226,
"execution_count": 56,
"metadata": {},
"output_type": "execute_result"
}
@@ -222,7 +222,7 @@
},
{
"cell_type": "code",
"execution_count": 227,
"execution_count": 57,
"metadata": {},
"outputs": [
{
@@ -235,8 +235,8 @@
"Computing angles...\n",
"Done!\n",
"DLC analysis of 31 videos\n",
"CPU times: user 3.14 s, sys: 326 ms, total: 3.47 s\n",
"Wall time: 3.64 s\n"
"CPU times: user 3.04 s, sys: 262 ms, total: 3.31 s\n",
"Wall time: 6.14 s\n"
]
},
{
@@ -245,7 +245,7 @@
"source.preprocess.coordinates"
]
},
"execution_count": 227,
"execution_count": 57,
"metadata": {},
"output_type": "execute_result"
}
@@ -266,7 +266,7 @@
},
{
"cell_type": "code",
"execution_count": 228,
"execution_count": 58,
"metadata": {
"scrolled": true
},
@@ -275,8 +275,8 @@
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 1.26 s, sys: 74.7 ms, total: 1.33 s\n",
"Wall time: 1.37 s\n"
"CPU times: user 1.16 s, sys: 41.1 ms, total: 1.2 s\n",
"Wall time: 1.24 s\n"
]
},
{
@@ -285,7 +285,7 @@
"'coords'"
]
},
"execution_count": 228,
"execution_count": 58,
"metadata": {},
"output_type": "execute_result"
}
@@ -301,7 +301,7 @@
},
{
"cell_type": "code",
"execution_count": 229,
"execution_count": 59,
"metadata": {},
"outputs": [
{
@@ -310,7 +310,7 @@
"FrozenList([['B_Center', 'B_Left_ear', 'B_Left_flank', 'B_Nose', 'B_Right_ear', 'B_Right_flank', 'B_Tail_base'], ['x', 'y']])"
]
},
"execution_count": 229,
"execution_count": 59,
"metadata": {},
"output_type": "execute_result"
}
@@ -321,15 +321,15 @@
},
{
"cell_type": "code",
"execution_count": 230,
"execution_count": 60,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 89.9 ms, sys: 77.2 ms, total: 167 ms\n",
"Wall time: 209 ms\n"
"CPU times: user 92.1 ms, sys: 62 ms, total: 154 ms\n",
"Wall time: 297 ms\n"
]
},
{
@@ -338,7 +338,7 @@
"'dists'"
]
},
"execution_count": 230,
"execution_count": 60,
"metadata": {},
"output_type": "execute_result"
}
@@ -354,15 +354,15 @@
},
{
"cell_type": "code",
"execution_count": 231,
"execution_count": 61,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 232 ms, sys: 157 ms, total: 388 ms\n",
"Wall time: 419 ms\n"
"CPU times: user 288 ms, sys: 281 ms, total: 569 ms\n",
"Wall time: 626 ms\n"
]
},
{
@@ -371,7 +371,7 @@
"'angles'"
]
},
"execution_count": 231,
"execution_count": 61,
"metadata": {},
"output_type": "execute_result"
}
@@ -573,7 +573,7 @@
},
{
"cell_type": "code",
"execution_count": 237,
"execution_count": 62,
"metadata": {},
"outputs": [],
"source": [
@@ -586,7 +586,7 @@
},
{
"cell_type": "code",
"execution_count": 238,
"execution_count": 63,
"metadata": {},
"outputs": [],
"source": [
@@ -599,7 +599,7 @@
},
{
"cell_type": "code",
"execution_count": 239,
"execution_count": 66,
"metadata": {},
"outputs": [
{
@@ -607,8 +607,9 @@
"output_type": "stream",
"text": [
"(70504, 13, 12)\n",
"CPU times: user 2 s, sys: 153 ms, total: 2.16 s\n",
"Wall time: 2.19 s\n"
"(70504, 13, 12)\n",
"CPU times: user 2.09 s, sys: 130 ms, total: 2.22 s\n",
"Wall time: 2.3 s\n"
]
}
],
@@ -622,15 +623,15 @@
},
{
"cell_type": "code",
"execution_count": 240,
"execution_count": 65,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 9.33 s, sys: 90 ms, total: 9.42 s\n",
"Wall time: 9.42 s\n"
"CPU times: user 9.95 s, sys: 82.6 ms, total: 10 s\n",
"Wall time: 10 s\n"
]
},
{
@@ -639,7 +640,7 @@
"(465019, 13, 12)"
]
},
"execution_count": 240,
"execution_count": 65,
"metadata": {},
"output_type": "execute_result"
}
@@ -9426,9 +9427,7 @@
{
"cell_type": "code",
"execution_count": 41,
"metadata": {
"scrolled": false
},
"metadata": {},
"outputs": [],
"source": [
"# animated_cluster_heatmap(pttest, 4, clusts, samples=10)"
......
%% Cell type:code id: tags:
 
``` python
%load_ext autoreload
%autoreload 2
 
import warnings
warnings.filterwarnings("ignore")
```
 
%%%% Output: stream
 
The autoreload extension is already loaded. To reload it, use:
%reload_ext autoreload
 
%% Cell type:code id: tags:
 
``` python
#from source.utils import *
from source.preprocess import *
from source.model_utils import *
import os
import pickle
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns  # used for the kdeplots further down
from collections import defaultdict
from tqdm import tqdm_notebook as tqdm
```
 
%% Cell type:code id: tags:parameters
 
``` python
path = "../../Desktop/DLC_social_1/"
path2 = "../../Desktop/DLC_social_2/"
```
 
%% Cell type:markdown id: tags:
 
# Set up and design the project
 
%% Cell type:code id: tags:
 
``` python
with open('{}DLC_social_1_exp_conditions.pickle'.format(path), 'rb') as handle:
    Treatment_dict = pickle.load(handle)
```
 
%% Cell type:code id: tags:
 
``` python
Treatment_dict["WT+NS"]
```
 
%%%% Output: execute_result
 
['Test 6DLC',
 'Test 15DLC',
 'Test 24DLC',
 'Test 29DLC',
 'Test 38DLC',
 'Test 47DLC',
 'Day2Test8DLC',
 'Day2Test13DLC',
 'Day2Test22DLC',
 'Day2Test31DLC',
 'Day2Test40DLC']
 
%% Cell type:code id: tags:
 
``` python
# Which angles to compute?
bp_dict = {'B_Nose': ['B_Left_ear', 'B_Right_ear'],
           'B_Left_ear': ['B_Nose', 'B_Right_ear', 'B_Center', 'B_Left_flank'],
           'B_Right_ear': ['B_Nose', 'B_Left_ear', 'B_Center', 'B_Right_flank'],
           'B_Center': ['B_Left_ear', 'B_Right_ear', 'B_Left_flank', 'B_Right_flank', 'B_Tail_base'],
           'B_Left_flank': ['B_Left_ear', 'B_Center', 'B_Tail_base'],
           'B_Right_flank': ['B_Right_ear', 'B_Center', 'B_Tail_base'],
           'B_Tail_base': ['B_Center', 'B_Left_flank', 'B_Right_flank']}
```
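
%% Cell type:markdown id: tags:

One way to read the connectivity dict above: each key is a vertex and its value the list of neighbours, so every unordered pair of neighbours sharing a vertex defines one candidate angle. A purely illustrative sketch (not deepof code) of enumerating those triplets:

%% Cell type:code id: tags:

``` python
from itertools import combinations

# Illustrative only: one (neighbour, vertex, neighbour) triplet per pair
# of neighbours around each central body part in bp_dict
angle_triplets = [
    (a, center, b)
    for center, neighbours in bp_dict.items()
    for a, b in combinations(neighbours, 2)
]
len(angle_triplets)
```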
 
%% Cell type:code id: tags:
 
``` python
%%time
DLC_social_1 = project(path=path,  # Path where to find the required files
                       smooth_alpha=0.5,  # Alpha value for exponentially weighted smoothing
                       distances=['B_Center', 'B_Nose', 'B_Left_ear', 'B_Right_ear',
                                  'B_Left_flank', 'B_Right_flank', 'B_Tail_base'],
                       ego=False,
                       angles=True,
                       connectivity=bp_dict,
                       arena='circular',  # Type of arena used in the experiments
                       arena_dims=[380],  # Dimensions of the arena; just one if it's circular
                       subset_condition="B",
                       video_format='.mp4',
                       table_format='.h5',
                       exp_conditions=Treatment_dict)
```
 
%%%% Output: stream
 
CPU times: user 2.72 s, sys: 846 ms, total: 3.57 s
Wall time: 1.45 s
 
%% Cell type:code id: tags:
 
``` python
%%time
DLC_social_2 = project(path=path2,  # Path where to find the required files
                       smooth_alpha=0.5,  # Alpha value for exponentially weighted smoothing
                       distances=['B_Center', 'B_Nose', 'B_Left_ear', 'B_Right_ear',
                                  'B_Left_flank', 'B_Right_flank', 'B_Tail_base'],
                       ego=False,
                       angles=True,
                       connectivity=bp_dict,
                       arena='circular',  # Type of arena used in the experiments
                       arena_dims=[380],  # Dimensions of the arena; just one if it's circular
                       subset_condition="B",
                       video_format='.mp4',
                       table_format='.h5')
```
 
%%%% Output: stream
 
CPU times: user 6.56 s, sys: 1.05 s, total: 7.62 s
Wall time: 1.73 s
 
%% Cell type:markdown id: tags:
 
# Run project
 
%% Cell type:code id: tags:
 
``` python
%%time
DLC_social_1_coords = DLC_social_1.run(verbose=True)
print(DLC_social_1_coords)
type(DLC_social_1_coords)
```
 
%%%% Output: stream
 
Loading trajectories...
Smoothing trajectories...
Computing distances...
Computing angles...
Done!
Coordinates of 47 videos across 4 conditions
CPU times: user 5.54 s, sys: 1.09 s, total: 6.63 s
Wall time: 13.1 s
 
%%%% Output: execute_result
 
source.preprocess.coordinates
 
%% Cell type:code id: tags:
 
``` python
%%time
DLC_social_2_coords = DLC_social_2.run(verbose=True)
print(DLC_social_2_coords)
type(DLC_social_2_coords)
```
 
%%%% Output: stream
 
Loading trajectories...
Smoothing trajectories...
Computing distances...
Computing angles...
Done!
DLC analysis of 31 videos
CPU times: user 3.04 s, sys: 262 ms, total: 3.31 s
Wall time: 6.14 s
 
%%%% Output: execute_result
 
source.preprocess.coordinates
 
%% Cell type:markdown id: tags:
 
# Generate coords
 
%% Cell type:code id: tags:
 
``` python
%%time
ptest = DLC_social_1_coords.get_coords(center="B_Center", polar=False, speed=0, length='00:10:00')
ptest._type
 
ptest2 = DLC_social_2_coords.get_coords(center="B_Center", polar=False, speed=0, length='00:10:00')
ptest2._type
```
 
%%%% Output: stream
 
CPU times: user 1.16 s, sys: 41.1 ms, total: 1.2 s
Wall time: 1.24 s
 
%%%% Output: execute_result
 
'coords'
 
%% Cell type:code id: tags:
 
``` python
ptest['Test 13DLC'].columns.levels
```
 
%%%% Output: execute_result
 
FrozenList([['B_Center', 'B_Left_ear', 'B_Left_flank', 'B_Nose', 'B_Right_ear', 'B_Right_flank', 'B_Tail_base'], ['x', 'y']])
 
%% Cell type:code id: tags:
 
``` python
%%time
dtest = DLC_social_1_coords.get_distances(speed=0, length='00:10:00')
dtest._type
 
dtest2 = DLC_social_2_coords.get_distances(speed=0, length='00:10:00')
dtest2._type
```
 
%%%% Output: stream
 
CPU times: user 92.1 ms, sys: 62 ms, total: 154 ms
Wall time: 297 ms
 
%%%% Output: execute_result
 
'dists'
 
%% Cell type:code id: tags:
 
``` python
%%time
atest = DLC_social_1_coords.get_angles(degrees=True, speed=0, length='00:10:00')
atest._type
 
atest2 = DLC_social_2_coords.get_angles(degrees=True, speed=0, length='00:10:00')
atest2._type
```
 
%%%% Output: stream
 
CPU times: user 288 ms, sys: 281 ms, total: 569 ms
Wall time: 626 ms
 
%%%% Output: execute_result
 
'angles'
 
%% Cell type:markdown id: tags:
 
# Visualization playground
 
%% Cell type:code id: tags:
 
``` python
# ptest.plot_heatmaps(['B_Nose'], i=2)
```
 
%% Cell type:code id: tags:
 
``` python
ptest['Day2Test13DLC']['B_Nose'].iloc[:5000]
```
 
%%%% Output: execute_result
 
coords                    x          y
00:00:00          19.370636  42.280235
00:00:00.040000   19.370636  42.280235
00:00:00.080000   19.500778  42.243202
00:00:00.120000   19.207077  42.388008
00:00:00.160000   19.395851  42.293156
...                     ...        ...
00:03:19.800000  -35.108001 -15.180417
00:03:19.840000  -36.197332 -15.403811
00:03:19.880000  -37.723694 -13.514239
00:03:19.920000  -39.470217 -13.346349
00:03:19.960000  -40.692077 -11.919606

[5000 rows x 2 columns]
 
%% Cell type:code id: tags:
 
``` python
#Plot animation of trajectory over time with different smoothings
# plt.plot(ptestb['Day2Test13DLC']['B_Nose'].iloc[:50]['x'],
# ptestb['Day2Test13DLC']['B_Nose'].iloc[:50]['y'], label='alpha=0.95')
 
# plt.plot(ptestd['Day2Test13DLC']['B_Nose'].iloc[:50]['x'],
# ptestd['Day2Test13DLC']['B_Nose'].iloc[:50]['y'], label='alpha=0.65')
 
# plt.xlabel('x')
# plt.ylabel('y')
# plt.title('Mouse Center Trajectory using different exponential smoothings')
# plt.legend()
# plt.show()
```
 
%% Cell type:markdown id: tags:
 
# Dimensionality reduction playground
 
%% Cell type:code id: tags:
 
``` python
#pca = ptest.pca(4, 1000)
```
 
%% Cell type:code id: tags:
 
``` python
#plt.scatter(*pca[0].T)
#plt.show()
```
 
%% Cell type:markdown id: tags:
 
# Preprocessing playground
 
%% Cell type:code id: tags:
 
``` python
mtest = merge_tables(
    DLC_social_1_coords.get_coords(center="B_Center", polar=False, length='00:10:00', align='B_Nose')
    #DLC_social_1_coords.get_distances(speed=0, length='00:10:00'),
    #DLC_social_1_coords.get_angles(degrees=True, speed=0, length='00:10:00'),
)
```
 
%% Cell type:code id: tags:
 
``` python
mtest2 = merge_tables(
    DLC_social_2_coords.get_coords(center="B_Center", polar=False, length='00:10:00', align='B_Nose'),
    #DLC_social_2_coords.get_distances(speed=0, length='00:10:00'),
    #DLC_social_2_coords.get_angles(degrees=True, speed=0, length='00:10:00'),
)
```
 
%% Cell type:code id: tags:
 
``` python
%%time
pttest = mtest.preprocess(window_size=13, window_step=10, filter=None, sigma=55,
                          shift=0, scale='standard', align='center', shuffle=True, test_videos=0)
print(pttest.shape)
#print(pttrain.shape)
```
 
%%%% Output: stream
 
(70504, 13, 12)
(70504, 13, 12)
CPU times: user 2.09 s, sys: 130 ms, total: 2.22 s
Wall time: 2.3 s
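
%% Cell type:markdown id: tags:

For orientation, the output shape follows from the sliding window: with `window_size=13` and `window_step=10`, a table with T frames contributes roughly `(T - 13) // 10 + 1` windows, and the 12 features per frame are the x/y coordinates of the six aligned body parts. Summed over the 47 videos, this gives the 70504 windows printed above.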
 
%% Cell type:code id: tags:
 
``` python
%%time
pttest2 = mtest2.preprocess(window_size=13, window_step=1, filter=None, sigma=55,
                            shift=0, scale="standard", align='all', shuffle=False)
pttest2.shape
```
 
%%%% Output: stream
 
CPU times: user 9.95 s, sys: 82.6 ms, total: 10 s
Wall time: 10 s
 
%%%% Output: execute_result
 
(465019, 13, 12)
 
%% Cell type:code id: tags:
 
``` python
n = 100
 
plt.scatter(pttest[:n,10,0], pttest[:n,10,1], label='Nose')
plt.scatter(pttest[:n,10,2], pttest[:n,10,3], label='Right ear')
plt.scatter(pttest[:n,10,4], pttest[:n,10,5], label='Right hips')
plt.scatter(pttest[:n,10,6], pttest[:n,10,7], label='Left ear')
plt.scatter(pttest[:n,10,8], pttest[:n,10,9], label='Left hips')
plt.scatter(pttest[:n,10,10], pttest[:n,10,11], label='Tail base')
 
 
plt.xlabel('x')
plt.ylabel('y')
plt.legend()
plt.show()
```
 
%%%% Output: display_data
 
 
%% Cell type:markdown id: tags:
 
# Trained models playground
 
%% Cell type:markdown id: tags:
 
### Seq 2 seq Variational Auto Encoder
 
%% Cell type:code id: tags:
 
``` python
from datetime import datetime
import tensorflow.keras as k
import tensorflow as tf
```
 
%% Cell type:code id: tags:
 
``` python
NAME = 'Baseline_AE_512_wu10_slide10_gauss_fullval'
log_dir = os.path.abspath(
"logs/fit/{}_{}".format(NAME, datetime.now().strftime("%Y%m%d-%H%M%S"))
)
tensorboard_callback = k.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)
```
 
%% Cell type:code id: tags:
 
``` python
from source.models import SEQ_2_SEQ_AE, SEQ_2_SEQ_GMVAE
```
 
%% Cell type:code id: tags:
 
``` python
encoder, decoder, ae = SEQ_2_SEQ_AE(pttest.shape).build()
ae.build(pttest.shape)
```
 
%% Cell type:code id: tags:
 
``` python
ae.summary()
```
 
%%%% Output: stream
 
Model: "SEQ_2_SEQ_AE"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
SEQ_2_SEQ_Encoder (Sequentia (None, 16) 1769680
_________________________________________________________________
SEQ_2_SEQ_Decoder (Sequentia multiple 2678172
=================================================================
Total params: 4,405,660
Trainable params: 4,401,308
Non-trainable params: 4,352
_________________________________________________________________
 
%% Cell type:code id: tags:
 
``` python
%%time
 
tf.keras.backend.clear_session()
 
encoder, generator, grouper, gmvaep, kl_warmup_callback, mmd_warmup_callback = SEQ_2_SEQ_GMVAE(
    pttest.shape,
    loss='ELBO',
    number_of_components=30,
    kl_warmup_epochs=10,
    mmd_warmup_epochs=10,
    encoding=16,
    predictor=False,
).build()
# gmvaep.build(pttest.shape)
```
 
%%%% Output: stream
 
CPU times: user 10.2 s, sys: 2.66 s, total: 12.8 s
Wall time: 7.17 s
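
%% Cell type:markdown id: tags:

Note that `build()` also returns KL and MMD warm-up callbacks, which are not wired into a training run in this notebook. A minimal sketch of how they could be combined with the TensorBoard callback defined earlier (the epoch count is illustrative):

%% Cell type:code id: tags:

``` python
# Illustrative only: gather the callbacks for an eventual fit() call
callbacks = [cb for cb in (tensorboard_callback, kl_warmup_callback, mmd_warmup_callback)
             if cb is not None]
# gmvaep.fit(pttest, pttest, epochs=50, batch_size=512, callbacks=callbacks)
```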
 
%% Cell type:code id: tags:
 
``` python
import tensorflow as tf
from tensorflow import keras
K = tf.keras.backend


class ExponentialLearningRate(tf.keras.callbacks.Callback):
    def __init__(self, factor):
        self.factor = factor
        self.rates = []
        self.losses = []

    def on_batch_end(self, batch, logs):
        self.rates.append(K.get_value(self.model.optimizer.lr))
        self.losses.append(logs["loss"])
        K.set_value(self.model.optimizer.lr, self.model.optimizer.lr * self.factor)


def find_learning_rate(model, X, y, epochs=1, batch_size=32, min_rate=10**-5, max_rate=10):
    init_weights = model.get_weights()
    iterations = len(X) // batch_size * epochs
    factor = np.exp(np.log(max_rate / min_rate) / iterations)
    init_lr = K.get_value(model.optimizer.lr)
    K.set_value(model.optimizer.lr, min_rate)
    exp_lr = ExponentialLearningRate(factor)
    history = model.fit(X, y, epochs=epochs, batch_size=batch_size,
                        callbacks=[exp_lr])
    K.set_value(model.optimizer.lr, init_lr)
    model.set_weights(init_weights)
    return exp_lr.rates, exp_lr.losses


def plot_lr_vs_loss(rates, losses):
    plt.plot(rates, losses)
    plt.gca().set_xscale('log')
    plt.hlines(min(losses), min(rates), max(rates))
    plt.axis([min(rates), max(rates), min(losses), (losses[0] + min(losses)) / 2])
    plt.xlabel("Learning rate")
    plt.ylabel("Loss")
 
%% Cell type:code id: tags:
 
``` python
class OneCycleScheduler(tf.keras.callbacks.Callback):
    def __init__(self, iterations, max_rate, start_rate=None,
                 last_iterations=None, last_rate=None):
        self.iterations = iterations
        self.max_rate = max_rate
        self.start_rate = start_rate or max_rate / 10
        self.last_iterations = last_iterations or iterations // 10 + 1
        self.half_iteration = (iterations - self.last_iterations) // 2
        self.last_rate = last_rate or self.start_rate / 1000
        self.iteration = 0

    def _interpolate(self, iter1, iter2, rate1, rate2):
        return ((rate2 - rate1) * (self.iteration - iter1)
                / (iter2 - iter1) + rate1)

    def on_batch_begin(self, batch, logs):
        if self.iteration < self.half_iteration:
            rate = self._interpolate(0, self.half_iteration, self.start_rate, self.max_rate)
        elif self.iteration < 2 * self.half_iteration:
            rate = self._interpolate(self.half_iteration, 2 * self.half_iteration,
                                     self.max_rate, self.start_rate)
        else:
            rate = self._interpolate(2 * self.half_iteration, self.iterations,
                                     self.start_rate, self.last_rate)
        rate = max(rate, self.last_rate)
        self.iteration += 1
        K.set_value(self.model.optimizer.lr, rate)
```
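
%% Cell type:markdown id: tags:

The scheduler above is defined but never attached to a training run here. A minimal usage sketch, assuming one epoch at batch size 512 and a peak rate taken from the learning-rate sweep below (both numbers are illustrative):

%% Cell type:code id: tags:

``` python
# Illustrative only: schedule a single 1-cycle pass over pttest
n_epochs = 1
onecycle = OneCycleScheduler(
    iterations=len(pttest) // 512 * n_epochs,  # total optimiser steps
    max_rate=1e-3,  # illustrative peak learning rate
)
# gmvaep.fit(pttest, pttest, epochs=n_epochs, batch_size=512,
#            callbacks=[onecycle])
```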
 
%% Cell type:code id: tags:
 
``` python
batch_size = 512
rates, losses = find_learning_rate(gmvaep, pttest[:512*10], pttest[:512*10], epochs=1, batch_size=batch_size)
plot_lr_vs_loss(rates, losses)
plt.title("Learning rate tuning")
plt.axis([min(rates), max(rates), min(losses), (losses[0] + min(losses)) / 1.4])
plt.show()
```
 
%%%% Output: stream
 
10/10 [==============================] - 8s 848ms/step - loss: 1914403.7500 - mae: 3.2971 - -weight_entropy: -2.4303 - kl_divergence: 71.5170 - kl_rate: 1.0000 - dead_neurons: 0.0000e+00
 
%%%% Output: display_data
 
 
%% Cell type:markdown id: tags:
 
# Encoding plots
 
%% Cell type:code id: tags:
 
``` python
import umap
from sklearn.manifold import TSNE
from sklearn.decomposition import PCA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
import plotly.express as px
```
 
%% Cell type:code id: tags:
 
``` python
data = pttest
samples = 15000
montecarlo = 10
```
 
%% Cell type:code id: tags:
 
``` python
weights = "GMVAE_components=30_loss=ELBO_kl_warmup=30_mmd_warmup=30_20200804-225526_final_weights.h5"
 
gmvaep.load_weights(weights)
 
if montecarlo:
clusts = np.stack([grouper(data[:samples]) for sample in (tqdm(range(montecarlo)))])
clusters = clusts.mean(axis=0)
clusters = np.argmax(clusters, axis=1)
 
else:
clusters = grouper(data[:samples], training=False)
 
 
clusters = np.argmax(clusters, axis=1)
```
 
%%%% Output: display_data
 
 
%%%% Output: stream
 
 
%% Cell type:code id: tags:
 
``` python
def plot_encodings(data, samples, n, clusters, threshold):

    reducer = PCA(n_components=n)
    clusters = clusters[:, :samples]
    # Keep only samples whose mean MC-dropout confidence exceeds the threshold
    filter = np.max(np.mean(clusters, axis=0), axis=1) > threshold
    print("{}/{} samples used ({}%); confidence threshold={}".format(sum(filter),
                                                                     samples,
                                                                     sum(filter)/samples*100,
                                                                     threshold))

    clusters = np.argmax(np.mean(clusters, axis=0), axis=1)[filter]
    rep = reducer.fit_transform(encoder.predict(data[:samples][filter]))

    if n == 2:
        df = pd.DataFrame({"encoding-1": rep[:, 0], "encoding-2": rep[:, 1],
                           "clusters": ["A" + str(i) for i in clusters]})

        enc = px.scatter(data_frame=df, x="encoding-1", y="encoding-2",
                         color="clusters", width=600, height=600,
                         color_discrete_sequence=px.colors.qualitative.T10)

    elif n == 3:
        df3d = pd.DataFrame({"encoding-1": rep[:, 0], "encoding-2": rep[:, 1], "encoding-3": rep[:, 2],
                             "clusters": ["A" + str(i) for i in clusters]})

        enc = px.scatter_3d(data_frame=df3d, x="encoding-1", y="encoding-2", z="encoding-3",
                            color="clusters", width=600, height=600,
                            color_discrete_sequence=px.colors.qualitative.T10)

    return enc

plot_encodings(data, 5000, 2, clusts, 0.5)
```
 
%%%% Output: stream
 
3581/5000 samples used (71.61999999999999%); confidence threshold=0.5
 
%%%% Output: display_data
 
 
%% Cell type:markdown id: tags:
 
# Confidence per cluster
 
%% Cell type:code id: tags:
 
``` python
from collections import Counter
Counter(clusters)
```
 
%%%% Output: execute_result
 
Counter({1: 8170, 2: 879, 0: 2508, 5: 2067, 4: 127, 3: 1249})
 
%% Cell type:code id: tags:
 
``` python
# Confidence distribution for the first five clusters
for cl in range(5):
    cl_select = np.argmax(np.mean(clusts, axis=0), axis=1) == cl
    dt = np.mean(clusts[:, cl_select, cl], axis=0)
    sns.kdeplot(dt, shade=True, label=cl)

plt.xlabel('MC Dropout confidence')
plt.ylabel('Density')

plt.show()
```
 
%%%% Output: display_data
 
 
%% Cell type:code id: tags:
 
``` python
def animated_cluster_heatmap(data, clust, clusters, threshold=0.75, samples=False):

    if not samples:
        samples = data.shape[0]
    tpoints = data.shape[1]
    bdparts = data.shape[2] // 2

    cls = clusters[:, :samples, :]
    filt = np.max(np.mean(cls, axis=0), axis=1) > threshold

    cls = np.argmax(np.mean(cls, axis=0), axis=1)[filt]
    clust_series = data[:samples][filt][cls == clust]

    rshape = clust_series.reshape(clust_series.shape[0] * clust_series.shape[1],
                                  clust_series.shape[2])

    cluster_df = pd.DataFrame()
    cluster_df['x'] = rshape[:, [0, 2, 4, 6, 8, 10]].flatten(order='F')
    cluster_df['y'] = rshape[:, [1, 3, 5, 7, 9, 11]].flatten(order='F')
    cluster_df['bpart'] = np.tile(np.repeat(np.arange(bdparts),
                                            clust_series.shape[0]), tpoints)
    cluster_df['frame'] = np.tile(np.repeat(np.arange(tpoints),
                                            clust_series.shape[0]), bdparts)

    fig = px.density_contour(data_frame=cluster_df, x='x', y='y', animation_frame='frame',
                             width=600, height=600,
                             color='bpart', color_discrete_sequence=px.colors.qualitative.T10)

    fig.update_traces(contours_coloring="fill",
                      contours_showlabels=True)

    fig.update_xaxes(range=[-3, 3])
    fig.update_yaxes(range=[-3, 3])

    return fig
```
 
%% Cell type:code id: tags:
 
``` python
# animated_cluster_heatmap(pttest, 4, clusts, samples=10)
```
 
%% Cell type:markdown id: tags:
 
# Stability across runs
 
%% Cell type:code id: tags:
 
``` python
weights = [i for i in os.listdir() if "GMVAE" in i and ".h5" in i]
mult_clusters = np.zeros([len(weights), samples])
mean_conf = []

for k, i in tqdm(enumerate(sorted(weights))):
    print(i)
    gmvaep.load_weights(i)

    if montecarlo:
        clusters = np.stack([grouper(data[:samples]) for sample in tqdm(range(montecarlo))])
        clusters = clusters.mean(axis=0)
        mean_conf.append(clusters.max(axis=1))
        clusters = np.argmax(clusters, axis=1)
    else:
        clusters = grouper(data[:samples], training=False)
        mean_conf.append(clusters.max(axis=1))
        clusters = np.argmax(clusters, axis=1)

    mult_clusters[k] = clusters
```
 
%%%% Output: display_data
 
 
%%%% Output: stream
 
GMVAE_components=6_loss=ELBO_kl_warmup=30_mmd_warmup=30_20200804-154627_final_weights.h5
 
%%%% Output: display_data
 
 
%%%% Output: stream
 
GMVAE_components=6_loss=ELBO_kl_warmup=30_mmd_warmup=30_20200804-161511_final_weights.h5
 
%%%% Output: display_data
 
 
%%%% Output: stream
 
GMVAE_components=6_loss=ELBO_kl_warmup=30_mmd_warmup=30_20200804-163815_final_weights.h5
 
%%%% Output: display_data
 
 
%%%% Output: stream
 
 
%% Cell type:code id: tags:
 
``` python
clusts.shape
```
 
%%%% Output: execute_result
 
(10, 15000, 6)
 
%% Cell type:code id: tags:
 
``` python
import pandas as pd
from itertools import combinations
from sklearn.metrics import adjusted_rand_score
```
 
%% Cell type:code id: tags:
 
``` python
mult_clusters
```
 
%%%% Output: execute_result
 
array([[1., 1., 1., ..., 1., 0., 0.],
       [2., 2., 0., ..., 5., 4., 4.],
       [5., 5., 5., ..., 3., 3., 3.]])
 
%% Cell type:code id: tags:
 
``` python
thr = 0.95
ari_dist = []

for i, k in enumerate(combinations(range(len(weights)), 2)):
    filt = (mean_conf[k[0]] > thr) & (mean_conf[k[1]] > thr)

    ari = adjusted_rand_score(mult_clusters[k[0]][filt],
                              mult_clusters[k[1]][filt])

    ari_dist.append(ari)
```
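
%% Cell type:markdown id: tags:

For scale: the adjusted Rand index is 1.0 for identical partitions (regardless of label names), close to 0 for independent ones, and can turn negative when two partitions agree less than chance. A toy illustration:

%% Cell type:code id: tags:

``` python
from sklearn.metrics import adjusted_rand_score

# Identical partitions up to relabelling -> 1.0
print(adjusted_rand_score([0, 0, 1, 1], [1, 1, 0, 0]))

# Partitions that disagree on every within-cluster pair -> negative (worse than chance)
print(adjusted_rand_score([0, 0, 1, 1], [0, 1, 0, 1]))
```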
 
%% Cell type:code id: tags:
 
``` python
ari_dist
```
 
%%%% Output: execute_result
 
[0.9144861890871067, 0.6904442697979751, 0.31790249897582956]
 
%% Cell type:code id: tags:
 
``` python
random_ari = []
for i in tqdm(range(6)):
    random_ari.append(adjusted_rand_score(np.random.uniform(0, 6, 50).astype(int),
                                          np.random.uniform(0, 6, 50).astype(int)))
```
 
%%%% Output: display_data
 
 
%%%% Output: stream
 
 
%% Cell type:code id: tags:
 
``` python
sns.kdeplot(ari_dist, label="ARI gmvaep", shade=True)
sns.kdeplot(random_ari, label="ARI random", shade=True)
 
plt.xlabel("Normalised Adjusted Rand Index")
plt.ylabel("Density")
 
plt.legend()
plt.show()
```
 
%%%% Output: display_data