deepOF · Commit 2f603a5e
authored Jun 04, 2021 by lucas_miranda

Increased default dimensionality of latent space

parent 77f40ee9
3 changed files
deepof/models.py
@@ -17,7 +17,7 @@ from tensorflow.keras.activations import softplus
 from tensorflow.keras.constraints import UnitNorm
 from tensorflow.keras.initializers import he_uniform
 from tensorflow.keras.layers import BatchNormalization, Bidirectional
-from tensorflow.keras.layers import Dense, Dropout, LSTM
+from tensorflow.keras.layers import Dense, Dropout, GRU
 from tensorflow.keras.layers import RepeatVector, Reshape, TimeDistributed
 from tensorflow.keras.losses import Huber
 from tensorflow.keras.optimizers import Nadam
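As a quick illustrative check of the LSTM-to-GRU swap above (not part of the commit; the input shape and layer width below are made up), a GRU of the same width carries roughly three quarters of an LSTM's parameters:

import tensorflow as tf

# Illustrative only: compare parameter counts of same-width recurrent layers.
inp = tf.keras.Input(shape=(100, 24))  # (time, features), arbitrary dummy shape
lstm_params = tf.keras.Model(inp, tf.keras.layers.LSTM(128)(inp)).count_params()
gru_params = tf.keras.Model(inp, tf.keras.layers.GRU(128)(inp)).count_params()
print(lstm_params, gru_params)  # 78336 vs 59136 with these shapes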
@@ -57,17 +57,17 @@ class GMVAE:
         self.batch_size = batch_size
         self.bidirectional_merge = self.hparams["bidirectional_merge"]
         self.CONV_filters = self.hparams["units_conv"]
-        self.DENSE_1 = int(self.hparams["units_lstm"] / 2)
+        self.DENSE_1 = int(self.hparams["units_gru"] / 2)
         self.DENSE_2 = self.hparams["units_dense2"]
         self.DROPOUT_RATE = self.hparams["dropout_rate"]
         self.ENCODING = encoding
-        self.LSTM_units_1 = self.hparams["units_lstm"]
-        self.LSTM_units_2 = int(self.hparams["units_lstm"] / 2)
+        self.GRU_units_1 = self.hparams["units_gru"]
+        self.GRU_units_2 = int(self.hparams["units_gru"] / 2)
         self.clipvalue = self.hparams["clipvalue"]
         self.dense_activation = self.hparams["dense_activation"]
         self.dense_layers_per_branch = self.hparams["dense_layers_per_branch"]
         self.learn_rate = self.hparams["learning_rate"]
-        self.lstm_unroll = True
+        self.gru_unroll = True
         self.compile = compile_model
         self.kl_annealing_mode = kl_annealing_mode
         self.kl_warmup = kl_warmup_epochs
@@ -140,7 +140,7 @@ class GMVAE:
             "learning_rate": 1e-3,
             "units_conv": 64,
             "units_dense2": 32,
-            "units_lstm": 128,
+            "units_gru": 128,
         }
         for k, v in params.items():
@@ -155,31 +155,31 @@ class GMVAE:
         Model_E0 = tf.keras.layers.Conv1D(
             filters=self.CONV_filters,
             kernel_size=5,
-            strides=1,
+            strides=2,  # Increased strides to yield shorter sequences
             padding="same",
             activation=self.dense_activation,
             kernel_initializer=he_uniform(),
             use_bias=True,
         )
         Model_E1 = Bidirectional(
-            LSTM(
-                self.LSTM_units_1,
+            GRU(
+                self.GRU_units_1,
                 activation="tanh",
                 recurrent_activation="sigmoid",
                 return_sequences=True,
-                unroll=self.lstm_unroll,
+                unroll=self.gru_unroll,
                 # kernel_constraint=UnitNorm(axis=0),
                 use_bias=True,
             ),
             merge_mode=self.bidirectional_merge,
         )
         Model_E2 = Bidirectional(
-            LSTM(
-                self.LSTM_units_2,
+            GRU(
+                self.GRU_units_2,
                 activation="tanh",
                 recurrent_activation="sigmoid",
                 return_sequences=False,
-                unroll=self.lstm_unroll,
+                unroll=self.gru_unroll,
                 # kernel_constraint=UnitNorm(axis=0),
                 use_bias=True,
             ),
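The strides change in Model_E0 halves the temporal resolution fed to the recurrent encoder. A minimal standalone sketch (dummy shapes, not from the repo) of the effect of strides=2 with padding="same":

import tensorflow as tf

# With padding="same", output length = ceil(input_length / strides).
x = tf.random.normal((1, 100, 24))  # (batch, time, features), dummy values
conv = tf.keras.layers.Conv1D(filters=64, kernel_size=5, strides=2, padding="same")
print(conv(x).shape)  # (1, 50, 64): the time axis is halved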
@@ -231,24 +231,24 @@ class GMVAE:
         )
         Model_D3 = RepeatVector(input_shape[1])
         Model_D4 = Bidirectional(
-            LSTM(
-                self.LSTM_units_2,
+            GRU(
+                self.GRU_units_2,
                 activation="tanh",
                 recurrent_activation="sigmoid",
                 return_sequences=True,
-                unroll=self.lstm_unroll,
+                unroll=self.gru_unroll,
                 # kernel_constraint=UnitNorm(axis=1),
                 use_bias=True,
             ),
             merge_mode=self.bidirectional_merge,
         )
         Model_D5 = Bidirectional(
-            LSTM(
-                self.LSTM_units_1,
+            GRU(
+                self.GRU_units_1,
                 activation="tanh",
                 recurrent_activation="sigmoid",
                 return_sequences=True,
-                unroll=self.lstm_unroll,
+                unroll=self.gru_unroll,
                 # kernel_constraint=UnitNorm(axis=1),
                 use_bias=True,
             ),
@@ -272,24 +272,24 @@ class GMVAE:
             use_bias=True,
         )
         Model_P2 = Bidirectional(
-            LSTM(
-                self.LSTM_units_1,
+            GRU(
+                self.GRU_units_1,
                 activation="tanh",
                 recurrent_activation="sigmoid",
                 return_sequences=True,
-                unroll=self.lstm_unroll,
+                unroll=self.gru_unroll,
                 # kernel_constraint=UnitNorm(axis=1),
                 use_bias=True,
             ),
             merge_mode=self.bidirectional_merge,
         )
         Model_P3 = Bidirectional(
-            LSTM(
-                self.LSTM_units_1,
+            GRU(
+                self.GRU_units_1,
                 activation="tanh",
                 recurrent_activation="sigmoid",
                 return_sequences=True,
-                unroll=self.lstm_unroll,
+                unroll=self.gru_unroll,
                 # kernel_constraint=UnitNorm(axis=1),
                 use_bias=True,
             ),
@@ -631,8 +631,11 @@ class GMVAE:
# TODO:
# - Check usefulness of stateful sequential layers! (stateful=True in the
LSTM
s)
# - Check usefulness of stateful sequential layers! (stateful=True in the
GRU
s)
# - Investigate full covariance matrix approximation for the latent space! (details on tfp course) :)
# - Explore expanding the event dims of the final reconstruction layer
# - Think about gradient penalty to avoid mode collapse (as in WGAN-GP)
# - Think about using spectral normalization
# - REVISIT DROPOUT - CAN HELP WITH TRAINING STABILIZATION
# - Decrease learning rate!
# - Implement residual blocks!
\ No newline at end of file
deepof/train_utils.py
@@ -52,11 +52,11 @@ def load_treatments(train_path):
(formatting-only change: both sides of this hunk are identical token for token)
     to be loaded as metadata in the coordinates class"""
     try:
         with open(
             os.path.join(
                 train_path,
                 [i for i in os.listdir(train_path) if i.endswith(".json")][0],
             ),
             "r",
         ) as handle:
             treatment_dict = json.load(handle)
     except IndexError:
@@ -66,25 +66,25 @@ def load_treatments(train_path):
(formatting-only reflow of the signature; parameters unchanged)
 def get_callbacks(
     X_train: np.array,
     batch_size: int,
     phenotype_prediction: float,
     next_sequence_prediction: float,
     rule_based_prediction: float,
     overlap_loss: float,
     loss: str,
     loss_warmup: int = 0,
     warmup_mode: str = "none",
     X_val: np.array = None,
     input_type: str = False,
     cp: bool = False,
     reg_cat_clusters: bool = False,
     reg_cluster_variance: bool = False,
     entropy_samples: int = 15000,
     entropy_knn: int = 100,
     logparam: dict = None,
     outpath: str = ".",
     run: int = False,
 ) -> List[Union[Any]]:
     """Generates callbacks for model training, including:
     - run_ID: run name, with coarse parameter details;
@@ -202,15 +202,15 @@ def log_hyperparameters(phenotype_class: float, rec: str):
(formatting-only reflow of the signature; parameters unchanged)
 # noinspection PyUnboundLocalVariable
 def tensorboard_metric_logging(
     run_dir: str,
     hpms: Any,
     ae: Any,
     X_val: np.ndarray,
     y_val: np.ndarray,
     next_sequence_prediction: float,
     phenotype_prediction: float,
     rule_based_prediction: float,
     rec: str,
 ):
     """Autoencoder metric logging in tensorboard"""
@@ -270,35 +270,35 @@ def tensorboard_metric_logging(
(formatting-only reflow of the signature; parameters unchanged)
 def autoencoder_fitting(
     preprocessed_object: Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray],
     batch_size: int,
     encoding_size: int,
     epochs: int,
     hparams: dict,
     kl_annealing_mode: str,
     kl_warmup: int,
     log_history: bool,
     log_hparams: bool,
     loss: str,
     mmd_annealing_mode: str,
     mmd_warmup: int,
     montecarlo_kl: int,
     n_components: int,
     output_path: str,
     overlap_loss: float,
     next_sequence_prediction: float,
     phenotype_prediction: float,
     rule_based_prediction: float,
     pretrained: str,
     save_checkpoints: bool,
     save_weights: bool,
     reg_cat_clusters: bool,
     reg_cluster_variance: bool,
     entropy_samples: int,
     entropy_knn: int,
     input_type: str,
     run: int = 0,
     strategy: tf.distribute.Strategy = tf.distribute.MirroredStrategy(),
 ):
     """Implementation function for deepof.data.coordinates.deep_unsupervised_embedding"""
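autoencoder_fitting takes a tf.distribute.Strategy, defaulting to MirroredStrategy. A minimal sketch of the usual usage pattern (toy model; everything here is an assumption, not deepOF code): variables must be created inside strategy.scope() so they are mirrored across replicas.

import tensorflow as tf

strategy = tf.distribute.MirroredStrategy()
with strategy.scope():
    # Any model built here has its variables replicated on all visible GPUs.
    model = tf.keras.Sequential([tf.keras.layers.Dense(8)])
    model.compile(optimizer="nadam", loss="mse")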
@@ -317,8 +317,8 @@ def autoencoder_fitting(
(formatting-only reflow; the pipeline is unchanged)
     # Generate validation dataset for callback usage
     X_val_dataset = (
         tf.data.Dataset.from_tensor_slices(X_val)
         .with_options(options)
         .batch(batch_size * strategy.num_replicas_in_sync, drop_remainder=True)
     )

     # Defines what to log on tensorboard (useful for trying out different models)
@@ -361,7 +361,7 @@ def autoencoder_fitting(
(formatting-only reflow; the call is unchanged)
     logparams, metrics = log_hyperparameters(phenotype_prediction, rec)

     with tf.summary.create_file_writer(
         os.path.join(output_path, "hparams", run_ID)
     ).as_default():
         hp.hparams_config(
             hparams=logparams,
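The hparams logging above uses the TensorBoard HParams plugin. A self-contained sketch of that API (the hyperparameter name and metric below are hypothetical, not deepOF's actual search space):

import tensorflow as tf
from tensorboard.plugins.hparams import api as hp

HP_UNITS = hp.HParam("units_gru", hp.Discrete([64, 128]))  # hypothetical search space

with tf.summary.create_file_writer("logs/hparams/run_0").as_default():
    # Register the experiment layout once, then log the values used by this run.
    hp.hparams_config(hparams=[HP_UNITS], metrics=[hp.Metric("val_mae")])
    hp.hparams({HP_UNITS: 128})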
@@ -422,28 +422,28 @@ def autoencoder_fitting(
         Xvals, yvals = X_val[:-1], [X_val[:-1], X_val[1:]]

     if phenotype_prediction > 0.0:
-        ys += [y_train[-Xs.shape[0] :, 0]]
-        yvals += [y_val[-Xvals.shape[0] :, 0]]
+        ys += [y_train[-Xs.shape[0]:, 0]]
+        yvals += [y_val[-Xvals.shape[0]:, 0]]

         # Remove the used column (phenotype) from both y arrays
         y_train = y_train[:, 1:]
         y_val = y_val[:, 1:]

     if rule_based_prediction > 0.0:
-        ys += [y_train[-Xs.shape[0] :]]
-        yvals += [y_val[-Xvals.shape[0] :]]
+        ys += [y_train[-Xs.shape[0]:]]
+        yvals += [y_val[-Xvals.shape[0]:]]

     # Convert data to tf.data.Dataset objects
     train_dataset = (
         tf.data.Dataset.from_tensor_slices((Xs, tuple(ys)))
         .batch(batch_size * strategy.num_replicas_in_sync, drop_remainder=True)
         .shuffle(buffer_size=X_train.shape[0])
         .with_options(options)
     )
     val_dataset = (
         tf.data.Dataset.from_tensor_slices((Xvals, tuple(yvals)))
         .batch(batch_size * strategy.num_replicas_in_sync, drop_remainder=True)
         .with_options(options)
     )

     ae.fit(
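The dataset construction above batches by the global batch size (per-replica batch size times the number of replicas). A runnable sketch of the same pipeline shape with dummy data (all names and values here are assumed, not taken from the repo):

import numpy as np
import tensorflow as tf

strategy = tf.distribute.MirroredStrategy()
Xs = np.random.rand(512, 10).astype("float32")   # dummy features
ys = (np.random.rand(512, 1).astype("float32"),)  # dummy targets, tuple as in the diff

options = tf.data.Options()
options.experimental_distribute.auto_shard_policy = (
    tf.data.experimental.AutoShardPolicy.DATA
)

train_dataset = (
    tf.data.Dataset.from_tensor_slices((Xs, ys))
    # Global batch = per-replica batch * num replicas; drop_remainder keeps
    # batch shapes static, which distribution strategies prefer.
    .batch(64 * strategy.num_replicas_in_sync, drop_remainder=True)
    .shuffle(buffer_size=Xs.shape[0])
    .with_options(options)
)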
@@ -484,23 +484,23 @@ def autoencoder_fitting(
(formatting-only reflow of the signature; parameters unchanged)
 def tune_search(
     data: List[np.array],
     encoding_size: int,
     hypertun_trials: int,
     hpt_type: str,
     k: int,
     kl_warmup_epochs: int,
     loss: str,
     mmd_warmup_epochs: int,
     overlap_loss: float,
     next_sequence_prediction: float,
     phenotype_prediction: float,
     rule_based_prediction: float,
     project_name: str,
     callbacks: List,
     n_epochs: int = 30,
     n_replicas: int = 1,
     outpath: str = ".",
 ) -> Union[bool, Tuple[Any, Any]]:
     """Define the search space using keras-tuner and bayesian optimization
@@ -592,16 +592,16 @@ def tune_search(
         Xvals, yvals = X_val[:-1], [X_val[:-1], X_val[1:]]

     if phenotype_prediction > 0.0:
-        ys += [y_train[-Xs.shape[0] :, 0]]
-        yvals += [y_val[-Xvals.shape[0] :, 0]]
+        ys += [y_train[-Xs.shape[0]:, 0]]
+        yvals += [y_val[-Xvals.shape[0]:, 0]]

         # Remove the used column (phenotype) from both y arrays
         y_train = y_train[:, 1:]
         y_val = y_val[:, 1:]

     if rule_based_prediction > 0.0:
-        ys += [y_train[-Xs.shape[0] :]]
-        yvals += [y_val[-Xvals.shape[0] :]]
+        ys += [y_train[-Xs.shape[0]:]]
+        yvals += [y_val[-Xvals.shape[0]:]]

     tuner.search(
         Xs,
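A toy numpy example (shapes invented) of the trailing-label alignment used in both autoencoder_fitting and tune_search above: only the labels matching the last Xs.shape[0] samples are kept, the phenotype sits in column 0, and the used column is then dropped.

import numpy as np

y_train = np.arange(12).reshape(6, 2)  # 6 samples, phenotype in column 0
Xs = np.zeros((4, 3))                  # only the last 4 samples are kept

ys = [y_train[-Xs.shape[0]:, 0]]       # trailing labels -> array([4, 6, 8, 10])
y_train = y_train[:, 1:]               # remove the used phenotype column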
supplementary_notebooks/deepof_model_evaluation.ipynb
(diff collapsed in the original view; contents not shown)