Lucas Miranda / deepOF · Commits

Commit a6d9fa9d
authored Apr 27, 2021 by lucas_miranda

Removed outdated non-variational autoencoder model

parent e7fd0a08
Changes 9
deepof/data.py
...
...
@@ -900,7 +900,6 @@ class coordinates:
         pretrained: str = False,
         save_checkpoints: bool = False,
         save_weights: bool = True,
-        variational: bool = True,
         reg_cat_clusters: bool = False,
         reg_cluster_variance: bool = False,
         entropy_samples: int = 10000,
...
...
@@ -938,8 +937,6 @@ class coordinates:
             is appended to the latent space,
             aiming to predict what happens immediately next in the sequence, which can help with regularization.
             - pretrained (bool): If True, a pretrained set of weights is expected.
-            - variational (bool): If True (default) a variational autoencoder is used. If False,
-            a simple autoencoder is used for dimensionality reduction

         Returns:
             - return_list (tuple): List containing all relevant trained models for unsupervised prediction.
...
...
@@ -968,7 +965,6 @@ class coordinates:
             pretrained=pretrained,
             save_checkpoints=save_checkpoints,
             save_weights=save_weights,
-            variational=variational,
             reg_cat_clusters=reg_cat_clusters,
             reg_cluster_variance=reg_cluster_variance,
             entropy_samples=entropy_samples,
...
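With this change, the deep-learning entry point on `coordinates` always trains the GMVAE, so callers simply stop passing `variational`. A minimal sketch of a post-commit call site; the method name (`deep_unsupervised_embedding`) and the preprocessing call are assumptions, since the diff only shows the affected keyword list:

    import deepof.data

    # Hypothetical project layout; only the keyword arguments below are taken
    # from the diff. `deep_unsupervised_embedding` is an assumed method name.
    coordinates = deepof.data.project(path="./my_deepof_project").run()
    preprocessed = coordinates.get_coords().preprocess()[0]

    trained_models = coordinates.deep_unsupervised_embedding(
        preprocessed,
        pretrained=False,        # note: no variational=True anymore --
        save_checkpoints=False,  # a GMVAE is always trained after this commit
        save_weights=True,
        entropy_samples=10000,
    )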
deepof/hypermodels.py
...
...
@@ -18,78 +18,7 @@ tfd = tfp.distributions
 tfpl = tfp.layers


-class SEQ_2_SEQ_AE(HyperModel):
-    """Hyperparameter tuning pipeline for deepof.models.SEQ_2_SEQ_AE"""
-
-    def __init__(self, input_shape):
-        super().__init__()
-        self.input_shape = input_shape
-
-    @staticmethod
-    def get_hparams(hp):
-        """Retrieve hyperparameters to tune"""
-
-        conv_filters = hp.Int(
-            "units_conv", min_value=32, max_value=256, step=32, default=256,
-        )
-        lstm_units_1 = hp.Int(
-            "units_lstm", min_value=128, max_value=512, step=32, default=256,
-        )
-        dense_2 = hp.Int(
-            "units_dense2", min_value=32, max_value=256, step=32, default=64,
-        )
-        dropout_rate = hp.Float(
-            "dropout_rate", min_value=0.0, max_value=0.5, default=0.25, step=0.05,
-        )
-        encoding = hp.Int(
-            "encoding", min_value=16, max_value=64, step=8, default=24,
-        )
-
-        return conv_filters, lstm_units_1, dense_2, dropout_rate, encoding
-
-    def build(self, hp):
-        """Overrides Hypermodel's build method"""
-
-        # HYPERPARAMETERS TO TUNE
-        conv_filters, lstm_units_1, dense_2, dropout_rate, encoding = self.get_hparams(
-            hp
-        )
-
-        # INSTANTIATED MODEL
-        model = deepof.models.SEQ_2_SEQ_AE(
-            architecture_hparams={
-                "units_conv": conv_filters,
-                "units_lstm": lstm_units_1,
-                "units_dense_2": dense_2,
-                "dropout_rate": dropout_rate,
-                "encoding": encoding,
-            }
-        ).build(self.input_shape)[2]
-
-        return model
-
-
-class SEQ_2_SEQ_GMVAE(HyperModel):
+class GMVAE(HyperModel):
     """Hyperparameter tuning pipeline for deepof.models.SEQ_2_SEQ_GMVAE"""

     def __init__(
...
...
@@ -173,7 +102,7 @@ class SEQ_2_SEQ_GMVAE(HyperModel):
             lstm_units_1,
         ) = self.get_hparams(hp)

-        gmvaep = deepof.models.SEQ_2_SEQ_GMVAE(
+        gmvaep = deepof.models.GMVAE(
             architecture_hparams={
                 "bidirectional_merge": "ave",
                 "clipvalue": clipvalue,
...
...
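For context, a `HyperModel` subclass like the remaining `GMVAE` is consumed by keras-tuner through its `build(hp)` method. A minimal usage sketch; the constructor arguments and the data variables are assumptions, only the tuner API itself is standard keras-tuner:

    from kerastuner.tuners import Hyperband
    import deepof.hypermodels

    # X_train / X_val are assumed preprocessed arrays: (windows, time, features)
    hypermodel = deepof.hypermodels.GMVAE(input_shape=X_train.shape)  # illustrative args

    tuner = Hyperband(
        hypermodel,
        objective="val_loss",
        max_epochs=30,
        directory="hyperband_logs",
        project_name="deepof_gmvae_tuning",
    )
    tuner.search(X_train, X_train, validation_data=(X_val, X_val))
    best_hparams = tuner.get_best_hyperparameters(1)[0]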
deepof/model_utils.py
...
...
@@ -215,7 +215,6 @@ class neighbor_latent_entropy(tf.keras.callbacks.Callback):
     def __init__(
         self,
         encoding_dim: int,
-        variational: bool = True,
         validation_data: np.ndarray = None,
         k: int = 100,
         samples: int = 10000,
...
...
@@ -223,7 +222,6 @@ class neighbor_latent_entropy(tf.keras.callbacks.Callback):
     ):
         super().__init__()
         self.enc = encoding_dim
-        self.variational = variational
         self.validation_data = validation_data
         self.k = k
         self.samples = samples
...
...
@@ -233,7 +231,7 @@ class neighbor_latent_entropy(tf.keras.callbacks.Callback):
     def on_epoch_end(self, epoch, logs=None):
         """ Passes samples through the encoder and computes cluster purity on the latent embedding """

-        if self.validation_data is not None and self.variational:
+        if self.validation_data is not None:

             # Get encoder and grouper from full model
             latent_distribution = [
...
...
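The callback above follows the standard `tf.keras.callbacks.Callback` epoch-hook pattern; with the `variational` flag gone, only the presence of validation data gates the computation. A stripped-down, self-contained sketch of that structure (the entropy computation itself is deepOF-specific and elided here):

    import numpy as np
    import tensorflow as tf

    class LatentMetricCallback(tf.keras.callbacks.Callback):
        """Runs a diagnostic at the end of every epoch, but only if
        validation data was supplied at construction time."""

        def __init__(self, validation_data: np.ndarray = None):
            super().__init__()
            self.validation_data = validation_data

        def on_epoch_end(self, epoch, logs=None):
            if self.validation_data is not None:
                preds = self.model.predict(self.validation_data)
                print("epoch {}: mean prediction {:.4f}".format(epoch, preds.mean()))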
@@ -536,10 +534,18 @@ class Cluster_overlap(Layer):
     using the average inter-cluster MMD as a metric
     """

-    def __init__(self, lat_dims, n_components, loss=False, samples=10, *args, **kwargs):
-        self.lat_dims = lat_dims
-        self.n_components = n_components
-        self.loss = loss
+    def __init__(
+        self,
+        encoding_dim: int,
+        k: int = 100,
+        loss_weight: float = False,
+        samples: int = 512,
+        *args,
+        **kwargs
+    ):
+        self.enc = encoding_dim
+        self.k = k
+        self.loss_weight = loss_weight
         self.samples = samples
         super(Cluster_overlap, self).__init__(*args, **kwargs)
...
...
@@ -547,9 +553,9 @@ class Cluster_overlap(Layer):
         """Updates Constraint metadata"""

         config = super().get_config().copy()
-        config.update({"lat_dims": self.lat_dims})
-        config.update({"n_components": self.n_components})
-        config.update({"loss": self.loss})
+        config.update({"enc": self.enc})
+        config.update({"k": self.k})
+        config.update({"loss_weight": self.loss_weight})
         config.update({"samples": self.samples})

         return config
...
...
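Keeping `get_config` in sync with `__init__` is what allows Keras to serialize and rebuild a custom layer, which is why the hunk above renames the stored attributes together with the constructor. A self-contained sketch of the round-trip (simplified; the real layer's overlap computation is omitted):

    import tensorflow as tf

    class ClusterOverlapSketch(tf.keras.layers.Layer):
        def __init__(self, encoding_dim, k=100, loss_weight=0.0, samples=512, **kwargs):
            self.enc = encoding_dim
            self.k = k
            self.loss_weight = loss_weight
            self.samples = samples
            super().__init__(**kwargs)

        def get_config(self):
            # Keys mirror the constructor arguments so from_config can rebuild the layer
            config = super().get_config().copy()
            config.update(
                {"encoding_dim": self.enc, "k": self.k,
                 "loss_weight": self.loss_weight, "samples": self.samples}
            )
            return config

        def call(self, inputs):
            return inputs  # the real layer adds an inter-cluster overlap penalty here

    layer = ClusterOverlapSketch(encoding_dim=16, k=50)
    rebuilt = ClusterOverlapSketch.from_config(layer.get_config())
    assert rebuilt.k == 50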
deepof/models.py
...
...
@@ -28,217 +28,8 @@ tfb = tfp.bijectors
 tfd = tfp.distributions
 tfpl = tfp.layers


-# noinspection PyDefaultArgument
-class SEQ_2_SEQ_AE:
-    """ Simple sequence to sequence autoencoder implemented with tf.keras """
-
-    def __init__(
-        self,
-        architecture_hparams: Dict = {},
-        huber_delta: float = 1.0,
-    ):
-        self.hparams = self.get_hparams(architecture_hparams)
-        self.CONV_filters = self.hparams["units_conv"]
-        self.LSTM_units_1 = self.hparams["units_lstm"]
-        self.LSTM_units_2 = int(self.hparams["units_lstm"] / 2)
-        self.DENSE_1 = int(self.hparams["units_lstm"] / 2)
-        self.DENSE_2 = self.hparams["units_dense2"]
-        self.DROPOUT_RATE = self.hparams["dropout_rate"]
-        self.ENCODING = self.hparams["encoding"]
-        self.learn_rate = self.hparams["learning_rate"]
-        self.delta = huber_delta
-
-    @staticmethod
-    def get_hparams(hparams):
-        """Sets the default parameters for the model. Overwritable with a dictionary"""
-
-        defaults = {
-            "units_conv": 256,
-            "units_lstm": 256,
-            "units_dense2": 64,
-            "dropout_rate": 0.25,
-            "encoding": 16,
-            "learning_rate": 1e-3,
-        }
-
-        for k, v in hparams.items():
-            defaults[k] = v
-
-        return defaults
-
-    def get_layers(self, input_shape):
-        """Instantiates all layers in the model"""
-
-        # Encoder Layers
-        Model_E0 = tf.keras.layers.Conv1D(
-            filters=self.CONV_filters,
-            kernel_size=5,
-            strides=1,
-            padding="causal",
-            activation="elu",
-            kernel_initializer=he_uniform(),
-        )
-        Model_E1 = Bidirectional(
-            LSTM(
-                self.LSTM_units_1,
-                activation="tanh",
-                recurrent_activation="sigmoid",
-                return_sequences=True,
-                kernel_constraint=UnitNorm(axis=0),
-            )
-        )
-        Model_E2 = Bidirectional(
-            LSTM(
-                self.LSTM_units_2,
-                activation="tanh",
-                recurrent_activation="sigmoid",
-                return_sequences=False,
-                kernel_constraint=UnitNorm(axis=0),
-            )
-        )
-        Model_E3 = Dense(
-            self.DENSE_1,
-            activation="elu",
-            kernel_constraint=UnitNorm(axis=0),
-            kernel_initializer=he_uniform(),
-        )
-        Model_E4 = Dense(
-            self.DENSE_2,
-            activation="elu",
-            kernel_constraint=UnitNorm(axis=0),
-            kernel_initializer=he_uniform(),
-        )
-        Model_E5 = Dense(
-            self.ENCODING,
-            activation="elu",
-            kernel_constraint=UnitNorm(axis=1),
-            activity_regularizer=deepof.model_utils.uncorrelated_features_constraint(
-                2, weightage=1.0
-            ),
-            kernel_initializer=Orthogonal(),
-        )
-
-        # Decoder layers
-        Model_D0 = deepof.model_utils.DenseTranspose(
-            Model_E5,
-            activation="elu",
-            output_dim=self.ENCODING,
-        )
-        Model_D1 = deepof.model_utils.DenseTranspose(
-            Model_E4,
-            activation="elu",
-            output_dim=self.DENSE_2,
-        )
-        Model_D2 = deepof.model_utils.DenseTranspose(
-            Model_E3,
-            activation="elu",
-            output_dim=self.DENSE_1,
-        )
-        Model_D3 = RepeatVector(input_shape[1])
-        Model_D4 = Bidirectional(
-            LSTM(
-                self.LSTM_units_1,
-                activation="tanh",
-                recurrent_activation="sigmoid",
-                return_sequences=True,
-                # kernel_constraint=UnitNorm(axis=1),
-            )
-        )
-        Model_D5 = Bidirectional(
-            LSTM(
-                self.LSTM_units_1,
-                activation="sigmoid",
-                recurrent_activation="sigmoid",
-                return_sequences=True,
-                # kernel_constraint=UnitNorm(axis=1),
-            )
-        )
-
-        return (
-            Model_E0,
-            Model_E1,
-            Model_E2,
-            Model_E3,
-            Model_E4,
-            Model_E5,
-            Model_D0,
-            Model_D1,
-            Model_D2,
-            Model_D3,
-            Model_D4,
-            Model_D5,
-        )
-
-    def build(
-        self,
-        input_shape: tuple,
-    ) -> Tuple[Any, Any, Any]:
-        """Builds the tf.keras model"""
-
-        (
-            Model_E0,
-            Model_E1,
-            Model_E2,
-            Model_E3,
-            Model_E4,
-            Model_E5,
-            Model_D0,
-            Model_D1,
-            Model_D2,
-            Model_D3,
-            Model_D4,
-            Model_D5,
-        ) = self.get_layers(input_shape)
-
-        # Define and instantiate encoder
-        encoder = Sequential(name="SEQ_2_SEQ_Encoder")
-        encoder.add(Input(shape=input_shape[1:]))
-        encoder.add(Model_E0)
-        encoder.add(BatchNormalization())
-        encoder.add(Model_E1)
-        encoder.add(BatchNormalization())
-        encoder.add(Model_E2)
-        encoder.add(BatchNormalization())
-        encoder.add(Model_E3)
-        encoder.add(BatchNormalization())
-        encoder.add(Dropout(self.DROPOUT_RATE))
-        encoder.add(Model_E4)
-        encoder.add(BatchNormalization())
-        encoder.add(Model_E5)
-
-        # Define and instantiate decoder
-        decoder = Sequential(name="SEQ_2_SEQ_Decoder")
-        decoder.add(Model_D0)
-        decoder.add(BatchNormalization())
-        decoder.add(Model_D1)
-        decoder.add(BatchNormalization())
-        decoder.add(Model_D2)
-        decoder.add(BatchNormalization())
-        decoder.add(Model_D3)
-        decoder.add(Model_D4)
-        decoder.add(BatchNormalization())
-        decoder.add(Model_D5)
-        decoder.add(TimeDistributed(Dense(input_shape[2])))
-
-        model = Sequential([encoder, decoder], name="SEQ_2_SEQ_AE")
-        model.compile(
-            loss=Huber(delta=self.delta),
-            optimizer=Nadam(
-                lr=self.learn_rate,
-                clipvalue=0.5,
-            ),
-            metrics=["mae"],
-        )
-        model.build(input_shape)
-
-        return encoder, decoder, model
-
-
 # noinspection PyDefaultArgument
-class SEQ_2_SEQ_GMVAE:
+class GMVAE:
     """ Gaussian Mixture Variational Autoencoder for pose motif elucidation. """

     def __init__(
...
...
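For reference, the removed class exposed a plain reconstruction objective: `SEQ_2_SEQ_AE(...).build(shape)` returned an `(encoder, decoder, model)` triple, trained by feeding the input as its own target. A sketch against the pre-commit API (shapes and fit settings are illustrative):

    import numpy as np
    import deepof.models  # pre-a6d9fa9d version; SEQ_2_SEQ_AE no longer exists

    # (num_windows, window_length, num_features)
    X = np.random.normal(size=(1000, 24, 11)).astype("float32")

    encoder, decoder, ae = deepof.models.SEQ_2_SEQ_AE(
        architecture_hparams={"encoding": 16}
    ).build(X.shape)

    ae.fit(X, X, epochs=10, batch_size=256)  # plain reconstruction loss (Huber)
    latent = encoder.predict(X)              # deterministic 16-D embeddings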
@@ -630,7 +421,7 @@ class SEQ_2_SEQ_GMVAE:
         z_gauss = deepof.model_utils.Cluster_overlap(
             self.ENCODING,
             self.number_of_components,
-            loss=self.overlap_loss,
+            loss_weight=self.overlap_loss,
         )(z_gauss)

         z = tfpl.DistributionLambda(
...
...
deepof/train_model.py
...
...
@@ -217,13 +217,6 @@ parser.add_argument(
     type=int,
     default=1,
 )
-parser.add_argument(
-    "--variational",
-    "-v",
-    help="Sets the model to train to a variational Bayesian autoencoder. Defaults to True",
-    default=True,
-    type=str2bool,
-)
 parser.add_argument(
     "--window-size",
     "-ws",
...
...
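The removed `--variational` flag relied on a `str2bool` converter, since argparse's `type=bool` treats any non-empty string as true. deepOF's actual helper is not shown in this diff; a typical implementation looks like this sketch:

    import argparse

    def str2bool(v: str) -> bool:
        """Illustrative converter; the real deepOF helper may differ in detail."""
        if isinstance(v, bool):
            return v
        if v.lower() in ("yes", "true", "t", "y", "1"):
            return True
        if v.lower() in ("no", "false", "f", "n", "0"):
            return False
        raise argparse.ArgumentTypeError("Boolean value expected.")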
@@ -276,7 +269,6 @@ smooth_alpha = args.smooth_alpha
 train_path = os.path.abspath(args.train_path)
 tune = args.hyperparameter_tuning
 val_num = args.val_num
-variational = bool(args.variational)
 window_size = args.window_size
 window_step = args.window_step
 run = args.run
...
...
@@ -408,7 +400,6 @@ if not tune:
         rule_based_prediction=rule_based_prediction,
         save_checkpoints=False,
         save_weights=True,
-        variational=variational,
         reg_cat_clusters=("categorical" in latent_reg),
         reg_cluster_variance=("variance" in latent_reg),
         entropy_samples=entropy_samples,
...
...
@@ -419,13 +410,9 @@
 else:
     # Runs hyperparameter tuning with the specified parameters and saves the results
-    hyp = "S2SGMVAE" if variational else "S2SAE"
-
     run_ID, tensorboard_callback, entropy, onecycle = get_callbacks(
         X_train=X_train,
         batch_size=batch_size,
-        variational=variational,
         phenotype_prediction=phenotype_prediction,
         next_sequence_prediction=next_sequence_prediction,
         rule_based_prediction=rule_base_prediction,
...
...
@@ -456,7 +443,7 @@ else:
         next_sequence_prediction=next_sequence_prediction,
         phenotype_prediction=phenotype_prediction,
         rule_based_prediction=rule_base_prediction,
-        project_name="{}-based_{}_{}".format(input_type, hyp, tune.capitalize()),
+        project_name="{}-based_GMVAE_{}".format(input_type, tune.capitalize()),
         callbacks=[
             tensorboard_callback,
             onecycle,
...
@@ -477,7 +464,7 @@ else:
     with open(
         os.path.join(
             output_path,
-            "{}-based_{}_{}_params.pickle".format(input_type, hyp, tune.capitalize()),
+            "{}-based_GMVAE_{}_params.pickle".format(input_type, tune.capitalize()),
         ),
         "wb",
     ) as handle:
...
...
deepof/train_utils.py
...
...
@@ -68,7 +68,6 @@ def load_treatments(train_path):
 def get_callbacks(
     X_train: np.array,
     batch_size: int,
-    variational: bool,
     phenotype_prediction: float,
     next_sequence_prediction: float,
     rule_based_prediction: float,
...
...
@@ -103,13 +102,13 @@ def get_callbacks(
         latreg = "categorical+variance"

     run_ID = "{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}".format(
-        ("GMVAE" if variational else "AE"),
+        ("deepof_GMVAE"),
         ("_input_type={}".format(input_type) if input_type else "coords"),
         ("_window_size={}".format(X_train.shape[1])),
-        ("_NextSeqPred={}".format(next_sequence_prediction) if variational else ""),
-        ("_PhenoPred={}".format(phenotype_prediction) if variational else ""),
-        ("_RuleBasedPred={}".format(rule_based_prediction) if variational else ""),
-        ("_loss={}".format(loss) if variational else ""),
+        ("_NextSeqPred={}".format(next_sequence_prediction)),
+        ("_PhenoPred={}".format(phenotype_prediction)),
+        ("_RuleBasedPred={}".format(rule_based_prediction)),
+        ("_loss={}".format(loss)),
         ("_loss_warmup={}".format(loss_warmup)),
         ("_warmup_mode={}".format(warmup_mode)),
         ("_encoding={}".format(logparam["encoding"]) if logparam is not None else ""),
...
...
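With the conditionals dropped, every run ID now carries the full field set regardless of model type. An illustrative value (parameter values are made up, and fields beyond the encoding are elided):

    deepof_GMVAE_input_type=coords_window_size=24_NextSeqPred=0.0_PhenoPred=0.0_RuleBasedPred=0.0_loss=ELBO_loss_warmup=10_warmup_mode=linear_encoding=16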
@@ -133,7 +132,6 @@ def get_callbacks(
         samples=entropy_samples,
         validation_data=X_val,
         log_dir=os.path.join(outpath, "metrics", run_ID),
-        variational=variational,
     )

     onecycle = deepof.model_utils.one_cycle_scheduler(
...
...
@@ -299,7 +297,6 @@ def autoencoder_fitting(
     pretrained: str,
     save_checkpoints: bool,
     save_weights: bool,
-    variational: bool,
     reg_cat_clusters: bool,
     reg_cluster_variance: bool,
     entropy_samples: int,
...
...
@@ -342,7 +339,6 @@ def autoencoder_fitting(
     run_ID, *cbacks = get_callbacks(
         X_train=X_train,
         batch_size=batch_size,
-        variational=variational,
         phenotype_prediction=phenotype_prediction,
         next_sequence_prediction=next_sequence_prediction,
         rule_based_prediction=rule_based_prediction,
...
...
@@ -384,159 +380,109 @@ def autoencoder_fitting(
     except IndexError:
         rule_based_features = 0

-    # Build models
-    if not variational:
-        encoder, decoder, ae = deepof.models.SEQ_2_SEQ_AE(
-            ({} if hparams is None else hparams)
-        ).build(X_train.shape)
-        return_list = (encoder, decoder, ae)
-
-    else:
-        with strategy.scope():
-            (
-                encoder,
-                generator,
-                grouper,
-                ae,
-                prior,
-                posterior,
-            ) = deepof.models.SEQ_2_SEQ_GMVAE(
-                architecture_hparams=({} if hparams is None else hparams),
-                batch_size=batch_size * strategy.num_replicas_in_sync,
-                compile_model=True,
-                encoding=encoding_size,
-                kl_annealing_mode=kl_annealing_mode,
-                kl_warmup_epochs=kl_warmup,
-                loss=loss,
-                mmd_annealing_mode=mmd_annealing_mode,
-                mmd_warmup_epochs=mmd_warmup,
-                montecarlo_kl=montecarlo_kl,
-                number_of_components=n_components,
-                overlap_loss=False,
-                next_sequence_prediction=next_sequence_prediction,
-                phenotype_prediction=phenotype_prediction,
-                rule_based_prediction=rule_based_prediction,
-                rule_based_features=rule_based_features,
-                reg_cat_clusters=reg_cat_clusters,
-                reg_cluster_variance=reg_cluster_variance,
-            ).build(X_train.shape)
-            return_list = (encoder, generator, grouper, ae)
+    # Build model
+    with strategy.scope():
+        (
+            encoder,
+            generator,
+            grouper,
+            ae,
+            prior,
+            posterior,
+        ) = deepof.models.GMVAE(
+            architecture_hparams=({} if hparams is None else hparams),
+            batch_size=batch_size * strategy.num_replicas_in_sync,
+            compile_model=True,
+            encoding=encoding_size,
+            kl_annealing_mode=kl_annealing_mode,
+            kl_warmup_epochs=kl_warmup,
+            loss=loss,
+            mmd_annealing_mode=mmd_annealing_mode,
+            mmd_warmup_epochs=mmd_warmup,
+            montecarlo_kl=montecarlo_kl,
+            number_of_components=n_components,
+            overlap_loss=False,
+            next_sequence_prediction=next_sequence_prediction,
+            phenotype_prediction=phenotype_prediction,
+            rule_based_prediction=rule_based_prediction,
+            rule_based_features=rule_based_features,
+            reg_cat_clusters=reg_cat_clusters,
+            reg_cluster_variance=reg_cluster_variance,
+        ).build(X_train.shape)
+        return_list = (encoder, generator, grouper, ae)

     if pretrained:
         # If pretrained models are specified, load weights and return
         ae.load_weights(pretrained)
         return return_list

-    else:
-        if not variational:
-            ae.fit(
-                x=X_train,
-                y=X_train,
-                epochs=epochs,
-                verbose=1,
-                validation_data=(X_val, X_val),
-                callbacks=cbacks
-                + [
-                    CustomStopper(
-                        monitor="val_loss",
-                        patience=15,
-                        restore_best_weights=True,
-                        start_epoch=max(kl_warmup, mmd_warmup),
-                    ),
-                ],
-            )
+    callbacks_ = cbacks + [
+        CustomStopper(
+            monitor="val_loss",
+            patience=15,
+            restore_best_weights=True,
+            start_epoch=max(kl_warmup, mmd_warmup),
+        ),
+    ]

     if not os.path.exists(os.path.join(output_path, "trained_weights")):
         os.makedirs(os.path.join(output_path, "trained_weights"))

     if save_weights:
         ae.save_weights(
             os.path.join(
                 "{}".format(output_path),
                 "trained_weights",
                 "{}_final_weights.h5".format(run_ID),
             )
         )

     else: