diff --git a/source/hypermodels.py b/source/hypermodels.py
index ab7fc667b4fffe4ef399b6f716121104885693b9..61121ab5b79e9bcae490fe4c5ec594f292c6ded0 100644
--- a/source/hypermodels.py
+++ b/source/hypermodels.py
@@ -196,6 +196,7 @@ class SEQ_2_SEQ_VAE(HyperModel):
             strides=1,
             padding="causal",
             activation="relu",
+            kernel_initializer=he_uniform(),
         )
         Model_E1 = Bidirectional(
             LSTM(
@@ -213,19 +214,31 @@ class SEQ_2_SEQ_VAE(HyperModel):
                 kernel_constraint=UnitNorm(axis=0),
             )
         )
-        Model_E3 = Dense(DENSE_1, activation="relu", kernel_constraint=UnitNorm(axis=0))
-        Model_E4 = Dense(DENSE_2, activation="relu", kernel_constraint=UnitNorm(axis=0))
+        Model_E3 = Dense(
+            DENSE_1,
+            activation="relu",
+            kernel_constraint=UnitNorm(axis=0),
+            kernel_initializer=he_uniform(),
+        )
+        Model_E4 = Dense(
+            DENSE_2,
+            activation="relu",
+            kernel_constraint=UnitNorm(axis=0),
+            kernel_initializer=he_uniform(),
+        )
         Model_E5 = Dense(
             ENCODING,
             activation="relu",
             kernel_constraint=UnitNorm(axis=1),
             activity_regularizer=UncorrelatedFeaturesConstraint(3, weightage=1.0),
+            kernel_initializer=Orthogonal(),
         )
 
         # Decoder layers
-        Model_D0 = DenseTranspose(Model_E5, activation="relu", output_dim=ENCODING)
-        Model_D1 = DenseTranspose(Model_E4, activation="relu", output_dim=DENSE_2)
-        Model_D2 = DenseTranspose(Model_E3, activation="relu", output_dim=DENSE_1)
+
+        Model_D0 = DenseTranspose(Model_E5, activation="relu", output_dim=ENCODING,)
+        Model_D1 = DenseTranspose(Model_E4, activation="relu", output_dim=DENSE_2,)
+        Model_D2 = DenseTranspose(Model_E3, activation="relu", output_dim=DENSE_1,)
         Model_D3 = RepeatVector(self.input_shape[1])
         Model_D4 = Bidirectional(
             LSTM(
@@ -247,11 +260,16 @@ class SEQ_2_SEQ_VAE(HyperModel):
         # Define and instanciate encoder
         x = Input(shape=self.input_shape[1:])
         encoder = Model_E0(x)
+        encoder = BatchNormalization()(encoder)
         encoder = Model_E1(encoder)
+        encoder = BatchNormalization()(encoder)
         encoder = Model_E2(encoder)
+        encoder = BatchNormalization()(encoder)
         encoder = Model_E3(encoder)
+        encoder = BatchNormalization()(encoder)
         encoder = Dropout(DROPOUT_RATE)(encoder)
         encoder = Model_E4(encoder)
+        encoder = BatchNormalization()(encoder)
         encoder = Model_E5(encoder)
 
         z_mean = Dense(ENCODING)(encoder)
@@ -265,21 +283,26 @@ class SEQ_2_SEQ_VAE(HyperModel):
         if "MMD" in self.loss:
             z = MMDiscrepancyLayer()(z)
 
-        # Define and instanciate decoder
+        # Define and instanciate generator
         generator = Model_D0(z)
+        generator = BatchNormalization()(generator)
         generator = Model_D1(generator)
+        generator = BatchNormalization()(generator)
         generator = Model_D2(generator)
+        generator = BatchNormalization()(generator)
         generator = Model_D3(generator)
+        generator = BatchNormalization()(generator)
         generator = Model_D4(generator)
+        generator = BatchNormalization()(generator)
         generator = Model_D5(generator)
         x_decoded_mean = TimeDistributed(Dense(self.input_shape[2]))(generator)
 
         # end-to-end autoencoder
-        vae = Model(x, x_decoded_mean)
+        vae = Model(x, x_decoded_mean, name="SEQ_2_SEQ_VAE")
 
-        def huber_loss(x, x_decoded_mean):
-            huber_loss = Huber(reduction="sum", delta=100.0)
-            return self.input_shape[1:] * huber_loss(x, x_decoded_mean)
+        def huber_loss(x_, x_decoded_mean_):
+            huber = Huber(reduction="sum", delta=100.0)
+            return self.input_shape[1:] * huber(x_, x_decoded_mean_)
 
         vae.compile(
             loss=huber_loss,
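Note: both files build their decoders from DenseTranspose layers, which tie each decoder weight matrix to the transposed kernel of the corresponding encoder Dense layer. The layer itself is defined elsewhere in the repository and is not part of this diff; the sketch below only illustrates the standard Keras tied-weights recipe it appears to follow, and apart from the class name every detail here is an assumption rather than the repository's actual code.

    import tensorflow as tf
    from tensorflow.keras.layers import Layer


    class DenseTranspose(Layer):
        """Decoder-side Dense layer whose kernel is the transpose of a tied
        encoder Dense layer; only the bias is a new trainable weight."""

        def __init__(self, dense, activation=None, output_dim=None, **kwargs):
            self.dense = dense  # tied encoder layer (must be built first)
            self.activation = tf.keras.activations.get(activation)
            self.output_dim = output_dim  # kept only to mirror the call signature
            super().__init__(**kwargs)

        def build(self, batch_input_shape):
            # The output width equals the tied layer's input width, so the
            # bias is the only weight created here.
            self.biases = self.add_weight(
                name="bias",
                shape=self.dense.input_shape[-1:],
                initializer="zeros",
            )
            super().build(batch_input_shape)

        def call(self, inputs):
            # y = activation(x @ W_encoder^T + b)
            z = tf.matmul(inputs, self.dense.weights[0], transpose_b=True)
            return self.activation(z + self.biases)

Tying the decoder kernels to the encoder halves the number of dense parameters and keeps the decoder an exact mirror of the encoder, which is consistent with how Model_D0-D2 reuse Model_E5-E3 above.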
diff --git a/source/models.py b/source/models.py
index ebcf6bb0d85df760fb08f9725c87e29d9c743104..9e59b8bd5e6ed0175b3de838457e72f593a6d0d4 100644
--- a/source/models.py
+++ b/source/models.py
@@ -82,20 +82,10 @@ class SEQ_2_SEQ_AE:
 
         # Decoder layers
         Model_D0 = DenseTranspose(
-            Model_E5,
-            activation="relu",
-            output_dim=self.ENCODING,
-        )
-        Model_D1 = DenseTranspose(
-            Model_E4,
-            activation="relu",
-            output_dim=self.DENSE_2,
-        )
-        Model_D2 = DenseTranspose(
-            Model_E3,
-            activation="relu",
-            output_dim=self.DENSE_1,
+            Model_E5, activation="relu", output_dim=self.ENCODING,
         )
+        Model_D1 = DenseTranspose(Model_E4, activation="relu", output_dim=self.DENSE_2,)
+        Model_D2 = DenseTranspose(Model_E3, activation="relu", output_dim=self.DENSE_1,)
         Model_D3 = RepeatVector(self.input_shape[1])
         Model_D4 = Bidirectional(
             LSTM(
@@ -229,20 +219,10 @@ class SEQ_2_SEQ_VAE:
 
         # Decoder layers
         Model_D0 = DenseTranspose(
-            Model_E5,
-            activation="relu",
-            output_dim=self.ENCODING,
-        )
-        Model_D1 = DenseTranspose(
-            Model_E4,
-            activation="relu",
-            output_dim=self.DENSE_2,
-        )
-        Model_D2 = DenseTranspose(
-            Model_E3,
-            activation="relu",
-            output_dim=self.DENSE_1,
+            Model_E5, activation="relu", output_dim=self.ENCODING,
         )
+        Model_D1 = DenseTranspose(Model_E4, activation="relu", output_dim=self.DENSE_2,)
+        Model_D2 = DenseTranspose(Model_E3, activation="relu", output_dim=self.DENSE_1,)
         Model_D3 = RepeatVector(self.input_shape[1])
         Model_D4 = Bidirectional(
             LSTM(
@@ -321,9 +301,9 @@ class SEQ_2_SEQ_VAE:
         _x_decoded_mean = TimeDistributed(Dense(self.input_shape[2]))(_generator)
         generator = Model(g, _x_decoded_mean, name="SEQ_2_SEQ_VGenerator")
 
-        def huber_loss(x, x_decoded_mean):
-            huber_loss = Huber(reduction="sum", delta=100.0)
-            return self.input_shape[1:] * huber_loss(x, x_decoded_mean)
+        def huber_loss(x_, x_decoded_mean_):
+            huber = Huber(reduction="sum", delta=100.0)
+            return self.input_shape[1:] * huber(x_, x_decoded_mean_)
 
         vae.compile(
             loss=huber_loss,
@@ -342,6 +322,7 @@ class SEQ_2_SEQ_VAME:
     pass
 class SEQ_2_SEQ_MMVAE:
     pass
+
 # TODO next:
 # - VAE loss function (though this should be analysed later on taking the encodings into account)
 # - Smaller input sliding window (10-15 frames)
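Note: the MMDiscrepancyLayer applied to z in hypermodels.py (and the pending "VAE loss function" TODO) concerns the latent regularizer, whose definition is also not included in this diff. For context, one common way to implement the idea, following the widely used InfoVAE/MMD recipe, is an identity layer that registers a Gaussian-kernel MMD penalty between the sampled latent codes and a unit-Gaussian prior via add_loss. All names and details below are assumptions for illustration, not the repository's code.

    import tensorflow as tf
    from tensorflow.keras import backend as K
    from tensorflow.keras.layers import Layer


    def compute_kernel(x, y):
        # Pairwise RBF kernel between two batches of latent samples.
        x_size, y_size, dim = tf.shape(x)[0], tf.shape(y)[0], tf.shape(x)[1]
        tiled_x = tf.tile(tf.reshape(x, [x_size, 1, dim]), [1, y_size, 1])
        tiled_y = tf.tile(tf.reshape(y, [1, y_size, dim]), [x_size, 1, 1])
        return tf.exp(
            -tf.reduce_mean(tf.square(tiled_x - tiled_y), axis=2)
            / tf.cast(dim, tf.float32)
        )


    def compute_mmd(x, y):
        # MMD^2 estimate: E[k(x,x)] + E[k(y,y)] - 2 E[k(x,y)].
        return (
            tf.reduce_mean(compute_kernel(x, x))
            + tf.reduce_mean(compute_kernel(y, y))
            - 2 * tf.reduce_mean(compute_kernel(x, y))
        )


    class MMDiscrepancyLayer(Layer):
        """Identity layer that adds an MMD penalty between the sampled
        latent codes and a unit-Gaussian prior to the model's losses."""

        def call(self, z):
            true_samples = K.random_normal(K.shape(z))
            self.add_loss(compute_mmd(true_samples, z))
            return z

Compared with the analytic KL term, an MMD penalty regularizes the aggregate posterior rather than each sample's posterior, which is one reason the code keeps both options behind the `if "MMD" in self.loss` switch.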