diff --git a/source/hypermodels.py b/source/hypermodels.py
index ab7fc667b4fffe4ef399b6f716121104885693b9..61121ab5b79e9bcae490fe4c5ec594f292c6ded0 100644
--- a/source/hypermodels.py
+++ b/source/hypermodels.py
@@ -196,6 +196,7 @@ class SEQ_2_SEQ_VAE(HyperModel):
             strides=1,
             padding="causal",
             activation="relu",
+            kernel_initializer=he_uniform(),
         )
         Model_E1 = Bidirectional(
             LSTM(
@@ -213,19 +214,31 @@ class SEQ_2_SEQ_VAE(HyperModel):
                 kernel_constraint=UnitNorm(axis=0),
             )
         )
-        Model_E3 = Dense(DENSE_1, activation="relu", kernel_constraint=UnitNorm(axis=0))
-        Model_E4 = Dense(DENSE_2, activation="relu", kernel_constraint=UnitNorm(axis=0))
+        Model_E3 = Dense(
+            DENSE_1,
+            activation="relu",
+            kernel_constraint=UnitNorm(axis=0),
+            kernel_initializer=he_uniform(),
+        )
+        Model_E4 = Dense(
+            DENSE_2,
+            activation="relu",
+            kernel_constraint=UnitNorm(axis=0),
+            kernel_initializer=he_uniform(),
+        )
         Model_E5 = Dense(
             ENCODING,
             activation="relu",
             kernel_constraint=UnitNorm(axis=1),
             activity_regularizer=UncorrelatedFeaturesConstraint(3, weightage=1.0),
+            kernel_initializer=Orthogonal(),
         )
 
         # Decoder layers
-        Model_D0 = DenseTranspose(Model_E5, activation="relu", output_dim=ENCODING)
-        Model_D1 = DenseTranspose(Model_E4, activation="relu", output_dim=DENSE_2)
-        Model_D2 = DenseTranspose(Model_E3, activation="relu", output_dim=DENSE_1)
+
+        Model_D0 = DenseTranspose(Model_E5, activation="relu", output_dim=ENCODING)
+        Model_D1 = DenseTranspose(Model_E4, activation="relu", output_dim=DENSE_2)
+        Model_D2 = DenseTranspose(Model_E3, activation="relu", output_dim=DENSE_1)
         Model_D3 = RepeatVector(self.input_shape[1])
         Model_D4 = Bidirectional(
             LSTM(
@@ -247,11 +260,16 @@ class SEQ_2_SEQ_VAE(HyperModel):
         # Define and instanciate encoder
         x = Input(shape=self.input_shape[1:])
         encoder = Model_E0(x)
+        encoder = BatchNormalization()(encoder)
         encoder = Model_E1(encoder)
+        encoder = BatchNormalization()(encoder)
         encoder = Model_E2(encoder)
+        encoder = BatchNormalization()(encoder)
         encoder = Model_E3(encoder)
+        encoder = BatchNormalization()(encoder)
         encoder = Dropout(DROPOUT_RATE)(encoder)
         encoder = Model_E4(encoder)
+        encoder = BatchNormalization()(encoder)
         encoder = Model_E5(encoder)
 
         z_mean = Dense(ENCODING)(encoder)
@@ -265,21 +283,26 @@ class SEQ_2_SEQ_VAE(HyperModel):
         if "MMD" in self.loss:
             z = MMDiscrepancyLayer()(z)
 
-        # Define and instanciate decoder
+        # Define and instantiate generator
         generator = Model_D0(z)
+        generator = BatchNormalization()(generator)
         generator = Model_D1(generator)
+        generator = BatchNormalization()(generator)
         generator = Model_D2(generator)
+        generator = BatchNormalization()(generator)
         generator = Model_D3(generator)
+        generator = BatchNormalization()(generator)
         generator = Model_D4(generator)
+        generator = BatchNormalization()(generator)
         generator = Model_D5(generator)
         x_decoded_mean = TimeDistributed(Dense(self.input_shape[2]))(generator)
 
         # end-to-end autoencoder
-        vae = Model(x, x_decoded_mean)
+        vae = Model(x, x_decoded_mean, name="SEQ_2_SEQ_VAE")
 
-        def huber_loss(x, x_decoded_mean):
-            huber_loss = Huber(reduction="sum", delta=100.0)
-            return self.input_shape[1:] * huber_loss(x, x_decoded_mean)
+        def huber_loss(x_, x_decoded_mean_):
+            huber = Huber(reduction="sum", delta=100.0)
+            return self.input_shape[1:] * huber(x_, x_decoded_mean_)
 
         vae.compile(
             loss=huber_loss,
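
The hypermodels.py changes above follow a single pattern: each ReLU layer gets an explicit He-uniform kernel initializer, the bottleneck layer gets an orthogonal initializer, and a BatchNormalization layer is inserted after every encoder and generator block; the rename inside huber_loss simply stops the local variable from shadowing the enclosing function's name. The diff assumes he_uniform, Orthogonal, and BatchNormalization are already imported in this module. A minimal, self-contained sketch of the same pattern, with placeholder sizes rather than the model's tuned hyperparameters:

```python
# Minimal sketch of the initializer + BatchNormalization pattern applied in
# this diff. DENSE_1, ENCODING, and the input width are placeholder values,
# not the hyperparameters tuned by the HyperModel.
from tensorflow.keras.initializers import Orthogonal, he_uniform
from tensorflow.keras.layers import BatchNormalization, Dense, Input
from tensorflow.keras.models import Model

DENSE_1 = 64   # placeholder layer width
ENCODING = 16  # placeholder bottleneck size

x = Input(shape=(128,))
h = Dense(DENSE_1, activation="relu", kernel_initializer=he_uniform())(x)
h = BatchNormalization()(h)  # normalize activations between blocks
z = Dense(ENCODING, activation="relu", kernel_initializer=Orthogonal())(h)
encoder = Model(x, z, name="toy_encoder")
```

He-uniform initialization keeps activation variance stable through stacked ReLU layers, which is presumably why it is paired here with batch normalization rather than replacing it.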
diff --git a/source/models.py b/source/models.py
index ebcf6bb0d85df760fb08f9725c87e29d9c743104..9e59b8bd5e6ed0175b3de838457e72f593a6d0d4 100644
--- a/source/models.py
+++ b/source/models.py
@@ -82,20 +82,10 @@ class SEQ_2_SEQ_AE:
 
         # Decoder layers
         Model_D0 = DenseTranspose(
-            Model_E5,
-            activation="relu",
-            output_dim=self.ENCODING,
-        )
-        Model_D1 = DenseTranspose(
-            Model_E4,
-            activation="relu",
-            output_dim=self.DENSE_2,
-        )
-        Model_D2 = DenseTranspose(
-            Model_E3,
-            activation="relu",
-            output_dim=self.DENSE_1,
+            Model_E5, activation="relu", output_dim=self.ENCODING,
         )
+        Model_D1 = DenseTranspose(Model_E4, activation="relu", output_dim=self.DENSE_2)
+        Model_D2 = DenseTranspose(Model_E3, activation="relu", output_dim=self.DENSE_1)
         Model_D3 = RepeatVector(self.input_shape[1])
         Model_D4 = Bidirectional(
             LSTM(
@@ -229,20 +219,10 @@ class SEQ_2_SEQ_VAE:
         # Decoder layers
 
         Model_D0 = DenseTranspose(
-            Model_E5,
-            activation="relu",
-            output_dim=self.ENCODING,
-        )
-        Model_D1 = DenseTranspose(
-            Model_E4,
-            activation="relu",
-            output_dim=self.DENSE_2,
-        )
-        Model_D2 = DenseTranspose(
-            Model_E3,
-            activation="relu",
-            output_dim=self.DENSE_1,
+            Model_E5, activation="relu", output_dim=self.ENCODING,
         )
+        Model_D1 = DenseTranspose(Model_E4, activation="relu", output_dim=self.DENSE_2)
+        Model_D2 = DenseTranspose(Model_E3, activation="relu", output_dim=self.DENSE_1)
         Model_D3 = RepeatVector(self.input_shape[1])
         Model_D4 = Bidirectional(
             LSTM(
@@ -321,9 +301,9 @@ class SEQ_2_SEQ_VAE:
         _x_decoded_mean = TimeDistributed(Dense(self.input_shape[2]))(_generator)
         generator = Model(g, _x_decoded_mean, name="SEQ_2_SEQ_VGenerator")
 
-        def huber_loss(x, x_decoded_mean):
-            huber_loss = Huber(reduction="sum", delta=100.0)
-            return self.input_shape[1:] * huber_loss(x, x_decoded_mean)
+        def huber_loss(x_, x_decoded_mean_):
+            huber = Huber(reduction="sum", delta=100.0)
+            return self.input_shape[1:] * huber(x_, x_decoded_mean_)
 
         vae.compile(
             loss=huber_loss,
@@ -342,6 +322,7 @@ class SEQ_2_SEQ_VAME:
 class SEQ_2_SEQ_MMVAE:
     pass
 
+
 # TODO next:
 #      - VAE loss function (though this should be analysed later on taking the encodings into account)
 #      - Smaller input sliding window (10-15 frames)
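
For context on the DenseTranspose calls reformatted throughout models.py: this is the repository's tied-weights decoder layer, which reuses the transposed kernel of a given encoder Dense layer so the decoder mirrors the encoder without learning a separate weight matrix. Its real implementation lives elsewhere in the repo and is not shown in this diff; the sketch below is an assumed reconstruction matching the call signature used above (tied Dense layer, activation, output_dim), not the actual code:

```python
# Illustrative sketch of a tied-weights DenseTranspose layer, matching the
# call signature used in this diff (tied Dense layer, activation, output_dim).
# This is an assumed reconstruction, NOT the repository's implementation.
import tensorflow as tf
from tensorflow.keras.layers import Layer


class DenseTranspose(Layer):
    def __init__(self, dense, activation=None, output_dim=None, **kwargs):
        super().__init__(**kwargs)
        self.dense = dense  # encoder Dense layer whose kernel is reused
        self.activation = tf.keras.activations.get(activation)
        self.output_dim = output_dim  # kept for bookkeeping/serialization

    def build(self, batch_input_shape):
        # Only a bias is trained here; the kernel is borrowed (transposed)
        # from the tied encoder layer, which must already be built.
        self.biases = self.add_weight(
            name="bias", shape=[self.dense.input_shape[-1]], initializer="zeros"
        )
        super().build(batch_input_shape)

    def call(self, inputs):
        z = tf.matmul(inputs, self.dense.weights[0], transpose_b=True)
        return self.activation(z + self.biases)
```

Under that assumption, tying the decoder to the encoder roughly halves the autoencoder's Dense parameter count, which is consistent with the diff only reformatting these calls rather than changing their arguments.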