# @author lucasmiranda42

from tensorflow.keras import backend as K
from tensorflow.keras import Input, Model, Sequential
from tensorflow.keras.callbacks import LambdaCallback
from tensorflow.keras.constraints import UnitNorm
from tensorflow.keras.initializers import he_uniform, Orthogonal
from tensorflow.keras.layers import BatchNormalization, Bidirectional, Dense
from tensorflow.keras.layers import Dropout, Lambda, LSTM
from tensorflow.keras.layers import RepeatVector, TimeDistributed
from tensorflow.keras.losses import Huber
from tensorflow.keras.optimizers import Adam

# The custom layers and helpers used below (DenseTranspose, UncorrelatedFeaturesConstraint,
# KLDivergenceLayer, MMDiscrepancyLayer, sampling) come from model_utils via this star import.
from source.model_utils import *

import tensorflow as tf


class SEQ_2_SEQ_AE:
    def __init__(
        self,
        input_shape,
        CONV_filters=256,
        LSTM_units_1=256,
        LSTM_units_2=64,
        DENSE_2=64,
        DROPOUT_RATE=0.25,
        ENCODING=32,
        learn_rate=1e-3,
    ):
        self.input_shape = input_shape
        self.CONV_filters = CONV_filters
        self.LSTM_units_1 = LSTM_units_1
        self.LSTM_units_2 = LSTM_units_2
        self.DENSE_1 = LSTM_units_2
        self.DENSE_2 = DENSE_2
        self.DROPOUT_RATE = DROPOUT_RATE
        self.ENCODING = ENCODING
        self.learn_rate = learn_rate

    def build(self):
        # Encoder Layers
        Model_E0 = tf.keras.layers.Conv1D(
            filters=self.CONV_filters,
            kernel_size=5,
            strides=1,
            padding="causal",
            activation="relu",
            kernel_initializer=he_uniform(),
        )
        Model_E1 = Bidirectional(
            LSTM(
                self.LSTM_units_1,
                activation="tanh",
                return_sequences=True,
                kernel_constraint=UnitNorm(axis=0),
            )
        )
        Model_E2 = Bidirectional(
            LSTM(
                self.LSTM_units_2,
                activation="tanh",
                return_sequences=False,
                kernel_constraint=UnitNorm(axis=0),
            )
        )
        Model_E3 = Dense(
            self.DENSE_1,
            activation="relu",
            kernel_constraint=UnitNorm(axis=0),
            kernel_initializer=he_uniform(),
        )
        Model_E4 = Dense(
            self.DENSE_2,
            activation="relu",
            kernel_constraint=UnitNorm(axis=0),
            kernel_initializer=he_uniform(),
        )
        Model_E5 = Dense(
            self.ENCODING,
            activation="relu",
            kernel_constraint=UnitNorm(axis=1),
            activity_regularizer=UncorrelatedFeaturesConstraint(3, weightage=1.0),
            kernel_initializer=Orthogonal(),
        )

        # Decoder layers
        Model_D0 = DenseTranspose(
            Model_E5, activation="relu", output_dim=self.ENCODING,
        )
        Model_D1 = DenseTranspose(Model_E4, activation="relu", output_dim=self.DENSE_2,)
        Model_D2 = DenseTranspose(Model_E3, activation="relu", output_dim=self.DENSE_1,)
        Model_D3 = RepeatVector(self.input_shape[1])
        Model_D4 = Bidirectional(
            LSTM(
                self.LSTM_units_1,
                activation="tanh",
                return_sequences=True,
                kernel_constraint=UnitNorm(axis=1),
            )
        )
        Model_D5 = Bidirectional(
            LSTM(
                self.LSTM_units_1,
                activation="sigmoid",
                return_sequences=True,
                kernel_constraint=UnitNorm(axis=1),
            )
        )

        # Define and instantiate encoder
        encoder = Sequential(name="SEQ_2_SEQ_Encoder")
        encoder.add(Input(shape=self.input_shape[1:]))
        encoder.add(Model_E0)
        encoder.add(BatchNormalization())
        encoder.add(Model_E1)
        encoder.add(BatchNormalization())
        encoder.add(Model_E2)
        encoder.add(BatchNormalization())
        encoder.add(Model_E3)
        encoder.add(BatchNormalization())
        encoder.add(Dropout(self.DROPOUT_RATE))
        encoder.add(Model_E4)
        encoder.add(BatchNormalization())
        encoder.add(Model_E5)

        # Define and instantiate decoder
        decoder = Sequential(name="SEQ_2_SEQ_Decoder")
        decoder.add(Model_D0)
        decoder.add(BatchNormalization())
        decoder.add(Model_D1)
        decoder.add(BatchNormalization())
        decoder.add(Model_D2)
        decoder.add(BatchNormalization())
        decoder.add(Model_D3)
        decoder.add(Model_D4)
        decoder.add(BatchNormalization())
        decoder.add(Model_D5)
        decoder.add(TimeDistributed(Dense(self.input_shape[2])))

        model = Sequential([encoder, decoder], name="SEQ_2_SEQ_AE")

        model.compile(
            loss=Huber(reduction="sum", delta=100.0),
            optimizer=Adam(lr=self.learn_rate, clipvalue=0.5,),
            metrics=["mae"],
        )

        return encoder, decoder, model
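
    # Usage sketch (illustrative only: the input shape below is an arbitrary
    # (samples, timesteps, features) example and X_train is a placeholder array):
    #
    #   encoder, decoder, ae = SEQ_2_SEQ_AE(input_shape=(1024, 50, 28)).build()
    #   ae.fit(X_train, X_train, epochs=25, batch_size=256)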


class SEQ_2_SEQ_VAE:
    def __init__(
        self,
        input_shape,
        CONV_filters=256,
        LSTM_units_1=256,
        LSTM_units_2=64,
        DENSE_2=64,
        DROPOUT_RATE=0.25,
        ENCODING=32,
        learn_rate=1e-3,
        loss="ELBO+MMD",
        kl_warmup_epochs=0,
        mmd_warmup_epochs=0,
    ):
        self.input_shape = input_shape
        self.CONV_filters = CONV_filters
        self.LSTM_units_1 = LSTM_units_1
        self.LSTM_units_2 = LSTM_units_2
        self.DENSE_1 = LSTM_units_2
        self.DENSE_2 = DENSE_2
        self.DROPOUT_RATE = DROPOUT_RATE
        self.ENCODING = ENCODING
        self.learn_rate = learn_rate
        self.loss = loss
        self.kl_warmup = kl_warmup_epochs
        self.mmd_warmup = mmd_warmup_epochs

        assert (
            "ELBO" in self.loss or "MMD" in self.loss
        ), "loss must be one of ELBO, MMD or ELBO+MMD (default)"

    def build(self):
        # Encoder Layers
        Model_E0 = tf.keras.layers.Conv1D(
            filters=self.CONV_filters,
            kernel_size=5,
            strides=1,
            padding="causal",
            activation="relu",
            kernel_initializer=he_uniform(),
        )
        Model_E1 = Bidirectional(
            LSTM(
                self.LSTM_units_1,
                activation="tanh",
                return_sequences=True,
                kernel_constraint=UnitNorm(axis=0),
            )
        )
        Model_E2 = Bidirectional(
            LSTM(
                self.LSTM_units_2,
                activation="tanh",
                return_sequences=False,
                kernel_constraint=UnitNorm(axis=0),
            )
        )
        Model_E3 = Dense(
            self.DENSE_1,
            activation="relu",
            kernel_constraint=UnitNorm(axis=0),
            kernel_initializer=he_uniform(),
        )
        Model_E4 = Dense(
            self.DENSE_2,
            activation="relu",
            kernel_constraint=UnitNorm(axis=0),
            kernel_initializer=he_uniform(),
        )
        Model_E5 = Dense(
            self.ENCODING,
            activation="relu",
            kernel_constraint=UnitNorm(axis=1),
            activity_regularizer=UncorrelatedFeaturesConstraint(3, weightage=1.0),
            kernel_initializer=Orthogonal(),
        )

        # Decoder layers
        Model_B1 = BatchNormalization()
        Model_B2 = BatchNormalization()
        Model_B3 = BatchNormalization()
        Model_B4 = BatchNormalization()
        Model_B5 = BatchNormalization()
        Model_D0 = DenseTranspose(
            Model_E5, activation="relu", output_dim=self.ENCODING,
        )
        Model_D1 = DenseTranspose(Model_E4, activation="relu", output_dim=self.DENSE_2,)
        Model_D2 = DenseTranspose(Model_E3, activation="relu", output_dim=self.DENSE_1,)
        Model_D3 = RepeatVector(self.input_shape[1])
        Model_D4 = Bidirectional(
            LSTM(
                self.LSTM_units_1,
                activation="tanh",
                return_sequences=True,
                kernel_constraint=UnitNorm(axis=1),
            )
        )
        Model_D5 = Bidirectional(
            LSTM(
                self.LSTM_units_1,
                activation="sigmoid",
                return_sequences=True,
                kernel_constraint=UnitNorm(axis=1),
            )
        )

        # Define and instantiate encoder
        x = Input(shape=self.input_shape[1:])
        encoder = Model_E0(x)
        encoder = BatchNormalization()(encoder)
        encoder = Model_E1(encoder)
        encoder = BatchNormalization()(encoder)
        encoder = Model_E2(encoder)
        encoder = BatchNormalization()(encoder)
        encoder = Model_E3(encoder)
        encoder = BatchNormalization()(encoder)
        encoder = Dropout(self.DROPOUT_RATE)(encoder)
        encoder = Model_E4(encoder)
        encoder = BatchNormalization()(encoder)
        encoder = Model_E5(encoder)

        z_mean = Dense(self.ENCODING)(encoder)
        z_log_sigma = Dense(self.ENCODING)(encoder)

        # Define and control custom loss functions
        kl_warmup_callback = False
        if "ELBO" in self.loss:

            kl_beta = K.variable(1.0, name="kl_beta")
            kl_beta._trainable = False
            if self.kl_warmup:
                # Anneal the KL weight from 0 to 1 over the first kl_warmup epochs
                kl_warmup_callback = LambdaCallback(
                    on_epoch_begin=lambda epoch, logs: K.set_value(
                        kl_beta, K.min([epoch / self.kl_warmup, 1])
                    )
                )

            z_mean, z_log_sigma = KLDivergenceLayer(beta=kl_beta)([z_mean, z_log_sigma])

        z = Lambda(sampling)([z_mean, z_log_sigma])

        mmd_warmup_callback = False
        if "MMD" in self.loss:

            mmd_beta = K.variable(1.0, name="mmd_beta")
            mmd_beta._trainable = False
            if self.mmd_warmup:
                # Anneal the MMD weight from 0 to 1 over the first mmd_warmup epochs
                mmd_warmup_callback = LambdaCallback(
                    on_epoch_begin=lambda epoch, logs: K.set_value(
                        mmd_beta, K.min([epoch / self.mmd_warmup, 1])
                    )
                )

            z = MMDiscrepancyLayer(beta=mmd_beta)(z)

        # Define and instantiate generator
        generator = Model_D0(z)
        generator = Model_B1(generator)
        generator = Model_D1(generator)
        generator = Model_B2(generator)
        generator = Model_D2(generator)
        generator = Model_B3(generator)
        generator = Model_D3(generator)
        generator = Model_D4(generator)
        generator = Model_B4(generator)
        generator = Model_D5(generator)
        generator = Model_B5(generator)
        x_decoded_mean = TimeDistributed(Dense(self.input_shape[2]))(generator)

        # end-to-end autoencoder
        encoder = Model(x, z_mean, name="SEQ_2_SEQ_VEncoder")
        vae = Model(x, x_decoded_mean, name="SEQ_2_SEQ_VAE")

        # Build generator as a separate entity
        g = Input(shape=self.ENCODING)
        _generator = Model_D0(g)
        _generator = Model_B1(_generator)
        _generator = Model_D1(_generator)
        _generator = Model_B2(_generator)
        _generator = Model_D2(_generator)
        _generator = Model_B3(_generator)
        _generator = Model_D3(_generator)
        _generator = Model_D4(_generator)
        _generator = Model_B4(_generator)
        _generator = Model_D5(_generator)
        _generator = Model_B5(_generator)
        _x_decoded_mean = TimeDistributed(Dense(self.input_shape[2]))(_generator)
        generator = Model(g, _x_decoded_mean, name="SEQ_2_SEQ_VGenerator")

        def huber_loss(x_, x_decoded_mean_):
            huber = Huber(reduction="sum", delta=100.0)
            return self.input_shape[1:] * huber(x_, x_decoded_mean_)

        vae.compile(
            loss=huber_loss,
            optimizer=Adam(lr=self.learn_rate,),
            metrics=["mae"],
        )

        return encoder, generator, vae, kl_warmup_callback, mmd_warmup_callback
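
    # Usage sketch (illustrative: the input shape is an arbitrary (samples, timesteps,
    # features) example and X_train is a placeholder array; warmup callbacks are False
    # when the corresponding warmup is disabled, so filter them before passing to fit):
    #
    #   encoder, generator, vae, kl_cb, mmd_cb = SEQ_2_SEQ_VAE(
    #       input_shape=(1024, 50, 28), kl_warmup_epochs=10, mmd_warmup_epochs=10
    #   ).build()
    #   vae.fit(X_train, X_train, epochs=50, batch_size=256,
    #           callbacks=[cb for cb in (kl_cb, mmd_cb) if cb])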


class SEQ_2_SEQ_VAEP:
    def __init__(
        self,
        input_shape,
        CONV_filters=256,
        LSTM_units_1=256,
        LSTM_units_2=64,
        DENSE_2=64,
        DROPOUT_RATE=0.25,
        ENCODING=32,
        learn_rate=1e-3,
        loss="ELBO+MMD",
        kl_warmup_epochs=0,
        mmd_warmup_epochs=0,
    ):
        self.input_shape = input_shape
        self.CONV_filters = CONV_filters
        self.LSTM_units_1 = LSTM_units_1
        self.LSTM_units_2 = LSTM_units_2
        self.DENSE_1 = LSTM_units_2
        self.DENSE_2 = DENSE_2
        self.DROPOUT_RATE = DROPOUT_RATE
        self.ENCODING = ENCODING
        self.learn_rate = learn_rate
        self.loss = loss
        self.kl_warmup = kl_warmup_epochs
        self.mmd_warmup = mmd_warmup_epochs

        assert (
            "ELBO" in self.loss or "MMD" in self.loss
        ), "loss must be one of ELBO, MMD or ELBO+MMD (default)"

    def build(self):
        # Encoder Layers
        Model_E0 = tf.keras.layers.Conv1D(
            filters=self.CONV_filters,
            kernel_size=5,
            strides=1,
            padding="causal",
            activation="relu",
            kernel_initializer=he_uniform(),
        )
        Model_E1 = Bidirectional(
            LSTM(
                self.LSTM_units_1,
                activation="tanh",
                return_sequences=True,
                kernel_constraint=UnitNorm(axis=0),
            )
        )
        Model_E2 = Bidirectional(
            LSTM(
                self.LSTM_units_2,
                activation="tanh",
                return_sequences=False,
                kernel_constraint=UnitNorm(axis=0),
            )
        )
        Model_E3 = Dense(
            self.DENSE_1,
            activation="relu",
            kernel_constraint=UnitNorm(axis=0),
            kernel_initializer=he_uniform(),
        )
        Model_E4 = Dense(
            self.DENSE_2,
            activation="relu",
            kernel_constraint=UnitNorm(axis=0),
            kernel_initializer=he_uniform(),
        )
        Model_E5 = Dense(
            self.ENCODING,
            activation="relu",
            kernel_constraint=UnitNorm(axis=1),
            activity_regularizer=UncorrelatedFeaturesConstraint(3, weightage=1.0),
            kernel_initializer=Orthogonal(),
        )

        # Decoder layers
        Model_B1 = BatchNormalization()
        Model_B2 = BatchNormalization()
        Model_B3 = BatchNormalization()
        Model_B4 = BatchNormalization()
        Model_B5 = BatchNormalization()
        Model_D0 = DenseTranspose(
            Model_E5, activation="relu", output_dim=self.ENCODING,
        )
        Model_D1 = DenseTranspose(Model_E4, activation="relu", output_dim=self.DENSE_2,)
        Model_D2 = DenseTranspose(Model_E3, activation="relu", output_dim=self.DENSE_1,)
        Model_D3 = RepeatVector(self.input_shape[1])
        Model_D4 = Bidirectional(
            LSTM(
                self.LSTM_units_1,
                activation="tanh",
                return_sequences=True,
                kernel_constraint=UnitNorm(axis=1),
            )
        )
        Model_D5 = Bidirectional(
            LSTM(
                self.LSTM_units_1,
                activation="sigmoid",
                return_sequences=True,
                kernel_constraint=UnitNorm(axis=1),
            )
        )

        # Define and instantiate encoder
        x = Input(shape=self.input_shape[1:])
        encoder = Model_E0(x)
        encoder = BatchNormalization()(encoder)
        encoder = Model_E1(encoder)
        encoder = BatchNormalization()(encoder)
        encoder = Model_E2(encoder)
        encoder = BatchNormalization()(encoder)
        encoder = Model_E3(encoder)
        encoder = BatchNormalization()(encoder)
        encoder = Dropout(self.DROPOUT_RATE)(encoder)
        encoder = Model_E4(encoder)
        encoder = BatchNormalization()(encoder)
        encoder = Model_E5(encoder)

        z_mean = Dense(self.ENCODING)(encoder)
        z_log_sigma = Dense(self.ENCODING)(encoder)

        # Define and control custom loss functions
        kl_warmup_callback = False
        if "ELBO" in self.loss:

            kl_beta = K.variable(1.0, name="kl_beta")
            kl_beta._trainable = False
            if self.kl_warmup:
                kl_warmup_callback = LambdaCallback(
                    on_epoch_begin=lambda epoch, logs: K.set_value(
                        kl_beta, K.min([epoch / self.kl_warmup, 1])
                    )
                )

            z_mean, z_log_sigma = KLDivergenceLayer(beta=kl_beta)([z_mean, z_log_sigma])

        z = Lambda(sampling)([z_mean, z_log_sigma])

        mmd_warmup_callback = False
        if "MMD" in self.loss:

            mmd_beta = K.variable(1.0, name="mmd_beta")
            mmd_beta._trainable = False
            if self.mmd_warmup:
                mmd_warmup_callback = LambdaCallback(
                    on_epoch_begin=lambda epoch, logs: K.set_value(
                        mmd_beta, K.min([epoch / self.mmd_warmup, 1])
                    )
                )

            z = MMDiscrepancyLayer(beta=mmd_beta)(z)

        # Define and instantiate generator
        generator = Model_D0(z)
        generator = Model_B1(generator)
        generator = Model_D1(generator)
        generator = Model_B2(generator)
        generator = Model_D2(generator)
        generator = Model_B3(generator)
        generator = Model_D3(generator)
        generator = Model_D4(generator)
        generator = Model_B4(generator)
        generator = Model_D5(generator)
        generator = Model_B5(generator)
        x_decoded_mean = TimeDistributed(
            Dense(self.input_shape[2]), name="vaep_reconstruction"
        )(generator)

        # Define and instantiate predictor
        predictor = Dense(
            self.ENCODING, activation="relu", kernel_initializer=he_uniform()
        )(z)
        predictor = BatchNormalization()(predictor)
        predictor = Dense(
            self.DENSE_2, activation="relu", kernel_initializer=he_uniform()
        )(predictor)
        predictor = BatchNormalization()(predictor)
        predictor = Dense(
            self.DENSE_1, activation="relu", kernel_initializer=he_uniform()
        )(predictor)
        predictor = BatchNormalization()(predictor)
        predictor = RepeatVector(self.input_shape[1])(predictor)
        predictor = Bidirectional(
            LSTM(
                self.LSTM_units_1,
                activation="tanh",
                return_sequences=True,
                kernel_constraint=UnitNorm(axis=1),
            )
        )(predictor)
        predictor = BatchNormalization()(predictor)
        predictor = Bidirectional(
            LSTM(
                self.LSTM_units_1,
                activation="sigmoid",
                return_sequences=True,
                kernel_constraint=UnitNorm(axis=1),
            )
        )(predictor)
        predictor = BatchNormalization()(predictor)
        x_predicted_mean = TimeDistributed(
            Dense(self.input_shape[2]), name="vaep_prediction"
        )(predictor)

        # end-to-end autoencoder
        encoder = Model(x, z_mean, name="SEQ_2_SEQ_VEncoder")
        vaep = Model(
            inputs=x, outputs=[x_decoded_mean, x_predicted_mean], name="SEQ_2_SEQ_VAE"
        )

        # Build generator as a separate entity
        g = Input(shape=self.ENCODING)
        _generator = Model_D0(g)
        _generator = Model_B1(_generator)
        _generator = Model_D1(_generator)
        _generator = Model_B2(_generator)
        _generator = Model_D2(_generator)
        _generator = Model_B3(_generator)
        _generator = Model_D3(_generator)
        _generator = Model_D4(_generator)
        _generator = Model_B4(_generator)
        _generator = Model_D5(_generator)
        _generator = Model_B5(_generator)
        _x_decoded_mean = TimeDistributed(Dense(self.input_shape[2]))(_generator)
        generator = Model(g, _x_decoded_mean, name="SEQ_2_SEQ_VGenerator")

        def huber_loss(x_, x_decoded_mean_):
            huber = Huber(reduction="sum", delta=100.0)
            return self.input_shape[1:] * huber(x_, x_decoded_mean_)

        vaep.compile(
            loss=huber_loss,
            optimizer=Adam(lr=self.learn_rate,),
            metrics=["mae"],
        )

        return encoder, generator, vaep, kl_warmup_callback, mmd_warmup_callback
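
    # Usage sketch (illustrative: shapes and arrays are placeholders; since the model
    # has two outputs, fit expects both a reconstruction target and a prediction target,
    # e.g. the same window and a future window of the sequence):
    #
    #   encoder, generator, vaep, kl_cb, mmd_cb = SEQ_2_SEQ_VAEP(
    #       input_shape=(1024, 50, 28)
    #   ).build()
    #   vaep.fit(X_train, [X_train, X_train_next], epochs=50, batch_size=256)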


class SEQ_2_SEQ_MMVAEP:
    def __init__(
        self,
        input_shape,
        CONV_filters=256,
        LSTM_units_1=256,
        LSTM_units_2=64,
        DENSE_2=64,
        DROPOUT_RATE=0.25,
        ENCODING=32,
        learn_rate=1e-3,
        loss="ELBO+MMD",
        kl_warmup_epochs=0,
        mmd_warmup_epochs=0,
        number_of_components=1,
    ):
        self.input_shape = input_shape
        self.CONV_filters = CONV_filters
        self.LSTM_units_1 = LSTM_units_1
        self.LSTM_units_2 = LSTM_units_2
        self.DENSE_1 = LSTM_units_2
        self.DENSE_2 = DENSE_2
        self.DROPOUT_RATE = DROPOUT_RATE
        self.ENCODING = ENCODING
        self.learn_rate = learn_rate
        self.loss = loss
        self.kl_warmup = kl_warmup_epochs
        self.mmd_warmup = mmd_warmup_epochs
        self.number_of_components = number_of_components
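        # NOTE: number_of_components is stored but not yet used in build(); it is
        # presumably intended for the Gaussian mixture / categorical prior listed
        # in the TODOs at the end of this module.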

        assert (
            "ELBO" in self.loss or "MMD" in self.loss
        ), "loss must be one of ELBO, MMD or ELBO+MMD (default)"

    def build(self):
        # Encoder Layers
        Model_E0 = tf.keras.layers.Conv1D(
            filters=self.CONV_filters,
            kernel_size=5,
            strides=1,
            padding="causal",
            activation="relu",
            kernel_initializer=he_uniform(),
        )
        Model_E1 = Bidirectional(
            LSTM(
                self.LSTM_units_1,
                activation="tanh",
                return_sequences=True,
                kernel_constraint=UnitNorm(axis=0),
            )
        )
        Model_E2 = Bidirectional(
            LSTM(
                self.LSTM_units_2,
                activation="tanh",
                return_sequences=False,
                kernel_constraint=UnitNorm(axis=0),
            )
        )
        Model_E3 = Dense(
            self.DENSE_1,
            activation="relu",
            kernel_constraint=UnitNorm(axis=0),
            kernel_initializer=he_uniform(),
        )
        Model_E4 = Dense(
            self.DENSE_2,
            activation="relu",
            kernel_constraint=UnitNorm(axis=0),
            kernel_initializer=he_uniform(),
        )
        Model_E5 = Dense(
            self.ENCODING,
            activation="relu",
            kernel_constraint=UnitNorm(axis=1),
            activity_regularizer=UncorrelatedFeaturesConstraint(3, weightage=1.0),
            kernel_initializer=Orthogonal(),
        )

        # Decoder layers
        Model_B1 = BatchNormalization()
        Model_B2 = BatchNormalization()
        Model_B3 = BatchNormalization()
        Model_B4 = BatchNormalization()
        Model_B5 = BatchNormalization()
        Model_D0 = DenseTranspose(
            Model_E5, activation="relu", output_dim=self.ENCODING,
        )
        Model_D1 = DenseTranspose(Model_E4, activation="relu", output_dim=self.DENSE_2,)
        Model_D2 = DenseTranspose(Model_E3, activation="relu", output_dim=self.DENSE_1,)
        Model_D3 = RepeatVector(self.input_shape[1])
        Model_D4 = Bidirectional(
            LSTM(
                self.LSTM_units_1,
                activation="tanh",
                return_sequences=True,
                kernel_constraint=UnitNorm(axis=1),
            )
        )
        Model_D5 = Bidirectional(
            LSTM(
                self.LSTM_units_1,
                activation="sigmoid",
                return_sequences=True,
                kernel_constraint=UnitNorm(axis=1),
            )
        )

        # Define and instantiate encoder
        x = Input(shape=self.input_shape[1:])
        encoder = Model_E0(x)
        encoder = BatchNormalization()(encoder)
        encoder = Model_E1(encoder)
        encoder = BatchNormalization()(encoder)
        encoder = Model_E2(encoder)
        encoder = BatchNormalization()(encoder)
        encoder = Model_E3(encoder)
        encoder = BatchNormalization()(encoder)
        encoder = Dropout(self.DROPOUT_RATE)(encoder)
        encoder = Model_E4(encoder)
        encoder = BatchNormalization()(encoder)
        encoder = Model_E5(encoder)

        z_mean = Dense(self.ENCODING)(encoder)
        z_log_sigma = Dense(self.ENCODING)(encoder)

        # Define and control custom loss functions
        kl_warmup_callback = False
        if "ELBO" in self.loss:

            kl_beta = K.variable(1.0, name="kl_beta")
            kl_beta._trainable = False
            if self.kl_warmup:
                kl_warmup_callback = LambdaCallback(
                    on_epoch_begin=lambda epoch, logs: K.set_value(
                        kl_beta, K.min([epoch / self.kl_warmup, 1])
                    )
                )

            z_mean, z_log_sigma = KLDivergenceLayer(beta=kl_beta)([z_mean, z_log_sigma])

        z = Lambda(sampling)([z_mean, z_log_sigma])

        mmd_warmup_callback = False
        if "MMD" in self.loss:

            mmd_beta = K.variable(1.0, name="mmd_beta")
            mmd_beta._trainable = False
            if self.mmd_warmup:
                mmd_warmup_callback = LambdaCallback(
                    on_epoch_begin=lambda epoch, logs: K.set_value(
                        mmd_beta, K.min([epoch / self.mmd_warmup, 1])
                    )
                )

            z = MMDiscrepancyLayer(beta=mmd_beta)(z)

        # Define and instantiate generator
        generator = Model_D0(z)
        generator = Model_B1(generator)
        generator = Model_D1(generator)
        generator = Model_B2(generator)
        generator = Model_D2(generator)
        generator = Model_B3(generator)
        generator = Model_D3(generator)
        generator = Model_D4(generator)
        generator = Model_B4(generator)
        generator = Model_D5(generator)
        generator = Model_B5(generator)
        x_decoded_mean = TimeDistributed(
            Dense(self.input_shape[2]), name="gmvaep_reconstruction"
        )(generator)

        # Define and instantiate predictor
        predictor = Dense(
            self.ENCODING, activation="relu", kernel_initializer=he_uniform()
        )(z)
        predictor = BatchNormalization()(predictor)
        predictor = Dense(
            self.DENSE_2, activation="relu", kernel_initializer=he_uniform()
        )(predictor)
        predictor = BatchNormalization()(predictor)
        predictor = Dense(
            self.DENSE_1, activation="relu", kernel_initializer=he_uniform()
        )(predictor)
        predictor = BatchNormalization()(predictor)
        predictor = RepeatVector(self.input_shape[1])(predictor)
        predictor = Bidirectional(
            LSTM(
                self.LSTM_units_1,
                activation="tanh",
                return_sequences=True,
                kernel_constraint=UnitNorm(axis=1),
            )
        )(predictor)
        predictor = BatchNormalization()(predictor)
        predictor = Bidirectional(
            LSTM(
                self.LSTM_units_1,
                activation="sigmoid",
                return_sequences=True,
                kernel_constraint=UnitNorm(axis=1),
            )
        )(predictor)
        predictor = BatchNormalization()(predictor)
        x_predicted_mean = TimeDistributed(
            Dense(self.input_shape[2]), name="gmvaep_prediction"
        )(predictor)

        # end-to-end autoencoder
        encoder = Model(x, z_mean, name="SEQ_2_SEQ_VEncoder")
        gmvaep = Model(
            inputs=x, outputs=[x_decoded_mean, x_predicted_mean], name="SEQ_2_SEQ_VAE"
        )

        # Build generator as a separate entity
        g = Input(shape=self.ENCODING)
        _generator = Model_D0(g)
        _generator = Model_B1(_generator)
        _generator = Model_D1(_generator)
        _generator = Model_B2(_generator)
        _generator = Model_D2(_generator)
        _generator = Model_B3(_generator)
        _generator = Model_D3(_generator)
        _generator = Model_D4(_generator)
        _generator = Model_B4(_generator)
        _generator = Model_D5(_generator)
        _generator = Model_B5(_generator)
        _x_decoded_mean = TimeDistributed(Dense(self.input_shape[2]))(_generator)
        generator = Model(g, _x_decoded_mean, name="SEQ_2_SEQ_VGenerator")

        def huber_loss(x_, x_decoded_mean_):
            huber = Huber(reduction="sum", delta=100.0)
            return self.input_shape[1:] * huber(x_, x_decoded_mean_)

        gmvaep.compile(
            loss=huber_loss,
            optimizer=Adam(lr=self.learn_rate,),
            metrics=["mae"],
        )

        return encoder, generator, gmvaep, kl_warmup_callback, mmd_warmup_callback
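
    # Usage is the same as SEQ_2_SEQ_VAEP for now: build() returns the same five
    # objects, and the compiled model takes the same inputs and targets. Only the
    # number_of_components argument is new, and it is not yet wired into the graph.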


# TODO:
#       - Gaussian Mixture + Categorical priors -> Deep Clustering
#
# TODO (in the non-immediate future):
#       - free bits paper
#       - Attention mechanism for encoder / decoder (does it make sense?)
#       - Transformer encoder/decoder (does it make sense?)