# @author lucasmiranda42

from tensorflow.keras import backend as K
from tensorflow.keras import Input, Model, Sequential
from tensorflow.keras.activations import softplus
from tensorflow.keras.callbacks import LambdaCallback
from tensorflow.keras.constraints import UnitNorm
from tensorflow.keras.initializers import he_uniform, Orthogonal
from tensorflow.keras.layers import BatchNormalization, Bidirectional, Concatenate
from tensorflow.keras.layers import Dense, Dropout, LSTM
from tensorflow.keras.layers import RepeatVector, Reshape, TimeDistributed
from tensorflow.keras.losses import Huber
from tensorflow.keras.optimizers import Adam
from source.model_utils import *
import tensorflow as tf
import tensorflow_probability as tfp

tfd = tfp.distributions
tfpl = tfp.layers


class SEQ_2_SEQ_AE:
    def __init__(
        self,
        input_shape,
        CONV_filters=256,
        LSTM_units_1=256,
        LSTM_units_2=64,
        DENSE_2=64,
        DROPOUT_RATE=0.25,
        ENCODING=32,
        learn_rate=1e-3,
    ):
        self.input_shape = input_shape
        self.CONV_filters = CONV_filters
        self.LSTM_units_1 = LSTM_units_1
        self.LSTM_units_2 = LSTM_units_2
        self.DENSE_1 = LSTM_units_2
        self.DENSE_2 = DENSE_2
        self.DROPOUT_RATE = DROPOUT_RATE
        self.ENCODING = ENCODING
        self.learn_rate = learn_rate

    def build(self):
        # Encoder Layers
        Model_E0 = tf.keras.layers.Conv1D(
            filters=self.CONV_filters,
            kernel_size=5,
            strides=1,
            padding="causal",
            activation="relu",
            kernel_initializer=he_uniform(),
        )
        Model_E1 = Bidirectional(
            LSTM(
                self.LSTM_units_1,
                activation="tanh",
                return_sequences=True,
                kernel_constraint=UnitNorm(axis=0),
            )
        )
        Model_E2 = Bidirectional(
            LSTM(
                self.LSTM_units_2,
                activation="tanh",
                return_sequences=False,
                kernel_constraint=UnitNorm(axis=0),
            )
        )
        Model_E3 = Dense(
            self.DENSE_1,
            activation="relu",
            kernel_constraint=UnitNorm(axis=0),
            kernel_initializer=he_uniform(),
        )
        Model_E4 = Dense(
            self.DENSE_2,
            activation="relu",
            kernel_constraint=UnitNorm(axis=0),
            kernel_initializer=he_uniform(),
        )
        Model_E5 = Dense(
            self.ENCODING,
            activation="relu",
            kernel_constraint=UnitNorm(axis=1),
            activity_regularizer=UncorrelatedFeaturesConstraint(3, weightage=1.0),
            kernel_initializer=Orthogonal(),
        )

        # Decoder layers
        Model_D0 = DenseTranspose(
            Model_E5, activation="relu", output_dim=self.ENCODING,
        )
        Model_D1 = DenseTranspose(Model_E4, activation="relu", output_dim=self.DENSE_2,)
        Model_D2 = DenseTranspose(Model_E3, activation="relu", output_dim=self.DENSE_1,)
        Model_D3 = RepeatVector(self.input_shape[1])
        Model_D4 = Bidirectional(
            LSTM(
                self.LSTM_units_1,
                activation="tanh",
                return_sequences=True,
                kernel_constraint=UnitNorm(axis=1),
            )
        )
        Model_D5 = Bidirectional(
            LSTM(
                self.LSTM_units_1,
                activation="sigmoid",
                return_sequences=True,
                kernel_constraint=UnitNorm(axis=1),
            )
        )

        # Define and instantiate encoder
        encoder = Sequential(name="SEQ_2_SEQ_Encoder")
        encoder.add(Input(shape=self.input_shape[1:]))
        encoder.add(Model_E0)
        encoder.add(BatchNormalization())
        encoder.add(Model_E1)
        encoder.add(BatchNormalization())
        encoder.add(Model_E2)
        encoder.add(BatchNormalization())
        encoder.add(Model_E3)
        encoder.add(BatchNormalization())
        encoder.add(Dropout(self.DROPOUT_RATE))
        encoder.add(Model_E4)
        encoder.add(BatchNormalization())
        encoder.add(Model_E5)

        # Define and instantiate decoder
        decoder = Sequential(name="SEQ_2_SEQ_Decoder")
        decoder.add(Model_D0)
        decoder.add(BatchNormalization())
        decoder.add(Model_D1)
        decoder.add(BatchNormalization())
        decoder.add(Model_D2)
        decoder.add(BatchNormalization())
        decoder.add(Model_D3)
        decoder.add(Model_D4)
        decoder.add(BatchNormalization())
        decoder.add(Model_D5)
        decoder.add(TimeDistributed(Dense(self.input_shape[2])))

        model = Sequential([encoder, decoder], name="SEQ_2_SEQ_AE")

        model.compile(
            loss=Huber(reduction="sum", delta=100.0),
            optimizer=Adam(lr=self.learn_rate, clipvalue=0.5,),
            metrics=["mae"],
        )

        return encoder, decoder, model
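

# Usage sketch, assuming a preprocessed array `X_train` of shape
# (samples, timesteps, features); `X_train` is hypothetical and not defined
# in this module:
#
#     encoder, decoder, ae = SEQ_2_SEQ_AE(input_shape=X_train.shape).build()
#     ae.fit(X_train, X_train, batch_size=512, epochs=25)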


class SEQ_2_SEQ_VAE:
    def __init__(
        self,
        input_shape,
        CONV_filters=256,
        LSTM_units_1=256,
        LSTM_units_2=64,
        DENSE_2=64,
        DROPOUT_RATE=0.25,
        ENCODING=32,
        learn_rate=1e-3,
        loss="ELBO+MMD",
        kl_warmup_epochs=0,
        mmd_warmup_epochs=0,
        prior="standard_normal",
    ):
        self.input_shape = input_shape
        self.CONV_filters = CONV_filters
        self.LSTM_units_1 = LSTM_units_1
        self.LSTM_units_2 = LSTM_units_2
        self.DENSE_1 = LSTM_units_2
        self.DENSE_2 = DENSE_2
        self.DROPOUT_RATE = DROPOUT_RATE
        self.ENCODING = ENCODING
        self.learn_rate = learn_rate
        self.loss = loss
        self.prior = prior
        self.kl_warmup = kl_warmup_epochs
        self.mmd_warmup = mmd_warmup_epochs

        if self.prior == "standard_normal":
            self.prior = tfd.Independent(
                tfd.Normal(loc=tf.zeros(self.ENCODING), scale=1),
                reinterpreted_batch_ndims=1,
            )

        assert (
            "ELBO" in self.loss or "MMD" in self.loss
        ), "loss must be one of ELBO, MMD or ELBO+MMD (default)"

    def build(self):
        # Encoder Layers
        Model_E0 = tf.keras.layers.Conv1D(
            filters=self.CONV_filters,
            kernel_size=5,
            strides=1,
            padding="causal",
            activation="relu",
            kernel_initializer=he_uniform(),
        )
        Model_E1 = Bidirectional(
            LSTM(
                self.LSTM_units_1,
                activation="tanh",
                return_sequences=True,
                kernel_constraint=UnitNorm(axis=0),
            )
        )
        Model_E2 = Bidirectional(
            LSTM(
                self.LSTM_units_2,
                activation="tanh",
                return_sequences=False,
                kernel_constraint=UnitNorm(axis=0),
            )
        )
        Model_E3 = Dense(
            self.DENSE_1,
            activation="relu",
            kernel_constraint=UnitNorm(axis=0),
            kernel_initializer=he_uniform(),
        )
        Model_E4 = Dense(
            self.DENSE_2,
            activation="relu",
            kernel_constraint=UnitNorm(axis=0),
            kernel_initializer=he_uniform(),
        )

        # Decoder layers
        Model_B1 = BatchNormalization()
        Model_B2 = BatchNormalization()
        Model_B3 = BatchNormalization()
        Model_B4 = BatchNormalization()
        Model_D1 = Dense(
            self.DENSE_2, activation="relu", kernel_initializer=he_uniform()
        )
        Model_D2 = DenseTranspose(Model_E3, activation="relu", output_dim=self.DENSE_1,)
        Model_D3 = RepeatVector(self.input_shape[1])
        Model_D4 = Bidirectional(
            LSTM(
                self.LSTM_units_1,
                activation="tanh",
                return_sequences=True,
                kernel_constraint=UnitNorm(axis=1),
            )
        )
        Model_D5 = Bidirectional(
            LSTM(
                self.LSTM_units_1,
                activation="sigmoid",
                return_sequences=True,
                kernel_constraint=UnitNorm(axis=1),
            )
        )

        # Define and instantiate encoder
        x = Input(shape=self.input_shape[1:])
        encoder = Model_E0(x)
        encoder = BatchNormalization()(encoder)
        encoder = Model_E1(encoder)
        encoder = BatchNormalization()(encoder)
        encoder = Model_E2(encoder)
        encoder = BatchNormalization()(encoder)
        encoder = Model_E3(encoder)
        encoder = BatchNormalization()(encoder)
        encoder = Dropout(self.DROPOUT_RATE)(encoder)
        encoder = Model_E4(encoder)
        encoder = BatchNormalization()(encoder)

        encoder = Dense(
            tfpl.IndependentNormal.params_size(self.ENCODING), activation=None
        )(encoder)
278

279
        # Define and control custom loss functions
280
        kl_warmup_callback = False
281
        if "ELBO" in self.loss:
282

283
            kl_beta = K.variable(1.0, name="kl_beta")
284
            kl_beta._trainable = False
285
286
            if self.kl_warmup:

287
288
289
290
291
                kl_warmup_callback = LambdaCallback(
                    on_epoch_begin=lambda epoch, logs: K.set_value(
                        kl_beta, K.min([epoch / self.kl_warmup, 1])
                    )
                )
292

293
294
295
296
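        # Posterior over the latent code: the Dense layer above emits
        # 2 * ENCODING parameters, which tfpl.IndependentNormal interprets as
        # the location and scale (the latter squashed through a softplus by
        # default) of a diagonal Gaussian.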
        z = tfpl.IndependentNormal(
            self.ENCODING,
            activity_regularizer=UncorrelatedFeaturesConstraint(3, weightage=1.0),
        )(encoder)

        if "ELBO" in self.loss:
            z = KLDivergenceLayer(self.prior, weight=kl_beta)(z)

        mmd_warmup_callback = False
        if "MMD" in self.loss:

            mmd_beta = K.variable(1.0, name="mmd_beta")
            mmd_beta._trainable = False
            if self.mmd_warmup:
                mmd_warmup_callback = LambdaCallback(
                    on_epoch_begin=lambda epoch, logs: K.set_value(
                        mmd_beta, K.min([epoch / self.mmd_warmup, 1])
                    )
                )

            z = MMDiscrepancyLayer(prior=self.prior, beta=mmd_beta)(z)

        # Define and instantiate generator
        generator = Model_D1(z)
        generator = Model_B1(generator)
        generator = Model_D2(generator)
        generator = Model_B2(generator)
        generator = Model_D3(generator)
        generator = Model_D4(generator)
        generator = Model_B3(generator)
        generator = Model_D5(generator)
        generator = Model_B4(generator)
        x_decoded_mean = TimeDistributed(Dense(self.input_shape[2]))(generator)

        # end-to-end autoencoder
        encoder = Model(x, z, name="SEQ_2_SEQ_VEncoder")
        vae = Model(x, x_decoded_mean, name="SEQ_2_SEQ_VAE")

        # Build generator as a separate entity
        g = Input(shape=self.ENCODING)
        _generator = Model_D1(g)
        _generator = Model_B1(_generator)
        _generator = Model_D2(_generator)
        _generator = Model_B2(_generator)
        _generator = Model_D3(_generator)
        _generator = Model_D4(_generator)
        _generator = Model_B3(_generator)
        _generator = Model_D5(_generator)
        _generator = Model_B4(_generator)
        _x_decoded_mean = TimeDistributed(Dense(self.input_shape[2]))(_generator)
        generator = Model(g, _x_decoded_mean, name="SEQ_2_SEQ_VGenerator")

        def huber_loss(x_, x_decoded_mean_):
            huber = Huber(reduction="sum", delta=100.0)
            return self.input_shape[1:] * huber(x_, x_decoded_mean_)

        vae.compile(
            loss=huber_loss, optimizer=Adam(lr=self.learn_rate,), metrics=["mae"],
        )

        return encoder, generator, vae, kl_warmup_callback, mmd_warmup_callback
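

# Usage sketch, assuming the same hypothetical `X_train` array as above.
# The two returned callbacks anneal the KL and MMD terms during training and
# are only created when the corresponding warmup epochs are > 0:
#
#     encoder, generator, vae, kl_cb, mmd_cb = SEQ_2_SEQ_VAE(
#         input_shape=X_train.shape, kl_warmup_epochs=10, mmd_warmup_epochs=10
#     ).build()
#     vae.fit(
#         X_train, X_train, batch_size=512, epochs=50,
#         callbacks=[cb for cb in (kl_cb, mmd_cb) if cb],
#     )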


class SEQ_2_SEQ_VAEP:
    def __init__(
        self,
        input_shape,
        CONV_filters=256,
        LSTM_units_1=256,
        LSTM_units_2=64,
        DENSE_2=64,
        DROPOUT_RATE=0.25,
        ENCODING=32,
        learn_rate=1e-3,
        loss="ELBO+MMD",
        kl_warmup_epochs=0,
        mmd_warmup_epochs=0,
        prior="standard_normal",
    ):
        self.input_shape = input_shape
        self.CONV_filters = CONV_filters
        self.LSTM_units_1 = LSTM_units_1
        self.LSTM_units_2 = LSTM_units_2
        self.DENSE_1 = LSTM_units_2
        self.DENSE_2 = DENSE_2
        self.DROPOUT_RATE = DROPOUT_RATE
        self.ENCODING = ENCODING
        self.learn_rate = learn_rate
        self.loss = loss
        self.prior = prior
        self.kl_warmup = kl_warmup_epochs
        self.mmd_warmup = mmd_warmup_epochs

        if self.prior == "standard_normal":
            self.prior = tfd.Independent(
                tfd.Normal(loc=tf.zeros(self.ENCODING), scale=1),
                reinterpreted_batch_ndims=1,
            )

        assert (
            "ELBO" in self.loss or "MMD" in self.loss
        ), "loss must be one of ELBO, MMD or ELBO+MMD (default)"

    def build(self):
        # Encoder Layers
        Model_E0 = tf.keras.layers.Conv1D(
            filters=self.CONV_filters,
            kernel_size=5,
            strides=1,
            padding="causal",
            activation="relu",
            kernel_initializer=he_uniform(),
        )
        Model_E1 = Bidirectional(
            LSTM(
                self.LSTM_units_1,
                activation="tanh",
                return_sequences=True,
                kernel_constraint=UnitNorm(axis=0),
            )
        )
        Model_E2 = Bidirectional(
            LSTM(
                self.LSTM_units_2,
                activation="tanh",
                return_sequences=False,
                kernel_constraint=UnitNorm(axis=0),
            )
        )
        Model_E3 = Dense(
            self.DENSE_1,
            activation="relu",
            kernel_constraint=UnitNorm(axis=0),
            kernel_initializer=he_uniform(),
        )
        Model_E4 = Dense(
            self.DENSE_2,
            activation="relu",
            kernel_constraint=UnitNorm(axis=0),
            kernel_initializer=he_uniform(),
        )

        # Decoder layers
        Model_B1 = BatchNormalization()
        Model_B2 = BatchNormalization()
        Model_B3 = BatchNormalization()
        Model_B4 = BatchNormalization()
        Model_D1 = Dense(
            self.DENSE_2, activation="relu", kernel_initializer=he_uniform()
        )
        Model_D2 = DenseTranspose(Model_E3, activation="relu", output_dim=self.DENSE_1,)
        Model_D3 = RepeatVector(self.input_shape[1])
        Model_D4 = Bidirectional(
            LSTM(
                self.LSTM_units_1,
                activation="tanh",
                return_sequences=True,
                kernel_constraint=UnitNorm(axis=1),
            )
        )
        Model_D5 = Bidirectional(
            LSTM(
                self.LSTM_units_1,
                activation="sigmoid",
                return_sequences=True,
                kernel_constraint=UnitNorm(axis=1),
            )
        )

        # Define and instantiate encoder
        x = Input(shape=self.input_shape[1:])
        encoder = Model_E0(x)
        encoder = BatchNormalization()(encoder)
        encoder = Model_E1(encoder)
        encoder = BatchNormalization()(encoder)
        encoder = Model_E2(encoder)
        encoder = BatchNormalization()(encoder)
        encoder = Model_E3(encoder)
        encoder = BatchNormalization()(encoder)
        encoder = Dropout(self.DROPOUT_RATE)(encoder)
        encoder = Model_E4(encoder)
        encoder = BatchNormalization()(encoder)

        encoder = Dense(
            tfpl.IndependentNormal.params_size(self.ENCODING), activation=None
        )(encoder)

        # Define and control custom loss functions
        kl_warmup_callback = False
        if "ELBO" in self.loss:

            kl_beta = K.variable(1.0, name="kl_beta")
            kl_beta._trainable = False
            if self.kl_warmup:
                kl_warmup_callback = LambdaCallback(
                    on_epoch_begin=lambda epoch, logs: K.set_value(
                        kl_beta, K.min([epoch / self.kl_warmup, 1])
                    )
                )

        z = tfpl.IndependentNormal(
            self.ENCODING,
            activity_regularizer=UncorrelatedFeaturesConstraint(3, weightage=1.0),
        )(encoder)

        if "ELBO" in self.loss:
            z = KLDivergenceLayer(self.prior, weight=kl_beta)(z)

        mmd_warmup_callback = False
        if "MMD" in self.loss:

            mmd_beta = K.variable(1.0, name="mmd_beta")
            mmd_beta._trainable = False
            if self.mmd_warmup:
                mmd_warmup_callback = LambdaCallback(
                    on_epoch_begin=lambda epoch, logs: K.set_value(
                        mmd_beta, K.min([epoch / self.mmd_warmup, 1])
                    )
                )

            z = MMDiscrepancyLayer(prior=self.prior, beta=mmd_beta)(z)

        # Define and instantiate generator
        generator = Model_D1(z)
        generator = Model_B1(generator)
        generator = Model_D2(generator)
        generator = Model_B2(generator)
        generator = Model_D3(generator)
        generator = Model_D4(generator)
        generator = Model_B3(generator)
        generator = Model_D5(generator)
        generator = Model_B4(generator)
        x_decoded_mean = TimeDistributed(
            Dense(self.input_shape[2]), name="vaep_reconstruction"
        )(generator)

        # Define and instantiate predictor
        predictor = Dense(
            self.DENSE_2, activation="relu", kernel_initializer=he_uniform()
        )(z)
        predictor = BatchNormalization()(predictor)
        predictor = Dense(
            self.DENSE_1, activation="relu", kernel_initializer=he_uniform()
        )(predictor)
        predictor = BatchNormalization()(predictor)
        predictor = RepeatVector(self.input_shape[1])(predictor)
        predictor = Bidirectional(
            LSTM(
                self.LSTM_units_1,
                activation="tanh",
                return_sequences=True,
                kernel_constraint=UnitNorm(axis=1),
            )
        )(predictor)
        predictor = BatchNormalization()(predictor)
        predictor = Bidirectional(
            LSTM(
                self.LSTM_units_1,
                activation="sigmoid",
                return_sequences=True,
                kernel_constraint=UnitNorm(axis=1),
            )
        )(predictor)
        predictor = BatchNormalization()(predictor)
        x_predicted_mean = TimeDistributed(
            Dense(self.input_shape[2]), name="vaep_prediction"
        )(predictor)

        # end-to-end autoencoder
        encoder = Model(x, z, name="SEQ_2_SEQ_VEncoder")
        vaep = Model(
            inputs=x, outputs=[x_decoded_mean, x_predicted_mean], name="SEQ_2_SEQ_VAEP"
        )

        # Build generator as a separate entity
        g = Input(shape=self.ENCODING)
        _generator = Model_D1(g)
        _generator = Model_B1(_generator)
        _generator = Model_D2(_generator)
        _generator = Model_B2(_generator)
        _generator = Model_D3(_generator)
        _generator = Model_D4(_generator)
        _generator = Model_B3(_generator)
        _generator = Model_D5(_generator)
        _generator = Model_B4(_generator)
        _x_decoded_mean = TimeDistributed(Dense(self.input_shape[2]))(_generator)
        generator = Model(g, _x_decoded_mean, name="SEQ_2_SEQ_VGenerator")

        def huber_loss(x_, x_decoded_mean_):
            huber = Huber(reduction="sum", delta=100.0)
            return self.input_shape[1:] * huber(x_, x_decoded_mean_)

        vaep.compile(
            loss=huber_loss, optimizer=Adam(lr=self.learn_rate,), metrics=["mae"],
        )

        return encoder, generator, vaep, kl_warmup_callback, mmd_warmup_callback
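

# Usage sketch, again assuming the hypothetical `X_train` array, plus a
# second hypothetical array `X_next` holding the target sequences for the
# prediction head (neither is defined in this module); the model outputs a
# reconstruction of the input and a prediction for the second target:
#
#     encoder, generator, vaep, kl_cb, mmd_cb = SEQ_2_SEQ_VAEP(
#         input_shape=X_train.shape
#     ).build()
#     vaep.fit(X_train, [X_train, X_next], batch_size=512, epochs=50)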


class SEQ_2_SEQ_MMVAEP:
    def __init__(
        self,
        input_shape,
        CONV_filters=256,
        LSTM_units_1=256,
        LSTM_units_2=64,
        DENSE_2=64,
        DROPOUT_RATE=0.25,
        ENCODING=32,
        learn_rate=1e-3,
        loss="ELBO+MMD",
        kl_warmup_epochs=0,
        mmd_warmup_epochs=0,
        prior="standard_normal",
        number_of_components=1,
    ):
        self.input_shape = input_shape
        self.CONV_filters = CONV_filters
        self.LSTM_units_1 = LSTM_units_1
        self.LSTM_units_2 = LSTM_units_2
        self.DENSE_1 = LSTM_units_2
        self.DENSE_2 = DENSE_2
        self.DROPOUT_RATE = DROPOUT_RATE
        self.ENCODING = ENCODING
        self.learn_rate = learn_rate
        self.loss = loss
        self.prior = prior
        self.kl_warmup = kl_warmup_epochs
        self.mmd_warmup = mmd_warmup_epochs
        self.number_of_components = number_of_components

        if self.prior == "standard_normal":
            self.prior = tfd.mixture.Mixture(
                tfd.categorical.Categorical(
                    probs=tf.ones(self.number_of_components) / self.number_of_components
                ),
                [
                    tfd.Independent(
                        tfd.Normal(loc=tf.zeros(self.ENCODING), scale=1),
                        reinterpreted_batch_ndims=1,
                    )
                    for _ in range(self.number_of_components)
                ],
            )

        assert (
            "ELBO" in self.loss or "MMD" in self.loss
        ), "loss must be one of ELBO, MMD or ELBO+MMD (default)"

    def build(self):
        # Encoder Layers
        Model_E0 = tf.keras.layers.Conv1D(
            filters=self.CONV_filters,
            kernel_size=5,
            strides=1,
            padding="causal",
            activation="relu",
            kernel_initializer=he_uniform(),
        )
        Model_E1 = Bidirectional(
            LSTM(
                self.LSTM_units_1,
                activation="tanh",
                return_sequences=True,
                kernel_constraint=UnitNorm(axis=0),
            )
        )
        Model_E2 = Bidirectional(
            LSTM(
                self.LSTM_units_2,
                activation="tanh",
                return_sequences=False,
                kernel_constraint=UnitNorm(axis=0),
            )
        )
        Model_E3 = Dense(
            self.DENSE_1,
            activation="relu",
            kernel_constraint=UnitNorm(axis=0),
            kernel_initializer=he_uniform(),
        )
        Model_E4 = Dense(
            self.DENSE_2,
            activation="relu",
            kernel_constraint=UnitNorm(axis=0),
            kernel_initializer=he_uniform(),
        )

        # Decoder layers
        Model_B1 = BatchNormalization()
        Model_B2 = BatchNormalization()
        Model_B3 = BatchNormalization()
        Model_B4 = BatchNormalization()
        Model_D1 = Dense(
            self.DENSE_2, activation="relu", kernel_initializer=he_uniform()
        )
        Model_D2 = DenseTranspose(Model_E3, activation="relu", output_dim=self.DENSE_1,)
        Model_D3 = RepeatVector(self.input_shape[1])
        Model_D4 = Bidirectional(
            LSTM(
                self.LSTM_units_1,
                activation="tanh",
                return_sequences=True,
                kernel_constraint=UnitNorm(axis=1),
            )
        )
        Model_D5 = Bidirectional(
            LSTM(
                self.LSTM_units_1,
                activation="sigmoid",
                return_sequences=True,
                kernel_constraint=UnitNorm(axis=1),
            )
        )

        # Define and instantiate encoder
        x = Input(shape=self.input_shape[1:])
        encoder = Model_E0(x)
        encoder = BatchNormalization()(encoder)
        encoder = Model_E1(encoder)
        encoder = BatchNormalization()(encoder)
        encoder = Model_E2(encoder)
        encoder = BatchNormalization()(encoder)
        encoder = Model_E3(encoder)
        encoder = BatchNormalization()(encoder)
        encoder = Dropout(self.DROPOUT_RATE)(encoder)
        encoder = Model_E4(encoder)
        encoder = BatchNormalization()(encoder)

        z_cat = Dense(self.number_of_components, activation="softmax")(encoder)
        z_gauss = Dense(
            tfpl.IndependentNormal.params_size(
                self.ENCODING * self.number_of_components
            ),
            activation=None,
        )(encoder)

        # Define and control custom loss functions
        kl_warmup_callback = False
        if "ELBO" in self.loss:

            kl_beta = K.variable(1.0, name="kl_beta")
            kl_beta._trainable = False
            if self.kl_warmup:
                kl_warmup_callback = LambdaCallback(
                    on_epoch_begin=lambda epoch, logs: K.set_value(
                        kl_beta, K.min([epoch / self.kl_warmup, 1])
                    )
                )

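        # Mixture posterior: per component, z_gauss holds ENCODING means
        # followed by ENCODING (softplus-squashed) scales, while z_cat
        # supplies the mixture weights over the number_of_components
        # components.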
        z_gauss = Reshape([2 * self.ENCODING, self.number_of_components])(z_gauss)
        z = tfpl.DistributionLambda(
            lambda gauss: tfd.mixture.Mixture(
                cat=tfd.categorical.Categorical(probs=gauss[0],),
                components=[
                    tfd.Independent(
                        tfd.Normal(
                            loc=gauss[1][..., : self.ENCODING, k],
                            scale=softplus(gauss[1][..., self.ENCODING :, k]),
                        ),
                        reinterpreted_batch_ndims=1,
                    )
                    for k in range(self.number_of_components)
                ],
            ),
            activity_regularizer=UncorrelatedFeaturesConstraint(3, weightage=1.0),
        )([z_cat, z_gauss])

        if "ELBO" in self.loss:
            z = KLDivergenceLayer(self.prior, weight=kl_beta)(z)

        mmd_warmup_callback = False
        if "MMD" in self.loss:

            mmd_beta = K.variable(1.0, name="mmd_beta")
            mmd_beta._trainable = False
            if self.mmd_warmup:
                mmd_warmup_callback = LambdaCallback(
                    on_epoch_begin=lambda epoch, logs: K.set_value(
                        mmd_beta, K.min([epoch / self.mmd_warmup, 1])
                    )
                )

            z = MMDiscrepancyLayer(prior=self.prior, beta=mmd_beta)(z)

        # Define and instantiate generator
        generator = Model_D1(z)
        generator = Model_B1(generator)
        generator = Model_D2(generator)
        generator = Model_B2(generator)
        generator = Model_D3(generator)
        generator = Model_D4(generator)
        generator = Model_B3(generator)
        generator = Model_D5(generator)
        generator = Model_B4(generator)
        x_decoded_mean = TimeDistributed(
            Dense(self.input_shape[2]), name="vaep_reconstruction"
        )(generator)

        # Define and instantiate predictor
        predictor = Dense(
            self.DENSE_2, activation="relu", kernel_initializer=he_uniform()
        )(z)
        predictor = BatchNormalization()(predictor)
        predictor = Dense(
            self.DENSE_1, activation="relu", kernel_initializer=he_uniform()
        )(predictor)
        predictor = BatchNormalization()(predictor)
        predictor = RepeatVector(self.input_shape[1])(predictor)
        predictor = Bidirectional(
            LSTM(
                self.LSTM_units_1,
                activation="tanh",
                return_sequences=True,
                kernel_constraint=UnitNorm(axis=1),
            )
        )(predictor)
        predictor = BatchNormalization()(predictor)
        predictor = Bidirectional(
            LSTM(
                self.LSTM_units_1,
                activation="sigmoid",
                return_sequences=True,
                kernel_constraint=UnitNorm(axis=1),
            )
        )(predictor)
        predictor = BatchNormalization()(predictor)
        x_predicted_mean = TimeDistributed(
            Dense(self.input_shape[2]), name="vaep_prediction"
        )(predictor)

        # end-to-end autoencoder
        encoder = Model(x, z, name="SEQ_2_SEQ_VEncoder")
        grouper = Model(x, z_cat, name="Deep_Gaussian_Mixture_clustering")
        gmvaep = Model(
            inputs=x, outputs=[x_decoded_mean, x_predicted_mean], name="SEQ_2_SEQ_VAEP"
        )

        # Build generator as a separate entity
        g = Input(shape=self.ENCODING)
        _generator = Model_D1(g)
        _generator = Model_B1(_generator)
        _generator = Model_D2(_generator)
        _generator = Model_B2(_generator)
        _generator = Model_D3(_generator)
        _generator = Model_D4(_generator)
        _generator = Model_B3(_generator)
        _generator = Model_D5(_generator)
        _generator = Model_B4(_generator)
        _x_decoded_mean = TimeDistributed(Dense(self.input_shape[2]))(_generator)
        generator = Model(g, _x_decoded_mean, name="SEQ_2_SEQ_VGenerator")

        def huber_loss(x_, x_decoded_mean_):
            huber = Huber(reduction="sum", delta=100.0)
            return self.input_shape[1:] * huber(x_, x_decoded_mean_)

        gmvaep.compile(
            loss=huber_loss, optimizer=Adam(lr=self.learn_rate,), metrics=["mae"],
        )

        return (
            encoder,
            generator,
            grouper,
            gmvaep,
            kl_warmup_callback,
            mmd_warmup_callback,
        )
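

# Usage sketch for the mixture model, with the same hypothetical `X_train`
# and `X_next` arrays as above; `grouper` returns the soft cluster
# assignments (the categorical mixture weights) for each input sequence:
#
#     encoder, generator, grouper, gmvaep, kl_cb, mmd_cb = SEQ_2_SEQ_MMVAEP(
#         input_shape=X_train.shape, number_of_components=5
#     ).build()
#     gmvaep.fit(X_train, [X_train, X_next], batch_size=512, epochs=50)
#     soft_clusters = grouper.predict(X_train)
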
# TODO:
#       - Try Bayesian nets!
#       - Gaussian Mixture + Categorical priors -> Deep Clustering
#           - prior of equal gaussians
#           - prior of equal gaussians + gaussian noise on the means (not exactly the same init)
#       - MCMC sampling (n>1) (already supported by tfp! we should try it)
#
# TODO (in the non-immediate future):
#       - free bits paper
#       - Attention mechanism for encoder / decoder (does it make sense?)
#       - Transformer encoder/decoder (does it make sense?)