# @author lucasmiranda42

from tensorflow.keras import backend as K
from tensorflow.keras import Input, Model, Sequential
from tensorflow.keras.callbacks import LambdaCallback
from tensorflow.keras.constraints import UnitNorm
from tensorflow.keras.initializers import he_uniform, Orthogonal
from tensorflow.keras.layers import BatchNormalization, Bidirectional, Dense
from tensorflow.keras.layers import Dropout, Lambda, LSTM
from tensorflow.keras.layers import RepeatVector, TimeDistributed
from tensorflow.keras.losses import Huber
from tensorflow.keras.optimizers import Adam
from source.model_utils import *
import tensorflow as tf
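
# This module defines the sequence-to-sequence autoencoder architectures:
# a deterministic autoencoder (SEQ_2_SEQ_AE), a variational autoencoder with
# optional ELBO / MMD losses and warmup callbacks (SEQ_2_SEQ_VAE), a variant
# with an auxiliary prediction branch (SEQ_2_SEQ_VAEP), and a placeholder for
# a mixture-model VAE (SEQ_2_SEQ_MMVAE).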


class SEQ_2_SEQ_AE:
    def __init__(
        self,
        input_shape,
        CONV_filters=256,
        LSTM_units_1=256,
        LSTM_units_2=64,
        DENSE_2=64,
        DROPOUT_RATE=0.25,
        ENCODING=32,
        learn_rate=1e-3,
    ):
        self.input_shape = input_shape
        self.CONV_filters = CONV_filters
        self.LSTM_units_1 = LSTM_units_1
        self.LSTM_units_2 = LSTM_units_2
        self.DENSE_1 = LSTM_units_2
        self.DENSE_2 = DENSE_2
        self.DROPOUT_RATE = DROPOUT_RATE
        self.ENCODING = ENCODING
        self.learn_rate = learn_rate

    def build(self):
        # Encoder Layers
        Model_E0 = tf.keras.layers.Conv1D(
            filters=self.CONV_filters,
            kernel_size=5,
            strides=1,
            padding="causal",
            activation="relu",
            kernel_initializer=he_uniform(),
        )
        Model_E1 = Bidirectional(
            LSTM(
                self.LSTM_units_1,
                activation="tanh",
                return_sequences=True,
                kernel_constraint=UnitNorm(axis=0),
            )
        )
        Model_E2 = Bidirectional(
            LSTM(
                self.LSTM_units_2,
                activation="tanh",
                return_sequences=False,
                kernel_constraint=UnitNorm(axis=0),
            )
        )
        Model_E3 = Dense(
            self.DENSE_1,
            activation="relu",
            kernel_constraint=UnitNorm(axis=0),
            kernel_initializer=he_uniform(),
        )
        Model_E4 = Dense(
            self.DENSE_2,
            activation="relu",
            kernel_constraint=UnitNorm(axis=0),
            kernel_initializer=he_uniform(),
        )
        Model_E5 = Dense(
            self.ENCODING,
            activation="relu",
            kernel_constraint=UnitNorm(axis=1),
            activity_regularizer=UncorrelatedFeaturesConstraint(3, weightage=1.0),
            kernel_initializer=Orthogonal(),
        )

        # Decoder layers
        Model_D0 = DenseTranspose(
            Model_E5, activation="relu", output_dim=self.ENCODING,
        )
        Model_D1 = DenseTranspose(Model_E4, activation="relu", output_dim=self.DENSE_2,)
        Model_D2 = DenseTranspose(Model_E3, activation="relu", output_dim=self.DENSE_1,)
        Model_D3 = RepeatVector(self.input_shape[1])
        Model_D4 = Bidirectional(
            LSTM(
                self.LSTM_units_1,
                activation="tanh",
                return_sequences=True,
                kernel_constraint=UnitNorm(axis=1),
            )
        )
        Model_D5 = Bidirectional(
            LSTM(
                self.LSTM_units_1,
                activation="sigmoid",
                return_sequences=True,
                kernel_constraint=UnitNorm(axis=1),
            )
        )

        # Define and instantiate encoder
        encoder = Sequential(name="SEQ_2_SEQ_Encoder")
        encoder.add(Input(shape=self.input_shape[1:]))
        encoder.add(Model_E0)
        encoder.add(BatchNormalization())
        encoder.add(Model_E1)
        encoder.add(BatchNormalization())
        encoder.add(Model_E2)
        encoder.add(BatchNormalization())
        encoder.add(Model_E3)
        encoder.add(BatchNormalization())
        encoder.add(Dropout(self.DROPOUT_RATE))
        encoder.add(Model_E4)
        encoder.add(BatchNormalization())
        encoder.add(Model_E5)

        # Define and instantiate decoder
        decoder = Sequential(name="SEQ_2_SEQ_Decoder")
        decoder.add(Model_D0)
        decoder.add(BatchNormalization())
        decoder.add(Model_D1)
        decoder.add(BatchNormalization())
        decoder.add(Model_D2)
        decoder.add(BatchNormalization())
        decoder.add(Model_D3)
        decoder.add(Model_D4)
        decoder.add(BatchNormalization())
        decoder.add(Model_D5)
        decoder.add(TimeDistributed(Dense(self.input_shape[2])))

        model = Sequential([encoder, decoder], name="SEQ_2_SEQ_AE")

        model.compile(
            loss=Huber(reduction="sum", delta=100.0),
            optimizer=Adam(lr=self.learn_rate, clipvalue=0.5,),
            metrics=["mae"],
        )

        return encoder, decoder, model
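

def _usage_example_seq_2_seq_ae():
    # Hedged usage sketch (not part of the original API): shows how the
    # encoder / decoder / autoencoder triple returned by build() might be
    # trained. The array name, the (samples, timesteps, features) shape and
    # the hyperparameters below are illustrative assumptions only.
    import numpy as np

    coords = np.random.normal(size=(100, 50, 24)).astype("float32")
    encoder, decoder, ae = SEQ_2_SEQ_AE(input_shape=coords.shape).build()
    ae.fit(coords, coords, epochs=5, batch_size=32, verbose=0)
    # Latent representations of shape (samples, ENCODING)
    return encoder.predict(coords)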


class SEQ_2_SEQ_VAE:
    def __init__(
        self,
        input_shape,
        CONV_filters=256,
        LSTM_units_1=256,
        LSTM_units_2=64,
        DENSE_2=64,
        DROPOUT_RATE=0.25,
        ENCODING=32,
        learn_rate=1e-3,
        loss="ELBO+MMD",
        kl_warmup_epochs=0,
        mmd_warmup_epochs=0,
    ):
        self.input_shape = input_shape
        self.CONV_filters = CONV_filters
        self.LSTM_units_1 = LSTM_units_1
        self.LSTM_units_2 = LSTM_units_2
        self.DENSE_1 = LSTM_units_2
        self.DENSE_2 = DENSE_2
        self.DROPOUT_RATE = DROPOUT_RATE
        self.ENCODING = ENCODING
        self.learn_rate = learn_rate
        self.loss = loss
        self.kl_warmup = kl_warmup_epochs
        self.mmd_warmup = mmd_warmup_epochs

        assert (
            "ELBO" in self.loss or "MMD" in self.loss
        ), "loss must be one of ELBO, MMD or ELBO+MMD (default)"

    def build(self):
        # Encoder Layers
        Model_E0 = tf.keras.layers.Conv1D(
            filters=self.CONV_filters,
            kernel_size=5,
            strides=1,
            padding="causal",
            activation="relu",
            kernel_initializer=he_uniform(),
        )
        Model_E1 = Bidirectional(
            LSTM(
                self.LSTM_units_1,
                activation="tanh",
                return_sequences=True,
                kernel_constraint=UnitNorm(axis=0),
            )
        )
        Model_E2 = Bidirectional(
            LSTM(
                self.LSTM_units_2,
                activation="tanh",
                return_sequences=False,
                kernel_constraint=UnitNorm(axis=0),
            )
        )
        Model_E3 = Dense(
            self.DENSE_1,
            activation="relu",
            kernel_constraint=UnitNorm(axis=0),
            kernel_initializer=he_uniform(),
        )
        Model_E4 = Dense(
            self.DENSE_2,
            activation="relu",
            kernel_constraint=UnitNorm(axis=0),
            kernel_initializer=he_uniform(),
        )
        Model_E5 = Dense(
            self.ENCODING,
            activation="relu",
            kernel_constraint=UnitNorm(axis=1),
            activity_regularizer=UncorrelatedFeaturesConstraint(3, weightage=1.0),
            kernel_initializer=Orthogonal(),
        )

        # Decoder layers
        Model_B1 = BatchNormalization()
        Model_B2 = BatchNormalization()
        Model_B3 = BatchNormalization()
        Model_B4 = BatchNormalization()
        Model_B5 = BatchNormalization()
        Model_D0 = DenseTranspose(
            Model_E5, activation="relu", output_dim=self.ENCODING,
        )
        Model_D1 = DenseTranspose(Model_E4, activation="relu", output_dim=self.DENSE_2,)
        Model_D2 = DenseTranspose(Model_E3, activation="relu", output_dim=self.DENSE_1,)
        Model_D3 = RepeatVector(self.input_shape[1])
        Model_D4 = Bidirectional(
            LSTM(
                self.LSTM_units_1,
                activation="tanh",
                return_sequences=True,
                kernel_constraint=UnitNorm(axis=1),
            )
        )
        Model_D5 = Bidirectional(
            LSTM(
                self.LSTM_units_1,
                activation="sigmoid",
                return_sequences=True,
                kernel_constraint=UnitNorm(axis=1),
            )
        )

        # Define and instantiate encoder
        x = Input(shape=self.input_shape[1:])
        encoder = Model_E0(x)
        encoder = BatchNormalization()(encoder)
        encoder = Model_E1(encoder)
        encoder = BatchNormalization()(encoder)
        encoder = Model_E2(encoder)
        encoder = BatchNormalization()(encoder)
        encoder = Model_E3(encoder)
        encoder = BatchNormalization()(encoder)
        encoder = Dropout(self.DROPOUT_RATE)(encoder)
        encoder = Model_E4(encoder)
        encoder = BatchNormalization()(encoder)
        encoder = Model_E5(encoder)

        z_mean = Dense(self.ENCODING)(encoder)
        z_log_sigma = Dense(self.ENCODING)(encoder)

        kl_wu = False
        if "ELBO" in self.loss:

            # Anneal the KL weight (beta) from 0 to 1 over the first
            # kl_warmup epochs. A Keras variable is used so that the updates
            # made by the callback are actually picked up by KLDivergenceLayer.
            kl_beta = K.variable(1.0, name="kl_beta")
            if self.kl_warmup:

                def klwarmup(epoch):
                    value = K.min([epoch / self.kl_warmup, 1])
                    print("kl_beta:", value)
                    K.set_value(kl_beta, value)

                kl_wu = LambdaCallback(
                    on_epoch_begin=lambda epoch, logs: klwarmup(epoch)
                )

            z_mean, z_log_sigma = KLDivergenceLayer(beta=kl_beta)([z_mean, z_log_sigma])

        z = Lambda(sampling)([z_mean, z_log_sigma])

        mmd_wu = False
        if "MMD" in self.loss:

            # Same warmup strategy for the MMD weight, annealed from 0 to 1
            # over the first mmd_warmup epochs.
            mmd_beta = K.variable(1.0, name="mmd_beta")
            if self.mmd_warmup:

                def mmdwarmup(epoch):
                    value = K.min([epoch / self.mmd_warmup, 1])
                    print("mmd_beta:", value)
                    K.set_value(mmd_beta, value)

                mmd_wu = LambdaCallback(
                    on_epoch_begin=lambda epoch, logs: mmdwarmup(epoch)
                )

            z = MMDiscrepancyLayer(beta=mmd_beta)(z)

        # Define and instantiate generator
        generator = Model_D0(z)
        generator = Model_B1(generator)
        generator = Model_D1(generator)
        generator = Model_B2(generator)
        generator = Model_D2(generator)
        generator = Model_B3(generator)
        generator = Model_D3(generator)
        generator = Model_D4(generator)
        generator = Model_B4(generator)
        generator = Model_D5(generator)
        generator = Model_B5(generator)
        x_decoded_mean = TimeDistributed(Dense(self.input_shape[2]))(generator)

        # end-to-end autoencoder
        encoder = Model(x, z_mean, name="SEQ_2_SEQ_VEncoder")
        vae = Model(x, x_decoded_mean, name="SEQ_2_SEQ_VAE")

        # Build generator as a separate entity
        g = Input(shape=(self.ENCODING,))
        _generator = Model_D0(g)
        _generator = Model_B1(_generator)
        _generator = Model_D1(_generator)
        _generator = Model_B2(_generator)
        _generator = Model_D2(_generator)
        _generator = Model_B3(_generator)
        _generator = Model_D3(_generator)
        _generator = Model_D4(_generator)
        _generator = Model_B4(_generator)
        _generator = Model_D5(_generator)
        _generator = Model_B5(_generator)
        _x_decoded_mean = TimeDistributed(Dense(self.input_shape[2]))(_generator)
        generator = Model(g, _x_decoded_mean, name="SEQ_2_SEQ_VGenerator")

        def huber_loss(x_, x_decoded_mean_):
            huber = Huber(reduction="sum", delta=100.0)
            return self.input_shape[1:] * huber(x_, x_decoded_mean_)

        vae.compile(
            loss=huber_loss,
            optimizer=Adam(lr=self.learn_rate,),
            metrics=["mae"],
            experimental_run_tf_function=False,
        )

        return encoder, generator, vae, kl_wu, mmd_wu
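

def _usage_example_seq_2_seq_vae():
    # Hedged usage sketch (not part of the original API): illustrates how the
    # warmup callbacks returned by build() might be passed to fit(). The data
    # shape, epoch counts and warmup lengths are illustrative assumptions.
    import numpy as np

    coords = np.random.normal(size=(100, 50, 24)).astype("float32")
    encoder, generator, vae, kl_wu, mmd_wu = SEQ_2_SEQ_VAE(
        input_shape=coords.shape, kl_warmup_epochs=10, mmd_warmup_epochs=10
    ).build()
    callbacks = [cb for cb in (kl_wu, mmd_wu) if cb]
    vae.fit(coords, coords, epochs=20, batch_size=32, callbacks=callbacks, verbose=0)
    # Latent means of shape (samples, ENCODING)
    return encoder.predict(coords)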


class SEQ_2_SEQ_VAEP:
    def __init__(
        self,
        input_shape,
        CONV_filters=256,
        LSTM_units_1=256,
        LSTM_units_2=64,
        DENSE_2=64,
        DROPOUT_RATE=0.25,
        ENCODING=32,
        learn_rate=1e-3,
        loss="ELBO+MMD",
        kl_warmup_epochs=0,
        mmd_warmup_epochs=0,
    ):
        self.input_shape = input_shape
        self.CONV_filters = CONV_filters
        self.LSTM_units_1 = LSTM_units_1
        self.LSTM_units_2 = LSTM_units_2
        self.DENSE_1 = LSTM_units_2
        self.DENSE_2 = DENSE_2
        self.DROPOUT_RATE = DROPOUT_RATE
        self.ENCODING = ENCODING
        self.learn_rate = learn_rate
        self.loss = loss
        self.kl_warmup = kl_warmup_epochs
        self.mmd_warmup = mmd_warmup_epochs

    def build(self):
        # Encoder Layers
        Model_E0 = tf.keras.layers.Conv1D(
            filters=self.CONV_filters,
            kernel_size=5,
            strides=1,
            padding="causal",
            activation="relu",
            kernel_initializer=he_uniform(),
        )
        Model_E1 = Bidirectional(
            LSTM(
                self.LSTM_units_1,
                activation="tanh",
                return_sequences=True,
                kernel_constraint=UnitNorm(axis=0),
            )
        )
        Model_E2 = Bidirectional(
            LSTM(
                self.LSTM_units_2,
                activation="tanh",
                return_sequences=False,
                kernel_constraint=UnitNorm(axis=0),
            )
        )
        Model_E3 = Dense(
            self.DENSE_1,
            activation="relu",
            kernel_constraint=UnitNorm(axis=0),
            kernel_initializer=he_uniform(),
        )
        Model_E4 = Dense(
            self.DENSE_2,
            activation="relu",
            kernel_constraint=UnitNorm(axis=0),
            kernel_initializer=he_uniform(),
        )
        Model_E5 = Dense(
            self.ENCODING,
            activation="relu",
            kernel_constraint=UnitNorm(axis=1),
            activity_regularizer=UncorrelatedFeaturesConstraint(3, weightage=1.0),
            kernel_initializer=Orthogonal(),
        )

        # Decoder layers
        Model_B1 = BatchNormalization()
        Model_B2 = BatchNormalization()
        Model_B3 = BatchNormalization()
        Model_B4 = BatchNormalization()
        Model_B5 = BatchNormalization()
        Model_D0 = DenseTranspose(
            Model_E5, activation="relu", output_dim=self.ENCODING,
        )
        Model_D1 = DenseTranspose(Model_E4, activation="relu", output_dim=self.DENSE_2,)
        Model_D2 = DenseTranspose(Model_E3, activation="relu", output_dim=self.DENSE_1,)
        Model_D3 = RepeatVector(self.input_shape[1])
        Model_D4 = Bidirectional(
            LSTM(
                self.LSTM_units_1,
                activation="tanh",
                return_sequences=True,
                kernel_constraint=UnitNorm(axis=1),
            )
        )
        Model_D5 = Bidirectional(
            LSTM(
                self.LSTM_units_1,
                activation="sigmoid",
                return_sequences=True,
                kernel_constraint=UnitNorm(axis=1),
            )
        )

        # Define and instantiate encoder
        x = Input(shape=self.input_shape[1:])
        encoder = Model_E0(x)
        encoder = BatchNormalization()(encoder)
        encoder = Model_E1(encoder)
        encoder = BatchNormalization()(encoder)
        encoder = Model_E2(encoder)
        encoder = BatchNormalization()(encoder)
        encoder = Model_E3(encoder)
        encoder = BatchNormalization()(encoder)
        encoder = Dropout(self.DROPOUT_RATE)(encoder)
        encoder = Model_E4(encoder)
        encoder = BatchNormalization()(encoder)
        encoder = Model_E5(encoder)

        z_mean = Dense(self.ENCODING)(encoder)
        z_log_sigma = Dense(self.ENCODING)(encoder)
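
        # Note: unlike SEQ_2_SEQ_VAE above, the kl_warmup_epochs and
        # mmd_warmup_epochs arguments stored in __init__ are not yet wired
        # into the KL / MMD layers of this model.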

        if "ELBO" in self.loss:
            z_mean, z_log_sigma = KLDivergenceLayer()([z_mean, z_log_sigma])

        z = Lambda(sampling)([z_mean, z_log_sigma])

        if "MMD" in self.loss:
            z = MMDiscrepancyLayer()(z)

        # Define and instantiate generator
        generator = Model_D0(z)
        generator = Model_B1(generator)
        generator = Model_D1(generator)
        generator = Model_B2(generator)
        generator = Model_D2(generator)
        generator = Model_B3(generator)
        generator = Model_D3(generator)
        generator = Model_D4(generator)
        generator = Model_B4(generator)
        generator = Model_D5(generator)
        generator = Model_B5(generator)
        x_decoded_mean = TimeDistributed(Dense(self.input_shape[2]))(generator)

        # Define and instantiate predictor
        predictor = Dense(
            self.ENCODING, activation="relu", kernel_initializer=he_uniform()
        )(z)
        predictor = BatchNormalization()(predictor)
        predictor = Dense(
            self.DENSE_2, activation="relu", kernel_initializer=he_uniform()
        )(predictor)
        predictor = BatchNormalization()(predictor)
        predictor = Dense(
            self.DENSE_1, activation="relu", kernel_initializer=he_uniform()
        )(predictor)
        predictor = BatchNormalization()(predictor)
        predictor = RepeatVector(self.input_shape[1])(predictor)
        predictor = Bidirectional(
            LSTM(
                self.LSTM_units_1,
                activation="tanh",
                return_sequences=True,
                kernel_constraint=UnitNorm(axis=1),
            )
        )(predictor)
        predictor = BatchNormalization()(predictor)
        predictor = Bidirectional(
            LSTM(
                self.LSTM_units_1,
                activation="sigmoid",
                return_sequences=True,
                kernel_constraint=UnitNorm(axis=1),
            )
        )(predictor)
        predictor = BatchNormalization()(predictor)
        x_predicted_mean = TimeDistributed(Dense(self.input_shape[2]))(predictor)

        # end-to-end autoencoder
        encoder = Model(x, z_mean, name="SEQ_2_SEQ_VEncoder")
        vaep = Model(
            inputs=x, outputs=[x_decoded_mean, x_predicted_mean], name="SEQ_2_SEQ_VAEP"
        )

        # Build generator as a separate entity
        g = Input(shape=(self.ENCODING,))
        _generator = Model_D0(g)
        _generator = Model_B1(_generator)
        _generator = Model_D1(_generator)
        _generator = Model_B2(_generator)
        _generator = Model_D2(_generator)
        _generator = Model_B3(_generator)
        _generator = Model_D3(_generator)
        _generator = Model_D4(_generator)
        _generator = Model_B4(_generator)
        _generator = Model_D5(_generator)
        _generator = Model_B5(_generator)
        _x_decoded_mean = TimeDistributed(Dense(self.input_shape[2]))(_generator)
        generator = Model(g, _x_decoded_mean, name="SEQ_2_SEQ_VGenerator")

        def huber_loss(x_, x_decoded_mean_):
            huber = Huber(reduction="sum", delta=100.0)
            return self.input_shape[1:] * huber(x_, x_decoded_mean_)

        vaep.compile(
            loss=huber_loss,
            optimizer=Adam(lr=self.learn_rate,),
            metrics=["mae"],
            experimental_run_tf_function=False,
        )

        return encoder, generator, vaep
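

def _usage_example_seq_2_seq_vaep():
    # Hedged usage sketch (not part of the original API): SEQ_2_SEQ_VAEP has
    # two outputs (a reconstruction and a prediction), so fit() needs two
    # targets. Reusing the input as the second target is an illustrative
    # assumption; the real prediction target depends on the training pipeline.
    import numpy as np

    coords = np.random.normal(size=(100, 50, 24)).astype("float32")
    encoder, generator, vaep = SEQ_2_SEQ_VAEP(input_shape=coords.shape).build()
    vaep.fit(coords, [coords, coords], epochs=5, batch_size=32, verbose=0)
    return encoder.predict(coords)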


class SEQ_2_SEQ_MMVAE:
    pass


# TODO:
#       - Add learning rate scheduler callback
#       - KL / MMD warmup (Ladder Variational Autoencoders)
#       - Gaussian Mixture + Categorical priors -> Deep Clustering
#       - free bits paper
#       - Attention mechanism for encoder / decoder (does it make sense?)
#       - Transformer encoder/decoder (does it make sense?)