# @author lucasmiranda42
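# Sequence-to-sequence autoencoder architectures implemented in tf.keras:
# a deterministic autoencoder (SEQ_2_SEQ_AE), a variational autoencoder with
# optional ELBO and/or MMD regularization (SEQ_2_SEQ_VAE), and a variational
# autoencoder with an auxiliary prediction head (SEQ_2_SEQ_VAEP).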

from tensorflow.keras import Input, Model, Sequential
from tensorflow.keras.constraints import UnitNorm
from tensorflow.keras.initializers import he_uniform, Orthogonal
from tensorflow.keras.layers import BatchNormalization, Bidirectional, Dense
from tensorflow.keras.layers import Dropout, Lambda, LSTM
from tensorflow.keras.layers import RepeatVector, TimeDistributed
from tensorflow.keras.losses import Huber
from tensorflow.keras.optimizers import Adam
from source.model_utils import *
import tensorflow as tf


class SEQ_2_SEQ_AE:
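    """Deterministic sequence-to-sequence autoencoder.

    A Conv1D front end followed by bidirectional LSTM and dense layers
    compresses each input sequence into a fixed-size encoding of length
    ENCODING; a mirrored decoder reconstructs the original sequence.
    """
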
    def __init__(
        self,
        input_shape,
        CONV_filters=256,
        LSTM_units_1=256,
        LSTM_units_2=64,
        DENSE_2=64,
        DROPOUT_RATE=0.25,
        ENCODING=32,
        learn_rate=1e-3,
    ):
        self.input_shape = input_shape
        self.CONV_filters = CONV_filters
        self.LSTM_units_1 = LSTM_units_1
        self.LSTM_units_2 = LSTM_units_2
        self.DENSE_1 = LSTM_units_2
        self.DENSE_2 = DENSE_2
        self.DROPOUT_RATE = DROPOUT_RATE
        self.ENCODING = ENCODING
        self.learn_rate = learn_rate

    def build(self):
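        """Builds the encoder, decoder and end-to-end autoencoder.

        Returns an (encoder, decoder, model) tuple; `model` is compiled
        with a Huber loss and an Adam optimizer.
        """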
        # Encoder Layers
        Model_E0 = tf.keras.layers.Conv1D(
            filters=self.CONV_filters,
            kernel_size=5,
            strides=1,
            padding="causal",
            activation="relu",
            kernel_initializer=he_uniform(),
        )
        Model_E1 = Bidirectional(
            LSTM(
                self.LSTM_units_1,
                activation="tanh",
                return_sequences=True,
                kernel_constraint=UnitNorm(axis=0),
            )
        )
        Model_E2 = Bidirectional(
            LSTM(
                self.LSTM_units_2,
                activation="tanh",
                return_sequences=False,
                kernel_constraint=UnitNorm(axis=0),
            )
        )
        Model_E3 = Dense(
            self.DENSE_1,
            activation="relu",
            kernel_constraint=UnitNorm(axis=0),
            kernel_initializer=he_uniform(),
        )
        Model_E4 = Dense(
            self.DENSE_2,
            activation="relu",
            kernel_constraint=UnitNorm(axis=0),
            kernel_initializer=he_uniform(),
        )
        Model_E5 = Dense(
            self.ENCODING,
            activation="relu",
            kernel_constraint=UnitNorm(axis=1),
            activity_regularizer=UncorrelatedFeaturesConstraint(3, weightage=1.0),
            kernel_initializer=Orthogonal(),
        )

        # Decoder layers
        Model_D0 = DenseTranspose(
            Model_E5, activation="relu", output_dim=self.ENCODING,
        )
        Model_D1 = DenseTranspose(Model_E4, activation="relu", output_dim=self.DENSE_2,)
        Model_D2 = DenseTranspose(Model_E3, activation="relu", output_dim=self.DENSE_1,)
        Model_D3 = RepeatVector(self.input_shape[1])
        Model_D4 = Bidirectional(
            LSTM(
                self.LSTM_units_1,
                activation="tanh",
                return_sequences=True,
                kernel_constraint=UnitNorm(axis=1),
            )
        )
        Model_D5 = Bidirectional(
            LSTM(
                self.LSTM_units_1,
                activation="sigmoid",
                return_sequences=True,
                kernel_constraint=UnitNorm(axis=1),
            )
        )

        # Define and instantiate encoder
        encoder = Sequential(name="SEQ_2_SEQ_Encoder")
        encoder.add(Input(shape=self.input_shape[1:]))
        encoder.add(Model_E0)
        encoder.add(BatchNormalization())
        encoder.add(Model_E1)
        encoder.add(BatchNormalization())
        encoder.add(Model_E2)
        encoder.add(BatchNormalization())
        encoder.add(Model_E3)
        encoder.add(BatchNormalization())
        encoder.add(Dropout(self.DROPOUT_RATE))
        encoder.add(Model_E4)
        encoder.add(BatchNormalization())
        encoder.add(Model_E5)

        # Define and instantiate decoder
        decoder = Sequential(name="SEQ_2_SEQ_Decoder")
        decoder.add(Model_D0)
        decoder.add(BatchNormalization())
        decoder.add(Model_D1)
        decoder.add(BatchNormalization())
        decoder.add(Model_D2)
        decoder.add(BatchNormalization())
        decoder.add(Model_D3)
        decoder.add(Model_D4)
        decoder.add(BatchNormalization())
        decoder.add(Model_D5)
        decoder.add(TimeDistributed(Dense(self.input_shape[2])))

        model = Sequential([encoder, decoder], name="SEQ_2_SEQ_AE")

        model.compile(
            loss=Huber(reduction="sum", delta=100.0),
            optimizer=Adam(lr=self.learn_rate, clipvalue=0.5,),
            metrics=["mae"],
        )

        return encoder, decoder, model
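
# Usage sketch for SEQ_2_SEQ_AE (illustrative only; the (1, 100, 20) shape and
# the X_train array are hypothetical placeholders for real preprocessed data):
#
#     encoder, decoder, ae = SEQ_2_SEQ_AE(input_shape=(1, 100, 20)).build()
#     ae.fit(X_train, X_train, epochs=10, batch_size=64)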


class SEQ_2_SEQ_VAE:
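    """Variational sequence-to-sequence autoencoder.

    Uses the same encoder/decoder stack as SEQ_2_SEQ_AE, but maps each
    sequence to a latent distribution (z_mean, z_log_sigma). Depending on
    the `loss` argument, the latent space is regularized with a KL
    divergence term ("ELBO"), a Maximum Mean Discrepancy term ("MMD"),
    or both.
    """
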
    def __init__(
        self,
        input_shape,
        CONV_filters=256,
        LSTM_units_1=256,
        LSTM_units_2=64,
        DENSE_2=64,
        DROPOUT_RATE=0.25,
        ENCODING=32,
        learn_rate=1e-3,
        loss="ELBO+MMD",
        kl_warmup_epochs=0,
        mmd_warmup_epochs=0,
    ):
        self.input_shape = input_shape
        self.CONV_filters = CONV_filters
        self.LSTM_units_1 = LSTM_units_1
        self.LSTM_units_2 = LSTM_units_2
        self.DENSE_1 = LSTM_units_2
        self.DENSE_2 = DENSE_2
        self.DROPOUT_RATE = DROPOUT_RATE
        self.ENCODING = ENCODING
        self.learn_rate = learn_rate
        self.loss = loss
        self.kl_warmup = kl_warmup_epochs
        self.mmd_warmup = mmd_warmup_epochs

    def build(self):
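        """Builds the variational autoencoder.

        Returns an (encoder, generator, vae) tuple: the encoder maps
        sequences to z_mean, the generator decodes latent vectors back
        into sequences, and `vae` is the compiled end-to-end model.
        """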
        # Encoder Layers
        Model_E0 = tf.keras.layers.Conv1D(
            filters=self.CONV_filters,
            kernel_size=5,
            strides=1,
            padding="causal",
            activation="relu",
            kernel_initializer=he_uniform(),
        )
        Model_E1 = Bidirectional(
            LSTM(
                self.LSTM_units_1,
                activation="tanh",
                return_sequences=True,
                kernel_constraint=UnitNorm(axis=0),
            )
        )
        Model_E2 = Bidirectional(
            LSTM(
                self.LSTM_units_2,
                activation="tanh",
                return_sequences=False,
                kernel_constraint=UnitNorm(axis=0),
            )
        )
        Model_E3 = Dense(
            self.DENSE_1,
            activation="relu",
            kernel_constraint=UnitNorm(axis=0),
            kernel_initializer=he_uniform(),
        )
        Model_E4 = Dense(
            self.DENSE_2,
            activation="relu",
            kernel_constraint=UnitNorm(axis=0),
            kernel_initializer=he_uniform(),
        )
        Model_E5 = Dense(
            self.ENCODING,
            activation="relu",
            kernel_constraint=UnitNorm(axis=1),
            activity_regularizer=UncorrelatedFeaturesConstraint(3, weightage=1.0),
            kernel_initializer=Orthogonal(),
        )

        # Decoder layers
        Model_B1 = BatchNormalization()
        Model_B2 = BatchNormalization()
        Model_B3 = BatchNormalization()
        Model_B4 = BatchNormalization()
        Model_B5 = BatchNormalization()
        Model_D0 = DenseTranspose(
            Model_E5, activation="relu", output_dim=self.ENCODING,
        )
        Model_D1 = DenseTranspose(Model_E4, activation="relu", output_dim=self.DENSE_2,)
        Model_D2 = DenseTranspose(Model_E3, activation="relu", output_dim=self.DENSE_1,)
        Model_D3 = RepeatVector(self.input_shape[1])
        Model_D4 = Bidirectional(
            LSTM(
                self.LSTM_units_1,
                activation="tanh",
                return_sequences=True,
                kernel_constraint=UnitNorm(axis=1),
            )
        )
        Model_D5 = Bidirectional(
            LSTM(
                self.LSTM_units_1,
                activation="sigmoid",
                return_sequences=True,
                kernel_constraint=UnitNorm(axis=1),
            )
        )

        # Define and instantiate encoder
        x = Input(shape=self.input_shape[1:])
        encoder = Model_E0(x)
        encoder = BatchNormalization()(encoder)
        encoder = Model_E1(encoder)
        encoder = BatchNormalization()(encoder)
        encoder = Model_E2(encoder)
        encoder = BatchNormalization()(encoder)
        encoder = Model_E3(encoder)
        encoder = BatchNormalization()(encoder)
        encoder = Dropout(self.DROPOUT_RATE)(encoder)
        encoder = Model_E4(encoder)
        encoder = BatchNormalization()(encoder)
        encoder = Model_E5(encoder)

        z_mean = Dense(self.ENCODING)(encoder)
        z_log_sigma = Dense(self.ENCODING)(encoder)

        if "ELBO" in self.loss:
            z_mean, z_log_sigma = KLDivergenceLayer()([z_mean, z_log_sigma])

        z = Lambda(sampling)([z_mean, z_log_sigma])

        if "MMD" in self.loss:
            z = MMDiscrepancyLayer()(z)

        # Define and instantiate generator
        generator = Model_D0(z)
        generator = Model_B1(generator)
        generator = Model_D1(generator)
        generator = Model_B2(generator)
        generator = Model_D2(generator)
        generator = Model_B3(generator)
        generator = Model_D3(generator)
        generator = Model_D4(generator)
        generator = Model_B4(generator)
        generator = Model_D5(generator)
        generator = Model_B5(generator)
        x_decoded_mean = TimeDistributed(Dense(self.input_shape[2]))(generator)

        # end-to-end autoencoder
        encoder = Model(x, z_mean, name="SEQ_2_SEQ_VEncoder")
        vae = Model(x, x_decoded_mean, name="SEQ_2_SEQ_VAE")

        # Build generator as a separate entity
        g = Input(shape=(self.ENCODING,))
        _generator = Model_D0(g)
        _generator = Model_B1(_generator)
        _generator = Model_D1(_generator)
        _generator = Model_B2(_generator)
        _generator = Model_D2(_generator)
        _generator = Model_B3(_generator)
        _generator = Model_D3(_generator)
        _generator = Model_D4(_generator)
        _generator = Model_B4(_generator)
        _generator = Model_D5(_generator)
        _generator = Model_B5(_generator)
        _x_decoded_mean = TimeDistributed(Dense(self.input_shape[2]))(_generator)
        generator = Model(g, _x_decoded_mean, name="SEQ_2_SEQ_VGenerator")

        def huber_loss(x_, x_decoded_mean_):
            huber = Huber(reduction="sum", delta=100.0)
            return self.input_shape[1:] * huber(x_, x_decoded_mean_)

        vae.compile(
            loss=huber_loss,
            optimizer=Adam(lr=self.learn_rate,),
            metrics=["mae"],
            experimental_run_tf_function=False,
        )

        return encoder, generator, vae
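
# Usage sketch for SEQ_2_SEQ_VAE (illustrative only; shapes and the X_train
# array are hypothetical placeholders):
#
#     encoder, generator, vae = SEQ_2_SEQ_VAE(
#         input_shape=(1, 100, 20), loss="ELBO+MMD"
#     ).build()
#     vae.fit(X_train, X_train, epochs=10, batch_size=64)
#     latents = encoder.predict(X_train)      # one ENCODING-sized vector per sample
#     decoded = generator.predict(latents)    # sequences decoded from latent space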


class SEQ_2_SEQ_VAEP:
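    """Variational autoencoder with an auxiliary prediction branch.

    Shares the encoder and latent space of SEQ_2_SEQ_VAE, but the
    end-to-end model has two TimeDistributed outputs: the reconstructed
    sequence and a second sequence predicted from the same latent code.
    """
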
    def __init__(
        self,
        input_shape,
        CONV_filters=256,
        LSTM_units_1=256,
        LSTM_units_2=64,
        DENSE_2=64,
        DROPOUT_RATE=0.25,
        ENCODING=32,
        learn_rate=1e-3,
        loss="ELBO+MMD",
        kl_warmup_epochs=0,
        mmd_warmup_epochs=0,
    ):
        self.input_shape = input_shape
        self.CONV_filters = CONV_filters
        self.LSTM_units_1 = LSTM_units_1
        self.LSTM_units_2 = LSTM_units_2
        self.DENSE_1 = LSTM_units_2
        self.DENSE_2 = DENSE_2
        self.DROPOUT_RATE = DROPOUT_RATE
        self.ENCODING = ENCODING
        self.learn_rate = learn_rate
        self.loss = loss
        self.kl_warmup = kl_warmup_epochs
        self.mmd_warmup = mmd_warmup_epochs

    def build(self):
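        """Builds the variational autoencoder with prediction head.

        Returns an (encoder, generator, vaep) tuple; `vaep` is compiled
        with the same Huber-based loss applied to both outputs.
        """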
        # Encoder Layers
        Model_E0 = tf.keras.layers.Conv1D(
            filters=self.CONV_filters,
            kernel_size=5,
            strides=1,
            padding="causal",
            activation="relu",
            kernel_initializer=he_uniform(),
        )
        Model_E1 = Bidirectional(
            LSTM(
                self.LSTM_units_1,
                activation="tanh",
                return_sequences=True,
                kernel_constraint=UnitNorm(axis=0),
            )
        )
        Model_E2 = Bidirectional(
            LSTM(
                self.LSTM_units_2,
                activation="tanh",
                return_sequences=False,
                kernel_constraint=UnitNorm(axis=0),
            )
        )
        Model_E3 = Dense(
            self.DENSE_1,
            activation="relu",
            kernel_constraint=UnitNorm(axis=0),
            kernel_initializer=he_uniform(),
        )
        Model_E4 = Dense(
            self.DENSE_2,
            activation="relu",
            kernel_constraint=UnitNorm(axis=0),
            kernel_initializer=he_uniform(),
        )
        Model_E5 = Dense(
            self.ENCODING,
            activation="relu",
            kernel_constraint=UnitNorm(axis=1),
            activity_regularizer=UncorrelatedFeaturesConstraint(3, weightage=1.0),
            kernel_initializer=Orthogonal(),
        )

        # Decoder layers
        Model_B1 = BatchNormalization()
        Model_B2 = BatchNormalization()
        Model_B3 = BatchNormalization()
        Model_B4 = BatchNormalization()
        Model_B5 = BatchNormalization()
        Model_D0 = DenseTranspose(
            Model_E5, activation="relu", output_dim=self.ENCODING,
        )
        Model_D1 = DenseTranspose(Model_E4, activation="relu", output_dim=self.DENSE_2,)
        Model_D2 = DenseTranspose(Model_E3, activation="relu", output_dim=self.DENSE_1,)
        Model_D3 = RepeatVector(self.input_shape[1])
        Model_D4 = Bidirectional(
            LSTM(
                self.LSTM_units_1,
                activation="tanh",
                return_sequences=True,
                kernel_constraint=UnitNorm(axis=1),
            )
        )
        Model_D5 = Bidirectional(
            LSTM(
                self.LSTM_units_1,
                activation="sigmoid",
                return_sequences=True,
                kernel_constraint=UnitNorm(axis=1),
            )
        )

        # Define and instantiate encoder
        x = Input(shape=self.input_shape[1:])
        encoder = Model_E0(x)
        encoder = BatchNormalization()(encoder)
        encoder = Model_E1(encoder)
        encoder = BatchNormalization()(encoder)
        encoder = Model_E2(encoder)
        encoder = BatchNormalization()(encoder)
        encoder = Model_E3(encoder)
        encoder = BatchNormalization()(encoder)
        encoder = Dropout(self.DROPOUT_RATE)(encoder)
        encoder = Model_E4(encoder)
        encoder = BatchNormalization()(encoder)
        encoder = Model_E5(encoder)

        z_mean = Dense(self.ENCODING)(encoder)
        z_log_sigma = Dense(self.ENCODING)(encoder)

        if "ELBO" in self.loss:
            z_mean, z_log_sigma = KLDivergenceLayer()([z_mean, z_log_sigma])

        z = Lambda(sampling)([z_mean, z_log_sigma])

        if "MMD" in self.loss:
            z = MMDiscrepancyLayer()(z)

        # Define and instantiate generator
        generator = Model_D0(z)
        generator = Model_B1(generator)
        generator = Model_D1(generator)
        generator = Model_B2(generator)
        generator = Model_D2(generator)
        generator = Model_B3(generator)
        generator = Model_D3(generator)
        generator = Model_D4(generator)
        generator = Model_B4(generator)
        generator = Model_D5(generator)
        generator = Model_B5(generator)
        x_decoded_mean = TimeDistributed(Dense(self.input_shape[2]))(generator)

        # Define and instantiate predictor
        predictor = Dense(
            self.ENCODING, activation="relu", kernel_initializer=he_uniform()
        )(z)
        predictor = BatchNormalization()(predictor)
        predictor = Dense(
            self.DENSE_2, activation="relu", kernel_initializer=he_uniform()
        )(predictor)
        predictor = BatchNormalization()(predictor)
        predictor = Dense(
            self.DENSE_1, activation="relu", kernel_initializer=he_uniform()
        )(predictor)
        predictor = BatchNormalization()(predictor)
        predictor = RepeatVector(self.input_shape[1])(predictor)
        predictor = Bidirectional(
            LSTM(
                self.LSTM_units_1,
                activation="tanh",
                return_sequences=True,
                kernel_constraint=UnitNorm(axis=1),
            )
        )(predictor)
        predictor = BatchNormalization()(predictor)
        predictor = Bidirectional(
            LSTM(
                self.LSTM_units_1,
                activation="sigmoid",
                return_sequences=True,
                kernel_constraint=UnitNorm(axis=1),
            )
        )(predictor)
        predictor = BatchNormalization()(predictor)
        x_predicted_mean = TimeDistributed(Dense(self.input_shape[2]))(predictor)

        # end-to-end autoencoder
        encoder = Model(x, z_mean, name="SEQ_2_SEQ_VEncoder")
        vaep = Model(
            inputs=x, outputs=[x_decoded_mean, x_predicted_mean], name="SEQ_2_SEQ_VAE"
        )

        # Build generator as a separate entity
        g = Input(shape=(self.ENCODING,))
        _generator = Model_D0(g)
        _generator = Model_B1(_generator)
        _generator = Model_D1(_generator)
        _generator = Model_B2(_generator)
        _generator = Model_D2(_generator)
        _generator = Model_B3(_generator)
        _generator = Model_D3(_generator)
        _generator = Model_D4(_generator)
        _generator = Model_B4(_generator)
        _generator = Model_D5(_generator)
        _generator = Model_B5(_generator)
        _x_decoded_mean = TimeDistributed(Dense(self.input_shape[2]))(_generator)
        generator = Model(g, _x_decoded_mean, name="SEQ_2_SEQ_VGenerator")

        def huber_loss(x_, x_decoded_mean_):
            huber = Huber(reduction="sum", delta=100.0)
            return self.input_shape[1:] * huber(x_, x_decoded_mean_)

        vaep.compile(
            loss=huber_loss,
            optimizer=Adam(lr=self.learn_rate,),
            metrics=["mae"],
            experimental_run_tf_function=False,
        )

        return encoder, generator, vaep
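
# Usage sketch for SEQ_2_SEQ_VAEP (illustrative only; X_train and X_next are
# hypothetical arrays of identical shape, the second being the target for the
# prediction head):
#
#     encoder, generator, vaep = SEQ_2_SEQ_VAEP(input_shape=(1, 100, 20)).build()
#     vaep.fit(X_train, [X_train, X_next], epochs=10, batch_size=64)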


class SEQ_2_SEQ_MMVAE:
    pass


# TODO:
#       - Add learning rate scheduler callback
#       - KL / MMD warmup (Ladder Variational Autoencoders)
#       - Gaussian Mixture + Categorical priors -> Deep Clustering
#       - free bits paper
#       - Attention mechanism for encoder / decoder (does it make sense?)
#       - Transformer encoder/decoder (does it make sense?)