# @authors lucasmiranda42
# encoding: utf-8
# deepof_experiments

"""

Snakefile for the DeepOF model training experiments.
Execution: sbatch snakemake
Plot DAG: snakemake --snakefile deepof_experiments.smk --forceall --dag | dot -Tpdf > deepof_experiments_DAG.pdf
Plot rule graph: snakemake --snakefile deepof_experiments.smk --forceall --rulegraph | dot -Tpdf > deepof_experiments_RULEGRAPH.pdf

"""

import os

outpath = "/psycl/g/mpsstatgen/lucas/DLC/DLC_autoencoders/DeepOF/deepof/logs/"

warmup_epochs = [15]
warmup_mode = ["sigmoid"]
losses = ["ELBO"]  # , "MMD", "ELBO+MMD"]
overlap_loss = [0.1, 0.2, 0.5, 0.75, 1.0]
encodings = [32]  # [2, 4, 6, 8, 10, 12, 14, 16]
cluster_numbers = [15]  # [1, 5, 10, 15, 20, 25]
latent_reg = ["variance"]  # ["none", "categorical", "variance", "categorical+variance"]
entropy_knn = [10]
next_sequence_pred_weights = [0.15]
phenotype_pred_weights = [0.0]
rule_based_pred_weights = [0.0]
window_lengths = [22]  # range(11, 56, 11)
input_types = ["coords"]
run = list(range(1, 11))


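# The target rule below requests one weights file per combination of the lists above;
# with the current settings (5 overlap_loss values x 10 runs, all other lists of
# length one) this amounts to 50 trained models.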
rule deepof_experiments:
    input:
        # Elliptical arena detection
        # "/psycl/g/mpsstatgen/lucas/DLC/DLC_autoencoders/DeepOF/deepof/supplementary_notebooks/recognise_elliptical_arena.ipynb",
        #
        # Hyperparameter tuning
        # expand(
        #     os.path.join(
        #         outpath,
        #         "coarse_hyperparameter_tuning/trained_weights/GMVAE_loss={loss}_k={k}_encoding={enc}_NSPred={nspredweight}_PPred={phenpredweight}_RBPred={rulesweight}_final_weights.h5",
        #     ),
        #     loss=losses,
        #     k=cluster_numbers,
        #     enc=encodings,
        #     nspredweight=next_sequence_pred_weights,
        #     phenpredweight=phenotype_pred_weights,
        #     rulesweight=rule_based_pred_weights,
        # ),
        #
        # Train a variety of models
        expand(
            outpath + "train_models/trained_weights/"
            "deepof_"
            "GMVAE_input_type={input_type}_"
            "window_size={window_size}_"
            "NSPred={nspredweight}_"
            "PPred={phenpredweight}_"
            "RBPred={rulesweight}_"
            "loss={loss}_"
            "overlap_loss={overlap_loss}_"
            "loss_warmup={warmup}_"
            "warmup_mode={warmup_mode}_"
            "encoding={encs}_"
            "k={k}_"
            "latreg={latreg}_"
            "entknn={entknn}_"
            "run={run}_"
            "final_weights.h5",
            input_type=input_types,
            window_size=window_lengths,
            loss=losses,
            overlap_loss=overlap_loss,
            warmup=warmup_epochs,
            warmup_mode=warmup_mode,
            encs=encodings,
            k=cluster_numbers,
            latreg=latent_reg,
            entknn=entropy_knn,
            nspredweight=next_sequence_pred_weights,
            phenpredweight=phenotype_pred_weights,
            rulesweight=rule_based_pred_weights,
            run=run,
        ),


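# The rule below executes the arena-recognition notebook with papermill: each -p flag
# injects a notebook parameter, and the executed copy is written to {output.exec}.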
rule elliptical_arena_detector:
    input:
        to_exec="/psycl/g/mpsstatgen/lucas/DLC/DLC_autoencoders/DeepOF/deepof/supplementary_notebooks/recognise_elliptical_arena_blank.ipynb",
    output:
        exec="/psycl/g/mpsstatgen/lucas/DLC/DLC_autoencoders/DeepOF/deepof/supplementary_notebooks/recognise_elliptical_arena.ipynb",
    shell:
        "papermill {input.to_exec} "
        "-p vid_path './supplementary_notebooks/' "
        "-p log_path './logs/' "
        "-p out_path './deepof/trained_models/' "
        "{output.exec}"


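# Snakemake only exposes wildcards that occur in a rule's output pattern; the
# prediction-weight fields (NSPred/PPred/RBPred) are therefore part of the weights
# filename below, so that the {wildcards.*} references in the shell command resolve.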
rule coarse_hyperparameter_tuning:
    input:
        data_path="/psycl/g/mpsstatgen/lucas/DLC/DLC_models/deepof_single_topview/",
    output:
        trained_models=os.path.join(
            outpath,
            "coarse_hyperparameter_tuning/trained_weights/GMVAE_loss={loss}_k={k}_encoding={enc}_NSPred={nspredweight}_PPred={phenpredweight}_RBPred={rulesweight}_final_weights.h5",
        ),
    shell:
        "pipenv run python -m deepof.train_model "
        "--train-path {input.data_path} "
        "--val-num 25 "
        "--components {wildcards.k} "
        "--input-type coords "
        "--next-sequence-prediction {wildcards.nspredweight} "
        "--phenotype-prediction {wildcards.phenpredweight} "
        "--rule-based-prediction {wildcards.rulesweight} "
        "--loss {wildcards.loss} "
        "--kl-warmup 30 "
        "--mmd-warmup 30 "
        "--encoding-size {wildcards.enc} "
        "--batch-size 512 "
        "--window-size 24 "
        "--window-step 12 "
        "--output-path {outpath}coarse_hyperparameter_tuning "
        "--hyperparameter-tuning hyperband "
        "--hpt-trials 1"


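# ancient() tells Snakemake to ignore the modification time of the raw-data folder,
# so models are not retrained just because the directory's timestamp changed.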
rule train_models:
    input:
        data_path=ancient(
            "/psycl/g/mpsstatgen/lucas/DLC/DLC_models/deepof_single_topview/"
        ),
    output:
        trained_models=outpath + "train_models/trained_weights/"
        "deepof_"
        "GMVAE_input_type={input_type}_"
        "window_size={window_size}_"
        "NSPred={nspredweight}_"
        "PPred={phenpredweight}_"
        "RBPred={rulesweight}_"
        "loss={loss}_"
        "overlap_loss={overlap_loss}_"
        "loss_warmup={warmup}_"
        "warmup_mode={warmup_mode}_"
        "encoding={encs}_"
        "k={k}_"
        "latreg={latreg}_"
        "entknn={entknn}_"
        "run={run}_"
        "final_weights.h5",
    shell:
        "pipenv run python -m deepof.train_model "
        "--train-path {input.data_path} "
        "--val-num 15 "
        "--components {wildcards.k} "
        "--input-type {wildcards.input_type} "
        "--next-sequence-prediction {wildcards.nspredweight} "
        "--phenotype-prediction {wildcards.phenpredweight} "
        "--rule-based-prediction {wildcards.rulesweight} "
        "--latent-reg {wildcards.latreg} "
        "--loss {wildcards.loss} "
        "--overlap-loss {wildcards.overlap_loss} "
        "--kl-annealing-mode {wildcards.warmup_mode} "
        "--kl-warmup {wildcards.warmup} "
        "--mmd-annealing-mode {wildcards.warmup_mode} "
        "--mmd-warmup {wildcards.warmup} "
        "--montecarlo-kl 10 "
        "--encoding-size {wildcards.encs} "
        "--entropy-knn {wildcards.entknn} "
        "--batch-size 256 "
        "--window-size {wildcards.window_size} "
        "--window-step 11 "
        "--run {wildcards.run} "
        "--output-path {outpath}train_models"