# @author lucasmiranda42
# encoding: utf-8
# deepof_experiments

"""

Snakefile for the deepof model training and hyperparameter-tuning experiments.
Execution: sbatch snakemake
Plot DAG: snakemake --snakefile deepof_experiments.smk --forceall --dag | dot -Tpdf > deepof_experiments_DAG.pdf
Plot rule graph: snakemake --snakefile deepof_experiments.smk --forceall --rulegraph | dot -Tpdf > deepof_experiments_RULEGRAPH.pdf

"""

import os

outpath = "/psycl/g/mpsstatgen/lucas/DLC/DLC_autoencoders/DeepOF/deepof/logs/"

warmup_epochs = [15]
warmup_mode = ["sigmoid"]
losses = ["ELBO"]  # , "MMD", "ELBO+MMD"]
overlap_loss = [0.1, 0.2, 0.5, 0.75, 1.0]
encodings = [6]  # [2, 4, 6, 8, 10, 12, 14, 16]
cluster_numbers = [15]  # [1, 5, 10, 15, 20, 25]
latent_reg = ["variance"]  # ["none", "categorical", "variance", "categorical+variance"]
entropy_knn = [10]
next_sequence_pred_weights = [0.15]
phenotype_pred_weights = [0.0]
rule_based_pred_weights = [0.0]
window_lengths = [22]  # range(11,56,11)
input_types = ["coords"]
run = list(range(1, 11))


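# With the current single-valued lists, only overlap_loss (5 values) and run
# (10 replicates) vary, so the target rule below requests 5 * 10 = 50 weight files;
# the commented-out alternatives above show the wider grid that can be re-enabled.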
rule deepof_experiments:
    input:
        # Elliptical arena detection
        # "/psycl/g/mpsstatgen/lucas/DLC/DLC_autoencoders/DeepOF/deepof/supplementary_notebooks/recognise_elliptical_arena.ipynb",
        #
        # Hyperparameter tuning
        # expand(
        #     os.path.join(
        #         outpath,
        #         "coarse_hyperparameter_tuning/trained_weights/GMVAE_loss={loss}_k={k}_encoding={enc}_final_weights.h5",
        #     ),
        #     loss=losses,
        #     k=cluster_numbers,
        #     enc=encodings,
        # ),
        #
        # Train a variety of models
        expand(
            outpath + "train_models/trained_weights/"
            "GMVAE_input_type={input_type}_"
            "window_size={window_size}_"
            "NSPred={nspredweight}_"
            "PPred={phenpredweight}_"
            "RBPred={rulesweight}_"
            "loss={loss}_"
            "overlap_loss={overlap_loss}_"
            "loss_warmup={warmup}_"
            "warmup_mode={warmup_mode}_"
            "encoding={encs}_"
            "k={k}_"
            "latreg={latreg}_"
            "entknn={entknn}_"
            "run={run}_"
            "final_weights.h5",
            input_type=input_types,
            window_size=window_lengths,
            loss=losses,
            overlap_loss=overlap_loss,
            warmup=warmup_epochs,
            warmup_mode=warmup_mode,
            encs=encodings,
            k=cluster_numbers,
            latreg=latent_reg,
            entknn=entropy_knn,
            nspredweight=next_sequence_pred_weights,
            phenpredweight=phenotype_pred_weights,
            rulesweight=rule_based_pred_weights,
            run=run,
        ),
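
# For reference, one expanded target under the defaults above (illustrative values
# overlap_loss=0.1 and run=1; the prefix is the outpath defined at the top):
#   .../train_models/trained_weights/GMVAE_input_type=coords_window_size=22_
#   NSPred=0.15_PPred=0.0_RBPred=0.0_loss=ELBO_overlap_loss=0.1_loss_warmup=15_
#   warmup_mode=sigmoid_encoding=6_k=15_latreg=variance_entknn=10_run=1_final_weights.h5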


rule elliptical_arena_detector:
    input:
        to_exec="/psycl/g/mpsstatgen/lucas/DLC/DLC_autoencoders/DeepOF/deepof/supplementary_notebooks/recognise_elliptical_arena_blank.ipynb",
    output:
        exec="/psycl/g/mpsstatgen/lucas/DLC/DLC_autoencoders/DeepOF/deepof/supplementary_notebooks/recognise_elliptical_arena.ipynb",
    shell:
        "papermill {input.to_exec} "
        "-p vid_path './supplementary_notebooks/' "
        "-p log_path './logs/' "
        "-p out_path './deepof/trained_models/' "
        "{output.exec}"


rule coarse_hyperparameter_tuning:
    input:
        data_path="/psycl/g/mpsstatgen/lucas/DLC/DLC_models/deepof_single_topview/",
    output:
        trained_models=os.path.join(
            outpath,
            "coarse_hyperparameter_tuning/trained_weights/GMVAE_loss={loss}_k={k}_encoding={enc}_final_weights.h5",
        ),
    # The prediction-head weights are not wildcards of this rule's output pattern,
    # so the single-valued lists defined at the top of the file are used instead
    # (assumed defaults).
    shell:
        "pipenv run python -m deepof.train_model "
        "--train-path {input.data_path} "
        "--val-num 25 "
        "--components {wildcards.k} "
        "--input-type coords "
        "--next-sequence-prediction {next_sequence_pred_weights[0]} "
        "--phenotype-prediction {phenotype_pred_weights[0]} "
        "--rule-based-prediction {rule_based_pred_weights[0]} "
        "--loss {wildcards.loss} "
        "--kl-warmup 30 "
        "--mmd-warmup 30 "
        "--encoding-size {wildcards.enc} "
        "--batch-size 512 "
        "--window-size 24 "
        "--window-step 12 "
        "--output-path {outpath}coarse_hyperparameter_tuning "
        "--hyperparameter-tuning hyperband "
        "--hpt-trials 1"
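
# A single tuning output can also be requested directly by path (dry run shown;
# <outpath> stands for the log directory defined at the top of this file):
#
#   snakemake --snakefile deepof_experiments.smk -n \
#       "<outpath>/coarse_hyperparameter_tuning/trained_weights/GMVAE_loss=ELBO_k=15_encoding=6_final_weights.h5"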


rule train_models:
    input:
        data_path=ancient(
            "/psycl/g/mpsstatgen/lucas/DLC/DLC_models/deepof_single_topview/"
        ),
    output:
        trained_models=outpath + "train_models/trained_weights/"
        "GMVAE_input_type={input_type}_"
        "window_size={window_size}_"
        "NSPred={nspredweight}_"
        "PPred={phenpredweight}_"
        "RBPred={rulesweight}_"
        "loss={loss}_"
        "overlap_loss={overlap_loss}_"
        "loss_warmup={warmup}_"
        "warmup_mode={warmup_mode}_"
        "encoding={encs}_"
        "k={k}_"
        "latreg={latreg}_"
        "entknn={entknn}_"
        "run={run}_"
        "final_weights.h5",
    shell:
        "pipenv run python -m deepof.train_model "
        "--train-path {input.data_path} "
        "--val-num 15 "
        "--components {wildcards.k} "
        "--input-type {wildcards.input_type} "
        "--next-sequence-prediction {wildcards.nspredweight} "
        "--phenotype-prediction {wildcards.phenpredweight} "
        "--rule-based-prediction {wildcards.rulesweight} "
        "--latent-reg {wildcards.latreg} "
        "--loss {wildcards.loss} "
        "--overlap_loss {wildcards.overlap_loss} "
        "--kl-annealing-mode {wildcards.warmup_mode} "
        "--kl-warmup {wildcards.warmup} "
        "--mmd-annealing-mode {wildcards.warmup_mode} "
        "--mmd-warmup {wildcards.warmup} "
        "--montecarlo-kl 10 "
        "--encoding-size {wildcards.encs} "
        "--entropy-knn {wildcards.entknn} "
        "--batch-size 256 "
        "--window-size {wildcards.window_size} "
        "--window-step 11 "
        "--run {wildcards.run} "
        "--output-path {outpath}train_models"