Commit 43edd573 authored by lucas_miranda

Added notebook with empirically derived default for the latent neighborhood radius

parent 88e41fb9
Pipeline #95749 canceled with stages in 56 seconds
@@ -51,11 +51,11 @@ def load_treatments(train_path):
to be loaded as metadata in the coordinates class"""
try:
with open(
    os.path.join(
        train_path,
        [i for i in os.listdir(train_path) if i.endswith(".pkl")][0],
    ),
    "rb",
) as handle:
treatment_dict = pickle.load(handle)
except IndexError:
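
For reference, load_treatments simply unpickles the first .pkl file it finds in train_path. A toy round-trip under that reading (the file name and dictionary contents are illustrative, not from the repo):

import os
import pickle
import tempfile

# write a hypothetical treatment dictionary where load_treatments expects it
train_path = tempfile.mkdtemp()
with open(os.path.join(train_path, "treatments.pkl"), "wb") as handle:
    pickle.dump({"CSDS": ["Test_1"], "Control": ["Test_2"]}, handle)

treatment_dict = load_treatments(train_path)  # returns the dict pickled above
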
@@ -65,20 +65,20 @@ def load_treatments(train_path):
def get_callbacks(
X_train: np.array,
batch_size: int,
variational: bool,
phenotype_class: float,
predictor: float,
loss: str,
X_val: np.array = None,
cp: bool = False,
reg_cat_clusters: bool = False,
reg_cluster_variance: bool = False,
entropy_samples: int = 15000,
entropy_radius: float = None,
logparam: dict = None,
outpath: str = ".",
) -> List[Union[Any]]:
"""Generates callbacks for model training, including:
- run_ID: run name, with coarse parameter details;
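
The rest of the callback list is elided above. For orientation, a hypothetical call matching the signature (array shape and parameter values are illustrative; per the docstring, the run ID string is assumed to come first in the returned list):

import numpy as np

# toy input: 1000 sliding windows of 24 frames x 12 features (shapes are made up)
X_train = np.random.normal(size=(1000, 24, 12)).astype(np.float32)

run_ID, *cbacks = get_callbacks(
    X_train=X_train,
    batch_size=256,
    variational=True,
    phenotype_class=0.0,
    predictor=0.0,
    loss="ELBO",
    logparam={"encoding": 6, "k": 15},
)
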
@@ -117,7 +117,7 @@ def get_callbacks(
entropy_radius
if entropy_radius is not None
else 0.15 * logparam["encoding"]
- 0.18  # equation derived empirically to keep neighbor number constant.
# See examples/set_default_entropy_radius.ipynb for details
),
samples=entropy_samples,
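
The fallback above reads as a standalone rule: radius = 0.15 * encoding - 0.18, fit (per the comment) so that the expected number of latent-space neighbors stays roughly constant across encoding sizes. As a sketch:

def default_entropy_radius(encoding_size: int, entropy_radius: float = None) -> float:
    # an explicit radius wins; otherwise apply the empirical linear rule from
    # examples/set_default_entropy_radius.ipynb
    if entropy_radius is not None:
        return entropy_radius
    return 0.15 * encoding_size - 0.18

# e.g. a 6-dimensional latent space gets a default radius of 0.72
assert abs(default_entropy_radius(6) - 0.72) < 1e-9
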
@@ -200,14 +200,14 @@ def log_hyperparameters(phenotype_class: float, rec: str):
# noinspection PyUnboundLocalVariable
def tensorboard_metric_logging(
run_dir: str,
hpms: Any,
ae: Any,
X_val: np.ndarray,
y_val: np.ndarray,
phenotype_class: float,
predictor: float,
rec: str,
):
"""Autoencoder metric logging in tensorboard"""
@@ -249,29 +249,29 @@ def tensorboard_metric_logging(
def autoencoder_fitting(
preprocessed_object: Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray],
batch_size: int,
encoding_size: int,
epochs: int,
hparams: dict,
kl_warmup: int,
log_history: bool,
log_hparams: bool,
loss: str,
mmd_warmup: int,
montecarlo_kl: int,
n_components: int,
output_path: str,
phenotype_class: float,
predictor: float,
pretrained: str,
save_checkpoints: bool,
save_weights: bool,
variational: bool,
reg_cat_clusters: bool,
reg_cluster_variance: bool,
entropy_radius: float,
entropy_samples: int,
):
"""Implementation function for deepof.data.coordinates.deep_unsupervised_embedding"""
@@ -316,7 +316,7 @@ def autoencoder_fitting(
logparams, metrics = log_hyperparameters(phenotype_class, rec)
with tf.summary.create_file_writer(
os.path.join(output_path, "hparams", run_ID)
).as_default():
hp.hparams_config(
hparams=logparams,
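
The file-writer block above follows the standard TensorBoard HParams pattern: register the hyperparameter and metric schema once per run directory. A minimal self-contained version (parameter and metric names are illustrative; log_hyperparameters builds the real lists):

import os
import tensorflow as tf
from tensorboard.plugins.hparams import api as hp

logparams = [hp.HParam("encoding", hp.Discrete([2, 4, 6, 8]))]
metrics = [hp.Metric("val_mae", display_name="validation MAE")]

with tf.summary.create_file_writer(
    os.path.join("logs", "hparams", "run_1")
).as_default():
    # writes the schema that the HParams dashboard uses to group runs
    hp.hparams_config(hparams=logparams, metrics=metrics)
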
@@ -375,14 +375,14 @@ def autoencoder_fitting(
verbose=1,
validation_data=(X_val, X_val),
callbacks=cbacks
+ [
CustomStopper(
monitor="val_loss",
patience=5,
restore_best_weights=True,
start_epoch=max(kl_warmup, mmd_warmup),
),
],
)
if save_weights:
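
CustomStopper is not defined in this diff; a plausible minimal reading, assuming it is a keras EarlyStopping variant that stays dormant until the KL/MMD warm-up given by start_epoch has elapsed:

import tensorflow as tf

class CustomStopper(tf.keras.callbacks.EarlyStopping):
    # assumption: identical to EarlyStopping, except that epochs before
    # start_epoch are ignored, so warm-up losses cannot trigger a stop
    def __init__(self, start_epoch: int = 0, **kwargs):
        super().__init__(**kwargs)
        self.start_epoch = start_epoch

    def on_epoch_end(self, epoch, logs=None):
        if epoch >= self.start_epoch:
            super().on_epoch_end(epoch, logs)
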
@@ -459,23 +459,23 @@ def autoencoder_fitting(
def tune_search(
data: List[np.array],
encoding_size: int,
hypertun_trials: int,
hpt_type: str,
hypermodel: str,
k: int,
kl_warmup_epochs: int,
loss: str,
mmd_warmup_epochs: int,
overlap_loss: float,
phenotype_class: float,
predictor: float,
project_name: str,
callbacks: List,
n_epochs: int = 30,
n_replicas: int = 1,
outpath: str = ".",
) -> Union[bool, Tuple[Any, Any]]:
"""Define the search space using keras-tuner and bayesian optimization
@@ -514,11 +514,13 @@ def tune_search(
if hypermodel == "S2SAE": # pragma: no cover
assert (
predictor == 0.0 and phenotype_class == 0.0
), "Prediction branches are only available for variational models. See documentation for more details"
batch_size = 1
hypermodel = deepof.hypermodels.SEQ_2_SEQ_AE(input_shape=X_train.shape)
elif hypermodel == "S2SGMVAE":
batch_size = 64
hypermodel = deepof.hypermodels.SEQ_2_SEQ_GMVAE(
input_shape=X_train.shape,
encoding=encoding_size,
@@ -583,7 +585,7 @@ def tune_search(
epochs=n_epochs,
validation_data=(Xvals, yvals),
verbose=1,
batch_size=batch_size,
callbacks=callbacks,
)
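
The batch_size passed to the search above is the value set earlier per hypermodel (1 for S2SAE, 64 for S2SGMVAE). The docstring describes the search as keras-tuner Bayesian optimization; an illustrative wiring under that assumption (the objective name is hypothetical):

from kerastuner import BayesianOptimization

# hypothetical construction; tune_search builds the real tuner
tuner = BayesianOptimization(
    hypermodel,
    objective="val_mae",
    max_trials=hypertun_trials,
    executions_per_trial=n_replicas,
    directory=outpath,
    project_name=project_name,
)
tuner.search(
    X_train,
    X_train,
    epochs=n_epochs,
    validation_data=(Xvals, yvals),
    batch_size=batch_size,
    callbacks=callbacks,
)
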