From ea6a83e1cb20346b9e156fb828f3086e6d7f0da0 Mon Sep 17 00:00:00 2001
From: Marcel Langer <me@sirmarcel.com>
Date: Fri, 6 Mar 2020 12:36:57 +0100
Subject: [PATCH] Add example script to prepare the datasets used for HPO

---
Reviewer notes (below the three-dash line, so not part of the commit message):

* The added script seeds NumPy's global RNG with 123 and only then imports
  cmlkit; the import order is kept exactly as committed.
* It loads the dataset "nmd18_train", calls
  cmlkit.utility.threeway_split(data.n, 800, 200) and unpacks the result as
  (rest, train, test). `rest` is never used afterwards.
* Presumably 800 and 200 are the sizes of the train and test index sets and
  the split draws from NumPy's global RNG (hence the seed) -- TODO confirm
  against cmlkit.utility.threeway_split.
* Two cmlkit.dataset.Subset objects named "nmd18_hpo_train" and
  "nmd18_hpo_test" are built from those indices; the script prints each
  subset's `n` and saves each with directory="data/cmlkit".
* The payload lines are unchanged, so the blob id, hunk header and diffstat
  below still describe them.

 prepare_data.py | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)
 create mode 100644 prepare_data.py

diff --git a/prepare_data.py b/prepare_data.py
new file mode 100644
index 0000000..42c8576
--- /dev/null
+++ b/prepare_data.py
@@ -0,0 +1,17 @@
+# Used to prepare the example datasets for hyper-parameter optimisation
+
+import numpy as np
+
+np.random.seed(123)
+import cmlkit
+
+data = cmlkit.load_dataset("nmd18_train")
+
+rest, train, test = cmlkit.utility.threeway_split(data.n, 800, 200)
+
+train = cmlkit.dataset.Subset.from_dataset(data, idx=train, name="nmd18_hpo_train")
+print(train.n)
+train.save(directory="data/cmlkit")
+test = cmlkit.dataset.Subset.from_dataset(data, idx=test, name="nmd18_hpo_test")
+print(test.n)
+test.save(directory="data/cmlkit")
-- 
GitLab