Commit 24e27544 authored by Luigi Sbailo's avatar Luigi Sbailo
Browse files

Update notebook to latest SISSO version

parent b1f7ef94
......@@ -67,17 +67,10 @@
With the selection of "PRM2020" (or the default selection) as SISSO rung, a special feature space is uploaded, which contains far fewer features than the one in the production calculation used in <a href="https://journals.aps.org/prmaterials/abstract/10.1103/PhysRevMaterials.2.083802" target="_blank">PRM 2020</a>. This makes it possible to reproduce the same result in the notebook in a reasonable time. Still, the provided feature space contains thousands of the top-ranked features, and SISSO finds the best nD model.
%% Cell type:code id: tags:
``` python
%load_ext autoreload
%autoreload 2
```
%% Cell type:code id: tags:
``` python
%%HTML
<script>
code_show=true;
function code_toggle() {
if (code_show)
......@@ -101,11 +94,13 @@
%% Cell type:code id: tags:
``` python
from sissopp import get_max_number_feats, get_estimate_n_feat_next_rung, generate_fs, SISSOClassifier, generate_phi_0_from_csv, FeatureSpace
from sissopp import Inputs, FeatureSpace, SISSOClassifier, FeatureNode, Unit
from sissopp.py_interface import read_csv
from sissopp.py_interface.import_dataframe import get_unit
from tetradymite_PRM2020.visualizer import Visualizer
import numpy as np
import pandas as pd
import os
```
......@@ -132,11 +127,11 @@
%% Cell type:code id: tags:
``` python
# This piece of code is not run at initialization.
# It serves to create the molecular structures which are visualized.
# It can create the molecular structures which are visualized.
path_structure = './data/tetradymite_PRM2020/structures/'
try:
os.mkdir(path_structure)
except OSError:
......@@ -215,78 +210,60 @@
```
%% Cell type:code id: tags:
``` python
def get_featspace_sisso(
df,
ops= ['add', 'sub', 'abs_diff', 'mult', 'div', 'exp', 'neg_exp', 'inv', 'sq', 'cb',
'sqrt', 'cbrt', 'log', 'abs'],
cols="all",
max_phi=2,
def get_feat_space_and_sisso_regressor(
selected_ops=["add", "abs_diff", "div", "sq", "exp"],
selected_features = 'all',
max_rung=2,
n_sis_select=50,
remove_double_divison=True,
max_dim=3,
n_residual=1,
n_dim=2,
n_residual=10,
default=True,
):
if default:
phi_0, prop_label, prop_unit, prop, prop_test, task_sizes_train, task_sizes_test, leave_out_inds = generate_phi_0_from_csv(
selected_ops = ["add", "sub", "mult", "div", "abs_diff", "sq", "cb", "sqrt", "cbrt", "inv", "abs"]
selected_features = 'all'
inputs = read_csv(
df_train,
"Class",
prop_key="Class",
cols='all',
task_key=None,
max_rung=max_rung,
leave_out_frac=0.0,
leave_out_inds=None,
max_rung=1
)
feat_space = generate_fs(
phi_0,
prop,
task_sizes_train,
["add", "sub", "mult", "div", "abs_diff", "sq", "cb", "sqrt", "cbrt", "inv", "abs"],
[],
"classification",
0,
n_sis_select
)
)
else:
phi_0, prop_label, prop_unit, prop, prop_test, task_sizes_train, task_sizes_test, leave_out_inds = generate_phi_0_from_csv(
inputs = read_csv(
df_feat,
"Class",
cols=cols,
task_key=None,
leave_out_frac=0.0,
leave_out_inds=None,
max_rung=max_phi
)
feat_space = generate_fs(
phi_0,
prop,
task_sizes_train,
ops,
[],
"classification",
max_phi,
n_sis_select
)
prop_key="Class",
cols=selected_features,
max_rung=max_rung,
leave_out_frac=0.0
)
sisso = SISSOClassifier(
feat_space,
prop_label,
prop_unit,
prop,
prop_test,
task_sizes_train,
task_sizes_test,
leave_out_inds,
max_dim,
10,
10
)
return feat_space, sisso
inputs.max_rung = max_rung
inputs.allowed_ops = selected_ops
inputs.n_sis_select = n_sis_select
inputs.n_dim = n_dim
inputs.n_residual = n_residual
inputs.n_model_store = 1
inputs.calc_type = "classification"
inputs.sample_ids_train = df_feat.index.tolist()
inputs.prop_train = df_feat["Class"].to_numpy()
inputs.prop_test = np.array([])
inputs.prop_label = "Class"
inputs.task_names = ["all_mats"]
feat_space = FeatureSpace(inputs)
sisso = SISSOClassifier(inputs, feat_space)
return feat_space, sisso
```
%% Cell type:code id: tags:
``` python
......@@ -367,22 +344,23 @@
tier = rung_selection.value
default = False
global feat_space
global sisso
try:
feat_space, sisso = get_featspace_sisso(
df = df_train,
ops = allowed_operations,
cols = selected_features,
max_phi = tier,
n_sis_select = feat_per_iter_selection.value,
remove_double_divison=True,
max_dim = dimension_selection.value,
n_residual = 1,
default = default)
feat_space, sisso = get_feat_space_and_sisso_regressor(
selected_ops = allowed_operations,
selected_features = selected_features,
max_rung = tier,
n_sis_select = feat_per_iter_selection.value,
n_dim = dimension_selection.value,
n_residual = 10,
default = default
)
clear_output()
if (dimension_selection.value>1):
plot_button.disabled=False
else:
plot_button.disabled=True
......@@ -461,11 +439,11 @@
feat_label_box = widgets.VBox([widgets.Label(value='Features:', layout=thin_layout)]+feat_labels)
for box in feat_list: box.disabled = True
rung_selection = widgets.Dropdown(options=['PRM2020', 1,2,3], value=2,layout=thin_layout)
rung_selection.value = 'PRM2020'
feat_per_iter_selection = widgets.BoundedIntText(value = 50, min=10, max=100, step=1, layout=thin_layout)
feat_per_iter_selection = widgets.BoundedIntText(value = 50, min=10, max=200, step=1, layout=thin_layout)
dimension_selection = widgets.BoundedIntText(value = 2, min=1, max=4, step=1, layout = thin_layout)
settings_box = widgets.VBox([
widgets.Label(value='Settings:', layout=wide_layout),
widgets.Label(value='SISSO rung:', layout=wide_layout),
rung_selection,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment