Commit 24e27544 authored by Luigi Sbailo
Browse files

Update notebook to latest SISSO version

parent b1f7ef94
...@@ -67,17 +67,10 @@ ...@@ -67,17 +67,10 @@
With the selection of "PRM2020" (or the default selection) as SISSO rung, a special feature space is uploaded, which contains far fewer features than the production calculation used in <a href="https://journals.aps.org/prmaterials/abstract/10.1103/PhysRevMaterials.2.083802" target="_blank">PRM 2020</a>. This makes it possible to reproduce the same result in the notebook in a reasonable time. Still, the provided feature space contains thousands of the top-ranked features, and SISSO finds the best nD model. With the selection of "PRM2020" (or the default selection) as SISSO rung, a special feature space is uploaded, which contains far fewer features than the production calculation used in <a href="https://journals.aps.org/prmaterials/abstract/10.1103/PhysRevMaterials.2.083802" target="_blank">PRM 2020</a>. This makes it possible to reproduce the same result in the notebook in a reasonable time. Still, the provided feature space contains thousands of the top-ranked features, and SISSO finds the best nD model.
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
%load_ext autoreload
%autoreload 2
```
%% Cell type:code id: tags:
``` python
%%HTML %%HTML
<script> <script>
code_show=true; code_show=true;
function code_toggle() { function code_toggle() {
if (code_show) if (code_show)
...@@ -101,11 +94,13 @@ ...@@ -101,11 +94,13 @@
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
from sissopp import get_max_number_feats, get_estimate_n_feat_next_rung, generate_fs, SISSOClassifier, generate_phi_0_from_csv, FeatureSpace from sissopp import Inputs, FeatureSpace, SISSOClassifier, FeatureNode, Unit
from sissopp.py_interface import read_csv
from sissopp.py_interface.import_dataframe import get_unit
from tetradymite_PRM2020.visualizer import Visualizer from tetradymite_PRM2020.visualizer import Visualizer
import numpy as np import numpy as np
import pandas as pd import pandas as pd
import os import os
``` ```
...@@ -132,11 +127,11 @@ ...@@ -132,11 +127,11 @@
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
# This piece of code is not run at initialization. # This piece of code is not run at initialization.
# It serves to create the molecular structures which are visualized. # It can create the molecular structures which are visualized.
path_structure = './data/tetradymite_PRM2020/structures/' path_structure = './data/tetradymite_PRM2020/structures/'
try: try:
os.mkdir(path_structure) os.mkdir(path_structure)
except OSError: except OSError:
...@@ -215,78 +210,60 @@ ...@@ -215,78 +210,60 @@
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
def get_featspace_sisso( def get_feat_space_and_sisso_regressor(
df, selected_ops=["add", "abs_diff", "div", "sq", "exp"],
ops= ['add', 'sub', 'abs_diff', 'mult', 'div', 'exp', 'neg_exp', 'inv', 'sq', 'cb', selected_features = 'all',
'sqrt', 'cbrt', 'log', 'abs'], max_rung=2,
cols="all",
max_phi=2,
n_sis_select=50, n_sis_select=50,
remove_double_divison=True, n_dim=2,
max_dim=3, n_residual=10,
n_residual=1,
default=True, default=True,
): ):
if default: if default:
phi_0, prop_label, prop_unit, prop, prop_test, task_sizes_train, task_sizes_test, leave_out_inds = generate_phi_0_from_csv(
selected_ops = ["add", "sub", "mult", "div", "abs_diff", "sq", "cb", "sqrt", "cbrt", "inv", "abs"]
selected_features = 'all'
inputs = read_csv(
df_train, df_train,
"Class", prop_key="Class",
cols='all', cols='all',
task_key=None, max_rung=max_rung,
leave_out_frac=0.0, leave_out_frac=0.0,
leave_out_inds=None, )
max_rung=1
)
feat_space = generate_fs(
phi_0,
prop,
task_sizes_train,
["add", "sub", "mult", "div", "abs_diff", "sq", "cb", "sqrt", "cbrt", "inv", "abs"],
[],
"classification",
0,
n_sis_select
)
else: else:
phi_0, prop_label, prop_unit, prop, prop_test, task_sizes_train, task_sizes_test, leave_out_inds = generate_phi_0_from_csv(
inputs = read_csv(
df_feat, df_feat,
"Class", prop_key="Class",
cols=cols, cols=selected_features,
task_key=None, max_rung=max_rung,
leave_out_frac=0.0, leave_out_frac=0.0
leave_out_inds=None, )
max_rung=max_phi
)
feat_space = generate_fs(
phi_0,
prop,
task_sizes_train,
ops,
[],
"classification",
max_phi,
n_sis_select
)
sisso = SISSOClassifier( inputs.max_rung = max_rung
feat_space, inputs.allowed_ops = selected_ops
prop_label, inputs.n_sis_select = n_sis_select
prop_unit, inputs.n_dim = n_dim
prop, inputs.n_residual = n_residual
prop_test, inputs.n_model_store = 1
task_sizes_train, inputs.calc_type = "classification"
task_sizes_test, inputs.sample_ids_train = df_feat.index.tolist()
leave_out_inds, inputs.prop_train = df_feat["Class"].to_numpy()
max_dim, inputs.prop_test = np.array([])
10, inputs.prop_label = "Class"
10 inputs.task_names = ["all_mats"]
)
return feat_space, sisso
feat_space = FeatureSpace(inputs)
sisso = SISSOClassifier(inputs, feat_space)
return feat_space, sisso
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
...@@ -367,22 +344,23 @@ ...@@ -367,22 +344,23 @@
tier = rung_selection.value tier = rung_selection.value
default = False default = False
global feat_space global feat_space
global sisso global sisso
try: try:
feat_space, sisso = get_featspace_sisso( feat_space, sisso = get_feat_space_and_sisso_regressor(
df = df_train, selected_ops = allowed_operations,
ops = allowed_operations, selected_features = selected_features,
cols = selected_features, max_rung = tier,
max_phi = tier, n_sis_select = feat_per_iter_selection.value,
n_sis_select = feat_per_iter_selection.value, n_dim = dimension_selection.value,
remove_double_divison=True, n_residual = 10,
max_dim = dimension_selection.value, default = default
n_residual = 1, )
default = default)
clear_output() clear_output()
if (dimension_selection.value>1): if (dimension_selection.value>1):
plot_button.disabled=False plot_button.disabled=False
else: else:
plot_button.disabled=True plot_button.disabled=True
...@@ -461,11 +439,11 @@ ...@@ -461,11 +439,11 @@
feat_label_box = widgets.VBox([widgets.Label(value='Features:', layout=thin_layout)]+feat_labels) feat_label_box = widgets.VBox([widgets.Label(value='Features:', layout=thin_layout)]+feat_labels)
for box in feat_list: box.disabled = True for box in feat_list: box.disabled = True
rung_selection = widgets.Dropdown(options=['PRM2020', 1,2,3], value=2,layout=thin_layout) rung_selection = widgets.Dropdown(options=['PRM2020', 1,2,3], value=2,layout=thin_layout)
rung_selection.value = 'PRM2020' rung_selection.value = 'PRM2020'
feat_per_iter_selection = widgets.BoundedIntText(value = 50, min=10, max=100, step=1, layout=thin_layout) feat_per_iter_selection = widgets.BoundedIntText(value = 50, min=10, max=200, step=1, layout=thin_layout)
dimension_selection = widgets.BoundedIntText(value = 2, min=1, max=4, step=1, layout = thin_layout) dimension_selection = widgets.BoundedIntText(value = 2, min=1, max=4, step=1, layout = thin_layout)
settings_box = widgets.VBox([ settings_box = widgets.VBox([
widgets.Label(value='Settings:', layout=wide_layout), widgets.Label(value='Settings:', layout=wide_layout),
widgets.Label(value='SISSO rung:', layout=wide_layout), widgets.Label(value='SISSO rung:', layout=wide_layout),
rung_selection, rung_selection,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment