From 1946095ca41fa358ae98c83139d408c67810564d Mon Sep 17 00:00:00 2001
From: sbailo <sbailo@fhi-berlin.mpg.de>
Date: Tue, 14 Sep 2021 16:55:10 +0200
Subject: [PATCH] Update sisso version

---
 compressed_sensing.ipynb | 217 +++++++++++++++++++--------------------
 1 file changed, 108 insertions(+), 109 deletions(-)

diff --git a/compressed_sensing.ipynb b/compressed_sensing.ipynb
index b2aa5d4..e17c779 100644
--- a/compressed_sensing.ipynb
+++ b/compressed_sensing.ipynb
@@ -83,8 +83,8 @@
    "execution_count": null,
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2021-06-22T09:42:42.268643Z",
-     "start_time": "2021-06-22T09:42:40.392686Z"
+     "end_time": "2021-09-14T14:36:47.545893Z",
+     "start_time": "2021-09-14T14:36:46.839700Z"
     }
    },
    "outputs": [],
@@ -113,7 +113,9 @@
     "from compressed_sensing.combine_features import combine_features\n",
     "from compressed_sensing.scatter_plot import  show_scatter_plot\n",
     "from compressed_sensing.visualizer import Visualizer\n",
-    "from sissopp import generate_fs, SISSORegressor, generate_phi_0_from_csv, FeatureSpace, get_max_number_feats\n",
+    "from sissopp import Inputs, FeatureSpace, SISSORegressor, FeatureNode, Unit\n",
+    "from sissopp.py_interface import read_csv\n",
+    "from sissopp.py_interface.import_dataframe import get_unit\n",
     "\n",
     "from atomicfeaturespackage.atomicproperties import atomic_properties_lda2015\n",
     "from nomad import client, config\n",
@@ -150,8 +152,8 @@
    "execution_count": null,
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2021-06-22T09:42:45.581746Z",
-     "start_time": "2021-06-22T09:42:42.270632Z"
+     "end_time": "2021-09-14T14:36:57.175434Z",
+     "start_time": "2021-09-14T14:36:48.736437Z"
     }
    },
    "outputs": [],
@@ -263,8 +265,8 @@
    "execution_count": null,
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2021-06-22T09:42:45.607705Z",
-     "start_time": "2021-06-22T09:42:45.583545Z"
+     "end_time": "2021-09-14T14:36:57.195830Z",
+     "start_time": "2021-09-14T14:36:57.176614Z"
     },
     "scrolled": true
    },
@@ -291,8 +293,8 @@
    "execution_count": null,
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2021-06-22T09:42:45.623120Z",
-     "start_time": "2021-06-22T09:42:45.609700Z"
+     "end_time": "2021-09-14T14:37:01.662950Z",
+     "start_time": "2021-09-14T14:37:01.644066Z"
     }
    },
    "outputs": [],
@@ -317,8 +319,8 @@
    "execution_count": null,
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2021-06-22T09:42:45.665800Z",
-     "start_time": "2021-06-22T09:42:45.624712Z"
+     "end_time": "2021-09-14T14:37:02.307822Z",
+     "start_time": "2021-09-14T14:37:02.250990Z"
     }
    },
    "outputs": [],
@@ -345,8 +347,8 @@
    "execution_count": null,
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2021-06-22T09:42:46.048536Z",
-     "start_time": "2021-06-22T09:42:45.669334Z"
+     "end_time": "2021-09-14T14:37:04.825131Z",
+     "start_time": "2021-09-14T14:37:04.378637Z"
     }
    },
    "outputs": [],
@@ -372,8 +374,8 @@
    "execution_count": null,
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2021-06-22T09:42:46.053843Z",
-     "start_time": "2021-06-22T09:42:46.050319Z"
+     "end_time": "2021-09-14T14:37:06.530006Z",
+     "start_time": "2021-09-14T14:37:06.524090Z"
     }
    },
    "outputs": [],
@@ -397,8 +399,8 @@
    "execution_count": null,
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2021-06-22T09:42:46.124712Z",
-     "start_time": "2021-06-22T09:42:46.056271Z"
+     "end_time": "2021-09-14T14:37:07.725528Z",
+     "start_time": "2021-09-14T14:37:07.686359Z"
     },
     "scrolled": true
    },
@@ -440,8 +442,8 @@
    "execution_count": null,
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2021-06-22T09:42:46.130680Z",
-     "start_time": "2021-06-22T09:42:46.126443Z"
+     "end_time": "2021-09-14T14:37:09.223213Z",
+     "start_time": "2021-09-14T14:37:09.215988Z"
     }
    },
    "outputs": [],
@@ -476,8 +478,8 @@
    "execution_count": null,
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2021-06-22T09:42:46.151259Z",
-     "start_time": "2021-06-22T09:42:46.132520Z"
+     "end_time": "2021-09-14T14:37:10.879747Z",
+     "start_time": "2021-09-14T14:37:10.856231Z"
     },
     "scrolled": true
    },
@@ -491,8 +493,8 @@
    "execution_count": null,
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2021-06-22T09:42:46.223162Z",
-     "start_time": "2021-06-22T09:42:46.153106Z"
+     "end_time": "2021-09-14T14:37:11.790379Z",
+     "start_time": "2021-09-14T14:37:11.692269Z"
     },
     "scrolled": true
    },
@@ -525,8 +527,8 @@
    "execution_count": null,
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2021-06-22T09:42:58.926924Z",
-     "start_time": "2021-06-22T09:42:46.224978Z"
+     "end_time": "2021-09-14T14:37:32.251715Z",
+     "start_time": "2021-09-14T14:37:13.373601Z"
     },
     "scrolled": false
    },
@@ -561,8 +563,8 @@
    "execution_count": null,
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2021-06-22T09:42:59.154799Z",
-     "start_time": "2021-06-22T09:42:58.929342Z"
+     "end_time": "2021-09-14T14:37:32.474272Z",
+     "start_time": "2021-09-14T14:37:32.252863Z"
     }
    },
    "outputs": [],
@@ -615,8 +617,8 @@
    "execution_count": null,
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2021-06-22T09:42:59.160703Z",
-     "start_time": "2021-06-22T09:42:59.156238Z"
+     "end_time": "2021-09-14T14:37:32.480312Z",
+     "start_time": "2021-09-14T14:37:32.475676Z"
     }
    },
    "outputs": [],
@@ -656,8 +658,8 @@
    "execution_count": null,
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2021-06-22T09:42:59.520176Z",
-     "start_time": "2021-06-22T09:42:59.162140Z"
+     "end_time": "2021-09-14T14:37:32.848135Z",
+     "start_time": "2021-09-14T14:37:32.481568Z"
     },
     "scrolled": true
    },
@@ -676,8 +678,8 @@
    "execution_count": null,
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2021-06-22T09:42:59.780893Z",
-     "start_time": "2021-06-22T09:42:59.521568Z"
+     "end_time": "2021-09-14T14:37:33.126968Z",
+     "start_time": "2021-09-14T14:37:32.849214Z"
     }
    },
    "outputs": [],
@@ -720,8 +722,8 @@
    "execution_count": null,
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2021-06-22T09:43:00.194507Z",
-     "start_time": "2021-06-22T09:42:59.782215Z"
+     "end_time": "2021-09-14T14:37:33.572817Z",
+     "start_time": "2021-09-14T14:37:33.128358Z"
     }
    },
    "outputs": [],
@@ -746,8 +748,8 @@
    "execution_count": null,
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2021-06-22T09:43:00.341979Z",
-     "start_time": "2021-06-22T09:43:00.196635Z"
+     "end_time": "2021-09-14T14:37:36.491231Z",
+     "start_time": "2021-09-14T14:37:36.283852Z"
     },
     "scrolled": true
    },
@@ -778,8 +780,8 @@
    "execution_count": null,
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2021-06-22T09:43:00.357950Z",
-     "start_time": "2021-06-22T09:43:00.343441Z"
+     "end_time": "2021-09-14T14:37:43.419332Z",
+     "start_time": "2021-09-14T14:37:43.402425Z"
     }
    },
    "outputs": [],
@@ -814,8 +816,8 @@
    "execution_count": null,
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2021-06-22T09:43:01.357593Z",
-     "start_time": "2021-06-22T09:43:00.359519Z"
+     "end_time": "2021-09-14T14:42:31.392780Z",
+     "start_time": "2021-09-14T14:42:31.021449Z"
     },
     "scrolled": false
    },
@@ -823,38 +825,37 @@
    "source": [
     "n_nonzero_coefs=3\n",
     "n_features_per_sis_iter=50\n",
-    "phi_0, prop_label, prop_unit, prop, prop_test, task_sizes_train, task_sizes_test, leave_out_inds = generate_phi_0_from_csv(\n",
+    "selected_features = ['r_s_A', 'r_p_A', 'r_d_A', 'EA_A', 'IP_A', 'r_s_B', 'r_p_B', 'r_d_B', 'EA_B', 'IP_B']\n",
+    "selected_ops = ['add', 'abs_diff', 'exp', 'sq', 'div']\n",
+    "\n",
+    "inputs = read_csv(\n",
     "    df_plus_reduced, \n",
-    "    \"energy_diff\", \n",
-    "    cols=['r_s_A', 'r_p_A', 'r_d_A', 'EA_A', 'IP_A', 'r_s_B', 'r_p_B', 'r_d_B', 'EA_B', 'IP_B'], \n",
-    "    task_key=None, \n",
-    "    leave_out_frac=0.0, \n",
-    "    leave_out_inds=None,\n",
-    "    max_rung=2\n",
-    ")\n",
-    "feat_space = generate_fs(\n",
-    "    phi_0, \n",
-    "    prop, \n",
-    "    task_sizes_train, \n",
-    "    ['add', 'abs_diff', 'exp', 'sq', 'div'],\n",
-    "    [],\n",
-    "    'regression',  \n",
-    "    2, \n",
-    "    n_features_per_sis_iter\n",
-    ")\n",
-    "sisso = SISSORegressor(\n",
-    "    feat_space,\n",
-    "    prop_label,\n",
-    "    prop_unit,\n",
-    "    prop,\n",
-    "    prop_test,\n",
-    "    task_sizes_train,\n",
-    "    task_sizes_test,\n",
-    "    leave_out_inds,\n",
-    "    n_nonzero_coefs,\n",
-    "    1,\n",
-    "    1\n",
-    ")\n",
+    "    prop_key=\"energy_diff\",\n",
+    "    cols=selected_features,\n",
+    "    max_rung=2,\n",
+    "    leave_out_frac=0.0\n",
+    "    )\n",
+    "inputs.allowed_ops = selected_ops\n",
+    "inputs.n_sis_select = n_features_per_sis_iter\n",
+    "inputs.n_dim = n_nonzero_coefs\n",
+    "inputs.max_rung = 2\n",
+    "inputs.n_residual = 1\n",
+    "inputs.n_model_store = 1\n",
+    "inputs.calc_type = \"regression\"\n",
+    "inputs.leave_out_inds = []\n",
+    "inputs.task_sizes_train = [82]\n",
+    "inputs.task_sizes_test = [0]\n",
+    "inputs.sample_ids_train = df_plus_reduced.index.tolist()\n",
+    "inputs.prop_train = df_plus_reduced[\"energy_diff\"].to_numpy()\n",
+    "inputs.prop_test = np.array([])\n",
+    "inputs.prop_label = \"energy_diff\"\n",
+    "inputs.prop_unit = Unit(\"eV\")\n",
+    "inputs.task_names = [\"all_mats\"]\n",
+    "\n",
+    "feat_space = FeatureSpace(inputs)\n",
+    "\n",
+    "sisso = SISSORegressor(inputs, feat_space)\n",
+    "\n",
     "sisso.fit()\n",
     "for i in range(n_nonzero_coefs):\n",
     "    print(str(i+1)+'D model')\n",
@@ -877,8 +878,8 @@
    "execution_count": null,
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2021-06-22T09:43:01.645402Z",
-     "start_time": "2021-06-22T09:43:01.362033Z"
+     "end_time": "2021-09-14T14:43:18.097287Z",
+     "start_time": "2021-09-14T14:43:17.730259Z"
     },
     "scrolled": false
    },
@@ -886,39 +887,37 @@
    "source": [
     "n_nonzero_coefs=2\n",
     "n_features_per_sis_iter=50\n",
-    "phi_0, prop_label, prop_unit, prop, prop_test, task_sizes_train, task_sizes_test, leave_out_inds = generate_phi_0_from_csv(\n",
+    "selected_features = ['r_s_A', 'r_p_A', 'r_d_A', 'EA_A', 'IP_A', 'r_s_B', 'r_p_B', 'r_d_B', 'EA_B', 'IP_B']\n",
+    "selected_ops = ['add', 'abs_diff', 'exp', 'sq', 'div']\n",
+    "\n",
+    "inputs = read_csv(\n",
     "    df_plus_reduced, \n",
-    "    \"energy_diff\", \n",
-    "    cols=['r_s_A', 'r_p_A', 'r_d_A', 'EA_A', 'IP_A', 'r_s_B', 'r_p_B', 'r_d_B', 'EA_B', 'IP_B'], \n",
-    "    task_key=None, \n",
-    "    leave_out_frac=0.0, \n",
-    "    leave_out_inds=None,\n",
-    "    max_rung=2\n",
-    "\n",
-    ")\n",
-    "feat_space = generate_fs(\n",
-    "    phi_0, \n",
-    "    prop, \n",
-    "    task_sizes_train, \n",
-    "    ['add','abs_diff','exp', 'sq', 'div'],\n",
-    "    [],\n",
-    "    calc_type='regression',\n",
-    "    max_phi=2, \n",
-    "    n_sis_select=n_features_per_sis_iter\n",
-    ")\n",
-    "sisso = SISSORegressor(\n",
-    "    feat_space,\n",
-    "    prop_label,\n",
-    "    prop_unit,\n",
-    "    prop,\n",
-    "    prop_test,\n",
-    "    task_sizes_train,\n",
-    "    task_sizes_test,\n",
-    "    leave_out_inds,\n",
-    "    n_nonzero_coefs,\n",
-    "    1,\n",
-    "    1\n",
-    ")\n",
+    "    prop_key=\"energy_diff\",\n",
+    "    cols=selected_features,\n",
+    "    max_rung=2,\n",
+    "    leave_out_frac=0.0\n",
+    "    )\n",
+    "inputs.allowed_ops = selected_ops\n",
+    "inputs.n_sis_select = n_features_per_sis_iter\n",
+    "inputs.n_dim = n_nonzero_coefs\n",
+    "inputs.max_rung = 2\n",
+    "inputs.n_residual = 1\n",
+    "inputs.n_model_store = 1\n",
+    "inputs.calc_type = \"regression\"\n",
+    "inputs.leave_out_inds = []\n",
+    "inputs.task_sizes_train = [82]\n",
+    "inputs.task_sizes_test = [0]\n",
+    "inputs.sample_ids_train = df_plus_reduced.index.tolist()\n",
+    "inputs.prop_train = df_plus_reduced[\"energy_diff\"].to_numpy()\n",
+    "inputs.prop_test = np.array([])\n",
+    "inputs.prop_label = \"energy_diff\"\n",
+    "inputs.prop_unit = Unit(\"eV\")\n",
+    "inputs.task_names = [\"all_mats\"]\n",
+    "\n",
+    "feat_space = FeatureSpace(inputs)\n",
+    "\n",
+    "sisso = SISSORegressor(inputs, feat_space)\n",
+    "\n",
     "sisso.fit()\n",
     "for i in range(n_nonzero_coefs):\n",
     "    print(str(i+1)+'D model')\n",
@@ -943,8 +942,8 @@
    "execution_count": null,
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2021-06-22T09:43:02.261205Z",
-     "start_time": "2021-06-22T09:43:01.648568Z"
+     "end_time": "2021-09-14T14:53:34.164803Z",
+     "start_time": "2021-09-14T14:53:33.773134Z"
     },
     "scrolled": false
    },
@@ -1216,7 +1215,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.7.10"
+   "version": "3.7.3"
   }
  },
  "nbformat": 4,
-- 
GitLab