From 1946095ca41fa358ae98c83139d408c67810564d Mon Sep 17 00:00:00 2001 From: sbailo <sbailo@fhi-berlin.mpg.de> Date: Tue, 14 Sep 2021 16:55:10 +0200 Subject: [PATCH] Update sisso version --- compressed_sensing.ipynb | 217 +++++++++++++++++++-------------------- 1 file changed, 108 insertions(+), 109 deletions(-) diff --git a/compressed_sensing.ipynb b/compressed_sensing.ipynb index b2aa5d4..e17c779 100644 --- a/compressed_sensing.ipynb +++ b/compressed_sensing.ipynb @@ -83,8 +83,8 @@ "execution_count": null, "metadata": { "ExecuteTime": { - "end_time": "2021-06-22T09:42:42.268643Z", - "start_time": "2021-06-22T09:42:40.392686Z" + "end_time": "2021-09-14T14:36:47.545893Z", + "start_time": "2021-09-14T14:36:46.839700Z" } }, "outputs": [], @@ -113,7 +113,9 @@ "from compressed_sensing.combine_features import combine_features\n", "from compressed_sensing.scatter_plot import show_scatter_plot\n", "from compressed_sensing.visualizer import Visualizer\n", - "from sissopp import generate_fs, SISSORegressor, generate_phi_0_from_csv, FeatureSpace, get_max_number_feats\n", + "from sissopp import Inputs, FeatureSpace, SISSORegressor, FeatureNode, Unit\n", + "from sissopp.py_interface import read_csv\n", + "from sissopp.py_interface.import_dataframe import get_unit\n", "\n", "from atomicfeaturespackage.atomicproperties import atomic_properties_lda2015\n", "from nomad import client, config\n", @@ -150,8 +152,8 @@ "execution_count": null, "metadata": { "ExecuteTime": { - "end_time": "2021-06-22T09:42:45.581746Z", - "start_time": "2021-06-22T09:42:42.270632Z" + "end_time": "2021-09-14T14:36:57.175434Z", + "start_time": "2021-09-14T14:36:48.736437Z" } }, "outputs": [], @@ -263,8 +265,8 @@ "execution_count": null, "metadata": { "ExecuteTime": { - "end_time": "2021-06-22T09:42:45.607705Z", - "start_time": "2021-06-22T09:42:45.583545Z" + "end_time": "2021-09-14T14:36:57.195830Z", + "start_time": "2021-09-14T14:36:57.176614Z" }, "scrolled": true }, @@ -291,8 +293,8 @@ "execution_count": null, "metadata": { "ExecuteTime": { - "end_time": "2021-06-22T09:42:45.623120Z", - "start_time": "2021-06-22T09:42:45.609700Z" + "end_time": "2021-09-14T14:37:01.662950Z", + "start_time": "2021-09-14T14:37:01.644066Z" } }, "outputs": [], @@ -317,8 +319,8 @@ "execution_count": null, "metadata": { "ExecuteTime": { - "end_time": "2021-06-22T09:42:45.665800Z", - "start_time": "2021-06-22T09:42:45.624712Z" + "end_time": "2021-09-14T14:37:02.307822Z", + "start_time": "2021-09-14T14:37:02.250990Z" } }, "outputs": [], @@ -345,8 +347,8 @@ "execution_count": null, "metadata": { "ExecuteTime": { - "end_time": "2021-06-22T09:42:46.048536Z", - "start_time": "2021-06-22T09:42:45.669334Z" + "end_time": "2021-09-14T14:37:04.825131Z", + "start_time": "2021-09-14T14:37:04.378637Z" } }, "outputs": [], @@ -372,8 +374,8 @@ "execution_count": null, "metadata": { "ExecuteTime": { - "end_time": "2021-06-22T09:42:46.053843Z", - "start_time": "2021-06-22T09:42:46.050319Z" + "end_time": "2021-09-14T14:37:06.530006Z", + "start_time": "2021-09-14T14:37:06.524090Z" } }, "outputs": [], @@ -397,8 +399,8 @@ "execution_count": null, "metadata": { "ExecuteTime": { - "end_time": "2021-06-22T09:42:46.124712Z", - "start_time": "2021-06-22T09:42:46.056271Z" + "end_time": "2021-09-14T14:37:07.725528Z", + "start_time": "2021-09-14T14:37:07.686359Z" }, "scrolled": true }, @@ -440,8 +442,8 @@ "execution_count": null, "metadata": { "ExecuteTime": { - "end_time": "2021-06-22T09:42:46.130680Z", - "start_time": "2021-06-22T09:42:46.126443Z" + "end_time": "2021-09-14T14:37:09.223213Z", + "start_time": "2021-09-14T14:37:09.215988Z" } }, "outputs": [], @@ -476,8 +478,8 @@ "execution_count": null, "metadata": { "ExecuteTime": { - "end_time": "2021-06-22T09:42:46.151259Z", - "start_time": "2021-06-22T09:42:46.132520Z" + "end_time": "2021-09-14T14:37:10.879747Z", + "start_time": "2021-09-14T14:37:10.856231Z" }, "scrolled": true }, @@ -491,8 +493,8 @@ "execution_count": null, "metadata": { "ExecuteTime": { - "end_time": "2021-06-22T09:42:46.223162Z", - "start_time": "2021-06-22T09:42:46.153106Z" + "end_time": "2021-09-14T14:37:11.790379Z", + "start_time": "2021-09-14T14:37:11.692269Z" }, "scrolled": true }, @@ -525,8 +527,8 @@ "execution_count": null, "metadata": { "ExecuteTime": { - "end_time": "2021-06-22T09:42:58.926924Z", - "start_time": "2021-06-22T09:42:46.224978Z" + "end_time": "2021-09-14T14:37:32.251715Z", + "start_time": "2021-09-14T14:37:13.373601Z" }, "scrolled": false }, @@ -561,8 +563,8 @@ "execution_count": null, "metadata": { "ExecuteTime": { - "end_time": "2021-06-22T09:42:59.154799Z", - "start_time": "2021-06-22T09:42:58.929342Z" + "end_time": "2021-09-14T14:37:32.474272Z", + "start_time": "2021-09-14T14:37:32.252863Z" } }, "outputs": [], @@ -615,8 +617,8 @@ "execution_count": null, "metadata": { "ExecuteTime": { - "end_time": "2021-06-22T09:42:59.160703Z", - "start_time": "2021-06-22T09:42:59.156238Z" + "end_time": "2021-09-14T14:37:32.480312Z", + "start_time": "2021-09-14T14:37:32.475676Z" } }, "outputs": [], @@ -656,8 +658,8 @@ "execution_count": null, "metadata": { "ExecuteTime": { - "end_time": "2021-06-22T09:42:59.520176Z", - "start_time": "2021-06-22T09:42:59.162140Z" + "end_time": "2021-09-14T14:37:32.848135Z", + "start_time": "2021-09-14T14:37:32.481568Z" }, "scrolled": true }, @@ -676,8 +678,8 @@ "execution_count": null, "metadata": { "ExecuteTime": { - "end_time": "2021-06-22T09:42:59.780893Z", - "start_time": "2021-06-22T09:42:59.521568Z" + "end_time": "2021-09-14T14:37:33.126968Z", + "start_time": "2021-09-14T14:37:32.849214Z" } }, "outputs": [], @@ -720,8 +722,8 @@ "execution_count": null, "metadata": { "ExecuteTime": { - "end_time": "2021-06-22T09:43:00.194507Z", - "start_time": "2021-06-22T09:42:59.782215Z" + "end_time": "2021-09-14T14:37:33.572817Z", + "start_time": "2021-09-14T14:37:33.128358Z" } }, "outputs": [], @@ -746,8 +748,8 @@ "execution_count": null, "metadata": { "ExecuteTime": { - "end_time": "2021-06-22T09:43:00.341979Z", - "start_time": "2021-06-22T09:43:00.196635Z" + "end_time": "2021-09-14T14:37:36.491231Z", + "start_time": "2021-09-14T14:37:36.283852Z" }, "scrolled": true }, @@ -778,8 +780,8 @@ "execution_count": null, "metadata": { "ExecuteTime": { - "end_time": "2021-06-22T09:43:00.357950Z", - "start_time": "2021-06-22T09:43:00.343441Z" + "end_time": "2021-09-14T14:37:43.419332Z", + "start_time": "2021-09-14T14:37:43.402425Z" } }, "outputs": [], @@ -814,8 +816,8 @@ "execution_count": null, "metadata": { "ExecuteTime": { - "end_time": "2021-06-22T09:43:01.357593Z", - "start_time": "2021-06-22T09:43:00.359519Z" + "end_time": "2021-09-14T14:42:31.392780Z", + "start_time": "2021-09-14T14:42:31.021449Z" }, "scrolled": false }, @@ -823,38 +825,37 @@ "source": [ "n_nonzero_coefs=3\n", "n_features_per_sis_iter=50\n", - "phi_0, prop_label, prop_unit, prop, prop_test, task_sizes_train, task_sizes_test, leave_out_inds = generate_phi_0_from_csv(\n", + "selected_features = ['r_s_A', 'r_p_A', 'r_d_A', 'EA_A', 'IP_A', 'r_s_B', 'r_p_B', 'r_d_B', 'EA_B', 'IP_B']\n", + "selected_ops = ['add', 'abs_diff', 'exp', 'sq', 'div']\n", + "\n", + "inputs = read_csv(\n", " df_plus_reduced, \n", - " \"energy_diff\", \n", - " cols=['r_s_A', 'r_p_A', 'r_d_A', 'EA_A', 'IP_A', 'r_s_B', 'r_p_B', 'r_d_B', 'EA_B', 'IP_B'], \n", - " task_key=None, \n", - " leave_out_frac=0.0, \n", - " leave_out_inds=None,\n", - " max_rung=2\n", - ")\n", - "feat_space = generate_fs(\n", - " phi_0, \n", - " prop, \n", - " task_sizes_train, \n", - " ['add', 'abs_diff', 'exp', 'sq', 'div'],\n", - " [],\n", - " 'regression', \n", - " 2, \n", - " n_features_per_sis_iter\n", - ")\n", - "sisso = SISSORegressor(\n", - " feat_space,\n", - " prop_label,\n", - " prop_unit,\n", - " prop,\n", - " prop_test,\n", - " task_sizes_train,\n", - " task_sizes_test,\n", - " leave_out_inds,\n", - " n_nonzero_coefs,\n", - " 1,\n", - " 1\n", - ")\n", + " prop_key=\"energy_diff\",\n", + " cols=selected_features,\n", + " max_rung=2,\n", + " leave_out_frac=0.0\n", + " )\n", + "inputs.allowed_ops = selected_ops\n", + "inputs.n_sis_select = n_features_per_sis_iter\n", + "inputs.n_dim = 3\n", + "inputs.max_rung = 2\n", + "inputs.n_residual = 1\n", + "inputs.n_model_store = 1\n", + "inputs.calc_type = \"regression\"\n", + "inputs.leave_out_inds = []\n", + "inputs.task_sizes_train = [82]\n", + "inputs.task_sizes_test = [0]\n", + "inputs.sample_ids_train = df_plus_reduced.index.tolist()\n", + "inputs.prop_train = df_plus_reduced[\"energy_diff\"].to_numpy()\n", + "inputs.prop_test = np.array([])\n", + "inputs.prop_label = \"energy_diff\"\n", + "inputs.prop_unit = Unit(\"eV\")\n", + "inputs.task_names = [\"all_mats\"]\n", + "\n", + "feat_space = FeatureSpace(inputs)\n", + "\n", + "sisso = SISSORegressor(inputs, feat_space)\n", + "\n", "sisso.fit()\n", "for i in range(n_nonzero_coefs):\n", " print(str(i+1)+'D model')\n", @@ -877,8 +878,8 @@ "execution_count": null, "metadata": { "ExecuteTime": { - "end_time": "2021-06-22T09:43:01.645402Z", - "start_time": "2021-06-22T09:43:01.362033Z" + "end_time": "2021-09-14T14:43:18.097287Z", + "start_time": "2021-09-14T14:43:17.730259Z" }, "scrolled": false }, @@ -886,39 +887,37 @@ "source": [ "n_nonzero_coefs=2\n", "n_features_per_sis_iter=50\n", - "phi_0, prop_label, prop_unit, prop, prop_test, task_sizes_train, task_sizes_test, leave_out_inds = generate_phi_0_from_csv(\n", + "selected_features = ['r_s_A', 'r_p_A', 'r_d_A', 'EA_A', 'IP_A', 'r_s_B', 'r_p_B', 'r_d_B', 'EA_B', 'IP_B']\n", + "selected_ops = ['add', 'abs_diff', 'exp', 'sq', 'div']\n", + "\n", + "inputs = read_csv(\n", " df_plus_reduced, \n", - " \"energy_diff\", \n", - " cols=['r_s_A', 'r_p_A', 'r_d_A', 'EA_A', 'IP_A', 'r_s_B', 'r_p_B', 'r_d_B', 'EA_B', 'IP_B'], \n", - " task_key=None, \n", - " leave_out_frac=0.0, \n", - " leave_out_inds=None,\n", - " max_rung=2\n", - "\n", - ")\n", - "feat_space = generate_fs(\n", - " phi_0, \n", - " prop, \n", - " task_sizes_train, \n", - " ['add','abs_diff','exp', 'sq', 'div'],\n", - " [],\n", - " calc_type='regression',\n", - " max_phi=2, \n", - " n_sis_select=n_features_per_sis_iter\n", - ")\n", - "sisso = SISSORegressor(\n", - " feat_space,\n", - " prop_label,\n", - " prop_unit,\n", - " prop,\n", - " prop_test,\n", - " task_sizes_train,\n", - " task_sizes_test,\n", - " leave_out_inds,\n", - " n_nonzero_coefs,\n", - " 1,\n", - " 1\n", - ")\n", + " prop_key=\"energy_diff\",\n", + " cols=selected_features,\n", + " max_rung=2,\n", + " leave_out_frac=0.0\n", + " )\n", + "inputs.allowed_ops = selected_ops\n", + "inputs.n_sis_select = n_features_per_sis_iter\n", + "inputs.n_dim = 3\n", + "inputs.max_rung = 2\n", + "inputs.n_residual = 1\n", + "inputs.n_model_store = 1\n", + "inputs.calc_type = \"regression\"\n", + "inputs.leave_out_inds = []\n", + "inputs.task_sizes_train = [82]\n", + "inputs.task_sizes_test = [0]\n", + "inputs.sample_ids_train = df_plus_reduced.index.tolist()\n", + "inputs.prop_train = df_plus_reduced[\"energy_diff\"].to_numpy()\n", + "inputs.prop_test = np.array([])\n", + "inputs.prop_label = \"energy_diff\"\n", + "inputs.prop_unit = Unit(\"eV\")\n", + "inputs.task_names = [\"all_mats\"]\n", + "\n", + "feat_space = FeatureSpace(inputs)\n", + "\n", + "sisso = SISSORegressor(inputs, feat_space)\n", + "\n", "sisso.fit()\n", "for i in range(n_nonzero_coefs):\n", " print(str(i+1)+'D model')\n", @@ -943,8 +942,8 @@ "execution_count": null, "metadata": { "ExecuteTime": { - "end_time": "2021-06-22T09:43:02.261205Z", - "start_time": "2021-06-22T09:43:01.648568Z" + "end_time": "2021-09-14T14:53:34.164803Z", + "start_time": "2021-09-14T14:53:33.773134Z" }, "scrolled": false }, @@ -1216,7 +1215,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.10" + "version": "3.7.3" } }, "nbformat": 4, -- GitLab