diff --git a/compressed_sensing.ipynb b/compressed_sensing.ipynb index 84242fbe4dab85253e643a76b1d646497d34b0f6..c5fc700e9cdf97ce1af5cb491c17898a2952b3c9 100644 --- a/compressed_sensing.ipynb +++ b/compressed_sensing.ipynb @@ -83,8 +83,8 @@ "execution_count": null, "metadata": { "ExecuteTime": { - "end_time": "2020-12-08T09:03:39.953385Z", - "start_time": "2020-12-08T09:03:39.026752Z" + "end_time": "2020-12-09T21:31:49.230962Z", + "start_time": "2020-12-09T21:31:47.034743Z" } }, "outputs": [], @@ -145,8 +145,8 @@ "execution_count": null, "metadata": { "ExecuteTime": { - "end_time": "2020-12-08T09:03:42.922862Z", - "start_time": "2020-12-08T09:03:42.801656Z" + "end_time": "2020-12-09T21:31:49.389466Z", + "start_time": "2020-12-09T21:31:49.232579Z" }, "scrolled": true }, @@ -211,8 +211,8 @@ "execution_count": null, "metadata": { "ExecuteTime": { - "end_time": "2020-12-08T09:03:43.573065Z", - "start_time": "2020-12-08T09:03:43.170821Z" + "end_time": "2020-12-09T21:31:49.786278Z", + "start_time": "2020-12-09T21:31:49.391244Z" } }, "outputs": [], @@ -238,8 +238,8 @@ "execution_count": null, "metadata": { "ExecuteTime": { - "end_time": "2020-12-08T09:03:43.577912Z", - "start_time": "2020-12-08T09:03:43.574728Z" + "end_time": "2020-12-09T21:31:49.791034Z", + "start_time": "2020-12-09T21:31:49.788111Z" } }, "outputs": [], @@ -263,8 +263,8 @@ "execution_count": null, "metadata": { "ExecuteTime": { - "end_time": "2020-12-08T09:03:43.720363Z", - "start_time": "2020-12-08T09:03:43.673644Z" + "end_time": "2020-12-09T21:31:49.842588Z", + "start_time": "2020-12-09T21:31:49.792447Z" }, "scrolled": true }, @@ -306,8 +306,8 @@ "execution_count": null, "metadata": { "ExecuteTime": { - "end_time": "2020-12-08T09:03:44.079388Z", - "start_time": "2020-12-08T09:03:44.069671Z" + "end_time": "2020-12-09T21:31:49.848867Z", + "start_time": "2020-12-09T21:31:49.844112Z" } }, "outputs": [], @@ -342,8 +342,8 @@ "execution_count": null, "metadata": { "ExecuteTime": { - "end_time": 
"2020-12-08T09:03:45.786743Z", - "start_time": "2020-12-08T09:03:45.759462Z" + "end_time": "2020-12-09T21:31:49.875013Z", + "start_time": "2020-12-09T21:31:49.850538Z" }, "scrolled": true }, @@ -357,8 +357,8 @@ "execution_count": null, "metadata": { "ExecuteTime": { - "end_time": "2020-12-08T09:03:46.659432Z", - "start_time": "2020-12-08T09:03:46.560523Z" + "end_time": "2020-12-09T21:31:49.985837Z", + "start_time": "2020-12-09T21:31:49.876873Z" }, "scrolled": true }, @@ -391,8 +391,8 @@ "execution_count": null, "metadata": { "ExecuteTime": { - "end_time": "2020-12-08T09:04:06.908636Z", - "start_time": "2020-12-08T09:03:47.956918Z" + "end_time": "2020-12-09T21:32:09.553916Z", + "start_time": "2020-12-09T21:31:49.987916Z" }, "scrolled": false }, @@ -427,8 +427,8 @@ "execution_count": null, "metadata": { "ExecuteTime": { - "end_time": "2020-12-08T09:04:07.160724Z", - "start_time": "2020-12-08T09:04:06.910229Z" + "end_time": "2020-12-09T21:32:09.785212Z", + "start_time": "2020-12-09T21:32:09.555346Z" } }, "outputs": [], @@ -481,8 +481,8 @@ "execution_count": null, "metadata": { "ExecuteTime": { - "end_time": "2020-12-08T08:40:40.153539Z", - "start_time": "2020-12-08T08:40:40.149276Z" + "end_time": "2020-12-09T21:32:09.791576Z", + "start_time": "2020-12-09T21:32:09.787208Z" } }, "outputs": [], @@ -522,8 +522,8 @@ "execution_count": null, "metadata": { "ExecuteTime": { - "end_time": "2020-12-08T08:40:40.501898Z", - "start_time": "2020-12-08T08:40:40.155648Z" + "end_time": "2020-12-09T21:32:10.154805Z", + "start_time": "2020-12-09T21:32:09.793271Z" }, "scrolled": true }, @@ -542,8 +542,8 @@ "execution_count": null, "metadata": { "ExecuteTime": { - "end_time": "2020-12-08T08:40:40.773979Z", - "start_time": "2020-12-08T08:40:40.503329Z" + "end_time": "2020-12-09T21:32:10.443908Z", + "start_time": "2020-12-09T21:32:10.156133Z" } }, "outputs": [], @@ -581,6 +581,25 @@ "### The SISSO method" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + 
"ExecuteTime": { + "end_time": "2020-12-09T21:32:10.857853Z", + "start_time": "2020-12-09T21:32:10.445342Z" + } + }, + "outputs": [], + "source": [ + "# import data\n", + "selected_feature_list = ['r_s', 'r_p', 'r_d', 'EA', 'IP']\n", + "allowed_operations = ['+', '|-|', 'exp', '^2']\n", + "P, df_D = get_data(selected_feature_list, allowed_operations)\n", + "D = df_D.values\n", + "features_list = df_D.columns.tolist()" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -593,14 +612,45 @@ "execution_count": null, "metadata": { "ExecuteTime": { - "end_time": "2020-12-08T09:04:07.243017Z", - "start_time": "2020-12-08T09:04:07.162549Z" + "end_time": "2020-12-09T21:32:15.329849Z", + "start_time": "2020-12-09T21:32:15.101788Z" }, "scrolled": true }, "outputs": [], "source": [ - "# here we define a different dataframe to make it compatible with the SISSO regressor object\n", + "sisso = SissoRegressor(n_nonzero_coefs=3, n_features_per_sis_iter=10)\n", + "\n", + "sisso.fit(D, P)\n", + "sisso.print_models(features_list)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Run the SISSO method with a (relatively) big feature space\n", + "<div style=\"list-style:disc; margin: 2px;padding: 10px;border: 0px;border:8px double green; font-size:16px;padding-left: 32px;padding-right: 22px; width:89%\">\n", + "<li>Reproduce the results from the <a href=\"http://journals.aps.org/prl/abstract/10.1103/PhysRevLett.114.105503\" target=\"_blank\">reference publication</a> by including further features.</li>\n", + "<li>Visualize the 2D descriptors in a structure map.</li>\n", + "<li>Experiment with different settings and investigate the influence of the input parameters on the results. (OPTIONAL)</li>\n", + "</div>\n", + "Note the size of the feature space, the time needed to run the code, and the accuracy (using the default settings)!" 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2020-12-09T21:33:34.682503Z", + "start_time": "2020-12-09T21:33:34.590337Z" + }, + "scrolled": true + }, + "outputs": [], + "source": [ + "# here we define a different dataframe to make it compatible with the c++ implementation of SISSO\n", "# load data\n", "RS_structures = read(\"data/compressed_sensing/RS_structures.xyz\", index=':')\n", "ZB_structures = read(\"data/compressed_sensing/ZB_structures.xyz\", index=':')\n", @@ -654,71 +704,8 @@ "execution_count": null, "metadata": { "ExecuteTime": { - "end_time": "2020-12-08T09:04:07.346777Z", - "start_time": "2020-12-08T09:04:07.244250Z" - }, - "scrolled": true - }, - "outputs": [], - "source": [ - "n_nonzero_coefs=3\n", - "n_features_per_sis_iter=10\n", - "phi_0, prop_unit, prop, prop_test, task_sizes_train, task_sizes_test, leave_out_inds = generate_phi_0_from_csv(\n", - " df_plus, \"energy_diff\", \n", - " cols=['r_s_A', 'r_p_A', 'r_d_A', 'EA_A', 'IP_A', 'r_s_B', 'r_p_B', 'r_d_B', 'EA_B', 'IP_B'], \n", - " task_key=None, leave_out_frac=0.0, leave_out_inds=None\n", - ")\n", - "feat_space = generate_fs(\n", - " phi_0, \n", - " prop, \n", - " task_sizes_train, \n", - " ['add','abs_diff','exp', 'sq'],\n", - " 'regression', \n", - " 2, \n", - " n_features_per_sis_iter\n", - ")\n", - "sisso = SISSORegressor(\n", - " feat_space,\n", - " prop_unit,\n", - " prop,\n", - " prop_test,\n", - " task_sizes_train,\n", - " task_sizes_test,\n", - " leave_out_inds,\n", - " n_nonzero_coefs,\n", - " 1,\n", - " 1\n", - ")\n", - "sisso.fit()\n", - "for i in range(n_nonzero_coefs):\n", - " print(str(i+1)+'D model')\n", - " print(\"RMSE: {:.4} | Descriptor: {}\".format(sisso.models[i][0].rmse, sisso.models[i][0]))\n", - " string = \"c0:{:.4}\".format(sisso.models[i][0].coefs[0][-1])\n", - " for j in range(i+1):\n", - " string = string + str(\" | a\"+str(j)+\":{:.4}\".format(sisso.models[i][0].coefs[0][j]))\n", - " print(string 
+ '\\n')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Run the SISSO method with a (relatively) big feature space\n", - "<div style=\"list-style:disc; margin: 2px;padding: 10px;border: 0px;border:8px double green; font-size:16px;padding-left: 32px;padding-right: 22px; width:89%\">\n", - "<li>Reproduce the results from the <a href=\"http://journals.aps.org/prl/abstract/10.1103/PhysRevLett.114.105503\" target=\"_blank\">reference publication</a> by including further features.</li>\n", - "<li>Visualize the 2D descriptors in a structure map.</li>\n", - "<li>Experiment with different settings and investigate the influence of the input parameters on the results. (OPTIONAL)</li>\n", - "</div>\n", - "Note the size of the feature space, the needed time to run the code and the accuracy (using the default settings)!" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "ExecuteTime": { - "end_time": "2020-12-08T09:04:15.868023Z", - "start_time": "2020-12-08T09:04:10.727266Z" + "end_time": "2020-12-09T21:33:39.961597Z", + "start_time": "2020-12-09T21:33:34.758589Z" }, "scrolled": false }, @@ -774,8 +761,8 @@ "execution_count": null, "metadata": { "ExecuteTime": { - "end_time": "2020-12-08T09:04:16.257735Z", - "start_time": "2020-12-08T09:04:15.869867Z" + "end_time": "2020-12-09T21:33:42.060185Z", + "start_time": "2020-12-09T21:33:41.655421Z" }, "scrolled": false }, @@ -831,8 +818,8 @@ "execution_count": null, "metadata": { "ExecuteTime": { - "end_time": "2020-12-08T09:04:16.439244Z", - "start_time": "2020-12-08T09:04:16.259950Z" + "end_time": "2020-12-09T21:33:45.327451Z", + "start_time": "2020-12-09T21:33:45.136210Z" } }, "outputs": [], @@ -854,8 +841,8 @@ "execution_count": null, "metadata": { "ExecuteTime": { - "end_time": "2020-12-08T09:04:17.222753Z", - "start_time": "2020-12-08T09:04:16.440847Z" + "end_time": "2020-12-09T21:33:46.787348Z", + "start_time": "2020-12-09T21:33:45.846486Z" }, "scrolled": false },