diff --git a/assets/kaggle_competition/Logo_MPG.png b/assets/kaggle_competition/logo_MPG.png similarity index 100% rename from assets/kaggle_competition/Logo_MPG.png rename to assets/kaggle_competition/logo_MPG.png diff --git a/assets/kaggle_competition/Logo_NOMAD.png b/assets/kaggle_competition/logo_NOMAD.png similarity index 100% rename from assets/kaggle_competition/Logo_NOMAD.png rename to assets/kaggle_competition/logo_NOMAD.png diff --git a/kaggle_competition.ipynb b/kaggle_competition.ipynb index 6d989f30f5cad04c9505aebf3bb03c232add1da3..88c61cb20f80c2d7f468df814163fbdc5ce9a27f 100644 --- a/kaggle_competition.ipynb +++ b/kaggle_competition.ipynb @@ -2,11 +2,11 @@ "cells": [ { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": { "ExecuteTime": { - "end_time": "2021-01-19T10:23:31.012040Z", - "start_time": "2021-01-19T10:23:31.009452Z" + "end_time": "2021-11-11T17:10:52.300325Z", + "start_time": "2021-11-11T17:10:52.290470Z" }, "init_cell": true }, @@ -29,44 +29,15 @@ } }, "source": [ - "<div id=\"teaser\" style=' background-position: right center; background-size: 00px; background-repeat: no-repeat; \n", - " padding-top: 20px;\n", - " padding-right: 10px;\n", - " padding-bottom: 170px;\n", - " padding-left: 10px;\n", - " border-bottom: 14px double #333;\n", - " border-top: 14px double #333;' > \n", - "\n", - " \n", - " <div style=\"text-align:center\">\n", - " <b><font size=\"6.4\">2018 NOMAD-Kaggle research competition</font></b> \n", - " </div>\n", - " \n", - "<p>\n", - " created by:\n", - " Xiangyue Liu<sup>1</sup> (<a href=\"mailto:xyliu@fhi-berlin.mpg.de\">email</a>),\n", - " Christopher Sutton<sup>1</sup> (<a href=\"mailto:sutton@fhi-berlin.mpg.de\">email</a>),\n", - " Luca M. Ghiringhelli<sup>1</sup>(<a href=\"mailto:ghiringhelli@fhi-berlin.mpg.de\">email</a>),\n", - " Takenori Yamamoto<sup>2</sup>, \n", - " Lars Blumenthal<sup>3,4</sup>,\n", - " Jacek Golebiowski<sup>3,4</sup>, \n", - " Angelo Ziletti<sup>1</sup>, \n", - " and Matthias Scheffler<sup>1</sup>\n", - " \n", - "<sup>1</sup> Fritz Haber Institute of the Max Planck Society, Faradayweg 4-6, D-14195 Berlin, Germany <br>\n", - "<sup>2</sup> Research Institute for Mathematical and Computational Sciences (RIMCS), LLC, Yokohama, Japan <br>\n", - "<sup>4</sup> EPSRC Centre for Doctoral Training on Theory and Simulation of Materials Department of Physics, Imperial College London, London, U.K. <br>\n", - "<sup>5</sup> Thomas Young Centre for Theory and Simulation of Materials, Department of Materials, Imperial College London, London, U.K <br>\n", - " \n", - " \n", - " \n", - "<span class=\"nomad--last-updated\" data-version=\"v1.0.0\">[Last updated: January 19, 2021]</span>\n", - " \n", - "<div> \n", - "<img style=\"float: left;\" src=\"assets/kaggle_competition/Logo_MPG.png\" width=\"200\"> \n", - "<img style=\"float: right;\" src=\"assets/kaggle_competition/Logo_NOMAD.png\" width=\"250\">\n", - "</div>\n", - "</div>\n" + "<img src=\"assets/kaggle_competition/header.jpg\" width=\"900\"> " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "<img style=\"float: left;\" src=\"assets/kaggle_competition/logo_NOMAD.png\" width=300>\n", + "<img style=\"float: right;\" src=\"assets/kaggle_competition/logo_MPG.png\" width=170> " ] }, { @@ -106,16 +77,45 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": { "ExecuteTime": { - "end_time": "2021-01-19T10:23:31.032317Z", - "start_time": "2021-01-19T10:23:31.014029Z" + "end_time": "2021-11-11T17:10:52.313149Z", + "start_time": "2021-11-11T17:10:52.302093Z" }, "init_cell": true, "scrolled": false }, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "<script>\n", + " code_show=true; \n", + " function code_toggle() {\n", + " if (code_show)\n", + " {\n", + " $('div.input').hide();\n", + " } \n", + " else \n", + " {\n", + " $('div.input').show();\n", + " }\n", + " code_show = !code_show\n", + " } \n", + " $( document ).ready(code_toggle);\n", + "</script>\n", + "The Python code for this notebook is by default hidden for easier reading.\n", + "To toggle on/off the raw code, click <a href=\"javascript:code_toggle()\">here</a>.\n" + ], + "text/plain": [ + "<IPython.core.display.HTML object>" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "%%HTML\n", "<script>\n", @@ -150,16 +150,232 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "metadata": { "ExecuteTime": { - "end_time": "2021-01-19T10:23:31.049437Z", - "start_time": "2021-01-19T10:23:31.034656Z" + "end_time": "2021-11-11T17:10:52.319760Z", + "start_time": "2021-11-11T17:10:52.314773Z" }, "init_cell": true, "scrolled": false }, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "<!-- CSS Style Inline: -->\n", + " <style type=\"text/css\">\n", + " #jmol_div227{\n", + " height: 350px;\n", + " width: 350px;\n", + " float: left;\n", + " }\n", + " #jmol_div12{\n", + " height: 350px;\n", + " width: 350px;\n", + " float: left;\n", + " }\n", + " #jmol_div206{\n", + " height: 350px;\n", + " width: 350px;\n", + " float: left;\n", + " }\n", + " #jmol_div33{\n", + " height: 350px;\n", + " width: 350px;\n", + " float: left;\n", + " }\n", + " #jmol_div194{\n", + " height: 350px;\n", + " width: 350px;\n", + " float: left;\n", + " }\n", + " #jmol_div167{\n", + " height: 350px;\n", + " width: 350px;\n", + " float: left;\n", + " }\n", + " </style>\n", + "\n", + " <!-- Load Jmol javascript library -->\n", + " <script type=\"text/javascript\" src=\"assets/kaggle_competition/jsmol/JSmol.min.js\"></script>\n", + "\n", + " <!-- calls to jQuery and Jmol (inline) -->\n", + " <script type=\"text/javascript\">\n", + " // Jmol readyFunction \n", + " jmol_isReady = function(applet) {\n", + " document.title = (applet._id + \" - Jmol \" + Jmol.___JmolVersion)\n", + " Jmol._getElement(applet, \"appletdiv\").style.border=\"0px solid blue\"\n", + " }\n", + " // initialize Jmol Applet\n", + " var myJmol227 = \"myJmol227\";\n", + " var Info227 = {\n", + " width: \"100%\",\n", + " height: \"100%\",\n", + " color: \"#ffffff\", \n", + " use: \"HTML5\",\n", + " j2sPath: \"assets/kaggle_competition/jsmol/j2s\",\n", + " jarPath: \"assets/kaggle_competition/jsmol/java\",\n", + " jarFile: \"JmolAppletSigned.jar\",\n", + " debug: false,\n", + " readyFunction: jmol_isReady,\n", + " script: 'load \"data/kaggle_competition/xyz/geometry3-spacegroup227.xyz\" ;',\n", + " allowJavaScript: false,\n", + " disableJ2SLoadMonitor: true,\n", + " }\n", + " var myJmol12 = \"myJmol12\";\n", + " var Info12 = {\n", + " width: \"100%\",\n", + " height: \"100%\",\n", + " color: \"#ffffff\", \n", + " use: \"HTML5\",\n", + " j2sPath: \"assets/kaggle_competition/jsmol/j2s\",\n", + " jarPath: \"assets/kaggle_competition/jsmol/java\",\n", + " jarFile: \"JmolAppletSigned.jar\",\n", + " debug: false,\n", + " readyFunction: jmol_isReady,\n", + " script: 'load \"data/kaggle_competition/xyz/geometry8-spacegroup12.xyz\" ;',\n", + " allowJavaScript: false,\n", + " disableJ2SLoadMonitor: true,\n", + " }\n", + " var myJmol206 = \"myJmol206\";\n", + " var Info206 = {\n", + " width: \"100%\",\n", + " height: \"100%\",\n", + " color: \"#ffffff\", \n", + " use: \"HTML5\",\n", + " j2sPath: \"assets/kaggle_competition/jsmol/j2s\",\n", + " jarPath: \"assets/kaggle_competition/jsmol/java\",\n", + " jarFile: \"JmolAppletSigned.jar\",\n", + " debug: false,\n", + " readyFunction: jmol_isReady,\n", + " script: 'load \"data/kaggle_competition/xyz/geometry7-spacegroup206.xyz\" ;',\n", + " allowJavaScript: false,\n", + " disableJ2SLoadMonitor: true,\n", + " }\n", + " var myJmol33 = \"myJmol33\";\n", + " var Info33 = {\n", + " width: \"100%\",\n", + " height: \"100%\",\n", + " color: \"#ffffff\", \n", + " use: \"HTML5\",\n", + " j2sPath: \"assets/kaggle_competition/jsmol/j2s\",\n", + " jarPath: \"assets/kaggle_competition/jsmol/java\",\n", + " jarFile: \"JmolAppletSigned.jar\",\n", + " debug: false,\n", + " readyFunction: jmol_isReady,\n", + " script: 'load \"data/kaggle_competition/xyz/geometry1-spacegroup33.xyz\" ;',\n", + " allowJavaScript: false,\n", + " disableJ2SLoadMonitor: true,\n", + " }\n", + " var myJmol194 = \"myJmol194\";\n", + " var Info194 = {\n", + " width: \"100%\",\n", + " height: \"100%\",\n", + " color: \"#ffffff\", \n", + " use: \"HTML5\",\n", + " j2sPath: \"assets/kaggle_competition/jsmol/j2s\",\n", + " jarPath: \"assets/kaggle_competition/jsmol/java\",\n", + " jarFile: \"JmolAppletSigned.jar\",\n", + " debug: false,\n", + " readyFunction: jmol_isReady,\n", + " script: 'load \"data/kaggle_competition/xyz/geometry2-spacegroup194.xyz\" ;',\n", + " allowJavaScript: false,\n", + " disableJ2SLoadMonitor: true,\n", + " }\n", + " var myJmol167 = \"myJmol167\";\n", + " var Info167 = {\n", + " width: \"100%\",\n", + " height: \"100%\",\n", + " color: \"#ffffff\", \n", + " use: \"HTML5\",\n", + " j2sPath: \"assets/kaggle_competition/jsmol/j2s\",\n", + " jarPath: \"assets/kaggle_competition/jsmol/java\",\n", + " jarFile: \"JmolAppletSigned.jar\",\n", + " debug: false,\n", + " readyFunction: jmol_isReady,\n", + " script: 'load \"data/kaggle_competition/xyz/geometry4-spacegroup167.xyz\" ;',\n", + " allowJavaScript: false,\n", + " disableJ2SLoadMonitor: true,\n", + " }\n", + " // jQuery ready functions\n", + " // is called when page has been completely loaded\n", + " $(document).ready(function() {\n", + " $(\"#jmol_div227\").html(Jmol.getAppletHtml(myJmol227, Info227))\n", + " })\n", + " $(document).ready(function() {\n", + " $(\"#jmol_div12\").html(Jmol.getAppletHtml(myJmol12, Info12))\n", + " })\n", + " $(document).ready(function() {\n", + " $(\"#jmol_div206\").html(Jmol.getAppletHtml(myJmol206, Info206))\n", + " })\n", + " $(document).ready(function() {\n", + " $(\"#jmol_div33\").html(Jmol.getAppletHtml(myJmol33, Info33))\n", + " })\n", + " $(document).ready(function() {\n", + " $(\"#jmol_div194\").html(Jmol.getAppletHtml(myJmol194, Info194))\n", + " })\n", + " $(document).ready(function() {\n", + " $(\"#jmol_div167\").html(Jmol.getAppletHtml(myJmol167, Info167))\n", + " })\n", + " var lastPrompt=0;\n", + " \n", + " \n", + " \n", + " function show_hide_structures()\n", + " {\n", + " var x = document.getElementById(\"geometry_jmol\");\n", + " if (x.style.display === \"none\") \n", + " {\n", + " x.style.display = \"block\";\n", + " } \n", + " else \n", + " {\n", + " x.style.display = \"none\";\n", + " }\n", + " }\n", + " \n", + " </script>\n", + "\n", + "<div>\n", + " <button type=\"button\" style=\"background-color:#f2f2f2;border:#555555;border-radius: 4px;font-size: 16px; width:200px; height:30px\" onclick=\"show_hide_structures()\">Show/hide structures</button>\n", + " <br><br>\n", + "</div>\n", + " \n", + "<div id=\"geometry_jmol\">\n", + " <table>\n", + " <tr>\n", + " <th><center>Spacegroup 12 (C2/m)</center></th>\n", + " <th><center>Spacegroup 33 (Pna2<sub>1</sub>)</center></th>\n", + " <th><center>Spacegroup 167 (R<font style=\"text-decoration: overline\">3</font>c)</center></th>\n", + " </tr>\n", + " <tr>\n", + " <th><div id='jmol_div12'></div></th>\n", + " <th><div id='jmol_div33'></div></th>\n", + " <th><div id='jmol_div167'></div></th>\n", + " </tr>\n", + " \n", + " <tr>\n", + " <th><center>Spacegroup 194 (P6<sub>3</sub>/mmc)</center></th>\n", + " <th><center>Spacegroup 206 (Ia<font style=\"text-decoration: overline\">3</font>)</center></th>\n", + " <th><center>Spacegroup 227 (Fd<font style=\"text-decoration: overline\">3</font>m)</center></th>\n", + " </tr>\n", + " <tr>\n", + " <th><div id='jmol_div194'></div></th>\n", + " <th><div id='jmol_div206'></div></th>\n", + " <th><div id='jmol_div227'></div></th>\n", + " </tr>\n", + " </table>\n", + "</div>\n" + ], + "text/plain": [ + "<IPython.core.display.HTML object>" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "%%html\n", "<!-- CSS Style Inline: -->\n", @@ -434,16 +650,466 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "metadata": { "ExecuteTime": { - "end_time": "2021-01-19T10:23:31.064353Z", - "start_time": "2021-01-19T10:23:31.051499Z" + "end_time": "2021-11-11T17:10:52.329322Z", + "start_time": "2021-11-11T17:10:52.322187Z" }, "init_cell": true, "scrolled": false }, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "<br><br><br>\n", + "<font size=\"6.5em\"><b>Make predictions on formation energies and bandgaps</b></font>\n", + "<br><hr><br>\n", + "<font size=\"5em\"><b>Winning representations combined with different regression methods</b></font>\n", + "<br><br>\n", + "<font size = \"3.5em\"> To understand the relative importance of the representation vs. regression model, one can examine the performance of each representation combined with different regression models. \n", + "The hyperparameters are optimized for each representation/regressor combination. </font>\n", + "<br>\n", + "<font size = \"3.5em\" color=\"009FC2\"><br>Warning: the learning algorithm employed in this study (e.g. grid-search) can not guarantee deterministic results. The actual predictions can divergent from the published data.\n", + "</font>\n", + "\n", + "<br><br><br>\n", + "\n", + "<form>\n", + " <font size=\"4em\">Select a representation and a regression method:</font>\n", + " <select id=\"representation\" onclick=\"show_options()\">\n", + " <option value=\"ngram\">n-gram</option>\n", + " <option value=\"soap\">SOAP</option>\n", + " </select>\n", + " <select id=\"regression\" onclick=\"show_options()\">\n", + " <option value=\"krr\">KRR</option>\n", + " <option value=\"nn\">Neural network</option>\n", + " </select>\n", + " \n", + " <button type=\"button\" id=\"button_show_options\" style=\"background-color:#f2f2f2;border:#555555;border-radius: 4px;font-size: 16px; width:150px; height:30px\" onclick=\"show_options()\">More options</button>\n", + " <button type=\"button\" id=\"button_hide_options\" style=\"background-color:#f2f2f2;border:#555555;border-radius: 4px;font-size: 16px; width:150px; height:30px; display: none\" onclick=\"hide_options()\">Less options</button>\n", + "\n", + "</form>\n", + "\n", + "\n", + "\n", + "<script type=\"text/Javascript\">\n", + "\n", + " function show_options()\n", + " {\n", + " representation_value = document.getElementById(\"representation\").value;\n", + " regression_value = document.getElementById(\"regression\").value;\n", + " \n", + " if(representation_value == \"ngram\")\n", + " {\n", + " document.getElementById(\"div_options_soap\").style.display=\"none\";\n", + " document.getElementById(\"div_options_ngram\").style.display=\"\";\n", + " document.getElementById(\"button_show_options\").style.display=\"none\";\n", + " document.getElementById(\"button_hide_options\").style.display=\"\"; \n", + " }\n", + " else if(representation_value == \"soap\")\n", + " {\n", + " document.getElementById(\"div_options_ngram\").style.display=\"none\";\n", + " document.getElementById(\"div_options_soap\").style.display=\"\";\n", + " document.getElementById(\"button_show_options\").style.display=\"none\";\n", + " document.getElementById(\"button_hide_options\").style.display=\"\"; \n", + " }\n", + " if(regression_value == \"nn\")\n", + " {\n", + " document.getElementById(\"div_options_nn\").style.display=\"\"; \n", + " set_nn_default();\n", + " }\n", + " else if(regression_value == \"krr\")\n", + " {\n", + " document.getElementById(\"div_options_nn\").style.display=\"none\"; \n", + " }\n", + " }\n", + " function hide_options()\n", + " {\n", + " document.getElementById(\"button_show_options\").style.display=\"\";\n", + " document.getElementById(\"button_hide_options\").style.display=\"none\";\n", + " document.getElementById(\"div_options_ngram\").style.display=\"none\";\n", + " document.getElementById(\"div_options_soap\").style.display=\"none\";\n", + " document.getElementById(\"div_options_nn\").style.display=\"none\";\n", + " }\n", + " function set_nn_n_neurons()\n", + " {\n", + " document.getElementById(\"div_options_nn_n_neurons\").style.display=\"\";\n", + " n_layers = document.getElementById(\"input_n_nn\").value;\n", + " \n", + " contents = \"<br><font size=\\\"3em\\\">Number of neurons in each hidden layer</font><br>\"\n", + " for(i=0; i<n_layers; i++)\n", + " {\n", + " content_add = \"<font size=\\\"3em\\\"> Layer \" + String(i+1) + \": </font><input\\ type=\\\"number\\\"\\ id=\\\"input_n_neurons_\" + String(i) + \"\\\"\\ value=\\\"256\\\"\\ min=\\\"1\\\"\\ max=\\\"1024\\\"\\ >\";\n", + " contents += content_add;\n", + " if((i+1)%6 ==0)\n", + " {\n", + " contents += \"<br>\"\n", + " }\n", + " }\n", + " //alert(contents);\n", + " document.getElementById(\"div_options_nn_n_neurons\").innerHTML = contents;\n", + " }\n", + " function set_nn_default()\n", + " {\n", + " document.getElementById(\"div_options_nn_n_neurons\").style.display=\"\";\n", + " representation_value = document.getElementById(\"representation\").value;\n", + " if(representation_value == \"ngram\")\n", + " {\n", + " n_layers = 11;\n", + " document.getElementById(\"input_n_nn\").value = n_layers;\n", + " contents = \"<br><font size=\\\"3em\\\">Number of neurons in each hidden layer</font><br>\"\n", + " for(i=0; i<7; i++)\n", + " {\n", + " content_add = \"<font size=\\\"3em\\\"> Layer \" + String(i+1) + \": </font><input\\ type=\\\"number\\\"\\ id=\\\"input_n_neurons_\" + String(i) + \"\\\"\\ value=\\\"100\\\"\\ min=\\\"1\\\"\\ max=\\\"1024\\\"\\ >\";\n", + " contents += content_add;\n", + " if((i+1)%6 ==0)\n", + " {\n", + " contents += \"<br>\"\n", + " }\n", + "\n", + " }\n", + " for(i=7; i<11; i++)\n", + " {\n", + " content_add = \"<font size=\\\"3em\\\"> Layer \" + String(i+1) + \": </font><input\\ type=\\\"number\\\"\\ id=\\\"input_n_neurons_\" + String(i) + \"\\\"\\ value=\\\"50\\\"\\ min=\\\"1\\\"\\ max=\\\"1024\\\"\\ >\";\n", + " contents += content_add;\n", + " if((i+1)%6 ==0)\n", + " {\n", + " contents += \"<br>\"\n", + " }\n", + " }\n", + " }\n", + " else if(representation_value == \"soap\")\n", + " {\n", + " n_layers = 3;\n", + " document.getElementById(\"input_n_nn\").value = n_layers;\n", + " \n", + " contents = \"<br><font size=\\\"3em\\\">Number of neurons in each hidden layer</font><br>\"\n", + " for(i=0; i<1; i++)\n", + " {\n", + " content_add = \"<font size=\\\"3em\\\"> Layer \" + String(i+1) + \": </font><input\\ type=\\\"number\\\"\\ id=\\\"input_n_neurons_\" + String(i) + \"\\\"\\ value=\\\"512\\\"\\ min=\\\"1\\\"\\ max=\\\"1024\\\"\\ >\";\n", + " contents += content_add;\n", + " }\n", + " for(i=1; i<3; i++)\n", + " {\n", + " content_add = \"<font size=\\\"3em\\\"> Layer \" + String(i+1) + \": </font><input\\ type=\\\"number\\\"\\ id=\\\"input_n_neurons_\" + String(i) + \"\\\"\\ value=\\\"256\\\"\\ min=\\\"1\\\"\\ max=\\\"1024\\\"\\ >\";\n", + " contents += content_add;\n", + " }\n", + " }\n", + " \n", + " document.getElementById(\"div_options_nn_n_neurons\").innerHTML = contents;\n", + " save_options_nn();\n", + " \n", + " }\n", + " function save_options_nn()\n", + " {\n", + " n_layers = document.getElementById(\"input_n_nn\").value;\n", + " var n_neurons = [];\n", + " for(i=0; i<n_layers; i++)\n", + " {\n", + " id_i = \"input_n_neurons_\" + String(i);\n", + " n_neurons_i = document.getElementById(id_i).value;\n", + " n_neurons.push(n_neurons_i);\n", + " }\n", + " var command = \"N_nn_neurons = \" + n_neurons + \";\";\n", + " command += \"N_nn_layers = int(\" + n_layers + \");\"\n", + " var kernel = IPython.notebook.kernel;\n", + " kernel.execute(command);\n", + " \n", + " }\n", + " function save_options()\n", + " {\n", + " representation_value = document.getElementById(\"representation\").value;\n", + " regression_value = document.getElementById(\"regression\").value;\n", + " \n", + " if(representation_value == \"ngram\")\n", + " {\n", + " ngram_N = document.getElementById(\"input_ngram_N\").value;\n", + " n_threads = document.getElementById(\"input_n_threads\").value;\n", + " var command = \"ngram_N = int(\" + ngram_N + \"); n_threads = int(\" + n_threads + \");\" \n", + " }\n", + " if(representation_value == \"soap\")\n", + " {\n", + " soap_cutoff = document.getElementById(\"input_soap_cutoff\").value;\n", + " soap_lmax = document.getElementById(\"input_soap_lmax\").value;\n", + " soap_nmax = document.getElementById(\"input_soap_nmax\").value;\n", + " n_threads = document.getElementById(\"input_n_threads\").value;\n", + " var command = \"soap_cutoff = int(\" + soap_cutoff + \");\";\n", + " command += \"soap_lmax = int(\" + soap_lmax + \");\"; \n", + " command += \"soap_nmax = int(\" + soap_nmax + \");\";\n", + " command += \"n_threads = int(\" + n_threads + \");\";\n", + " }\n", + " \n", + " console.log(\"Executing Command: \" + command); \n", + " var kernel = IPython.notebook.kernel;\n", + " kernel.execute(command);\n", + " }\n", + "\n", + "</script>\n", + "\n", + "<div id=\"div_options_ngram\" style=\"display: none; position:relative; left:5%\">\n", + " <br>\n", + " <font size=\"3em\">N-grams size (N = 1 ~ 4)</font> <input type=\"number\" id=\"input_ngram_N\" value=\"3\" min=\"1\" max=\"4\" >\n", + " <br>\n", + " <font size=\"3em\">Number of threads used in regression (N = 1 ~ 4)</font> <input type=\"number\" id=\"input_n_threads\" value=\"4\" min=\"1\" max=\"4\" >\n", + " <br>\n", + " <button type=\"button\" id=\"save_options_ngram\" style=\"background-color:#f2f2f2;border:#555555;border-radius: 4px;font-size: 16px; width:150px; height:30px;\" onclick=\"save_options()\">Save options</button>\n", + "</div>\n", + "\n", + "<div id=\"div_options_soap\" style=\"display: none; position:relative; left:5%\">\n", + " <br>\n", + " <font size=\"3em\">SOAP cutoff (Ang) </font> <input type=\"number\" id=\"input_soap_cutoff\" value=\"10\" min=\"1\" max=\"25\">\n", + " <font size=\"3em\">Max. l </font><input type=\"number\" id=\"input_soap_lmax\" value=\"4\" min=\"1\" max=\"8\" >\n", + " <font size=\"3em\">Max. n </font><input type=\"number\" id=\"input_soap_nmax\" value=\"4\" min=\"1\" max=\"8\" >\n", + " <br>\n", + " <font size=\"3em\">Number of threads used in regression (N = 1 ~ 4)</font> <input type=\"number\" id=\"input_n_threads\" value=\"4\" min=\"1\" max=\"4\" >\n", + " <br>\n", + " <button type=\"button\" id=\"save_options_soap\" style=\"background-color:#f2f2f2;border:#555555;border-radius: 4px;font-size: 16px; width:150px; height:30px;\" onclick=\"save_options()\">Save options</button>\n", + "</div>\n", + "<br>\n", + "<div id=\"div_options_nn\" style=\"display: none; position:relative; left:5%\">\n", + " <br>\n", + " <font size=\"3em\">Number of (linear) hidden layers </font> <input type=\"number\" id=\"input_n_nn\" value=\"2\" min=\"1\" max=\"20\">\n", + " <button type=\"button\" id=\"set_n_neurons\" style=\"background-color:#f2f2f2;border:#555555;border-radius: 4px;font-size: 16px; width:250px; height:30px;\" onclick=\"set_nn_n_neurons()\">Set the number of neurons</button>\n", + " \n", + " <div id=\"div_options_nn_n_neurons\" style=\"display: none; position:relative; left:0%\">\n", + " <br>\n", + " <font size=\"3em\">Number of neurons in each hidden layer</font>\n", + " </div>\n", + " <br>\n", + " <button type=\"button\" id=\"set_nn_default\" style=\"background-color:#f2f2f2;border:#555555;border-radius: 4px;font-size: 16px; width:200px; height:30px;\" onclick=\"set_nn_default()\">Use default configuration</button>\n", + " <button type=\"button\" id=\"save_options_nn\" style=\"background-color:#f2f2f2;border:#555555;border-radius: 4px;font-size: 16px; width:150px; height:30px;\" onclick=\"save_options_nn()\">Save options</button>\n", + "</div>\n", + "\n", + "<div id=\"demoa\"></div>\n", + "<br><br>\n", + "\n", + "<button type=\"button\" id=\"button_show_sample_output\" style=\"background-color:#f2f2f2;border:#555555;border-radius: 4px;font-size: 16px; width:200px; height:30px\" onclick=\"show_sample_output()\">Show sample outputs</button>\n", + "<button type=\"button\" id=\"button_hide_sample_output\" style=\"background-color:#f2f2f2;border:#555555;border-radius: 4px;font-size: 16px; width:200px; height:30px; display: none\" onclick=\"hide_sample_output()\">Hide sample outputs</button>\n", + "\n", + "<button type=\"button\" style=\"background-color:#f2f2f2;border:#555555;border-radius: 4px;font-size: 16px; width:150px; height:30px\" onclick=\"get_repr_regr_combination()\">Get predictions</button>\n", + "<input type=\"checkbox\" id=\"if_learning_curve\" value=\"learning_curve\" onclick=show_warning_learning_curve()> Show learning curve\n", + "\n", + "<input type=\"checkbox\" id=\"if_use_prestored\" value=\"use_prestored\" onclick=show_warning_use_prestored_model() checked=true> Use prestored models\n", + "<div id=\"warning_learning_curve\" style=\"display: none\">\n", + " <br>\n", + " <font color=\"009FC2\">\n", + " Warning: It can be very time-consuming (5~20 min/point, depending on method, model size, and number of threads employed) when getting a learning curve.\n", + " </font>\n", + " <p><font size=\"3em\">Number of points in learning curve: <input type=\"number\" id=\"N_learning_curve\" value=\"4\" min=\"1\" max=\"25\" style=\"display:none\"></font></p>\n", + "</div>\n", + "<div id=\"warning_use_prestored\" style=\"display: none\">\n", + " <br>\n", + " <font color=\"009FC2\">\n", + " Warning: It can be very time-consuming (10~20 min, depending on method, model size, and number of threads employed) to generate models on the fly.\n", + " </font>\n", + "</div>\n", + "\n", + "\n", + "<div id=\"sample_output\" style=\"display: none\">\n", + " <br><br>\n", + " <font size=\"4em\"><b> Sample outputs of predictions on formation energy and bandgap</b></font>\n", + " <br><br>\n", + " <font size=\"3em\">Representation: N-grams<br><br>Regression method: Kernel-ridge regression (KRR)</font>\n", + " <br><br>\n", + " <img src=\"assets/kaggle_competition/results-ngram-krr.png\" width=\"60%\">\n", + " <br><br>\n", + " \n", + "</div>\n", + "\n", + "<style>\n", + "* {\n", + " box-sizing: border-box;\n", + "}\n", + "\n", + ".column {\n", + " float: left;\n", + " width: 33.33%;\n", + " padding: 5px;\n", + "}\n", + "\n", + "/* Clearfix (clear floats) */\n", + ".row::after {\n", + " content: \"\";\n", + " clear: both;\n", + " display: table;\n", + "}\n", + "</style>\n", + "\n", + "<div id=\"sample_learning_curve\" style=\"display: none\">\n", + " <font size=\"4em\"><b> Sample outputs of learning curves for formation energy and bandgap predictions</b></font>\n", + " <br><br>\n", + " <font size=\"3em\">Representation: N-gram<br><br>Regression method: Kernel-ridge regression (KRR)</font>\n", + " <br><br>\n", + " <div style=\"display: table; width: 95%\"> \n", + " <div class=\"row\">\n", + " <div class=\"column\">\n", + " <img src=\"./imgs/learning_curve-formation-ngram-krr.png\" style=\"width:100%\">\n", + " </div>\n", + " <div class=\"column\">\n", + " <img src=\"./imgs/learning_curve-bandgap-ngram-krr.png\" style=\"width:100%\">\n", + " </div>\n", + " </div>\n", + " </div>\n", + " <br><br>\n", + "</div>\n", + "\n", + "<script type=\"text/Javascript\">\n", + "\n", + " window.findCellIndicesByTag = function findCellIndicesByTag(tagName) {\n", + " return (Jupyter.notebook.get_cells()\n", + " .filter(\n", + " ({metadata: {tags}}) => tags && tags.includes(tagName)\n", + " )\n", + " .map((cell) => Jupyter.notebook.find_cell_index(cell))\n", + " );\n", + " };\n", + "\n", + "\n", + " window.runCells = function runPlotCells(tags) {\n", + " var c = window.findCellIndicesByTag(tags);\n", + " Jupyter.notebook.execute_cells(c);\n", + " };\n", + " \n", + " function show_sample_output()\n", + " {\n", + " document.getElementById(\"button_show_sample_output\").style.display=\"none\";\n", + " document.getElementById(\"button_hide_sample_output\").style.display=\"\";\n", + " \n", + " var check_learning_curve = document.getElementById(\"if_learning_curve\");\n", + " var learning_curve_output = document.getElementById(\"sample_learning_curve\");\n", + " var output = document.getElementById(\"sample_output\");\n", + " //var representation_value = document.getElementById(\"representation\").value;\n", + " //var regression_value = document.getElementById(\"regression\").value;\n", + " \n", + " output.style.display = \"block\";\n", + " \n", + " \n", + " \n", + " \n", + " if (check_learning_curve.checked == true)\n", + " {\n", + " learning_curve_output.style.display = \"block\"; \n", + " } \n", + " else \n", + " {\n", + " learning_curve_output.style.display = \"none\";\n", + " }\n", + " }\n", + "\n", + " function hide_sample_output()\n", + " {\n", + " document.getElementById(\"button_show_sample_output\").style.display=\"\";\n", + " document.getElementById(\"button_hide_sample_output\").style.display=\"none\";\n", + "\n", + " var learning_curve_output = document.getElementById(\"sample_learning_curve\");\n", + " var output = document.getElementById(\"sample_output\");\n", + "\n", + " output.style.display = \"none\"\n", + " learning_curve_output.style.display = \"none\";\n", + " }\n", + " \n", + " function show_warning_learning_curve()\n", + " {\n", + " var checkBox = document.getElementById(\"if_learning_curve\");\n", + " learning_curve_output = document.getElementById(\"sample_learning_curve\");\n", + " var warning = document.getElementById(\"warning_learning_curve\");\n", + " if (checkBox.checked == true)\n", + " {\n", + " warning.style.display = \"block\";\n", + " document.getElementById(\"N_learning_curve\").style.display=\"\";\n", + " var N_learning_curve = document.getElementById(\"N_learning_curve\").value;\n", + " var command = \" N_learning_curve = int(\" + N_learning_curve + \");\";\n", + " var kernel = IPython.notebook.kernel;\n", + " kernel.execute(command);\n", + " \n", + " } \n", + " else \n", + " {\n", + " warning.style.display = \"none\";\n", + " document.getElementById(\"N_learning_curve\").style.display=\"none\";\n", + " }\n", + " \n", + " }\n", + " \n", + " function show_warning_use_prestored_model()\n", + " {\n", + " var checkBox = document.getElementById(\"if_use_prestored\");\n", + " var warning = document.getElementById(\"warning_use_prestored\");\n", + " if (checkBox.checked == true)\n", + " {\n", + " warning.style.display = \"none\";\n", + " \n", + " } \n", + " else \n", + " {\n", + " warning.style.display = \"block\";\n", + " }\n", + " if_use_prestored = document.getElementById(\"if_use_prestored\").checked;\n", + " var command = \" if_use_prestored_models = '\" + if_use_prestored.toString() + \"';\";\n", + " var kernel = IPython.notebook.kernel;\n", + " kernel.execute(command);\n", + " }\n", + "\n", + " function get_repr_regr_combination()\n", + " {\n", + " ngram_N = document.getElementById(\"input_ngram_N\").value;\n", + " n_threads = document.getElementById(\"input_n_threads\").value;\n", + " soap_cutoff = document.getElementById(\"input_soap_cutoff\").value;\n", + " soap_lmax = document.getElementById(\"input_soap_lmax\").value;\n", + " soap_nmax = document.getElementById(\"input_soap_nmax\").value;\n", + " \n", + "\n", + " \n", + " var command = \"ngram_N = int(\" + ngram_N + \"); n_threads = int(\" + n_threads + \");\" \n", + " + \" soap_cutoff = int(\" + soap_cutoff + \");\" \n", + " + \" soap_lmax = int(\" + soap_lmax + \");\" \n", + " + \" soap_nmax = int(\" + soap_nmax + \");\" ;\n", + " console.log(\"Executing Command: \" + command); \n", + " var kernel = IPython.notebook.kernel;\n", + " kernel.execute(command);\n", + " \n", + " \n", + " representation_value = document.getElementById(\"representation\").value;\n", + " regression_value = document.getElementById(\"regression\").value;\n", + " if_learning_curve = document.getElementById(\"if_learning_curve\").checked;\n", + " if_use_prestored = document.getElementById(\"if_use_prestored\").checked;\n", + " N_learning_curve = document.getElementById(\"N_learning_curve\").value;\n", + " \n", + " var command = \" representation = '\" + representation_value + \n", + " \"'; regression = '\" + regression_value + \"';\" + \" if_learningcurve = '\" + if_learning_curve.toString() + \"';\" \n", + " + \" if_use_prestored_models = '\" + if_use_prestored.toString() + \"';\"\n", + " + \" N_learning_curve = int(\" + N_learning_curve + \");\";\n", + " \n", + " console.log(\"Executing Command: \" + command);\n", + " \n", + " var kernel = IPython.notebook.kernel;\n", + " kernel.execute(command);\n", + "\n", + " Jupyter.notebook.execute_cells(window.findCellIndicesByTag('ngram'));//# N-gram descriptor\n", + " Jupyter.notebook.execute_cells(window.findCellIndicesByTag('soap'));//# SOAP descriptors\n", + " Jupyter.notebook.execute_cells(window.findCellIndicesByTag('krr-ngram'));//# KRR\n", + " Jupyter.notebook.execute_cells(window.findCellIndicesByTag('init')); \n", + " Jupyter.notebook.execute_cells(window.findCellIndicesByTag('init_read_data')); \n", + " Jupyter.notebook.execute_cells(window.findCellIndicesByTag('plot'));\n", + " Jupyter.notebook.execute_cells(window.findCellIndicesByTag('main'));//# Read and predict \n", + " \n", + " }\n", + " \n", + " \n", + "\n", + "</script>\n", + "\n" + ], + "text/plain": [ + "<IPython.core.display.HTML object>" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "%%HTML\n", "<br><br><br>\n", @@ -888,18 +1554,31 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "metadata": { "ExecuteTime": { - "end_time": "2021-01-19T10:23:31.085760Z", - "start_time": "2021-01-19T10:23:31.066074Z" + "end_time": "2021-11-11T17:10:52.773343Z", + "start_time": "2021-11-11T17:10:52.330752Z" }, "init_cell": true, "tags": [ "init" ] }, - "outputs": [], + "outputs": [ + { + "ename": "ModuleNotFoundError", + "evalue": "No module named 'networkx'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m<ipython-input-5-5decc5b9aed6>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 11\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 12\u001b[0m \u001b[0;31m# n-gram\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 13\u001b[0;31m \u001b[0;32mimport\u001b[0m \u001b[0mkaggle_competition\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcrystal_graph\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mcg\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 14\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 15\u001b[0m \u001b[0;31m# SOAP\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/AI-toolkit/analytics/tutorials/analytics-kaggle-competition/kaggle_competition/__init__.py\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 16\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mnumpy\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 17\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mpandas\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mpd\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 18\u001b[0;31m \u001b[0;32mimport\u001b[0m \u001b[0mnetworkx\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mnx\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 19\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mmatplotlib\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpyplot\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mplt\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 20\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mwarnings\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'networkx'" + ] + } + ], "source": [ "import os\n", "import numpy as np\n", @@ -2598,7 +3277,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.9" + "version": "3.7.3" }, "toc": { "base_numbering": 1,