diff --git a/assets/kaggle_competition/Logo_MPG.png b/assets/kaggle_competition/Logo_MPG.png new file mode 100644 index 0000000000000000000000000000000000000000..9c65d4be3575dc8e32da983ea00b728dca00845a Binary files /dev/null and b/assets/kaggle_competition/Logo_MPG.png differ diff --git a/assets/kaggle_competition/Logo_NOMAD.png b/assets/kaggle_competition/Logo_NOMAD.png new file mode 100644 index 0000000000000000000000000000000000000000..2187e3b9351e11aa693758559114c1f2a6670731 Binary files /dev/null and b/assets/kaggle_competition/Logo_NOMAD.png differ diff --git a/kaggle_competition.ipynb b/kaggle_competition.ipynb index a3e59bc9f02430f105e24a67d03b12a438686728..8f155a76ecc9fd00fe480a828a34ce49a1a361ba 100644 --- a/kaggle_competition.ipynb +++ b/kaggle_competition.ipynb @@ -2,90 +2,15 @@ "cells": [ { "cell_type": "code", - "execution_count": 7, - "metadata": { - "ExecuteTime": { - "end_time": "2020-02-01T15:46:52.140106Z", - "start_time": "2020-02-01T15:46:52.114694Z" - }, - "init_cell": true, - "scrolled": false - }, - "outputs": [ - { - "data": { - "text/html": [ - "<script>\n", - " code_show=true; \n", - " function code_toggle() {\n", - " if (code_show)\n", - " {\n", - " $('div.input').hide();\n", - " } \n", - " else \n", - " {\n", - " $('div.input').show();\n", - " }\n", - " code_show = !code_show\n", - " } \n", - " $( document ).ready(code_toggle);\n", - "</script>\n", - "The raw code for this notebook is by default hidden for easier reading.\n", - "To toggle on/off the raw code, click <a href=\"javascript:code_toggle()\">here</a>.\n" - ], - "text/plain": [ - "<IPython.core.display.HTML object>" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "%%HTML\n", - "<script>\n", - " code_show=true; \n", - " function code_toggle() {\n", - " if (code_show)\n", - " {\n", - " $('div.input').hide();\n", - " } \n", - " else \n", - " {\n", - " $('div.input').show();\n", - " }\n", - " code_show = !code_show\n", - " } \n", - " $( document 
).ready(code_toggle);\n", - "</script>\n", - "The raw code for this notebook is by default hidden for easier reading.\n", - "To toggle on/off the raw code, click <a href=\"javascript:code_toggle()\">here</a>." - ] - }, - { - "cell_type": "code", - "execution_count": 8, + "execution_count": null, "metadata": { "ExecuteTime": { - "end_time": "2020-02-01T15:46:52.183997Z", - "start_time": "2020-02-01T15:46:52.146433Z" + "end_time": "2021-01-13T18:11:03.313270Z", + "start_time": "2021-01-13T18:11:03.304294Z" }, "init_cell": true }, - "outputs": [ - { - "data": { - "text/html": [ - "<style>.container { width:100% !important; }</style>" - ], - "text/plain": [ - "<IPython.core.display.HTML object>" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "import warnings\n", "warnings.filterwarnings('ignore')\n", @@ -96,135 +21,54 @@ ] }, { - "cell_type": "code", - "execution_count": 9, + "cell_type": "markdown", "metadata": { "ExecuteTime": { - "end_time": "2020-02-01T15:46:52.287831Z", - "start_time": "2020-02-01T15:46:52.187398Z" - }, - "init_cell": true - }, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - "\n", - "<div align=\"left\" style=\"background-color: rgba(149,170,79, 1.0); width: 100%; height: 470px\">\n", - " <div >\n", - " <table>\n", - " <tr></tr>\n", - " <tr>\n", - " <td><img id=\"nomad\" align=\"right\" src=\"https://nomad-coe.eu/uploads/nomad/images/NOMAD_Logo2.png\" width=\"70%\" alt=\"NOMAD Logo\"></td>\n", - " <td><font size=14em color=\"#20335d\" align=\"left\"><b>NOMAD Analytics Toolkit</b></font></td>\n", - " <td><img height=\"85px\" width=\"80px\" src=\"https://www.nomad-coe.eu/uploads/nomad/backgrounds/head_big-data_analytics_2.png\"></td>\n", - " </tr>\n", - " </table>\n", - " </div>\n", - "\n", - " <br><br>\n", - " <div style=\"position:relative; left:3%\"><font size=6em color=\"#20335d\" ><b> - NOMAD 2018 Kaggle competition</b></font></div>\n", - " <p style=\"position:relative;left:10%; 
\">\n", - " <br>\n", - " Created by:\n", - " Xiangyue Liu<sup>1</sup> (<a href=\"mailto:xyliu@fhi-berlin.mpg.de\">email</a>),\n", - " Christopher Sutton<sup>1</sup> (<a href=\"mailto:sutton@fhi-berlin.mpg.de\">email</a>),\n", - " Luca M. Ghiringhelli<sup>1</sup>(<a href=\"mailto:ghiringhelli@fhi-berlin.mpg.de\">email</a>), <br>\n", - " Takenori Yamamoto<sup>2</sup>, \n", - " Yury Lysogorskiy<sup>3</sup>, \n", - " Lars Blumenthal<sup>4,5</sup>,\n", - " Thomas Hammerschmidt<sup>3</sup>,\n", - " Jacek Golebiowski<sup>4,5</sup>, <br>\n", - " Angelo Ziletti<sup>1</sup>, Matthias Scheffler<sup>1</sup>\n", - "\n", - " <br><br>\n", - "\n", - " <sup>1</sup> Fritz Haber Institute of the Max Planck Society, Faradayweg 4-6, D-14195 Berlin, Germany <br>\n", - " <sup>2</sup> Research Institute for Mathematical and Computational Sciences (RIMCS), LLC, Yokohama, Japan <br>\n", - " <sup>3</sup> ICAMS, Ruhr-Universität Bochum, Germany <br>\n", - " <sup>4</sup> EPSRC Centre for Doctoral Training on Theory and Simulation of Materials Department of Physics, Imperial College London, London, U.K. 
<br>\n", - " <sup>5</sup> Thomas Young Centre for Theory and Simulation of Materials, Department of Materials, Imperial College London, London, U.K <br>\n", - " <br>\n", - "\n", - "\n", - " </p>\n", - " <br>\n", - " <div style=\"position:relative;bottom:3%\">\n", - " <div style=\"position:absolute;right:5%;bottom: 0%;\"><font color=\"#999999\" size=\"10em\">v1.0.0</font></div>\n", - " <div style=\"position:absolute;right:5%;bottom: 0%;\"><font color=\"#666666\" size=\"2.7em\">[Last updated: March 21, 2019]</font></div>\n", - " </div>\n", - "\n", - "</div>\n", - "\n", - "\n", - "<div style='text-align: right;'>\n", - " <a href=\"https://analytics-toolkit.nomad-coe.eu/home/\" class=\"btn btn-primary\" style=\"font-size:larger;\">Back to Analytics Home</a> \n", - " <a href=\"https://www.nomad-coe.eu/\" class=\"btn btn-primary\" style=\"font-size:larger; \">Back to nomad-coe</a> \n", - "</div>\n" - ], - "text/plain": [ - "<IPython.core.display.HTML object>" - ] - }, - "metadata": {}, - "output_type": "display_data" + "end_time": "2021-01-13T17:26:20.372223Z", + "start_time": "2021-01-13T17:26:20.368698Z" } - ], + }, "source": [ - "%%HTML\n", - "\n", - "\n", - "<div align=\"left\" style=\"background-color: rgba(149,170,79, 1.0); width: 100%; height: 470px\">\n", - " <div >\n", - " <table>\n", - " <tr></tr>\n", - " <tr>\n", - " <td><img id=\"nomad\" align=\"right\" src=\"https://nomad-coe.eu/uploads/nomad/images/NOMAD_Logo2.png\" width=\"70%\" alt=\"NOMAD Logo\"></td>\n", - " <td><font size=14em color=\"#20335d\" align=\"left\"><b>NOMAD Analytics Toolkit</b></font></td>\n", - " <td><img height=\"85px\" width=\"80px\" src=\"https://www.nomad-coe.eu/uploads/nomad/backgrounds/head_big-data_analytics_2.png\"></td>\n", - " </tr>\n", - " </table>\n", - " </div>\n", - "\n", - " <br><br>\n", - " <div style=\"position:relative; left:3%\"><font size=6em color=\"#20335d\" ><b> - NOMAD 2018 Kaggle competition</b></font></div>\n", - " <p style=\"position:relative;left:10%; \">\n", - 
" <br>\n", - " Created by:\n", - " Xiangyue Liu<sup>1</sup> (<a href=\"mailto:xyliu@fhi-berlin.mpg.de\">email</a>),\n", - " Christopher Sutton<sup>1</sup> (<a href=\"mailto:sutton@fhi-berlin.mpg.de\">email</a>),\n", - " Luca M. Ghiringhelli<sup>1</sup>(<a href=\"mailto:ghiringhelli@fhi-berlin.mpg.de\">email</a>), <br>\n", - " Takenori Yamamoto<sup>2</sup>, \n", - " Yury Lysogorskiy<sup>3</sup>, \n", - " Lars Blumenthal<sup>4,5</sup>,\n", - " Thomas Hammerschmidt<sup>3</sup>,\n", - " Jacek Golebiowski<sup>4,5</sup>, <br>\n", - " Angelo Ziletti<sup>1</sup>, Matthias Scheffler<sup>1</sup>\n", - "\n", - " <br><br>\n", - "\n", - " <sup>1</sup> Fritz Haber Institute of the Max Planck Society, Faradayweg 4-6, D-14195 Berlin, Germany <br>\n", - " <sup>2</sup> Research Institute for Mathematical and Computational Sciences (RIMCS), LLC, Yokohama, Japan <br>\n", - " <sup>3</sup> ICAMS, Ruhr-Universität Bochum, Germany <br>\n", - " <sup>4</sup> EPSRC Centre for Doctoral Training on Theory and Simulation of Materials Department of Physics, Imperial College London, London, U.K. 
<br>\n", - " <sup>5</sup> Thomas Young Centre for Theory and Simulation of Materials, Department of Materials, Imperial College London, London, U.K <br>\n", - " <br>\n", - "\n", - "\n", - " </p>\n", - " <br>\n", - " <div style=\"position:relative;bottom:3%\">\n", - " <div style=\"position:absolute;right:5%;bottom: 0%;\"><font color=\"#999999\" size=\"10em\">v1.0.0</font></div>\n", - " <div style=\"position:absolute;right:5%;bottom: 0%;\"><font color=\"#666666\" size=\"2.7em\">[Last updated: March 21, 2019]</font></div>\n", - " </div>\n", - "\n", + "<div id=\"teaser\" style=' background-position: right center; background-size: 00px; background-repeat: no-repeat; \n", + " padding-top: 20px;\n", + " padding-right: 10px;\n", + " padding-bottom: 170px;\n", + " padding-left: 10px;\n", + " border-bottom: 14px double #333;\n", + " border-top: 14px double #333;' > \n", + "\n", + " \n", + " <div style=\"text-align:center\">\n", + " <b><font size=\"6.4\">NOMAD 2018 Kaggle competition</font></b> \n", + " </div>\n", + " \n", + "<p>\n", + " created by:\n", + " Xiangyue Liu<sup>1</sup> (<a href=\"mailto:xyliu@fhi-berlin.mpg.de\">email</a>),\n", + " Christopher Sutton<sup>1</sup> (<a href=\"mailto:sutton@fhi-berlin.mpg.de\">email</a>),\n", + " Luca M. 
Ghiringhelli<sup>1</sup>(<a href=\"mailto:ghiringhelli@fhi-berlin.mpg.de\">email</a>),\n", + " Takenori Yamamoto<sup>2</sup>, \n", + " Yury Lysogorskiy<sup>3</sup>, \n", + " Lars Blumenthal<sup>4,5</sup>,\n", + " Thomas Hammerschmidt<sup>3</sup>,\n", + " Jacek Golebiowski<sup>4,5</sup>, \n", + " Angelo Ziletti<sup>1</sup>, \n", + " and Matthias Scheffler<sup>1</sup>\n", + " \n", + "<sup>1</sup> Fritz Haber Institute of the Max Planck Society, Faradayweg 4-6, D-14195 Berlin, Germany <br>\n", + "<sup>2</sup> Research Institute for Mathematical and Computational Sciences (RIMCS), LLC, Yokohama, Japan <br>\n", + "<sup>3</sup> ICAMS, Ruhr-Universität Bochum, Germany <br>\n", + "<sup>4</sup> EPSRC Centre for Doctoral Training on Theory and Simulation of Materials Department of Physics, Imperial College London, London, U.K. <br>\n", + "<sup>5</sup> Thomas Young Centre for Theory and Simulation of Materials, Department of Materials, Imperial College London, London, U.K <br>\n", + " \n", + " \n", + " \n", + "<span class=\"nomad--last-updated\" data-version=\"v1.0.0\">[Last updated: March 21, 2019]</span>\n", + " \n", + "<div> \n", + "<img style=\"float: left;\" src=\"assets/kaggle_competition/Logo_MPG.png\" width=\"200\"> \n", + "<img style=\"float: right;\" src=\"assets/kaggle_competition/Logo_NOMAD.png\" width=\"250\">\n", "</div>\n", - "\n", - "\n", - "<div style='text-align: right;'>\n", - " <a href=\"https://analytics-toolkit.nomad-coe.eu/home/\" class=\"btn btn-primary\" style=\"font-size:larger;\">Back to Analytics Home</a> \n", - " <a href=\"https://www.nomad-coe.eu/\" class=\"btn btn-primary\" style=\"font-size:larger; \">Back to nomad-coe</a> \n", "</div>\n" ] }, @@ -263,6 +107,39 @@ "Because only 100 values were used for assessing the performance on the leaderboard, participants had to ensure the predictive accuracy of their model for unseen data, even if a disagreement was found with the public leaderboard score. 
This is evident in the summary of the average RMSLE for all of the participants with scores below 0.25 in Figure 1, which shows a large shift in the values between the public leaderboard (100 compounds) and the private leaderboard (500 compounds). The winning score has an RMSLE of 0.0509, while the 2nd- and 3rd-place winners were closely stacked together with RMSLEs of 0.0521 and 0.0523. However, within the first bin, there were a total of four participants with an RMSLE of 0.053 (i.e., 0.45% of participants).\n" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2021-01-13T18:11:03.326736Z", + "start_time": "2021-01-13T18:11:03.314598Z" + }, + "init_cell": true, + "scrolled": false + }, + "outputs": [], + "source": [ + "%%HTML\n", + "<script>\n", + " code_show=true; \n", + " function code_toggle() {\n", + " if (code_show)\n", + " {\n", + " $('div.input').hide();\n", + " } \n", + " else \n", + " {\n", + " $('div.input').show();\n", + " }\n", + " code_show = !code_show\n", + " } \n", + " $( document ).ready(code_toggle);\n", + "</script>\n", + "The Python code for this notebook is by default hidden for easier reading.\n", + "To toggle on/off the raw code, click <a href=\"javascript:code_toggle()\">here</a>." 
+ ] + }, { "cell_type": "markdown", "metadata": {}, @@ -276,232 +153,16 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": null, "metadata": { "ExecuteTime": { - "end_time": "2020-02-01T15:46:52.325061Z", - "start_time": "2020-02-01T15:46:52.294166Z" + "end_time": "2021-01-13T18:11:03.342708Z", + "start_time": "2021-01-13T18:11:03.328322Z" }, "init_cell": true, "scrolled": false }, - "outputs": [ - { - "data": { - "text/html": [ - "<!-- CSS Style Inline: -->\n", - " <style type=\"text/css\">\n", - " #jmol_div227{\n", - " height: 350px;\n", - " width: 350px;\n", - " float: left;\n", - " }\n", - " #jmol_div12{\n", - " height: 350px;\n", - " width: 350px;\n", - " float: left;\n", - " }\n", - " #jmol_div206{\n", - " height: 350px;\n", - " width: 350px;\n", - " float: left;\n", - " }\n", - " #jmol_div33{\n", - " height: 350px;\n", - " width: 350px;\n", - " float: left;\n", - " }\n", - " #jmol_div194{\n", - " height: 350px;\n", - " width: 350px;\n", - " float: left;\n", - " }\n", - " #jmol_div167{\n", - " height: 350px;\n", - " width: 350px;\n", - " float: left;\n", - " }\n", - " </style>\n", - "\n", - " <!-- Load Jmol javascript library -->\n", - " <script type=\"text/javascript\" src=\"assets/kaggle_competition/jsmol/JSmol.min.js\"></script>\n", - "\n", - " <!-- calls to jQuery and Jmol (inline) -->\n", - " <script type=\"text/javascript\">\n", - " // Jmol readyFunction \n", - " jmol_isReady = function(applet) {\n", - " document.title = (applet._id + \" - Jmol \" + Jmol.___JmolVersion)\n", - " Jmol._getElement(applet, \"appletdiv\").style.border=\"0px solid blue\"\n", - " }\n", - " // initialize Jmol Applet\n", - " var myJmol227 = \"myJmol227\";\n", - " var Info227 = {\n", - " width: \"100%\",\n", - " height: \"100%\",\n", - " color: \"#ffffff\", \n", - " use: \"HTML5\",\n", - " j2sPath: \"assets/kaggle_competition/jsmol/j2s\",\n", - " jarPath: \"assets/kaggle_competition/jsmol/java\",\n", - " jarFile: \"JmolAppletSigned.jar\",\n", - " debug: 
false,\n", - " readyFunction: jmol_isReady,\n", - " script: 'load \"data/kaggle_competition/xyz/geometry3-spacegroup227.xyz\" ;',\n", - " allowJavaScript: false,\n", - " disableJ2SLoadMonitor: true,\n", - " }\n", - " var myJmol12 = \"myJmol12\";\n", - " var Info12 = {\n", - " width: \"100%\",\n", - " height: \"100%\",\n", - " color: \"#ffffff\", \n", - " use: \"HTML5\",\n", - " j2sPath: \"assets/kaggle_competition/jsmol/j2s\",\n", - " jarPath: \"assets/kaggle_competition/jsmol/java\",\n", - " jarFile: \"JmolAppletSigned.jar\",\n", - " debug: false,\n", - " readyFunction: jmol_isReady,\n", - " script: 'load \"data/kaggle_competition/xyz/geometry8-spacegroup12.xyz\" ;',\n", - " allowJavaScript: false,\n", - " disableJ2SLoadMonitor: true,\n", - " }\n", - " var myJmol206 = \"myJmol206\";\n", - " var Info206 = {\n", - " width: \"100%\",\n", - " height: \"100%\",\n", - " color: \"#ffffff\", \n", - " use: \"HTML5\",\n", - " j2sPath: \"assets/kaggle_competition/jsmol/j2s\",\n", - " jarPath: \"assets/kaggle_competition/jsmol/java\",\n", - " jarFile: \"JmolAppletSigned.jar\",\n", - " debug: false,\n", - " readyFunction: jmol_isReady,\n", - " script: 'load \"data/kaggle_competition/xyz/geometry7-spacegroup206.xyz\" ;',\n", - " allowJavaScript: false,\n", - " disableJ2SLoadMonitor: true,\n", - " }\n", - " var myJmol33 = \"myJmol33\";\n", - " var Info33 = {\n", - " width: \"100%\",\n", - " height: \"100%\",\n", - " color: \"#ffffff\", \n", - " use: \"HTML5\",\n", - " j2sPath: \"assets/kaggle_competition/jsmol/j2s\",\n", - " jarPath: \"assets/kaggle_competition/jsmol/java\",\n", - " jarFile: \"JmolAppletSigned.jar\",\n", - " debug: false,\n", - " readyFunction: jmol_isReady,\n", - " script: 'load \"data/kaggle_competition/xyz/geometry1-spacegroup33.xyz\" ;',\n", - " allowJavaScript: false,\n", - " disableJ2SLoadMonitor: true,\n", - " }\n", - " var myJmol194 = \"myJmol194\";\n", - " var Info194 = {\n", - " width: \"100%\",\n", - " height: \"100%\",\n", - " color: \"#ffffff\", 
\n", - " use: \"HTML5\",\n", - " j2sPath: \"assets/kaggle_competition/jsmol/j2s\",\n", - " jarPath: \"assets/kaggle_competition/jsmol/java\",\n", - " jarFile: \"JmolAppletSigned.jar\",\n", - " debug: false,\n", - " readyFunction: jmol_isReady,\n", - " script: 'load \"data/kaggle_competition/xyz/geometry2-spacegroup194.xyz\" ;',\n", - " allowJavaScript: false,\n", - " disableJ2SLoadMonitor: true,\n", - " }\n", - " var myJmol167 = \"myJmol167\";\n", - " var Info167 = {\n", - " width: \"100%\",\n", - " height: \"100%\",\n", - " color: \"#ffffff\", \n", - " use: \"HTML5\",\n", - " j2sPath: \"assets/kaggle_competition/jsmol/j2s\",\n", - " jarPath: \"assets/kaggle_competition/jsmol/java\",\n", - " jarFile: \"JmolAppletSigned.jar\",\n", - " debug: false,\n", - " readyFunction: jmol_isReady,\n", - " script: 'load \"data/kaggle_competition/xyz/geometry4-spacegroup167.xyz\" ;',\n", - " allowJavaScript: false,\n", - " disableJ2SLoadMonitor: true,\n", - " }\n", - " // jQuery ready functions\n", - " // is called when page has been completely loaded\n", - " $(document).ready(function() {\n", - " $(\"#jmol_div227\").html(Jmol.getAppletHtml(myJmol227, Info227))\n", - " })\n", - " $(document).ready(function() {\n", - " $(\"#jmol_div12\").html(Jmol.getAppletHtml(myJmol12, Info12))\n", - " })\n", - " $(document).ready(function() {\n", - " $(\"#jmol_div206\").html(Jmol.getAppletHtml(myJmol206, Info206))\n", - " })\n", - " $(document).ready(function() {\n", - " $(\"#jmol_div33\").html(Jmol.getAppletHtml(myJmol33, Info33))\n", - " })\n", - " $(document).ready(function() {\n", - " $(\"#jmol_div194\").html(Jmol.getAppletHtml(myJmol194, Info194))\n", - " })\n", - " $(document).ready(function() {\n", - " $(\"#jmol_div167\").html(Jmol.getAppletHtml(myJmol167, Info167))\n", - " })\n", - " var lastPrompt=0;\n", - " \n", - " \n", - " \n", - " function show_hide_structures()\n", - " {\n", - " var x = document.getElementById(\"geometry_jmol\");\n", - " if (x.style.display === \"none\") \n", - " 
{\n", - " x.style.display = \"block\";\n", - " } \n", - " else \n", - " {\n", - " x.style.display = \"none\";\n", - " }\n", - " }\n", - " \n", - " </script>\n", - "\n", - "<div>\n", - " <button type=\"button\" style=\"background-color:#f2f2f2;border:#555555;border-radius: 4px;font-size: 16px; width:200px; height:30px\" onclick=\"show_hide_structures()\">Show/hide structures</button>\n", - " <br><br>\n", - "</div>\n", - " \n", - "<div id=\"geometry_jmol\">\n", - " <table>\n", - " <tr>\n", - " <th><center>Spacegroup 12 (C2/m)</center></th>\n", - " <th><center>Spacegroup 33 (Pna2<sub>1</sub>)</center></th>\n", - " <th><center>Spacegroup 167 (R<font style=\"text-decoration: overline\">3</font>c)</center></th>\n", - " </tr>\n", - " <tr>\n", - " <th><div id='jmol_div12'></div></th>\n", - " <th><div id='jmol_div33'></div></th>\n", - " <th><div id='jmol_div167'></div></th>\n", - " </tr>\n", - " \n", - " <tr>\n", - " <th><center>Spacegroup 194 (P6<sub>3</sub>/mmc)</center></th>\n", - " <th><center>Spacegroup 206 (Ia<font style=\"text-decoration: overline\">3</font>)</center></th>\n", - " <th><center>Spacegroup 227 (Fd<font style=\"text-decoration: overline\">3</font>m)</center></th>\n", - " </tr>\n", - " <tr>\n", - " <th><div id='jmol_div194'></div></th>\n", - " <th><div id='jmol_div206'></div></th>\n", - " <th><div id='jmol_div227'></div></th>\n", - " </tr>\n", - " </table>\n", - "</div>\n" - ], - "text/plain": [ - "<IPython.core.display.HTML object>" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "%%html\n", "<!-- CSS Style Inline: -->\n", @@ -776,466 +437,16 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": null, "metadata": { "ExecuteTime": { - "end_time": "2020-02-01T15:46:52.392478Z", - "start_time": "2020-02-01T15:46:52.330622Z" + "end_time": "2021-01-13T18:11:03.355707Z", + "start_time": "2021-01-13T18:11:03.344401Z" }, "init_cell": true, "scrolled": false }, - "outputs": [ - { - "data": { - 
"text/html": [ - "<br><br><br>\n", - "<font size=\"6.5em\"><b>Make predictions on formation energies and bandgaps</b></font>\n", - "<br><hr><br>\n", - "<font size=\"5em\"><b>Winning representations combined with different regression methods</b></font>\n", - "<br><br>\n", - "<font size = \"3.5em\"> To understand the relative importance of the representation vs. regression model, one can examine the performance of each representation combined with different regression models. \n", - "The hyperparameters are optimized for each representation/regressor combination. </font>\n", - "<br>\n", - "<font size = \"3.5em\" color=\"009FC2\"><br>Warning: the learning algorithm employed in this study (e.g. grid-search) can not guarantee deterministic results. The actual predictions can divergent from the published data.\n", - "</font>\n", - "\n", - "<br><br><br>\n", - "\n", - "<form>\n", - " <font size=\"4em\">Select a representation and a regression method:</font>\n", - " <select id=\"representation\" onclick=\"show_options()\">\n", - " <option value=\"ngram\">n-gram</option>\n", - " <option value=\"soap\">SOAP</option>\n", - " </select>\n", - " <select id=\"regression\" onclick=\"show_options()\">\n", - " <option value=\"krr\">KRR</option>\n", - " <option value=\"nn\">Neural network</option>\n", - " </select>\n", - " \n", - " <button type=\"button\" id=\"button_show_options\" style=\"background-color:#f2f2f2;border:#555555;border-radius: 4px;font-size: 16px; width:150px; height:30px\" onclick=\"show_options()\">More options</button>\n", - " <button type=\"button\" id=\"button_hide_options\" style=\"background-color:#f2f2f2;border:#555555;border-radius: 4px;font-size: 16px; width:150px; height:30px; display: none\" onclick=\"hide_options()\">Less options</button>\n", - "\n", - "</form>\n", - "\n", - "\n", - "\n", - "<script type=\"text/Javascript\">\n", - "\n", - " function show_options()\n", - " {\n", - " representation_value = 
document.getElementById(\"representation\").value;\n", - " regression_value = document.getElementById(\"regression\").value;\n", - " \n", - " if(representation_value == \"ngram\")\n", - " {\n", - " document.getElementById(\"div_options_soap\").style.display=\"none\";\n", - " document.getElementById(\"div_options_ngram\").style.display=\"\";\n", - " document.getElementById(\"button_show_options\").style.display=\"none\";\n", - " document.getElementById(\"button_hide_options\").style.display=\"\"; \n", - " }\n", - " else if(representation_value == \"soap\")\n", - " {\n", - " document.getElementById(\"div_options_ngram\").style.display=\"none\";\n", - " document.getElementById(\"div_options_soap\").style.display=\"\";\n", - " document.getElementById(\"button_show_options\").style.display=\"none\";\n", - " document.getElementById(\"button_hide_options\").style.display=\"\"; \n", - " }\n", - " if(regression_value == \"nn\")\n", - " {\n", - " document.getElementById(\"div_options_nn\").style.display=\"\"; \n", - " set_nn_default();\n", - " }\n", - " else if(regression_value == \"krr\")\n", - " {\n", - " document.getElementById(\"div_options_nn\").style.display=\"none\"; \n", - " }\n", - " }\n", - " function hide_options()\n", - " {\n", - " document.getElementById(\"button_show_options\").style.display=\"\";\n", - " document.getElementById(\"button_hide_options\").style.display=\"none\";\n", - " document.getElementById(\"div_options_ngram\").style.display=\"none\";\n", - " document.getElementById(\"div_options_soap\").style.display=\"none\";\n", - " document.getElementById(\"div_options_nn\").style.display=\"none\";\n", - " }\n", - " function set_nn_n_neurons()\n", - " {\n", - " document.getElementById(\"div_options_nn_n_neurons\").style.display=\"\";\n", - " n_layers = document.getElementById(\"input_n_nn\").value;\n", - " \n", - " contents = \"<br><font size=\\\"3em\\\">Number of neurons in each hidden layer</font><br>\"\n", - " for(i=0; i<n_layers; i++)\n", - " {\n", - 
" content_add = \"<font size=\\\"3em\\\"> Layer \" + String(i+1) + \": </font><input\\ type=\\\"number\\\"\\ id=\\\"input_n_neurons_\" + String(i) + \"\\\"\\ value=\\\"256\\\"\\ min=\\\"1\\\"\\ max=\\\"1024\\\"\\ >\";\n", - " contents += content_add;\n", - " if((i+1)%6 ==0)\n", - " {\n", - " contents += \"<br>\"\n", - " }\n", - " }\n", - " //alert(contents);\n", - " document.getElementById(\"div_options_nn_n_neurons\").innerHTML = contents;\n", - " }\n", - " function set_nn_default()\n", - " {\n", - " document.getElementById(\"div_options_nn_n_neurons\").style.display=\"\";\n", - " representation_value = document.getElementById(\"representation\").value;\n", - " if(representation_value == \"ngram\")\n", - " {\n", - " n_layers = 11;\n", - " document.getElementById(\"input_n_nn\").value = n_layers;\n", - " contents = \"<br><font size=\\\"3em\\\">Number of neurons in each hidden layer</font><br>\"\n", - " for(i=0; i<7; i++)\n", - " {\n", - " content_add = \"<font size=\\\"3em\\\"> Layer \" + String(i+1) + \": </font><input\\ type=\\\"number\\\"\\ id=\\\"input_n_neurons_\" + String(i) + \"\\\"\\ value=\\\"100\\\"\\ min=\\\"1\\\"\\ max=\\\"1024\\\"\\ >\";\n", - " contents += content_add;\n", - " if((i+1)%6 ==0)\n", - " {\n", - " contents += \"<br>\"\n", - " }\n", - "\n", - " }\n", - " for(i=7; i<11; i++)\n", - " {\n", - " content_add = \"<font size=\\\"3em\\\"> Layer \" + String(i+1) + \": </font><input\\ type=\\\"number\\\"\\ id=\\\"input_n_neurons_\" + String(i) + \"\\\"\\ value=\\\"50\\\"\\ min=\\\"1\\\"\\ max=\\\"1024\\\"\\ >\";\n", - " contents += content_add;\n", - " if((i+1)%6 ==0)\n", - " {\n", - " contents += \"<br>\"\n", - " }\n", - " }\n", - " }\n", - " else if(representation_value == \"soap\")\n", - " {\n", - " n_layers = 3;\n", - " document.getElementById(\"input_n_nn\").value = n_layers;\n", - " \n", - " contents = \"<br><font size=\\\"3em\\\">Number of neurons in each hidden layer</font><br>\"\n", - " for(i=0; i<1; i++)\n", - " {\n", - " content_add = 
\"<font size=\\\"3em\\\"> Layer \" + String(i+1) + \": </font><input\\ type=\\\"number\\\"\\ id=\\\"input_n_neurons_\" + String(i) + \"\\\"\\ value=\\\"512\\\"\\ min=\\\"1\\\"\\ max=\\\"1024\\\"\\ >\";\n", - " contents += content_add;\n", - " }\n", - " for(i=1; i<3; i++)\n", - " {\n", - " content_add = \"<font size=\\\"3em\\\"> Layer \" + String(i+1) + \": </font><input\\ type=\\\"number\\\"\\ id=\\\"input_n_neurons_\" + String(i) + \"\\\"\\ value=\\\"256\\\"\\ min=\\\"1\\\"\\ max=\\\"1024\\\"\\ >\";\n", - " contents += content_add;\n", - " }\n", - " }\n", - " \n", - " document.getElementById(\"div_options_nn_n_neurons\").innerHTML = contents;\n", - " save_options_nn();\n", - " \n", - " }\n", - " function save_options_nn()\n", - " {\n", - " n_layers = document.getElementById(\"input_n_nn\").value;\n", - " var n_neurons = [];\n", - " for(i=0; i<n_layers; i++)\n", - " {\n", - " id_i = \"input_n_neurons_\" + String(i);\n", - " n_neurons_i = document.getElementById(id_i).value;\n", - " n_neurons.push(n_neurons_i);\n", - " }\n", - " var command = \"N_nn_neurons = \" + n_neurons + \";\";\n", - " command += \"N_nn_layers = int(\" + n_layers + \");\"\n", - " var kernel = IPython.notebook.kernel;\n", - " kernel.execute(command);\n", - " \n", - " }\n", - " function save_options()\n", - " {\n", - " representation_value = document.getElementById(\"representation\").value;\n", - " regression_value = document.getElementById(\"regression\").value;\n", - " \n", - " if(representation_value == \"ngram\")\n", - " {\n", - " ngram_N = document.getElementById(\"input_ngram_N\").value;\n", - " n_threads = document.getElementById(\"input_n_threads\").value;\n", - " var command = \"ngram_N = int(\" + ngram_N + \"); n_threads = int(\" + n_threads + \");\" \n", - " }\n", - " if(representation_value == \"soap\")\n", - " {\n", - " soap_cutoff = document.getElementById(\"input_soap_cutoff\").value;\n", - " soap_lmax = document.getElementById(\"input_soap_lmax\").value;\n", - " soap_nmax = 
document.getElementById(\"input_soap_nmax\").value;\n", - " n_threads = document.getElementById(\"input_n_threads\").value;\n", - " var command = \"soap_cutoff = int(\" + soap_cutoff + \");\";\n", - " command += \"soap_lmax = int(\" + soap_lmax + \");\"; \n", - " command += \"soap_nmax = int(\" + soap_nmax + \");\";\n", - " command += \"n_threads = int(\" + n_threads + \");\";\n", - " }\n", - " \n", - " console.log(\"Executing Command: \" + command); \n", - " var kernel = IPython.notebook.kernel;\n", - " kernel.execute(command);\n", - " }\n", - "\n", - "</script>\n", - "\n", - "<div id=\"div_options_ngram\" style=\"display: none; position:relative; left:5%\">\n", - " <br>\n", - " <font size=\"3em\">N-grams size (N = 1 ~ 4)</font> <input type=\"number\" id=\"input_ngram_N\" value=\"3\" min=\"1\" max=\"4\" >\n", - " <br>\n", - " <font size=\"3em\">Number of threads used in regression (N = 1 ~ 4)</font> <input type=\"number\" id=\"input_n_threads\" value=\"4\" min=\"1\" max=\"4\" >\n", - " <br>\n", - " <button type=\"button\" id=\"save_options_ngram\" style=\"background-color:#f2f2f2;border:#555555;border-radius: 4px;font-size: 16px; width:150px; height:30px;\" onclick=\"save_options()\">Save options</button>\n", - "</div>\n", - "\n", - "<div id=\"div_options_soap\" style=\"display: none; position:relative; left:5%\">\n", - " <br>\n", - " <font size=\"3em\">SOAP cutoff (Ang) </font> <input type=\"number\" id=\"input_soap_cutoff\" value=\"10\" min=\"1\" max=\"25\">\n", - " <font size=\"3em\">Max. l </font><input type=\"number\" id=\"input_soap_lmax\" value=\"4\" min=\"1\" max=\"8\" >\n", - " <font size=\"3em\">Max. 
n </font><input type=\"number\" id=\"input_soap_nmax\" value=\"4\" min=\"1\" max=\"8\" >\n", - " <br>\n", - " <font size=\"3em\">Number of threads used in regression (N = 1 ~ 4)</font> <input type=\"number\" id=\"input_n_threads\" value=\"4\" min=\"1\" max=\"4\" >\n", - " <br>\n", - " <button type=\"button\" id=\"save_options_soap\" style=\"background-color:#f2f2f2;border:#555555;border-radius: 4px;font-size: 16px; width:150px; height:30px;\" onclick=\"save_options()\">Save options</button>\n", - "</div>\n", - "<br>\n", - "<div id=\"div_options_nn\" style=\"display: none; position:relative; left:5%\">\n", - " <br>\n", - " <font size=\"3em\">Number of (linear) hidden layers </font> <input type=\"number\" id=\"input_n_nn\" value=\"2\" min=\"1\" max=\"20\">\n", - " <button type=\"button\" id=\"set_n_neurons\" style=\"background-color:#f2f2f2;border:#555555;border-radius: 4px;font-size: 16px; width:250px; height:30px;\" onclick=\"set_nn_n_neurons()\">Set the number of neurons</button>\n", - " \n", - " <div id=\"div_options_nn_n_neurons\" style=\"display: none; position:relative; left:0%\">\n", - " <br>\n", - " <font size=\"3em\">Number of neurons in each hidden layer</font>\n", - " </div>\n", - " <br>\n", - " <button type=\"button\" id=\"set_nn_default\" style=\"background-color:#f2f2f2;border:#555555;border-radius: 4px;font-size: 16px; width:200px; height:30px;\" onclick=\"set_nn_default()\">Use default configuration</button>\n", - " <button type=\"button\" id=\"save_options_nn\" style=\"background-color:#f2f2f2;border:#555555;border-radius: 4px;font-size: 16px; width:150px; height:30px;\" onclick=\"save_options_nn()\">Save options</button>\n", - "</div>\n", - "\n", - "<div id=\"demoa\"></div>\n", - "<br><br>\n", - "\n", - "<button type=\"button\" id=\"button_show_sample_output\" style=\"background-color:#f2f2f2;border:#555555;border-radius: 4px;font-size: 16px; width:200px; height:30px\" onclick=\"show_sample_output()\">Show sample outputs</button>\n", - "<button 
type=\"button\" id=\"button_hide_sample_output\" style=\"background-color:#f2f2f2;border:#555555;border-radius: 4px;font-size: 16px; width:200px; height:30px; display: none\" onclick=\"hide_sample_output()\">Hide sample outputs</button>\n", - "\n", - "<button type=\"button\" style=\"background-color:#f2f2f2;border:#555555;border-radius: 4px;font-size: 16px; width:150px; height:30px\" onclick=\"get_repr_regr_combination()\">Get predictions</button>\n", - "<input type=\"checkbox\" id=\"if_learning_curve\" value=\"learning_curve\" onclick=show_warning_learning_curve()> Show learning curve\n", - "\n", - "<input type=\"checkbox\" id=\"if_use_prestored\" value=\"use_prestored\" onclick=show_warning_use_prestored_model() checked=true> Use prestored models\n", - "<div id=\"warning_learning_curve\" style=\"display: none\">\n", - " <br>\n", - " <font color=\"009FC2\">\n", - " Warning: It can be very time-consuming (5~20 min/point, depending on method, model size, and number of threads employed) when getting a learning curve.\n", - " </font>\n", - " <p><font size=\"3em\">Number of points in learning curve: <input type=\"number\" id=\"N_learning_curve\" value=\"4\" min=\"1\" max=\"25\" style=\"display:none\"></font></p>\n", - "</div>\n", - "<div id=\"warning_use_prestored\" style=\"display: none\">\n", - " <br>\n", - " <font color=\"009FC2\">\n", - " Warning: It can be very time-consuming (10~20 min, depending on method, model size, and number of threads employed) to generate models on the fly.\n", - " </font>\n", - "</div>\n", - "\n", - "\n", - "<div id=\"sample_output\" style=\"display: none\">\n", - " <br><br>\n", - " <font size=\"4em\"><b> Sample outputs of predictions on formation energy and bandgap</b></font>\n", - " <br><br>\n", - " <font size=\"3em\">Representation: N-grams<br><br>Regression method: Kernel-ridge regression (KRR)</font>\n", - " <br><br>\n", - " <img src=\"assets/kaggle_competition/results-ngram-krr.png\" width=\"60%\">\n", - " <br><br>\n", - " \n", - 
"</div>\n", - "\n", - "<style>\n", - "* {\n", - " box-sizing: border-box;\n", - "}\n", - "\n", - ".column {\n", - " float: left;\n", - " width: 33.33%;\n", - " padding: 5px;\n", - "}\n", - "\n", - "/* Clearfix (clear floats) */\n", - ".row::after {\n", - " content: \"\";\n", - " clear: both;\n", - " display: table;\n", - "}\n", - "</style>\n", - "\n", - "<div id=\"sample_learning_curve\" style=\"display: none\">\n", - " <font size=\"4em\"><b> Sample outputs of learning curves for formation energy and bandgap predictions</b></font>\n", - " <br><br>\n", - " <font size=\"3em\">Representation: N-gram<br><br>Regression method: Kernel-ridge regression (KRR)</font>\n", - " <br><br>\n", - " <div style=\"display: table; width: 95%\"> \n", - " <div class=\"row\">\n", - " <div class=\"column\">\n", - " <img src=\"./imgs/learning_curve-formation-ngram-krr.png\" style=\"width:100%\">\n", - " </div>\n", - " <div class=\"column\">\n", - " <img src=\"./imgs/learning_curve-bandgap-ngram-krr.png\" style=\"width:100%\">\n", - " </div>\n", - " </div>\n", - " </div>\n", - " <br><br>\n", - "</div>\n", - "\n", - "<script type=\"text/Javascript\">\n", - "\n", - " window.findCellIndicesByTag = function findCellIndicesByTag(tagName) {\n", - " return (Jupyter.notebook.get_cells()\n", - " .filter(\n", - " ({metadata: {tags}}) => tags && tags.includes(tagName)\n", - " )\n", - " .map((cell) => Jupyter.notebook.find_cell_index(cell))\n", - " );\n", - " };\n", - "\n", - "\n", - " window.runCells = function runPlotCells(tags) {\n", - " var c = window.findCellIndicesByTag(tags);\n", - " Jupyter.notebook.execute_cells(c);\n", - " };\n", - " \n", - " function show_sample_output()\n", - " {\n", - " document.getElementById(\"button_show_sample_output\").style.display=\"none\";\n", - " document.getElementById(\"button_hide_sample_output\").style.display=\"\";\n", - " \n", - " var check_learning_curve = document.getElementById(\"if_learning_curve\");\n", - " var learning_curve_output = 
document.getElementById(\"sample_learning_curve\");\n", - " var output = document.getElementById(\"sample_output\");\n", - " //var representation_value = document.getElementById(\"representation\").value;\n", - " //var regression_value = document.getElementById(\"regression\").value;\n", - " \n", - " output.style.display = \"block\";\n", - " \n", - " \n", - " \n", - " \n", - " if (check_learning_curve.checked == true)\n", - " {\n", - " learning_curve_output.style.display = \"block\"; \n", - " } \n", - " else \n", - " {\n", - " learning_curve_output.style.display = \"none\";\n", - " }\n", - " }\n", - "\n", - " function hide_sample_output()\n", - " {\n", - " document.getElementById(\"button_show_sample_output\").style.display=\"\";\n", - " document.getElementById(\"button_hide_sample_output\").style.display=\"none\";\n", - "\n", - " var learning_curve_output = document.getElementById(\"sample_learning_curve\");\n", - " var output = document.getElementById(\"sample_output\");\n", - "\n", - " output.style.display = \"none\"\n", - " learning_curve_output.style.display = \"none\";\n", - " }\n", - " \n", - " function show_warning_learning_curve()\n", - " {\n", - " var checkBox = document.getElementById(\"if_learning_curve\");\n", - " learning_curve_output = document.getElementById(\"sample_learning_curve\");\n", - " var warning = document.getElementById(\"warning_learning_curve\");\n", - " if (checkBox.checked == true)\n", - " {\n", - " warning.style.display = \"block\";\n", - " document.getElementById(\"N_learning_curve\").style.display=\"\";\n", - " var N_learning_curve = document.getElementById(\"N_learning_curve\").value;\n", - " var command = \" N_learning_curve = int(\" + N_learning_curve + \");\";\n", - " var kernel = IPython.notebook.kernel;\n", - " kernel.execute(command);\n", - " \n", - " } \n", - " else \n", - " {\n", - " warning.style.display = \"none\";\n", - " document.getElementById(\"N_learning_curve\").style.display=\"none\";\n", - " }\n", - " \n", - " 
}\n", - " \n", - " function show_warning_use_prestored_model()\n", - " {\n", - " var checkBox = document.getElementById(\"if_use_prestored\");\n", - " var warning = document.getElementById(\"warning_use_prestored\");\n", - " if (checkBox.checked == true)\n", - " {\n", - " warning.style.display = \"none\";\n", - " \n", - " } \n", - " else \n", - " {\n", - " warning.style.display = \"block\";\n", - " }\n", - " if_use_prestored = document.getElementById(\"if_use_prestored\").checked;\n", - " var command = \" if_use_prestored_models = '\" + if_use_prestored.toString() + \"';\";\n", - " var kernel = IPython.notebook.kernel;\n", - " kernel.execute(command);\n", - " }\n", - "\n", - " function get_repr_regr_combination()\n", - " {\n", - " ngram_N = document.getElementById(\"input_ngram_N\").value;\n", - " n_threads = document.getElementById(\"input_n_threads\").value;\n", - " soap_cutoff = document.getElementById(\"input_soap_cutoff\").value;\n", - " soap_lmax = document.getElementById(\"input_soap_lmax\").value;\n", - " soap_nmax = document.getElementById(\"input_soap_nmax\").value;\n", - " \n", - "\n", - " \n", - " var command = \"ngram_N = int(\" + ngram_N + \"); n_threads = int(\" + n_threads + \");\" \n", - " + \" soap_cutoff = int(\" + soap_cutoff + \");\" \n", - " + \" soap_lmax = int(\" + soap_lmax + \");\" \n", - " + \" soap_nmax = int(\" + soap_nmax + \");\" ;\n", - " console.log(\"Executing Command: \" + command); \n", - " var kernel = IPython.notebook.kernel;\n", - " kernel.execute(command);\n", - " \n", - " \n", - " representation_value = document.getElementById(\"representation\").value;\n", - " regression_value = document.getElementById(\"regression\").value;\n", - " if_learning_curve = document.getElementById(\"if_learning_curve\").checked;\n", - " if_use_prestored = document.getElementById(\"if_use_prestored\").checked;\n", - " N_learning_curve = document.getElementById(\"N_learning_curve\").value;\n", - " \n", - " var command = \" representation = '\" + 
representation_value + \n", - " \"'; regression = '\" + regression_value + \"';\" + \" if_learningcurve = '\" + if_learning_curve.toString() + \"';\" \n", - " + \" if_use_prestored_models = '\" + if_use_prestored.toString() + \"';\"\n", - " + \" N_learning_curve = int(\" + N_learning_curve + \");\";\n", - " \n", - " console.log(\"Executing Command: \" + command);\n", - " \n", - " var kernel = IPython.notebook.kernel;\n", - " kernel.execute(command);\n", - "\n", - " Jupyter.notebook.execute_cells(window.findCellIndicesByTag('ngram'));//# N-gram descriptor\n", - " Jupyter.notebook.execute_cells(window.findCellIndicesByTag('soap'));//# SOAP descriptors\n", - " Jupyter.notebook.execute_cells(window.findCellIndicesByTag('krr-ngram'));//# KRR\n", - " Jupyter.notebook.execute_cells(window.findCellIndicesByTag('init')); \n", - " Jupyter.notebook.execute_cells(window.findCellIndicesByTag('init_read_data')); \n", - " Jupyter.notebook.execute_cells(window.findCellIndicesByTag('plot'));\n", - " Jupyter.notebook.execute_cells(window.findCellIndicesByTag('main'));//# Read and predict \n", - " \n", - " }\n", - " \n", - " \n", - "\n", - "</script>\n", - "\n" - ], - "text/plain": [ - "<IPython.core.display.HTML object>" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "%%HTML\n", "<br><br><br>\n", @@ -1680,11 +891,11 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": null, "metadata": { "ExecuteTime": { - "end_time": "2020-02-01T15:46:52.418689Z", - "start_time": "2020-02-01T15:46:52.403641Z" + "end_time": "2021-01-13T18:11:04.110973Z", + "start_time": "2021-01-13T18:11:03.357061Z" }, "init_cell": true, "tags": [ @@ -3374,6 +2585,7 @@ } ], "metadata": { + "celltoolbar": "Initialization Cell", "kernelspec": { "display_name": "Python 3", "language": "python", @@ -3389,7 +2601,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.3" + "version": "3.7.9" }, "toc": { 
"base_numbering": 1,