diff --git a/.local/share/jupyter/runtime/jpserver-7-open.html b/.local/share/jupyter/runtime/jpserver-7-open.html
deleted file mode 100644
index 7007cbd78eeda20cf68e6eb48df8c4370e5001d2..0000000000000000000000000000000000000000
--- a/.local/share/jupyter/runtime/jpserver-7-open.html
+++ /dev/null
@@ -1,17 +0,0 @@
-
-<!DOCTYPE html>
-<html lang="en">
-<head>
-    <meta charset="UTF-8">
-    <meta http-equiv="refresh" content="1;url=http://14647573e0b2:8888/lab?token=cc8d44f7374bbea52018b5792ba766996cabdb096ce54d78" />
-    <title>Opening Jupyter Application</title>
-</head>
-<body>
-
-<p>
-    This page should redirect you to a Jupyter application. If it doesn't,
-    <a href="http://14647573e0b2:8888/lab?token=cc8d44f7374bbea52018b5792ba766996cabdb096ce54d78">click here to go to Jupyter</a>.
-</p>
-
-</body>
-</html>
\ No newline at end of file
diff --git a/.local/share/jupyter/runtime/jpserver-7.json b/.local/share/jupyter/runtime/jpserver-7.json
deleted file mode 100644
index 74d167330698dbf4000cb460e4237ccb6ff97adb..0000000000000000000000000000000000000000
--- a/.local/share/jupyter/runtime/jpserver-7.json
+++ /dev/null
@@ -1,13 +0,0 @@
-{
-  "base_url": "/",
-  "hostname": "0.0.0.0",
-  "password": false,
-  "pid": 7,
-  "port": 8888,
-  "root_dir": "/home/jovyan",
-  "secure": false,
-  "sock": "",
-  "token": "cc8d44f7374bbea52018b5792ba766996cabdb096ce54d78",
-  "url": "http://14647573e0b2:8888/",
-  "version": "1.19.1"
-}
\ No newline at end of file
diff --git a/.local/share/jupyter/runtime/kernel-838ee26a-75b5-4144-bb04-62a3a355f610.json b/.local/share/jupyter/runtime/kernel-838ee26a-75b5-4144-bb04-62a3a355f610.json
deleted file mode 100644
index 19338b82c754214db91b34b909a572309a17fea7..0000000000000000000000000000000000000000
--- a/.local/share/jupyter/runtime/kernel-838ee26a-75b5-4144-bb04-62a3a355f610.json
+++ /dev/null
@@ -1,12 +0,0 @@
-{
-  "shell_port": 47033,
-  "iopub_port": 44389,
-  "stdin_port": 38097,
-  "control_port": 60831,
-  "hb_port": 56101,
-  "ip": "127.0.0.1",
-  "key": "e408025f-76dc26aa2d2a3dc3881f31a1",
-  "transport": "tcp",
-  "signature_scheme": "hmac-sha256",
-  "kernel_name": ""
-}
\ No newline at end of file
diff --git a/.local/share/jupyter/runtime/kernel-9f94ad2e-8ce1-41bd-bf37-cbfe8b5524c0.json b/.local/share/jupyter/runtime/kernel-9f94ad2e-8ce1-41bd-bf37-cbfe8b5524c0.json
deleted file mode 100644
index f9e090d6733d366dc1d5e49b8b331e97fe961168..0000000000000000000000000000000000000000
--- a/.local/share/jupyter/runtime/kernel-9f94ad2e-8ce1-41bd-bf37-cbfe8b5524c0.json
+++ /dev/null
@@ -1,12 +0,0 @@
-{
-  "shell_port": 54461,
-  "iopub_port": 59251,
-  "stdin_port": 57297,
-  "control_port": 48283,
-  "hb_port": 38341,
-  "ip": "127.0.0.1",
-  "key": "a21f6436-6992a3489d1e025953ec5c65",
-  "transport": "tcp",
-  "signature_scheme": "hmac-sha256",
-  "kernel_name": ""
-}
\ No newline at end of file
diff --git a/Dockerfile b/Dockerfile
index 23ff0686d9ca695fccda8a12c71bf24f225ee10e..9e701e1dfb38b1a0e05008f6f5ad2127a4b8e4ab 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -114,7 +114,5 @@ RUN mkdir build \
 USER ${NB_UID}
 WORKDIR "${HOME}"
 
-COPY --chown=${NB_UID}:${NB_GID} notebook ./notebook
-# COPY --chown=${NB_UID}:${NB_GID} assets/ assets/
-# COPY --chown=${NB_UID}:${NB_GID} compressed_sensing.ipynb .
+COPY --chown=${NB_UID}:${NB_GID} notebook ./
 
diff --git a/README.md b/README.md
index dce3c2032fc46cfead4242dc0f8550af9f50f3db..c8c8378e0417354c20076058823bbfd5f1729277 100644
--- a/README.md
+++ b/README.md
@@ -1,5 +1,7 @@
 # tutorial-catalysis-MRS2021
-
 ```bash
-docker run --rm -it -v "$PWD":/home/jovyan/ -p 8888:8888/tcp gitlab-registry.mpcdf.mpg.de/nomad-lab/ai-toolkit/tutorial-catalysis-MRS2021
+docker build -t gitlab-registry.mpcdf.mpg.de/nomad-lab/ai-toolkit/tutorial-catalysis-mrs2021 .
+```
+```bash
+docker run --rm -it -v "$PWD":/home/jovyan/work/ -p 8888:8888/tcp gitlab-registry.mpcdf.mpg.de/nomad-lab/ai-toolkit/tutorial-catalysis-mrs2021
 ```
\ No newline at end of file
diff --git a/notebook/catalysis_MRS2021.ipynb b/notebook/catalysis_MRS2021.ipynb
index 70508a59834d5dcbfe2c8a3a7420e51cf8a850e4..131bba5572519fdf7f51b5eac393792a6d71de1e 100644
--- a/notebook/catalysis_MRS2021.ipynb
+++ b/notebook/catalysis_MRS2021.ipynb
@@ -32,7 +32,7 @@
     "    <img style=\"float: left; margin: 15px 20px 0px 0px;\" src=\"assets/logos/nomad-infrastructure.svg\" width=\"120\">\n",
     "    <img style=\"float: left; margin: 5px 20px 0px 10px;\" src=\"assets/logos/mpcdf.svg\" width=\"270\">\n",
     "</div>\n",
-    "<p style=\"text-align: right; padding: 0px 10px 10px 0px;\">[Last updated: June, 2024]</p>"
+    "<p style=\"text-align: right; padding: 0px 10px 10px 0px;\">[Last updated: June, 2024]</p>\n"
    ]
   },
   {
@@ -59,16 +59,15 @@
     "R. Ouyang, E. Ahmetcik, C. Carbogno, M. Scheffler, L. M. Ghiringhelli: <span style=\"font-style: italic;\">Simultaneous learning of several materialsproperties from incomplete databases with multi-task SISSO</span>, J. Phys. Mater. 2 024002 (2019) <a href=\"https://iopscience.iop.org/article/10.1088/2515-7639/ab077b/pdf\" target=\"_blank\">[PDF]</a> .\n",
     "</div>\n",
     "\n",
-    "\n",
     "# Reactivity in heterogeneous catalysis: an example of a complex materials function\n",
     "\n",
-    "Heterogeneous catalysis is governed by an intricate interplay of multiple processes occurring at different time and length scales. Some of these processes are the surface chemical reactions, the dynamic restructuring of the material at reaction conditions and the transport of reactants and products. Because of such complexity, modelling the full catalytic progression via first-principles-based methods is often unfeasible. While experiments, e.g., spectroscopy under reaction conditions, provide information on how the catalysts work, these investigations focus on specific materials and conditions. It is thus unclear how to derive quantitative and general relationships between the catalytic performance on one side, and the materials physicochemical properties and reaction conditions on the other. In this tutorial, we apply artificial-intelligence to an experimental dataset and identify models for the measured catalyst performance as well as the key descriptive parameters reflecting the processes that trigger, facilitate, or hinder catalyst performance. In analogy with biology, these parameters are called \"materials genes\" of heterogeneous catalysis. We analyze here the selective propane oxidation over vanadium-based catalysts as an example of a complex chemical reaction. \n",
+    "Heterogeneous catalysis is governed by an intricate interplay of multiple processes occurring at different time and length scales. Some of these processes are the surface chemical reactions, the dynamic restructuring of the material at reaction conditions and the transport of reactants and products. Because of such complexity, modelling the full catalytic progression via first-principles-based methods is often unfeasible. While experiments, e.g., spectroscopy under reaction conditions, provide information on how the catalysts work, these investigations focus on specific materials and conditions. It is thus unclear how to derive quantitative and general relationships between the catalytic performance on one side, and the materials physicochemical properties and reaction conditions on the other. In this tutorial, we apply artificial-intelligence to an experimental dataset and identify models for the measured catalyst performance as well as the key descriptive parameters reflecting the processes that trigger, facilitate, or hinder catalyst performance. In analogy with biology, these parameters are called \"materials genes\" of heterogeneous catalysis. We analyze here the selective propane oxidation over vanadium-based catalysts as an example of a complex chemical reaction.\n",
     "\n",
     "# Sure Independent Screening and Sparsifying Operator (SISSO)\n",
     "\n",
-    "The sure-independence-screening-and-sparsifying-operator (SISSO) approach used here combines a symbolic-regression-based feature construction with compressed sensing for the identification of analytical expressions relating the input variables, called primary features, and the target of interest, based on data. In the present example, the catalytic performance is our target. (i) The algorithm starts with a feature construction step, in which the primary features are systematically combined by the exhaustive application of mathematical operators (e.g., addition, multiplication, exponential, square root). As a result of this step, an extremely large number of candidate expressions are generated. In the application show in this tutorial, the candidate descriptor space contains billions of elements. (ii) The candidate expressions are then selected according to their fit to the target using compressed sensing. Among all generated candidate expressions, SISSO selects a low number, typically lower than 5, which combined by means of fitted weighting coefficients, give a good fit to the target for the materials in the training set.  \n",
+    "The sure-independence-screening-and-sparsifying-operator (SISSO) approach used here combines a symbolic-regression-based feature construction with compressed sensing for the identification of analytical expressions relating the input variables, called primary features, and the target of interest, based on data. In the present example, the catalytic performance is our target. (i) The algorithm starts with a feature construction step, in which the primary features are systematically combined by the exhaustive application of mathematical operators (e.g., addition, multiplication, exponential, square root). As a result of this step, an extremely large number of candidate expressions are generated. In the application shown in this tutorial, the candidate descriptor space contains billions of elements. (ii) The candidate expressions are then selected according to their fit to the target using compressed sensing. Among all generated candidate expressions, SISSO selects a low number, typically lower than 5, which combined by means of fitted weighting coefficients, give a good fit to the target for the materials in the training set.\n",
     "\n",
-    "For futher details, we refer to the references above. We also note that a dedicated notebook discussing compressed sensing methods for feature selection including SISSO is available in the NOMAD AI-toolkit."
+    "For further details, we refer to the references above. We also note that a dedicated notebook discussing compressed sensing methods for feature selection including SISSO is available in the NOMAD AI-toolkit.\n"
    ]
   },
   {
@@ -77,7 +76,7 @@
    "source": [
     "# Import required modules and dataset\n",
     "\n",
-    "We start by loading the modules required for this tutorial as well as the data set. "
+    "We start by loading the modules required for this tutorial as well as the data set.\n"
    ]
   },
   {
@@ -92,15 +91,16 @@
    },
    "outputs": [],
    "source": [
-    "import pandas as pd\n",
-    "import numpy as np\n",
-    "import matplotlib\n",
-    "import matplotlib.pyplot as plt\n",
-    "import itertools\n",
     "import os\n",
     "import json\n",
-    "from scipy.interpolate import make_interp_spline, BSpline\n",
-    "# plt.rcParams.update({'font.size': 20})"
+    "import itertools\n",
+    "\n",
+    "import numpy as np\n",
+    "import pandas as pd\n",
+    "import matplotlib.pyplot as plt\n",
+    "\n",
+    "from pathlib import Path\n",
+    "from scipy.interpolate import make_interp_spline"
    ]
   },
   {
@@ -117,9 +117,9 @@
     " <li>surface properties from (near-ambient-pressure) x-ray photoelectron spectroscopy ((NAP-)XPS) </li>\n",
     " <li>oxygen uptake (oxidation ability) from temperature-programmed reduction/oxidation (TPRO) </li>\n",
     " <li>electrical conductivity from microwave cavity perturbation technique (MCPT) </li>\n",
-    "</div> \n",
+    "</div>\n",
     "\n",
-    "The values of each of these primary features for each material in the data set are shown below."
+    "The values of each of these primary features for each material in the data set are shown below.\n"
    ]
   },
   {
@@ -133,14 +133,81 @@
    },
    "outputs": [],
    "source": [
-    "pd.read_csv('data/primary_features.csv').set_index('material')"
+    "pd.read_csv(\"data/primary_features.csv\").set_index(\"material\")"
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "The targets used here correspond to the acrylic acid selectivity and the propane conversion measured at different temperatures. The values of these two materials functions at each measured temperature for each catalyst are shown below."
+    "The targets used here correspond to the acrylic acid selectivity and the propane conversion measured at different temperatures. The values of these two materials functions at each measured temperature for each catalyst are shown below.\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df = pd.read_csv(\"data/data_propane.csv\").set_index(\"material\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "materials = [\n",
+    "    \"MoVOx\",\n",
+    "    \"MoVTeNbOx\",\n",
+    "    \"V2O5\",\n",
+    "    \"VPP\",\n",
+    "    \"A-VPP\",\n",
+    "    \"B-VOPO4\",\n",
+    "    \"A-VWOPO4\",\n",
+    "    \"A-VOPO4\",\n",
+    "    \"VWPOx\"\n",
+    "]\n",
+    "\n",
+    "\n",
+    "labels = {\n",
+    "    \"MoVOx\": \"MoVO$_x$\",\n",
+    "    \"MoVTeNbOx\": \"MoVTeNbO$_x$\",\n",
+    "    \"V2O5\": \"V$_2$O$_5$\",\n",
+    "    \"VPP\": \"VPP\",\n",
+    "    \"A-VPP\": \"a-VPP\",\n",
+    "    \"B-VOPO4\": r\"$\\beta$-VOPO$_4$\",\n",
+    "    \"A-VWOPO4\": r\"$\\alpha$-VWOPO$_4$\",\n",
+    "    \"A-VOPO4\": r\"$\\alpha$-VOPO$_4$\",\n",
+    "    \"VWPOx\": \"VWPO$_x$\"\n",
+    "}\n",
+    "\n",
+    "\n",
+    "colors = {\n",
+    "    \"MoVOx\": \"magenta\",\n",
+    "    \"MoVTeNbOx\": \"green\",\n",
+    "    \"V2O5\": \"red\",\n",
+    "    \"VPP\": \"darkorange\",\n",
+    "    \"A-VPP\": \"gold\",\n",
+    "    \"B-VOPO4\": \"blue\",\n",
+    "    \"A-VWOPO4\": \"darkcyan\",\n",
+    "    \"A-VOPO4\": \"mediumpurple\",\n",
+    "    \"VWPOx\": \"cyan\"\n",
+    "}\n",
+    "\n",
+    "markers = {\n",
+    "    \"MoVOx\": \"o\",\n",
+    "    \"MoVTeNbOx\": \"o\",\n",
+    "    \"V2O5\": \"s\",\n",
+    "    \"VPP\": \"D\",\n",
+    "    \"A-VPP\": \"D\",\n",
+    "    \"B-VOPO4\": \">\",\n",
+    "    \"A-VWOPO4\": \"^\",\n",
+    "    \"A-VOPO4\": \"^\",\n",
+    "    \"VWPOx\": \"<\"\n",
+    "}\n",
+    "\n"
    ]
   },
   {
@@ -154,34 +221,20 @@
    },
    "outputs": [],
    "source": [
-    "df=pd.read_csv('data/data_propane.csv').set_index('material')\n",
-    "\n",
-    "T=df.loc['MoVTeNbOx']['T (C)']\n",
-    "T_red=df.loc['MoVOx']['T (C)']\n",
-    "fig, (ax1,ax2) = plt.subplots(1,2, constrained_layout=True, figsize=(14,5))\n",
-    "ax1.scatter(T_red, df.loc['MoVOx']['s_acrylic_acid (p)'], c='magenta', marker=\"o\", s=80, label='MoVO$_x$')\n",
-    "ax1.scatter(T, df.loc['MoVTeNbOx']['s_acrylic_acid (p)'], c='green', marker=\"o\",s=80, label='MoVTeNbO$_x$')\n",
-    "ax1.scatter(T, df.loc['V2O5']['s_acrylic_acid (p)'],  c='red', marker=\"s\", s=80, label='V$_2$O$_5$')\n",
-    "ax1.scatter(T, df.loc['VPP']['s_acrylic_acid (p)'], c='darkorange', marker=\"D\", s=80, label='VPP')\n",
-    "ax1.scatter(T, df.loc['A-VPP']['s_acrylic_acid (p)'],  c='gold', marker=\"D\", s=80, label='a-VPP')\n",
-    "ax1.scatter(T, df.loc['B-VOPO4']['s_acrylic_acid (p)'],  c='blue', marker=\">\", label=r'$\\beta$-VOPO$_4$',s=80)\n",
-    "ax1.scatter(T, df.loc['A-VWOPO4']['s_acrylic_acid (p)'],  c='darkcyan', marker=\"^\",label=r'$\\alpha$-VWOPO$_4$',s=80)\n",
-    "ax1.scatter(T, df.loc['A-VOPO4']['s_acrylic_acid (p)'], c='mediumpurple', marker=\"^\", label=r'$\\alpha$-VOPO$_4$',s=80)\n",
-    "ax1.scatter(T, df.loc['VWPOx']['s_acrylic_acid (p)'], c='cyan', marker=\"<\", label='VWPO$_x$',s=80)\n",
-    "ax2.scatter(T_red, df.loc['MoVOx']['x (p)'], c='magenta', marker=\"o\", s=80, label='MoVO$_x$')\n",
-    "ax2.scatter(T, df.loc['MoVTeNbOx']['x (p)'], c='green', marker=\"o\",s=80, label='MoVTeNbO$_x$')\n",
-    "ax2.scatter(T, df.loc['V2O5']['x (p)'],  c='red', marker=\"s\", s=80, label='V$_2$O$_5$')\n",
-    "ax2.scatter(T, df.loc['VPP']['x (p)'], c='darkorange', marker=\"D\", s=80, label='VPP')\n",
-    "ax2.scatter(T, df.loc['A-VPP']['x (p)'],  c='gold', marker=\"D\", s=80, label='a-VPP')\n",
-    "ax2.scatter(T, df.loc['B-VOPO4']['x (p)'],  c='blue', marker=\">\", label=r'$\\beta$-VOPO$_4$',s=80)\n",
-    "ax2.scatter(T, df.loc['A-VWOPO4']['x (p)'],  c='darkcyan', marker=\"^\",label=r'$\\alpha$-VWOPO$_4$',s=80)\n",
-    "ax2.scatter(T, df.loc['A-VOPO4']['x (p)'], c='mediumpurple', marker=\"^\", label=r'$\\alpha$-VOPO$_4$',s=80)\n",
-    "ax2.scatter(T, df.loc['VWPOx']['x (p)'], c='cyan', marker=\"<\", label='VWPO$_x$',s=80)\n",
-    "ax2.legend(bbox_to_anchor=(1,1),fontsize=18)\n",
-    "ax1.set_ylabel('$S_{\\mathrm{acrylic \\: acid}}$ (%)')\n",
-    "ax1.set_xlabel('Temperature ($\\degree$C)')\n",
-    "ax2.set_ylabel('$X_{\\mathrm{propane}}$ (%)')\n",
-    "ax2.set_xlabel('Temperature ($\\degree$C)')"
+    "fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(9, 5))\n",
+    "\n",
+    "for m_id in materials:\n",
+    "    m = df.loc[m_id]\n",
+    "    ax1.scatter(m[\"T (C)\"], m[\"s_acrylic_acid (p)\"], c=colors[m_id], marker=markers[m_id], label=labels[m_id])\n",
+    "    ax2.scatter(m[\"T (C)\"], m[\"x (p)\"], c=colors[m_id], marker=markers[m_id], label=labels[m_id])\n",
+    "\n",
+    "ax1.set_ylabel(\"$S_{\\mathrm{acrylic \\: acid}}$ (%)\")\n",
+    "ax1.set_xlabel(\"Temperature ($\\degree$C)\")\n",
+    "ax2.set_ylabel(\"$X_{\\mathrm{propane}}$ (%)\")\n",
+    "ax2.set_xlabel(\"Temperature ($\\degree$C)\")\n",
+    "ax2.legend(bbox_to_anchor=(1, 1))\n",
+    "\n",
+    "fig.show()"
    ]
   },
   {
@@ -192,7 +245,7 @@
    "source": [
     "# Write SISSO input files and run the descriptor identification\n",
     "\n",
-    "We start by writing the input files for SISSO and executing the algorithm. We note that a leave-one-material-out cross-validation scheme is used here. Additionally, the features are pre-selected for the identification of expressions using symbolic regression tree depths of 3 (rung=3). This is to keep the candidate descriptor space tractable. "
+    "We start by writing the input files for SISSO and executing the algorithm. We note that a leave-one-material-out cross-validation scheme is used here. Additionally, the features are pre-selected for the identification of expressions using symbolic regression tree depths of 3 (rung=3). This is to keep the candidate descriptor space tractable.\n"
    ]
   },
   {
@@ -206,7 +259,7 @@
    },
    "outputs": [],
    "source": [
-    "def write_sisso(train_df,rung,ops,leave_out_ind,prop_key,path):\n",
+    "def write_sisso(train_df, rung, ops, leave_out_ind, prop_key, path):\n",
     "    \"\"\"\n",
     "    creates the directory and writes the input files (sisso.json and train.dat) for cpp-sisso\n",
     "    arguments: train_df (pd dataframe): dataframe containing primary features and target values\n",
@@ -215,137 +268,155 @@
     "               ops(list): list of mathematical operators (str)\n",
     "               leave_out_ind (list): indicies of materials to be leaft-out\n",
     "               prop_key(str): target column name\n",
-    "               path(str): path of the directory to be created \n",
+    "               path(str): path of the directory to be created\n",
     "    \"\"\"\n",
-    "    n_sis=200     #size of selected candidate feature spaces for each dimension\n",
-    "    n_res=10      #number of residuals\n",
-    "    n_store=25    #number of top-ranked models to report\n",
+    "    n_sis = 200  # size of selected candidate feature spaces for each dimension\n",
+    "    n_res = 10  # number of residuals\n",
+    "    n_store = 25  # number of top-ranked models to report\n",
     "    \n",
+    "    # rung_store: rungs to store during the descriptor identification\n",
+    "    # rung_gen: rungs to generate on-the-fly during the descriptor identification\n",
     "    if rung == 1:\n",
-    "        rung_store=0   #rungs to store during the descriptor identification\n",
-    "        rung_gen=0     #rungs to generate on-the-fly during the descriptor identification\n",
-    "        \n",
+    "        rung_store = 0\n",
+    "        rung_gen = 0\n",
+    "\n",
     "    if rung == 2:\n",
-    "        rung_store=1\n",
-    "        rung_gen=0\n",
-    "        \n",
+    "        rung_store = 1\n",
+    "        rung_gen = 0\n",
+    "\n",
     "    if rung == 3:\n",
-    "        rung_store=2\n",
-    "        rung_gen=1\n",
-    "        \n",
-    "    train_df.to_csv(path+'/train.dat')\n",
+    "        rung_store = 2\n",
+    "        rung_gen = 1\n",
+    "\n",
+    "    train_df.to_csv(path / \"train.dat\")\n",
     "    input_file = {}\n",
-    "    input_file = {'desc_dim':3,                             #descriptor dimension\n",
-    "                      'n_sis_select':n_sis,\n",
-    "                      'max_rung':rung,\n",
-    "                      'n_rung_store':rung_store,\n",
-    "                      'n_residual':n_res,\n",
-    "                      'min_abs_feat_val': 1e-3,\n",
-    "                      'max_abs_feat_val': 1e5,\n",
-    "                      'data_file': 'train.dat',\n",
-    "                      'property_key': prop_key,\n",
-    "                      'n_rung_generate': rung_gen,\n",
-    "                      'n_models_store': n_store,\n",
-    "                      'leave_out_inds': leave_out_ind,\n",
-    "                      'fix_intercept' : 'true',            #the intercept c_0 in the model is fixed to zero\n",
-    "                      'opset': ops,\n",
-    "\n",
-    "                     }\n",
-    "    with open(path+'/sisso.json','w') as outfile:\n",
+    "    input_file = {\n",
+    "        \"desc_dim\": 3,  # descriptor dimension\n",
+    "        \"n_sis_select\": n_sis,\n",
+    "        \"max_rung\": rung,\n",
+    "        \"n_rung_store\": rung_store,\n",
+    "        \"n_residual\": n_res,\n",
+    "        \"min_abs_feat_val\": 1e-3,\n",
+    "        \"max_abs_feat_val\": 1e5,\n",
+    "        \"data_file\": \"train.dat\",\n",
+    "        \"property_key\": prop_key,\n",
+    "        \"n_rung_generate\": rung_gen,\n",
+    "        \"n_models_store\": n_store,\n",
+    "        \"leave_out_inds\": leave_out_ind,\n",
+    "        \"fix_intercept\": \"true\",  # the intercept c_0 in the model is fixed to zero\n",
+    "        \"opset\": ops,\n",
+    "    }\n",
+    "    with open(path / \"sisso.json\", \"w\") as outfile:\n",
     "        json.dump(input_file, outfile, indent=4)\n",
     "\n",
-    "#mathematical operators to be used for feature construction       \n",
-    "ops = [\"add\", \n",
-    "       \"sub\", \n",
-    "       \"abs_diff\", \n",
-    "       \"mult\", \n",
-    "       \"div\", \n",
-    "       \"exp\", \n",
-    "       \"sq\", \n",
-    "       \"cb\", \n",
-    "       \"sqrt\", \n",
-    "       \"cbrt\", \n",
-    "       \"log\", \n",
-    "       \"abs\", \n",
-    "       \"six_pow\"] \n",
-    "\n",
-    "#we start with the selectivity and the conversion is therefore excluded\n",
-    "prop='s_acrylic_acid'\n",
-    "train_df=df.drop(['x (p)'], axis=1) \n",
-    "\n",
-    "#leave-out-indices (one per material, i.e. one per cross-validation iteration)\n",
-    "lo_index=[[0,9,18,27], #MoVOx\n",
-    "          [1,10,19,28,36,44,52,60,68], #MoVTeNbOx\n",
-    "          [2,11,20,29,37,45,53,61,69], #V2O5\n",
-    "          [3,12,21,30,38,46,54,62,70], #VPP\n",
-    "          [4,13,22,31,39,47,55,63,71], #a-VPP\n",
-    "          [5,14,23,32,40,48,56,64,72], #beta-VOPO4\n",
-    "          [6,15,24,33,41,49,57,65,73], #alpha-VWOPO4\n",
-    "          [7,16,25,34,42,50,58,66,74], #alpha-VOPO4\n",
-    "          [8,17,26,35,43,51,59,67,75]] #VWPOx\n",
-    "\n",
-    "#features selected for rung=3 analysis\n",
-    "selected_features_r3 = train_df[['s_acrylic_acid (p)','Task',\n",
-    "                                 'V_pore_fr (cm^3/g)',\n",
-    "                                    'V_pore_act (cm^3/g)',\n",
-    "                                    'x_V_bulk_act (pa)',\n",
-    "                                    'x_V_surf_fr (pa)',\n",
-    "                                    'x_C_surf_fr (pa)',\n",
-    "                                    'x_V_surf_act (pa)',\n",
-    "                                    'x_V_surf_dry (pa)',\n",
-    "                                    'x_V_surf_wet (pa)',\n",
-    "                                    'x_V_surf_c3 (pa)',\n",
-    "                                    'ox_V_surf_c3 (e)',\n",
-    "                                    'a_C=O_surf_fr (par)',\n",
-    "                                    'a_C-O_surf_act (par)',\n",
-    "                                    'W_wet (eV)',\n",
-    "                                    'W_c3 (eV)',\n",
-    "                                    'Ea_cond_act (kJ/mol)']]\n",
-    "\n",
-    "train_df_r3= selected_features_r3.copy()               "
+    "\n",
+    "# mathematical operators to be used for feature construction\n",
+    "ops = [\n",
+    "    \"add\",\n",
+    "    \"sub\",\n",
+    "    \"abs_diff\",\n",
+    "    \"mult\",\n",
+    "    \"div\",\n",
+    "    \"exp\",\n",
+    "    \"sq\",\n",
+    "    \"cb\",\n",
+    "    \"sqrt\",\n",
+    "    \"cbrt\",\n",
+    "    \"log\",\n",
+    "    \"abs\",\n",
+    "    \"six_pow\",\n",
+    "]\n",
+    "\n",
+    "# we start with the selectivity and the conversion is therefore excluded\n",
+    "prop = \"s_acrylic_acid\"\n",
+    "train_df = df.drop([\"x (p)\"], axis=1)\n",
+    "\n",
+    "# leave-out-indices (one per material, i.e. one per cross-validation iteration)\n",
+    "lo_index = [\n",
+    "    [0, 9, 18, 27],  # MoVOx\n",
+    "    [1, 10, 19, 28, 36, 44, 52, 60, 68],  # MoVTeNbOx\n",
+    "    [2, 11, 20, 29, 37, 45, 53, 61, 69],  # V2O5\n",
+    "    [3, 12, 21, 30, 38, 46, 54, 62, 70],  # VPP\n",
+    "    [4, 13, 22, 31, 39, 47, 55, 63, 71],  # a-VPP\n",
+    "    [5, 14, 23, 32, 40, 48, 56, 64, 72],  # beta-VOPO4\n",
+    "    [6, 15, 24, 33, 41, 49, 57, 65, 73],  # alpha-VWOPO4\n",
+    "    [7, 16, 25, 34, 42, 50, 58, 66, 74],  # alpha-VOPO4\n",
+    "    [8, 17, 26, 35, 43, 51, 59, 67, 75],  # VWPOx\n",
+    "] \n",
+    "\n",
+    "# features selected for rung=3 analysis\n",
+    "selected_features_r3 = train_df[\n",
+    "    [\n",
+    "        \"s_acrylic_acid (p)\",\n",
+    "        \"Task\",\n",
+    "        \"V_pore_fr (cm^3/g)\",\n",
+    "        \"V_pore_act (cm^3/g)\",\n",
+    "        \"x_V_bulk_act (pa)\",\n",
+    "        \"x_V_surf_fr (pa)\",\n",
+    "        \"x_C_surf_fr (pa)\",\n",
+    "        \"x_V_surf_act (pa)\",\n",
+    "        \"x_V_surf_dry (pa)\",\n",
+    "        \"x_V_surf_wet (pa)\",\n",
+    "        \"x_V_surf_c3 (pa)\",\n",
+    "        \"ox_V_surf_c3 (e)\",\n",
+    "        \"a_C=O_surf_fr (par)\",\n",
+    "        \"a_C-O_surf_act (par)\",\n",
+    "        \"W_wet (eV)\",\n",
+    "        \"W_c3 (eV)\",\n",
+    "        \"Ea_cond_act (kJ/mol)\",\n",
+    "    ]\n",
+    "]\n",
+    "\n",
+    "train_df_r3 = selected_features_r3.copy()"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2021-04-19T17:17:03.355228Z",
-     "start_time": "2021-04-19T17:17:03.352934Z"
-    }
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
-    "#create folders, write inputs and run SISSO \n",
-    "#os.chdir(working_dir)\n",
-    "#for rung in [1,2]:\n",
-    "#    os.mkdir(prop+'_r'+str(rung))\n",
-    "#    write_sisso(train_df,rung,ops,'[]',prop,prop+'_r'+str(rung))\n",
-    "#    os.chdir(prop+'_r'+str(rung))\n",
-    "#    os.system(cpp_sisso_executable+' sisso.json > sisso.out')\n",
-    "#    os.chdir(working_dir)\n",
-    "#    os.mkdir(prop+'_r'+str(rung)+'_cv')\n",
-    "#    for it in [0,1,2,3,4,5,6,7,8]:\n",
-    "#        os.chdir(working_dir)\n",
-    "#        os.mkdir(prop+'_r'+str(rung)+'_cv/iter_'+str(it))\n",
-    "#        write_sisso(train_df,rung,ops,lo_index[it],prop,prop+'_r'+str(rung)+'_cv/iter_'+str(it))\n",
-    "#        os.chdir(prop+'_r'+str(rung)+'_cv/iter_'+str(it))\n",
-    "#        os.system(cpp_sisso_executable+' sisso.json > sisso.out')             \n",
-    "   \n",
-    "#for rung in [3]:\n",
-    "#    os.mkdir(prop+'_r'+str(rung))\n",
-    "#    write_sisso(train_df_r3,rung,ops,'[]',prop,prop+'_r'+str(rung))\n",
-    "#    os.chdir(prop+'_r'+str(rung))\n",
-    "#    os.system(cpp_sisso_executable+' sisso.json > sisso.out')\n",
-    "#    os.chdir(working_dir)\n",
-    "#    os.mkdir(prop+'_r'+str(rung)+'_cv')\n",
-    "#    for it in [0,1,2,3,4,5,6,7,8]:\n",
-    "#        os.chdir(working_dir)\n",
-    "#        os.mkdir(prop+'_r'+str(rung)+'_cv/iter_'+str(it))\n",
-    "#        write_sisso(train_df_r3,rung,ops,lo_index[it],prop,prop+'_r'+str(rung)+'_cv/iter_'+str(it))\n",
-    "#        os.chdir(prop+'_r'+str(rung)+'_cv/iter_'+str(it))\n",
-    "#        os.system(cpp_sisso_executable+' sisso.json > sisso.out')\n",
-    "#os.chdir(working_dir)"
+    "# cpp_sisso_executable = '/opt/sissopp/bin/sisso++'\n",
+    "\n",
+    "# working_dir = Path('data')\n",
+    "# working_dir.mkdir(parents=True, exist_ok=True)\n",
+    "# os.chdir(working_dir)\n",
+    "\n",
+    "# root = Path().cwd()\n",
+    "# for rung in [1, 2]:\n",
+    "#     folder = root / f\"{prop}_r{rung}\"\n",
+    "#     folder.mkdir()    \n",
+    "    \n",
+    "#     os.chdir(folder)\n",
+    "#     write_sisso(train_df, rung,ops,'[]', prop, folder)\n",
+    "#     os.system(f\"{cpp_sisso_executable} sisso.json > sisso.out\")\n",
+    "    \n",
+    "#     for it in range(9):\n",
+    "            \n",
+    "#         folder = root / f\"{prop}_r{rung}_cv/iter_{it}\"\n",
+    "#         folder.mkdir(parents=True, exist_ok=True)\n",
+    "#         os.chdir(folder)\n",
+    "    \n",
+    "#         write_sisso(train_df, rung,ops, lo_index[it], prop, folder)\n",
+    "#         os.system(f\"{cpp_sisso_executable} sisso.json > sisso.out\")\n",
+    "\n",
+    "# for rung in [3]:\n",
+    "#     folder = root / f\"{prop}_r{rung}\"\n",
+    "#     folder.mkdir()    \n",
+    "\n",
+    "#     os.chdir(folder)\n",
+    "#     write_sisso(train_df_r3, rung, ops,'[]', prop, folder)\n",
+    "#     os.system(f\"{cpp_sisso_executable} sisso.json > sisso.out\")\n",
+    "    \n",
+    "#     for it in range(9):\n",
+    "#         folder = root / f\"{prop}_r{rung}_cv/iter_{it}\"\n",
+    "#         folder.mkdir(parents=True, exist_ok=True)\n",
+    "#         os.chdir(folder)\n",
+    "\n",
+    "#         write_sisso(train_df_r3, rung, ops, lo_index[it], prop, folder)\n",
+    "#         os.system(f\"{cpp_sisso_executable} sisso.json > sisso.out\")\n",
+    "\n",
+    "# os.chdir(working_dir)"
    ]
   },
   {
@@ -354,138 +425,149 @@
    "source": [
     "# Cross-validation analysis\n",
     "\n",
-    "Now, we look at the cross-validation scores to decide the optimal model complexity with respect to is predictability. In the case of SISSO, the complexity is determined by the depth of the symbolic regression tree used to construct the candidate descriptor expressions, the so-called rung, and the number of selected expressions entering the model, the so-called descriptor dimension. We use the prediction root mean squared error (RMSE) averaged over all cross-validation iterations as metric. Therefore, the optimal complexity corresponds to the one yielding lower average cross-validation RMSE. "
+    "Now, we look at the cross-validation scores to decide the optimal model complexity with respect to is predictability. In the case of SISSO, the complexity is determined by the depth of the symbolic regression tree used to construct the candidate descriptor expressions, the so-called rung, and the number of selected expressions entering the model, the so-called descriptor dimension. We use the prediction root mean squared error (RMSE) averaged over all cross-validation iterations as metric. Therefore, the optimal complexity corresponds to the one yielding lower average cross-validation RMSE.\n"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
    "metadata": {
-    "ExecuteTime": {
-     "end_time": "2021-04-19T17:26:53.192025Z",
-     "start_time": "2021-04-19T17:26:52.657647Z"
-    }
+    "scrolled": false
    },
    "outputs": [],
    "source": [
-    "def get_cv_data(path,n_iter,dim,rank):\n",
+    "def get_cv_data(path, n_iter, dim, rank):\n",
     "    \"\"\"\n",
     "    reads cpp-sisso output files and collects cross-validation results\n",
     "    arguments: path(str): path of the directory containing the sisso output for the descriptor identification with the whole dataset\n",
-    "               n_iter(int): number of cross-validation iterations, 9 in this example \n",
+    "               n_iter(int): number of cross-validation iterations, 9 in this example\n",
     "               dim(int): descriptor dimension\n",
-    "               rank(int): size of the ensemble of top-ranked models analyzed, 25 in this example. \n",
+    "               rank(int): size of the ensemble of top-ranked models analyzed, 25 in this example.\n",
     "               obs: note that this function is adapted to the directory structure used above\n",
     "    \"\"\"\n",
-    "    e_train=[]\n",
-    "    e_cv=[]\n",
+    "    e_train = []\n",
+    "    e_cv = []\n",
+    "\n",
+    "    with open(f\"{path}/sisso.out\") as f:\n",
+    "        for line in f:\n",
+    "            if line.startswith(\"Train RMSE: \"):\n",
+    "                e_train.append(float(line[len(\"Train RMSE: \"):-1]))\n",
+    "\n",
+    "    for it in range(n_iter):\n",
+    "        e_int = []\n",
+    "        for rk in range(rank):\n",
+    "            \n",
+    "            filename = f\"{path}_cv/iter_{it}/models/train_dim_{dim}_model_{rk}.dat\"\n",
+    "            with open(filename) as f:\n",
+    "                for line in f:\n",
+    "                    if line.startswith(\"# RMSE: \"):\n",
+    "                        train_e = float(line.split(\";\")[0][len(\"# RMSE: \"):])\n",
+    "                    if line.startswith(\"# c0\"):\n",
+    "                        model = line[len(\"# \"):-1]\n",
+    "\n",
+    "            filename = f\"{path}_cv/iter_{it}/models/test_dim_{dim}_model_{rk}.dat\"\n",
+    "            with open(filename) as f:\n",
+    "                for line in f:\n",
+    "                    if line.startswith(\"# RMSE: \"):\n",
+    "                        test_e = float(line.split(\";\")[0][len(\"# RMSE: \"):])\n",
+    "            \n",
+    "            e_int.append([rk, train_e, test_e, model])\n",
+    "\n",
+    "        e_int_sorted = sorted(e_int, key=lambda x: x[2])\n",
+    "        e_cv.append([it, *e_int_sorted[0]])\n",
     "    \n",
-    "    with open(path+'/sisso.out') as f:\n",
-    "        data = f.readlines()\n",
-    "        for line in data:\n",
-    "            if line.__contains__('Train'):\n",
-    "                e_train.append(float(line.split()[2].split(';')[0]))\n",
-    "                    \n",
-    "    for it in [*range(n_iter)]:\n",
-    "        e_int=[]\n",
-    "        for rk in [*range(rank)]:\n",
-    "            with open(path+'_cv/iter_'+str(it)+'/models/train_dim_'+str(dim)+'_model_'+str(rk)+'.dat') as f:\n",
-    "                data = f.readlines()\n",
-    "                for line in data:\n",
-    "                    if line.__contains__('RMSE'):\n",
-    "                        train_e = float(line.split()[2].split(';')[0])\n",
-    "                    if line.__contains__('# c0'):\n",
-    "                        model = line.split('#')[1].split('\\n')[0]\n",
-    "            with open(path+'_cv/iter_'+str(it)+'/models/test_dim_'+str(dim)+'_model_'+str(rk)+'.dat') as f:\n",
-    "                data = f.readlines()\n",
-    "                for line in data:\n",
-    "                     if line.__contains__('RMSE'):\n",
-    "                        test_e = float(line.split()[2].split(';')[0])\n",
-    "            e_int.append([rk,train_e,test_e,model])\n",
-    "                \n",
-    "        e_int_sorted=sorted(e_int, key = lambda x: x[2])    \n",
-    "        e_cv.append([it,*e_int_sorted[0]])\n",
-    "    df=pd.DataFrame(e_cv,columns=['CV iteration','rank','train_rmse','test_rmse','model']).set_index('CV iteration')            \n",
-    "    return(df) \n",
-    "\n",
-    "def get_train_data(path,dim,rank):\n",
-    "    \"\"\"\n",
-    "    reads cpp-sisso output files and collects training results (obtained with the whole data set)\n",
+    "    columns =[\"CV iteration\", \"rank\", \"train_rmse\", \"test_rmse\", \"model\"]\n",
+    "    return pd.DataFrame(e_cv, columns=columns).set_index(\"CV iteration\")\n",
+    "\n",
+    "\n",
+    "def get_train_data(path, dim, rank):\n",
+    "    \"\"\"reads cpp-sisso output files and collects training results (obtained with the whole data set)\n",
     "    arguments: path(str): path of the directory containing the sisso output for the descriptor identification with the whole dataset\n",
     "               dim(int): descriptor dimension\n",
-    "               rank(int): number of models to show  \n",
+    "               rank(int): number of models to show\n",
     "    \"\"\"\n",
-    "    e_train_model=[]\n",
-    "    \n",
-    "    for rk in [*range(rank)]:\n",
-    "        with open(path+'/models/train_dim_'+str(dim)+'_model_'+str(rk)+'.dat') as f:\n",
-    "                data = f.readlines()\n",
-    "                for line in data:\n",
-    "                    if line.__contains__('RMSE'):\n",
-    "                        e_train=float(line.split()[2].split(';')[0])\n",
-    "                    if line.__contains__('# c0'):\n",
-    "                        model=line.split('#')[1].split('\\n')[0]\n",
-    "        e_train_model.append([rk,e_train,model])\n",
-    "        \n",
-    "    \n",
-    "    df=pd.DataFrame(e_train_model,columns=['rank','train_rmse','model']).set_index('rank')            \n",
-    "    return(df) \n",
+    "    e_train_model = []\n",
     "\n",
+    "    for rk in range(rank):\n",
+    "        filename = f\"{path}/models/train_dim_{dim}_model_{rk}.dat\"\n",
+    "        with open(filename) as f:\n",
+    "            for line in f:\n",
+    "                if line.startswith(\"# RMSE:\"):\n",
+    "                    e_train = float(line.split(\";\")[0][len(\"# RMSE:\"):])\n",
+    "                if line.startswith(\"# c0\"):\n",
+    "                    model = line[len(\"# \"):-1]\n",
+    "        e_train_model.append([rk, e_train, model])\n",
     "\n",
-    "def get_errors(prop,n_iter,rank):\n",
-    "    \"\"\"\n",
-    "    reads cpp-sisso output files and provides training and average cross-validation root mean square errors\n",
+    "    columns = [\"rank\", \"train_rmse\", \"model\"]\n",
+    "    df = pd.DataFrame(e_train_model, columns=columns).set_index(\"rank\")\n",
+    "    return df\n",
+    "\n",
+    "\n",
+    "def get_errors(prop, n_iter, rank):\n",
+    "    \"\"\"reads cpp-sisso output files and provides training and average cross-validation root mean square errors\n",
     "    arguments: prop(str): property name\n",
-    "               n_iter(int): number of cross-validation iterations, 9 in this example \n",
-    "               rank(int): number of models to show  \n",
+    "               n_iter(int): number of cross-validation iterations, 9 in this example\n",
+    "               rank(int): number of models to show\n",
     "    \"\"\"\n",
-    "    cv_av_rmse=[]\n",
-    "    train_rmse=[]\n",
-    "    for path in [prop+'_r1',prop+'_r2',prop+'_r3']:\n",
-    "        cv_rmse=[]\n",
-    "        train_rmse_rung=[]\n",
-    "        for dim in [1,2,3]:\n",
-    "            train_data=get_train_data(path,dim,rank)\n",
-    "            train_rmse_rung.append(train_data['train_rmse'][0])\n",
-    "            cv_data=get_cv_data(path,n_iter,dim,rank)\n",
-    "            cv_rmse.append(cv_data.mean(axis=0)[2])\n",
+    "    cv_av_rmse = []\n",
+    "    train_rmse = []\n",
+    "    for path in [f\"{prop}_r1\", f\"{prop}_r2\", f\"{prop}_r3\"]:\n",
+    "        cv_rmse = []\n",
+    "        train_rmse_rung = []\n",
+    "        for dim in [1, 2, 3]:\n",
+    "            train_data = get_train_data(path, dim, rank)\n",
+    "            train_rmse_rung.append(train_data[\"train_rmse\"][0])\n",
+    "            cv_data = get_cv_data(path, n_iter, dim, rank)\n",
+    "            cv_rmse.append(cv_data['test_rmse'].mean())\n",
+    "\n",
     "        cv_av_rmse.append(cv_rmse)\n",
     "        train_rmse.append(train_rmse_rung)\n",
-    "         \n",
-    "    return([train_rmse[0],cv_av_rmse[0],\n",
-    "            train_rmse[1],cv_av_rmse[1],\n",
-    "            train_rmse[2],cv_av_rmse[2]])\n",
-    "\n",
-    "fig, (ax1) = plt.subplots(1,1, constrained_layout=True, figsize=(8,5))\n",
-    "\n",
-    "e_p1=get_errors('data/s_acrylic_acid',9,25)\n",
-    "x=[1,2,3]\n",
-    "\n",
-    "ax1.scatter(x,e_p1[0], c='dimgrey')\n",
-    "ax1.plot(x,e_p1[0], c='dimgrey', linestyle='dashed',marker='o',mfc='dimgrey', label='$q=1$, training')\n",
-    "ax1.scatter(x,e_p1[1],  c='dimgrey')\n",
-    "ax1.plot(x,e_p1[1], label='$q=1$, prediction',  c='dimgrey')\n",
-    "ax1.scatter(x,e_p1[2], c='blue')\n",
-    "ax1.plot(x,e_p1[2], c='blue', linestyle='dashed',marker='o',mfc='blue', label='$q=2$, training')\n",
-    "ax1.scatter(x,e_p1[3], c='blue')\n",
-    "ax1.plot(x,e_p1[3], label='$q=2$, prediction', c='blue')\n",
-    "ax1.scatter(x,e_p1[4], c='green')\n",
-    "ax1.plot(x,e_p1[4], c='green', linestyle='dashed',marker='o',mfc='green', label='$q=3$, training')\n",
-    "ax1.scatter(x,e_p1[5], c='green')\n",
-    "ax1.plot(x,e_p1[5], label='$q=3$, prediction', c='green')\n",
-    "ax1.set_xlim(0.75,3.25)\n",
-    "ax1.set_ylabel('RMSE (%)')\n",
-    "ax1.set_xlabel('descriptor dimension ($D$)')\n",
-    "ax1.set_xticks(ticks=[1,2,3])\n",
-    "ax1.set_xticklabels(('1','2','3'))\n",
-    "ax1.legend(bbox_to_anchor=(1,1),fontsize=18)"
+    "\n",
+    "    return [train_rmse[0], cv_av_rmse[0], train_rmse[1], cv_av_rmse[1], train_rmse[2], cv_av_rmse[2]]\n",
+    "\n",
+    "\n",
+    "e_p1 = get_errors(\"data/s_acrylic_acid\", 9, 25)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2021-04-19T17:26:53.192025Z",
+     "start_time": "2021-04-19T17:26:52.657647Z"
+    },
+    "scrolled": false
+   },
+   "outputs": [],
+   "source": [
+    "x = [1, 2, 3]\n",
+    "\n",
+    "fig, (ax1) = plt.subplots(1, 1, constrained_layout=True, figsize=(8, 5))\n",
+    "\n",
+    "ax1.plot(x, e_p1[0], label=\"$q=1$, training\", c=\"dimgrey\", marker=\"o\", linestyle=\"dashed\")\n",
+    "ax1.plot(x, e_p1[1], label=\"$q=1$, prediction\", c=\"dimgrey\", marker=\"o\")\n",
+    "\n",
+    "ax1.plot(x, e_p1[2], label=\"$q=2$, training\", c=\"blue\", marker=\"o\", linestyle=\"dashed\")\n",
+    "ax1.plot(x, e_p1[3], label=\"$q=2$, prediction\", c=\"blue\", marker=\"o\")\n",
+    "\n",
+    "ax1.plot(x, e_p1[4], label=\"$q=3$, training\", c=\"green\", marker=\"o\", linestyle=\"dashed\")\n",
+    "ax1.plot(x, e_p1[5], label=\"$q=3$, prediction\", c=\"green\", marker=\"o\")\n",
+    "\n",
+    "ax1.set_xlim(0.75, 3.25)\n",
+    "ax1.set_ylabel(\"RMSE (%)\")\n",
+    "ax1.set_xlabel(\"descriptor dimension ($D$)\")\n",
+    "ax1.set_xticks(ticks=x)\n",
+    "ax1.legend(bbox_to_anchor=(1, 1))\n",
+    "\n",
+    "fig.show()"
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "For the acrylic acid selectivity, the optimal complexity is q=3, D=2. We can now look at the best models identified at such complexity using the whole data set. We note that SISSO ranks the models according to their training RMSE. "
+    "For the acrylic acid selectivity, the optimal complexity is q=3, D=2. We can now look at the best models identified at such complexity using the whole data set. We note that SISSO ranks the models according to their training RMSE.\n"
    ]
   },
   {
@@ -499,14 +581,14 @@
    },
    "outputs": [],
    "source": [
-    "get_train_data('data/s_acrylic_acid_r3',2,10)"
+    "get_train_data(\"data/s_acrylic_acid_r3\", 2, 10)"
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "The models obtained during each cross-validation iteration and the associated errors are shown below. We note that the second cross-validation iteration is associated to particularly high test errors (evaluated in the left-out material). This is because the material left-out of training during this cross-validation iteration is MoVTeNbOx. This catalyst is, by far, the material giving the highest acrylic acid selectivity values among all catalysts in the data set. "
+    "The models obtained during each cross-validation iteration and the associated errors are shown below. We note that the second cross-validation iteration is associated to particularly high test errors (evaluated in the left-out material). This is because the material left-out of training during this cross-validation iteration is MoVTeNbOx. This catalyst is, by far, the material giving the highest acrylic acid selectivity values among all catalysts in the data set.\n"
    ]
   },
   {
@@ -520,29 +602,25 @@
    },
    "outputs": [],
    "source": [
-    "get_cv_data('data/s_acrylic_acid_r3',9,2,25)"
+    "get_cv_data(\"data/s_acrylic_acid_r3\", 9, 2, 25)"
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "We also look at (i) the predictions of the best model (obtained using the whole data set at the identified optimal complexity) for the materials in the data set, (ii) to the value of each descriptor components for each material in the data set and (iii) to the temperature-dependant coefficients of the multi-task SISSO model."
+    "We also look at (i) the predictions of the best model (obtained using the whole data set at the identified optimal complexity) for the materials in the data set, (ii) to the value of each descriptor components for each material in the data set and (iii) to the temperature-dependant coefficients of the multi-task SISSO model.\n"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
    "metadata": {
-    "ExecuteTime": {
-     "end_time": "2021-04-19T17:29:56.139091Z",
-     "start_time": "2021-04-19T17:29:55.841578Z"
-    },
-    "scrolled": true
+    "scrolled": false
    },
    "outputs": [],
    "source": [
-    "def get_predictions(prop,path,rung,dim):\n",
+    "def get_predictions(prop, path, rung, dim):\n",
     "    \"\"\"\n",
     "    reads cpp-sisso output and returns the best model identified at the given complexity(rung and dim) evaluated for all materials in the training set\n",
     "    arguments: prop(str): property name\n",
@@ -550,91 +628,120 @@
     "               rung(int): descriptor rung\n",
     "               dim(int): descriptor dimension\n",
     "    \"\"\"\n",
-    "    prop_est=[]\n",
-    "    prop_value=[]\n",
-    "    mat=[]\n",
-    "    d1=[]\n",
-    "    d2=[]\n",
-    "    d3=[]\n",
-    "    with open(path+'_r'+str(rung)+'/models/train_dim_'+str(dim)+'_model_0.dat') as f:\n",
+    "    prop_est = []\n",
+    "    prop_value = []\n",
+    "    mat = []\n",
+    "    d1 = []\n",
+    "    d2 = []\n",
+    "    d3 = []\n",
+    "    \n",
+    "    filename = f\"{path}_r{rung}/models/train_dim_{dim}_model_0.dat\"\n",
+    "    with open(filename) as f:\n",
     "        for line in f:\n",
-    "            line = line.split('#',1)[0].strip()\n",
-    "            if not line:\n",
+    "            line = line.strip()\n",
+    "\n",
+    "            if line.startswith(\"#\") or not line:\n",
     "                continue\n",
-    "            descr=[float(x.strip()) for x in line.split(',')]\n",
-    "            prop_value.append(descr[0])\n",
-    "            prop_est.append(descr[1])\n",
-    "            d1.append(descr[2])\n",
-    "            if dim==2:\n",
-    "                d2.append(descr[3])\n",
-    "            if dim ==3:\n",
-    "                d2.append(descr[3])\n",
-    "                d3.append(descr[4])\n",
-    "    with open(path+'_r'+str(rung)+'/train.dat') as f:\n",
+    "\n",
+    "            values = [float(x.strip()) for x in line.split(\",\")]\n",
+    "            \n",
+    "            prop_value.append(values[0])\n",
+    "            prop_est.append(values[1])\n",
+    "            \n",
+    "            if dim == 1:\n",
+    "                d1.append(values[2])\n",
+    "            if dim == 2:\n",
+    "                d1.append(values[2])\n",
+    "                d2.append(values[3])\n",
+    "            if dim == 3:\n",
+    "                d1.append(values[2])\n",
+    "                d2.append(values[3])\n",
+    "                d3.append(values[4])\n",
+    "    \n",
+    "    filename = f\"{path}_r{rung}/train.dat\"\n",
+    "    with open(filename) as f:\n",
+    "        next(f) # skipping the first header line\n",
     "        for line in f:\n",
-    "            line = line.split('material',1)[0].strip()\n",
-    "            if not line:\n",
-    "                continue\n",
-    "            mat.append(line.split(',')[0].split('_')[0])\n",
-    "               \n",
-    "    if dim==1:\n",
-    "        df = pd.DataFrame({'material':mat, prop:prop_value, 'pred_'+prop:prop_est, 'd1':d1}).set_index('material')\n",
-    "    if dim==2:\n",
-    "        df = pd.DataFrame({'material':mat, prop:prop_value, 'pred_'+prop:prop_est, 'd1':d1, 'd2':d2}).set_index('material')\n",
-    "    if dim==3:\n",
-    "        df = pd.DataFrame({'material':mat, prop:prop_value, 'pred_'+prop:prop_est, 'd1':d1, 'd2':d2, 'd3':d3}).set_index('material')\n",
-    "    return(df)\n",
-    "\n",
-    "def get_coefficients(path,rung,dim):\n",
+    "            mat.append(line.split(\"_\", maxsplit=1)[0])\n",
+    "\n",
+    "    if dim == 1:\n",
+    "        data = zip(mat, prop_value, prop_est, d1)\n",
+    "        columns = [\"material\", prop, f\"pred_{prop}\", \"d1\"]\n",
+    "    if dim == 2:\n",
+    "        data = zip(mat, prop_value, prop_est, d1, d2)\n",
+    "        columns = [\"material\", prop, f\"pred_{prop}\", \"d1\", \"d2\"]\n",
+    "    if dim == 3:\n",
+    "        data = zip(mat, prop_value, prop_est, d1, d2, d3)\n",
+    "        columns = [\"material\", prop, f\"pred_{prop}\", \"d1\", \"d2\", \"d3\"]\n",
+    "    return pd.DataFrame(data, columns=columns).set_index(\"material\")\n",
+    "\n",
+    "\n",
+    "\n",
+    "def get_coefficients(path, rung, dim):\n",
     "    \"\"\"\n",
-    "    reads cpp-sisso output and returns the coefficients of the best model identified at the given complexity (rung and dim) \n",
+    "    reads cpp-sisso output and returns the coefficients of the best model identified at the given complexity (rung and dim)\n",
     "    arguments: path(str): path to folder\n",
     "               rung(int): descriptor rung\n",
     "               dim(int): descriptor dimension\n",
     "    \"\"\"\n",
-    "    c1=[]\n",
-    "    c2=[]\n",
-    "    c3=[]\n",
-    "    with open(path+'_r'+str(rung)+'/models/train_dim_'+str(dim)+'_model_0.dat') as f:\n",
+    "    c1 = []\n",
+    "    c2 = []\n",
+    "    c3 = []\n",
+    "    \n",
+    "    filename = f\"{path}_r{rung}/models/train_dim_{dim}_model_0.dat\"\n",
+    "    with open(filename) as f:\n",
     "        for line in itertools.islice(f, 5, 14):\n",
-    "            li=line.split(',',1)[1].strip()\n",
+    "            li = line.split(\",\", 1)[1].strip()\n",
     "            li = li[:-1]\n",
-    "            descr=[float(x.strip()) for x in li.split(',')]\n",
-    "            c1.append(descr[0])\n",
-    "            if dim==2:\n",
+    "            descr = [float(x.strip()) for x in li.split(\",\")]\n",
+    "            \n",
+    "            if dim == 1:\n",
+    "                c1.append(descr[0])\n",
+    "            if dim == 2:\n",
+    "                c1.append(descr[0])\n",
     "                c2.append(descr[1])\n",
-    "            if dim==3:\n",
-    "                c2.append(descr[1])           \n",
-    "                c3.append(descr[1])\n",
-    "    return(c1,c2,c3)\n",
-    "\n",
-    "fig, (ax1) = plt.subplots(1,1, constrained_layout=True, figsize=(9,5))\n",
-    "\n",
-    "p=get_predictions('s_acrylic_acid','data/s_acrylic_acid',3,2)\n",
-    "T=[225, 250, 275, 300, 325, 350, 375, 400, 425]\n",
-    "T_red=[225, 250, 275, 300]\n",
-    "\n",
-    "ax1.scatter(T_red, p.loc['MoVOx']['s_acrylic_acid'], c='magenta', marker=\"o\", s=80, label='MoVO$_x$')\n",
-    "ax1.scatter(T, p.loc['MoVTeNbOx']['s_acrylic_acid'], c='green', marker=\"o\",s=80, label='MoVTeNbO$_x$')\n",
-    "ax1.scatter(T, p.loc['V2O5']['s_acrylic_acid'],  c='red', marker=\"s\", s=80, label='V$_2$O$_5$')\n",
-    "ax1.scatter(T, p.loc['VPP']['s_acrylic_acid'], c='darkorange', marker=\"D\", s=80, label='VPP')\n",
-    "ax1.scatter(T, p.loc['A-VPP']['s_acrylic_acid'],  c='gold', marker=\"D\", s=80, label='a-VPP')\n",
-    "ax1.scatter(T, p.loc['B-VOPO4']['s_acrylic_acid'],  c='blue', marker=\">\", label=r'$\\beta$-VOPO$_4$',s=80)\n",
-    "ax1.scatter(T, p.loc['A-VWOPO4']['s_acrylic_acid'],  c='darkcyan', marker=\"^\",label=r'$\\alpha$-VWOPO$_4$',s=80)\n",
-    "ax1.scatter(T, p.loc['A-VOPO4']['s_acrylic_acid'], c='mediumpurple', marker=\"^\", label=r'$\\alpha$-VOPO$_4$',s=80)\n",
-    "ax1.scatter(T, p.loc['VWPOx']['s_acrylic_acid'], c='cyan', marker=\"<\", label='VWPO$_x$',s=80)\n",
-    "ax1.legend(bbox_to_anchor=(1,1),fontsize=18)\n",
-    "ax1.scatter(T_red, p.loc['MoVOx']['pred_s_acrylic_acid'], c='magenta', marker=\"x\",s=200)\n",
-    "ax1.scatter(T, p.loc['MoVTeNbOx']['pred_s_acrylic_acid'],  c='green', marker=\"x\",s=200)\n",
-    "ax1.scatter(T, p.loc['V2O5']['pred_s_acrylic_acid'], c='red', marker=\"x\",s=200)\n",
-    "ax1.scatter(T, p.loc['VPP']['pred_s_acrylic_acid'],  c='darkorange', marker=\"x\",s=200)\n",
-    "ax1.scatter(T, p.loc['A-VPP']['pred_s_acrylic_acid'],  c='gold', marker=\"x\",s=200)\n",
-    "ax1.scatter(T, p.loc['B-VOPO4']['pred_s_acrylic_acid'],  c='blue', marker=\"x\",s=200)\n",
-    "ax1.scatter(T, p.loc['A-VWOPO4']['pred_s_acrylic_acid'], c='darkcyan', marker=\"x\",s=200)\n",
-    "ax1.scatter(T, p.loc['A-VOPO4']['pred_s_acrylic_acid'], c='mediumpurple', marker=\"x\",s=200)\n",
-    "ax1.scatter(T, p.loc['VWPOx']['pred_s_acrylic_acid'], c='cyan', marker=\"x\",s=200)\n",
-    "ax1.set_ylabel('$S_{\\mathrm{acrylic \\: acid}}$ (%)')\n",
-    "ax1.set_xlabel('Temperature ($\\degree$C)')"
+    "            if dim == 3:\n",
+    "                c1.append(descr[0])\n",
+    "                c2.append(descr[1])\n",
+    "                c3.append(descr[2])\n",
+    "\n",
+    "    return (c1, c2, c3)\n",
+    "\n",
+    "\n",
+    "p = get_predictions(\"s_acrylic_acid\", \"data/s_acrylic_acid\", 3, 2)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2021-04-19T17:29:56.139091Z",
+     "start_time": "2021-04-19T17:29:55.841578Z"
+    },
+    "scrolled": false
+   },
+   "outputs": [],
+   "source": [
+    "fig, (ax1) = plt.subplots(1, 1, constrained_layout=True, figsize=(8, 5))\n",
+    "\n",
+    "\n",
+    "for m_id in materials:\n",
+    "    m = df.loc[m_id]\n",
+    "    T = df.loc[m_id][\"T (C)\"]\n",
+    "    ax1.scatter(T, m[\"s_acrylic_acid (p)\"], c=colors[m_id], marker=markers[m_id], label=labels[m_id])\n",
+    "\n",
+    "for m_id in materials:\n",
+    "    m = p.loc[m_id]\n",
+    "    T = df.loc[m_id][\"T (C)\"]\n",
+    "    ax1.scatter(T, m[\"pred_s_acrylic_acid\"], c=colors[m_id], marker='x')\n",
+    "    \n",
+    "\n",
+    "ax1.set_ylabel(\"$S_{\\mathrm{acrylic \\: acid}}$ (%)\")\n",
+    "ax1.set_xlabel(\"Temperature ($\\degree$C)\")\n",
+    "ax1.legend(bbox_to_anchor=(1, 1))\n",
+    "\n",
+    "fig.show()"
    ]
   },
   {
@@ -648,51 +755,25 @@
    },
    "outputs": [],
    "source": [
-    "fig, (ax1,ax2) = plt.subplots(2,1, constrained_layout=True, figsize=(8,12))\n",
-    "d1=[p.loc['MoVOx']['d1'][0],\n",
-    "    p.loc['MoVTeNbOx']['d1'][0],\n",
-    "    p.loc['V2O5']['d1'][0],\n",
-    "    p.loc['VPP']['d1'][0],\n",
-    "    p.loc['A-VPP']['d1'][0],\n",
-    "    p.loc['B-VOPO4']['d1'][0],\n",
-    "    p.loc['A-VWOPO4']['d1'][0],\n",
-    "    p.loc['A-VOPO4']['d1'][0],\n",
-    "    p.loc['VWPOx']['d1'][0]]\n",
-    "\n",
-    "d2=[p.loc['MoVOx']['d2'][0],\n",
-    "    p.loc['MoVTeNbOx']['d2'][0],\n",
-    "    p.loc['V2O5']['d2'][0],\n",
-    "    p.loc['VPP']['d2'][0],\n",
-    "    p.loc['A-VPP']['d2'][0],\n",
-    "    p.loc['B-VOPO4']['d2'][0],\n",
-    "    p.loc['A-VWOPO4']['d2'][0],\n",
-    "    p.loc['A-VOPO4']['d2'][0],\n",
-    "    p.loc['VWPOx']['d2'][0]]\n",
-    "    \n",
-    "#d3=[p.loc['MoVOx']['d3'][0],\n",
-    "#    p.loc['MoVTeNbOx']['d3'][0],\n",
-    "#    p.loc['V2O5']['d3'][0],\n",
-    "#    p.loc['VPP']['d3'][0],\n",
-    "#    p.loc['A-VPP']['d3'][0],\n",
-    "#    p.loc['B-VOPO4']['d3'][0],\n",
-    "#    p.loc['A-VWOPO4']['d3'][0],\n",
-    "#    p.loc['A-VOPO4']['d3'][0],\n",
-    "#    p.loc['VWPOx']['d3'][0]]\n",
-    "\n",
-    "ax1.set_xticks(np.arange(9))\n",
-    "ax2.set_xticks(np.arange(9))\n",
-    "#ax3.set_xticks(np.arange(9))\n",
-    "ax1.set_xticklabels('', visible=False)\n",
-    "ax2.set_xticklabels(('MoVO$_x$', 'MoVTeNbO$_x$', 'V$_2$O$_5$', 'VPP', 'a-VPP',\n",
-    "                     r'$\\beta$-VOPO$_4$', r'$\\alpha$-VWOPO$_4$', r'$\\alpha$-VOPO$_4$', \n",
-    "                     'VWPO$_x$'), rotation=90)\n",
-    "ax1.bar(np.arange(9),d1 , color = 'black', width = 0.50)\n",
-    "ax2.bar(np.arange(9),d2 , color = 'green', width = 0.50)\n",
-    "#ax3.bar(np.arange(9)+0.2,d3 , color = 'red', width = 0.50)\n",
-    "\n",
-    "ax1.set_ylabel('$ d_1^S$')\n",
-    "ax2.set_ylabel('$ d_2^S$')\n",
-    "#ax3.set_ylabel('$ d_3^S$')"
+    "fig, (ax1, ax2) = plt.subplots(2, 1, constrained_layout=True, figsize=(7, 5))\n",
+    "\n",
+    "d1 = [p.loc[m][\"d1\"][0] for m in materials]\n",
+    "d2 = [p.loc[m][\"d2\"][0] for m in materials]\n",
+    "\n",
+    "inds = np.arange(9)\n",
+    "ax1.bar(inds, d1, width=0.50)\n",
+    "ax2.bar(inds, d2, width=0.50)\n",
+    "\n",
+    "ax1.set_xticks(inds)\n",
+    "ax1.set_xticklabels(\"\", visible=False)\n",
+    "\n",
+    "ax2.set_xticks(inds)\n",
+    "ax2.set_xticklabels(labels, rotation=90)\n",
+    "\n",
+    "ax1.set_ylabel(\"$ d_1^S$\")\n",
+    "ax2.set_ylabel(\"$ d_2^S$\")\n",
+    "\n",
+    "fig.show()\n"
    ]
   },
   {
@@ -706,37 +787,35 @@
    },
    "outputs": [],
    "source": [
-    "fig, (ax1,ax2) = plt.subplots(2,1, constrained_layout=True, figsize=(8,10))\n",
-    "coeff=get_coefficients('data/s_acrylic_acid',3,2)\n",
+    "coeff = get_coefficients(\"data/s_acrylic_acid\", 3, 2)\n",
     "\n",
-    "ax1.scatter(T,coeff[0],c='black',s=80)\n",
-    "ax2.scatter(T,coeff[1],c='green',s=80)\n",
-    "#ax3.scatter(t,coeff[2],c='red',s=80)\n",
+    "spline_c1 = make_interp_spline(T, coeff[0], k=2)\n",
+    "spline_c2 = make_interp_spline(T, coeff[1], k=2)\n",
     "\n",
-    "ax1.set_ylabel('$c_1^S $')\n",
-    "ax2.set_ylabel('$c_2^S $')\n",
-    "#ax3.set_ylabel('$c_3^S $')\n",
-    "ax2.set_xlabel('Temperature ($\\degree$C)')\n",
+    "fig, (ax1, ax2) = plt.subplots(2, 1, constrained_layout=True, figsize=(7, 8))\n",
     "\n",
-    "T_int = np.linspace(220, 430, 100) \n",
+    "ax1.scatter(T, coeff[0])\n",
+    "ax2.scatter(T, coeff[1])\n",
     "\n",
-    "spline_c1 = make_interp_spline(T,coeff[0], k=2) \n",
+    "T_int = np.linspace(220, 430, 100)\n",
     "c1_smooth = spline_c1(T_int)\n",
-    "spline_c2 = make_interp_spline(T,coeff[1], k=2) \n",
     "c2_smooth = spline_c2(T_int)\n",
-    "#spline_c3 = make_interp_spline(t,coeff[2], k=2) \n",
-    "#c3_smooth = spline_c3(t_int)\n",
     "\n",
-    "ax1.plot(T_int, c1_smooth, color='black')\n",
-    "ax2.plot(T_int, c2_smooth, color='green')\n",
-    "#ax3.plot(t_int, c3_smooth, color='red')"
+    "ax1.plot(T_int, c1_smooth)\n",
+    "ax2.plot(T_int, c2_smooth)\n",
+    "\n",
+    "ax1.set_ylabel(\"$c_1^S $\")\n",
+    "ax2.set_ylabel(\"$c_2^S $\")\n",
+    "ax2.set_xlabel(\"Temperature ($\\degree$C)\")\n",
+    "\n",
+    "plt.show()"
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "Finally, because the descriptor components multiplied by the coefficients are correlated with each other for this specific target property, we can fit a linear relationship in order to generate a 2-dimensional \"map of catalytsts\" according to acrylic acid selectivity. This material chart indicates the regions of the materials space associated to high performance,  where the search for novel materials should be focused. "
+    "Finally, because the descriptor components multiplied by the coefficients are correlated with each other for this specific target property, we can fit a linear relationship in order to generate a 2-dimensional \"map of catalytsts\" according to acrylic acid selectivity. This material chart indicates the regions of the materials space associated to high performance, where the search for novel materials should be focused.\n"
    ]
   },
   {
@@ -750,13 +829,20 @@
    },
    "outputs": [],
    "source": [
-    "plt.scatter(d2,d1,s=50,c='black')\n",
     "lin_fit = np.polyfit(d2, d1, 1)\n",
-    "lin_approx= np.poly1d(lin_fit)\n",
-    "plt.plot(d2,lin_approx(d2),linestyle='dashed',color='black')\n",
-    "print(lin_approx)\n",
-    "plt.ylabel('$ d_1^S$')\n",
-    "plt.xlabel('$ d_2^S$')"
+    "lin_approx = np.poly1d(lin_fit)\n",
+    "\n",
+    "fig, (ax1) = plt.subplots(1, 1, constrained_layout=True, figsize=(7, 5))\n",
+    "\n",
+    "ax1.scatter(d2, d1, s=50, label=\"data\")\n",
+    "ax1.plot(d2, lin_approx(d2), linestyle=\"dashed\", label=\"fit\")\n",
+    "\n",
+    "plt.title(f\"Linear fit: {lin_approx}\")\n",
+    "plt.ylabel(\"$d_1^S$\")\n",
+    "plt.xlabel(\"$d_2^S$\")\n",
+    "plt.legend()\n",
+    "\n",
+    "plt.show()"
    ]
   },
   {
@@ -771,31 +857,30 @@
    },
    "outputs": [],
    "source": [
-    "fig, (ax1) = plt.subplots(1,1, constrained_layout=True, figsize=(9,7))\n",
-    "plasma_r = matplotlib.cm.get_cmap('plasma_r')\n",
-    "cmapstyle='plasma_r'\n",
+    "d2_range = np.linspace(0, 150, 400)\n",
     "\n",
-    "d2_range =np.linspace(0,150, 400)\n",
-    "\n",
-    "S1=[]\n",
+    "S1 = []\n",
     "for i in range(len(T_int)):\n",
-    "    s=d2_range*c2_smooth[i]+c1_smooth[i]*(0.002946*d2_range-0.0009318)\n",
-    "    S1.append(s) \n",
-    "S1a=np.transpose(np.array(S1))\n",
-    "\n",
-    "ax1.pcolormesh(T_int, d2_range, S1a, cmap=cmapstyle, vmin=0, vmax=100)\n",
-    "fig.colorbar(ax1.pcolormesh(T_int, d2_range, S1a, cmap=cmapstyle, vmin=0, vmax=100), ax=ax1,label='$S_{\\mathrm{acrylic \\: acid}}^{\\mathrm{(SISSO)}}(T)$ (%)')\n",
-    "\n",
-    "ax1.contour(T_int,d2_range,S1a,levels=[0,20,40,60,80,100],colors='lightgray')\n",
-    "\n",
-    "ax1.hlines(d2[0],220,430)\n",
-    "ax1.hlines(d2[1],220,430)\n",
-    "ax1.hlines(d2[2],220,430)\n",
-    "ax1.hlines(d2[3],220,430)\n",
-    "ax1.hlines(d2[4],220,430)\n",
-    "ax1.set_xlabel('Temperature ($\\degree$C)')\n",
-    "ax1.set_ylabel('$d_2^S$')\n",
-    "ax1.set_xlim(220,430)\n"
+    "    s = d2_range * c2_smooth[i] + c1_smooth[i] * (0.002946 * d2_range - 0.0009318)\n",
+    "    S1.append(s)\n",
+    "\n",
+    "S1a = np.transpose(np.array(S1))\n",
+    "\n",
+    "\n",
+    "fig, (ax1) = plt.subplots(1, 1, constrained_layout=True, figsize=(7, 5))\n",
+    "\n",
+    "im = ax1.pcolormesh(T_int, d2_range, S1a, cmap=\"plasma_r\", vmin=0, vmax=100)\n",
+    "fig.colorbar(im, ax=ax1, label=r\"$S_{\\mathrm{acrylic \\: acid}}^{\\mathrm{(SISSO)}}(T)$ (%)\")\n",
+    "\n",
+    "ax1.contour(T_int, d2_range, S1a, levels=[0, 20, 40, 60, 80, 100], colors=\"lightgray\")\n",
+    "\n",
+    "ax1.hlines(d2, 220, 430)\n",
+    "ax1.set_xlim(220, 430)\n",
+    "\n",
+    "ax1.set_xlabel(r\"Temperature ($\\degree$C)\")\n",
+    "ax1.set_ylabel(\"$d_2^S$\")\n",
+    "\n",
+    "plt.show()"
    ]
   },
   {