diff --git a/.ipynb_checkpoints/hierarchical_sisso-checkpoint.ipynb b/.ipynb_checkpoints/hierarchical_sisso-checkpoint.ipynb index 6cb0b7620a24dc7f61a7b63e600b2663797c6625..4b65365d9eb6c0825c97ddd7eecf5048ec7db827 100644 --- a/.ipynb_checkpoints/hierarchical_sisso-checkpoint.ipynb +++ b/.ipynb_checkpoints/hierarchical_sisso-checkpoint.ipynb @@ -11,9 +11,9 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "<img style=\"float: left;\" src=\"assets/hisisso/logo_MPG.png\" width=150>\n", - "<img style=\"float: left; margin-top: -10px\" src=\"assets/hisisso/logo_NOMAD.png\" width=250>\n", - "<img style=\"float: left; margin-top: -5px\" src=\"assets/hisisso/logo_HU.png\" width=130>" + "<img style=\"float: left;\" src=\"assets/hierarchical_sisso/logo_MPG.png\" width=150>\n", + "<img style=\"float: left; margin-top: -10px\" src=\"assets/hierarchical_sisso/logo_NOMAD.png\" width=250>\n", + "<img style=\"float: left; margin-top: -5px\" src=\"assets/hierarchical_sisso/logo_HU.png\" width=130>" ] }, { @@ -285,7 +285,51 @@ " avg_cv_errors = cv_errors.mean(axis=0)\n", " std_cv_errors = cv_errors.std(axis=0)\n", " \n", - " return(train_errors, avg_cv_errors, std_cv_errors) " + " return(train_errors, avg_cv_errors, std_cv_errors)\n", + "\n", + "def get_model(path,dim,rung,prop,unit,mode):\n", + " \"\"\"\n", + " reads cpp-sisso output and returns the model and model components\n", + " arguments: path(str): directory containing the output files\n", + " dim(int): model dimension\n", + " rung(int): number of iterations for operator application (e.g. 1, 2 or 3)\n", + " prop(str): property label\n", + " unit(str): property unit\n", + " mode(str): 'train' or 'test' to obtain the models evaluated for training and test materials, respectively\n", + " \"\"\"\n", + " if mode == \"train\":\n", + " model = load_model(f\"{path}/models/train_dim_{dim}_model_0.dat\")\n", + " else:\n", + " model = load_model(\n", + " f\"{path}/models/train_dim_{dim}_model_0.dat\", f\"{path}/models/test_dim_{dim}_model_0.dat\"\n", + " )\n", + " \n", + " coefficients = model.coefs[0]\n", + " columns = [f\"{prop} {unit}\", f\"pred_{prop}_r{rung} {unit}\"]\n", + " columns += [f\"f{ii}\" for ii in range(1, len(model.feats) + 1)]\n", + "\n", + " if mode == \"train\":\n", + " df = pd.DataFrame(\n", + " index = [mat_id.strip() for mat_id in model.sample_ids_train],\n", + " data = np.column_stack((model.prop_train.reshape(-1, 1), model.fit.reshape(-1, 1), np.array([feat.value for feat in model.feats]).T)),\n", + " columns=columns,\n", + " )\n", + " else:\n", + " df = pd.DataFrame(\n", + " index = [mat_id.strip() for mat_id in model.sample_ids_test],\n", + " data = np.column_stack((model.prop_test.reshape(-1, 1), model.predict.reshape(-1, 1), np.array([feat.test_value for feat in model.feats]).T)),\n", + " columns=columns,\n", + " )\n", + " \n", + " df.dropna(axis=1, inplace=True)\n", + " df.drop(labels=f\"{prop} {unit}\", axis=1, inplace=True)\n", + " \n", + " for i in range(len(coefficients)-1):\n", + " j=i+1\n", + " df[f\"a{j}_r{rung}_{prop} {unit}\"]=df[f\"f{j}\"] * coefficients[i]\n", + " df.drop(labels=f\"f{j}\", axis=1, inplace=True)\n", + "\n", + " return(df)" ] }, { @@ -808,57 +852,6 @@ "The case of bulk modulus will be treated separately below, since the information on lattice constants and cohesive energy will be used to model the bulk modulus. " ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def get_model(path,dim,rung,prop,unit,mode):\n", - " \"\"\"\n", - " reads cpp-sisso output and returns the model and model components\n", - " arguments: path(str): directory containing the output files\n", - " dim(int): model dimension\n", - " rung(int): number of iterations for operator application (e.g. 1, 2 or 3)\n", - " prop(str): property label\n", - " unit(str): property unit\n", - " mode(str): 'train' or 'test' to obtain the models evaluated for training and test materials, respectively\n", - " \"\"\"\n", - " if mode == \"train\":\n", - " model = load_model(f\"{path}/models/train_dim_{dim}_model_0.dat\")\n", - " else:\n", - " model = load_model(\n", - " f\"{path}/models/train_dim_{dim}_model_0.dat\", f\"{path}/models/test_dim_{dim}_model_0.dat\"\n", - " )\n", - " \n", - " coefficients = model.coefs[0]\n", - " columns = [f\"{prop} {unit}\", f\"pred_{prop}_r{rung} {unit}\"]\n", - " columns += [f\"f{ii}\" for ii in range(1, len(model.feats) + 1)]\n", - "\n", - " if mode == \"train\":\n", - " df = pd.DataFrame(\n", - " index = [mat_id.strip() for mat_id in model.sample_ids_train],\n", - " data = np.column_stack((model.prop_train.reshape(-1, 1), model.fit.reshape(-1, 1), np.array([feat.value for feat in model.feats]).T)),\n", - " columns=columns,\n", - " )\n", - " else:\n", - " df = pd.DataFrame(\n", - " index = [mat_id.strip() for mat_id in model.sample_ids_test],\n", - " data = np.column_stack((model.prop_test.reshape(-1, 1), model.predict.reshape(-1, 1), np.array([feat.test_value for feat in model.feats]).T)),\n", - " columns=columns,\n", - " )\n", - " \n", - " df.dropna(axis=1, inplace=True)\n", - " df.drop(labels=f\"{prop} {unit}\", axis=1, inplace=True)\n", - " \n", - " for i in range(len(coefficients)-1):\n", - " j=i+1\n", - " df[f\"a{j}_r{rung}_{prop} {unit}\"]=df[f\"f{j}\"] * coefficients[i]\n", - " df.drop(labels=f\"f{j}\", axis=1, inplace=True)\n", - "\n", - " return(df)" - ] - }, { "cell_type": "code", "execution_count": null, diff --git a/hierarchical_sisso.ipynb b/hierarchical_sisso.ipynb index 6cb0b7620a24dc7f61a7b63e600b2663797c6625..4b65365d9eb6c0825c97ddd7eecf5048ec7db827 100644 --- a/hierarchical_sisso.ipynb +++ b/hierarchical_sisso.ipynb @@ -11,9 +11,9 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "<img style=\"float: left;\" src=\"assets/hisisso/logo_MPG.png\" width=150>\n", - "<img style=\"float: left; margin-top: -10px\" src=\"assets/hisisso/logo_NOMAD.png\" width=250>\n", - "<img style=\"float: left; margin-top: -5px\" src=\"assets/hisisso/logo_HU.png\" width=130>" + "<img style=\"float: left;\" src=\"assets/hierarchical_sisso/logo_MPG.png\" width=150>\n", + "<img style=\"float: left; margin-top: -10px\" src=\"assets/hierarchical_sisso/logo_NOMAD.png\" width=250>\n", + "<img style=\"float: left; margin-top: -5px\" src=\"assets/hierarchical_sisso/logo_HU.png\" width=130>" ] }, { @@ -285,7 +285,51 @@ " avg_cv_errors = cv_errors.mean(axis=0)\n", " std_cv_errors = cv_errors.std(axis=0)\n", " \n", - " return(train_errors, avg_cv_errors, std_cv_errors) " + " return(train_errors, avg_cv_errors, std_cv_errors)\n", + "\n", + "def get_model(path,dim,rung,prop,unit,mode):\n", + " \"\"\"\n", + " reads cpp-sisso output and returns the model and model components\n", + " arguments: path(str): directory containing the output files\n", + " dim(int): model dimension\n", + " rung(int): number of iterations for operator application (e.g. 1, 2 or 3)\n", + " prop(str): property label\n", + " unit(str): property unit\n", + " mode(str): 'train' or 'test' to obtain the models evaluated for training and test materials, respectively\n", + " \"\"\"\n", + " if mode == \"train\":\n", + " model = load_model(f\"{path}/models/train_dim_{dim}_model_0.dat\")\n", + " else:\n", + " model = load_model(\n", + " f\"{path}/models/train_dim_{dim}_model_0.dat\", f\"{path}/models/test_dim_{dim}_model_0.dat\"\n", + " )\n", + " \n", + " coefficients = model.coefs[0]\n", + " columns = [f\"{prop} {unit}\", f\"pred_{prop}_r{rung} {unit}\"]\n", + " columns += [f\"f{ii}\" for ii in range(1, len(model.feats) + 1)]\n", + "\n", + " if mode == \"train\":\n", + " df = pd.DataFrame(\n", + " index = [mat_id.strip() for mat_id in model.sample_ids_train],\n", + " data = np.column_stack((model.prop_train.reshape(-1, 1), model.fit.reshape(-1, 1), np.array([feat.value for feat in model.feats]).T)),\n", + " columns=columns,\n", + " )\n", + " else:\n", + " df = pd.DataFrame(\n", + " index = [mat_id.strip() for mat_id in model.sample_ids_test],\n", + " data = np.column_stack((model.prop_test.reshape(-1, 1), model.predict.reshape(-1, 1), np.array([feat.test_value for feat in model.feats]).T)),\n", + " columns=columns,\n", + " )\n", + " \n", + " df.dropna(axis=1, inplace=True)\n", + " df.drop(labels=f\"{prop} {unit}\", axis=1, inplace=True)\n", + " \n", + " for i in range(len(coefficients)-1):\n", + " j=i+1\n", + " df[f\"a{j}_r{rung}_{prop} {unit}\"]=df[f\"f{j}\"] * coefficients[i]\n", + " df.drop(labels=f\"f{j}\", axis=1, inplace=True)\n", + "\n", + " return(df)" ] }, { @@ -808,57 +852,6 @@ "The case of bulk modulus will be treated separately below, since the information on lattice constants and cohesive energy will be used to model the bulk modulus. " ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def get_model(path,dim,rung,prop,unit,mode):\n", - " \"\"\"\n", - " reads cpp-sisso output and returns the model and model components\n", - " arguments: path(str): directory containing the output files\n", - " dim(int): model dimension\n", - " rung(int): number of iterations for operator application (e.g. 1, 2 or 3)\n", - " prop(str): property label\n", - " unit(str): property unit\n", - " mode(str): 'train' or 'test' to obtain the models evaluated for training and test materials, respectively\n", - " \"\"\"\n", - " if mode == \"train\":\n", - " model = load_model(f\"{path}/models/train_dim_{dim}_model_0.dat\")\n", - " else:\n", - " model = load_model(\n", - " f\"{path}/models/train_dim_{dim}_model_0.dat\", f\"{path}/models/test_dim_{dim}_model_0.dat\"\n", - " )\n", - " \n", - " coefficients = model.coefs[0]\n", - " columns = [f\"{prop} {unit}\", f\"pred_{prop}_r{rung} {unit}\"]\n", - " columns += [f\"f{ii}\" for ii in range(1, len(model.feats) + 1)]\n", - "\n", - " if mode == \"train\":\n", - " df = pd.DataFrame(\n", - " index = [mat_id.strip() for mat_id in model.sample_ids_train],\n", - " data = np.column_stack((model.prop_train.reshape(-1, 1), model.fit.reshape(-1, 1), np.array([feat.value for feat in model.feats]).T)),\n", - " columns=columns,\n", - " )\n", - " else:\n", - " df = pd.DataFrame(\n", - " index = [mat_id.strip() for mat_id in model.sample_ids_test],\n", - " data = np.column_stack((model.prop_test.reshape(-1, 1), model.predict.reshape(-1, 1), np.array([feat.test_value for feat in model.feats]).T)),\n", - " columns=columns,\n", - " )\n", - " \n", - " df.dropna(axis=1, inplace=True)\n", - " df.drop(labels=f\"{prop} {unit}\", axis=1, inplace=True)\n", - " \n", - " for i in range(len(coefficients)-1):\n", - " j=i+1\n", - " df[f\"a{j}_r{rung}_{prop} {unit}\"]=df[f\"f{j}\"] * coefficients[i]\n", - " df.drop(labels=f\"f{j}\", axis=1, inplace=True)\n", - "\n", - " return(df)" - ] - }, { "cell_type": "code", "execution_count": null, diff --git a/setup.py b/setup.py index 6cec3c40cc897eaa81e7cc62ca0bc4e962c2eadb..19639afc490da995ff6c62af21d5026be31158e3 100644 --- a/setup.py +++ b/setup.py @@ -13,6 +13,6 @@ setup( description=metainfo['title'], long_description=metainfo['description'], packages=find_packages(), - install_requires=['pandas', 'numpy', 'matplotlib', 'scikit-learn', 'scipy', 'json', 'seaborn', 'sissopp'], + install_requires=['pandas', 'numpy', 'matplotlib', 'scikit-learn', 'scipy', 'json', 'seaborn'], )