Skip to content
Snippets Groups Projects
Commit e68ae2a0 authored by Aakash Ashok Naik's avatar Aakash Ashok Naik :eyes:
Browse files

Replace atomic_properties_pymat.py

parent e86922ca
No related branches found
No related tags found
No related merge requests found
#!/usr/bin/env python #!/usr/bin/env python
# coding: utf-8 # coding: utf-8
# In[1]: '''
Module : atomic_properties_pymat
With the help of the atomic_properties_pymat class from the metainfo module, this module instantiates objects identified by atomic element symbol, through which the available atomic features can be accessed.
'''
from atomicfeaturespackage.metainfo.metainfo import atomic_properties_pymat,metadata from atomicfeaturespackage.metainfo.metainfo import atomic_properties_pymat,metadata
...@@ -11,43 +16,17 @@ import urllib, json, re, typing ...@@ -11,43 +16,17 @@ import urllib, json, re, typing
from pandas.io.json import json_normalize from pandas.io.json import json_normalize
import os import os
#
# # Read the json file directly from pymatgen source into a Pandas dataframe
# In[2]:
#url = "https://raw.githubusercontent.com/materialsproject/pymatgen/ecdc7e40e048a3868a6579f3a9f9a41168c9bf75/pymatgen/core/periodic_table.json"
# In[3]:
# response = urllib.request.urlopen(url)
# data = json.loads(response.read())
# df1 = pd.DataFrame(data)
# ### Flatten the nested dictionary of Shannon radii feature into simple key-value dictionary
# In[4]:
path = os.path.abspath(os.path.join(os.path.dirname(__file__),"..")) path = os.path.abspath(os.path.join(os.path.dirname(__file__),".."))
path_new = os.path.join(path, "data","pymat", "periodic_table.json") path_new = os.path.join(path, "data","pymat", "periodic_table.json")
df1 = pd.read_json(path_new) df1 = pd.read_json(path_new)
# Flatten the nested dictionary of Shannon radii feature into simple key-value dictionary
# In[5]:
for elem in df1.columns: for elem in df1.columns:
if type(df1.loc['Shannon radii'][elem]) == dict: if type(df1.loc['Shannon radii'][elem]) == dict:
df1.loc['Shannon radii'][elem] = (json_normalize(df1.loc['Shannon radii'][elem]).to_dict(orient='records')[0]) df1.loc['Shannon radii'][elem] = (json_normalize(df1.loc['Shannon radii'][elem]).to_dict(orient='records')[0])
# ### replace the missing spin data for Shannon radii keys into a dictionary # Replace the missing spin data for Shannon radii keys into a dictionary
# In[6]:
repl_dict ={} repl_dict ={}
for elem in df1.columns: for elem in df1.columns:
...@@ -76,14 +55,8 @@ def rename_keys(d, keys): ...@@ -76,14 +55,8 @@ def rename_keys(d, keys):
for elem in df1.columns: for elem in df1.columns:
if type(df1.loc['Shannon radii'][elem]) == dict: if type(df1.loc['Shannon radii'][elem]) == dict:
test = rename_keys(df1.loc['Shannon radii'][elem], repl_dict) test = rename_keys(df1.loc['Shannon radii'][elem], repl_dict)
df1.loc['Shannon radii'][elem] = test df1.loc['Shannon radii'][elem] = test
# Dataframe manipulation to sort the data from lowest to highest atomic number
# ### Dataframe manipulation to sort the data from lowest to highest atomic number
# In[7]:
df2 = df1.transpose() df2 = df1.transpose()
df2.reset_index(); df2.reset_index();
...@@ -91,9 +64,8 @@ df3 = df2.sort_values('Atomic no') ...@@ -91,9 +64,8 @@ df3 = df2.sort_values('Atomic no')
df3.reset_index(); df3.reset_index();
# ### Remove the unwanted text and units from dataframe columns (Cleaning the dataset) #Remove the unwanted text and units from dataframe columns (Cleaning the dataset)
# In[8]:
df3['Coefficient of linear thermal expansion'] = df3['Coefficient of linear thermal expansion'].str.replace(r"(?: x10<sup>-6</sup>K<sup>-1</sup>)", '') df3['Coefficient of linear thermal expansion'] = df3['Coefficient of linear thermal expansion'].str.replace(r"(?: x10<sup>-6</sup>K<sup>-1</sup>)", '')
df3['Boiling point'] = df3['Boiling point'].str.replace(r"(?: K)", '') df3['Boiling point'] = df3['Boiling point'].str.replace(r"(?: K)", '')
df3['Brinell hardness'] = df3['Brinell hardness'].str.replace(r"(?: MN m<sup>-2</sup>)", '') df3['Brinell hardness'] = df3['Brinell hardness'].str.replace(r"(?: MN m<sup>-2</sup>)", '')
...@@ -106,8 +78,6 @@ df3['Liquid range'] = df3['Liquid range'].str.replace(r"(?: K)", '') ...@@ -106,8 +78,6 @@ df3['Liquid range'] = df3['Liquid range'].str.replace(r"(?: K)", '')
df3['Melting point'] = df3['Melting point'].str.replace(r"(?: K)", '') df3['Melting point'] = df3['Melting point'].str.replace(r"(?: K)", '')
df3['Molar volume'] = df3['Molar volume'].str.replace(r"(?: cm<sup>3</sup>)", '') df3['Molar volume'] = df3['Molar volume'].str.replace(r"(?: cm<sup>3</sup>)", '')
df3['Mineral hardness'] = df3['Mineral hardness'].str.replace(r"(?: \(graphite; diamond is 10.0\)\(no units\))", '') df3['Mineral hardness'] = df3['Mineral hardness'].str.replace(r"(?: \(graphite; diamond is 10.0\)\(no units\))", '')
df3['Reflectivity'] = df3['Reflectivity'].str.replace(r"(?:%)", '') df3['Reflectivity'] = df3['Reflectivity'].str.replace(r"(?:%)", '')
df3['Rigidity modulus'] = df3['Rigidity modulus'].str.replace(r"(?: GPa)", '') df3['Rigidity modulus'] = df3['Rigidity modulus'].str.replace(r"(?: GPa)", '')
df3['Superconduction temperature'] = df3['Superconduction temperature'].str.replace(r"(?: K)|(?:K)", '') df3['Superconduction temperature'] = df3['Superconduction temperature'].str.replace(r"(?: K)|(?:K)", '')
...@@ -116,34 +86,16 @@ df3['Velocity of sound'] = df3['Velocity of sound'].str.replace(r"(?: m s<sup>-1 ...@@ -116,34 +86,16 @@ df3['Velocity of sound'] = df3['Velocity of sound'].str.replace(r"(?: m s<sup>-1
df3['Vickers hardness'] = df3['Vickers hardness'].str.replace(r"(?: MN m<sup>-2</sup>)", '') df3['Vickers hardness'] = df3['Vickers hardness'].str.replace(r"(?: MN m<sup>-2</sup>)", '')
df3['Electronic structure'] = df3['Electronic structure'].str.replace(r"(?:<sup>)|(?:<\/sup>)|(?:\([^()]*\))", '') df3['Electronic structure'] = df3['Electronic structure'].str.replace(r"(?:<sup>)|(?:<\/sup>)|(?:\([^()]*\))", '')
# In[9]:
df3 = df3.replace(["no data","no data ","&gt"],np.nan) df3 = df3.replace(["no data","no data ","&gt"],np.nan)
df4 = df3.reset_index() df4 = df3.reset_index()
df4.rename(columns={"index": "Element Symbol"},inplace = True) df4.rename(columns={"index": "Element Symbol"},inplace = True)
df4.drop('iupac_ordering', axis=1, inplace=True) df4.drop('iupac_ordering', axis=1, inplace=True)
# ### Lastly fill the empty features with np.nan values #Fill the empty features with np.nan values
# In[10]:
df_clean = df4.replace(r'^\s*$', np.nan, regex=True) df_clean = df4.replace(r'^\s*$', np.nan, regex=True)
# Section instantiation: the attributes of each object are filled in. This creates 118 unique objects (one per atomic element symbol), each of which can be used to access that element's properties
# In[11]:
df_clean;
# # Section instantiation: the attributes of each object are filled in. This creates 118 unique objects (one per atomic element symbol), each of which can be used to access that element's properties
# In[12]:
objs = [] objs = []
for i in df_clean['Element Symbol']: for i in df_clean['Element Symbol']:
...@@ -151,10 +103,6 @@ for i in df_clean['Element Symbol']: ...@@ -151,10 +103,6 @@ for i in df_clean['Element Symbol']:
prop = pymat.m_create(metadata) prop = pymat.m_create(metadata)
objs.append(pymat) objs.append(pymat)
# In[13]:
count = 0 count = 0
for obj in objs: for obj in objs:
obj.section_pymat_metadata.source = 'Pymatgen' obj.section_pymat_metadata.source = 'Pymatgen'
...@@ -210,11 +158,11 @@ atomic_element_symbol = df_clean['Element Symbol'].values ...@@ -210,11 +158,11 @@ atomic_element_symbol = df_clean['Element Symbol'].values
for count, symbol in enumerate(atomic_element_symbol): for count, symbol in enumerate(atomic_element_symbol):
globals()[symbol] = objs[count] globals()[symbol] = objs[count]
# In[14]:
def definition(abc): def definition(abc):
'''
This function can be used to look up the definition of each accessible element feature (quantity).
'''
if abc == 'atomic_number': if abc == 'atomic_number':
return atomic_properties_pymat.atomic_number.__doc__ return atomic_properties_pymat.atomic_number.__doc__
if abc == 'atomic_element_symbol': if abc == 'atomic_element_symbol':
...@@ -306,13 +254,12 @@ def definition(abc): ...@@ -306,13 +254,12 @@ def definition(abc):
if abc == 'atomic_basis_set': if abc == 'atomic_basis_set':
return metadata.atomic_basis_set.__doc__ return metadata.atomic_basis_set.__doc__
# In[15]:
def symbol(abc): def symbol(abc):
return globals()[abc] '''
This utility function returns the element object for a given periodic-table symbol, so that its properties can be accessed. It is useful when you want to access a particular property of multiple elements at once.
'''
return globals()[abc]
# In[16]: # The method_list object can be inspected after importing this module to see all the accessible quantities at a glance.
method_list = [method for method in dir(atomic_properties_pymat) if (method.startswith('m_') or method.startswith('_') or method.startswith('__') or method.startswith('val') or method.startswith('section_') or method.startswith('get')) is False] method_list = [method for method in dir(atomic_properties_pymat) if (method.startswith('m_') or method.startswith('_') or method.startswith('__') or method.startswith('val') or method.startswith('section_') or method.startswith('get')) is False]
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment