%% Cell type:markdown id: tags:
#Introduction
In this notebook, we implement a machine-learning framework that accelerates the discovery of functional high-entropy alloys.
%% Cell type:code id: tags:
```
import cv2
import os
import time
import random
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.optim import Adam, lr_scheduler
from torch.utils.data import Dataset, DataLoader
from scipy.spatial.distance import cdist
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from sklearn.cluster import KMeans
from sklearn.mixture import GaussianMixture
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from matplotlib.patches import Ellipse
from mpl_toolkits.axes_grid1 import make_axes_locatable
import seaborn as sns
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
root = 'content/'  # relative root path used for saving models and figures
sns.set(color_codes=True)
```
%% Cell type:markdown id: tags:
#Required packages
%% Cell type:code id: tags:
```
!pip install bayesian-optimization
!pip install lightgbm
```
%% Output
Collecting bayesian-optimization
Downloading bayesian-optimization-1.2.0.tar.gz (14 kB)
Requirement already satisfied: numpy>=1.9.0 in /usr/local/lib/python3.7/dist-packages (from bayesian-optimization) (1.19.5)
Requirement already satisfied: scipy>=0.14.0 in /usr/local/lib/python3.7/dist-packages (from bayesian-optimization) (1.4.1)
Requirement already satisfied: scikit-learn>=0.18.0 in /usr/local/lib/python3.7/dist-packages (from bayesian-optimization) (0.22.2.post1)
Requirement already satisfied: joblib>=0.11 in /usr/local/lib/python3.7/dist-packages (from scikit-learn>=0.18.0->bayesian-optimization) (1.0.1)
Building wheels for collected packages: bayesian-optimization
Building wheel for bayesian-optimization (setup.py) ... done
Created wheel for bayesian-optimization: filename=bayesian_optimization-1.2.0-py3-none-any.whl size=11685 sha256=fe62c2a660ed5d9d514b5ea22d24477c1375e2a76b765fe2428891c61cb98514
Stored in directory: /root/.cache/pip/wheels/fd/9b/71/f127d694e02eb40bcf18c7ae9613b88a6be4470f57a8528c5b
Successfully built bayesian-optimization
Installing collected packages: bayesian-optimization
Successfully installed bayesian-optimization-1.2.0
Requirement already satisfied: lightgbm in /usr/local/lib/python3.7/dist-packages (2.2.3)
Requirement already satisfied: numpy in /usr/local/lib/python3.7/dist-packages (from lightgbm) (1.19.5)
Requirement already satisfied: scikit-learn in /usr/local/lib/python3.7/dist-packages (from lightgbm) (0.22.2.post1)
Requirement already satisfied: scipy in /usr/local/lib/python3.7/dist-packages (from lightgbm) (1.4.1)
Requirement already satisfied: joblib>=0.11 in /usr/local/lib/python3.7/dist-packages (from scikit-learn->lightgbm) (1.0.1)
%% Cell type:markdown id: tags:
#Wasserstein Autoencoder
A Wasserstein autoencoder (WAE) is used to learn the latent-space representation of the alloy compositions.
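For reference, the objective optimized in the training loop below combines a reconstruction term with a maximum mean discrepancy (MMD) penalty between the encoded latents $\tilde{z}$ and samples $z$ drawn from a Gaussian prior:

$$\mathcal{L} = \mathrm{BCE}(\hat{x}, x) + \lambda\,\mathrm{MMD}(\tilde{z}, z),$$

where $\lambda$ corresponds to the `MMD_lambda` hyperparameter set in the training parameters.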
%% Cell type:code id: tags:
```
class FeatureDataset(Dataset):  # from numpy to tensor (pytorch-readable)
    '''
    Args: x is a 2D numpy array [x_size, x_features]
    '''
    def __init__(self, x, y):
        self.x = x
        self.y = y
    def __len__(self):
        return self.x.shape[0]
    def __getitem__(self, idx):
        return torch.FloatTensor(self.x[idx]), torch.FloatTensor(self.y[idx])
class AttributeDataset(Dataset):  # this one is used by the classifier
    '''
    Args: x is a 2D numpy array [x_size, x_features]
    '''
    def __init__(self, x, y):
        self.x = x
        self.y = y
    def __len__(self):
        return self.x.shape[0]
    def __getitem__(self, idx):
        return torch.Tensor(self.x[idx]), torch.Tensor(self.y[idx])
```
%% Cell type:markdown id: tags:
**Model architecture**
%% Cell type:code id: tags:
```
def weights_init(m):
    classname = m.__class__.__name__
    if classname.find('BatchNorm') != -1:
        m.weight.data.normal_(1.0, 0.02)
        m.bias.data.fill_(0)
class WAE(nn.Module):
    def __init__(self, input_size):
        super(WAE, self).__init__()
        self.input_size = input_size
        # encoder
        self.encoder = nn.Sequential(
            nn.Linear(self.input_size, 80),
            nn.LayerNorm(80),
            nn.ReLU(),
            nn.Linear(80, 64),
            nn.LayerNorm(64),
            nn.ReLU(),
            nn.Linear(64, 48),
            nn.LayerNorm(48),
            nn.ReLU(),
            nn.Linear(48, 2),
        )
        # decoder
        self.decoder = nn.Sequential(
            nn.Linear(2, 48),
            nn.LayerNorm(48),
            nn.ReLU(),
            nn.Linear(48, 64),
            nn.LayerNorm(64),
            nn.ReLU(),
            nn.Linear(64, 80),
            nn.LayerNorm(80),
            nn.ReLU(),
            nn.Linear(80, self.input_size),
            nn.Softmax(dim=1)  # softmax along dimension 1, so each reconstructed composition sums to 1
        )
        self.apply(weights_init)
    def forward(self, x):
        z = self._encode(x)
        x_recon = self._decode(z)
        return x_recon, z
    def _encode(self, x):
        return self.encoder(x)
    def _decode(self, z):
        return self.decoder(z)
```
%% Cell type:markdown id: tags:
**Utility functions for the WAE**
* same_seeds: fixes all random seeds for reproducibility
* get_latents: returns the latent vectors produced by the WAE encoder
* imq_kernel: inverse multiquadric (IMQ) kernel, used to compute the maximum mean discrepancy (MMD) term of the WAE loss (see the note below).
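For each scale $s$ in the list used below, the IMQ kernel is $k(x, y) = \dfrac{C}{C + \lVert x - y\rVert^2}$ with $C = 2\, d_z\, s$ (here $d_z$ = `h_dim`, the latent dimension); the MMD estimate sums, over all scales, the difference between the within-sample kernel averages and the cross-sample kernel average.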
%% Cell type:code id: tags:
```
def same_seeds(seed):  # fix numpy & torch seeds to the same value
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)  # if you are using multi-GPU
    np.random.seed(seed)  # Numpy module
    random.seed(seed)  # Python random module
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True
def get_latents(model, dataset):  # from dataset to latent vectors
    model.to(device).eval()  # eval() puts the model in evaluation mode
    latents = []
    with torch.no_grad():  # disable gradient tracking, the model parameters stay fixed
        dataloader = DataLoader(dataset, batch_size=256, shuffle=False)
        for i, data in enumerate(dataloader):
            x = data[0].to(device)
            recon_x, z = model(x)
            latents.append(z.detach().cpu().numpy())
    return np.concatenate(latents, axis=0)
def imq_kernel(X: torch.Tensor, Y: torch.Tensor, h_dim: int):  # a commonly chosen kernel
    batch_size = X.size(0)
    norms_x = X.pow(2).sum(1, keepdim=True)  # batch_size x 1
    prods_x = torch.mm(X, X.t()).to(device)  # batch_size x batch_size
    dists_x = norms_x + norms_x.t() - 2 * prods_x  # mm: matrix multiplication
    norms_y = Y.pow(2).sum(1, keepdim=True).to(device)  # batch_size x 1
    prods_y = torch.mm(Y, Y.t()).to(device)  # batch_size x batch_size
    dists_y = norms_y + norms_y.t() - 2 * prods_y
    dot_prd = torch.mm(X, Y.t())
    dists_c = norms_x + norms_y.t() - 2 * dot_prd
    stats = 0
    for scale in [.1, .2, .5, 1., 2., 5., 10.]:  # kernel bandwidths; this set may warrant further study
        C = 2 * h_dim * 1.0 * scale
        res1 = C / (C + dists_x)
        res1 += C / (C + dists_y)
        if torch.cuda.is_available():
            res1 = (1 - torch.eye(batch_size).to(device)) * res1
        else:
            res1 = (1 - torch.eye(batch_size)) * res1
        res1 = res1.sum() / (batch_size - 1)
        res2 = C / (C + dists_c)
        res2 = res2.sum() * 2. / (batch_size)
        stats += res1 - res2
    return stats
```
%% Cell type:markdown id: tags:
**Data loading**
%% Cell type:code id: tags:
```
same_seeds(1)  # set the random seed to 1
params = {
'num_epoch' : 100,
'batch_size' : 20,
'lr' : 5e-4,
'weight_decay' : 0.0,
'sigma' : 8.0,
'MMD_lambda' : 1e-4,
'model_name' : 'WAE_v1',
} # for WAE training
all = pd.read_csv('data_base.csv', header=0).iloc[:,1:19].to_numpy()
raw_x = all[:696,:6]
raw_y = all[:696,17].reshape(-1,1)
dataset = FeatureDataset(raw_x[:], raw_y[:]) #numpy to tensor
dataloader = DataLoader(dataset, batch_size=params['batch_size'], shuffle=True) # tensor to dataloader
print(raw_x[50:55])
```
%% Output
[[0.701 0.2 0.099 0. 0. 0. ]
[0.65 0.25 0.1 0. 0. 0. ]
[0.62 0.28 0.1 0. 0. 0. ]
[0.601 0.3 0.099 0. 0. 0. ]
[0.58 0.32 0.1 0. 0. 0. ]]
%% Cell type:code id: tags:
```
model = WAE(raw_x.shape[1]).to(device) # initialize the model
optimizer = Adam(model.parameters(), lr = params['lr'], weight_decay = params['weight_decay']) # optimizer
def train_WAE(model, optimizer, dataloader, params):
    model_name = params['model_name']
    num_epoch = params['num_epoch']
    sigma = params['sigma']  # std of the Gaussian prior assumed for the latent space
    MMD_lambda = params['MMD_lambda']  # weight of the maximum mean discrepancy term
    folder_dir = os.path.join(root, model_name)  # a folder to save models
    if not os.path.isdir(folder_dir):
        os.mkdir(folder_dir)
    for epoch in range(num_epoch):
        start_time = time.time()
        total_loss = []  # saved for plotting, recon loss + MMD
        total_recon = []  # binary cross entropy
        total_MMD = []  # maximum mean discrepancy
        for i, data in enumerate(dataloader):
            x = data[0].to(device)
            y = data[1].to(device)
            model.train()  # model goes to train mode
            recon_x, z_tilde = model(x)  # z_tilde is the encoded latent
            z = sigma*torch.randn(z_tilde.size()).to(device)  # z is sampled from a Gaussian of the same dimension (independent of z_tilde)
            recon_loss = F.binary_cross_entropy(recon_x, x, reduction='mean')  # reconstruction loss
            #recon_loss = F.mse_loss(recon_x, x, reduction='mean')
            #recon_loss = F.l1_loss(recon_x, x, reduction='mean')
            MMD_loss = imq_kernel(z_tilde, z, h_dim=2).to(device)  # MMD between z_tilde and z
            MMD_loss = MMD_loss / x.size(0)  # average over the batch, because recon loss is a mean
            loss = recon_loss + MMD_loss * MMD_lambda  # MMD_lambda: weighting hyperparameter
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            total_loss.append(loss.item())  # from tensor to value
            total_recon.append(recon_loss.item())
            total_MMD.append(MMD_loss.item())
        avg_loss = sum(total_loss)/len(total_loss)
        avg_recon = sum(total_recon)/len(total_recon)
        avg_MMD = sum(total_MMD)/len(total_MMD)
        #scheduler.step(avg_loss)
        print('[{:03}/{:03}] loss: {:.6f} Recon_loss: {:.6f}, MMD_loss:{:.6f}, time: {:.3f} sec'.format(
              epoch+1, num_epoch,
              avg_loss,
              avg_recon, avg_MMD, time.time() - start_time))
        # save the model after every epoch
        if (epoch+1) % 1 == 0:
            save_model_dir = str(model_name + "_{}.pth".format(epoch+1))
            torch.save(model.state_dict(), os.path.join(folder_dir, save_model_dir))
train_WAE(model, optimizer, dataloader, params)
```
%% Output
[001/100] loss: 0.365287 Recon_loss: 0.364788, MMD_loss:4.988922, time: 0.482 sec
[002/100] loss: 0.319609 Recon_loss: 0.319189, MMD_loss:4.200815, time: 0.240 sec
[003/100] loss: 0.311577 Recon_loss: 0.311122, MMD_loss:4.544367, time: 0.244 sec
[004/100] loss: 0.308807 Recon_loss: 0.308342, MMD_loss:4.655530, time: 0.173 sec
[005/100] loss: 0.307247 Recon_loss: 0.306776, MMD_loss:4.710283, time: 0.162 sec
[006/100] loss: 0.306054 Recon_loss: 0.305588, MMD_loss:4.664705, time: 0.168 sec
[007/100] loss: 0.305088 Recon_loss: 0.304617, MMD_loss:4.708817, time: 0.238 sec
[008/100] loss: 0.304573 Recon_loss: 0.304105, MMD_loss:4.686302, time: 0.191 sec
[009/100] loss: 0.303707 Recon_loss: 0.303228, MMD_loss:4.788946, time: 0.166 sec
[010/100] loss: 0.302751 Recon_loss: 0.302280, MMD_loss:4.713873, time: 0.156 sec
[011/100] loss: 0.301802 Recon_loss: 0.301345, MMD_loss:4.570692, time: 0.184 sec
[012/100] loss: 0.301485 Recon_loss: 0.301018, MMD_loss:4.667446, time: 0.162 sec
[013/100] loss: 0.300671 Recon_loss: 0.300216, MMD_loss:4.550119, time: 0.164 sec
[014/100] loss: 0.300389 Recon_loss: 0.299938, MMD_loss:4.517685, time: 0.167 sec
[015/100] loss: 0.300090 Recon_loss: 0.299639, MMD_loss:4.515236, time: 0.163 sec
[016/100] loss: 0.299518 Recon_loss: 0.299062, MMD_loss:4.564801, time: 0.156 sec
[017/100] loss: 0.299015 Recon_loss: 0.298569, MMD_loss:4.457237, time: 0.161 sec
[018/100] loss: 0.299192 Recon_loss: 0.298747, MMD_loss:4.443053, time: 0.163 sec
[019/100] loss: 0.298831 Recon_loss: 0.298389, MMD_loss:4.429230, time: 0.165 sec
[020/100] loss: 0.298761 Recon_loss: 0.298314, MMD_loss:4.470654, time: 0.219 sec
[021/100] loss: 0.298734 Recon_loss: 0.298295, MMD_loss:4.393027, time: 0.230 sec
[022/100] loss: 0.298443 Recon_loss: 0.298006, MMD_loss:4.373185, time: 0.228 sec
[023/100] loss: 0.299117 Recon_loss: 0.298690, MMD_loss:4.270142, time: 0.169 sec
[024/100] loss: 0.298156 Recon_loss: 0.297726, MMD_loss:4.298305, time: 0.160 sec
[025/100] loss: 0.298053 Recon_loss: 0.297621, MMD_loss:4.324094, time: 0.159 sec
[026/100] loss: 0.297877 Recon_loss: 0.297460, MMD_loss:4.173318, time: 0.162 sec
[027/100] loss: 0.297605 Recon_loss: 0.297177, MMD_loss:4.286124, time: 0.156 sec
[028/100] loss: 0.297636 Recon_loss: 0.297209, MMD_loss:4.271590, time: 0.188 sec
[029/100] loss: 0.297569 Recon_loss: 0.297137, MMD_loss:4.320729, time: 0.233 sec
[030/100] loss: 0.297554 Recon_loss: 0.297129, MMD_loss:4.245043, time: 0.160 sec
[031/100] loss: 0.297525 Recon_loss: 0.297099, MMD_loss:4.259788, time: 0.163 sec
[032/100] loss: 0.297699 Recon_loss: 0.297271, MMD_loss:4.274336, time: 0.161 sec
[033/100] loss: 0.297089 Recon_loss: 0.296667, MMD_loss:4.224826, time: 0.173 sec
[034/100] loss: 0.297538 Recon_loss: 0.297113, MMD_loss:4.246197, time: 0.157 sec
[035/100] loss: 0.297467 Recon_loss: 0.297044, MMD_loss:4.224483, time: 0.241 sec
[036/100] loss: 0.297129 Recon_loss: 0.296705, MMD_loss:4.234460, time: 0.230 sec
[037/100] loss: 0.296836 Recon_loss: 0.296420, MMD_loss:4.163971, time: 0.227 sec
[038/100] loss: 0.297052 Recon_loss: 0.296619, MMD_loss:4.321280, time: 0.187 sec
[039/100] loss: 0.296952 Recon_loss: 0.296546, MMD_loss:4.064921, time: 0.233 sec
[040/100] loss: 0.297335 Recon_loss: 0.296923, MMD_loss:4.114959, time: 0.210 sec
[041/100] loss: 0.297304 Recon_loss: 0.296895, MMD_loss:4.089278, time: 0.159 sec
[042/100] loss: 0.297130 Recon_loss: 0.296715, MMD_loss:4.145263, time: 0.214 sec
[043/100] loss: 0.297211 Recon_loss: 0.296795, MMD_loss:4.160071, time: 0.236 sec
[044/100] loss: 0.296866 Recon_loss: 0.296452, MMD_loss:4.143130, time: 0.168 sec
[045/100] loss: 0.296752 Recon_loss: 0.296336, MMD_loss:4.162766, time: 0.170 sec
[046/100] loss: 0.296653 Recon_loss: 0.296245, MMD_loss:4.076583, time: 0.159 sec
[047/100] loss: 0.296501 Recon_loss: 0.296101, MMD_loss:3.996096, time: 0.167 sec
[048/100] loss: 0.297359 Recon_loss: 0.296951, MMD_loss:4.075806, time: 0.156 sec
[049/100] loss: 0.296895 Recon_loss: 0.296487, MMD_loss:4.078783, time: 0.182 sec
[050/100] loss: 0.296613 Recon_loss: 0.296219, MMD_loss:3.934222, time: 0.241 sec
[051/100] loss: 0.296619 Recon_loss: 0.296196, MMD_loss:4.231486, time: 0.192 sec
[052/100] loss: 0.296902 Recon_loss: 0.296487, MMD_loss:4.149249, time: 0.172 sec
[053/100] loss: 0.296639 Recon_loss: 0.296232, MMD_loss:4.070097, time: 0.157 sec
[054/100] loss: 0.296636 Recon_loss: 0.296248, MMD_loss:3.872639, time: 0.161 sec
[055/100] loss: 0.296585 Recon_loss: 0.296192, MMD_loss:3.932009, time: 0.175 sec
[056/100] loss: 0.296295 Recon_loss: 0.295894, MMD_loss:4.014891, time: 0.174 sec
[057/100] loss: 0.296475 Recon_loss: 0.296065, MMD_loss:4.100971, time: 0.194 sec
[058/100] loss: 0.296458 Recon_loss: 0.296047, MMD_loss:4.109641, time: 0.162 sec
[059/100] loss: 0.296511 Recon_loss: 0.296105, MMD_loss:4.059724, time: 0.157 sec
[060/100] loss: 0.296396 Recon_loss: 0.295992, MMD_loss:4.038733, time: 0.171 sec
[061/100] loss: 0.296614 Recon_loss: 0.296203, MMD_loss:4.108154, time: 0.160 sec
[062/100] loss: 0.296747 Recon_loss: 0.296337, MMD_loss:4.106113, time: 0.167 sec
[063/100] loss: 0.296383 Recon_loss: 0.295984, MMD_loss:3.987412, time: 0.164 sec
[064/100] loss: 0.296521 Recon_loss: 0.296123, MMD_loss:3.984291, time: 0.161 sec
[065/100] loss: 0.296378 Recon_loss: 0.295979, MMD_loss:3.992087, time: 0.155 sec
[066/100] loss: 0.296210 Recon_loss: 0.295811, MMD_loss:3.988383, time: 0.175 sec
[067/100] loss: 0.296177 Recon_loss: 0.295773, MMD_loss:4.037848, time: 0.236 sec
[068/100] loss: 0.296045 Recon_loss: 0.295667, MMD_loss:3.787771, time: 0.233 sec
[069/100] loss: 0.296186 Recon_loss: 0.295795, MMD_loss:3.908190, time: 0.232 sec
[070/100] loss: 0.296298 Recon_loss: 0.295903, MMD_loss:3.944492, time: 0.223 sec
[071/100] loss: 0.296370 Recon_loss: 0.295976, MMD_loss:3.935484, time: 0.156 sec
[072/100] loss: 0.296354 Recon_loss: 0.295953, MMD_loss:4.011393, time: 0.166 sec
[073/100] loss: 0.296211 Recon_loss: 0.295825, MMD_loss:3.856404, time: 0.161 sec
[074/100] loss: 0.296478 Recon_loss: 0.296078, MMD_loss:3.995800, time: 0.161 sec
[075/100] loss: 0.296381 Recon_loss: 0.295980, MMD_loss:4.001204, time: 0.160 sec
[076/100] loss: 0.296341 Recon_loss: 0.295940, MMD_loss:4.011523, time: 0.215 sec
[077/100] loss: 0.296220 Recon_loss: 0.295829, MMD_loss:3.904648, time: 0.169 sec
[078/100] loss: 0.296117 Recon_loss: 0.295726, MMD_loss:3.912922, time: 0.165 sec
[079/100] loss: 0.296196 Recon_loss: 0.295803, MMD_loss:3.929377, time: 0.166 sec
[080/100] loss: 0.295948 Recon_loss: 0.295549, MMD_loss:3.984029, time: 0.160 sec
[081/100] loss: 0.295997 Recon_loss: 0.295608, MMD_loss:3.892871, time: 0.161 sec
[082/100] loss: 0.295835 Recon_loss: 0.295448, MMD_loss:3.866601, time: 0.162 sec
[083/100] loss: 0.295890 Recon_loss: 0.295501, MMD_loss:3.886154, time: 0.215 sec
[084/100] loss: 0.296199 Recon_loss: 0.295807, MMD_loss:3.923056, time: 0.218 sec
[085/100] loss: 0.295973 Recon_loss: 0.295577, MMD_loss:3.960673, time: 0.159 sec
[086/100] loss: 0.296010 Recon_loss: 0.295614, MMD_loss:3.961454, time: 0.161 sec
[087/100] loss: 0.295832 Recon_loss: 0.295442, MMD_loss:3.900643, time: 0.158 sec
[088/100] loss: 0.295746 Recon_loss: 0.295352, MMD_loss:3.940127, time: 0.170 sec
[089/100] loss: 0.295856 Recon_loss: 0.295475, MMD_loss:3.814231, time: 0.236 sec
[090/100] loss: 0.296133 Recon_loss: 0.295738, MMD_loss:3.948777, time: 0.235 sec
[091/100] loss: 0.295939 Recon_loss: 0.295552, MMD_loss:3.870931, time: 0.223 sec
[092/100] loss: 0.295722 Recon_loss: 0.295336, MMD_loss:3.862048, time: 0.171 sec
[093/100] loss: 0.295671 Recon_loss: 0.295289, MMD_loss:3.814457, time: 0.167 sec
[094/100] loss: 0.295676 Recon_loss: 0.295293, MMD_loss:3.827099, time: 0.162 sec
[095/100] loss: 0.295916 Recon_loss: 0.295534, MMD_loss:3.814622, time: 0.174 sec
[096/100] loss: 0.295846 Recon_loss: 0.295460, MMD_loss:3.854504, time: 0.205 sec
[097/100] loss: 0.295670 Recon_loss: 0.295290, MMD_loss:3.798476, time: 0.205 sec
[098/100] loss: 0.296372 Recon_loss: 0.295990, MMD_loss:3.812305, time: 0.158 sec
[099/100] loss: 0.296216 Recon_loss: 0.295829, MMD_loss:3.869949, time: 0.215 sec
[100/100] loss: 0.295944 Recon_loss: 0.295553, MMD_loss:3.911615, time: 0.199 sec
%% Cell type:markdown id: tags:
**Double-check the reconstructed compositions**
* One way to find out whether the WAE (or any other autoencoder) has learned the
  representation is to compare the reconstructed and original compositions.
%% Cell type:code id: tags:
```
# double-check the reconstructed compositions
#t = time.localtime()
model_dir = os.path.join(root,'{}/{}_100.pth'.format(params['model_name'], params['model_name']))
model = WAE(raw_x.shape[1]).to(device)
model.load_state_dict(torch.load(model_dir))
model.eval()
with torch.no_grad():
    test = torch.FloatTensor(raw_x).to(device)
    recon_x, z = model(test)
    recon_x = model.decoder(z)
    recon_x = recon_x.cpu().detach().numpy()
column_name = ['Fe','Ni','Co','Cr','V','Cu']#,'VEC','AR1','AR2','PE','Density','TC','MP','FI','SI','TI','M']
#recon_x = (recon_x * (max-min)) + min
pd.DataFrame(recon_x.round(3), columns=column_name).loc[690:695]
```
%% Output
Fe Ni Co Cr V Cu
690 0.614 0.330 0.053 0.000 0.003 0.0
691 0.613 0.331 0.052 0.000 0.003 0.0
692 0.612 0.323 0.062 0.001 0.002 0.0
693 0.608 0.326 0.063 0.000 0.003 0.0
694 0.608 0.317 0.072 0.001 0.002 0.0
695 0.605 0.320 0.073 0.001 0.002 0.0
%% Cell type:code id: tags:
```
csv_data = pd.read_csv('data_base.csv', header=0).iloc[:,1:19]
csv_data.iloc[690:696,:6].round(3)
```
%% Output
Fe Ni Co Cr V Cu
690 0.630 0.330 0.04 0.0 0.0 0.0
691 0.625 0.335 0.04 0.0 0.0 0.0
692 0.635 0.315 0.05 0.0 0.0 0.0
693 0.625 0.325 0.05 0.0 0.0 0.0
694 0.635 0.305 0.06 0.0 0.0 0.0
695 0.625 0.315 0.06 0.0 0.0 0.0
%% Cell type:markdown id: tags:
**Visualize the WAE latent space**
Here we assign different colors to alloys with and without copper,
as we expect them to differ significantly in the latent space.
%% Cell type:code id: tags:
```
sns.set_style('ticks')
model = WAE(raw_x.shape[1]).to(device)
model.load_state_dict(torch.load(model_dir))
dataset = FeatureDataset(raw_x[:], raw_y[:])
latents = get_latents(model, dataset)
low_cu = raw_x[:,5] < 0.05
low_cu_latent = latents[low_cu]
low_cu_color = raw_y[:][low_cu]
high_cu = raw_x[:,5] >= 0.05
high_cu_latent = latents[high_cu]
high_cu_color = raw_y[:][high_cu]
# figure settings
fig, axs = plt.subplots(figsize = (3, 3),dpi=200)
#axs.set_aspect(1.)
#axs.set_ylim(-7,7)
#axs.set_xlim(-11,5)
axs.set_yticks(np.arange(-6, 8, step=2))
axs.set_xticks(np.arange(-10, 5, step=2))
axs.set_yticklabels(np.arange(-6, 8, step=2), fontsize=7)
axs.set_xticklabels(np.arange(-10, 5, step=2), fontsize=7)
for axis in ['top','bottom','left','right']:
    axs.spines[axis].set_linewidth(1.)
axs.tick_params(axis='both', which='major', top=False, labeltop=False, direction='out', width=1., length=4)
axs.tick_params(axis='both', which='major', right=False, labelright=False, direction='out', width=1., length=4)
#scatter1 = axs.scatter(low_cu_latent[:,0], low_cu_latent[:,1], c=low_cu_color, alpha=.75, s=10, linewidths=0, cmap='viridis')
#scatter2 = axs.scatter(high_cu_latent[:,0], high_cu_latent[:,1], c=high_cu_color, alpha=.75, s=9, linewidths=0, cmap='Reds')
scatter1 = axs.scatter(low_cu_latent[:,0], low_cu_latent[:,1], c='steelblue', alpha=.55, s=8, linewidths=0, label='Alloys w/o Cu')
scatter2 = axs.scatter(high_cu_latent[:,0], high_cu_latent[:,1], c='firebrick', alpha=.65, s=14, linewidths=0, marker='^', label='Alloys w/ Cu')
#scatter3 = axs.scatter(latents_exp_4[:,0], latents_exp_4[:,1], alpha=1., s=10, linewidths=.75, edgecolors='darkslategray', facecolors='w')#, label='New FeCoNiCr HEAs')
#scatter4 = axs.scatter(latents_exp_5[:,0], latents_exp_5[:,1], alpha=1., s=16, linewidths=.75, edgecolors='darkred', facecolors='w',marker='^')#, label='New FeCoNiCrCu HEAs')
handles,labels = axs.get_legend_handles_labels()
handles = handles[::1]
labels = labels[::1]
legend_properties = {'size':7.5}
axs.legend(handles, labels, loc='upper right', bbox_to_anchor=(1.015,1.017), handletextpad=-0.3, frameon=False, prop=legend_properties)
#axs.legend(handles, labels, loc='upper left', bbox_to_anchor=(-0.045,1.017), handletextpad=-0.3, frameon=False, prop=legend_properties)
#rect = patches.Rectangle((-19.4,15.0), 18, 4.5, linewidth=0,edgecolor=None,facecolor='k', alpha=0.03,linestyle=None,zorder=-10) #(0.2,15.4), 14, 4.1
#axs.add_patch(rect)
fig.savefig('Figure3_a.tif', bbox_inches = 'tight', pad_inches=0.01)
```
%% Output
%% Cell type:markdown id: tags:
#**From WAE latent space to composition**
To sample compositions from the latent space, we turn the latent space into a probability distribution via a Gaussian Mixture Model (GMM).
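As a minimal sketch (not part of the original pipeline), once the GMM `gm` is fitted on the WAE latents a few cells below, new latent points can be drawn from it and pushed through the WAE decoder to obtain candidate compositions:

```
# Sketch: draw latent samples from the fitted GMM and decode them to compositions.
# Assumes `gm` (fitted GaussianMixture, see below) and `model` (trained WAE) are available.
z_new, _ = gm.sample(10)  # 10 latent points
with torch.no_grad():
    comps = model._decode(torch.FloatTensor(z_new).to(device)).cpu().numpy()
print(comps.round(3))  # each row sums to ~1 thanks to the softmax output layer
```

The MCMC sampler further below refines this idea by weighting the GMM density with the Invar classifier.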
%% Cell type:code id: tags:
```
#plotting functions
def draw_ellipse(position, covariance, ax=None, **kwargs):
    """Draw an ellipse with a given position and covariance"""
    ax = ax or plt.gca()
    # Convert covariance to principal axes
    if covariance.shape == (2, 2):
        U, s, Vt = np.linalg.svd(covariance)
        angle = np.degrees(np.arctan2(U[1, 0], U[0, 0]))
        width, height = 2 * np.sqrt(s)
    else:
        angle = 0
        width, height = 2 * np.sqrt(covariance)
    # Draw the ellipse at 1, 2 and 3 sigma
    for nsig in range(1, 4):
        ax.add_patch(Ellipse(position, nsig * width, nsig * height,
                             angle, **kwargs))
def plot_gmm(gm, X, label=True, ax=None):
    X = latents
    fig, axs = plt.subplots(1, 1, figsize=(2, 2), dpi=200)
    ax = axs or plt.gca()
    labels = gm.fit(X).predict(X)
    if label:
        low_cu = raw_x[:,5] < 0.05
        low_cu_latent = latents[low_cu]
        low_cu_color = raw_y[:][low_cu]
        high_cu = raw_x[:,5] >= 0.05
        high_cu_latent = latents[high_cu]
        high_cu_color = raw_y[:][high_cu]
        scatter1 = axs.scatter(low_cu_latent[:,0], low_cu_latent[:,1], c=low_cu_color, alpha=.65, s=8, linewidths=0, cmap='viridis')
        scatter2 = axs.scatter(high_cu_latent[:,0], high_cu_latent[:,1], c=high_cu_color, alpha=.65, s=14, linewidths=0, cmap='Reds', marker='^')
        #scatter3 = axs.scatter(latents[698:,0], latents[698:,1], alpha=1., s=10, linewidths=.75, edgecolors='k', facecolors='none')
    else:
        ax.scatter(X[:, 0], X[:, 1], s=5, zorder=2)
    ax.axis('equal')
    w_factor = 0.2 / gm.weights_.max()
    for pos, covar, w in zip(gm.means_, gm.covariances_, gm.weights_):
        draw_ellipse(pos, covar, alpha=0.75*w*w_factor, facecolor='slategrey', zorder=-10)
```
%% Cell type:markdown id: tags:
Here a GMM with four components is fitted to the latents. Why four? The elbow analysis below motivates this choice.
%% Cell type:code id: tags:
```
gm = GaussianMixture(n_components=4, random_state=0, init_params='kmeans').fit(latents)  # see the n_components vs. average negative log-likelihood scan below
print('Average negative log likelihood:', -1*gm.score(latents))
plot_gmm(gm, latents)
```
%% Output
Average negative log likelihood: 1.3329691491445406
%% Cell type:markdown id: tags:
We use the elbow method to find a good number of components; the best number of clusters is either 4 or 5.
%% Cell type:code id: tags:
```
scores=[] #using elbow method to find out the best # of components
for i in range(1,12):
    gm = GaussianMixture(n_components=i, random_state=0, init_params='kmeans').fit(latents)
    print('Average negative log likelihood:', -1*gm.score(latents))
    scores.append(-1*gm.score(latents))
```
%% Output
Average negative log likelihood: 1.6934940458870427
Average negative log likelihood: 1.5939515381329854
Average negative log likelihood: 1.3446285344006002
Average negative log likelihood: 1.3329691491445406
Average negative log likelihood: 1.1957491673672727
Average negative log likelihood: 1.1577345788629838
Average negative log likelihood: 1.1085116154613388
Average negative log likelihood: 1.106149515195862
Average negative log likelihood: 1.0059797843308775
Average negative log likelihood: 1.012995374227744
Average negative log likelihood: 0.9596908609051571
%% Cell type:code id: tags:
```
import matplotlib.pyplot as plt
plt.figure()
plt.scatter(range(1,12), scores,color='red')
plt.plot(range(1,12),scores)
plt.show()
```
%% Output
%% Cell type:markdown id: tags:
#INVAR Classifier
A simple neural-network classifier that predicts whether a composition is Invar, taking the 2D WAE latent representation as input.
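The binary label used for training below is $y = 1$ when the measured thermal-expansion value is below 5 (Invar-like) and $y = 0$ otherwise.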
%% Cell type:code id: tags:
```
class Classifier(nn.Module):  # a very simple classifier with large dropout; intuition: keep it as simple as possible, given that the input is only 2-dimensional
    def __init__(self):
        super(Classifier, self).__init__()
        self.fc = nn.Sequential(
            nn.Linear(2, 8),
            nn.Dropout(0.5),
            nn.Linear(8, 1),
            nn.Sigmoid()
        )
    def forward(self, x):
        return self.fc(x)
```
%% Cell type:markdown id: tags:
**Classifier training**
%% Cell type:code id: tags:
```
same_seeds(1)
params['cls_bs'] = 16
params['cls_lr'] = 1e-4
params['cls_epoch'] = 100
params['num_fold'] = 5
params['label_y'] = np.where(raw_y<5, 1, 0)
params['latents'] = latents
cls = Classifier().to(device)
opt = Adam(cls.parameters(), lr=params['cls_lr'], weight_decay=0.)
def training_Cls(model, optimizer, params):
    label_y = params['label_y']
    latents = params['latents']
    cls_epoch = params['cls_epoch']
    kf = KFold(n_splits=params['num_fold'])
    train_acc = []
    test_acc = []
    k = 1
    for train, test in kf.split(latents):
        x_train, x_test, y_train, y_test = latents[train], latents[test], label_y[train], label_y[test]
        cls_dataset = AttributeDataset(x_train, y_train)
        cls_dataloader = DataLoader(cls_dataset, batch_size=params['cls_bs'], shuffle=True)
        cls_testDataset = AttributeDataset(x_test, y_test)
        cls_testDataloader = DataLoader(cls_testDataset, batch_size=cls_testDataset.__len__(), shuffle=False)
        for epoch in range(cls_epoch):
            t = time.time()
            total_loss = []
            total_acc = []
            cls.train()
            for i, data in enumerate(cls_dataloader):
                x = data[0].to(device)
                y = data[1].to(device)
                y_pred = cls(x)
                loss = F.binary_cross_entropy(y_pred, y)
                total_acc.append(torch.sum(torch.where(y_pred>=0.5,1,0) == y).detach().cpu().numpy())
                total_loss.append(loss.item())
                opt.zero_grad()
                loss.backward()
                opt.step()
            #eval
            cls.eval()
            for test in cls_testDataloader:
                x = test[0].to(device)
                y = test[1].to(device)
                y_pred = cls(x)
                accuracy = torch.sum(torch.where(y_pred>=0.5,1,0) == y) / y_pred.size(0)
                test_loss = F.binary_cross_entropy(y_pred, y)
            #print(f'[{epoch+1:03}/{cls_epoch}] loss:{sum(total_loss)/len(total_loss):.3f} test_loss:{test_loss.item():.3f} acc:{sum(total_acc)/cls_dataset.__len__():.3f} test_acc:{accuracy:.3f} time:{time.time()-t:.3f}')
        print('[{}/{}] train_acc: {:.04f} || test_acc: {:.04f}'.format(k, params['num_fold'], sum(total_acc)/cls_dataset.__len__(), accuracy.item()))
        train_acc.append(sum(total_acc)/cls_dataset.__len__())
        test_acc.append(accuracy.item())
        k += 1
    print('train_acc: {:.04f} || test_acc: {:.04f}'.format(sum(train_acc)/len(train_acc), sum(test_acc)/len(test_acc)))
    return train_acc, test_acc
train_acc, test_acc = training_Cls(cls, opt, params)
```
%% Output
[1/5] train_acc: 0.8183 || test_acc: 0.8929
[2/5] train_acc: 0.8797 || test_acc: 0.7122
[3/5] train_acc: 0.8205 || test_acc: 0.9065
[4/5] train_acc: 0.8474 || test_acc: 0.8058
[5/5] train_acc: 0.8276 || test_acc: 0.9137
train_acc: 0.8387 || test_acc: 0.8462
%% Cell type:markdown id: tags:
#Markov Chain Monte Carlo for composition sampling
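The sampler below follows a Metropolis-style rule: from the current point $z$, a proposal $z'$ is drawn from $\mathcal{N}(z, \sigma^2 I)$, the unnormalized target log-density is taken as $\log p(z) = \log p_{\mathrm{GMM}}(z) + \log f_{\mathrm{cls}}(z)$ (GMM log-likelihood plus the log of the classifier score), and $z'$ is accepted whenever $\log u < \min\bigl(0,\ \log p(z') - \log p(z)\bigr)$ with $u \sim \mathcal{U}(0, 1)$.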
%% Cell type:code id: tags:
```
def MCMC(gm, classifier, n_samples, sigma=0.1):  # Metropolis-style MCMC sampler
    sample_z = []
    z = gm.sample(1)[0]
    for i in range(n_samples):
        uniform_rand = np.random.uniform(size=1)
        z_next = np.random.multivariate_normal(z.squeeze(), sigma*np.eye(2)).reshape(1,-1)
        z_combined = np.concatenate((z, z_next), axis=0)
        scores = classifier(torch.Tensor(z_combined).to(device)).detach().cpu().numpy().squeeze()
        z_score, z_next_score = np.log(scores[0]), np.log(scores[1])  # classifier scores are converted to log, because gm.score returns a log density
        z_prob, z_next_prob = (gm.score(z)+z_score), (gm.score(z_next)+z_next_score)  # adding the two logs gives the joint log probability
        acceptance = min(0, (z_next_prob - z_prob))
        if i == 0:
            sample_z.append(z.squeeze())
        if np.log(uniform_rand) < acceptance:
            sample_z.append(z_next.squeeze())
            z = z_next
        else:
            pass
    return np.stack(sample_z)
```
%% Cell type:markdown id: tags:
Run the MCMC sampler for 5,000 proposal steps with sigma=0.5; only accepted proposals are kept.
%% Cell type:code id: tags:
```
sample_z = MCMC(gm=gm, classifier=cls, n_samples=5000, sigma=0.5)
WAE_comps = model._decode(torch.Tensor(sample_z).to(device)).detach().cpu().numpy()  # decoded compositions; saved as CSV and passed on to TERM
print('Sample size:', sample_z.shape)
```
%% Output
Sample size: (1129, 2)
%% Cell type:code id: tags:
```
WAE_comps=pd.DataFrame(WAE_comps)
WAE_comps.columns=column_name
WAE_comps.to_csv('comps_WAE.csv',index=False)
WAE_comps.head()
```
%% Output
Fe Ni Co Cr V Cu
0 0.621771 0.278061 0.066139 0.033976 0.000051 0.000002
1 0.290811 0.193655 0.426399 0.000012 0.089111 0.000012
2 0.356562 0.032219 0.479116 0.132074 0.000014 0.000015
3 0.232804 0.026366 0.633758 0.107050 0.000015 0.000007
4 0.431049 0.083583 0.353330 0.132014 0.000015 0.000009
%% Cell type:markdown id: tags:
Plot the sampled compositions in the latent space along with the known data.
%% Cell type:code id: tags:
```
sns.set_style('ticks')
t = time.localtime()
model_dir = os.path.join(root, '{}/{}_100.pth'.format(params['model_name'], params['model_name']))
model = WAE(raw_x.shape[1]).to(device)
model.load_state_dict(torch.load(model_dir))
model.eval()
dataset = FeatureDataset(raw_x[:], raw_y[:])
latents = get_latents(model,dataset)
low_cu = raw_x[:,5] < 0.05
low_cu_latent = latents[low_cu]
low_cu_color = raw_y[:][low_cu]
high_cu = raw_x[:,5] >= 0.05
high_cu_latent = latents[high_cu]
high_cu_color = raw_y[:][high_cu]
fig, axs = plt.subplots(figsize = (3, 3),dpi=200)
axs.set_xlim(-6,3)
axs.set_ylim(-4,5)
scatter1 = axs.scatter(low_cu_latent[:,0], low_cu_latent[:,1], c='steelblue', alpha=.55, s=2, linewidths=0, cmap='viridis')
scatter2 = axs.scatter(high_cu_latent[:,0], high_cu_latent[:,1], c=high_cu_color, alpha=.65, s=3.5, linewidths=0, cmap='Reds', marker='^')
scatter4 = axs.scatter(sample_z[:,0], sample_z[:,1], c='k', alpha=.15, s=0.8, linewidths=0, zorder=-1)
plt.show()
```
%% Output
%% Cell type:markdown id: tags:
#Two-stage ensemble regression model (TERM)
%% Cell type:markdown id: tags:
**Utility functions**
* MAPELoss - Mean Absolute Percentage Error (MAPE) loss; see the formula below
* normalizing_data - normalizes the data and creates a stratified train/test split
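As implemented below, $\mathrm{MAPE} = \frac{1}{N}\sum_{i=1}^{N}\left|\frac{y_i - \hat{y}_i}{y_i}\right|$, i.e. the mean relative error expressed as a fraction rather than a percentage.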
%% Cell type:code id: tags:
```
import torch.nn as nn
import torch
import numpy as np
import pandas as pd
from torch.utils.data import Dataset, DataLoader
import random
class MAPELoss(nn.Module):
    def __init__(self):
        super(MAPELoss, self).__init__()
    def forward(self, output, target):
        loss = torch.mean(torch.abs((target - output) / target))
        # loss = (-1)*loss
        return loss
def minmaxscaler(data):
    min = np.amin(data)
    max = np.amax(data)
    return (data - min)/(max - min)
def weights_init(m):
    classname = m.__class__.__name__
    if classname.find('BatchNorm') != -1:
        m.weight.data.normal_(1.0, 0.02)
        m.bias.data.fill_(0)
class FeatureDataset(Dataset):
    '''
    Args: x is a 2D numpy array [x_size, x_features]
    '''
    def __init__(self, x):
        self.x = x
    def __len__(self):
        return self.x.shape[0]
    def __getitem__(self, idx):
        return torch.FloatTensor(self.x[idx])
    def getBatch(self, idxs=[]):
        if idxs is None:
            return idxs
        else:
            x_features = []
            for i in idxs:
                x_features.append(self.__getitem__(i))
            return torch.stack(x_features)
def normalizing_data(data, seed=42):
    df_all = data.drop(columns=['alloy'])
    # create a min-max scaling object for the atomic-property features
    composition = df_all[['Fe','Ni','Co','Cr','V','Cu']]
    min_max_scaler = preprocessing.MinMaxScaler()
    normalized_atomic_properties = min_max_scaler.fit_transform(df_all[['VEC','AR1','AR2','PE','Density',
                                                                        'TC','MP','FI','SI','TI','M']])
    x = pd.concat([composition, pd.DataFrame(normalized_atomic_properties)], axis=1)
    x = x.iloc[:697]
    y = df_all[['TEC']][:697]
    # bins = [18,35,48,109,202,234,525,687,695]
    bins = [18,35,48,109,202,234,525,687]
    y_binned = np.digitize(y.index, bins, right=True)  # stratify by alloy family so the 85%/15% train/test split keeps the same proportion of each alloy type
    x = torch.FloatTensor(x.values)  # numpy to tensor
    y = torch.FloatTensor(y.values)  # numpy to tensor
    if torch.cuda.is_available():
        x = x.cuda()
        y = y.cuda()
    train_features, test_features, train_labels, test_labels = train_test_split(x, y, test_size=0.15, random_state=seed, stratify=y_binned)
    return train_features, test_features, train_labels, test_labels
```
%% Cell type:markdown id: tags:
**Data loading**
%% Cell type:code id: tags:
```
import datetime
import torch.utils.data as Data
import pandas as pd
import torch
import torch.nn.functional as F
import matplotlib.pyplot as plt
import numpy as np
from sklearn.model_selection import train_test_split
import torch.nn as nn
import torch.optim as optim
from bayes_opt import BayesianOptimization
import time
import os
from sklearn import preprocessing
t = time.localtime()
table = pd.DataFrame(columns=['target','batch_size','lr','module__n_hidden','module__w'])
plt.close('all')
starttime = datetime.datetime.now()
data = pd.read_csv('data_base.csv')
train_features, test_features, train_labels, test_labels = normalizing_data(data, seed=42)
```
%% Cell type:code id: tags:
```
print(torch.cuda.is_available())
```
%% Output
False
%% Cell type:markdown id: tags:
**Neural network architecture**
%% Cell type:code id: tags:
```
class Net(nn.Module):
    def __init__(self, n_feature=17, n_hidden=218, n_output=1, w=6):
        super(Net, self).__init__()
        # self.BN=torch.nn.BatchNorm1d(n_hidden)
        self.hidden1 = torch.nn.Linear(n_feature, n_hidden)
        nn.init.kaiming_normal_(self.hidden1.weight)
        self.hiddens = nn.ModuleList([nn.Linear(n_hidden, n_hidden) for i in range(w)])
        for m in self.hiddens:
            nn.init.kaiming_normal_(m.weight)
        self.predict = torch.nn.Linear(n_hidden, n_output)
        nn.init.kaiming_normal_(self.predict.weight)
    def forward(self, x):
        x = self.hidden1(x)
        # x = self.BN(x)
        # x = self.Dropout(x)
        x = F.relu(x)
        for m in self.hiddens:
            x = m(x)
            # x = self.BN(x)
            x = F.relu(x)
        x = self.predict(x)
        # x = self.BN_3(x)
        # x = self.Dropout(x)
        # output value
        return x
def train(net, num_epochs, batch_size, train_features, test_features, train_labels, test_labels,
          train_loader,
          optimizer):
    print("\n=== train begin ===")
    print(net)
    train_ls, test_ls = [], []
    loss = MAPELoss()  # MAPE: Mean Absolute Percentage Error
    for epoch in range(num_epochs):
        for x, y in train_loader:
            ls = loss(net(x).view(-1, 1), y.view(-1, 1))
            optimizer.zero_grad()
            ls.backward()
            optimizer.step()
        if epoch % 100 == 0:
            train_ls.append(loss(net(train_features).view(-1, 1), train_labels.view(-1, 1)).item())
            test_ls.append(loss(net(test_features).view(-1, 1), test_labels.view(-1, 1)).item())
            print("epoch %d: train loss %f, test loss %f" % (epoch, train_ls[-1], test_ls[-1]))
    print("=== train end ===")
def test(model, test_loader):
    model.eval()
    test_loss = 0
    n = 0
    loss = MAPELoss()
    with torch.no_grad():
        for data, target in test_loader:
            output = model(data)
            test_loss += loss(output.view(-1, 1), target.view(-1, 1)).item()  # sum up batch loss
            n += 1
    test_loss /= n
    print('Test set: Average loss: {:.4f}'.format(
        test_loss))
    return test_loss
def train_model(batch_size, lr, module__n_hidden, module__w):
    module__n_hidden = int(module__n_hidden)  # number of neurons per layer
    module__w = int(module__w)  # number of hidden layers
    batch_size = int(batch_size)
    train_dataset = Data.TensorDataset(train_features, train_labels)
    test_dataset = Data.TensorDataset(test_features, test_labels)
    train_loader = Data.DataLoader(train_dataset, batch_size, shuffle=True)
    test_loader = Data.DataLoader(test_dataset, batch_size, shuffle=True)
    net = Net(n_feature=17, n_hidden=module__n_hidden, n_output=1, w=module__w)
    if torch.cuda.is_available():
        net = net.cuda()
    n_epochs = 20
    optimizer = optim.Adam(net.parameters(), lr=lr, weight_decay=0.0001)
    train(net, n_epochs, batch_size, train_features, test_features,
          train_labels, test_labels, train_loader, optimizer)
    train_loss = test(net, train_loader)
    test_loss = test(net, test_loader)
    r = -np.abs(train_loss - test_loss)  # not used here; would penalize the train/test gap
    return -test_loss  # negative test MAPE, so that BayesianOptimization can maximize it
```
%% Cell type:markdown id: tags:
**Bayesian hyperparameter optimization**
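Note that `train_model` returns the negative test-set MAPE, so `optimizer.maximize(...)` in the next cell minimizes the test error; the continuous values proposed for `batch_size`, `module__n_hidden`, and `module__w` are cast to integers inside the objective.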
%% Cell type:code id: tags:
```
bounds = {'lr': (0.0005,0.001), 'batch_size': (32,64), 'module__n_hidden': (16,526),
'module__w': (2,10)}
optimizer = BayesianOptimization(
f=train_model,
pbounds=bounds,
random_state=1,
)
optimizer.maximize(init_points=10, n_iter=1)
print(optimizer.max)
table = pd.DataFrame(columns=['target','batch_size','lr','module__n_hidden','module__w'])
for res in optimizer.res:
    table = table.append(pd.DataFrame({'target':[res['target']],'batch_size':[res['params']['batch_size']],
                                       'lr':[res['params']['lr']], 'module__n_hidden':[res['params']['module__n_hidden']],
                                       'module__w':[res['params']['module__w']]}), ignore_index=True)
table = table.append(pd.DataFrame({'target':[optimizer.max['target']],'batch_size':[optimizer.max['params']['batch_size']],
                                   'lr':[optimizer.max['params']['lr']], 'module__n_hidden':[optimizer.max['params']['module__n_hidden']],
                                   'module__w':[optimizer.max['params']['module__w']]}), ignore_index=True)
model_name = 'Invar_inference_NN'
file_name = '{}.xlsx'.format(model_name)
endtime = datetime.datetime.now()
Rtime = endtime - starttime
print(Rtime)
table.to_excel(file_name)
```
%% Output
| iter | target | batch_... | lr | module... | module__w |
-------------------------------------------------------------------------
=== train begin ===
Net(
(hidden1): Linear(in_features=17, out_features=16, bias=True)
(hiddens): ModuleList(
(0): Linear(in_features=16, out_features=16, bias=True)
(1): Linear(in_features=16, out_features=16, bias=True)
(2): Linear(in_features=16, out_features=16, bias=True)
(3): Linear(in_features=16, out_features=16, bias=True)
)
(predict): Linear(in_features=16, out_features=1, bias=True)
)
epoch 0: train loss 0.967301, test loss 0.964457
=== train end ===
Test set: Average loss: 0.6603
Test set: Average loss: 0.6365
|  1  | -0.6365  |  45.34  |  0.000860 |  16.06  |  4.419  |
=== train begin ===
Net(
(hidden1): Linear(in_features=17, out_features=110, bias=True)
(hiddens): ModuleList(
(0): Linear(in_features=110, out_features=110, bias=True)
(1): Linear(in_features=110, out_features=110, bias=True)
(2): Linear(in_features=110, out_features=110, bias=True)
(3): Linear(in_features=110, out_features=110, bias=True)
)
(predict): Linear(in_features=110, out_features=1, bias=True)
)
epoch 0: train loss 0.952195, test loss 0.945242
=== train end ===
Test set: Average loss: 0.4462
Test set: Average loss: 0.4631
|  2  | -0.4631  |  36.7  |  0.000546 |  111.0  |  4.764  |
=== train begin ===
Net(
(hidden1): Linear(in_features=17, out_features=229, bias=True)
(hiddens): ModuleList(
(0): Linear(in_features=229, out_features=229, bias=True)
(1): Linear(in_features=229, out_features=229, bias=True)
(2): Linear(in_features=229, out_features=229, bias=True)
(3): Linear(in_features=229, out_features=229, bias=True)
(4): Linear(in_features=229, out_features=229, bias=True)
(5): Linear(in_features=229, out_features=229, bias=True)
(6): Linear(in_features=229, out_features=229, bias=True)
)
(predict): Linear(in_features=229, out_features=1, bias=True)
)
epoch 0: train loss 0.887715, test loss 0.865657
=== train end ===
Test set: Average loss: 0.5808
Test set: Average loss: 0.5417
|  3  | -0.5417  |  44.7  |  0.000769 |  229.8  |  7.482  |
=== train begin ===
Net(
(hidden1): Linear(in_features=17, out_features=29, bias=True)
(hiddens): ModuleList(
(0): Linear(in_features=29, out_features=29, bias=True)
(1): Linear(in_features=29, out_features=29, bias=True)
(2): Linear(in_features=29, out_features=29, bias=True)
(3): Linear(in_features=29, out_features=29, bias=True)
(4): Linear(in_features=29, out_features=29, bias=True)
(5): Linear(in_features=29, out_features=29, bias=True)
(6): Linear(in_features=29, out_features=29, bias=True)
)
(predict): Linear(in_features=29, out_features=1, bias=True)
)
epoch 0: train loss 0.970142, test loss 0.981575
=== train end ===
Test set: Average loss: 0.4597
Test set: Average loss: 0.4817
|  4  | -0.4817  |  38.54  |  0.000939 |  29.97  |  7.364  |
=== train begin ===
Net(
(hidden1): Linear(in_features=17, out_features=87, bias=True)
(hiddens): ModuleList(
(0): Linear(in_features=87, out_features=87, bias=True)
(1): Linear(in_features=87, out_features=87, bias=True)
(2): Linear(in_features=87, out_features=87, bias=True)
)
(predict): Linear(in_features=87, out_features=1, bias=True)
)
epoch 0: train loss 0.989642, test loss 0.933345
=== train end ===
Test set: Average loss: 0.3933
Test set: Average loss: 0.4565
|  5  | -0.4565  |  45.35  |  0.000779 |  87.6  |  3.585  |
=== train begin ===
Net(
(hidden1): Linear(in_features=17, out_features=175, bias=True)
(hiddens): ModuleList(
(0): Linear(in_features=175, out_features=175, bias=True)
(1): Linear(in_features=175, out_features=175, bias=True)
(2): Linear(in_features=175, out_features=175, bias=True)
(3): Linear(in_features=175, out_features=175, bias=True)
(4): Linear(in_features=175, out_features=175, bias=True)
(5): Linear(in_features=175, out_features=175, bias=True)
(6): Linear(in_features=175, out_features=175, bias=True)
)
(predict): Linear(in_features=175, out_features=1, bias=True)
)
epoch 0: train loss 0.997312, test loss 0.987231
=== train end ===
Test set: Average loss: 0.4088
Test set: Average loss: 0.4704
|  6  | -0.4704  |  57.62  |  0.000984 |  175.8  |  7.539  |
=== train begin ===
Net(
(hidden1): Linear(in_features=17, out_features=59, bias=True)
(hiddens): ModuleList(
(0): Linear(in_features=59, out_features=59, bias=True)
(1): Linear(in_features=59, out_features=59, bias=True)
)
(predict): Linear(in_features=59, out_features=1, bias=True)
)
epoch 0: train loss 0.945286, test loss 0.931218
=== train end ===
Test set: Average loss: 0.4456
Test set: Average loss: 0.4744
|  7  | -0.4744  |  60.04  |  0.000947 |  59.37  |  2.312  |
=== train begin ===
Net(
(hidden1): Linear(in_features=17, out_features=66, bias=True)
(hiddens): ModuleList(
(0): Linear(in_features=66, out_features=66, bias=True)
(1): Linear(in_features=66, out_features=66, bias=True)
(2): Linear(in_features=66, out_features=66, bias=True)
(3): Linear(in_features=66, out_features=66, bias=True)
(4): Linear(in_features=66, out_features=66, bias=True)
)
(predict): Linear(in_features=66, out_features=1, bias=True)
)
epoch 0: train loss 0.951440, test loss 0.957168
=== train end ===
Test set: Average loss: 0.3364
Test set: Average loss: 0.3963
|  8  | -0.3963  |  37.43  |  0.000939 |  66.16  |  5.369  |
=== train begin ===
Net(
(hidden1): Linear(in_features=17, out_features=368, bias=True)
(hiddens): ModuleList(
(0): Linear(in_features=368, out_features=368, bias=True)
(1): Linear(in_features=368, out_features=368, bias=True)
(2): Linear(in_features=368, out_features=368, bias=True)
(3): Linear(in_features=368, out_features=368, bias=True)
)
(predict): Linear(in_features=368, out_features=1, bias=True)
)
epoch 0: train loss 0.948707, test loss 0.970389
=== train end ===
Test set: Average loss: 0.3856
Test set: Average loss: 0.4180
|  9  | -0.418  |  62.65  |  0.000766 |  368.9  |  4.524  |
=== train begin ===
Net(
(hidden1): Linear(in_features=17, out_features=25, bias=True)
(hiddens): ModuleList(
(0): Linear(in_features=25, out_features=25, bias=True)
(1): Linear(in_features=25, out_features=25, bias=True)
(2): Linear(in_features=25, out_features=25, bias=True)
(3): Linear(in_features=25, out_features=25, bias=True)
(4): Linear(in_features=25, out_features=25, bias=True)
(5): Linear(in_features=25, out_features=25, bias=True)
(6): Linear(in_features=25, out_features=25, bias=True)
(7): Linear(in_features=25, out_features=25, bias=True)
)
(predict): Linear(in_features=25, out_features=1, bias=True)
)
epoch 0: train loss 0.964479, test loss 0.965559
=== train end ===
Test set: Average loss: 0.4016
Test set: Average loss: 0.5237
|  10  | -0.5237  |  53.97  |  0.000917 |  25.33  |  8.001  |
=== train begin ===
Net(
(hidden1): Linear(in_features=17, out_features=526, bias=True)
(hiddens): ModuleList(
(0): Linear(in_features=526, out_features=526, bias=True)
(1): Linear(in_features=526, out_features=526, bias=True)
(2): Linear(in_features=526, out_features=526, bias=True)
(3): Linear(in_features=526, out_features=526, bias=True)
(4): Linear(in_features=526, out_features=526, bias=True)
(5): Linear(in_features=526, out_features=526, bias=True)
(6): Linear(in_features=526, out_features=526, bias=True)
(7): Linear(in_features=526, out_features=526, bias=True)
(8): Linear(in_features=526, out_features=526, bias=True)
(9): Linear(in_features=526, out_features=526, bias=True)
)
(predict): Linear(in_features=526, out_features=1, bias=True)
)
epoch 0: train loss 0.967434, test loss 0.948400
=== train end ===
Test set: Average loss: 0.5070
Test set: Average loss: 0.6614
|  11  | -0.6614  |  32.0  |  0.001  |  526.0  |  10.0  |
=========================================================================
{'target': -0.3962911367416382, 'params': {'batch_size': 37.43457342606621, 'lr': 0.0009390712517147066, 'module__n_hidden': 66.15688525485555, 'module__w': 5.368861000040417}}
0:00:20.704944
%% Cell type:code id: tags:
```
table.head()
```
%% Output
target batch_size lr module__n_hidden module__w
0 -0.636508 45.344704 0.000860 16.058331 4.418661
1 -0.463091 36.696189 0.000546 110.992708 4.764486
2 -0.541740 44.696559 0.000769 229.789202 7.481756
3 -0.481740 38.542472 0.000939 29.967673 7.363740
4 -0.456523 45.353754 0.000779 87.597339 3.584812
%% Cell type:markdown id: tags:
**Load the top-2 NN hyperparameter sets and train with 2 different seeds**
Because the dataset is small, we additionally train each of the best models with different random seeds in order to obtain a robust ensemble.
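A minimal sketch (an assumption, not taken from the original notebook) of how the saved checkpoints could be combined at inference time, assuming the ensemble prediction is simply the mean over the individual networks:

```
# Hypothetical ensemble averaging over the saved NN checkpoints.
# Assumption: each checkpoint is reloaded into a Net with the matching
# hyperparameters (n_hidden, w) before averaging the predictions.
preds = []
for i in range(1, 3):          # the two best hyperparameter sets
    for j in range(40, 42):    # the two seeds used below
        net = Net(n_feature=17,
                  n_hidden=int(target.at[i, 'module__n_hidden']),
                  n_output=1,
                  w=int(target.at[i, 'module__w']))
        if torch.cuda.is_available():
            net = net.cuda()
        net.load_state_dict(torch.load('NN_rank_{}-seed_{}.pt'.format(i, j)))
        net.eval()
        with torch.no_grad():
            preds.append(net(test_features).cpu().numpy())
ensemble_pred = np.mean(preds, axis=0)  # simple mean over the ensemble members
```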
%% Cell type:code id: tags:
```
import datetime
import torch.utils.data as Data
import pandas as pd
import torch
import torch.nn.functional as F
import matplotlib.pyplot as plt
import numpy as np
from sklearn.model_selection import train_test_split
import torch.nn as nn
import torch.optim as optim
import time
import os
import pickle
import seaborn as sns
t = time.localtime()
plt.close('all')
target = pd.read_excel('Invar_inference_NN.xlsx')
starttime = datetime.datetime.now()
for i in range(1, 3):  # loop over the two best hyperparameter sets from the BO table
    for j in range(40, 42):  # two different random seeds
        train_features, test_features, train_labels, test_labels = normalizing_data(data, seed=j)
        lr = target.at[i,'lr']
        module__n_hidden = target.at[i,'module__n_hidden']
        module__w = target.at[i,'module__w']
        batch_size = target.at[i,'batch_size']
        module__n_hidden = int(module__n_hidden)
        module__w = int(module__w)
        batch_size = int(batch_size)
        print(module__w)
        batch_size = target.at[i,'batch_size']  # choose the 'batch_size' parameter of the i-th row
        lr = target.at[i,'lr']
        module__n_hidden = target.at[i,'module__n_hidden']
        module__w = target.at[i,'module__w']
        module__n_hidden = int(module__n_hidden)
        module__w = int(module__w)
        batch_size = int(batch_size)
        print(module__w)
        train_dataset = Data.TensorDataset(train_features, train_labels)
        test_dataset = Data.TensorDataset(test_features, test_labels)
        train_loader = Data.DataLoader(train_dataset, batch_size, shuffle=True)
        test_loader = Data.DataLoader(test_dataset, batch_size, shuffle=True)
        class Net(nn.Module):
            def __init__(self, n_feature=17, n_hidden=module__n_hidden, n_output=1, w=module__w):
                super(Net, self).__init__()
                # self.BN=torch.nn.BatchNorm1d(n_hidden)
                self.hidden1 = torch.nn.Linear(n_feature, n_hidden)
                nn.init.kaiming_normal_(self.hidden1.weight)
                self.hiddens = nn.ModuleList([nn.Linear(n_hidden, n_hidden) for i in range(w)])
                for m in self.hiddens:
                    nn.init.kaiming_normal_(m.weight)
                self.predict = torch.nn.Linear(n_hidden, n_output)
                nn.init.kaiming_normal_(self.predict.weight)
            def forward(self, x):
                x = self.hidden1(x)
                # x = self.BN(x)
                # x = self.Dropout(x)
                x = F.relu(x)
                for m in self.hiddens:
                    x = m(x)
                    # x = self.BN(x)
                    x = F.relu(x)
                x = self.predict(x)
                # x = self.BN_3(x)
                # x = self.Dropout(x)
                return x
        def plotCurve(x_vals, y_vals,
                      x_label, y_label,
                      x2_vals=None, y2_vals=None,
                      legend=None,
                      figsize=(3.5, 2.5)):
            # set figsize
            plt.xlabel(x_label)
            plt.ylabel(y_label)
            plt.plot(x_vals, y_vals)
            if x2_vals and y2_vals:
                plt.plot(x2_vals, y2_vals, linestyle=':')
            if legend:
                plt.legend(legend)
        #training
        print("\n=== train begin ===")
        net = Net()
        print(net)
        if torch.cuda.is_available():
            net = net.cuda()
        train_ls, test_ls = [], []
        loss = MAPELoss()
        n_epochs = 10
        optimizer = optim.Adam(net.parameters(), lr=lr, weight_decay=0.0001)
        for epoch in range(n_epochs):
            for x, y in train_loader:
                ls = loss(net(x).view(-1, 1), y.view(-1, 1))
                optimizer.zero_grad()
                ls.backward()
                optimizer.step()
            train_ls.append(loss(net(train_features).view(-1, 1), train_labels.view(-1, 1)).item())
            test_ls.append(loss(net(test_features).view(-1, 1), test_labels.view(-1, 1)).item())
            if epoch % 100 == 0:
                print("epoch %d: train loss %f, test loss %f" % (epoch, train_ls[-1], test_ls[-1]))
        print("plot curves")
        plotCurve(range(1, n_epochs + 1), train_ls, "epoch", "loss", range(1, n_epochs + 1), test_ls, ["train", "test"])
        plt.text(60, 0.7, 'Loss=%.4f' % test_ls[-1], fontdict={'size': 20, 'color': 'red'})
        fig_name_1 = '{}-{}_1.png'.format(i, j)
        plt.savefig(fig_name_1, format='png', dpi=300)
        #plotting
        net.eval()
        predict = net(test_features)
        predict = predict.cpu()
        predict = predict.data.numpy()
        plt.figure()
        sns.regplot(x=predict, y=test_labels.cpu().data.numpy(), color='g')
        fig_name_2 = 'NN_rank_{}-seed_{}.png'.format(i, j)
        plt.savefig(fig_name_2, format='png', dpi=300)
        #save the model
        net_name = 'NN_rank_{}-seed_{}.pt'.format(i, j)
        torch.save(net.state_dict(), net_name)
endtime = datetime.datetime.now()
Rtime = endtime - starttime
print(Rtime)
```
%% Output
4
4
=== train begin ===
Net(
(hidden1): Linear(in_features=17, out_features=110, bias=True)
(hiddens): ModuleList(
(0): Linear(in_features=110, out_features=110, bias=True)
(1): Linear(in_features=110, out_features=110, bias=True)
(2): Linear(in_features=110, out_features=110, bias=True)
(3): Linear(in_features=110, out_features=110, bias=True)
)
(predict): Linear(in_features=110, out_features=1, bias=True)
)
epoch 0: train loss 0.935467, test loss 0.921161
plot curves
4
4
=== train begin ===
Net(
(hidden1): Linear(in_features=17, out_features=110, bias=True)
(hiddens): ModuleList(
(0): Linear(in_features=110, out_features=110, bias=True)
(1): Linear(in_features=110, out_features=110, bias=True)
(2): Linear(in_features=110, out_features=110, bias=True)
(3): Linear(in_features=110, out_features=110, bias=True)
)
(predict): Linear(in_features=110, out_features=1, bias=True)
)
epoch 0: train loss 0.928591, test loss 0.887510
plot curves
7
7
=== train begin ===
Net(
(hidden1): Linear(in_features=17, out_features=229, bias=True)
(hiddens): ModuleList(
(0): Linear(in_features=229, out_features=229, bias=True)
(1): Linear(in_features=229, out_features=229, bias=True)
(2): Linear(in_features=229, out_features=229, bias=True)
(3): Linear(in_features=229, out_features=229, bias=True)
(4): Linear(in_features=229, out_features=229, bias=True)
(5): Linear(in_features=229, out_features=229, bias=True)
(6): Linear(in_features=229, out_features=229, bias=True)
)
(predict): Linear(in_features=229, out_features=1, bias=True)
)
epoch 0: train loss 0.862380, test loss 0.871121
plot curves
7
7
=== train begin ===
Net(
(hidden1): Linear(in_features=17, out_features=229, bias=True)
(hiddens): ModuleList(
(0): Linear(in_features=229, out_features=229, bias=True)
(1): Linear(in_features=229, out_features=229, bias=True)
(2): Linear(in_features=229, out_features=229, bias=True)
(3): Linear(in_features=229, out_features=229, bias=True)
(4): Linear(in_features=229, out_features=229, bias=True)
(5): Linear(in_features=229, out_features=229, bias=True)
(6): Linear(in_features=229, out_features=229, bias=True)
)
(predict): Linear(in_features=229, out_features=1, bias=True)
)
epoch 0: train loss 0.998901, test loss 0.993529
plot curves
0:00:06.117407
%% Cell type:markdown id: tags:
**Gradient Boosting Decision Tree Ensemble (GBDT)**
%% Cell type:code id: tags:
```
import os
import time
from bayes_opt import BayesianOptimization
#from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from lightgbm import LGBMRegressor
import numpy as np
#import seaborn as sns
#from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import explained_variance_score
#from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import datetime
import pandas as pd
t = time.localtime()
model_name = 'Invar_inference_GBDT'
file_name = '{}.xlsx'.format(model_name)
data = pd.read_csv('data_base.csv')
train_features, test_features, train_labels, test_labels = normalizing_data(data,seed=42)
train_features, test_features = train_features.cpu().data.numpy(),test_features.cpu().data.numpy()
train_labels, test_labels = train_labels.cpu().data.numpy(), test_labels.cpu().data.numpy()
train_labels, test_labels = train_labels.reshape(-1), test_labels.reshape(-1)
def train_model(num_leaves,
                min_child_samples,
                learning_rate,
                n_estimators,
                max_bin,
                colsample_bytree,
                subsample,
                max_depth,
                reg_alpha,
                reg_lambda,
                min_split_gain,
                min_child_weight
                ):
    params = {
        "num_leaves": int(round(num_leaves)),
        'min_child_samples': int(round(min_child_samples)),
        'learning_rate': learning_rate,
        'n_estimators': int(round(n_estimators)),
        'max_bin': int(round(max_bin)),
        'colsample_bytree': max(min(colsample_bytree, 1), 0),
        'subsample': max(min(subsample, 1), 0),
        'max_depth': int(round(max_depth)),
        'reg_alpha': max(reg_alpha, 0),
        'reg_lambda': max(reg_lambda, 0),
        'min_split_gain': min_split_gain,
        'min_child_weight': min_child_weight,
        'verbose': -1
    }
    model = LGBMRegressor(**params)
    model.fit(train_features, train_labels)
    y_pred = model.predict(test_features)
    error = -np.mean(np.abs((test_labels - y_pred) / test_labels))  # negative MAPE on the test set
    return error
bounds = {'num_leaves': (5, 60),#50
'min_child_samples':(1, 50),
'learning_rate': (0.001, 1),
'n_estimators': (5, 200),#100
'max_bin': (5, 100),#10
'colsample_bytree': (0.5, 1),
'subsample': (0.1, 2),
'max_depth': (1, 60),#10
'reg_alpha': (0.01, 1), #5
'reg_lambda': (0.01, 1),#5
'min_split_gain': (0.001, 0.1),
'min_child_weight': (0.0001, 30)}
optimizer = BayesianOptimization(
f=train_model,
pbounds=bounds,
random_state=1,
)
optimizer.maximize(init_points = 10, n_iter=1)
table = pd.DataFrame(columns=['target', 'colsample_bytree', 'learning_rate', 'max_bin',
'max_depth','min_child_samples','min_child_weight','min_split_gain',
'n_estimators','num_leaves','reg_alpha','reg_lambda','subsample'])
for res in optimizer.res:
    table = table.append(pd.DataFrame({'target':[res['target']],'colsample_bytree':[res['params']['colsample_bytree']],
                                       'colsample_bytree':[res['params']['colsample_bytree']],
                                       'learning_rate':[res['params']['learning_rate']],
                                       'max_bin':[res['params']['max_bin']],