Commit 385d8aa9 authored by Adam Fekete's avatar Adam Fekete

adding Gp python module

parent ba6fedef
import numpy as np
import matplotlib.pyplot as plt
from numpy import linalg as LA
import random
from mpl_toolkits.mplot3d import axes3d
from GP_custom_gen import GaussianProcess3 as GPvec
import os.path
import datetime
enlarge_database = False # if True, fake configurations with particles reflected are added
### Importing data from simulation ###
InDir = "ASE_2D/T=0.05"
forces = np.asarray(np.load(os.path.join(InDir,"forcest.npy")))
confs = np.asarray(np.load(os.path.join(InDir,"confst.npy")))
lenc = len(confs)
print("Database lenght is: " ,lenc)
def compl(ncal, ntest):
"""
Inputs: 'ncal' is number of training point, 'ntest' is the number of testing points
Outputs: array with probability of each error, array with bin limits
"""
### Subsampling from original database ###
ntot = ncal + ntest
ind = np.arange(lenc)
ind_ntot = np.random.choice(ind, size=ntot, replace=False)
confs_ntot = confs[ind_ntot]
forces_ntot = forces[ind_ntot]
print("Ntot Database lenght is: " ,len(forces_ntot))
### Spliting database in training/testing sets ###
tr_confs = confs_ntot[0:ncal]
tr_forces = forces_ntot[0:ncal]
tst_confs = confs_ntot[ncal:]
tst_forces = forces_ntot[ncal:]
print("Training Database lenght is: " ,len(tr_confs))
print("Testing Database lenght is: " ,len(tst_confs))
### Training the GP model ###
t0train = datetime.datetime.now()
gp = GPvec( ker=[ 'id'], fvecs =[ 'cov_mat'] , nugget = 1e-14, theta0=np.array([ None]), sig = .25, bounds = ((1, 10),),
optimizer= None, calc_error = False, eval_grad = False)
gp.fit(tr_confs, tr_forces)
tftrain = datetime.datetime.now()
print("Training computational time is", (tftrain-t0train).total_seconds())
### Testing the GP model ###
t0test = datetime.datetime.now()
f_pred= gp.predict(tst_confs) # , err_pred
f_pred.resize(ntest, 2)
tftest = datetime.datetime.now()
print("Testing computational time is", (tftest-t0test).total_seconds())
### Getting errors made in testing ###
errors = np.zeros(len(tst_forces))
for i in np.arange(len(errors)):
errors[i] = np.linalg.norm(f_pred[i]-tst_forces[i])
return errors, np.mean(np.absolute(tst_forces))
errors = []
avab_forces = []
for ncal in [2, 5, 10, 20, 40, 80, 160]:
err, avab_force = compl(ncal, 1000)
errors.append(err)
avab_forces.append(avab_force)
OutDir ="ASE_2D/T=0.05/ComplRuns/O2/2nd"
np.save(os.path.join(OutDir,"errors.npy"), errors)
np.save(os.path.join(OutDir,"avab_forces.npy"), avab_forces)
print avab_forces
"""
plt.plot(tst_forces, tst_forces)
plt.plot(tst_forces, f_pred, 'ro')
plt.ylabel('prediction')
plt.xlabel('real value')
plt.show()
"""
This diff is collapsed.
This diff is collapsed.
import numpy as np
#from pathos.multiprocessing import ProcessingPool
D = 2
class MLCalculator:
def __init__(self, gp, nl):
self.gp = gp
self.nl = nl
def get_potential_energy(self, atoms):
return -1
def get_forces(self, atoms):
n = len(atoms)
nl = self.nl
gp = self.gp
nl.update(atoms)
cell = atoms.get_cell()
confs = []
for a in np.arange(n):
indices, offsets = nl.get_neighbors(a)
offsets = np.dot(offsets, cell)
conf = np.zeros((len(indices), D))
for i, (a2, offset) in enumerate(zip(indices, offsets)):
d = atoms.positions[a2] + offset - atoms.positions[a]
conf[i] = d[0:D]
confs.append(conf)
confs = np.array(confs)
### MULTIPROCESSING
"""
if __name__ == 'MLCalculator':
nods = 2
pool = ProcessingPool(nodes= nods)
clist = [confs[i*n/nods:(i+1)*n/nods] for i in np.arange(nods)]
result = np.array(pool.map(gp.predict,clist))
"""
result = gp.predict(confs)
pad = np.zeros((n, 1))
result = np.concatenate((result, pad), axis = 1)
forces = np.reshape(result, (n,3))
return forces
import numpy as np
import matplotlib.pyplot as plt
from numpy import linalg as LA
import random
import sys
sys.path.insert(0, '../ML_of_Forces/2D')
from GP_custom_gen import GaussianProcess3 as GPvec
import os.path
import datetime
### Importing data from simulation ###
InDir = "../OneDrive/ML_Data/2D//T=0.05"
forces = np.asarray(np.load(os.path.join(InDir,"forcest.npy")))
confs = np.asarray(np.load(os.path.join(InDir,"confst.npy")))
lenc = len(forces)
print("Database lenght is: ", lenc)
### Random subsampling from big database ###
ncal = 10
ntest = 50
ntot = ncal + ntest
ind = np.arange(lenc)
ind_ntot = np.random.choice(ind, size=ntot, replace=False)
ind_ncal = ind_ntot[0:ncal]
ind_ntest = ind_ntot[ncal:ntot]
print("Ntot Database lenght is: " ,len(ind_ntot))
### Spliting database in training/testing sets ###
tr_confs = confs[ind_ncal]
tr_forces = forces[ind_ncal]
tst_confs = confs[ind_ntest]
tst_forces = forces[ind_ntest]
print("Initial shapes are ",np.shape(tr_confs), np.shape(tr_forces), np.shape(tst_confs), np.shape(tst_forces))
print("Training Database lenght is: " ,len(tr_confs))
print("Testing Database lenght is: " ,len(tst_confs))
### Training and testing of the GP model ###
t0 = datetime.datetime.now()
### Training the GP model ###
t0train = datetime.datetime.now()
# optimization methods: None, "fmin_l_bfgs_b", "Nelder-Mead"
gp = GPvec( ker=[ 'id'], fvecs =[ 'cov_sim_sq'] , nugget = 1e-10, theta0=np.array([1]), sig = 0.4, bounds = ((5, 1000000.),),
optimizer= None, calc_error = True, eval_grad = True)
gp.fit(tr_confs, tr_forces)
tftrain = datetime.datetime.now()
print("Training computational time is", (tftrain-t0train).total_seconds())
### Testing the GP model ###
t0test = datetime.datetime.now()
f_pred , err_pred= gp.predict(tst_confs) # , err_pred
f_pred.resize(ntest, 2)
tftest = datetime.datetime.now()
print("Testing computational time is", (tftest-t0test).total_seconds())
tf = datetime.datetime.now()
print("Computational time is", (tf-t0).total_seconds())
### Testing performance of the GP Model ###
errors = np.zeros(len(tst_forces))
for i in np.arange(len(errors)):
errors[i] = np.linalg.norm(f_pred[i]-tst_forces[i])
mean_f = np.mean(np.sqrt(np.sum(tst_forces**2, axis = 1)))
print(mean_f)
print("Mean value of error is ", np.mean(errors), "Mean force in testing set is ", mean_f, "Mean relative error is ", np.mean(errors)/mean_f)
### Plots ###
fs = 15
fig = plt.figure(1)
plt.plot(tst_forces, tst_forces, 'b-')
plt.plot( f_pred,tst_forces, 'ro')
plt.ylabel('real value', fontsize = fs)
plt.xlabel('prediction', fontsize = fs)
plt.show()
1/0
fig = plt.figure(2)
plt.plot(errors, errors)
plt.plot( 2*np.sqrt(err_pred),errors, 'ro')
plt.ylabel('real value', fontsize = fs)
plt.xlabel('prediction', fontsize = fs)
plt.show()
fig = plt.figure(3)
plt.hist(errors, bins = 40, normed = 1., alpha = 0.75, range=(0, 4))
plt.ylabel('$p(\Delta F)$', fontsize = fs)
plt.xlabel('Error on prediction $(\Delta F)$', fontsize = fs)
plt.show()
import numpy as np
from scipy import interpolate
D = 2
class MBExp:
"""
This class maps a trained gp process onto explicit basis functions
"""
def __init__(self, gp):
self.gp = gp
### interpolation ###
def PW_L_fit(self, d0, df, Delta_d = 0.1): # Pairwise, linear fit
gp = self.gp
npoints = round((df-d0)/Delta_d)
xgrid = np.linspace(d0, df, npoints)
confs = np.zeros((npoints, 1, D))
confs[:, 0, 0] = xgrid
forces = gp.predict(confs)
if (forces[:, 1] > 0.).any():
print("WARNING, force field is not exactly pairwise")
xforces = forces[:, 0]
interp = interpolate.interp1d(xgrid, xforces)
self.interp = interp
def PW_S_fit(self, d0, df, Delta_d = 0.1, k=3): # Pairwise, spline fit of order k
gp = self.gp
npoints = round((df-d0)/Delta_d)
xgrid = np.linspace(d0, df, npoints)
confs = np.zeros((npoints, 1, D))
confs[:, 0, 0] = xgrid
forces = gp.predict(confs)
if (forces[:, 1] > 0.).any():
print("WARNING, force field is not exactly pairwise")
xforces = forces[:, 0]
interp = interpolate.InterpolatedUnivariateSpline(xgrid, xforces, k=k, ext = 3)
self.interp = interp
def TB_L_fit(self, rs, ts):
gp = self.gp
Lr = len(rs)
Lt = len(ts)
# Brute force, to be optimized
from numba import jit
confs = np.zeros((Lr*Lr*Lt, 2, D))
i = 0
for r1 in rs:
for r2 in rs:
for theta in ts:
r1c = np.array([r1, 0.])
r2c = np.array([r2*np.cos(theta), r2*np.sin(theta)])
confs[i, 0] = r1c
confs[i, 1] = r2c
i += 1
forces = gp.predict(confs)
if (forces[:, 1] > 0.).any():
print("WARNING, force field is not exactly pairwise")
xforces = forces[:, 0]
yforces = forces[:, 1]
xforces = np.reshape(xforces, (Lr,Lr,Lt))
yforces = np.reshape(yforces, (Lr,Lr,Lt))
interpx = interpolate.RegularGridInterpolator((rs,rs, ts), xforces )
interpy = interpolate.RegularGridInterpolator((rs,rs, ts), yforces )
self.interp = [interpx, interpy]
def TB_L_fit_opt(self, rs1, rs2, phis): # Three body linear fit
gp = self.gp
#nrpoints = round((df-d0)/Delta_d)
#rgrid = np.linspace(d0, df, num = nrpoints)
#nphipoints = round(np.pi/Delta_phi)
#phigrid = np.linspace(0, np.pi, num = nphipoints)
rgrid1, rgrid2, phigrid = rs1, rs2, phis
nrpoints1, nrpoints2, nphipoints = len(rgrid1), len(rgrid2), len(phigrid)
confs = np.zeros((nrpoints1*nrpoints2*nphipoints, 2, D))
confs[:, 0, 0] = np.repeat(rgrid1, (nrpoints2 * nphipoints)) #The configurations of the first atom are all on the x axis, and they repeat. So they are 0 0 .... 0 0.1 0.1 .... 0.1 0.2 etc
confs[:, 1, 0] = np.tile(np.reshape(np.outer(rgrid2, np.cos(phigrid)), nphipoints*nrpoints2), nrpoints1)
confs[:,1,1] = np.tile(np.reshape(np.outer(rgrid2 , np.sin(phigrid)), nrpoints2*nphipoints), nrpoints1)
print(np.shape(confs))
#delete symmetric configurations in order to halve the prediction time ( do it later)
forces = gp.predict(confs)
if (np.abs(forces[:,1]) > 0.000001).any():
print("WARNING, force field is not exactly three body")
xforces = forces[:,0]
yforces = forces[:,1]
xforces = np.reshape(xforces, (nrpoints1, nrpoints2, nphipoints)) #Reshaping is probably correct
yforces = np.reshape(yforces, (nrpoints1, nrpoints2, nphipoints))
interpx = interpolate.RegularGridInterpolator((rgrid1, rgrid2, phigrid), xforces, method='linear', bounds_error = True)
interpy = interpolate.RegularGridInterpolator((rgrid1, rgrid2, phigrid), yforces, method='linear', bounds_error = True)
self.interpx = interpx
self.interpy = interpy
self.interp = [interpx, interpy]
### prediction ###
def pair_forces_scalars(self, ds): # Force magnitudes as a function of the distances ds
interp = self.interp
return interp(ds)
def pair_potential_scalars(self, d0, df, Delta_d = 0.1): # Potential Visualization
interp = self.interp
npoints = round((df-d0)/Delta_d)
distances = np.linspace(df, d0, npoints)
forces = interp(distances)
energies = np.cumsum(forces*Delta_d)
return - np.flipud(energies)
def pair_force_vector(self, r): # Force as a function of a vector
interp = self.interp
d = np.linalg.norm(r)
r_hat = r/d
f_scalar = interp(d)
f_vector = f_scalar * r_hat
return f_vector
def pair_force_conf(self, rs): # Force as a function of a configuration
interp = self.interp
ds = np.sqrt(np.einsum('nd, nd -> n', rs, rs))
rs_hat = np.einsum('nd, n -> nd',rs, 1./ds)
fs_scalars = interp(ds)
fs_vectors = np.einsum('n, nd -> d', fs_scalars, rs_hat)
return fs_vectors
def pair_forces_confs(self, confs): # Forces as a function of configurations
interp = self.interp
pair_force_conf = self.pair_force_conf
forces = np.zeros((len(confs), D))
for c in np.arange(len(confs)):
rs = np.array((confs[c]))
force = pair_force_conf(rs)
forces[c] = force
return forces
# 3 body predictions
def tri_forces_grid(self, grid): # Force magnitudes as a function of the distances ds
interp = self.interp
fx = interp[0](grid)
fy = interp[1](grid)
return fy
def tri_force_conf(self, rs): # Force as a function of a configuration
interpx = self.interpx
interpy = self.interpy
ds = np.sqrt(np.einsum('nd, nd -> n', rs, rs))
rs_hat = np.einsum('nd, n -> nd',rs, 1./ds)
numatoms = len(ds)
totforce = np.zeros(D)
for i in np.arange(numatoms):
for j in np.arange(numatoms):
r1, r2 = ds[i], ds[j]
phi = np.arccos(min(np.dot(rs_hat[i], rs_hat[j]),1))
forcex = interpx([r1,r2,phi]) #forcex and forcey values are ridicolous (something like 180 eV/A)
forcey = interpy([r1,r2,phi])
r1_3 = np.concatenate((rs_hat[i], [0.]), axis = 0)
r2_3 = np.concatenate((rs_hat[j], [0.]), axis = 0)
y = np.cross(np.cross(r1_3, r2_3), r1_3)
#y = np.dot([[0, -1],[1, 0]], rs_hat[i])
totforce += forcex*rs_hat[i]
totforce += forcey*y[0:1]#(np.cross(np.cross(rs_hat[i], rs_hat[j]), rs_hat[i]))
#totforce = np.asarray(totforce)
return totforce
def tri_forces_confs(self, confs): # Forces as a function of configurations
interp = self.interp
tri_force_conf = self.tri_force_conf
forces = np.zeros((len(confs), D))
for c in np.arange(len(confs)):
rs = np.array((confs[c]))
force = tri_force_conf(rs)
forces[c] = force
return forces
from ase import Atoms
import numpy as np
from ase.calculators.neighborlist import NeighborList
import matplotlib.pyplot as plt
import os.path
import sys
sys.path.insert(0, '../')
from GP_custom_gen import GaussianProcess3 as GPvec
from MLCalculator import MLCalculator
from quippy.potential import Potential
from ase.calculators.lj import LennardJones as LJase
# Training ML calculator
InDir = "../ASE_2D/tests"
forces = np.asarray(np.load(os.path.join(InDir,"forcest.npy")))
confs = np.asarray(np.load(os.path.join(InDir,"confst.npy")))
ncal = 50
ind = np.arange(len(forces))
ind_ncal = np.random.choice(ind, size=ncal, replace=False)
tr_confs = confs[ind_ncal]
tr_forces = forces[ind_ncal]
gp = GPvec( ker=[ 'id'], fvecs =['cov_mat'] , nugget = 1e-6, theta0=np.array([1]), sig = .5, bounds = ((1, 100.),),
optimizer= None, calc_error = False, eval_grad = False)
gp.fit(tr_confs, tr_forces)
# setup alternative calcs
sw_pot = Potential('IP SW', param_filename=os.path.join("/home/k1192572/QUIP/share/Parameters","ip.parms.SW.xml"))
dftb_pot = Potential('TB DFTB', param_filename=os.path.join("/home/k1192572/QUIP/share/Parameters",'DFTB_params.xml'))
lj_pot = LJase(sigma=2.35/(2.**(1./6.)), epsilon = 0.8, rc = 5.)
# plot of force field
force1 = []
force2 = []
r1 = []
r2 = []
drs = 0.025
dist = 5.
dimer = Atoms('Si2', positions = [[0, 0, 0],[1.5, 0, 0]])
r_cut = 10.
cutoffs = np.ones(len(dimer))*r_cut/2.
nl = NeighborList(cutoffs, skin=0., sorted=False, self_interaction=False,
bothways=True)
nl.build(dimer)
MLcal = MLCalculator(gp, nl)
dimer.set_calculator(MLcal)
for s in np.arange(dist/drs):
x= np.array([1, 0, 0])
pos= dimer.get_positions()
fs = dimer.get_forces()
r = pos[1]
f1, f2 = fs[0], fs[1]
dr = x*drs
pos[1] += dr
dimer.set_positions(pos)
r2.append(r)
force1.append(f1)
force2.append(f2)
r2 = np.array(r2)
force1, force2 = np.array(force1), np.array(force2)
pot = []
print(force2)
fdr = force2[:,0]*drs
ppot = 0
for i in np.arange(len(fdr)):
ppot += fdr[i]
pot.append(ppot)
pot = - np.array(pot)