Commit 660112cf authored by Carl Poelking's avatar Carl Poelking
Browse files

PCA etc.

parent 15068923
......@@ -7,7 +7,6 @@ import numpy as np
import logging
from momo import osio, endl, flush
# THERE IS SOMETHING WRONG WITH THE GRADIENTS HERE (DIRECTION SEEMS FINE, BUT MAGNITUDE IS NOT?)! REMARK ~ok, error source found
# TODO Unit test for global spectrum REMARK ~ok, automatize
# TODO Check outer kernel derivative REMARK ~ok, automatize
# TODO Check normalization derivative (compute on C++ level already) REMARK ~ok, -> C++
......@@ -84,7 +83,109 @@ class Xnklab(object):
#raw_input('...')
return self.X_linear
def reduce_xnklab_atomic(atomic, types_global, verbose=False):
    """
    Flatten the power spectrum of one atomic environment into a single
    1-D vector, keeping only symmetry-unique components.

    For identical species pairs (a = b) only the upper triangle n <= k of
    the radial indices is stored, since X_nklaa = X_knlaa. For distinct
    pairs only the combination with sa < sb (global species order) is
    stored, since X_nklab = X_knlba.

    Parameters
    ----------
    atomic : spectrum object exposing getTypes(), basis.N, basis.L and
        getPower(a, b).array (complex (N*N, L+1) array)
    types_global : list of species labels defining the global layout
    verbose : print dimension bookkeeping if True

    Returns
    -------
    X : 1-D np.ndarray of length S*dim_aa_red + (S*S-S)/2*dim_ab_red
    """
    types_atomic = atomic.getTypes()
    N = atomic.basis.N
    L = atomic.basis.L
    S = len(types_global)
    S_atomic = len(types_atomic)
    # ORGANIZATION:
    # | <- dim_aa_sum = off_ab -> | <- dim_ab_sum -> |
    # | 11 <- dim_aa_red -> 22 <- ... -> | 12,21 <- dim_ab_red -> 13,31 ... -> |
    # FIX: use floor division '//' so sizes/offsets stay integers under
    # Python 3 as well ('/' would yield floats, breaking np.zeros and slices).
    dim_aa_red = (N*N+N)//2*(L+1)
    dim_aa_sum = S*dim_aa_red
    off_ab = dim_aa_sum
    dim_ab_red = N*N*(L+1)
    dim_ab_sum = (S*S-S)//2*dim_ab_red
    X = np.zeros((dim_aa_sum+dim_ab_sum))
    if verbose:
        # Single-string print form is valid in both Python 2 and 3
        print("Global types: " + str(types_global))
        print("Local types:  " + str(types_atomic))
        print("Dim. aa (one pair a=b)  " + str(dim_aa_red))
        print("Dim. aa (all pairs a=b)  " + str(dim_aa_sum))
        print("Dim. ab (one pair a!=b) " + str(dim_ab_red))
        print("Dim. ab (all pairs a<b) " + str(dim_ab_sum))
    # SPECIES a = b
    for i in range(S_atomic):
        a = types_atomic[i]
        xnklaa = atomic.getPower(a, a).array.real
        xnklaa_red = np.zeros(((N*N+N)//2, L+1))
        # Select where n <= k (upper triangle of radial index pairs).
        # FIX: use dedicated indices n, k instead of shadowing the outer
        # species loop variables i, j.
        for n in range(N):
            for k in range(n, N):
                nk_red = n*N - (n*n-n)//2 + k-n
                xnklaa_red[nk_red] = xnklaa[n*N+k]
        # Locate in global array
        sa = types_global.index(a)
        i0 = sa*dim_aa_red
        i1 = i0 + dim_aa_red
        X[i0:i1] = xnklaa_red.flatten()
    # SPECIES a != b
    for i in range(S_atomic):
        for j in range(i+1, S_atomic):
            # Select all components; only the sa < sb ordering is stored
            a = types_atomic[i]
            b = types_atomic[j]
            xnklab = atomic.getPower(a, b).array.real
            # Locate in global array
            sa = types_global.index(a)
            sb = types_global.index(b)
            if sa > sb:
                sa, sb = sb, sa
            i0 = off_ab + (sa*S - (sa*sa+sa)//2 + sb-sa-1)*dim_ab_red
            i1 = i0 + dim_ab_red
            X[i0:i1] = xnklab.flatten()
    return X
class KernelAdaptorSpecificUnique(object):
    """
    Extracts Xnklab from spectrum, while removing duplicates
    resulting from elements that are identical by symmetry:
    e.g., Xnklab and Xknlba, and Xnklab and Xknlab if a=b
    """
    def __init__(self, options, types_global):
        # 'options' is accepted only for factory-interface compatibility.
        self.types = types_global
        self.S = len(types_global)
        return
    def adapt(self, spectrum):
        """
        Stack the normalized descriptor of each atomic environment in
        'spectrum' into an (n_environments x dim) design matrix.
        """
        # FIX: the original grew the matrix via np.zeros((0., 0.)) (float
        # dimensions, rejected under Python 3 / modern numpy) and an
        # IX.any() test that misfires if the first row is all-zero.
        # Collect rows first, then stack once.
        rows = []
        dimX = -1
        for atomic_i in spectrum:
            Xi_unnorm, Xi_norm = self.adaptScalar(atomic_i)
            dimX = Xi_norm.shape[0]
            rows.append(Xi_norm)
        if not rows:
            return np.zeros((0, 0), dtype='float64')
        IX = np.zeros((len(rows), dimX), dtype='float64')
        for i, row in enumerate(rows):
            IX[i, :] = row
        return IX
    def adaptScalar(self, atomic):
        # Returns both the raw and the L2-normalized descriptor vector.
        X = reduce_xnklab_atomic(atomic, self.types)
        X_norm = X/np.dot(X, X)**0.5
        return X, X_norm
class KernelAdaptorSpecific(object):
"""
WARNING: Duplicate components in Xnklab are not removed
"""
def __init__(self, options, types_global):
self.types = types_global
self.S = len(types_global)
......@@ -222,11 +323,31 @@ class KernelAdaptorGlobalGeneric(object):
dX_dz = dX_dz/mag_X - np.dot(X, dX_dz)/mag_X**3 * X
return dX_dx, dX_dy, dX_dz
class KernelFunctionDotShifted(object):
    """
    Shifted dot-product kernel for descriptors X,Y with X.dot(Y) in [-1,+1]:
    the overlap is mapped onto [0,1] before exponentiation, so k in [0,1].
    """
    def __init__(self, options):
        # Kernel hyperparameters read from the options map
        self.delta = float(options['kernel.delta'])
        self.xi = float(options['kernel.xi'])
        return
    def evaluate(self, IX, IY):
        # Pairwise overlaps, shifted from [-1,+1] into [0,1]
        overlap = IX.dot(IY.T)
        k_shifted = 0.5*(1. + overlap)
        return self.delta**2 * k_shifted**self.xi
class KernelFunctionDot(object):
def __init__(self, options):
self.delta = float(options.get('kernel.delta'))
self.xi = float(options.get('kernel.xi'))
if (type(options) == dict):
self.delta = float(options['kernel.delta'])
self.xi = float(options['kernel.xi'])
else:
self.delta = float(options.get('kernel.delta'))
self.xi = float(options.get('kernel.xi'))
return
def evaluate(self, IX, IY):
return self.delta**2 * IX.dot(IY.T)**self.xi
def computeDot(self, IX, X, xi, delta):
return delta**2 * np.dot(IX,X)**xi
def compute(self, IX, X):
......@@ -360,11 +481,13 @@ class KernelFunctionDot3HarmonicDist(object):
# Maps adaptor names (as given in the options) to adaptor classes.
# FIX: removed duplicated closing brace left over from a diff merge,
# which made this a syntax error.
KernelAdaptorFactory = {
    'generic': KernelAdaptorGeneric,
    'specific': KernelAdaptorSpecific,
    'specific-unique': KernelAdaptorSpecificUnique,
    'global-generic': KernelAdaptorGlobalGeneric
}
KernelFunctionFactory = {
'dot': KernelFunctionDot,
'dot-shifted': KernelFunctionDotShifted,
'dot-harmonic': KernelFunctionDotHarmonic,
'dot-harmonic-dist': KernelFunctionDotHarmonicDist,
'dot-lj': KernelFunctionDotLj,
......
......@@ -4,6 +4,12 @@ import kernel as kern
import soap
import multiprocessing as mp
import momo
import datasets.gdb
import datasets.soap
import libmatch.environments # Alchemy
import libmatch.structures # structure
LAGRAPH_DEBUG = False
LAGRAPH_CHECK = True
......@@ -85,7 +91,7 @@ def adjust_regularization(graphs, options):
print "Adjust gamma to", options['laplacian']['regularize_gamma']
return
def optimize_resolution(graphs, options, write_out=False, log=None, verbose=False):
def optimize_hierarchy(graphs, options, kfct, write_out=False, log=None, verbose=False):
if not options['graph']['optimize_hierarchy']: return
if log: log << "Optimizing r0, n_levels based on %d graphs" % len(graphs) << log.endl
# ETA-GAMMA PAIRS
......@@ -111,7 +117,7 @@ def optimize_resolution(graphs, options, write_out=False, log=None, verbose=Fals
graph.createSubgraphs(options)
# Process
kmat = soap.soapy.util.mp_compute_upper_triangle(
kfct=compare_graphs_laplacian_kernelized,
kfct=kfct,
g_list=graphs,
n_procs=4,
n_blocks=1,
......@@ -463,6 +469,7 @@ class ParticleSubgraph(object):
self.position = position
self.cutoff = cutoff
self.size = len(self.idcs)
self.z = self.parent.Z[self.idx]
return
@property
def label(self):
......@@ -489,6 +496,7 @@ class ParticleGraph(object):
self.K = None
self.subgraphs = None
self.subgraph_cutoffs = None
self.Z = atoms.get_atomic_numbers()
if options['graph']['hierarchical']:
self.createSubgraphs(options)
return
......@@ -595,6 +603,31 @@ class ParticleGraph(object):
dim = ix.shape[1]
assert ix.shape[0] == n_atoms
P = ix
elif descriptor_type == 'soap-quippy':
atoms_quippy = datasets.gdb.convert_ase2quippy_atomslist([atoms])
# Read options
options_xml_file = options_descriptor["options_xml"]
opt_interface = momo.OptionsInterface()
xml_options = opt_interface.ParseOptionsFile(options_xml_file, 'options')
if xml_options.soap.nocenter and xml_options.soap.nocenter != 'None':
xml_options.soap.nocenter = map(int, xml_options.soap.nocenter.split())
else:
xml_options.soap.nocenter = []
datasets.soap.finalize_options(None, xml_options)
# Process
soap = libmatch.structures.structure(xml_options.kernel.alchemy)
soap.parse(atoms_quippy,
options.soap.R,
options.soap.N,
options.soap.L,
options.soap.sigma,
options.soap.w0,
options.soap.nocenter,
options.soap.noatom,
kit = options.kernel.kit)
# ...
elif descriptor_type == 'none':
dim = 1
P = np.zeros((n_atoms, dim))
......
......@@ -5,23 +5,58 @@ import numpy as np
import logging
import io
def pca_epsilon_threshold(IX):
    """
    Return a small regularizer scaled to the typical component magnitude
    1/sqrt(dim) of a unit-norm vector with IX.shape[1] components.
    """
    n_dim = IX.shape[1]
    return 1e-5*(1./n_dim)**0.5
def pca_compute(IX, eps=0., log=None, norm_div_std=True, norm_sub_mean=True):
    """
    Principal-component analysis of the row-wise data matrix IX.

    Parameters
    ----------
    IX : (n_samples, n_dims) np.ndarray
    eps : regularizer added to the per-column std to avoid division by zero
    log : optional logger supporting 'log << msg << log.endl'
    norm_div_std : divide columns by their standard deviation if True
    norm_sub_mean : subtract column means if True

    Returns
    -------
    IX_norm_pca : normalized data projected onto the eigenbasis
    IX_norm : normalized data
    L : diagonal matrix of eigenvalues (descending)
    U : eigenvector matrix, U[:,i] is the i-th principal axis

    To check result consider:
      IX_norm_pca.T.dot(IX_norm_pca)/IX_norm_pca.shape[0]
    """
    # Normalize: mean, std
    if log: log << "PCA: Normalize ..." << log.endl
    if norm_sub_mean:
        IX_norm = IX - IX.mean(0)
    else:
        # FIX: the original left IX_norm undefined in this branch,
        # raising NameError when norm_sub_mean=False.
        IX_norm = np.array(IX, dtype='float64')
    if norm_div_std:
        IX_norm = IX_norm/(IX.std(0)+eps)
    # Correlation matrix
    if log: log << "PCA: Correlate ..." << log.endl
    S = IX_norm.T.dot(IX_norm)/IX_norm.shape[0]
    # Diagonalize
    if log: log << "PCA: Eigenspace ..." << log.endl
    lambda_, U = np.linalg.eigh(S)
    # eigh returns ascending eigenvalues; sort descending
    idcs = lambda_.argsort()[::-1]
    lambda_ = lambda_[idcs]
    U = U[:, idcs]
    L = np.identity(lambda_.shape[0])*lambda_
    #print(L-U.T.dot(S).dot(U)) # <- should be zero
    #print(S-U.dot(L).dot(U.T)) # <- should be zero
    # Transform
    if log: log << "PCA: Transform ..." << log.endl
    IX_norm_pca = U.T.dot(IX_norm.T).T
    return IX_norm_pca, IX_norm, L, U
class PCA(object):
def __init__(self):
    """Initialize an empty PCA model; all state is populated by compute()."""
    # Data matrix (n_samples x n_dims) and its normalized version
    self.IX = None
    self.IX_norm = None
    # Per-column normalization statistics (mean, standard deviation)
    self.X_mean = None
    self.X_std = None
    # Regularizer added to X_std to avoid division by zero
    self.eps = None
    return
def compute(self, IX, normalize_mean=True, normalize_std=True):
def compute(self, IX, normalize_mean=True, normalize_std=True, prefix='out.pca', eps=0.):
# Normalize
self.IX = IX
self.X_mean = IX.mean(0)
self.X_std= IX.std(0)
self.eps = eps
if not normalize_mean:
self.X_mean.fill(0.)
if not normalize_std:
self.X_std.fill(1.)
self.IX_norm = (IX - self.X_mean)/self.X_std
self.IX_norm = (IX - self.X_mean)/(self.X_std+eps)
# Correlation matrix / moment matrix
Sigma = np.dot(self.IX_norm.T,self.IX_norm)/self.IX_norm.shape[0]
eigvals, eigvecs = np.linalg.eigh(Sigma)
......@@ -34,8 +69,8 @@ class PCA(object):
self.eigvals = eigvals # eigvals[i] => i-th eigenvalue, where
self.eigvecs = eigvecs # eigvecs[i] => i-th eigenvector
eigvals_cumulative = np.cumsum(eigvals)
np.savetxt('out.pca.sigma.txt', Sigma)
np.savetxt('out.pca.eigvals_cum.txt', eigvals_cumulative/eigvals_cumulative[-1])
np.savetxt('%s.sigma.txt' % prefix, Sigma)
np.savetxt('%s.eigvals_cum.txt' % prefix, eigvals_cumulative/eigvals_cumulative[-1])
return
def expand(self, X, idx_cutoff=None):
assert False
......@@ -43,7 +78,7 @@ class PCA(object):
eigvals_sel = self.eigvals[0:idx_cutoff] if idx_cutoff else self.eigvals
# Normalize
X_norm = X-self.X_mean
X_norm = X_norm/self.X_std
X_norm = X_norm/(self.X_std+self.eps)
# Expand
eigencoeffs = eigvecs_sel.dot(X_norm)
# Reconstruct
......@@ -57,7 +92,7 @@ class PCA(object):
eigvals_sel = self.eigvals[0:idx_cutoff] if idx_cutoff else self.eigvals
# Normalize
IX_norm = IX-self.X_mean
IX_norm = IX_norm/self.X_std
IX_norm = IX_norm/(self.X_std+self.eps)
# Expand
eigencoeffs = IX_norm.dot(eigvecs_sel.T)
return eigencoeffs
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment