Commit 8aacbc22 authored by Carl Poelking's avatar Carl Poelking
Browse files

Add more tests/examples.

parent ae9521a1
#! /usr/bin/env python
from momo import osio, endl, flush
from __pyosshell__ import get_dirs
import numpy as np
def extract_data(folder, N_train, N_test, regex):
def extract_data_sub(subfolder, transform_y = lambda y: np.log10(y)):
base = osio.cwd()
osio.cd(subfolder)
X = np.load('xnkl.array.npy')
y = float(open('jeff2_h.txt', 'r').readline())
y = transform_y(y)
osio.cd(base)
return X, y
def sort_subfolders(folders):
return sorted(folders, key = lambda f: int(f.split('_')[1])*len(folders)+int(f.split('_')[2]))
base = osio.cwd()
osio << "Base =" << base << endl
osio.cd(folder)
# GET SUBFOLDERS
subfolders = sort_subfolders(get_dirs('./', regex))
# LIMIT RANGE
N = N_train + N_test
assert N < len(subfolders)
subfolders = subfolders[:N]
# FIND DESCRIPTOR DIMENSIONS
dim_x = extract_data_sub(subfolders[0])[0].shape[0]
osio << "Descriptor dimension =" << dim_x << endl
# EXTRACT FEATURES & TARGETS
X_list = np.zeros((N, dim_x))
y_list = np.zeros((N))
idx = -1
for subfolder in subfolders:
idx += 1
osio << osio.back << subfolder << flush
X, y = extract_data_sub(subfolder)
X_list[idx] = X
y_list[idx] = y
osio << endl
# SPLIT ONTO TRAINING & TEST SET
X_train = np.zeros((N_train, dim_x))
y_train = np.zeros((N_train))
X_test = np.zeros((N_test, dim_x))
y_test = np.zeros((N_test))
X_train[0] = X_list[0]
y_train[0] = y_list[0]
count_train = 1
count_test = 0
for idx in range(1,N):
if float(count_train)/(count_train+count_test) < float(N_train)/(N):
X_train[count_train] = X_list[idx]
y_train[count_train] = y_list[idx]
count_train += 1
else:
X_test[count_test] = X_list[idx]
y_test[count_test] = y_list[idx]
count_test += 1
assert count_train == N_train
assert count_test == N_test
osio.cd(base)
return X_train, y_train, X_test, y_test
# TODO Take log of tintegrals
# TODO Raise kernel to power of xi
N_train = 3000
N_test = 500
xi = 3.
lambda_reg = 1e-2
X_train, y_train, X_test, y_test = extract_data('frame0', N_train, N_test, 'pair_')
def compute_kernel(X1, X2, xi):
osio << "Compute kernel ..." << endl
K = np.dot(X1, X2.T) # = np.inner(X1,X2)
osio << "Normalize ..." << endl
norm_1 = np.sqrt(np.sum(X1*X1,1))
norm_2 = np.sqrt(np.sum(X2*X2,1))
norm_12 = np.outer(norm_1, norm_2)
K = K/norm_12
osio << "Sharpen ..." << endl
K = K**xi
return K
K = compute_kernel(X_train, X_train, xi)
np.savetxt('kernel.array.txt', K)
osio << "Regularize ..." << endl
K_reg = K + lambda_reg*np.identity(K.shape[0])
osio << "Invert ..." << endl
K_reg_inv = np.linalg.inv(K_reg)
np.savetxt('kernel_inv.array.txt', K_reg_inv)
osio << "Compute weights ..." << endl
w = K_reg_inv.dot(y_train)
np.savetxt('weights.array.txt', w)
osio << "Train ..." << endl
y_train_calc = np.dot(K, w)
rms_error_train = (np.sum((y_train_calc-y_train)**2)/y_train.shape[0])**0.5
print rms_error_train
np.savetxt('y_train_calc.array.txt', y_train_calc)
np.savetxt('y_train_ref.array.txt', y_train)
# Optimize xi and lambda
# Neural network to single out important components?
# PCA
# Try different descriptor
osio << "Test ..." << endl
K_test = compute_kernel(X_test, X_train, xi)
y_test_calc = np.dot(K_test, w)
rms_error_test = (np.sum((y_test_calc-y_test)**2)/y_test.shape[0])**0.5
print rms_error_test
np.savetxt('y_test_calc.array.txt', y_test_calc)
np.savetxt('y_test_ref.array.txt', y_test)
"""
osio << "Compute kernel matrix ..." << endl
K = np.dot(X_train, X_train.T)
osio << "Normalize ..." << endl
norm_vec_sqrt = np.sqrt(np.sum(X_train*X_train,1))
norm_mat_sqrt = np.outer(norm_vec_sqrt, norm_vec_sqrt)
K = K/norm_mat_sqrt
osio << "Sharpen ..." << endl
K = K**xi
np.savetxt('kernel.array.txt', K)
osio << "Regularize ..." << endl
K_reg = K + lambda_reg*np.identity(K.shape[0])
osio << "Invert kernel ..." << endl
K_reg_inv = np.linalg.inv(K_reg)
np.save('kernel_inv.array.npy', K_reg_inv)
np.savetxt('kernel_inv.array.txt', K_reg_inv)
osio << "Compute weights ..." << endl
w = K_reg_inv.dot(y_train)
np.save('weights.array.npy', w)
np.savetxt('weights.array.txt', w)
"""
#! /usr/bin/env python
import soap
import soap.tools
import numpy as np
from __pyosshell__ import get_dirs
from momo import os, osio, endl, flush
from momoxml import *
# LOGGER
soap.silence()
# INITIALIZE OPTIONS
options = soap.Options()
options.set('radialbasis.type', 'gaussian')
options.set('radialbasis.mode', 'adaptive')
#options.set('radialbasis.mode', 'equispaced')
options.set('radialbasis.N', 9)
options.set('radialbasis.sigma', 0.9)
options.set('radialbasis.integration_steps', 15)
#options.set('radialbasis.N', 9)
options.set('radialcutoff.Rc', 6.8)
options.set('radialcutoff.Rc_width', 0.5)
options.set('radialcutoff.type', 'shifted-cosine')
options.set('radialcutoff.center_weight', 1.)
#options.set('radialcutoff.center_weight', -7.)
options.set('angularbasis.type', 'spherical-harmonic')
options.set('angularbasis.L', 6)
#options.set('angularbasis.L', 12)
#options.set('densitygrid.N', 20)
options.set('densitygrid.N', 20)
options.set('densitygrid.dx', 0.15)
# DEFINE EXCLUSIONS
options.excludeCenters(['H'])
options.excludeTargets([])
element_vdw_radius = {
'H':1.20,
'C':1.70,
'N':1.55,
'O':1.52,
'S':1.80
}
element_mass = {
'H':1,
'C':12,
'N':14,
'O':16,
'S':32
}
element_valence_order = {
'H':1,
'C':4,
'N':5,
'O':6,
'S':6
}
# LOAD REFERENCE DATA
target_map = {}
tree = XmlTree('extract.pairs.xml')
pair_tags = tree.DiveForAll('pair')
pair_count = 0
for pair in pair_tags:
id1 = pair['id1'](int)
id2 = pair['id2'](int)
jeff2_h = pair['channel'].GetUnique('jeff2_h')(float)
if target_map.has_key(id1):
pass
else:
target_map[id1] = {}
target_map[id1][id2] = jeff2_h
# EXPAND STRUCTURES
osio.cd('frame0')
pair_folders = get_dirs('./', 'pair_')
n_pairs = len(pair_folders)
pair_folders = sorted(pair_folders, key = lambda f: int(f.split('_')[1])*n_pairs+int(f.split('_')[2]))
xnkl_list = []
target_list = []
label_list = []
pair_id = 0
for folder in pair_folders:
pair_id += 1
# READ XYZ => CONFIG
osio.cd(folder)
osio << "Pair-ID %5d ('%s')" % (pair_id, folder) << endl
if 'jeff2_h.txt' in os.listdir('./'):
assert 'xnkl.array.npy' in os.listdir('./')
assert 'xnkl.array.txt' in os.listdir('./')
#xnkl_pair_npy = np.load('xnkl.array.npy')
#xnkl_pair_txt = np.loadtxt('xnkl.array.txt')
#drms = np.sum((xnkl_pair_npy-xnkl_pair_txt)**2)/(xnkl_pair_npy.shape[0])
#osio << "... Check zero =" << drms << endl
osio << "... Continue." << endl
osio.cd(-1)
continue
# PAIR DATA
id1 = int(folder.split('_')[1])
id2 = int(folder.split('_')[2])
target = target_map[id1][id2]
# READ COORDINATES
ase_config_list = soap.tools.ase_load_all('dim')
assert len(ase_config_list) == 1
config = ase_config_list[0]
# PROCESS STRUCTURE
structure = soap.tools.setup_structure_ase(config.config_file, config.atoms)
for particle in structure:
particle.sigma = 0.5*element_vdw_radius[particle.type]
particle.weight = element_valence_order[particle.type]
if particle.id > 28:
#print particle.id, particle.name
particle.weight *= -1.
# COMPUTE SPECTRUM
spectrum = soap.Spectrum(structure, options)
basis = spectrum.basis
spectrum.compute()
spectrum.computePower()
# EXTRACT XNKL
x_pair = soap.PowerExpansion(spectrum.basis)
xnkl_pair = x_pair.array
norm = 0.
for atomic in spectrum:
xnkl_pair = xnkl_pair + atomic.getPower("","").array
norm += 1.
xnkl_pair = xnkl_pair/norm
xnkl_pair = xnkl_pair.real
xnkl_pair = xnkl_pair.reshape(basis.N*basis.N*(basis.L+1))
# SAVE TO HARDDRIVE
np.save('xnkl.array.npy', xnkl_pair)
np.savetxt('xnkl.array.txt', xnkl_pair)
ofs = open('jeff2_h.txt', 'w')
ofs.write('%+1.7e\n' % target)
ofs.close()
#xnkl_list.append(xnkl_pair)
#label_list.append(folder)
#target_list.append(target_map[id1][id2])
#atomic = spectrum.getAtomic(1, "S")
#center = atomic.getCenter()
#atomic.getLinear("").writeDensityOnGrid('tmp.cube', options, structure, center, True)
osio.cd(-1)
#break
#raw_input('...')
#if pair_id > 10: break
osio.root()
osio.okquit()
"""
pool = mp.Pool(processes=options.n_procs)
lock = mp.Lock()
compute_soap_partial = fct.partial(compute_soap, options=options)
soaps = pool.map(compute_soap_partial, metas)
pool.close()
pool.join()
for soap, meta in zip(soaps, metas):
meta.soap = soap
"""
#! /usr/bin/env python
import soap
import soap.tools
import os
import numpy as np
from momo import osio, endl, flush
options = soap.Options()
options.excludeCenters(['H'])
options.excludeTargets([])
options.set('radialbasis.type', 'gaussian')
options.set('radialbasis.mode', 'adaptive')
options.set('radialbasis.N', 9)
options.set('radialbasis.sigma', 0.9)
options.set('radialbasis.integration_steps', 15)
options.set('radialcutoff.Rc', 6.8)
options.set('radialcutoff.Rc_width', 0.5)
options.set('radialcutoff.type', 'shifted-cosine')
options.set('radialcutoff.center_weight', 1.)
options.set('angularbasis.type', 'spherical-harmonic')
options.set('angularbasis.L', 6)
options.set('densitygrid.N', 20)
options.set('densitygrid.dx', 0.15)
# STRUCTURE
config = soap.tools.ase_load_single('line.xyz')
structure = soap.tools.setup_structure_ase(config.config_file, config.atoms)
# SPECTRUM
spectrum = soap.Spectrum(structure, options)
spectrum.compute()
spectrum.computePower()
#structure = spectrum.structure # !! THIS CREATES A DANGLING POINTER !!
basis = spectrum.basis
options = spectrum.options
center_id = 7
center_type = "C"
density_type = "C"
# TARGET EXTRACTION
atomic = spectrum.getAtomic(center_id, center_type)
center = atomic.getCenter()
atomic_xnkl = atomic.getPower(density_type, density_type).array
# INVERT XKNL => QNLM
atomic_inv_qnlm = soap.tools.invert_xnkl_aa(atomic_xnkl, basis, l_smaller=0, l_larger=-1)
# CREATE INVERSION EXPANSION
basisexp = soap.BasisExpansion(basis)
basisexp.array = atomic_inv_qnlm
# CREATE INVERSION ATOMIC SPECTRUM
atomic_inv = soap.AtomicSpectrum(center, basis)
atomic_inv.addLinear(density_type, basisexp)
atomic_inv.computePower()
atomic_inv_xnkl = atomic_inv.getPower(density_type, density_type).array
dxnkl = atomic_inv_xnkl - atomic_xnkl
print atomic_xnkl
raw_input('...')
print atomic_inv_xnkl
raw_input('...')
print dxnkl
raw_input('...')
for particle in structure:
print particle.name
raw_input('...')
# OUTPUT DENSITY
cubefile = 'recon.id-%d_center-%s_type-%s.cube' % (center_id, center_type, density_type)
coefffile = cubefile[:-5]+'.coeff'
basisexp.writeDensityOnGrid(cubefile, options, structure, center, True)
basisexp.writeDensity(coefffile, options, structure, center)
qnlm_orig = atomic_inv_qnlm
for l in range(basis.L+1):
qnlm = np.copy(qnlm_orig)
for n in range(basis.N):
for ll in range(basis.L+1):
if ll == l: continue
for m in range(-ll,ll+1):
qnlm[n,ll*ll+ll+m] = np.complex(0.,0.)
#print qnlm
basisexp.array = qnlm
cubefile = 'recon.id-%d_center-%s_type-%s_l-%d.cube' % (center_id, center_type, density_type, l)
coefffile = cubefile[:-5]+'.coeff'
basisexp.writeDensityOnGrid(cubefile, options, structure, center, True)
basisexp.writeDensity(coefffile, options, structure, center)
#! /usr/bin/env python
from momo import osio, endl, flush, os, sys
l = sys.argv[1]
vmd_exe = '/usr/local/bin/vmd'
setup_tcl = 'vmdsetup.tcl'
cubefile = 'recon.id-7_center-C_type-C_l-%s.cube' % l
vmdsetup_tcl = '''
mol new {cubefile:s}
mol modselect 0 top all
mol modcolor 0 top Name
mol modstyle 0 top cpk 1 0.8
mol modmaterial 0 top Edgy
mol numperiodic top 0 1
mol addrep top
mol modselect 1 top all
mol modcolor 1 top ColorId 0
mol modstyle 1 top Isosurface -0.01 0 0 Solid
mol modmaterial 1 top Transparent
mol numperiodic top 1 1
mol addrep top
mol modselect 2 top all
mol modcolor 2 top ColorId 1
mol modstyle 2 top Isosurface 0.01 0 0 Solid
mol modmaterial 2 top Transparent
mol numperiodic top 2 1
display rendermode GLSL
axes location off
color Name F cyan3
color Name N iceblue
'''.format(cubefile=cubefile)
ofs = open(setup_tcl, 'w')
ofs.write(vmdsetup_tcl)
ofs.close()
os.system('%s -e %s' % (vmd_exe, setup_tcl))
#! /usr/bin/env python
import soap
import soap.tools
import numpy as np
import multiprocessing as mp
import functools as fct
import threading
from __pyosshell__ import get_dirs
from momo import os, osio, endl, flush
from momoxml import *
# LOGGER
soap.silence()
MP_LOCK = mp.Lock()
options_dict = {
'radialbasis.type' : 'gaussian',
'radialbasis.mode' : 'adaptive',
'radialbasis.N' : 9,
'radialbasis.sigma': 0.9,
'radialbasis.integration_steps': 15,
'radialcutoff.Rc': 6.8,
'radialcutoff.Rc_width': 0.5,
'radialcutoff.type': 'shifted-cosine',
'radialcutoff.center_weight': 1.,
'angularbasis.type': 'spherical-harmonic',
'angularbasis.L': 6,
'densitygrid.N': 20,
'densitygrid.dx': 0.15}
exclude_centers = ['H']
exclude_targets = []
element_vdw_radius = {
'H':1.20,
'C':1.70,
'N':1.55,
'O':1.52,
'S':1.80
}
element_mass = {
'H':1,
'C':12,
'N':14,
'O':16,
'S':32
}
element_valence_order = {
'H':1,
'C':4,
'N':5,
'O':6,
'S':6
}
# READ COORDINATES
ase_config_list = soap.tools.ase_load_all('group_rnd_short', log=osio)
def compute_spectrum(config, options_dict, exclude_centers, exclude_targets):
# ANNOUNCE
global MP_LOCK
MP_LOCK.acquire()
osio << osio.item \
<< "Soap for: " << config.config_file \
<< "PID=%d" % mp.current_process().pid << endl
MP_LOCK.release()
# SET OPTIONS
options = soap.Options()
for item in options_dict.iteritems():
options.set(*item)
options.excludeCenters(exclude_centers)
options.excludeTargets(exclude_targets)
# PROCESS STRUCTURE
structure = soap.tools.setup_structure_ase(config.config_file, config.atoms)
for particle in structure:
particle.sigma = 0.5*element_vdw_radius[particle.type]
particle.weight = element_valence_order[particle.type]
# COMPUTE SPECTRUM
spectrum = soap.Spectrum(structure, options)
spectrum.compute()
spectrum.computePower()
spectrum.save('%s.spectrum' % config.config_file)
return config.config_file
pool = mp.Pool(processes=4)
compute_spectrum_partial = fct.partial(
compute_spectrum,
options_dict=options_dict,
exclude_centers=exclude_centers,
exclude_targets=exclude_targets)
arch_files = pool.map(compute_spectrum_partial, ase_config_list)
pool.close()
pool.join()
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment