Commit ac4274c5 authored by Philipp Frank's avatar Philipp Frank
Browse files

start work on samplers

parent 19172fd8
...@@ -72,44 +72,40 @@ if __name__ == "__main__": ...@@ -72,44 +72,40 @@ if __name__ == "__main__":
) )
H = ift.StandardHamiltonian(likelihood) H = ift.StandardHamiltonian(likelihood)
fullcov_model = ift.FullCovarianceModel(H.domain) position_fc = ift.from_random(H.domain)*0.1
meanfield_model = ift.MeanfieldModel(H.domain) position_mf = ift.from_random(H.domain)*0.
position_fc = fullcov_model.get_initial_pos(initial_sig=0.01) fc = ift.FullCovariance(position_fc, H, 3, True, initial_sig=0.01)
position_mf = meanfield_model.get_initial_pos(initial_sig=0.01) mf = ift.MeanField(position_mf, H, 3, True, initial_sig=0.0001)
minimizer_fc = ift.ADVIOptimizer(10)
f_KL_fc = lambda x: ift.ParametricGaussianKL(x, H, fullcov_model, 3, True) minimizer_mf = ift.ADVIOptimizer(10)
KL_fc = f_KL_fc(position_fc)
f_KL_mf = lambda x: ift.ParametricGaussianKL(x, H, meanfield_model, 3, True)
KL_mf = f_KL_mf(position_mf)
minimizer_fc = ift.ADVIOptimizer(10, f_KL_fc)
minimizer_mf = ift.ADVIOptimizer(10, f_KL_mf)
plt.pause(0.001) plt.pause(0.001)
for i in range(25): for i in range(25):
KL_fc, _ = minimizer_fc(KL_fc) if i != 0:
KL_mf, _ = minimizer_mf(KL_mf) fc.minimize(minimizer_fc)
mf.minimize(minimizer_mf)
plt.figure("result") plt.figure("result")
plt.cla() plt.cla()
plt.plot( plt.plot(
sky(fullcov_model.generator(KL_fc.position)).val, sky(fc.position).val,
"b-", "b-",
label="Full covariance", label="Full covariance",
) )
plt.plot( plt.plot(
sky(meanfield_model.generator(KL_mf.position)).val, "r-", label="Mean field" sky(mf.position).val, "r-", label="Mean field"
) )
for samp in KL_fc.samples: #for samp in KL_fc.samples:
plt.plot( # plt.plot(
sky(fullcov_model.generator(KL_fc.position + samp)).val, "b-", alpha=0.3 # sky(fullcov_model.generator(KL_fc.position + samp)).val, "b-", alpha=0.3
) # )
for samp in KL_mf.samples: #for samp in KL_mf.samples:
plt.plot( # plt.plot(
sky(meanfield_model.generator(KL_mf.position + samp)).val, # sky(meanfield_model.generator(KL_mf.position + samp)).val,
"r-", # "r-",
alpha=0.3, # alpha=0.3,
) # )
plt.plot(data.val, "kx") plt.plot(data.val, "kx")
plt.plot(sky(mock_position).val, "k-", label="Ground truth") plt.plot(sky(mock_position).val, "k-", label="Ground truth")
plt.legend() plt.legend()
......
...@@ -73,7 +73,7 @@ from .minimization.scipy_minimizer import L_BFGS_B ...@@ -73,7 +73,7 @@ from .minimization.scipy_minimizer import L_BFGS_B
from .minimization.energy import Energy from .minimization.energy import Energy
from .minimization.quadratic_energy import QuadraticEnergy from .minimization.quadratic_energy import QuadraticEnergy
from .minimization.energy_adapter import EnergyAdapter from .minimization.energy_adapter import EnergyAdapter
from .minimization.kl_energies import MetricGaussianKL, GeoMetricKL, ParametricGaussianKL from .minimization.kl_energies import MetricGaussianKL, GeoMetricKL
from .sugar import * from .sugar import *
...@@ -92,7 +92,7 @@ from .library.adjust_variances import (make_adjust_variances_hamiltonian, ...@@ -92,7 +92,7 @@ from .library.adjust_variances import (make_adjust_variances_hamiltonian,
from .library.nft import Gridder, FinuFFT from .library.nft import Gridder, FinuFFT
from .library.correlated_fields import CorrelatedFieldMaker from .library.correlated_fields import CorrelatedFieldMaker
from .library.correlated_fields_simple import SimpleCorrelatedField from .library.correlated_fields_simple import SimpleCorrelatedField
from .library.variational_models import MeanfieldModel, FullCovarianceModel from .library.variational_models import MeanField, FullCovariance
from . import extra from . import extra
......
...@@ -21,7 +21,6 @@ from ..domain_tuple import DomainTuple ...@@ -21,7 +21,6 @@ from ..domain_tuple import DomainTuple
from ..domains.unstructured_domain import UnstructuredDomain from ..domains.unstructured_domain import UnstructuredDomain
from ..field import Field from ..field import Field
from ..linearization import Linearization from ..linearization import Linearization
from ..multi_domain import MultiDomain
from ..multi_field import MultiField from ..multi_field import MultiField
from ..operators.einsum import MultiLinearEinsum from ..operators.einsum import MultiLinearEinsum
from ..operators.energy_operators import EnergyOperator from ..operators.energy_operators import EnergyOperator
...@@ -29,114 +28,86 @@ from ..operators.linear_operator import LinearOperator ...@@ -29,114 +28,86 @@ from ..operators.linear_operator import LinearOperator
from ..operators.multifield2vector import Multifield2Vector from ..operators.multifield2vector import Multifield2Vector
from ..operators.sandwich_operator import SandwichOperator from ..operators.sandwich_operator import SandwichOperator
from ..operators.simple_linear_operators import FieldAdapter, PartialExtractor from ..operators.simple_linear_operators import FieldAdapter, PartialExtractor
from ..sugar import domain_union, from_random, full, makeField from ..sugar import domain_union, full, makeField, is_fieldlike
from ..minimization.stochastic_minimizer import PartialSampledEnergy
class MeanfieldModel(): class MeanField:
"""Collect the operators required for Gaussian mean-field variational def __init__(self, position, hamiltonian, n_samples, mirror_samples,
inference. initial_sig=1, comm=None, nanisinf=False, names = ['mean', 'var']):
"""Collect the operators required for Gaussian mean-field variational
Parameters inference.
----------
domain: MultiDomain
The domain of the model parameters.
"""
def __init__(self, domain):
self.domain = MultiDomain.make(domain)
self.Flat = Multifield2Vector(self.domain)
self.std = FieldAdapter(self.Flat.target,'var').absolute()
self.latent = FieldAdapter(self.Flat.target,'latent')
self.mean = FieldAdapter(self.Flat.target,'mean')
self.generator = self.Flat.adjoint(self.mean + self.std * self.latent)
self.entropy = GaussianEntropy(self.std.target) @ self.std
def get_initial_pos(self, initial_mean=None, initial_sig = 1):
"""Provide an initial position for a given mean parameter vector and an
initial standard deviation.
Parameters
----------
initial_mean: MultiField
The initial mean of the variational approximation. If not None, a
Gaussian sample with mean zero and standard deviation of 0.1 is
used. Default: None
initial_sig: positive float
The initial standard deviation shared by all parameters. Default: 1
""" """
Flat = Multifield2Vector(position.domain)
initial_pos = from_random(self.generator.domain).to_dict() std = FieldAdapter(Flat.target, names[1]).absolute()
initial_pos['latent'] = full(self.generator.domain['latent'], 0.) latent = FieldAdapter(Flat.target,'latent')
initial_pos['var'] = full(self.generator.domain['var'], initial_sig) mean = FieldAdapter(Flat.target, names[0])
generator = Flat.adjoint(mean + std * latent)
if initial_mean is None: entropy = GaussianEntropy(std.target) @ std
initial_mean = 0.1*from_random(self.generator.target) pos = {names[0]: Flat(position)}
if is_fieldlike(initial_sig):
initial_pos['mean'] = self.Flat(initial_mean) pos[names[1]] = Flat(initial_sig)
return MultiField.from_dict(initial_pos) else:
pos[names[1]] = full(Flat.target, initial_sig)
pos = MultiField.from_dict(pos)
class FullCovarianceModel(): op = hamiltonian(generator) + entropy
"""Collect the operators required for Gaussian full-covariance variational self._names = names
inference. self._KL = PartialSampledEnergy.make(pos, op, ['latent',], n_samples, mirror_samples, nanisinf=nanisinf, comm=comm)
self._Flat = Flat
Parameters
---------- @property
domain: MultiDomain def position(self):
The domain of the model parameters. return self._Flat.adjoint(self._KL.position[self._names[0]])
"""
def minimize(self, minimizer):
def __init__(self, domain): self._KL, _ = minimizer(self._KL)
self.domain = MultiDomain.make(domain)
self.Flat = Multifield2Vector(self.domain)
class FullCovariance:
def __init__(self, position, hamiltonian, n_samples, mirror_samples,
initial_sig=1, comm=None, nanisinf=False, names = ['mean', 'cov']):
"""Collect the operators required for Gaussian full-covariance variational
inference.
"""
Flat = Multifield2Vector(position.domain)
one_space = UnstructuredDomain(1) one_space = UnstructuredDomain(1)
self.flat_domain = self.Flat.target[0] flat_domain = Flat.target[0]
N_tri = self.flat_domain.shape[0]*(self.flat_domain.shape[0]+1)//2 N_tri = flat_domain.shape[0]*(flat_domain.shape[0]+1)//2
triangular_space = DomainTuple.make(UnstructuredDomain(N_tri)) triangular_space = DomainTuple.make(UnstructuredDomain(N_tri))
tri = FieldAdapter(triangular_space, 'cov') tri = FieldAdapter(triangular_space, names[1])
mat_space = DomainTuple.make((self.flat_domain,self.flat_domain)) mat_space = DomainTuple.make((flat_domain,flat_domain))
lat_mat_space = DomainTuple.make((one_space,self.flat_domain)) lat_mat_space = DomainTuple.make((one_space,flat_domain))
lat = FieldAdapter(lat_mat_space,'latent') lat = FieldAdapter(lat_mat_space,'latent')
LT = LowerTriangularProjector(triangular_space,mat_space) LT = LowerTriangularProjector(triangular_space,mat_space)
mean = FieldAdapter(self.flat_domain,'mean') mean = FieldAdapter(flat_domain,names[0])
cov = LT @ tri cov = LT @ tri
co = FieldAdapter(cov.target, 'co') co = FieldAdapter(cov.target, 'co')
matmul_setup_dom = domain_union((co.domain,lat.domain)) matmul_setup_dom = domain_union((co.domain,lat.domain))
co_part = PartialExtractor(matmul_setup_dom, co.domain) co_part = PartialExtractor(matmul_setup_dom, co.domain)
lat_part = PartialExtractor(matmul_setup_dom, lat.domain) lat_part = PartialExtractor(matmul_setup_dom, lat.domain)
matmul_setup = lat_part.adjoint @ lat.adjoint @ lat + co_part.adjoint @ co.adjoint @ cov matmul_setup = lat_part.adjoint @ lat.adjoint @ lat + co_part.adjoint @ co.adjoint @ cov
MatMult = MultiLinearEinsum(matmul_setup.target,'ij,ki->jk', key_order=('co','latent')) MatMult = MultiLinearEinsum(matmul_setup.target,'ij,ki->jk', key_order=('co','latent'))
Resp = Respacer(MatMult.target, mean.target) Resp = Respacer(MatMult.target, mean.target)
self.generator = self.Flat.adjoint @ (mean + Resp @ MatMult @ matmul_setup) generator = Flat.adjoint @ (mean + Resp @ MatMult @ matmul_setup)
Diag = DiagonalSelector(cov.target, self.Flat.target) Diag = DiagonalSelector(cov.target, Flat.target)
diag_cov = Diag(cov).absolute() diag_cov = Diag(cov).absolute()
self.entropy = GaussianEntropy(diag_cov.target) @ diag_cov entropy = GaussianEntropy(diag_cov.target) @ diag_cov
diag_tri = np.diag(np.full(flat_domain.shape[0], initial_sig))[np.tril_indices(flat_domain.shape[0])]
def get_initial_pos(self, initial_mean=None, initial_sig=1): pos = MultiField.from_dict({names[0]:Flat(position),names[1]:makeField(generator.domain[names[1]], diag_tri)})
"""Provide an initial position for a given mean parameter vector and a op = hamiltonian(generator) + entropy
diagonal covariance with an initial standard deviation. self._names = names
self._KL = PartialSampledEnergy.make(pos, op, ['latent',], n_samples, mirror_samples, nanisinf=nanisinf, comm=comm)
Parameters self._Flat = Flat
----------
initial_mean: MultiField @property
The initial mean of the variational approximation. If not None, a def position(self):
Gaussian sample with mean zero and standard deviation of 0.1 is return self._Flat.adjoint(self._KL.position[self._names[0]])
used. Default: None
initial_sig: positive float def minimize(self, minimizer):
The initial standard deviation shared by all parameters. Default: 1 self._KL, _ = minimizer(self._KL)
"""
initial_pos = from_random(self.generator.domain).to_dict()
initial_pos['latent'] = full(self.generator.domain['latent'], 0.)
diag_tri = np.diag(np.full(self.flat_domain.shape[0], initial_sig))[np.tril_indices(self.flat_domain.shape[0])]
initial_pos['cov'] = makeField(self.generator.domain['cov'], diag_tri)
if initial_mean is None:
initial_mean = 0.1*from_random(self.generator.target)
initial_pos['mean'] = self.Flat(initial_mean)
return MultiField.from_dict(initial_pos)
class GaussianEntropy(EnergyOperator): class GaussianEntropy(EnergyOperator):
......
...@@ -15,8 +15,106 @@ ...@@ -15,8 +15,106 @@
# #
# NIFTy is being developed at the Max-Planck-Institut fuer Astrophysik. # NIFTy is being developed at the Max-Planck-Institut fuer Astrophysik.
import numpy as np
from .minimizer import Minimizer from .minimizer import Minimizer
from .energy import Energy from .energy import Energy
from .kl_energies import _SelfAdjointOperatorWrapper, _get_lo_hi
from ..linearization import Linearization
from ..utilities import myassert, allreduce_sum
from ..multi_domain import MultiDomain
from ..sugar import from_random
from .. import random
class _StochasticEnergyAdapter(Energy):
def __init__(self, position, local_ops, n_samples, comm, nanisinf):
super(_StochasticEnergyAdapter, self).__init__(position)
for op in local_ops:
myassert(position.domain == op.domain)
self._comm = comm
self._local_ops = local_ops
self._n_samples = n_samples
lin = Linearization.make_var(position)
v, g = [], []
for op in self._local_ops:
tmp = op(lin)
v.append(tmp.val.val)
g.append(tmp.gradient)
self._val = allreduce_sum(v, self._comm)[()]/self._n_samples
if np.isnan(self._val) and self._nanisinf:
self._val = np.inf
self._grad = allreduce_sum(g, self._comm)/self._n_samples
@property
def value(self):
return self._val
@property
def gradient(self):
return self._grad
def at(self, position):
return _StochasticEnergyAdapter(position, self._local_ops,
self._n_samples, self._comm, self._nanisinf)
def apply_metric(self, x):
lin = Linearization.make_var(self.position, want_metric=True)
res = []
for op in self._local_ops:
res.append(op(lin).metric(x))
return allreduce_sum(res, self._comm)/self._n_samples
@property
def metric(self):
return _SelfAdjointOperatorWrapper(self.position.domain,
self.apply_metric)
class PartialSampledEnergy(_StochasticEnergyAdapter):
def __init__(self, position, op, keys, local_ops, n_samples, comm, nanisinf,
_callingfrommake=False):
if not _callingfrommake:
raise NotImplementedError
super(PartialSampledEnergy, self).__init__(position,
local_ops, n_samples, comm, nanisinf)
self._op = op
self._keys = keys
def at(self, position):
return PartialSampledEnergy(position, self._op, self._keys,
self._local_ops, self._n_samples,
self._comm, self._nanisinf,
_callingfrommake=True)
def resample_at(self, position):
return PartialSampledEnergy.make(position, self._op, self._keys,
self._n_samples, self._comm)
@staticmethod
def make(position, op, keys, n_samples, mirror_samples, nanisinf = False, comm=None):
samdom = {}
for k in keys:
if k in position.domain.keys():
raise ValueError
if k not in op.domain.keys():
raise ValueError
else:
samdom[k] = op.domain[k]
samdom = MultiDomain.make(samdom)
local_ops = []
sseq = random.spawn_sseq(n_samples)
for i in range(*_get_lo_hi(comm, n_samples)):
with random.Context(sseq[i]):
rnd = from_random(samdom)
_, tmp = op.simplify_for_constant_input(rnd)
myassert(tmp.domain == position.domain)
local_ops.append(tmp)
if mirror_samples:
local_ops.append(op.simplify_for_constant_input(-rnd)[1])
n_samples = 2*n_samples if mirror_samples else n_samples
return PartialSampledEnergy(position, op, keys, local_ops, n_samples,
comm, nanisinf, _callingfrommake=True)
class ADVIOptimizer(Minimizer): class ADVIOptimizer(Minimizer):
...@@ -27,9 +125,6 @@ class ADVIOptimizer(Minimizer): ...@@ -27,9 +125,6 @@ class ADVIOptimizer(Minimizer):
---------- ----------
steps: int steps: int
The number of concecutive steps during one call of the optimizer. The number of concecutive steps during one call of the optimizer.
resample_energy : function
Function that returns an `Energy` object at a given position. It is
called after every step of the optimizer.
eta: positive float eta: positive float
The scale of the step-size sequence. It might have to be adapted to the The scale of the step-size sequence. It might have to be adapted to the
application to increase performance. Default: 1. application to increase performance. Default: 1.
...@@ -41,14 +136,13 @@ class ADVIOptimizer(Minimizer): ...@@ -41,14 +136,13 @@ class ADVIOptimizer(Minimizer):
A small value guarantees Robbins and Monro conditions. A small value guarantees Robbins and Monro conditions.
""" """
def __init__(self, steps, resample_energy, eta=1, alpha=0.1, tau=1, epsilon=1e-16): def __init__(self, steps, eta=1, alpha=0.1, tau=1, epsilon=1e-16):
self.alpha = alpha self.alpha = alpha
self.eta = eta self.eta = eta
self.tau = tau self.tau = tau
self.epsilon = epsilon self.epsilon = epsilon
self.counter = 1 self.counter = 1
self.steps = steps self.steps = steps
self._resample = resample_energy
self.s = None self.s = None
def _step(self, position, gradient): def _step(self, position, gradient):
...@@ -70,7 +164,7 @@ class ADVIOptimizer(Minimizer): ...@@ -70,7 +164,7 @@ class ADVIOptimizer(Minimizer):
convergence = 0 convergence = 0
for i in range(self.steps): for i in range(self.steps):
x = self._step(E.position, E.gradient) x = self._step(E.position, E.gradient)
E = self._resample(x) E = E.resample_at(x)
myassert(isinstance(E, Energy)) myassert(isinstance(E, Energy))
myassert(x.domain is E.position.domain) myassert(x.domain is E.position.domain)
return E, convergence return E, convergence
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment