Commit 52ec698f authored by Jakob Knollmüller's avatar Jakob Knollmüller

use nifty structure

parent 9d21a5e2
Pipeline #94444 failed in 31 seconds
import nifty7 as ift
-from parametric_KL import ParametricGaussianKL, MeanfieldModel
from matplotlib import pyplot as plt
-from advi_optimizer import ADVIOptimizer
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
@@ -116,16 +114,16 @@ if __name__ == '__main__':
ic_newton = ift.DeltaEnergyController(
name='Newton', iteration_limit=1, tol_rel_deltaE=1e-8)
# minimizer = ift.L_BFGS(ic_newton)
-minimizer = ADVIOptimizer(steps=10)
+minimizer = ift.ADVIOptimizer(steps=10)
# Approximate the posterior of the information Hamiltonian via ADVI
H = ift.StandardHamiltonian(likelihood)
initial_position = ift.from_random(domain, 'normal')
-meanfield_model = MeanfieldModel(H.domain)
+meanfield_model = ift.MeanfieldModel(H.domain)
initial_position = meanfield_model.get_initial_pos()
position = initial_position
-KL = ParametricGaussianKL.make(initial_position, H, meanfield_model, 3, False)
+KL = ift.ParametricGaussianKL.make(initial_position, H, meanfield_model, 3, False)
plt.figure('data')
plt.imshow(sky(mock_position).val)
plt.pause(0.001)
......
@@ -53,7 +53,7 @@ from .operators.energy_operators import (
Squared2NormOperator, StudentTEnergy, VariableCovarianceGaussianEnergy)
from .operators.convolution_operators import FuncConvolutionOperator
from .operators.normal_operators import NormalTransform, LognormalTransform
+from .operators.multifield_flattener import MultifieldFlattener
from .probing import probe_with_posterior_samples, probe_diagonal, \
StatCalculator, approximation2endo
@@ -67,11 +67,12 @@ from .minimization.nonlinear_cg import NonlinearCG
from .minimization.descent_minimizers import (
DescentMinimizer, SteepestDescent, VL_BFGS, L_BFGS, RelaxedNewton,
NewtonCG)
+from .minimization.stochastic_minimizer import ADVIOptimizer
from .minimization.scipy_minimizer import L_BFGS_B
from .minimization.energy import Energy
from .minimization.quadratic_energy import QuadraticEnergy
from .minimization.energy_adapter import EnergyAdapter
-from .minimization.metric_gaussian_kl import MetricGaussianKL
+from .minimization.gaussian_kl import MetricGaussianKL, ParametricGaussianKL
from .sugar import *
@@ -89,6 +90,7 @@ from .library.adjust_variances import (make_adjust_variances_hamiltonian,
from .library.gridder import Gridder
from .library.correlated_fields import CorrelatedFieldMaker
from .library.correlated_fields_simple import SimpleCorrelatedField
+from .library.meanfield_model import MeanfieldModel
from . import extra
......
import numpy as np
from ..operators.multifield_flattener import MultifieldFlattener
from ..operators.simple_linear_operators import FieldAdapter
from ..operators.energy_operators import EnergyOperator
from ..operators.sandwich_operator import SandwichOperator
from ..sugar import full, from_random
from ..linearization import Linearization
from ..field import Field
from ..multi_field import MultiField
class MeanfieldModel():
def __init__(self, domain):
self.domain = domain
self.Flat = MultifieldFlattener(self.domain)
# the standard deviation is made positive via absolute()
self.std = FieldAdapter(self.Flat.target, 'var').absolute()
self.latent = FieldAdapter(self.Flat.target, 'latent')
self.mean = FieldAdapter(self.Flat.target, 'mean')
# reparameterization xi = mean + std * latent, mapped back to the structured domain
self.generator = self.Flat.adjoint(self.mean + self.std * self.latent)
self.entropy = GaussianEntropy(self.std.target) @ self.std
def get_initial_pos(self, initial_mean=None):
initial_pos = from_random(self.generator.domain).to_dict()
initial_pos['latent'] = full(self.generator.domain['latent'], 0.)
initial_pos['var'] = full(self.generator.domain['var'], 1.)
if initial_mean is None:
initial_mean = 0.1*from_random(self.generator.target)
initial_pos['mean'] = self.Flat(initial_mean)
return MultiField.from_dict(initial_pos)
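The generator above implements the standard mean-field reparameterization xi = mean + |std| * latent with latent ~ N(0, 1): only the latent part is resampled, while mean and std are the parameters being optimized. A standalone numpy sketch of the same trick (names illustrative, not the nifty implementation):

```python
import numpy as np

rng = np.random.default_rng(42)

# variational parameters of a 5-dimensional mean-field Gaussian
mean = 0.1 * rng.standard_normal(5)     # the 'mean' field
std = np.abs(rng.standard_normal(5))    # the 'var' field, made positive as in absolute()

# a sample from q = N(mean, std**2) via a standard-normal latent variable
latent = rng.standard_normal(5)         # the 'latent' field, resampled each iteration
xi = mean + std * latent                # what self.generator evaluates to
print(xi)
```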
class GaussianEntropy(EnergyOperator):
def __init__(self, domain):
self._domain = domain
def apply(self, x):
self._check_input(x)
# negative entropy of a diagonal Gaussian with standard deviation |x|
res = -0.5*(2*np.pi*np.e*x**2).log().sum()
if not isinstance(x, Linearization):
return Field.scalar(res)
if not x.want_metric:
return res
return res.add_metric(SandwichOperator.make(res.jac)) #FIXME not sure about metric
\ No newline at end of file
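GaussianEntropy returns the negative entropy of a diagonal Gaussian, -0.5 * sum_i log(2 pi e sigma_i^2), which the MeanfieldModel adds to the expected Hamiltonian to form the KL. A standalone numpy check against scipy's closed-form Gaussian entropy (illustrative only):

```python
import numpy as np
from scipy.stats import norm

sigma = np.array([0.5, 1.0, 2.0])

# the energy value computed by GaussianEntropy.apply: minus the entropy
res = -0.5 * np.sum(np.log(2 * np.pi * np.e * sigma**2))

# closed-form entropy of independent 1-d Gaussians, summed
assert np.isclose(-res, sum(norm(scale=s).entropy() for s in sigma))
```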
@@ -23,11 +23,13 @@ from ..logger import logger
from ..multi_field import MultiField
from ..operators.endomorphic_operator import EndomorphicOperator
from ..operators.energy_operators import StandardHamiltonian
+from ..operators.multifield_flattener import MultifieldFlattener
from ..probing import approximation2endo
-from ..sugar import makeOp
+from ..sugar import makeOp, full, from_random
from .energy import Energy
class _KLMetric(EndomorphicOperator):
def __init__(self, KL):
self._KL = KL
@@ -279,3 +281,137 @@ class MetricGaussianKL(Energy):
tmp = tmp + self._hamiltonian(lin-s).metric.draw_sample(from_inverse=False)
samp.append(tmp)
return utilities.allreduce_sum(samp, self._comm)/self.n_eff_samples
class ParametricGaussianKL(Energy):
"""Provides the sampled Kullback-Leibler divergence between a distribution
and a Parametric Gaussian.
Notes
-----
See also
"""
def __init__(self, variational_parameters, hamiltonian, variational_model,
n_samples, mirror_samples, comm,
local_samples, nanisinf, _callingfrommake=False):
if not _callingfrommake:
raise NotImplementedError
super(ParametricGaussianKL, self).__init__(variational_parameters)
assert variational_model.generator.target is hamiltonian.domain
self._hamiltonian = hamiltonian
self._variational_model = variational_model
self._full_model = hamiltonian(variational_model.generator) + variational_model.entropy
self._n_samples = int(n_samples)
self._mirror_samples = bool(mirror_samples)
self._comm = comm
self._local_samples = local_samples
self._nanisinf = bool(nanisinf)
lin = Linearization.make_partial_var(variational_parameters, ['latent'])
v, g = [], []
for s in self._local_samples:
# s = _modify_sample_domain(s, variational_parameters.domain)
tmp = self._full_model(lin+s)
tv = tmp.val.val
tg = tmp.gradient
if mirror_samples:
tmp = self._full_model(lin-s)
tv = tv + tmp.val.val
tg = tg + tmp.gradient
v.append(tv)
g.append(tg)
self._val = utilities.allreduce_sum(v, self._comm)[()]/self.n_eff_samples
if np.isnan(self._val) and self._nanisinf:
self._val = np.inf
self._grad = utilities.allreduce_sum(g, self._comm)/self.n_eff_samples
@staticmethod
def make(variational_parameters, hamiltonian, variational_model, n_samples, mirror_samples,
comm=None, nanisinf=False):
"""Return instance of :class:`MetricGaussianKL`.
Parameters
----------
mean : Field
Mean of the Gaussian probability distribution.
hamiltonian : StandardHamiltonian
Hamiltonian of the approximated probability distribution.
n_samples : integer
Number of samples used to stochastically estimate the KL.
mirror_samples : boolean
Whether the negative of the drawn samples are also used, as they are
equally legitimate samples. If true, the number of used samples
doubles. Mirroring samples stabilizes the KL estimate as extreme
sample variation is counterbalanced. Since it improves stability in
many cases, it is recommended to set `mirror_samples` to `True`.
constants : list
List of parameter keys that are kept constant during optimization.
Default is no constants.
napprox : int
Number of samples for computing preconditioner for sampling. No
preconditioning is done by default.
comm : MPI communicator or None
If not None, samples will be distributed as evenly as possible
across this communicator. If `mirror_samples` is set, then a sample and
its mirror image will always reside on the same task.
nanisinf : bool
If true, nan energies which can happen due to overflows in the forward
model are interpreted as inf. Thereby, the code does not crash on
these occaisions but rather the minimizer is told that the position it
has tried is not sensible.
Note
----
The two lists `constants` and `point_estimates` are independent from each
other. It is possible to sample along domains which are kept constant
during minimization and vice versa.
"""
if not isinstance(hamiltonian, StandardHamiltonian):
raise TypeError
if hamiltonian.domain is not variational_model.generator.target:
raise ValueError
if not isinstance(n_samples, int):
raise TypeError
if not isinstance(mirror_samples, bool):
raise TypeError
n_samples = int(n_samples)
mirror_samples = bool(mirror_samples)
local_samples = []
sseq = random.spawn_sseq(n_samples)
# FIXME dirty trick: mask away everything except the latent part, at the
# cost of many multiplications with zero
DirtyMaskDict = full(variational_model.generator.domain, 0.).to_dict()
DirtyMaskDict['latent'] = full(variational_model.generator.domain['latent'], 1.)
DirtyMask = MultiField.from_dict(DirtyMaskDict)
for i in range(*_get_lo_hi(comm, n_samples)):
with random.Context(sseq[i]):
local_samples.append(DirtyMask * from_random(variational_model.generator.domain))
local_samples = tuple(local_samples)
return ParametricGaussianKL(
variational_parameters, hamiltonian, variational_model, n_samples,
mirror_samples, comm, local_samples, nanisinf, _callingfrommake=True)
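The mask built in `make` is zero everywhere except on the 'latent' entries, so a masked random sample perturbs only the latent variables when added to a position. The same bookkeeping on a plain dict of numpy arrays (a sketch; shapes and key names are illustrative):

```python
import numpy as np

rng = np.random.default_rng(1)
sizes = {'mean': 4, 'var': 4, 'latent': 4}   # stand-in for generator.domain

# the mask is zero everywhere except on the latent variables
mask = {k: np.zeros(n) for k, n in sizes.items()}
mask['latent'] = np.ones(sizes['latent'])

# a "sample" then only perturbs the latent part when added to a position
sample = {k: mask[k] * rng.standard_normal(n) for k, n in sizes.items()}
assert not sample['mean'].any() and not sample['var'].any()
```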
@property
def n_eff_samples(self):
if self._mirror_samples:
return 2*self._n_samples
return self._n_samples
def at(self, position):
return ParametricGaussianKL(
position, self._hamiltonian, self._variational_model, self._n_samples,
self._mirror_samples, self._comm, self._local_samples, self._nanisinf,
_callingfrommake=True)
@property
def value(self):
return self._val
@property
def gradient(self):
return self._grad
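Putting the pieces together, the value computed by this class is a Monte Carlo estimate of E_q[H] plus the negative entropy term, with mirrored samples entering in pairs. A self-contained numpy sketch of that estimator on a toy 1-d problem (all names and numbers illustrative):

```python
import numpy as np

rng = np.random.default_rng(0)

def hamiltonian(xi):                 # toy negative log-density, standing in for H
    return 0.5 * (xi - 2.0)**2

mean, std = 1.0, 0.8                 # current variational parameters
entropy_term = -0.5 * np.log(2 * np.pi * np.e * std**2)   # as in GaussianEntropy

n_samples, mirror_samples = 3, True
latents = rng.standard_normal(n_samples)
if mirror_samples:                   # a sample and its mirror image enter together
    latents = np.concatenate([latents, -latents])

# Monte Carlo estimate of E_q[H] plus the negative entropy, as in self._val
kl_estimate = np.mean([hamiltonian(mean + std * l) for l in latents]) + entropy_term
print(kl_estimate)
```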
-from parametric_KL import ParametricGaussianKL
-import nifty7 as ift
+from ..minimization.gaussian_kl import ParametricGaussianKL
+from .minimizer import Minimizer
+from ..sugar import sqrt  # needed for the bare sqrt call below
import numpy as np
-class ADVIOptimizer(ift.Minimizer):
+class ADVIOptimizer(Minimizer):
def __init__(self, steps, eta=1, alpha=1, tau=1, epsilon=1e-16):
self.alpha = alpha
@@ -15,7 +15,7 @@ class ADVIOptimizer(ift.Minimizer):
def _step(self, position, gradient):
self.s = self.alpha * gradient**2 + (1-self.alpha)*self.s
-self.rho = self.eta * self.counter**(-0.5 + self.epsilon) / (self.tau + ift.sqrt(self.s))
+self.rho = self.eta * self.counter**(-0.5 + self.epsilon) / (self.tau + sqrt(self.s))
new_position = position - self.rho * gradient
self.counter += 1
return new_position
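This is the adaptive step-size schedule used for ADVI (cf. Kucukelbir et al., JMLR 2017): s_t = alpha * g_t^2 + (1 - alpha) * s_{t-1} and rho_t = eta * t^(-1/2 + eps) / (tau + sqrt(s_t)). A scalar numpy sketch of a few updates (stand-in gradient values):

```python
import numpy as np

eta, alpha, tau, epsilon = 1.0, 1.0, 1.0, 1e-16
s, counter, position = None, 1, 0.0

for gradient in [2.0, -1.5, 0.7]:            # stand-in gradients
    # moving average of the squared gradient, as in _step
    s = gradient**2 if s is None else alpha * gradient**2 + (1 - alpha) * s
    # decaying step size with gradient-scale normalization
    rho = eta * counter**(-0.5 + epsilon) / (tau + np.sqrt(s))
    position = position - rho * gradient
    counter += 1

print(position)
```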
@@ -23,15 +23,15 @@ class ADVIOptimizer(ift.Minimizer):
def __call__(self, E):
if self.s is None:
self.s = E.gradient**2
-convergence = 0
+convergence = 0  # TODO: come up with something to determine convergence
for i in range(self.steps):
x = self._step(E.position, E.gradient)
-# maybe some KL function for resample?
+# maybe some KL function for resample? Should make it more generic.
E = ParametricGaussianKL.make(x, E._hamiltonian, E._variational_model,
E._n_samples, E._mirror_samples)
return E, convergence
def reset(self):
-self.counter = 0
+self.counter = 1  # start at 1: counter**(-0.5) diverges at 0
self.s = None
\ No newline at end of file
import numpy as np
from .linear_operator import LinearOperator
from ..domain_tuple import DomainTuple
from ..domains.unstructured_domain import UnstructuredDomain
from ..sugar import makeField
from ..multi_field import MultiField
class MultifieldFlattener(LinearOperator):
'''
Flattens a MultiField into a Field on an unstructured domain with the same
number of degrees of freedom.
'''
def __init__(self, domain):
self._dof = domain.size
self._domain = domain
self._target = DomainTuple.make(UnstructuredDomain(self._dof))
self._capability = self.TIMES | self.ADJOINT_TIMES
def _flatten(self, x):
result = np.empty(self.target.shape)
runner = 0
for key in self.domain.keys():
dom_size = x[key].domain.size
result[runner:runner+dom_size] = x[key].val.flatten()
runner += dom_size
return result
def _restructure(self, x):
runner = 0
unstructured = x.val
result = {}
for key in self.domain.keys():
subdom = self.domain[key]
dom_size = subdom.size
subfield = unstructured[runner:runner+dom_size].reshape(subdom.shape)
result[key] = makeField(subdom, subfield)
runner += dom_size
return result
def apply(self, x, mode):
self._check_mode(mode)
if mode == self.TIMES:
return makeField(self.target, self._flatten(x))
return MultiField.from_dict(self._restructure(x))
\ No newline at end of file
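The operator concatenates every constituent of a MultiField into one flat vector and cuts it back apart in the adjoint. The same bookkeeping in plain numpy on a dict of arrays (standalone sketch):

```python
import numpy as np

fields = {'a': np.arange(6.0).reshape(2, 3), 'b': np.array([7.0, 8.0])}

# flatten: concatenate all values in key order, as in _flatten
flat = np.concatenate([v.ravel() for v in fields.values()])

# restructure: cut the flat vector back into the original shapes, as in _restructure
restored, runner = {}, 0
for key, v in fields.items():
    restored[key] = flat[runner:runner + v.size].reshape(v.shape)
    runner += v.size

assert all(np.array_equal(fields[k], restored[k]) for k in fields)
```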
import nifty5 as ift
class ADVIOptimizer():
def __init__(self, H, model, entropy,initial_position ,eta=1, alpha=1, tau=1, epsilon=1e-16):
self.model = model
self.H = H
self.entropy = entropy
self.alpha = alpha
self.eta = eta
self.tau=tau
self.epsilon = epsilon
self.counter = 1
E = ift.ParametrizedGaussianKL(initial_position, H, self.model,
self.entropy, 5)
self.s = E.gradient**2
def _step(self, position, gradient):
self.s = self.alpha * gradient**2 + (1-self.alpha)*self.s
self.rho = self.eta * self.counter**(-0.5+ self.epsilon) / (self.tau + ift.sqrt(self.s))
new_position = position - self.rho * gradient
self.counter += 1
return new_position
def __call__(self, x,steps,N_samples=1, N_validate=10):
for i in range(steps):
E = ift.ParametrizedGaussianKL(x, self.H, self.model,
self.entropy, N_samples)
x = self._step(E.position, E.gradient)
return x
class MGVIOptimizer():
def __init__(self, H, initial_position ,eta=1, alpha=1, tau=1, epsilon=1e-16):
self.H = H
self.alpha = alpha
self.eta = eta
self.tau=tau
self.epsilon = epsilon
self.counter = 1
E = ift.MetricGaussianKL(initial_position, H, 5,
mirror_samples=False)
self.s = E.gradient**2
def _step(self, position, gradient):
self.s = self.alpha * gradient**2 + (1-self.alpha)*self.s
self.rho = self.eta * self.counter**(-0.5+ self.epsilon) / (self.tau + ift.sqrt(self.s))
new_position = position - self.rho * gradient
self.counter += 1
return new_position
def __call__(self, x,steps,N_samples=1, N_validate=10, mirror_samples=False):
for i in range(steps):
E = ift.MetricGaussianKL(x, self.H, N_samples,
mirror_samples=mirror_samples)
x = self._step(E.position, E.gradient)
return x
\ No newline at end of file
import nifty7 as ift
import numpy as np
import os, sys
class MultifieldFlattener(ift.LinearOperator):
def __init__(self, domain):
self._dof = domain.size
self._domain = domain
self._target = ift.DomainTuple.make(ift.UnstructuredDomain(self._dof))
self._capability = self.TIMES | self.ADJOINT_TIMES
def _flatten(self, x):
result = np.empty(self.target.shape)
runner = 0
for key in self.domain.keys():
dom_size = x[key].domain.size
result[runner:runner+dom_size] = x[key].val.flatten()
runner += dom_size
return result
def _restructure(self, x):
runner = 0
unstructured = x.val
result = {}
for key in self.domain.keys():
subdom = self.domain[key]
dom_size = subdom.size
subfield = unstructured[runner:runner+dom_size].reshape(subdom.shape)
subdict = {key:ift.from_global_data(subdom,subfield)}
result = {**result,**subdict}
runner += dom_size
return result
def apply(self, x, mode):
self._check_mode(mode)
if mode == self.TIMES:
return ift.from_global_data(self.target,self._flatten(x))
return ift.MultiField.from_dict(self._restructure(x))
class GaussianEntropy(ift.EnergyOperator):
def __init__(self, domain):
self._domain = domain
def apply(self, x):
self._check_input(x)
res = -0.5*ift.log((2*np.pi*np.e*x**2)).sum()
if not isinstance(x, ift.Linearization):
return ift.Field.scalar(res)
if not x.want_metric:
return res
return res.add_metric(ift.SandwichOperator.make(res.jac))
class LowerTriangularProjector(ift.LinearOperator):
def __init__(self, domain, target):
self._domain = domain
self._target = target
self._indices=np.tril_indices(target.shape[1])
self._capability = self.TIMES | self.ADJOINT_TIMES
def apply(self, x, mode):
self._check_mode(mode)
if mode == self.TIMES:
mat = np.zeros(self._target.shape[1:])
mat[self._indices] = x.val
return ift.from_global_data(self._target,mat.reshape((1,)+mat.shape))
return ift.from_global_data(self._domain,x.val[0][self._indices].reshape(self._domain.shape))
class DiagonalSelector(ift.LinearOperator):
def __init__(self, domain, target):
self._domain = domain
self._target = target
self._capability = self.TIMES | self.ADJOINT_TIMES
def apply(self, x, mode):
self._check_mode(mode)
if mode == self.TIMES:
result = np.diag(x.val[0])
return ift.from_global_data(self._target,result)
return ift.from_global_data(self._domain,np.diag(x.val).reshape(self._domain.shape))
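LowerTriangularProjector and DiagonalSelector, which are not ported in this commit, supported a full-covariance parameterization: the Cholesky factor of the covariance is stored as a packed vector of its lower-triangular entries, and the diagonal is extracted separately. The packing roundtrip in plain numpy (standalone sketch, n chosen arbitrarily):

```python
import numpy as np

n = 3
indices = np.tril_indices(n)                        # as in self._indices
packed = np.array([1.0, 0.2, 2.0, 0.3, 0.1, 3.0])  # n*(n+1)/2 entries

mat = np.zeros((n, n))                 # TIMES: packed vector -> lower-triangular matrix
mat[indices] = packed

assert np.array_equal(mat[indices], packed)  # ADJOINT_TIMES recovers the vector
print(np.diag(mat))                          # what DiagonalSelector extracts
```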
class ParametrizedGaussianKL(ift.Energy):
def __init__(self, variational_parameters, hamiltonian, variational_model, entropy ,n_samples,
_samples=None):
super(ParametrizedGaussianKL, self).__init__(variational_parameters)
# \xi = \bar{\xi} + \sigma * \eta
if hamiltonian.domain is not variational_model.target:
raise ValueError
if not isinstance(n_samples, int):
raise TypeError
self._entropy = entropy
self._n_samples = n_samples
self._hamiltonian = hamiltonian
self._variational_model = variational_model
self._full_model = hamiltonian(variational_model) + entropy
#FIXME !DIRTY, DON'T DO THIS!
DirtyMaskDict = ift.full(self._variational_model.domain,0.).to_dict()
DirtyMaskDict['latent'] = ift.full(self._variational_model.domain['latent'], 1.)
DirtyMask = ift.MultiField.from_dict(DirtyMaskDict)
if _samples is None:
_samples = tuple(DirtyMask * ift.from_random('normal', variational_model.domain)
for _ in range(n_samples))
else:
_samples = tuple(DirtyMask * _
for _ in _samples)
##################
self._samples = _samples
self._lin = ift.Linearization.make_partial_var(variational_parameters, ['latent'])
v, g = None, None
for s in self._samples:
tmp = self._full_model(self._lin+s)
if v is None:
v = tmp.val.local_data[()]
g = tmp.gradient
else:
v += tmp.val.local_data[()]
g = g + tmp.gradient
self._val = v / len(self._samples)
self._grad = g * (1./len(self._samples))
self._metric = None
def at(self, position):
return ParametrizedGaussianKL(position, self._hamiltonian, self._variational_model,
self._entropy, n_samples=self._n_samples,
_samples=self._samples)
@property
def value(self):
return self._val
@property
def gradient(self):
return self._grad
def _get_metric(self):
if self._metric is None:
lin = self._lin.with_want_metric()
mymap = map(lambda v: self._full_model(lin+v).metric,
self._samples)
self._metric = ift.utilities.my_sum(mymap)
self._metric = self._metric.scale(1./len(self._samples))
def apply_metric(self, x):
self._get_metric()
return self._metric(x)
@property
def metric(self):
self._get_metric()
return self._metric
@property
def samples(self):
return self._samples
class Respacer(ift.LinearOperator):
def __init__(self, domain, target):
self._domain = domain
self._target = target
self._capability = self.TIMES | self.ADJOINT_TIMES