Commit 9d21a5e2 authored by Jakob Knollmüller's avatar Jakob Knollmüller
Browse files

Working demo for mean-field Gaussian variational inference

parent 02fd701c
Pipeline #94340 passed with stages
in 11 minutes and 14 seconds
from parametric_KL import ParametricGaussianKL
import nifty7 as ift
import numpy as np
class ADVIOptimizer(ift.Minimizer):
    """Stochastic optimizer for ADVI (automatic differentiation variational
    inference) with an adaptive step-size schedule.

    The step size rho is rescaled by a running average of the squared
    gradient, decaying like counter**(-0.5).

    Parameters
    ----------
    steps : int
        Number of gradient steps performed per `__call__`.
    eta, alpha, tau, epsilon : float
        Hyper-parameters of the adaptive step-size rule.
    """

    def __init__(self, steps, eta=1, alpha=1, tau=1, epsilon=1e-16):
        self.alpha = alpha
        self.eta = eta
        self.tau = tau
        self.epsilon = epsilon
        self.counter = 1
        self.steps = steps
        # Running average of the squared gradient; initialized lazily on the
        # first __call__, since the gradient's domain is unknown here.
        self.s = None

    def _step(self, position, gradient):
        # Update the running squared-gradient average, then take one step.
        self.s = self.alpha * gradient**2 + (1 - self.alpha) * self.s
        self.rho = self.eta * self.counter**(-0.5 + self.epsilon) \
            / (self.tau + ift.sqrt(self.s))
        new_position = position - self.rho * gradient
        self.counter += 1
        return new_position

    def __call__(self, E):
        """Run `self.steps` ADVI iterations on energy `E`.

        Returns the final KL energy and a convergence flag (always 0 — no
        convergence criterion is implemented yet).
        """
        if self.s is None:
            self.s = E.gradient**2
        convergence = 0
        for _ in range(self.steps):
            x = self._step(E.position, E.gradient)
            # Rebuild the KL at the new position so samples are re-drawn.
            E = ParametricGaussianKL.make(x, E._hamiltonian,
                                          E._variational_model,
                                          E._n_samples, E._mirror_samples)
        return E, convergence

    def reset(self):
        # BUGFIX: previously reset `counter` to 0, but `_step` divides by
        # counter**0.5, so the next step would raise ZeroDivisionError.
        # __init__ also starts counting at 1.
        self.counter = 1
        self.s = None
\ No newline at end of file
import nifty7 as ift
from parametric_KL import ParametricGaussianKL, MeanfieldModel
from matplotlib import pyplot as plt
from advi_optimizer import ADVIOptimizer
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
# Copyright(C) 2013-2020 Max-Planck-Society
#
# NIFTy is being developed at the Max-Planck-Institut fuer Astrophysik.
###############################################################################
# Log-normal field reconstruction from Poissonian data with inhomogenous
# exposure (in case for 2D mode)
# 1D (set mode=0), 2D (mode=1), or on the sphere (mode=2)
###############################################################################
import sys
import numpy as np
def exposure_2d(domain):
    """Build a structured, inhomogeneous exposure field for the 2D mode.

    Three bands are scaled along each axis (the same factors row- and
    column-wise); the result is wrapped into a NIFTy field on `domain`.
    """
    nx, _ = domain.shape
    exposure = np.ones(domain.shape)
    # (slice, factor) pairs applied identically to rows and columns.
    bands = [(slice(nx // 3, nx // 2), 2.),
             (slice(nx * 4 // 5, nx), .1),
             (slice(nx // 2, nx * 3 // 2), 3.)]
    for band, factor in bands:
        exposure[band, :] *= factor
        exposure[:, band] *= factor
    return ift.Field.from_raw(domain, exposure)
def main():
    """Plot the mock signal, data, reconstruction and residuals.

    NOTE(review): this function reads module-level names (`sky`,
    `mock_position`, `H`, `GR`, `data`, `mode`) that are only created inside
    the `if __name__ == '__main__'` block below, and it is never called
    there — invoking it directly raises NameError. Presumably a leftover
    from the original demo script; verify before use.
    """
    # Plotting
    signal = sky(mock_position)
    reconst = sky(H.position)
    filename = "getting_started_2_mode_{}.png".format(mode)
    plot = ift.Plot()
    plot.add(signal, title='Signal')
    plot.add(GR.adjoint(data), title='Data')
    plot.add(reconst, title='Reconstruction')
    plot.add(reconst - signal, title='Residuals')
    plot.output(xsize=12, ysize=10, name=filename)
    print("Saved results as '{}'.".format(filename))
if __name__ == '__main__':
    # Choose space on which the signal field is defined
    if len(sys.argv) == 2:
        mode = int(sys.argv[1])
    else:
        mode = 1

    if mode == 0:
        # One-dimensional regular grid with uniform exposure of 10
        position_space = ift.RGSpace(1024)
        exposure = ift.Field.full(position_space, 10.)
    elif mode == 1:
        # Two-dimensional regular grid with inhomogeneous exposure
        position_space = ift.RGSpace([512, 512])
        exposure = exposure_2d(position_space)
    else:
        # Sphere with uniform exposure of 100
        position_space = ift.HPSpace(128)
        exposure = ift.Field.full(position_space, 100.)

    # Define harmonic space and harmonic transform
    harmonic_space = position_space.get_default_codomain()
    HT = ift.HarmonicTransformOperator(harmonic_space, position_space)

    # Domain on which the field's degrees of freedom are defined
    domain = ift.DomainTuple.make(harmonic_space)

    # Define amplitude (square root of power spectrum)
    def sqrtpspec(k):
        return 1./(20. + k**2)

    p_space = ift.PowerSpace(harmonic_space)
    pd = ift.PowerDistributor(harmonic_space, p_space)
    a = ift.PS_field(p_space, sqrtpspec)
    A = pd(a)

    # Define sky operator: log-normal field, reading latent input 'xi'
    sky = ift.exp(HT(ift.makeOp(A))).ducktape('xi')

    M = ift.DiagonalOperator(exposure)
    GR = ift.GeometryRemover(position_space)
    # Define instrumental response
    R = GR(M)

    # Generate mock data and define likelihood operator
    d_space = R.target[0]
    lamb = R(sky)
    mock_position = ift.from_random(sky.domain, 'normal')
    data = lamb(mock_position)
    data = ift.random.current_rng().poisson(data.val.astype(np.float64))
    data = ift.Field.from_raw(d_space, data)
    likelihood = ift.PoissonianEnergy(data) @ lamb

    # Settings for minimization
    # NOTE(review): ic_newton is unused; ADVIOptimizer replaces L-BFGS below.
    ic_newton = ift.DeltaEnergyController(
        name='Newton', iteration_limit=1, tol_rel_deltaE=1e-8)
    # minimizer = ift.L_BFGS(ic_newton)
    minimizer = ADVIOptimizer(steps=10)

    # Compute MAP solution by minimizing the information Hamiltonian
    H = ift.StandardHamiltonian(likelihood)
    initial_position = ift.from_random(domain, 'normal')
    meanfield_model = MeanfieldModel(H.domain)
    # NOTE(review): this overwrites the position drawn just above — the
    # variational parameters come from the mean-field model instead.
    initial_position = meanfield_model.get_initial_pos()
    position = initial_position
    KL = ParametricGaussianKL.make(initial_position, H, meanfield_model, 3, False)

    # Show the mock data, then run 300 optimizer sweeps, re-plotting the
    # current reconstruction after each one.
    plt.figure('data')
    plt.imshow(sky(mock_position).val)
    plt.pause(0.001)
    for i in range(300):
        # KL = ParametricGaussianKL.make(position,H,meanfield_model,3,True)
        KL, _ = minimizer(KL)
        position = KL.position
        plt.figure('result')
        plt.cla()
        plt.imshow(sky(meanfield_model.generator(KL.position)).val)
        plt.pause(0.001)
\ No newline at end of file
import nifty5 as ift
class ADVIOptimizer():
    """ADVI optimizer with adaptive step sizes (nifty5 variant).

    One KL estimate is drawn at `initial_position` during construction to
    seed the running squared-gradient average.
    """

    def __init__(self, H, model, entropy, initial_position,
                 eta=1, alpha=1, tau=1, epsilon=1e-16):
        self.model = model
        self.H = H
        self.entropy = entropy
        self.alpha = alpha
        self.eta = eta
        self.tau = tau
        self.epsilon = epsilon
        self.counter = 1
        seed_kl = ift.ParametrizedGaussianKL(initial_position, H, self.model,
                                             self.entropy, 5)
        self.s = seed_kl.gradient**2

    def _step(self, position, gradient):
        """Perform one adaptive gradient step; return the new position."""
        self.s = (1 - self.alpha)*self.s + self.alpha*gradient**2
        decay = self.counter**(-0.5 + self.epsilon)
        self.rho = self.eta*decay/(self.tau + ift.sqrt(self.s))
        updated = position - self.rho*gradient
        self.counter += 1
        return updated

    def __call__(self, x, steps, N_samples=1, N_validate=10):
        """Run `steps` ADVI iterations from `x` (`N_validate` is unused)."""
        for _ in range(steps):
            kl = ift.ParametrizedGaussianKL(x, self.H, self.model,
                                            self.entropy, N_samples)
            x = self._step(kl.position, kl.gradient)
        return x
class MGVIOptimizer():
    """MGVI optimizer using the same adaptive step-size rule as ADVIOptimizer.

    A MetricGaussianKL is evaluated at `initial_position` on construction to
    seed the running squared-gradient average.
    """

    def __init__(self, H, initial_position, eta=1, alpha=1, tau=1,
                 epsilon=1e-16):
        self.H = H
        self.alpha = alpha
        self.eta = eta
        self.tau = tau
        self.epsilon = epsilon
        self.counter = 1
        seed_kl = ift.MetricGaussianKL(initial_position, H, 5,
                                       mirror_samples=False)
        self.s = seed_kl.gradient**2

    def _step(self, position, gradient):
        """One adaptive gradient step; returns the updated position."""
        self.s = (1 - self.alpha)*self.s + self.alpha*gradient**2
        decay = self.counter**(-0.5 + self.epsilon)
        self.rho = self.eta*decay/(self.tau + ift.sqrt(self.s))
        updated = position - self.rho*gradient
        self.counter += 1
        return updated

    def __call__(self, x, steps, N_samples=1, N_validate=10,
                 mirror_samples=False):
        """Run `steps` MGVI iterations from `x` (`N_validate` is unused)."""
        for _ in range(steps):
            kl = ift.MetricGaussianKL(x, self.H, N_samples,
                                      mirror_samples=mirror_samples)
            x = self._step(kl.position, kl.gradient)
        return x
\ No newline at end of file
import nifty7 as ift
import numpy as np
import os, sys
class MultifieldFlattener(ift.LinearOperator):
    """Linear operator flattening a MultiField into one unstructured vector.

    TIMES concatenates all component fields into a single 1D field;
    ADJOINT_TIMES restores the original MultiField structure.
    """

    def __init__(self, domain):
        self._dof = domain.size
        self._domain = domain
        self._target = ift.DomainTuple.make(ift.UnstructuredDomain(self._dof))
        self._capability = self.TIMES | self.ADJOINT_TIMES

    def _flatten(self, x):
        # Copy each component's values into consecutive slots of one array.
        result = np.empty(self.target.shape)
        runner = 0
        for key in self.domain.keys():
            dom_size = x[key].domain.size
            result[runner:runner+dom_size] = x[key].val.flatten()
            runner += dom_size
        return result

    def _restructure(self, x):
        # Split the flat array back into correctly-shaped component fields.
        runner = 0
        unstructured = x.val
        result = {}
        for key in self.domain.keys():
            subdom = self.domain[key]
            dom_size = subdom.size
            subfield = unstructured[runner:runner+dom_size].reshape(subdom.shape)
            # BUGFIX: `ift.from_global_data` does not exist in nifty7 (this
            # file imports nifty7); use `ift.makeField`, as the identical
            # copy of this class later in the file does.
            result[key] = ift.makeField(subdom, subfield)
            runner += dom_size
        return result

    def apply(self, x, mode):
        self._check_mode(mode)
        if mode == self.TIMES:
            return ift.makeField(self.target, self._flatten(x))
        return ift.MultiField.from_dict(self._restructure(x))
class GaussianEntropy(ift.EnergyOperator):
    """Energy operator returning -0.5*sum(log(2*pi*e*x**2)), i.e. the
    negative entropy of a diagonal Gaussian with standard deviations x."""

    def __init__(self, domain):
        self._domain = domain

    def apply(self, x):
        self._check_input(x)
        variance_term = 2*np.pi*np.e*x**2
        res = -0.5*ift.log(variance_term).sum()
        if isinstance(x, ift.Linearization):
            if x.want_metric:
                return res.add_metric(ift.SandwichOperator.make(res.jac))
            return res
        # Plain field input: wrap the scalar result into a field.
        return ift.Field.scalar(res)
class LowerTriangularProjector(ift.LinearOperator):
    """Embed a vector of lower-triangular entries into a full matrix.

    TIMES scatters the input vector into the lower triangle of a
    (1, N, N)-shaped field; ADJOINT_TIMES gathers the lower-triangular
    entries back into a vector.
    """

    def __init__(self, domain, target):
        self._domain = domain
        self._target = target
        # Row/column indices of the lower triangle of an NxN matrix.
        self._indices = np.tril_indices(target.shape[1])
        self._capability = self.TIMES | self.ADJOINT_TIMES

    def apply(self, x, mode):
        self._check_mode(mode)
        if mode == self.TIMES:
            mat = np.zeros(self._target.shape[1:])
            mat[self._indices] = x.val
            # BUGFIX: `ift.from_global_data` does not exist in nifty7 (this
            # file imports nifty7); use `ift.makeField`, consistent with the
            # nifty7 code later in the file.
            return ift.makeField(self._target, mat.reshape((1,)+mat.shape))
        return ift.makeField(self._domain,
                             x.val[0][self._indices].reshape(self._domain.shape))
class DiagonalSelector(ift.LinearOperator):
    """Extract the diagonal of a matrix-shaped field (TIMES) and embed a
    vector back as a diagonal matrix (ADJOINT_TIMES)."""

    def __init__(self, domain, target):
        self._domain = domain
        self._target = target
        self._capability = self.TIMES | self.ADJOINT_TIMES

    def apply(self, x, mode):
        self._check_mode(mode)
        if mode == self.TIMES:
            result = np.diag(x.val[0])
            # BUGFIX: `ift.from_global_data` does not exist in nifty7 (this
            # file imports nifty7); use `ift.makeField`, consistent with the
            # nifty7 code later in the file.
            return ift.makeField(self._target, result)
        return ift.makeField(self._domain,
                             np.diag(x.val).reshape(self._domain.shape))
class ParametrizedGaussianKL(ift.Energy):
    """Sampled KL divergence between a parametrized Gaussian and the
    posterior defined by `hamiltonian`.

    The objective is the sample-averaged value of
    `hamiltonian(variational_model) + entropy`, where only the 'latent'
    component of the parameters is randomized (standard-normal samples) and
    the remaining components are the variational parameters being optimized.

    NOTE(review): this class uses nifty5-era APIs (`tmp.val.local_data`,
    `ift.from_random('normal', dom)` argument order) although the file
    imports nifty7 — presumably legacy code; verify against the nifty
    version actually in use.
    """

    def __init__(self, variational_parameters, hamiltonian, variational_model,
                 entropy, n_samples, _samples=None):
        super(ParametrizedGaussianKL, self).__init__(variational_parameters)
        # \xi = \bar{\xi} + \sigma * \eta
        if hamiltonian.domain is not variational_model.target:
            raise ValueError
        if not isinstance(n_samples, int):
            raise TypeError
        self._entropy = entropy
        self._n_samples = n_samples
        self._hamiltonian = hamiltonian
        self._variational_model = variational_model
        # Full objective: expected Hamiltonian plus (negative-)entropy term.
        self._full_model = hamiltonian(variational_model) + entropy
        # FIXME: dirty workaround — don't do this!
        # Mask that is 1 on the 'latent' component and 0 elsewhere, so that
        # random draws only perturb the latent excitations.
        DirtyMaskDict = ift.full(self._variational_model.domain, 0.).to_dict()
        DirtyMaskDict['latent'] = ift.full(self._variational_model.domain['latent'], 1.)
        DirtyMask = ift.MultiField.from_dict(DirtyMaskDict)
        if _samples is None:
            _samples = tuple(DirtyMask * ift.from_random('normal', variational_model.domain)
                             for _ in range(n_samples))
        else:
            # Re-mask externally supplied samples as well.
            _samples = tuple(DirtyMask * _
                             for _ in _samples)
        ##################
        self._samples = _samples
        # Differentiate w.r.t. all parameters except the latent excitations.
        self._lin = ift.Linearization.make_partial_var(variational_parameters, ['latent'])
        v, g = None, None
        for s in self._samples:
            tmp = self._full_model(self._lin + s)
            if v is None:
                v = tmp.val.local_data[()]
                g = tmp.gradient
            else:
                v += tmp.val.local_data[()]
                g = g + tmp.gradient
        # Monte-Carlo average over the drawn samples.
        self._val = v / len(self._samples)
        self._grad = g * (1./len(self._samples))
        self._metric = None

    def at(self, position):
        # Re-evaluate at a new position, reusing the same samples.
        return ParametrizedGaussianKL(position, self._hamiltonian, self._variational_model,
                                      self._entropy, n_samples=self._n_samples,
                                      _samples=self._samples)

    @property
    def value(self):
        return self._val

    @property
    def gradient(self):
        return self._grad

    def _get_metric(self):
        # Lazily build (and cache) the sample-averaged metric.
        if self._metric is None:
            lin = self._lin.with_want_metric()
            mymap = map(lambda v: self._full_model(lin + v).metric,
                        self._samples)
            self._metric = ift.utilities.my_sum(mymap)
            self._metric = self._metric.scale(1./len(self._samples))

    def apply_metric(self, x):
        self._get_metric()
        return self._metric(x)

    @property
    def metric(self):
        self._get_metric()
        return self._metric

    @property
    def samples(self):
        return self._samples
class Respacer(ift.LinearOperator):
    """Reshape fields between two domains holding the same number of values."""

    def __init__(self, domain, target):
        self._domain = domain
        self._target = target
        self._capability = self.TIMES | self.ADJOINT_TIMES

    def apply(self, x, mode):
        self._check_mode(mode)
        if mode == self.TIMES:
            # BUGFIX: `ift.from_global_data` does not exist in nifty7 (this
            # file imports nifty7); use `ift.makeField`, consistent with the
            # nifty7 code later in the file.
            return ift.makeField(self._target, x.val.reshape(self._target.shape))
        return ift.makeField(self._domain, x.val.reshape(self._domain.shape))
def build_meanfield(likelihood, initial_mean=None):
    """Construct a mean-field (diagonal) Gaussian variational model.

    Returns the generator model, its entropy operator, an initial parameter
    position, and the variance/mean field adapters.

    NOTE(review): `ift.from_random('normal', dom)` uses the old nifty5-style
    argument order although the file imports nifty7 — verify.
    """
    Flat = MultifieldFlattener(likelihood.domain)
    # Variational parameters: 'mean', 'var' (kept positive via absolute)
    # and standard-normal excitations 'latent'.
    variational_var = ift.FieldAdapter(Flat.target, 'var').absolute()
    variational_latent = ift.FieldAdapter(Flat.target, 'latent')
    variational_mean = ift.FieldAdapter(Flat.target, 'mean')
    # xi = mean + sigma*eta, mapped back onto the structured domain.
    meanfield_model = Flat.adjoint(variational_mean + variational_var * variational_latent)
    initial_pos = ift.from_random('normal', meanfield_model.domain).to_dict()
    initial_pos['latent'] = ift.full(meanfield_model.domain['latent'], 0.)
    initial_pos['var'] = ift.full(meanfield_model.domain['var'], 1.)
    if initial_mean is None:
        # Default: a small random mean drawn on the likelihood domain.
        initial_mean = 0.1*ift.from_random('normal', likelihood.domain)
    initial_pos['mean'] = Flat(initial_mean)
    initial_pos = ift.MultiField.from_dict(initial_pos)
    meanfield_entropy = GaussianEntropy(variational_var.target)(variational_var)
    return meanfield_model, meanfield_entropy, initial_pos, variational_var, variational_mean
def build_fullcovariance(likelihood, initial_mean=None):
    """Construct a full-covariance Gaussian variational model.

    The covariance is parametrized by a lower-triangular matrix 'cov'; the
    sample is `cov @ latent + mea`, mapped back onto the likelihood domain.
    Returns the generator model, its entropy operator, an initial parameter
    position, and the covariance/mean adapters.
    """
    Flat = MultifieldFlattener(likelihood.domain)
    one_space = ift.UnstructuredDomain(1)
    latent_domain = Flat.target[0]
    # Number of free entries of an NxN lower-triangular matrix.
    N_tri = latent_domain.shape[0]*(latent_domain.shape[0]+1)//2
    triangular_space = ift.DomainTuple.make(ift.UnstructuredDomain(N_tri))
    tri = ift.FieldAdapter(triangular_space, 'cov')
    mat_space = ift.DomainTuple.make((one_space, latent_domain, latent_domain))
    lat_mat_space = ift.DomainTuple.make((one_space, one_space, latent_domain))
    lat = ift.FieldAdapter(lat_mat_space, 'latent')
    LT = LowerTriangularProjector(triangular_space, mat_space)
    mea = ift.FieldAdapter(latent_domain, 'mea')
    cov = LT @ tri
    # NOTE(review): `VariableMatMul` is not defined in this file — presumably
    # provided elsewhere in the project; verify the import.
    Mmult = VariableMatMul(lat, cov)
    Resp = Respacer(Mmult.target, mea.target)
    sam = Resp(Mmult) + mea
    Diag = DiagonalSelector(cov.target, Flat.target)
    fullcovariance_model = Flat.adjoint(sam)
    Diag_cov = Diag(cov).absolute()
    fullcovariance_entropy = GaussianEntropy(Diag_cov.target)(Diag(cov))
    initial_pos = ift.from_random('normal', fullcovariance_model.domain).to_dict()
    initial_pos['latent'] = ift.full(fullcovariance_model.domain['latent'], 0.)
    # Start from the identity matrix (its lower-triangular entries).
    diag_tri = np.diag(np.ones(latent_domain.shape[0]))[np.tril_indices(latent_domain.shape[0])]
    # BUGFIX: nifty7 has no `ift.from_global_data`; use `ift.makeField`.
    initial_pos['cov'] = ift.makeField(fullcovariance_model.domain['cov'], diag_tri)
    # BUGFIX: `initial_mean` defaults to None but was passed straight to
    # Flat(), which crashes; draw a default mean exactly the way
    # build_meanfield does.
    if initial_mean is None:
        initial_mean = 0.1*ift.from_random('normal', likelihood.domain)
    initial_pos['mea'] = Flat(initial_mean)
    initial_pos = ift.MultiField.from_dict(initial_pos)
    return fullcovariance_model, fullcovariance_entropy, initial_pos, cov, mea
import nifty7 as ift
import numpy as np
# Smoke test for MultiLinearEinsum on MultiFields: build a random MultiField
# with components 'm' and 'b' and print the shape of their contraction.
m_space = ift.RGSpace([73])
a_space = ift.RGSpace([128])
# NOTE(review): b_space is defined but never used below.
b_space = ift.RGSpace([128])
o_space = ift.RGSpace([1])
mb_space = ift.MultiDomain.make({'m': [a_space, m_space], 'b': [o_space, a_space]})
mb = ift.from_random(mb_space)
# Contract 'm' (indices ij) with 'b' (indices ki) over i, yielding jk.
matop = ift.MultiLinearEinsum(mb_space, 'ij,ki->jk', key_order=('m', 'b'))
print(matop(mb).shape)
class MultifieldFlattener(ift.LinearOperator):
    """Linear operator that packs a MultiField into one flat vector.

    TIMES writes every component field into consecutive slots of a single
    unstructured field; ADJOINT_TIMES reverses the packing.
    """

    def __init__(self, domain):
        self._dof = domain.size
        self._domain = domain
        self._target = ift.DomainTuple.make(ift.UnstructuredDomain(self._dof))
        self._capability = self.TIMES | self.ADJOINT_TIMES

    def _flatten(self, x):
        # Write each component into its slot of a preallocated flat array.
        flat = np.empty(self.target.shape)
        offset = 0
        for key in self.domain.keys():
            size = x[key].domain.size
            flat[offset:offset + size] = x[key].val.flatten()
            offset += size
        return flat

    def _restructure(self, x):
        # Cut the flat array into correctly-shaped per-key fields.
        pieces = {}
        offset = 0
        flat = x.val
        for key in self.domain.keys():
            subdom = self.domain[key]
            size = subdom.size
            chunk = flat[offset:offset + size].reshape(subdom.shape)
            pieces[key] = ift.makeField(subdom, chunk)
            offset += size
        return pieces

    def apply(self, x, mode):
        self._check_mode(mode)
        if mode != self.TIMES:
            return ift.MultiField.from_dict(self._restructure(x))
        return ift.makeField(self.target, self._flatten(x))
def _get_lo_hi(comm, n_samples):
    """Return the (lo, hi) index range of samples owned by this MPI task."""
    n_tasks, task_rank, _ = ift.utilities.get_MPI_params_from_comm(comm)
    return ift.utilities.shareRange(n_samples, n_tasks, task_rank)
class ParametricGaussianKL(ift.Energy):
"""Provides the sampled Kullback-Leibler divergence between a distribution
and a Parametric Gaussian.
Notes
-----
See also
"""
def __init__(self, variational_parameters, hamiltonian, variational_model,
n_samples, mirror_samples, comm,
local_samples, nanisinf, _callingfrommake=False):
if not _callingfrommake:
raise NotImplementedError
super(ParametricGaussianKL, self).__init__(variational_parameters)
assert variational_model.generator.target is hamiltonian.domain
self._hamiltonian = hamiltonian
self._variational_model = variational_model
self._full_model = hamiltonian(variational_model.generator) + variational_model.entropy
self._n_samples = int(n_samples)
self._mirror_samples = bool(mirror_samples)
self._comm = comm
self._local_samples = local_samples
self._nanisinf = bool(nanisinf)
lin = ift.Linearization.make_partial_var(variational_parameters, ['latent'])
v, g = [], []
for s in self._local_samples:
# s = _modify_sample_domain(s, variational_parameters.domain)
tmp = self._full_model(lin+s)
tv = tmp.val.val
tg = tmp.gradient
if mirror_samples:
tmp = self._full_model(lin-s)
tv = tv + tmp.val.val
tg = tg + tmp.gradient
v.append(tv)
g.append(tg)
self._val = ift.utilities.allreduce_sum(v, self._comm)[()]/self.n_eff_samples
if np.isnan(self._val) and self._nanisinf: