Commit 147c9fda authored by Philipp Arras's avatar Philipp Arras
Browse files

Refactor parametric VI

I had to remove some documentation since it was only copied from another
class and did not fit to the class at hand.
parent 1c4f555b
......@@ -147,3 +147,8 @@ run_visual_mgvi:
stage: demo_runs
script:
- python3 demos/mgvi_visualized.py
run_meanfield:
stage: demo_runs
script:
- python3 demos/meanfield_inference.py
import nifty7 as ift
from matplotlib import pyplot as plt
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
......@@ -13,35 +11,23 @@ from matplotlib import pyplot as plt
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
# Copyright(C) 2013-2020 Max-Planck-Society
# Copyright(C) 2013-2021 Max-Planck-Society
#
# NIFTy is being developed at the Max-Planck-Institut fuer Astrophysik.
###############################################################################
# Log-normal field reconstruction from Poissonian data with inhomogenous
# exposure (in case for 2D mode)
# 1D (set mode=0), 2D (mode=1), or on the sphere (mode=2)
# FIXME Short text what this demo does
#
#
###############################################################################
import sys
import numpy as np
# def exposure_2d(domain):
# # Structured exposure for 2D mode
# x_shape, y_shape = domain.shape
# exposure = np.ones(domain.shape)
# exposure[x_shape//3:x_shape//2, :] *= 2.
# exposure[x_shape*4//5:x_shape, :] *= .1
# exposure[x_shape//2:x_shape*3//2, :] *= 3.
# exposure[:, x_shape//3:x_shape//2] *= 2.
# exposure[:, x_shape*4//5:x_shape] *= .1
# exposure[:, x_shape//2:x_shape*3//2] *= 3.
# return ift.Field.from_raw(domain, exposure)
import nifty7 as ift
from matplotlib import pyplot as plt
if __name__ == '__main__':
if __name__ == "__main__":
# Two-dimensional regular grid with inhomogeneous exposure
position_space = ift.RGSpace([100])
......@@ -55,7 +41,7 @@ if __name__ == '__main__':
# Define amplitude (square root of power spectrum)
def sqrtpspec(k):
return 1./(1. + k**2)
return 1.0 / (1.0 + k ** 2)
p_space = ift.PowerSpace(harmonic_space)
pd = ift.PowerDistributor(harmonic_space, p_space)
......@@ -63,7 +49,7 @@ if __name__ == '__main__':
A = pd(a)
# Define sky operator
sky = 10*ift.exp(HT(ift.makeOp(A))).ducktape('xi')
sky = 10 * ift.exp(HT(ift.makeOp(A))).ducktape("xi")
# M = ift.DiagonalOperator(exposure)
GR = ift.GeometryRemover(position_space)
......@@ -74,7 +60,7 @@ if __name__ == '__main__':
# Generate mock data and define likelihood operator
d_space = R.target[0]
lamb = R(sky)
mock_position = ift.from_random(sky.domain, 'normal')
mock_position = ift.from_random(sky.domain, "normal")
data = lamb(mock_position)
data = ift.random.current_rng().poisson(data.val.astype(np.float64))
data = ift.Field.from_raw(d_space, data)
......@@ -82,42 +68,47 @@ if __name__ == '__main__':
# Settings for minimization
ic_newton = ift.DeltaEnergyController(
name='Newton', iteration_limit=1, tol_rel_deltaE=1e-8)
# minimizer = ift.L_BFGS(ic_newton)
name="Newton", iteration_limit=1, tol_rel_deltaE=1e-8
)
minimizer_fc = ift.ADVIOptimizer(steps=10)
minimizer_mf = ift.ADVIOptimizer(steps=10)
# Compute MAP solution by minimizing the information Hamiltonian
H = ift.StandardHamiltonian(likelihood)
# initial_position = ift.from_random(domain, 'normal')
# meanfield_model = ift.MeanfieldModel(H.domain)
fullcov_model = ift.FullCovarianceModel(H.domain)
meanfield_model = ift.MeanfieldModel(H.domain)
position_fc = fullcov_model.get_initial_pos(initial_sig=0.01)
position_mf = meanfield_model.get_initial_pos(initial_sig=0.01)
KL_fc = ift.ParametricGaussianKL.make(position_fc,H,fullcov_model,3,True)
KL_mf = ift.ParametricGaussianKL.make(position_mf,H,meanfield_model,3,True)
KL_fc = ift.ParametricGaussianKL.make(position_fc, H, fullcov_model, 3, True)
KL_mf = ift.ParametricGaussianKL.make(position_mf, H, meanfield_model, 3, True)
# plt.figure('data')
# plt.imshow(sky(mock_position).val)
plt.pause(0.001)
for i in range(3000):
# KL = ParametricGaussianKL.make(position,H,meanfield_model,3,True)
for i in range(25):
KL_fc, _ = minimizer_fc(KL_fc)
KL_mf, _ = minimizer_mf(KL_mf)
plt.figure('result')
plt.figure("result")
plt.cla()
plt.plot(sky(fullcov_model.generator(KL_fc.position)).val,'b-',label='fc')
plt.plot(sky(meanfield_model.generator(KL_mf.position)).val,'r-',label='mf')
plt.plot(
sky(fullcov_model.generator(KL_fc.position)).val,
"b-",
label="Full covariance",
)
plt.plot(
sky(meanfield_model.generator(KL_mf.position)).val, "r-", label="Mean field"
)
for samp in KL_fc.samples:
plt.plot(sky(fullcov_model.generator(KL_fc.position + samp)).val,'b-',alpha=0.3)
for samp in KL_mf.samples:
plt.plot(sky(meanfield_model.generator(KL_mf.position + samp)).val,'r-',alpha=0.3)
plt.plot(data.val,'kx')
plt.plot(sky(mock_position).val,'k-',label='true')
plt.plot(
sky(fullcov_model.generator(KL_fc.position + samp)).val, "b-", alpha=0.3
)
for samp in KL_mf.samples:
plt.plot(
sky(meanfield_model.generator(KL_mf.position + samp)).val,
"r-",
alpha=0.3,
)
plt.plot(data.val, "kx")
plt.plot(sky(mock_position).val, "k-", label="Ground truth")
plt.legend()
plt.ylim(0,data.val.max()+10)
plt.pause(0.001)
\ No newline at end of file
plt.ylim(0, data.val.max() + 10)
plt.pause(0.001)
......@@ -61,18 +61,18 @@ def main():
return lh + prior
z = np.exp(-1*np_ham(xx, yy))
# plt.plot(y, np.sum(z, axis=0))
# plt.xlabel('y')
# plt.ylabel('unnormalized pdf')
# plt.title('Marginal density')
# plt.pause(2.0)
# plt.close()
# plt.plot(x*scale, np.sum(z, axis=1))
# plt.xlabel('x')
# plt.ylabel('unnormalized pdf')
# plt.title('Marginal density')
# plt.pause(2.0)
# plt.close()
plt.plot(y, np.sum(z, axis=0))
plt.xlabel('y')
plt.ylabel('unnormalized pdf')
plt.title('Marginal density')
plt.pause(2.0)
plt.close()
plt.plot(x*scale, np.sum(z, axis=1))
plt.xlabel('x')
plt.ylabel('unnormalized pdf')
plt.title('Marginal density')
plt.pause(2.0)
plt.close()
pos = ift.from_random(ham.domain, 'normal')
MAP = ift.EnergyAdapter(pos, ham, want_metric=True)
......@@ -81,7 +81,7 @@ def main():
minimizer_mf = ift.ADVIOptimizer(10)
MAP, _ = minimizer(MAP)
map_xs, map_ys = [], []
for ii in range(20):
for ii in range(10):
samp = (MAP.metric.draw_sample(from_inverse=True) + MAP.position).val
map_xs.append(samp['a'])
map_ys.append(samp['b'])
......@@ -91,42 +91,50 @@ def main():
pos = ift.from_random(ham.domain, 'normal')
mf_model = ift.MeanfieldModel(ham.domain)
pos_mf = mf_model.get_initial_pos(initial_mean=pos)
mfkl = ift.ParametricGaussianKL.make(pos_mf,ham,mf_model,20,True)
mfkl = ift.ParametricGaussianKL.make(pos_mf, ham, mf_model, 20, True)
plt.figure(figsize=[12, 8])
for ii in range(300):
if ii % 1 == 0:
mgkl = ift.MetricGaussianKL.make(pos, ham, 20, True)
for ii in range(15):
if ii % 3 == 0:
mgkl = ift.MetricGaussianKL.make(pos, ham, 40, False)
plt.cla()
plt.imshow(z.T, origin='lower', norm=LogNorm(), vmin=1e-3,
vmax=np.max(z), cmap='gist_earth_r',
extent=x_limits_scaled + y_limits)
plt.imshow(
z.T,
origin="lower",
norm=LogNorm(vmin=1e-3, vmax=np.max(z)),
cmap="gist_earth_r",
extent=x_limits_scaled + y_limits,
)
if ii == 0:
cbar = plt.colorbar()
cbar.ax.set_ylabel('pdf')
plt.scatter(np.array(map_xs)*scale, np.array(map_ys),
label='Laplace samples')
plt.scatter(MAP.position.val['a']*scale, MAP.position.val['b'],
label='Maximum a posterior solution')
xs, ys = [], []
for samp in mgkl.samples:
samp = (samp + pos).val
xs.append(samp['a'])
ys.append(samp['b'])
xxs, yys = [], []
plt.scatter(np.array(xs)*scale, np.array(ys), label='MGVI samples')
plt.scatter(pos.val['a']*scale, pos.val['b'], marker="x", label='MGVI latent mean')
xs, ys = [], []
for samp in mfkl.samples:
samp = mf_model.generator((samp + mfkl.position)).val
xxs.append(samp['a'])
yys.append(samp['b'])
plt.scatter(np.array(xs)*scale, np.array(ys), label='MGVI samples')
plt.scatter(np.array(xxs)*scale, np.array(yys), label='MFVI samples')
xs.append(samp['a'])
ys.append(samp['b'])
plt.scatter(np.array(xs)*scale, np.array(ys), label='MFVI samples')
plt.scatter(pos.val['a']*scale, pos.val['b'], label='MGVI latent mean')
plt.scatter(np.array(map_xs)*scale, np.array(map_ys),
label='Laplace samples')
plt.scatter(MAP.position.val['a']*scale, MAP.position.val['b'],
label='Maximum a posterior solution')
plt.legend()
plt.ylim(-4,4)
plt.xlim(-8,8)
plt.legend(loc="upper right")
plt.xlim(-8, 8)
plt.ylim(-4, 4)
plt.draw()
plt.pause(0.01)
plt.pause(1.0)
mgkl, _ = minimizer(mgkl)
mfkl, _ = minimizer_mf(mfkl)
......
......@@ -53,7 +53,8 @@ from .operators.energy_operators import (
Squared2NormOperator, StudentTEnergy, VariableCovarianceGaussianEnergy)
from .operators.convolution_operators import FuncConvolutionOperator
from .operators.normal_operators import NormalTransform, LognormalTransform
from .operators.multifield_flattener import MultifieldFlattener
from .operators.multifield2vector import Multifield2Vector
from .probing import probe_with_posterior_samples, probe_diagonal, \
StatCalculator, approximation2endo
......@@ -72,7 +73,7 @@ from .minimization.scipy_minimizer import L_BFGS_B
from .minimization.energy import Energy
from .minimization.quadratic_energy import QuadraticEnergy
from .minimization.energy_adapter import EnergyAdapter
from .minimization.gaussian_kl import MetricGaussianKL, ParametricGaussianKL
from .minimization.metric_gaussian_kl import MetricGaussianKL, ParametricGaussianKL
from .sugar import *
......
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
# Copyright(C) 2013-2021 Max-Planck-Society
#
# NIFTy is being developed at the Max-Planck-Institut fuer Astrophysik.
import numpy as np
from ..operators.multifield_flattener import MultifieldFlattener
from ..operators.simple_linear_operators import FieldAdapter, PartialExtractor
from ..operators.energy_operators import EnergyOperator
from ..operators.sandwich_operator import SandwichOperator
from ..operators.linear_operator import LinearOperator
from ..operators.einsum import MultiLinearEinsum
from ..sugar import full, from_random, makeField, domain_union
from ..linearization import Linearization
from ..field import Field
from ..multi_field import MultiField
from ..domain_tuple import DomainTuple
from ..domains.unstructured_domain import UnstructuredDomain
from ..field import Field
from ..linearization import Linearization
from ..multi_domain import MultiDomain
from ..multi_field import MultiField
from ..operators.einsum import MultiLinearEinsum
from ..operators.energy_operators import EnergyOperator
from ..operators.linear_operator import LinearOperator
from ..operators.multifield2vector import Multifield2Vector
from ..operators.sandwich_operator import SandwichOperator
from ..operators.simple_linear_operators import FieldAdapter, PartialExtractor
from ..sugar import domain_union, from_random, full, makeField
class MeanfieldModel():
def __init__(self, domain):
self.domain = domain
self.Flat = MultifieldFlattener(self.domain)
self.domain = MultiDomain.make(domain)
self.Flat = Multifield2Vector(self.domain)
self.std = FieldAdapter(self.Flat.target,'var').absolute()
self.latent = FieldAdapter(self.Flat.target,'latent')
......@@ -34,10 +54,11 @@ class MeanfieldModel():
initial_pos['mean'] = self.Flat(initial_mean)
return MultiField.from_dict(initial_pos)
class FullCovarianceModel():
def __init__(self, domain):
self.domain = domain
self.Flat = MultifieldFlattener(self.domain)
self.domain = MultiDomain.make(domain)
self.Flat = Multifield2Vector(self.domain)
one_space = UnstructuredDomain(1)
self.flat_domain = self.Flat.target[0]
N_tri = self.flat_domain.shape[0]*(self.flat_domain.shape[0]+1)//2
......@@ -64,10 +85,10 @@ class FullCovarianceModel():
diag_cov = Diag(cov).absolute()
self.entropy = GaussianEntropy(diag_cov.target) @ diag_cov
def get_initial_pos(self, initial_mean = None, initial_sig = 1):
def get_initial_pos(self, initial_mean=None, initial_sig=1):
initial_pos = from_random(self.generator.domain).to_dict()
initial_pos['latent'] = full(self.generator.domain['latent'], 0.)
diag_tri = np.diag(np.full(self.flat_domain.shape[0],initial_sig))[np.tril_indices(self.flat_domain.shape[0])]
diag_tri = np.diag(np.full(self.flat_domain.shape[0], initial_sig))[np.tril_indices(self.flat_domain.shape[0])]
initial_pos['cov'] = makeField(self.generator.domain['cov'], diag_tri)
if initial_mean is None:
initial_mean = 0.1*from_random(self.generator.target)
......@@ -75,59 +96,59 @@ class FullCovarianceModel():
return MultiField.from_dict(initial_pos)
class GaussianEntropy(EnergyOperator):
def __init__(self, domain):
self._domain = domain
def apply(self, x):
self._check_input(x)
res = -0.5* (2*np.pi*np.e*x**2).log().sum()
res = -0.5*(2*np.pi*np.e*x**2).log().sum()
if not isinstance(x, Linearization):
return Field.scalar(res)
if not x.want_metric:
return res
return res.add_metric(SandwichOperator.make(res.jac)) #FIXME not sure about metric
# FIXME not sure about metric
return res.add_metric(SandwichOperator.make(res.jac))
class LowerTriangularProjector(LinearOperator):
def __init__(self, domain, target):
self._domain = domain
self._target = target
self._indices=np.tril_indices(target.shape[0])
self._domain = DomainTuple.make(domain)
self._target = DomainTuple.make(target)
self._indices = np.tril_indices(target.shape[0])
self._capability = self.TIMES | self.ADJOINT_TIMES
def apply(self, x, mode):
self._check_mode(mode)
self._check_input(x, mode)
x = x.val
if mode == self.TIMES:
mat = np.zeros(self._target.shape)
mat[self._indices] = x.val
return makeField(self._target,mat)
return makeField(self._domain, x.val[self._indices].reshape(self._domain.shape))
res = np.zeros(self._target.shape)
res[self._indices] = x
else:
res = x[self._indices].reshape(self._domain.shape)
return makeField(self._tgt(mode), res)
class DiagonalSelector(LinearOperator):
def __init__(self, domain, target):
self._domain = domain
self._target = target
self._domain = DomainTuple.make(domain)
self._target = DomainTuple.make(target)
self._capability = self.TIMES | self.ADJOINT_TIMES
def apply(self, x, mode):
self._check_mode(mode)
if mode == self.TIMES:
result = np.diag(x.val)
return makeField(self._target,result)
return makeField(self._domain,np.diag(x.val).reshape(self._domain.shape))
self._check_input(x, mode)
x = np.diag(x.val)
if mode == self.ADJOINT_TIMES:
x = x.reshape(self._domain.shape)
return makeField(self._tgt(mode), x)
class Respacer(LinearOperator):
def __init__(self, domain, target):
self._domain = domain
self._target = target
self._domain = DomainTuple.make(domain)
self._target = DomainTuple.make(target)
self._capability = self.TIMES | self.ADJOINT_TIMES
def apply(self,x,mode):
self._check_mode(mode)
if mode == self.TIMES:
return makeField(self._target,x.val.reshape(self._target.shape))
return makeField(self._domain,x.val.reshape(self._domain.shape))
def apply(self, x, mode):
self._check_input(x, mode)
return makeField(self._tgt(mode), x.val.reshape(self._tgt(mode).shape))
......@@ -19,18 +19,15 @@ import numpy as np
from .. import random, utilities
from ..linearization import Linearization
from ..logger import logger
from ..multi_field import MultiField
from ..operators.endomorphic_operator import EndomorphicOperator
from ..operators.energy_operators import StandardHamiltonian
from ..operators.multifield_flattener import MultifieldFlattener
from ..probing import approximation2endo
from ..sugar import makeOp, full, from_random
from ..sugar import from_random, full, makeOp
from ..utilities import myassert
from .energy import Energy
class _KLMetric(EndomorphicOperator):
def __init__(self, KL):
self._KL = KL
......@@ -263,24 +260,24 @@ class MetricGaussianKL(Energy):
class ParametricGaussianKL(Energy):
"""Provides the sampled Kullback-Leibler divergence between a distribution
"""Provide the sampled Kullback-Leibler divergence between a distribution
and a Parametric Gaussian.
Notes
-----
See also
FIXME
"""
def __init__(self, variational_parameters, hamiltonian, variational_model,
n_samples, mirror_samples, comm,
local_samples, nanisinf, _callingfrommake=False):
def __init__(self, variational_parameters, hamiltonian, variational_model,
n_samples, mirror_samples, comm, local_samples, nanisinf,
_callingfrommake=False):
if not _callingfrommake:
raise NotImplementedError
super(ParametricGaussianKL, self).__init__(variational_parameters)
assert variational_model.generator.target is hamiltonian.domain
self._hamiltonian = hamiltonian
self._variational_model = variational_model
self._full_model = hamiltonian(variational_model.generator) + variational_model.entropy
self._full_model = (
hamiltonian(variational_model.generator) + variational_model.entropy
)
self._n_samples = int(n_samples)
self._mirror_samples = bool(mirror_samples)
......@@ -288,64 +285,28 @@ class ParametricGaussianKL(Energy):
self._local_samples = local_samples
self._nanisinf = bool(nanisinf)
lin = Linearization.make_partial_var(variational_parameters, ['latent'])
lin = Linearization.make_partial_var(variational_parameters, ["latent"])
v, g = [], []
for s in self._local_samples:
# s = _modify_sample_domain(s, variational_parameters.domain)
tmp = self._full_model(lin+s)
tmp = self._full_model(lin + s)
tv = tmp.val.val
tg = tmp.gradient
if mirror_samples:
tmp = self._full_model(lin-s)
tmp = self._full_model(lin - s)
tv = tv + tmp.val.val
tg = tg + tmp.gradient
v.append(tv)
g.append(tg)
self._val = utilities.allreduce_sum(v, self._comm)[()]/self.n_eff_samples
self._val = utilities.allreduce_sum(v, self._comm)[()] / self.n_eff_samples
if np.isnan(self._val) and self._nanisinf:
self._val = np.inf
self._grad = utilities.allreduce_sum(g, self._comm)/self.n_eff_samples
self._grad = utilities.allreduce_sum(g, self._comm) / self.n_eff_samples
@staticmethod
def make(variational_parameters, hamiltonian, variational_model, n_samples, mirror_samples,
comm=None, nanisinf=False):
"""Return instance of :class:`MetricGaussianKL`.
Parameters
----------
mean : Field
Mean of the Gaussian probability distribution.
hamiltonian : StandardHamiltonian
Hamiltonian of the approximated probability distribution.
n_samples : integer
Number of samples used to stochastically estimate the KL.
mirror_samples : boolean
Whether the negative of the drawn samples are also used, as they are
equally legitimate samples. If true, the number of used samples
doubles. Mirroring samples stabilizes the KL estimate as extreme
sample variation is counterbalanced. Since it improves stability in
many cases, it is recommended to set `mirror_samples` to `True`.
constants : list
List of parameter keys that are kept constant during optimization.
Default is no constants.
napprox : int
Number of samples for computing preconditioner for sampling. No
preconditioning is done by default.
comm : MPI communicator or None
If not None, samples will be distributed as evenly as possible
across this communicator. If `mirror_samples` is set, then a sample and
its mirror image will always reside on the same task.
nanisinf : bool
If true, nan energies which can happen due to overflows in the forward
model are interpreted as inf. Thereby, the code does not crash on
these occaisions but rather the minimizer is told that the position it
has tried is not sensible.
Note
----
The two lists `constants` and `point_estimates` are independent from each
other. It is possible to sample along domains which are kept constant
during minimization and vice versa.
def make(variational_parameters, hamiltonian, variational_model, n_samples,
mirror_samples, comm=None, nanisinf=False):
"""FIXME
"""
if not isinstance(hamiltonian, StandardHamiltonian):
......@@ -362,31 +323,50 @@ class ParametricGaussianKL(Energy):
local_samples = []
sseq = random.spawn_sseq(n_samples)
#FIXME dirty trick, many multiplications with zero
DirtyMaskDict = full(variational_model.generator.domain,0.).to_dict()
DirtyMaskDict['latent'] = full(variational_model.generator.domain['latent'], 1.)
# FIXME dirty trick, many multiplications with zero
DirtyMaskDict = full(variational_model.generator.domain, 0.0).to_dict()
DirtyMaskDict["latent"] = full(
variational_model.generator.domain["latent"], 1.0
)
DirtyMask = MultiField.from_dict(DirtyMaskDict)
for i in range(*_get_lo_hi(comm, n_samples)):
with random.Context(sseq[i]):
local_samples.append(DirtyMask * from_random(variational_model.generator.domain))
local_samples.append(
DirtyMask * from_random(variational_model.generator.domain)