diff git a/demos/bernoulli_demo.py b/demos/bernoulli_demo.py
index 27bb850e26add5310c328493e090a44f7936cf4e..77c4f975e9bbc5e75ee2b0bea606d5d312203c46 100644
 a/demos/bernoulli_demo.py
+++ b/demos/bernoulli_demo.py
@@ 74,7 +74,7 @@ if __name__ == '__main__':
ic_sampling = ift.GradientNormController(iteration_limit=100)
# Minimize the Hamiltonian
 H = ift.Hamiltonian(likelihood, ic_sampling)
+ H = ift.StandardHamiltonian(likelihood, ic_sampling)
H = ift.EnergyAdapter(position, H, want_metric=True)
# minimizer = ift.L_BFGS(ic_newton)
H, convergence = minimizer(H)
diff git a/demos/getting_started_2.py b/demos/getting_started_2.py
index ddb85b65aa686749973a859b56887e553e0dd00f..3b087170a3f44e03925c692406803c1e6b9fa753 100644
 a/demos/getting_started_2.py
+++ b/demos/getting_started_2.py
@@ 99,7 +99,7 @@ if __name__ == '__main__':
minimizer = ift.NewtonCG(ic_newton)
# Compute MAP solution by minimizing the information Hamiltonian
 H = ift.Hamiltonian(likelihood)
+ H = ift.StandardHamiltonian(likelihood)
initial_position = ift.from_random('normal', domain)
H = ift.EnergyAdapter(initial_position, H, want_metric=True)
H, convergence = minimizer(H)
diff git a/demos/getting_started_3.py b/demos/getting_started_3.py
index 9ca2994970f3c825f232f3457c9ce2216a9f0a11..a292f8754ef68cc43d5fbf2b4995d0964775746e 100644
 a/demos/getting_started_3.py
+++ b/demos/getting_started_3.py
@@ 100,10 +100,10 @@ if __name__ == '__main__':
# Set up likelihood and information Hamiltonian
likelihood = ift.GaussianEnergy(mean=data, covariance=N)(signal_response)
 H = ift.Hamiltonian(likelihood, ic_sampling)
+ H = ift.StandardHamiltonian(likelihood, ic_sampling)
 initial_position = ift.MultiField.full(H.domain, 0.)
 position = initial_position
+ initial_mean = ift.MultiField.full(H.domain, 0.)
+ mean = initial_mean
plot = ift.Plot()
plot.add(signal(mock_position), title='Ground Truth')
@@ 117,9 +117,9 @@ if __name__ == '__main__':
# Draw new samples to approximate the KL five times
for i in range(5):
# Draw new samples and minimize KL
 KL = ift.KL_Energy(position, H, N_samples)
+ KL = ift.MetricGaussianKL(mean, H, N_samples)
KL, convergence = minimizer(KL)
 position = KL.position
+ mean = KL.position
# Plot current reconstruction
plot = ift.Plot()
@@ 128,7 +128,7 @@ if __name__ == '__main__':
plot.output(ny=1, ysize=6, xsize=16, name="loop{:02}.png".format(i))
# Draw posterior samples
 KL = ift.KL_Energy(position, H, N_samples)
+ KL = ift.MetricGaussianKL(mean, H, N_samples)
sc = ift.StatCalculator()
for sample in KL.samples:
sc.add(signal(sample + KL.position))
diff git a/demos/polynomial_fit.py b/demos/polynomial_fit.py
index 8e71d06f5c9d45558982bd30aecdadef5efc8796..68a6c677dd6baaeb8cb456da5cd219a63ee6da36 100644
 a/demos/polynomial_fit.py
+++ b/demos/polynomial_fit.py
@@ 103,7 +103,7 @@ N = ift.DiagonalOperator(ift.from_global_data(d_space, var))
IC = ift.DeltaEnergyController(tol_rel_deltaE=1e12, iteration_limit=200)
likelihood = ift.GaussianEnergy(d, N)(R)
Ham = ift.Hamiltonian(likelihood, IC)
+Ham = ift.StandardHamiltonian(likelihood, IC)
H = ift.EnergyAdapter(params, Ham, want_metric=True)
# Minimize
diff git a/docs/generate.sh b/docs/generate.sh
index 2316da20cc983892835e1a97d87640ef35f72dec..d8da633f2962fd07c431932c16c8431378888817 100755
 a/docs/generate.sh
+++ b/docs/generate.sh
@@ 1,2 +1,3 @@
+# rm -rf docs/build docs/source/mod
sphinxapidoc e o docs/source/mod nifty5
sphinxbuild b html docs/source/ docs/build/
diff git a/docs/source/ift.rst b/docs/source/ift.rst
index c94fdb51950978a11678d776687436553f3b9d0a..f778074ca35d4801a3b690d64db381d5f2b24574 100644
 a/docs/source/ift.rst
+++ b/docs/source/ift.rst
@@ 5,11 +5,20 @@ Theoretical Background

`Information Field Theory `_ [1]_ (IFT) is information theory, the logic of reasoning under uncertainty, applied to fields. A field can be any quantity defined over some space, e.g. the air temperature over Europe, the magnetic field strength in the Milky Way, or the matter density in the Universe. IFT describes how data and knowledge can be used to infer field properties. Mathematically it is a statistical field theory and exploits many of the tools developed for such. Practically, it is a framework for signal processing and image reconstruction.
+`Information Field Theory `_ [1]_ (IFT) is information theory, the logic of reasoning under uncertainty, applied to fields.
+A field can be any quantity defined over some space, e.g. the air temperature over Europe, the magnetic field strength in the Milky Way, or the matter density in the Universe.
+IFT describes how data and knowledge can be used to infer field properties.
+Mathematically it is a statistical field theory and exploits many of the tools developed for such.
+Practically, it is a framework for signal processing and image reconstruction.
IFT is fully Bayesian. How else could infinitely many field degrees of freedom be constrained by finite data?
+IFT is fully Bayesian.
+How else could infinitely many field degrees of freedom be constrained by finite data?
There is a full toolbox of methods that can be used, like the classical approximation (= Maximum a posteriori = MAP), effective action (= Variational Bayes = VI), Feynman diagrams, renormalization, and more. IFT reproduces many known well working algorithms. This should be reassuring. Also, there were certainly previous works in a similar spirit. Anyhow, in many cases IFT provides novel rigorous ways to extract information from data. NIFTy comes with reimplemented MAP and VI estimators. It also provides a Hamiltonian Monte Carlo sampler for Fields (HMCF). (*FIXME* does it?)
+There is a full toolbox of methods that can be used, like the classical approximation (= Maximum a posteriori = MAP), effective action (= Variational Bayes = VI), Feynman diagrams, renormalization, and more.
+IFT reproduces many known well working algorithms, which is reassuring.
+Also, there were certainly previous works in a similar spirit.
+Anyhow, in many cases IFT provides novel rigorous ways to extract information from data.
+NIFTy comes with reimplemented MAP and VI estimators.
.. tip:: *In-a-nutshell introductions to information field theory* can be found in [2]_, [3]_, [4]_, and [5]_, with the latter probably being the most didactical.
@@ 27,7 +36,8 @@ There is a full toolbox of methods that can be used, like the classical approxim
Discretized continuum

The representation of fields that are mathematically defined on a continuous space in a finite computer environment is a common necessity. The goal hereby is to preserve the continuum limit in the calculus in order to ensure a resolution independent discretization.
+The representation of fields that are mathematically defined on a continuous space in a finite computer environment is a common necessity.
+The goal hereby is to preserve the continuum limit in the calculus in order to ensure a resolution independent discretization.
+++
 .. image:: images/42vs6.png  .. image:: images/42vs9.png 
@@ 43,7 +53,8 @@ Any partition of the continuous position space :math:`\Omega` (with volume :math
V &\quad=\quad \int_\Omega \mathrm{d}x \quad=\quad \sum_{q=1}^Q \int_{\Omega_q} \mathrm{d}x \quad=\quad \sum_{q=1}^Q V_q
.
Here the number :math:`Q` characterizes the resolution of the pixelization and the continuum limit is described by :math:`Q \rightarrow \infty` and :math:`V_q \rightarrow 0` for all :math:`q \in \{1,\dots,Q\}` simultaneously. Moreover, the above equation defines a discretization of continuous integrals, :math:`\int_\Omega \mathrm{d}x \mapsto \sum_q V_q`.
+Here the number :math:`Q` characterizes the resolution of the pixelization and the continuum limit is described by :math:`Q \rightarrow \infty` and :math:`V_q \rightarrow 0` for all :math:`q \in \{1,\dots,Q\}` simultaneously.
+Moreover, the above equation defines a discretization of continuous integrals, :math:`\int_\Omega \mathrm{d}x \mapsto \sum_q V_q`.
Any valid discretization scheme for a field :math:`{s}` can be described by a mapping,
@@ 52,39 +63,48 @@ Any valid discretization scheme for a field :math:`{s}` can be described by a ma
s(x \in \Omega_q) \quad\mapsto\quad s_q \quad=\quad \int_{\Omega_q} \mathrm{d}x \; w_q(x) \; s(x)
,
if the weighting function :math:`w_q(x)` is chosen appropriately. In order for the discretized version of the field to converge to the actual field in the continuum limit, the weighting functions need to be normalized in each subset; i.e., :math:`\forall q: \int_{\Omega_q} \mathrm{d}x \; w_q(x) = 1`. Choosing such a weighting function that is constant with respect to :math:`x` yields
+if the weighting function :math:`w_q(x)` is chosen appropriately.
+In order for the discretized version of the field to converge to the actual field in the continuum limit, the weighting functions need to be normalized in each subset; i.e., :math:`\forall q: \int_{\Omega_q} \mathrm{d}x \; w_q(x) = 1`.
+Choosing such a weighting function that is constant with respect to :math:`x` yields
.. math::
s_q = \frac{\int_{\Omega_q} \mathrm{d}x \; s(x)}{\int_{\Omega_q} \mathrm{d}x} = \left< s(x) \right>_{\Omega_q}
,
which corresponds to a discretization of the field by spatial averaging. Another common and equally valid choice is :math:`w_q(x) = \delta(x-x_q)`, which distinguishes some position :math:`x_q \in \Omega_q`, and evaluates the continuous field at this position,
+which corresponds to a discretization of the field by spatial averaging.
+Another common and equally valid choice is :math:`w_q(x) = \delta(x-x_q)`, which distinguishes some position :math:`x_q \in \Omega_q`, and evaluates the continuous field at this position,
.. math::
s_q \quad=\quad \int_{\Omega_q} \mathrm{d}x \; \delta(x-x_q) \; s(x) \quad=\quad s(x_q)
.
In practice, one often makes use of the spatially averaged pixel position, :math:`x_q = \left< x \right>_{\Omega_q}`. If the resolution is high enough to resolve all features of the signal field :math:`{s}`, both of these discretization schemes approximate each other, :math:`\left< s(x) \right>_{\Omega_q} \approx s(\left< x \right>_{\Omega_q})`, since they approximate the continuum limit by construction. (The approximation of :math:`\left< s(x) \right>_{\Omega_q} \approx s(x_q \in \Omega_q)` marks a resolution threshold beyond which further refinement of the discretization reveals no new features; i.e., no new information content of the field :math:`{s}`.)
+In practice, one often makes use of the spatially averaged pixel position, :math:`x_q = \left< x \right>_{\Omega_q}`.
+If the resolution is high enough to resolve all features of the signal field :math:`{s}`, both of these discretization schemes approximate each other, :math:`\left< s(x) \right>_{\Omega_q} \approx s(\left< x \right>_{\Omega_q})`, since they approximate the continuum limit by construction.
+(The approximation of :math:`\left< s(x) \right>_{\Omega_q} \approx s(x_q \in \Omega_q)` marks a resolution threshold beyond which further refinement of the discretization reveals no new features; i.e., no new information content of the field :math:`{s}`.)
All operations involving position integrals can be normalized in accordance with the above definitions. For example, the scalar product between two fields :math:`{s}` and :math:`{u}` is defined as
+All operations involving position integrals can be normalized in accordance with the above definitions.
+For example, the scalar product between two fields :math:`{s}` and :math:`{u}` is defined as
.. math::
{s}^\dagger {u} \quad=\quad \int_\Omega \mathrm{d}x \; s^*(x) \; u(x) \quad\approx\quad \sum_{q=1}^Q V_q^{\phantom{*}} \; s_q^* \; u_q^{\phantom{*}}
,
where :math:`\dagger` denotes adjunction and :math:`*` complex conjugation. Since the above approximation becomes an equality in the continuum limit, the scalar product is independent of the pixelization scheme and resolution, if the latter is sufficiently high.
+where :math:`\dagger` denotes adjunction and :math:`*` complex conjugation.
+Since the above approximation becomes an equality in the continuum limit, the scalar product is independent of the pixelization scheme and resolution, if the latter is sufficiently high.
The above line of argumentation analogously applies to the discretization of operators. For a linear operator :math:`{A}` acting on some field :math:`{s}` as :math:`{A} {s} = \int_\Omega \mathrm{d}y \; A(x,y) \; s(y)`, a matrix representation discretized with constant weighting functions is given by
+The above line of argumentation analogously applies to the discretization of operators.
+For a linear operator :math:`{A}` acting on some field :math:`{s}` as :math:`{A} {s} = \int_\Omega \mathrm{d}y \; A(x,y) \; s(y)`, a matrix representation discretized with constant weighting functions is given by
.. math::
A(x \in \Omega_p, y \in \Omega_q) \quad\mapsto\quad A_{pq} \quad=\quad \frac{\iint_{\Omega_p \Omega_q} \mathrm{d}x \, \mathrm{d}y \; A(x,y)}{\iint_{\Omega_p \Omega_q} \mathrm{d}x \, \mathrm{d}y} \quad=\quad \big< \big< A(x,y) \big>_{\Omega_p} \big>_{\Omega_q}
.
The proper discretization of spaces, fields, and operators, as well as the normalization of position integrals, is essential for the conservation of the continuum limit. Their consistent implementation in NIFTy allows a pixelization independent coding of algorithms.
+The proper discretization of spaces, fields, and operators, as well as the normalization of position integrals, is essential for the conservation of the continuum limit.
+Their consistent implementation in NIFTy allows a pixelization independent coding of algorithms.
Free Theory & Implicit Operators

@@ 135,18 +155,24 @@ the posterior covariance operator, and
j = R^\dagger N^{-1} d
the information source. The operation in :math:`{m = D\,R^\dagger N^{-1} d}` is also called the generalized Wiener filter.
+the information source.
+The operation in :math:`{m = D\,R^\dagger N^{-1} d}` is also called the generalized Wiener filter.
NIFTy permits to define the involved operators :math:`{R}`, :math:`{R^\dagger}`, :math:`{S}`, and :math:`{N}` implicitly, as routines that can be applied to vectors, but which do not require the explicit storage of the matrix elements of the operators.
Some of these operators are diagonal in harmonic (Fourier) basis, and therefore only require the specification of a (power) spectrum and :math:`{S= F\,\widehat{P_s} F^\dagger}`. Here :math:`{F = \mathrm{HarmonicTransformOperator}}`, :math:`{\widehat{P_s} = \mathrm{DiagonalOperator}(P_s)}`, and :math:`{P_s(k)}` is the power spectrum of the process that generated :math:`{s}` as a function of the (absolute value of the) harmonic (Fourier) space koordinate :math:`{k}`. For those, NIFTy can easily also provide inverse operators, as :math:`{S^{1}= F\,\widehat{\frac{1}{P_s}} F^\dagger}` in case :math:`{F}` is unitary, :math:`{F^\dagger=F^{1}}`.
+Some of these operators are diagonal in harmonic (Fourier) basis, and therefore only require the specification of a (power) spectrum and :math:`{S= F\,\widehat{P_s} F^\dagger}`.
+Here :math:`{F = \mathrm{HarmonicTransformOperator}}`, :math:`{\widehat{P_s} = \mathrm{DiagonalOperator}(P_s)}`, and :math:`{P_s(k)}` is the power spectrum of the process that generated :math:`{s}` as a function of the (absolute value of the) harmonic (Fourier) space coordinate :math:`{k}`.
+For those, NIFTy can easily also provide inverse operators, as :math:`{S^{-1}= F\,\widehat{\frac{1}{P_s}} F^\dagger}` in case :math:`{F}` is unitary, :math:`{F^\dagger=F^{-1}}`.
These implicit operators can be combined into new operators, e.g. to :math:`{D^{-1} = S^{-1} + R^\dagger N^{-1} R}`, as well as their inverses, e.g. :math:`{D = \left( D^{-1} \right)^{-1}}`.
The invocation of an inverse operator applied to a vector might trigger the execution of a numerical linear algebra solver.
Thus, when NIFTy calculates :math:`{m = D\, j}` it actually solves :math:`{D^{1} m = j}` for :math:`{m}` behind the scenes. The advantage of implicit operators to explicit matrices is the reduced memory requirements. The reconstruction of only a Megapixel image would otherwithe require the storage and processing of matrices with sizes of several Terrabytes. Larger images could not be dealt with due to the quadratic memory requirements of explicit operator representations.
+Thus, when NIFTy calculates :math:`{m = D\, j}`, it actually solves :math:`{D^{-1} m = j}` for :math:`{m}` behind the scenes.
+The advantage of implicit operators over explicit matrices is the reduced memory requirements.
+The reconstruction of only a Megapixel image would otherwise require the storage and processing of matrices with sizes of several Terabytes.
+Larger images could not be dealt with due to the quadratic memory requirements of explicit operator representations.
The demo codes demos/getting_started_1.py and demos/Wiener_Filter.ipynb illustrate this.
+The demo codes `demos/getting_started_1.py` and `demos/Wiener_Filter.ipynb` illustrate this.
Generative Models
@@ 164,7 +190,7 @@ Let us rewrite the above free theory as a generative model:
with :math:`{A}` the amplitude operator such that it generates signal field realizations with the correct covariance :math:`{S=A\,A^\dagger}` when being applied to a white Gaussian field :math:`{\xi}` with :math:`{\mathcal{P}(\xi)= \mathcal{G}(\xi, 1)}`.
The joint information Hamiltonian for the whitened signal field :math:`{\xi}` reads:
+The joint information Hamiltonian for the standardized signal field :math:`{\xi}` reads:
.. math::
@@ 172,26 +198,35 @@ The joint information Hamiltonian for the whitened signal field :math:`{\xi}` re
NIFTy takes advantage of this formulation in several ways:
1) All prior degrees of freedom have unit covariance which improves the condition number of operators which need to be inverted.
2) The amplitude operator can be regarded as part of the response, :math:`{R'=R\,A}`. In general, more sophisticated responses can be constructed out of the composition of simpler operators.
3) The response can be nonlinear, e.g. :math:`{R'(s)=R \exp(A\,\xi)}`, see demos/getting_started_2.py.
4) The amplitude operator can be made dependent on unknowns as well, e.g. :math:`A=A(\tau)= F\, \widehat{e^\tau}` represents an amplitude operator with a positive definite, unknown spectrum defined in the Fourier domain. The amplitude field :math:`{\tau}` would get its own amplitude operator, with a cepstrum (spectrum of a log spectrum) defined in quefrency space (harmonic space of a logarithmically binned harmonic space) to regularize its degrees of freedom by imposing some (userdefined degree of) spectral smoothness.
5) NIFTy can calculate the gradient of the information Hamiltonian and the Fisher information metric with respect to all unknown parameters, here :math:`{\xi}` and :math:`{\tau}`, by automatic differentiation. The gradients are used for MAP and HMCF estimates, and the Fisher matrix is required in addition to the gradient by Metric Gaussian Variational Inference (MGVI), which is available in NIFTy as well. MGVI is an implicit operator extension of Automatic Differentiation Variational Inference (ADVI).
+1) All prior degrees of freedom have unit covariance, which improves the condition number of operators that need to be inverted.
The reconstruction of a nonGaussian signal with unknown covariance from a nontrivial (tomographic) response is demonstrated in demos/getting_started_3.py. Here, the uncertainty of the field and the power spectrum of its generating process are probed via posterior samples provided by the MGVI algorithm.
+2) The amplitude operator can be regarded as part of the response, :math:`{R'=R\,A}`.
+ In general, more sophisticated responses can be constructed out of the composition of simpler operators.
+
+3) The response can be nonlinear, e.g. :math:`{R'(s)=R \exp(A\,\xi)}`, see `demos/getting_started_2.py`.
+
+4) The amplitude operator may depend on further parameters, e.g. :math:`A=A(\tau)= F\, \widehat{e^\tau}` represents an amplitude operator with a positive definite, unknown spectrum defined in the Fourier domain.
+ The amplitude field :math:`{\tau}` would get its own amplitude operator, with a cepstrum (spectrum of a log spectrum) defined in quefrency space (harmonic space of a logarithmically binned harmonic space) to regularize its degrees of freedom by imposing some (user-defined degree of) spectral smoothness.
+
+5) NIFTy calculates the gradient of the information Hamiltonian and the Fisher information metric with respect to all unknown parameters, here :math:`{\xi}` and :math:`{\tau}`, by automatic differentiation.
+ The gradients are used for MAP and HMCF estimates, and the Fisher matrix is required in addition to the gradient by Metric Gaussian Variational Inference (MGVI), which is available in NIFTy as well.
+ MGVI is an implicit operator extension of Automatic Differentiation Variational Inference (ADVI).
+
+The reconstruction of a non-Gaussian signal with unknown covariance from a non-trivial (tomographic) response is demonstrated in `demos/getting_started_3.py`.
+Here, the uncertainty of the field and the power spectrum of its generating process are probed via posterior samples provided by the MGVI algorithm.
++
 **Output of tomography demo getting_started_3.py** 
++
 .. image:: images/getting_started_3_setup.png 
 
+ :width: 50 % 
++
 NonGaussian signal field, 
 data backprojected into the image domain, power 
 spectrum of underlying Gausssian process. 
++
 .. image:: images/getting_started_3_results.png 
 
+ :width: 50 % 
++
 Posterior mean field signal 
 reconstruction, its uncertainty, and the power 
@@ 199,3 +234,73 @@ The reconstruction of a nonGaussian signal with unknown covariance from a nont
 samples in comparison to the correct one (thick 
 orange line). 
++
+
+Maximum a Posteriori
+
+
+One popular field estimation method is Maximum a Posteriori (MAP).
+
+It only requires minimizing the information Hamiltonian, e.g. by a gradient descent method that stops when
+
+.. math::
+
+ \frac{\partial \mathcal{H}(d,\xi)}{\partial \xi} = 0.
+
+NIFTy5 automatically calculates the necessary gradient from a generative model of the signal and the data and uses it to minimize the Hamiltonian.
+
+However, MAP often provides unsatisfactory results in cases of deep hierarchical Bayesian networks.
+The reason for this is that MAP ignores the volume factors in parameter space, which are not to be neglected in deciding whether a solution is reasonable or not.
+In the high dimensional setting of field inference these volume factors can differ by large ratios.
+A MAP estimate, which is only representative for a tiny fraction of the parameter space, might be a poorer choice (with respect to an error norm) compared to a slightly worse location with slightly lower posterior probability, which, however, is associated with a much larger volume (of nearby locations with similar probability).
+
+This causes MAP signal estimates to be more prone to overfitting the noise as well as to perception thresholds than methods that take volume effects into account.
+
+
+Variational Inference
+
+
+One method that takes volume effects into account is Variational Inference (VI).
+In VI, the posterior :math:`\mathcal{P}(\xi|d)` is approximated by a simpler, parametrized distribution, often a Gaussian :math:`\mathcal{Q}(\xi)=\mathcal{G}(\xi-m,D)`.
+The parameters of :math:`\mathcal{Q}`, the mean :math:`m` and its covariance :math:`D`, are obtained by minimization of an appropriate information distance measure between :math:`\mathcal{Q}` and :math:`\mathcal{P}`.
+As a compromise between being optimal and being computationally affordable, the variational Kullback-Leibler (KL) divergence is used:
+
+.. math::
+
+ \mathrm{KL}(m,D|d)= \mathcal{D}_\mathrm{KL}(\mathcal{Q}||\mathcal{P})=
+ \int \mathcal{D}\xi \,\mathcal{Q}(\xi) \log \left( \frac{\mathcal{Q}(\xi)}{\mathcal{P}(\xi)} \right)
+
+Minimizing this with respect to all entries of the covariance :math:`D` is unfeasible for fields.
+Therefore, Metric Gaussian Variational Inference (MGVI) approximates the precision matrix at the location of the current mean :math:`M=D^{-1}` by the Bayesian Fisher information metric,
+
+.. math::
+
+ M \approx \left\langle \frac{\partial \mathcal{H}(d,\xi)}{\partial \xi} \, \frac{\partial \mathcal{H}(d,\xi)}{\partial \xi}^\dagger \right\rangle_{(d,\xi)}.
+
+In practice the average is performed over :math:`\mathcal{P}(d,\xi)\approx \mathcal{P}(d|\xi)\,\delta(\xi-m)` by evaluating the expression at the current mean :math:`m`.
+This results in a Fisher information metric of the likelihood evaluated at the mean plus the prior information metric.
+Therefore we will only have to infer the mean of the approximate distribution.
+The only term within the KL-divergence that explicitly depends on it is the Hamiltonian of the true problem averaged over the approximation:
+
+.. math::
+
+ \mathrm{KL}(m|d) \;\widehat{=}\;
+ \left\langle \mathcal{H}(\xi,d) \right\rangle_{\mathcal{Q}(\xi)},
+
+where :math:`\widehat{=}` expresses equality up to irrelevant (here not :math:`m`-dependent) terms.
+
+Thus, only the gradient of the KL is needed with respect to this, which can be expressed as
+
+.. math::
+
+ \frac{\partial \mathrm{KL}(m|d)}{\partial m} = \left\langle \frac{\partial \mathcal{H}(d,\xi)}{\partial \xi} \right\rangle_{\mathcal{G}(\xi-m,D)}.
+
+We stochastically estimate the KL-divergence and gradients with a set of samples drawn from the approximate posterior distribution.
+The particular structure of the covariance allows us to draw independent samples solving a certain system of equations.
+This KL-divergence for MGVI is implemented in the class MetricGaussianKL within NIFTy5.
+
+
+The demo `getting_started_3.py` for example not only infers a field this way, but also the power spectrum of the process that has generated the field.
+The cross-correlation of field and power spectrum is taken care of in this process.
+Posterior samples can be obtained to study this cross-correlation.
+
+It should be noted that MGVI, as any VI method, can typically only provide a lower bound on the variance.
diff git a/nifty5/__init__.py b/nifty5/__init__.py
index d30c927b76c4e2c8e8b9e8e6ff43311414888b24..ab1c25002fdbd3e449ce32a6664535489b769a7a 100644
 a/nifty5/__init__.py
+++ b/nifty5/__init__.py
@@ 19,6 +19,7 @@ from .field import Field
from .multi_field import MultiField
from .operators.operator import Operator
+from .operators.adder import Adder
from .operators.diagonal_operator import DiagonalOperator
from .operators.distributors import DOFDistributor, PowerDistributor
from .operators.domain_tuple_field_inserter import DomainTupleFieldInserter
@@ 33,7 +34,6 @@ from .operators.field_zero_padder import FieldZeroPadder
from .operators.inversion_enabler import InversionEnabler
from .operators.linear_operator import LinearOperator
from .operators.mask_operator import MaskOperator
from .operators.offset_operator import OffsetOperator
from .operators.qht_operator import QHTOperator
from .operators.regridding_operator import RegriddingOperator
from .operators.sampling_enabler import SamplingEnabler
@@ 49,7 +49,7 @@ from .operators.simple_linear_operators import (
from .operators.value_inserter import ValueInserter
from .operators.energy_operators import (
EnergyOperator, GaussianEnergy, PoissonianEnergy, InverseGammaLikelihood,
 BernoulliEnergy, Hamiltonian, AveragedEnergy)
+ BernoulliEnergy, StandardHamiltonian, AveragedEnergy)
from .probing import probe_with_posterior_samples, probe_diagonal, \
StatCalculator
@@ 68,7 +68,7 @@ from .minimization.scipy_minimizer import L_BFGS_B
from .minimization.energy import Energy
from .minimization.quadratic_energy import QuadraticEnergy
from .minimization.energy_adapter import EnergyAdapter
from .minimization.kl_energy import KL_Energy
+from .minimization.metric_gaussian_kl import MetricGaussianKL
from .sugar import *
from .plot import Plot
diff git a/nifty5/library/adjust_variances.py b/nifty5/library/adjust_variances.py
index 3e7cde27fcfdf9a3b15e28aa376044deff076f1f..5034212a992a48f5b17dd2e6e8aaf85f8c77a692 100644
 a/nifty5/library/adjust_variances.py
+++ b/nifty5/library/adjust_variances.py
@@ 16,10 +16,9 @@
# NIFTy is being developed at the Max-Planck-Institut fuer Astrophysik.
from ..minimization.energy_adapter import EnergyAdapter
from ..multi_domain import MultiDomain
from ..multi_field import MultiField
from ..operators.distributors import PowerDistributor
from ..operators.energy_operators import Hamiltonian, InverseGammaLikelihood
+from ..operators.energy_operators import StandardHamiltonian, InverseGammaLikelihood
from ..operators.scaling_operator import ScalingOperator
from ..operators.simple_linear_operators import ducktape
@@ 35,25 +34,27 @@ def make_adjust_variances(a,
Constructs a Hamiltonian to solve constant likelihood optimizations of the
form phi = a * xi under the constraint that phi remains constant.
+ FIXME xi is white.
+
Parameters

a : Operator
 Operator which gives the amplitude when evaluated at a position
+ Gives the amplitude when evaluated at a position.
xi : Operator
 Operator which gives the excitation when evaluated at a position
+ Gives the excitation when evaluated at a position.
position : Field, MultiField
 Position of the whole problem
+ Position of the entire problem.
samples : Field, MultiField
 Residual samples of the whole problem
+ Residual samples of the whole problem.
scaling : Float
 Optional rescaling of the Likelihood
+ Optional rescaling of the Likelihood.
ic_samp : Controller
 Iteration Controller for Hamiltonian
+ Iteration Controller for Hamiltonian.
Returns

 Hamiltonian
 A Hamiltonian that can be used for further minimization
+ StandardHamiltonian
+ A Hamiltonian that can be used for further minimization.
"""
d = a*xi
@@ 71,7 +72,7 @@ def make_adjust_variances(a,
if scaling is not None:
x = ScalingOperator(scaling, x.target)(x)
 return Hamiltonian(InverseGammaLikelihood(d_eval)(x), ic_samp=ic_samp)
+ return StandardHamiltonian(InverseGammaLikelihood(d_eval)(x), ic_samp=ic_samp)
def do_adjust_variances(position,
@@ 79,6 +80,9 @@ def do_adjust_variances(position,
minimizer,
xi_key='xi',
samples=[]):
+ '''
+ FIXME
+ '''
h_space = position[xi_key].domain[0]
pd = PowerDistributor(h_space, amplitude_operator.target[0])
diff git a/nifty5/library/correlated_fields.py b/nifty5/library/correlated_fields.py
index 4ceaf902e53168d7acdce1fec419c40b2de484a6..cd790596273b307681a9270b4a483fc4eab15878 100644
 a/nifty5/library/correlated_fields.py
+++ b/nifty5/library/correlated_fields.py
@@ 24,7 +24,7 @@ from ..operators.harmonic_operators import HarmonicTransformOperator
from ..operators.simple_linear_operators import ducktape
def CorrelatedField(target, amplitude_operator, name='xi'):
+def CorrelatedField(target, amplitude_operator, name='xi', codomain=None):
"""Constructs an operator which turns a white Gaussian excitation field
into a correlated field.
@@ 42,16 +42,21 @@ def CorrelatedField(target, amplitude_operator, name='xi'):
amplitude_operator: Operator
name : string
:class:`MultiField` key for the xi-field.
+ codomain : Domain
+ The codomain for target[0]. If not supplied, it is inferred.
Returns

 Correlated field : Operator
+ Operator
+ Correlated field
"""
tgt = DomainTuple.make(target)
if len(tgt) > 1:
raise ValueError
 h_space = tgt[0].get_default_codomain()
 ht = HarmonicTransformOperator(h_space, tgt[0])
+ if codomain is None:
+ codomain = tgt[0].get_default_codomain()
+ h_space = codomain
+ ht = HarmonicTransformOperator(h_space, target=tgt[0])
p_space = amplitude_operator.target[0]
power_distributor = PowerDistributor(h_space, p_space)
A = power_distributor(amplitude_operator)
@@ 70,7 +75,7 @@ def MfCorrelatedField(target, amplitudes, name='xi'):
Parameters

target : Domain, DomainTuple or tuple of Domain
 Target of the operator. Must contain exactly one space.
+ Target of the operator. Must contain exactly two spaces.
amplitudes: iterable of Operator
List of two amplitude operators.
name : string
@@ 78,7 +83,8 @@ def MfCorrelatedField(target, amplitudes, name='xi'):
Returns

 Correlated field : Operator
+ Operator
+ Correlated field
"""
tgt = DomainTuple.make(target)
if len(tgt) != 2:
@@ 88,7 +94,7 @@ def MfCorrelatedField(target, amplitudes, name='xi'):
hsp = DomainTuple.make([tt.get_default_codomain() for tt in tgt])
ht1 = HarmonicTransformOperator(hsp, target=tgt[0], space=0)
 ht2 = HarmonicTransformOperator(ht1.target, space=1)
+ ht2 = HarmonicTransformOperator(ht1.target, target=tgt[1], space=1)
ht = ht2 @ ht1
psp = [aa.target[0] for aa in amplitudes]
diff git a/nifty5/library/dynamic_operator.py b/nifty5/library/dynamic_operator.py
index 5247ebc26370bfb832ebfa842f6e4250b5f7b267..bc440757064d7d3f2a13b2baadb3f3bc9aae6707 100644
 a/nifty5/library/dynamic_operator.py
+++ b/nifty5/library/dynamic_operator.py
@@ 43,7 +43,8 @@ def _make_dynamic_operator(target,
causal,
minimum_phase,
sigc=None,
 quant=None):
+ quant=None,
+ codomain=None):
if not isinstance(target, RGSpace):
raise TypeError("RGSpace required")
if not target.harmonic:
@@ 64,7 +65,9 @@ def _make_dynamic_operator(target,
if cone and (sigc is None or quant is None):
raise RuntimeError
 dom = DomainTuple.make(target.get_default_codomain())
+ if codomain is None:
+ codomain = target.get_default_codomain()
+ dom = DomainTuple.make(codomain)
ops = {}
FFT = FFTOperator(dom)
Real = Realizer(dom)
@@ 146,7 +149,7 @@ def dynamic_operator(*,
minimum_phase=False):
"""Constructs an operator encoding the Green's function of a linear
homogeneous dynamic system.

+
When evaluated, this operator returns the Green's function representation
in harmonic space. This result can be used as a convolution kernel to
construct solutions of the homogeneous stochastic differential equation
@@ 216,7 +219,7 @@ def dynamic_lightcone_operator(*,
minimum_phase=False):
'''Extends the functionality of :function: dynamic_operator to a Green's
function which is constrained to be within a light cone.

+
The resulting Green's function is constrained to be within a light cone.
This is achieved via convolution of the function with a light cone in
spacetime. Thereby the first axis of the space is set to be the teporal
diff git a/nifty5/library/smooth_linear_amplitude.py b/nifty5/library/smooth_linear_amplitude.py
index 664cbdfe10d32219969ca135128037908f514f85..1b525b2607bcad3da5dcb40bc6a993f3470e58bc 100644
 a/nifty5/library/smooth_linear_amplitude.py
+++ b/nifty5/library/smooth_linear_amplitude.py
@@ 20,8 +20,8 @@ import numpy as np
from ..domain_tuple import DomainTuple
from ..domains.power_space import PowerSpace
from ..field import Field
+from ..operators.adder import Adder
from ..operators.exp_transform import ExpTransform
from ..operators.offset_operator import OffsetOperator
from ..operators.qht_operator import QHTOperator
from ..operators.slope_operator import SlopeOperator
from ..operators.symmetrizing_operator import SymmetrizingOperator
@@ 29,7 +29,7 @@ from ..sugar import makeOp
def _ceps_kernel(k, a, k0):
 return (a/(1+np.sum((k.T/k0)**2, axis=1).T))**2
+ return (a/(1 + np.sum((k.T/k0)**2, axis=1).T))**2
def CepstrumOperator(target, a, k0):
@@ 189,7 +189,7 @@ def SLAmplitude(*, target, n_pix, a, k0, sm, sv, im, iv, keys=['tau', 'phi']):
sig = np.array([sv, iv])
mean = Field.from_global_data(sl.domain, mean)
sig = Field.from_global_data(sl.domain, sig)
 linear = (sl @ OffsetOperator(mean) @ makeOp(sig)).ducktape(keys[1])
+ linear = sl @ Adder(mean) @ makeOp(sig).ducktape(keys[1])
# Combine linear and smooth component
loglog_ampl = 0.5*(smooth + linear)
diff git a/nifty5/minimization/kl_energy.py b/nifty5/minimization/kl_energy.py
deleted file mode 100644
index 29943584c5301a06bcd152b1355d1b0e787fe875..0000000000000000000000000000000000000000
 a/nifty5/minimization/kl_energy.py
+++ /dev/null
@@ 1,93 +0,0 @@
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see .
#
# Copyright(C) 20132019 MaxPlanckSociety
#
# NIFTy is being developed at the MaxPlanckInstitut fuer Astrophysik.

from .energy import Energy
from ..linearization import Linearization
from .. import utilities


class KL_Energy(Energy):
 def __init__(self, position, h, nsamp, constants=[],
 constants_samples=None, gen_mirrored_samples=False,
 _samples=None):
 super(KL_Energy, self).__init__(position)
 if h.domain is not position.domain:
 raise TypeError
 self._h = h
 self._constants = constants
 if constants_samples is None:
 constants_samples = constants
 self._constants_samples = constants_samples
 if _samples is None:
 met = h(Linearization.make_partial_var(
 position, constants_samples, True)).metric
 _samples = tuple(met.draw_sample(from_inverse=True)
 for _ in range(nsamp))
 if gen_mirrored_samples:
 _samples += tuple(s for s in _samples)
 self._samples = _samples

 self._lin = Linearization.make_partial_var(position, constants)
 v, g = None, None
 for s in self._samples:
 tmp = self._h(self._lin+s)
 if v is None:
 v = tmp.val.local_data[()]
 g = tmp.gradient
 else:
 v += tmp.val.local_data[()]
 g = g + tmp.gradient
 self._val = v / len(self._samples)
 self._grad = g * (1./len(self._samples))
 self._metric = None

 def at(self, position):
 return KL_Energy(position, self._h, 0,
 self._constants, self._constants_samples,
 _samples=self._samples)

 @property
 def value(self):
 return self._val

 @property
 def gradient(self):
 return self._grad

 def _get_metric(self):
 if self._metric is None:
 lin = self._lin.with_want_metric()
 mymap = map(lambda v: self._h(lin+v).metric, self._samples)
 self._metric = utilities.my_sum(mymap)
 self._metric = self._metric.scale(1./len(self._samples))

 def apply_metric(self, x):
 self._get_metric()
 return self._metric(x)

 @property
 def metric(self):
 self._get_metric()
 return self._metric

 @property
 def samples(self):
 return self._samples

 def __repr__(self):
 return 'KL ({} samples):\n'.format(len(
 self._samples)) + utilities.indent(self._ham.__repr__())
diff git a/nifty5/minimization/metric_gaussian_kl.py b/nifty5/minimization/metric_gaussian_kl.py
new file mode 100644
index 0000000000000000000000000000000000000000..203bc6e252e28c5136a0ee9e86528c994b1faed4
 /dev/null
+++ b/nifty5/minimization/metric_gaussian_kl.py
@@ 0,0 +1,133 @@
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+# Copyright(C) 2013-2019 Max-Planck-Society
+#
+# NIFTy is being developed at the Max-Planck-Institut fuer Astrophysik.
+
+from .energy import Energy
+from ..linearization import Linearization
+from .. import utilities
+
+
+class MetricGaussianKL(Energy):
+ """Provides the sampled Kullback-Leibler divergence between a distribution
+ and a Metric Gaussian.
+
+ A Metric Gaussian is used to approximate some other distribution.
+ It is a Gaussian distribution that uses the Fisher Information Metric
+ of the other distribution at the location of its mean to approximate the
+ variance. In order to infer the mean, a stochastic estimate of the
+ Kullback-Leibler divergence is minimized. This estimate is obtained by
+ drawing samples from the Metric Gaussian at the current mean.
+ During minimization these samples are kept constant, updating only the
+ mean. Due to the typically nonlinear structure of the true distribution
+ these samples have to be updated by reinitializing this class at some
+ point. Here standard parametrization of the true distribution is assumed.
+
+ Parameters
+ ----------
+ mean : Field
+ The current mean of the Gaussian.
+ hamiltonian : StandardHamiltonian
+ The StandardHamiltonian of the approximated probability distribution.
+ n_samples : integer
+ The number of samples used to stochastically estimate the KL.
+ constants : list
+ A list of parameter keys that are kept constant during optimization.
+ point_estimates : list
+ A list of parameter keys for which no samples are drawn, but that are
+ optimized for, corresponding to point estimates of these.
+ mirror_samples : boolean
+ Whether the negatives of the drawn samples are also used,
+ as they are equally legitimate samples. If true, the number of used
+ samples doubles. Mirroring samples stabilizes the KL estimate as
+ extreme sample variation is counterbalanced. (default : False)
+
+ Notes
+ -----
+ For further details see: Metric Gaussian Variational Inference
+ (in preparation)
+ """
+
+ def __init__(self, mean, hamiltonian, n_sampels, constants=[],
+ point_estimates=None, mirror_samples=False,
+ _samples=None):
+ super(MetricGaussianKL, self).__init__(mean)
+ if hamiltonian.domain is not mean.domain:
+ raise TypeError
+ self._hamiltonian = hamiltonian
+ self._constants = constants
+ if point_estimates is None:
+ point_estimates = constants
+ self._constants_samples = point_estimates
+ if _samples is None:
+ met = hamiltonian(Linearization.make_partial_var(
+ mean, point_estimates, True)).metric
+ _samples = tuple(met.draw_sample(from_inverse=True)
+ for _ in range(n_sampels))
+ if mirror_samples:
+ _samples += tuple(s for s in _samples)
+ self._samples = _samples
+
+ self._lin = Linearization.make_partial_var(mean, constants)
+ v, g = None, None
+ for s in self._samples:
+ tmp = self._hamiltonian(self._lin+s)
+ if v is None:
+ v = tmp.val.local_data[()]
+ g = tmp.gradient
+ else:
+ v += tmp.val.local_data[()]
+ g = g + tmp.gradient
+ self._val = v / len(self._samples)
+ self._grad = g * (1./len(self._samples))
+ self._metric = None
+
+ def at(self, position):
+ return MetricGaussianKL(position, self._hamiltonian, 0,
+ self._constants, self._constants_samples,
+ _samples=self._samples)
+
+ @property
+ def value(self):
+ return self._val
+
+ @property
+ def gradient(self):
+ return self._grad
+
+ def _get_metric(self):
+ if self._metric is None:
+ lin = self._lin.with_want_metric()
+ mymap = map(lambda v: self._hamiltonian(lin+v).metric,
+ self._samples)
+ self._metric = utilities.my_sum(mymap)
+ self._metric = self._metric.scale(1./len(self._samples))
+
+ def apply_metric(self, x):
+ self._get_metric()
+ return self._metric(x)
+
+ @property
+ def metric(self):
+ self._get_metric()
+ return self._metric
+
+ @property
+ def samples(self):
+ return self._samples
+
+ def __repr__(self):
+ return 'KL ({} samples):\n'.format(len(
+ self._samples)) + utilities.indent(self._ham.__repr__())
diff git a/nifty5/operators/offset_operator.py b/nifty5/operators/adder.py
similarity index 82%
rename from nifty5/operators/offset_operator.py
rename to nifty5/operators/adder.py
index 98976e0579877f1ec3ece13678325843c7208497..ccee05ba1af740cfbf27d1081bbee6ae50a0857e 100644
 a/nifty5/operators/offset_operator.py
+++ b/nifty5/operators/adder.py
@@ 15,18 +15,22 @@
#
# NIFTy is being developed at the MaxPlanckInstitut fuer Astrophysik.
+from ..field import Field
+from ..multi_field import MultiField
from .operator import Operator
class OffsetOperator(Operator):
 """Shifts the input by a fixed field.
+class Adder(Operator):
+ """Adds a fixed field.
Parameters

 field : Field
+ field : Field or MultiField
The field by which the input is shifted.
"""
def __init__(self, field):
+ if not isinstance(field, (Field, MultiField)):
+ raise TypeError
self._field = field
self._domain = self._target = field.domain
diff git a/nifty5/operators/block_diagonal_operator.py b/nifty5/operators/block_diagonal_operator.py
index 1ac8abe8fd0385d79bc8462dcd917daf540be676..0996983350b2df9def4980f0b4fcf4e8a255eee0 100644
 a/nifty5/operators/block_diagonal_operator.py
+++ b/nifty5/operators/block_diagonal_operator.py
@@ 27,7 +27,7 @@ class BlockDiagonalOperator(EndomorphicOperator):
domain : MultiDomain
Domain and target of the operator.
operators : dict
 Dictionary with subdomain names as keys and :class:`LinearOperator`s
+ Dictionary with subdomain names as keys and :class:`LinearOperator` s
as items.
"""
def __init__(self, domain, operators):
diff git a/nifty5/operators/diagonal_operator.py b/nifty5/operators/diagonal_operator.py
index ba18d1edce45a9c1999959ac7595a9e45eaf0b84..069095c6038e69dd94a6a7a5aba2be5cdc456a8e 100644
 a/nifty5/operators/diagonal_operator.py
+++ b/nifty5/operators/diagonal_operator.py
@@ 27,8 +27,8 @@ class DiagonalOperator(EndomorphicOperator):
"""Represents a :class:`LinearOperator` which is diagonal.
The NIFTy DiagonalOperator class is a subclass derived from the
 :class:`EndomorphicOperator`. It multiplies an input field pixelwise with its
 diagonal.
+ :class:`EndomorphicOperator`. It multiplies an input field pixelwise with
+ its diagonal.
Parameters

diff git a/nifty5/operators/energy_operators.py b/nifty5/operators/energy_operators.py
index 9a18b35401602332d2184a3860cd81d5d50e6995..ea24001538d09cc98eefc99e1d21d89fdfb90210 100644
 a/nifty5/operators/energy_operators.py
+++ b/nifty5/operators/energy_operators.py
@@ 37,7 +37,7 @@ class EnergyOperator(Operator):
Examples

 Information Hamiltonian, i.e. negativelogprobabilities.
  Gibbs free energy, i.e. an averaged Hamiltonian, aka KullbachLeibler
+ - Gibbs free energy, i.e. an averaged Hamiltonian, aka Kullback-Leibler
divergence.
"""
_target = DomainTuple.scalar_domain()
@@ 259,7 +259,7 @@ class BernoulliEnergy(EnergyOperator):
return v.add_metric(met)
class Hamiltonian(EnergyOperator):
+class StandardHamiltonian(EnergyOperator):
"""Computes an information Hamiltonian in its standard form, i.e. with the
prior being a Gaussian with unit covariance.
@@ 314,52 +314,24 @@ class Hamiltonian(EnergyOperator):
def __repr__(self):
subs = 'Likelihood:\n{}'.format(utilities.indent(self._lh.__repr__()))
subs += '\nPrior: Quadratic{}'.format(self._lh.domain.keys())
 return 'Hamiltonian:\n' + utilities.indent(subs)
+ return 'StandardHamiltonian:\n' + utilities.indent(subs)
class AveragedEnergy(EnergyOperator):
 """Computes KullbackLeibler (KL) divergence or Gibbs free energies.

 A sampleaveraged energy, e.g. an Hamiltonian, approximates the relevant
 part of a KL to be used in Variational Bayes inference if the samples are
 drawn from the approximating Gaussian:

 .. math ::
 \\text{KL}(m) = \\frac1{\\#\\{v_i\\}} \\sum_{v_i} H(m+v_i),

 where :math:`v_i` are the residual samples and :math:`m` is the mean field
 around which the samples are drawn.
+ """Averages an energy over samples
Parameters

h: Hamiltonian
The energy to be averaged.
res_samples : iterable of Fields
 Set of residual sample points to be added to mean field for approximate
 estimation of the KL.
+ Set of residual sample points to be added to mean field for
+ approximate estimation of the KL.
Note

 Having symmetrized residual samples, with both v_i and v_i being present
 ensures that the distribution mean is exactly represented. This reduces
 sampling noise and helps the numerics of the KL minimization process in the
 variational Bayes inference.

 See also
 
 Let :math:`Q(f) = G(fm,D)` be the Gaussian distribution
 which is used to approximate the accurate posterior :math:`P(fd)` with
 information Hamiltonian
 :math:`H(d,f) = \\log P(d,f) = \\log P(fd) + \\text{const}`. In
 Variational Bayes one needs to optimize the KL divergence between those
 two distributions for m. It is:

 :math:`KL(Q,P) = \\int Df Q(f) \\log Q(f)/P(f)\\\\
 = \\left< \\log Q(f) \\right>_Q(f)  \\left< \\log P(f) \\right>_Q(f)\\\\
 = \\text{const} + \\left< H(f) \\right>_G(fm,D)`

 in essence the information Hamiltonian averaged over a Gaussian
 distribution centered on the mean m.
+ Having symmetrized residual samples, with both v_i and -v_i being
+ present, ensures that the distribution mean is exactly represented.
:class:`AveragedEnergy(h)` approximates
:math:`\\left< H(f) \\right>_{G(fm,D)}` if the residuals
diff git a/nifty5/operators/field_zero_padder.py b/nifty5/operators/field_zero_padder.py
index 8ab9c520f790411a056cacdd5c44377b882fed40..eabc7d2f086d3686561f3c0c3b1c7b6f170d75a8 100644
 a/nifty5/operators/field_zero_padder.py
+++ b/nifty5/operators/field_zero_padder.py
@@ 25,6 +25,29 @@ from .linear_operator import LinearOperator
class FieldZeroPadder(LinearOperator):
+ """Operator which applies zero-padding to one of the subdomains of its
+ input field
+
+ Parameters
+ ----------
+ domain : Domain, DomainTuple or tuple of Domain
+ The operator's input domain.
+ new_shape : list or tuple of int
+ The new dimensions of the subdomain which is zero-padded.
+ No entry must be smaller than the corresponding dimension in the
+ operator's domain.
+ space : int
+ The index of the subdomain to be zero-padded. If None, it is set to 0
+ if domain contains exactly one space. domain[space] must be an RGSpace.
+ central : bool
+ If `False`, padding is performed at the end of the domain axes,
+ otherwise in the middle.
+
+ Notes
+ -----
+ When doing central padding on an axis with an even length, the "central"
+ entry should in principle be split up; this is currently not done.
+ """
def __init__(self, domain, new_shape, space=0, central=False):
self._domain = DomainTuple.make(domain)
self._space = utilities.infer_space(self._domain, space)
diff git a/nifty5/operators/qht_operator.py b/nifty5/operators/qht_operator.py
index ec858d623e5cd8576bed93b966372afa267c0467..ac8f5b52c2371b471da96479a8c9835f46342d91 100644
 a/nifty5/operators/qht_operator.py
+++ b/nifty5/operators/qht_operator.py
@@ 37,9 +37,11 @@ class QHTOperator(LinearOperator):
space : int
The index of the domain on which the operator acts.
target[space] must be a nonharmonic LogRGSpace.
+ codomain : Domain
+ The codomain for target[space]. If not supplied, it is inferred.
"""
 def __init__(self, target, space=0):
+ def __init__(self, target, space=0, codomain=None):
self._target = DomainTuple.make(target)
self._space = infer_space(self._target, space)
@@ 51,8 +53,9 @@ class QHTOperator(LinearOperator):
raise TypeError("target[space] must be a nonharmonic space")
self._domain = [dom for dom in self._target]
 self._domain[self._space] = \
 self._target[self._space].get_default_codomain()
+ if codomain is None:
+ codomain = self._target[self._space].get_default_codomain()
+ self._domain[self._space] = codomain
self._domain = DomainTuple.make(self._domain)
self._capability = self.TIMES  self.ADJOINT_TIMES
diff git a/nifty5/operators/scaling_operator.py b/nifty5/operators/scaling_operator.py
index 708301a1011a488a070662383690977f0ebd51fc..1e14b62ed13de6c5e3cddfa0b1f8b6f6c3366ee3 100644
 a/nifty5/operators/scaling_operator.py
+++ b/nifty5/operators/scaling_operator.py
@@ 33,6 +33,9 @@ class ScalingOperator(EndomorphicOperator):
Notes

+ :class:`Operator` supports the multiplication with a scalar, so one does
+ not need to instantiate :class:`ScalingOperator` explicitly in most cases.
+
Formally, this operator always supports all operation modes (times,
adjoint_times, inverse_times and inverse_adjoint_times), even if `factor`
is 0 or infinity. It is the user's responsibility to apply the operator
diff git a/nifty5/operators/slope_operator.py b/nifty5/operators/slope_operator.py
index b4676ed45d38776926ea5c32bc06b305615e85af..c9486e488f18ecf085878338b42d34753c8bdf89 100644
 a/nifty5/operators/slope_operator.py
+++ b/nifty5/operators/slope_operator.py
@@ 29,8 +29,8 @@ class SlopeOperator(LinearOperator):
Slope and yintercept of this line are the two parameters which are
defined on an UnstructeredDomain (in this order) which is the domain of
 the operator. Being a LogRGSpace instance each pixel has a welldefined coordinate
 value.
+ the operator. Being a LogRGSpace instance each pixel has a well-defined
+ coordinate value.
The yintercept is defined to be the value at t_0 of the target.
diff git a/nifty5/operators/value_inserter.py b/nifty5/operators/value_inserter.py
index ce1142da23d2ca031ea1f7b658ce2646009f6011..db568ecf9883b20e98bb1747bb0c2967789234d5 100644
 a/nifty5/operators/value_inserter.py
+++ b/nifty5/operators/value_inserter.py
@@ 15,9 +15,6 @@
#
# NIFTy is being developed at the MaxPlanckInstitut fuer Astrophysik.
from functools import reduce
from operator import mul

import numpy as np
from ..domain_tuple import DomainTuple
@@ 28,7 +25,7 @@ from .linear_operator import LinearOperator
class ValueInserter(LinearOperator):
 """Inserts one value into a field which is constant otherwise.
+ """Inserts one value into a field which is zero otherwise.
Parameters

@@ 36,16 +33,11 @@ class ValueInserter(LinearOperator):
index : iterable of int
The index of the target into which the value of the domain shall be
inserted.
 default_value : float
 Constant value which is inserted everywhere where the input operator
 is not inserted. Default is 0.
"""
 def __init__(self, target, index, default_value=0.):
+ def __init__(self, target, index):
self._domain = makeDomain(UnstructuredDomain(1))
self._target = DomainTuple.make(target)

 # Type and value checks
index = tuple(index)
if not all([
isinstance(n, int) and n >= 0 and n < self.target.shape[i]
@@ 54,19 +46,17 @@ class ValueInserter(LinearOperator):
raise TypeError
if not len(index) == len(self.target.shape):
raise ValueError
 np.empty(self.target.shape)[index]

self._index = index
 self._dv = float(default_value)
 self._dvsum = self._dv*(reduce(mul, self.target.shape)  1)
self._capability = self.TIMES  self.ADJOINT_TIMES
+ # Check whether index is in bounds
+ np.empty(self.target.shape)[self._index]
def apply(self, x, mode):
self._check_input(x, mode)
x = x.to_global_data()
if mode == self.TIMES:
 res = np.full(self.target.shape, self._dv, dtype=x.dtype)
+ res = np.zeros(self.target.shape, dtype=x.dtype)
res[self._index] = x
else:
 res = np.full((1,), x[self._index] + self._dvsum, dtype=x.dtype)
+ res = np.full((1,), x[self._index], dtype=x.dtype)
return Field.from_global_data(self._tgt(mode), res)
diff git a/test/test_energy_gradients.py b/test/test_energy_gradients.py
index 6541f0aaae8d03b3bad8446896fdd0684ffa4de9..129e40292cb1d4ff9aad3edf62c8cc945497e9f1 100644
 a/test/test_energy_gradients.py
+++ b/test/test_energy_gradients.py
@@ 69,7 +69,7 @@ def test_hamiltonian_and_KL(field):
field = field.exp()
space = field.domain
lh = ift.GaussianEnergy(domain=space)
 hamiltonian = ift.Hamiltonian(lh)
+ hamiltonian = ift.StandardHamiltonian(lh)
ift.extra.check_value_gradient_consistency(hamiltonian, field)
S = ift.ScalingOperator(1., space)
samps = [S.draw_sample() for i in range(3)]
diff git a/test/test_operators/test_value_inserter.py b/test/test_operators/test_value_inserter.py
index c84efb159c3cac32df96250e7743199e48f71de5..2f2e34253829129aac4b94171d8b024468961021 100644
 a/test/test_operators/test_value_inserter.py
+++ b/test/test_operators/test_value_inserter.py
@@ 17,7 +17,7 @@
import numpy as np
import pytest
from numpy.testing import assert_allclose
+from numpy.testing import assert_
import nifty5 as ift
@@ 37,17 +37,5 @@ def test_value_inserter(sp, seed):
f = ift.from_random('normal', ift.UnstructuredDomain((1,)))
inp = f.to_global_data()[0]
ret = op(f).to_global_data()
 assert_allclose(ret[ind], inp)
 assert_allclose(np.sum(ret), inp)


def test_value_inserter_nonzero():
 sp = ift.RGSpace(4)
 ind = (1,)
 default = 1.24
 op = ift.ValueInserter(sp, ind, default)
 f = ift.from_random('normal', ift.UnstructuredDomain((1,)))
 inp = f.to_global_data()[0]
 ret = op(f).to_global_data()
 assert_allclose(ret[ind], inp)
 assert_allclose(np.sum(ret), inp + 3*default)
+ assert_(ret[ind] == inp)
+ assert_(np.sum(ret) == inp)