Commit 86891afb authored by Martin Reinecke

first round of fixes

parent 4a44d1e8
@@ -27,32 +27,31 @@ from .simple_linear_operators import VdotOperator


class EnergyOperator(Operator):
    """An abstract class from which other specific EnergyOperator subclasses
    are derived.

    An EnergyOperator has a scalar domain as target domain.
    It is intended as an objective function for field inference.

    Typical usage in IFT:
    - as an information Hamiltonian (i.e. a negative log probability)
    - or as a Gibbs free energy (i.e. an averaged Hamiltonian),
      aka Kullback-Leibler divergence.
    """
    _target = DomainTuple.scalar_domain()
class SquaredNormOperator(EnergyOperator):
    """Class for squared field norm energy.

    Usage
    -----
    E = SquaredNormOperator(domain) represents a field energy E that is the
    squared L2 norm of a field f:
    E(f) = f^dagger f
    """
    def __init__(self, domain):
        self._domain = domain
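As an illustration (not part of this commit), a minimal usage sketch; the package-level import name `nifty5` and the example domain are assumptions:

    import nifty5 as ift

    space = ift.RGSpace(64)                     # example 1D domain (assumption)
    f = ift.Field.from_random('normal', space)  # random test field
    E = ift.SquaredNormOperator(space)          # E(f) = f^dagger f
    print(E(f))                                 # scalar Field with the squared L2 norm of f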
@@ -73,13 +72,13 @@ class QuadraticFormOperator(EnergyOperator):
    op : EndomorphicOperator
        kernel of quadratic form

    Notes
    -----
    `E = QuadraticFormOperator(op)` represents a field energy that is a
    quadratic form in a field f with kernel op:
    :math:`E(f) = 0.5 f^\dagger op f`
    """
    def __init__(self, op):
        from .endomorphic_operator import EndomorphicOperator
        if not isinstance(op, EndomorphicOperator):
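Again as a hedged sketch (same assumed import name), the quadratic form could be exercised with a `ScalingOperator` as a trivially endomorphic kernel:

    import nifty5 as ift

    space = ift.RGSpace(64)
    op = ift.ScalingOperator(2., space)         # endomorphic kernel: op(f) = 2*f
    E = ift.QuadraticFormOperator(op)           # E(f) = 0.5 f^dagger op f
    f = ift.Field.from_random('normal', space)
    print(E(f))                                 # with this kernel, equals the squared L2 norm of f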
@@ -102,24 +101,21 @@ class GaussianEnergy(EnergyOperator):
    Attributes
    ----------
    mean : Field
        mean of the Gaussian (default 0)
    covariance : LinearOperator
        covariance of the Gaussian (default = identity operator)
    domain : Domainoid
        operator domain, inferred from mean or covariance if specified

    Notes
    -----
    - At least one of the arguments has to be provided, otherwise the domain
      cannot be inferred and an exception is raised.
    - `E = GaussianEnergy(mean=m, covariance=D)` represents (up to constants)
      :math:`E(f) = - \log G(f-m, D) = 0.5 (f-m)^\dagger D^{-1} (f-m)`,
      an information energy for a Gaussian distribution with mean m and
      covariance D.
    """
    def __init__(self, mean=None, covariance=None, domain=None):
        self._domain = None
        if mean is not None:
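A possible usage sketch, under the same assumed import name, with a `ScalingOperator` serving as a simple invertible covariance:

    import nifty5 as ift

    space = ift.RGSpace(64)
    m = ift.Field.from_random('normal', space)  # mean field
    D = ift.ScalingOperator(4., space)          # diagonal, invertible covariance
    E = ift.GaussianEnergy(mean=m, covariance=D)
    f = ift.Field.from_random('normal', space)
    print(E(f))                                 # 0.5 (f-m)^dagger D^{-1} (f-m), up to constants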
@@ -156,23 +152,23 @@ class GaussianEnergy(EnergyOperator):


class PoissonianEnergy(EnergyOperator):
    """Class for likelihood-energies of expected count field constrained by
    Poissonian count data.

    Parameters
    ----------
    d : Field
        data field with counts

    Notes
    -----
    E = PoissonianEnergy(d) represents (up to an f-independent term log(d!))
    :math:`E(f) = -\log \mbox{Poisson}(d|f) = \sum(f) - d^\dagger \log(f)`,
    where f is a Field in data space with the expectation values for
    the counts.
    """
    def __init__(self, d):
        self._d = d
        self._domain = DomainTuple.make(d.domain)
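A usage sketch under the same assumptions; the count values are made up for illustration:

    import numpy as np
    import nifty5 as ift

    space = ift.RGSpace(8)
    counts = np.array([0., 1., 3., 2., 0., 1., 4., 2.])  # made-up count data
    d = ift.Field.from_global_data(space, counts)
    E = ift.PoissonianEnergy(d)
    lam = ift.Field.full(space, 1.5)            # expected counts per pixel; must be positive
    print(E(lam))                               # sum(lam) - d^dagger log(lam)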
@@ -189,11 +185,11 @@ class PoissonianEnergy(EnergyOperator):


class InverseGammaLikelihood(EnergyOperator):
    """Special class for inverse Gamma distributed covariances.

    RL FIXME: To be documented.
    """
    def __init__(self, d):
        self._d = d
        self._domain = DomainTuple.make(d.domain)
@@ -209,23 +205,23 @@ class InverseGammaLikelihood(EnergyOperator):


class BernoulliEnergy(EnergyOperator):
    """Class for likelihood-energies of expected event frequency constrained by
    event data.

    Parameters
    ----------
    d : Field
        data field with events (=1) or non-events (=0)

    Notes
    -----
    E = BernoulliEnergy(d) represents
    :math:`E(f) = -\log \mbox{Bernoulli}(d|f) = -d^\dagger \log f - (1-d)^\dagger \log(1-f)`,
    where f is a field in data space (d.domain) with the expected frequencies of
    events.
    """
    def __init__(self, d):
        self._d = d
        self._domain = DomainTuple.make(d.domain)
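A similar sketch with made-up binary event data:

    import numpy as np
    import nifty5 as ift

    space = ift.RGSpace(6)
    events = np.array([1., 0., 0., 1., 1., 0.])  # made-up event/non-event data
    d = ift.Field.from_global_data(space, events)
    E = ift.BernoulliEnergy(d)
    p = ift.Field.full(space, 0.3)              # expected event frequency, strictly in (0, 1)
    print(E(p))                                 # -d^dagger log(p) - (1-d)^dagger log(1-p)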
@@ -249,35 +245,33 @@ class Hamiltonian(EnergyOperator):
    ----------
    lh : EnergyOperator
        a likelihood energy
    ic_samp : IterationController
        is passed to SamplingEnabler to draw Gaussian distributed samples
        with covariance = metric of the Hamiltonian
        (= Hessian without terms that generate negative eigenvalues);
        default = None

    Notes
    -----
    H = Hamiltonian(E_lh) represents
    :math:`H(f) = 0.5 f^\dagger f + E_{lh}(f)`,
    an information Hamiltonian for a field f with a white Gaussian prior
    (unit covariance) and the likelihood energy :math:`E_{lh}`.

    Other field priors can be represented via transformations of a white
    Gaussian field into a field with the desired prior probability structure.
    By implementing prior information this way, the field prior is represented
    by a generative model, from which NIFTy can draw samples and infer a field
    using the Maximum a Posteriori (MAP) or the Variational Bayes (VB) method.

    For more details see:
    "Encoding prior knowledge in the structure of the likelihood",
    Jakob Knollmüller, Torsten A. Ensslin, submitted, arXiv:1812.04403,
    https://arxiv.org/abs/1812.04403
    """
    def __init__(self, lh, ic_samp=None):
        self._lh = lh
        self._prior = GaussianEnergy(domain=lh.domain)
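A sketch of building a Hamiltonian from a likelihood energy, under the same assumed API as in the earlier sketches:

    import nifty5 as ift

    space = ift.RGSpace(64)
    d = ift.Field.from_random('normal', space)  # mock data living directly in signal space
    lh = ift.GaussianEnergy(mean=d)             # likelihood energy E_lh
    H = ift.Hamiltonian(lh)                     # H(f) = 0.5 f^dagger f + E_lh(f)
    f = ift.Field.from_random('normal', space)
    print(H(f))                                 # white-prior term plus likelihood term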
@@ -303,27 +297,27 @@ class Hamiltonian(EnergyOperator):


class SampledKullbachLeiblerDivergence(EnergyOperator):
    """Class for Kullback-Leibler (KL) divergences or Gibbs free energies.

    Precisely, a sample-averaged Hamiltonian (or other energy) that represents
    approximately the relevant part of a KL to be used in Variational Bayes
    inference if the samples are drawn from the approximating Gaussian.

    Let :math:`Q(f) = G(f-m, D)` be the Gaussian used to approximate
    :math:`P(f|d)`, the correct posterior with information Hamiltonian
    :math:`H(d, f) = -\log P(d, f) = -\log P(f|d) + \mathrm{const}`.
    The KL divergence between those should then be optimized for m. It is

    :math:`KL(Q, P) = \int \mathcal{D}f\, Q(f) \log(Q(f)/P(f)) = \langle \log Q(f) \rangle_{Q(f)} - \langle \log P(f) \rangle_{Q(f)} = \mathrm{const} + \langle H(f) \rangle_{G(f-m, D)}`,

    in essence the information Hamiltonian averaged over a Gaussian distribution
    centered on the mean m.

    SampledKullbachLeiblerDivergence(H) approximates
    :math:`\langle H(f) \rangle_{G(f-m, D)}` if the residuals f-m are drawn from
    covariance D.

    Parameters
    ----------
    h: Hamiltonian
@@ -332,22 +326,20 @@ class SampledKullbachLeiblerDivergence(EnergyOperator):
        set of residual sample points to be added to the mean field
        for approximate estimation of the KL

    Notes
    -----
    KL = SampledKullbachLeiblerDivergence(H, samples) represents
    :math:`KL(m) = \sum_i H(m+v_i) / N`,
    where the :math:`v_i` are the residual samples, N is their number, and m is
    the mean field around which the samples are drawn.

    Having symmetrized residual samples, with both :math:`v_i` and :math:`-v_i`
    present, ensures that the distribution mean is exactly represented. This
    reduces sampling noise and helps the numerics of the KL minimization
    process in the variational Bayes inference.
    """
    def __init__(self, h, res_samples):
        self._h = h
        self._domain = h.domain
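Finally, a sketch of a symmetrized-sample KL under the same assumed API; `[v, -v]` is the symmetrized pair the Notes recommend:

    import nifty5 as ift

    space = ift.RGSpace(64)
    d = ift.Field.from_random('normal', space)
    H = ift.Hamiltonian(ift.GaussianEnergy(mean=d))
    m = ift.Field.full(space, 0.)               # current mean estimate
    v = ift.Field.from_random('normal', space)  # one residual sample
    KL = ift.SampledKullbachLeiblerDivergence(H, [v, -v])  # symmetrized pair
    print(KL(m))                                # (H(m+v) + H(m-v)) / 2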