Commit aad02ccf authored by Lukas Platz's avatar Lukas Platz

Merge branch 'NIFTy_5' into mf_new

parents 125a568b f66235b7
......@@ -2,6 +2,7 @@
git_version.py
# custom
*.txt
setup.cfg
.idea
.DS_Store
......
......@@ -17,7 +17,7 @@ NIFTy-related publications
date = {2018-04-05},
}
@ARTICLE{2013A&A...554A..26S,
@article{2013A&A...554A..26S,
author = {{Selig}, M. and {Bell}, M.~R. and {Junklewitz}, H. and {Oppermann}, N. and {Reinecke}, M. and {Greiner}, M. and {Pachajoa}, C. and {En{\ss}lin}, T.~A.},
title = "{NIFTY - Numerical Information Field Theory. A versatile PYTHON library for signal inference}",
journal = {\aap},
......@@ -35,7 +35,7 @@ NIFTy-related publications
adsnote = {Provided by the SAO/NASA Astrophysics Data System}
}
@ARTICLE{2017arXiv170801073S,
@article{2017arXiv170801073S,
author = {{Steininger}, T. and {Dixit}, J. and {Frank}, P. and {Greiner}, M. and {Hutschenreuter}, S. and {Knollm{\"u}ller}, J. and {Leike}, R. and {Porqueres}, N. and {Pumpe}, D. and {Reinecke}, M. and {{\v S}raml}, M. and {Varady}, C. and {En{\ss}lin}, T.},
title = "{NIFTy 3 - Numerical Information Field Theory - A Python framework for multicomponent signal inference on HPC clusters}",
journal = {ArXiv e-prints},
......
......@@ -59,7 +59,7 @@ from .probing import probe_with_posterior_samples, probe_diagonal, \
from .minimization.line_search import LineSearch
from .minimization.iteration_controllers import (
IterationController, GradientNormController, DeltaEnergyController,
GradInfNormController)
GradInfNormController, AbsDeltaEnergyController)
from .minimization.minimizer import Minimizer
from .minimization.conjugate_gradient import ConjugateGradient
from .minimization.nonlinear_cg import NonlinearCG
......
......@@ -93,10 +93,10 @@ class LogRGSpace(StructuredDomain):
Returns
-------
LogRGSpace
The parter domain
The partner domain
"""
codomain_bindistances = 1./(self.bindistances*self.shape)
return LogRGSpace(self.shape, codomain_bindistances, self._t_0, True)
return LogRGSpace(self.shape, codomain_bindistances, self._t_0, not self.harmonic)
def get_k_length_array(self):
"""Generates array of distances to origin of the space.
......
......@@ -181,7 +181,7 @@ class RGSpace(StructuredDomain):
Returns
-------
RGSpace
The parter domain
The partner domain
"""
distances = 1. / (np.array(self.shape)*np.array(self.distances))
return RGSpace(self.shape, distances, not self.harmonic)
......
......@@ -70,6 +70,9 @@ def _full_implementation(op, domain_dtype, target_dtype, atol, rtol,
def _check_linearity(op, domain_dtype, atol, rtol):
needed_cap = op.TIMES
if (op.capability & needed_cap) != needed_cap:
return
fld1 = from_random("normal", op.domain, dtype=domain_dtype)
fld2 = from_random("normal", op.domain, dtype=domain_dtype)
alpha = np.random.random() # FIXME: this can break badly with MPI!
......@@ -121,6 +124,9 @@ def consistency_check(op, domain_dtype=np.float64, target_dtype=np.float64,
raise TypeError('This test tests only linear operators.')
_domain_check(op)
_check_linearity(op, domain_dtype, atol, rtol)
_check_linearity(op.adjoint, target_dtype, atol, rtol)
_check_linearity(op.inverse, target_dtype, atol, rtol)
_check_linearity(op.adjoint.inverse, domain_dtype, atol, rtol)
_full_implementation(op, domain_dtype, target_dtype, atol, rtol,
only_r_linear)
_full_implementation(op.adjoint, target_dtype, domain_dtype, atol, rtol,
......
......@@ -26,7 +26,7 @@ def nthreads():
def set_nthreads(nthr):
global _nthreads
_nthreads = nthr
_nthreads = int(nthr)
def fftn(a, axes=None):
......
......@@ -15,12 +15,13 @@
#
# NIFTy is being developed at the Max-Planck-Institut fuer Astrophysik.
import numpy as np
from ..domain_tuple import DomainTuple
from ..domains.rg_space import RGSpace
from ..domains.unstructured_domain import UnstructuredDomain
from ..operators.linear_operator import LinearOperator
from ..sugar import from_global_data, makeDomain
import numpy as np
class GridderMaker(object):
......
......@@ -74,27 +74,27 @@ class ConjugateGradient(Minimizer):
if previous_gamma == 0:
return energy, controller.CONVERGED
iter = 0
ii = 0
while True:
q = energy.apply_metric(d)
ddotq = d.vdot(q).real
if ddotq == 0.:
logger.error("Error: ConjugateGradient: ddotq==0.")
curv = d.vdot(q).real
if curv == 0.:
logger.error("Error: ConjugateGradient: curv==0.")
return energy, controller.ERROR
alpha = previous_gamma/ddotq
alpha = previous_gamma/curv
if alpha < 0:
logger.error("Error: ConjugateGradient: alpha<0.")
return energy, controller.ERROR
iter += 1
if iter < self._nreset:
ii += 1
if ii < self._nreset:
r = r - q*alpha
energy = energy.at_with_grad(energy.position - alpha*d, r)
else:
energy = energy.at(energy.position - alpha*d)
r = energy.gradient
iter = 0
ii = 0
s = r if preconditioner is None else preconditioner(r)
......
......@@ -18,8 +18,14 @@
import numpy as np
from ..logger import logger
from ..probing import approximation2endo
from ..sugar import makeOp
from .conjugate_gradient import ConjugateGradient
from .iteration_controllers import (AbsDeltaEnergyController,
GradientNormController)
from .line_search import LineSearch
from .minimizer import Minimizer
from .quadratic_energy import QuadraticEnergy
class DescentMinimizer(Minimizer):
......@@ -79,7 +85,8 @@ class DescentMinimizer(Minimizer):
# compute a step length that reduces energy.value sufficiently
new_energy, success = self.line_searcher.perform_line_search(
energy=energy, pk=self.get_descent_direction(energy),
energy=energy,
pk=self.get_descent_direction(energy, f_k_minus_1),
f_k_minus_1=f_k_minus_1)
if not success:
self.reset()
......@@ -103,7 +110,7 @@ class DescentMinimizer(Minimizer):
def reset(self):
pass
def get_descent_direction(self, energy):
def get_descent_direction(self, energy, old_value=None):
"""Calculates the next descent direction.
Parameters
......@@ -112,6 +119,10 @@ class DescentMinimizer(Minimizer):
An instance of the Energy class which shall be minimized. The
position of `energy` is used as the starting point of minimization.
old_value : float
if provided, this must be the value of the energy in the previous
step.
Returns
-------
Field
......@@ -127,7 +138,7 @@ class SteepestDescent(DescentMinimizer):
functional's gradient for minimization.
"""
def get_descent_direction(self, energy):
def get_descent_direction(self, energy, _=None):
return -energy.gradient
......@@ -144,7 +155,7 @@ class RelaxedNewton(DescentMinimizer):
super(RelaxedNewton, self).__init__(controller=controller,
line_searcher=line_searcher)
def get_descent_direction(self, energy):
def get_descent_direction(self, energy, _=None):
return -energy.metric.inverse_times(energy.gradient)
......@@ -154,44 +165,32 @@ class NewtonCG(DescentMinimizer):
Algorithm derived from SciPy sources.
"""
def __init__(self, controller, line_searcher=None):
def __init__(self, controller, napprox=0, line_searcher=None, name=None,
nreset=20, max_cg_iterations=200, energy_reduction_factor=0.1):
if line_searcher is None:
line_searcher = LineSearch(preferred_initial_step_size=1.)
super(NewtonCG, self).__init__(controller=controller,
line_searcher=line_searcher)
def get_descent_direction(self, energy):
float64eps = np.finfo(np.float64).eps
grad = energy.gradient
maggrad = abs(grad).sum()
termcond = np.min([0.5, np.sqrt(maggrad)]) * maggrad
xsupi = energy.position*0
ri = grad
psupi = -ri
dri0 = ri.vdot(ri)
i = 0
while True:
if abs(ri).sum() <= termcond:
return xsupi
Ap = energy.apply_metric(psupi)
# check curvature
curv = psupi.vdot(Ap)
if 0 <= curv <= 3*float64eps:
return xsupi
elif curv < 0:
return xsupi if i > 0 else (dri0/curv) * grad
alphai = dri0/curv
xsupi = xsupi + alphai*psupi
ri = ri + alphai*Ap
dri1 = ri.vdot(ri)
psupi = (dri1/dri0)*psupi - ri
i += 1
dri0 = dri1 # update numpy.dot(ri,ri) for next time.
# curvature keeps increasing, bail out
raise ValueError("Warning: CG iterations didn't converge. "
"The Hessian is not positive definite.")
self._napprox = napprox
self._name = name
self._nreset = nreset
self._max_cg_iterations = max_cg_iterations
self._alpha = energy_reduction_factor
def get_descent_direction(self, energy, old_value=None):
if old_value is None:
ic = GradientNormController(iteration_limit=5)
else:
ediff = self._alpha*(old_value-energy.value)
ic = AbsDeltaEnergyController(
ediff, iteration_limit=self._max_cg_iterations, name=self._name)
e = QuadraticEnergy(0*energy.position, energy.metric, energy.gradient)
p = None
if self._napprox > 1:
met = energy.metric
p = makeOp(approximation2endo(met, self._napprox)).inverse
e, conv = ConjugateGradient(ic, nreset=self._nreset)(e, p)
return -e.position
class L_BFGS(DescentMinimizer):
......@@ -210,7 +209,7 @@ class L_BFGS(DescentMinimizer):
self._s = [None]*self.max_history_length
self._y = [None]*self.max_history_length
def get_descent_direction(self, energy):
def get_descent_direction(self, energy, _=None):
x = energy.position
s = self._s
y = self._y
......@@ -273,7 +272,7 @@ class VL_BFGS(DescentMinimizer):
def reset(self):
self._information_store = None
def get_descent_direction(self, energy):
def get_descent_direction(self, energy, _=None):
x = energy.position
gradient = energy.gradient
# initialize the information store if it doesn't already exist
......
......@@ -15,9 +15,10 @@
#
# NIFTy is being developed at the Max-Planck-Institut fuer Astrophysik.
import numpy as np
from ..logger import logger
from ..utilities import NiftyMeta
import numpy as np
class IterationController(metaclass=NiftyMeta):
......@@ -266,3 +267,70 @@ class DeltaEnergyController(IterationController):
return self.CONVERGED
return self.CONTINUE
class AbsDeltaEnergyController(IterationController):
    """Iteration controller that convergence-checks the absolute energy
    change between consecutive iterations.

    Parameters
    ----------
    deltaE : float
        Threshold for the absolute difference between the previous and the
        current energy value; a difference below it increases the
        convergence counter for that iteration.
    convergence_level : int, default=1
        Number of (net) converged iterations required before the run is
        reported as converged.
    iteration_limit : int, optional
        Hard cap on the number of iterations; reaching it is treated as
        convergence.
    name : str, optional
        If given, a diagnostic line tagged with this string is logged after
        every iteration.
    """

    def __init__(self, deltaE, convergence_level=1, iteration_limit=None,
                 name=None):
        self._deltaE = deltaE
        self._convergence_level = convergence_level
        self._iteration_limit = iteration_limit
        self._name = name

    def start(self, energy):
        # Reset counters; the immediate check() call advances _itcount to 0.
        self._itcount = -1
        self._ccount = 0
        self._Eold = 0.
        return self.check(energy)

    def check(self, energy):
        self._itcount += 1
        current = energy.value
        diff = abs(self._Eold - current)
        # The difference computed on iteration 0 compares against the dummy
        # initial value and is ignored for the convergence decision.
        step_converged = self._itcount > 0 and diff < self._deltaE
        self._Eold = current
        if step_converged:
            self._ccount += 1
        else:
            self._ccount = max(0, self._ccount - 1)

        # report
        if self._name is not None:
            logger.info(
                "{}: Iteration #{} energy={:.6E} diff={:.6E} crit={:.1E} clvl={}"
                .format(self._name, self._itcount, current, diff,
                        self._deltaE, self._ccount))

        # Are we done?
        limit = self._iteration_limit
        if limit is not None and self._itcount >= limit:
            logger.warning(
                "{} Iteration limit reached. Assuming convergence"
                .format("" if self._name is None else self._name+": "))
            return self.CONVERGED
        if self._ccount >= self._convergence_level:
            return self.CONVERGED
        return self.CONTINUE
......@@ -18,6 +18,8 @@
from .. import utilities
from ..linearization import Linearization
from ..operators.energy_operators import StandardHamiltonian
from ..probing import approximation2endo
from ..sugar import makeOp
from .energy import Energy
......@@ -56,6 +58,9 @@ class MetricGaussianKL(Energy):
as they are equally legitimate samples. If true, the number of used
samples doubles. Mirroring samples stabilizes the KL estimate as
extreme sample variation is counterbalanced. Default is False.
napprox : int
Number of samples for computing preconditioner for sampling. No
preconditioning is done by default.
_samples : None
Only a parameter for internal uses. Typically not to be set by users.
......@@ -67,12 +72,13 @@ class MetricGaussianKL(Energy):
See also
--------
Metric Gaussian Variational Inference (FIXME in preparation)
`Metric Gaussian Variational Inference`, Jakob Knollmüller,
Torsten A. Enßlin, `<https://arxiv.org/abs/1901.11033>`_
"""
def __init__(self, mean, hamiltonian, n_samples, constants=[],
point_estimates=[], mirror_samples=False,
_samples=None):
napprox=0, _samples=None):
super(MetricGaussianKL, self).__init__(mean)
if not isinstance(hamiltonian, StandardHamiltonian):
......@@ -91,12 +97,15 @@ class MetricGaussianKL(Energy):
if _samples is None:
met = hamiltonian(Linearization.make_partial_var(
mean, point_estimates, True)).metric
if napprox > 1:
met._approximation = makeOp(approximation2endo(met, napprox))
_samples = tuple(met.draw_sample(from_inverse=True)
for _ in range(n_samples))
if mirror_samples:
_samples += tuple(-s for s in _samples)
self._samples = _samples
# FIXME Use simplify for constant input instead
self._lin = Linearization.make_partial_var(mean, constants)
v, g = None, None
for s in self._samples:
......@@ -110,11 +119,12 @@ class MetricGaussianKL(Energy):
self._val = v / len(self._samples)
self._grad = g * (1./len(self._samples))
self._metric = None
self._napprox = napprox
def at(self, position):
return MetricGaussianKL(position, self._hamiltonian, 0,
self._constants, self._point_estimates,
_samples=self._samples)
napprox=self._napprox, _samples=self._samples)
@property
def value(self):
......@@ -129,8 +139,12 @@ class MetricGaussianKL(Energy):
lin = self._lin.with_want_metric()
mymap = map(lambda v: self._hamiltonian(lin+v).metric,
self._samples)
self._metric = utilities.my_sum(mymap)
self._metric = self._metric.scale(1./len(self._samples))
self._unscaled_metric = utilities.my_sum(mymap)
self._metric = self._unscaled_metric.scale(1./len(self._samples))
def unscaled_metric(self):
self._get_metric()
return self._unscaled_metric, 1/len(self._samples)
def apply_metric(self, x):
self._get_metric()
......
......@@ -18,9 +18,12 @@
from .. import utilities
from ..linearization import Linearization
from ..operators.energy_operators import StandardHamiltonian
from ..operators.endomorphic_operator import EndomorphicOperator
from .energy import Energy
from mpi4py import MPI
import numpy as np
from ..probing import approximation2endo
from ..sugar import makeOp, full
from ..field import Field
from ..multi_field import MultiField
......@@ -56,10 +59,83 @@ def allreduce_sum_field(fld):
return MultiField(fld.domain, res)
class KLMetric(EndomorphicOperator):
    """Endomorphic operator view of a KL energy's metric.

    `apply` ignores `mode` beyond validation, i.e. TIMES and ADJOINT_TIMES
    are served by the same metric application.
    """

    def __init__(self, KL):
        self._KL = KL
        self._domain = KL.position.domain
        self._capability = self.TIMES | self.ADJOINT_TIMES

    def apply(self, x, mode):
        self._check_input(x, mode)
        # Both supported modes reduce to applying the metric.
        return self._KL.apply_metric(x)

    def draw_sample(self, from_inverse=False, dtype=np.float64):
        # Sampling is delegated to the wrapped KL energy.
        return self._KL.metric_sample(from_inverse, dtype)
class MetricGaussianKL_MPI(Energy):
"""Provides the sampled Kullback-Leibler divergence between a distribution
and a Metric Gaussian.
A Metric Gaussian is used to approximate another probability distribution.
It is a Gaussian distribution that uses the Fisher information metric of
the other distribution at the location of its mean to approximate the
variance. In order to infer the mean, a stochastic estimate of the
Kullback-Leibler divergence is minimized. This estimate is obtained by
sampling the Metric Gaussian at the current mean. During minimization
these samples are kept constant; only the mean is updated. Due to the
typically nonlinear structure of the true distribution these samples have
to be updated eventually by instantiating `MetricGaussianKL` again. For the
true probability distribution the standard parametrization is assumed.
The samples of this class are distributed among MPI tasks.
Parameters
----------
mean : Field
Mean of the Gaussian probability distribution.
hamiltonian : StandardHamiltonian
Hamiltonian of the approximated probability distribution.
n_samples : integer
Number of samples used to stochastically estimate the KL.
constants : list
List of parameter keys that are kept constant during optimization.
Default is no constants.
point_estimates : list
List of parameter keys for which no samples are drawn, but that are
(possibly) optimized for, corresponding to point estimates of these.
Default is to draw samples for the complete domain.
mirror_samples : boolean
Whether the negative of the drawn samples are also used,
as they are equally legitimate samples. If true, the number of used
samples doubles. Mirroring samples stabilizes the KL estimate as
extreme sample variation is counterbalanced. Default is False.
napprox : int
Number of samples for computing preconditioner for sampling. No
preconditioning is done by default.
_samples : None
Only a parameter for internal uses. Typically not to be set by users.
seed_offset : int
A parameter with which one can control from which seed the samples
are drawn. Per default, the seed is different for MPI tasks, but the
same every time this class is initialized.
Note
----
The two lists `constants` and `point_estimates` are independent from each
other. It is possible to sample along domains which are kept constant
during minimization and vice versa.
See also
--------
`Metric Gaussian Variational Inference`, Jakob Knollmüller,
Torsten A. Enßlin, `<https://arxiv.org/abs/1901.11033>`_
"""
def __init__(self, mean, hamiltonian, n_samples, constants=[],
point_estimates=[], mirror_samples=False,
_samples=None):
napprox=0, _samples=None, seed_offset=0):
super(MetricGaussianKL_MPI, self).__init__(mean)
if not isinstance(hamiltonian, StandardHamiltonian):
......@@ -76,18 +152,32 @@ class MetricGaussianKL_MPI(Energy):
self._hamiltonian = hamiltonian
if _samples is None:
lo, hi = _shareRange(n_samples, ntask, rank)
if mirror_samples:
lo, hi = _shareRange(n_samples*2, ntask, rank)
else:
lo, hi = _shareRange(n_samples, ntask, rank)
met = hamiltonian(Linearization.make_partial_var(
mean, point_estimates, True)).metric
if napprox > 1:
met._approximation = makeOp(approximation2endo(met, napprox))
_samples = []
for i in range(lo, hi):
np.random.seed(i)
_samples.append(met.draw_sample(from_inverse=True))
if mirror_samples:
np.random.seed(i//2+seed_offset)
if (i % 2) and (i-1 >= lo):
_samples.append(-_samples[-1])
else:
_samples.append(((i % 2)*2-1) *
met.draw_sample(from_inverse=True))
else:
np.random.seed(i)
_samples.append(met.draw_sample(from_inverse=True))
_samples = tuple(_samples)
if mirror_samples:
_samples += [-s for s in _samples]
n_samples *= 2
_samples = tuple(_samples)
self._samples = _samples
self._seed_offset = seed_offset
self._n_samples = n_samples
self._lin = Linearization.make_partial_var(mean, constants)
v, g = None, None
......@@ -111,7 +201,8 @@ class MetricGaussianKL_MPI(Energy):
def at(self, position):
return MetricGaussianKL_MPI(
position, self._hamiltonian, self._n_samples, self._constants,
self._point_estimates, _samples=self._samples)
self._point_estimates, _samples=self._samples,
seed_offset=self._seed_offset)
@property
def value(self):
......@@ -129,8 +220,8 @@ class MetricGaussianKL_MPI(Energy):
else:
mymap = map(lambda v: self._hamiltonian(lin+v).metric,
self._samples)
self._metric = utilities.my_sum(mymap)
self._metric = self._metric.scale(1./self._n_samples)
self.unscaled_metric = utilities.my_sum(mymap)
self._metric = self.unscaled_metric.scale(1./self._n_samples)
def apply_metric(self, x):
self._get_metric()
......@@ -138,12 +229,22 @@ class MetricGaussianKL_MPI(Energy):
@property
def metric(self):
if ntask > 1:
raise ValueError("not supported when MPI is active")
return self._metric
return KLMetric(self)
@property
def samples(self):
res = _comm.allgather(self._samples)
res = [item for sublist in res for item in sublist]
return res
def unscaled_metric_sample(self, from_inverse=False, dtype=np.float64):
if from_inverse:
raise NotImplementedError()
lin = self._lin.with_want_metric()
samp = full(self._hamiltonian.domain, 0.)
for v in self._samples:
samp = samp + self._hamiltonian(lin+v).metric.draw_sample(from_inverse=False, dtype=dtype)
return allreduce_sum_field(samp)
def metric_sample(self, from_inverse=False, dtype=np.float64):
return self.unscaled_metric_sample(from_inverse, dtype)/self._n_samples
......@@ -15,6 +15,8 @@
#
# NIFTy is being developed at the Max-Planck-Institut fuer Astrophysik.
import numpy as np
from ..multi_domain import MultiDomain
from ..multi_field import MultiField
from .endomorphic_operator import EndomorphicOperator
......@@ -28,7 +30,7 @@ class BlockDiagonalOperator(EndomorphicOperator):
Domain and target of the operator.
operators : dict
Dictionary with subdomain names as keys and :class:`LinearOperator` s
as items.
as items. Any missing item will be treated as unity operator.
"""
def __init__(self, domain, operators):
if not isinstance(domain, MultiDomain):
......@@ -42,15 +44,18 @@ class BlockDiagonalOperator(EndomorphicOperator):
def apply(self, x, mode):
self._check_input(x, mode)
val = tuple(op.apply(v, mode=mode) if op is not None else None
val = tuple(op.apply(v, mode=mode) if op is not None else v
for op, v in zip(self._ops, x.values()))
return MultiField(self._domain, val)
# def draw_sample(self, from_inverse=False, dtype=np.float64):
# dtype = MultiField.build_dtype(dtype, self._domain)
# val = tuple(op.draw_sample(from_inverse, dtype)
# for op in self._op)
# return MultiField(self._domain, val)
def draw_sample(self, from_inverse=False, dtype=np.float64):
from ..sugar import from_random
val = tuple(
op.draw_sample(from_inverse, dtype)
if op is not None
else from_random('normal', self._domain[key], dtype=dtype)
for op, key in zip(self._ops, self._domain.keys()))
return MultiField(self._domain, val)