diff --git a/ChangeLog.md b/ChangeLog.md index 5e89b500fd15cd945f3fa6e1fe26dd7c04d7b6c9..ae942d380db11f23183c812798e594436c7951cc 100644 --- a/ChangeLog.md +++ b/ChangeLog.md @@ -84,7 +84,7 @@ LikelihoodOperator A new subclass of `EnergyOperator` was introduced and all `EnergyOperator`s that are likelihoods are now `LikelihoodOperator`s. A `LikelihoodOperator` -has to implement the function `get_transformation`, which returns a +has to implement the function `get_transformation`, which returns a coordinate transformation in which the Fisher metric of the likelihood becomes the identity matrix. diff --git a/demos/getting_started_3.py b/demos/getting_started_3.py index f632026e84df8d840c5c0265a4f04e222e0fbb47..61c217e73b2de6ead7e1486782bbe789506db2a0 100644 --- a/demos/getting_started_3.py +++ b/demos/getting_started_3.py @@ -11,7 +11,7 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see <http://www.gnu.org/licenses/>. # -# Copyright(C) 2013-2020 Max-Planck-Society +# Copyright(C) 2013-2021 Max-Planck-Society # # NIFTy is being developed at the Max-Planck-Institut fuer Astrophysik. @@ -97,13 +97,13 @@ def main(): data = signal_response(mock_position) + N.draw_sample_with_dtype(dtype=np.float64) # Minimization parameters - ic_sampling = ift.AbsDeltaEnergyController( - deltaE=0.05, iteration_limit=100) - ic_newton = ift.AbsDeltaEnergyController( - name='Newton', deltaE=0.5, iteration_limit=35) + ic_sampling = ift.AbsDeltaEnergyController(name="Sampling (linear)", + deltaE=0.05, iteration_limit=100) + ic_newton = ift.AbsDeltaEnergyController(name='Newton', deltaE=0.5, + convergence_level=2, iteration_limit=35) minimizer = ift.NewtonCG(ic_newton) - ic_sampling_nl = ift.AbsDeltaEnergyController( - name='Sampling', deltaE=0.5, iteration_limit=15, convergence_level=2) + ic_sampling_nl = ift.AbsDeltaEnergyController(name='Sampling (nonlin)', + deltaE=0.5, iteration_limit=15, convergence_level=2) minimizer_sampling = ift.NewtonCG(ic_sampling_nl) # Set up likelihood and information Hamiltonian diff --git a/docs/source/ift.rst b/docs/source/ift.rst index 9fe49164d42845ed46925794316867b91cf8cb2e..eb533cac168f3fca8410f68c8621bbaa6a260f73 100644 --- a/docs/source/ift.rst +++ b/docs/source/ift.rst @@ -224,7 +224,7 @@ Thus, only the gradient of the KL is needed with respect to this, which can be e We stochastically estimate the KL-divergence and gradients with a set of samples drawn from the approximate posterior distribution. The particular structure of the covariance allows us to draw independent samples solving a certain system of equations. -This KL-divergence for MGVI is implemented by +This KL-divergence for MGVI is implemented by :func:`~nifty7.minimization.kl_energies.MetricGaussianKL` within NIFTy7. It should be noted that MGVI can typically only provide a lower bound on the variance. @@ -246,7 +246,7 @@ In general, such a transformation can only be constructed locally, i.E. in a nei .. math:: \mathcal{Q}_{\bar{\xi}}(\xi) = \left(g_{\bar{\xi}}^{-1} * \mathcal{Q}\right)(\xi) = \int \delta\left(\xi - g_{\bar{\xi}}^{-1}(y)\right) \ \mathcal{G}(y, 1) \ \mathcal{D}y \ , - + where :math:`\delta` denotes the kronecker-delta. The remaining task in geoVI is to obtain the optimal expansion point :math:`\bar{\xi}` suth that :math:`\mathcal{Q}_{\bar{\xi}}` matches the posterior as good as possible. Analogous to the MGVI algorithm, :math:`\bar{\xi}` is obtained by minimization of the KL-divergenge between :math:`\mathcal{P}` and :math:`\mathcal{Q}_{\bar{\xi}}` w.r.t. :math:`\bar{\xi}`. Furthermore the KL is represented as a stochastic estimate using a set of samples drawn from :math:`\mathcal{Q}_{\bar{\xi}}` which is implemented in NIFTy7 via :func:`~nifty7.minimization.kl_energies.GeoMetricKL`. diff --git a/src/minimization/kl_energies.py b/src/minimization/kl_energies.py index 5fc729ff3eb33069ef5d2502dfa8dbcc4d8a212b..ef67185a579ca4ffd6ec24a8f68143126dcf5614 100644 --- a/src/minimization/kl_energies.py +++ b/src/minimization/kl_energies.py @@ -11,7 +11,7 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see <http://www.gnu.org/licenses/>. # -# Copyright(C) 2013-2020 Max-Planck-Society +# Copyright(C) 2013-2021 Max-Planck-Society # Authors: Philipp Frank # # NIFTy is being developed at the Max-Planck-Institut fuer Astrophysik. @@ -21,6 +21,7 @@ from functools import reduce from .. import random from .. import utilities +from ..domain_tuple import DomainTuple from ..linearization import Linearization from ..multi_field import MultiField from ..operators.inversion_enabler import InversionEnabler @@ -38,17 +39,6 @@ from ..utilities import myassert from .energy import Energy from .descent_minimizers import DescentMinimizer, ConjugateGradient -def _is_prior_dtype_float(H): - real = True - dts = H._prior._met._dtype - if isinstance(dts, dict): - for k in dts.keys(): - if not np.issubdtype(dts[k], np.float): - real = False - else: - real = np.issubdtype(dts, np.float) - return real - def _get_lo_hi(comm, n_samples): ntask, rank, _ = utilities.get_MPI_params_from_comm(comm) @@ -109,7 +99,7 @@ class _SampledKLEnergy(Energy): self._comm = comm self._local_samples = local_samples self._nanisinf = bool(nanisinf) - + lin = Linearization.make_var(mean) v, g = [], [] for s in self._local_samples: @@ -183,12 +173,21 @@ class _SampledKLEnergy(Energy): class _GeoMetricSampler: - def __init__(self, position, H, minimizer, start_from_lin, + def __init__(self, position, H, minimizer, start_from_lin, n_samples, mirror_samples, napprox=0, want_error=False): if not isinstance(H, StandardHamiltonian): raise NotImplementedError - if not _is_prior_dtype_float(H): + + # Check domain dtype + dts = H._prior._met._dtype + if isinstance(H.domain, DomainTuple): + real = np.issubdtype(dts, np.float) + else: + real = all([np.issubdtype(dts[kk], np.float) for kk in dts.keys()]) + if not real: raise ValueError("_GeoMetricSampler only supports real valued latent DOFs.") + # /Check domain dtype + if isinstance(position, MultiField): self._position = position.extract(H.domain) else: @@ -199,19 +198,18 @@ class _GeoMetricSampler: dtype, f_lh = tr scale = ScalingOperator(f_lh.target, 1.) if isinstance(dtype, dict): - sampling = reduce((lambda a,b: a*b), + sampling = reduce((lambda a,b: a*b), [dtype[k] is not None for k in dtype.keys()]) else: sampling = dtype is not None scale = SamplingDtypeSetter(scale, dtype) if sampling else scale - + fl = f_lh(Linearization.make_var(self._position)) - self._g = (Adder(-self._position) + - fl.jac.adjoint@Adder(-fl.val)@f_lh) + self._g = (Adder(-self._position) + fl.jac.adjoint@Adder(-fl.val)@f_lh) self._likelihood = SandwichOperator.make(fl.jac, scale) self._prior = SamplingDtypeSetter(ScalingOperator(fl.domain,1.), np.float64) self._met = self._likelihood + self._prior - if napprox >=1: + if napprox >= 1: self._approximation = makeOp(approximation2endo(self._met, napprox)).inverse else: self._approximation = None @@ -378,7 +376,7 @@ def MetricGaussianKL(mean, hamiltonian, n_samples, mirror_samples, constants=[], local_samples, nanisinf) -def GeoMetricKL(mean, hamiltonian, n_samples, minimizer_samp, mirror_samples, +def GeoMetricKL(mean, hamiltonian, n_samples, minimizer_samp, mirror_samples, start_from_lin = True, constants=[], point_estimates=[], napprox=0, comm=None, nanisinf=True): """Provides the sampled Kullback-Leibler used in geometric Variational @@ -386,7 +384,7 @@ def GeoMetricKL(mean, hamiltonian, n_samples, minimizer_samp, mirror_samples, In geoVI a probability distribution is approximated with a standard normal distribution in the canonical coordinate system of the Riemannian manifold - associated with the metric of the other distribution. The coordinate + associated with the metric of the other distribution. The coordinate transformation is approximated by expanding around a point. In order to infer the expansion point, a stochastic estimate of the Kullback-Leibler divergence is minimized. This estimate is obtained by sampling from the @@ -414,7 +412,7 @@ def GeoMetricKL(mean, hamiltonian, n_samples, minimizer_samp, mirror_samples, sample variation is counterbalanced. start_from_lin : boolean Whether the non-linear sampling should start using the inverse - linearized transformation (i.E. the corresponding MGVI sample). + linearized transformation (i.E. the corresponding MGVI sample). If False, the minimization starts from the prior sample. Default is True. constants : list @@ -425,7 +423,7 @@ def GeoMetricKL(mean, hamiltonian, n_samples, minimizer_samp, mirror_samples, (possibly) optimized for, corresponding to point estimates of these. Default is to draw samples for the complete domain. napprox : int - Number of samples for computing preconditioner for linear sampling. + Number of samples for computing preconditioner for linear sampling. No preconditioning is done by default. comm : MPI communicator or None If not None, samples will be distributed as evenly as possible @@ -444,7 +442,7 @@ def GeoMetricKL(mean, hamiltonian, n_samples, minimizer_samp, mirror_samples, during minimization and vice versa. DomainTuples should never be created using the constructor, but rather via the factory function :attr:`make`! - + Note on MPI and mirror_samples: As in MGVI, mirroreing samples can help to stabilize the latent mean as it reduces sampling noise. But unlike MGVI a mirrored sample involves an diff --git a/src/operators/energy_operators.py b/src/operators/energy_operators.py index 624e1aa4475e978bea961646cb7a6eb288ec44a5..d2b081a864be1a89f8bef10ac62b238ab592f6cd 100644 --- a/src/operators/energy_operators.py +++ b/src/operators/energy_operators.py @@ -93,10 +93,11 @@ class LikelihoodOperator(EnergyOperator): :func:`~nifty7.operators.operator.Operator.get_transformation`. """ dtp, f = self.get_transformation() - ch = ScalingOperator(f.target, 1.) + ch = None if dtp is not None: - ch = SamplingDtypeSetter(ch, dtp) - return SandwichOperator.make(f(Linearization.make_var(x)).jac, ch) + ch = SamplingDtypeSetter(ScalingOperator(f.target, 1.), dtp) + bun = f(Linearization.make_var(x)).jac + return SandwichOperator.make(bun, ch) class Squared2NormOperator(EnergyOperator): @@ -176,13 +177,13 @@ class VariableCovarianceGaussianEnergy(LikelihoodOperator): use_full_fisher: boolean Whether or not the proper Fisher information metric should be used as - a `metric`. If False the same approximation used in + a `metric`. If False the same approximation used in `get_transformation` is used instead. Default is True """ - def __init__(self, domain, residual_key, inverse_covariance_key, sampling_dtype, - use_full_fisher = True): + def __init__(self, domain, residual_key, inverse_covariance_key, + sampling_dtype, use_full_fisher=True): self._kr = str(residual_key) self._ki = str(inverse_covariance_key) dom = DomainTuple.make(domain) @@ -190,7 +191,7 @@ class VariableCovarianceGaussianEnergy(LikelihoodOperator): self._dt = {self._kr: sampling_dtype, self._ki: np.float64} _check_sampling_dtype(self._domain, self._dt) self._cplx = _iscomplex(sampling_dtype) - self._use_fisher = use_full_fisher + self._use_full_fisher = use_full_fisher def apply(self, x): self._check_input(x) @@ -201,7 +202,7 @@ class VariableCovarianceGaussianEnergy(LikelihoodOperator): res = 0.5*(r.vdot(r*i) - i.ptw("log").sum()) if not x.want_metric: return res - if self._use_fisher: + if self._use_full_fisher: met = 1. if self._cplx else 0.5 met = MultiField.from_dict({self._kr: i.val, self._ki: met*i.val**(-2)}, domain=self._domain) @@ -231,7 +232,7 @@ class VariableCovarianceGaussianEnergy(LikelihoodOperator): return None, res def get_transformation(self): - """Note that for the metric of a `VariableCovarianceGaussianEnergy` no + """Note that for the metric of a `VariableCovarianceGaussianEnergy` no global transformation to Euclidean space exists. A local approximation ivoking the resudual is used instead. """ @@ -616,6 +617,3 @@ class AveragedEnergy(EnergyOperator): dtp, trafo = self._h.get_transformation() mymap = map(lambda v: trafo@Adder(v), self._res_samples) return dtp, utilities.my_sum(mymap)/np.sqrt(len(self._res_samples)) - - - diff --git a/src/operators/operator.py b/src/operators/operator.py index 757e9694a7c88428bc0838fd993a68f4afd62164..1e0dc1dc733c7e02e62f70317d1433b956f2d9ff 100644 --- a/src/operators/operator.py +++ b/src/operators/operator.py @@ -110,14 +110,14 @@ class Operator(metaclass=NiftyMeta): def get_transformation(self): """The coordinate transformation that maps into a coordinate system where the metric of a likelihood is the Euclidean metric. - This is `None`, except when the object an instance of + This is `None`, except when the object an instance of :class:`~nifty7.operators.energy_operators.LikelihoodOperator` or a (nested) sum thereof. - + Returns ------- np.dtype, or dict of np.dtype : The dtype(s) of the target space of the transformation. - + Operator : The transformation that maps from `domain` into the Euclidean target space. """ return None