# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
#
# Copyright(C) 2013-2021 Max-Planck-Society
#
# NIFTy is being developed at the Max-Planck-Institut fuer Astrophysik.

from .minimizer import Minimizer


class ADVIOptimizer(Minimizer):
    """Provide an implementation of an adaptive step-size sequence optimizer,
    following https://arxiv.org/abs/1603.00788.

    Parameters
    ----------
    steps: int
        The number of consecutive steps during one call of the optimizer.
    eta: positive float
        The scale of the step-size sequence. It might have to be adapted to the
        application to increase performance. Default: 1.
    alpha: float between 0 and 1
        The fraction of how much the current gradient impacts the momentum.
        Default: 0.1.
    tau: positive float
        This quantity prevents division by zero. Default: 1.
    epsilon: positive float
        A small value that guarantees the Robbins and Monro conditions.
        Default: 1e-16.
    """

    def __init__(self, steps, eta=1, alpha=0.1, tau=1, epsilon=1e-16):
        self.alpha = alpha
        self.eta = eta
        self.tau = tau
        self.epsilon = epsilon
        # Iteration counter of the step-size sequence; starts at 1 so that
        # counter ** (-0.5 + epsilon) is well defined in the very first step.
        self.counter = 1
        self.steps = steps
        # Moving average of the squared gradient; lazily initialized from the
        # first gradient seen in __call__.
        self.s = None

    def _step(self, position, gradient):
        """Perform one adaptive gradient step and return the new position."""
        # Exponentially weighted average of the squared gradient
        # (step-size sequence of arXiv:1603.00788).
        self.s = self.alpha * gradient ** 2 + (1 - self.alpha) * self.s
        # Element-wise step size; kept as an attribute so it can be inspected
        # after a call.
        self.rho = (
            self.eta
            * self.counter ** (-0.5 + self.epsilon)
            / (self.tau + (self.s).sqrt())
        )
        new_position = position - self.rho * gradient
        self.counter += 1
        return new_position

    def __call__(self, E):
        """Run `self.steps` optimization steps starting from the energy `E`.

        Returns the re-sampled energy at the final position together with a
        convergence level (currently always 0; see FIXME below).
        """
        from ..minimization.parametric_gaussian_kl import ParametricGaussianKL

        if self.s is None:
            # Initialize the squared-gradient average from the first gradient.
            self.s = E.gradient ** 2
        # FIXME come up with something how to determine convergence
        convergence = 0
        for _ in range(self.steps):
            x = self._step(E.position, E.gradient)
            # FIXME maybe some KL function for resample? Should make it more generic.
            E = ParametricGaussianKL.make(
                x, E._hamiltonian, E._variational_model, E._n_samples, E._mirror_samples
            )

        return E, convergence

    def reset(self):
        """Reset the optimizer to its initial state so it can be reused."""
        self.counter = 1
        self.s = None