descent_minimizers.py 15.1 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
Theo Steininger's avatar
Theo Steininger committed
13
#
14
# Copyright(C) 2013-2019 Max-Planck-Society
Theo Steininger's avatar
Theo Steininger committed
15
#
16
# NIFTy is being developed at the Max-Planck-Institut fuer Astrophysik.
17

18
import numpy as np
Philipp Arras's avatar
Philipp Arras committed
19

20
from ..logger import logger
Martin Reinecke's avatar
Martin Reinecke committed
21
22
23
24
25
from ..probing import approximation2endo
from ..sugar import makeOp
from .conjugate_gradient import ConjugateGradient
from .iteration_controllers import (AbsDeltaEnergyController,
                                    GradientNormController)
Martin Reinecke's avatar
Martin Reinecke committed
26
from .line_search import LineSearch
27
from .minimizer import Minimizer
Martin Reinecke's avatar
Martin Reinecke committed
28
from .quadratic_energy import QuadraticEnergy
29
30
31


class DescentMinimizer(Minimizer):
32
    """A base class used by gradient methods to find a local minimum.
33
34
35
36
37
38
39
40
41
42
43
44

    Descent minimization methods are used to find a local minimum of a scalar
    function by following a descent direction. This class implements the
    minimization procedure once a descent direction is known. The descent
    direction has to be implemented separately.

    Parameters
    ----------
    controller : IterationController
        Object that decides when to terminate the minimization.
    line_searcher : callable *optional*
        Function which infers the step size in the descent direction
Martin Reinecke's avatar
Martin Reinecke committed
45
        (default : LineSearch()).
46
47
    """

Martin Reinecke's avatar
Martin Reinecke committed
48
    def __init__(self, controller, line_searcher=LineSearch()):
49
50
51
52
        self._controller = controller
        self.line_searcher = line_searcher

    def __call__(self, energy):
53
        """Performs the minimization of the provided Energy functional.
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87

        Parameters
        ----------
        energy : Energy
           Energy object which provides value, gradient and metric at a
           specific position in parameter space.

        Returns
        -------
        Energy
            Latest `energy` of the minimization.
        int
            Can be controller.CONVERGED or controller.ERROR

        Notes
        -----
        The minimization is stopped if
            * the controller returns controller.CONVERGED or controller.ERROR,
            * a perfectly flat point is reached,
            * according to the line-search the minimum is found,
        """
        f_k_minus_1 = None
        controller = self._controller
        status = controller.start(energy)
        if status != controller.CONTINUE:
            return energy, status

        while True:
            # check if position is at a flat point
            if energy.gradient_norm == 0:
                return energy, controller.CONVERGED

            # compute a step length that reduces energy.value sufficiently
            new_energy, success = self.line_searcher.perform_line_search(
Martin Reinecke's avatar
Martin Reinecke committed
88
89
                energy=energy,
                pk=self.get_descent_direction(energy, f_k_minus_1),
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
                f_k_minus_1=f_k_minus_1)
            if not success:
                self.reset()

            f_k_minus_1 = energy.value

            if new_energy.value > energy.value:
                logger.error("Error: Energy has increased")
                return energy, controller.ERROR

            if new_energy.value == energy.value:
                logger.warning(
                    "Warning: Energy has not changed. Assuming convergence...")
                return new_energy, controller.CONVERGED

            energy = new_energy
            status = self._controller.check(energy)
            if status != controller.CONTINUE:
                return energy, status

    def reset(self):
        pass

Martin Reinecke's avatar
Martin Reinecke committed
113
    def get_descent_direction(self, energy, old_value=None):
114
        """Calculates the next descent direction.
115
116
117
118
119
120
121

        Parameters
        ----------
        energy : Energy
            An instance of the Energy class which shall be minimized. The
            position of `energy` is used as the starting point of minimization.

Martin Reinecke's avatar
Martin Reinecke committed
122
123
124
125
        old_value : float
            if provided, this must be the value of the energy in the previous
            step.

126
127
128
129
130
131
132
133
134
        Returns
        -------
        Field
           The descent direction.
        """
        raise NotImplementedError


class SteepestDescent(DescentMinimizer):
135
    """Implementation of the steepest descent minimization scheme.
136
137
138
139
140

    Also known as 'gradient descent'. This algorithm simply follows the
    functional's gradient for minimization.
    """

Martin Reinecke's avatar
Martin Reinecke committed
141
    def get_descent_direction(self, energy, _=None):
142
143
144
        return -energy.gradient


Martin Reinecke's avatar
Martin Reinecke committed
145
class RelaxedNewton(DescentMinimizer):
146
    """Calculates the descent direction according to a Newton scheme.
Martin Reinecke's avatar
Martin Reinecke committed
147
148
149
150
151
152
153

    The descent direction is determined by weighting the gradient at the
    current parameter position with the inverse local metric.
    """

    def __init__(self, controller, line_searcher=None):
        if line_searcher is None:
Martin Reinecke's avatar
Martin Reinecke committed
154
            line_searcher = LineSearch(preferred_initial_step_size=1.)
Martin Reinecke's avatar
Martin Reinecke committed
155
156
157
        super(RelaxedNewton, self).__init__(controller=controller,
                                            line_searcher=line_searcher)

Martin Reinecke's avatar
Martin Reinecke committed
158
    def get_descent_direction(self, energy, _=None):
Martin Reinecke's avatar
Martin Reinecke committed
159
160
161
        return -energy.metric.inverse_times(energy.gradient)


162
class NewtonCG(DescentMinimizer):
163
    """Calculates the descent direction according to a Newton-CG scheme.
164
165
166
167

    Algorithm derived from SciPy sources.
    """

Martin Reinecke's avatar
Martin Reinecke committed
168
    def __init__(self, controller, napprox=0, line_searcher=None, name=None,
169
                 nreset=20, max_cg_iterations=200, energy_reduction_factor=0.1):
170
        if line_searcher is None:
Martin Reinecke's avatar
Martin Reinecke committed
171
            line_searcher = LineSearch(preferred_initial_step_size=1.)
172
173
        super(NewtonCG, self).__init__(controller=controller,
                                       line_searcher=line_searcher)
Martin Reinecke's avatar
Martin Reinecke committed
174
175
176
        self._napprox = napprox
        self._name = name
        self._nreset = nreset
177
178
        self._max_cg_iterations = max_cg_iterations
        self._alpha = energy_reduction_factor
Martin Reinecke's avatar
Martin Reinecke committed
179
180
181
182
183

    def get_descent_direction(self, energy, old_value=None):
        if old_value is None:
            ic = GradientNormController(iteration_limit=5)
        else:
184
            ediff = self._alpha*(old_value-energy.value)
Martin Reinecke's avatar
Martin Reinecke committed
185
            ic = AbsDeltaEnergyController(
186
                ediff, iteration_limit=self._max_cg_iterations, name=self._name)
Martin Reinecke's avatar
Martin Reinecke committed
187
188
189
        e = QuadraticEnergy(0*energy.position, energy.metric, energy.gradient)
        p = None
        if self._napprox > 1:
Reimar H Leike's avatar
fixup    
Reimar H Leike committed
190
191
            met = energy.metric
            p = makeOp(approximation2endo(met, self._napprox)).inverse
Martin Reinecke's avatar
Martin Reinecke committed
192
193
        e, conv = ConjugateGradient(ic, nreset=self._nreset)(e, p)
        return -e.position
194
195


196
class L_BFGS(DescentMinimizer):
Martin Reinecke's avatar
Martin Reinecke committed
197
    def __init__(self, controller, line_searcher=LineSearch(),
198
199
200
201
202
203
204
205
206
207
208
209
210
211
                 max_history_length=5):
        super(L_BFGS, self).__init__(controller=controller,
                                     line_searcher=line_searcher)
        self.max_history_length = max_history_length

    def __call__(self, energy):
        self.reset()
        return super(L_BFGS, self).__call__(energy)

    def reset(self):
        self._k = 0
        self._s = [None]*self.max_history_length
        self._y = [None]*self.max_history_length

Martin Reinecke's avatar
Martin Reinecke committed
212
    def get_descent_direction(self, energy, _=None):
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
        x = energy.position
        s = self._s
        y = self._y
        k = self._k
        maxhist = self.max_history_length
        gradient = energy.gradient

        nhist = min(k, maxhist)
        alpha = [None]*maxhist
        p = -gradient
        if k > 0:
            idx = (k-1) % maxhist
            s[idx] = x-self._lastx
            y[idx] = gradient-self._lastgrad
        if nhist > 0:
            for i in range(k-1, k-nhist-1, -1):
                idx = i % maxhist
                alpha[idx] = s[idx].vdot(p)/s[idx].vdot(y[idx])
                p = p - alpha[idx]*y[idx]
            idx = (k-1) % maxhist
            fact = s[idx].vdot(y[idx]) / y[idx].vdot(y[idx])
            if fact <= 0.:
                logger.error("L-BFGS curvature not positive definite!")
            p = p*fact
            for i in range(k-nhist, k):
                idx = i % maxhist
                beta = y[idx].vdot(p) / s[idx].vdot(y[idx])
                p = p + (alpha[idx]-beta)*s[idx]
        self._lastx = x
        self._lastgrad = gradient
        self._k += 1
        return p
Theo Steininger's avatar
Theo Steininger committed
245
246


247
class VL_BFGS(DescentMinimizer):
Martin Reinecke's avatar
Martin Reinecke committed
248
249
250
251
252
253
254
255
256
257
258
259
260
261
    """Implementation of the Vector-free L-BFGS minimization scheme.

    Find the descent direction by using the inverse Hessian.
    Instead of storing the whole matrix, it stores only the last few
    updates, which are used to do operations requiring the inverse
    Hessian product. The updates are represented in a new basis to optimize
    the algorithm.

    References
    ----------
    W. Chen, Z. Wang, J. Zhou, "Large-scale L-BFGS using MapReduce", 2014,
    Microsoft
    """

Martin Reinecke's avatar
Martin Reinecke committed
262
    def __init__(self, controller, line_searcher=LineSearch(),
Martin Reinecke's avatar
Martin Reinecke committed
263
                 max_history_length=5):
Martin Reinecke's avatar
cleanup    
Martin Reinecke committed
264
265
        super(VL_BFGS, self).__init__(controller=controller,
                                      line_searcher=line_searcher)
266
267
        self.max_history_length = max_history_length

268
    def __call__(self, energy):
269
        self._information_store = None
270
        return super(VL_BFGS, self).__call__(energy)
271

Martin Reinecke's avatar
stage 1    
Martin Reinecke committed
272
273
274
    def reset(self):
        self._information_store = None

Martin Reinecke's avatar
Martin Reinecke committed
275
    def get_descent_direction(self, energy, _=None):
276
277
        x = energy.position
        gradient = energy.gradient
278
279
280
281
        # initialize the information store if it doesn't already exist
        try:
            self._information_store.add_new_point(x, gradient)
        except AttributeError:
Martin Reinecke's avatar
Martin Reinecke committed
282
283
            self._information_store = _InformationStore(
                self.max_history_length, x0=x, gradient=gradient)
284
285
286
287

        b = self._information_store.b
        delta = self._information_store.delta

288
        descent_direction = delta[0] * b[0]
Martin Reinecke's avatar
Martin Reinecke committed
289
        for i in range(1, len(delta)):
290
            descent_direction = descent_direction + delta[i]*b[i]
291

292
        return descent_direction
Theo Steininger's avatar
Theo Steininger committed
293
294


Martin Reinecke's avatar
Martin Reinecke committed
295
class _InformationStore(object):
Matevz, Sraml (sraml)'s avatar
Matevz, Sraml (sraml) committed
296
    """Class for storing a list of past updates.
297

Matevz, Sraml (sraml)'s avatar
Matevz, Sraml (sraml) committed
298
299
    Parameters
    ----------
Martin Reinecke's avatar
Martin Reinecke committed
300
    max_history_length : int
Matevz, Sraml (sraml)'s avatar
Matevz, Sraml (sraml) committed
301
302
303
304
305
        Maximum number of stored past updates.
    x0 : Field
        Initial position in variable space.
    gradient : Field
        Gradient at position x0.
306

Matevz, Sraml (sraml)'s avatar
Matevz, Sraml (sraml) committed
307
308
    Attributes
    ----------
Martin Reinecke's avatar
Martin Reinecke committed
309
    max_history_length : int
Matevz, Sraml (sraml)'s avatar
Matevz, Sraml (sraml) committed
310
311
        Maximum number of stored past updates.
    s : List
Martin Reinecke's avatar
Martin Reinecke committed
312
        Circular buffer of past position differences, which are Fields.
Matevz, Sraml (sraml)'s avatar
Matevz, Sraml (sraml) committed
313
    y : List
Martin Reinecke's avatar
Martin Reinecke committed
314
        Circular buffer of past gradient differences, which are Fields.
Matevz, Sraml (sraml)'s avatar
Matevz, Sraml (sraml) committed
315
    last_x : Field
Martin Reinecke's avatar
tweaks    
Martin Reinecke committed
316
        Latest position in variable space.
Matevz, Sraml (sraml)'s avatar
Matevz, Sraml (sraml) committed
317
    last_gradient : Field
Martin Reinecke's avatar
tweaks    
Martin Reinecke committed
318
        Gradient at latest position.
Martin Reinecke's avatar
Martin Reinecke committed
319
    k : int
Martin Reinecke's avatar
tweaks    
Martin Reinecke committed
320
        Number of updates that have taken place
Martin Reinecke's avatar
Martin Reinecke committed
321
    ss : numpy.ndarray
Martin Reinecke's avatar
Martin Reinecke committed
322
        2D circular buffer of scalar products between different elements of s.
Martin Reinecke's avatar
Martin Reinecke committed
323
    sy : numpy.ndarray
Martin Reinecke's avatar
Martin Reinecke committed
324
        2D circular buffer of scalar products between elements of s and y.
Martin Reinecke's avatar
Martin Reinecke committed
325
    yy : numpy.ndarray
Martin Reinecke's avatar
Martin Reinecke committed
326
        2D circular buffer of scalar products between different elements of y.
Matevz, Sraml (sraml)'s avatar
Matevz, Sraml (sraml) committed
327
    """
Philipp Arras's avatar
Philipp Arras committed
328

329
330
    def __init__(self, max_history_length, x0, gradient):
        self.max_history_length = max_history_length
331
332
        self.s = [None]*max_history_length
        self.y = [None]*max_history_length
333
334
        self.last_x = x0
        self.last_gradient = gradient
Theo Steininger's avatar
Theo Steininger committed
335
        self.k = 0
336

Martin Reinecke's avatar
Martin Reinecke committed
337
        mmax = max_history_length
Martin Reinecke's avatar
Martin Reinecke committed
338
339
340
        self.ss = np.empty((mmax, mmax), dtype=np.float64)
        self.sy = np.empty((mmax, mmax), dtype=np.float64)
        self.yy = np.empty((mmax, mmax), dtype=np.float64)
341
342
343

    @property
    def history_length(self):
Martin Reinecke's avatar
Martin Reinecke committed
344
        """Returns the number of currently stored updates."""
345
346
347
348
        return min(self.k, self.max_history_length)

    @property
    def b(self):
349
350
        """Combines s, y and gradient to form the new base vectors b.

Matevz, Sraml (sraml)'s avatar
Matevz, Sraml (sraml) committed
351
352
        Returns
        -------
Martin Reinecke's avatar
Martin Reinecke committed
353
        List
Matevz, Sraml (sraml)'s avatar
Matevz, Sraml (sraml) committed
354
355
            List of new basis vectors.
        """
356
357
        result = []
        m = self.history_length
Martin Reinecke's avatar
Martin Reinecke committed
358
        mmax = self.max_history_length
359

Martin Reinecke's avatar
Martin Reinecke committed
360
        for i in range(m):
Martin Reinecke's avatar
Martin Reinecke committed
361
            result.append(self.s[(self.k-m+i) % mmax])
362

Martin Reinecke's avatar
Martin Reinecke committed
363
        for i in range(m):
Martin Reinecke's avatar
Martin Reinecke committed
364
            result.append(self.y[(self.k-m+i) % mmax])
365
366
367
368
369
370
371

        result.append(self.last_gradient)

        return result

    @property
    def b_dot_b(self):
Matevz, Sraml (sraml)'s avatar
Matevz, Sraml (sraml) committed
372
        """Generates the (2m+1) * (2m+1) scalar matrix.
373

Matevz, Sraml (sraml)'s avatar
Matevz, Sraml (sraml) committed
374
        The i,j-th element of the matrix is a scalar product between the i-th
375
376
        and j-th base vector.

Matevz, Sraml (sraml)'s avatar
Matevz, Sraml (sraml) committed
377
378
        Returns
        -------
Martin Reinecke's avatar
Martin Reinecke committed
379
        numpy.ndarray
Matevz, Sraml (sraml)'s avatar
Matevz, Sraml (sraml) committed
380
381
            Scalar matrix.
        """
382
        m = self.history_length
Martin Reinecke's avatar
Martin Reinecke committed
383
        mmax = self.max_history_length
384
385
386
        k = self.k
        result = np.empty((2*m+1, 2*m+1), dtype=np.float)

Martin Reinecke's avatar
tweaks    
Martin Reinecke committed
387
        # update the stores
Martin Reinecke's avatar
Martin Reinecke committed
388
        k1 = (k-1) % mmax
Martin Reinecke's avatar
Martin Reinecke committed
389
        for i in range(m):
Martin Reinecke's avatar
Martin Reinecke committed
390
            kmi = (k-m+i) % mmax
Martin Reinecke's avatar
Martin Reinecke committed
391
392
393
            self.ss[kmi, k1] = self.ss[k1, kmi] = self.s[kmi].vdot(self.s[k1])
            self.yy[kmi, k1] = self.yy[k1, kmi] = self.y[kmi].vdot(self.y[k1])
            self.sy[kmi, k1] = self.s[kmi].vdot(self.y[k1])
Martin Reinecke's avatar
Martin Reinecke committed
394
        for j in range(m-1):
Martin Reinecke's avatar
Martin Reinecke committed
395
396
            kmj = (k-m+j) % mmax
            self.sy[k1, kmj] = self.s[k1].vdot(self.y[kmj])
Martin Reinecke's avatar
tweaks    
Martin Reinecke committed
397

Martin Reinecke's avatar
Martin Reinecke committed
398
        for i in range(m):
Martin Reinecke's avatar
Martin Reinecke committed
399
            kmi = (k-m+i) % mmax
Martin Reinecke's avatar
Martin Reinecke committed
400
            for j in range(m):
Martin Reinecke's avatar
Martin Reinecke committed
401
                kmj = (k-m+j) % mmax
Martin Reinecke's avatar
Martin Reinecke committed
402
403
404
                result[i, j] = self.ss[kmi, kmj]
                result[i, m+j] = result[m+j, i] = self.sy[kmi, kmj]
                result[m+i, m+j] = self.yy[kmi, kmj]
405

406
            sgrad_i = self.s[kmi].vdot(self.last_gradient)
Martin Reinecke's avatar
tweaks    
Martin Reinecke committed
407
            result[2*m, i] = result[i, 2*m] = sgrad_i
408

Martin Reinecke's avatar
fix    
Martin Reinecke committed
409
            ygrad_i = self.y[kmi].vdot(self.last_gradient)
Martin Reinecke's avatar
tweaks    
Martin Reinecke committed
410
            result[2*m, m+i] = result[m+i, 2*m] = ygrad_i
411

Martin Reinecke's avatar
tweaks    
Martin Reinecke committed
412
        result[2*m, 2*m] = self.last_gradient.norm()
413
        return result
Theo Steininger's avatar
Theo Steininger committed
414
415

    @property
416
    def delta(self):
Matevz, Sraml (sraml)'s avatar
Matevz, Sraml (sraml) committed
417
        """Calculates the new scalar coefficients (deltas).
418

Matevz, Sraml (sraml)'s avatar
Matevz, Sraml (sraml) committed
419
420
        Returns
        -------
Martin Reinecke's avatar
Martin Reinecke committed
421
        List
Matevz, Sraml (sraml)'s avatar
Matevz, Sraml (sraml) committed
422
423
            List of the new scalar coefficients (deltas).
        """
424
425
426
427
428
429
430
431
        m = self.history_length
        b_dot_b = self.b_dot_b

        delta = np.zeros(2*m+1, dtype=np.float)
        delta[2*m] = -1

        alpha = np.empty(m, dtype=np.float)

Martin Reinecke's avatar
Martin Reinecke committed
432
433
        for j in range(m-1, -1, -1):
            delta_b_b = sum([delta[l] * b_dot_b[l, j] for l in range(2*m+1)])
434
435
436
            alpha[j] = delta_b_b/b_dot_b[j, m+j]
            delta[m+j] -= alpha[j]

Martin Reinecke's avatar
Martin Reinecke committed
437
        for i in range(2*m+1):
438
439
            delta[i] *= b_dot_b[m-1, 2*m-1]/b_dot_b[2*m-1, 2*m-1]

Martin Reinecke's avatar
Martin Reinecke committed
440
        for j in range(m):
Martin Reinecke's avatar
Martin Reinecke committed
441
            delta_b_b = sum([delta[l]*b_dot_b[m+j, l] for l in range(2*m+1)])
442
443
444
445
446
            beta = delta_b_b/b_dot_b[j, m+j]
            delta[j] += (alpha[j] - beta)

        return delta

Theo Steininger's avatar
Theo Steininger committed
447
    def add_new_point(self, x, gradient):
Matevz, Sraml (sraml)'s avatar
Matevz, Sraml (sraml) committed
448
449
        """Updates the s list and y list.

Martin Reinecke's avatar
Martin Reinecke committed
450
451
        Calculates the new position and gradient differences and enters them
        into the respective list.
Matevz, Sraml (sraml)'s avatar
Matevz, Sraml (sraml) committed
452
        """
Martin Reinecke's avatar
Martin Reinecke committed
453
454
455
        mmax = self.max_history_length
        self.s[self.k % mmax] = x - self.last_x
        self.y[self.k % mmax] = gradient - self.last_gradient
Theo Steininger's avatar
Theo Steininger committed
456

457
458
        self.last_x = x
        self.last_gradient = gradient
Theo Steininger's avatar
Theo Steininger committed
459

Martin Reinecke's avatar
fixes    
Martin Reinecke committed
460
        self.k += 1