# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
#
# Copyright(C) 2013-2019 Max-Planck-Society
#
# NIFTy is being developed at the Max-Planck-Institut fuer Astrophysik.

from ..utilities import NiftyMeta
from ..logger import logger
import numpy as np


class LineEnergy:
    """Evaluates an underlying Energy along a certain line direction.

    Given an Energy class and a line direction, its position is parametrized
    by a scalar step size along the descent direction relative to a zero
    point.

    Parameters
    ----------
    line_position : float
        Defines the full spatial position of this energy via
        self.energy.position = zero_point + line_position*line_direction
    energy : Energy
        The Energy object which will be evaluated along the given direction.
    line_direction : Field
        Direction used for line evaluation. Does not have to be normalized.
    offset : float, optional
        Indirectly defines the zero point of the line via the equation
        energy.position = zero_point + offset*line_direction
        (default : 0.).

    Notes
    -----
    The LineEnergy is used in minimization schemes in order to perform line
    searches. It describes an underlying Energy which is restricted along one
    direction, only requiring the step size parameter to determine a new
    position.
    """

    def __init__(self, line_position, energy, line_direction, offset=0.):
        self._line_position = float(line_position)
        self._line_direction = line_direction

        # If the requested position coincides with the zero point, reuse the
        # existing Energy object instead of constructing a new (identical) one.
        if self._line_position == float(offset):
            self._energy = energy
        else:
            pos = energy.position \
                + (self._line_position-float(offset))*self._line_direction
            self._energy = energy.at(position=pos)

    def at(self, line_position):
        """Returns LineEnergy at new position, memorizing the zero point.

        Parameters
        ----------
        line_position : float
            Parameter for the new position on the line direction.

        Returns
        -------
            LineEnergy object at new position with same zero point as `self`.

        """
        # Passing our own line_position as `offset` keeps the zero point of
        # the new LineEnergy identical to ours.
        return LineEnergy(line_position, self._energy, self._line_direction,
                          offset=self._line_position)

    @property
    def energy(self):
        """
        Energy : The underlying Energy object
        """
        return self._energy

    @property
    def value(self):
        """
        float : The value of the energy functional at given `position`.
        """
        return self._energy.value

    @property
    def directional_derivative(self):
        """
        float : The directional derivative at the given `position`.
        """
        res = self._energy.gradient.vdot(self._line_direction)
        # For a real-valued energy the projection of the gradient onto the
        # search direction must be (numerically) real; warn if it is not.
        if abs(res.imag) / max(abs(res.real), 1.) > 1e-12:
            logger.warning("directional derivative has non-negligible "
                           "imaginary part: {}".format(res))
        return res.real


class LineSearch(metaclass=NiftyMeta):
    """Class for finding a step size that satisfies the strong Wolfe
    conditions.

    Algorithm contains two stages. It begins with a trial step length and
    keeps increasing it until it finds an acceptable step length or an
    interval. If it does not satisfy the Wolfe conditions, it performs the Zoom
    algorithm (second stage). By interpolating it decreases the size of the
    interval until an acceptable step length is found.

    Parameters
    ----------
    preferred_initial_step_size : float, optional
        Newton-based methods should initialize this to 1.
    c1 : float
        Parameter for Armijo condition rule. Default: 1e-4.
    c2 : float
        Parameter for curvature condition rule. Default: 0.9.
    max_step_size : float
        Maximum step allowed to be made in the descent direction.
        Default: 1e30.
    max_iterations : int, optional
        Maximum number of iterations performed by the line search algorithm.
        Default: 100.
    max_zoom_iterations : int, optional
        Maximum number of iterations performed by the zoom algorithm.
        Default: 100.
    """

    def __init__(self, preferred_initial_step_size=None, c1=1e-4, c2=0.9,
                 max_step_size=1e30, max_iterations=100,
                 max_zoom_iterations=100):

        self.preferred_initial_step_size = preferred_initial_step_size
        # NOTE: the `np.float` alias was removed in NumPy 1.24; the builtin
        # float is the correct replacement and behaves identically here.
        self.c1 = float(c1)
        self.c2 = float(c2)
        self.max_step_size = max_step_size
        self.max_iterations = int(max_iterations)
        self.max_zoom_iterations = int(max_zoom_iterations)

    def perform_line_search(self, energy, pk, f_k_minus_1=None):
        """Performs the first stage of the algorithm.

        It starts with a trial step size and it keeps increasing it until it
        satisfies the strong Wolfe conditions. It also performs the descent and
        returns the optimal step length and the new energy.

        Parameters
        ----------
        energy : Energy
            Energy object from which we will calculate the energy and the
            gradient at a specific point.
        pk : Field
            Vector pointing into the search direction.
        f_k_minus_1 : float, optional
            Value of the fuction (which is being minimized) at the k-1
            iteration of the line search procedure. Default: None.

        Returns
        -------
        Energy
            The new Energy object on the new position.
        bool
            whether the line search was considered successful or not
        """
        le_0 = LineEnergy(0., energy, pk, 0.)

        # Respect a problem-specific step limit if the energy provides one,
        # but never exceed the configured maximum.
        maxstepsize = energy.longest_step(pk)
        if maxstepsize is None:
            maxstepsize = self.max_step_size
        maxstepsize = min(maxstepsize, self.max_step_size)

        # initialize the zero phis
        old_phi_0 = f_k_minus_1
        phi_0 = le_0.value
        phiprime_0 = le_0.directional_derivative
        if phiprime_0 == 0:
            logger.warning(
                "Directional derivative is zero; assuming convergence")
            return energy, False
        if phiprime_0 > 0:
            logger.error("Error: search direction is not a descent direction")
            return energy, False

        # set alphas
        alpha0 = 0.
        phi_alpha0 = phi_0
        phiprime_alpha0 = phiprime_0

        # Choose the initial trial step: an explicit preference wins, then a
        # quadratic-interpolation guess based on the previous function value,
        # then a normalized unit step.
        if self.preferred_initial_step_size is not None:
            alpha1 = self.preferred_initial_step_size
        elif old_phi_0 is not None:
            alpha1 = min(1.0, 1.01*2*(phi_0 - old_phi_0)/phiprime_0)
            if alpha1 < 0:
                alpha1 = 1.0
        else:
            alpha1 = 1.0/pk.norm()
        alpha1 = min(alpha1, 0.99*maxstepsize)

        # start the minimization loop
        iteration_number = 0
        while iteration_number < self.max_iterations:
            iteration_number += 1
            if alpha1 == 0:
                return le_0.energy, False

            le_alpha1 = le_0.at(alpha1)
            phi_alpha1 = le_alpha1.value

            # Armijo condition violated (or no further decrease): the interval
            # [alpha0, alpha1] brackets an acceptable point -> zoom.
            if (phi_alpha1 > phi_0 + self.c1*alpha1*phiprime_0) or \
               ((phi_alpha1 >= phi_alpha0) and (iteration_number > 1)):
                return self._zoom(alpha0, alpha1, phi_0, phiprime_0,
                                  phi_alpha0, phiprime_alpha0, phi_alpha1,
                                  le_0)

            phiprime_alpha1 = le_alpha1.directional_derivative
            # Strong Wolfe curvature condition satisfied -> done.
            if abs(phiprime_alpha1) <= -self.c2*phiprime_0:
                return le_alpha1.energy, True

            # Positive slope: the minimum lies between alpha1 and alpha0.
            if phiprime_alpha1 >= 0:
                return self._zoom(alpha1, alpha0, phi_0, phiprime_0,
                                  phi_alpha1, phiprime_alpha1, phi_alpha0,
                                  le_0)

            # update alphas
            alpha0, alpha1 = alpha1, min(2*alpha1, maxstepsize)
            if alpha1 == maxstepsize:
                logger.warning("max step size reached")
                return le_alpha1.energy, False

            phi_alpha0 = phi_alpha1
            phiprime_alpha0 = phiprime_alpha1

        logger.warning("max iterations reached")
        return le_alpha1.energy, False

    def _zoom(self, alpha_lo, alpha_hi, phi_0, phiprime_0,
              phi_lo, phiprime_lo, phi_hi, le_0):
        """Performs the second stage of the line search algorithm.

        If the first stage was not successful then the Zoom algorithm tries to
        find a suitable step length by using bisection, quadratic, cubic
        interpolation.

        Parameters
        ----------
        alpha_lo : float
            A boundary for the step length interval.
            Fulfills Wolfe condition 1.
        alpha_hi : float
            The other boundary for the step length interval.
        phi_0 : float
            Value of the energy at the starting point of the line search
            algorithm.
        phiprime_0 : float
            directional derivative at the starting point of the line search
            algorithm.
        phi_lo : float
            Value of the energy if we perform a step of length alpha_lo in
            descent direction.
        phiprime_lo : float
            directional derivative at the new position if we perform a step of
            length alpha_lo in descent direction.
        phi_hi : float
            Value of the energy if we perform a step of length alpha_hi in
            descent direction.

        Returns
        -------
        Energy
            The new Energy object on the new position.
        """
        cubic_delta = 0.2  # cubic interpolant checks
        quad_delta = 0.1  # quadratic interpolant checks
        alpha_recent = None
        phi_recent = None

        # Invariants required by the zoom stage (Nocedal & Wright, Alg. 3.6):
        # alpha_lo satisfies the Armijo condition and the slope at alpha_lo
        # points towards alpha_hi.
        if phi_lo > phi_0 + self.c1*alpha_lo*phiprime_0:
            raise ValueError("inconsistent data")
        if phiprime_lo*(alpha_hi-alpha_lo) >= 0.:
            raise ValueError("inconsistent data")
        for i in range(self.max_zoom_iterations):
            # assert phi_lo <= phi_0 + self.c1*alpha_lo*phiprime_0
            # assert phiprime_lo*(alpha_hi-alpha_lo)<0.
            delta_alpha = alpha_hi - alpha_lo
            a, b = min(alpha_lo, alpha_hi), max(alpha_lo, alpha_hi)

            # Try cubic interpolation
            if i > 0:
                cubic_check = cubic_delta * delta_alpha
                alpha_j = self._cubicmin(alpha_lo, phi_lo, phiprime_lo,
                                         alpha_hi, phi_hi,
                                         alpha_recent, phi_recent)
            # If cubic was not successful or not available, try quadratic
            if (i == 0) or (alpha_j is None) or (alpha_j > b - cubic_check) or\
               (alpha_j < a + cubic_check):
                quad_check = quad_delta * delta_alpha
                alpha_j = self._quadmin(alpha_lo, phi_lo, phiprime_lo,
                                        alpha_hi, phi_hi)
                # If quadratic was not successful, try bisection
                if (alpha_j is None) or (alpha_j > b - quad_check) or \
                   (alpha_j < a + quad_check):
                    alpha_j = alpha_lo + 0.5*delta_alpha

            # Check if the current value of alpha_j is already sufficient
            le_alphaj = le_0.at(alpha_j)
            phi_alphaj = le_alphaj.value

            # If the first Wolfe condition is not met replace alpha_hi
            # by alpha_j
            if (phi_alphaj > phi_0 + self.c1*alpha_j*phiprime_0) or \
               (phi_alphaj >= phi_lo):
                alpha_recent, phi_recent = alpha_hi, phi_hi
                alpha_hi, phi_hi = alpha_j, phi_alphaj
            else:
                phiprime_alphaj = le_alphaj.directional_derivative
                # If the second Wolfe condition is met, return the result
                if abs(phiprime_alphaj) <= -self.c2*phiprime_0:
                    return le_alphaj.energy, True
                # If not, check the sign of the slope
                if phiprime_alphaj*delta_alpha >= 0:
                    alpha_recent, phi_recent = alpha_hi, phi_hi
                    alpha_hi, phi_hi = alpha_lo, phi_lo
                else:
                    alpha_recent, phi_recent = alpha_lo, phi_lo
                # Replace alpha_lo by alpha_j
                (alpha_lo, phi_lo, phiprime_lo) = (alpha_j, phi_alphaj,
                                                   phiprime_alphaj)
        else:
            # for/else: executed only when the loop exhausted its iterations
            # without returning an acceptable step.
            logger.warning(
                "The line search algorithm (zoom) did not converge.")
            return le_alphaj.energy, False

    def _cubicmin(self, a, fa, fpa, b, fb, c, fc):
        """Estimating the minimum with cubic interpolation.

        Finds the minimizer for a cubic polynomial that goes through the
        points (a,fa), (b,fb), and (c,fc) with derivative at point a of fpa.
        If no minimizer can be found return None

        Parameters
        ----------
        a, fa, fpa : float
            abscissa, function value and derivative at first point
        b, fb : float
            abscissa and function value at second point
        c, fc : float
            abscissa and function value at third point

        Returns
        -------
        xmin : float
            Position of the approximated minimum.
        """
        # Raise on any floating-point problem so a degenerate configuration
        # (coincident points, overflow) cleanly falls back to None.
        with np.errstate(divide='raise', over='raise', invalid='raise'):
            try:
                C = fpa
                db = b - a
                dc = c - a
                denom = db * db * dc * dc * (db - dc)
                d1 = np.empty((2, 2))
                d1[0, 0] = dc * dc
                d1[0, 1] = -(db*db)
                d1[1, 0] = -(dc*dc*dc)
                d1[1, 1] = db*db*db
                [A, B] = np.dot(d1, np.asarray([fb - fa - C * db,
                                                fc - fa - C * dc]).ravel())
                A /= denom
                B /= denom
                radical = B * B - 3 * A * C
                xmin = a + (-B + np.sqrt(radical)) / (3 * A)
            except ArithmeticError:
                return None
        if not np.isfinite(xmin):
            return None
        return xmin

    def _quadmin(self, a, fa, fpa, b, fb):
        """Estimating the minimum with quadratic interpolation.

        Finds the minimizer for a quadratic polynomial that goes through
        the points (a,fa), (b,fb) with derivative at point a of fpa.

        Parameters
        ----------
        a, fa, fpa : float
            abscissa, function value and derivative at first point
        b, fb : float
            abscissa and function value at second point

        Returns
        -------
        xmin : float
            Position of the approximated minimum.
        """
        with np.errstate(divide='raise', over='raise', invalid='raise'):
            try:
                db = b - a * 1.0
                B = (fb - fa - fpa * db) / (db * db)
                xmin = a - fpa / (2.0 * B)
            except ArithmeticError:
                return None
        if not np.isfinite(xmin):
            return None
        return xmin