# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
#
# Copyright(C) 2013-2017 Max-Planck-Society
#
# NIFTy is being developed at the Max-Planck-Institut fuer Astrophysik
# and financially supported by the Studienstiftung des deutschen Volkes.

from __future__ import print_function
from __future__ import division
from builtins import range
import numpy as np

from .line_search import LineSearch
from ...energies import LineEnergy


class LineSearchStrongWolfe(LineSearch):
    """Class for finding a step size that satisfies the strong Wolfe conditions.

    The algorithm consists of two stages. It begins with a trial step length
    and keeps increasing it until it finds an acceptable step length or an
    interval that brackets one. If the trial step does not satisfy the strong
    Wolfe conditions, the zoom algorithm (second stage) is invoked: by
    interpolation it shrinks the interval until an acceptable step length is
    found.

    Parameters
    ----------
    c1 : float
        Parameter for the Armijo (sufficient decrease) condition rule.
        (Default: 1e-4)
    c2 : float
        Parameter for the curvature condition rule. (Default: 0.9)
    max_step_size : float
        Maximum step size allowed in the descent direction.
        (Default: 1000000000)
    max_iterations : integer
        Maximum number of iterations performed by the line search algorithm.
        (Default: 100)
    max_zoom_iterations : integer
        Maximum number of iterations performed by the zoom algorithm.
        (Default: 30)

    Attributes
    ----------
    c1 : float
        Parameter for the Armijo (sufficient decrease) condition rule.
    c2 : float
        Parameter for the curvature condition rule.
    max_step_size : float
        Maximum step size allowed in the descent direction.
    max_iterations : integer
        Maximum number of iterations performed by the line search algorithm.
    max_zoom_iterations : integer
        Maximum number of iterations performed by the zoom algorithm.
    """

    def __init__(self, c1=1e-4, c2=0.9,
                 max_step_size=1000000000, max_iterations=100,
                 max_zoom_iterations=30):

        super(LineSearchStrongWolfe, self).__init__()

        self.c1 = float(c1)
        self.c2 = float(c2)
        self.max_step_size = max_step_size
        self.max_iterations = int(max_iterations)
        self.max_zoom_iterations = int(max_zoom_iterations)

    def perform_line_search(self, energy, pk, f_k_minus_1=None):
        """Performs the first stage of the algorithm.

        It starts with a trial step size and keeps increasing it until it
        satisfies the strong Wolfe conditions. It also performs the descent
        step and returns the new energy.

        Parameters
        ----------
        energy : Energy object
            Energy object from which we will calculate the energy and the
            gradient at a specific point.
        pk : Field
            Vector pointing into the search direction.
        f_k_minus_1 : float
            Value of the function (which is being minimized) at the previous
            (k-1) iteration of the minimization. (Default: None)

        Returns
        -------
        energy_star : Energy object
            The new Energy object on the new position.
        """

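        # le_0 is the energy restricted to the line through the current
        # position along pk, parametrized by the step length alpha; it
        # corresponds to alpha = 0 and spawns LineEnergy objects at trial
        # step lengths via le_0.at(alpha).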
        le_0 = LineEnergy(0., energy, pk, 0.)

        # function value and directional derivative at alpha = 0
        old_phi_0 = f_k_minus_1
        phi_0 = le_0.value
        phiprime_0 = le_0.directional_derivative
        if phiprime_0 >= 0:
            self.logger.error("Input direction must be a descent direction")
            raise RuntimeError

        # set alphas
        alpha0 = 0.
        phi_alpha0 = phi_0
        phiprime_alpha0 = phiprime_0

        if self.preferred_initial_step_size is not None:
            alpha1 = self.preferred_initial_step_size
        elif old_phi_0 is not None:
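            # Initial trial step from the usual interpolation heuristic
            # (cf. Nocedal & Wright, "Numerical Optimization", Sec. 3.5):
            # roughly 2*(phi(0) - phi_prev)/phi'(0), capped at 1.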
            alpha1 = min(1.0, 1.01*2*(phi_0 - old_phi_0)/phiprime_0)
            if alpha1 < 0:
                alpha1 = 1.0
        else:
            alpha1 = 1.0/pk.norm()

        # start the minimization loop
        iteration_number = 0
        while iteration_number < self.max_iterations:
            iteration_number += 1
            if alpha1 == 0:
                self.logger.warn("Increment size became 0.")
                result_energy = le_0.energy
                break

            le_alpha1 = le_0.at(alpha1)
            phi_alpha1 = le_alpha1.value

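            # If the sufficient decrease condition is violated, or the trial
            # value does not improve on the previous one, a suitable step
            # length is bracketed by [alpha0, alpha1]: hand over to zoom.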
            if (phi_alpha1 > phi_0 + self.c1*alpha1*phiprime_0) or \
               ((phi_alpha1 >= phi_alpha0) and (iteration_number > 1)):
                le_star = self._zoom(alpha0, alpha1, phi_0, phiprime_0,
                                     phi_alpha0, phiprime_alpha0, phi_alpha1,
                                     le_0)
                result_energy = le_star.energy
                break

            phiprime_alpha1 = le_alpha1.directional_derivative
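            # Sufficient decrease holds; if the curvature condition holds
            # as well, alpha1 satisfies the strong Wolfe conditions.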
            if abs(phiprime_alpha1) <= -self.c2*phiprime_0:
                result_energy = le_alpha1.energy
                break

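            # A non-negative slope means the bracket is [alpha1, alpha0],
            # with alpha1 taking the role of alpha_lo in the zoom stage.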
            if phiprime_alpha1 >= 0:
                le_star = self._zoom(alpha1, alpha0, phi_0, phiprime_0,
                                     phi_alpha1, phiprime_alpha1, phi_alpha0,
                                     le_0)
                result_energy = le_star.energy
                break

            # update alphas
            alpha0, alpha1 = alpha1, min(2*alpha1, self.max_step_size)
            if alpha1 == self.max_step_size:
                self.logger.info("Reached max step size, bailing out")
                return le_alpha1.energy

            phi_alpha0 = phi_alpha1
            phiprime_alpha0 = phiprime_alpha1
        else:
            # max_iterations was reached
            self.logger.error("The line search algorithm did not converge.")
            return le_alpha1.energy
        if iteration_number > 1:
            self.logger.debug("Finished line-search after %08u steps" %
                              iteration_number)
        return result_energy

    def _zoom(self, alpha_lo, alpha_hi, phi_0, phiprime_0,
              phi_lo, phiprime_lo, phi_hi, le_0):
        """Performs the second stage of the line search algorithm.

        If the first stage was not successful, the zoom algorithm tries to
        find a suitable step length by using bisection, quadratic, or cubic
        interpolation.

        Parameters
        ----------
        alpha_lo : float
            A boundary for the step length interval.
            Fulfills the first (sufficient decrease) Wolfe condition.
        alpha_hi : float
            The other boundary for the step length interval.
        phi_0 : float
            Value of the energy at the starting point of the line search
            algorithm.
        phiprime_0 : float
            Directional derivative at the starting point of the line search
            algorithm.
        phi_lo : float
            Value of the energy if we perform a step of length alpha_lo in
            descent direction.
        phiprime_lo : float
            Directional derivative at the new position if we perform a step
            of length alpha_lo in descent direction.
        phi_hi : float
            Value of the energy if we perform a step of length alpha_hi in
            descent direction.
        le_0 : LineEnergy object
            LineEnergy at step length 0; used to evaluate the energy at
            trial step lengths via le_0.at().

        Returns
        -------
        le_star : LineEnergy object
            The LineEnergy at the found step length; its energy attribute
            holds the new Energy object.
        """
        # define the cubic and quadratic interpolant checks
        cubic_delta = 0.2  # cubic
        quad_delta = 0.1  # quadratic
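        # Safeguard margins: an interpolated point is rejected if it lies
        # within this fraction of the interval length from either endpoint;
        # the next, more robust method is used instead.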
        alpha_recent = None
        phi_recent = None

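        # Bracket invariants of the zoom stage: alpha_lo satisfies the
        # sufficient decrease condition, and the slope at alpha_lo points
        # towards alpha_hi.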
        assert phi_lo <= phi_0 + self.c1*alpha_lo*phiprime_0
        assert phiprime_lo*(alpha_hi-alpha_lo) < 0.
        for i in range(self.max_zoom_iterations):
            # assert phi_lo <= phi_0 + self.c1*alpha_lo*phiprime_0
            # assert phiprime_lo*(alpha_hi-alpha_lo)<0.
            delta_alpha = alpha_hi - alpha_lo
            a, b = min(alpha_lo, alpha_hi), max(alpha_lo, alpha_hi)

            # Try cubic interpolation
            if i > 0:
                cubic_check = cubic_delta * delta_alpha
                alpha_j = self._cubicmin(alpha_lo, phi_lo, phiprime_lo,
                                         alpha_hi, phi_hi,
                                         alpha_recent, phi_recent)
            # If cubic was not successful or not available, try quadratic
            if (i == 0) or (alpha_j is None) or (alpha_j > b - cubic_check) or\
               (alpha_j < a + cubic_check):
                quad_check = quad_delta * delta_alpha
                alpha_j = self._quadmin(alpha_lo, phi_lo, phiprime_lo,
                                        alpha_hi, phi_hi)
                # If quadratic was not successful, try bisection
                if (alpha_j is None) or (alpha_j > b - quad_check) or \
                   (alpha_j < a + quad_check):
                    alpha_j = alpha_lo + 0.5*delta_alpha

            # Check if the current value of alpha_j is already sufficient
            le_alphaj = le_0.at(alpha_j)
            phi_alphaj = le_alphaj.value

            # If the first Wolfe condition is not met replace alpha_hi
            # by alpha_j
            if (phi_alphaj > phi_0 + self.c1*alpha_j*phiprime_0) or \
               (phi_alphaj >= phi_lo):
                alpha_recent, phi_recent = alpha_hi, phi_hi
                alpha_hi, phi_hi = alpha_j, phi_alphaj
            else:
                phiprime_alphaj = le_alphaj.directional_derivative
                # If the second Wolfe condition is met, return the result
                if abs(phiprime_alphaj) <= -self.c2*phiprime_0:
                    return le_alphaj
                # If not, check the sign of the slope
                if phiprime_alphaj*delta_alpha >= 0:
                    alpha_recent, phi_recent = alpha_hi, phi_hi
                    alpha_hi, phi_hi = alpha_lo, phi_lo
                else:
                    alpha_recent, phi_recent = alpha_lo, phi_lo
                # Replace alpha_lo by alpha_j
                (alpha_lo, phi_lo, phiprime_lo) = (alpha_j, phi_alphaj,
                                                   phiprime_alphaj)

        else:
            self.logger.error("The line search algorithm (zoom) did not "
                              "converge.")
            return le_alphaj

    def _cubicmin(self, a, fa, fpa, b, fb, c, fc):
        """Estimating the minimum with cubic interpolation.

        Finds the minimizer for a cubic polynomial that goes through the
        points (a, f(a)), (b, f(b)), and (c, f(c)) with derivative at point
        a of fpa.
        f(x) = A*(x-a)^3 + B*(x-a)^2 + C*(x-a) + D
        If no minimizer can be found, returns None.

        Parameters
        ----------
        a : float
            Selected point.
        fa : float
            Value of the polynomial at point a.
        fpa : float
            Derivative of the polynomial at point a.
        b : float
            Selected point.
        fb : float
            Value of the polynomial at point b.
        c : float
            Selected point.
        fc : float
            Value of the polynomial at point c.

        Returns
        -------
        xmin : float
            Position of the approximated minimum.
        """

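        # With C = f'(a) and D = f(a) fixed, A and B follow from requiring
        # the cubic to pass through (b, fb) and (c, fc); the 2x2 system
        # below solves for them in closed form. The minimizer is the root
        # of f'(x) = 3*A*(x-a)^2 + 2*B*(x-a) + C with positive curvature,
        # xmin = a + (-B + sqrt(B^2 - 3*A*C)) / (3*A).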
        with np.errstate(divide='raise', over='raise', invalid='raise'):
            try:
                C = fpa
                db = b - a
                dc = c - a
                denom = db * db * dc * dc * (db - dc)
                d1 = np.empty((2, 2))
                d1[0, 0] = dc * dc
                d1[0, 1] = -(db*db)
                d1[1, 0] = -(dc*dc*dc)
                d1[1, 1] = db*db*db
                [A, B] = np.dot(d1, np.asarray([fb - fa - C * db,
                                                fc - fa - C * dc]).flatten())
                A /= denom
                B /= denom
                radical = B * B - 3 * A * C
                xmin = a + (-B + np.sqrt(radical)) / (3 * A)
            except ArithmeticError:
                return None
        if not np.isfinite(xmin):
            return None
        return xmin

    def _quadmin(self, a, fa, fpa, b, fb):
        """Estimating the minimum with quadratic interpolation.

        Finds the minimizer for a quadratic polynomial that goes through
        the points (a, f(a)) and (b, f(b)) with derivative at point a of
        fpa.
        f(x) = B*(x-a)^2 + C*(x-a) + D
        If no minimizer can be found, returns None.

        Parameters
        ----------
        a : float
            Selected point.
        fa : float
            Value of the polynomial at point a.
        fpa : float
            Derivative of the polynomial at point a.
        b : float
            Selected point.
        fb : float
            Value of the polynomial at point b.

        Returns
        -------
        xmin : float
            Position of the approximated minimum.
        """
        # f(x) = B*(x-a)^2 + C*(x-a) + D
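        # B follows from requiring the parabola to pass through (b, fb);
        # the minimum sits where f'(x) = 2*B*(x-a) + C = 0, i.e. at
        # xmin = a - C/(2*B).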
        with np.errstate(divide='raise', over='raise', invalid='raise'):
            try:
                D = fa
                C = fpa
                db = b - a * 1.0
                B = (fb - D - C * db) / (db * db)
                xmin = a - C / (2.0 * B)
            except ArithmeticError:
                return None
        if not np.isfinite(xmin):
            return None
        return xmin
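

# What follows is a minimal, self-contained illustration (plain NumPy, not
# used by the library) of the two conditions this line search enforces; the
# helper names below are purely illustrative and not part of the NIFTy API.
if __name__ == "__main__":
    def _f(x):
        # simple quadratic test function f(x) = 0.5*|x|^2
        return 0.5*np.dot(x, x)

    def _grad_f(x):
        return x

    def _satisfies_strong_wolfe(x, pk, alpha, c1=1e-4, c2=0.9):
        phi_0 = _f(x)
        phiprime_0 = np.dot(_grad_f(x), pk)
        phi_alpha = _f(x + alpha*pk)
        phiprime_alpha = np.dot(_grad_f(x + alpha*pk), pk)
        sufficient_decrease = phi_alpha <= phi_0 + c1*alpha*phiprime_0
        curvature = abs(phiprime_alpha) <= c2*abs(phiprime_0)
        return sufficient_decrease and curvature

    x0 = np.array([3., -4.])
    pk = -_grad_f(x0)  # steepest-descent direction
    for alpha in (0.1, 1.0, 2.5):
        print(alpha, _satisfies_strong_wolfe(x0, pk, alpha))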