line_search_strong_wolfe.py 14.9 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
Theo Steininger's avatar
Theo Steininger committed
13
14
15
16
17
#
# Copyright(C) 2013-2017 Max-Planck-Society
#
# NIFTy is being developed at the Max-Planck-Institut fuer Astrophysik
# and financially supported by the Studienstiftung des deutschen Volkes.
18

Martin Reinecke's avatar
Martin Reinecke committed
19
from __future__ import print_function
Martin Reinecke's avatar
Martin Reinecke committed
20
21
from __future__ import division
from builtins import range
22
23
24
25
26
27
import numpy as np

from .line_search import LineSearch


class LineSearchStrongWolfe(LineSearch):
28
    """Class for finding a step size that satisfies the strong Wolfe conditions.
29

Martin Reinecke's avatar
Martin Reinecke committed
30
31
32
    Algorithm contains two stages. It begins whit a trial step length and
    keeps increasing it until it finds an acceptable step length or an
    interval. If it does not satisfy the Wolfe conditions, it performs the Zoom
33
34
35
    algorithm (second stage). By interpolating it decreases the size of the
    interval until an acceptable step length is found.

36
37
    Parameters
    ----------
38
    c1 : float
39
        Parameter for Armijo condition rule. (Default: 1e-4)
40
    c2 : float
41
        Parameter for curvature condition rule. (Default: 0.9)
42
    max_step_size : float
43
        Maximum step allowed in to be made in the descent direction.
44
45
46
47
48
        (Default: 50)
    max_iterations : integer
        Maximum number of iterations performed by the line search algorithm.
        (Default: 10)
    max_zoom_iterations : integer
49
        Maximum number of iterations performed by the zoom algorithm.
50
        (Default: 10)
51

52
53
54
55
56
57
    Attributes
    ----------
    c1 : float
        Parameter for Armijo condition rule.
    c2 : float
        Parameter for curvature condition rule.
58
    max_step_size : float
59
        Maximum step allowed in to be made in the descent direction.
60
61
62
63
    max_iterations : integer
        Maximum number of iterations performed by the line search algorithm.
    max_zoom_iterations : integer
        Maximum number of iterations performed by the zoom algorithm.
64

65
66
67
    """

    def __init__(self, c1=1e-4, c2=0.9,
Martin Reinecke's avatar
Martin Reinecke committed
68
                 max_step_size=1000000000, max_iterations=100,
Martin Reinecke's avatar
Martin Reinecke committed
69
                 max_zoom_iterations=30):
70
71
72
73
74
75
76
77
78

        super(LineSearchStrongWolfe, self).__init__()

        self.c1 = np.float(c1)
        self.c2 = np.float(c2)
        self.max_step_size = max_step_size
        self.max_iterations = int(max_iterations)
        self.max_zoom_iterations = int(max_zoom_iterations)

79
    def perform_line_search(self, energy, pk, f_k_minus_1=None):
80
        """Performs the first stage of the algorithm.
81
82
83

        It starts with a trial step size and it keeps increasing it until it
        satisfy the strong Wolf conditions. It also performs the descent and
84
        returns the optimal step length and the new enrgy.
85

86
87
88
89
90
91
92
93
        Parameters
        ----------
        energy : Energy object
            Energy object from which we will calculate the energy and the
            gradient at a specific point.
        pk : Field
            Unit vector pointing into the search direction.
        f_k_minus_1 : float
94
            Value of the fuction (which is being minimized) at the k-1
95
            iteration of the line search procedure. (Default: None)
96

97
98
99
100
101
102
103
104
        Returns
        -------
        alpha_star : float
            The optimal step length in the descent direction.
        phi_star : float
            Value of the energy after the performed descent.
        energy_star : Energy object
            The new Energy object on the new position.
105
106
107

        """

108
        self._set_line_energy(energy, pk, f_k_minus_1=f_k_minus_1)
109
110
111
112
113
        max_step_size = self.max_step_size
        max_iterations = self.max_iterations

        # initialize the zero phis
        old_phi_0 = self.f_k_minus_1
Martin Reinecke's avatar
Martin Reinecke committed
114
115
        le_0 = self.line_energy.at(0)
        phi_0 = le_0.value
116
        phiprime_0 = le_0.directional_derivative
Theo Steininger's avatar
Theo Steininger committed
117
118
119
        if phiprime_0 >= 0:
            self.logger.error("Input direction must be a descent direction")
            raise RuntimeError
120
121
122

        # set alphas
        alpha0 = 0.
123
124
        if self.preferred_initial_step_size is not None:
            alpha1 = self.preferred_initial_step_size
125
        elif old_phi_0 is not None and phiprime_0 != 0:
126
127
128
129
130
131
132
133
134
135
136
            alpha1 = min(1.0, 1.01*2*(phi_0 - old_phi_0)/phiprime_0)
            if alpha1 < 0:
                alpha1 = 1.0
        else:
            alpha1 = 1.0

        # give the alpha0 phis the right init value
        phi_alpha0 = phi_0
        phiprime_alpha0 = phiprime_0

        # start the minimization loop
Martin Reinecke's avatar
Martin Reinecke committed
137
        for i in range(max_iterations):
Martin Reinecke's avatar
Martin Reinecke committed
138
139
            le_alpha1 = self.line_energy.at(alpha1)
            phi_alpha1 = le_alpha1.value
140
141
142
143
            if alpha1 == 0:
                self.logger.warn("Increment size became 0.")
                alpha_star = 0.
                phi_star = phi_0
Martin Reinecke's avatar
Martin Reinecke committed
144
                le_star = le_0
145
146
                break

Martin Reinecke's avatar
Martin Reinecke committed
147
            if (phi_alpha1 > phi_0 + self.c1*alpha1*phiprime_0) or \
Martin Reinecke's avatar
Martin Reinecke committed
148
               ((phi_alpha1 >= phi_alpha0) and (i > 0)):
Martin Reinecke's avatar
Martin Reinecke committed
149
                (alpha_star, phi_star, le_star) = self._zoom(
150
                                                    alpha0, alpha1,
151
152
153
                                                    phi_0, phiprime_0,
                                                    phi_alpha0,
                                                    phiprime_alpha0,
Martin Reinecke's avatar
Martin Reinecke committed
154
                                                    phi_alpha1)
155
156
                break

157
            phiprime_alpha1 = le_alpha1.directional_derivative
Martin Reinecke's avatar
Martin Reinecke committed
158
            if abs(phiprime_alpha1) <= -self.c2*phiprime_0:
159
160
                alpha_star = alpha1
                phi_star = phi_alpha1
Martin Reinecke's avatar
Martin Reinecke committed
161
                le_star = le_alpha1
162
163
164
                break

            if phiprime_alpha1 >= 0:
Martin Reinecke's avatar
Martin Reinecke committed
165
                (alpha_star, phi_star, le_star) = self._zoom(
166
                                                    alpha1, alpha0,
167
168
169
                                                    phi_0, phiprime_0,
                                                    phi_alpha1,
                                                    phiprime_alpha1,
Martin Reinecke's avatar
Martin Reinecke committed
170
                                                    phi_alpha0)
171
172
173
174
                break

            # update alphas
            alpha0, alpha1 = alpha1, min(2*alpha1, max_step_size)
Martin Reinecke's avatar
Martin Reinecke committed
175
            if alpha1 == max_step_size:
Martin Reinecke's avatar
Martin Reinecke committed
176
                print ("reached max step size, bailing out")
Martin Reinecke's avatar
Martin Reinecke committed
177
178
179
180
                alpha_star = alpha1
                phi_star = phi_alpha1
                le_star = le_alpha1
                break
181
182
183
184
185
186
187
            phi_alpha0 = phi_alpha1
            phiprime_alpha0 = phiprime_alpha1

        else:
            # max_iterations was reached
            alpha_star = alpha1
            phi_star = phi_alpha1
Martin Reinecke's avatar
Martin Reinecke committed
188
            le_star = le_alpha1
189
190
            self.logger.error("The line search algorithm did not converge.")

191
        # extract the full energy from the line_energy
Martin Reinecke's avatar
Martin Reinecke committed
192
        energy_star = le_star.energy
Theo Steininger's avatar
Theo Steininger committed
193
194
        direction_length = pk.norm()
        step_length = alpha_star * direction_length
195
        return step_length, phi_star, energy_star
196
197

    def _zoom(self, alpha_lo, alpha_hi, phi_0, phiprime_0,
Martin Reinecke's avatar
Martin Reinecke committed
198
              phi_lo, phiprime_lo, phi_hi):
199
        """Performs the second stage of the line search algorithm.
200
201
202

        If the first stage was not successful then the Zoom algorithm tries to
        find a suitable step length by using bisection, quadratic, cubic
203
        interpolation.
204

205
206
207
        Parameters
        ----------
        alpha_lo : float
Martin Reinecke's avatar
Martin Reinecke committed
208
209
            A boundary for the step length interval.
            Fulfills Wolfe condition 1.
Martin Reinecke's avatar
Martin Reinecke committed
210
        alpha_hi : float
Martin Reinecke's avatar
Martin Reinecke committed
211
            The other boundary for the step length interval.
212
        phi_0 : float
213
            Value of the energy at the starting point of the line search
214
215
216
217
            algorithm.
        phiprime_0 : Field
            Gradient at the starting point of the line search algorithm.
        phi_lo : float
218
            Value of the energy if we perform a step of length alpha_lo in
219
220
            descent direction.
        phiprime_lo : Field
221
            Gradient at the nwe position if we perform a step of length
222
223
            alpha_lo in descent direction.
        phi_hi : float
224
            Value of the energy if we perform a step of length alpha_hi in
225
            descent direction.
226

227
228
229
230
231
232
233
234
        Returns
        -------
        alpha_star : float
            The optimal step length in the descent direction.
        phi_star : float
            Value of the energy after the performed descent.
        energy_star : Energy object
            The new Energy object on the new position.
235

236
        """
237
238
239
240
        max_iterations = self.max_zoom_iterations
        # define the cubic and quadratic interpolant checks
        cubic_delta = 0.2  # cubic
        quad_delta = 0.1  # quadratic
Martin Reinecke's avatar
Martin Reinecke committed
241
        phiprime_alphaj = 0.
Theo Steininger's avatar
Theo Steininger committed
242
243
        alpha_recent = None
        phi_recent = None
244

Martin Reinecke's avatar
Martin Reinecke committed
245
        assert phi_lo <= phi_0 + self.c1*alpha_lo*phiprime_0
Theo Steininger's avatar
Theo Steininger committed
246
        assert phiprime_lo*(alpha_hi-alpha_lo) < 0.
Martin Reinecke's avatar
Martin Reinecke committed
247
        for i in range(max_iterations):
Theo Steininger's avatar
Theo Steininger committed
248
249
            # assert phi_lo <= phi_0 + self.c1*alpha_lo*phiprime_0
            # assert phiprime_lo*(alpha_hi-alpha_lo)<0.
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
            delta_alpha = alpha_hi - alpha_lo
            if delta_alpha < 0:
                a, b = alpha_hi, alpha_lo
            else:
                a, b = alpha_lo, alpha_hi

            # Try cubic interpolation
            if i > 0:
                cubic_check = cubic_delta * delta_alpha
                alpha_j = self._cubicmin(alpha_lo, phi_lo, phiprime_lo,
                                         alpha_hi, phi_hi,
                                         alpha_recent, phi_recent)
            # If cubic was not successful or not available, try quadratic
            if (i == 0) or (alpha_j is None) or (alpha_j > b - cubic_check) or\
               (alpha_j < a + cubic_check):
                quad_check = quad_delta * delta_alpha
                alpha_j = self._quadmin(alpha_lo, phi_lo, phiprime_lo,
                                        alpha_hi, phi_hi)
268
                # If quadratic was not successful, try bisection
269
270
271
272
273
                if (alpha_j is None) or (alpha_j > b - quad_check) or \
                   (alpha_j < a + quad_check):
                    alpha_j = alpha_lo + 0.5*delta_alpha

            # Check if the current value of alpha_j is already sufficient
Martin Reinecke's avatar
Martin Reinecke committed
274
275
            le_alphaj = self.line_energy.at(alpha_j)
            phi_alphaj = le_alphaj.value
276

277
278
            # If the first Wolfe condition is not met replace alpha_hi
            # by alpha_j
Martin Reinecke's avatar
Martin Reinecke committed
279
            if (phi_alphaj > phi_0 + self.c1*alpha_j*phiprime_0) or\
280
281
282
283
               (phi_alphaj >= phi_lo):
                alpha_recent, phi_recent = alpha_hi, phi_hi
                alpha_hi, phi_hi = alpha_j, phi_alphaj
            else:
284
                phiprime_alphaj = le_alphaj.directional_derivative
285
                # If the second Wolfe condition is met, return the result
Martin Reinecke's avatar
Martin Reinecke committed
286
                if abs(phiprime_alphaj) <= -self.c2*phiprime_0:
287
288
                    alpha_star = alpha_j
                    phi_star = phi_alphaj
Martin Reinecke's avatar
Martin Reinecke committed
289
                    le_star = le_alphaj
290
291
292
293
294
295
296
297
298
299
300
301
                    break
                # If not, check the sign of the slope
                if phiprime_alphaj*delta_alpha >= 0:
                    alpha_recent, phi_recent = alpha_hi, phi_hi
                    alpha_hi, phi_hi = alpha_lo, phi_lo
                else:
                    alpha_recent, phi_recent = alpha_lo, phi_lo
                # Replace alpha_lo by alpha_j
                (alpha_lo, phi_lo, phiprime_lo) = (alpha_j, phi_alphaj,
                                                   phiprime_alphaj)

        else:
Martin Reinecke's avatar
Martin Reinecke committed
302
303
            alpha_star, phi_star, le_star = \
                alpha_j, phi_alphaj, le_alphaj
304
305
306
            self.logger.error("The line search algorithm (zoom) did not "
                              "converge.")

Martin Reinecke's avatar
Martin Reinecke committed
307
        return alpha_star, phi_star, le_star
308
309

    def _cubicmin(self, a, fa, fpa, b, fb, c, fc):
310
        """Estimating the minimum with cubic interpolation.
311

312
        Finds the minimizer for a cubic polynomial that goes through the
313
314
        points ( a,f(a) ), ( b,f(b) ), and ( c,f(c) ) with derivative at point
        a of fpa.
315
        f(x) = A *(x-a)^3 + B*(x-a)^2 + C*(x-a) + D
316
        If no minimizer can be found return None
317

318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
        Parameters
        ----------
        a : float
            Selected point.
        fa : float
            Value of polynomial at point a.
        fpa : Field
            Derivative at point a.
        b : float
            Selected point.
        fb : float
            Value of polynomial at point b.
        c : float
            Selected point.
        fc : float
            Value of polynomial at point c.
334

335
336
337
338
        Returns
        -------
        xmin : float
            Position of the approximated minimum.
339

340
341
342
343
344
345
346
        """

        with np.errstate(divide='raise', over='raise', invalid='raise'):
            try:
                C = fpa
                db = b - a
                dc = c - a
347
                denom = db * db * dc * dc * (db - dc)
348
                d1 = np.empty((2, 2))
349
350
351
352
                d1[0, 0] = dc * dc
                d1[0, 1] = -(db*db)
                d1[1, 0] = -(dc*dc*dc)
                d1[1, 1] = db*db*db
353
354
355
356
357
358
359
360
361
362
363
364
365
                [A, B] = np.dot(d1, np.asarray([fb - fa - C * db,
                                                fc - fa - C * dc]).flatten())
                A /= denom
                B /= denom
                radical = B * B - 3 * A * C
                xmin = a + (-B + np.sqrt(radical)) / (3 * A)
            except ArithmeticError:
                return None
        if not np.isfinite(xmin):
            return None
        return xmin

    def _quadmin(self, a, fa, fpa, b, fb):
366
        """Estimating the minimum with quadratic interpolation.
367

368
        Finds the minimizer for a quadratic polynomial that goes through
369
370
        the points ( a,f(a) ), ( b,f(b) ) with derivative at point a of fpa.
        f(x) = B*(x-a)^2 + C*(x-a) + D
371

372
373
374
375
376
377
378
379
380
381
382
383
        Parameters
        ----------
        a : float
            Selected point.
        fa : float
            Value of polynomial at point a.
        fpa : Field
            Derivative at point a.
        b : float
            Selected point.
        fb : float
            Value of polynomial at point b.
384

385
386
387
        Returns
        -------
        xmin : float
388
            Position of the approximated minimum.
389
390
391
392
393
394
395
396
397
398
399
400
401
402
        """
        # f(x) = B*(x-a)^2 + C*(x-a) + D
        with np.errstate(divide='raise', over='raise', invalid='raise'):
            try:
                D = fa
                C = fpa
                db = b - a * 1.0
                B = (fb - D - C * db) / (db * db)
                xmin = a - C / (2.0 * B)
            except ArithmeticError:
                return None
        if not np.isfinite(xmin):
            return None
        return xmin