line_search_strong_wolfe.py 13.9 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
Theo Steininger's avatar
Theo Steininger committed
13
14
15
16
17
#
# Copyright(C) 2013-2017 Max-Planck-Society
#
# NIFTy is being developed at the Max-Planck-Institut fuer Astrophysik
# and financially supported by the Studienstiftung des deutschen Volkes.
18

19
20
21
22
23
24
import numpy as np

from .line_search import LineSearch


class LineSearchStrongWolfe(LineSearch):
25
    """Class for finding a step size that satisfies the strong Wolfe conditions.
26

Martin Reinecke's avatar
Martin Reinecke committed
27
28
29
    Algorithm contains two stages. It begins whit a trial step length and
    keeps increasing it until it finds an acceptable step length or an
    interval. If it does not satisfy the Wolfe conditions, it performs the Zoom
30
31
32
    algorithm (second stage). By interpolating it decreases the size of the
    interval until an acceptable step length is found.

33
34
    Parameters
    ----------
35
    c1 : float
36
        Parameter for Armijo condition rule. (Default: 1e-4)
37
    c2 : float
38
        Parameter for curvature condition rule. (Default: 0.9)
39
    max_step_size : float
40
        Maximum step allowed in to be made in the descent direction.
41
42
43
44
45
        (Default: 50)
    max_iterations : integer
        Maximum number of iterations performed by the line search algorithm.
        (Default: 10)
    max_zoom_iterations : integer
46
        Maximum number of iterations performed by the zoom algorithm.
47
        (Default: 10)
48

49
50
51
52
53
54
    Attributes
    ----------
    c1 : float
        Parameter for Armijo condition rule.
    c2 : float
        Parameter for curvature condition rule.
55
    max_step_size : float
56
        Maximum step allowed in to be made in the descent direction.
57
58
59
60
    max_iterations : integer
        Maximum number of iterations performed by the line search algorithm.
    max_zoom_iterations : integer
        Maximum number of iterations performed by the zoom algorithm.
61

62
63
64
    """

    def __init__(self, c1=1e-4, c2=0.9,
Martin Reinecke's avatar
Martin Reinecke committed
65
                 max_step_size=1000000000, max_iterations=100,
Martin Reinecke's avatar
Martin Reinecke committed
66
                 max_zoom_iterations=30):
67
68
69
70
71
72
73
74
75

        super(LineSearchStrongWolfe, self).__init__()

        self.c1 = np.float(c1)
        self.c2 = np.float(c2)
        self.max_step_size = max_step_size
        self.max_iterations = int(max_iterations)
        self.max_zoom_iterations = int(max_zoom_iterations)

76
    def perform_line_search(self, energy, pk, f_k_minus_1=None):
77
        """Performs the first stage of the algorithm.
78
79
80

        It starts with a trial step size and it keeps increasing it until it
        satisfy the strong Wolf conditions. It also performs the descent and
81
        returns the optimal step length and the new enrgy.
82

83
84
85
86
87
88
89
90
        Parameters
        ----------
        energy : Energy object
            Energy object from which we will calculate the energy and the
            gradient at a specific point.
        pk : Field
            Unit vector pointing into the search direction.
        f_k_minus_1 : float
91
            Value of the fuction (which is being minimized) at the k-1
92
            iteration of the line search procedure. (Default: None)
93

94
95
96
97
98
99
100
101
        Returns
        -------
        alpha_star : float
            The optimal step length in the descent direction.
        phi_star : float
            Value of the energy after the performed descent.
        energy_star : Energy object
            The new Energy object on the new position.
102
103
104

        """

105
        self._set_line_energy(energy, pk, f_k_minus_1=f_k_minus_1)
106
107
108
109
110
        max_step_size = self.max_step_size
        max_iterations = self.max_iterations

        # initialize the zero phis
        old_phi_0 = self.f_k_minus_1
Martin Reinecke's avatar
Martin Reinecke committed
111
112
113
        le_0 = self.line_energy.at(0)
        phi_0 = le_0.value
        phiprime_0 = le_0.dd
Martin Reinecke's avatar
Martin Reinecke committed
114
        assert phiprime_0<0, "input direction must be a descent direction"
115
116
117

        # set alphas
        alpha0 = 0.
118
119
        if self.preferred_initial_step_size is not None:
            alpha1 = self.preferred_initial_step_size
120
        elif old_phi_0 is not None and phiprime_0 != 0:
121
122
123
124
125
126
127
128
129
130
131
132
            alpha1 = min(1.0, 1.01*2*(phi_0 - old_phi_0)/phiprime_0)
            if alpha1 < 0:
                alpha1 = 1.0
        else:
            alpha1 = 1.0

        # give the alpha0 phis the right init value
        phi_alpha0 = phi_0
        phiprime_alpha0 = phiprime_0

        # start the minimization loop
        for i in xrange(max_iterations):
Martin Reinecke's avatar
Martin Reinecke committed
133
134
            le_alpha1 = self.line_energy.at(alpha1)
            phi_alpha1 = le_alpha1.value
135
136
137
138
            if alpha1 == 0:
                self.logger.warn("Increment size became 0.")
                alpha_star = 0.
                phi_star = phi_0
Martin Reinecke's avatar
Martin Reinecke committed
139
                le_star = le_0
140
141
                break

Martin Reinecke's avatar
Martin Reinecke committed
142
            if (phi_alpha1 > phi_0 + self.c1*alpha1*phiprime_0) or \
Martin Reinecke's avatar
Martin Reinecke committed
143
               ((phi_alpha1 >= phi_alpha0) and (i > 0)):
Martin Reinecke's avatar
Martin Reinecke committed
144
                print "zoom1:",i
Martin Reinecke's avatar
Martin Reinecke committed
145
                (alpha_star, phi_star, le_star) = self._zoom(
146
                                                    alpha0, alpha1,
147
148
149
                                                    phi_0, phiprime_0,
                                                    phi_alpha0,
                                                    phiprime_alpha0,
Martin Reinecke's avatar
Martin Reinecke committed
150
                                                    phi_alpha1)
151
152
                break

Martin Reinecke's avatar
Martin Reinecke committed
153
154
            phiprime_alpha1 = le_alpha1.dd
            if abs(phiprime_alpha1) <= -self.c2*phiprime_0:
155
156
                alpha_star = alpha1
                phi_star = phi_alpha1
Martin Reinecke's avatar
Martin Reinecke committed
157
                le_star = le_alpha1
158
159
160
                break

            if phiprime_alpha1 >= 0:
Martin Reinecke's avatar
Martin Reinecke committed
161
                print "zoom2:",i
Martin Reinecke's avatar
Martin Reinecke committed
162
                (alpha_star, phi_star, le_star) = self._zoom(
163
                                                    alpha1, alpha0,
164
165
166
                                                    phi_0, phiprime_0,
                                                    phi_alpha1,
                                                    phiprime_alpha1,
Martin Reinecke's avatar
Martin Reinecke committed
167
                                                    phi_alpha0)
168
169
170
171
                break

            # update alphas
            alpha0, alpha1 = alpha1, min(2*alpha1, max_step_size)
Martin Reinecke's avatar
Martin Reinecke committed
172
            if alpha1 == max_step_size:
Martin Reinecke's avatar
Martin Reinecke committed
173
                print "reached max step size, bailing out"
Martin Reinecke's avatar
Martin Reinecke committed
174
175
176
177
                alpha_star = alpha1
                phi_star = phi_alpha1
                le_star = le_alpha1
                break
178
179
180
181
182
183
184
            phi_alpha0 = phi_alpha1
            phiprime_alpha0 = phiprime_alpha1

        else:
            # max_iterations was reached
            alpha_star = alpha1
            phi_star = phi_alpha1
Martin Reinecke's avatar
Martin Reinecke committed
185
            le_star = le_alpha1
186
187
            self.logger.error("The line search algorithm did not converge.")

188
        # extract the full energy from the line_energy
Martin Reinecke's avatar
Martin Reinecke committed
189
        energy_star = le_star.energy
Theo Steininger's avatar
Theo Steininger committed
190
191
        direction_length = pk.norm()
        step_length = alpha_star * direction_length
192
        return step_length, phi_star, energy_star
193
194

    def _zoom(self, alpha_lo, alpha_hi, phi_0, phiprime_0,
Martin Reinecke's avatar
Martin Reinecke committed
195
              phi_lo, phiprime_lo, phi_hi):
196
        """Performs the second stage of the line search algorithm.
197
198
199

        If the first stage was not successful then the Zoom algorithm tries to
        find a suitable step length by using bisection, quadratic, cubic
200
        interpolation.
201

202
203
204
        Parameters
        ----------
        alpha_lo : float
Martin Reinecke's avatar
Martin Reinecke committed
205
206
            A boundary for the step length interval.
            Fulfills Wolfe condition 1.
Martin Reinecke's avatar
Martin Reinecke committed
207
        alpha_hi : float
Martin Reinecke's avatar
Martin Reinecke committed
208
            The other boundary for the step length interval.
209
        phi_0 : float
210
            Value of the energy at the starting point of the line search
211
212
213
214
            algorithm.
        phiprime_0 : Field
            Gradient at the starting point of the line search algorithm.
        phi_lo : float
215
            Value of the energy if we perform a step of length alpha_lo in
216
217
            descent direction.
        phiprime_lo : Field
218
            Gradient at the nwe position if we perform a step of length
219
220
            alpha_lo in descent direction.
        phi_hi : float
221
            Value of the energy if we perform a step of length alpha_hi in
222
            descent direction.
223

224
225
226
227
228
229
230
231
        Returns
        -------
        alpha_star : float
            The optimal step length in the descent direction.
        phi_star : float
            Value of the energy after the performed descent.
        energy_star : Energy object
            The new Energy object on the new position.
232

233
        """
234
235
236
237
        max_iterations = self.max_zoom_iterations
        # define the cubic and quadratic interpolant checks
        cubic_delta = 0.2  # cubic
        quad_delta = 0.1  # quadratic
Martin Reinecke's avatar
Martin Reinecke committed
238
        phiprime_alphaj = 0.
239
        # initialize the most recent versions (j-1) of phi and alpha
Martin Reinecke's avatar
Martin Reinecke committed
240
241
242
243
244
        #alpha_recent = None
        #phi_recent = None
        assert phi_lo <= phi_0 + self.c1*alpha_lo*phiprime_0
        assert phiprime_lo*(alpha_hi-alpha_lo)<0.
        print "enter:"
245
        for i in xrange(max_iterations):
Martin Reinecke's avatar
Martin Reinecke committed
246
247
248
            #assert phi_lo <= phi_0 + self.c1*alpha_lo*phiprime_0
            #assert phiprime_lo*(alpha_hi-alpha_lo)<0.
#            print alpha_lo, alpha_hi
249
            delta_alpha = alpha_hi - alpha_lo
Martin Reinecke's avatar
Martin Reinecke committed
250
            alpha_j = alpha_lo + 0.5*delta_alpha
251
252

            # Check if the current value of alpha_j is already sufficient
Martin Reinecke's avatar
Martin Reinecke committed
253
254
            le_alphaj = self.line_energy.at(alpha_j)
            phi_alphaj = le_alphaj.value
255

256
257
            # If the first Wolfe condition is not met replace alpha_hi
            # by alpha_j
Martin Reinecke's avatar
Martin Reinecke committed
258
259
260
261
262
#            print "W1:", phi_alphaj, phi_0 + self.c1*alpha_j*phiprime_0
            print alpha_lo, phi_lo
            print alpha_hi, phi_hi
#            print phi_lo, phi_hi, phi_alphaj
#            print phiprime_lo, phiprime_alphaj
Martin Reinecke's avatar
Martin Reinecke committed
263
            if (phi_alphaj > phi_0 + self.c1*alpha_j*phiprime_0) or\
264
               (phi_alphaj >= phi_lo):
Martin Reinecke's avatar
Martin Reinecke committed
265
#                print "beep"
266
267
                alpha_hi, phi_hi = alpha_j, phi_alphaj
            else:
Martin Reinecke's avatar
Martin Reinecke committed
268
#                print "boop"
Martin Reinecke's avatar
Martin Reinecke committed
269
                phiprime_alphaj = le_alphaj.dd
270
                # If the second Wolfe condition is met, return the result
Martin Reinecke's avatar
Martin Reinecke committed
271
                if abs(phiprime_alphaj) <= -self.c2*phiprime_0:
272
273
                    alpha_star = alpha_j
                    phi_star = phi_alphaj
Martin Reinecke's avatar
Martin Reinecke committed
274
                    le_star = le_alphaj
275
276
277
278
279
280
281
282
283
                    break
                # If not, check the sign of the slope
                if phiprime_alphaj*delta_alpha >= 0:
                    alpha_hi, phi_hi = alpha_lo, phi_lo
                # Replace alpha_lo by alpha_j
                (alpha_lo, phi_lo, phiprime_lo) = (alpha_j, phi_alphaj,
                                                   phiprime_alphaj)

        else:
Martin Reinecke's avatar
Martin Reinecke committed
284
285
            alpha_star, phi_star, le_star = \
                alpha_j, phi_alphaj, le_alphaj
286
287
288
            self.logger.error("The line search algorithm (zoom) did not "
                              "converge.")

Martin Reinecke's avatar
Martin Reinecke committed
289
        return alpha_star, phi_star, le_star
290
291

    def _cubicmin(self, a, fa, fpa, b, fb, c, fc):
292
        """Estimating the minimum with cubic interpolation.
293

294
        Finds the minimizer for a cubic polynomial that goes through the
295
296
        points ( a,f(a) ), ( b,f(b) ), and ( c,f(c) ) with derivative at point
        a of fpa.
297
        f(x) = A *(x-a)^3 + B*(x-a)^2 + C*(x-a) + D
298
        If no minimizer can be found return None
299

300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
        Parameters
        ----------
        a : float
            Selected point.
        fa : float
            Value of polynomial at point a.
        fpa : Field
            Derivative at point a.
        b : float
            Selected point.
        fb : float
            Value of polynomial at point b.
        c : float
            Selected point.
        fc : float
            Value of polynomial at point c.
316

317
318
319
320
        Returns
        -------
        xmin : float
            Position of the approximated minimum.
321

322
323
324
325
326
327
328
        """

        with np.errstate(divide='raise', over='raise', invalid='raise'):
            try:
                C = fpa
                db = b - a
                dc = c - a
329
                denom = db * db * dc * dc * (db - dc)
330
                d1 = np.empty((2, 2))
331
332
333
334
                d1[0, 0] = dc * dc
                d1[0, 1] = -(db*db)
                d1[1, 0] = -(dc*dc*dc)
                d1[1, 1] = db*db*db
335
336
337
338
339
340
341
342
343
344
345
346
347
                [A, B] = np.dot(d1, np.asarray([fb - fa - C * db,
                                                fc - fa - C * dc]).flatten())
                A /= denom
                B /= denom
                radical = B * B - 3 * A * C
                xmin = a + (-B + np.sqrt(radical)) / (3 * A)
            except ArithmeticError:
                return None
        if not np.isfinite(xmin):
            return None
        return xmin

    def _quadmin(self, a, fa, fpa, b, fb):
348
        """Estimating the minimum with quadratic interpolation.
349

350
        Finds the minimizer for a quadratic polynomial that goes through
351
352
        the points ( a,f(a) ), ( b,f(b) ) with derivative at point a of fpa.
        f(x) = B*(x-a)^2 + C*(x-a) + D
353

354
355
356
357
358
359
360
361
362
363
364
365
        Parameters
        ----------
        a : float
            Selected point.
        fa : float
            Value of polynomial at point a.
        fpa : Field
            Derivative at point a.
        b : float
            Selected point.
        fb : float
            Value of polynomial at point b.
366

367
368
369
        Returns
        -------
        xmin : float
370
            Position of the approximated minimum.
371
372
373
374
375
376
377
378
379
380
381
382
383
384
        """
        # f(x) = B*(x-a)^2 + C*(x-a) + D
        with np.errstate(divide='raise', over='raise', invalid='raise'):
            try:
                D = fa
                C = fpa
                db = b - a * 1.0
                B = (fb - D - C * db) / (db * db)
                xmin = a - C / (2.0 * B)
            except ArithmeticError:
                return None
        if not np.isfinite(xmin):
            return None
        return xmin