line_search_strong_wolfe.py 12.5 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
Theo Steininger's avatar
Theo Steininger committed
13
#
Martin Reinecke's avatar
Martin Reinecke committed
14
# Copyright(C) 2013-2018 Max-Planck-Society
Theo Steininger's avatar
Theo Steininger committed
15
16
17
#
# NIFTy is being developed at the Max-Planck-Institut fuer Astrophysik
# and financially supported by the Studienstiftung des deutschen Volkes.
18

Martin Reinecke's avatar
Martin Reinecke committed
19
20
from __future__ import division
from builtins import range
21
22
import numpy as np
from .line_search import LineSearch
Martin Reinecke's avatar
Martin Reinecke committed
23
from .line_energy import LineEnergy
Martin Reinecke's avatar
Martin Reinecke committed
24
from .. import dobj
25
26
27


class LineSearchStrongWolfe(LineSearch):
28
    """Class for finding a step size that satisfies the strong Wolfe conditions.
29

30
    Algorithm contains two stages. It begins with a trial step length and
Martin Reinecke's avatar
Martin Reinecke committed
31
32
    keeps increasing it until it finds an acceptable step length or an
    interval. If it does not satisfy the Wolfe conditions, it performs the Zoom
33
34
35
    algorithm (second stage). By interpolating it decreases the size of the
    interval until an acceptable step length is found.

36
37
    Parameters
    ----------
38
    c1 : float
39
        Parameter for Armijo condition rule. (Default: 1e-4)
40
    c2 : float
41
        Parameter for curvature condition rule. (Default: 0.9)
42
    max_step_size : float
43
        Maximum step allowed in to be made in the descent direction.
Martin Reinecke's avatar
Martin Reinecke committed
44
45
        (Default: 1000000000)
    max_iterations : int, optional
46
        Maximum number of iterations performed by the line search algorithm.
Martin Reinecke's avatar
Martin Reinecke committed
47
48
        (Default: 100)
    max_zoom_iterations : int, optional
49
        Maximum number of iterations performed by the zoom algorithm.
Martin Reinecke's avatar
Martin Reinecke committed
50
        (Default: 100)
51
52
    """

Martin Reinecke's avatar
Martin Reinecke committed
53
    def __init__(self, preferred_initial_step_size=None, c1=1e-4, c2=0.9,
Martin Reinecke's avatar
Martin Reinecke committed
54
                 max_step_size=1000000000, max_iterations=100,
55
                 max_zoom_iterations=100):
56

Martin Reinecke's avatar
Martin Reinecke committed
57
58
        super(LineSearchStrongWolfe, self).__init__(
            preferred_initial_step_size)
59
60
61
62
63
64
65

        self.c1 = np.float(c1)
        self.c2 = np.float(c2)
        self.max_step_size = max_step_size
        self.max_iterations = int(max_iterations)
        self.max_zoom_iterations = int(max_zoom_iterations)

66
    def perform_line_search(self, energy, pk, f_k_minus_1=None):
67
        """Performs the first stage of the algorithm.
68
69

        It starts with a trial step size and it keeps increasing it until it
70
71
        satisfies the strong Wolf conditions. It also performs the descent and
        returns the optimal step length and the new energy.
72

73
74
        Parameters
        ----------
Martin Reinecke's avatar
Martin Reinecke committed
75
        energy : Energy
76
77
78
            Energy object from which we will calculate the energy and the
            gradient at a specific point.
        pk : Field
79
            Vector pointing into the search direction.
Martin Reinecke's avatar
Martin Reinecke committed
80
        f_k_minus_1 : float, optional
81
            Value of the fuction (which is being minimized) at the k-1
82
            iteration of the line search procedure. (Default: None)
83

84
85
        Returns
        -------
Martin Reinecke's avatar
Martin Reinecke committed
86
        Energy
87
            The new Energy object on the new position.
88
        """
89
        le_0 = LineEnergy(0., energy, pk, 0.)
90
91

        # initialize the zero phis
92
        old_phi_0 = f_k_minus_1
Martin Reinecke's avatar
Martin Reinecke committed
93
        phi_0 = le_0.value
94
        phiprime_0 = le_0.directional_derivative
95
96
97
98
        if phiprime_0 == 0:
            dobj.mprint("Directional derivative is zero; assuming convergence")
            return energy
        if phiprime_0 > 0:
99
            dobj.mprint("Error: search direction is not a descent direction")
Martin Reinecke's avatar
bug fix    
Martin Reinecke committed
100
            raise ValueError("search direction must be a descent direction")
101
102
103

        # set alphas
        alpha0 = 0.
104
105
106
        phi_alpha0 = phi_0
        phiprime_alpha0 = phiprime_0

107
108
        if self.preferred_initial_step_size is not None:
            alpha1 = self.preferred_initial_step_size
109
        elif old_phi_0 is not None:
110
111
112
113
            alpha1 = min(1.0, 1.01*2*(phi_0 - old_phi_0)/phiprime_0)
            if alpha1 < 0:
                alpha1 = 1.0
        else:
Martin Reinecke's avatar
Martin Reinecke committed
114
            alpha1 = 1.0/pk.norm()
115
116

        # start the minimization loop
Theo Steininger's avatar
Theo Steininger committed
117
118
119
        iteration_number = 0
        while iteration_number < self.max_iterations:
            iteration_number += 1
120
            if alpha1 == 0:
Theo Steininger's avatar
Theo Steininger committed
121
122
                result_energy = le_0.energy
                break
123
124
125

            le_alpha1 = le_0.at(alpha1)
            phi_alpha1 = le_alpha1.value
126

Martin Reinecke's avatar
Martin Reinecke committed
127
            if (phi_alpha1 > phi_0 + self.c1*alpha1*phiprime_0) or \
128
               ((phi_alpha1 >= phi_alpha0) and (iteration_number > 1)):
129
130
131
                le_star = self._zoom(alpha0, alpha1, phi_0, phiprime_0,
                                     phi_alpha0, phiprime_alpha0, phi_alpha1,
                                     le_0)
Theo Steininger's avatar
Theo Steininger committed
132
133
                result_energy = le_star.energy
                break
134

135
            phiprime_alpha1 = le_alpha1.directional_derivative
Martin Reinecke's avatar
Martin Reinecke committed
136
            if abs(phiprime_alpha1) <= -self.c2*phiprime_0:
Theo Steininger's avatar
Theo Steininger committed
137
138
                result_energy = le_alpha1.energy
                break
139
140

            if phiprime_alpha1 >= 0:
141
142
143
                le_star = self._zoom(alpha1, alpha0, phi_0, phiprime_0,
                                     phi_alpha1, phiprime_alpha1, phi_alpha0,
                                     le_0)
Theo Steininger's avatar
Theo Steininger committed
144
145
                result_energy = le_star.energy
                break
146
147

            # update alphas
148
149
150
151
            alpha0, alpha1 = alpha1, min(2*alpha1, self.max_step_size)
            if alpha1 == self.max_step_size:
                return le_alpha1.energy

152
153
154
            phi_alpha0 = phi_alpha1
            phiprime_alpha0 = phiprime_alpha1
        else:
Martin Reinecke's avatar
Martin Reinecke committed
155
            dobj.mprint("max iterations reached")
156
            return le_alpha1.energy
Theo Steininger's avatar
Theo Steininger committed
157
        return result_energy
158
159

    def _zoom(self, alpha_lo, alpha_hi, phi_0, phiprime_0,
160
              phi_lo, phiprime_lo, phi_hi, le_0):
161
        """Performs the second stage of the line search algorithm.
162
163
164

        If the first stage was not successful then the Zoom algorithm tries to
        find a suitable step length by using bisection, quadratic, cubic
165
        interpolation.
166

167
168
169
        Parameters
        ----------
        alpha_lo : float
Martin Reinecke's avatar
Martin Reinecke committed
170
171
            A boundary for the step length interval.
            Fulfills Wolfe condition 1.
Martin Reinecke's avatar
Martin Reinecke committed
172
        alpha_hi : float
Martin Reinecke's avatar
Martin Reinecke committed
173
            The other boundary for the step length interval.
174
        phi_0 : float
175
            Value of the energy at the starting point of the line search
176
            algorithm.
177
178
179
        phiprime_0 : float
            directional derivative at the starting point of the line search
            algorithm.
180
        phi_lo : float
181
            Value of the energy if we perform a step of length alpha_lo in
182
            descent direction.
183
184
185
        phiprime_lo : float
            directional derivative at the new position if we perform a step of
            length alpha_lo in descent direction.
186
        phi_hi : float
187
            Value of the energy if we perform a step of length alpha_hi in
188
            descent direction.
189

190
191
        Returns
        -------
Martin Reinecke's avatar
Martin Reinecke committed
192
        Energy
193
194
            The new Energy object on the new position.
        """
Martin Reinecke's avatar
Martin Reinecke committed
195
196
        cubic_delta = 0.2  # cubic interpolant checks
        quad_delta = 0.1  # quadratic interpolant checks
Theo Steininger's avatar
Theo Steininger committed
197
198
        alpha_recent = None
        phi_recent = None
199

200
201
202
203
        if phi_lo > phi_0 + self.c1*alpha_lo*phiprime_0:
            raise ValueError("inconsistent data")
        if phiprime_lo*(alpha_hi-alpha_lo) >= 0.:
            raise ValueError("inconsistent data")
Martin Reinecke's avatar
Martin Reinecke committed
204
        for i in range(self.max_zoom_iterations):
Theo Steininger's avatar
Theo Steininger committed
205
206
            # assert phi_lo <= phi_0 + self.c1*alpha_lo*phiprime_0
            # assert phiprime_lo*(alpha_hi-alpha_lo)<0.
207
            delta_alpha = alpha_hi - alpha_lo
208
            a, b = min(alpha_lo, alpha_hi), max(alpha_lo, alpha_hi)
209
210
211
212
213
214
215
216
217
218
219
220
221

            # Try cubic interpolation
            if i > 0:
                cubic_check = cubic_delta * delta_alpha
                alpha_j = self._cubicmin(alpha_lo, phi_lo, phiprime_lo,
                                         alpha_hi, phi_hi,
                                         alpha_recent, phi_recent)
            # If cubic was not successful or not available, try quadratic
            if (i == 0) or (alpha_j is None) or (alpha_j > b - cubic_check) or\
               (alpha_j < a + cubic_check):
                quad_check = quad_delta * delta_alpha
                alpha_j = self._quadmin(alpha_lo, phi_lo, phiprime_lo,
                                        alpha_hi, phi_hi)
222
                # If quadratic was not successful, try bisection
223
224
225
226
227
                if (alpha_j is None) or (alpha_j > b - quad_check) or \
                   (alpha_j < a + quad_check):
                    alpha_j = alpha_lo + 0.5*delta_alpha

            # Check if the current value of alpha_j is already sufficient
228
            le_alphaj = le_0.at(alpha_j)
Martin Reinecke's avatar
Martin Reinecke committed
229
            phi_alphaj = le_alphaj.value
230

231
232
            # If the first Wolfe condition is not met replace alpha_hi
            # by alpha_j
233
            if (phi_alphaj > phi_0 + self.c1*alpha_j*phiprime_0) or \
234
235
236
237
               (phi_alphaj >= phi_lo):
                alpha_recent, phi_recent = alpha_hi, phi_hi
                alpha_hi, phi_hi = alpha_j, phi_alphaj
            else:
238
                phiprime_alphaj = le_alphaj.directional_derivative
239
                # If the second Wolfe condition is met, return the result
Martin Reinecke's avatar
Martin Reinecke committed
240
                if abs(phiprime_alphaj) <= -self.c2*phiprime_0:
241
                    return le_alphaj
242
243
244
245
246
247
248
249
250
251
252
                # If not, check the sign of the slope
                if phiprime_alphaj*delta_alpha >= 0:
                    alpha_recent, phi_recent = alpha_hi, phi_hi
                    alpha_hi, phi_hi = alpha_lo, phi_lo
                else:
                    alpha_recent, phi_recent = alpha_lo, phi_lo
                # Replace alpha_lo by alpha_j
                (alpha_lo, phi_lo, phiprime_lo) = (alpha_j, phi_alphaj,
                                                   phiprime_alphaj)

        else:
Martin Reinecke's avatar
Martin Reinecke committed
253
            dobj.mprint("The line search algorithm (zoom) did not converge.")
254
            return le_alphaj
255
256

    def _cubicmin(self, a, fa, fpa, b, fb, c, fc):
257
        """Estimating the minimum with cubic interpolation.
258

259
        Finds the minimizer for a cubic polynomial that goes through the
Martin Reinecke's avatar
Martin Reinecke committed
260
        points (a,a), (b,fb), and (c,fc) with derivative at point a of fpa.
261
        If no minimizer can be found return None
262

263
264
        Parameters
        ----------
Martin Reinecke's avatar
Martin Reinecke committed
265
266
267
268
269
270
        a, fa, fpa : float
            abscissa, function value and derivative at first point
        b, fb : float
            abscissa and function value at second point
        c, fc : float
            abscissa and function value at third point
271

272
273
274
275
        Returns
        -------
        xmin : float
            Position of the approximated minimum.
276
277
278
279
280
281
        """
        with np.errstate(divide='raise', over='raise', invalid='raise'):
            try:
                C = fpa
                db = b - a
                dc = c - a
282
                denom = db * db * dc * dc * (db - dc)
283
                d1 = np.empty((2, 2))
284
285
286
287
                d1[0, 0] = dc * dc
                d1[0, 1] = -(db*db)
                d1[1, 0] = -(dc*dc*dc)
                d1[1, 1] = db*db*db
288
                [A, B] = np.dot(d1, np.asarray([fb - fa - C * db,
Martin Reinecke's avatar
Martin Reinecke committed
289
                                                fc - fa - C * dc]).ravel())
290
291
292
293
294
295
296
297
298
299
300
                A /= denom
                B /= denom
                radical = B * B - 3 * A * C
                xmin = a + (-B + np.sqrt(radical)) / (3 * A)
            except ArithmeticError:
                return None
        if not np.isfinite(xmin):
            return None
        return xmin

    def _quadmin(self, a, fa, fpa, b, fb):
301
        """Estimating the minimum with quadratic interpolation.
302

303
        Finds the minimizer for a quadratic polynomial that goes through
Martin Reinecke's avatar
Martin Reinecke committed
304
        the points (a,fa), (b,fb) with derivative at point a of fpa.
305

306
307
        Parameters
        ----------
Martin Reinecke's avatar
Martin Reinecke committed
308
309
310
311
        a, fa, fpa : float
            abscissa, function value and derivative at first point
        b, fb : float
            abscissa and function value at second point
312

313
314
315
        Returns
        -------
        xmin : float
316
            Position of the approximated minimum.
317
318
319
320
        """
        with np.errstate(divide='raise', over='raise', invalid='raise'):
            try:
                db = b - a * 1.0
Martin Reinecke's avatar
Martin Reinecke committed
321
322
                B = (fb - fa - fpa * db) / (db * db)
                xmin = a - fpa / (2.0 * B)
323
324
325
326
327
            except ArithmeticError:
                return None
        if not np.isfinite(xmin):
            return None
        return xmin