line_search_strong_wolfe.py 12.7 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
Theo Steininger's avatar
Theo Steininger committed
13
#
Martin Reinecke's avatar
Martin Reinecke committed
14
# Copyright(C) 2013-2018 Max-Planck-Society
Theo Steininger's avatar
Theo Steininger committed
15
16
17
#
# NIFTy is being developed at the Max-Planck-Institut fuer Astrophysik
# and financially supported by the Studienstiftung des deutschen Volkes.
18

Martin Reinecke's avatar
Martin Reinecke committed
19
20
from __future__ import division
from builtins import range
21
22
import numpy as np
from .line_search import LineSearch
Martin Reinecke's avatar
Martin Reinecke committed
23
from .line_energy import LineEnergy
Martin Reinecke's avatar
Martin Reinecke committed
24
from ..logger import logger
25
26
27


class LineSearchStrongWolfe(LineSearch):
28
    """Class for finding a step size that satisfies the strong Wolfe conditions.
29

30
    Algorithm contains two stages. It begins with a trial step length and
Martin Reinecke's avatar
Martin Reinecke committed
31
32
    keeps increasing it until it finds an acceptable step length or an
    interval. If it does not satisfy the Wolfe conditions, it performs the Zoom
33
34
35
    algorithm (second stage). By interpolating it decreases the size of the
    interval until an acceptable step length is found.

36
37
    Parameters
    ----------
38
    c1 : float
39
        Parameter for Armijo condition rule. (Default: 1e-4)
40
    c2 : float
41
        Parameter for curvature condition rule. (Default: 0.9)
42
    max_step_size : float
43
        Maximum step allowed in to be made in the descent direction.
Martin Reinecke's avatar
stage 1    
Martin Reinecke committed
44
        (Default: 1e30)
Martin Reinecke's avatar
Martin Reinecke committed
45
    max_iterations : int, optional
46
        Maximum number of iterations performed by the line search algorithm.
Martin Reinecke's avatar
Martin Reinecke committed
47
48
        (Default: 100)
    max_zoom_iterations : int, optional
49
        Maximum number of iterations performed by the zoom algorithm.
Martin Reinecke's avatar
Martin Reinecke committed
50
        (Default: 100)
51
52
    """

Martin Reinecke's avatar
Martin Reinecke committed
53
    def __init__(self, preferred_initial_step_size=None, c1=1e-4, c2=0.9,
Martin Reinecke's avatar
stage 1    
Martin Reinecke committed
54
                 max_step_size=1e30, max_iterations=100,
55
                 max_zoom_iterations=100):
56

Martin Reinecke's avatar
Martin Reinecke committed
57
58
        super(LineSearchStrongWolfe, self).__init__(
            preferred_initial_step_size)
59
60
61
62
63
64
65

        self.c1 = np.float(c1)
        self.c2 = np.float(c2)
        self.max_step_size = max_step_size
        self.max_iterations = int(max_iterations)
        self.max_zoom_iterations = int(max_zoom_iterations)

66
    def perform_line_search(self, energy, pk, f_k_minus_1=None):
67
        """Performs the first stage of the algorithm.
68
69

        It starts with a trial step size and it keeps increasing it until it
70
71
        satisfies the strong Wolf conditions. It also performs the descent and
        returns the optimal step length and the new energy.
72

73
74
        Parameters
        ----------
Martin Reinecke's avatar
Martin Reinecke committed
75
        energy : Energy
76
77
78
            Energy object from which we will calculate the energy and the
            gradient at a specific point.
        pk : Field
79
            Vector pointing into the search direction.
Martin Reinecke's avatar
Martin Reinecke committed
80
        f_k_minus_1 : float, optional
81
            Value of the fuction (which is being minimized) at the k-1
82
            iteration of the line search procedure. (Default: None)
83

84
85
        Returns
        -------
Martin Reinecke's avatar
Martin Reinecke committed
86
        Energy
87
            The new Energy object on the new position.
Martin Reinecke's avatar
stage 1    
Martin Reinecke committed
88
89
        bool
            whether the line search was considered successful or not
90
        """
91
        le_0 = LineEnergy(0., energy, pk, 0.)
92

Martin Reinecke's avatar
stage 1    
Martin Reinecke committed
93
94
95
96
97
        maxstepsize = energy.longest_step(pk)
        if maxstepsize is None:
            maxstepsize = self.max_step_size
        maxstepsize = min(maxstepsize, self.max_step_size)

98
        # initialize the zero phis
99
        old_phi_0 = f_k_minus_1
Martin Reinecke's avatar
Martin Reinecke committed
100
        phi_0 = le_0.value
101
        phiprime_0 = le_0.directional_derivative
102
        if phiprime_0 == 0:
Martin Reinecke's avatar
Martin Reinecke committed
103
104
            logger.warning(
                "Directional derivative is zero; assuming convergence")
Martin Reinecke's avatar
stage 1    
Martin Reinecke committed
105
            return energy, False
106
        if phiprime_0 > 0:
Martin Reinecke's avatar
Martin Reinecke committed
107
            logger.error("Error: search direction is not a descent direction")
Martin Reinecke's avatar
stage 1    
Martin Reinecke committed
108
            return energy, False
109
110
111

        # set alphas
        alpha0 = 0.
112
113
114
        phi_alpha0 = phi_0
        phiprime_alpha0 = phiprime_0

115
116
        if self.preferred_initial_step_size is not None:
            alpha1 = self.preferred_initial_step_size
117
        elif old_phi_0 is not None:
118
119
120
121
            alpha1 = min(1.0, 1.01*2*(phi_0 - old_phi_0)/phiprime_0)
            if alpha1 < 0:
                alpha1 = 1.0
        else:
Martin Reinecke's avatar
Martin Reinecke committed
122
            alpha1 = 1.0/pk.norm()
Martin Reinecke's avatar
stage 1    
Martin Reinecke committed
123
        alpha1 = min(alpha1, 0.99*maxstepsize)
124
125

        # start the minimization loop
Theo Steininger's avatar
Theo Steininger committed
126
127
128
        iteration_number = 0
        while iteration_number < self.max_iterations:
            iteration_number += 1
129
            if alpha1 == 0:
Martin Reinecke's avatar
stage 1    
Martin Reinecke committed
130
                return le_0.energy, False
131
132
133

            le_alpha1 = le_0.at(alpha1)
            phi_alpha1 = le_alpha1.value
134

Martin Reinecke's avatar
Martin Reinecke committed
135
            if (phi_alpha1 > phi_0 + self.c1*alpha1*phiprime_0) or \
136
               ((phi_alpha1 >= phi_alpha0) and (iteration_number > 1)):
Martin Reinecke's avatar
stage 1    
Martin Reinecke committed
137
138
139
                return self._zoom(alpha0, alpha1, phi_0, phiprime_0,
                                  phi_alpha0, phiprime_alpha0, phi_alpha1,
                                  le_0)
140

141
            phiprime_alpha1 = le_alpha1.directional_derivative
Martin Reinecke's avatar
Martin Reinecke committed
142
            if abs(phiprime_alpha1) <= -self.c2*phiprime_0:
Martin Reinecke's avatar
stage 1    
Martin Reinecke committed
143
                return le_alpha1.energy, True
144
145

            if phiprime_alpha1 >= 0:
Martin Reinecke's avatar
stage 1    
Martin Reinecke committed
146
147
148
                return self._zoom(alpha1, alpha0, phi_0, phiprime_0,
                                  phi_alpha1, phiprime_alpha1, phi_alpha0,
                                  le_0)
149
150

            # update alphas
Martin Reinecke's avatar
stage 1    
Martin Reinecke committed
151
152
            alpha0, alpha1 = alpha1, min(2*alpha1, maxstepsize)
            if alpha1 == maxstepsize:
Martin Reinecke's avatar
Martin Reinecke committed
153
                logger.warning("max step size reached")
Martin Reinecke's avatar
stage 1    
Martin Reinecke committed
154
                return le_alpha1.energy, False
155

156
157
            phi_alpha0 = phi_alpha1
            phiprime_alpha0 = phiprime_alpha1
Martin Reinecke's avatar
stage 1    
Martin Reinecke committed
158

Martin Reinecke's avatar
Martin Reinecke committed
159
        logger.warning("max iterations reached")
Martin Reinecke's avatar
stage 1    
Martin Reinecke committed
160
        return le_alpha1.energy, False
161
162

    def _zoom(self, alpha_lo, alpha_hi, phi_0, phiprime_0,
163
              phi_lo, phiprime_lo, phi_hi, le_0):
164
        """Performs the second stage of the line search algorithm.
165
166
167

        If the first stage was not successful then the Zoom algorithm tries to
        find a suitable step length by using bisection, quadratic, cubic
168
        interpolation.
169

170
171
172
        Parameters
        ----------
        alpha_lo : float
Martin Reinecke's avatar
Martin Reinecke committed
173
174
            A boundary for the step length interval.
            Fulfills Wolfe condition 1.
Martin Reinecke's avatar
Martin Reinecke committed
175
        alpha_hi : float
Martin Reinecke's avatar
Martin Reinecke committed
176
            The other boundary for the step length interval.
177
        phi_0 : float
178
            Value of the energy at the starting point of the line search
179
            algorithm.
180
181
182
        phiprime_0 : float
            directional derivative at the starting point of the line search
            algorithm.
183
        phi_lo : float
184
            Value of the energy if we perform a step of length alpha_lo in
185
            descent direction.
186
187
188
        phiprime_lo : float
            directional derivative at the new position if we perform a step of
            length alpha_lo in descent direction.
189
        phi_hi : float
190
            Value of the energy if we perform a step of length alpha_hi in
191
            descent direction.
192

193
194
        Returns
        -------
Martin Reinecke's avatar
Martin Reinecke committed
195
        Energy
196
197
            The new Energy object on the new position.
        """
Martin Reinecke's avatar
Martin Reinecke committed
198
199
        cubic_delta = 0.2  # cubic interpolant checks
        quad_delta = 0.1  # quadratic interpolant checks
Theo Steininger's avatar
Theo Steininger committed
200
201
        alpha_recent = None
        phi_recent = None
202

203
204
205
206
        if phi_lo > phi_0 + self.c1*alpha_lo*phiprime_0:
            raise ValueError("inconsistent data")
        if phiprime_lo*(alpha_hi-alpha_lo) >= 0.:
            raise ValueError("inconsistent data")
Martin Reinecke's avatar
Martin Reinecke committed
207
        for i in range(self.max_zoom_iterations):
Theo Steininger's avatar
Theo Steininger committed
208
209
            # assert phi_lo <= phi_0 + self.c1*alpha_lo*phiprime_0
            # assert phiprime_lo*(alpha_hi-alpha_lo)<0.
210
            delta_alpha = alpha_hi - alpha_lo
211
            a, b = min(alpha_lo, alpha_hi), max(alpha_lo, alpha_hi)
212
213
214
215
216
217
218
219
220
221
222
223
224

            # Try cubic interpolation
            if i > 0:
                cubic_check = cubic_delta * delta_alpha
                alpha_j = self._cubicmin(alpha_lo, phi_lo, phiprime_lo,
                                         alpha_hi, phi_hi,
                                         alpha_recent, phi_recent)
            # If cubic was not successful or not available, try quadratic
            if (i == 0) or (alpha_j is None) or (alpha_j > b - cubic_check) or\
               (alpha_j < a + cubic_check):
                quad_check = quad_delta * delta_alpha
                alpha_j = self._quadmin(alpha_lo, phi_lo, phiprime_lo,
                                        alpha_hi, phi_hi)
225
                # If quadratic was not successful, try bisection
226
227
228
229
230
                if (alpha_j is None) or (alpha_j > b - quad_check) or \
                   (alpha_j < a + quad_check):
                    alpha_j = alpha_lo + 0.5*delta_alpha

            # Check if the current value of alpha_j is already sufficient
231
            le_alphaj = le_0.at(alpha_j)
Martin Reinecke's avatar
Martin Reinecke committed
232
            phi_alphaj = le_alphaj.value
233

234
235
            # If the first Wolfe condition is not met replace alpha_hi
            # by alpha_j
236
            if (phi_alphaj > phi_0 + self.c1*alpha_j*phiprime_0) or \
237
238
239
240
               (phi_alphaj >= phi_lo):
                alpha_recent, phi_recent = alpha_hi, phi_hi
                alpha_hi, phi_hi = alpha_j, phi_alphaj
            else:
241
                phiprime_alphaj = le_alphaj.directional_derivative
242
                # If the second Wolfe condition is met, return the result
Martin Reinecke's avatar
Martin Reinecke committed
243
                if abs(phiprime_alphaj) <= -self.c2*phiprime_0:
Martin Reinecke's avatar
stage 1    
Martin Reinecke committed
244
                    return le_alphaj.energy, True
245
246
247
248
249
250
251
252
253
254
255
                # If not, check the sign of the slope
                if phiprime_alphaj*delta_alpha >= 0:
                    alpha_recent, phi_recent = alpha_hi, phi_hi
                    alpha_hi, phi_hi = alpha_lo, phi_lo
                else:
                    alpha_recent, phi_recent = alpha_lo, phi_lo
                # Replace alpha_lo by alpha_j
                (alpha_lo, phi_lo, phiprime_lo) = (alpha_j, phi_alphaj,
                                                   phiprime_alphaj)

        else:
Martin Reinecke's avatar
Martin Reinecke committed
256
257
            logger.warning(
                "The line search algorithm (zoom) did not converge.")
Martin Reinecke's avatar
stage 1    
Martin Reinecke committed
258
            return le_alphaj.energy, False
259
260

    def _cubicmin(self, a, fa, fpa, b, fb, c, fc):
261
        """Estimating the minimum with cubic interpolation.
262

263
        Finds the minimizer for a cubic polynomial that goes through the
Martin Reinecke's avatar
Martin Reinecke committed
264
        points (a,a), (b,fb), and (c,fc) with derivative at point a of fpa.
265
        If no minimizer can be found return None
266

267
268
        Parameters
        ----------
Martin Reinecke's avatar
Martin Reinecke committed
269
270
271
272
273
274
        a, fa, fpa : float
            abscissa, function value and derivative at first point
        b, fb : float
            abscissa and function value at second point
        c, fc : float
            abscissa and function value at third point
275

276
277
278
279
        Returns
        -------
        xmin : float
            Position of the approximated minimum.
280
281
282
283
284
285
        """
        with np.errstate(divide='raise', over='raise', invalid='raise'):
            try:
                C = fpa
                db = b - a
                dc = c - a
286
                denom = db * db * dc * dc * (db - dc)
287
                d1 = np.empty((2, 2))
288
289
290
291
                d1[0, 0] = dc * dc
                d1[0, 1] = -(db*db)
                d1[1, 0] = -(dc*dc*dc)
                d1[1, 1] = db*db*db
292
                [A, B] = np.dot(d1, np.asarray([fb - fa - C * db,
Martin Reinecke's avatar
Martin Reinecke committed
293
                                                fc - fa - C * dc]).ravel())
294
295
296
297
298
299
300
301
302
303
304
                A /= denom
                B /= denom
                radical = B * B - 3 * A * C
                xmin = a + (-B + np.sqrt(radical)) / (3 * A)
            except ArithmeticError:
                return None
        if not np.isfinite(xmin):
            return None
        return xmin

    def _quadmin(self, a, fa, fpa, b, fb):
305
        """Estimating the minimum with quadratic interpolation.
306

307
        Finds the minimizer for a quadratic polynomial that goes through
Martin Reinecke's avatar
Martin Reinecke committed
308
        the points (a,fa), (b,fb) with derivative at point a of fpa.
309

310
311
        Parameters
        ----------
Martin Reinecke's avatar
Martin Reinecke committed
312
313
314
315
        a, fa, fpa : float
            abscissa, function value and derivative at first point
        b, fb : float
            abscissa and function value at second point
316

317
318
319
        Returns
        -------
        xmin : float
320
            Position of the approximated minimum.
321
322
323
324
        """
        with np.errstate(divide='raise', over='raise', invalid='raise'):
            try:
                db = b - a * 1.0
Martin Reinecke's avatar
Martin Reinecke committed
325
326
                B = (fb - fa - fpa * db) / (db * db)
                xmin = a - fpa / (2.0 * B)
327
328
329
330
331
            except ArithmeticError:
                return None
        if not np.isfinite(xmin):
            return None
        return xmin