line_search_strong_wolfe.py 12.8 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
Theo Steininger's avatar
Theo Steininger committed
13 14 15 16 17
#
# Copyright(C) 2013-2017 Max-Planck-Society
#
# NIFTy is being developed at the Max-Planck-Institut fuer Astrophysik
# and financially supported by the Studienstiftung des deutschen Volkes.
18

Martin Reinecke's avatar
Martin Reinecke committed
19 20
from __future__ import division
from builtins import range
21 22
import numpy as np
from .line_search import LineSearch
Martin Reinecke's avatar
Martin Reinecke committed
23
from .line_energy import LineEnergy
Martin Reinecke's avatar
Martin Reinecke committed
24
from .. import dobj
25 26 27


class LineSearchStrongWolfe(LineSearch):
28
    """Class for finding a step size that satisfies the strong Wolfe conditions.
29

30
    Algorithm contains two stages. It begins with a trial step length and
Martin Reinecke's avatar
Martin Reinecke committed
31 32
    keeps increasing it until it finds an acceptable step length or an
    interval. If it does not satisfy the Wolfe conditions, it performs the Zoom
33 34 35
    algorithm (second stage). By interpolating it decreases the size of the
    interval until an acceptable step length is found.

36 37
    Parameters
    ----------
38
    c1 : float
39
        Parameter for Armijo condition rule. (Default: 1e-4)
40
    c2 : float
41
        Parameter for curvature condition rule. (Default: 0.9)
42
    max_step_size : float
43
        Maximum step allowed in to be made in the descent direction.
44 45 46 47 48
        (Default: 50)
    max_iterations : integer
        Maximum number of iterations performed by the line search algorithm.
        (Default: 10)
    max_zoom_iterations : integer
49
        Maximum number of iterations performed by the zoom algorithm.
50
        (Default: 10)
51

52 53 54 55 56 57
    Attributes
    ----------
    c1 : float
        Parameter for Armijo condition rule.
    c2 : float
        Parameter for curvature condition rule.
58
    max_step_size : float
59
        Maximum step allowed in to be made in the descent direction.
60 61 62 63
    max_iterations : integer
        Maximum number of iterations performed by the line search algorithm.
    max_zoom_iterations : integer
        Maximum number of iterations performed by the zoom algorithm.
64 65 66
    """

    def __init__(self, c1=1e-4, c2=0.9,
Martin Reinecke's avatar
Martin Reinecke committed
67
                 max_step_size=1000000000, max_iterations=100,
68
                 max_zoom_iterations=100):
69 70 71 72 73 74 75 76 77

        super(LineSearchStrongWolfe, self).__init__()

        self.c1 = np.float(c1)
        self.c2 = np.float(c2)
        self.max_step_size = max_step_size
        self.max_iterations = int(max_iterations)
        self.max_zoom_iterations = int(max_zoom_iterations)

78
    def perform_line_search(self, energy, pk, f_k_minus_1=None):
79
        """Performs the first stage of the algorithm.
80 81

        It starts with a trial step size and it keeps increasing it until it
82 83
        satisfies the strong Wolf conditions. It also performs the descent and
        returns the optimal step length and the new energy.
84

85 86 87 88 89 90
        Parameters
        ----------
        energy : Energy object
            Energy object from which we will calculate the energy and the
            gradient at a specific point.
        pk : Field
91
            Vector pointing into the search direction.
92
        f_k_minus_1 : float
93
            Value of the fuction (which is being minimized) at the k-1
94
            iteration of the line search procedure. (Default: None)
95

96 97 98 99
        Returns
        -------
        energy_star : Energy object
            The new Energy object on the new position.
100
        """
101
        le_0 = LineEnergy(0., energy, pk, 0.)
102 103

        # initialize the zero phis
104
        old_phi_0 = f_k_minus_1
Martin Reinecke's avatar
Martin Reinecke committed
105
        phi_0 = le_0.value
106
        phiprime_0 = le_0.directional_derivative
107 108 109 110
        if phiprime_0 == 0:
            dobj.mprint("Directional derivative is zero; assuming convergence")
            return energy
        if phiprime_0 > 0:
111
            dobj.mprint("Error: search direction is not a descent direction")
Martin Reinecke's avatar
bug fix  
Martin Reinecke committed
112
            raise ValueError("search direction must be a descent direction")
113 114 115

        # set alphas
        alpha0 = 0.
116 117 118
        phi_alpha0 = phi_0
        phiprime_alpha0 = phiprime_0

119 120
        if self.preferred_initial_step_size is not None:
            alpha1 = self.preferred_initial_step_size
121
        elif old_phi_0 is not None:
122 123 124 125
            alpha1 = min(1.0, 1.01*2*(phi_0 - old_phi_0)/phiprime_0)
            if alpha1 < 0:
                alpha1 = 1.0
        else:
Martin Reinecke's avatar
Martin Reinecke committed
126
            alpha1 = 1.0/pk.norm()
127 128

        # start the minimization loop
Theo Steininger's avatar
Theo Steininger committed
129 130 131
        iteration_number = 0
        while iteration_number < self.max_iterations:
            iteration_number += 1
132
            if alpha1 == 0:
Theo Steininger's avatar
Theo Steininger committed
133 134
                result_energy = le_0.energy
                break
135 136 137

            le_alpha1 = le_0.at(alpha1)
            phi_alpha1 = le_alpha1.value
138

Martin Reinecke's avatar
Martin Reinecke committed
139
            if (phi_alpha1 > phi_0 + self.c1*alpha1*phiprime_0) or \
140
               ((phi_alpha1 >= phi_alpha0) and (iteration_number > 1)):
141 142 143
                le_star = self._zoom(alpha0, alpha1, phi_0, phiprime_0,
                                     phi_alpha0, phiprime_alpha0, phi_alpha1,
                                     le_0)
Theo Steininger's avatar
Theo Steininger committed
144 145
                result_energy = le_star.energy
                break
146

147
            phiprime_alpha1 = le_alpha1.directional_derivative
Martin Reinecke's avatar
Martin Reinecke committed
148
            if abs(phiprime_alpha1) <= -self.c2*phiprime_0:
Theo Steininger's avatar
Theo Steininger committed
149 150
                result_energy = le_alpha1.energy
                break
151 152

            if phiprime_alpha1 >= 0:
153 154 155
                le_star = self._zoom(alpha1, alpha0, phi_0, phiprime_0,
                                     phi_alpha1, phiprime_alpha1, phi_alpha0,
                                     le_0)
Theo Steininger's avatar
Theo Steininger committed
156 157
                result_energy = le_star.energy
                break
158 159

            # update alphas
160 161 162 163
            alpha0, alpha1 = alpha1, min(2*alpha1, self.max_step_size)
            if alpha1 == self.max_step_size:
                return le_alpha1.energy

164 165 166
            phi_alpha0 = phi_alpha1
            phiprime_alpha0 = phiprime_alpha1
        else:
Martin Reinecke's avatar
Martin Reinecke committed
167
            dobj.mprint("max iterations reached")
168
            return le_alpha1.energy
Theo Steininger's avatar
Theo Steininger committed
169
        return result_energy
170 171

    def _zoom(self, alpha_lo, alpha_hi, phi_0, phiprime_0,
172
              phi_lo, phiprime_lo, phi_hi, le_0):
173
        """Performs the second stage of the line search algorithm.
174 175 176

        If the first stage was not successful then the Zoom algorithm tries to
        find a suitable step length by using bisection, quadratic, cubic
177
        interpolation.
178

179 180 181
        Parameters
        ----------
        alpha_lo : float
Martin Reinecke's avatar
Martin Reinecke committed
182 183
            A boundary for the step length interval.
            Fulfills Wolfe condition 1.
Martin Reinecke's avatar
Martin Reinecke committed
184
        alpha_hi : float
Martin Reinecke's avatar
Martin Reinecke committed
185
            The other boundary for the step length interval.
186
        phi_0 : float
187
            Value of the energy at the starting point of the line search
188
            algorithm.
189 190 191
        phiprime_0 : float
            directional derivative at the starting point of the line search
            algorithm.
192
        phi_lo : float
193
            Value of the energy if we perform a step of length alpha_lo in
194
            descent direction.
195 196 197
        phiprime_lo : float
            directional derivative at the new position if we perform a step of
            length alpha_lo in descent direction.
198
        phi_hi : float
199
            Value of the energy if we perform a step of length alpha_hi in
200
            descent direction.
201

202 203 204 205 206
        Returns
        -------
        energy_star : Energy object
            The new Energy object on the new position.
        """
Martin Reinecke's avatar
Martin Reinecke committed
207 208
        cubic_delta = 0.2  # cubic interpolant checks
        quad_delta = 0.1  # quadratic interpolant checks
Theo Steininger's avatar
Theo Steininger committed
209 210
        alpha_recent = None
        phi_recent = None
211

Martin Reinecke's avatar
Martin Reinecke committed
212
        assert phi_lo <= phi_0 + self.c1*alpha_lo*phiprime_0
Theo Steininger's avatar
Theo Steininger committed
213
        assert phiprime_lo*(alpha_hi-alpha_lo) < 0.
Martin Reinecke's avatar
Martin Reinecke committed
214
        for i in range(self.max_zoom_iterations):
Theo Steininger's avatar
Theo Steininger committed
215 216
            # assert phi_lo <= phi_0 + self.c1*alpha_lo*phiprime_0
            # assert phiprime_lo*(alpha_hi-alpha_lo)<0.
217
            delta_alpha = alpha_hi - alpha_lo
218
            a, b = min(alpha_lo, alpha_hi), max(alpha_lo, alpha_hi)
219 220 221 222 223 224 225 226 227 228 229 230 231

            # Try cubic interpolation
            if i > 0:
                cubic_check = cubic_delta * delta_alpha
                alpha_j = self._cubicmin(alpha_lo, phi_lo, phiprime_lo,
                                         alpha_hi, phi_hi,
                                         alpha_recent, phi_recent)
            # If cubic was not successful or not available, try quadratic
            if (i == 0) or (alpha_j is None) or (alpha_j > b - cubic_check) or\
               (alpha_j < a + cubic_check):
                quad_check = quad_delta * delta_alpha
                alpha_j = self._quadmin(alpha_lo, phi_lo, phiprime_lo,
                                        alpha_hi, phi_hi)
232
                # If quadratic was not successful, try bisection
233 234 235 236 237
                if (alpha_j is None) or (alpha_j > b - quad_check) or \
                   (alpha_j < a + quad_check):
                    alpha_j = alpha_lo + 0.5*delta_alpha

            # Check if the current value of alpha_j is already sufficient
238
            le_alphaj = le_0.at(alpha_j)
Martin Reinecke's avatar
Martin Reinecke committed
239
            phi_alphaj = le_alphaj.value
240

241 242
            # If the first Wolfe condition is not met replace alpha_hi
            # by alpha_j
243
            if (phi_alphaj > phi_0 + self.c1*alpha_j*phiprime_0) or \
244 245 246 247
               (phi_alphaj >= phi_lo):
                alpha_recent, phi_recent = alpha_hi, phi_hi
                alpha_hi, phi_hi = alpha_j, phi_alphaj
            else:
248
                phiprime_alphaj = le_alphaj.directional_derivative
249
                # If the second Wolfe condition is met, return the result
Martin Reinecke's avatar
Martin Reinecke committed
250
                if abs(phiprime_alphaj) <= -self.c2*phiprime_0:
251
                    return le_alphaj
252 253 254 255 256 257 258 259 260 261 262
                # If not, check the sign of the slope
                if phiprime_alphaj*delta_alpha >= 0:
                    alpha_recent, phi_recent = alpha_hi, phi_hi
                    alpha_hi, phi_hi = alpha_lo, phi_lo
                else:
                    alpha_recent, phi_recent = alpha_lo, phi_lo
                # Replace alpha_lo by alpha_j
                (alpha_lo, phi_lo, phiprime_lo) = (alpha_j, phi_alphaj,
                                                   phiprime_alphaj)

        else:
Martin Reinecke's avatar
Martin Reinecke committed
263
            dobj.mprint("The line search algorithm (zoom) did not converge.")
264
            return le_alphaj
265 266

    def _cubicmin(self, a, fa, fpa, b, fb, c, fc):
267
        """Estimating the minimum with cubic interpolation.
268

269
        Finds the minimizer for a cubic polynomial that goes through the
Martin Reinecke's avatar
Martin Reinecke committed
270
        points (a,a), (b,fb), and (c,fc) with derivative at point a of fpa.
271
        If no minimizer can be found return None
272

273 274
        Parameters
        ----------
Martin Reinecke's avatar
Martin Reinecke committed
275 276 277 278 279 280
        a, fa, fpa : float
            abscissa, function value and derivative at first point
        b, fb : float
            abscissa and function value at second point
        c, fc : float
            abscissa and function value at third point
281

282 283 284 285
        Returns
        -------
        xmin : float
            Position of the approximated minimum.
286 287 288 289 290 291
        """
        with np.errstate(divide='raise', over='raise', invalid='raise'):
            try:
                C = fpa
                db = b - a
                dc = c - a
292
                denom = db * db * dc * dc * (db - dc)
293
                d1 = np.empty((2, 2))
294 295 296 297
                d1[0, 0] = dc * dc
                d1[0, 1] = -(db*db)
                d1[1, 0] = -(dc*dc*dc)
                d1[1, 1] = db*db*db
298
                [A, B] = np.dot(d1, np.asarray([fb - fa - C * db,
Martin Reinecke's avatar
Martin Reinecke committed
299
                                                fc - fa - C * dc]).ravel())
300 301 302 303 304 305 306 307 308 309 310
                A /= denom
                B /= denom
                radical = B * B - 3 * A * C
                xmin = a + (-B + np.sqrt(radical)) / (3 * A)
            except ArithmeticError:
                return None
        if not np.isfinite(xmin):
            return None
        return xmin

    def _quadmin(self, a, fa, fpa, b, fb):
311
        """Estimating the minimum with quadratic interpolation.
312

313
        Finds the minimizer for a quadratic polynomial that goes through
Martin Reinecke's avatar
Martin Reinecke committed
314
        the points (a,fa), (b,fb) with derivative at point a of fpa.
315

316 317
        Parameters
        ----------
Martin Reinecke's avatar
Martin Reinecke committed
318 319 320 321
        a, fa, fpa : float
            abscissa, function value and derivative at first point
        b, fb : float
            abscissa and function value at second point
322

323 324 325
        Returns
        -------
        xmin : float
326
            Position of the approximated minimum.
327 328 329 330
        """
        with np.errstate(divide='raise', over='raise', invalid='raise'):
            try:
                db = b - a * 1.0
Martin Reinecke's avatar
Martin Reinecke committed
331 332
                B = (fb - fa - fpa * db) / (db * db)
                xmin = a - fpa / (2.0 * B)
333 334 335 336 337
            except ArithmeticError:
                return None
        if not np.isfinite(xmin):
            return None
        return xmin