# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
#
# Copyright(C) 2013-2017 Max-Planck-Society
#
# NIFTy is being developed at the Max-Planck-Institut fuer Astrophysik
# and financially supported by the Studienstiftung des deutschen Volkes.

import numpy as np

from .line_search import LineSearch
from ...energies import LineEnergy


class LineSearchStrongWolfe(LineSearch):
    """Class for finding a step size that satisfies the strong Wolfe conditions.

    The algorithm has two stages. It begins with a trial step length and
    keeps increasing it until it finds an acceptable step length or an
    interval that brackets one. If the trial step does not satisfy the
    Wolfe conditions, the Zoom algorithm (second stage) is performed: by
    interpolation it decreases the size of the interval until an
    acceptable step length is found.

    Parameters
    ----------
    c1 : float
        Parameter for Armijo condition rule. (Default: 1e-4)
    c2 : float
        Parameter for curvature condition rule. (Default: 0.9)
    max_step_size : float
        Maximum step size allowed in the descent direction.
        (Default: 1000000000)
    max_iterations : integer
        Maximum number of iterations performed by the line search algorithm.
        (Default: 100)
    max_zoom_iterations : integer
        Maximum number of iterations performed by the zoom algorithm.
        (Default: 30)

    Attributes
    ----------
    c1 : float
        Parameter for Armijo condition rule.
    c2 : float
        Parameter for curvature condition rule.
    max_step_size : float
        Maximum step size allowed in the descent direction.
    max_iterations : integer
        Maximum number of iterations performed by the line search algorithm.
    max_zoom_iterations : integer
        Maximum number of iterations performed by the zoom algorithm.

    """

    def __init__(self, c1=1e-4, c2=0.9,
                 max_step_size=1000000000, max_iterations=100,
                 max_zoom_iterations=30):

        super(LineSearchStrongWolfe, self).__init__()

        self.c1 = float(c1)
        self.c2 = float(c2)
        self.max_step_size = max_step_size
        self.max_iterations = int(max_iterations)
        self.max_zoom_iterations = int(max_zoom_iterations)

    def perform_line_search(self, energy, pk, f_k_minus_1=None):
        """Performs the first stage of the algorithm.

        It starts with a trial step size and keeps increasing it until it
        satisfies the strong Wolfe conditions. It then performs the descent
        and returns the new Energy object at the accepted step length.

        Parameters
        ----------
        energy : Energy object
            Energy object from which we will calculate the energy and the
            gradient at a specific point.
        pk : Field
            Vector pointing into the search direction.
        f_k_minus_1 : float
            Value of the function (which is being minimized) at the (k-1)th
            iteration of the line search procedure. (Default: None)

        Returns
        -------
        energy_star : Energy object
            The new Energy object at the new position.

        """

        le_0 = LineEnergy(0., energy, pk, 0.)

        # initialize the phi values at alpha = 0
        old_phi_0 = f_k_minus_1
        phi_0 = le_0.value
        phiprime_0 = le_0.directional_derivative
        if phiprime_0 >= 0:
            self.logger.error("Input direction must be a descent direction")
            raise RuntimeError

        # set alphas
        alpha0 = 0.
        phi_alpha0 = phi_0
        phiprime_alpha0 = phiprime_0

        if self.preferred_initial_step_size is not None:
            alpha1 = self.preferred_initial_step_size
        elif old_phi_0 is not None:
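            # Heuristic for the first trial step: choose alpha1 such that the
            # predicted first-order decrease alpha1*phiprime_0 roughly matches
            # the decrease (phi_0 - old_phi_0) achieved in the previous
            # iteration, capped at 1 (cf. the initial-step guess recommended
            # in Nocedal & Wright, "Numerical Optimization").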
            alpha1 = min(1.0, 1.01*2*(phi_0 - old_phi_0)/phiprime_0)
            if alpha1 < 0:
                alpha1 = 1.0
        else:
            alpha1 = 1.0/pk.norm()

        # start the minimization loop
        for i in xrange(self.max_iterations):
            if alpha1 == 0:
                self.logger.warn("Increment size became 0.")
                return le_0.energy

            le_alpha1 = le_0.at(alpha1)
            phi_alpha1 = le_alpha1.value

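            # If the sufficient-decrease (Armijo) condition is violated, or
            # the objective stopped decreasing compared to the previous trial
            # step, an acceptable step is bracketed by [alpha0, alpha1]:
            # switch to the zoom stage.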
            if (phi_alpha1 > phi_0 + self.c1*alpha1*phiprime_0) or \
               ((phi_alpha1 >= phi_alpha0) and (i > 0)):
                le_star = self._zoom(alpha0, alpha1, phi_0, phiprime_0,
                                     phi_alpha0, phiprime_alpha0, phi_alpha1,
                                     le_0)
                return le_star.energy

            phiprime_alpha1 = le_alpha1.directional_derivative
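            # Strong curvature condition: the slope at alpha1 is small enough
            # in magnitude, so alpha1 satisfies both strong Wolfe conditions.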
            if abs(phiprime_alpha1) <= -self.c2*phiprime_0:
                return le_alpha1.energy

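            # A positive slope means the minimum was overshot: zoom on the
            # bracket with the roles of alpha0 and alpha1 interchanged.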
            if phiprime_alpha1 >= 0:
                le_star = self._zoom(alpha1, alpha0, phi_0, phiprime_0,
                                     phi_alpha1, phiprime_alpha1, phi_alpha0,
                                     le_0)
                return le_star.energy

            # Neither Wolfe condition was triggered: double the step size
            # (capped at max_step_size) and continue.
            alpha0, alpha1 = alpha1, min(2*alpha1, self.max_step_size)
            if alpha1 == self.max_step_size:
                self.logger.warn("Reached max step size, bailing out.")
                return le_alpha1.energy

            phi_alpha0 = phi_alpha1
            phiprime_alpha0 = phiprime_alpha1

        else:
            # max_iterations was reached
            self.logger.error("The line search algorithm did not converge.")
            return le_alpha1.energy

    def _zoom(self, alpha_lo, alpha_hi, phi_0, phiprime_0,
              phi_lo, phiprime_lo, phi_hi, le_0):
        """Performs the second stage of the line search algorithm.

        If the first stage was not successful, the Zoom algorithm tries to
        find a suitable step length by cubic or quadratic interpolation,
        falling back to bisection.

        Parameters
        ----------
        alpha_lo : float
            A boundary for the step length interval.
            Fulfills Wolfe condition 1.
        alpha_hi : float
            The other boundary for the step length interval.
        phi_0 : float
            Value of the energy at the starting point of the line search
            algorithm.
        phiprime_0 : float
            Directional derivative at the starting point of the line search
            algorithm.
        phi_lo : float
            Value of the energy if we perform a step of length alpha_lo in
            descent direction.
        phiprime_lo : float
            Directional derivative at the new position if we perform a step
            of length alpha_lo in the descent direction.
        phi_hi : float
            Value of the energy if we perform a step of length alpha_hi in
            the descent direction.
        le_0 : LineEnergy
            LineEnergy object at alpha = 0, used to create new LineEnergy
            objects at the trial step lengths.

        Returns
        -------
        energy_star : Energy object
            The new Energy object at the new position.

        """
        # define the cubic and quadratic interpolant checks
        cubic_delta = 0.2  # cubic
        quad_delta = 0.1  # quadratic
        alpha_recent = None
        phi_recent = None

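        # Invariants of the zoom stage: alpha_lo always satisfies the
        # sufficient-decrease condition, and phi decreases when moving from
        # alpha_lo towards alpha_hi, so the bracket contains an acceptable
        # step length.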
        assert phi_lo <= phi_0 + self.c1*alpha_lo*phiprime_0
        assert phiprime_lo*(alpha_hi-alpha_lo) < 0.
        for i in xrange(self.max_zoom_iterations):
            # assert phi_lo <= phi_0 + self.c1*alpha_lo*phiprime_0
            # assert phiprime_lo*(alpha_hi-alpha_lo)<0.
            delta_alpha = alpha_hi - alpha_lo
            a, b = min(alpha_lo, alpha_hi), max(alpha_lo, alpha_hi)

            # Try cubic interpolation
            if i > 0:
                cubic_check = cubic_delta * delta_alpha
                alpha_j = self._cubicmin(alpha_lo, phi_lo, phiprime_lo,
                                         alpha_hi, phi_hi,
                                         alpha_recent, phi_recent)
            # If cubic was not successful or not available, try quadratic
            if (i == 0) or (alpha_j is None) or (alpha_j > b - cubic_check) or\
               (alpha_j < a + cubic_check):
                quad_check = quad_delta * delta_alpha
                alpha_j = self._quadmin(alpha_lo, phi_lo, phiprime_lo,
                                        alpha_hi, phi_hi)
                # If quadratic was not successful, try bisection
                if (alpha_j is None) or (alpha_j > b - quad_check) or \
                   (alpha_j < a + quad_check):
                    alpha_j = alpha_lo + 0.5*delta_alpha

            # Check if the current value of alpha_j is already sufficient
            le_alphaj = le_0.at(alpha_j)
            phi_alphaj = le_alphaj.value

            # If the first Wolfe condition is not met replace alpha_hi
            # by alpha_j
            if (phi_alphaj > phi_0 + self.c1*alpha_j*phiprime_0) or \
               (phi_alphaj >= phi_lo):
                alpha_recent, phi_recent = alpha_hi, phi_hi
                alpha_hi, phi_hi = alpha_j, phi_alphaj
            else:
                phiprime_alphaj = le_alphaj.directional_derivative
                # If the second Wolfe condition is met, return the result
                if abs(phiprime_alphaj) <= -self.c2*phiprime_0:
                    return le_alphaj
                # If not, check the sign of the slope
                if phiprime_alphaj*delta_alpha >= 0:
                    alpha_recent, phi_recent = alpha_hi, phi_hi
                    alpha_hi, phi_hi = alpha_lo, phi_lo
                else:
                    alpha_recent, phi_recent = alpha_lo, phi_lo
                # Replace alpha_lo by alpha_j
                (alpha_lo, phi_lo, phiprime_lo) = (alpha_j, phi_alphaj,
                                                   phiprime_alphaj)

        else:
            self.logger.error("The line search algorithm (zoom) did not "
                              "converge.")
            return le_alphaj

    def _cubicmin(self, a, fa, fpa, b, fb, c, fc):
        """Estimating the minimum with cubic interpolation.

        Finds the minimizer of a cubic polynomial that goes through the
        points (a, f(a)), (b, f(b)), and (c, f(c)) with derivative fpa at
        point a:
        f(x) = A*(x-a)^3 + B*(x-a)^2 + C*(x-a) + D
        If no minimizer can be found, return None.

        Parameters
        ----------
        a : float
            Selected point.
        fa : float
            Value of polynomial at point a.
        fpa : float
            Derivative at point a.
        b : float
            Selected point.
        fb : float
            Value of polynomial at point b.
        c : float
            Selected point.
        fc : float
            Value of polynomial at point c.

        Returns
        -------
        xmin : float
            Position of the approximated minimum.

        """

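        # Derivation sketch: D = f(a) and C = f'(a) are fixed directly by the
        # data at a; the remaining coefficients A and B follow from the two
        # conditions f(b) = fb and f(c) = fc, which form the 2x2 linear
        # system solved via the matrix d1 below. The minimizer is the
        # stationary point f'(x) = 3*A*(x-a)^2 + 2*B*(x-a) + C = 0, i.e.
        # x = a + (-B + sqrt(B^2 - 3*A*C)) / (3*A), where the "+" root gives
        # the local minimum (non-negative second derivative).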
        with np.errstate(divide='raise', over='raise', invalid='raise'):
            try:
                C = fpa
                db = b - a
                dc = c - a
                denom = db * db * dc * dc * (db - dc)
                d1 = np.empty((2, 2))
                d1[0, 0] = dc * dc
                d1[0, 1] = -(db*db)
                d1[1, 0] = -(dc*dc*dc)
                d1[1, 1] = db*db*db
                [A, B] = np.dot(d1, np.asarray([fb - fa - C * db,
                                                fc - fa - C * dc]).flatten())
                A /= denom
                B /= denom
                radical = B * B - 3 * A * C
                xmin = a + (-B + np.sqrt(radical)) / (3 * A)
            except ArithmeticError:
                return None
        if not np.isfinite(xmin):
            return None
        return xmin

    def _quadmin(self, a, fa, fpa, b, fb):
        """Estimating the minimum with quadratic interpolation.

        Finds the minimizer of a quadratic polynomial that goes through the
        points (a, f(a)) and (b, f(b)) with derivative fpa at point a:
        f(x) = B*(x-a)^2 + C*(x-a) + D

        Parameters
        ----------
        a : float
            Selected point.
        fa : float
            Value of polynomial at point a.
        fpa : float
            Derivative at point a.
        b : float
            Selected point.
        fb : float
            Value of polynomial at point b.

        Returns
        -------
        xmin : float
            Position of the approximated minimum.
        """
        # f(x) = B*(x-a)^2 + C*(x-a) + D
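        # D = f(a) and C = f'(a) are fixed by the data at a; B follows from
        # f(b) = fb as B = (fb - D - C*(b-a)) / (b-a)^2, and the stationary
        # point of f lies at x = a - C/(2*B).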
        with np.errstate(divide='raise', over='raise', invalid='raise'):
            try:
                D = fa
                C = fpa
                db = b - a * 1.0
                B = (fb - D - C * db) / (db * db)
                xmin = a - C / (2.0 * B)
            except ArithmeticError:
                return None
        if not np.isfinite(xmin):
            return None
        return xmin