There is a maintenance of MPCDF Gitlab on Thursday, April 22st 2020, 9:00 am CEST - Expect some service interruptions during this time

line_search_strong_wolfe.py 15.1 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
Theo Steininger's avatar
Theo Steininger committed
13 14 15 16 17
#
# Copyright(C) 2013-2017 Max-Planck-Society
#
# NIFTy is being developed at the Max-Planck-Institut fuer Astrophysik
# and financially supported by the Studienstiftung des deutschen Volkes.
18

19 20 21 22 23 24
import numpy as np

from .line_search import LineSearch


class LineSearchStrongWolfe(LineSearch):
25
    """Class for finding a step size that satisfies the strong Wolfe conditions.
26

Martin Reinecke's avatar
Martin Reinecke committed
27 28 29
    Algorithm contains two stages. It begins whit a trial step length and
    keeps increasing it until it finds an acceptable step length or an
    interval. If it does not satisfy the Wolfe conditions, it performs the Zoom
30 31 32
    algorithm (second stage). By interpolating it decreases the size of the
    interval until an acceptable step length is found.

33 34
    Parameters
    ----------
35
    c1 : float
36
        Parameter for Armijo condition rule. (Default: 1e-4)
37
    c2 : float
38
        Parameter for curvature condition rule. (Default: 0.9)
39
    max_step_size : float
40
        Maximum step allowed in to be made in the descent direction.
41 42 43 44 45
        (Default: 50)
    max_iterations : integer
        Maximum number of iterations performed by the line search algorithm.
        (Default: 10)
    max_zoom_iterations : integer
46
        Maximum number of iterations performed by the zoom algorithm.
47
        (Default: 10)
48

49 50 51 52 53 54
    Attributes
    ----------
    c1 : float
        Parameter for Armijo condition rule.
    c2 : float
        Parameter for curvature condition rule.
55
    max_step_size : float
56
        Maximum step allowed in to be made in the descent direction.
57 58 59 60
    max_iterations : integer
        Maximum number of iterations performed by the line search algorithm.
    max_zoom_iterations : integer
        Maximum number of iterations performed by the zoom algorithm.
61

62 63
    """

Martin Reinecke's avatar
Martin Reinecke committed
64 65 66
#    def __init__(self, c1=1e-4, c2=0.9,
#                 max_step_size=1000000000, max_iterations=100,
#                 max_zoom_iterations=100):
67 68
    def __init__(self, c1=1e-4, c2=0.9,
                 max_step_size=50, max_iterations=10,
69
                 max_zoom_iterations=10):
70 71 72 73 74 75 76 77 78

        super(LineSearchStrongWolfe, self).__init__()

        self.c1 = np.float(c1)
        self.c2 = np.float(c2)
        self.max_step_size = max_step_size
        self.max_iterations = int(max_iterations)
        self.max_zoom_iterations = int(max_zoom_iterations)

79
    def perform_line_search(self, energy, pk, f_k_minus_1=None):
80
        """Performs the first stage of the algorithm.
81 82 83

        It starts with a trial step size and it keeps increasing it until it
        satisfy the strong Wolf conditions. It also performs the descent and
84
        returns the optimal step length and the new enrgy.
85

86 87 88 89 90 91 92 93
        Parameters
        ----------
        energy : Energy object
            Energy object from which we will calculate the energy and the
            gradient at a specific point.
        pk : Field
            Unit vector pointing into the search direction.
        f_k_minus_1 : float
94
            Value of the fuction (which is being minimized) at the k-1
95
            iteration of the line search procedure. (Default: None)
96

97 98 99 100 101 102 103 104
        Returns
        -------
        alpha_star : float
            The optimal step length in the descent direction.
        phi_star : float
            Value of the energy after the performed descent.
        energy_star : Energy object
            The new Energy object on the new position.
105 106 107

        """

108
        self._set_line_energy(energy, pk, f_k_minus_1=f_k_minus_1)
109 110 111 112 113
        max_step_size = self.max_step_size
        max_iterations = self.max_iterations

        # initialize the zero phis
        old_phi_0 = self.f_k_minus_1
Martin Reinecke's avatar
Martin Reinecke committed
114 115 116
        le_0 = self.line_energy.at(0)
        phi_0 = le_0.value
        phiprime_0 = le_0.dd
Martin Reinecke's avatar
Martin Reinecke committed
117
        assert phiprime_0<0, "input direction must be a descent direction"
118 119 120 121 122 123 124

        if phiprime_0 == 0:
            self.logger.warn("Flat gradient in search direction.")
            return 0., 0.

        # set alphas
        alpha0 = 0.
125 126
        if self.preferred_initial_step_size is not None:
            alpha1 = self.preferred_initial_step_size
127
        elif old_phi_0 is not None and phiprime_0 != 0:
128 129 130 131 132 133 134 135 136 137 138 139
            alpha1 = min(1.0, 1.01*2*(phi_0 - old_phi_0)/phiprime_0)
            if alpha1 < 0:
                alpha1 = 1.0
        else:
            alpha1 = 1.0

        # give the alpha0 phis the right init value
        phi_alpha0 = phi_0
        phiprime_alpha0 = phiprime_0

        # start the minimization loop
        for i in xrange(max_iterations):
Martin Reinecke's avatar
Martin Reinecke committed
140 141
            #print "a0a1:",alpha0, alpha1
            #print "line search outer iteration", i
Martin Reinecke's avatar
Martin Reinecke committed
142
            le_alpha1 = self.line_energy.at(alpha1)
Martin Reinecke's avatar
Martin Reinecke committed
143
            #print "position:", le_alpha1.energy.position.val[0]
Martin Reinecke's avatar
Martin Reinecke committed
144
            phi_alpha1 = le_alpha1.value
Martin Reinecke's avatar
Martin Reinecke committed
145
            #print "energy:", le_alpha1.value
146 147 148 149
            if alpha1 == 0:
                self.logger.warn("Increment size became 0.")
                alpha_star = 0.
                phi_star = phi_0
Martin Reinecke's avatar
Martin Reinecke committed
150
                le_star = le_0
151 152
                break

Martin Reinecke's avatar
Martin Reinecke committed
153
            if (phi_alpha1 > phi_0 + self.c1*alpha1*phiprime_0) or \
154
               ((phi_alpha1 >= phi_alpha0) and (i > 1)):
Martin Reinecke's avatar
Martin Reinecke committed
155
                (alpha_star, phi_star, le_star) = self._zoom(
156
                                                    alpha0, alpha1,
157 158 159
                                                    phi_0, phiprime_0,
                                                    phi_alpha0,
                                                    phiprime_alpha0,
Martin Reinecke's avatar
Martin Reinecke committed
160
                                                    phi_alpha1)
161 162
                break

Martin Reinecke's avatar
Martin Reinecke committed
163 164
            phiprime_alpha1 = le_alpha1.dd
            if abs(phiprime_alpha1) <= -self.c2*phiprime_0:
165 166
                alpha_star = alpha1
                phi_star = phi_alpha1
Martin Reinecke's avatar
Martin Reinecke committed
167
                le_star = le_alpha1
168 169 170
                break

            if phiprime_alpha1 >= 0:
Martin Reinecke's avatar
Martin Reinecke committed
171
                (alpha_star, phi_star, le_star) = self._zoom(
172
                                                    alpha1, alpha0,
173 174 175
                                                    phi_0, phiprime_0,
                                                    phi_alpha1,
                                                    phiprime_alpha1,
Martin Reinecke's avatar
Martin Reinecke committed
176
                                                    phi_alpha0)
177 178 179 180
                break

            # update alphas
            alpha0, alpha1 = alpha1, min(2*alpha1, max_step_size)
Martin Reinecke's avatar
Martin Reinecke committed
181
            if alpha1 == max_step_size:
Martin Reinecke's avatar
Martin Reinecke committed
182
                print "bailout"
Martin Reinecke's avatar
Martin Reinecke committed
183 184 185 186
                alpha_star = alpha1
                phi_star = phi_alpha1
                le_star = le_alpha1
                break
187 188 189 190 191 192 193 194
            phi_alpha0 = phi_alpha1
            phiprime_alpha0 = phiprime_alpha1
            # phi_alpha1 = self._phi(alpha1)

        else:
            # max_iterations was reached
            alpha_star = alpha1
            phi_star = phi_alpha1
Martin Reinecke's avatar
Martin Reinecke committed
195
            le_star = le_alpha1
196 197
            self.logger.error("The line search algorithm did not converge.")

198
        # extract the full energy from the line_energy
Martin Reinecke's avatar
Martin Reinecke committed
199
        energy_star = le_star.energy
Theo Steininger's avatar
Theo Steininger committed
200 201
        direction_length = pk.norm()
        step_length = alpha_star * direction_length
202
        return step_length, phi_star, energy_star
203 204

    def _zoom(self, alpha_lo, alpha_hi, phi_0, phiprime_0,
Martin Reinecke's avatar
Martin Reinecke committed
205
              phi_lo, phiprime_lo, phi_hi):
206
        """Performs the second stage of the line search algorithm.
207 208 209

        If the first stage was not successful then the Zoom algorithm tries to
        find a suitable step length by using bisection, quadratic, cubic
210
        interpolation.
211

212 213 214 215
        Parameters
        ----------
        alpha_lo : float
            The lower boundary for the step length interval.
Martin Reinecke's avatar
Martin Reinecke committed
216
        alpha_hi : float
217 218
            The upper boundary for the step length interval.
        phi_0 : float
219
            Value of the energy at the starting point of the line search
220 221 222 223
            algorithm.
        phiprime_0 : Field
            Gradient at the starting point of the line search algorithm.
        phi_lo : float
224
            Value of the energy if we perform a step of length alpha_lo in
225 226
            descent direction.
        phiprime_lo : Field
227
            Gradient at the nwe position if we perform a step of length
228 229
            alpha_lo in descent direction.
        phi_hi : float
230
            Value of the energy if we perform a step of length alpha_hi in
231
            descent direction.
232

233 234 235 236 237 238 239 240
        Returns
        -------
        alpha_star : float
            The optimal step length in the descent direction.
        phi_star : float
            Value of the energy after the performed descent.
        energy_star : Energy object
            The new Energy object on the new position.
241

242
        """
Martin Reinecke's avatar
Martin Reinecke committed
243 244 245 246
        #print "entering zoom"
        #print alpha_lo, alpha_hi
        #print "pos1:",self.line_energy.at(alpha_lo).energy.position.val[0]
        #print "pos2:",self.line_energy.at(alpha_hi).energy.position.val[0]
247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274
        max_iterations = self.max_zoom_iterations
        # define the cubic and quadratic interpolant checks
        cubic_delta = 0.2  # cubic
        quad_delta = 0.1  # quadratic

        # initialize the most recent versions (j-1) of phi and alpha
        alpha_recent = 0
        phi_recent = phi_0

        for i in xrange(max_iterations):
            delta_alpha = alpha_hi - alpha_lo
            if delta_alpha < 0:
                a, b = alpha_hi, alpha_lo
            else:
                a, b = alpha_lo, alpha_hi

            # Try cubic interpolation
            if i > 0:
                cubic_check = cubic_delta * delta_alpha
                alpha_j = self._cubicmin(alpha_lo, phi_lo, phiprime_lo,
                                         alpha_hi, phi_hi,
                                         alpha_recent, phi_recent)
            # If cubic was not successful or not available, try quadratic
            if (i == 0) or (alpha_j is None) or (alpha_j > b - cubic_check) or\
               (alpha_j < a + cubic_check):
                quad_check = quad_delta * delta_alpha
                alpha_j = self._quadmin(alpha_lo, phi_lo, phiprime_lo,
                                        alpha_hi, phi_hi)
Martin Reinecke's avatar
Martin Reinecke committed
275
                # If quadratic was not successful, try bisection
276 277 278 279 280
                if (alpha_j is None) or (alpha_j > b - quad_check) or \
                   (alpha_j < a + quad_check):
                    alpha_j = alpha_lo + 0.5*delta_alpha

            # Check if the current value of alpha_j is already sufficient
Martin Reinecke's avatar
Martin Reinecke committed
281 282
            le_alphaj = self.line_energy.at(alpha_j)
            phi_alphaj = le_alphaj.value
283

284 285
            # If the first Wolfe condition is not met replace alpha_hi
            # by alpha_j
Martin Reinecke's avatar
Martin Reinecke committed
286
            if (phi_alphaj > phi_0 + self.c1*alpha_j*phiprime_0) or\
287 288 289 290
               (phi_alphaj >= phi_lo):
                alpha_recent, phi_recent = alpha_hi, phi_hi
                alpha_hi, phi_hi = alpha_j, phi_alphaj
            else:
Martin Reinecke's avatar
Martin Reinecke committed
291
                phiprime_alphaj = le_alphaj.dd
292
                # If the second Wolfe condition is met, return the result
Martin Reinecke's avatar
Martin Reinecke committed
293
                if abs(phiprime_alphaj) <= -self.c2*phiprime_0:
294 295
                    alpha_star = alpha_j
                    phi_star = phi_alphaj
Martin Reinecke's avatar
Martin Reinecke committed
296
                    le_star = le_alphaj
297 298 299 300 301 302 303 304 305 306 307 308
                    break
                # If not, check the sign of the slope
                if phiprime_alphaj*delta_alpha >= 0:
                    alpha_recent, phi_recent = alpha_hi, phi_hi
                    alpha_hi, phi_hi = alpha_lo, phi_lo
                else:
                    alpha_recent, phi_recent = alpha_lo, phi_lo
                # Replace alpha_lo by alpha_j
                (alpha_lo, phi_lo, phiprime_lo) = (alpha_j, phi_alphaj,
                                                   phiprime_alphaj)

        else:
Martin Reinecke's avatar
Martin Reinecke committed
309 310
            alpha_star, phi_star, le_star = \
                alpha_j, phi_alphaj, le_alphaj
311 312 313
            self.logger.error("The line search algorithm (zoom) did not "
                              "converge.")

Martin Reinecke's avatar
Martin Reinecke committed
314
        return alpha_star, phi_star, le_star
315 316

    def _cubicmin(self, a, fa, fpa, b, fb, c, fc):
317
        """Estimating the minimum with cubic interpolation.
318

319
        Finds the minimizer for a cubic polynomial that goes through the
320 321
        points ( a,f(a) ), ( b,f(b) ), and ( c,f(c) ) with derivative at point
        a of fpa.
322
        f(x) = A *(x-a)^3 + B*(x-a)^2 + C*(x-a) + D
323
        If no minimizer can be found return None
324

325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340
        Parameters
        ----------
        a : float
            Selected point.
        fa : float
            Value of polynomial at point a.
        fpa : Field
            Derivative at point a.
        b : float
            Selected point.
        fb : float
            Value of polynomial at point b.
        c : float
            Selected point.
        fc : float
            Value of polynomial at point c.
341

342 343 344 345
        Returns
        -------
        xmin : float
            Position of the approximated minimum.
346

347 348 349 350 351 352 353
        """

        with np.errstate(divide='raise', over='raise', invalid='raise'):
            try:
                C = fpa
                db = b - a
                dc = c - a
354
                denom = db * db * dc * dc * (db - dc)
355
                d1 = np.empty((2, 2))
356 357 358 359
                d1[0, 0] = dc * dc
                d1[0, 1] = -(db*db)
                d1[1, 0] = -(dc*dc*dc)
                d1[1, 1] = db*db*db
360 361 362 363 364 365 366 367 368 369 370 371 372
                [A, B] = np.dot(d1, np.asarray([fb - fa - C * db,
                                                fc - fa - C * dc]).flatten())
                A /= denom
                B /= denom
                radical = B * B - 3 * A * C
                xmin = a + (-B + np.sqrt(radical)) / (3 * A)
            except ArithmeticError:
                return None
        if not np.isfinite(xmin):
            return None
        return xmin

    def _quadmin(self, a, fa, fpa, b, fb):
373
        """Estimating the minimum with quadratic interpolation.
374

375
        Finds the minimizer for a quadratic polynomial that goes through
376 377
        the points ( a,f(a) ), ( b,f(b) ) with derivative at point a of fpa.
        f(x) = B*(x-a)^2 + C*(x-a) + D
378

379 380 381 382 383 384 385 386 387 388 389 390
        Parameters
        ----------
        a : float
            Selected point.
        fa : float
            Value of polynomial at point a.
        fpa : Field
            Derivative at point a.
        b : float
            Selected point.
        fb : float
            Value of polynomial at point b.
391

392 393 394
        Returns
        -------
        xmin : float
395
            Position of the approximated minimum.
396 397 398 399 400 401 402 403 404 405 406 407 408 409
        """
        # f(x) = B*(x-a)^2 + C*(x-a) + D
        with np.errstate(divide='raise', over='raise', invalid='raise'):
            try:
                D = fa
                C = fpa
                db = b - a * 1.0
                B = (fb - D - C * db) / (db * db)
                xmin = a - C / (2.0 * B)
            except ArithmeticError:
                return None
        if not np.isfinite(xmin):
            return None
        return xmin