line_search.py 15.6 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
Theo Steininger's avatar
Theo Steininger committed
13
#
14
# Copyright(C) 2013-2019 Max-Planck-Society
Theo Steininger's avatar
Theo Steininger committed
15
#
16
# NIFTy is being developed at the Max-Planck-Institut fuer Astrophysik.
17

Martin Reinecke's avatar
Martin Reinecke committed
18 19
import numpy as np

Philipp Arras's avatar
Philipp Arras committed
20 21 22
from ..logger import logger
from ..utilities import NiftyMeta

Martin Reinecke's avatar
Martin Reinecke committed
23 24

class LineEnergy(object):
25
    """Evaluates an underlying Energy along a certain line direction.
Martin Reinecke's avatar
Martin Reinecke committed
26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63

    Given an Energy class and a line direction, its position is parametrized by
    a scalar step size along the descent direction relative to a zero point.

    Parameters
    ----------
    line_position : float
        Defines the full spatial position of this energy via
        self.energy.position = zero_point + line_position*line_direction
    energy : Energy
        The Energy object which will be evaluated along the given direction.
    line_direction : Field
        Direction used for line evaluation. Does not have to be normalized.
    offset :  float *optional*
        Indirectly defines the zero point of the line via the equation
        energy.position = zero_point + offset*line_direction
        (default : 0.).

    Notes
    -----
    The LineEnergy is used in minimization schemes in order perform line
    searches. It describes an underlying Energy which is restricted along one
    direction, only requiring the step size parameter to determine a new
    position.
    """

    def __init__(self, line_position, energy, line_direction, offset=0.):
        self._line_position = float(line_position)
        self._line_direction = line_direction

        if self._line_position == float(offset):
            self._energy = energy
        else:
            pos = energy.position \
                + (self._line_position-float(offset))*self._line_direction
            self._energy = energy.at(position=pos)

    def at(self, line_position):
64
        """Returns LineEnergy at new position, memorizing the zero point.
Martin Reinecke's avatar
Martin Reinecke committed
65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98

        Parameters
        ----------
        line_position : float
            Parameter for the new position on the line direction.

        Returns
        -------
            LineEnergy object at new position with same zero point as `self`.

        """

        return LineEnergy(line_position, self._energy, self._line_direction,
                          offset=self._line_position)

    @property
    def energy(self):
        """
        Energy : The underlying Energy object
        """
        return self._energy

    @property
    def value(self):
        """
        float : The value of the energy functional at given `position`.
        """
        return self._energy.value

    @property
    def directional_derivative(self):
        """
        float : The directional derivative at the given `position`.
        """
Martin Reinecke's avatar
Martin Reinecke committed
99
        res = self._energy.gradient.s_vdot(self._line_direction)
Martin Reinecke's avatar
Martin Reinecke committed
100 101 102 103 104
        if abs(res.imag) / max(abs(res.real), 1.) > 1e-12:
            from ..logger import logger
            logger.warning("directional derivative has non-negligible "
                           "imaginary part: {}".format(res))
        return res.real
105

106

Martin Reinecke's avatar
Martin Reinecke committed
107
class LineSearch(metaclass=NiftyMeta):
Martin Reinecke's avatar
Martin Reinecke committed
108 109 110 111 112 113 114 115
    """Class for finding a step size that satisfies the strong Wolfe
    conditions.

    Algorithm contains two stages. It begins with a trial step length and
    keeps increasing it until it finds an acceptable step length or an
    interval. If it does not satisfy the Wolfe conditions, it performs the Zoom
    algorithm (second stage). By interpolating it decreases the size of the
    interval until an acceptable step length is found.
116

Martin Reinecke's avatar
Martin Reinecke committed
117 118 119 120
    Parameters
    ----------
    preferred_initial_step_size : float, optional
        Newton-based methods should intialize this to 1.
Martin Reinecke's avatar
Martin Reinecke committed
121
    c1 : float
Philipp Arras's avatar
Philipp Arras committed
122
        Parameter for Armijo condition rule. Default: 1e-4.
Martin Reinecke's avatar
Martin Reinecke committed
123
    c2 : float
Philipp Arras's avatar
Philipp Arras committed
124
        Parameter for curvature condition rule. Default: 0.9.
Martin Reinecke's avatar
Martin Reinecke committed
125 126
    max_step_size : float
        Maximum step allowed in to be made in the descent direction.
Philipp Arras's avatar
Philipp Arras committed
127
        Default: 1e30.
Martin Reinecke's avatar
Martin Reinecke committed
128 129
    max_iterations : int, optional
        Maximum number of iterations performed by the line search algorithm.
Philipp Arras's avatar
Philipp Arras committed
130
        Default: 100.
Martin Reinecke's avatar
Martin Reinecke committed
131 132
    max_zoom_iterations : int, optional
        Maximum number of iterations performed by the zoom algorithm.
Philipp Arras's avatar
Philipp Arras committed
133
        Default: 100.
134
    """
135

Martin Reinecke's avatar
Martin Reinecke committed
136 137 138 139
    def __init__(self, preferred_initial_step_size=None, c1=1e-4, c2=0.9,
                 max_step_size=1e30, max_iterations=100,
                 max_zoom_iterations=100):

Martin Reinecke's avatar
Martin Reinecke committed
140
        self.preferred_initial_step_size = preferred_initial_step_size
Martin Reinecke's avatar
Martin Reinecke committed
141 142 143 144 145
        self.c1 = np.float(c1)
        self.c2 = np.float(c2)
        self.max_step_size = max_step_size
        self.max_iterations = int(max_iterations)
        self.max_zoom_iterations = int(max_zoom_iterations)
146

147
    def perform_line_search(self, energy, pk, f_k_minus_1=None):
Martin Reinecke's avatar
Martin Reinecke committed
148
        """Performs the first stage of the algorithm.
Martin Reinecke's avatar
Martin Reinecke committed
149

Martin Reinecke's avatar
Martin Reinecke committed
150 151 152
        It starts with a trial step size and it keeps increasing it until it
        satisfies the strong Wolf conditions. It also performs the descent and
        returns the optimal step length and the new energy.
Martin Reinecke's avatar
Martin Reinecke committed
153 154 155 156 157 158 159 160 161 162

        Parameters
        ----------
        energy : Energy
            Energy object from which we will calculate the energy and the
            gradient at a specific point.
        pk : Field
            Vector pointing into the search direction.
        f_k_minus_1 : float, optional
            Value of the fuction (which is being minimized) at the k-1
Philipp Arras's avatar
Philipp Arras committed
163
            iteration of the line search procedure. Default: None.
Martin Reinecke's avatar
Martin Reinecke committed
164 165 166 167 168

        Returns
        -------
        Energy
            The new Energy object on the new position.
Martin Reinecke's avatar
stage 1  
Martin Reinecke committed
169 170
        bool
            whether the line search was considered successful or not
Martin Reinecke's avatar
Martin Reinecke committed
171
        """
Martin Reinecke's avatar
Martin Reinecke committed
172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212
        le_0 = LineEnergy(0., energy, pk, 0.)

        maxstepsize = energy.longest_step(pk)
        if maxstepsize is None:
            maxstepsize = self.max_step_size
        maxstepsize = min(maxstepsize, self.max_step_size)

        # initialize the zero phis
        old_phi_0 = f_k_minus_1
        phi_0 = le_0.value
        phiprime_0 = le_0.directional_derivative
        if phiprime_0 == 0:
            logger.warning(
                "Directional derivative is zero; assuming convergence")
            return energy, False
        if phiprime_0 > 0:
            logger.error("Error: search direction is not a descent direction")
            return energy, False

        # set alphas
        alpha0 = 0.
        phi_alpha0 = phi_0
        phiprime_alpha0 = phiprime_0

        if self.preferred_initial_step_size is not None:
            alpha1 = self.preferred_initial_step_size
        elif old_phi_0 is not None:
            alpha1 = min(1.0, 1.01*2*(phi_0 - old_phi_0)/phiprime_0)
            if alpha1 < 0:
                alpha1 = 1.0
        else:
            alpha1 = 1.0/pk.norm()
        alpha1 = min(alpha1, 0.99*maxstepsize)

        # start the minimization loop
        iteration_number = 0
        while iteration_number < self.max_iterations:
            iteration_number += 1
            if alpha1 == 0:
                return le_0.energy, False

213 214 215 216 217 218 219
            try:
                le_alpha1 = le_0.at(alpha1)
                phi_alpha1 = le_alpha1.value
            except FloatingPointError:  # backtrack
                alpha1 = (alpha0+alpha1)/2
                continue  # next iteration

Martin Reinecke's avatar
cleanup  
Martin Reinecke committed
220
            if np.isnan(phi_alpha1) or np.abs(phi_alpha1) > 1e100:  # also backtrack
221 222
                alpha1 = (alpha0+alpha1)/2
                continue  # next iteration
Martin Reinecke's avatar
Martin Reinecke committed
223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420

            if (phi_alpha1 > phi_0 + self.c1*alpha1*phiprime_0) or \
               ((phi_alpha1 >= phi_alpha0) and (iteration_number > 1)):
                return self._zoom(alpha0, alpha1, phi_0, phiprime_0,
                                  phi_alpha0, phiprime_alpha0, phi_alpha1,
                                  le_0)

            phiprime_alpha1 = le_alpha1.directional_derivative
            if abs(phiprime_alpha1) <= -self.c2*phiprime_0:
                return le_alpha1.energy, True

            if phiprime_alpha1 >= 0:
                return self._zoom(alpha1, alpha0, phi_0, phiprime_0,
                                  phi_alpha1, phiprime_alpha1, phi_alpha0,
                                  le_0)

            # update alphas
            alpha0, alpha1 = alpha1, min(2*alpha1, maxstepsize)
            if alpha1 == maxstepsize:
                logger.warning("max step size reached")
                return le_alpha1.energy, False

            phi_alpha0 = phi_alpha1
            phiprime_alpha0 = phiprime_alpha1

        logger.warning("max iterations reached")
        return le_alpha1.energy, False

    def _zoom(self, alpha_lo, alpha_hi, phi_0, phiprime_0,
              phi_lo, phiprime_lo, phi_hi, le_0):
        """Performs the second stage of the line search algorithm.

        If the first stage was not successful then the Zoom algorithm tries to
        find a suitable step length by using bisection, quadratic, cubic
        interpolation.

        Parameters
        ----------
        alpha_lo : float
            A boundary for the step length interval.
            Fulfills Wolfe condition 1.
        alpha_hi : float
            The other boundary for the step length interval.
        phi_0 : float
            Value of the energy at the starting point of the line search
            algorithm.
        phiprime_0 : float
            directional derivative at the starting point of the line search
            algorithm.
        phi_lo : float
            Value of the energy if we perform a step of length alpha_lo in
            descent direction.
        phiprime_lo : float
            directional derivative at the new position if we perform a step of
            length alpha_lo in descent direction.
        phi_hi : float
            Value of the energy if we perform a step of length alpha_hi in
            descent direction.

        Returns
        -------
        Energy
            The new Energy object on the new position.
        """
        cubic_delta = 0.2  # cubic interpolant checks
        quad_delta = 0.1  # quadratic interpolant checks
        alpha_recent = None
        phi_recent = None

        if phi_lo > phi_0 + self.c1*alpha_lo*phiprime_0:
            raise ValueError("inconsistent data")
        if phiprime_lo*(alpha_hi-alpha_lo) >= 0.:
            raise ValueError("inconsistent data")
        for i in range(self.max_zoom_iterations):
            # assert phi_lo <= phi_0 + self.c1*alpha_lo*phiprime_0
            # assert phiprime_lo*(alpha_hi-alpha_lo)<0.
            delta_alpha = alpha_hi - alpha_lo
            a, b = min(alpha_lo, alpha_hi), max(alpha_lo, alpha_hi)

            # Try cubic interpolation
            if i > 0:
                cubic_check = cubic_delta * delta_alpha
                alpha_j = self._cubicmin(alpha_lo, phi_lo, phiprime_lo,
                                         alpha_hi, phi_hi,
                                         alpha_recent, phi_recent)
            # If cubic was not successful or not available, try quadratic
            if (i == 0) or (alpha_j is None) or (alpha_j > b - cubic_check) or\
               (alpha_j < a + cubic_check):
                quad_check = quad_delta * delta_alpha
                alpha_j = self._quadmin(alpha_lo, phi_lo, phiprime_lo,
                                        alpha_hi, phi_hi)
                # If quadratic was not successful, try bisection
                if (alpha_j is None) or (alpha_j > b - quad_check) or \
                   (alpha_j < a + quad_check):
                    alpha_j = alpha_lo + 0.5*delta_alpha

            # Check if the current value of alpha_j is already sufficient
            le_alphaj = le_0.at(alpha_j)
            phi_alphaj = le_alphaj.value

            # If the first Wolfe condition is not met replace alpha_hi
            # by alpha_j
            if (phi_alphaj > phi_0 + self.c1*alpha_j*phiprime_0) or \
               (phi_alphaj >= phi_lo):
                alpha_recent, phi_recent = alpha_hi, phi_hi
                alpha_hi, phi_hi = alpha_j, phi_alphaj
            else:
                phiprime_alphaj = le_alphaj.directional_derivative
                # If the second Wolfe condition is met, return the result
                if abs(phiprime_alphaj) <= -self.c2*phiprime_0:
                    return le_alphaj.energy, True
                # If not, check the sign of the slope
                if phiprime_alphaj*delta_alpha >= 0:
                    alpha_recent, phi_recent = alpha_hi, phi_hi
                    alpha_hi, phi_hi = alpha_lo, phi_lo
                else:
                    alpha_recent, phi_recent = alpha_lo, phi_lo
                # Replace alpha_lo by alpha_j
                (alpha_lo, phi_lo, phiprime_lo) = (alpha_j, phi_alphaj,
                                                   phiprime_alphaj)

        else:
            logger.warning(
                "The line search algorithm (zoom) did not converge.")
            return le_alphaj.energy, False

    def _cubicmin(self, a, fa, fpa, b, fb, c, fc):
        """Estimating the minimum with cubic interpolation.

        Finds the minimizer for a cubic polynomial that goes through the
        points (a,a), (b,fb), and (c,fc) with derivative at point a of fpa.
        If no minimizer can be found return None

        Parameters
        ----------
        a, fa, fpa : float
            abscissa, function value and derivative at first point
        b, fb : float
            abscissa and function value at second point
        c, fc : float
            abscissa and function value at third point

        Returns
        -------
        xmin : float
            Position of the approximated minimum.
        """
        with np.errstate(divide='raise', over='raise', invalid='raise'):
            try:
                C = fpa
                db = b - a
                dc = c - a
                denom = db * db * dc * dc * (db - dc)
                d1 = np.empty((2, 2))
                d1[0, 0] = dc * dc
                d1[0, 1] = -(db*db)
                d1[1, 0] = -(dc*dc*dc)
                d1[1, 1] = db*db*db
                [A, B] = np.dot(d1, np.asarray([fb - fa - C * db,
                                                fc - fa - C * dc]).ravel())
                A /= denom
                B /= denom
                radical = B * B - 3 * A * C
                xmin = a + (-B + np.sqrt(radical)) / (3 * A)
            except ArithmeticError:
                return None
        if not np.isfinite(xmin):
            return None
        return xmin

    def _quadmin(self, a, fa, fpa, b, fb):
        """Estimating the minimum with quadratic interpolation.

        Finds the minimizer for a quadratic polynomial that goes through
        the points (a,fa), (b,fb) with derivative at point a of fpa.

        Parameters
        ----------
        a, fa, fpa : float
            abscissa, function value and derivative at first point
        b, fb : float
            abscissa and function value at second point

        Returns
        -------
        xmin : float
            Position of the approximated minimum.
        """
        with np.errstate(divide='raise', over='raise', invalid='raise'):
            try:
                db = b - a * 1.0
                B = (fb - fa - fpa * db) / (db * db)
                xmin = a - fpa / (2.0 * B)
            except ArithmeticError:
                return None
        if not np.isfinite(xmin):
            return None
        return xmin