line_search.py 15.3 KB
 Theo Steininger committed Apr 13, 2017 1 2 3 4 5 6 7 8 9 10 11 12 ``````# This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see . `````` Theo Steininger committed May 24, 2017 13 ``````# `````` Martin Reinecke committed Jan 07, 2019 14 ``````# Copyright(C) 2013-2019 Max-Planck-Society `````` Theo Steininger committed May 24, 2017 15 ``````# `````` Martin Reinecke committed Jan 07, 2019 16 ``````# NIFTy is being developed at the Max-Planck-Institut fuer Astrophysik. `````` Philipp Arras committed Jul 10, 2018 17 `````` `````` Martin Reinecke committed Jan 16, 2019 18 ``````from ..utilities import NiftyMeta `````` Martin Reinecke committed Aug 27, 2018 19 20 21 22 23 ``````from ..logger import logger import numpy as np class LineEnergy(object): `````` Philipp Arras committed Jan 13, 2019 24 `````` """Evaluates an underlying Energy along a certain line direction. `````` Martin Reinecke committed Aug 27, 2018 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 `````` Given an Energy class and a line direction, its position is parametrized by a scalar step size along the descent direction relative to a zero point. Parameters ---------- line_position : float Defines the full spatial position of this energy via self.energy.position = zero_point + line_position*line_direction energy : Energy The Energy object which will be evaluated along the given direction. line_direction : Field Direction used for line evaluation. Does not have to be normalized. offset : float *optional* Indirectly defines the zero point of the line via the equation energy.position = zero_point + offset*line_direction (default : 0.). Notes ----- The LineEnergy is used in minimization schemes in order perform line searches. It describes an underlying Energy which is restricted along one direction, only requiring the step size parameter to determine a new position. """ def __init__(self, line_position, energy, line_direction, offset=0.): self._line_position = float(line_position) self._line_direction = line_direction if self._line_position == float(offset): self._energy = energy else: pos = energy.position \ + (self._line_position-float(offset))*self._line_direction self._energy = energy.at(position=pos) def at(self, line_position): `````` Philipp Arras committed Jan 13, 2019 63 `````` """Returns LineEnergy at new position, memorizing the zero point. `````` Martin Reinecke committed Aug 27, 2018 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 `````` Parameters ---------- line_position : float Parameter for the new position on the line direction. Returns ------- LineEnergy object at new position with same zero point as `self`. """ return LineEnergy(line_position, self._energy, self._line_direction, offset=self._line_position) @property def energy(self): """ Energy : The underlying Energy object """ return self._energy @property def value(self): """ float : The value of the energy functional at given `position`. """ return self._energy.value @property def directional_derivative(self): """ float : The directional derivative at the given `position`. """ res = self._energy.gradient.vdot(self._line_direction) if abs(res.imag) / max(abs(res.real), 1.) > 1e-12: from ..logger import logger logger.warning("directional derivative has non-negligible " "imaginary part: {}".format(res)) return res.real `````` Theo Steininger committed Oct 25, 2016 104 `````` `````` Theo Steininger committed Oct 18, 2016 105 `````` `````` Martin Reinecke committed Jan 16, 2019 106 ``````class LineSearch(metaclass=NiftyMeta): `````` Martin Reinecke committed Aug 27, 2018 107 108 109 110 111 112 113 114 `````` """Class for finding a step size that satisfies the strong Wolfe conditions. Algorithm contains two stages. It begins with a trial step length and keeps increasing it until it finds an acceptable step length or an interval. If it does not satisfy the Wolfe conditions, it performs the Zoom algorithm (second stage). By interpolating it decreases the size of the interval until an acceptable step length is found. `````` Martin Reinecke committed Jul 09, 2017 115 `````` `````` Martin Reinecke committed Feb 19, 2018 116 117 118 119 `````` Parameters ---------- preferred_initial_step_size : float, optional Newton-based methods should intialize this to 1. `````` Martin Reinecke committed Aug 27, 2018 120 `````` c1 : float `````` Philipp Arras committed Jan 13, 2019 121 `````` Parameter for Armijo condition rule. Default: 1e-4. `````` Martin Reinecke committed Aug 27, 2018 122 `````` c2 : float `````` Philipp Arras committed Jan 13, 2019 123 `````` Parameter for curvature condition rule. Default: 0.9. `````` Martin Reinecke committed Aug 27, 2018 124 125 `````` max_step_size : float Maximum step allowed in to be made in the descent direction. `````` Philipp Arras committed Jan 13, 2019 126 `````` Default: 1e30. `````` Martin Reinecke committed Aug 27, 2018 127 128 `````` max_iterations : int, optional Maximum number of iterations performed by the line search algorithm. `````` Philipp Arras committed Jan 13, 2019 129 `````` Default: 100. `````` Martin Reinecke committed Aug 27, 2018 130 131 `````` max_zoom_iterations : int, optional Maximum number of iterations performed by the zoom algorithm. `````` Philipp Arras committed Jan 13, 2019 132 `````` Default: 100. `````` Theo Steininger committed Oct 18, 2016 133 `````` """ `````` Martin Reinecke committed Jul 09, 2017 134 `````` `````` Martin Reinecke committed Aug 27, 2018 135 136 137 138 `````` def __init__(self, preferred_initial_step_size=None, c1=1e-4, c2=0.9, max_step_size=1e30, max_iterations=100, max_zoom_iterations=100): `````` Martin Reinecke committed Feb 17, 2018 139 `````` self.preferred_initial_step_size = preferred_initial_step_size `````` Martin Reinecke committed Aug 27, 2018 140 141 142 143 144 `````` self.c1 = np.float(c1) self.c2 = np.float(c2) self.max_step_size = max_step_size self.max_iterations = int(max_iterations) self.max_zoom_iterations = int(max_zoom_iterations) `````` Theo Steininger committed Oct 18, 2016 145 `````` `````` Theo Steininger committed Oct 25, 2016 146 `````` def perform_line_search(self, energy, pk, f_k_minus_1=None): `````` Martin Reinecke committed Aug 27, 2018 147 `````` """Performs the first stage of the algorithm. `````` Martin Reinecke committed Feb 19, 2018 148 `````` `````` Martin Reinecke committed Aug 27, 2018 149 150 151 `````` It starts with a trial step size and it keeps increasing it until it satisfies the strong Wolf conditions. It also performs the descent and returns the optimal step length and the new energy. `````` Martin Reinecke committed Feb 19, 2018 152 153 154 155 156 157 158 159 160 161 `````` Parameters ---------- energy : Energy Energy object from which we will calculate the energy and the gradient at a specific point. pk : Field Vector pointing into the search direction. f_k_minus_1 : float, optional Value of the fuction (which is being minimized) at the k-1 `````` Philipp Arras committed Jan 13, 2019 162 `````` iteration of the line search procedure. Default: None. `````` Martin Reinecke committed Feb 19, 2018 163 164 165 166 167 `````` Returns ------- Energy The new Energy object on the new position. `````` Martin Reinecke committed Mar 07, 2018 168 169 `````` bool whether the line search was considered successful or not `````` Martin Reinecke committed Feb 19, 2018 170 `````` """ `````` Martin Reinecke committed Aug 27, 2018 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 `````` le_0 = LineEnergy(0., energy, pk, 0.) maxstepsize = energy.longest_step(pk) if maxstepsize is None: maxstepsize = self.max_step_size maxstepsize = min(maxstepsize, self.max_step_size) # initialize the zero phis old_phi_0 = f_k_minus_1 phi_0 = le_0.value phiprime_0 = le_0.directional_derivative if phiprime_0 == 0: logger.warning( "Directional derivative is zero; assuming convergence") return energy, False if phiprime_0 > 0: logger.error("Error: search direction is not a descent direction") return energy, False # set alphas alpha0 = 0. phi_alpha0 = phi_0 phiprime_alpha0 = phiprime_0 if self.preferred_initial_step_size is not None: alpha1 = self.preferred_initial_step_size elif old_phi_0 is not None: alpha1 = min(1.0, 1.01*2*(phi_0 - old_phi_0)/phiprime_0) if alpha1 < 0: alpha1 = 1.0 else: alpha1 = 1.0/pk.norm() alpha1 = min(alpha1, 0.99*maxstepsize) # start the minimization loop iteration_number = 0 while iteration_number < self.max_iterations: iteration_number += 1 if alpha1 == 0: return le_0.energy, False le_alpha1 = le_0.at(alpha1) phi_alpha1 = le_alpha1.value if (phi_alpha1 > phi_0 + self.c1*alpha1*phiprime_0) or \ ((phi_alpha1 >= phi_alpha0) and (iteration_number > 1)): return self._zoom(alpha0, alpha1, phi_0, phiprime_0, phi_alpha0, phiprime_alpha0, phi_alpha1, le_0) phiprime_alpha1 = le_alpha1.directional_derivative if abs(phiprime_alpha1) <= -self.c2*phiprime_0: return le_alpha1.energy, True if phiprime_alpha1 >= 0: return self._zoom(alpha1, alpha0, phi_0, phiprime_0, phi_alpha1, phiprime_alpha1, phi_alpha0, le_0) # update alphas alpha0, alpha1 = alpha1, min(2*alpha1, maxstepsize) if alpha1 == maxstepsize: logger.warning("max step size reached") return le_alpha1.energy, False phi_alpha0 = phi_alpha1 phiprime_alpha0 = phiprime_alpha1 logger.warning("max iterations reached") return le_alpha1.energy, False def _zoom(self, alpha_lo, alpha_hi, phi_0, phiprime_0, phi_lo, phiprime_lo, phi_hi, le_0): """Performs the second stage of the line search algorithm. If the first stage was not successful then the Zoom algorithm tries to find a suitable step length by using bisection, quadratic, cubic interpolation. Parameters ---------- alpha_lo : float A boundary for the step length interval. Fulfills Wolfe condition 1. alpha_hi : float The other boundary for the step length interval. phi_0 : float Value of the energy at the starting point of the line search algorithm. phiprime_0 : float directional derivative at the starting point of the line search algorithm. phi_lo : float Value of the energy if we perform a step of length alpha_lo in descent direction. phiprime_lo : float directional derivative at the new position if we perform a step of length alpha_lo in descent direction. phi_hi : float Value of the energy if we perform a step of length alpha_hi in descent direction. Returns ------- Energy The new Energy object on the new position. """ cubic_delta = 0.2 # cubic interpolant checks quad_delta = 0.1 # quadratic interpolant checks alpha_recent = None phi_recent = None if phi_lo > phi_0 + self.c1*alpha_lo*phiprime_0: raise ValueError("inconsistent data") if phiprime_lo*(alpha_hi-alpha_lo) >= 0.: raise ValueError("inconsistent data") for i in range(self.max_zoom_iterations): # assert phi_lo <= phi_0 + self.c1*alpha_lo*phiprime_0 # assert phiprime_lo*(alpha_hi-alpha_lo)<0. delta_alpha = alpha_hi - alpha_lo a, b = min(alpha_lo, alpha_hi), max(alpha_lo, alpha_hi) # Try cubic interpolation if i > 0: cubic_check = cubic_delta * delta_alpha alpha_j = self._cubicmin(alpha_lo, phi_lo, phiprime_lo, alpha_hi, phi_hi, alpha_recent, phi_recent) # If cubic was not successful or not available, try quadratic if (i == 0) or (alpha_j is None) or (alpha_j > b - cubic_check) or\ (alpha_j < a + cubic_check): quad_check = quad_delta * delta_alpha alpha_j = self._quadmin(alpha_lo, phi_lo, phiprime_lo, alpha_hi, phi_hi) # If quadratic was not successful, try bisection if (alpha_j is None) or (alpha_j > b - quad_check) or \ (alpha_j < a + quad_check): alpha_j = alpha_lo + 0.5*delta_alpha # Check if the current value of alpha_j is already sufficient le_alphaj = le_0.at(alpha_j) phi_alphaj = le_alphaj.value # If the first Wolfe condition is not met replace alpha_hi # by alpha_j if (phi_alphaj > phi_0 + self.c1*alpha_j*phiprime_0) or \ (phi_alphaj >= phi_lo): alpha_recent, phi_recent = alpha_hi, phi_hi alpha_hi, phi_hi = alpha_j, phi_alphaj else: phiprime_alphaj = le_alphaj.directional_derivative # If the second Wolfe condition is met, return the result if abs(phiprime_alphaj) <= -self.c2*phiprime_0: return le_alphaj.energy, True # If not, check the sign of the slope if phiprime_alphaj*delta_alpha >= 0: alpha_recent, phi_recent = alpha_hi, phi_hi alpha_hi, phi_hi = alpha_lo, phi_lo else: alpha_recent, phi_recent = alpha_lo, phi_lo # Replace alpha_lo by alpha_j (alpha_lo, phi_lo, phiprime_lo) = (alpha_j, phi_alphaj, phiprime_alphaj) else: logger.warning( "The line search algorithm (zoom) did not converge.") return le_alphaj.energy, False def _cubicmin(self, a, fa, fpa, b, fb, c, fc): """Estimating the minimum with cubic interpolation. Finds the minimizer for a cubic polynomial that goes through the points (a,a), (b,fb), and (c,fc) with derivative at point a of fpa. If no minimizer can be found return None Parameters ---------- a, fa, fpa : float abscissa, function value and derivative at first point b, fb : float abscissa and function value at second point c, fc : float abscissa and function value at third point Returns ------- xmin : float Position of the approximated minimum. """ with np.errstate(divide='raise', over='raise', invalid='raise'): try: C = fpa db = b - a dc = c - a denom = db * db * dc * dc * (db - dc) d1 = np.empty((2, 2)) d1[0, 0] = dc * dc d1[0, 1] = -(db*db) d1[1, 0] = -(dc*dc*dc) d1[1, 1] = db*db*db [A, B] = np.dot(d1, np.asarray([fb - fa - C * db, fc - fa - C * dc]).ravel()) A /= denom B /= denom radical = B * B - 3 * A * C xmin = a + (-B + np.sqrt(radical)) / (3 * A) except ArithmeticError: return None if not np.isfinite(xmin): return None return xmin def _quadmin(self, a, fa, fpa, b, fb): """Estimating the minimum with quadratic interpolation. Finds the minimizer for a quadratic polynomial that goes through the points (a,fa), (b,fb) with derivative at point a of fpa. Parameters ---------- a, fa, fpa : float abscissa, function value and derivative at first point b, fb : float abscissa and function value at second point Returns ------- xmin : float Position of the approximated minimum. """ with np.errstate(divide='raise', over='raise', invalid='raise'): try: db = b - a * 1.0 B = (fb - fa - fpa * db) / (db * db) xmin = a - fpa / (2.0 * B) except ArithmeticError: return None if not np.isfinite(xmin): return None return xmin``````