Commit c300a904 authored by Martin Reinecke

consolidate line searching

parent e75dccac
@@ -54,7 +54,6 @@ from .probing import probe_with_posterior_samples, probe_diagonal, \
StatCalculator
from .minimization.line_search import LineSearch
from .minimization.line_search_strong_wolfe import LineSearchStrongWolfe
from .minimization.iteration_controllers import (
IterationController, GradientNormController, DeltaEnergyController)
from .minimization.minimizer import Minimizer
@@ -66,7 +65,6 @@ from .minimization.descent_minimizers import (
from .minimization.scipy_minimizer import (ScipyMinimizer, L_BFGS_B, ScipyCG)
from .minimization.energy import Energy
from .minimization.quadratic_energy import QuadraticEnergy
from .minimization.line_energy import LineEnergy
from .minimization.energy_adapter import EnergyAdapter
from .minimization.kl_energy import KL_Energy
@@ -22,7 +22,7 @@ import numpy as np
from ..compat import *
from ..logger import logger
from .line_search_strong_wolfe import LineSearchStrongWolfe
from .line_search import LineSearch
from .minimizer import Minimizer
@@ -40,10 +40,10 @@ class DescentMinimizer(Minimizer):
Object that decides when to terminate the minimization.
line_searcher : callable *optional*
Function which infers the step size in the descent direction
(default : LineSearchStrongWolfe()).
(default : LineSearch()).
"""
def __init__(self, controller, line_searcher=LineSearchStrongWolfe()):
def __init__(self, controller, line_searcher=LineSearch()):
self._controller = controller
self.line_searcher = line_searcher
@@ -144,8 +144,7 @@ class RelaxedNewton(DescentMinimizer):
def __init__(self, controller, line_searcher=None):
if line_searcher is None:
line_searcher = LineSearchStrongWolfe(
preferred_initial_step_size=1.)
line_searcher = LineSearch(preferred_initial_step_size=1.)
super(RelaxedNewton, self).__init__(controller=controller,
line_searcher=line_searcher)
@@ -161,8 +160,7 @@ class NewtonCG(DescentMinimizer):
def __init__(self, controller, line_searcher=None):
if line_searcher is None:
line_searcher = LineSearchStrongWolfe(
preferred_initial_step_size=1.)
line_searcher = LineSearch(preferred_initial_step_size=1.)
super(NewtonCG, self).__init__(controller=controller,
line_searcher=line_searcher)
@@ -201,7 +199,7 @@ class NewtonCG(DescentMinimizer):
class L_BFGS(DescentMinimizer):
def __init__(self, controller, line_searcher=LineSearchStrongWolfe(),
def __init__(self, controller, line_searcher=LineSearch(),
max_history_length=5):
super(L_BFGS, self).__init__(controller=controller,
line_searcher=line_searcher)
@@ -266,7 +264,7 @@ class VL_BFGS(DescentMinimizer):
Microsoft
"""
def __init__(self, controller, line_searcher=LineSearchStrongWolfe(),
def __init__(self, controller, line_searcher=LineSearch(),
max_history_length=5):
super(VL_BFGS, self).__init__(controller=controller,
line_searcher=line_searcher)
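With this change every descent minimizer defaults to the consolidated LineSearch. A minimal usage sketch, assuming the package is importable as nifty5; the controller settings are illustrative and not part of this commit:

import nifty5 as ift  # package name assumed

# stop after 50 iterations or once the gradient norm is small enough
ic = ift.GradientNormController(tol_abs_gradnorm=1e-5, iteration_limit=50)

# NewtonCG now defaults to LineSearch(preferred_initial_step_size=1.)
minimizer = ift.NewtonCG(ic)

# an explicit line searcher can still be passed, e.g. with a looser c2
minimizer = ift.L_BFGS(ic, line_searcher=ift.LineSearch(c2=0.5))

# minimizers are then applied to an Energy instance:
#     energy, status = minimizer(energy)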
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
# Copyright(C) 2013-2018 Max-Planck-Society
#
# NIFTy is being developed at the Max-Planck-Institut fuer Astrophysik
# and financially supported by the Studienstiftung des deutschen Volkes.
from __future__ import absolute_import, division, print_function
from ..compat import *
class LineEnergy(object):
""" Evaluates an underlying Energy along a certain line direction.
Given an Energy object and a line direction, the position is parametrized by
a scalar step size along that direction relative to a zero point.
Parameters
----------
line_position : float
Defines the full spatial position of this energy via
self.energy.position = zero_point + line_position*line_direction
energy : Energy
The Energy object which will be evaluated along the given direction.
line_direction : Field
Direction used for line evaluation. Does not have to be normalized.
offset : float *optional*
Indirectly defines the zero point of the line via the equation
energy.position = zero_point + offset*line_direction
(default : 0.).
Notes
-----
The LineEnergy is used in minimization schemes in order to perform line
searches. It describes an underlying Energy which is restricted along one
direction, only requiring the step size parameter to determine a new
position.
"""
def __init__(self, line_position, energy, line_direction, offset=0.):
self._line_position = float(line_position)
self._line_direction = line_direction
if self._line_position == float(offset):
self._energy = energy
else:
pos = energy.position \
+ (self._line_position-float(offset))*self._line_direction
self._energy = energy.at(position=pos)
def at(self, line_position):
""" Returns LineEnergy at new position, memorizing the zero point.
Parameters
----------
line_position : float
Parameter for the new position on the line direction.
Returns
-------
LineEnergy object at new position with same zero point as `self`.
"""
return LineEnergy(line_position, self._energy, self._line_direction,
offset=self._line_position)
@property
def energy(self):
"""
Energy : The underlying Energy object
"""
return self._energy
@property
def value(self):
"""
float : The value of the energy functional at given `position`.
"""
return self._energy.value
@property
def directional_derivative(self):
"""
float : The directional derivative at the given `position`.
"""
res = self._energy.gradient.vdot(self._line_direction)
if abs(res.imag) / max(abs(res.real), 1.) > 1e-12:
from ..logger import logger
logger.warning("directional derivative has non-negligible "
"imaginary part: {}".format(res))
return res.real
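To make the parametrization above concrete, here is a minimal sketch (illustrative, not part of the commit) that drives LineEnergy with a toy quadratic energy. _ToyVector and _ToyQuadraticEnergy are hypothetical stand-ins for NIFTy's Field and Energy classes, and the import path is an assumption reflecting where the class lives after this commit:

import numpy as np
from nifty5.minimization.line_search import LineEnergy  # assumed path (post-commit)

class _ToyVector(object):
    # stand-in for a NIFTy Field: just enough arithmetic for LineEnergy
    def __init__(self, arr):
        self.arr = np.asarray(arr, dtype=float)
    def __add__(self, other):
        return _ToyVector(self.arr + other.arr)
    def __rmul__(self, scalar):
        return _ToyVector(scalar*self.arr)
    def vdot(self, other):
        return float(np.vdot(self.arr, other.arr))

class _ToyQuadraticEnergy(object):
    # E(x) = 0.5*|x|^2 with gradient x, mimicking the Energy interface used here
    def __init__(self, position):
        self.position = position
    @property
    def value(self):
        return 0.5*self.position.vdot(self.position)
    @property
    def gradient(self):
        return self.position
    def at(self, position):
        return _ToyQuadraticEnergy(position)

E = _ToyQuadraticEnergy(_ToyVector([3., 4.]))
direction = _ToyVector([-3., -4.])   # steepest-descent direction
le = LineEnergy(0., E, direction)    # zero point = current position
print(le.value)                      # 12.5
print(le.directional_derivative)     # -25.0
print(le.at(0.5).value)              # 3.125, halfway to the minimum
print(le.at(1.0).value)              # 0.0, the exact minimum along this line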
@@ -20,26 +20,139 @@ from __future__ import absolute_import, division, print_function
from ..compat import *
from ..utilities import NiftyMetaBase
from ..logger import logger
import numpy as np
class LineEnergy(object):
""" Evaluates an underlying Energy along a certain line direction.
Given an Energy object and a line direction, the position is parametrized by
a scalar step size along that direction relative to a zero point.
Parameters
----------
line_position : float
Defines the full spatial position of this energy via
self.energy.position = zero_point + line_position*line_direction
energy : Energy
The Energy object which will be evaluated along the given direction.
line_direction : Field
Direction used for line evaluation. Does not have to be normalized.
offset : float *optional*
Indirectly defines the zero point of the line via the equation
energy.position = zero_point + offset*line_direction
(default : 0.).
Notes
-----
The LineEnergy is used in minimization schemes in order to perform line
searches. It describes an underlying Energy which is restricted along one
direction, only requiring the step size parameter to determine a new
position.
"""
def __init__(self, line_position, energy, line_direction, offset=0.):
self._line_position = float(line_position)
self._line_direction = line_direction
if self._line_position == float(offset):
self._energy = energy
else:
pos = energy.position \
+ (self._line_position-float(offset))*self._line_direction
self._energy = energy.at(position=pos)
def at(self, line_position):
""" Returns LineEnergy at new position, memorizing the zero point.
Parameters
----------
line_position : float
Parameter for the new position on the line direction.
Returns
-------
LineEnergy object at new position with same zero point as `self`.
"""
return LineEnergy(line_position, self._energy, self._line_direction,
offset=self._line_position)
@property
def energy(self):
"""
Energy : The underlying Energy object
"""
return self._energy
@property
def value(self):
"""
float : The value of the energy functional at given `position`.
"""
return self._energy.value
@property
def directional_derivative(self):
"""
float : The directional derivative at the given `position`.
"""
res = self._energy.gradient.vdot(self._line_direction)
if abs(res.imag) / max(abs(res.real), 1.) > 1e-12:
from ..logger import logger
logger.warning("directional derivative has non-negligible "
"imaginary part: {}".format(res))
return res.real
class LineSearch(NiftyMetaBase()):
"""Class for determining the optimal step size along some descent
direction.
"""Class for finding a step size that satisfies the strong Wolfe
conditions.
The algorithm has two stages. It begins with a trial step length and keeps
increasing it until it either finds an acceptable step length or brackets
an interval. If the trial step does not satisfy the Wolfe conditions, the
zoom algorithm (second stage) interpolates within that interval, shrinking
it until an acceptable step length is found.
Parameters
----------
preferred_initial_step_size : float, optional
Newton-based methods should initialize this to 1.
c1 : float
Parameter for Armijo condition rule. (Default: 1e-4)
c2 : float
Parameter for curvature condition rule. (Default: 0.9)
max_step_size : float
Maximum step size allowed in the descent direction.
(Default: 1e30)
max_iterations : int, optional
Maximum number of iterations performed by the line search algorithm.
(Default: 100)
max_zoom_iterations : int, optional
Maximum number of iterations performed by the zoom algorithm.
(Default: 100)
"""
def __init__(self, preferred_initial_step_size=None):
def __init__(self, preferred_initial_step_size=None, c1=1e-4, c2=0.9,
max_step_size=1e30, max_iterations=100,
max_zoom_iterations=100):
self.preferred_initial_step_size = preferred_initial_step_size
self.c1 = np.float(c1)
self.c2 = np.float(c2)
self.max_step_size = max_step_size
self.max_iterations = int(max_iterations)
self.max_zoom_iterations = int(max_zoom_iterations)
def perform_line_search(self, energy, pk, f_k_minus_1=None):
"""Find step size and advance.
"""Performs the first stage of the algorithm.
Determines a good step size and advances the current estimate
by this step size in the search direction.
It starts with a trial step size and keeps increasing it until the
strong Wolfe conditions are satisfied. It then performs the step and
returns the new energy at the accepted position.
Parameters
----------
@@ -59,4 +172,244 @@ class LineSearch(NiftyMetaBase()):
bool
whether the line search was considered successful or not
"""
raise NotImplementedError
le_0 = LineEnergy(0., energy, pk, 0.)
maxstepsize = energy.longest_step(pk)
if maxstepsize is None:
maxstepsize = self.max_step_size
maxstepsize = min(maxstepsize, self.max_step_size)
# initialize the zero phis
old_phi_0 = f_k_minus_1
phi_0 = le_0.value
phiprime_0 = le_0.directional_derivative
if phiprime_0 == 0:
logger.warning(
"Directional derivative is zero; assuming convergence")
return energy, False
if phiprime_0 > 0:
logger.error("Error: search direction is not a descent direction")
return energy, False
# set alphas
alpha0 = 0.
phi_alpha0 = phi_0
phiprime_alpha0 = phiprime_0
if self.preferred_initial_step_size is not None:
alpha1 = self.preferred_initial_step_size
elif old_phi_0 is not None:
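# initial-step heuristic (cf. Nocedal & Wright): scale the first trial step
# so that the predicted first-order decrease matches the energy decrease
# achieved in the previous minimizer iteration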
alpha1 = min(1.0, 1.01*2*(phi_0 - old_phi_0)/phiprime_0)
if alpha1 < 0:
alpha1 = 1.0
else:
alpha1 = 1.0/pk.norm()
alpha1 = min(alpha1, 0.99*maxstepsize)
# start the minimization loop
iteration_number = 0
while iteration_number < self.max_iterations:
iteration_number += 1
if alpha1 == 0:
return le_0.energy, False
le_alpha1 = le_0.at(alpha1)
phi_alpha1 = le_alpha1.value
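# Armijo (sufficient decrease) check: phi(alpha1) <= phi(0) + c1*alpha1*phi'(0);
# if it fails, or phi has stopped decreasing, [alpha0, alpha1] brackets an
# acceptable step and the zoom stage takes over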
if (phi_alpha1 > phi_0 + self.c1*alpha1*phiprime_0) or \
((phi_alpha1 >= phi_alpha0) and (iteration_number > 1)):
return self._zoom(alpha0, alpha1, phi_0, phiprime_0,
phi_alpha0, phiprime_alpha0, phi_alpha1,
le_0)
phiprime_alpha1 = le_alpha1.directional_derivative
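# strong curvature check: |phi'(alpha1)| <= c2*|phi'(0)| (note phi'(0) < 0);
# together with the decrease check above this makes alpha1 acceptable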
if abs(phiprime_alpha1) <= -self.c2*phiprime_0:
return le_alpha1.energy, True
if phiprime_alpha1 >= 0:
return self._zoom(alpha1, alpha0, phi_0, phiprime_0,
phi_alpha1, phiprime_alpha1, phi_alpha0,
le_0)
# update alphas
alpha0, alpha1 = alpha1, min(2*alpha1, maxstepsize)
if alpha1 == maxstepsize:
logger.warning("max step size reached")
return le_alpha1.energy, False
phi_alpha0 = phi_alpha1
phiprime_alpha0 = phiprime_alpha1
logger.warning("max iterations reached")
return le_alpha1.energy, False
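# Illustration (not part of this commit) of the two strong Wolfe conditions
# the loop above searches for, on the scalar model phi(alpha) = (alpha - 1)**2,
# i.e. phi(0) = 1 and phi'(0) = -2, with the default c1 = 1e-4 and c2 = 0.9;
# _wolfe_ok is a hypothetical helper name.
def _wolfe_ok(alpha, c1=1e-4, c2=0.9):
    phi = (alpha - 1.)**2
    phiprime = 2.*(alpha - 1.)
    sufficient_decrease = phi <= 1. + c1*alpha*(-2.)
    curvature = abs(phiprime) <= c2*2.
    return sufficient_decrease and curvature
print(_wolfe_ok(0.05))  # False: the step barely moves, curvature check fails
print(_wolfe_ok(1.0))   # True: the exact 1-d minimizer satisfies both conditions
print(_wolfe_ok(2.5))   # False: overshoot, sufficient decrease fails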
def _zoom(self, alpha_lo, alpha_hi, phi_0, phiprime_0,
phi_lo, phiprime_lo, phi_hi, le_0):
"""Performs the second stage of the line search algorithm.
If the first stage was not successful, the zoom algorithm tries to find
a suitable step length by using bisection as well as quadratic and cubic
interpolation.
Parameters
----------
alpha_lo : float
A boundary for the step length interval.
Fulfills Wolfe condition 1.
alpha_hi : float
The other boundary for the step length interval.
phi_0 : float
Value of the energy at the starting point of the line search
algorithm.
phiprime_0 : float
Directional derivative at the starting point of the line search
algorithm.
phi_lo : float
Value of the energy if we perform a step of length alpha_lo in
descent direction.
phiprime_lo : float
Directional derivative at the new position if we perform a step of
length alpha_lo in descent direction.
phi_hi : float
Value of the energy if we perform a step of length alpha_hi in
descent direction.
Returns
-------
Energy
The new Energy object at the new position.
bool
Whether the line search was considered successful.
"""
cubic_delta = 0.2 # cubic interpolant checks
quad_delta = 0.1 # quadratic interpolant checks
alpha_recent = None
phi_recent = None
if phi_lo > phi_0 + self.c1*alpha_lo*phiprime_0:
raise ValueError("inconsistent data")
if phiprime_lo*(alpha_hi-alpha_lo) >= 0.:
raise ValueError("inconsistent data")
for i in range(self.max_zoom_iterations):
# assert phi_lo <= phi_0 + self.c1*alpha_lo*phiprime_0
# assert phiprime_lo*(alpha_hi-alpha_lo)<0.
delta_alpha = alpha_hi - alpha_lo
a, b = min(alpha_lo, alpha_hi), max(alpha_lo, alpha_hi)
# Try cubic interpolation
if i > 0:
cubic_check = cubic_delta * delta_alpha
alpha_j = self._cubicmin(alpha_lo, phi_lo, phiprime_lo,
alpha_hi, phi_hi,
alpha_recent, phi_recent)
# If cubic was not successful or not available, try quadratic
if (i == 0) or (alpha_j is None) or (alpha_j > b - cubic_check) or\
(alpha_j < a + cubic_check):
quad_check = quad_delta * delta_alpha
alpha_j = self._quadmin(alpha_lo, phi_lo, phiprime_lo,
alpha_hi, phi_hi)
# If quadratic was not successful, try bisection
if (alpha_j is None) or (alpha_j > b - quad_check) or \
(alpha_j < a + quad_check):
alpha_j = alpha_lo + 0.5*delta_alpha
# Check if the current value of alpha_j is already sufficient
le_alphaj = le_0.at(alpha_j)
phi_alphaj = le_alphaj.value
# If the first Wolfe condition is not met replace alpha_hi
# by alpha_j
if (phi_alphaj > phi_0 + self.c1*alpha_j*phiprime_0) or \
(phi_alphaj >= phi_lo):
alpha_recent, phi_recent = alpha_hi, phi_hi
alpha_hi, phi_hi = alpha_j, phi_alphaj
else:
phiprime_alphaj = le_alphaj.directional_derivative
# If the second Wolfe condition is met, return the result
if abs(phiprime_alphaj) <= -self.c2*phiprime_0:
return le_alphaj.energy, True
# If not, check the sign of the slope
if phiprime_alphaj*delta_alpha >= 0:
alpha_recent, phi_recent = alpha_hi, phi_hi
alpha_hi, phi_hi = alpha_lo, phi_lo
else:
alpha_recent, phi_recent = alpha_lo, phi_lo
# Replace alpha_lo by alpha_j
(alpha_lo, phi_lo, phiprime_lo) = (alpha_j, phi_alphaj,
phiprime_alphaj)
else:
logger.warning(
"The line search algorithm (zoom) did not converge.")
return le_alphaj.energy, False
def _cubicmin(self, a, fa, fpa, b, fb, c, fc):
"""Estimating the minimum with cubic interpolation.
Finds the minimizer for a cubic polynomial that goes through the
points (a,fa), (b,fb), and (c,fc) with derivative at point a of fpa.
If no minimizer can be found, return None.
Parameters
----------
a, fa, fpa : float
abscissa, function value and derivative at first point
b, fb : float
abscissa and function value at second point
c, fc : float
abscissa and function value at third point
Returns
-------
xmin : float
Position of the approximated minimum.
"""
with np.errstate(divide='raise', over='raise', invalid='raise'):
try:
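# The cubic is modeled as f(x) = A*(x-a)**3 + B*(x-a)**2 + C*(x-a) + D with
# D = fa and C = fpa fixed by the data at a; A and B follow from the 2x2
# system below, and the reported minimizer is the root of f'(x) with
# positive curvature: x = a + (-B + sqrt(B*B - 3*A*C)) / (3*A).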
C = fpa
db = b - a
dc = c - a
denom = db * db * dc * dc * (db - dc)
d1 = np.empty((2, 2))
d1[0, 0] = dc * dc
d1[0, 1] = -(db*db)
d1[1, 0] = -(dc*dc*dc)
d1[1, 1] = db*db*db
[A, B] = np.dot(d1, np.asarray([fb - fa - C * db,
fc - fa - C * dc]).ravel())
A /= denom
B /= denom
radical = B * B - 3 * A * C
xmin = a + (-B + np.sqrt(radical)) / (3 * A)
except ArithmeticError:
return None
if not np.isfinite(xmin):
return None
return xmin
def _quadmin(self, a, fa, fpa, b, fb):
"""Estimating the minimum with quadratic interpolation.
Finds the minimizer for a quadratic polynomial that goes through
the points (a,fa), (b,fb) with derivative at point a of fpa.
Parameters
----------
a, fa, fpa : float
abscissa, function value and derivative at first point
b, fb : float
abscissa and function value at second point
Returns
-------
xmin : float
Position of the approximated minimum.
"""
with np.errstate(divide='raise', over='raise', invalid='raise'):
try:
db = b - a * 1.0
B = (fb - fa - fpa * db) / (db * db)
xmin = a - fpa / (2.0 * B)
except ArithmeticError:
return None
if not np.isfinite(xmin):
return None
return xmin
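Both interpolation helpers are exact on polynomials of matching degree, which gives a quick sanity check. A sketch, assuming the package is importable as nifty5 after this commit and a NumPy version in which np.float (used by the constructor above) still exists:

from nifty5.minimization.line_search import LineSearch  # assumed import path

ls = LineSearch()
# quadratic f(x) = (x - 2)**2 + 1: f(0) = 5, f'(0) = -4, f(3) = 2
print(ls._quadmin(0., 5., -4., 3., 2.))            # 2.0, the exact minimizer
# cubic f(x) = x**3 - 3*x: f(0) = 0, f'(0) = -3, f(3) = 18, f(2) = 2
print(ls._cubicmin(0., 0., -3., 3., 18., 2., 2.))  # 1.0, the local minimizer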
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
# Copyright(C) 2013-2018 Max-Planck-Society
#
# NIFTy is being developed at the Max-Planck-Institut fuer Astrophysik
# and financially supported by the Studienstiftung des deutschen Volkes.
from __future__ import absolute_import, division, print_function
import numpy as np
from ..compat import *
from ..logger import logger
from .line_energy import LineEnergy
from .line_search import LineSearch
class LineSearchStrongWolfe(LineSearch):
"""Class for finding a step size that satisfies the strong Wolfe
conditions.
The algorithm has two stages. It begins with a trial step length and keeps
increasing it until it either finds an acceptable step length or brackets
an interval. If the trial step does not satisfy the Wolfe conditions, the
zoom algorithm (second stage) interpolates within that interval, shrinking
it until an acceptable step length is found.
Parameters
----------
c1 : float
Parameter for Armijo condition rule. (Default: 1e-4)
c2 : float
Parameter for curvature condition rule. (Default: 0.9)
max_step_size : float
Maximum step size allowed in the descent direction.