Commit 519aba36 authored by Martin Reinecke's avatar Martin Reinecke

simplify descent minimization

parent 89ff8afe
Pipeline #16532 passed with stage
in 19 minutes and 15 seconds
...@@ -66,12 +66,9 @@ class Energy(Loggable, object): ...@@ -66,12 +66,9 @@ class Energy(Loggable, object):
__metaclass__ = NiftyMeta __metaclass__ = NiftyMeta
def __init__(self, position): def __init__(self, position):
super(Energy, self).__init__()
self._cache = {} self._cache = {}
try: self._position = position.copy()
position = position.copy()
except AttributeError:
pass
self._position = position
def at(self, position): def at(self, position):
""" Initializes and returns a new Energy object at the new position. """ Initializes and returns a new Energy object at the new position.
......
...@@ -68,12 +68,16 @@ class LineEnergy(object): ...@@ -68,12 +68,16 @@ class LineEnergy(object):
""" """
def __init__(self, line_position, energy, line_direction, offset=0.): def __init__(self, line_position, energy, line_direction, offset=0.):
super(LineEnergy, self).__init__()
self._line_position = float(line_position) self._line_position = float(line_position)
self._line_direction = line_direction self._line_direction = line_direction
pos = energy.position \ if self._line_position==float(offset):
+ (self._line_position-float(offset))*self._line_direction self.energy = energy
self.energy = energy.at(position=pos) else:
pos = energy.position \
+ (self._line_position-float(offset))*self._line_direction
self.energy = energy.at(position=pos)
def at(self, line_position): def at(self, line_position):
""" Returns LineEnergy at new position, memorizing the zero point. """ Returns LineEnergy at new position, memorizing the zero point.
......
...@@ -123,7 +123,6 @@ class DescentMinimizer(Loggable, object): ...@@ -123,7 +123,6 @@ class DescentMinimizer(Loggable, object):
convergence = 0 convergence = 0
f_k_minus_1 = None f_k_minus_1 = None
step_length = 0
iteration_number = 1 iteration_number = 1
while True: while True:
...@@ -150,7 +149,7 @@ class DescentMinimizer(Loggable, object): ...@@ -150,7 +149,7 @@ class DescentMinimizer(Loggable, object):
# compute the step length, which minimizes energy.value along the # compute the step length, which minimizes energy.value along the
# search direction # search direction
try: try:
step_length, f_k, new_energy = \ new_energy = \
self.line_searcher.perform_line_search( self.line_searcher.perform_line_search(
energy=energy, energy=energy,
pk=descent_direction, pk=descent_direction,
...@@ -160,12 +159,10 @@ class DescentMinimizer(Loggable, object): ...@@ -160,12 +159,10 @@ class DescentMinimizer(Loggable, object):
"Stopping because of RuntimeError in line-search") "Stopping because of RuntimeError in line-search")
break break
if f_k_minus_1 is None:
delta = 1e30
else:
delta = (abs(f_k-f_k_minus_1) /
max(abs(f_k), abs(f_k_minus_1), 1.))
f_k_minus_1 = energy.value f_k_minus_1 = energy.value
f_k = new_energy.value
delta = (abs(f_k-f_k_minus_1) /
max(abs(f_k), abs(f_k_minus_1), 1.))
# check if new energy value is bigger than old energy value # check if new energy value is bigger than old energy value
if (new_energy.value - energy.value) > 0: if (new_energy.value - energy.value) > 0:
self.logger.info("Line search algorithm returned a new energy " self.logger.info("Line search algorithm returned a new energy "
...@@ -174,9 +171,9 @@ class DescentMinimizer(Loggable, object): ...@@ -174,9 +171,9 @@ class DescentMinimizer(Loggable, object):
energy = new_energy energy = new_energy
# check convergence # check convergence
self.logger.debug("Iteration:%08u step_length=%3.1E " self.logger.debug("Iteration:%08u "
"delta=%3.1E energy=%3.1E" % "delta=%3.1E energy=%3.1E" %
(iteration_number, step_length, delta, (iteration_number, delta,
energy.value)) energy.value))
if delta == 0: if delta == 0:
convergence = self.convergence_level + 2 convergence = self.convergence_level + 2
......
...@@ -34,8 +34,6 @@ class LineSearch(Loggable, object): ...@@ -34,8 +34,6 @@ class LineSearch(Loggable, object):
---------- ----------
line_energy : LineEnergy Object line_energy : LineEnergy Object
LineEnergy object from which we can extract energy at a specific point. LineEnergy object from which we can extract energy at a specific point.
f_k_minus_1 : Field
Value of the field at the k-1 iteration of the line search procedure.
preferred_initial_step_size : float preferred_initial_step_size : float
Initial guess for the step length. Initial guess for the step length.
...@@ -44,32 +42,8 @@ class LineSearch(Loggable, object): ...@@ -44,32 +42,8 @@ class LineSearch(Loggable, object):
__metaclass__ = abc.ABCMeta __metaclass__ = abc.ABCMeta
def __init__(self): def __init__(self):
self.line_energy = None
self.f_k_minus_1 = None
self.preferred_initial_step_size = None self.preferred_initial_step_size = None
def _set_line_energy(self, energy, pk, f_k_minus_1=None):
"""Set the coordinates for a new line search.
Parameters
----------
energy : Energy object
Energy object from which we can calculate the energy, gradient and
curvature at a specific point.
pk : Field
Unit vector pointing into the search direction.
f_k_minus_1 : float
Value of the fuction (energy) which will be minimized at the k-1
iteration of the line search procedure. (Default: None)
"""
self.line_energy = LineEnergy(line_position=0.,
energy=energy,
line_direction=pk)
if f_k_minus_1 is not None:
f_k_minus_1 = f_k_minus_1.copy()
self.f_k_minus_1 = f_k_minus_1
@abc.abstractmethod @abc.abstractmethod
def perform_line_search(self, energy, pk, f_k_minus_1=None): def perform_line_search(self, energy, pk, f_k_minus_1=None):
raise NotImplementedError raise NotImplementedError
...@@ -19,12 +19,13 @@ ...@@ -19,12 +19,13 @@
import numpy as np import numpy as np
from .line_search import LineSearch from .line_search import LineSearch
from ...energies import LineEnergy
class LineSearchStrongWolfe(LineSearch): class LineSearchStrongWolfe(LineSearch):
"""Class for finding a step size that satisfies the strong Wolfe conditions. """Class for finding a step size that satisfies the strong Wolfe conditions.
Algorithm contains two stages. It begins whit a trial step length and Algorithm contains two stages. It begins with a trial step length and
keeps increasing it until it finds an acceptable step length or an keeps increasing it until it finds an acceptable step length or an
interval. If it does not satisfy the Wolfe conditions, it performs the Zoom interval. If it does not satisfy the Wolfe conditions, it performs the Zoom
algorithm (second stage). By interpolating it decreases the size of the algorithm (second stage). By interpolating it decreases the size of the
...@@ -77,8 +78,8 @@ class LineSearchStrongWolfe(LineSearch): ...@@ -77,8 +78,8 @@ class LineSearchStrongWolfe(LineSearch):
"""Performs the first stage of the algorithm. """Performs the first stage of the algorithm.
It starts with a trial step size and it keeps increasing it until it It starts with a trial step size and it keeps increasing it until it
satisfy the strong Wolf conditions. It also performs the descent and satisfies the strong Wolf conditions. It also performs the descent and
returns the optimal step length and the new enrgy. returns the optimal step length and the new energy.
Parameters Parameters
---------- ----------
...@@ -86,29 +87,22 @@ class LineSearchStrongWolfe(LineSearch): ...@@ -86,29 +87,22 @@ class LineSearchStrongWolfe(LineSearch):
Energy object from which we will calculate the energy and the Energy object from which we will calculate the energy and the
gradient at a specific point. gradient at a specific point.
pk : Field pk : Field
Unit vector pointing into the search direction. Vector pointing into the search direction.
f_k_minus_1 : float f_k_minus_1 : float
Value of the fuction (which is being minimized) at the k-1 Value of the fuction (which is being minimized) at the k-1
iteration of the line search procedure. (Default: None) iteration of the line search procedure. (Default: None)
Returns Returns
------- -------
alpha_star : float
The optimal step length in the descent direction.
phi_star : float
Value of the energy after the performed descent.
energy_star : Energy object energy_star : Energy object
The new Energy object on the new position. The new Energy object on the new position.
""" """
self._set_line_energy(energy, pk, f_k_minus_1=f_k_minus_1) le_0 = LineEnergy(0., energy, pk, 0.)
max_step_size = self.max_step_size
max_iterations = self.max_iterations
# initialize the zero phis # initialize the zero phis
old_phi_0 = self.f_k_minus_1 old_phi_0 = f_k_minus_1
le_0 = self.line_energy.at(0)
phi_0 = le_0.value phi_0 = le_0.value
phiprime_0 = le_0.directional_derivative phiprime_0 = le_0.directional_derivative
if phiprime_0 >= 0: if phiprime_0 >= 0:
...@@ -117,6 +111,9 @@ class LineSearchStrongWolfe(LineSearch): ...@@ -117,6 +111,9 @@ class LineSearchStrongWolfe(LineSearch):
# set alphas # set alphas
alpha0 = 0. alpha0 = 0.
phi_alpha0 = phi_0
phiprime_alpha0 = phiprime_0
if self.preferred_initial_step_size is not None: if self.preferred_initial_step_size is not None:
alpha1 = self.preferred_initial_step_size alpha1 = self.preferred_initial_step_size
elif old_phi_0 is not None and phiprime_0 != 0: elif old_phi_0 is not None and phiprime_0 != 0:
...@@ -126,73 +123,48 @@ class LineSearchStrongWolfe(LineSearch): ...@@ -126,73 +123,48 @@ class LineSearchStrongWolfe(LineSearch):
else: else:
alpha1 = 1.0 alpha1 = 1.0
# give the alpha0 phis the right init value
phi_alpha0 = phi_0
phiprime_alpha0 = phiprime_0
# start the minimization loop # start the minimization loop
for i in xrange(max_iterations): for i in xrange(self.max_iterations):
le_alpha1 = self.line_energy.at(alpha1)
phi_alpha1 = le_alpha1.value
if alpha1 == 0: if alpha1 == 0:
self.logger.warn("Increment size became 0.") self.logger.warn("Increment size became 0.")
alpha_star = 0. return le_0.energy
phi_star = phi_0
le_star = le_0 le_alpha1 = le_0.at(alpha1)
break phi_alpha1 = le_alpha1.value
if (phi_alpha1 > phi_0 + self.c1*alpha1*phiprime_0) or \ if (phi_alpha1 > phi_0 + self.c1*alpha1*phiprime_0) or \
((phi_alpha1 >= phi_alpha0) and (i > 0)): ((phi_alpha1 >= phi_alpha0) and (i > 0)):
(alpha_star, phi_star, le_star) = self._zoom( le_star = self._zoom(alpha0, alpha1, phi_0, phiprime_0,
alpha0, alpha1, phi_alpha0, phiprime_alpha0, phi_alpha1,
phi_0, phiprime_0, le_0)
phi_alpha0, return le_star.energy
phiprime_alpha0,
phi_alpha1)
break
phiprime_alpha1 = le_alpha1.directional_derivative phiprime_alpha1 = le_alpha1.directional_derivative
if abs(phiprime_alpha1) <= -self.c2*phiprime_0: if abs(phiprime_alpha1) <= -self.c2*phiprime_0:
alpha_star = alpha1 return le_alpha1.energy
phi_star = phi_alpha1
le_star = le_alpha1
break
if phiprime_alpha1 >= 0: if phiprime_alpha1 >= 0:
(alpha_star, phi_star, le_star) = self._zoom( le_star = self._zoom(alpha1, alpha0, phi_0, phiprime_0,
alpha1, alpha0, phi_alpha1, phiprime_alpha1, phi_alpha0,
phi_0, phiprime_0, le_0)
phi_alpha1, return le_star.energy
phiprime_alpha1,
phi_alpha0)
break
# update alphas # update alphas
alpha0, alpha1 = alpha1, min(2*alpha1, max_step_size) alpha0, alpha1 = alpha1, min(2*alpha1, self.max_step_size)
if alpha1 == max_step_size: if alpha1 == self.max_step_size:
print "reached max step size, bailing out" print "reached max step size, bailing out"
alpha_star = alpha1 return le_alpha1.energy
phi_star = phi_alpha1
le_star = le_alpha1
break
phi_alpha0 = phi_alpha1 phi_alpha0 = phi_alpha1
phiprime_alpha0 = phiprime_alpha1 phiprime_alpha0 = phiprime_alpha1
else: else:
# max_iterations was reached # max_iterations was reached
alpha_star = alpha1
phi_star = phi_alpha1
le_star = le_alpha1
self.logger.error("The line search algorithm did not converge.") self.logger.error("The line search algorithm did not converge.")
return le_alpha1.energy
# extract the full energy from the line_energy
energy_star = le_star.energy
direction_length = pk.norm()
step_length = alpha_star * direction_length
return step_length, phi_star, energy_star
def _zoom(self, alpha_lo, alpha_hi, phi_0, phiprime_0, def _zoom(self, alpha_lo, alpha_hi, phi_0, phiprime_0,
phi_lo, phiprime_lo, phi_hi): phi_lo, phiprime_lo, phi_hi, le_0):
"""Performs the second stage of the line search algorithm. """Performs the second stage of the line search algorithm.
If the first stage was not successful then the Zoom algorithm tries to If the first stage was not successful then the Zoom algorithm tries to
...@@ -223,32 +195,23 @@ class LineSearchStrongWolfe(LineSearch): ...@@ -223,32 +195,23 @@ class LineSearchStrongWolfe(LineSearch):
Returns Returns
------- -------
alpha_star : float
The optimal step length in the descent direction.
phi_star : float
Value of the energy after the performed descent.
energy_star : Energy object energy_star : Energy object
The new Energy object on the new position. The new Energy object on the new position.
""" """
max_iterations = self.max_zoom_iterations
# define the cubic and quadratic interpolant checks # define the cubic and quadratic interpolant checks
cubic_delta = 0.2 # cubic cubic_delta = 0.2 # cubic
quad_delta = 0.1 # quadratic quad_delta = 0.1 # quadratic
phiprime_alphaj = 0.
alpha_recent = None alpha_recent = None
phi_recent = None phi_recent = None
assert phi_lo <= phi_0 + self.c1*alpha_lo*phiprime_0 assert phi_lo <= phi_0 + self.c1*alpha_lo*phiprime_0
assert phiprime_lo*(alpha_hi-alpha_lo) < 0. assert phiprime_lo*(alpha_hi-alpha_lo) < 0.
for i in xrange(max_iterations): for i in xrange(self.max_zoom_iterations):
# assert phi_lo <= phi_0 + self.c1*alpha_lo*phiprime_0 # assert phi_lo <= phi_0 + self.c1*alpha_lo*phiprime_0
# assert phiprime_lo*(alpha_hi-alpha_lo)<0. # assert phiprime_lo*(alpha_hi-alpha_lo)<0.
delta_alpha = alpha_hi - alpha_lo delta_alpha = alpha_hi - alpha_lo
if delta_alpha < 0: a, b = min(alpha_lo, alpha_hi), max(alpha_lo, alpha_hi)
a, b = alpha_hi, alpha_lo
else:
a, b = alpha_lo, alpha_hi
# Try cubic interpolation # Try cubic interpolation
if i > 0: if i > 0:
...@@ -268,12 +231,12 @@ class LineSearchStrongWolfe(LineSearch): ...@@ -268,12 +231,12 @@ class LineSearchStrongWolfe(LineSearch):
alpha_j = alpha_lo + 0.5*delta_alpha alpha_j = alpha_lo + 0.5*delta_alpha
# Check if the current value of alpha_j is already sufficient # Check if the current value of alpha_j is already sufficient
le_alphaj = self.line_energy.at(alpha_j) le_alphaj = le_0.at(alpha_j)
phi_alphaj = le_alphaj.value phi_alphaj = le_alphaj.value
# If the first Wolfe condition is not met replace alpha_hi # If the first Wolfe condition is not met replace alpha_hi
# by alpha_j # by alpha_j
if (phi_alphaj > phi_0 + self.c1*alpha_j*phiprime_0) or\ if (phi_alphaj > phi_0 + self.c1*alpha_j*phiprime_0) or \
(phi_alphaj >= phi_lo): (phi_alphaj >= phi_lo):
alpha_recent, phi_recent = alpha_hi, phi_hi alpha_recent, phi_recent = alpha_hi, phi_hi
alpha_hi, phi_hi = alpha_j, phi_alphaj alpha_hi, phi_hi = alpha_j, phi_alphaj
...@@ -281,10 +244,7 @@ class LineSearchStrongWolfe(LineSearch): ...@@ -281,10 +244,7 @@ class LineSearchStrongWolfe(LineSearch):
phiprime_alphaj = le_alphaj.directional_derivative phiprime_alphaj = le_alphaj.directional_derivative
# If the second Wolfe condition is met, return the result # If the second Wolfe condition is met, return the result
if abs(phiprime_alphaj) <= -self.c2*phiprime_0: if abs(phiprime_alphaj) <= -self.c2*phiprime_0:
alpha_star = alpha_j return le_alphaj
phi_star = phi_alphaj
le_star = le_alphaj
break
# If not, check the sign of the slope # If not, check the sign of the slope
if phiprime_alphaj*delta_alpha >= 0: if phiprime_alphaj*delta_alpha >= 0:
alpha_recent, phi_recent = alpha_hi, phi_hi alpha_recent, phi_recent = alpha_hi, phi_hi
...@@ -296,12 +256,9 @@ class LineSearchStrongWolfe(LineSearch): ...@@ -296,12 +256,9 @@ class LineSearchStrongWolfe(LineSearch):
phiprime_alphaj) phiprime_alphaj)
else: else:
alpha_star, phi_star, le_star = \
alpha_j, phi_alphaj, le_alphaj
self.logger.error("The line search algorithm (zoom) did not " self.logger.error("The line search algorithm (zoom) did not "
"converge.") "converge.")
return le_alphaj
return alpha_star, phi_star, le_star
def _cubicmin(self, a, fa, fpa, b, fb, c, fc): def _cubicmin(self, a, fa, fpa, b, fb, c, fc):
"""Estimating the minimum with cubic interpolation. """Estimating the minimum with cubic interpolation.
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment