Commit 89e27a11 authored by Theo Steininger

Merge branch 'vl_bfgs' into 'master'

Vl bfgs

See merge request !159
parents 841b7ecb 026e670e
Pipeline #14548 passed with stages in 14 minutes and 1 second
@@ -1087,17 +1087,12 @@ class Field(Loggable, Versionable, object):
         return dotted.sum(spaces=spaces)

     def norm(self):
-        """ Computes the Lq-norm of the field values.
-
-        Parameters
-        ----------
-        q : scalar
-            Parameter q of the Lq-norm (default: 2).
+        """ Computes the L2-norm of the field values.

         Returns
         -------
         norm : scalar
-            The Lq-norm of the field values.
+            The L2-norm of the field values.

         """
         return np.sqrt(np.abs(self.vdot(x=self)))
...
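For reference, a minimal numpy sketch of the quantity the new docstring describes; a plain array stands in for a Field here, this is not NIFTY API:

    import numpy as np

    x = np.array([3.0 + 4.0j, 0.0, 1.0])   # stand-in for the field values
    vdot = np.vdot(x, x)                   # <x, x>, conjugating the first factor
    print(np.sqrt(np.abs(vdot)))           # L2-norm, ~5.0990, as Field.norm() computes it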
@@ -146,14 +146,14 @@ class DescentMinimizer(Loggable, object):
                 break

             # current position is encoded in energy object
-            descend_direction = self.get_descend_direction(energy)
+            descent_direction = self.get_descent_direction(energy)

             # compute the step length, which minimizes energy.value along the
             # search direction
             step_length, f_k, new_energy = \
                 self.line_searcher.perform_line_search(
                     energy=energy,
-                    pk=descend_direction,
+                    pk=descent_direction,
                     f_k_minus_1=f_k_minus_1)
             f_k_minus_1 = energy.value
@@ -195,5 +195,5 @@ class DescentMinimizer(Loggable, object):
         return energy, convergence

     @abc.abstractmethod
-    def get_descend_direction(self, energy):
+    def get_descent_direction(self, energy):
         raise NotImplementedError
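The rename fixes the long-standing "descend"/"descent" mix-up; every subclass must now override get_descent_direction. A hypothetical minimal subclass, just to illustrate the contract (the import path is an assumption, not taken from this diff):

    from nifty import DescentMinimizer  # assumed import path

    class ToyGradientDescent(DescentMinimizer):
        def get_descent_direction(self, energy):
            # Any Field pointing downhill at energy.position will do;
            # the driver loop above then line-searches along it.
            return -energy.gradient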
@@ -25,31 +25,31 @@ from nifty import LineEnergy

 class LineSearch(Loggable, object):
     """Class for determining the optimal step size along some descent direction.

     Initialize the line search procedure which can be used by a specific line
     search method. It finds the step size in a specific direction in the
     minimization process.

     Attributes
     ----------
     line_energy : LineEnergy Object
         LineEnergy object from which we can extract energy at a specific point.
     f_k_minus_1 : Field
         Value of the field at the k-1 iteration of the line search procedure.
-    prefered_initial_step_size : float
+    preferred_initial_step_size : float
         Initial guess for the step length.

     """

     __metaclass__ = abc.ABCMeta

     def __init__(self):
         self.line_energy = None
         self.f_k_minus_1 = None
-        self.prefered_initial_step_size = None
+        self.preferred_initial_step_size = None

     def _set_line_energy(self, energy, pk, f_k_minus_1=None):
         """Set the coordinates for a new line search.
@@ -58,13 +58,13 @@ class LineSearch(Loggable, object):
         ----------
         energy : Energy object
             Energy object from which we can calculate the energy, gradient and
             curvature at a specific point.
         pk : Field
             Unit vector pointing into the search direction.
         f_k_minus_1 : float
             Value of the function (energy) which will be minimized at the k-1
             iteration of the line search procedure. (Default: None)

         """
         self.line_energy = LineEnergy(position=0.,
                                       energy=energy,
...
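Schematically, _set_line_energy pins down the scalar function phi(alpha) = f(x_k + alpha * pk) that the concrete line searches then minimize. A self-contained sketch with a toy objective (f, x_k, pk are illustrative, not NIFTY objects):

    import numpy as np

    def f(x):                                  # toy quadratic objective
        return 0.5 * np.dot(x, x)

    x_k = np.array([2.0, -1.0])                # current position
    pk = -x_k / np.linalg.norm(x_k)            # unit descent direction, per the docstring

    def phi(alpha):                            # the 1D restriction the line search works on
        return f(x_k + alpha * pk)

    print(phi(0.0), phi(np.linalg.norm(x_k)))  # 2.5 -> 0.0 at the exact minimizer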
@@ -24,9 +24,9 @@ from .line_search import LineSearch

 class LineSearchStrongWolfe(LineSearch):
     """Class for finding a step size that satisfies the strong Wolfe conditions.

-    Algorithm contains two stages. It begins whit a trial step length and it
-    keeps increasing the it until it finds an acceptable step length or an
-    interval. If it does not satisfy the Wolfe conditions it performs the Zoom
+    The algorithm contains two stages. It begins with a trial step length and
+    keeps increasing it until it finds an acceptable step length or an
+    interval. If it does not satisfy the Wolfe conditions, it performs the Zoom
     algorithm (second stage). By interpolating it decreases the size of the
     interval until an acceptable step length is found.
@@ -120,8 +120,8 @@ class LineSearchStrongWolfe(LineSearch):
         # set alphas
         alpha0 = 0.
-        if self.prefered_initial_step_size is not None:
-            alpha1 = self.prefered_initial_step_size
+        if self.preferred_initial_step_size is not None:
+            alpha1 = self.preferred_initial_step_size
         elif old_phi_0 is not None and phiprime_0 != 0:
             alpha1 = min(1.0, 1.01*2*(phi_0 - old_phi_0)/phiprime_0)
             if alpha1 < 0:
...
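The elif branch is the usual first-guess heuristic (cf. Nocedal & Wright): assume the upcoming decrease matches the previous iteration's, giving alpha1 ≈ 2(phi_0 - old_phi_0)/phiprime_0, inflated by 1.01 and capped at 1. With made-up numbers:

    phi_0 = 10.0       # current function value
    old_phi_0 = 10.5   # value at the previous iterate
    phiprime_0 = -2.0  # directional derivative along pk (negative on a descent direction)

    alpha1 = min(1.0, 1.01*2*(phi_0 - old_phi_0)/phiprime_0)
    print(alpha1)      # 0.505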
@@ -32,9 +32,9 @@ class RelaxedNewton(DescentMinimizer):
                          convergence_level=convergence_level,
                          iteration_limit=iteration_limit)

-        self.line_searcher.prefered_initial_step_size = 1.
+        self.line_searcher.preferred_initial_step_size = 1.

-    def get_descend_direction(self, energy):
+    def get_descent_direction(self, energy):
         """ Calculates the descent direction according to a Newton scheme.

         The descent direction is determined by weighting the gradient at the
@@ -50,12 +50,9 @@ class RelaxedNewton(DescentMinimizer):
         Returns
         -------
-        descend_direction : Field
+        descent_direction : Field
             Returns the descent direction with proposed step length. In a
             quadratic potential this corresponds to the optimal step.

         """
-        gradient = energy.gradient
-        curvature = energy.curvature
-        descend_direction = curvature.inverse_times(gradient)
-        return descend_direction * -1
+        return -energy.curvature.inverse_times(energy.gradient)
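On an exactly quadratic energy the simplified one-liner is the full Newton step: for f(x) = ½ xᵀAx - bᵀx it lands on the minimum in one move. A numpy sketch (A, b, x are illustrative stand-ins for curvature, source, and position):

    import numpy as np

    A = np.array([[3.0, 1.0], [1.0, 2.0]])     # SPD curvature
    b = np.array([1.0, 0.0])
    x = np.array([5.0, -2.0])                  # arbitrary start

    gradient = A.dot(x) - b
    direction = -np.linalg.solve(A, gradient)  # -curvature.inverse_times(gradient)
    print(x + direction)                       # [0.4, -0.2] = A^{-1} b, the exact minimum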
@@ -20,7 +20,7 @@ from .descent_minimizer import DescentMinimizer

 class SteepestDescent(DescentMinimizer):
-    def get_descend_direction(self, energy):
+    def get_descent_direction(self, energy):
         """ Implementation of the steepest descent minimization scheme.

         Also known as 'gradient descent'. This algorithm simply follows the
@@ -34,10 +34,9 @@ class SteepestDescent(DescentMinimizer):
         Returns
         -------
-        descend_direction : Field
+        descent_direction : Field
             Returns the descent direction.

         """
-        descend_direction = energy.gradient
-        return descend_direction * -1
+        return -energy.gradient
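For contrast with the Newton step above, -gradient ignores curvature, so even with exact line searches the same quadratic takes repeated iterations. A short illustrative sketch:

    import numpy as np

    A = np.array([[3.0, 1.0], [1.0, 2.0]])
    b = np.array([1.0, 0.0])
    x = np.array([5.0, -2.0])

    for _ in range(10):
        g = A.dot(x) - b                    # gradient at x
        alpha = g.dot(g) / g.dot(A.dot(g))  # exact minimizer along -g
        x = x - alpha * g
    print(x)                                # converges to [0.4, -0.2], step by step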
@@ -40,7 +40,7 @@ class VL_BFGS(DescentMinimizer):
         self._information_store = None
         return super(VL_BFGS, self).__call__(energy)

-    def get_descend_direction(self, energy):
+    def get_descent_direction(self, energy):
         """Implementation of the Vector-free L-BFGS minimization scheme.

         Find the descent direction by using the inverse Hessian.
@@ -57,7 +57,7 @@ class VL_BFGS(DescentMinimizer):
         Returns
         -------
-        descend_direction : Field
+        descent_direction : Field
             Returns the descent direction.

         References
@@ -80,11 +80,11 @@ class VL_BFGS(DescentMinimizer):
         b = self._information_store.b
         delta = self._information_store.delta

-        descend_direction = delta[0] * b[0]
+        descent_direction = delta[0] * b[0]
         for i in xrange(1, len(delta)):
-            descend_direction += delta[i] * b[i]
+            descent_direction += delta[i] * b[i]

-        return descend_direction
+        return descent_direction
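At this point the direction is a pure linear combination of the 2m+1 stored basis vectors b = [s_0..s_{m-1}, y_0..y_{m-1}, g] with scalar coefficients delta; no further inner products are needed, which is the "vector-free" trick of the referenced algorithm. A numpy stand-in for the combination loop:

    import numpy as np

    b = [np.array([1.0, 0.0]),    # stand-ins for the 2m+1 basis Fields
         np.array([0.0, 1.0]),
         np.array([1.0, 1.0])]
    delta = [0.5, -2.0, 0.25]     # scalar coefficients from the information store

    descent_direction = delta[0] * b[0]
    for i in range(1, len(delta)):
        descent_direction += delta[i] * b[i]
    print(descent_direction)      # [0.75, -1.75]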
 class InformationStore(object):
@@ -104,34 +104,35 @@ class InformationStore(object):
     max_history_length : integer
         Maximum number of stored past updates.
     s : List
-        List of past position differences, which are Fields.
+        Circular buffer of past position differences, which are Fields.
     y : List
-        List of past gradient differences, which are Fields.
+        Circular buffer of past gradient differences, which are Fields.
     last_x : Field
-        Initial position in variable space.
+        Latest position in variable space.
     last_gradient : Field
-        Gradient at initial position.
+        Gradient at latest position.
     k : integer
-        Number of currently stored past updates.
-    _ss_store : dictionary
-        Dictionary of scalar products between different elements of s.
-    _sy_store : dictionary
-        Dictionary of scalar products between elements of s and y.
-    _yy_store : dictionary
-        Dictionary of scalar products between different elements of y.
+        Number of updates that have taken place.
+    ss : numpy.ndarray
+        2D circular buffer of scalar products between different elements of s.
+    sy : numpy.ndarray
+        2D circular buffer of scalar products between elements of s and y.
+    yy : numpy.ndarray
+        2D circular buffer of scalar products between different elements of y.

     """
     def __init__(self, max_history_length, x0, gradient):
         self.max_history_length = max_history_length
-        self.s = LimitedList(max_history_length)
-        self.y = LimitedList(max_history_length)
+        self.s = [None]*max_history_length
+        self.y = [None]*max_history_length
         self.last_x = x0.copy()
         self.last_gradient = gradient.copy()
         self.k = 0
-        self._ss_store = {}
-        self._sy_store = {}
-        self._yy_store = {}
+        mmax = max_history_length
+        self.ss = np.empty((mmax, mmax), dtype=np.float64)
+        self.sy = np.empty((mmax, mmax), dtype=np.float64)
+        self.yy = np.empty((mmax, mmax), dtype=np.float64)
     @property
     def history_length(self):
@@ -152,15 +153,16 @@ class InformationStore(object):
         """
         result = []
         m = self.history_length
+        mmax = self.max_history_length
         k = self.k

         s = self.s
         for i in xrange(m):
-            result.append(s[k-m+i])
+            result.append(s[(k-m+i) % mmax])

         y = self.y
         for i in xrange(m):
-            result.append(y[k-m+i])
+            result.append(y[(k-m+i) % mmax])

         result.append(self.last_gradient)
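The (k-m+i) % mmax arithmetic maps the logical history, oldest update first, onto the fixed buffer slots. For instance, with mmax = 3 after k = 5 updates (so m = 3 live entries):

    mmax, k = 3, 5
    m = min(k, mmax)
    print([(k - m + i) % mmax for i in range(m)])  # [2, 0, 1]: slots holding updates 2, 3, 4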
@@ -180,28 +182,36 @@ class InformationStore(object):
         """
         m = self.history_length
+        mmax = self.max_history_length
         k = self.k
         result = np.empty((2*m+1, 2*m+1), dtype=np.float)

+        # update the stores
+        k1 = (k-1) % mmax
         for i in xrange(m):
-            for j in xrange(m):
-                result[i, j] = self.ss_store(k-m+i, k-m+j)
-
-                sy_ij = self.sy_store(k-m+i, k-m+j)
-                result[i, m+j] = sy_ij
-                result[m+j, i] = sy_ij
-
-                result[m+i, m+j] = self.yy_store(k-m+i, k-m+j)
-
-            sgrad_i = self.sgrad_store(k-m+i)
-            result[2*m, i] = sgrad_i
-            result[i, 2*m] = sgrad_i
-
-            ygrad_i = self.ygrad_store(k-m+i)
-            result[2*m, m+i] = ygrad_i
-            result[m+i, 2*m] = ygrad_i
-
-        result[2*m, 2*m] = self.gradgrad_store()
+            kmi = (k-m+i) % mmax
+            self.ss[kmi, k1] = self.ss[k1, kmi] = self.s[kmi].vdot(self.s[k1])
+            self.yy[kmi, k1] = self.yy[k1, kmi] = self.y[kmi].vdot(self.y[k1])
+            self.sy[kmi, k1] = self.s[kmi].vdot(self.y[k1])
+        for j in xrange(m-1):
+            kmj = (k-m+j) % mmax
+            self.sy[k1, kmj] = self.s[k1].vdot(self.y[kmj])
+
+        for i in xrange(m):
+            kmi = (k-m+i) % mmax
+            for j in xrange(m):
+                kmj = (k-m+j) % mmax
+                result[i, j] = self.ss[kmi, kmj]
+                result[i, m+j] = result[m+j, i] = self.sy[kmi, kmj]
+                result[m+i, m+j] = self.yy[kmi, kmj]
+
+            sgrad_i = self.s[kmi].vdot(self.last_gradient)
+            result[2*m, i] = result[i, 2*m] = sgrad_i
+
+            ygrad_i = self.y[kmi].vdot(self.last_gradient)
+            result[2*m, m+i] = result[m+i, 2*m] = ygrad_i
+
+        result[2*m, 2*m] = self.last_gradient.norm()**2

         return result
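b_dot_b is the (2m+1)x(2m+1) Gram matrix of the basis [s_0..s_{m-1}, y_0..y_{m-1}, g]; since norm() takes a square root, the bottom-right entry needs squaring to stay a plain scalar product. Only the row and column belonging to the newest (s, y) pair change between iterations, which is exactly what the store update above recomputes. A dense reference construction with random stand-in vectors:

    import numpy as np

    m = 2
    rng = np.random.RandomState(0)
    s = [rng.randn(4) for _ in range(m)]   # position differences
    y = [rng.randn(4) for _ in range(m)]   # gradient differences
    g = rng.randn(4)                       # latest gradient

    basis = s + y + [g]                    # the 2m+1 vectors b
    b_dot_b = np.array([[u.dot(v) for v in basis] for u in basis])
    print(b_dot_b.shape)                   # (5, 5); symmetric, entry [2m, 2m] is g.g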
@@ -231,185 +241,25 @@ class InformationStore(object):
         for i in xrange(2*m+1):
             delta[i] *= b_dot_b[m-1, 2*m-1]/b_dot_b[2*m-1, 2*m-1]

-        for j in xrange(m-1, -1, -1):
+        for j in xrange(m):
             delta_b_b = sum([delta[l]*b_dot_b[m+j, l] for l in xrange(2*m+1)])
             beta = delta_b_b/b_dot_b[j, m+j]
             delta[j] += (alpha[j] - beta)

         return delta
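delta holds the coefficients of the classical L-BFGS two-loop recursion expressed in the [s..., y..., g] basis; expanding sum_i delta[i]*b[i] reproduces the textbook direction. For comparison, a compact numpy version of that recursion (lbfgs_direction is a hypothetical helper, not part of this diff):

    import numpy as np

    def lbfgs_direction(s_list, y_list, g):
        # Two-loop recursion (Nocedal & Wright, Algorithm 7.4);
        # assumes every y.dot(s) > 0 (curvature condition).
        q = g.copy()
        alphas = []
        for s, y in reversed(list(zip(s_list, y_list))):
            a = s.dot(q) / y.dot(s)
            alphas.append(a)
            q -= a * y
        # scale by the standard initial inverse-Hessian guess gamma*I
        q *= s_list[-1].dot(y_list[-1]) / y_list[-1].dot(y_list[-1])
        for (s, y), a in zip(zip(s_list, y_list), reversed(alphas)):
            beta = y.dot(q) / y.dot(s)
            q += (a - beta) * s
        return -q   # the descent direction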
-    def ss_store(self, i, j):
-        """Updates the dictionary _ss_store with a new scalar product.
-
-        Returns the scalar product of s_i and s_j.
-
-        Parameters
-        ----------
-        i : integer
-            s index.
-        j : integer
-            s index.
-
-        Returns
-        -------
-        _ss_store[key] : float
-            Scalar product of s_i and s_j.
-
-        """
-        key = tuple(sorted((i, j)))
-        if key not in self._ss_store:
-            self._ss_store[key] = self.s[i].vdot(self.s[j])
-        return self._ss_store[key]
-
-    def sy_store(self, i, j):
-        """Updates the dictionary _sy_store with a new scalar product.
-
-        Returns the scalar product of s_i and y_j.
-
-        Parameters
-        ----------
-        i : integer
-            s index.
-        j : integer
-            y index.
-
-        Returns
-        -------
-        _sy_store[key] : float
-            Scalar product of s_i and y_j.
-
-        """
-        key = (i, j)
-        if key not in self._sy_store:
-            self._sy_store[key] = self.s[i].vdot(self.y[j])
-        return self._sy_store[key]
-
-    def yy_store(self, i, j):
-        """Updates the dictionary _yy_store with a new scalar product.
-
-        Returns the scalar product of y_i and y_j.
-
-        Parameters
-        ----------
-        i : integer
-            y index.
-        j : integer
-            y index.
-
-        Returns
-        -------
-        _yy_store[key] : float
-            Scalar product of y_i and y_j.
-
-        """
-        key = tuple(sorted((i, j)))
-        if key not in self._yy_store:
-            self._yy_store[key] = self.y[i].vdot(self.y[j])
-        return self._yy_store[key]
-
-    def sgrad_store(self, i):
-        """Returns scalar product between s_i and gradient on initial position.
-
-        Returns
-        -------
-        scalar product : float
-            Scalar product.
-
-        """
-        return self.s[i].vdot(self.last_gradient)
-
-    def ygrad_store(self, i):
-        """Returns scalar product between y_i and gradient on initial position.
-
-        Returns
-        -------
-        scalar product : float
-            Scalar product.
-
-        """
-        return self.y[i].vdot(self.last_gradient)
-
-    def gradgrad_store(self):
-        """Returns scalar product of gradient on initial position with itself.
-
-        Returns
-        -------
-        scalar product : float
-            Scalar product.
-
-        """
-        return self.last_gradient.vdot(self.last_gradient)
     def add_new_point(self, x, gradient):
         """Updates the s list and y list.

-        Calculates the new position and gradient differences and adds them to
-        the respective list.
+        Calculates the new position and gradient differences and enters them
+        into the respective list.

         """
-        self.k += 1
-
-        new_s = x - self.last_x
-        self.s.add(new_s)
-
-        new_y = gradient - self.last_gradient
-        self.y.add(new_y)
+        mmax = self.max_history_length
+        self.s[self.k % mmax] = x - self.last_x
+        self.y[self.k % mmax] = gradient - self.last_gradient

         self.last_x = x.copy()
         self.last_gradient = gradient.copy()
+        self.k += 1
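Each new pair overwrites slot self.k % mmax, so once k reaches mmax the oldest entry is dropped automatically; note that k is now incremented after the write. A minimal trace of the slot reuse:

    mmax = 3
    s = [None] * mmax
    for k in range(5):                # five updates into a length-3 buffer
        s[k % mmax] = 'update %d' % k
    print(s)                          # ['update 3', 'update 4', 'update 2']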
-class LimitedList(object):
-    """Class for creating a list of limited length.
-
-    Parameters
-    ----------
-    history_length : integer
-        Maximum number of stored past updates.
-
-    Attributes
-    ----------
-    history_length : integer