Commit 89e27a11 authored by Theo Steininger's avatar Theo Steininger

Merge branch 'vl_bfgs' into 'master'

Vl bfgs

See merge request !159
parents 841b7ecb 026e670e
Pipeline #14548 passed with stages
in 14 minutes and 1 second
......@@ -1087,17 +1087,12 @@ class Field(Loggable, Versionable, object):
return dotted.sum(spaces=spaces)
def norm(self):
""" Computes the Lq-norm of the field values.
Parameters
----------
q : scalar
Parameter q of the Lq-norm (default: 2).
""" Computes the L2-norm of the field values.
Returns
-------
norm : scalar
The Lq-norm of the field values.
The L2-norm of the field values.
"""
return np.sqrt(np.abs(self.vdot(x=self)))
......
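For reference, the norm computation itself is unchanged by this hunk: it is the square root of the field's inner product with itself. A minimal standalone sketch with numpy, using a flat array as a stand-in for a Field (`l2_norm` is a hypothetical helper, not NIFTY API):

```python
import numpy as np

def l2_norm(x):
    # Same computation as Field.norm above: sqrt(|<x, x>|).
    # np.vdot conjugates its first argument, so this also works for complex x.
    return np.sqrt(np.abs(np.vdot(x, x)))

print(l2_norm(np.array([3.0, 4.0])))  # 5.0
```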
......@@ -146,14 +146,14 @@ class DescentMinimizer(Loggable, object):
break
# current position is encoded in energy object
descend_direction = self.get_descend_direction(energy)
descent_direction = self.get_descent_direction(energy)
# compute the step length, which minimizes energy.value along the
# search direction
step_length, f_k, new_energy = \
self.line_searcher.perform_line_search(
energy=energy,
pk=descend_direction,
pk=descent_direction,
f_k_minus_1=f_k_minus_1)
f_k_minus_1 = energy.value
......@@ -195,5 +195,5 @@ class DescentMinimizer(Loggable, object):
return energy, convergence
@abc.abstractmethod
def get_descend_direction(self, energy):
def get_descent_direction(self, energy):
raise NotImplementedError
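The renamed get_descent_direction is the hook each minimizer subclass fills in; the surrounding loop is always "compute a direction, line-search along it, move". A minimal sketch of that contract, with a fixed step length standing in for perform_line_search (all names here are hypothetical):

```python
import numpy as np

def minimize(grad, get_descent_direction, x, step=0.1, tol=1e-8, maxiter=1000):
    # Skeleton of a descent minimizer: direction hook plus a line search,
    # here replaced by a fixed step length for brevity.
    for _ in range(maxiter):
        g = grad(x)
        if np.linalg.norm(g) < tol:   # convergence check
            return x
        p = get_descent_direction(g)  # subclass-specific hook
        x = x + step * p              # a real minimizer would line-search here
    return x

# steepest descent as the simplest possible hook: p = -gradient
x = minimize(lambda x: 2 * x, lambda g: -g, np.array([1.0, -2.0]))
print(x)  # close to [0, 0]
```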
......@@ -25,31 +25,31 @@ from nifty import LineEnergy
class LineSearch(Loggable, object):
"""Class for determining the optimal step size along some descent direction.
Initialize the line search procedure which can be used by a specific line
search method. It finds the step size in a specific direction in the
minimization process.
Attributes
----------
line_energy : LineEnergy object
LineEnergy object from which we can extract the energy at a specific point.
f_k_minus_1 : float
Value of the energy at the k-1 iteration of the line search procedure.
prefered_initial_step_size : float
preferred_initial_step_size : float
Initial guess for the step length.
"""
__metaclass__ = abc.ABCMeta
def __init__(self):
self.line_energy = None
self.f_k_minus_1 = None
self.prefered_initial_step_size = None
self.preferred_initial_step_size = None
def _set_line_energy(self, energy, pk, f_k_minus_1=None):
"""Set the coordinates for a new line search.
......@@ -58,13 +58,13 @@ class LineSearch(Loggable, object):
----------
energy : Energy object
Energy object from which we can calculate the energy, gradient and
curvature at a specific point.
curvature at a specific point.
pk : Field
Unit vector pointing into the search direction.
f_k_minus_1 : float
Value of the fuction (energy) which will be minimized at the k-1
Value of the function (energy) which will be minimized at the k-1
iteration of the line search procedure. (Default: None)
"""
self.line_energy = LineEnergy(position=0.,
energy=energy,
......
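_set_line_energy reduces the multivariate energy to a one-dimensional function of the step length along pk, conceptually phi(alpha) = E(x + alpha*pk) with derivative <grad E, pk>. A sketch of that restriction with plain callables (`line_restriction` is a hypothetical helper, not the LineEnergy API):

```python
import numpy as np

def line_restriction(f, grad_f, x, pk):
    # Restrict f to the line x + alpha*pk.
    phi = lambda alpha: f(x + alpha * pk)
    # Directional derivative: <grad f(x + alpha*pk), pk>.
    phiprime = lambda alpha: np.vdot(grad_f(x + alpha * pk), pk).real
    return phi, phiprime

f = lambda x: np.sum(x**2)
grad_f = lambda x: 2 * x
phi, phiprime = line_restriction(f, grad_f,
                                 np.array([1.0, 1.0]), np.array([-1.0, -1.0]))
print(phi(0.5), phiprime(0.5))  # 0.5 -2.0
```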
......@@ -24,9 +24,9 @@ from .line_search import LineSearch
class LineSearchStrongWolfe(LineSearch):
"""Class for finding a step size that satisfies the strong Wolfe conditions.
Algorithm contains two stages. It begins whit a trial step length and it
keeps increasing the it until it finds an acceptable step length or an
interval. If it does not satisfy the Wolfe conditions it performs the Zoom
The algorithm contains two stages. It begins with a trial step length and
keeps increasing it until it finds an acceptable step length or an
interval. If it does not satisfy the Wolfe conditions, it performs the Zoom
algorithm (second stage). By interpolating it decreases the size of the
interval until an acceptable step length is found.
......@@ -120,8 +120,8 @@ class LineSearchStrongWolfe(LineSearch):
# set alphas
alpha0 = 0.
if self.prefered_initial_step_size is not None:
alpha1 = self.prefered_initial_step_size
if self.preferred_initial_step_size is not None:
alpha1 = self.preferred_initial_step_size
elif old_phi_0 is not None and phiprime_0 != 0:
alpha1 = min(1.0, 1.01*2*(phi_0 - old_phi_0)/phiprime_0)
if alpha1 < 0:
......
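The branch above picks the first trial step: the minimizer's preferred_initial_step_size if one was set, otherwise a standard heuristic that assumes the first step will produce roughly the same energy decrease as the previous iteration. A standalone sketch of that logic; note the body of the truncated `if alpha1 < 0:` branch is cut off above, so the fallback here is an assumption:

```python
def initial_step(phi_0, phiprime_0, old_phi_0=None, preferred=None):
    # First trial step length for the strong Wolfe search (sketch).
    if preferred is not None:
        return preferred
    if old_phi_0 is not None and phiprime_0 != 0:
        # Assume roughly the same energy decrease as in the previous iteration.
        alpha1 = min(1.0, 1.01 * 2 * (phi_0 - old_phi_0) / phiprime_0)
        if alpha1 > 0:
            return alpha1
    return 1.0  # assumed fallback; the original branch is truncated above

print(initial_step(phi_0=1.0, phiprime_0=-4.0, old_phi_0=2.0))  # 0.505
```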
......@@ -32,9 +32,9 @@ class RelaxedNewton(DescentMinimizer):
convergence_level=convergence_level,
iteration_limit=iteration_limit)
self.line_searcher.prefered_initial_step_size = 1.
self.line_searcher.preferred_initial_step_size = 1.
def get_descend_direction(self, energy):
def get_descent_direction(self, energy):
""" Calculates the descent direction according to a Newton scheme.
The descent direction is determined by weighting the gradient at the
......@@ -50,12 +50,9 @@ class RelaxedNewton(DescentMinimizer):
Returns
-------
descend_direction : Field
descent_direction : Field
Returns the descent direction with proposed step length. In a
quadratic potential this corresponds to the optimal step.
"""
gradient = energy.gradient
curvature = energy.curvature
descend_direction = curvature.inverse_times(gradient)
return descend_direction * -1
return -energy.curvature.inverse_times(energy.gradient)
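The compacted return line is the Newton step d = -H^{-1} grad E, applied through curvature.inverse_times. A dense numpy sketch of the same step, solving the linear system instead of forming the inverse (hypothetical names):

```python
import numpy as np

def newton_descent_direction(gradient, hessian):
    # Newton step: solve H d = -grad rather than inverting H explicitly.
    return np.linalg.solve(hessian, -gradient)

# For a quadratic E(x) = x^T H x / 2 - b^T x, one Newton step from anywhere
# lands exactly on the minimum H^{-1} b.
H = np.array([[2.0, 0.0], [0.0, 4.0]])
b = np.array([2.0, 4.0])
x = np.zeros(2)
d = newton_descent_direction(H @ x - b, H)  # gradient of E is H x - b
print(x + d)  # [1. 1.], the exact minimizer
```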
......@@ -20,7 +20,7 @@ from .descent_minimizer import DescentMinimizer
class SteepestDescent(DescentMinimizer):
def get_descend_direction(self, energy):
def get_descent_direction(self, energy):
""" Implementation of the steepest descent minimization scheme.
Also known as 'gradient descent'. This algorithm simply follows the
......@@ -34,10 +34,9 @@ class SteepestDescent(DescentMinimizer):
Returns
-------
descend_direction : Field
descent_direction : Field
Returns the descent direction.
"""
descend_direction = energy.gradient
return descend_direction * -1
return -energy.gradient
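Here the hook is simply the negative gradient, which is the locally steepest direction: among all unit vectors u, the directional derivative <grad f, u> is smallest for u = -grad f / |grad f|. A quick numerical check of that fact (toy gradient assumed):

```python
import numpy as np

grad = np.array([-2.0, 20.0])                 # gradient of some f at some point
u_steepest = -grad / np.linalg.norm(grad)
u_random = np.random.default_rng(0).standard_normal(2)
u_random /= np.linalg.norm(u_random)

print(np.dot(grad, u_steepest))  # -|grad|, the most negative value possible
print(np.dot(grad, u_random))    # always >= the value above
```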
......@@ -40,7 +40,7 @@ class VL_BFGS(DescentMinimizer):
self._information_store = None
return super(VL_BFGS, self).__call__(energy)
def get_descend_direction(self, energy):
def get_descent_direction(self, energy):
"""Implementation of the Vector-free L-BFGS minimization scheme.
Find the descent direction by using an approximation of the inverse Hessian.
......@@ -57,7 +57,7 @@ class VL_BFGS(DescentMinimizer):
Returns
-------
descend_direction : Field
descent_direction : Field
Returns the descent direction.
References
......@@ -80,11 +80,11 @@ class VL_BFGS(DescentMinimizer):
b = self._information_store.b
delta = self._information_store.delta
descend_direction = delta[0] * b[0]
descent_direction = delta[0] * b[0]
for i in xrange(1, len(delta)):
descend_direction += delta[i] * b[i]
descent_direction += delta[i] * b[i]
return descend_direction
return descent_direction
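This is the defining move of vector-free L-BFGS: the direction lies in the span of the 2m+1 basis vectors b = (s_0..s_{m-1}, y_0..y_{m-1}, g), so once the scalar coefficients delta are known, only cheap scaled additions of stored Fields remain. A toy sketch of the combination step for m = 1:

```python
import numpy as np

def combine(delta, b):
    # Descent direction as the linear combination sum_i delta[i] * b[i],
    # mirroring the loop above.
    descent_direction = delta[0] * b[0]
    for i in range(1, len(delta)):
        descent_direction = descent_direction + delta[i] * b[i]
    return descent_direction

# m = 1: basis is (s_0, y_0, gradient), i.e. 2m+1 = 3 vectors
b = [np.array([1.0, 0.0]), np.array([0.0, 1.0]), np.array([1.0, 1.0])]
delta = [0.5, 0.25, -1.0]
print(combine(delta, b))  # [-0.5 -0.75]
```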
class InformationStore(object):
......@@ -104,34 +104,35 @@ class InformationStore(object):
max_history_length : integer
Maximum number of stored past updates.
s : List
List of past position differences, which are Fields.
Circular buffer of past position differences, which are Fields.
y : List
List of past gradient differences, which are Fields.
Circular buffer of past gradient differences, which are Fields.
last_x : Field
Initial position in variable space.
Latest position in variable space.
last_gradient : Field
Gradient at initial position.
Gradient at latest position.
k : integer
Number of currently stored past updates.
_ss_store : dictionary
Dictionary of scalar products between different elements of s.
_sy_store : dictionary
Dictionary of scalar products between elements of s and y.
_yy_store : dictionary
Dictionary of scalar products between different elements of y.
Number of updates that have taken place.
ss : numpy.ndarray
2D circular buffer of scalar products between different elements of s.
sy : numpy.ndarray
2D circular buffer of scalar products between elements of s and y.
yy : numpy.ndarray
2D circular buffer of scalar products between different elements of y.
"""
def __init__(self, max_history_length, x0, gradient):
self.max_history_length = max_history_length
self.s = LimitedList(max_history_length)
self.y = LimitedList(max_history_length)
self.s = [None]*max_history_length
self.y = [None]*max_history_length
self.last_x = x0.copy()
self.last_gradient = gradient.copy()
self.k = 0
self._ss_store = {}
self._sy_store = {}
self._yy_store = {}
mmax = max_history_length
self.ss = np.empty((mmax, mmax), dtype=np.float64)
self.sy = np.empty((mmax, mmax), dtype=np.float64)
self.yy = np.empty((mmax, mmax), dtype=np.float64)
@property
def history_length(self):
......@@ -152,15 +153,16 @@ class InformationStore(object):
"""
result = []
m = self.history_length
mmax = self.max_history_length
k = self.k
s = self.s
for i in xrange(m):
result.append(s[k-m+i])
result.append(s[(k-m+i) % mmax])
y = self.y
for i in xrange(m):
result.append(y[k-m+i])
result.append(y[(k-m+i) % mmax])
result.append(self.last_gradient)
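With the LimitedList gone, the logical update index k-m+i is mapped into a fixed-size buffer by a modulo, so the m most recent updates stay addressable without shifting storage. A sketch of just the index arithmetic:

```python
# Circular-buffer indexing as used above: update number j lives in slot j % mmax.
mmax, k, m = 5, 12, 5   # buffer of 5 slots, 12 updates seen, last 5 kept

slots = [(k - m + i) % mmax for i in range(m)]
print(slots)  # [2, 3, 4, 0, 1] -- the most recent updates, oldest first
```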
......@@ -180,28 +182,36 @@ class InformationStore(object):
"""
m = self.history_length
mmax = self.max_history_length
k = self.k
result = np.empty((2*m+1, 2*m+1), dtype=np.float64)
# update the stores
k1 = (k-1) % mmax
for i in xrange(m):
for j in xrange(m):
result[i, j] = self.ss_store(k-m+i, k-m+j)
sy_ij = self.sy_store(k-m+i, k-m+j)
result[i, m+j] = sy_ij
result[m+j, i] = sy_ij
kmi = (k-m+i) % mmax
self.ss[kmi, k1] = self.ss[k1, kmi] = self.s[kmi].vdot(self.s[k1])
self.yy[kmi, k1] = self.yy[k1, kmi] = self.y[kmi].vdot(self.y[k1])
self.sy[kmi, k1] = self.s[kmi].vdot(self.y[k1])
for j in xrange(m-1):
kmj = (k-m+j) % mmax
self.sy[k1, kmj] = self.s[k1].vdot(self.y[kmj])
result[m+i, m+j] = self.yy_store(k-m+i, k-m+j)
for i in xrange(m):
kmi = (k-m+i) % mmax
for j in xrange(m):
kmj = (k-m+j) % mmax
result[i, j] = self.ss[kmi, kmj]
result[i, m+j] = result[m+j, i] = self.sy[kmi, kmj]
result[m+i, m+j] = self.yy[kmi, kmj]
sgrad_i = self.sgrad_store(k-m+i)
result[2*m, i] = sgrad_i
result[i, 2*m] = sgrad_i
sgrad_i = self.s[kmi].vdot(self.last_gradient)
result[2*m, i] = result[i, 2*m] = sgrad_i
ygrad_i = self.ygrad_store(k-m+i)
result[2*m, m+i] = ygrad_i
result[m+i, 2*m] = ygrad_i
ygrad_i = self.y[kmi].vdot(self.last_gradient)
result[2*m, m+i] = result[m+i, 2*m] = ygrad_i
result[2*m, 2*m] = self.gradgrad_store()
result[2*m, 2*m] = self.last_gradient.norm()**2
return result
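b_dot_b assembles the (2m+1) x (2m+1) Gram matrix of the basis vectors; the rewrite keeps the ss/sy/yy blocks cached in circular buffers and only recomputes products involving the newest (s, y) pair. A brute-force reference sketch that computes the same matrix without any caching (`b_dot_b_reference` is a hypothetical helper):

```python
import numpy as np

def b_dot_b_reference(s, y, g):
    # Gram matrix of the basis b = (s_0..s_{m-1}, y_0..y_{m-1}, g).
    # The production code above fills the same matrix from cached blocks.
    b = list(s) + list(y) + [g]
    n = len(b)
    result = np.empty((n, n), dtype=np.float64)
    for i in range(n):
        for j in range(n):
            result[i, j] = np.vdot(b[i], b[j])
    return result

s = [np.array([1.0, 0.0])]
y = [np.array([0.0, 2.0])]
g = np.array([1.0, 1.0])
print(b_dot_b_reference(s, y, g))  # symmetric 3x3 matrix for m = 1
```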
......@@ -231,185 +241,25 @@ class InformationStore(object):
for i in xrange(2*m+1):
delta[i] *= b_dot_b[m-1, 2*m-1]/b_dot_b[2*m-1, 2*m-1]
for j in xrange(m-1, -1, -1):
for j in xrange(m):
delta_b_b = sum([delta[l]*b_dot_b[m+j, l] for l in xrange(2*m+1)])
beta = delta_b_b/b_dot_b[j, m+j]
delta[j] += (alpha[j] - beta)
return delta
def ss_store(self, i, j):
"""Updates the dictionary _ss_store with a new scalar product.
Returns the scalar product of s_i and s_j.
Parameters
----------
i : integer
s index.
j : integer
s index.
Returns
-------
_ss_store[key] : float
Scalar product of s_i and s_j.
"""
key = tuple(sorted((i, j)))
if key not in self._ss_store:
self._ss_store[key] = self.s[i].vdot(self.s[j])
return self._ss_store[key]
def sy_store(self, i, j):
"""Updates the dictionary _sy_store with a new scalar product.
Returns the scalar product of s_i and y_j.
Parameters
----------
i : integer
s index.
j : integer
y index.
Returns
-------
_sy_store[key] : float
Scalar product of s_i and y_j.
"""
key = (i, j)
if key not in self._sy_store:
self._sy_store[key] = self.s[i].vdot(self.y[j])
return self._sy_store[key]
def yy_store(self, i, j):
"""Updates the dictionary _yy_store with a new scalar product.
Returns the scalar product of y_i and y_j.
Parameters
----------
i : integer
y index.
j : integer
y index.
Returns
------
_yy_store[key] : float
Scalar product of y_i and y_j.
"""
key = tuple(sorted((i, j)))
if key not in self._yy_store:
self._yy_store[key] = self.y[i].vdot(self.y[j])
return self._yy_store[key]
def sgrad_store(self, i):
"""Returns scalar product between s_i and gradient on initial position.
Returns
-------
scalar product : float
Scalar product.
"""
return self.s[i].vdot(self.last_gradient)
def ygrad_store(self, i):
"""Returns scalar product between y_i and gradient on initial position.
Returns
-------
scalar product : float
Scalar product.
"""
return self.y[i].vdot(self.last_gradient)
def gradgrad_store(self):
"""Returns scalar product of gradient on initial position with itself.
Returns
-------
scalar product : float
Scalar product.
"""
return self.last_gradient.vdot(self.last_gradient)
def add_new_point(self, x, gradient):
"""Updates the s list and y list.
Calculates the new position and gradient differences and adds them to
the respective list.
Calculates the new position and gradient differences and stores them
in the respective circular buffer.
"""
self.k += 1
new_s = x - self.last_x
self.s.add(new_s)
new_y = gradient - self.last_gradient
self.y.add(new_y)
mmax = self.max_history_length
self.s[self.k % mmax] = x - self.last_x
self.y[self.k % mmax] = gradient - self.last_gradient
self.last_x = x.copy()
self.last_gradient = gradient.copy()
class LimitedList(object):
"""Class for creating a list of limited length.
Parameters
----------
history_length : integer
Maximum number of stored past updates.
Attributes
----------
history_length : integer
Maximum number of stored past updates.
_offset : integer
Offset to correct the indices which are bigger than the maximum history
length.
_storage : list
List where input values are stored.
"""
def __init__(self, history_length):
self.history_length = int(history_length)
self._offset = 0
self._storage = []
def __getitem__(self, index):
"""Returns the element with index [index-offset].
Parameters
----------
index : integer
Index of the selected element.
Returns
-------
selected element
"""
return self._storage[index-self._offset]
def add(self, value):
"""Adds a new element to the list.
If the list is of length maximum history then it removes the first
element first.
Parameters
----------
value : anything
New element in the list.
"""
if len(self._storage) == self.history_length:
self._storage.pop(0)
self._offset += 1
self._storage.append(value)
self.k += 1
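add_new_point now writes the new (s, y) pair straight into slot k % mmax, overwriting the oldest entry once the buffer is full, instead of popping from a LimitedList. A self-contained sketch of the same bookkeeping (`TinyStore` is hypothetical):

```python
import numpy as np

class TinyStore:
    # Sketch of the circular-buffer update in InformationStore.add_new_point.
    def __init__(self, max_history_length, x0, gradient):
        self.mmax = max_history_length
        self.s = [None] * self.mmax          # position differences
        self.y = [None] * self.mmax          # gradient differences
        self.last_x = x0.copy()
        self.last_gradient = gradient.copy()
        self.k = 0                           # total updates so far

    def add_new_point(self, x, gradient):
        self.s[self.k % self.mmax] = x - self.last_x
        self.y[self.k % self.mmax] = gradient - self.last_gradient
        self.last_x = x.copy()
        self.last_gradient = gradient.copy()
        self.k += 1

store = TinyStore(3, np.zeros(2), np.zeros(2))
for step in range(5):                        # 5 updates into 3 slots
    z = np.full(2, float((step + 1)**2))
    store.add_new_point(z, 2 * z)
print(store.k)                               # 5
print([si.tolist() for si in store.s])       # oldest slots overwritten
```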