Commit 769812bf authored by Martin Reinecke

consolidate the descent minimizers

parent 192ceb54
......
@@ -59,11 +59,8 @@ from .minimization.gradient_norm_controller import GradientNormController
 from .minimization.minimizer import Minimizer
 from .minimization.conjugate_gradient import ConjugateGradient
 from .minimization.nonlinear_cg import NonlinearCG
-from .minimization.descent_minimizer import DescentMinimizer
-from .minimization.steepest_descent import SteepestDescent
-from .minimization.vl_bfgs import VL_BFGS
-from .minimization.l_bfgs import L_BFGS
-from .minimization.relaxed_newton import RelaxedNewton
+from .minimization.descent_minimizers import (
+    DescentMinimizer, SteepestDescent, VL_BFGS, L_BFGS, RelaxedNewton)
 from .minimization.scipy_minimizer import (ScipyMinimizer, NewtonCG, L_BFGS_B,
                                            ScipyCG)
 from .minimization.energy import Energy
......
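With this commit all descent-type minimizers live in a single module, while user code keeps importing them from the package top level. A minimal sketch of the resulting call pattern; the package name nifty5 and the controller arguments are assumptions, not taken from this diff:

    from nifty5 import GradientNormController, L_BFGS, SteepestDescent

    # assumed arguments; check GradientNormController for the actual signature
    controller = GradientNormController(tol_abs_gradnorm=1e-5, iteration_limit=100)
    minimizer = L_BFGS(controller=controller)   # or SteepestDescent(controller)
    # energy, status = minimizer(energy)        # energy: an Energy instance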
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
# Copyright(C) 2013-2018 Max-Planck-Society
#
# NIFTy is being developed at the Max-Planck-Institut fuer Astrophysik
# and financially supported by the Studienstiftung des deutschen Volkes.
from __future__ import absolute_import, division, print_function

from ..compat import *
import numpy as np
from .random import Random
import sys

ntask = 1
rank = 0
master = True


def is_numpy():
    return False


def local_shape(shape, distaxis=0):
    return shape
# Serial stand-in for the distributed data object: a thin wrapper around a
# numpy array that mimics the interface of the MPI-distributed variant.
class data_object(object):
    def __init__(self, shape, data):
        self._data = data

    def copy(self):
        return data_object(self._data.shape, self._data.copy())

    @property
    def dtype(self):
        return self._data.dtype

    @property
    def shape(self):
        return self._data.shape

    @property
    def size(self):
        return self._data.size

    @property
    def real(self):
        return data_object(self._data.shape, self._data.real)

    @property
    def imag(self):
        return data_object(self._data.shape, self._data.imag)

    def conj(self):
        return data_object(self._data.shape, self._data.conj())

    def conjugate(self):
        return data_object(self._data.shape, self._data.conjugate())

    def _contraction_helper(self, op, axis):
        if axis is not None:
            if len(axis) == len(self._data.shape):
                axis = None
        if axis is None:
            return getattr(self._data, op)()
        res = getattr(self._data, op)(axis=axis)
        return data_object(res.shape, res)

    def sum(self, axis=None):
        return self._contraction_helper("sum", axis)

    def prod(self, axis=None):
        return self._contraction_helper("prod", axis)

    def min(self, axis=None):
        return self._contraction_helper("min", axis)

    def max(self, axis=None):
        return self._contraction_helper("max", axis)

    def mean(self, axis=None):
        if axis is None:
            sz = self.size
        else:
            sz = reduce(lambda x, y: x*y, [self.shape[i] for i in axis])
        return self.sum(axis)/sz

    def std(self, axis=None):
        return np.sqrt(self.var(axis))

    # FIXME: to be improved!
    def var(self, axis=None):
        if axis is not None and len(axis) != len(self.shape):
            raise ValueError("functionality not yet supported")
        return (abs(self-self.mean())**2).mean()

    def _binary_helper(self, other, op):
        a = self
        if isinstance(other, data_object):
            b = other
            if a._data.shape != b._data.shape:
                raise ValueError("shapes are incompatible.")
            a = a._data
            b = b._data
        elif np.isscalar(other):
            a = a._data
            b = other
        elif isinstance(other, np.ndarray):
            a = a._data
            b = other
        else:
            return NotImplemented
        tval = getattr(a, op)(b)
        if tval is a:
            return self
        else:
            return data_object(self._data.shape, tval)

    def __neg__(self):
        return data_object(self._data.shape, -self._data)

    def __abs__(self):
        return data_object(self._data.shape, abs(self._data))

    def all(self):
        return self.sum() == self.size

    def any(self):
        return self.sum() != 0

    def fill(self, value):
        self._data.fill(value)
# Generate the full set of binary, in-place and comparison operators and
# attach them to data_object; each one simply dispatches to _binary_helper.
for op in ["__add__", "__radd__", "__iadd__",
           "__sub__", "__rsub__", "__isub__",
           "__mul__", "__rmul__", "__imul__",
           "__div__", "__rdiv__", "__idiv__",
           "__truediv__", "__rtruediv__", "__itruediv__",
           "__floordiv__", "__rfloordiv__", "__ifloordiv__",
           "__pow__", "__rpow__", "__ipow__",
           "__lt__", "__le__", "__gt__", "__ge__", "__eq__", "__ne__"]:
    def func(op):
        def func2(self, other):
            return self._binary_helper(other, op=op)
        return func2
    setattr(data_object, op, func(op))
def full(shape, fill_value, dtype=None):
    return data_object(shape, np.full(shape, fill_value, dtype))


def empty(shape, dtype=None):
    return data_object(shape, np.empty(shape, dtype))


def zeros(shape, dtype=None, distaxis=0):
    return data_object(shape, np.zeros(shape, dtype))


def ones(shape, dtype=None, distaxis=0):
    return data_object(shape, np.ones(shape, dtype))


def empty_like(a, dtype=None):
    # was: data_object(np.empty_like(a._data, dtype)), which is missing the
    # shape argument that the data_object constructor expects
    return data_object(a._data.shape, np.empty_like(a._data, dtype))


def vdot(a, b):
    return np.vdot(a._data, b._data)
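A quick illustration of the wrapper in action; the values are made up and only names defined above are used:

    a = full((3,), 2.)
    b = ones((3,), dtype=np.float64)
    c = a + b                     # dispatches through the generated __add__
    print(c.sum(), vdot(a, b))    # -> 9.0 6.0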
def _math_helper(x, function, out):
    function = getattr(np, function)
    if out is not None:
        function(x._data, out=out._data)
        return out
    else:
        return data_object(x.shape, function(x._data))


_current_module = sys.modules[__name__]

# Generate module-level unary math functions (sqrt(x, out=None) etc.) that
# forward to the corresponding numpy routines via _math_helper.
for f in ["sqrt", "exp", "log", "tanh", "conjugate"]:
    def func(f):
        def func2(x, out=None):
            return _math_helper(x, f, out)
        return func2
    setattr(_current_module, f, func(f))
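After this loop the module exposes sqrt, exp and friends as free functions, for instance (illustrative values):

    r = sqrt(full((2,), 4.))
    print(r.sum())   # -> 4.0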
def from_object(object, dtype, copy, set_locked):
    if dtype is None:
        dtype = object.dtype
    dtypes_equal = dtype == object.dtype
    if set_locked and dtypes_equal and locked(object):
        return object
    if not dtypes_equal and not copy:
        raise ValueError("cannot change data type without copying")
    if set_locked and not copy:
        raise ValueError("cannot lock object without copying")
    data = np.array(object._data, dtype=dtype, copy=copy)
    if set_locked:
        data.flags.writeable = False
    # was: data_object(object._shape, data, distaxis=object._distaxis), which
    # matches the distributed variant's constructor; the serial data_object
    # only takes (shape, data)
    return data_object(data.shape, data)
# This function draws all random numbers on all tasks, so that the resulting
# array is identical regardless of the number of tasks.
# MR FIXME: depending on what is really wanted/needed (i.e. same result
# independent of the number of tasks, performance etc.) we need to adjust the
# algorithm.
def from_random(random_type, shape, dtype=np.float64, **kwargs):
    generator_function = getattr(Random, random_type)
    ldat = generator_function(dtype=dtype, shape=shape, **kwargs)
    return from_local_data(shape, ldat)
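A minimal usage sketch; that Random provides a "normal" generator is an assumption suggested by the getattr dispatch, not something this file shows:

    arr = from_random("normal", shape=(4, 4))   # "normal" assumed to exist
    print(arr.shape, arr.dtype)                 # -> (4, 4) float64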
# Trivial serial stand-ins for the MPI communication helpers: with a single
# task there is nothing to distribute, reduce or redistribute.
def local_data(arr):
    return arr._data


def ibegin_from_shape(glob_shape, distaxis=0):
    return (0,) * len(glob_shape)


def ibegin(arr):
    return (0,) * arr._data.ndim


def np_allreduce_sum(arr):
    return arr.copy()


def np_allreduce_min(arr):
    return arr.copy()


def distaxis(arr):
    return -1


def from_local_data(shape, arr, distaxis=-1):
    return data_object(shape, arr)


def from_global_data(arr, sum_up=False):
    return data_object(arr.shape, arr)


def to_global_data(arr):
    return arr._data


def redistribute(arr, dist=None, nodist=None):
    return arr.copy()


def default_distaxis():
    return -1


def lock(arr):
    arr._data.flags.writeable = False


def locked(arr):
    return not arr._data.flags.writeable
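For example, locking simply toggles numpy's writeable flag on the wrapped array:

    d = ones((2,))
    lock(d)
    print(locked(d))   # -> True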
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
# Copyright(C) 2013-2018 Max-Planck-Society
#
# NIFTy is being developed at the Max-Planck-Institut fuer Astrophysik
# and financially supported by the Studienstiftung des deutschen Volkes.
from __future__ import absolute_import, division, print_function

from ..compat import *
from ..logger import logger
from .line_search_strong_wolfe import LineSearchStrongWolfe
from .minimizer import Minimizer


class DescentMinimizer(Minimizer):
    """ A base class used by gradient methods to find a local minimum.

    Descent minimization methods are used to find a local minimum of a scalar
    function by following a descent direction. This class implements the
    minimization procedure once a descent direction is known. The descent
    direction has to be implemented separately.

    Parameters
    ----------
    controller : IterationController
        Object that decides when to terminate the minimization.
    line_searcher : callable, optional
        Function which infers the step size in the descent direction
        (default: LineSearchStrongWolfe()).
    """

    def __init__(self, controller, line_searcher=LineSearchStrongWolfe()):
        self._controller = controller
        self.line_searcher = line_searcher

    def __call__(self, energy):
        """ Performs the minimization of the provided Energy functional.

        Parameters
        ----------
        energy : Energy
            Energy object which provides value, gradient and metric at a
            specific position in parameter space.

        Returns
        -------
        Energy
            Latest `energy` of the minimization.
        int
            Can be controller.CONVERGED or controller.ERROR.

        Notes
        -----
        The minimization is stopped if
            * the controller returns controller.CONVERGED or controller.ERROR,
            * a perfectly flat point is reached, or
            * the line search reports that the minimum has been found.
        """
        f_k_minus_1 = None
        controller = self._controller
        status = controller.start(energy)
        if status != controller.CONTINUE:
            return energy, status

        while True:
            # check if position is at a flat point
            if energy.gradient_norm == 0:
                return energy, controller.CONVERGED

            # compute a step length that reduces energy.value sufficiently
            new_energy, success = self.line_searcher.perform_line_search(
                energy=energy, pk=self.get_descent_direction(energy),
                f_k_minus_1=f_k_minus_1)
            if not success:
                self.reset()

            f_k_minus_1 = energy.value

            if new_energy.value > energy.value:
                logger.error("Error: Energy has increased")
                return energy, controller.ERROR

            if new_energy.value == energy.value:
                logger.warning(
                    "Warning: Energy has not changed. Assuming convergence...")
                return new_energy, controller.CONVERGED

            energy = new_energy
            status = self._controller.check(energy)
            if status != controller.CONTINUE:
                return energy, status

    def reset(self):
        pass

    def get_descent_direction(self, energy):
        """ Calculates the next descent direction.

        Parameters
        ----------
        energy : Energy
            An instance of the Energy class which shall be minimized. The
            position of `energy` is used as the starting point of minimization.

        Returns
        -------
        Field
            The descent direction.
        """
        raise NotImplementedError
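Since DescentMinimizer fixes only the outer loop, a new scheme merely has to supply get_descent_direction. A hypothetical subclass as illustration (not part of this commit):

    class DampedSteepestDescent(DescentMinimizer):
        # Illustrative: steepest descent with a fixed damping factor.
        def __init__(self, controller, damping=0.5, **kwargs):
            super(DampedSteepestDescent, self).__init__(controller, **kwargs)
            self._damping = damping

        def get_descent_direction(self, energy):
            # the inherited line search scales this direction further
            return -self._damping*energy.gradient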
......
@@ -19,10 +19,188 @@
from __future__ import absolute_import, division, print_function
import numpy as np
from ..compat import *
from .descent_minimizer import DescentMinimizer
from ..logger import logger
from .line_search_strong_wolfe import LineSearchStrongWolfe
from .minimizer import Minimizer
class DescentMinimizer(Minimizer):
    """ A base class used by gradient methods to find a local minimum.

    Descent minimization methods are used to find a local minimum of a scalar
    function by following a descent direction. This class implements the
    minimization procedure once a descent direction is known. The descent
    direction has to be implemented separately.

    Parameters
    ----------
    controller : IterationController
        Object that decides when to terminate the minimization.
    line_searcher : callable, optional
        Function which infers the step size in the descent direction
        (default: LineSearchStrongWolfe()).
    """

    def __init__(self, controller, line_searcher=LineSearchStrongWolfe()):
        self._controller = controller
        self.line_searcher = line_searcher

    def __call__(self, energy):
        """ Performs the minimization of the provided Energy functional.

        Parameters
        ----------
        energy : Energy
            Energy object which provides value, gradient and metric at a
            specific position in parameter space.

        Returns
        -------
        Energy
            Latest `energy` of the minimization.
        int
            Can be controller.CONVERGED or controller.ERROR.

        Notes
        -----
        The minimization is stopped if
            * the controller returns controller.CONVERGED or controller.ERROR,
            * a perfectly flat point is reached, or
            * the line search reports that the minimum has been found.
        """
        f_k_minus_1 = None
        controller = self._controller
        status = controller.start(energy)
        if status != controller.CONTINUE:
            return energy, status

        while True:
            # check if position is at a flat point
            if energy.gradient_norm == 0:
                return energy, controller.CONVERGED

            # compute a step length that reduces energy.value sufficiently
            new_energy, success = self.line_searcher.perform_line_search(
                energy=energy, pk=self.get_descent_direction(energy),
                f_k_minus_1=f_k_minus_1)
            if not success:
                self.reset()

            f_k_minus_1 = energy.value

            if new_energy.value > energy.value:
                logger.error("Error: Energy has increased")
                return energy, controller.ERROR

            if new_energy.value == energy.value:
                logger.warning(
                    "Warning: Energy has not changed. Assuming convergence...")
                return new_energy, controller.CONVERGED

            energy = new_energy
            status = self._controller.check(energy)
            if status != controller.CONTINUE:
                return energy, status

    def reset(self):
        pass

    def get_descent_direction(self, energy):
        """ Calculates the next descent direction.

        Parameters
        ----------
        energy : Energy
            An instance of the Energy class which shall be minimized. The
            position of `energy` is used as the starting point of minimization.

        Returns
        -------
        Field
            The descent direction.
        """
        raise NotImplementedError
class SteepestDescent(DescentMinimizer):
    """ Implementation of the steepest descent minimization scheme.

    Also known as 'gradient descent'. This algorithm simply follows the
    functional's negative gradient for minimization.
    """

    def get_descent_direction(self, energy):
        return -energy.gradient
class RelaxedNewton(DescentMinimizer):
    """ Calculates the descent direction according to a Newton scheme.

    The descent direction is determined by weighting the gradient at the
    current parameter position with the inverse local metric.
    """

    def __init__(self, controller, line_searcher=None):
        if line_searcher is None:
            line_searcher = LineSearchStrongWolfe(
                preferred_initial_step_size=1.)
        super(RelaxedNewton, self).__init__(controller=controller,
                                            line_searcher=line_searcher)

    def get_descent_direction(self, energy):
        return -energy.metric.inverse_times(energy.gradient)
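For intuition: on a quadratic energy with metric A, the Newton direction -A^{-1}*gradient reaches the minimum in a single step. A self-contained NumPy sketch with plain arrays instead of NIFTy energies, purely illustrative:

    import numpy as np

    # E(x) = 0.5*x@A@x - b@x, so gradient = A@x - b and the metric is A
    A = np.array([[3., 1.], [1., 2.]])
    b = np.array([1., 1.])
    x = np.zeros(2)
    grad = A @ x - b
    p = -np.linalg.solve(A, grad)                       # -metric^{-1} * gradient
    print(np.allclose(x + p, np.linalg.solve(A, b)))    # -> True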
class L_BFGS(DescentMinimizer):
    """ L-BFGS: builds an approximation to the inverse Hessian from the last
    `max_history_length` position and gradient differences and applies it to
    the gradient via the standard two-loop recursion.
    """

    def __init__(self, controller, line_searcher=LineSearchStrongWolfe(),
                 max_history_length=5):
        super(L_BFGS, self).__init__(controller=controller,
                                     line_searcher=line_searcher)
        self.max_history_length = max_history_length

    def __call__(self, energy):
        self.reset()
        return super(L_BFGS, self).__call__(energy)

    def reset(self):
        self._k = 0
        self._s = [None]*self.max_history_length
        self._y = [None]*self.max_history_length

    def get_descent_direction(self, energy):
        x = energy.position
        s = self._s
        y = self._y
        k = self._k
        maxhist = self.max_history_length
        gradient = energy.gradient

        nhist = min(k, maxhist)
        alpha = [None]*maxhist
        p = -gradient
        if k > 0:
            # update the ring buffers of position and gradient differences
            idx = (k-1) % maxhist
            s[idx] = x-self._lastx
            y[idx] = gradient-self._lastgrad
        if nhist > 0:
            # first loop of the two-loop recursion: newest to oldest pair
            for i in range(k-1, k-nhist-1, -1):
                idx = i % maxhist
                alpha[idx] = s[idx].vdot(p)/s[idx].vdot(y[idx])
                p = p - alpha[idx]*y[idx]
            # scale with an estimate of the initial inverse Hessian
            idx = (k-1) % maxhist
            fact = s[idx].vdot(y[idx]) / y[idx].vdot(y[idx])
            if fact <= 0.:
                logger.error("L-BFGS curvature not positive definite!")
            p = p*fact
            # second loop: oldest to newest pair
            for i in range(k-nhist, k):
                idx = i % maxhist
                beta = y[idx].vdot(p) / s[idx].vdot(y[idx])
                p = p + (alpha[idx]-beta)*s[idx]
        self._lastx = x
        self._lastgrad = gradient
        self._k += 1
        return p
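get_descent_direction above is the standard L-BFGS two-loop recursion on a ring buffer. For reference, the same recursion on plain NumPy arrays with the history kept oldest-first; an illustrative sketch, not part of this commit:

    import numpy as np

    def lbfgs_direction(grad, s_hist, y_hist):
        # s_hist[i] = x_{i+1} - x_i, y_hist[i] = g_{i+1} - g_i, oldest first;
        # returns an approximation of -H^{-1} @ grad.
        p = -grad.astype(float)
        alpha = []
        for s, y in zip(reversed(s_hist), reversed(y_hist)):  # newest to oldest
            a = np.dot(s, p)/np.dot(s, y)
            alpha.append(a)
            p = p - a*y
        if s_hist:  # scale with the initial inverse-Hessian estimate
            s, y = s_hist[-1], y_hist[-1]
            p = p*np.dot(s, y)/np.dot(y, y)
        for (s, y), a in zip(zip(s_hist, y_hist), reversed(alpha)):  # oldest first
            b = np.dot(y, p)/np.dot(s, y)
            p = p + (a - b)*s
        return p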
class VL_BFGS(DescentMinimizer):
......
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
# Copyright(C) 2013-2018 Max-Planck-Society
#
# NIFTy is being developed at the Max-Planck-Institut fuer Astrophysik
# and financially supported by the Studienstiftung des deutschen Volkes.
from __future__ import absolute_import, division, print_function

from ..compat import *
from ..logger import logger
from .descent_minimizer import DescentMinimizer
from .line_search_strong_wolfe import LineSearchStrongWolfe


class L_BFGS(DescentMinimizer):
    def __init__(self, controller, line_searcher=LineSearchStrongWolfe(),
                 max_history_length=5):
        super(L_BFGS, self).__init__(controller=controller,
                                     line_searcher=line_searcher)
        self.max_history_length = max_history_length

    def __call__(self, energy):
        self.reset()
        return super(L_BFGS, self).__call__(energy)

    def reset(self):
        self._k = 0
        self._s = [None]*self.max_history_length
        self._y = [None]*self.max_history_length

    def get_descent_direction(self, energy):
        x = energy.position
        s = self._s
        y = self._y
        k = self._k
        maxhist = self.max_history_length
        gradient = energy.gradient

        nhist = min(k, maxhist)
        alpha = [None]*maxhist
        p = -gradient
        if k > 0:
            idx = (k-1) % maxhist
            s[idx] = x-self._lastx
            y[idx] = gradient-self._lastgrad
        if nhist > 0:
            for i in range(k-1, k-nhist-1, -1):
                idx = i % maxhist
                alpha[idx] = s[idx].vdot(p)/s[idx].vdot(y[idx])
                p = p - alpha[idx]*y[idx]
            idx = (k-1) % maxhist
            fact = s[idx].vdot(y[idx]) / y[idx].vdot(y[idx])
            if fact <= 0.:
                logger.error("L-BFGS curvature not positive definite!")
            p = p*fact
            for i in range(k-nhist, k):
                idx = i % maxhist
                beta = y[idx].vdot(p) / s[idx].vdot(y[idx])
                p = p + (alpha[idx]-beta)*s[idx]
        self._lastx = x
        self._lastgrad = gradient
        self._k += 1
        return p
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
# Copyright(C) 2013-2018 Max-Planck-Society
#
# NIFTy is being developed at the Max-Planck-Institut fuer Astrophysik
# and financially supported by the Studienstiftung des deutschen Volkes.
from __future__ import absolute_import, division, print_function

from ..compat import *
from .descent_minimizer import DescentMinimizer
from .line_search_strong_wolfe import LineSearchStrongWolfe


class RelaxedNewton(DescentMinimizer):
    """ Calculates the descent direction according to a Newton scheme.

    The descent direction is determined by weighting the gradient at the
    current parameter position with the inverse local metric.
    """

    def __init__(self, controller, line_searcher=None):
        if line_searcher is None:
            line_searcher = LineSearchStrongWolfe(
                preferred_initial_step_size=1.)