From 769812bf476355ab8b18e7fc87edbeb616837b74 Mon Sep 17 00:00:00 2001
From: Martin Reinecke <martin@mpa-garching.mpg.de>
Date: Fri, 10 Aug 2018 15:53:26 +0200
Subject: [PATCH] consolidate the descent minimizers

---
 nifty5/__init__.py                         |   7 +-
 nifty5/data_objects/numpy_do2.py           | 282 ++++++++++++++++++
 nifty5/minimization/descent_minimizer.py   | 122 --------
 .../{vl_bfgs.py => descent_minimizers.py}  | 182 ++++++++++-
 nifty5/minimization/l_bfgs.py              |  75 -----
 nifty5/minimization/relaxed_newton.py      |  41 ---
 nifty5/minimization/steepest_descent.py    |  33 --
 7 files changed, 464 insertions(+), 278 deletions(-)
 create mode 100644 nifty5/data_objects/numpy_do2.py
 delete mode 100644 nifty5/minimization/descent_minimizer.py
 rename nifty5/minimization/{vl_bfgs.py => descent_minimizers.py} (56%)
 delete mode 100644 nifty5/minimization/l_bfgs.py
 delete mode 100644 nifty5/minimization/relaxed_newton.py
 delete mode 100644 nifty5/minimization/steepest_descent.py

diff --git a/nifty5/__init__.py b/nifty5/__init__.py
index 6d30fb855..50fa30e56 100644
--- a/nifty5/__init__.py
+++ b/nifty5/__init__.py
@@ -59,11 +59,8 @@ from .minimization.gradient_norm_controller import GradientNormController
 from .minimization.minimizer import Minimizer
 from .minimization.conjugate_gradient import ConjugateGradient
 from .minimization.nonlinear_cg import NonlinearCG
-from .minimization.descent_minimizer import DescentMinimizer
-from .minimization.steepest_descent import SteepestDescent
-from .minimization.vl_bfgs import VL_BFGS
-from .minimization.l_bfgs import L_BFGS
-from .minimization.relaxed_newton import RelaxedNewton
+from .minimization.descent_minimizers import (
+    DescentMinimizer, SteepestDescent, VL_BFGS, L_BFGS, RelaxedNewton)
 from .minimization.scipy_minimizer import (ScipyMinimizer, NewtonCG,
                                            L_BFGS_B, ScipyCG)
 from .minimization.energy import Energy
diff --git a/nifty5/data_objects/numpy_do2.py b/nifty5/data_objects/numpy_do2.py
new file mode 100644
index 000000000..68a3b35fa
--- /dev/null
+++ b/nifty5/data_objects/numpy_do2.py
@@ -0,0 +1,282 @@
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+# Copyright(C) 2013-2018 Max-Planck-Society
+#
+# NIFTy is being developed at the Max-Planck-Institut fuer Astrophysik
+# and financially supported by the Studienstiftung des deutschen Volkes.
+
+from __future__ import absolute_import, division, print_function
+
+from ..compat import *
+import numpy as np
+from .random import Random
+import sys
+
+ntask = 1
+rank = 0
+master = True
+
+
+def is_numpy():
+    return False
+
+
+def local_shape(shape, distaxis=0):
+    return shape
+
+
+class data_object(object):
+    def __init__(self, shape, data):
+        self._data = data
+
+    def copy(self):
+        return data_object(self._data.shape, self._data.copy())
+
+    @property
+    def dtype(self):
+        return self._data.dtype
+
+    @property
+    def shape(self):
+        return self._data.shape
+
+    @property
+    def size(self):
+        return self._data.size
+
+    @property
+    def real(self):
+        return data_object(self._data.shape, self._data.real)
+
+    @property
+    def imag(self):
+        return data_object(self._data.shape, self._data.imag)
+
+    def conj(self):
+        return data_object(self._data.shape, self._data.conj())
+
+    def conjugate(self):
+        return data_object(self._data.shape, self._data.conjugate())
+
+    def _contraction_helper(self, op, axis):
+        if axis is not None:
+            if len(axis) == len(self._data.shape):
+                axis = None
+        if axis is None:
+            return getattr(self._data, op)()
+
+        res = getattr(self._data, op)(axis=axis)
+        return data_object(res.shape, res)
+
+    def sum(self, axis=None):
+        return self._contraction_helper("sum", axis)
+
+    def prod(self, axis=None):
+        return self._contraction_helper("prod", axis)
+
+    def min(self, axis=None):
+        return self._contraction_helper("min", axis)
+
+    def max(self, axis=None):
+        return self._contraction_helper("max", axis)
+
+    def mean(self, axis=None):
+        if axis is None:
+            sz = self.size
+        else:
+            sz = reduce(lambda x, y: x*y, [self.shape[i] for i in axis])
+        return self.sum(axis)/sz
+
+    def std(self, axis=None):
+        return np.sqrt(self.var(axis))
+
+    # FIXME: to be improved!
+    def var(self, axis=None):
+        if axis is not None and len(axis) != len(self.shape):
+            raise ValueError("functionality not yet supported")
+        return (abs(self-self.mean())**2).mean()
+
+    def _binary_helper(self, other, op):
+        a = self
+        if isinstance(other, data_object):
+            b = other
+            if a._data.shape != b._data.shape:
+                raise ValueError("shapes are incompatible.")
+            a = a._data
+            b = b._data
+        elif np.isscalar(other):
+            a = a._data
+            b = other
+        elif isinstance(other, np.ndarray):
+            a = a._data
+            b = other
+        else:
+            return NotImplemented
+
+        tval = getattr(a, op)(b)
+        if tval is a:
+            return self
+        else:
+            return data_object(self._data.shape, tval)
+
+    def __neg__(self):
+        return data_object(self._data.shape, -self._data)
+
+    def __abs__(self):
+        return data_object(self._data.shape, abs(self._data))
+
+    def all(self):
+        return self.sum() == self.size
+
+    def any(self):
+        return self.sum() != 0
+
+    def fill(self, value):
+        self._data.fill(value)
+
+
+for op in ["__add__", "__radd__", "__iadd__",
+           "__sub__", "__rsub__", "__isub__",
+           "__mul__", "__rmul__", "__imul__",
+           "__div__", "__rdiv__", "__idiv__",
+           "__truediv__", "__rtruediv__", "__itruediv__",
+           "__floordiv__", "__rfloordiv__", "__ifloordiv__",
+           "__pow__", "__rpow__", "__ipow__",
+           "__lt__", "__le__", "__gt__", "__ge__", "__eq__", "__ne__"]:
+    def func(op):
+        def func2(self, other):
+            return self._binary_helper(other, op=op)
+        return func2
+    setattr(data_object, op, func(op))
+
+
+def full(shape, fill_value, dtype=None):
+    return data_object(shape, np.full(shape, fill_value, dtype))
+
+
+def empty(shape, dtype=None):
+    return data_object(shape, np.empty(shape, dtype))
+
+
+def zeros(shape, dtype=None, distaxis=0):
+    return data_object(shape, np.zeros(shape, dtype))
+
+
+def ones(shape, dtype=None, distaxis=0):
+    return data_object(shape, np.ones(shape, dtype))
+
+
+def empty_like(a, dtype=None):
+    return data_object(a.shape, np.empty_like(a._data, dtype))
+
+
+def vdot(a, b):
+    return np.vdot(a._data, b._data)
+
+
+def _math_helper(x, function, out):
+    function = getattr(np, function)
+    if out is not None:
+        function(x._data, out=out._data)
+        return out
+    else:
+        return data_object(x.shape, function(x._data))
+
+
+_current_module = sys.modules[__name__]
+
+for f in ["sqrt", "exp", "log", "tanh", "conjugate"]:
+    def func(f):
+        def func2(x, out=None):
+            return _math_helper(x, f, out)
+        return func2
+    setattr(_current_module, f, func(f))
+
+
+def from_object(object, dtype, copy, set_locked):
+    if dtype is None:
+        dtype = object.dtype
+    dtypes_equal = dtype == object.dtype
+    if set_locked and dtypes_equal and locked(object):
+        return object
+    if not dtypes_equal and not copy:
+        raise ValueError("cannot change data type without copying")
+    if set_locked and not copy:
+        raise ValueError("cannot lock object without copying")
+    data = np.array(object._data, dtype=dtype, copy=copy)
+    if set_locked:
+        data.flags.writeable = False
+    return data_object(object.shape, data)
+
+
+# This function draws all random numbers on all tasks, to produce the same
+# array independent of the number of tasks.
+# MR FIXME: depending on what is really wanted/needed (i.e. same result
+# independent of number of tasks, performance etc.) we need to adjust the
+# algorithm.
+def from_random(random_type, shape, dtype=np.float64, **kwargs):
+    generator_function = getattr(Random, random_type)
+    ldat = generator_function(dtype=dtype, shape=shape, **kwargs)
+    return from_local_data(shape, ldat)
+
+
+def local_data(arr):
+    return arr._data
+
+
+def ibegin_from_shape(glob_shape, distaxis=0):
+    return (0,) * len(glob_shape)
+
+
+def ibegin(arr):
+    return (0,) * arr._data.ndim
+
+
+def np_allreduce_sum(arr):
+    return arr.copy()
+
+
+def np_allreduce_min(arr):
+    return arr.copy()
+
+
+def distaxis(arr):
+    return -1
+
+
+def from_local_data(shape, arr, distaxis=-1):
+    return data_object(shape, arr)
+
+
+def from_global_data(arr, sum_up=False):
+    return data_object(arr.shape, arr)
+
+
+def to_global_data(arr):
+    return arr._data
+
+
+def redistribute(arr, dist=None, nodist=None):
+    return arr.copy()
+
+
+def default_distaxis():
+    return -1
+
+
+def lock(arr):
+    arr._data.flags.writeable = False
+
+
+def locked(arr):
+    return not arr._data.flags.writeable
diff --git a/nifty5/minimization/descent_minimizer.py b/nifty5/minimization/descent_minimizer.py
deleted file mode 100644
index 963159f18..000000000
--- a/nifty5/minimization/descent_minimizer.py
+++ /dev/null
@@ -1,122 +0,0 @@
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see <http://www.gnu.org/licenses/>.
-#
-# Copyright(C) 2013-2018 Max-Planck-Society
-#
-# NIFTy is being developed at the Max-Planck-Institut fuer Astrophysik
-# and financially supported by the Studienstiftung des deutschen Volkes.
-
-from __future__ import absolute_import, division, print_function
-
-from ..compat import *
-from ..logger import logger
-from .line_search_strong_wolfe import LineSearchStrongWolfe
-from .minimizer import Minimizer
-
-
-class DescentMinimizer(Minimizer):
-    """ A base class used by gradient methods to find a local minimum.
-
-    Descent minimization methods are used to find a local minimum of a scalar
-    function by following a descent direction. This class implements the
-    minimization procedure once a descent direction is known. The descent
-    direction has to be implemented separately.
-
-    Parameters
-    ----------
-    controller : IterationController
-        Object that decides when to terminate the minimization.
-    line_searcher : callable *optional*
-        Function which infers the step size in the descent direction
-        (default : LineSearchStrongWolfe()).
-    """
-
-    def __init__(self, controller, line_searcher=LineSearchStrongWolfe()):
-        self._controller = controller
-        self.line_searcher = line_searcher
-
-    def __call__(self, energy):
-        """ Performs the minimization of the provided Energy functional.
-
-        Parameters
-        ----------
-        energy : Energy
-            Energy object which provides value, gradient and metric at a
-            specific position in parameter space.
-
-        Returns
-        -------
-        Energy
-            Latest `energy` of the minimization.
-        int
-            Can be controller.CONVERGED or controller.ERROR
-
-        Notes
-        -----
-        The minimization is stopped if
-            * the controller returns controller.CONVERGED or controller.ERROR,
-            * a perfectly flat point is reached,
-            * according to the line-search the minimum is found,
-        """
-        f_k_minus_1 = None
-        controller = self._controller
-        status = controller.start(energy)
-        if status != controller.CONTINUE:
-            return energy, status
-
-        while True:
-            # check if position is at a flat point
-            if energy.gradient_norm == 0:
-                return energy, controller.CONVERGED
-
-            # compute a step length that reduces energy.value sufficiently
-            new_energy, success = self.line_searcher.perform_line_search(
-                energy=energy, pk=self.get_descent_direction(energy),
-                f_k_minus_1=f_k_minus_1)
-            if not success:
-                self.reset()
-
-            f_k_minus_1 = energy.value
-
-            if new_energy.value > energy.value:
-                logger.error("Error: Energy has increased")
-                return energy, controller.ERROR
-
-            if new_energy.value == energy.value:
-                logger.warning(
-                    "Warning: Energy has not changed. Assuming convergence...")
-                return new_energy, controller.CONVERGED
-
-            energy = new_energy
-            status = self._controller.check(energy)
-            if status != controller.CONTINUE:
-                return energy, status
-
-    def reset(self):
-        pass
-
-    def get_descent_direction(self, energy):
-        """ Calculates the next descent direction.
-
-        Parameters
-        ----------
-        energy : Energy
-            An instance of the Energy class which shall be minimized. The
-            position of `energy` is used as the starting point of minimization.
-
-        Returns
-        -------
-        Field
-            The descent direction.
-        """
-        raise NotImplementedError
diff --git a/nifty5/minimization/vl_bfgs.py b/nifty5/minimization/descent_minimizers.py
similarity index 56%
rename from nifty5/minimization/vl_bfgs.py
rename to nifty5/minimization/descent_minimizers.py
index f675d7e42..9cbc92675 100644
--- a/nifty5/minimization/vl_bfgs.py
+++ b/nifty5/minimization/descent_minimizers.py
@@ -19,10 +19,188 @@ from __future__ import absolute_import, division, print_function
 
 import numpy as np
-
 from ..compat import *
-from .descent_minimizer import DescentMinimizer
+from ..logger import logger
 from .line_search_strong_wolfe import LineSearchStrongWolfe
+from .minimizer import Minimizer
+
+
+class DescentMinimizer(Minimizer):
+    """ A base class used by gradient methods to find a local minimum.
+
+    Descent minimization methods are used to find a local minimum of a scalar
+    function by following a descent direction. This class implements the
+    minimization procedure once a descent direction is known. The descent
+    direction has to be implemented separately.
+
+    Parameters
+    ----------
+    controller : IterationController
+        Object that decides when to terminate the minimization.
+    line_searcher : callable, optional
+        Function which infers the step size in the descent direction
+        (default: LineSearchStrongWolfe()).
+    """
+
+    def __init__(self, controller, line_searcher=LineSearchStrongWolfe()):
+        self._controller = controller
+        self.line_searcher = line_searcher
+
+    def __call__(self, energy):
+        """ Performs the minimization of the provided Energy functional.
+
+        Parameters
+        ----------
+        energy : Energy
+            Energy object which provides value, gradient and metric at a
+            specific position in parameter space.
+
+        Returns
+        -------
+        Energy
+            Latest `energy` of the minimization.
+        int
+            Can be controller.CONVERGED or controller.ERROR
+
+        Notes
+        -----
+        The minimization is stopped if
+            * the controller returns controller.CONVERGED or controller.ERROR,
+            * a perfectly flat point is reached, or
+            * according to the line search, the minimum is found.
+        """
+        f_k_minus_1 = None
+        controller = self._controller
+        status = controller.start(energy)
+        if status != controller.CONTINUE:
+            return energy, status
+
+        while True:
+            # check if position is at a flat point
+            if energy.gradient_norm == 0:
+                return energy, controller.CONVERGED
+
+            # compute a step length that reduces energy.value sufficiently
+            new_energy, success = self.line_searcher.perform_line_search(
+                energy=energy, pk=self.get_descent_direction(energy),
+                f_k_minus_1=f_k_minus_1)
+            if not success:
+                self.reset()
+
+            f_k_minus_1 = energy.value
+
+            if new_energy.value > energy.value:
+                logger.error("Energy has increased")
+                return energy, controller.ERROR
+
+            if new_energy.value == energy.value:
+                logger.warning(
+                    "Energy has not changed. Assuming convergence...")
+                return new_energy, controller.CONVERGED
+
+            energy = new_energy
+            status = self._controller.check(energy)
+            if status != controller.CONTINUE:
+                return energy, status
+
+    def reset(self):
+        pass
+
+    def get_descent_direction(self, energy):
+        """ Calculates the next descent direction.
+
+        Parameters
+        ----------
+        energy : Energy
+            An instance of the Energy class which shall be minimized. The
+            position of `energy` is used as the starting point of minimization.
+
+        Returns
+        -------
+        Field
+            The descent direction.
+        """
+        raise NotImplementedError
+
+
+class SteepestDescent(DescentMinimizer):
+    """ Implementation of the steepest descent minimization scheme.
+
+    Also known as 'gradient descent'. This algorithm simply follows the
+    functional's gradient for minimization.
+    """
+
+    def get_descent_direction(self, energy):
+        return -energy.gradient
+
+
+class RelaxedNewton(DescentMinimizer):
+    """ Calculates the descent direction according to a Newton scheme.
+
+    The descent direction is determined by weighting the gradient at the
+    current parameter position with the inverse local metric.
+    """
+
+    def __init__(self, controller, line_searcher=None):
+        if line_searcher is None:
+            line_searcher = LineSearchStrongWolfe(
+                preferred_initial_step_size=1.)
+        super(RelaxedNewton, self).__init__(controller=controller,
+                                            line_searcher=line_searcher)
+
+    def get_descent_direction(self, energy):
+        return -energy.metric.inverse_times(energy.gradient)
+
+
+class L_BFGS(DescentMinimizer):
+    def __init__(self, controller, line_searcher=LineSearchStrongWolfe(),
+                 max_history_length=5):
+        super(L_BFGS, self).__init__(controller=controller,
+                                     line_searcher=line_searcher)
+        self.max_history_length = max_history_length
+
+    def __call__(self, energy):
+        self.reset()
+        return super(L_BFGS, self).__call__(energy)
+
+    def reset(self):
+        self._k = 0
+        self._s = [None]*self.max_history_length
+        self._y = [None]*self.max_history_length
+
+    def get_descent_direction(self, energy):
+        x = energy.position
+        s = self._s
+        y = self._y
+        k = self._k
+        maxhist = self.max_history_length
+        gradient = energy.gradient
+
+        nhist = min(k, maxhist)
+        alpha = [None]*maxhist
+        p = -gradient
+        if k > 0:
+            idx = (k-1) % maxhist
+            s[idx] = x-self._lastx
+            y[idx] = gradient-self._lastgrad
+        if nhist > 0:
+            for i in range(k-1, k-nhist-1, -1):
+                idx = i % maxhist
+                alpha[idx] = s[idx].vdot(p)/s[idx].vdot(y[idx])
+                p = p - alpha[idx]*y[idx]
+            idx = (k-1) % maxhist
+            fact = s[idx].vdot(y[idx]) / y[idx].vdot(y[idx])
+            if fact <= 0.:
+                logger.error("L-BFGS curvature not positive definite!")
+            p = p*fact
+            for i in range(k-nhist, k):
+                idx = i % maxhist
+                beta = y[idx].vdot(p) / s[idx].vdot(y[idx])
+                p = p + (alpha[idx]-beta)*s[idx]
+        self._lastx = x
+        self._lastgrad = gradient
+        self._k += 1
+        return p
 
 
 class VL_BFGS(DescentMinimizer):
diff --git a/nifty5/minimization/l_bfgs.py b/nifty5/minimization/l_bfgs.py
deleted file mode 100644
index 0c53c9c95..000000000
--- a/nifty5/minimization/l_bfgs.py
+++ /dev/null
@@ -1,75 +0,0 @@
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see <http://www.gnu.org/licenses/>.
-#
-# Copyright(C) 2013-2018 Max-Planck-Society
-#
-# NIFTy is being developed at the Max-Planck-Institut fuer Astrophysik
-# and financially supported by the Studienstiftung des deutschen Volkes.
-
-from __future__ import absolute_import, division, print_function
-
-from ..compat import *
-from ..logger import logger
-from .descent_minimizer import DescentMinimizer
-from .line_search_strong_wolfe import LineSearchStrongWolfe
-
-
-class L_BFGS(DescentMinimizer):
-    def __init__(self, controller, line_searcher=LineSearchStrongWolfe(),
-                 max_history_length=5):
-        super(L_BFGS, self).__init__(controller=controller,
-                                     line_searcher=line_searcher)
-        self.max_history_length = max_history_length
-
-    def __call__(self, energy):
-        self.reset()
-        return super(L_BFGS, self).__call__(energy)
-
-    def reset(self):
-        self._k = 0
-        self._s = [None]*self.max_history_length
-        self._y = [None]*self.max_history_length
-
-    def get_descent_direction(self, energy):
-        x = energy.position
-        s = self._s
-        y = self._y
-        k = self._k
-        maxhist = self.max_history_length
-        gradient = energy.gradient
-
-        nhist = min(k, maxhist)
-        alpha = [None]*maxhist
-        p = -gradient
-        if k > 0:
-            idx = (k-1) % maxhist
-            s[idx] = x-self._lastx
-            y[idx] = gradient-self._lastgrad
-        if nhist > 0:
-            for i in range(k-1, k-nhist-1, -1):
-                idx = i % maxhist
-                alpha[idx] = s[idx].vdot(p)/s[idx].vdot(y[idx])
-                p = p - alpha[idx]*y[idx]
-            idx = (k-1) % maxhist
-            fact = s[idx].vdot(y[idx]) / y[idx].vdot(y[idx])
-            if fact <= 0.:
-                logger.error("L-BFGS curvature not positive definite!")
-            p = p*fact
-            for i in range(k-nhist, k):
-                idx = i % maxhist
-                beta = y[idx].vdot(p) / s[idx].vdot(y[idx])
-                p = p + (alpha[idx]-beta)*s[idx]
-        self._lastx = x
-        self._lastgrad = gradient
-        self._k += 1
-        return p
diff --git a/nifty5/minimization/relaxed_newton.py b/nifty5/minimization/relaxed_newton.py
deleted file mode 100644
index 069e44827..000000000
--- a/nifty5/minimization/relaxed_newton.py
+++ /dev/null
@@ -1,41 +0,0 @@
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see <http://www.gnu.org/licenses/>.
-#
-# Copyright(C) 2013-2018 Max-Planck-Society
-#
-# NIFTy is being developed at the Max-Planck-Institut fuer Astrophysik
-# and financially supported by the Studienstiftung des deutschen Volkes.
-
-from __future__ import absolute_import, division, print_function
-
-from ..compat import *
-from .descent_minimizer import DescentMinimizer
-from .line_search_strong_wolfe import LineSearchStrongWolfe
-
-
-class RelaxedNewton(DescentMinimizer):
-    """ Calculates the descent direction according to a Newton scheme.
-
-    The descent direction is determined by weighting the gradient at the
-    current parameter position with the inverse local metric.
-    """
-
-    def __init__(self, controller, line_searcher=None):
-        if line_searcher is None:
-            line_searcher = LineSearchStrongWolfe(
-                preferred_initial_step_size=1.)
-        super(RelaxedNewton, self).__init__(controller=controller,
-                                            line_searcher=line_searcher)
-
-    def get_descent_direction(self, energy):
-        return -energy.metric.inverse_times(energy.gradient)
diff --git a/nifty5/minimization/steepest_descent.py b/nifty5/minimization/steepest_descent.py
deleted file mode 100644
index 0598ad076..000000000
--- a/nifty5/minimization/steepest_descent.py
+++ /dev/null
@@ -1,33 +0,0 @@
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see <http://www.gnu.org/licenses/>.
-#
-# Copyright(C) 2013-2018 Max-Planck-Society
-#
-# NIFTy is being developed at the Max-Planck-Institut fuer Astrophysik
-# and financially supported by the Studienstiftung des deutschen Volkes.
-
-from __future__ import absolute_import, division, print_function
-
-from ..compat import *
-from .descent_minimizer import DescentMinimizer
-
-
-class SteepestDescent(DescentMinimizer):
-    """ Implementation of the steepest descent minimization scheme.
-
-    Also known as 'gradient descent'. This algorithm simply follows the
-    functional's gradient for minimization.
-    """
-
-    def get_descent_direction(self, energy):
-        return -energy.gradient
--
GitLab
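
Usage note: after this patch all descent-type minimizers live in the single
module nifty5/minimization/descent_minimizers.py, while the names exported
from nifty5/__init__.py are unchanged, so user code keeps working. A minimal
sketch of the call convention follows; it assumes that QuadraticEnergy,
ScalingOperator, RGSpace, full and from_random are exported by this NIFTy 5
snapshot (those exports are not shown in the hunk above), so treat it as an
illustration rather than a verified recipe.

    import nifty5 as ift

    # Toy quadratic problem: E(x) = 0.5*x.A(x) - b.x with A = 4*identity,
    # so the minimum sits at x = A^{-1}(b). All names below except the
    # minimizer classes are assumptions about the surrounding API.
    space = ift.RGSpace(100)
    A = ift.ScalingOperator(4., space)
    b = ift.from_random('normal', space)
    energy = ift.QuadraticEnergy(position=ift.full(space, 0.), A=A, b=b)

    # The controller decides when to stop; the minimizer follows descent
    # directions until the controller reports CONVERGED or ERROR.
    ic = ift.GradientNormController(tol_abs_gradnorm=1e-7,
                                    iteration_limit=100)
    minimizer = ift.L_BFGS(ic)  # or SteepestDescent/RelaxedNewton/VL_BFGS
    energy, status = minimizer(energy)  # returns (energy, status), as above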
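
RelaxedNewton turns the gradient into a descent direction by applying the
inverse metric, p = -M^{-1} g; its default of preferred_initial_step_size=1.
reflects that the full Newton step is usually the right first trial for the
line search. For a quadratic energy with constant metric A that single step
lands exactly on the minimum, as this plain-NumPy sketch (illustrative names,
not part of the patch) shows:

    import numpy as np

    # E(x) = 0.5*x.T @ A @ x - b.T @ x, so gradient g = A @ x - b, metric A.
    A = np.array([[3.0, 1.0],
                  [1.0, 2.0]])
    b = np.array([1.0, 0.0])
    x = np.zeros(2)

    g = A @ x - b
    p = -np.linalg.solve(A, g)  # cf. -energy.metric.inverse_times(gradient)
    x = x + p                   # step size 1: exact minimizer of a quadratic

    print(np.allclose(x, np.linalg.solve(A, b)))  # True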
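
The heart of the merged L_BFGS.get_descent_direction is the classic two-loop
recursion over the stored position differences s_i and gradient differences
y_i. The following self-contained NumPy sketch (plain arrays instead of NIFTy
fields, illustrative names) mirrors the sign convention used above, where the
recursion starts from p = -gradient:

    import numpy as np

    def two_loop_direction(grad, s_hist, y_hist):
        # L-BFGS two-loop recursion; s_hist/y_hist hold the position and
        # gradient differences, oldest first.
        p = -np.asarray(grad, dtype=float)
        alpha = []
        for s, y in zip(reversed(s_hist), reversed(y_hist)):
            a = s.dot(p) / s.dot(y)
            alpha.append(a)
            p = p - a*y
        if s_hist:
            s, y = s_hist[-1], y_hist[-1]
            p = p * (s.dot(y) / y.dot(y))  # 'fact' in the patch
        for (s, y), a in zip(zip(s_hist, y_hist), reversed(alpha)):
            b = y.dot(p) / s.dot(y)
            p = p + (a - b)*s
        return p

    # One update pair from a 1-D quadratic with Hessian H = 4 (y = H s):
    s = [np.array([1.0])]
    y = [np.array([4.0])]
    print(two_loop_direction(np.array([4.0]), s, y))  # [-1.] == -H^{-1} g

The scaling factor fact = s.y / y.y rescales the initial inverse-Hessian
guess to the most recent curvature information, which is why a non-positive
value triggers the "curvature not positive definite" error above.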
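
An implementation note on numpy_do2.py: the arithmetic dunder methods and the
module-level math functions are attached in loops, and each loop body goes
through an extra factory function (func(op) / func(f)). That wrapper binds
the loop variable at definition time; Python closures are otherwise
late-binding, as this minimal illustration of the pitfall shows:

    # Late binding: every closure sees the final value of the loop variable.
    fns = [lambda: op for op in ("add", "sub", "mul")]
    print([f() for f in fns])   # ['mul', 'mul', 'mul']

    # Early binding via a factory, the pattern used in numpy_do2.py.
    def make(op):
        return lambda: op

    fns = [make(op) for op in ("add", "sub", "mul")]
    print([f() for f in fns])   # ['add', 'sub', 'mul']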