## NIFTY (Numerical Information Field Theory) has been developed at the
## Max-Planck-Institute for Astrophysics.
##
## Copyright (C) 2013 Max-Planck-Society
##
## Author: Marco Selig
## Project homepage: <http://www.mpa-garching.mpg.de/ift/nifty/>
##
## This program is free software: you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation, either version 3 of the License, or
## (at your option) any later version.
##
## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
## See the GNU General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with this program. If not, see <http://www.gnu.org/licenses/>.

"""
    ..                  __   ____   __
    ..                /__/ /   _/ /  /_
    ..      __ ___    __  /  /_  /   _/  __   __
    ..    /   _   | /  / /   _/ /  /   /  / /  /
    ..   /  / /  / /  / /  /   /  /_  /  /_/  /
    ..  /__/ /__/ /__/ /__/    \___/  \___   /  tools
    ..                                /______/

    This module extends NIFTY with a nifty set of tools including further
    operators, namely the :py:class:`invertible_operator` and the
    :py:class:`propagator_operator`, and minimization schemes, namely
    :py:class:`steepest_descent` and :py:class:`conjugate_gradient`. Those
    tools are supposed to support the user in solving information field
    theoretical problems (almost) without numerical pain.

"""
from __future__ import division
#from nifty_core import *
import numpy as np
from nifty.config import notification, about
from nifty.nifty_core import field
from nifty.nifty_simple_math import vdot
#from nifty_core import space, \
#                       field
#from operators import operator, \
#                      diagonal_operator


##=============================================================================

class conjugate_gradient(object):
    """
        ..      _______       ____ __
        ..    /  _____/     /   _   /
        ..   /  /____  __  /  /_/  / __
        ..   \______//__/  \____  //__/  class
        ..                /______/

        NIFTY tool class for conjugate gradient

        This tool minimizes :math:`A x = b` with respect to `x` given `A` and
        `b` using a conjugate gradient; i.e., a step-by-step minimization
        relying on conjugated gradient directions. Further, `A` is assumed to
        be a positive definite and self-adjoint operator. The use of a
        preconditioner `W` that is roughly the inverse of `A` is optional.
        For details on the methodology refer to [#]_, for details on usage
        and output, see the notes below.

        Parameters
        ----------
        A : {operator, function}
            Operator `A` applicable to a field.
        b : field
            Resulting field of the operation `A(x)`.
        W : {operator, function}, *optional*
            Operator `W` that is a preconditioner on `A` and is applicable to
            a field (default: None).
        spam : function, *optional*
            Callback function which is given the current `x` and iteration
            counter each iteration (default: None).
        reset : integer, *optional*
            Number of iterations after which to restart; i.e., forget previous
            conjugated directions (default: sqrt(b.get_dim())).
        note : bool, *optional*
            Indicates whether notes are printed or not (default: False).

        See Also
        --------
        scipy.sparse.linalg.cg

        Notes
        -----
        After initialization by `__init__`, the minimizer is started by
        calling it using `__call__`, which takes additional parameters.
        Notifications, if enabled, will state the iteration number, current
        step widths `alpha` and `beta`, the current relative residual `delta`
        that is compared to the tolerance, and the convergence level if
        changed.
        The minimizer will exit in three states: DEAD if alpha becomes
        infinite, QUIT if the maximum number of iterations is reached, or DONE
        if convergence is achieved. Returned will be the latest `x` and the
        latest convergence level, which can evaluate ``True`` for the exit
        states QUIT and DONE.

        References
        ----------
        .. [#] J. R. Shewchuk, 1994, `"An Introduction to the Conjugate
            Gradient Method Without the Agonizing Pain"
            <http://www.cs.cmu.edu/~quake-papers/painless-conjugate-gradient.pdf>`_

        Examples
        --------
        >>> b = field(point_space(2), val=[1, 9])
        >>> A = diagonal_operator(b.domain, diag=[4, 3])
        >>> x, convergence = conjugate_gradient(A, b, note=True)(tol=1E-4, clevel=3)
        iteration : 00000001   alpha = 3.3E-01   beta = 1.3E-03   delta = 3.6E-02
        iteration : 00000002   alpha = 2.5E-01   beta = 7.6E-04   delta = 1.0E-03
        iteration : 00000003   alpha = 3.3E-01   beta = 2.5E-04   delta = 1.6E-05   convergence level : 1
        iteration : 00000004   alpha = 2.5E-01   beta = 1.8E-06   delta = 2.1E-08   convergence level : 2
        iteration : 00000005   alpha = 2.5E-01   beta = 2.2E-03   delta = 1.0E-09   convergence level : 3
        ... done.
        >>> bool(convergence)
        True
        >>> x.val # yields 1/4 and 9/3
        array([ 0.25,  3.  ])

        Attributes
        ----------
        A : {operator, function}
            Operator `A` applicable to a field.
        x : field
            Current field.
        b : field
            Resulting field of the operation `A(x)`.
        W : {operator, function}
            Operator `W` that is a preconditioner on `A` and is applicable to
            a field; can be ``None``.
        spam : function
            Callback function which is given the current `x` and iteration
            counter each iteration; can be ``None``.
        reset : integer
            Number of iterations after which to restart; i.e., forget previous
            conjugated directions (default: sqrt(b.get_dim())).
        note : notification
            Notification instance.

    """

    def __init__(self, A, b, W=None, spam=None, reset=None, note=False):
        """
            Initializes the conjugate_gradient and sets the attributes (except
            for `x`).

            Parameters
            ----------
            A : {operator, function}
                Operator `A` applicable to a field.
            b : field
                Resulting field of the operation `A(x)`.
            W : {operator, function}, *optional*
                Operator `W` that is a preconditioner on `A` and is applicable
                to a field (default: None).
            spam : function, *optional*
                Callback function which is given the current `x` and iteration
                counter each iteration (default: None).
            reset : integer, *optional*
                Number of iterations after which to restart; i.e., forget
                previous conjugated directions (default: sqrt(b.get_dim())).
            note : bool, *optional*
                Indicates whether notes are printed or not (default: False).

        """
        if hasattr(A, "__call__"):
            self.A = A  ## applies A
        else:
            raise AttributeError(about._errors.cstring(
                "ERROR: A must be callable!"))
        self.b = b

        if (W is None) or hasattr(W, "__call__"):
            self.W = W  ## applies W ~ A_inverse
        else:
            raise AttributeError(about._errors.cstring(
                "ERROR: W must be None or callable!"))

        self.spam = spam  ## serves as callback given x and iteration number

        if reset is None:  ## 2 < reset ~ sqrt(dim)
            self.reset = max(2, int(np.sqrt(b.domain.get_dim())))
        else:
            self.reset = max(2, int(reset))

        self.note = notification(default=bool(note))

    ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

    def __call__(self, x0=None, **kwargs):  ## > runs cg with/without preconditioner
        """
            Runs the conjugate gradient minimization.

            Parameters
            ----------
            x0 : field, *optional*
                Starting guess for the minimization.
            tol : scalar, *optional*
                Tolerance specifying convergence; measured by current relative
                residual (default: 1E-4).
            clevel : integer, *optional*
                Number of times the tolerance should be undershot before
                exiting (default: 1).
            limii : integer, *optional*
                Maximum number of iterations performed
                (default: 10 * b.get_dim()).

            Returns
            -------
            x : field
                Latest `x` of the minimization.
            convergence : integer
                Latest convergence level indicating whether the minimization
                has converged or not.

        """
        self.x = self.b.copy_empty()
        self.x.set_val(new_val=0)
        self.x.set_val(new_val=x0)

        if self.W is None:
            return self._calc_without(**kwargs)
        else:
            return self._calc_with(**kwargs)

    ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

    def _calc_without(self, tol=1E-4, clevel=1, limii=None):  ## > runs cg without preconditioner
        clevel = int(clevel)
        if limii is None:
            limii = 10*self.b.domain.get_dim()
        else:
            limii = int(limii)

        r = self.b-self.A(self.x)
#        print('r', r.val)
        d = self.b.copy_empty()
        d.set_val(new_val=r.get_val())
        gamma = r.dot(d)
        if gamma == 0:
            return self.x, clevel+1
        delta_ = np.absolute(gamma)**(-0.5)

        convergence = 0
        ii = 1
        while True:
#            print('gamma', gamma)
            q = self.A(d)
#            print('q', q.val)
            alpha = gamma/d.dot(q)  ## positive definite
            if not np.isfinite(alpha):
                self.note.cprint(
                    "\niteration : %08u   alpha = NAN\n... dead." % ii)
                return self.x, 0
            self.x += d * alpha
#            print('x', self.x.val)
            if np.signbit(np.real(alpha)):
                about.warnings.cprint(
                    "WARNING: positive definiteness of A violated.")
                r = self.b-self.A(self.x)
            elif (ii % self.reset) == 0:
                r = self.b-self.A(self.x)
            else:
                r -= q * alpha
#            print('r', r.val)
            gamma_ = gamma
            gamma = r.dot(r)
#            print('gamma', gamma)
            beta = max(0, gamma/gamma_)  ## positive definite
#            print('d*beta', beta, (d*beta).val)
            d = r + d*beta
#            print('d', d.val)
            delta = delta_*np.absolute(gamma)**0.5
            self.note.cflush(
                "\niteration : %08u   alpha = %3.1E   beta = %3.1E   delta = %3.1E"
                % (ii, np.real(alpha), np.real(beta), np.real(delta)))

            if gamma == 0:
                convergence = clevel+1
                self.note.cprint("   convergence level : INF\n... done.")
                break
            elif np.absolute(delta) < tol:
                convergence += 1
                self.note.cflush("   convergence level : %u" % convergence)
                if convergence == clevel:
                    self.note.cprint("\n... done.")
                    break
            else:
                convergence = max(0, convergence-1)
            if ii == limii:
                self.note.cprint("\n... quit.")
                break

            if self.spam is not None:
                self.spam(self.x, ii)

            ii += 1

        if self.spam is not None:
            self.spam(self.x, ii)

        return self.x, convergence

    ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

    def _calc_with(self, tol=1E-4, clevel=1, limii=None):  ## > runs cg with preconditioner
        clevel = int(clevel)
        if limii is None:
            limii = 10*self.b.domain.get_dim()
        else:
            limii = int(limii)

        r = self.b-self.A(self.x)
        d = self.W(r)
        gamma = r.dot(d)
        if gamma == 0:
            return self.x, clevel+1
        delta_ = np.absolute(gamma)**(-0.5)

        convergence = 0
        ii = 1
        while True:
            q = self.A(d)
            alpha = gamma/d.dot(q)  ## positive definite
            if not np.isfinite(alpha):
                self.note.cprint(
                    "\niteration : %08u   alpha = NAN\n... dead." % ii)
                return self.x, 0
            self.x += d * alpha  ## update
            if np.signbit(np.real(alpha)):
                about.warnings.cprint(
                    "WARNING: positive definiteness of A violated.")
                r = self.b-self.A(self.x)
            elif (ii % self.reset) == 0:
                r = self.b-self.A(self.x)
            else:
                r -= q * alpha
            s = self.W(r)
            gamma_ = gamma
            gamma = r.dot(s)
            if np.signbit(np.real(gamma)):
                about.warnings.cprint(
                    "WARNING: positive definiteness of W violated.")
            beta = max(0, gamma/gamma_)  ## positive definite
            d = s + d*beta  ## conjugated gradient
            delta = delta_*np.absolute(gamma)**0.5
            self.note.cflush(
                "\niteration : %08u   alpha = %3.1E   beta = %3.1E   delta = %3.1E"
                % (ii, np.real(alpha), np.real(beta), np.real(delta)))

            if gamma == 0:
                convergence = clevel+1
                self.note.cprint("   convergence level : INF\n... done.")
                break
            elif np.absolute(delta) < tol:
                convergence += 1
                self.note.cflush("   convergence level : %u" % convergence)
                if convergence == clevel:
                    self.note.cprint("\n... done.")
                    break
            else:
                convergence = max(0, convergence-1)
            if ii == limii:
                self.note.cprint("\n... quit.")
                break

            if self.spam is not None:
                self.spam(self.x, ii)

            ii += 1

        if self.spam is not None:
            self.spam(self.x, ii)

        return self.x, convergence

    ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

    def __repr__(self):
        return "<nifty_tools.conjugate_gradient>"

##=============================================================================
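
## -----------------------------------------------------------------------------
## Illustrative sketch (not executed on import): since `A` and `W` only need to
## be callable, plain functions can stand in for operator instances.  The
## helper below is hypothetical and not part of the original interface, and the
## `point_space` import path is an assumption that may differ between NIFTY
## versions; it mirrors the doctest example in the class docstring above.
## -----------------------------------------------------------------------------

def _demo_conjugate_gradient():
    from nifty.nifty_core import point_space  ## assumed import path
    b = field(point_space(2), val=[1, 9])
    diag = field(point_space(2), val=[4, 3])

    def A(x):  ## acts like diagonal_operator(b.domain, diag=[4, 3])
        return x*diag

    def W(x):  ## the exact inverse of A serves as preconditioner here
        return x/diag

    x, convergence = conjugate_gradient(A, b, W=W, note=True)(tol=1E-4,
                                                              clevel=3)
    return x, bool(convergence)  ## expect x.val close to [0.25, 3.]
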
##=============================================================================

class steepest_descent(object):
    """
        ..                          __
        ..                        /  /
        ..      _______      ____/  /
        ..    /  _____/    /   _   /
        ..   /_____  / __ /  /_/  / __
        ..  /_______//__/ \______|/__/  class

        NIFTY tool class for steepest descent minimization

        This tool minimizes a scalar energy-function by steepest descent using
        the function's gradient. Steps and step widths are chosen according to
        the Wolfe conditions [#]_. For details on usage and output, see the
        notes below.

        Parameters
        ----------
        eggs : function
            Given the current `x` it returns the tuple of energy and gradient.
        spam : function, *optional*
            Callback function which is given the current `x` and iteration
            counter each iteration (default: None).
        a : {4-tuple}, *optional*
            Numbers obeying 0 < a1 ~ a2 < 1 ~ a3 < a4 that modify the step
            widths (default: (0.2, 0.5, 1, 2)).
        c : {2-tuple}, *optional*
            Numbers obeying 0 < c1 < c2 < 1 that specify the Wolfe-conditions
            (default: (1E-4, 0.9)).
        note : bool, *optional*
            Indicates whether notes are printed or not (default: False).

        See Also
        --------
        scipy.optimize.fmin_cg, scipy.optimize.fmin_ncg,
        scipy.optimize.fmin_l_bfgs_b

        Notes
        -----
        After initialization by `__init__`, the minimizer is started by
        calling it using `__call__`, which takes additional parameters.
        Notifications, if enabled, will state the iteration number, current
        step width `alpha`, current maximal change `delta` that is compared to
        the tolerance, and the convergence level if changed. The minimizer
        will exit in three states: DEAD if no step width above 1E-13 is
        accepted, QUIT if the maximum number of iterations is reached, or DONE
        if convergence is achieved. Returned will be the latest `x` and the
        latest convergence level, which can evaluate ``True`` for all exit
        states.

        References
        ----------
        .. [#] J. Nocedal and S. J. Wright, Springer 2006, "Numerical
            Optimization", ISBN: 978-0-387-30303-1 (print) /
            978-0-387-40065-5 (online)

        Examples
        --------
        >>> def egg(x):
        ...     E = 0.5*x.dot(x) # energy E(x) -- a two-dimensional parabola
        ...     g = x # gradient
        ...     return E, g
        >>> x = field(point_space(2), val=[1, 3])
        >>> x, convergence = steepest_descent(egg, note=True)(x0=x, tol=1E-4, clevel=3)
        iteration : 00000001   alpha = 1.0E+00   delta = 6.5E-01
        iteration : 00000002   alpha = 2.0E+00   delta = 1.4E-01
        iteration : 00000003   alpha = 1.6E-01   delta = 2.1E-03
        iteration : 00000004   alpha = 2.6E-03   delta = 3.0E-04
        iteration : 00000005   alpha = 2.0E-04   delta = 5.3E-05   convergence level : 1
        iteration : 00000006   alpha = 8.2E-05   delta = 4.4E-06   convergence level : 2
        iteration : 00000007   alpha = 6.6E-06   delta = 3.1E-06   convergence level : 3
        ... done.
        >>> bool(convergence)
        True
        >>> x.val # approximately zero
        array([ -6.87299426e-07,  -2.06189828e-06])

        Attributes
        ----------
        x : field
            Current field.
        eggs : function
            Given the current `x` it returns the tuple of energy and gradient.
        spam : function
            Callback function which is given the current `x` and iteration
            counter each iteration; can be ``None``.
        a : {4-tuple}
            Numbers obeying 0 < a1 ~ a2 < 1 ~ a3 < a4 that modify the step
            widths (default: (0.2, 0.5, 1, 2)).
        c : {2-tuple}
            Numbers obeying 0 < c1 < c2 < 1 that specify the Wolfe-conditions
            (default: (1E-4, 0.9)).
        note : notification
            Notification instance.

    """

    def __init__(self, eggs, spam=None, a=(0.2, 0.5, 1, 2), c=(1E-4, 0.9),
                 note=False):
        """
            Initializes the steepest_descent and sets the attributes (except
            for `x`).

            Parameters
            ----------
            eggs : function
                Given the current `x` it returns the tuple of energy and
                gradient.
            spam : function, *optional*
                Callback function which is given the current `x` and iteration
                counter each iteration (default: None).
            a : {4-tuple}, *optional*
                Numbers obeying 0 < a1 ~ a2 < 1 ~ a3 < a4 that modify the step
                widths (default: (0.2, 0.5, 1, 2)).
            c : {2-tuple}, *optional*
                Numbers obeying 0 < c1 < c2 < 1 that specify the
                Wolfe-conditions (default: (1E-4, 0.9)).
            note : bool, *optional*
                Indicates whether notes are printed or not (default: False).

        """
        self.eggs = eggs  ## returns energy and gradient

        self.spam = spam  ## serves as callback given x and iteration number
        self.a = a  ## 0 < a1 ~ a2 < 1 ~ a3 < a4
        self.c = c  ## 0 < c1 < c2 < 1

        self.note = notification(default=bool(note))

        self._alpha = None  ## last alpha

    ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

    def __call__(self, x0, alpha=1, tol=1E-4, clevel=8, limii=100000):
        """
            Runs the steepest descent minimization.

            Parameters
            ----------
            x0 : field
                Starting guess for the minimization.
            alpha : scalar, *optional*
                Starting step width to be multiplied with normalized gradient
                (default: 1).
            tol : scalar, *optional*
                Tolerance specifying convergence; measured by maximal change
                in `x` (default: 1E-4).
            clevel : integer, *optional*
                Number of times the tolerance should be undershot before
                exiting (default: 8).
            limii : integer, *optional*
                Maximum number of iterations performed (default: 100,000).

            Returns
            -------
            x : field
                Latest `x` of the minimization.
            convergence : integer
                Latest convergence level indicating whether the minimization
                has converged or not.

        """
        if not isinstance(x0, field):
            raise TypeError(about._errors.cstring("ERROR: invalid input."))
        self.x = x0

        ## check for existing alpha
        if alpha is None:
            if self._alpha is not None:
                alpha = self._alpha
            else:
                alpha = 1

        clevel = max(1, int(clevel))
        limii = int(limii)

        E, g = self.eggs(self.x)  ## energy and gradient
        norm = g.norm()  ## gradient norm
        if norm == 0:
            self.note.cprint("\niteration : 00000000   alpha = 0.0E+00   "
                             "delta = 0.0E+00\n... done.")
            return self.x, clevel+2

        convergence = 0
        ii = 1
        while True:
            x_, E, g, alpha, a = self._get_alpha(E, g, norm, alpha)  ## "news", alpha, a

            if alpha is None:
                self.note.cprint(
                    "\niteration : %08u   alpha < 1.0E-13\n... dead." % ii)
                break
            else:
                delta = abs(g).max()*(alpha/norm)
                #delta = np.absolute(g.val).max()*(alpha/norm)
                self.note.cflush(
                    "\niteration : %08u   alpha = %3.1E   delta = %3.1E"
                    % (ii, alpha, delta))
                ## update
                self.x = x_
                alpha *= a
                norm = g.norm()  ## gradient norm
done."%convergence) break elif(delta determines the new alpha while(True): ## Wolfe conditions wolfe,x_,E_,g_,a = self._check_wolfe(E,g,norm,alpha) # wolfe,x_,E_,g_,a = self._check_strong_wolfe(E,g,norm,alpha) if(wolfe): return x_,E_,g_,alpha,a else: alpha *= a if(alpha<1E-13): return None,None,None,None,None ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ def _check_wolfe(self,E,g,norm,alpha): ## > checks the Wolfe conditions x_ = self._get_x(g,norm,alpha) pg = norm E_,g_ = self.eggs(x_) if(E_>E+self.c[0]*alpha*pg): if(E_ checks the strong Wolfe conditions # x_ = self._get_x(g,norm,alpha) # pg = norm # E_,g_ = self.eggs(x_) # if(E_>E+self.c[0]*alpha*pg): # if(E_self.c[1]*np.absolute(pg)): # return True,x_,E_,g_,self.a[3] # return True,x_,E_,g_,self.a[2] ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ def _get_x(self,g,norm,alpha): ## > updates x return self.x-g*(alpha/norm) ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ def __repr__(self): return "" ##============================================================================= ##============================================================================= class quasi_newton_minimizer(object): """ .. __ .. / / .. _______ ____/ / .. / _____/ / _ / .. /_____ / __ / /_/ / __ .. /_______//__/ \______|/__/ class NIFTY tool class for steepest descent minimization This tool minimizes a scalar energy-function by steepest descent using the functions gradient. Steps and step widths are choosen according to the Wolfe conditions [#]_. For details on usage and output, see the notes below. Parameters ---------- eggs : function Given the current `x` it returns the tuple of energy and gradient. spam : function, *optional* Callback function which is given the current `x` and iteration counter each iteration (default: None). a : {4-tuple}, *optional* Numbers obeying 0 < a1 ~ a2 < 1 ~ a3 < a4 that modify the step widths (default: (0.2,0.5,1,2)). c : {2-tuple}, *optional* Numbers obeying 0 < c1 < c2 < 1 that specify the Wolfe-conditions (default: (1E-4,0.9)). note : bool, *optional* Indicates whether notes are printed or not (default: False). See Also -------- scipy.optimize.fmin_cg, scipy.optimize.fmin_ncg, scipy.optimize.fmin_l_bfgs_b Notes ----- After initialization by `__init__`, the minimizer is started by calling it using `__call__`, which takes additional parameters. Notifications, if enabled, will state the iteration number, current step width `alpha`, current maximal change `delta` that is compared to the tolerance, and the convergence level if changed. The minimizer will exit in three states: DEAD if no step width above 1E-13 is accepted, QUIT if the maximum number of iterations is reached, or DONE if convergence is achieved. Returned will be the latest `x` and the latest convergence level, which can evaluate ``True`` for all exit states. References ---------- .. [#] J. Nocedal and S. J. Wright, Springer 2006, "Numerical Optimization", ISBN: 978-0-387-30303-1 (print) / 978-0-387-40065-5 `(online) `_ Examples -------- >>> def egg(x): ... E = 0.5*x.dot(x) # energy E(x) -- a two-dimensional parabola ... g = x # gradient ... 
##=============================================================================

class quasi_newton_minimizer(object):
    """
        ..                          __
        ..                        /  /
        ..      _______      ____/  /
        ..    /  _____/    /   _   /
        ..   /_____  / __ /  /_/  / __
        ..  /_______//__/ \______|/__/  class

        NIFTY tool class for quasi-Newton-type minimization

        This tool minimizes a scalar energy-function along search directions
        supplied by its subclasses (e.g., :py:class:`steepest_descent_new`),
        using the function's gradient. Step widths are chosen by a line search
        satisfying the strong Wolfe conditions [#]_. For details on usage and
        output, see the notes below.

        Parameters
        ----------
        f : function
            Given the current `x` it returns the scalar energy.
        fprime : function
            Given the current `x` it returns the gradient field.
        f_args : tuple, *optional*
            Additional arguments passed to `f` and `fprime` (default: ()).
        callback : function, *optional*
            Callback function which is given the current `x`, the energy, and
            the iteration counter each iteration; may raise ``StopIteration``
            to abort the minimization (default: None).
        c1 : float, *optional*
            Parameter for the Armijo condition rule (default: 1E-4).
        c2 : float, *optional*
            Parameter for the curvature condition rule (default: 0.9).
        note : bool, *optional*
            Indicates whether notes are printed or not (default: False).

        See Also
        --------
        steepest_descent, scipy.optimize.fmin_cg, scipy.optimize.fmin_ncg,
        scipy.optimize.fmin_l_bfgs_b

        Notes
        -----
        This class is an abstract base class; subclasses have to implement
        `_get_descend_direction`. After initialization by `__init__`, the
        minimizer is started by calling it using `__call__`, which takes
        additional parameters. Notifications, if enabled, will state the
        iteration number, the current step width `step_length`, the current
        maximal change `delta` that is compared to the tolerance, and the
        convergence level if changed. The minimizer will exit in three states:
        DEAD if no step width above 1E-13 is accepted, QUIT if the maximum
        number of iterations is reached, or DONE if convergence is achieved.
        Returned will be the latest `x` and the latest convergence level,
        which can evaluate ``True`` for all exit states.

        References
        ----------
        .. [#] J. Nocedal and S. J. Wright, Springer 2006, "Numerical
            Optimization", ISBN: 978-0-387-30303-1 (print) /
            978-0-387-40065-5 (online)

        Attributes
        ----------
        x : field
            Current field.
        f : function
            Given the current `x` it returns the scalar energy.
        fprime : function
            Given the current `x` it returns the gradient field.
        callback : function
            Callback function; can be ``None``.
        line_searcher : line_search_strong_wolfe
            Line search instance used to determine the step widths.
        note : notification
            Notification instance.

    """

    def __init__(self, f, fprime, f_args=(), callback=None,
                 c1=1E-4, c2=0.9, note=False):
        """
            Initializes the quasi_newton_minimizer and sets the attributes
            (except for `x`).

            Parameters
            ----------
            f : function
                Given the current `x` it returns the scalar energy.
            fprime : function
                Given the current `x` it returns the gradient field.
            f_args : tuple, *optional*
                Additional arguments passed to `f` and `fprime` (default: ()).
            callback : function, *optional*
                Callback function which is given the current `x`, the energy,
                and the iteration counter each iteration (default: None).
            c1 : float, *optional*
                Parameter for the Armijo condition rule (default: 1E-4).
            c2 : float, *optional*
                Parameter for the curvature condition rule (default: 0.9).
            note : bool, *optional*
                Indicates whether notes are printed or not (default: False).

        """
        assert(callable(f))
        assert(callable(fprime))
        self.f = f
        self.fprime = fprime

        self.callback = callback

        self.line_searcher = line_search_strong_wolfe(f=self.f,
                                                      fprime=self.fprime,
                                                      f_args=f_args,
                                                      c1=c1, c2=c2)

        self.note = notification(default=bool(note))

        self.memory = {}

    def __call__(self, x0, tol=1E-4, clevel=8, limii=100000):
        """
            Runs the minimization.

            Parameters
            ----------
            x0 : field
                Starting guess for the minimization.
            tol : scalar, *optional*
                Tolerance specifying convergence; measured by maximal change
                in `x` (default: 1E-4).
            clevel : integer, *optional*
                Number of times the tolerance should be undershot before
                exiting (default: 8).
            limii : integer, *optional*
                Maximum number of iterations performed (default: 100,000).

            Returns
            -------
            x : field
                Latest `x` of the minimization.
            convergence : integer
                Latest convergence level indicating whether the minimization
                has converged or not.
""" if not isinstance(x0, field): raise TypeError(about._errors.cstring("ERROR: invalid input.")) self.x = x0 clevel = max(1, int(clevel)) limii = int(limii) convergence = 0 f_k_minus_1 = None f_k = self.f(self.x) step_length = 0 for i in xrange(limii): if self.callback is not None: try: self.callback(self.x, f_k, i) except StopIteration: self.note.cprint("\nCallback function stopped minization.") break # compute the the gradient for the current x gradient = self.fprime(self.x) gradient_norm = gradient.norm() # check if x is at a flat point if gradient_norm == 0: self.note.cprint( "\niteration : 00000000 step_length = 0.0E+00 " + "delta = 0.0E+00\n... done.") convergence = clevel+2 break descend_direction = self._get_descend_direction(gradient, gradient_norm) # compute the step length, which minimizes f_k along the # search direction = the gradient self.line_searcher.set_coordinates(xk=self.x, pk=descend_direction, f_k=f_k, fprime_k=gradient, f_k_minus_1=f_k_minus_1) f_k_minus_1 = f_k step_length, f_k = self.line_searcher.perform_line_search() if step_length < 1E-13: self.note.cprint( "\niteration : %08u step_length < 1.0E-13\n... dead." % i) break # update x self.x += descend_direction*step_length # check convergence delta = abs(gradient).max() * (step_length/gradient_norm) self.note.cflush( "\niteration : %08u step_length = %3.1E delta = %3.1E" % (i, step_length, delta)) if delta == 0: convergence = clevel + 2 self.note.cprint(" convergence level : %u\n... done." % convergence) break elif delta < tol: convergence += 1 self.note.cflush(" convergence level : %u" % convergence) if convergence == clevel: convergence += int(i == clevel) self.note.cprint("\n... done.") break else: convergence = max(0, convergence-1) else: self.note.cprint("\n... quit.") return self.x, convergence def _get_descend_direction(self, gradient, gradient_norm): raise NotImplementedError def __repr__(self): return "" class steepest_descent_new(quasi_newton_minimizer): def _get_descend_direction(self, gradient, gradient_norm): return gradient/(-gradient_norm) class lbfgs(quasi_newton_minimizer): def __init__(self, *args, **kwargs): super(lbfgs, self).__init__(*args, **kwargs) # self.memory... def _get_descend_direction(self, gradient, gradient_norm): pass # ----------------------------------------------------------------------------- # Pure-Python Wolfe line and scalar searches # Based on scipy implementation # ----------------------------------------------------------------------------- class line_search_strong_wolfe(object): """ Class for finding a step size that satisfies the strong Wolfe conditions. """ def __init__(self, f, fprime, f_args=(), c1=1e-4, c2=0.9, max_step_size=50): """ Parameters ---------- f : callable f(x, *args) Objective function. fprime : callable f'(x, *args) Objective functions gradient. f_args : tuple (optional) Additional arguments passed to objective function and its derivation. c1 : float (optional) Parameter for Armijo condition rule. c2 : float (optional) Parameter for curvature condition rule. max_step_size : float (optional) Maximum step size """ assert(callable(f)) assert(callable(fprime)) self.c1 = np.float(c1) self.c2 = np.float(c2) self.max_step_size = max_step_size self.max_zoom_iterations = 100 self.f = f self.fprime = fprime self.f_args = f_args self.xk = None self.pk = None self.f_k = None self.f_k_minus_1 = None self.fprime_k = None self._last_alpha_star = 1. 
# ------------------------------------------------------------------------------
# Pure-Python Wolfe line and scalar searches
# Based on the scipy implementation
# ------------------------------------------------------------------------------

class line_search_strong_wolfe(object):
    """
        Class for finding a step size that satisfies the strong Wolfe
        conditions.
    """

    def __init__(self, f, fprime, f_args=(), c1=1e-4, c2=0.9,
                 max_step_size=50):
        """
            Parameters
            ----------
            f : callable f(x, *args)
                Objective function.
            fprime : callable f'(x, *args)
                Objective function's gradient.
            f_args : tuple (optional)
                Additional arguments passed to the objective function and its
                derivative.
            c1 : float (optional)
                Parameter for the Armijo condition rule.
            c2 : float (optional)
                Parameter for the curvature condition rule.
            max_step_size : float (optional)
                Maximum step size.
        """
        assert(callable(f))
        assert(callable(fprime))
        self.c1 = np.float(c1)
        self.c2 = np.float(c2)
        self.max_step_size = max_step_size
        self.max_zoom_iterations = 100

        self.f = f
        self.fprime = fprime
        self.f_args = f_args

        self.xk = None
        self.pk = None
        self.f_k = None
        self.f_k_minus_1 = None
        self.fprime_k = None

        self._last_alpha_star = 1.

    def set_coordinates(self, xk, pk, f_k=None, fprime_k=None,
                        f_k_minus_1=None):
        """
            Set the coordinates for a new line search.

            Parameters
            ----------
            xk : ndarray, d2o, field
                Starting point.
            pk : ndarray, d2o, field
                Unit vector in search direction.
            f_k : float (optional)
                Function value f(x_k).
            fprime_k : ndarray, d2o, field (optional)
                Function value fprime(xk).
            f_k_minus_1 : None, float (optional)
                Function value f(x_k-1).
        """
        self.xk = xk
        self.pk = pk

        if f_k is None:
            self.f_k = self.f(xk)
        else:
            self.f_k = f_k

        if fprime_k is None:
            self.fprime_k = self.fprime(xk)
        else:
            self.fprime_k = fprime_k

        self.f_k_minus_1 = f_k_minus_1

    def _phi(self, alpha):
        if alpha == 0:
            value = self.f_k
        else:
            value = self.f(self.xk + self.pk*alpha, *self.f_args)
        return value

    def _phiprime(self, alpha):
        if alpha == 0:
            gradient = self.fprime_k
        else:
            gradient = self.fprime(self.xk + self.pk*alpha, *self.f_args)
        return vdot(gradient, self.pk)

    def perform_line_search(self, c1=None, c2=None, max_step_size=None,
                            max_iterations=10):
        if c1 is None:
            c1 = self.c1
        if c2 is None:
            c2 = self.c2
        if max_step_size is None:
            max_step_size = self.max_step_size

        # initialize the zero phis
        old_phi_0 = self.f_k_minus_1
        phi_0 = self._phi(0.)
        phiprime_0 = self._phiprime(0.)

        if phiprime_0 == 0:
            about.warnings.cprint(
                "WARNING: Flat gradient in search direction.")
            return 0., 0.

        # set alphas
        alpha0 = 0.
        if old_phi_0 is not None and phiprime_0 != 0:
            alpha1 = min(1.0, 1.01*2*(phi_0 - old_phi_0)/phiprime_0)
            if alpha1 < 0:
                alpha1 = 1.0
        else:
            alpha1 = 1.0
        # alpha1 = 1.
        # alpha1 = 1.01*self._last_alpha_star

        # give the alpha0 phis the right init value
        phi_alpha0 = phi_0
        phiprime_alpha0 = phiprime_0

        # start the minimization loop
        for i in xrange(max_iterations):
            phi_alpha1 = self._phi(alpha1)

            if alpha1 == 0:
                about.warnings.cprint("WARNING: Increment size became 0.")
                alpha_star = 0.
                phi_star = phi_0
                break

            if (phi_alpha1 > phi_0 + c1*alpha1*phiprime_0) or \
               ((phi_alpha1 >= phi_alpha0) and (i > 1)):
                (alpha_star, phi_star) = self._zoom(alpha0, alpha1,
                                                    phi_0, phiprime_0,
                                                    phi_alpha0,
                                                    phiprime_alpha0,
                                                    phi_alpha1,
                                                    c1, c2)
                break

            phiprime_alpha1 = self._phiprime(alpha1)
            if abs(phiprime_alpha1) <= -c2*phiprime_0:
                alpha_star = alpha1
                phi_star = phi_alpha1
                break

            if phiprime_alpha1 >= 0:
                (alpha_star, phi_star) = self._zoom(alpha1, alpha0,
                                                    phi_0, phiprime_0,
                                                    phi_alpha1,
                                                    phiprime_alpha1,
                                                    phi_alpha0,
                                                    c1, c2)
                break

            # update alphas
            alpha0, alpha1 = alpha1, min(2*alpha1, max_step_size)
            phi_alpha0 = phi_alpha1
            phiprime_alpha0 = phiprime_alpha1
            # phi_alpha1 = self._phi(alpha1)

        else:
            # max_iterations was reached
            alpha_star = alpha1
            phi_star = phi_alpha1
            about.warnings.cprint(
                "WARNING: The line search algorithm did not converge.")

        self._last_alpha_star = alpha_star
        return alpha_star, phi_star

    def _zoom(self, alpha_lo, alpha_hi, phi_0, phiprime_0,
              phi_lo, phiprime_lo, phi_hi, c1, c2):

        max_iterations = self.max_zoom_iterations
        # define the cubic and quadratic interpolant checks
        cubic_delta = 0.2  # cubic
        quad_delta = 0.1  # quadratic

        # initialize the most recent versions (j-1) of phi and alpha
        alpha_recent = 0
        phi_recent = phi_0

        for i in xrange(max_iterations):
            delta_alpha = alpha_hi - alpha_lo
            if delta_alpha < 0:
                a, b = alpha_hi, alpha_lo
            else:
                a, b = alpha_lo, alpha_hi

            # Try cubic interpolation
            if i > 0:
                cubic_check = cubic_delta * delta_alpha
                alpha_j = self._cubicmin(alpha_lo, phi_lo, phiprime_lo,
                                         alpha_hi, phi_hi,
                                         alpha_recent, phi_recent)
            # If cubic was not successful or not available, try quadratic
            if (i == 0) or (alpha_j is None) or (alpha_j > b - cubic_check) or\
               (alpha_j < a + cubic_check):
                quad_check = quad_delta * delta_alpha
                alpha_j = self._quadmin(alpha_lo, phi_lo, phiprime_lo,
                                        alpha_hi, phi_hi)
                # If quadratic was not successful, try bisection
                if (alpha_j is None) or (alpha_j > b - quad_check) or \
                   (alpha_j < a + quad_check):
                    alpha_j = alpha_lo + 0.5*delta_alpha

            # Check if the current value of alpha_j is already sufficient
            phi_alphaj = self._phi(alpha_j)

            # If the first Wolfe condition is not met replace alpha_hi
            # by alpha_j
            if (phi_alphaj > phi_0 + c1*alpha_j*phiprime_0) or\
               (phi_alphaj >= phi_lo):
                alpha_recent, phi_recent = alpha_hi, phi_hi
                alpha_hi, phi_hi = alpha_j, phi_alphaj
            else:
                phiprime_alphaj = self._phiprime(alpha_j)
                # If the second Wolfe condition is met, return the result
                if abs(phiprime_alphaj) <= -c2*phiprime_0:
                    alpha_star = alpha_j
                    phi_star = phi_alphaj
                    break
                # If not, check the sign of the slope
                if phiprime_alphaj*delta_alpha >= 0:
                    alpha_recent, phi_recent = alpha_hi, phi_hi
                    alpha_hi, phi_hi = alpha_lo, phi_lo
                else:
                    alpha_recent, phi_recent = alpha_lo, phi_lo
                # Replace alpha_lo by alpha_j
                (alpha_lo, phi_lo, phiprime_lo) = (alpha_j, phi_alphaj,
                                                   phiprime_alphaj)

        else:
            alpha_star, phi_star = alpha_j, phi_alphaj
            about.warnings.cprint(
                "WARNING: The line search algorithm (zoom) did not converge.")

        return alpha_star, phi_star

    def _cubicmin(self, a, fa, fpa, b, fb, c, fc):
        """
        Finds the minimizer for a cubic polynomial that goes through the
        points (a,fa), (b,fb), and (c,fc) with derivative at a of fpa.
        If no minimizer can be found return None
        """
        # f(x) = A *(x-a)^3 + B*(x-a)^2 + C*(x-a) + D

        with np.errstate(divide='raise', over='raise', invalid='raise'):
            try:
                C = fpa
                db = b - a
                dc = c - a
                denom = (db * dc) ** 2 * (db - dc)
                d1 = np.empty((2, 2))
                d1[0, 0] = dc ** 2
                d1[0, 1] = -db ** 2
                d1[1, 0] = -dc ** 3
                d1[1, 1] = db ** 3
                [A, B] = np.dot(d1, np.asarray([fb - fa - C * db,
                                                fc - fa - C * dc]).flatten())
                A /= denom
                B /= denom
                radical = B * B - 3 * A * C
                xmin = a + (-B + np.sqrt(radical)) / (3 * A)
            except ArithmeticError:
                return None
        if not np.isfinite(xmin):
            return None
        return xmin

    def _quadmin(self, a, fa, fpa, b, fb):
        """
        Finds the minimizer for a quadratic polynomial that goes through
        the points (a,fa), (b,fb) with derivative at a of fpa.
        """
        # f(x) = B*(x-a)^2 + C*(x-a) + D
        with np.errstate(divide='raise', over='raise', invalid='raise'):
            try:
                D = fa
                C = fpa
                db = b - a * 1.0
                B = (fb - D - C * db) / (db * db)
                xmin = a - C / (2.0 * B)
            except ArithmeticError:
                return None
        if not np.isfinite(xmin):
            return None
        return xmin


class cyclic_storage(object):
    pass
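
# ------------------------------------------------------------------------------
# Illustrative sketch (not executed on import): one strong-Wolfe line search
# along the negative gradient of the quadratic E(x) = 0.5*x.dot(x).  Along the
# direction -x the exact minimizer sits at step length 1, so alpha_star should
# come out close to 1.  The helper and the `point_space` import path are
# assumptions and not part of the original module.
# ------------------------------------------------------------------------------

def _demo_line_search_strong_wolfe():
    from nifty.nifty_core import point_space  # assumed import path

    def f(x):  # scalar energy
        return 0.5*x.dot(x)

    def fprime(x):  # its gradient
        return x

    xk = field(point_space(2), val=[1, 3])
    searcher = line_search_strong_wolfe(f=f, fprime=fprime)
    searcher.set_coordinates(xk=xk, pk=fprime(xk)*(-1))  # descend along -grad
    alpha_star, phi_star = searcher.perform_line_search()
    return alpha_star, phi_star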