diff --git a/demos/critical_filtering.py b/demos/critical_filtering.py index edf136e3bb990faa50e36c6ee1594cb98f21ec36..4337999cd0e570fd965b49d3aab0245e3104b568 100644 --- a/demos/critical_filtering.py +++ b/demos/critical_filtering.py @@ -1,17 +1,21 @@ -from nifty import * -from nifty.library.wiener_filter import WienerFilterEnergy +import numpy as np +from nifty import (VL_BFGS, DiagonalOperator, FFTOperator, Field, + LinearOperator, PowerSpace, RelaxedNewton, RGSpace, + SteepestDescent, create_power_operator, exp, log, sqrt) from nifty.library.critical_filter import CriticalPowerEnergy -import plotly.offline as pl -import plotly.graph_objs as go +from nifty.library.wiener_filter import WienerFilterEnergy +import plotly.graph_objs as go +import plotly.offline as pl from mpi4py import MPI + comm = MPI.COMM_WORLD rank = comm.rank np.random.seed(42) -def plot_parameters(m,t,p, p_d): +def plot_parameters(m, t, p, p_d): x = log(t.domain[0].kindex) m = fft.adjoint_times(m) @@ -20,7 +24,8 @@ def plot_parameters(m,t,p, p_d): p = p.val.get_full_data().real p_d = p_d.val.get_full_data().real pl.plot([go.Heatmap(z=m)], filename='map.html') - pl.plot([go.Scatter(x=x,y=t), go.Scatter(x=x ,y=p), go.Scatter(x=x, y=p_d)], filename="t.html") + pl.plot([go.Scatter(x=x, y=t), go.Scatter(x=x, y=p), + go.Scatter(x=x, y=p_d)], filename="t.html") class AdjointFFTResponse(LinearOperator): @@ -36,6 +41,7 @@ class AdjointFFTResponse(LinearOperator): def _adjoint_times(self, x, spaces=None): return self.FFT(self.R.adjoint_times(x)) + @property def domain(self): return self._domain @@ -48,41 +54,40 @@ class AdjointFFTResponse(LinearOperator): def unitary(self): return False + if __name__ == "__main__": distribution_strategy = 'not' # Set up position space - s_space = RGSpace([128,128]) + s_space = RGSpace([128, 128]) # s_space = HPSpace(32) # Define harmonic transformation and associated harmonic space fft = FFTOperator(s_space) h_space = fft.target[0] - # Setting up power space + # Set up power space p_space = PowerSpace(h_space, logarithmic=True, distribution_strategy=distribution_strategy) - # Choosing the prior correlation structure and defining correlation operator + # Choose the prior correlation structure and defining correlation operator p_spec = (lambda k: (.5 / (k + 1) ** 3)) S = create_power_operator(h_space, power_spectrum=p_spec, distribution_strategy=distribution_strategy) - # Drawing a sample sh from the prior distribution in harmonic space + # Draw a sample sh from the prior distribution in harmonic space sp = Field(p_space, val=p_spec, distribution_strategy=distribution_strategy) sh = sp.power_synthesize(real_signal=True) - - # Choosing the measurement instrument + # Choose the measurement instrument # Instrument = SmoothingOperator(s_space, sigma=0.01) Instrument = DiagonalOperator(s_space, diagonal=1.) # Instrument._diagonal.val[200:400, 200:400] = 0 - #Instrument._diagonal.val[64:512-64, 64:512-64] = 0 - + # Instrument._diagonal.val[64:512-64, 64:512-64] = 0 - #Adding a harmonic transformation to the instrument + # Add a harmonic transformation to the instrument R = AdjointFFTResponse(fft, Instrument) noise = 1. @@ -92,7 +97,7 @@ if __name__ == "__main__": std=sqrt(noise), mean=0) - # Creating the mock data + # Create mock data d = R(sh) + n # The information source @@ -103,52 +108,49 @@ if __name__ == "__main__": if rank == 0: pl.plot([go.Heatmap(z=d_data)], filename='data.html') - # minimization strategy - - def convergence_measure(a_energy, iteration): # returns current energy + # Minimization strategy + def convergence_measure(a_energy, iteration): # returns current energy x = a_energy.value - print (x, iteration) - - - minimizer1 = RelaxedNewton(convergence_tolerance=1e-2, - convergence_level=2, - iteration_limit=3, - callback=convergence_measure) - - minimizer2 = VL_BFGS(convergence_tolerance=0, - iteration_limit=7, - callback=convergence_measure, - max_history_length=3) - - # Setting starting position - flat_power = Field(p_space,val=1e-8) + print(x, iteration) + + minimizer1 = RelaxedNewton(convergence_tolerance=1e-4, + convergence_level=1, + iteration_limit=5, + callback=convergence_measure) + minimizer2 = VL_BFGS(convergence_tolerance=1e-4, + convergence_level=1, + iteration_limit=20, + callback=convergence_measure, + max_history_length=20) + minimizer3 = SteepestDescent(convergence_tolerance=1e-4, + iteration_limit=100, + callback=convergence_measure) + + # Set starting position + flat_power = Field(p_space, val=1e-8) m0 = flat_power.power_synthesize(real_signal=True) t0 = Field(p_space, val=log(1./(1+p_space.kindex)**2)) - - for i in range(500): S0 = create_power_operator(h_space, power_spectrum=exp(t0), - distribution_strategy=distribution_strategy) + distribution_strategy=distribution_strategy) - # Initializing the nonlinear Wiener Filter energy + # Initialize non-linear Wiener Filter energy map_energy = WienerFilterEnergy(position=m0, d=d, R=R, N=N, S=S0) - # Solving the Wiener Filter analytically + # Solve the Wiener Filter analytically D0 = map_energy.curvature m0 = D0.inverse_times(j) - # Initializing the power energy with updated parameters - power_energy = CriticalPowerEnergy(position=t0, m=m0, D=D0, smoothness_prior=10., samples=3) - - (power_energy, convergence) = minimizer1(power_energy) - - - # Setting new power spectrum - t0.val = power_energy.position.val.real + # Initialize power energy with updated parameters + power_energy = CriticalPowerEnergy(position=t0, m=m0, D=D0, + smoothness_prior=10., samples=3) - # Plotting current estimate - print i - if i%50 == 0: - plot_parameters(m0,t0,log(sp), data_power) + (power_energy, convergence) = minimizer2(power_energy) + # Set new power spectrum + t0.val = power_energy.position.val.real + # Plot current estimate + print(i) + if i % 5 == 0: + plot_parameters(m0, t0, log(sp), data_power) diff --git a/nifty/energies/line_energy.py b/nifty/energies/line_energy.py index fe0c1ec1e6ba4753b7a0b10018ca801e154e0ae6..1deb6fd9de808d57b3f4fec78a62b736e393cd63 100644 --- a/nifty/energies/line_energy.py +++ b/nifty/energies/line_energy.py @@ -16,10 +16,8 @@ # NIFTy is being developed at the Max-Planck-Institut fuer Astrophysik # and financially supported by the Studienstiftung des deutschen Volkes. -from .energy import Energy - -class LineEnergy(Energy): +class LineEnergy(object): """ Evaluates an underlying Energy along a certain line direction. Given an Energy class and a line direction, its position is parametrized by @@ -27,34 +25,31 @@ class LineEnergy(Energy): Parameters ---------- - position : float - The step length parameter along the given line direction. + line_position : float + Defines the full spatial position of this energy via + self.energy.position = zero_point + line_position*line_direction energy : Energy The Energy object which will be evaluated along the given direction. line_direction : Field - Direction used for line evaluation. - zero_point : Field *optional* - Fixing the zero point of the line restriction. Used to memorize this - position in new initializations. By the default the current position - of the supplied `energy` instance is used (default : None). + Direction used for line evaluation. Does not have to be normalized. + offset : float *optional* + Indirectly defines the zero point of the line via the equation + energy.position = zero_point + offset*line_direction + (default : 0.). Attributes ---------- - position : float + line_position : float The position along the given line direction relative to the zero point. value : float - The value of the energy functional at given `position`. - gradient : float - The gradient of the underlying energy instance along the line direction - projected on the line direction. - curvature : callable - A positive semi-definite operator or function describing the curvature - of the potential at given `position`. + The value of the energy functional at the given position + directional_derivative : float + The directional derivative at the given position line_direction : Field Direction along which the movement is restricted. Does not have to be normalized. energy : Energy - The underlying Energy at the `position` along the line direction. + The underlying Energy at the given position Raises ------ @@ -72,45 +67,49 @@ class LineEnergy(Energy): """ - def __init__(self, position, energy, line_direction, zero_point=None): - super(LineEnergy, self).__init__(position=position) - self.line_direction = line_direction - - if zero_point is None: - zero_point = energy.position - self._zero_point = zero_point + def __init__(self, line_position, energy, line_direction, offset=0.): + self._line_position = float(line_position) + self._line_direction = line_direction - position_on_line = self._zero_point + self.position*line_direction - self.energy = energy.at(position=position_on_line) + pos = energy.position \ + + (self._line_position-float(offset))*self._line_direction + self.energy = energy.at(position=pos) - def at(self, position): + def at(self, line_position): """ Returns LineEnergy at new position, memorizing the zero point. Parameters ---------- - position : float + line_position : float Parameter for the new position on the line direction. Returns ------- - out : LineEnergy LineEnergy object at new position with same zero point as `self`. """ - return self.__class__(position, + return self.__class__(line_position, self.energy, self.line_direction, - zero_point=self._zero_point) + offset=self.line_position) @property def value(self): return self.energy.value @property - def gradient(self): - return self.energy.gradient.vdot(self.line_direction) + def line_position(self): + return self._line_position + + @property + def line_direction(self): + return self._line_direction @property - def curvature(self): - return self.energy.curvature + def directional_derivative(self): + res = self.energy.gradient.vdot(self.line_direction) + if abs(res.imag) / max(abs(res.real), 1.) > 1e-12: + print "directional derivative has non-negligible " \ + "imaginary part:", res + return res.real diff --git a/nifty/minimization/descent_minimizer.py b/nifty/minimization/descent_minimizer.py index d65b9d86b23151c8c870c2245d157033dbe5c6db..1c1d49b5a8a9777d2762045087bab633cb98566e 100644 --- a/nifty/minimization/descent_minimizer.py +++ b/nifty/minimization/descent_minimizer.py @@ -137,7 +137,7 @@ class DescentMinimizer(Loggable, object): # compute the the gradient for the current location gradient = energy.gradient - gradient_norm = gradient.vdot(gradient) + gradient_norm = gradient.norm() # check if position is at a flat point if gradient_norm == 0: @@ -147,7 +147,6 @@ class DescentMinimizer(Loggable, object): # current position is encoded in energy object descent_direction = self.get_descent_direction(energy) - # compute the step length, which minimizes energy.value along the # search direction step_length, f_k, new_energy = \ @@ -155,8 +154,12 @@ class DescentMinimizer(Loggable, object): energy=energy, pk=descent_direction, f_k_minus_1=f_k_minus_1) + if f_k_minus_1 is None: + delta=1e30 + else: + delta = abs(f_k -f_k_minus_1)/max(abs(f_k),abs(f_k_minus_1),1.) f_k_minus_1 = energy.value - + tx1=energy.position-new_energy.position # check if new energy value is bigger than old energy value if (new_energy.value - energy.value) > 0: self.logger.info("Line search algorithm returned a new energy " @@ -165,7 +168,6 @@ class DescentMinimizer(Loggable, object): energy = new_energy # check convergence - delta = abs(gradient).max() * (step_length/np.sqrt(gradient_norm)) self.logger.debug("Iteration:%08u step_length=%3.1E " "delta=%3.1E energy=%3.1E" % (iteration_number, step_length, delta, diff --git a/nifty/minimization/line_searching/line_search.py b/nifty/minimization/line_searching/line_search.py index 93ede56af96a27ca4c0fd38c5bd4e730fd807f73..92c20485edf07060ba231ea818ffb5313f3dba3f 100644 --- a/nifty/minimization/line_searching/line_search.py +++ b/nifty/minimization/line_searching/line_search.py @@ -63,7 +63,7 @@ class LineSearch(Loggable, object): iteration of the line search procedure. (Default: None) """ - self.line_energy = LineEnergy(position=0., + self.line_energy = LineEnergy(line_position=0., energy=energy, line_direction=pk) if f_k_minus_1 is not None: diff --git a/nifty/minimization/line_searching/line_search_strong_wolfe.py b/nifty/minimization/line_searching/line_search_strong_wolfe.py index 9a4941e34de94096af886aa67419f1bb1c286257..7be46724fdf25a1fce6a3dbe03ed15e97d2d72a0 100644 --- a/nifty/minimization/line_searching/line_search_strong_wolfe.py +++ b/nifty/minimization/line_searching/line_search_strong_wolfe.py @@ -62,8 +62,8 @@ class LineSearchStrongWolfe(LineSearch): """ def __init__(self, c1=1e-4, c2=0.9, - max_step_size=50, max_iterations=10, - max_zoom_iterations=10): + max_step_size=1000000000, max_iterations=100, + max_zoom_iterations=30): super(LineSearchStrongWolfe, self).__init__() @@ -103,20 +103,15 @@ class LineSearchStrongWolfe(LineSearch): """ self._set_line_energy(energy, pk, f_k_minus_1=f_k_minus_1) - c1 = self.c1 - c2 = self.c2 max_step_size = self.max_step_size max_iterations = self.max_iterations # initialize the zero phis old_phi_0 = self.f_k_minus_1 - energy_0 = self.line_energy.at(0) - phi_0 = energy_0.value - phiprime_0 = energy_0.gradient - - if phiprime_0 == 0: - self.logger.warn("Flat gradient in search direction.") - return 0., 0. + le_0 = self.line_energy.at(0) + phi_0 = le_0.value + phiprime_0 = le_0.directional_derivative + assert phiprime_0<0, "input direction must be a descent direction" # set alphas alpha0 = 0. @@ -135,64 +130,67 @@ class LineSearchStrongWolfe(LineSearch): # start the minimization loop for i in xrange(max_iterations): - energy_alpha1 = self.line_energy.at(alpha1) - phi_alpha1 = energy_alpha1.value + le_alpha1 = self.line_energy.at(alpha1) + phi_alpha1 = le_alpha1.value if alpha1 == 0: self.logger.warn("Increment size became 0.") alpha_star = 0. phi_star = phi_0 - energy_star = energy_0 + le_star = le_0 break - if (phi_alpha1 > phi_0 + c1*alpha1*phiprime_0) or \ - ((phi_alpha1 >= phi_alpha0) and (i > 1)): - (alpha_star, phi_star, energy_star) = self._zoom( + if (phi_alpha1 > phi_0 + self.c1*alpha1*phiprime_0) or \ + ((phi_alpha1 >= phi_alpha0) and (i > 0)): + (alpha_star, phi_star, le_star) = self._zoom( alpha0, alpha1, phi_0, phiprime_0, phi_alpha0, phiprime_alpha0, - phi_alpha1, - c1, c2) + phi_alpha1) break - phiprime_alpha1 = energy_alpha1.gradient - if abs(phiprime_alpha1) <= -c2*phiprime_0: + phiprime_alpha1 = le_alpha1.directional_derivative + if abs(phiprime_alpha1) <= -self.c2*phiprime_0: alpha_star = alpha1 phi_star = phi_alpha1 - energy_star = energy_alpha1 + le_star = le_alpha1 break if phiprime_alpha1 >= 0: - (alpha_star, phi_star, energy_star) = self._zoom( + (alpha_star, phi_star, le_star) = self._zoom( alpha1, alpha0, phi_0, phiprime_0, phi_alpha1, phiprime_alpha1, - phi_alpha0, - c1, c2) + phi_alpha0) break # update alphas alpha0, alpha1 = alpha1, min(2*alpha1, max_step_size) + if alpha1 == max_step_size: + print "reached max step size, bailing out" + alpha_star = alpha1 + phi_star = phi_alpha1 + le_star = le_alpha1 + break phi_alpha0 = phi_alpha1 phiprime_alpha0 = phiprime_alpha1 - # phi_alpha1 = self._phi(alpha1) else: # max_iterations was reached alpha_star = alpha1 phi_star = phi_alpha1 - energy_star = energy_alpha1 + le_star = le_alpha1 self.logger.error("The line search algorithm did not converge.") # extract the full energy from the line_energy - energy_star = energy_star.energy + energy_star = le_star.energy direction_length = pk.norm() step_length = alpha_star * direction_length return step_length, phi_star, energy_star def _zoom(self, alpha_lo, alpha_hi, phi_0, phiprime_0, - phi_lo, phiprime_lo, phi_hi, c1, c2): + phi_lo, phiprime_lo, phi_hi): """Performs the second stage of the line search algorithm. If the first stage was not successful then the Zoom algorithm tries to @@ -202,9 +200,10 @@ class LineSearchStrongWolfe(LineSearch): Parameters ---------- alpha_lo : float - The lower boundary for the step length interval. - alph_hi : float - The upper boundary for the step length interval. + A boundary for the step length interval. + Fulfills Wolfe condition 1. + alpha_hi : float + The other boundary for the step length interval. phi_0 : float Value of the energy at the starting point of the line search algorithm. @@ -219,10 +218,6 @@ class LineSearchStrongWolfe(LineSearch): phi_hi : float Value of the energy if we perform a step of length alpha_hi in descent direction. - c1 : float - Parameter for Armijo condition rule. - c2 : float - Parameter for curvature condition rule. Returns ------- @@ -238,12 +233,13 @@ class LineSearchStrongWolfe(LineSearch): # define the cubic and quadratic interpolant checks cubic_delta = 0.2 # cubic quad_delta = 0.1 # quadratic + phiprime_alphaj = 0. - # initialize the most recent versions (j-1) of phi and alpha - alpha_recent = 0 - phi_recent = phi_0 - + assert phi_lo <= phi_0 + self.c1*alpha_lo*phiprime_0 + assert phiprime_lo*(alpha_hi-alpha_lo)<0. for i in xrange(max_iterations): + #assert phi_lo <= phi_0 + self.c1*alpha_lo*phiprime_0 + #assert phiprime_lo*(alpha_hi-alpha_lo)<0. delta_alpha = alpha_hi - alpha_lo if delta_alpha < 0: a, b = alpha_hi, alpha_lo @@ -262,28 +258,28 @@ class LineSearchStrongWolfe(LineSearch): quad_check = quad_delta * delta_alpha alpha_j = self._quadmin(alpha_lo, phi_lo, phiprime_lo, alpha_hi, phi_hi) - # If quadratic was not successfull, try bisection + # If quadratic was not successful, try bisection if (alpha_j is None) or (alpha_j > b - quad_check) or \ (alpha_j < a + quad_check): alpha_j = alpha_lo + 0.5*delta_alpha # Check if the current value of alpha_j is already sufficient - energy_alphaj = self.line_energy.at(alpha_j) - phi_alphaj = energy_alphaj.value + le_alphaj = self.line_energy.at(alpha_j) + phi_alphaj = le_alphaj.value # If the first Wolfe condition is not met replace alpha_hi # by alpha_j - if (phi_alphaj > phi_0 + c1*alpha_j*phiprime_0) or\ + if (phi_alphaj > phi_0 + self.c1*alpha_j*phiprime_0) or\ (phi_alphaj >= phi_lo): alpha_recent, phi_recent = alpha_hi, phi_hi alpha_hi, phi_hi = alpha_j, phi_alphaj else: - phiprime_alphaj = energy_alphaj.gradient + phiprime_alphaj = le_alphaj.directional_derivative # If the second Wolfe condition is met, return the result - if abs(phiprime_alphaj) <= -c2*phiprime_0: + if abs(phiprime_alphaj) <= -self.c2*phiprime_0: alpha_star = alpha_j phi_star = phi_alphaj - energy_star = energy_alphaj + le_star = le_alphaj break # If not, check the sign of the slope if phiprime_alphaj*delta_alpha >= 0: @@ -296,12 +292,12 @@ class LineSearchStrongWolfe(LineSearch): phiprime_alphaj) else: - alpha_star, phi_star, energy_star = \ - alpha_j, phi_alphaj, energy_alphaj + alpha_star, phi_star, le_star = \ + alpha_j, phi_alphaj, le_alphaj self.logger.error("The line search algorithm (zoom) did not " "converge.") - return alpha_star, phi_star, energy_star + return alpha_star, phi_star, le_star def _cubicmin(self, a, fa, fpa, b, fb, c, fc): """Estimating the minimum with cubic interpolation. diff --git a/test/test_minimization/test_descent_minimizers.py b/test/test_minimization/test_descent_minimizers.py index fcffa73d7698faec737e979f35faf5a1a4404898..81daf7eb5609b24cbdd2be30fcf06cbd80b4a81f 100644 --- a/test/test_minimization/test_descent_minimizers.py +++ b/test/test_minimization/test_descent_minimizers.py @@ -43,7 +43,8 @@ class Test_DescentMinimizers(unittest.TestCase): covariance = DiagonalOperator(space, diagonal=covariance_diagonal) energy = QuadraticPotential(position=starting_point, eigenvalues=covariance) - minimizer = minimizer_class(iteration_limit=30) + minimizer = minimizer_class(iteration_limit=30, + convergence_tolerance=1e-10) (energy, convergence) = minimizer(energy)