Commit f51e2814 authored by Theo Steininger's avatar Theo Steininger

Merge branch 'line_search' into 'master'

Line search

See merge request !166
parents 0808fda1 570a1ddd
Pipeline #15535 passed with stages
in 13 minutes and 17 seconds
from nifty import *
from nifty.library.wiener_filter import WienerFilterEnergy
import numpy as np
from nifty import (VL_BFGS, DiagonalOperator, FFTOperator, Field,
LinearOperator, PowerSpace, RelaxedNewton, RGSpace,
SteepestDescent, create_power_operator, exp, log, sqrt)
from nifty.library.critical_filter import CriticalPowerEnergy
import plotly.offline as pl
import plotly.graph_objs as go
from nifty.library.wiener_filter import WienerFilterEnergy
import plotly.graph_objs as go
import plotly.offline as pl
from mpi4py import MPI
comm = MPI.COMM_WORLD
rank = comm.rank
np.random.seed(42)
def plot_parameters(m,t,p, p_d):
def plot_parameters(m, t, p, p_d):
x = log(t.domain[0].kindex)
m = fft.adjoint_times(m)
......@@ -20,7 +24,8 @@ def plot_parameters(m,t,p, p_d):
p = p.val.get_full_data().real
p_d = p_d.val.get_full_data().real
pl.plot([go.Heatmap(z=m)], filename='map.html')
pl.plot([go.Scatter(x=x,y=t), go.Scatter(x=x ,y=p), go.Scatter(x=x, y=p_d)], filename="t.html")
pl.plot([go.Scatter(x=x, y=t), go.Scatter(x=x, y=p),
go.Scatter(x=x, y=p_d)], filename="t.html")
class AdjointFFTResponse(LinearOperator):
......@@ -36,6 +41,7 @@ class AdjointFFTResponse(LinearOperator):
def _adjoint_times(self, x, spaces=None):
return self.FFT(self.R.adjoint_times(x))
@property
def domain(self):
return self._domain
......@@ -48,41 +54,40 @@ class AdjointFFTResponse(LinearOperator):
def unitary(self):
return False
if __name__ == "__main__":
distribution_strategy = 'not'
# Set up position space
s_space = RGSpace([128,128])
s_space = RGSpace([128, 128])
# s_space = HPSpace(32)
# Define harmonic transformation and associated harmonic space
fft = FFTOperator(s_space)
h_space = fft.target[0]
# Setting up power space
# Set up power space
p_space = PowerSpace(h_space, logarithmic=True,
distribution_strategy=distribution_strategy)
# Choosing the prior correlation structure and defining correlation operator
# Choose the prior correlation structure and defining correlation operator
p_spec = (lambda k: (.5 / (k + 1) ** 3))
S = create_power_operator(h_space, power_spectrum=p_spec,
distribution_strategy=distribution_strategy)
# Drawing a sample sh from the prior distribution in harmonic space
# Draw a sample sh from the prior distribution in harmonic space
sp = Field(p_space, val=p_spec,
distribution_strategy=distribution_strategy)
sh = sp.power_synthesize(real_signal=True)
# Choosing the measurement instrument
# Choose the measurement instrument
# Instrument = SmoothingOperator(s_space, sigma=0.01)
Instrument = DiagonalOperator(s_space, diagonal=1.)
# Instrument._diagonal.val[200:400, 200:400] = 0
#Instrument._diagonal.val[64:512-64, 64:512-64] = 0
# Instrument._diagonal.val[64:512-64, 64:512-64] = 0
#Adding a harmonic transformation to the instrument
# Add a harmonic transformation to the instrument
R = AdjointFFTResponse(fft, Instrument)
noise = 1.
......@@ -92,7 +97,7 @@ if __name__ == "__main__":
std=sqrt(noise),
mean=0)
# Creating the mock data
# Create mock data
d = R(sh) + n
# The information source
......@@ -103,52 +108,49 @@ if __name__ == "__main__":
if rank == 0:
pl.plot([go.Heatmap(z=d_data)], filename='data.html')
# minimization strategy
def convergence_measure(a_energy, iteration): # returns current energy
# Minimization strategy
def convergence_measure(a_energy, iteration): # returns current energy
x = a_energy.value
print (x, iteration)
minimizer1 = RelaxedNewton(convergence_tolerance=1e-2,
convergence_level=2,
iteration_limit=3,
callback=convergence_measure)
minimizer2 = VL_BFGS(convergence_tolerance=0,
iteration_limit=7,
callback=convergence_measure,
max_history_length=3)
# Setting starting position
flat_power = Field(p_space,val=1e-8)
print(x, iteration)
minimizer1 = RelaxedNewton(convergence_tolerance=1e-4,
convergence_level=1,
iteration_limit=5,
callback=convergence_measure)
minimizer2 = VL_BFGS(convergence_tolerance=1e-4,
convergence_level=1,
iteration_limit=20,
callback=convergence_measure,
max_history_length=20)
minimizer3 = SteepestDescent(convergence_tolerance=1e-4,
iteration_limit=100,
callback=convergence_measure)
# Set starting position
flat_power = Field(p_space, val=1e-8)
m0 = flat_power.power_synthesize(real_signal=True)
t0 = Field(p_space, val=log(1./(1+p_space.kindex)**2))
for i in range(500):
S0 = create_power_operator(h_space, power_spectrum=exp(t0),
distribution_strategy=distribution_strategy)
distribution_strategy=distribution_strategy)
# Initializing the nonlinear Wiener Filter energy
# Initialize non-linear Wiener Filter energy
map_energy = WienerFilterEnergy(position=m0, d=d, R=R, N=N, S=S0)
# Solving the Wiener Filter analytically
# Solve the Wiener Filter analytically
D0 = map_energy.curvature
m0 = D0.inverse_times(j)
# Initializing the power energy with updated parameters
power_energy = CriticalPowerEnergy(position=t0, m=m0, D=D0, smoothness_prior=10., samples=3)
(power_energy, convergence) = minimizer1(power_energy)
# Setting new power spectrum
t0.val = power_energy.position.val.real
# Initialize power energy with updated parameters
power_energy = CriticalPowerEnergy(position=t0, m=m0, D=D0,
smoothness_prior=10., samples=3)
# Plotting current estimate
print i
if i%50 == 0:
plot_parameters(m0,t0,log(sp), data_power)
(power_energy, convergence) = minimizer2(power_energy)
# Set new power spectrum
t0.val = power_energy.position.val.real
# Plot current estimate
print(i)
if i % 5 == 0:
plot_parameters(m0, t0, log(sp), data_power)
......@@ -16,10 +16,8 @@
# NIFTy is being developed at the Max-Planck-Institut fuer Astrophysik
# and financially supported by the Studienstiftung des deutschen Volkes.
from .energy import Energy
class LineEnergy(Energy):
class LineEnergy(object):
""" Evaluates an underlying Energy along a certain line direction.
Given an Energy class and a line direction, its position is parametrized by
......@@ -27,34 +25,31 @@ class LineEnergy(Energy):
Parameters
----------
position : float
The step length parameter along the given line direction.
line_position : float
Defines the full spatial position of this energy via
self.energy.position = zero_point + line_position*line_direction
energy : Energy
The Energy object which will be evaluated along the given direction.
line_direction : Field
Direction used for line evaluation.
zero_point : Field *optional*
Fixing the zero point of the line restriction. Used to memorize this
position in new initializations. By the default the current position
of the supplied `energy` instance is used (default : None).
Direction used for line evaluation. Does not have to be normalized.
offset : float *optional*
Indirectly defines the zero point of the line via the equation
energy.position = zero_point + offset*line_direction
(default : 0.).
Attributes
----------
position : float
line_position : float
The position along the given line direction relative to the zero point.
value : float
The value of the energy functional at given `position`.
gradient : float
The gradient of the underlying energy instance along the line direction
projected on the line direction.
curvature : callable
A positive semi-definite operator or function describing the curvature
of the potential at given `position`.
The value of the energy functional at the given position
directional_derivative : float
The directional derivative at the given position
line_direction : Field
Direction along which the movement is restricted. Does not have to be
normalized.
energy : Energy
The underlying Energy at the `position` along the line direction.
The underlying Energy at the given position
Raises
------
......@@ -72,45 +67,49 @@ class LineEnergy(Energy):
"""
def __init__(self, position, energy, line_direction, zero_point=None):
super(LineEnergy, self).__init__(position=position)
self.line_direction = line_direction
if zero_point is None:
zero_point = energy.position
self._zero_point = zero_point
def __init__(self, line_position, energy, line_direction, offset=0.):
self._line_position = float(line_position)
self._line_direction = line_direction
position_on_line = self._zero_point + self.position*line_direction
self.energy = energy.at(position=position_on_line)
pos = energy.position \
+ (self._line_position-float(offset))*self._line_direction
self.energy = energy.at(position=pos)
def at(self, position):
def at(self, line_position):
""" Returns LineEnergy at new position, memorizing the zero point.
Parameters
----------
position : float
line_position : float
Parameter for the new position on the line direction.
Returns
-------
out : LineEnergy
LineEnergy object at new position with same zero point as `self`.
"""
return self.__class__(position,
return self.__class__(line_position,
self.energy,
self.line_direction,
zero_point=self._zero_point)
offset=self.line_position)
@property
def value(self):
return self.energy.value
@property
def gradient(self):
return self.energy.gradient.vdot(self.line_direction)
def line_position(self):
return self._line_position
@property
def line_direction(self):
return self._line_direction
@property
def curvature(self):
return self.energy.curvature
def directional_derivative(self):
res = self.energy.gradient.vdot(self.line_direction)
if abs(res.imag) / max(abs(res.real), 1.) > 1e-12:
print "directional derivative has non-negligible " \
"imaginary part:", res
return res.real
......@@ -137,7 +137,7 @@ class DescentMinimizer(Loggable, object):
# compute the the gradient for the current location
gradient = energy.gradient
gradient_norm = gradient.vdot(gradient)
gradient_norm = gradient.norm()
# check if position is at a flat point
if gradient_norm == 0:
......@@ -147,7 +147,6 @@ class DescentMinimizer(Loggable, object):
# current position is encoded in energy object
descent_direction = self.get_descent_direction(energy)
# compute the step length, which minimizes energy.value along the
# search direction
step_length, f_k, new_energy = \
......@@ -155,8 +154,12 @@ class DescentMinimizer(Loggable, object):
energy=energy,
pk=descent_direction,
f_k_minus_1=f_k_minus_1)
if f_k_minus_1 is None:
delta=1e30
else:
delta = abs(f_k -f_k_minus_1)/max(abs(f_k),abs(f_k_minus_1),1.)
f_k_minus_1 = energy.value
tx1=energy.position-new_energy.position
# check if new energy value is bigger than old energy value
if (new_energy.value - energy.value) > 0:
self.logger.info("Line search algorithm returned a new energy "
......@@ -165,7 +168,6 @@ class DescentMinimizer(Loggable, object):
energy = new_energy
# check convergence
delta = abs(gradient).max() * (step_length/np.sqrt(gradient_norm))
self.logger.debug("Iteration:%08u step_length=%3.1E "
"delta=%3.1E energy=%3.1E" %
(iteration_number, step_length, delta,
......
......@@ -63,7 +63,7 @@ class LineSearch(Loggable, object):
iteration of the line search procedure. (Default: None)
"""
self.line_energy = LineEnergy(position=0.,
self.line_energy = LineEnergy(line_position=0.,
energy=energy,
line_direction=pk)
if f_k_minus_1 is not None:
......
......@@ -62,8 +62,8 @@ class LineSearchStrongWolfe(LineSearch):
"""
def __init__(self, c1=1e-4, c2=0.9,
max_step_size=50, max_iterations=10,
max_zoom_iterations=10):
max_step_size=1000000000, max_iterations=100,
max_zoom_iterations=30):
super(LineSearchStrongWolfe, self).__init__()
......@@ -103,20 +103,15 @@ class LineSearchStrongWolfe(LineSearch):
"""
self._set_line_energy(energy, pk, f_k_minus_1=f_k_minus_1)
c1 = self.c1
c2 = self.c2
max_step_size = self.max_step_size
max_iterations = self.max_iterations
# initialize the zero phis
old_phi_0 = self.f_k_minus_1
energy_0 = self.line_energy.at(0)
phi_0 = energy_0.value
phiprime_0 = energy_0.gradient
if phiprime_0 == 0:
self.logger.warn("Flat gradient in search direction.")
return 0., 0.
le_0 = self.line_energy.at(0)
phi_0 = le_0.value
phiprime_0 = le_0.directional_derivative
assert phiprime_0<0, "input direction must be a descent direction"
# set alphas
alpha0 = 0.
......@@ -135,64 +130,67 @@ class LineSearchStrongWolfe(LineSearch):
# start the minimization loop
for i in xrange(max_iterations):
energy_alpha1 = self.line_energy.at(alpha1)
phi_alpha1 = energy_alpha1.value
le_alpha1 = self.line_energy.at(alpha1)
phi_alpha1 = le_alpha1.value
if alpha1 == 0:
self.logger.warn("Increment size became 0.")
alpha_star = 0.
phi_star = phi_0
energy_star = energy_0
le_star = le_0
break
if (phi_alpha1 > phi_0 + c1*alpha1*phiprime_0) or \
((phi_alpha1 >= phi_alpha0) and (i > 1)):
(alpha_star, phi_star, energy_star) = self._zoom(
if (phi_alpha1 > phi_0 + self.c1*alpha1*phiprime_0) or \
((phi_alpha1 >= phi_alpha0) and (i > 0)):
(alpha_star, phi_star, le_star) = self._zoom(
alpha0, alpha1,
phi_0, phiprime_0,
phi_alpha0,
phiprime_alpha0,
phi_alpha1,
c1, c2)
phi_alpha1)
break
phiprime_alpha1 = energy_alpha1.gradient
if abs(phiprime_alpha1) <= -c2*phiprime_0:
phiprime_alpha1 = le_alpha1.directional_derivative
if abs(phiprime_alpha1) <= -self.c2*phiprime_0:
alpha_star = alpha1
phi_star = phi_alpha1
energy_star = energy_alpha1
le_star = le_alpha1
break
if phiprime_alpha1 >= 0:
(alpha_star, phi_star, energy_star) = self._zoom(
(alpha_star, phi_star, le_star) = self._zoom(
alpha1, alpha0,
phi_0, phiprime_0,
phi_alpha1,
phiprime_alpha1,
phi_alpha0,
c1, c2)
phi_alpha0)
break
# update alphas
alpha0, alpha1 = alpha1, min(2*alpha1, max_step_size)
if alpha1 == max_step_size:
print "reached max step size, bailing out"
alpha_star = alpha1
phi_star = phi_alpha1
le_star = le_alpha1
break
phi_alpha0 = phi_alpha1
phiprime_alpha0 = phiprime_alpha1
# phi_alpha1 = self._phi(alpha1)
else:
# max_iterations was reached
alpha_star = alpha1
phi_star = phi_alpha1
energy_star = energy_alpha1
le_star = le_alpha1
self.logger.error("The line search algorithm did not converge.")
# extract the full energy from the line_energy
energy_star = energy_star.energy
energy_star = le_star.energy
direction_length = pk.norm()
step_length = alpha_star * direction_length
return step_length, phi_star, energy_star
def _zoom(self, alpha_lo, alpha_hi, phi_0, phiprime_0,
phi_lo, phiprime_lo, phi_hi, c1, c2):
phi_lo, phiprime_lo, phi_hi):
"""Performs the second stage of the line search algorithm.
If the first stage was not successful then the Zoom algorithm tries to
......@@ -202,9 +200,10 @@ class LineSearchStrongWolfe(LineSearch):
Parameters
----------
alpha_lo : float
The lower boundary for the step length interval.
alph_hi : float
The upper boundary for the step length interval.
A boundary for the step length interval.
Fulfills Wolfe condition 1.
alpha_hi : float
The other boundary for the step length interval.
phi_0 : float
Value of the energy at the starting point of the line search
algorithm.
......@@ -219,10 +218,6 @@ class LineSearchStrongWolfe(LineSearch):
phi_hi : float
Value of the energy if we perform a step of length alpha_hi in
descent direction.
c1 : float
Parameter for Armijo condition rule.
c2 : float
Parameter for curvature condition rule.
Returns
-------
......@@ -238,12 +233,13 @@ class LineSearchStrongWolfe(LineSearch):
# define the cubic and quadratic interpolant checks
cubic_delta = 0.2 # cubic
quad_delta = 0.1 # quadratic
phiprime_alphaj = 0.
# initialize the most recent versions (j-1) of phi and alpha
alpha_recent = 0
phi_recent = phi_0
assert phi_lo <= phi_0 + self.c1*alpha_lo*phiprime_0
assert phiprime_lo*(alpha_hi-alpha_lo)<0.
for i in xrange(max_iterations):
#assert phi_lo <= phi_0 + self.c1*alpha_lo*phiprime_0
#assert phiprime_lo*(alpha_hi-alpha_lo)<0.
delta_alpha = alpha_hi - alpha_lo
if delta_alpha < 0:
a, b = alpha_hi, alpha_lo
......@@ -262,28 +258,28 @@ class LineSearchStrongWolfe(LineSearch):
quad_check = quad_delta * delta_alpha
alpha_j = self._quadmin(alpha_lo, phi_lo, phiprime_lo,
alpha_hi, phi_hi)
# If quadratic was not successfull, try bisection
# If quadratic was not successful, try bisection
if (alpha_j is None) or (alpha_j > b - quad_check) or \
(alpha_j < a + quad_check):
alpha_j = alpha_lo + 0.5*delta_alpha
# Check if the current value of alpha_j is already sufficient
energy_alphaj = self.line_energy.at(alpha_j)
phi_alphaj = energy_alphaj.value
le_alphaj = self.line_energy.at(alpha_j)
phi_alphaj = le_alphaj.value
# If the first Wolfe condition is not met replace alpha_hi
# by alpha_j
if (phi_alphaj > phi_0 + c1*alpha_j*phiprime_0) or\
if (phi_alphaj > phi_0 + self.c1*alpha_j*phiprime_0) or\
(phi_alphaj >= phi_lo):
alpha_recent, phi_recent = alpha_hi, phi_hi
alpha_hi, phi_hi = alpha_j, phi_alphaj
else:
phiprime_alphaj = energy_alphaj.gradient
phiprime_alphaj = le_alphaj.directional_derivative
# If the second Wolfe condition is met, return the result
if abs(phiprime_alphaj) <= -c2*phiprime_0:
if abs(phiprime_alphaj) <= -self.c2*phiprime_0:
alpha_star = alpha_j
phi_star = phi_alphaj
energy_star = energy_alphaj
le_star = le_alphaj
break
# If not, check the sign of the slope
if phiprime_alphaj*delta_alpha >= 0:
......@@ -296,12 +292,12 @@ class LineSearchStrongWolfe(LineSearch):
phiprime_alphaj)
else:
alpha_star, phi_star, energy_star = \
alpha_j, phi_alphaj, energy_alphaj
alpha_star, phi_star, le_star = \
alpha_j, phi_alphaj, le_alphaj
self.logger.error("The line search algorithm (zoom) did not "
"converge.")
return alpha_star, phi_star, energy_star
return alpha_star, phi_star, le_star
def _cubicmin(self, a, fa, fpa, b, fb, c, fc):
"""Estimating the minimum with cubic interpolation.
......
......@@ -43,7 +43,8 @@ class Test_DescentMinimizers(unittest.TestCase):
covariance = DiagonalOperator(space, diagonal=covariance_diagonal)
energy = QuadraticPotential(position=starting_point,
eigenvalues=covariance)
minimizer = minimizer_class(iteration_limit=30)
minimizer = minimizer_class(iteration_limit=30,
convergence_tolerance=1e-10)
(energy, convergence) = minimizer(energy)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment