Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
ift
NIFTy
Commits
a648ea49
Commit
a648ea49
authored
Oct 20, 2016
by
theos
Browse files
Finished VL-BFGS minimizer. Still untested.
parent
6f4449c8
Changes
2
Hide whitespace changes
Inline
Side-by-side
nifty/minimization/__init__.py
View file @
a648ea49
...
...
@@ -4,3 +4,4 @@ from line_searching import *
from
conjugate_gradient
import
ConjugateGradient
from
quasi_newton_minimizer
import
QuasiNewtonMinimizer
from
steepest_descent
import
SteepestDescent
from
vl_bfgs
import
VL_BFGS
nifty/minimization/vl_bfgs.py
View file @
a648ea49
# -*- coding: utf-8 -*-
import
numpy
as
np
from
.quasi_newton_minimizer
import
QuasiNewtonMinimizer
from
.line_searching
import
LineSearchStrongWolfe
class
VL_BFGS
(
QuasiNewtonMinimizer
):
def
__init__
(
self
,
line_searcher
=
LineSearchStrongWolfe
(),
callback
=
None
,
convergence_tolerance
=
1E-4
,
convergence_level
=
3
,
iteration_limit
=
None
,
max_history_length
=
10
):
super
(
VL_BFGS
,
self
).
__init__
(
line_searcher
=
line_searcher
,
callback
=
callback
,
convergence_tolerance
=
convergence_tolerance
,
convergence_level
=
convergence_level
,
iteration_limit
=
iteration_limit
)
self
.
max_history_length
=
max_history_length
def
_get_descend_direction
(
self
,
x
,
gradient
):
pass
# initialize the information store if it doesn't already exist
try
:
self
.
_information_store
.
add_new_point
(
x
,
gradient
)
except
AttributeError
:
self
.
_information_store
=
InformationStore
(
self
.
max_history_length
,
x0
=
x
,
gradient
=
gradient
)
b
=
self
.
_information_store
.
b
delta
=
self
.
_information_store
.
delta
descend_direction
=
delta
[
0
]
*
b
[
0
]
for
i
in
xrange
(
1
,
len
(
delta
)):
descend_direction
+=
delta
[
i
]
*
b
[
i
]
return
descend_direction
class
InformationStore
(
object
):
def
__init__
(
self
,
history_length
,
x0
,
gradient
):
self
.
history_length
=
history_length
self
.
s
=
LimitedList
(
history_length
)
self
.
y
=
LimitedList
(
history_length
)
def
__init__
(
self
,
max_
history_length
,
x0
,
gradient
):
self
.
max_
history_length
=
max_
history_length
self
.
s
=
LimitedList
(
max_
history_length
)
self
.
y
=
LimitedList
(
max_
history_length
)
self
.
last_x
=
x0
self
.
last_gradient
=
gradient
self
.
k
=
0
self
.
dot_matrix
=
{}
self
.
_ss_store
=
{}
self
.
_sy_store
=
{}
self
.
_yy_store
=
{}
self
.
_sgrad_store
=
{}
self
.
_ygrad_store
=
{}
self
.
_gradgrad_store
=
None
# self.dot_matrix = {}
@
property
def
history_length
(
self
):
return
min
(
self
.
k
,
self
.
max_history_length
)
@
property
def
b
(
self
):
result
=
[]
m
=
self
.
history_length
k
=
self
.
k
s
=
self
.
s
for
i
in
xrange
(
m
):
result
.
append
(
s
[
k
-
m
+
i
])
y
=
self
.
y
for
i
in
xrange
(
m
):
result
.
append
(
y
[
k
-
m
+
i
])
result
.
append
(
self
.
last_gradient
)
return
result
@
property
def
b_dot_b
(
self
):
m
=
self
.
history_length
k
=
self
.
k
result
=
np
.
empty
((
2
*
m
+
1
,
2
*
m
+
1
),
dtype
=
np
.
float
)
for
i
in
xrange
(
m
):
for
j
in
xrange
(
m
):
result
[
i
,
j
]
=
self
.
ss_store
(
k
-
m
+
i
,
k
-
m
+
j
)
sy_ij
=
self
.
sy_store
(
k
-
m
+
i
,
k
-
m
+
j
)
result
[
i
,
m
+
j
]
=
sy_ij
result
[
m
+
j
,
i
]
=
sy_ij
result
[
m
+
i
,
m
+
j
]
=
self
.
yy_store
(
k
-
m
+
i
,
k
-
m
+
j
)
sgrad_i
=
self
.
sgrad_store
(
k
-
m
+
i
)
result
[
2
*
m
,
i
]
=
sgrad_i
result
[
i
,
2
*
m
]
=
sgrad_i
ygrad_i
=
self
.
ygrad_store
(
k
-
m
+
i
)
result
[
2
*
m
,
m
+
i
]
=
ygrad_i
result
[
m
+
i
,
2
*
m
]
=
ygrad_i
result
[
2
*
m
,
2
*
m
]
=
self
.
gradgrad_store
()
return
result
@
property
def
actual_history_length
(
self
):
return
min
(
self
.
k
,
self
.
history_length
)
def
delta
(
self
):
m
=
self
.
history_length
b_dot_b
=
self
.
b_dot_b
delta
=
np
.
zeros
(
2
*
m
+
1
,
dtype
=
np
.
float
)
delta
[
2
*
m
]
=
-
1
alpha
=
np
.
empty
(
m
,
dtype
=
np
.
float
)
for
j
in
xrange
(
m
-
1
,
-
1
,
-
1
):
delta_b_b
=
sum
([
delta
[
l
]
*
b_dot_b
[
l
,
j
]
for
l
in
xrange
(
2
*
m
+
1
)])
alpha
[
j
]
=
delta_b_b
/
b_dot_b
[
j
,
m
+
j
]
delta
[
m
+
j
]
-=
alpha
[
j
]
for
i
in
xrange
(
2
*
m
+
1
):
delta
[
i
]
*=
b_dot_b
[
m
-
1
,
2
*
m
-
1
]
/
b_dot_b
[
2
*
m
-
1
,
2
*
m
-
1
]
for
j
in
xrange
(
m
-
1
,
-
1
,
-
1
):
delta_b_b
=
sum
([
delta
[
l
]
*
b_dot_b
[
m
+
j
,
l
]
for
l
in
xrange
(
2
*
m
+
1
)])
beta
=
delta_b_b
/
b_dot_b
[
j
,
m
+
j
]
delta
[
j
]
+=
(
alpha
[
j
]
-
beta
)
return
delta
def
ss_store
(
self
,
i
,
j
):
key
=
tuple
(
sorted
((
i
,
j
)))
if
key
not
in
self
.
_ss_store
:
self
.
_ss_store
[
key
]
=
self
.
s
[
i
].
dot
(
self
.
s
[
j
])
return
self
.
_ss_store
[
key
]
def
sy_store
(
self
,
i
,
j
):
key
=
(
i
,
j
)
if
key
not
in
self
.
_sy_store
:
self
.
_sy_store
[
key
]
=
self
.
s
[
i
].
dot
(
self
.
y
[
j
])
return
self
.
_sy_store
[
key
]
def
yy_store
(
self
,
i
,
j
):
key
=
tuple
(
sorted
((
i
,
j
)))
if
key
not
in
self
.
_yy_store
:
self
.
_yy_store
[
key
]
=
self
.
y
[
i
].
dot
(
self
.
y
[
j
])
return
self
.
_yy_store
[
key
]
def
sgrad_store
(
self
,
i
):
return
self
.
s
[
i
].
dot
(
self
.
last_gradient
)
def
ygrad_store
(
self
,
i
):
return
self
.
y
[
i
].
dot
(
self
.
last_gradient
)
def
gradgrad_store
(
self
):
return
self
.
last_gradient
.
dot
(
self
.
last_gradient
)
def
add_new_point
(
self
,
x
,
gradient
):
self
.
k
+=
1
...
...
@@ -31,47 +168,50 @@ class InformationStore(object):
new_y
=
gradient
-
self
.
last_gradient
self
.
y
.
add
(
new_y
)
k
=
self
.
k
m
=
self
.
actual_history_length
big_m
=
self
.
history_length
# compute dot products
for
i
in
xrange
(
k
-
1
,
k
-
m
-
1
,
-
1
):
# new_s with s
key
=
(
big_m
+
m
,
big_m
+
1
+
i
)
self
.
dot_matrix
[
key
]
=
new_s
.
dot
(
self
.
s
[
i
])
# new_s with y
key
=
(
big_m
+
m
,
i
+
1
)
self
.
dot_matrix
[
key
]
=
new_s
.
dot
(
self
.
y
[
i
])
# new_y with s
if
i
!=
k
-
1
:
key
=
(
big_m
+
1
+
i
,
k
)
self
.
dot_matrix
[
key
]
=
new_y
.
dot
(
self
.
s
[
i
])
# new_y with y
# actually key = (i+1, k) but the convention is that the first
# index is larger than the second one
key
=
(
k
,
i
+
1
)
self
.
dot_matrix
[
key
]
=
new_y
.
dot
(
self
.
y
[
i
])
# gradient with s
key
=
(
big_m
+
1
+
i
,
0
)
self
.
dot_matrix
[
key
]
=
gradient
.
dot
(
self
.
s
[
i
])
# gradient with y
key
=
(
i
+
1
,
0
)
self
.
dot_matrix
[
key
]
=
gradient
.
dot
(
self
.
y
[
i
])
# gradient with gradient
key
=
(
0
,
0
)
self
.
dot_matrix
[
key
]
=
gradient
.
dot
(
gradient
)
self
.
last_x
=
x
self
.
last_gradient
=
gradient
# TODO: remove old entries from dictionary
#
# k = self.k
# m = self.actual_history_length
# big_m = self.history_length
#
# # compute dot products
# for i in xrange(k-1, k-m-1, -1):
# # new_s with s
# key = (big_m+m, big_m+1+i)
# self.dot_matrix[key] = new_s.dot(self.s[i])
#
# # new_s with y
# key = (big_m+m, i+1)
# self.dot_matrix[key] = new_s.dot(self.y[i])
#
# # new_y with s
# if i != k-1:
# key = (big_m+1+i, k)
# self.dot_matrix[key] = new_y.dot(self.s[i])
#
# # new_y with y
# # actually key = (i+1, k) but the convention is that the first
# # index is larger than the second one
# key = (k, i+1)
# self.dot_matrix[key] = new_y.dot(self.y[i])
#
# # gradient with s
# key = (big_m+1+i, 0)
# self.dot_matrix[key] = gradient.dot(self.s[i])
#
# # gradient with y
# key = (i+1, 0)
# self.dot_matrix[key] = gradient.dot(self.y[i])
#
# # gradient with gradient
# key = (0, 0)
# self.dot_matrix[key] = gradient.dot(gradient)
#
# self.last_x = x
# self.last_gradient = gradient
#
class
LimitedList
(
object
):
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment