vl_bfgs.py 7.65 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
Theo Steininger's avatar
Theo Steininger committed
13
#
Martin Reinecke's avatar
Martin Reinecke committed
14
# Copyright(C) 2013-2018 Max-Planck-Society
Theo Steininger's avatar
Theo Steininger committed
15
16
17
#
# NIFTy is being developed at the Max-Planck-Institut fuer Astrophysik
# and financially supported by the Studienstiftung des deutschen Volkes.
theos's avatar
theos committed
18

19
20
from __future__ import (absolute_import, division, print_function)
from builtins import *
21
import numpy as np
22
from .descent_minimizer import DescentMinimizer
Martin Reinecke's avatar
Martin Reinecke committed
23
from .line_search_strong_wolfe import LineSearchStrongWolfe
theos's avatar
theos committed
24
25


26
class VL_BFGS(DescentMinimizer):
Martin Reinecke's avatar
Martin Reinecke committed
27
28
29
30
31
32
33
34
35
36
37
38
39
40
    """Implementation of the Vector-free L-BFGS minimization scheme.

    Find the descent direction by using the inverse Hessian.
    Instead of storing the whole matrix, it stores only the last few
    updates, which are used to do operations requiring the inverse
    Hessian product. The updates are represented in a new basis to optimize
    the algorithm.

    References
    ----------
    W. Chen, Z. Wang, J. Zhou, "Large-scale L-BFGS using MapReduce", 2014,
    Microsoft
    """

Martin Reinecke's avatar
Martin Reinecke committed
41
42
    def __init__(self, controller, line_searcher=LineSearchStrongWolfe(),
                 max_history_length=5):
Martin Reinecke's avatar
cleanup    
Martin Reinecke committed
43
44
        super(VL_BFGS, self).__init__(controller=controller,
                                      line_searcher=line_searcher)
45
46
        self.max_history_length = max_history_length

47
    def __call__(self, energy):
48
        self._information_store = None
49
        return super(VL_BFGS, self).__call__(energy)
50

Martin Reinecke's avatar
stage 1    
Martin Reinecke committed
51
52
53
    def reset(self):
        self._information_store = None

54
    def get_descent_direction(self, energy):
55
56
        x = energy.position
        gradient = energy.gradient
57
58
59
60
        # initialize the information store if it doesn't already exist
        try:
            self._information_store.add_new_point(x, gradient)
        except AttributeError:
Martin Reinecke's avatar
Martin Reinecke committed
61
62
            self._information_store = _InformationStore(
                self.max_history_length, x0=x, gradient=gradient)
63
64
65
66

        b = self._information_store.b
        delta = self._information_store.delta

67
        descent_direction = delta[0] * b[0]
Martin Reinecke's avatar
Martin Reinecke committed
68
        for i in range(1, len(delta)):
69
            descent_direction = descent_direction + delta[i]*b[i]
70

71
        return descent_direction
theos's avatar
theos committed
72
73


Martin Reinecke's avatar
Martin Reinecke committed
74
class _InformationStore(object):
Matevz, Sraml (sraml)'s avatar
Matevz, Sraml (sraml) committed
75
    """Class for storing a list of past updates.
76

Matevz, Sraml (sraml)'s avatar
Matevz, Sraml (sraml) committed
77
78
    Parameters
    ----------
Martin Reinecke's avatar
Martin Reinecke committed
79
    max_history_length : int
Matevz, Sraml (sraml)'s avatar
Matevz, Sraml (sraml) committed
80
81
82
83
84
        Maximum number of stored past updates.
    x0 : Field
        Initial position in variable space.
    gradient : Field
        Gradient at position x0.
85

Matevz, Sraml (sraml)'s avatar
Matevz, Sraml (sraml) committed
86
87
    Attributes
    ----------
Martin Reinecke's avatar
Martin Reinecke committed
88
    max_history_length : int
Matevz, Sraml (sraml)'s avatar
Matevz, Sraml (sraml) committed
89
90
        Maximum number of stored past updates.
    s : List
Martin Reinecke's avatar
Martin Reinecke committed
91
        Circular buffer of past position differences, which are Fields.
Matevz, Sraml (sraml)'s avatar
Matevz, Sraml (sraml) committed
92
    y : List
Martin Reinecke's avatar
Martin Reinecke committed
93
        Circular buffer of past gradient differences, which are Fields.
Matevz, Sraml (sraml)'s avatar
Matevz, Sraml (sraml) committed
94
    last_x : Field
Martin Reinecke's avatar
tweaks    
Martin Reinecke committed
95
        Latest position in variable space.
Matevz, Sraml (sraml)'s avatar
Matevz, Sraml (sraml) committed
96
    last_gradient : Field
Martin Reinecke's avatar
tweaks    
Martin Reinecke committed
97
        Gradient at latest position.
Martin Reinecke's avatar
Martin Reinecke committed
98
    k : int
Martin Reinecke's avatar
tweaks    
Martin Reinecke committed
99
        Number of updates that have taken place
Martin Reinecke's avatar
Martin Reinecke committed
100
    ss : numpy.ndarray
Martin Reinecke's avatar
Martin Reinecke committed
101
        2D circular buffer of scalar products between different elements of s.
Martin Reinecke's avatar
Martin Reinecke committed
102
    sy : numpy.ndarray
Martin Reinecke's avatar
Martin Reinecke committed
103
        2D circular buffer of scalar products between elements of s and y.
Martin Reinecke's avatar
Martin Reinecke committed
104
    yy : numpy.ndarray
Martin Reinecke's avatar
Martin Reinecke committed
105
        2D circular buffer of scalar products between different elements of y.
Matevz, Sraml (sraml)'s avatar
Matevz, Sraml (sraml) committed
106
    """
107
108
    def __init__(self, max_history_length, x0, gradient):
        self.max_history_length = max_history_length
109
110
        self.s = [None]*max_history_length
        self.y = [None]*max_history_length
111
112
        self.last_x = x0
        self.last_gradient = gradient
theos's avatar
theos committed
113
        self.k = 0
114

Martin Reinecke's avatar
Martin Reinecke committed
115
        mmax = max_history_length
Martin Reinecke's avatar
Martin Reinecke committed
116
117
118
        self.ss = np.empty((mmax, mmax), dtype=np.float64)
        self.sy = np.empty((mmax, mmax), dtype=np.float64)
        self.yy = np.empty((mmax, mmax), dtype=np.float64)
119
120
121

    @property
    def history_length(self):
Martin Reinecke's avatar
Martin Reinecke committed
122
        """Returns the number of currently stored updates."""
123
124
125
126
        return min(self.k, self.max_history_length)

    @property
    def b(self):
127
128
        """Combines s, y and gradient to form the new base vectors b.

Matevz, Sraml (sraml)'s avatar
Matevz, Sraml (sraml) committed
129
130
        Returns
        -------
Martin Reinecke's avatar
Martin Reinecke committed
131
        List
Matevz, Sraml (sraml)'s avatar
Matevz, Sraml (sraml) committed
132
133
            List of new basis vectors.
        """
134
135
        result = []
        m = self.history_length
Martin Reinecke's avatar
Martin Reinecke committed
136
        mmax = self.max_history_length
137

Martin Reinecke's avatar
Martin Reinecke committed
138
        for i in range(m):
Martin Reinecke's avatar
Martin Reinecke committed
139
            result.append(self.s[(self.k-m+i) % mmax])
140

Martin Reinecke's avatar
Martin Reinecke committed
141
        for i in range(m):
Martin Reinecke's avatar
Martin Reinecke committed
142
            result.append(self.y[(self.k-m+i) % mmax])
143
144
145
146
147
148
149

        result.append(self.last_gradient)

        return result

    @property
    def b_dot_b(self):
Matevz, Sraml (sraml)'s avatar
Matevz, Sraml (sraml) committed
150
        """Generates the (2m+1) * (2m+1) scalar matrix.
151

Matevz, Sraml (sraml)'s avatar
Matevz, Sraml (sraml) committed
152
        The i,j-th element of the matrix is a scalar product between the i-th
153
154
        and j-th base vector.

Matevz, Sraml (sraml)'s avatar
Matevz, Sraml (sraml) committed
155
156
        Returns
        -------
Martin Reinecke's avatar
Martin Reinecke committed
157
        numpy.ndarray
Matevz, Sraml (sraml)'s avatar
Matevz, Sraml (sraml) committed
158
159
            Scalar matrix.
        """
160
        m = self.history_length
Martin Reinecke's avatar
Martin Reinecke committed
161
        mmax = self.max_history_length
162
163
164
        k = self.k
        result = np.empty((2*m+1, 2*m+1), dtype=np.float)

Martin Reinecke's avatar
tweaks    
Martin Reinecke committed
165
        # update the stores
Martin Reinecke's avatar
Martin Reinecke committed
166
        k1 = (k-1) % mmax
Martin Reinecke's avatar
Martin Reinecke committed
167
        for i in range(m):
Martin Reinecke's avatar
Martin Reinecke committed
168
            kmi = (k-m+i) % mmax
Martin Reinecke's avatar
Martin Reinecke committed
169
170
171
            self.ss[kmi, k1] = self.ss[k1, kmi] = self.s[kmi].vdot(self.s[k1])
            self.yy[kmi, k1] = self.yy[k1, kmi] = self.y[kmi].vdot(self.y[k1])
            self.sy[kmi, k1] = self.s[kmi].vdot(self.y[k1])
Martin Reinecke's avatar
Martin Reinecke committed
172
        for j in range(m-1):
Martin Reinecke's avatar
Martin Reinecke committed
173
174
            kmj = (k-m+j) % mmax
            self.sy[k1, kmj] = self.s[k1].vdot(self.y[kmj])
Martin Reinecke's avatar
tweaks    
Martin Reinecke committed
175

Martin Reinecke's avatar
Martin Reinecke committed
176
        for i in range(m):
Martin Reinecke's avatar
Martin Reinecke committed
177
            kmi = (k-m+i) % mmax
Martin Reinecke's avatar
Martin Reinecke committed
178
            for j in range(m):
Martin Reinecke's avatar
Martin Reinecke committed
179
                kmj = (k-m+j) % mmax
Martin Reinecke's avatar
Martin Reinecke committed
180
181
182
                result[i, j] = self.ss[kmi, kmj]
                result[i, m+j] = result[m+j, i] = self.sy[kmi, kmj]
                result[m+i, m+j] = self.yy[kmi, kmj]
183

184
            sgrad_i = self.s[kmi].vdot(self.last_gradient)
Martin Reinecke's avatar
tweaks    
Martin Reinecke committed
185
            result[2*m, i] = result[i, 2*m] = sgrad_i
186

Martin Reinecke's avatar
fix    
Martin Reinecke committed
187
            ygrad_i = self.y[kmi].vdot(self.last_gradient)
Martin Reinecke's avatar
tweaks    
Martin Reinecke committed
188
            result[2*m, m+i] = result[m+i, 2*m] = ygrad_i
189

Martin Reinecke's avatar
tweaks    
Martin Reinecke committed
190
        result[2*m, 2*m] = self.last_gradient.norm()
191
        return result
theos's avatar
theos committed
192
193

    @property
194
    def delta(self):
Matevz, Sraml (sraml)'s avatar
Matevz, Sraml (sraml) committed
195
        """Calculates the new scalar coefficients (deltas).
196

Matevz, Sraml (sraml)'s avatar
Matevz, Sraml (sraml) committed
197
198
        Returns
        -------
Martin Reinecke's avatar
Martin Reinecke committed
199
        List
Matevz, Sraml (sraml)'s avatar
Matevz, Sraml (sraml) committed
200
201
            List of the new scalar coefficients (deltas).
        """
202
203
204
205
206
207
208
209
        m = self.history_length
        b_dot_b = self.b_dot_b

        delta = np.zeros(2*m+1, dtype=np.float)
        delta[2*m] = -1

        alpha = np.empty(m, dtype=np.float)

Martin Reinecke's avatar
Martin Reinecke committed
210
211
        for j in range(m-1, -1, -1):
            delta_b_b = sum([delta[l] * b_dot_b[l, j] for l in range(2*m+1)])
212
213
214
            alpha[j] = delta_b_b/b_dot_b[j, m+j]
            delta[m+j] -= alpha[j]

Martin Reinecke's avatar
Martin Reinecke committed
215
        for i in range(2*m+1):
216
217
            delta[i] *= b_dot_b[m-1, 2*m-1]/b_dot_b[2*m-1, 2*m-1]

Martin Reinecke's avatar
Martin Reinecke committed
218
        for j in range(m):
Martin Reinecke's avatar
Martin Reinecke committed
219
            delta_b_b = sum([delta[l]*b_dot_b[m+j, l] for l in range(2*m+1)])
220
221
222
223
224
            beta = delta_b_b/b_dot_b[j, m+j]
            delta[j] += (alpha[j] - beta)

        return delta

theos's avatar
theos committed
225
    def add_new_point(self, x, gradient):
Matevz, Sraml (sraml)'s avatar
Matevz, Sraml (sraml) committed
226
227
        """Updates the s list and y list.

Martin Reinecke's avatar
Martin Reinecke committed
228
229
        Calculates the new position and gradient differences and enters them
        into the respective list.
Matevz, Sraml (sraml)'s avatar
Matevz, Sraml (sraml) committed
230
        """
Martin Reinecke's avatar
Martin Reinecke committed
231
232
233
        mmax = self.max_history_length
        self.s[self.k % mmax] = x - self.last_x
        self.y[self.k % mmax] = gradient - self.last_gradient
theos's avatar
theos committed
234

235
236
        self.last_x = x
        self.last_gradient = gradient
theos's avatar
theos committed
237

Martin Reinecke's avatar
fixes    
Martin Reinecke committed
238
        self.k += 1