vl_bfgs.py 7.65 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
Theo Steininger's avatar
Theo Steininger committed
13
#
Martin Reinecke's avatar
Martin Reinecke committed
14
# Copyright(C) 2013-2018 Max-Planck-Society
Theo Steininger's avatar
Theo Steininger committed
15
16
17
#
# NIFTy is being developed at the Max-Planck-Institut fuer Astrophysik
# and financially supported by the Studienstiftung des deutschen Volkes.
theos's avatar
theos committed
18

19
from __future__ import absolute_import, division, print_function
20

21
import numpy as np
22
23

from ..compat import *
24
from .descent_minimizer import DescentMinimizer
Martin Reinecke's avatar
Martin Reinecke committed
25
from .line_search_strong_wolfe import LineSearchStrongWolfe
theos's avatar
theos committed
26
27


28
class VL_BFGS(DescentMinimizer):
Martin Reinecke's avatar
Martin Reinecke committed
29
30
31
32
33
34
35
36
37
38
39
40
41
42
    """Implementation of the Vector-free L-BFGS minimization scheme.

    Find the descent direction by using the inverse Hessian.
    Instead of storing the whole matrix, it stores only the last few
    updates, which are used to do operations requiring the inverse
    Hessian product. The updates are represented in a new basis to optimize
    the algorithm.

    References
    ----------
    W. Chen, Z. Wang, J. Zhou, "Large-scale L-BFGS using MapReduce", 2014,
    Microsoft
    """

Martin Reinecke's avatar
Martin Reinecke committed
43
44
    def __init__(self, controller, line_searcher=LineSearchStrongWolfe(),
                 max_history_length=5):
Martin Reinecke's avatar
cleanup    
Martin Reinecke committed
45
46
        super(VL_BFGS, self).__init__(controller=controller,
                                      line_searcher=line_searcher)
47
48
        self.max_history_length = max_history_length

49
    def __call__(self, energy):
50
        self._information_store = None
51
        return super(VL_BFGS, self).__call__(energy)
52

Martin Reinecke's avatar
stage 1    
Martin Reinecke committed
53
54
55
    def reset(self):
        self._information_store = None

56
    def get_descent_direction(self, energy):
57
58
        x = energy.position
        gradient = energy.gradient
59
60
61
62
        # initialize the information store if it doesn't already exist
        try:
            self._information_store.add_new_point(x, gradient)
        except AttributeError:
Martin Reinecke's avatar
Martin Reinecke committed
63
64
            self._information_store = _InformationStore(
                self.max_history_length, x0=x, gradient=gradient)
65
66
67
68

        b = self._information_store.b
        delta = self._information_store.delta

69
        descent_direction = delta[0] * b[0]
Martin Reinecke's avatar
Martin Reinecke committed
70
        for i in range(1, len(delta)):
71
            descent_direction = descent_direction + delta[i]*b[i]
72

73
        return descent_direction
theos's avatar
theos committed
74
75


Martin Reinecke's avatar
Martin Reinecke committed
76
class _InformationStore(object):
Matevz, Sraml (sraml)'s avatar
Matevz, Sraml (sraml) committed
77
    """Class for storing a list of past updates.
78

Matevz, Sraml (sraml)'s avatar
Matevz, Sraml (sraml) committed
79
80
    Parameters
    ----------
Martin Reinecke's avatar
Martin Reinecke committed
81
    max_history_length : int
Matevz, Sraml (sraml)'s avatar
Matevz, Sraml (sraml) committed
82
83
84
85
86
        Maximum number of stored past updates.
    x0 : Field
        Initial position in variable space.
    gradient : Field
        Gradient at position x0.
87

Matevz, Sraml (sraml)'s avatar
Matevz, Sraml (sraml) committed
88
89
    Attributes
    ----------
Martin Reinecke's avatar
Martin Reinecke committed
90
    max_history_length : int
Matevz, Sraml (sraml)'s avatar
Matevz, Sraml (sraml) committed
91
92
        Maximum number of stored past updates.
    s : List
Martin Reinecke's avatar
Martin Reinecke committed
93
        Circular buffer of past position differences, which are Fields.
Matevz, Sraml (sraml)'s avatar
Matevz, Sraml (sraml) committed
94
    y : List
Martin Reinecke's avatar
Martin Reinecke committed
95
        Circular buffer of past gradient differences, which are Fields.
Matevz, Sraml (sraml)'s avatar
Matevz, Sraml (sraml) committed
96
    last_x : Field
Martin Reinecke's avatar
tweaks    
Martin Reinecke committed
97
        Latest position in variable space.
Matevz, Sraml (sraml)'s avatar
Matevz, Sraml (sraml) committed
98
    last_gradient : Field
Martin Reinecke's avatar
tweaks    
Martin Reinecke committed
99
        Gradient at latest position.
Martin Reinecke's avatar
Martin Reinecke committed
100
    k : int
Martin Reinecke's avatar
tweaks    
Martin Reinecke committed
101
        Number of updates that have taken place
Martin Reinecke's avatar
Martin Reinecke committed
102
    ss : numpy.ndarray
Martin Reinecke's avatar
Martin Reinecke committed
103
        2D circular buffer of scalar products between different elements of s.
Martin Reinecke's avatar
Martin Reinecke committed
104
    sy : numpy.ndarray
Martin Reinecke's avatar
Martin Reinecke committed
105
        2D circular buffer of scalar products between elements of s and y.
Martin Reinecke's avatar
Martin Reinecke committed
106
    yy : numpy.ndarray
Martin Reinecke's avatar
Martin Reinecke committed
107
        2D circular buffer of scalar products between different elements of y.
Matevz, Sraml (sraml)'s avatar
Matevz, Sraml (sraml) committed
108
    """
109
110
    def __init__(self, max_history_length, x0, gradient):
        self.max_history_length = max_history_length
111
112
        self.s = [None]*max_history_length
        self.y = [None]*max_history_length
113
114
        self.last_x = x0
        self.last_gradient = gradient
theos's avatar
theos committed
115
        self.k = 0
116

Martin Reinecke's avatar
Martin Reinecke committed
117
        mmax = max_history_length
Martin Reinecke's avatar
Martin Reinecke committed
118
119
120
        self.ss = np.empty((mmax, mmax), dtype=np.float64)
        self.sy = np.empty((mmax, mmax), dtype=np.float64)
        self.yy = np.empty((mmax, mmax), dtype=np.float64)
121
122
123

    @property
    def history_length(self):
Martin Reinecke's avatar
Martin Reinecke committed
124
        """Returns the number of currently stored updates."""
125
126
127
128
        return min(self.k, self.max_history_length)

    @property
    def b(self):
129
130
        """Combines s, y and gradient to form the new base vectors b.

Matevz, Sraml (sraml)'s avatar
Matevz, Sraml (sraml) committed
131
132
        Returns
        -------
Martin Reinecke's avatar
Martin Reinecke committed
133
        List
Matevz, Sraml (sraml)'s avatar
Matevz, Sraml (sraml) committed
134
135
            List of new basis vectors.
        """
136
137
        result = []
        m = self.history_length
Martin Reinecke's avatar
Martin Reinecke committed
138
        mmax = self.max_history_length
139

Martin Reinecke's avatar
Martin Reinecke committed
140
        for i in range(m):
Martin Reinecke's avatar
Martin Reinecke committed
141
            result.append(self.s[(self.k-m+i) % mmax])
142

Martin Reinecke's avatar
Martin Reinecke committed
143
        for i in range(m):
Martin Reinecke's avatar
Martin Reinecke committed
144
            result.append(self.y[(self.k-m+i) % mmax])
145
146
147
148
149
150
151

        result.append(self.last_gradient)

        return result

    @property
    def b_dot_b(self):
Matevz, Sraml (sraml)'s avatar
Matevz, Sraml (sraml) committed
152
        """Generates the (2m+1) * (2m+1) scalar matrix.
153

Matevz, Sraml (sraml)'s avatar
Matevz, Sraml (sraml) committed
154
        The i,j-th element of the matrix is a scalar product between the i-th
155
156
        and j-th base vector.

Matevz, Sraml (sraml)'s avatar
Matevz, Sraml (sraml) committed
157
158
        Returns
        -------
Martin Reinecke's avatar
Martin Reinecke committed
159
        numpy.ndarray
Matevz, Sraml (sraml)'s avatar
Matevz, Sraml (sraml) committed
160
161
            Scalar matrix.
        """
162
        m = self.history_length
Martin Reinecke's avatar
Martin Reinecke committed
163
        mmax = self.max_history_length
164
165
166
        k = self.k
        result = np.empty((2*m+1, 2*m+1), dtype=np.float)

Martin Reinecke's avatar
tweaks    
Martin Reinecke committed
167
        # update the stores
Martin Reinecke's avatar
Martin Reinecke committed
168
        k1 = (k-1) % mmax
Martin Reinecke's avatar
Martin Reinecke committed
169
        for i in range(m):
Martin Reinecke's avatar
Martin Reinecke committed
170
            kmi = (k-m+i) % mmax
Martin Reinecke's avatar
Martin Reinecke committed
171
172
173
            self.ss[kmi, k1] = self.ss[k1, kmi] = self.s[kmi].vdot(self.s[k1])
            self.yy[kmi, k1] = self.yy[k1, kmi] = self.y[kmi].vdot(self.y[k1])
            self.sy[kmi, k1] = self.s[kmi].vdot(self.y[k1])
Martin Reinecke's avatar
Martin Reinecke committed
174
        for j in range(m-1):
Martin Reinecke's avatar
Martin Reinecke committed
175
176
            kmj = (k-m+j) % mmax
            self.sy[k1, kmj] = self.s[k1].vdot(self.y[kmj])
Martin Reinecke's avatar
tweaks    
Martin Reinecke committed
177

Martin Reinecke's avatar
Martin Reinecke committed
178
        for i in range(m):
Martin Reinecke's avatar
Martin Reinecke committed
179
            kmi = (k-m+i) % mmax
Martin Reinecke's avatar
Martin Reinecke committed
180
            for j in range(m):
Martin Reinecke's avatar
Martin Reinecke committed
181
                kmj = (k-m+j) % mmax
Martin Reinecke's avatar
Martin Reinecke committed
182
183
184
                result[i, j] = self.ss[kmi, kmj]
                result[i, m+j] = result[m+j, i] = self.sy[kmi, kmj]
                result[m+i, m+j] = self.yy[kmi, kmj]
185

186
            sgrad_i = self.s[kmi].vdot(self.last_gradient)
Martin Reinecke's avatar
tweaks    
Martin Reinecke committed
187
            result[2*m, i] = result[i, 2*m] = sgrad_i
188

Martin Reinecke's avatar
fix    
Martin Reinecke committed
189
            ygrad_i = self.y[kmi].vdot(self.last_gradient)
Martin Reinecke's avatar
tweaks    
Martin Reinecke committed
190
            result[2*m, m+i] = result[m+i, 2*m] = ygrad_i
191

Martin Reinecke's avatar
tweaks    
Martin Reinecke committed
192
        result[2*m, 2*m] = self.last_gradient.norm()
193
        return result
theos's avatar
theos committed
194
195

    @property
196
    def delta(self):
Matevz, Sraml (sraml)'s avatar
Matevz, Sraml (sraml) committed
197
        """Calculates the new scalar coefficients (deltas).
198

Matevz, Sraml (sraml)'s avatar
Matevz, Sraml (sraml) committed
199
200
        Returns
        -------
Martin Reinecke's avatar
Martin Reinecke committed
201
        List
Matevz, Sraml (sraml)'s avatar
Matevz, Sraml (sraml) committed
202
203
            List of the new scalar coefficients (deltas).
        """
204
205
206
207
208
209
210
211
        m = self.history_length
        b_dot_b = self.b_dot_b

        delta = np.zeros(2*m+1, dtype=np.float)
        delta[2*m] = -1

        alpha = np.empty(m, dtype=np.float)

Martin Reinecke's avatar
Martin Reinecke committed
212
213
        for j in range(m-1, -1, -1):
            delta_b_b = sum([delta[l] * b_dot_b[l, j] for l in range(2*m+1)])
214
215
216
            alpha[j] = delta_b_b/b_dot_b[j, m+j]
            delta[m+j] -= alpha[j]

Martin Reinecke's avatar
Martin Reinecke committed
217
        for i in range(2*m+1):
218
219
            delta[i] *= b_dot_b[m-1, 2*m-1]/b_dot_b[2*m-1, 2*m-1]

Martin Reinecke's avatar
Martin Reinecke committed
220
        for j in range(m):
Martin Reinecke's avatar
Martin Reinecke committed
221
            delta_b_b = sum([delta[l]*b_dot_b[m+j, l] for l in range(2*m+1)])
222
223
224
225
226
            beta = delta_b_b/b_dot_b[j, m+j]
            delta[j] += (alpha[j] - beta)

        return delta

theos's avatar
theos committed
227
    def add_new_point(self, x, gradient):
Matevz, Sraml (sraml)'s avatar
Matevz, Sraml (sraml) committed
228
229
        """Updates the s list and y list.

Martin Reinecke's avatar
Martin Reinecke committed
230
231
        Calculates the new position and gradient differences and enters them
        into the respective list.
Matevz, Sraml (sraml)'s avatar
Matevz, Sraml (sraml) committed
232
        """
Martin Reinecke's avatar
Martin Reinecke committed
233
234
235
        mmax = self.max_history_length
        self.s[self.k % mmax] = x - self.last_x
        self.y[self.k % mmax] = gradient - self.last_gradient
theos's avatar
theos committed
236

237
238
        self.last_x = x
        self.last_gradient = gradient
theos's avatar
theos committed
239

Martin Reinecke's avatar
fixes    
Martin Reinecke committed
240
        self.k += 1