vl_bfgs.py 7.66 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
Theo Steininger's avatar
Theo Steininger committed
13
#
Martin Reinecke's avatar
Martin Reinecke committed
14
# Copyright(C) 2013-2018 Max-Planck-Society
Theo Steininger's avatar
Theo Steininger committed
15 16 17
#
# NIFTy is being developed at the Max-Planck-Institut fuer Astrophysik
# and financially supported by the Studienstiftung des deutschen Volkes.
Theo Steininger's avatar
Theo Steininger committed
18

Martin Reinecke's avatar
Martin Reinecke committed
19 20 21
from __future__ import division
from builtins import range
from builtins import object
22
import numpy as np
23
from .descent_minimizer import DescentMinimizer
Martin Reinecke's avatar
Martin Reinecke committed
24
from .line_search_strong_wolfe import LineSearchStrongWolfe
Theo Steininger's avatar
Theo Steininger committed
25 26


27
class VL_BFGS(DescentMinimizer):
Martin Reinecke's avatar
Martin Reinecke committed
28 29 30 31 32 33 34 35 36 37 38 39 40 41
    """Implementation of the Vector-free L-BFGS minimization scheme.

    Find the descent direction by using the inverse Hessian.
    Instead of storing the whole matrix, it stores only the last few
    updates, which are used to do operations requiring the inverse
    Hessian product. The updates are represented in a new basis to optimize
    the algorithm.

    References
    ----------
    W. Chen, Z. Wang, J. Zhou, "Large-scale L-BFGS using MapReduce", 2014,
    Microsoft
    """

Martin Reinecke's avatar
Martin Reinecke committed
42 43
    def __init__(self, controller, line_searcher=LineSearchStrongWolfe(),
                 max_history_length=5):
Martin Reinecke's avatar
cleanup  
Martin Reinecke committed
44 45
        super(VL_BFGS, self).__init__(controller=controller,
                                      line_searcher=line_searcher)
46 47
        self.max_history_length = max_history_length

48
    def __call__(self, energy):
49
        self._information_store = None
50
        return super(VL_BFGS, self).__call__(energy)
51

Martin Reinecke's avatar
stage 1  
Martin Reinecke committed
52 53 54
    def reset(self):
        self._information_store = None

55
    def get_descent_direction(self, energy):
56 57
        x = energy.position
        gradient = energy.gradient
58 59 60 61
        # initialize the information store if it doesn't already exist
        try:
            self._information_store.add_new_point(x, gradient)
        except AttributeError:
Martin Reinecke's avatar
Martin Reinecke committed
62 63
            self._information_store = _InformationStore(
                self.max_history_length, x0=x, gradient=gradient)
64 65 66 67

        b = self._information_store.b
        delta = self._information_store.delta

68
        descent_direction = delta[0] * b[0]
Martin Reinecke's avatar
Martin Reinecke committed
69
        for i in range(1, len(delta)):
70
            descent_direction += delta[i] * b[i]
71

72
        return descent_direction
Theo Steininger's avatar
Theo Steininger committed
73 74


Martin Reinecke's avatar
Martin Reinecke committed
75
class _InformationStore(object):
Matevz, Sraml (sraml)'s avatar
Matevz, Sraml (sraml) committed
76
    """Class for storing a list of past updates.
77

Matevz, Sraml (sraml)'s avatar
Matevz, Sraml (sraml) committed
78 79
    Parameters
    ----------
Martin Reinecke's avatar
Martin Reinecke committed
80
    max_history_length : int
Matevz, Sraml (sraml)'s avatar
Matevz, Sraml (sraml) committed
81 82 83 84 85
        Maximum number of stored past updates.
    x0 : Field
        Initial position in variable space.
    gradient : Field
        Gradient at position x0.
86

Matevz, Sraml (sraml)'s avatar
Matevz, Sraml (sraml) committed
87 88
    Attributes
    ----------
Martin Reinecke's avatar
Martin Reinecke committed
89
    max_history_length : int
Matevz, Sraml (sraml)'s avatar
Matevz, Sraml (sraml) committed
90 91
        Maximum number of stored past updates.
    s : List
Martin Reinecke's avatar
Martin Reinecke committed
92
        Circular buffer of past position differences, which are Fields.
Matevz, Sraml (sraml)'s avatar
Matevz, Sraml (sraml) committed
93
    y : List
Martin Reinecke's avatar
Martin Reinecke committed
94
        Circular buffer of past gradient differences, which are Fields.
Matevz, Sraml (sraml)'s avatar
Matevz, Sraml (sraml) committed
95
    last_x : Field
Martin Reinecke's avatar
tweaks  
Martin Reinecke committed
96
        Latest position in variable space.
Matevz, Sraml (sraml)'s avatar
Matevz, Sraml (sraml) committed
97
    last_gradient : Field
Martin Reinecke's avatar
tweaks  
Martin Reinecke committed
98
        Gradient at latest position.
Martin Reinecke's avatar
Martin Reinecke committed
99
    k : int
Martin Reinecke's avatar
tweaks  
Martin Reinecke committed
100
        Number of updates that have taken place
Martin Reinecke's avatar
Martin Reinecke committed
101
    ss : numpy.ndarray
Martin Reinecke's avatar
Martin Reinecke committed
102
        2D circular buffer of scalar products between different elements of s.
Martin Reinecke's avatar
Martin Reinecke committed
103
    sy : numpy.ndarray
Martin Reinecke's avatar
Martin Reinecke committed
104
        2D circular buffer of scalar products between elements of s and y.
Martin Reinecke's avatar
Martin Reinecke committed
105
    yy : numpy.ndarray
Martin Reinecke's avatar
Martin Reinecke committed
106
        2D circular buffer of scalar products between different elements of y.
Matevz, Sraml (sraml)'s avatar
Matevz, Sraml (sraml) committed
107
    """
108 109
    def __init__(self, max_history_length, x0, gradient):
        self.max_history_length = max_history_length
110 111
        self.s = [None]*max_history_length
        self.y = [None]*max_history_length
112 113
        self.last_x = x0.copy()
        self.last_gradient = gradient.copy()
Theo Steininger's avatar
Theo Steininger committed
114
        self.k = 0
115

Martin Reinecke's avatar
Martin Reinecke committed
116
        mmax = max_history_length
Martin Reinecke's avatar
Martin Reinecke committed
117 118 119
        self.ss = np.empty((mmax, mmax), dtype=np.float64)
        self.sy = np.empty((mmax, mmax), dtype=np.float64)
        self.yy = np.empty((mmax, mmax), dtype=np.float64)
120 121 122

    @property
    def history_length(self):
Martin Reinecke's avatar
Martin Reinecke committed
123
        """Returns the number of currently stored updates."""
124 125 126 127
        return min(self.k, self.max_history_length)

    @property
    def b(self):
128 129
        """Combines s, y and gradient to form the new base vectors b.

Matevz, Sraml (sraml)'s avatar
Matevz, Sraml (sraml) committed
130 131
        Returns
        -------
Martin Reinecke's avatar
Martin Reinecke committed
132
        List
Matevz, Sraml (sraml)'s avatar
Matevz, Sraml (sraml) committed
133 134
            List of new basis vectors.
        """
135 136
        result = []
        m = self.history_length
Martin Reinecke's avatar
Martin Reinecke committed
137
        mmax = self.max_history_length
138

Martin Reinecke's avatar
Martin Reinecke committed
139
        for i in range(m):
Martin Reinecke's avatar
Martin Reinecke committed
140
            result.append(self.s[(self.k-m+i) % mmax])
141

Martin Reinecke's avatar
Martin Reinecke committed
142
        for i in range(m):
Martin Reinecke's avatar
Martin Reinecke committed
143
            result.append(self.y[(self.k-m+i) % mmax])
144 145 146 147 148 149 150

        result.append(self.last_gradient)

        return result

    @property
    def b_dot_b(self):
Matevz, Sraml (sraml)'s avatar
Matevz, Sraml (sraml) committed
151
        """Generates the (2m+1) * (2m+1) scalar matrix.
152

Matevz, Sraml (sraml)'s avatar
Matevz, Sraml (sraml) committed
153
        The i,j-th element of the matrix is a scalar product between the i-th
154 155
        and j-th base vector.

Matevz, Sraml (sraml)'s avatar
Matevz, Sraml (sraml) committed
156 157
        Returns
        -------
Martin Reinecke's avatar
Martin Reinecke committed
158
        numpy.ndarray
Matevz, Sraml (sraml)'s avatar
Matevz, Sraml (sraml) committed
159 160
            Scalar matrix.
        """
161
        m = self.history_length
Martin Reinecke's avatar
Martin Reinecke committed
162
        mmax = self.max_history_length
163 164 165
        k = self.k
        result = np.empty((2*m+1, 2*m+1), dtype=np.float)

Martin Reinecke's avatar
tweaks  
Martin Reinecke committed
166
        # update the stores
Martin Reinecke's avatar
Martin Reinecke committed
167
        k1 = (k-1) % mmax
Martin Reinecke's avatar
Martin Reinecke committed
168
        for i in range(m):
Martin Reinecke's avatar
Martin Reinecke committed
169
            kmi = (k-m+i) % mmax
Martin Reinecke's avatar
Martin Reinecke committed
170 171 172
            self.ss[kmi, k1] = self.ss[k1, kmi] = self.s[kmi].vdot(self.s[k1])
            self.yy[kmi, k1] = self.yy[k1, kmi] = self.y[kmi].vdot(self.y[k1])
            self.sy[kmi, k1] = self.s[kmi].vdot(self.y[k1])
Martin Reinecke's avatar
Martin Reinecke committed
173
        for j in range(m-1):
Martin Reinecke's avatar
Martin Reinecke committed
174 175
            kmj = (k-m+j) % mmax
            self.sy[k1, kmj] = self.s[k1].vdot(self.y[kmj])
Martin Reinecke's avatar
tweaks  
Martin Reinecke committed
176

Martin Reinecke's avatar
Martin Reinecke committed
177
        for i in range(m):
Martin Reinecke's avatar
Martin Reinecke committed
178
            kmi = (k-m+i) % mmax
Martin Reinecke's avatar
Martin Reinecke committed
179
            for j in range(m):
Martin Reinecke's avatar
Martin Reinecke committed
180
                kmj = (k-m+j) % mmax
Martin Reinecke's avatar
Martin Reinecke committed
181 182 183
                result[i, j] = self.ss[kmi, kmj]
                result[i, m+j] = result[m+j, i] = self.sy[kmi, kmj]
                result[m+i, m+j] = self.yy[kmi, kmj]
184

185
            sgrad_i = self.s[kmi].vdot(self.last_gradient)
Martin Reinecke's avatar
tweaks  
Martin Reinecke committed
186
            result[2*m, i] = result[i, 2*m] = sgrad_i
187

Martin Reinecke's avatar
fix  
Martin Reinecke committed
188
            ygrad_i = self.y[kmi].vdot(self.last_gradient)
Martin Reinecke's avatar
tweaks  
Martin Reinecke committed
189
            result[2*m, m+i] = result[m+i, 2*m] = ygrad_i
190

Martin Reinecke's avatar
tweaks  
Martin Reinecke committed
191
        result[2*m, 2*m] = self.last_gradient.norm()
192
        return result
Theo Steininger's avatar
Theo Steininger committed
193 194

    @property
195
    def delta(self):
Matevz, Sraml (sraml)'s avatar
Matevz, Sraml (sraml) committed
196
        """Calculates the new scalar coefficients (deltas).
197

Matevz, Sraml (sraml)'s avatar
Matevz, Sraml (sraml) committed
198 199
        Returns
        -------
Martin Reinecke's avatar
Martin Reinecke committed
200
        List
Matevz, Sraml (sraml)'s avatar
Matevz, Sraml (sraml) committed
201 202
            List of the new scalar coefficients (deltas).
        """
203 204 205 206 207 208 209 210
        m = self.history_length
        b_dot_b = self.b_dot_b

        delta = np.zeros(2*m+1, dtype=np.float)
        delta[2*m] = -1

        alpha = np.empty(m, dtype=np.float)

Martin Reinecke's avatar
Martin Reinecke committed
211 212
        for j in range(m-1, -1, -1):
            delta_b_b = sum([delta[l] * b_dot_b[l, j] for l in range(2*m+1)])
213 214 215
            alpha[j] = delta_b_b/b_dot_b[j, m+j]
            delta[m+j] -= alpha[j]

Martin Reinecke's avatar
Martin Reinecke committed
216
        for i in range(2*m+1):
217 218
            delta[i] *= b_dot_b[m-1, 2*m-1]/b_dot_b[2*m-1, 2*m-1]

Martin Reinecke's avatar
Martin Reinecke committed
219
        for j in range(m):
Martin Reinecke's avatar
Martin Reinecke committed
220
            delta_b_b = sum([delta[l]*b_dot_b[m+j, l] for l in range(2*m+1)])
221 222 223 224 225
            beta = delta_b_b/b_dot_b[j, m+j]
            delta[j] += (alpha[j] - beta)

        return delta

Theo Steininger's avatar
Theo Steininger committed
226
    def add_new_point(self, x, gradient):
Matevz, Sraml (sraml)'s avatar
Matevz, Sraml (sraml) committed
227 228
        """Updates the s list and y list.

Martin Reinecke's avatar
Martin Reinecke committed
229 230
        Calculates the new position and gradient differences and enters them
        into the respective list.
Matevz, Sraml (sraml)'s avatar
Matevz, Sraml (sraml) committed
231
        """
Martin Reinecke's avatar
Martin Reinecke committed
232 233 234
        mmax = self.max_history_length
        self.s[self.k % mmax] = x - self.last_x
        self.y[self.k % mmax] = gradient - self.last_gradient
Theo Steininger's avatar
Theo Steininger committed
235

236 237
        self.last_x = x.copy()
        self.last_gradient = gradient.copy()
Theo Steininger's avatar
Theo Steininger committed
238

Martin Reinecke's avatar
fixes  
Martin Reinecke committed
239
        self.k += 1