vl_bfgs.py 8.05 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
Theo Steininger's avatar
Theo Steininger committed
13 14 15 16 17
#
# Copyright(C) 2013-2017 Max-Planck-Society
#
# NIFTy is being developed at the Max-Planck-Institut fuer Astrophysik
# and financially supported by the Studienstiftung des deutschen Volkes.
Theo Steininger's avatar
Theo Steininger committed
18

Martin Reinecke's avatar
Martin Reinecke committed
19 20 21
from __future__ import division
from builtins import range
from builtins import object
22
import numpy as np
23
from .descent_minimizer import DescentMinimizer
Martin Reinecke's avatar
Martin Reinecke committed
24
from .line_search_strong_wolfe import LineSearchStrongWolfe
Theo Steininger's avatar
Theo Steininger committed
25 26


27
class VL_BFGS(DescentMinimizer):
Martin Reinecke's avatar
Martin Reinecke committed
28 29
    def __init__(self, controller, line_searcher=LineSearchStrongWolfe(),
                 max_history_length=5):
Martin Reinecke's avatar
cleanup  
Martin Reinecke committed
30 31
        super(VL_BFGS, self).__init__(controller=controller,
                                      line_searcher=line_searcher)
32 33
        self.max_history_length = max_history_length

34
    def __call__(self, energy):
35
        self._information_store = None
36
        return super(VL_BFGS, self).__call__(energy)
37

38
    def get_descent_direction(self, energy):
39 40 41 42 43 44 45 46
        """Implementation of the Vector-free L-BFGS minimization scheme.

        Find the descent direction by using the inverse Hessian.
        Instead of storing the whole matrix, it stores only the last few
        updates, which are used to do operations requiring the inverse
        Hessian product. The updates are represented in a new basis to optimize
        the algorithm.

Matevz, Sraml (sraml)'s avatar
Matevz, Sraml (sraml) committed
47 48
        Parameters
        ----------
49 50 51 52
        energy : Energy
            An instance of the Energy class which shall be minized. The
            position of `energy` is used as the starting point of minization.

Matevz, Sraml (sraml)'s avatar
Matevz, Sraml (sraml) committed
53 54
        Returns
        -------
55
        descent_direction : Field
56 57 58 59 60 61 62
            Returns the descent direction.

        References
        ----------
        W. Chen, Z. Wang, J. Zhou, "Large-scale L-BFGS using MapReduce", 2014,
        Microsoft

Matevz, Sraml (sraml)'s avatar
Matevz, Sraml (sraml) committed
63
        """
64

65 66
        x = energy.position
        gradient = energy.gradient
67 68 69 70 71 72 73 74 75 76 77
        # initialize the information store if it doesn't already exist
        try:
            self._information_store.add_new_point(x, gradient)
        except AttributeError:
            self._information_store = InformationStore(self.max_history_length,
                                                       x0=x,
                                                       gradient=gradient)

        b = self._information_store.b
        delta = self._information_store.delta

78
        descent_direction = delta[0] * b[0]
Martin Reinecke's avatar
Martin Reinecke committed
79
        for i in range(1, len(delta)):
80
            descent_direction += delta[i] * b[i]
81

82
        return descent_direction
Theo Steininger's avatar
Theo Steininger committed
83 84 85


class InformationStore(object):
Matevz, Sraml (sraml)'s avatar
Matevz, Sraml (sraml) committed
86
    """Class for storing a list of past updates.
87

Matevz, Sraml (sraml)'s avatar
Matevz, Sraml (sraml) committed
88 89
    Parameters
    ----------
Martin Reinecke's avatar
Martin Reinecke committed
90
    max_history_length : int
Matevz, Sraml (sraml)'s avatar
Matevz, Sraml (sraml) committed
91 92 93 94 95
        Maximum number of stored past updates.
    x0 : Field
        Initial position in variable space.
    gradient : Field
        Gradient at position x0.
96

Matevz, Sraml (sraml)'s avatar
Matevz, Sraml (sraml) committed
97 98
    Attributes
    ----------
Martin Reinecke's avatar
Martin Reinecke committed
99
    max_history_length : int
Matevz, Sraml (sraml)'s avatar
Matevz, Sraml (sraml) committed
100 101
        Maximum number of stored past updates.
    s : List
Martin Reinecke's avatar
Martin Reinecke committed
102
        Circular buffer of past position differences, which are Fields.
Matevz, Sraml (sraml)'s avatar
Matevz, Sraml (sraml) committed
103
    y : List
Martin Reinecke's avatar
Martin Reinecke committed
104
        Circular buffer of past gradient differences, which are Fields.
Matevz, Sraml (sraml)'s avatar
Matevz, Sraml (sraml) committed
105
    last_x : Field
Martin Reinecke's avatar
tweaks  
Martin Reinecke committed
106
        Latest position in variable space.
Matevz, Sraml (sraml)'s avatar
Matevz, Sraml (sraml) committed
107
    last_gradient : Field
Martin Reinecke's avatar
tweaks  
Martin Reinecke committed
108
        Gradient at latest position.
Martin Reinecke's avatar
Martin Reinecke committed
109
    k : int
Martin Reinecke's avatar
tweaks  
Martin Reinecke committed
110
        Number of updates that have taken place
Martin Reinecke's avatar
Martin Reinecke committed
111
    ss : numpy.ndarray
Martin Reinecke's avatar
Martin Reinecke committed
112
        2D circular buffer of scalar products between different elements of s.
Martin Reinecke's avatar
Martin Reinecke committed
113
    sy : numpy.ndarray
Martin Reinecke's avatar
Martin Reinecke committed
114
        2D circular buffer of scalar products between elements of s and y.
Martin Reinecke's avatar
Martin Reinecke committed
115
    yy : numpy.ndarray
Martin Reinecke's avatar
Martin Reinecke committed
116
        2D circular buffer of scalar products between different elements of y.
Matevz, Sraml (sraml)'s avatar
Matevz, Sraml (sraml) committed
117
    """
118 119
    def __init__(self, max_history_length, x0, gradient):
        self.max_history_length = max_history_length
120 121
        self.s = [None]*max_history_length
        self.y = [None]*max_history_length
122 123
        self.last_x = x0.copy()
        self.last_gradient = gradient.copy()
Theo Steininger's avatar
Theo Steininger committed
124
        self.k = 0
125

Martin Reinecke's avatar
Martin Reinecke committed
126
        mmax = max_history_length
Martin Reinecke's avatar
Martin Reinecke committed
127 128 129
        self.ss = np.empty((mmax, mmax), dtype=np.float64)
        self.sy = np.empty((mmax, mmax), dtype=np.float64)
        self.yy = np.empty((mmax, mmax), dtype=np.float64)
130 131 132

    @property
    def history_length(self):
Martin Reinecke's avatar
Martin Reinecke committed
133
        """Returns the number of currently stored updates."""
134 135 136 137
        return min(self.k, self.max_history_length)

    @property
    def b(self):
138 139
        """Combines s, y and gradient to form the new base vectors b.

Matevz, Sraml (sraml)'s avatar
Matevz, Sraml (sraml) committed
140 141
        Returns
        -------
Martin Reinecke's avatar
Martin Reinecke committed
142
        List
Matevz, Sraml (sraml)'s avatar
Matevz, Sraml (sraml) committed
143 144
            List of new basis vectors.
        """
145 146
        result = []
        m = self.history_length
Martin Reinecke's avatar
Martin Reinecke committed
147
        mmax = self.max_history_length
148

Martin Reinecke's avatar
Martin Reinecke committed
149
        for i in range(m):
Martin Reinecke's avatar
Martin Reinecke committed
150
            result.append(self.s[(self.k-m+i) % mmax])
151

Martin Reinecke's avatar
Martin Reinecke committed
152
        for i in range(m):
Martin Reinecke's avatar
Martin Reinecke committed
153
            result.append(self.y[(self.k-m+i) % mmax])
154 155 156 157 158 159 160

        result.append(self.last_gradient)

        return result

    @property
    def b_dot_b(self):
Matevz, Sraml (sraml)'s avatar
Matevz, Sraml (sraml) committed
161
        """Generates the (2m+1) * (2m+1) scalar matrix.
162

Matevz, Sraml (sraml)'s avatar
Matevz, Sraml (sraml) committed
163
        The i,j-th element of the matrix is a scalar product between the i-th
164 165
        and j-th base vector.

Matevz, Sraml (sraml)'s avatar
Matevz, Sraml (sraml) committed
166 167
        Returns
        -------
Martin Reinecke's avatar
Martin Reinecke committed
168
        numpy.ndarray
Matevz, Sraml (sraml)'s avatar
Matevz, Sraml (sraml) committed
169 170
            Scalar matrix.
        """
171
        m = self.history_length
Martin Reinecke's avatar
Martin Reinecke committed
172
        mmax = self.max_history_length
173 174 175
        k = self.k
        result = np.empty((2*m+1, 2*m+1), dtype=np.float)

Martin Reinecke's avatar
tweaks  
Martin Reinecke committed
176
        # update the stores
Martin Reinecke's avatar
Martin Reinecke committed
177
        k1 = (k-1) % mmax
Martin Reinecke's avatar
Martin Reinecke committed
178
        for i in range(m):
Martin Reinecke's avatar
Martin Reinecke committed
179
            kmi = (k-m+i) % mmax
Martin Reinecke's avatar
Martin Reinecke committed
180 181 182
            self.ss[kmi, k1] = self.ss[k1, kmi] = self.s[kmi].vdot(self.s[k1])
            self.yy[kmi, k1] = self.yy[k1, kmi] = self.y[kmi].vdot(self.y[k1])
            self.sy[kmi, k1] = self.s[kmi].vdot(self.y[k1])
Martin Reinecke's avatar
Martin Reinecke committed
183
        for j in range(m-1):
Martin Reinecke's avatar
Martin Reinecke committed
184 185
            kmj = (k-m+j) % mmax
            self.sy[k1, kmj] = self.s[k1].vdot(self.y[kmj])
Martin Reinecke's avatar
tweaks  
Martin Reinecke committed
186

Martin Reinecke's avatar
Martin Reinecke committed
187
        for i in range(m):
Martin Reinecke's avatar
Martin Reinecke committed
188
            kmi = (k-m+i) % mmax
Martin Reinecke's avatar
Martin Reinecke committed
189
            for j in range(m):
Martin Reinecke's avatar
Martin Reinecke committed
190
                kmj = (k-m+j) % mmax
Martin Reinecke's avatar
Martin Reinecke committed
191 192 193
                result[i, j] = self.ss[kmi, kmj]
                result[i, m+j] = result[m+j, i] = self.sy[kmi, kmj]
                result[m+i, m+j] = self.yy[kmi, kmj]
194

195
            sgrad_i = self.s[kmi].vdot(self.last_gradient)
Martin Reinecke's avatar
tweaks  
Martin Reinecke committed
196
            result[2*m, i] = result[i, 2*m] = sgrad_i
197

Martin Reinecke's avatar
fix  
Martin Reinecke committed
198
            ygrad_i = self.y[kmi].vdot(self.last_gradient)
Martin Reinecke's avatar
tweaks  
Martin Reinecke committed
199
            result[2*m, m+i] = result[m+i, 2*m] = ygrad_i
200

Martin Reinecke's avatar
tweaks  
Martin Reinecke committed
201
        result[2*m, 2*m] = self.last_gradient.norm()
202
        return result
Theo Steininger's avatar
Theo Steininger committed
203 204

    @property
205
    def delta(self):
Matevz, Sraml (sraml)'s avatar
Matevz, Sraml (sraml) committed
206
        """Calculates the new scalar coefficients (deltas).
207

Matevz, Sraml (sraml)'s avatar
Matevz, Sraml (sraml) committed
208 209
        Returns
        -------
Martin Reinecke's avatar
Martin Reinecke committed
210
        List
Matevz, Sraml (sraml)'s avatar
Matevz, Sraml (sraml) committed
211 212
            List of the new scalar coefficients (deltas).
        """
213 214 215 216 217 218 219 220
        m = self.history_length
        b_dot_b = self.b_dot_b

        delta = np.zeros(2*m+1, dtype=np.float)
        delta[2*m] = -1

        alpha = np.empty(m, dtype=np.float)

Martin Reinecke's avatar
Martin Reinecke committed
221 222
        for j in range(m-1, -1, -1):
            delta_b_b = sum([delta[l] * b_dot_b[l, j] for l in range(2*m+1)])
223 224 225
            alpha[j] = delta_b_b/b_dot_b[j, m+j]
            delta[m+j] -= alpha[j]

Martin Reinecke's avatar
Martin Reinecke committed
226
        for i in range(2*m+1):
227 228
            delta[i] *= b_dot_b[m-1, 2*m-1]/b_dot_b[2*m-1, 2*m-1]

Martin Reinecke's avatar
Martin Reinecke committed
229
        for j in range(m):
Martin Reinecke's avatar
Martin Reinecke committed
230
            delta_b_b = sum([delta[l]*b_dot_b[m+j, l] for l in range(2*m+1)])
231 232 233 234 235
            beta = delta_b_b/b_dot_b[j, m+j]
            delta[j] += (alpha[j] - beta)

        return delta

Theo Steininger's avatar
Theo Steininger committed
236
    def add_new_point(self, x, gradient):
Matevz, Sraml (sraml)'s avatar
Matevz, Sraml (sraml) committed
237 238
        """Updates the s list and y list.

Martin Reinecke's avatar
Martin Reinecke committed
239 240
        Calculates the new position and gradient differences and enters them
        into the respective list.
Matevz, Sraml (sraml)'s avatar
Matevz, Sraml (sraml) committed
241
        """
Martin Reinecke's avatar
Martin Reinecke committed
242 243 244
        mmax = self.max_history_length
        self.s[self.k % mmax] = x - self.last_x
        self.y[self.k % mmax] = gradient - self.last_gradient
Theo Steininger's avatar
Theo Steininger committed
245

246 247
        self.last_x = x.copy()
        self.last_gradient = gradient.copy()
Theo Steininger's avatar
Theo Steininger committed
248

Martin Reinecke's avatar
fixes  
Martin Reinecke committed
249
        self.k += 1