#ifndef LIMBO_MODEL_GP_HPP
#define LIMBO_MODEL_GP_HPP

#include <cassert>
#include <iostream>
#include <limits>
#include <vector>

#include <Eigen/Cholesky>
#include <Eigen/Core>
#include <Eigen/LU>

#include <limbo/model/gp/no_lf_opt.hpp>

namespace limbo {
    namespace model {
        /// @ingroup model
        /// A classic Gaussian process.
        /// It is parametrized by:
        /// - a mean function
        /// - [optional] an optimizer for the hyper-parameters
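        ///
        /// A minimal usage sketch (assuming Limbo's ``kernel::Exp`` and ``mean::Data``
        /// as kernel and mean types; adapt them to your own setup):
        /// \rst
        /// .. code-block:: c++
        ///
        ///    using GP_t = model::GP<Params, kernel::Exp<Params>, mean::Data<Params>>;
        ///    GP_t gp;
        ///    gp.compute(samples, observations, 0.01); // the call to compute() must be explicit
        ///    Eigen::VectorXd mu;
        ///    double sigma_sq;
        ///    std::tie(mu, sigma_sq) = gp.query(test_point);
        /// \endrst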
        template <typename Params, typename KernelFunction, typename MeanFunction, class HyperParamsOptimizer = gp::NoLFOpt<Params>>
        class GP {
        public:
            /// useful because the model might be created before knowing anything about the process
            GP() : _dim_in(-1), _dim_out(-1) {}

            /// useful because the model might be created before having samples
            GP(int dim_in, int dim_out)
                : _dim_in(dim_in), _dim_out(dim_out), _kernel_function(dim_in), _mean_function(dim_out) {}

            /// Compute the GP from samples, observations, and noise. [optional: blacklisted samples]. This call needs to be explicit!
            void compute(const std::vector<Eigen::VectorXd>& samples,
                const std::vector<Eigen::VectorXd>& observations, double noise,
                const std::vector<Eigen::VectorXd>& bl_samples = std::vector<Eigen::VectorXd>())
            {
                assert(samples.size() != 0);
                assert(observations.size() != 0);
                assert(samples.size() == observations.size());

                _dim_in = samples[0].size();
                _kernel_function = KernelFunction(_dim_in); // the cost of building a functor should be relatively low

                _dim_out = observations[0].size();
                _mean_function = MeanFunction(_dim_out); // the cost of building a functor should be relatively low

                _samples = samples;

                _observations.resize(observations.size(), _dim_out);
                for (int i = 0; i < _observations.rows(); ++i)
                    _observations.row(i) = observations[i];

                _mean_observation = _observations.colwise().mean();

                _noise = noise;

                _bl_samples = bl_samples;

                this->_compute_obs_mean();
                this->_compute_kernel(false);

                if (!_bl_samples.empty())
                    this->_compute_bl_kernel();

                HyperParamsOptimizer()(*this);
            }

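            /// add a sample and update the GP; the kernel matrix is updated incrementally
            /// (incremental Cholesky decomposition, see _compute_kernel(true))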
            void add_sample(const Eigen::VectorXd& sample, const Eigen::VectorXd& observation, double noise)
            {
                if (_samples.empty()) {
                    if (_bl_samples.empty()) {
                        _dim_in = sample.size();
                        _kernel_function = KernelFunction(_dim_in); // the cost of building a functor should be relatively low
                    } else {
                        assert(sample.size() == _dim_in);
                    }

                    _dim_out = observation.size();
                    _mean_function = MeanFunction(_dim_out); // the cost of building a functor should be relatively low
                } else {
                    assert(sample.size() == _dim_in);
                    assert(observation.size() == _dim_out);
                }

                _samples.push_back(sample);

                _observations.conservativeResize(_observations.rows() + 1, _dim_out);
                _observations.bottomRows<1>() = observation.transpose();

                _mean_observation = _observations.colwise().mean();

                _noise = noise;

                this->_compute_obs_mean();
                this->_compute_kernel(true);

                if (!_bl_samples.empty())
                    this->_compute_bl_kernel();

                HyperParamsOptimizer()(*this);
            }

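            /// add a blacklisted sample, i.e. a point without a usable observation
            /// (see the Limbo-specific concept guide for the blacklist concept)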
            void add_bl_sample(const Eigen::VectorXd& bl_sample, double noise)
            {
                if (_samples.empty() && _bl_samples.empty()) {
                    _dim_in = bl_sample.size();
                    _kernel_function = KernelFunction(_dim_in); // the cost of building a functor should be relatively low
                } else {
                    assert(bl_sample.size() == _dim_in);
                }

                _bl_samples.push_back(bl_sample);
                _noise = noise;

                if (!_samples.empty()) {
                    this->_compute_bl_kernel();
                    HyperParamsOptimizer()(*this);
                }
            }

            /**
             \rst
             return :math:`\mu`, :math:`\sigma^2` (unnormalized). If there is no sample, return the value according to the mean function. Using this method instead of separate calls to mu() and sigma() is more efficient because some computations are shared between mu() and sigma().
             \endrst
            */
            std::tuple<Eigen::VectorXd, double> query(const Eigen::VectorXd& v) const
            {
                if (_samples.size() == 0 && _bl_samples.size() == 0)
                    return std::make_tuple(_mean_function(v, *this),
                        _kernel_function(v, v)); // prior variance (not standard deviation), consistent with _sigma()

                if (_samples.size() == 0)
                    return std::make_tuple(_mean_function(v, *this),
                        _sigma(v, _compute_k_bl(v, _compute_k(v))));

                Eigen::VectorXd k = _compute_k(v);
                return std::make_tuple(_mu(v, k), _sigma(v, _compute_k_bl(v, k)));
            }

            /**
             \rst
             return :math:`\mu` (unnormalized). If there is no sample, return the value according to the mean function.
             \endrst
            */
            Eigen::VectorXd mu(const Eigen::VectorXd& v) const
            {
                if (_samples.size() == 0)
                    return _mean_function(v, *this);
                return _mu(v, _compute_k(v));
            }

            /**
             \rst
             return :math:`\sigma^2` (unnormalized). If there is no sample, return the prior variance according to the kernel function.
             \endrst
            */
            double sigma(const Eigen::VectorXd& v) const
            {
                if (_samples.size() == 0 && _bl_samples.size() == 0)
                    return _kernel_function(v, v); // prior variance (not standard deviation), consistent with _sigma()
                return _sigma(v, _compute_k_bl(v, _compute_k(v)));
            }

            /// return the number of dimensions of the input
            int dim_in() const
            {
                assert(_dim_in != -1); // need to compute first!
                return _dim_in;
            }

            /// return the number of dimensions of the output
            int dim_out() const
            {
                assert(_dim_out != -1); // need to compute first!
                return _dim_out;
            }

            const KernelFunction& kernel_function() const { return _kernel_function; }

            KernelFunction& kernel_function() { return _kernel_function; }

            const MeanFunction& mean_function() const { return _mean_function; }

            MeanFunction& mean_function() { return _mean_function; }

            /// return the maximum observation (only call this if the output of the GP is of dimension 1)
            Eigen::VectorXd max_observation() const
            {
                if (_observations.cols() > 1)
                    std::cout << "WARNING max_observation with multi dimensional "
                                 "observations doesn't make sense"
                              << std::endl;
                return _observations.maxCoeff();
            }

            /// return the mean observation (only call this if the output of the GP is of dimension 1)
            Eigen::VectorXd mean_observation() const
            {
                // TO-DO: Check if _dim_out is correct?!
                return _samples.size() > 0 ? _mean_observation
                                           : Eigen::VectorXd::Zero(_dim_out);
            }

            const Eigen::MatrixXd& mean_vector() const { return _mean_vector; }

            const Eigen::MatrixXd& obs_mean() const { return _obs_mean; }

            /// return the number of samples used to compute the GP
            int nb_samples() const { return _samples.size(); }

            /** return the number of blacklisted samples used to compute the GP
             \rst
             For the blacklist concept, see the Limbo-specific concept guide.
             \endrst
            */
            int nb_bl_samples() const { return _bl_samples.size(); }

            /// recomputes the GP
            void recompute(bool update_obs_mean)
            {
                assert(!_samples.empty());

                if (update_obs_mean)
                    this->_compute_obs_mean(); // ORDER MATTERS

                this->_compute_kernel(false);

                if (!_bl_samples.empty())
                    this->_compute_bl_kernel();
            }

            /// return the likelihood (do not compute it!)
            double get_lik() const { return _lik; }

            /// set the likelihood (you need to compute it from outside!)
            void set_lik(const double& lik) { _lik = lik; }

            /// LLT matrix (from Cholesky decomposition)
            //const Eigen::LLT<Eigen::MatrixXd>& llt() const { return _llt; }
            const Eigen::MatrixXd& matrixL() const { return _matrixL; }

            const Eigen::MatrixXd& alpha() const { return _alpha; }

            /// return the list of samples that have been tested so far
            const std::vector<Eigen::VectorXd>& samples() const { return _samples; }

        protected:
            int _dim_in;
            int _dim_out;

            KernelFunction _kernel_function;
            MeanFunction _mean_function;

            std::vector<Eigen::VectorXd> _samples;
            Eigen::MatrixXd _observations;
            std::vector<Eigen::VectorXd> _bl_samples; // blacklisted samples
            Eigen::MatrixXd _mean_vector;
            Eigen::MatrixXd _obs_mean;

            double _noise;
            Eigen::MatrixXd _alpha;
            Eigen::VectorXd _mean_observation;

            Eigen::MatrixXd _kernel;

            // Eigen::MatrixXd _inverted_kernel;

            Eigen::MatrixXd _matrixL;
            Eigen::MatrixXd _inv_bl_kernel;

            double _lik;

            void _compute_obs_mean()
            {
                _mean_vector.resize(_samples.size(), _dim_out);
                for (int i = 0; i < _mean_vector.rows(); i++)
                    _mean_vector.row(i) = _mean_function(_samples[i], *this);
                _obs_mean = _observations - _mean_vector;
            }

            void _compute_kernel(bool incremental = false)
            {
                if (!incremental) // incremental LLT is impossible here: recompute the kernel and its Cholesky decomposition from scratch
                {
                    _kernel.resize(_samples.size(), _samples.size());

                    // O(n^2) [should be negligible]
                    for (size_t i = 0; i < _samples.size(); i++)
                        for (size_t j = 0; j < _samples.size(); ++j)
                            _kernel(i, j) = _kernel_function(_samples[i], _samples[j]) + ((i == j) ? _noise : 0); // noise only on the diagonal

                    // O(n^3)
                    //  _inverted_kernel = _kernel.inverse();
                    _matrixL = Eigen::LLT<Eigen::MatrixXd>(_kernel).matrixL();
                }
                else // incremental LLT
                // This part of the code is inspired by the Bayesopt library (cholesky_add_row
                // function); the mathematical foundations can be recovered by detailing the
                // equations of the extended L matrix that produces the desired kernel.
                {
                    size_t n = _samples.size();
                    _kernel.conservativeResize(n, n);

                    for (size_t i = 0; i < n; ++i) {
                        _kernel(i, n - 1) = _kernel_function(_samples[i], _samples[n - 1]) + ((i == n - 1) ? _noise : 0); // noise only on the diagonal
                        _kernel(n - 1, i) = _kernel(i, n - 1);
                    }

                    _matrixL.conservativeResizeLike(Eigen::MatrixXd::Zero(n, n));

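                    // The update, in short: K gains one row/column (k, k_nn); the previous
                    // factor L is kept and one new row l is appended, with L l^T = k
                    // (forward substitution below) and the new diagonal entry
                    // l_nn = sqrt(k_nn - l l^T)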
                    double L_j;
                    for (size_t j = 0; j < n - 1; ++j) {
                        L_j = _kernel(n - 1, j) - (_matrixL.block(j, 0, 1, j) * _matrixL.block(n - 1, 0, 1, j).transpose())(0, 0);
                        _matrixL(n - 1, j) = (L_j) / _matrixL(j, j);
                    }

                    L_j = _kernel(n - 1, n - 1) - (_matrixL.block(n - 1, 0, 1, n - 1) * _matrixL.block(n - 1, 0, 1, n - 1).transpose())(0, 0);
                    _matrixL(n - 1, n - 1) = sqrt(L_j);
                }

                // alpha = K^{-1} * this->_obs_mean;
                _alpha = _matrixL.template triangularView<Eigen::Lower>().solve(_obs_mean);
                _matrixL.template triangularView<Eigen::Lower>().adjoint().solveInPlace(_alpha); // could be improved by avoiding generating the view twice
            }

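            // Inverse of the extended kernel matrix [[K, B], [B^T, D]] (training samples +
            // blacklisted samples), built block-wise from the standard 2x2 block-inversion
            // identity, where comA = D - B^T K^{-1} B is the Schur complement of K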
            void _compute_bl_kernel()
            {
                Eigen::MatrixXd A1 = Eigen::MatrixXd::Identity(this->_samples.size(), this->_samples.size());
                _matrixL.template triangularView<Eigen::Lower>().solveInPlace(A1);
                _matrixL.template triangularView<Eigen::Lower>().transpose().solveInPlace(A1);

                _inv_bl_kernel.resize(_samples.size() + _bl_samples.size(),
                    _samples.size() + _bl_samples.size());

                Eigen::MatrixXd B(_samples.size(), _bl_samples.size());
                for (size_t i = 0; i < _samples.size(); i++)
                    for (size_t j = 0; j < _bl_samples.size(); ++j)
                        B(i, j) = _kernel_function(_samples[i], _bl_samples[j]);

                Eigen::MatrixXd D(_bl_samples.size(), _bl_samples.size());
                for (size_t i = 0; i < _bl_samples.size(); i++)
                    for (size_t j = 0; j < _bl_samples.size(); ++j)
                        D(i, j) = _kernel_function(_bl_samples[i], _bl_samples[j]) + ((i == j) ? _noise : 0);

                Eigen::MatrixXd comA = (D - B.transpose() * A1 * B);
                Eigen::LLT<Eigen::MatrixXd> llt_bl(comA);
                Eigen::MatrixXd comA1 = Eigen::MatrixXd::Identity(_bl_samples.size(), _bl_samples.size());
                llt_bl.matrixL().solveInPlace(comA1);
                llt_bl.matrixL().transpose().solveInPlace(comA1);

                // fill the matrix block wise
                _inv_bl_kernel.block(0, 0, _samples.size(), _samples.size()) = A1 + A1 * B * comA1 * B.transpose() * A1;
                _inv_bl_kernel.block(0, _samples.size(), _samples.size(),
                    _bl_samples.size())
                    = -A1 * B * comA1;
                _inv_bl_kernel.block(_samples.size(), 0, _bl_samples.size(),
                    _samples.size())
                    = _inv_bl_kernel.block(0, _samples.size(), _samples.size(),
                                        _bl_samples.size())
                          .transpose();
                _inv_bl_kernel.block(_samples.size(), _samples.size(), _bl_samples.size(),
                    _bl_samples.size())
                    = comA1;
            }

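            // posterior mean: mu(v) = k(v)^T alpha + m(v), where alpha = K^{-1} (observations - mean_vector)
            // is precomputed in _compute_kernel()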
            Eigen::VectorXd _mu(const Eigen::VectorXd& v, const Eigen::VectorXd& k) const
            {
                return (k.transpose() * _alpha) + _mean_function(v, *this).transpose();
            }

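            // posterior variance: sigma^2(v) = k(v,v) - k(v)^T K^{-1} k(v), computed with a
            // triangular solve (z = L^{-1} k, so that k^T K^{-1} k = z^T z), or with
            // _inv_bl_kernel when there are blacklisted samples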
            double _sigma(const Eigen::VectorXd& v, const Eigen::VectorXd& k) const
            {
                double res;
                if (_bl_samples.size() == 0) {
                    Eigen::VectorXd z = _matrixL.triangularView<Eigen::Lower>().solve(k);
                    res = _kernel_function(v, v) - z.dot(z);
                }
                else {
                    res = _kernel_function(v, v) - k.transpose() * _inv_bl_kernel * k;
                }

                return (res <= std::numeric_limits<double>::epsilon()) ? 0 : res;
            }

            Eigen::VectorXd _compute_k(const Eigen::VectorXd& v) const
            {
                Eigen::VectorXd k(_samples.size());
                for (int i = 0; i < k.size(); i++)
                    k[i] = _kernel_function(_samples[i], v);
                return k;
            }

            Eigen::VectorXd _compute_k_bl(const Eigen::VectorXd& v,
                const Eigen::VectorXd& k) const
            {
                if (_bl_samples.size() == 0) {
                    return k;
                }

                Eigen::VectorXd k_bl(_samples.size() + _bl_samples.size());

                k_bl.head(_samples.size()) = k;
                for (size_t i = 0; i < _bl_samples.size(); i++)
                    k_bl[i + this->_samples.size()] = this->_kernel_function(_bl_samples[i], v);
                return k_bl;
            }
        };
    }
}

#endif