gp.hpp 17.6 KB
Newer Older
1
2
#ifndef LIMBO_MODEL_GP_HPP
#define LIMBO_MODEL_GP_HPP
3
4

#include <cassert>
5
#include <iostream>
6
#include <limits>
7
#include <vector>
8

9
#include <Eigen/Cholesky>
10
11
12
#include <Eigen/Core>
#include <Eigen/LU>

13
#include <limbo/model/gp/no_lf_opt.hpp>
14
#include <limbo/tools.hpp>
15

16
namespace limbo {
17
    namespace model {
Jean-Baptiste Mouret's avatar
Jean-Baptiste Mouret committed
18
19
20
21
22
        /// @ingroup model
        /// A classic Gaussian process.
        /// It is parametrized by:
        /// - a kernel function (covariance)
        /// - a mean function
        /// - [optional] an optimizer for the hyper-parameters
        template <typename Params, typename KernelFunction, typename MeanFunction, class HyperParamsOptimizer = gp::NoLFOpt<Params>>
        class GP {
        public:
            /// useful because the model might be created before knowing anything about the process
            /// (dimensions are set lazily on the first compute()/add_sample(); -1 means "unknown")
            GP() : _dim_in(-1), _dim_out(-1) {}

            /// useful because the model might be created  before having samples
            /// @param dim_in  dimension of the input space (size of each sample)
            /// @param dim_out dimension of the output space (size of each observation)
            GP(int dim_in, int dim_out)
                : _dim_in(dim_in), _dim_out(dim_out), _kernel_function(dim_in), _mean_function(dim_out) {}
32

33
            /// Compute the GP from samples, observation, noise. [optional: blacklisted samples]. This call needs to be explicit!
34
            void compute(const std::vector<Eigen::VectorXd>& samples,
Antoine Cully's avatar
Antoine Cully committed
35
36
37
38
                const std::vector<Eigen::VectorXd>& observations,
                const Eigen::VectorXd& noises,
                const std::vector<Eigen::VectorXd>& bl_samples = std::vector<Eigen::VectorXd>(),
                const Eigen::VectorXd& noises_bl = Eigen::VectorXd())
39
40
41
42
            {
                assert(samples.size() != 0);
                assert(observations.size() != 0);
                assert(samples.size() == observations.size());
Konstantinos Chatzilygeroudis's avatar
Konstantinos Chatzilygeroudis committed
43
                assert(bl_samples.size() == (unsigned int)noises_bl.size());
44

45
46
                _dim_in = samples[0].size();
                _kernel_function = KernelFunction(_dim_in); // the cost of building a functor should be relatively low
47

48
49
                _dim_out = observations[0].size();
                _mean_function = MeanFunction(_dim_out); // the cost of building a functor should be relatively low
50

51
52
53
54
55
56
57
                _samples = samples;

                _observations.resize(observations.size(), _dim_out);
                for (int i = 0; i < _observations.rows(); ++i)
                    _observations.row(i) = observations[i];

                _mean_observation = _observations.colwise().mean();
58

Antoine Cully's avatar
Antoine Cully committed
59
60
                _noises = noises;
                _noises_bl = noises_bl;
61

62
63
64
                _bl_samples = bl_samples;

                this->_compute_obs_mean();
65
                this->_compute_full_kernel();
66

67
68
                if (!_bl_samples.empty())
                    this->_compute_bl_kernel();
69
            }
70

71
            /// Do not forget to call this if you use hyper-prameters optimization!!
Konstantinos Chatzilygeroudis's avatar
Konstantinos Chatzilygeroudis committed
72
73
            void optimize_hyperparams()
            {
74
75
                HyperParamsOptimizer()(*this);
            }
76

77
78
            /// add sample and update the GP. This code uses an incremental implementation of the Cholesky
            /// decomposition. It is therefore much faster than a call to compute()
            /// @param sample      new input point
            /// @param observation observation at this point
            /// @param noise       noise term for this sample (kernel diagonal)
            void add_sample(const Eigen::VectorXd& sample, const Eigen::VectorXd& observation, double noise)
            {
                if (_samples.empty()) {
                    // first (non-blacklisted) sample: input dimension may still be unknown
                    if (_bl_samples.empty()) {
                        _dim_in = sample.size();
                        _kernel_function = KernelFunction(_dim_in); // the cost of building a functor should be relatively low
                    }
                    else {
                        // _dim_in was already fixed by a blacklisted sample
                        assert(sample.size() == _dim_in);
                    }

                    // the first observation fixes the output dimension
                    _dim_out = observation.size();
                    _mean_function = MeanFunction(_dim_out); // the cost of building a functor should be relatively low
                }
                else {
                    assert(sample.size() == _dim_in);
                    assert(observation.size() == _dim_out);
                }

                _samples.push_back(sample);

                // append the observation as a new row
                _observations.conservativeResize(_observations.rows() + 1, _dim_out);
                _observations.bottomRows<1>() = observation.transpose();

                _mean_observation = _observations.colwise().mean();

                _noises.conservativeResize(_noises.size() + 1);
                _noises[_noises.size() - 1] = noise;
                //_noise = noise;

                this->_compute_obs_mean();
                // incremental rank-1 Cholesky update instead of a full refactorization
                this->_compute_incremental_kernel();

                if (!_bl_samples.empty())
                    this->_compute_bl_kernel();
            }

116
            /// add blacklisted sample and update the GP
            /// (a blacklisted sample has no observation; it only inflates the predicted variance)
            /// @param bl_sample new blacklisted input point
            /// @param noise     noise term for this point
            void add_bl_sample(const Eigen::VectorXd& bl_sample, double noise)
            {
                if (_samples.empty() && _bl_samples.empty()) {
                    // very first point seen by the model: it fixes the input dimension
                    _dim_in = bl_sample.size();
                    _kernel_function = KernelFunction(_dim_in); // the cost of building a functor should be relatively low
                }
                else {
                    assert(bl_sample.size() == _dim_in);
                }

                _bl_samples.push_back(bl_sample);

                _noises_bl.conservativeResize(_noises_bl.size() + 1);
                _noises_bl[_noises_bl.size() - 1] = noise;
                //_noise = noise;

                // the blacklist kernel needs at least one regular sample to be meaningful
                if (!_samples.empty()) {
                    this->_compute_bl_kernel();
                }
            }

138
            /**
139
             \\rst
140
             return :math:`\mu`, :math:`\sigma^2` (unormalized). If there is no sample, return the value according to the mean function. Using this method instead of separate calls to mu() and sigma() is more efficient because some computations are shared between mu() and sigma().
141
142
             \\endrst
	  		*/
143
144
145
146
            std::tuple<Eigen::VectorXd, double> query(const Eigen::VectorXd& v) const
            {
                if (_samples.size() == 0 && _bl_samples.size() == 0)
                    return std::make_tuple(_mean_function(v, *this),
147
                        _kernel_function(v, v));
148
149
150
151
152
153
154
155
156
157

                if (_samples.size() == 0)
                    return std::make_tuple(_mean_function(v, *this),
                        _sigma(v, _compute_k_bl(v, _compute_k(v))));

                Eigen::VectorXd k = _compute_k(v);
                return std::make_tuple(_mu(v, k), _sigma(v, _compute_k_bl(v, k)));
            }

            /**
158
             \\rst
159
             return :math:`\mu` (unormalized). If there is no sample, return the value according to the mean function.
160
161
             \\endrst
	  		*/
162
163
164
165
166
167
168
169
            Eigen::VectorXd mu(const Eigen::VectorXd& v) const
            {
                if (_samples.size() == 0)
                    return _mean_function(v, *this);
                return _mu(v, _compute_k(v));
            }

            /**
170
             \\rst
171
             return :math:`\sigma^2` (unormalized). If there is no sample, return the max :math:`\sigma^2`.
172
173
             \\endrst
	  		*/
174
175
176
            double sigma(const Eigen::VectorXd& v) const
            {
                if (_samples.size() == 0 && _bl_samples.size() == 0)
177
                    return _kernel_function(v, v);
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
                return _sigma(v, _compute_k_bl(v, _compute_k(v)));
            }

            /// return the number of dimensions of the input
            /// (only valid after compute()/add_sample()/add_bl_sample() or the (dim_in, dim_out) constructor)
            int dim_in() const
            {
                assert(_dim_in != -1); // need to compute first !
                return _dim_in;
            }

            /// return the number of dimensions of the output
            /// (only valid after compute()/add_sample() or the (dim_in, dim_out) constructor)
            int dim_out() const
            {
                assert(_dim_out != -1); // need to compute first !
                return _dim_out;
            }

            /// read-only access to the kernel (covariance) functor
            const KernelFunction& kernel_function() const { return _kernel_function; }

            /// mutable access (e.g. for hyper-parameter optimization)
            KernelFunction& kernel_function() { return _kernel_function; }

            /// read-only access to the mean functor
            const MeanFunction& mean_function() const { return _mean_function; }

            /// mutable access (e.g. for hyper-parameter optimization)
            MeanFunction& mean_function() { return _mean_function; }

Jean-Baptiste Mouret's avatar
Jean-Baptiste Mouret committed
203
            /// return the maximum observation (only call this if the output of the GP is of dimension 1)
204
205
206
207
208
209
            Eigen::VectorXd max_observation() const
            {
                if (_observations.cols() > 1)
                    std::cout << "WARNING max_observation with multi dimensional "
                                 "observations doesn't make sense"
                              << std::endl;
210
                return tools::make_vector(_observations.maxCoeff());
211
212
213
214
215
            }

            /// return the mean observation (only call this if the output of the GP is of dimension 1)
            /// @return the column-wise mean of the observations, or a zero vector if there are none
            Eigen::VectorXd mean_observation() const
            {
                // Without this check, a default-constructed GP with no samples would
                // call Eigen::VectorXd::Zero(-1) and crash inside Eigen.
                assert(_dim_out != -1); // need to compute first !
                return _samples.size() > 0 ? _mean_observation
                                           : Eigen::VectorXd::Zero(_dim_out);
            }

            /// mean-function values at the samples (one row per sample); see _compute_obs_mean()
            const Eigen::MatrixXd& mean_vector() const { return _mean_vector; }

            /// observations minus the mean-function values (the residuals the GP actually fits)
            const Eigen::MatrixXd& obs_mean() const { return _obs_mean; }

            /// return the number of samples used to compute the GP
            int nb_samples() const { return _samples.size(); }

            /** return the number of blacklisted samples used to compute the GP
             \\rst
             For the blacklist concept, see the Limbo-specific concept guide.
             \\endrst
             */
            int nb_bl_samples() const { return _bl_samples.size(); }

235
            ///  recomputes the GP from the stored samples/observations
            ///  @param update_obs_mean if false, skip recomputing the mean vector
            ///  (useful when only the kernel hyper-parameters changed)
            void recompute(bool update_obs_mean = true)
            {
                assert(!_samples.empty());

                if (update_obs_mean)
                    this->_compute_obs_mean();

                // full O(n^3) refactorization (contrast with add_sample's incremental update)
                this->_compute_full_kernel();

                if (!_bl_samples.empty())
                    this->_compute_bl_kernel();
            }

            /// return the likelihood (do not compute it!)
            double get_lik() const { return _lik; }

            /// set the likelihood (you need to compute it from outside!)
            void set_lik(const double& lik) { _lik = lik; }

            /// LLT matrix (from Cholesky decomposition)
            //const Eigen::LLT<Eigen::MatrixXd>& llt() const { return _llt; }
            const Eigen::MatrixXd& matrixL() const { return _matrixL; }

            /// alpha = K^{-1} * (observations - mean); see _compute_alpha()
            const Eigen::MatrixXd& alpha() const { return _alpha; }

            /// return the list of samples that have been tested so far
            const std::vector<Eigen::VectorXd>& samples() const { return _samples; }

        protected:
            int _dim_in;  // input dimension; -1 until known
            int _dim_out; // output dimension; -1 until known

            KernelFunction _kernel_function;
            MeanFunction _mean_function;

            std::vector<Eigen::VectorXd> _samples;
            Eigen::MatrixXd _observations; // one row per sample
            std::vector<Eigen::VectorXd> _bl_samples; // black listed samples
            Eigen::MatrixXd _mean_vector; // mean function evaluated at each sample
            Eigen::MatrixXd _obs_mean; // _observations - _mean_vector

            //double _noise;
            Eigen::VectorXd _noises;    // per-sample noise (kernel diagonal)
            Eigen::VectorXd _noises_bl; // per-blacklisted-sample noise

            Eigen::MatrixXd _alpha; // K^{-1} * _obs_mean (via triangular solves)
            Eigen::VectorXd _mean_observation;

            Eigen::MatrixXd _kernel; // full kernel (Gram) matrix, noise included

            // Eigen::MatrixXd _inverted_kernel;

            Eigen::MatrixXd _matrixL; // lower Cholesky factor of _kernel
            Eigen::MatrixXd _inv_bl_kernel; // inverse of the kernel extended with blacklisted points

            double _lik; // cached likelihood, set from outside via set_lik()

            // Evaluate the mean function at every sample and store the residuals
            // (observations minus prior mean) that the GP fits.
            void _compute_obs_mean()
            {
                const int n = _samples.size();
                _mean_vector.resize(n, _dim_out);
                for (int row = 0; row < n; ++row)
                    _mean_vector.row(row) = _mean_function(_samples[row], *this);
                _obs_mean = _observations - _mean_vector;
            }

301
            void _compute_full_kernel()
302
            {
303
304
305
306
307
308
                size_t n = _samples.size();
                _kernel.resize(n, n);

                // O(n^2) [should be negligible]
                for (size_t i = 0; i < n; i++)
                    for (size_t j = 0; j <= i; ++j)
Antoine Cully's avatar
Antoine Cully committed
309
                        _kernel(i, j) = _kernel_function(_samples[i], _samples[j]) + ((i == j) ? _noises[i] : 0); // noise only on the diagonal
310
311
312
313
314
315
316
317

                for (size_t i = 0; i < n; i++)
                    for (size_t j = 0; j < i; ++j)
                        _kernel(j, i) = _kernel(i, j);

                // O(n^3)
                _matrixL = Eigen::LLT<Eigen::MatrixXd>(_kernel).matrixL();

318
                this->_compute_alpha();
319
320
321
            }

            // Extend the kernel matrix and its Cholesky factor for the newest sample
            // (the last entry of _samples), then refresh alpha.
            void _compute_incremental_kernel()
            {
                // Incremental LLT
                // This part of the code is inspired from the Bayesopt Library (cholesky_add_row function).
                // However, the mathematical foundations can be easily retrieved by detailing the equations of the
                // extended L matrix that produces the desired kernel.
                size_t n = _samples.size();
                _kernel.conservativeResize(n, n);

                // fill the new last row/column of the (symmetric) kernel matrix
                for (size_t i = 0; i < n; ++i) {
                    _kernel(i, n - 1) = _kernel_function(_samples[i], _samples[n - 1]) + ((i == n - 1) ? _noises[i] : 0); // noise only on the diagonal
                    _kernel(n - 1, i) = _kernel(i, n - 1);
                }

                // grow L; the new row/column starts zeroed
                _matrixL.conservativeResizeLike(Eigen::MatrixXd::Zero(n, n));

                // forward substitution for the new row of L:
                // L(n-1, j) = (K(n-1, j) - sum_{k<j} L(j,k) * L(n-1,k)) / L(j,j)
                double L_j;
                for (size_t j = 0; j < n - 1; ++j) {
                    L_j = _kernel(n - 1, j) - (_matrixL.block(j, 0, 1, j) * _matrixL.block(n - 1, 0, 1, j).transpose())(0, 0);
                    _matrixL(n - 1, j) = (L_j) / _matrixL(j, j);
                }

                // new diagonal entry: sqrt of the remaining variance
                // NOTE(review): if the kernel update makes L_j negative (non-PD matrix),
                // sqrt yields NaN here — presumably the noise term prevents this; verify.
                L_j = _kernel(n - 1, n - 1) - (_matrixL.block(n - 1, 0, 1, n - 1) * _matrixL.block(n - 1, 0, 1, n - 1).transpose())(0, 0);
                _matrixL(n - 1, n - 1) = sqrt(L_j);

                this->_compute_alpha();
            }

350
            void _compute_alpha()
351
            {
352
                // alpha = K^{-1} * this->_obs_mean;
353
                Eigen::TriangularView<Eigen::MatrixXd, Eigen::Lower> triang = _matrixL.template triangularView<Eigen::Lower>();
354
355
                _alpha = triang.solve(_obs_mean);
                triang.adjoint().solveInPlace(_alpha);
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
            }

            // Compute _inv_bl_kernel, the inverse of the kernel matrix extended with the
            // blacklisted samples, using the blockwise (Schur-complement) inversion of
            // [[A, B], [B^T, D]] where A is the regular-sample kernel, B the cross-kernel
            // and D the blacklisted-sample kernel.
            void _compute_bl_kernel()
            {
                // A1 = A^{-1}, obtained from the cached Cholesky factor of A
                Eigen::MatrixXd A1 = Eigen::MatrixXd::Identity(this->_samples.size(), this->_samples.size());
                _matrixL.template triangularView<Eigen::Lower>().solveInPlace(A1);
                _matrixL.template triangularView<Eigen::Lower>().transpose().solveInPlace(A1);

                _inv_bl_kernel.resize(_samples.size() + _bl_samples.size(),
                    _samples.size() + _bl_samples.size());

                // B: cross-kernel between regular and blacklisted samples
                Eigen::MatrixXd B(_samples.size(), _bl_samples.size());
                for (size_t i = 0; i < _samples.size(); i++)
                    for (size_t j = 0; j < _bl_samples.size(); ++j)
                        B(i, j) = _kernel_function(_samples[i], _bl_samples[j]);

                // D: kernel between blacklisted samples (noise on the diagonal)
                Eigen::MatrixXd D(_bl_samples.size(), _bl_samples.size());
                for (size_t i = 0; i < _bl_samples.size(); i++)
                    for (size_t j = 0; j < _bl_samples.size(); ++j)
                        D(i, j) = _kernel_function(_bl_samples[i], _bl_samples[j]) + ((i == j) ? _noises_bl[i] : 0);

                // comA: Schur complement D - B^T A^{-1} B; comA1 = comA^{-1} via Cholesky
                Eigen::MatrixXd comA = (D - B.transpose() * A1 * B);
                Eigen::LLT<Eigen::MatrixXd> llt_bl(comA);
                Eigen::MatrixXd comA1 = Eigen::MatrixXd::Identity(_bl_samples.size(), _bl_samples.size());
                llt_bl.matrixL().solveInPlace(comA1);
                llt_bl.matrixL().transpose().solveInPlace(comA1);

                // fill the matrix block wise (standard blockwise-inverse formulas)
                _inv_bl_kernel.block(0, 0, _samples.size(), _samples.size()) = A1 + A1 * B * comA1 * B.transpose() * A1;
                _inv_bl_kernel.block(0, _samples.size(), _samples.size(),
                    _bl_samples.size())
                    = -A1 * B * comA1;
                // lower-left block is the transpose of the upper-right one (symmetry)
                _inv_bl_kernel.block(_samples.size(), 0, _bl_samples.size(),
                    _samples.size())
                    = _inv_bl_kernel.block(0, _samples.size(), _samples.size(),
                                         _bl_samples.size())
                          .transpose();
                _inv_bl_kernel.block(_samples.size(), _samples.size(), _bl_samples.size(),
                    _bl_samples.size())
                    = comA1;
            }

            // Posterior mean at v given the precomputed kernel vector k:
            // mu(v) = k^T * alpha + mean_function(v)
            Eigen::VectorXd _mu(const Eigen::VectorXd& v, const Eigen::VectorXd& k) const
            {
                return (k.transpose() * _alpha) + _mean_function(v, *this).transpose();
            }

            // Posterior variance at v given the (possibly blacklist-extended) kernel
            // vector k; clamped to 0 when numerically at/below epsilon.
            double _sigma(const Eigen::VectorXd& v, const Eigen::VectorXd& k) const
            {
                const double prior_var = _kernel_function(v, v);
                double res;
                if (_bl_samples.empty()) {
                    // k^T K^{-1} k computed via one triangular solve: ||L^{-1} k||^2
                    Eigen::VectorXd z = _matrixL.triangularView<Eigen::Lower>().solve(k);
                    res = prior_var - z.dot(z);
                }
                else {
                    res = prior_var - k.transpose() * _inv_bl_kernel * k;
                }

                if (res <= std::numeric_limits<double>::epsilon())
                    return 0;
                return res;
            }

            // Kernel vector between v and every stored sample.
            Eigen::VectorXd _compute_k(const Eigen::VectorXd& v) const
            {
                const int n = _samples.size();
                Eigen::VectorXd k(n);
                for (int idx = 0; idx < n; ++idx)
                    k[idx] = _kernel_function(_samples[idx], v);
                return k;
            }

            // Extend the kernel vector k with the blacklisted samples; returns k
            // unchanged when there are none.
            Eigen::VectorXd _compute_k_bl(const Eigen::VectorXd& v,
                const Eigen::VectorXd& k) const
            {
                if (_bl_samples.empty())
                    return k;

                const size_t ns = _samples.size();
                const size_t nb = _bl_samples.size();
                Eigen::VectorXd k_bl(ns + nb);

                k_bl.head(ns) = k;
                for (size_t i = 0; i < nb; ++i)
                    k_bl[ns + i] = _kernel_function(_bl_samples[i], v);
                return k_bl;
            }
        };
    }
}
442

443
#endif