//| Copyright Inria May 2015
//| This project has received funding from the European Research Council (ERC) under
//| the European Union's Horizon 2020 research and innovation programme (grant
//| agreement No 637972) - see http://www.resibots.eu
//|
//| Contributor(s):
//|   - Jean-Baptiste Mouret (jean-baptiste.mouret@inria.fr)
//|   - Antoine Cully (antoinecully@gmail.com)
//|   - Konstantinos Chatzilygeroudis (konstantinos.chatzilygeroudis@inria.fr)
//|   - Federico Allocati (fede.allocati@gmail.com)
//|   - Vaios Papaspyros (b.papaspyros@gmail.com)
//|
//| This software is a computer library whose purpose is to optimize continuous,
//| black-box functions. It mainly implements Gaussian processes and Bayesian
//| optimization.
//| Main repository: http://github.com/resibots/limbo
//| Documentation: http://www.resibots.eu/limbo
//|
//| This software is governed by the CeCILL-C license under French law and
//| abiding by the rules of distribution of free software.  You can  use,
//| modify and/ or redistribute the software under the terms of the CeCILL-C
//| license as circulated by CEA, CNRS and INRIA at the following URL
//| "http://www.cecill.info".
//|
//| As a counterpart to the access to the source code and  rights to copy,
//| modify and redistribute granted by the license, users are provided only
//| with a limited warranty  and the software's author,  the holder of the
//| economic rights,  and the successive licensors  have only  limited
//| liability.
//|
//| In this respect, the user's attention is drawn to the risks associated
//| with loading,  using,  modifying and/or developing or reproducing the
//| software by the user in light of its specific status of free software,
//| that may mean  that it is complicated to manipulate,  and  that  also
//| therefore means  that it is reserved for developers  and  experienced
//| professionals having in-depth computer knowledge. Users are therefore
//| encouraged to load and test the software's suitability as regards their
//| requirements in conditions enabling the security of their systems and/or
//| data to be ensured and,  more generally, to use and operate it in the
//| same conditions as regards security.
//|
//| The fact that you are presently reading this means that you have had
//| knowledge of the CeCILL-C license and that you accept its terms.
//|
#ifndef LIMBO_MODEL_GP_HPP
#define LIMBO_MODEL_GP_HPP

#include <cassert>
#include <cmath>
#include <iostream>
#include <limits>
#include <vector>

#include <Eigen/Cholesky>
#include <Eigen/Core>
#include <Eigen/LU>

#include <limbo/model/gp/no_lf_opt.hpp>
#include <limbo/tools.hpp>

namespace limbo {
    namespace model {
        /// @ingroup model
        /// A classic Gaussian process.
        /// It is parametrized by:
        /// - a mean function
        /// - [optional] an optimizer for the hyper-parameters
        template <typename Params, typename KernelFunction, typename MeanFunction, class HyperParamsOptimizer = gp::NoLFOpt<Params>>
        class GP {
        public:
            /// useful because the model might be created before knowing anything about the process
            GP() : _dim_in(-1), _dim_out(-1) {}

            /// useful because the model might be created before having samples
            GP(int dim_in, int dim_out)
                : _dim_in(dim_in), _dim_out(dim_out), _kernel_function(dim_in), _mean_function(dim_out) {}

            /// Compute the GP from samples, observations, and noise. [optional: blacklisted samples]. This call needs to be explicit!
            void compute(const std::vector<Eigen::VectorXd>& samples,
                const std::vector<Eigen::VectorXd>& observations,
                const Eigen::VectorXd& noises,
                const std::vector<Eigen::VectorXd>& bl_samples = std::vector<Eigen::VectorXd>(),
                const Eigen::VectorXd& noises_bl = Eigen::VectorXd())
            {
                assert(samples.size() != 0);
                assert(observations.size() != 0);
                assert(samples.size() == observations.size());
                assert(bl_samples.size() == (unsigned int)noises_bl.size());

                _dim_in = samples[0].size();
                _kernel_function = KernelFunction(_dim_in); // the cost of building a functor should be relatively low

                _dim_out = observations[0].size();
                _mean_function = MeanFunction(_dim_out); // the cost of building a functor should be relatively low

                _samples = samples;

                _observations.resize(observations.size(), _dim_out);
                for (int i = 0; i < _observations.rows(); ++i)
                    _observations.row(i) = observations[i];

                _mean_observation = _observations.colwise().mean();

                _noises = noises;
                _noises_bl = noises_bl;

                _bl_samples = bl_samples;

                this->_compute_obs_mean();
                this->_compute_full_kernel();

                if (!_bl_samples.empty())
                    this->_compute_bl_kernel();
            }
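            // A minimal usage sketch (hedged: kernel::MaternFiveHalves, mean::Data and the
            // Params struct are assumptions taken from other limbo headers, not from this file):
            //
            //   using GP_t = limbo::model::GP<Params, kernel::MaternFiveHalves<Params>, mean::Data<Params>>;
            //   GP_t gp(3, 1); // 3-D inputs, 1-D observations
            //   gp.compute(samples, observations, Eigen::VectorXd::Constant(samples.size(), 0.01));
            //   Eigen::VectorXd mu;
            //   double sigma_sq;
            //   std::tie(mu, sigma_sq) = gp.query(Eigen::VectorXd::Zero(3)); // shared mu/sigma computation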

            /// Do not forget to call this if you use hyper-parameter optimization!!
            void optimize_hyperparams()
            {
                _hp_optimize(*this);
            }
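
            // A minimal sketch of hyper-parameter optimization (names below are assumptions:
            // kernel::SquaredExpARD and model::gp::KernelLFOpt live in other limbo headers
            // that are not included here):
            //
            //   using GP_t = limbo::model::GP<Params, kernel::SquaredExpARD<Params>,
            //       mean::Data<Params>, limbo::model::gp::KernelLFOpt<Params>>;
            //   GP_t gp(3, 1);
            //   gp.compute(samples, observations, noises);
            //   gp.optimize_hyperparams(); // maximizes the log-likelihood w.r.t. the kernel params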

            /// add sample and update the GP. This code uses an incremental implementation of the Cholesky
            /// decomposition. It is therefore much faster than a call to compute()
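            // A minimal incremental-use sketch (hedged: `f`, `n_iter`, and the call to
            // limbo::tools::random_vector are placeholders/assumptions, not part of this file):
            //
            //   for (int i = 0; i < n_iter; ++i) {
            //       Eigen::VectorXd x = tools::random_vector(3); // random 3-D input
            //       gp.add_sample(x, f(x), 0.01); // O(n^2) update instead of a full O(n^3) recompute
            //   }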
            void add_sample(const Eigen::VectorXd& sample, const Eigen::VectorXd& observation, double noise)
            {
                if (_samples.empty()) {
                    if (_bl_samples.empty()) {
                        _dim_in = sample.size();
                        _kernel_function = KernelFunction(_dim_in); // the cost of building a functor should be relatively low
                    }
                    else {
                        assert(sample.size() == _dim_in);
                    }

                    _dim_out = observation.size();
                    _mean_function = MeanFunction(_dim_out); // the cost of building a functor should be relatively low
                }
                else {
                    assert(sample.size() == _dim_in);
                    assert(observation.size() == _dim_out);
                }

                _samples.push_back(sample);

                _observations.conservativeResize(_observations.rows() + 1, _dim_out);
                _observations.bottomRows<1>() = observation.transpose();

                _mean_observation = _observations.colwise().mean();

                _noises.conservativeResize(_noises.size() + 1);
                _noises[_noises.size() - 1] = noise;
                //_noise = noise;

                this->_compute_obs_mean();
                this->_compute_incremental_kernel();

                if (!_bl_samples.empty())
                    this->_compute_bl_kernel();
            }

            /// add blacklisted sample and update the GP
            void add_bl_sample(const Eigen::VectorXd& bl_sample, double noise)
            {
                if (_samples.empty() && _bl_samples.empty()) {
                    _dim_in = bl_sample.size();
                    _kernel_function = KernelFunction(_dim_in); // the cost of building a functor should be relatively low
                }
                else {
                    assert(bl_sample.size() == _dim_in);
                }

                _bl_samples.push_back(bl_sample);

                _noises_bl.conservativeResize(_noises_bl.size() + 1);
                _noises_bl[_noises_bl.size() - 1] = noise;
                //_noise = noise;

                if (!_samples.empty()) {
                    this->_compute_bl_kernel();
                }
            }

            /**
             \\rst
             return :math:`\mu`, :math:`\sigma^2` (unnormalized). If there is no sample, return the value according to the mean function. Using this method instead of separate calls to mu() and sigma() is more efficient because some computations are shared between mu() and sigma().
             \\endrst
            */
            std::tuple<Eigen::VectorXd, double> query(const Eigen::VectorXd& v) const
            {
                if (_samples.size() == 0 && _bl_samples.size() == 0)
                    return std::make_tuple(_mean_function(v, *this),
                        _kernel_function(v, v));

                if (_samples.size() == 0)
                    return std::make_tuple(_mean_function(v, *this),
                        _sigma(v, _compute_k_bl(v, _compute_k(v))));

                Eigen::VectorXd k = _compute_k(v);
                return std::make_tuple(_mu(v, k), _sigma(v, _compute_k_bl(v, k)));
            }

            /**
             \\rst
             return :math:`\mu` (unnormalized). If there is no sample, return the value according to the mean function.
             \\endrst
            */
            Eigen::VectorXd mu(const Eigen::VectorXd& v) const
            {
                if (_samples.size() == 0)
                    return _mean_function(v, *this);
                return _mu(v, _compute_k(v));
            }

            /**
             \\rst
             return :math:`\sigma^2` (unnormalized). If there is no sample, return the max :math:`\sigma^2`.
             \\endrst
            */
            double sigma(const Eigen::VectorXd& v) const
            {
                if (_samples.size() == 0 && _bl_samples.size() == 0)
                    return _kernel_function(v, v);

                return _sigma(v, _compute_k_bl(v, _compute_k(v)));
            }

            /// return the number of dimensions of the input
            int dim_in() const
            {
                assert(_dim_in != -1); // need to compute first !
                return _dim_in;
            }

            /// return the number of dimensions of the output
            int dim_out() const
            {
                assert(_dim_out != -1); // need to compute first !
                return _dim_out;
            }

            const KernelFunction& kernel_function() const { return _kernel_function; }

            KernelFunction& kernel_function() { return _kernel_function; }

            const MeanFunction& mean_function() const { return _mean_function; }

            MeanFunction& mean_function() { return _mean_function; }

            /// return the maximum observation (only call this if the output of the GP is of dimension 1)
            Eigen::VectorXd max_observation() const
            {
                if (_observations.cols() > 1)
                    std::cout << "WARNING max_observation with multi dimensional "
                                 "observations doesn't make sense"
                              << std::endl;
                return tools::make_vector(_observations.maxCoeff());
            }

            /// return the mean observation (only call this if the output of the GP is of dimension 1)
            Eigen::VectorXd mean_observation() const
            {
                // TODO: Check if _dim_out is correct?!
                return _samples.size() > 0 ? _mean_observation
                                           : Eigen::VectorXd::Zero(_dim_out);
            }

            const Eigen::MatrixXd& mean_vector() const { return _mean_vector; }

            const Eigen::MatrixXd& obs_mean() const { return _obs_mean; }

            /// return the number of samples used to compute the GP
            int nb_samples() const { return _samples.size(); }

            /** return the number of blacklisted samples used to compute the GP
             \\rst
             For the blacklist concept, see the Limbo-specific concept guide.
             \\endrst
            */
            int nb_bl_samples() const { return _bl_samples.size(); }

            /// recomputes the GP
            void recompute(bool update_obs_mean = true)
            {
                assert(!_samples.empty());

                if (update_obs_mean)
                    this->_compute_obs_mean();

                this->_compute_full_kernel();

                if (!_bl_samples.empty())
                    this->_compute_bl_kernel();
            }

            /// return the likelihood (do not compute it!)
            double get_lik() const { return _lik; }

            /// set the likelihood (you need to compute it from outside!)
            void set_lik(const double& lik) { _lik = lik; }

            /// LLT matrix (from Cholesky decomposition)
            //const Eigen::LLT<Eigen::MatrixXd>& llt() const { return _llt; }
            const Eigen::MatrixXd& matrixL() const { return _matrixL; }

            const Eigen::MatrixXd& alpha() const { return _alpha; }

            /// return the list of samples that have been tested so far
            const std::vector<Eigen::VectorXd>& samples() const { return _samples; }

        protected:
            int _dim_in;
            int _dim_out;

            KernelFunction _kernel_function;
            MeanFunction _mean_function;

            std::vector<Eigen::VectorXd> _samples;
            Eigen::MatrixXd _observations;
            std::vector<Eigen::VectorXd> _bl_samples; // black listed samples
            Eigen::MatrixXd _mean_vector;
            Eigen::MatrixXd _obs_mean;

            //double _noise;
            Eigen::VectorXd _noises;
            Eigen::VectorXd _noises_bl;

            Eigen::MatrixXd _alpha;
            Eigen::VectorXd _mean_observation;

            Eigen::MatrixXd _kernel;

            // Eigen::MatrixXd _inverted_kernel;

            Eigen::MatrixXd _matrixL;
            Eigen::MatrixXd _inv_bl_kernel;

            double _lik;

            HyperParamsOptimizer _hp_optimize;

            void _compute_obs_mean()
            {
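                // Note: the GP is fit to the residuals (observations - prior mean); the prior
                // mean is added back in _mu(), so _obs_mean holds y - m(X).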
                _mean_vector.resize(_samples.size(), _dim_out);
                for (int i = 0; i < _mean_vector.rows(); i++)
                    _mean_vector.row(i) = _mean_function(_samples[i], *this);
                _obs_mean = _observations - _mean_vector;
            }

            void _compute_full_kernel()
            {
                size_t n = _samples.size();
                _kernel.resize(n, n);

                // O(n^2) [should be negligible]
                for (size_t i = 0; i < n; i++)
                    for (size_t j = 0; j <= i; ++j)
                        _kernel(i, j) = _kernel_function(_samples[i], _samples[j]) + ((i == j) ? _noises[i] : 0); // noise only on the diagonal

                for (size_t i = 0; i < n; i++)
                    for (size_t j = 0; j < i; ++j)
                        _kernel(j, i) = _kernel(i, j);

                // O(n^3)
                _matrixL = Eigen::LLT<Eigen::MatrixXd>(_kernel).matrixL();

                this->_compute_alpha();
            }

            void _compute_incremental_kernel()
            {
                // Incremental LLT
                // This part of the code is inspired by the Bayesopt library (cholesky_add_row function).
                // However, the mathematical foundations can easily be recovered by detailing the equations of the
                // extended L matrix that produces the desired kernel.
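                // Sketch of the underlying algebra (annotation): with the factored kernel
                // K = L L^T and a new kernel column k (diagonal entry k_nn), the extended factor is
                //   L' = [[L, 0], [l^T, l_nn]], with l = L^{-1} k and l_nn = sqrt(k_nn - l^T l),
                // which the forward substitution below computes row by row in O(n^2).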

                size_t n = _samples.size();
                _kernel.conservativeResize(n, n);

                for (size_t i = 0; i < n; ++i) {
                    _kernel(i, n - 1) = _kernel_function(_samples[i], _samples[n - 1]) + ((i == n - 1) ? _noises[i] : 0); // noise only on the diagonal
                    _kernel(n - 1, i) = _kernel(i, n - 1);
                }

                _matrixL.conservativeResizeLike(Eigen::MatrixXd::Zero(n, n));

                double L_j;
                for (size_t j = 0; j < n - 1; ++j) {
                    L_j = _kernel(n - 1, j) - (_matrixL.block(j, 0, 1, j) * _matrixL.block(n - 1, 0, 1, j).transpose())(0, 0);
                    _matrixL(n - 1, j) = (L_j) / _matrixL(j, j);
                }

                L_j = _kernel(n - 1, n - 1) - (_matrixL.block(n - 1, 0, 1, n - 1) * _matrixL.block(n - 1, 0, 1, n - 1).transpose())(0, 0);
                _matrixL(n - 1, n - 1) = std::sqrt(L_j);

                this->_compute_alpha();
            }

            void _compute_alpha()
            {
                // alpha = K^{-1} * this->_obs_mean;
                Eigen::TriangularView<Eigen::MatrixXd, Eigen::Lower> triang = _matrixL.template triangularView<Eigen::Lower>();
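                // Two triangular solves, L y = _obs_mean then L^T alpha = y, yield
                // alpha = (L L^T)^{-1} _obs_mean without forming the inverse explicitly.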
                _alpha = triang.solve(_obs_mean);
                triang.adjoint().solveInPlace(_alpha);
            }

            void _compute_bl_kernel()
            {
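                // Note: the joint kernel over [samples; bl_samples] has the block form
                // [[K, B], [B^T, D]]. Below, A1 = K^{-1} (via the existing Cholesky factor) and
                // comA1 = (D - B^T K^{-1} B)^{-1} (the inverted Schur complement) are combined
                // through the standard block-inversion formula to fill _inv_bl_kernel.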
                Eigen::MatrixXd A1 = Eigen::MatrixXd::Identity(this->_samples.size(), this->_samples.size());
                _matrixL.template triangularView<Eigen::Lower>().solveInPlace(A1);
                _matrixL.template triangularView<Eigen::Lower>().transpose().solveInPlace(A1);

                _inv_bl_kernel.resize(_samples.size() + _bl_samples.size(),
                    _samples.size() + _bl_samples.size());

                Eigen::MatrixXd B(_samples.size(), _bl_samples.size());
                for (size_t i = 0; i < _samples.size(); i++)
                    for (size_t j = 0; j < _bl_samples.size(); ++j)
                        B(i, j) = _kernel_function(_samples[i], _bl_samples[j]);

                Eigen::MatrixXd D(_bl_samples.size(), _bl_samples.size());
                for (size_t i = 0; i < _bl_samples.size(); i++)
                    for (size_t j = 0; j < _bl_samples.size(); ++j)
                        D(i, j) = _kernel_function(_bl_samples[i], _bl_samples[j]) + ((i == j) ? _noises_bl[i] : 0);

                Eigen::MatrixXd comA = (D - B.transpose() * A1 * B);
                Eigen::LLT<Eigen::MatrixXd> llt_bl(comA);
                Eigen::MatrixXd comA1 = Eigen::MatrixXd::Identity(_bl_samples.size(), _bl_samples.size());
                llt_bl.matrixL().solveInPlace(comA1);
                llt_bl.matrixL().transpose().solveInPlace(comA1);

                // fill the matrix block wise
                _inv_bl_kernel.block(0, 0, _samples.size(), _samples.size()) = A1 + A1 * B * comA1 * B.transpose() * A1;
                _inv_bl_kernel.block(0, _samples.size(), _samples.size(),
                    _bl_samples.size())
                    = -A1 * B * comA1;
                _inv_bl_kernel.block(_samples.size(), 0, _bl_samples.size(),
                    _samples.size())
                    = _inv_bl_kernel.block(0, _samples.size(), _samples.size(),
                                         _bl_samples.size())
                          .transpose();
                _inv_bl_kernel.block(_samples.size(), _samples.size(), _bl_samples.size(),
                    _bl_samples.size())
                    = comA1;
            }

            Eigen::VectorXd _mu(const Eigen::VectorXd& v, const Eigen::VectorXd& k) const
            {
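                // Posterior mean: mu(v) = k(v, X)^T alpha + m(v), with alpha precomputed in _compute_alpha().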
                return (k.transpose() * _alpha) + _mean_function(v, *this).transpose();
            }

            double _sigma(const Eigen::VectorXd& v, const Eigen::VectorXd& k) const
            {
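                // Predictive variance: sigma^2(v) = k(v, v) - k(v, X)^T K^{-1} k(v, X).
                // With K = L L^T, solving L z = k gives z.dot(z) = k^T K^{-1} k without forming K^{-1}.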
                double res;
                if (_bl_samples.size() == 0) {
                    Eigen::VectorXd z = _matrixL.triangularView<Eigen::Lower>().solve(k);
                    res = _kernel_function(v, v) - z.dot(z);
                }
                else {
                    res = _kernel_function(v, v) - k.transpose() * _inv_bl_kernel * k;
                }

                return (res <= std::numeric_limits<double>::epsilon()) ? 0 : res;
            }

            Eigen::VectorXd _compute_k(const Eigen::VectorXd& v) const
            {
                Eigen::VectorXd k(_samples.size());
                for (int i = 0; i < k.size(); i++)
                    k[i] = _kernel_function(_samples[i], v);
                return k;
            }

            Eigen::VectorXd _compute_k_bl(const Eigen::VectorXd& v,
                const Eigen::VectorXd& k) const
            {
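                // k_bl stacks [k(X, v); k(X_bl, v)] to match the row/column layout of _inv_bl_kernel.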
                if (_bl_samples.size() == 0) {
                    return k;
                }

                Eigen::VectorXd k_bl(_samples.size() + _bl_samples.size());

                k_bl.head(_samples.size()) = k;
                for (size_t i = 0; i < _bl_samples.size(); i++)
                    k_bl[i + this->_samples.size()] = this->_kernel_function(_bl_samples[i], v);
                return k_bl;
            }
        };
    }
}

#endif