//| Copyright Inria May 2015
//| This project has received funding from the European Research Council (ERC) under
//| the European Union's Horizon 2020 research and innovation programme (grant
//| agreement No 637972) - see http://www.resibots.eu
//|
//| Contributor(s):
//|   - Jean-Baptiste Mouret (jean-baptiste.mouret@inria.fr)
//|   - Antoine Cully (antoinecully@gmail.com)
//|   - Konstantinos Chatzilygeroudis (konstantinos.chatzilygeroudis@inria.fr)
//|   - Federico Allocati (fede.allocati@gmail.com)
//|   - Vaios Papaspyros (b.papaspyros@gmail.com)
//|   - Roberto Rama (bertoski@gmail.com)
//|
//| This software is a computer library whose purpose is to optimize continuous,
//| black-box functions. It mainly implements Gaussian processes and Bayesian
//| optimization.
//| Main repository: http://github.com/resibots/limbo
//| Documentation: http://www.resibots.eu/limbo
//|
//| This software is governed by the CeCILL-C license under French law and
//| abiding by the rules of distribution of free software.  You can  use,
//| modify and/ or redistribute the software under the terms of the CeCILL-C
//| license as circulated by CEA, CNRS and INRIA at the following URL
//| "http://www.cecill.info".
//|
//| As a counterpart to the access to the source code and  rights to copy,
//| modify and redistribute granted by the license, users are provided only
//| with a limited warranty  and the software's author,  the holder of the
//| economic rights,  and the successive licensors  have only  limited
//| liability.
//|
//| In this respect, the user's attention is drawn to the risks associated
//| with loading,  using,  modifying and/or developing or reproducing the
//| software by the user in light of its specific status of free software,
//| that may mean  that it is complicated to manipulate,  and  that  also
//| therefore means  that it is reserved for developers  and  experienced
//| professionals having in-depth computer knowledge. Users are therefore
//| encouraged to load and test the software's suitability as regards their
//| requirements in conditions enabling the security of their systems and/or
//| data to be ensured and,  more generally, to use and operate it in the
//| same conditions as regards security.
//|
//| The fact that you are presently reading this means that you have had
//| knowledge of the CeCILL-C license and that you accept its terms.
//|
#ifndef LIMBO_MODEL_GP_HPP
#define LIMBO_MODEL_GP_HPP

#include <cassert>
#include <cmath>
#include <iostream>
#include <limits>
#include <tuple>
#include <vector>

#include <Eigen/Cholesky>
#include <Eigen/Core>
#include <Eigen/LU>

#include <limbo/model/gp/no_lf_opt.hpp>
#include <limbo/tools.hpp>

namespace limbo {
    namespace model {
        /// @ingroup model
        /// A classic Gaussian process.
        /// It is parametrized by:
        /// - a kernel function
        /// - a mean function
        /// - [optional] an optimizer for the hyper-parameters
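        ///
        /// A minimal usage sketch (``kernel::MaternFiveHalves`` and ``mean::Data`` stand in for
        /// any functors implementing the limbo kernel/mean interface; ``samples``/``observations``
        /// are assumed to be filled ``std::vector<Eigen::VectorXd>`` and ``noises`` an ``Eigen::VectorXd``):
        ///
        ///     using GP_t = limbo::model::GP<Params, kernel::MaternFiveHalves<Params>, mean::Data<Params>>;
        ///     GP_t gp(2, 1); // 2-D inputs, 1-D observations
        ///     gp.compute(samples, observations, noises);
        ///     Eigen::VectorXd mu;
        ///     double sigma_sq;
        ///     std::tie(mu, sigma_sq) = gp.query(Eigen::VectorXd::Zero(2));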
        template <typename Params, typename KernelFunction, typename MeanFunction, class HyperParamsOptimizer = gp::NoLFOpt<Params>>
        class GP {
        public:
            /// useful because the model might be created before knowing anything about the process
            GP() : _dim_in(-1), _dim_out(-1) {}

            /// useful because the model might be created before having samples
            GP(int dim_in, int dim_out)
                : _dim_in(dim_in), _dim_out(dim_out), _kernel_function(dim_in), _mean_function(dim_out) {}

            /// Compute the GP from samples, observations, and noises. This call needs to be explicit!
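            /// (noises is a per-sample noise vector; each entry is added to the corresponding
            /// diagonal element of the kernel matrix)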
            void compute(const std::vector<Eigen::VectorXd>& samples,
                const std::vector<Eigen::VectorXd>& observations,
                const Eigen::VectorXd& noises, bool compute_kernel = true)
            {
                assert(samples.size() != 0);
                assert(observations.size() != 0);
                assert(samples.size() == observations.size());

                if (_dim_in != samples[0].size()) {
                    _dim_in = samples[0].size();
                    _kernel_function = KernelFunction(_dim_in); // the cost of building a functor should be relatively low
                }

                if (_dim_out != observations[0].size()) {
                    _dim_out = observations[0].size();
                    _mean_function = MeanFunction(_dim_out); // the cost of building a functor should be relatively low
                }

                _samples = samples;

                _observations.resize(observations.size(), _dim_out);
                for (int i = 0; i < _observations.rows(); ++i)
                    _observations.row(i) = observations[i];

                _mean_observation = _observations.colwise().mean();

                _noises = noises;

                this->_compute_obs_mean();
                if (compute_kernel)
                    this->_compute_full_kernel();
            }

            /// Do not forget to call this if you use hyper-parameter optimization!!
            void optimize_hyperparams()
            {
                _hp_optimize(*this);
            }

            /// add sample and update the GP. This code uses an incremental implementation of the Cholesky
            /// decomposition. It is therefore much faster than a call to compute()
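            /// (each incremental update costs O(n^2), versus O(n^3) for a full decomposition)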
            void add_sample(const Eigen::VectorXd& sample, const Eigen::VectorXd& observation, double noise)
            {
                if (_samples.empty()) {
                    if (_dim_in != sample.size()) {
                        _dim_in = sample.size();
                        _kernel_function = KernelFunction(_dim_in); // the cost of building a functor should be relatively low
                    }
                    if (_dim_out != observation.size()) {
                        _dim_out = observation.size();
                        _mean_function = MeanFunction(_dim_out); // the cost of building a functor should be relatively low
                    }
                }
                else {
                    assert(sample.size() == _dim_in);
                    assert(observation.size() == _dim_out);
                }

                _samples.push_back(sample);

                _observations.conservativeResize(_observations.rows() + 1, _dim_out);
                _observations.bottomRows<1>() = observation.transpose();

                _mean_observation = _observations.colwise().mean();

                _noises.conservativeResize(_noises.size() + 1);
                _noises[_noises.size() - 1] = noise;
                //_noise = noise;

                this->_compute_obs_mean();
                this->_compute_incremental_kernel();
            }

            /**
             \\rst
             return :math:`\mu`, :math:`\sigma^2` (unnormalized). If there is no sample, return the value according to the mean function. Using this method instead of separate calls to mu() and sigma() is more efficient because some computations are shared between mu() and sigma().
             \\endrst
            */
            std::tuple<Eigen::VectorXd, double> query(const Eigen::VectorXd& v) const
            {
                if (_samples.size() == 0)
                    return std::make_tuple(_mean_function(v, *this),
                        _kernel_function(v, v));

                Eigen::VectorXd k = _compute_k(v);
                return std::make_tuple(_mu(v, k), _sigma(v, k));
            }

            /**
             \\rst
             return :math:`\mu` (unnormalized). If there is no sample, return the value according to the mean function.
             \\endrst
            */
            Eigen::VectorXd mu(const Eigen::VectorXd& v) const
            {
                if (_samples.size() == 0)
                    return _mean_function(v, *this);
                return _mu(v, _compute_k(v));
            }

            /**
             \\rst
             return :math:`\sigma^2` (unnormalized). If there is no sample, return the max :math:`\sigma^2`.
             \\endrst
            */
            double sigma(const Eigen::VectorXd& v) const
            {
                if (_samples.size() == 0)
                    return _kernel_function(v, v);
                return _sigma(v, _compute_k(v));
            }

            /// return the number of dimensions of the input
            int dim_in() const
            {
                assert(_dim_in != -1); // need to compute first !
                return _dim_in;
            }

            /// return the number of dimensions of the output
            int dim_out() const
            {
                assert(_dim_out != -1); // need to compute first !
                return _dim_out;
            }

            const KernelFunction& kernel_function() const { return _kernel_function; }

            KernelFunction& kernel_function() { return _kernel_function; }

            const MeanFunction& mean_function() const { return _mean_function; }

            MeanFunction& mean_function() { return _mean_function; }

            /// return the maximum observation (only call this if the output of the GP is of dimension 1)
            Eigen::VectorXd max_observation() const
            {
                if (_observations.cols() > 1)
                    std::cout << "WARNING max_observation with multi dimensional "
                                 "observations doesn't make sense"
                              << std::endl;
                return tools::make_vector(_observations.maxCoeff());
            }

            /// return the mean observation (only call this if the output of the GP is of dimension 1)
            Eigen::VectorXd mean_observation() const
            {
                // TODO: Check if _dim_out is correct?!
                return _samples.size() > 0 ? _mean_observation
                                           : Eigen::VectorXd::Zero(_dim_out);
            }

            const Eigen::MatrixXd& mean_vector() const { return _mean_vector; }

            const Eigen::MatrixXd& obs_mean() const { return _obs_mean; }

            /// return the number of samples used to compute the GP
            int nb_samples() const { return _samples.size(); }

            /// recomputes the GP
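            /// (pass update_full_kernel = false when the kernel matrix is unchanged, e.g. only
            /// the mean function was modified: the Cholesky factor is reused and only alpha is recomputed)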
            void recompute(bool update_obs_mean = true, bool update_full_kernel = true)
            {
                assert(!_samples.empty());

                if (update_obs_mean)
                    this->_compute_obs_mean();

                if (update_full_kernel)
                    this->_compute_full_kernel();
                else
                    this->_compute_alpha();
            }

            /// return the likelihood (do not compute it!)
            double get_lik() const { return _lik; }

            /// set the likelihood (you need to compute it from outside!)
            void set_lik(const double& lik) { _lik = lik; }

            /// LLT matrix (from Cholesky decomposition)
            //const Eigen::LLT<Eigen::MatrixXd>& llt() const { return _llt; }
            const Eigen::MatrixXd& matrixL() const { return _matrixL; }

            const Eigen::MatrixXd& alpha() const { return _alpha; }

            /// return the list of samples that have been tested so far
            const std::vector<Eigen::VectorXd>& samples() const { return _samples; }

        protected:
            int _dim_in;
            int _dim_out;

            KernelFunction _kernel_function;
            MeanFunction _mean_function;

            std::vector<Eigen::VectorXd> _samples;
            Eigen::MatrixXd _observations;
            Eigen::MatrixXd _mean_vector;
            Eigen::MatrixXd _obs_mean;

            Eigen::VectorXd _noises;
            Eigen::VectorXd _noises_bl;

            Eigen::MatrixXd _alpha;
            Eigen::VectorXd _mean_observation;

            Eigen::MatrixXd _kernel;

            Eigen::MatrixXd _matrixL;

            double _lik;

            HyperParamsOptimizer _hp_optimize;

            void _compute_obs_mean()
            {
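                // the GP is fitted on the residuals: the observations minus the prior mean function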
                _mean_vector.resize(_samples.size(), _dim_out);
                for (int i = 0; i < _mean_vector.rows(); i++)
                    _mean_vector.row(i) = _mean_function(_samples[i], *this);
                _obs_mean = _observations - _mean_vector;
            }

            void _compute_full_kernel()
            {
                size_t n = _samples.size();
                _kernel.resize(n, n);

                // O(n^2) [should be negligible]
                for (size_t i = 0; i < n; i++)
                    for (size_t j = 0; j <= i; ++j)
                        _kernel(i, j) = _kernel_function(_samples[i], _samples[j]) + ((i == j) ? _noises[i] : 0); // noise only on the diagonal

                for (size_t i = 0; i < n; i++)
                    for (size_t j = 0; j < i; ++j)
                        _kernel(j, i) = _kernel(i, j);

                // O(n^3)
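                // Cholesky decomposition: _kernel = _matrixL * _matrixL^T (lower triangular factor)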
                _matrixL = Eigen::LLT<Eigen::MatrixXd>(_kernel).matrixL();

                this->_compute_alpha();
            }

            void _compute_incremental_kernel()
            {
                // Incremental LLT
                // This part of the code is inspired by the Bayesopt library (cholesky_add_row function).
                // However, the mathematical foundations can easily be retrieved by detailing the
                // equations of the extended L matrix that produces the desired kernel.
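                //
                // Concretely, if L is the Cholesky factor of the previous (n-1)x(n-1) kernel
                // matrix and k_i = _kernel(n-1, i) are the new entries, the extended factor
                // gains one row l obtained by forward substitution, L l^T = [k_0 ... k_{n-2}]^T
                // (the loop below), plus the diagonal term L(n-1, n-1) = sqrt(k_{n-1} - l l^T).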

                size_t n = _samples.size();
                _kernel.conservativeResize(n, n);

                for (size_t i = 0; i < n; ++i) {
                    _kernel(i, n - 1) = _kernel_function(_samples[i], _samples[n - 1]) + ((i == n - 1) ? _noises[i] : 0); // noise only on the diagonal
                    _kernel(n - 1, i) = _kernel(i, n - 1);
                }

                _matrixL.conservativeResizeLike(Eigen::MatrixXd::Zero(n, n));

                double L_j;
                for (size_t j = 0; j < n - 1; ++j) {
                    L_j = _kernel(n - 1, j) - (_matrixL.block(j, 0, 1, j) * _matrixL.block(n - 1, 0, 1, j).transpose())(0, 0);
                    _matrixL(n - 1, j) = (L_j) / _matrixL(j, j);
                }

                L_j = _kernel(n - 1, n - 1) - (_matrixL.block(n - 1, 0, 1, n - 1) * _matrixL.block(n - 1, 0, 1, n - 1).transpose())(0, 0);
                _matrixL(n - 1, n - 1) = std::sqrt(L_j);

                this->_compute_alpha();
            }

            void _compute_alpha()
            {
                // alpha = K^{-1} * this->_obs_mean;
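                // since K = L L^T, this is done with two triangular substitutions:
                //   L y = _obs_mean (forward), then L^T alpha = y (backward)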
                Eigen::TriangularView<Eigen::MatrixXd, Eigen::Lower> triang = _matrixL.template triangularView<Eigen::Lower>();
                _alpha = triang.solve(_obs_mean);
                triang.adjoint().solveInPlace(_alpha);
            }

            Eigen::VectorXd _mu(const Eigen::VectorXd& v, const Eigen::VectorXd& k) const
            {
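                // predictive mean: mu(v) = k(v)^T alpha + m(v), with alpha = K^{-1} (obs - m)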
                return (k.transpose() * _alpha) + _mean_function(v, *this).transpose();
            }

            double _sigma(const Eigen::VectorXd& v, const Eigen::VectorXd& k) const
            {
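                // predictive variance: sigma^2(v) = k(v,v) - k(v)^T K^{-1} k(v);
                // with z = L^{-1} k(v), the quadratic term reduces to z.dot(z)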
                Eigen::VectorXd z = _matrixL.triangularView<Eigen::Lower>().solve(k);
                double res = _kernel_function(v, v) - z.dot(z);

                return (res <= std::numeric_limits<double>::epsilon()) ? 0 : res;
            }

            Eigen::VectorXd _compute_k(const Eigen::VectorXd& v) const
            {
                Eigen::VectorXd k(_samples.size());
                for (int i = 0; i < k.size(); i++)
                    k[i] = _kernel_function(_samples[i], v);
                return k;
            }
        };
    }
}

#endif