Unverified commit 70ac7c2d, authored by Konstantinos Chatzilygeroudis, committed by GitHub

Merge pull request #244 from resibots/serialize_model

- First prototype of the serialization
- All kernel and mean functions can be tuned (if they have parameters)
parents 14f51918 eab6022b
@@ -7,6 +7,7 @@ os:
sudo: required
dist: trusty
group: deprecated-2017Q4
compiler:
- gcc
@@ -13,7 +13,7 @@ We assume that our samples are in a vector called ``samples`` and that our obser
.. literalinclude:: ../../src/tutorials/gp.cpp
:language: c++
:linenos:
-:lines: 77-86
+:lines: 79-88
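For illustration, the two vectors could be built like this (a minimal sketch with hypothetical 1-D toy data, not the tutorial's exact code):

.. code-block:: c++

    // hypothetical toy data: observe cos(x) at random 1-D points
    std::vector<Eigen::VectorXd> samples;
    std::vector<Eigen::VectorXd> observations;
    for (int i = 0; i < 10; i++) {
        Eigen::VectorXd s = Eigen::VectorXd::Random(1); // uniform in [-1, 1]
        samples.push_back(s);
        observations.push_back(Eigen::VectorXd::Constant(1, std::cos(s(0))));
    }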
Basic usage
------------
@@ -23,14 +23,14 @@ We first create a basic GP with an Exponential kernel (``kernel::Exp<Params>``)
.. literalinclude:: ../../src/tutorials/gp.cpp
:language: c++
:linenos:
-:lines: 59-72
+:lines: 61-74
The type of the GP is defined by the following lines:
.. literalinclude:: ../../src/tutorials/gp.cpp
:language: c++
:linenos:
-:lines: 87-91
+:lines: 89-93
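In other words, the GP type bundles a kernel and a mean function. A sketch of what such a definition can look like (``mean::Data`` is an assumption here; the tutorial's exact mean function is in the included lines):

.. code-block:: c++

    using Kernel_t = limbo::kernel::Exp<Params>;      // the kernel named above
    using Mean_t = limbo::mean::Data<Params>;         // assumed mean function
    using GP_t = limbo::model::GP<Params, Kernel_t, Mean_t>;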
To use the GP, we need:
@@ -40,7 +40,7 @@ To use the GP, we need :
.. literalinclude:: ../../src/tutorials/gp.cpp
:language: c++
:linenos:
-:lines: 92-97
+:lines: 94-99
Here we assume that the noise is the same for all samples and that it is equal to 0.01.
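A minimal sketch of the call, assuming ``GP_t`` is the type defined above and ``Params`` keeps the default ``kernel::noise`` of 0.01 (see the kernel headers below):

.. code-block:: c++

    GP_t gp(1, 1); // 1-D inputs, 1-D observations (constructor signature assumed)
    gp.compute(samples, observations);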
@@ -57,7 +57,7 @@ To visualize the predictions of the GP, we can query it for many points and reco
.. literalinclude:: ../../src/tutorials/gp.cpp
:language: c++
:linenos:
-:lines: 101-110
+:lines: 101-112
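A sketch of such a loop, assuming ``query()`` returns the predictive mean and variance:

.. code-block:: c++

    // sketch: query the GP on a regular grid and record mean and standard deviation
    std::ofstream ofs("gp.dat");
    for (int i = 0; i < 100; ++i) {
        Eigen::VectorXd v = Eigen::VectorXd::Constant(1, i / 100.0);
        Eigen::VectorXd mu;
        double sigma_sq;
        std::tie(mu, sigma_sq) = gp.query(v);
        ofs << v(0) << " " << mu(0) << " " << std::sqrt(sigma_sq) << std::endl;
    }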
Hyper-parameter optimization
@@ -71,21 +71,21 @@ A new GP type is defined as follows:
.. literalinclude:: ../../src/tutorials/gp.cpp
:language: c++
:linenos:
-:lines: 112-116
+:lines: 114-118
It uses the default values for the parameters of ``SquaredExpARD``:
.. literalinclude:: ../../src/tutorials/gp.cpp
:language: c++
:linenos:
-:lines: 64-67
+:lines: 66-69
After calling the ``compute()`` method, the hyper-parameters can be optimized by calling the ``optimize_hyperparams()`` function. The GP does not need to be recomputed beforehand; we pass ``false`` as the last parameter of ``compute()`` because the kernel matrix does not need to be computed now (it will be recomputed during the hyper-parameter optimization).
.. literalinclude:: ../../src/tutorials/gp.cpp
:language: c++
:linenos:
-:lines: 119-121
+:lines: 121-123
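Putting the two calls together (a sketch; ``GPOpt_t`` and ``gp_ard`` are hypothetical names for the optimized-GP type defined above and its instance):

.. code-block:: c++

    GPOpt_t gp_ard(1, 1);
    gp_ard.compute(samples, observations, false); // false: skip computing the kernel matrix now
    gp_ard.optimize_hyperparams();                // recomputes it during the optimization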
We can have a look at the difference between the two GPs:
@@ -105,4 +105,25 @@ Here is the complete ``main.cpp`` file of this tutorial:
.. literalinclude:: ../../src/tutorials/gp.cpp
:language: c++
-:lines: 48-
+:lines: 46-
Saving and Loading
-------------------
We can also save our optimized GP model:
.. literalinclude:: ../../src/tutorials/gp.cpp
:language: c++
:linenos:
:lines: 140-141
This will create a directory called ``myGP`` with several files (the GP data, kernel hyper-parameters, etc.). If we want a more compact binary format, we can replace ``TextArchive`` with ``BinaryArchive``.
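As a sketch, assuming the templated save API added by this pull request (the exact call is in the included lines above), saving could look like:

.. code-block:: c++

    // sketch: save the optimized GP in a text-based format
    gp_ard.save<limbo::serialize::TextArchive>("myGP");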
To load a saved model, we can do the following:
.. literalinclude:: ../../src/tutorials/gp.cpp
:language: c++
:linenos:
:lines: 143-144
Note that we need to have the same kernel and mean function (i.e., the same GP type) as the one used for saving.
\ No newline at end of file
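A matching load sketch, again assuming the templated API and reusing the hypothetical ``GPOpt_t`` type name from the sketches above:

.. code-block:: c++

    // sketch: the GP type must match the one used for saving
    GPOpt_t gp2(1, 1);
    gp2.load<limbo::serialize::TextArchive>("myGP");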
@@ -123,7 +123,7 @@ protected:
};
template <typename Params>
-struct MeanOffset {
+struct MeanOffset : public mean::BaseMean<Params> {
MeanOffset(size_t dim_out = 1) {}
template <typename GP>
@@ -142,7 +142,7 @@ struct MeanOffset {
};
template <typename Params>
-struct MeanRotation {
+struct MeanRotation : public mean::BaseMean<Params> {
MeanRotation(size_t dim_out = 1) {}
template <typename GP>
@@ -166,7 +166,7 @@ struct MeanRotation {
};
template <typename Params>
-struct MeanComplet {
+struct MeanComplet : public mean::BaseMean<Params> {
MeanComplet(size_t dim_out = 1) {}
template <typename GP>
@@ -55,7 +55,7 @@ namespace limbo {
BO_PARAM(double, sigma_sq, 1);
BO_PARAM(double, l, 1);
};
-}
+} // namespace defaults
namespace kernel {
/**
@ingroup kernel
@@ -72,16 +72,50 @@
*/
template <typename Params>
struct Exp : public BaseKernel<Params, Exp<Params>> {
-Exp(size_t dim = 1) {}
+Exp(size_t dim = 1) : _sf2(Params::kernel_exp::sigma_sq()), _l(Params::kernel_exp::l())
+{
+_h_params = Eigen::VectorXd(2);
+_h_params << std::log(_l), std::log(std::sqrt(_sf2));
+}
size_t params_size() const { return 2; }
// Return the hyper parameters in log-space
Eigen::VectorXd params() const { return _h_params; }
// We expect the input parameters to be in log-space
void set_params(const Eigen::VectorXd& p)
{
_h_params = p;
_l = std::exp(p(0));
_sf2 = std::exp(2.0 * p(1));
}
double kernel(const Eigen::VectorXd& v1, const Eigen::VectorXd& v2) const
{
-double l_sq = Params::kernel_exp::l() * Params::kernel_exp::l();
-double r = (v1 - v2).squaredNorm();
-return Params::kernel_exp::sigma_sq() * (std::exp(-0.5 * r / l_sq));
+double l_sq = _l * _l;
+double r = (v1 - v2).squaredNorm() / l_sq;
+return _sf2 * std::exp(-0.5 * r);
}
Eigen::VectorXd gradient(const Eigen::VectorXd& x1, const Eigen::VectorXd& x2) const
{
Eigen::VectorXd grad(this->params_size());
double l_sq = _l * _l;
double r = (x1 - x2).squaredNorm() / l_sq;
double k = _sf2 * std::exp(-0.5 * r);
grad(0) = r * k;
grad(1) = 2 * k;
return grad;
}
protected:
double _sf2, _l;
Eigen::VectorXd _h_params;
};
-}
-}
+} // namespace kernel
+} // namespace limbo
#endif
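The new log-space parameter interface of ``Exp`` can be exercised as follows. A minimal sketch: the ``Params`` values are hypothetical, and ``struct kernel`` is assumed to pull in the ``noise``/``optimize_noise`` defaults that ``BaseKernel`` reads:

#include <cmath>
#include <Eigen/Core>
#include <limbo/kernel/exp.hpp>

struct Params {
    struct kernel : public limbo::defaults::kernel {}; // noise defaults (name assumed)
    struct kernel_exp {
        BO_PARAM(double, sigma_sq, 1.0);
        BO_PARAM(double, l, 0.5);
    };
};

int main()
{
    limbo::kernel::Exp<Params> k(1);
    Eigen::VectorXd p = k.params(); // log-space: [log(l), log(sigma)]
    p(0) = std::log(0.25);          // shrink the length-scale
    k.set_params(p);                // the kernel now evaluates with l = 0.25
    double v = k.kernel(Eigen::VectorXd::Zero(1), Eigen::VectorXd::Ones(1));
    return v > 0. ? 0 : 1;
}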
@@ -57,7 +57,7 @@ namespace limbo {
BO_PARAM(double, noise, 0.01);
BO_PARAM(bool, optimize_noise, false);
};
-}
+} // namespace defaults
namespace kernel {
/**
@@ -128,8 +128,22 @@
protected:
double _noise;
double _noise_p;
// Placeholder functions to satisfy the compiler;
// they should never be called directly
size_t params_size() const { return 0; }
Eigen::VectorXd params() const { return Eigen::VectorXd(); }
void set_params(const Eigen::VectorXd& p) {}
Eigen::VectorXd gradient(const Eigen::VectorXd& x1, const Eigen::VectorXd& x2) const
{
// This should never be called!
assert(false);
return Eigen::VectorXd(); // unreachable; avoids falling off the end of a non-void function
}
};
-}
-}
+} // namespace kernel
+} // namespace limbo
#endif
@@ -56,7 +56,7 @@ namespace limbo {
/// @ingroup kernel_defaults
BO_PARAM(double, l, 1);
};
-}
+} // namespace defaults
namespace kernel {
/**
@ingroup kernel
@@ -82,21 +82,62 @@
*/
template <typename Params>
struct MaternFiveHalves : public BaseKernel<Params, MaternFiveHalves<Params>> {
-MaternFiveHalves(size_t dim = 1) {}
+MaternFiveHalves(size_t dim = 1) : _sf2(Params::kernel_maternfivehalves::sigma_sq()), _l(Params::kernel_maternfivehalves::l())
+{
+_h_params = Eigen::VectorXd(2);
+_h_params << std::log(_l), std::log(std::sqrt(_sf2));
+}
size_t params_size() const { return 2; }
// Return the hyper parameters in log-space
Eigen::VectorXd params() const { return _h_params; }
// We expect the input parameters to be in log-space
void set_params(const Eigen::VectorXd& p)
{
_h_params = p;
_l = std::exp(p(0));
_sf2 = std::exp(2.0 * p(1));
}
double kernel(const Eigen::VectorXd& v1, const Eigen::VectorXd& v2) const
{
double d = (v1 - v2).norm();
double d_sq = d * d;
-double l = Params::kernel_maternfivehalves::l();
-double l_sq = l * l;
-double term1 = std::sqrt(5) * d / l;
+double l_sq = _l * _l;
+double term1 = std::sqrt(5) * d / _l;
double term2 = 5. * d_sq / (3. * l_sq);
-return Params::kernel_maternfivehalves::sigma_sq() * (1 + term1 + term2) * std::exp(-term1);
+return _sf2 * (1 + term1 + term2) * std::exp(-term1);
}
Eigen::VectorXd gradient(const Eigen::VectorXd& x1, const Eigen::VectorXd& x2) const
{
Eigen::VectorXd grad(this->params_size());
double d = (x1 - x2).norm();
double d_sq = d * d;
double l_sq = _l * _l;
double term1 = std::sqrt(5) * d / _l;
double term2 = 5. * d_sq / (3. * l_sq);
double r = std::exp(-term1);
// derivatives w.r.t. p(0) = log(l):
// d(term1) = -term1, d(term2) = -2 * term2, d(e^(-term1)) = term1 * r
grad(0) = _sf2 * (r * term1 * (1 + term1 + term2) + (-term1 - 2. * term2) * r);
grad(1) = 2 * _sf2 * (1 + term1 + term2) * r;
return grad;
}
protected:
double _sf2, _l;
Eigen::VectorXd _h_params;
};
-}
-}
+} // namespace kernel
+} // namespace limbo
#endif
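For reference, the derivation implemented by the Matern 5/2 gradient above, written out in LaTeX with p(0) = log(l) and p(1) = log(sigma):

    k = \sigma^2 (1 + t_1 + t_2)\, e^{-t_1}, \qquad t_1 = \frac{\sqrt{5}\, d}{\ell}, \qquad t_2 = \frac{5 d^2}{3 \ell^2}

    \frac{\partial t_1}{\partial \log \ell} = -t_1, \qquad \frac{\partial t_2}{\partial \log \ell} = -2 t_2

    \frac{\partial k}{\partial \log \ell} = \sigma^2 e^{-t_1} \big( t_1 (1 + t_1 + t_2) - t_1 - 2 t_2 \big), \qquad \frac{\partial k}{\partial \log \sigma} = 2k

The two partial derivatives are exactly ``grad(0)`` and ``grad(1)`` in the code.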
@@ -56,7 +56,7 @@ namespace limbo {
/// @ingroup kernel_defaults
BO_PARAM(double, l, 1);
};
-}
+} // namespace defaults
namespace kernel {
/**
@ingroup kernel
@@ -80,17 +80,55 @@
*/
template <typename Params>
struct MaternThreeHalves : public BaseKernel<Params, MaternThreeHalves<Params>> {
-MaternThreeHalves(size_t dim = 1) {}
+MaternThreeHalves(size_t dim = 1) : _sf2(Params::kernel_maternthreehalves::sigma_sq()), _l(Params::kernel_maternthreehalves::l())
+{
+_h_params = Eigen::VectorXd(2);
+_h_params << std::log(_l), std::log(std::sqrt(_sf2));
+}
size_t params_size() const { return 2; }
// Return the hyper parameters in log-space
Eigen::VectorXd params() const { return _h_params; }
// We expect the input parameters to be in log-space
void set_params(const Eigen::VectorXd& p)
{
_h_params = p;
_l = std::exp(p(0));
_sf2 = std::exp(2.0 * p(1));
}
double kernel(const Eigen::VectorXd& v1, const Eigen::VectorXd& v2) const
{
double d = (v1 - v2).norm();
-double term = std::sqrt(3) * d / Params::kernel_maternthreehalves::l();
-return Params::kernel_maternthreehalves::sigma_sq() * (1 + term) * std::exp(-term);
+double term = std::sqrt(3) * d / _l;
+return _sf2 * (1 + term) * std::exp(-term);
}
Eigen::VectorXd gradient(const Eigen::VectorXd& x1, const Eigen::VectorXd& x2) const
{
Eigen::VectorXd grad(this->params_size());
double d = (x1 - x2).norm();
double term = std::sqrt(3) * d / _l;
double r = std::exp(-term);
// derivatives w.r.t. p(0) = log(l):
// d(1 + term) = -term, d(e^(-term)) = term * r
grad(0) = _sf2 * (-term * r + (1 + term) * term * r);
grad(1) = 2 * _sf2 * (1 + term) * r;
return grad;
}
protected:
double _sf2, _l;
Eigen::VectorXd _h_params;
};
-}
-}
+} // namespace kernel
+} // namespace limbo
#endif
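Since the kernels now expose ``params()``, ``set_params()`` and ``gradient()``, the analytic gradients above can be validated with a central finite-difference check in the log-space parameters. A sketch (the helper below is hypothetical):

#include <cmath>
#include <Eigen/Core>

// compare the analytic gradient against central finite differences;
// the kernel is taken by value so the caller's parameters are untouched
template <typename Kernel>
bool check_gradient(Kernel k, const Eigen::VectorXd& x1, const Eigen::VectorXd& x2, double eps = 1e-6)
{
    Eigen::VectorXd p = k.params();
    Eigen::VectorXd analytic = k.gradient(x1, x2);
    for (int i = 0; i < p.size(); ++i) {
        Eigen::VectorXd p_hi = p, p_lo = p;
        p_hi(i) += eps;
        p_lo(i) -= eps;
        k.set_params(p_hi);
        double f_hi = k.kernel(x1, x2);
        k.set_params(p_lo);
        double f_lo = k.kernel(x1, x2);
        // analytic(i) should match (f_hi - f_lo) / (2 * eps) up to O(eps^2)
        if (std::abs((f_hi - f_lo) / (2. * eps) - analytic(i)) > 1e-4)
            return false;
    }
    return true;
}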
@@ -56,7 +56,7 @@ namespace limbo {
/// @ingroup kernel_defaults
BO_PARAM(double, sigma_sq, 1);
};
-}
+} // namespace defaults
namespace kernel {
/**
@@ -100,7 +100,7 @@
_ell(i) = std::exp(p(i));
for (size_t j = 0; j < (unsigned int)Params::kernel_squared_exp_ard::k(); ++j)
for (size_t i = 0; i < _input_dim; ++i)
-_A(i, j) = std::exp(p((j + 1) * _input_dim + i));
+_A(i, j) = p((j + 1) * _input_dim + i);
_sf2 = std::exp(2.0 * p(params_size() - 1));
}
@@ -110,13 +110,15 @@
Eigen::VectorXd grad = Eigen::VectorXd::Zero(this->params_size());
Eigen::MatrixXd K = (_A * _A.transpose());
K.diagonal() += (Eigen::MatrixXd)(_ell.array().inverse().square());
-double z = ((x1 - x2).transpose() * K * (x1 - x2)).norm();
+double z = ((x1 - x2).transpose() * K * (x1 - x2));
double k = _sf2 * std::exp(-0.5 * z);
grad.head(_input_dim) = (x1 - x2).cwiseQuotient(_ell).array().square() * k;
-Eigen::MatrixXd G = -k * (x1 - x2) * (x1 - x2).transpose() * _A;
-for (size_t j = 0; j < Params::kernel_squared_exp_ard::k(); ++j)
-grad.segment((1 + j) * _input_dim, _input_dim) = G.col(j);
+for (size_t j = 0; j < (unsigned int)Params::kernel_squared_exp_ard::k(); ++j) {
+Eigen::VectorXd G = -((x1 - x2).transpose() * _A.col(j))(0) * (x1 - x2) * k;
+grad.segment((j + 1) * _input_dim, _input_dim) = G;
+}
grad(grad.size() - 1) = 2 * k;
@@ -140,7 +142,7 @@
if (Params::kernel_squared_exp_ard::k() > 0) {
Eigen::MatrixXd K = (_A * _A.transpose());
K.diagonal() += (Eigen::MatrixXd)(_ell.array().inverse().square());
-z = ((x1 - x2).transpose() * K * (x1 - x2)).norm();
+z = ((x1 - x2).transpose() * K * (x1 - x2));
}
else {
z = (x1 - x2).cwiseQuotient(_ell).squaredNorm();
@@ -157,7 +159,7 @@
size_t _input_dim;
Eigen::VectorXd _h_params;
};
-}
-}
+} // namespace kernel
+} // namespace limbo
#endif
@@ -49,6 +49,7 @@
///@defgroup mean
///@defgroup mean_defaults
#include <limbo/mean/mean.hpp>
#include <limbo/mean/constant.hpp>
#include <limbo/mean/data.hpp>
#include <limbo/mean/function_ard.hpp>
@@ -46,9 +46,7 @@
#ifndef LIMBO_MEAN_CONSTANT_HPP
#define LIMBO_MEAN_CONSTANT_HPP
#include <Eigen/Core>
-#include <limbo/tools/macros.hpp>
+#include <limbo/mean/mean.hpp>
namespace limbo {
namespace defaults {
@@ -56,7 +54,8 @@ namespace limbo {
///@ingroup mean_defaults
BO_PARAM(double, constant, 1);
};
-}
+} // namespace defaults
namespace mean {
/** @ingroup mean
A constant mean (the traditional choice for Bayesian optimization)
@@ -65,19 +64,40 @@
- ``double constant`` (the value of the constant)
*/
template <typename Params>
-struct Constant {
-Constant(size_t dim_out = 1) : _dim_out(dim_out) {}
+struct Constant : public BaseMean<Params> {
+Constant(size_t dim_out = 1) : _dim_out(dim_out), _constant(Params::mean_constant::constant()) {}
template <typename GP>
Eigen::VectorXd operator()(const Eigen::VectorXd& v, const GP&) const
{
-return Eigen::VectorXd::Constant(_dim_out, Params::mean_constant::constant());
+return Eigen::VectorXd::Constant(_dim_out, _constant);
}
template <typename GP>
Eigen::MatrixXd grad(const Eigen::VectorXd& x, const GP& gp) const
{
return Eigen::MatrixXd::Ones(_dim_out, 1);
}
size_t h_params_size() const { return 1; }
Eigen::VectorXd h_params() const
{
Eigen::VectorXd p(1);
p << _constant;
return p;
}
void set_h_params(const Eigen::VectorXd& p)
{
_constant = p(0);
}
protected:
size_t _dim_out;
double _constant;
};
-}
-}
+} // namespace mean
+} // namespace limbo
#endif
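The mean hyper-parameter interface introduced here can be driven directly. A sketch with hypothetical values:

#include <Eigen/Core>
#include <limbo/mean/constant.hpp>

struct Params {
    struct mean_constant {
        BO_PARAM(double, constant, 1.0);
    };
};

int main()
{
    limbo::mean::Constant<Params> m(1);
    Eigen::VectorXd p = m.h_params(); // p(0) == 1.0, the value from Params
    p(0) = 2.5;
    m.set_h_params(p); // the mean now returns 2.5 everywhere
    return m.h_params()(0) == 2.5 ? 0 : 1;
}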
@@ -46,14 +46,14 @@
#ifndef LIMBO_MEAN_DATA_HPP
#define LIMBO_MEAN_DATA_HPP
#include <Eigen/Core>
#include <limbo/mean/mean.hpp>
namespace limbo {
namespace mean {
///@ingroup mean
///Use the mean of the observation as a constant mean
template <typename Params>
-struct Data {
+struct Data : public BaseMean<Params> {
Data(size_t dim_out = 1) {}
template <typename GP>
@@ -62,7 +62,7 @@ namespace limbo {
return gp.mean_observation().array();
}
};
-}
-}
+} // namespace mean
+} // namespace limbo
#endif
@@ -46,46 +46,66 @@
#ifndef LIMBO_MEAN_FUNCTION_ARD_HPP
#define LIMBO_MEAN_FUNCTION_ARD_HPP
#include <Eigen/Core>
#include <limbo/mean/mean.hpp>
namespace limbo {
namespace mean {
/// Functor used to optimize the mean function using the maximum likelihood principle
///
/// It incorporates the hyperparameters of the underlying mean function, if any
/// @see limbo::model::gp::KernelMeanLFOpt, limbo::model::gp::MeanLFOpt
template <typename Params, typename MeanFunction>
-struct FunctionARD {
+struct FunctionARD : public BaseMean<Params> {
FunctionARD(size_t dim_out = 1)
: _mean_function(dim_out), _tr(dim_out, dim_out + 1)
{
-Eigen::VectorXd h = Eigen::VectorXd::Zero(dim_out * (dim_out + 1));
+Eigen::VectorXd h = Eigen::VectorXd::Zero(dim_out * (dim_out + 1) + _mean_function.h_params_size());
for (size_t i = 0; i < dim_out; i++)
h[i * (dim_out + 2)] = 1;
if (_mean_function.h_params_size() > 0)
h.tail(_mean_function.h_params_size()) = _mean_function.h_params();
this->set_h_params(h);
}
-size_t h_params_size() const { return _tr.rows() * _tr.cols(); }
+size_t h_params_size() const { return _tr.rows() * _tr.cols() + _mean_function.h_params_size(); }
-const Eigen::VectorXd& h_params() const { return _h_params; }
+Eigen::VectorXd h_params() const
+{
+Eigen::VectorXd params(h_params_size());
+params.head(_tr.rows() * _tr.cols()) = _h_params;
+if (_mean_function.h_params_size() > 0)
+params.tail(_mean_function.h_params_size()) = _mean_function.h_params();
+return params;
+}
void set_h_params(const Eigen::VectorXd& p)
{
-_h_params = p;
+_h_params = p.head(_tr.rows() * _tr.cols());
for (int c = 0; c < _tr.cols(); c++)
for (int r = 0; r < _tr.rows(); r++)
_tr(r, c) = p[r * _tr.cols() + c];
if (_mean_function.h_params_size() > 0)
_mean_function.set_h_params(p.tail(_mean_function.h_params_size()));
}
template <typename GP>
Eigen::MatrixXd grad(const Eigen::VectorXd& x, const GP& gp) const
{
-Eigen::MatrixXd grad = Eigen::MatrixXd::Zero(_tr.rows(), _h_params.size());
+Eigen::MatrixXd grad = Eigen::MatrixXd::Zero(_tr.rows(), h_params_size());
Eigen::VectorXd m = _mean_function(x, gp);
for (int i = 0; i < _tr.rows(); i++) {
grad.block(i, i * _tr.cols(), 1, _tr.cols() - 1) = m.transpose();
grad(i, (i + 1) * _tr.cols() - 1) = 1;
}
if (_mean_function.h_params_size() > 0) {
Eigen