Commit 6c826994 authored by Konstantinos Chatzilygeroudis

Added MultiGP - solves #266
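MultiGP wraps one single-output GP per output dimension behind the usual GP interface (compute / add_sample / query / mu / sigma), training and querying the underlying GPs in parallel. A sketch of the intended usage, mirroring the new tests (`Params`, the kernel/mean types, and the `samples`/`observations` vectors of `Eigen::VectorXd` are user-defined):

    using KF_t = limbo::kernel::MaternFiveHalves<Params>;
    using Mean_t = limbo::mean::Constant<Params>;
    using MultiGP_t = limbo::model::MultiGP<Params, limbo::model::GP, KF_t, Mean_t>;

    MultiGP_t gp;
    gp.compute(samples, observations); // N-D observations, one GP per output
    Eigen::VectorXd mu, sigma;         // one entry per output dimension
    std::tie(mu, sigma) = gp.query(v);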

parent 1d4cd408
@@ -50,6 +50,7 @@
///@defgroup model_opt_defaults
#include <limbo/model/gp.hpp>
#include <limbo/model/multi_gp.hpp>
#include <limbo/model/sparsified_gp.hpp>
#include <limbo/model/gp/kernel_lf_opt.hpp>
limbo/model/multi_gp.hpp (new file)
//| Copyright Inria May 2015
//| This project has received funding from the European Research Council (ERC) under
//| the European Union's Horizon 2020 research and innovation programme (grant
//| agreement No 637972) - see http://www.resibots.eu
//|
//| Contributor(s):
//| - Jean-Baptiste Mouret (jean-baptiste.mouret@inria.fr)
//| - Antoine Cully (antoinecully@gmail.com)
//| - Konstantinos Chatzilygeroudis (konstantinos.chatzilygeroudis@inria.fr)
//| - Federico Allocati (fede.allocati@gmail.com)
//| - Vaios Papaspyros (b.papaspyros@gmail.com)
//| - Roberto Rama (bertoski@gmail.com)
//|
//| This software is a computer library whose purpose is to optimize continuous,
//| black-box functions. It mainly implements Gaussian processes and Bayesian
//| optimization.
//| Main repository: http://github.com/resibots/limbo
//| Documentation: http://www.resibots.eu/limbo
//|
//| This software is governed by the CeCILL-C license under French law and
//| abiding by the rules of distribution of free software. You can use,
//| modify and/ or redistribute the software under the terms of the CeCILL-C
//| license as circulated by CEA, CNRS and INRIA at the following URL
//| "http://www.cecill.info".
//|
//| As a counterpart to the access to the source code and rights to copy,
//| modify and redistribute granted by the license, users are provided only
//| with a limited warranty and the software's author, the holder of the
//| economic rights, and the successive licensors have only limited
//| liability.
//|
//| In this respect, the user's attention is drawn to the risks associated
//| with loading, using, modifying and/or developing or reproducing the
//| software by the user in light of its specific status of free software,
//| that may mean that it is complicated to manipulate, and that also
//| therefore means that it is reserved for developers and experienced
//| professionals having in-depth computer knowledge. Users are therefore
//| encouraged to load and test the software's suitability as regards their
//| requirements in conditions enabling the security of their systems and/or
//| data to be ensured and, more generally, to use and operate it in the
//| same conditions as regards security.
//|
//| The fact that you are presently reading this means that you have had
//| knowledge of the CeCILL-C license and that you accept its terms.
//|
#ifndef LIMBO_MODEL_MULTI_GP_HPP
#define LIMBO_MODEL_MULTI_GP_HPP

#include <cassert>
#include <tuple>
#include <vector>

#include <Eigen/Core>

#include <limbo/mean/null_function.hpp>
#include <limbo/model/gp/no_lf_opt.hpp>
#include <limbo/tools/math.hpp>
#include <limbo/tools/parallel.hpp>

namespace limbo {
    namespace model {
        /// @ingroup model
        /// A wrapper for N-output Gaussian processes: it maintains one independent single-output GP per output dimension.
        /// It is parametrized by:
        /// - a GP class
        /// - a kernel function
        /// - a mean function
        /// - [optional] an optimizer for the hyper-parameters
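        ///
        /// Example (a sketch; `Params`, `Kernel_t` and `Mean_t` stand for user-defined types):
        /// @code
        /// using MultiGP_t = limbo::model::MultiGP<Params, limbo::model::GP, Kernel_t, Mean_t>;
        /// MultiGP_t gp(2, 3); // 2-D inputs, 3-D outputs: three single-output GPs
        /// gp.compute(samples, observations);
        /// @endcode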
        template <typename Params, template <typename, typename, typename, typename> class GPClass, typename KernelFunction, typename MeanFunction, class HyperParamsOptimizer = limbo::model::gp::NoLFOpt<Params>>
        class MultiGP {
        public:
            using GP_t = GPClass<Params, KernelFunction, limbo::mean::NullFunction<Params>, limbo::model::gp::NoLFOpt<Params>>;

            /// useful because the model might be created before knowing anything about the process
            MultiGP() : _dim_in(-1), _dim_out(-1) {}

            /// useful because the model might be created before having samples
            MultiGP(int dim_in, int dim_out)
                : _dim_in(dim_in), _dim_out(dim_out), _mean_function(dim_out)
            {
                // initialize _dim_out single-output GPs, one per output dimension
                _gp_models.resize(_dim_out);
                for (int i = 0; i < _dim_out; i++) {
                    _gp_models[i] = GP_t(_dim_in, 1);
                }
            }
            /// Compute the GP from samples and observations. This call needs to be explicit!
            void compute(const std::vector<Eigen::VectorXd>& samples,
                const std::vector<Eigen::VectorXd>& observations, bool compute_kernel = true)
            {
                assert(samples.size() != 0);
                assert(observations.size() != 0);
                assert(samples.size() == observations.size());

                if (_dim_in != samples[0].size()) {
                    _dim_in = samples[0].size();
                }

                if (_dim_out != observations[0].size()) {
                    _dim_out = observations[0].size();
                    _mean_function = MeanFunction(_dim_out); // the cost of building a functor should be relatively low
                }

                if ((int)_gp_models.size() != _dim_out) {
                    _gp_models.resize(_dim_out);
                    for (int i = 0; i < _dim_out; i++)
                        _gp_models[i] = GP_t(_dim_in, 1);
                }

                // save the observations
                // TODO: avoid storing the observations twice (once here and once in each GP)
                _observations = observations;

                // compute the new observations for the individual GPs:
                // GP i is trained on the residuals (observation - mean) of output dimension i
                std::vector<std::vector<Eigen::VectorXd>> obs(_dim_out);

                for (size_t j = 0; j < observations.size(); j++) {
                    Eigen::VectorXd mean_vector = _mean_function(samples[j], *this);
                    assert(mean_vector.size() == _dim_out);
                    for (int i = 0; i < _dim_out; i++) {
                        obs[i].push_back(limbo::tools::make_vector(observations[j][i] - mean_vector[i]));
                    }
                }

                // do the actual computation, one GP per output dimension, in parallel
                limbo::tools::par::loop(0, _dim_out, [&](size_t i) {
                    _gp_models[i].compute(samples, obs[i], compute_kernel);
                });
            }
            /// Optimize the hyper-parameters of each GP. Do not forget to call this if you use a hyper-parameters optimizer!
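            /// Example (a sketch, assuming a non-trivial HyperParamsOptimizer such as
            /// multi_gp::ParallelLFOpt, as in the tests below):
            /// @code
            /// gp.compute(samples, observations, false);
            /// gp.optimize_hyperparams(); // tunes the hyper-parameters of every GP
            /// @endcode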
            void optimize_hyperparams()
            {
                _hp_optimize(*this);
            }

            const MeanFunction& mean_function() const { return _mean_function; }
            MeanFunction& mean_function() { return _mean_function; }

            /// add a sample and update the GPs. This code uses an incremental implementation of the Cholesky
            /// decomposition. It is therefore much faster than a call to compute()
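            /// Example (a sketch):
            /// @code
            /// gp.add_sample(sample, observation); // incremental update of every GP
            /// @endcode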
            void add_sample(const Eigen::VectorXd& sample, const Eigen::VectorXd& observation)
            {
                if (_gp_models.size() == 0) {
                    if (_dim_in != sample.size()) {
                        _dim_in = sample.size();
                    }
                    if (_dim_out != observation.size()) {
                        _dim_out = observation.size();
                        _gp_models.resize(_dim_out);
                        for (int i = 0; i < _dim_out; i++)
                            _gp_models[i] = GP_t(_dim_in, 1);

                        _mean_function = MeanFunction(_dim_out); // the cost of building a functor should be relatively low
                    }
                }
                else {
                    assert(sample.size() == _dim_in);
                    assert(observation.size() == _dim_out);
                }

                _observations.push_back(observation);

                Eigen::VectorXd mean_vector = _mean_function(sample, *this);
                assert(mean_vector.size() == _dim_out);

                limbo::tools::par::loop(0, _dim_out, [&](size_t i) {
                    _gp_models[i].add_sample(sample, limbo::tools::make_vector(observation[i] - mean_vector[i]));
                });
            }
            /**
             \\rst
             return :math:`\mu` and :math:`\sigma^2` (un-normalized) as vectors, with one entry per GP. Using this method instead of separate calls to mu() and sigma() is more efficient because some computations are shared between mu() and sigma().
             \\endrst
            */
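            /// Example (a sketch):
            /// @code
            /// Eigen::VectorXd mu, sigma; // one entry per output dimension
            /// std::tie(mu, sigma) = gp.query(v);
            /// @endcode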
            std::tuple<Eigen::VectorXd, Eigen::VectorXd> query(const Eigen::VectorXd& v) const
            {
                Eigen::VectorXd mu(_dim_out);
                Eigen::VectorXd sigma(_dim_out);

                // query the mean function
                Eigen::VectorXd mean_vector = _mean_function(v, *this);

                // parallel query of the GPs; the mean function is added back to each prediction
                limbo::tools::par::loop(0, _dim_out, [&](size_t i) {
                    Eigen::VectorXd tmp;
                    std::tie(tmp, sigma(i)) = _gp_models[i].query(v);
                    mu(i) = tmp(0) + mean_vector(i);
                });

                return std::make_tuple(mu, sigma);
            }

            /**
             \\rst
             return :math:`\mu` (un-normalized). If there is no sample, return the value according to the mean function.
             \\endrst
            */
            Eigen::VectorXd mu(const Eigen::VectorXd& v) const
            {
                Eigen::VectorXd mu(_dim_out);
                Eigen::VectorXd mean_vector = _mean_function(v, *this);

                limbo::tools::par::loop(0, _dim_out, [&](size_t i) {
                    mu(i) = _gp_models[i].mu(v)[0] + mean_vector(i);
                });

                return mu;
            }

            /**
             \\rst
             return :math:`\sigma^2` (un-normalized). This returns a vector; one value for each GP.
             \\endrst
            */
            Eigen::VectorXd sigma(const Eigen::VectorXd& v) const
            {
                Eigen::VectorXd sigma(_dim_out);

                limbo::tools::par::loop(0, _dim_out, [&](size_t i) {
                    sigma(i) = _gp_models[i].sigma(v);
                });

                return sigma;
            }
            /// return the number of dimensions of the input
            int dim_in() const
            {
                assert(_dim_in != -1); // need to compute first!
                return _dim_in;
            }

            /// return the number of dimensions of the output
            int dim_out() const
            {
                assert(_dim_out != -1); // need to compute first!
                return _dim_out;
            }

            /// return the number of samples used to compute the GP
            int nb_samples() const
            {
                return (_gp_models.size() > 0) ? _gp_models[0].nb_samples() : 0;
            }

            /// recompute the GPs
            void recompute(bool update_obs_mean = true, bool update_full_kernel = true)
            {
                // if there are no GPs, there is nothing to recompute
                if (_gp_models.size() == 0)
                    return;

                if (update_obs_mean) // if the mean is updated, we need to fully re-compute
                    return compute(_gp_models[0].samples(), _observations, update_full_kernel);
                else
                    limbo::tools::par::loop(0, _dim_out, [&](size_t i) {
                        _gp_models[i].recompute(false, update_full_kernel);
                    });
            }

            /// return the list of samples that have been tested so far
            const std::vector<Eigen::VectorXd>& samples() const
            {
                assert(_gp_models.size());
                return _gp_models[0].samples();
            }

            /// return a copy of the list of GPs
            std::vector<GP_t> gp_models() const
            {
                return _gp_models;
            }

            /// return a reference to the list of GPs
            std::vector<GP_t>& gp_models()
            {
                return _gp_models;
            }

        protected:
            std::vector<GP_t> _gp_models;
            int _dim_in, _dim_out;
            HyperParamsOptimizer _hp_optimize;
            MeanFunction _mean_function;
            std::vector<Eigen::VectorXd> _observations;
        };
    } // namespace model
} // namespace limbo

#endif
\ No newline at end of file
limbo/model/multi_gp/parallel_lf_opt.hpp (new file; it carries the same CeCILL-C license header as above)
#ifndef LIMBO_MODEL_MULTI_GP_PARALLEL_LF_OPT_HPP
#define LIMBO_MODEL_MULTI_GP_PARALLEL_LF_OPT_HPP

#include <limbo/model/gp/hp_opt.hpp>
#include <limbo/model/gp/no_lf_opt.hpp>
#include <limbo/tools/parallel.hpp>

namespace limbo {
    namespace model {
        namespace multi_gp {
            /// @ingroup model_opt
            /// optimize each GP independently and in parallel using HyperParamsOptimizer
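            ///
            /// Example (a sketch; `Params`, `Kernel_t` and `Mean_t` are user-defined):
            /// @code
            /// using MultiGP_t = limbo::model::MultiGP<Params, limbo::model::GP, Kernel_t, Mean_t,
            ///     limbo::model::multi_gp::ParallelLFOpt<Params, limbo::model::gp::KernelLFOpt<Params>>>;
            /// @endcode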
            template <typename Params, typename HyperParamsOptimizer = limbo::model::gp::NoLFOpt<Params>>
            struct ParallelLFOpt : public limbo::model::gp::HPOpt<Params> {
            public:
                template <typename GP>
                void operator()(GP& gp)
                {
                    this->_called = true;
                    auto& gps = gp.gp_models();
                    limbo::tools::par::loop(0, gps.size(), [&](size_t i) {
                        HyperParamsOptimizer hp_optimize;
                        hp_optimize(gps[i]);
                    });
                }
            };
        } // namespace multi_gp
    } // namespace model
} // namespace limbo

#endif
\ No newline at end of file
@@ -61,6 +61,8 @@
#include <limbo/model/gp/kernel_loo_opt.hpp>
#include <limbo/model/gp/kernel_mean_lf_opt.hpp>
#include <limbo/model/gp/mean_lf_opt.hpp>
#include <limbo/model/multi_gp.hpp>
#include <limbo/model/multi_gp/parallel_lf_opt.hpp>
#include <limbo/model/sparsified_gp.hpp>
#include <limbo/opt/grid_search.hpp>
#include <limbo/tools/macros.hpp>
@@ -906,3 +908,207 @@ BOOST_AUTO_TEST_CASE(test_sparse_gp_accuracy)
    BOOST_CHECK(double(failures) / double(N) < 0.1);
}
BOOST_AUTO_TEST_CASE(test_multi_gp_dim)
{
    using namespace limbo;

    using KF_t = kernel::MaternFiveHalves<Params>;
    using Mean_t = mean::Constant<Params>;
    using GP_t = model::GP<Params, KF_t, Mean_t>;

    GP_t gp; // no init with dim

    std::vector<Eigen::VectorXd> observations = {make_v2(5, 5), make_v2(10, 10),
        make_v2(5, 5)};
    std::vector<Eigen::VectorXd> samples = {make_v2(1, 1), make_v2(2, 2), make_v2(3, 3)};

    gp.compute(samples, observations);

    using MultiGP_t = model::MultiGP<Params, model::GP, KF_t, Mean_t>;
    MultiGP_t multi_gp;
    multi_gp.compute(samples, observations);

    // the 2-output GP and the MultiGP wrapper should agree on mean and variance
    Eigen::VectorXd mu;
    double sigma;
    std::tie(mu, sigma) = gp.query(make_v2(1, 1));
    BOOST_CHECK(std::abs((mu(0) - 5)) < 1);
    BOOST_CHECK(std::abs((mu(1) - 5)) < 1);
    BOOST_CHECK(sigma <= 2. * (Params::kernel::noise() + 1e-8));

    Eigen::VectorXd mu_multi, sigma_multi;
    std::tie(mu_multi, sigma_multi) = multi_gp.query(make_v2(1, 1));
    BOOST_CHECK(std::abs((mu_multi(0) - 5)) < 1);
    BOOST_CHECK(std::abs((mu_multi(1) - 5)) < 1);
    BOOST_CHECK(sigma_multi(0) <= 2. * (Params::kernel::noise() + 1e-8));
    BOOST_CHECK(sigma_multi(1) <= 2. * (Params::kernel::noise() + 1e-8));

    BOOST_CHECK(std::abs(mu_multi(0) - mu(0)) < 1e-6);
    BOOST_CHECK(std::abs(mu_multi(1) - mu(1)) < 1e-6);
    BOOST_CHECK(std::abs(sigma_multi(0) - sigma) < 1e-6);
    BOOST_CHECK(std::abs(sigma_multi(1) - sigma) < 1e-6);
}
BOOST_AUTO_TEST_CASE(test_multi_gp)
{
    using namespace limbo;

    using KF_t = kernel::MaternFiveHalves<Params>;
    using Mean_t = mean::Constant<Params>;
    using MultiGP_t = model::MultiGP<Params, model::GP, KF_t, Mean_t>;

    MultiGP_t gp;
    std::vector<Eigen::VectorXd> observations = {make_v1(5), make_v1(10),
        make_v1(5)};
    std::vector<Eigen::VectorXd> samples = {make_v1(1), make_v1(2), make_v1(3)};

    gp.compute(samples, observations);

    Eigen::VectorXd mu, sigma;
    std::tie(mu, sigma) = gp.query(make_v1(1));
    BOOST_CHECK(std::abs((mu(0) - 5)) < 1);
    BOOST_CHECK(sigma(0) <= 2. * (Params::kernel::noise() + 1e-8));

    std::tie(mu, sigma) = gp.query(make_v1(2));
    BOOST_CHECK(std::abs((mu(0) - 10)) < 1);
    BOOST_CHECK(sigma(0) <= 2. * (Params::kernel::noise() + 1e-8));

    std::tie(mu, sigma) = gp.query(make_v1(3));
    BOOST_CHECK(std::abs((mu(0) - 5)) < 1);
    BOOST_CHECK(sigma(0) <= 2. * (Params::kernel::noise() + 1e-8));

    for (double x = 0; x < 4; x += 0.05) {
        Eigen::VectorXd mu, sigma;
        std::tie(mu, sigma) = gp.query(make_v1(x));
        BOOST_CHECK(gp.mu(make_v1(x)) == mu);
        BOOST_CHECK(gp.sigma(make_v1(x)) == sigma);
    }
}
BOOST_AUTO_TEST_CASE(test_sparse_multi_gp)
{
    using namespace limbo;

    constexpr size_t N = 20;
    constexpr size_t M = 100;
    size_t failures = 0;

    struct SparseParams {
        struct mean_constant : public defaults::mean_constant {
        };
        struct kernel : public defaults::kernel {
        };
        struct kernel_squared_exp_ard : public defaults::kernel_squared_exp_ard {
        };
        struct opt_rprop : public defaults::opt_rprop {
        };
        struct model_sparse_gp {
            BO_PARAM(int, max_points, M / 2);
        };
    };

    using KF_t = kernel::SquaredExpARD<SparseParams>;
    using MF_t = mean::Constant<SparseParams>;
    using MultiGP_t = model::MultiGP<SparseParams, model::GP, KF_t, MF_t, model::multi_gp::ParallelLFOpt<SparseParams, model::gp::KernelLFOpt<SparseParams>>>;
    using MultiSparseGP_t = model::MultiGP<SparseParams, model::SparsifiedGP, KF_t, MF_t, model::multi_gp::ParallelLFOpt<SparseParams, model::gp::KernelLFOpt<SparseParams>>>;

    for (size_t i = 0; i < N; i++) {
        // training set: M random samples of cos(x)
        std::vector<Eigen::VectorXd> observations;
        std::vector<Eigen::VectorXd> samples;
        tools::rgen_double_t rgen(-2., 2.);
        for (size_t j = 0; j < M; j++) {
            samples.push_back(make_v1(rgen.rand()));
            observations.push_back(make_v1(std::cos(samples.back()[0])));
        }

        // test set: M more random samples of cos(x)
        std::vector<Eigen::VectorXd> test_observations;
        std::vector<Eigen::VectorXd> test_samples;
        for (size_t j = 0; j < M; j++) {
            test_samples.push_back(make_v1(rgen.rand()));
            test_observations.push_back(make_v1(std::cos(test_samples.back()[0])));
        }

        MultiGP_t gp;
        gp.compute(samples, observations, false);
        gp.optimize_hyperparams();

        MultiSparseGP_t sgp;
        sgp.compute(samples, observations, false);
        sgp.optimize_hyperparams();

        bool failed = false;

        // check if normal GP and sparse GP produce very similar results
        // in the learned points
        for (size_t j = 0; j < M; j++) {
            Eigen::VectorXd gp_val, sgp_val, gp_sigma, sgp_sigma;
            std::tie(gp_val, gp_sigma) = gp.query(samples[j]);
            std::tie(sgp_val, sgp_sigma) = sgp.query(samples[j]);

            if (std::abs(gp_val[0] - sgp_val[0]) > 1e-2 || std::abs(gp_sigma[0] - sgp_sigma[0]) > 1e-2)
                failed = true;
        }

        // check if normal GP and sparse GP produce very similar results
        // in the test points
        for (size_t j = 0; j < M; j++) {
            Eigen::VectorXd gp_val, sgp_val, gp_sigma, sgp_sigma;
            std::tie(gp_val, gp_sigma) = gp.query(test_samples[j]);
            std::tie(sgp_val, sgp_sigma) = sgp.query(test_samples[j]);

            if (std::abs(gp_val[0] - sgp_val[0]) > 1e-2 || std::abs(gp_sigma[0] - sgp_sigma[0]) > 1e-2)
                failed = true;
        }

        // check if normal GP and sparse GP produce very similar errors
        // in the test points
        for (size_t j = 0; j < M; j++) {
            double gp_val = gp.mu(test_samples[j])[0];
            double sgp_val = sgp.mu(test_samples[j])[0];

            double gp_error_val = std::abs(gp_val - test_observations[j][0]);
            double sgp_error_val = std::abs(sgp_val - test_observations[j][0]);

            if (std::abs(gp_error_val - sgp_error_val) > 1e-2)
                failed = true;
        }

        if (failed)
            failures++;
    }

    // the sparse approximation should match the full GP in at least 90% of the runs
    BOOST_CHECK(double(failures) / double(N) < 0.1);
}
BOOST_AUTO_TEST_CASE(test_multi_gp_auto)
{
    using namespace limbo;

    using KF_t = kernel::SquaredExpARD<Params>;
    using Mean_t = mean::Constant<Params>;
    using MultiGP_t = model::MultiGP<Params, model::GP, KF_t, Mean_t, model::multi_gp::ParallelLFOpt<Params, model::gp::KernelLFOpt<Params>>>;

    MultiGP_t gp;
    std::vector<Eigen::VectorXd> observations = {make_v2(5, 5), make_v2(10, 10), make_v2(5, 5)};
    std::vector<Eigen::VectorXd> samples = {make_v1(1), make_v1(2), make_v1(3)};

    gp.compute(samples, observations, false);
    gp.optimize_hyperparams();

    Eigen::VectorXd mu, sigma;
    std::tie(mu, sigma) = gp.query(make_v1(1));
    BOOST_CHECK(std::abs((mu(0) - 5)) < 1);
    BOOST_CHECK(sigma(0) <= 2. * (gp.gp_models()[0].kernel_function().noise() + 1e-8));
    BOOST_CHECK(sigma(1) <= 2. * (gp.gp_models()[1].kernel_function().noise() + 1e-8));

    std::tie(mu, sigma) = gp.query(make_v1(2));
    BOOST_CHECK(std::abs((mu(0) - 10)) < 1);
    BOOST_CHECK(sigma(0) <= 2. * (gp.gp_models()[0].kernel_function().noise() + 1e-8));
    BOOST_CHECK(sigma(1) <= 2. * (gp.gp_models()[1].kernel_function().noise() + 1e-8));

    std::tie(mu, sigma) = gp.query(make_v1(3));
    BOOST_CHECK(std::abs((mu(0) - 5)) < 1);
    BOOST_CHECK(sigma(0) <= 2. * (gp.gp_models()[0].kernel_function().noise() + 1e-8));
    BOOST_CHECK(sigma(1) <= 2. * (gp.gp_models()[1].kernel_function().noise() + 1e-8));
}