Commit b3a5a3c2 authored by Federico Allocati

Refactored GP Likelihood optimizations and moved them to model/gp/

parent 5803d5ec
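In short: the likelihood-based hyperparameter optimizers move from `opt::impl`, where they were wired in through the `optfun<>` Boost.Parameter keyword, to plain functors in `model::gp`, passed as an ordinary template argument of `model::GP` or `bayes_opt::BOptimizer`. A minimal before/after sketch of user code, where `Params`, `GP_t`, and `Acqui_t` stand in for whatever the surrounding example defines (compare the first hunk below):

// Old style (removed by this commit): optimizer via the optfun<> keyword.
// typedef opt::impl::GPKernelMeanLFOpt<Params> opt_t;
// bayes_opt::BOptimizer<Params, modelfun<GP_t>, acquifun<Acqui_t>, optfun<opt_t>> opt;

// New style: the optimizer is a regular template argument.
#include <limbo/model/gp.hpp>
#include <limbo/model/gp/kernel_mean_lf_opt.hpp>

bayes_opt::BOptimizer<Params, modelfun<GP_t>, acquifun<Acqui_t>,
                      model::gp::KernelMeanLFOpt<Params>> opt;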
......@@ -2,7 +2,7 @@
#include <limbo/kernel/squared_exp_ard.hpp>
#include <limbo/mean/function_ard.hpp>
#include <limbo/model/gp.hpp>
#include <limbo/opt/impl/gp_kernel_mean_lf_opt.hpp>
#include <limbo/model/gp/kernel_mean_lf_opt.hpp>
#include <limbo/bayes_opt/boptimizer.hpp>
using namespace limbo;
......@@ -155,9 +155,8 @@ int main()
typedef mean::FunctionARD<Params, MeanComplet<Params>> Mean_t;
typedef model::GP<Params, Kernel_t, Mean_t> GP_t;
typedef UCB_multi<Params, GP_t> Acqui_t;
typedef opt::impl::GPKernelMeanLFOpt<Params> opt_t;
bayes_opt::BOptimizer<Params, modelfun<GP_t>, acquifun<Acqui_t>, optfun<opt_t>> opt;
bayes_opt::BOptimizer<Params, modelfun<GP_t>, acquifun<Acqui_t>, model::gp::KernelMeanLFOpt<Params>> opt;
opt.optimize(fit_eval());
std::cout << opt.best_observation() << " res "
......
......@@ -24,6 +24,7 @@
#include <limbo/mean/data.hpp>
#include <limbo/opt/cmaes.hpp>
#include <limbo/model/gp.hpp>
#include <limbo/model/gp/kernel_lf_opt.hpp>
#include <limbo/init/random_sampling.hpp>
namespace limbo {
......@@ -96,7 +97,7 @@ namespace limbo {
typedef opt::Cmaes<Params> acquiopt_t; // 2
typedef kernel::SquaredExpARD<Params> kf_t;
typedef mean::Data<Params> mean_t;
typedef model::GP<Params, kf_t, mean_t> model_t; // 3
typedef model::GP<Params, kf_t, mean_t, model::gp::KernelLFOpt<Params>> model_t; // 3
// WARNING: you have to specify the acquisition function
// if you use a custom model
typedef acqui::GP_UCB<Params, model_t> acqui_t; // 4
......
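As the WARNING notes, a custom model now has to be paired with an explicit acquisition function. A hedged sketch of such an override, reusing only the types visible in this hunk (`my_model_t` and `my_acqui_t` are hypothetical names):

// Swap the default KernelLFOpt for the kernel+mean variant; a custom
// model also requires an explicit acquisition function (see WARNING above).
typedef model::GP<Params, kf_t, mean_t, model::gp::KernelMeanLFOpt<Params>> my_model_t;
typedef acqui::GP_UCB<Params, my_model_t> my_acqui_t;
bayes_opt::BOptimizer<Params, modelfun<my_model_t>, acquifun<my_acqui_t>> boptimizer;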
......@@ -2,5 +2,9 @@
#define LIMBO_MODEL_HPP
#include <limbo/model/gp.hpp>
#include <limbo/model/gp/kernel_lf_opt.hpp>
#include <limbo/model/gp/kernel_mean_lf_opt.hpp>
#include <limbo/model/gp/mean_lf_opt.hpp>
#include <limbo/model/gp/no_lf_opt.hpp>
#endif
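With these includes, `<limbo/model.hpp>` now re-exports the four strategies introduced by this commit, grouped by what they tune through likelihood maximization:

// model::gp::KernelLFOpt<Params>     -- kernel hyperparameters only
// model::gp::KernelMeanLFOpt<Params> -- kernel and mean hyperparameters
// model::gp::MeanLFOpt<Params>       -- mean hyperparameters only
// model::gp::NoLFOpt<Params>         -- no optimization (the GP default)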
......@@ -6,33 +6,18 @@
#include <limits>
#include <vector>
#include <boost/parameter.hpp>
#include <Eigen/Core>
#include <Eigen/LU>
#include <Eigen/Cholesky>
#include <limbo/opt/impl/model_no_opt.hpp>
#include <limbo/model/gp/no_lf_opt.hpp>
namespace limbo {
BOOST_PARAMETER_TEMPLATE_KEYWORD(optfun)
namespace model {
typedef boost::parameter::parameters<boost::parameter::optional<tag::optfun>> gp_signature;
template <typename Params, typename KernelFunction, typename MeanFunction, class OptFun = boost::parameter::void_>
template <typename Params, typename KernelFunction, typename MeanFunction, class HyperParamsOptimizer = gp::NoLFOpt<Params>>
class GP {
public:
// defaults
struct defaults {
typedef opt::impl::ModelNoOpt<Params> opt_t; // 1
};
typedef typename gp_signature::bind<OptFun>::type args;
typedef typename boost::parameter::binding<args, tag::optfun, typename defaults::opt_t>::type opt_t;
GP() : _dim_in(-1), _dim_out(-1) {}
// useful because the model might be created before having samples
GP(int dim_in, int dim_out)
......@@ -67,7 +52,7 @@ namespace limbo {
_compute_obs_mean();
_compute_kernel();
opt_t()(*this);
HyperParamsOptimizer()(*this);
}
// return mu, sigma (unnormalized)
......@@ -122,7 +107,7 @@ namespace limbo {
Eigen::VectorXd max_observation() const
{
if (_observations.cols() > 1)
std::cout << "WARNING max_observation with multi dim_inensional "
std::cout << "WARNING max_observation with multi dimensional "
"observations doesn't make sense" << std::endl;
return _observations.maxCoeff();
}
......
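Because `HyperParamsOptimizer` is default-constructed and called with the GP itself (`HyperParamsOptimizer()(*this)` above), a custom strategy only needs a templated call operator, mirroring `gp::NoLFOpt` later in this commit. A minimal sketch, with a hypothetical struct name:

template <typename Params>
struct MyHyperOpt {
    template <typename GP>
    void operator()(GP& gp) const
    {
        // e.g. adjust gp.kernel_function() / gp.mean_function() here,
        // then refresh the model the way KernelLFOpt does:
        gp.update();
    }
};
// typedef model::GP<Params, Kernel_t, Mean_t, MyHyperOpt<Params>> GP_t;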
#ifndef LIMBO_MODEL_GP_KERNEL_LF_OPT_HPP
#define LIMBO_MODEL_GP_KERNEL_LF_OPT_HPP
#include <Eigen/Core>
#include <limbo/opt/rprop.hpp>
#include <limbo/opt/parallel_repeater.hpp>
namespace limbo {
namespace model {
namespace gp {
template <typename Params>
struct KernelLFOpt {
public:
template <typename GP>
void operator()(GP& gp) const
{
KernelLFOptimization<GP> optimization(gp);
opt::ParallelRepeater<Params, opt::Rprop<Params>> par_rprop;
auto params = par_rprop(optimization);
gp.kernel_function().set_h_params(params);
gp.set_lik(optimization.utility(params));
gp.update();
}
protected:
template <typename GP>
struct KernelLFOptimization {
public:
KernelLFOptimization(const GP& gp) : _gp(gp) {}
double utility(const Eigen::VectorXd& params)
{
this->_gp.kernel_function().set_h_params(params);
this->_gp.update();
size_t n = this->_gp.obs_mean().rows();
// --- cholesky ---
// see:
// http://xcorr.net/2008/06/11/log-determinant-of-positive-definite-matrices-in-matlab/
Eigen::MatrixXd l = this->_gp.llt().matrixL();
long double det = 2 * l.diagonal().array().log().sum();
// alpha = K^{-1} * this->_obs_mean;
// double a = this->_obs_mean.col(0).dot(this->_alpha.col(0));
double a = (this->_gp.obs_mean().transpose() * this->_gp.alpha())
.trace(); // generalization for multi dimensional observation
// std::cout<<" a: "<<a <<" det: "<< det<<std::endl;
double lik = -0.5 * a - 0.5 * det - 0.5 * n * log(2 * M_PI);
return lik;
}
std::pair<double, Eigen::VectorXd> utility_and_grad(const Eigen::VectorXd& params)
{
this->_gp.kernel_function().set_h_params(params);
this->_gp.update();
size_t n = this->_gp.obs_mean().rows();
// --- cholesky ---
// see:
// http://xcorr.net/2008/06/11/log-determinant-of-positive-definite-matrices-in-matlab/
Eigen::MatrixXd l = this->_gp.llt().matrixL();
long double det = 2 * l.diagonal().array().log().sum();
double a = (this->_gp.obs_mean().transpose() * this->_gp.alpha())
.trace(); // generalization for multi dimensional observation
// std::cout<<" a: "<<a <<" det: "<< det<<std::endl;
double lik = -0.5 * a - 0.5 * det - 0.5 * n * log(2 * M_PI);
// K^{-1} using Cholesky decomposition
Eigen::MatrixXd w = Eigen::MatrixXd::Identity(n, n);
this->_gp.llt().matrixL().solveInPlace(w);
this->_gp.llt().matrixL().transpose().solveInPlace(w);
// alpha * alpha.transpose() - K^{-1}
w = this->_gp.alpha() * this->_gp.alpha().transpose() - w;
// only compute half of the matrix (symmetrical matrix)
Eigen::VectorXd grad = Eigen::VectorXd::Zero(this->param_size());
for (size_t i = 0; i < n; ++i) {
for (size_t j = 0; j <= i; ++j) {
Eigen::VectorXd g = this->_gp.kernel_function().grad(this->_gp.samples()[i], this->_gp.samples()[j]);
if (i == j)
grad += w(i, j) * g * 0.5;
else
grad += w(i, j) * g;
}
}
return std::make_pair(lik, grad);
}
size_t param_size() const
{
return this->_gp.kernel_function().h_params_size();
}
Eigen::VectorXd init() const
{
return (Eigen::VectorXd::Random(param_size()).array() - 1);
}
protected:
GP _gp;
Eigen::VectorXd _init;
};
};
}
}
}
#endif
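For reference, `utility` and `utility_and_grad` implement the standard log marginal likelihood and its gradient with respect to the kernel hyperparameters (Rasmussen & Williams, GPML, Eqs. 5.8-5.9), with the log-determinant read off the stored Cholesky factor L:

\log p(\bar{y} \mid X, \theta) = -\tfrac{1}{2}\,\bar{y}^\top K^{-1}\bar{y} - \tfrac{1}{2}\log\lvert K\rvert - \tfrac{n}{2}\log 2\pi, \qquad \log\lvert K\rvert = 2\sum_i \log L_{ii}

\frac{\partial}{\partial\theta_j}\log p(\bar{y} \mid X, \theta) = \tfrac{1}{2}\,\operatorname{tr}\!\left((\alpha\alpha^\top - K^{-1})\,\frac{\partial K}{\partial\theta_j}\right), \qquad \alpha = K^{-1}\bar{y}

The double loop over `i, j <= i` evaluates exactly this trace: since both `w` and the kernel gradient are symmetric, each off-diagonal pair is visited once at full weight and each diagonal entry at half weight, which together equal half the trace.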
#ifndef LIMBO_MODEL_GP_KERNEL_MEAN_LF_OPT_HPP
#define LIMBO_MODEL_GP_KERNEL_MEAN_LF_OPT_HPP
#include <Eigen/Core>
#include <limbo/opt/rprop.hpp>
#include <limbo/opt/parallel_repeater.hpp>
namespace limbo {
namespace model {
namespace gp {
template <typename Params>
struct KernelMeanLFOpt {
public:
template <typename GP>
void operator()(GP& gp) const
{
KernelMeanLFOptimization<GP> optimization(gp);
opt::ParallelRepeater<Params, opt::Rprop<Params>> par_rprop;
auto params = par_rprop(optimization);
gp.kernel_function().set_h_params(params.head(gp.kernel_function().h_params_size()));
gp.mean_function().set_h_params(params.tail(gp.mean_function().h_params_size()));
gp.set_lik(optimization.utility(params));
gp.update();
}
protected:
template <typename GP>
struct KernelMeanLFOptimization {
public:
KernelMeanLFOptimization(const GP& gp) : _gp(gp) {}
double utility(const Eigen::VectorXd& params)
{
this->_gp.kernel_function().set_h_params(params.head(this->_gp.kernel_function().h_params_size()));
this->_gp.mean_function().set_h_params(params.tail(this->_gp.mean_function().h_params_size()));
this->_gp.update();
size_t n = this->_gp.obs_mean().rows();
// --- cholesky ---
// see:
// http://xcorr.net/2008/06/11/log-determinant-of-positive-definite-matrices-in-matlab/
Eigen::MatrixXd l = this->_gp.llt().matrixL();
long double det = 2 * l.diagonal().array().log().sum();
// alpha = K^{-1} * this->_obs_mean;
// double a = this->_obs_mean.col(0).dot(this->_alpha.col(0));
double a = (this->_gp.obs_mean().transpose() * this->_gp.alpha())
.trace(); // generalization for multi dimensional observation
// std::cout<<" a: "<<a <<" det: "<< det<<std::endl;
double lik = -0.5 * a - 0.5 * det - 0.5 * n * log(2 * M_PI);
return lik;
}
std::pair<double, Eigen::VectorXd> utility_and_grad(const Eigen::VectorXd& params)
{
this->_gp.kernel_function().set_h_params(params.head(this->_gp.kernel_function().h_params_size()));
this->_gp.mean_function().set_h_params(params.tail(this->_gp.mean_function().h_params_size()));
this->_gp.update();
size_t n = this->_gp.obs_mean().rows();
// --- cholesky ---
// see:
// http://xcorr.net/2008/06/11/log-determinant-of-positive-definite-matrices-in-matlab/
Eigen::MatrixXd l = this->_gp.llt().matrixL();
long double det = 2 * l.diagonal().array().log().sum();
double a = (this->_gp.obs_mean().transpose() * this->_gp.alpha())
.trace(); // generalization for multi dimensional observation
// std::cout<<" a: "<<a <<" det: "<< det<<std::endl;
double lik = -0.5 * a - 0.5 * det - 0.5 * n * log(2 * M_PI);
// K^{-1} using Cholesky decomposition
Eigen::MatrixXd K = Eigen::MatrixXd::Identity(n, n);
this->_gp.llt().matrixL().solveInPlace(K);
this->_gp.llt().matrixL().transpose().solveInPlace(K);
// alpha * alpha.transpose() - K^{-1}
Eigen::MatrixXd w = this->_gp.alpha() * this->_gp.alpha().transpose() - K;
// only compute half of the matrix (symmetrical matrix)
Eigen::VectorXd grad = Eigen::VectorXd::Zero(this->param_size());
for (size_t i = 0; i < n; ++i) {
for (size_t j = 0; j <= i; ++j) {
Eigen::VectorXd g = this->_gp.kernel_function().grad(this->_gp.samples()[i], this->_gp.samples()[j]);
if (i == j)
grad.head(this->_gp.kernel_function().h_params_size()) += w(i, j) * g * 0.5;
else
grad.head(this->_gp.kernel_function().h_params_size()) += w(i, j) * g;
}
}
for (int i_obs = 0; i_obs < this->_gp.dim_out(); ++i_obs)
for (size_t n_obs = 0; n_obs < n; n_obs++) {
grad.tail(this->_gp.mean_function().h_params_size()) += this->_gp.obs_mean().col(i_obs).transpose() * K.col(n_obs) * this->_gp.mean_function().grad(this->_gp.samples()[n_obs], this->_gp).row(i_obs);
}
return std::make_pair(lik, grad);
}
size_t param_size() const
{
return this->_gp.kernel_function().h_params_size() + this->_gp.mean_function().h_params_size();
}
Eigen::VectorXd init() const
{
return (Eigen::VectorXd::Random(param_size()).array() - 1);
}
protected:
GP _gp;
Eigen::VectorXd _init;
};
};
}
}
}
#endif
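The only addition relative to `KernelLFOpt` is the tail block of the gradient, the derivative with respect to the mean hyperparameters. Writing \(\bar{y} = y - m_\theta(X)\) for the centered observations held in `obs_mean()` (note that the local variable named `K` actually holds \(K^{-1}\) after the two triangular solves), the loop over `i_obs`/`n_obs` accumulates the standard expression:

\frac{\partial}{\partial\theta_m}\log p(\bar{y} \mid X, \theta) = \bar{y}^\top K^{-1}\,\frac{\partial m_\theta(X)}{\partial\theta_m}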
#ifndef LIMBO_MODEL_GP_MEAN_LF_OPT_HPP
#define LIMBO_MODEL_GP_MEAN_LF_OPT_HPP
#include <Eigen/Core>
#include <limbo/opt/rprop.hpp>
#include <limbo/opt/parallel_repeater.hpp>
namespace limbo {
namespace model {
namespace gp {
template <typename Params>
struct MeanLFOpt {
public:
template <typename GP>
void operator()(GP& gp) const
{
MeanLFOptimization<GP> optimization(gp);
opt::ParallelRepeater<Params, opt::Rprop<Params>> par_rprop;
auto params = par_rprop(optimization);
gp.mean_function().set_h_params(params);
gp.set_lik(optimization.utility(params));
gp.update();
}
protected:
template <typename GP>
struct MeanLFOptimization {
public:
MeanLFOptimization(const GP& gp) : _gp(gp) {}
double utility(const Eigen::VectorXd& params)
{
this->_gp.mean_function().set_h_params(params);
this->_gp.update();
size_t n = this->_gp.obs_mean().rows();
// --- cholesky ---
// see:
// http://xcorr.net/2008/06/11/log-determinant-of-positive-definite-matrices-in-matlab/
Eigen::MatrixXd l = this->_gp.llt().matrixL();
long double det = 2 * l.diagonal().array().log().sum();
// alpha = K^{-1} * this->_obs_mean;
// double a = this->_obs_mean.col(0).dot(this->_alpha.col(0));
double a = (this->_gp.obs_mean().transpose() * this->_gp.alpha())
.trace(); // generalization for multi dimensional observation
// std::cout<<" a: "<<a <<" det: "<< det<<std::endl;
double lik = -0.5 * a - 0.5 * det - 0.5 * n * log(2 * M_PI);
return lik;
}
std::pair<double, Eigen::VectorXd> utility_and_grad(const Eigen::VectorXd& params)
{
this->_gp.mean_function().set_h_params(params);
this->_gp.update();
size_t n = this->_gp.obs_mean().rows();
// --- cholesky ---
// see:
// http://xcorr.net/2008/06/11/log-determinant-of-positive-definite-matrices-in-matlab/
Eigen::MatrixXd l = this->_gp.llt().matrixL();
long double det = 2 * l.diagonal().array().log().sum();
double a = (this->_gp.obs_mean().transpose() * this->_gp.alpha())
.trace(); // generalization for multi dimensional observation
// std::cout<<" a: "<<a <<" det: "<< det<<std::endl;
double lik = -0.5 * a - 0.5 * det - 0.5 * n * log(2 * M_PI);
// K^{-1} using Cholesky decomposition
Eigen::MatrixXd K = Eigen::MatrixXd::Identity(n, n);
this->_gp.llt().matrixL().solveInPlace(K);
this->_gp.llt().matrixL().transpose().solveInPlace(K);
Eigen::VectorXd grad = Eigen::VectorXd::Zero(this->param_size());
for (int i_obs = 0; i_obs < this->_gp.dim_out(); ++i_obs)
for (size_t n_obs = 0; n_obs < n; n_obs++) {
grad.tail(this->_gp.mean_function().h_params_size()) += this->_gp.obs_mean().col(i_obs).transpose() * K.col(n_obs) * this->_gp.mean_function().grad(this->_gp.samples()[n_obs], this->_gp).row(i_obs);
}
return std::make_pair(lik, grad);
}
size_t param_size() const
{
return this->_gp.mean_function().h_params_size();
}
Eigen::VectorXd init() const
{
return (Eigen::VectorXd::Random(param_size()).array() - 1);
}
protected:
GP _gp;
Eigen::VectorXd _init;
};
};
}
}
}
#endif
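`MeanLFOpt` only pays off with a mean function that exposes tunable hyperparameters, such as the `mean::FunctionARD` wrapper used in the example at the top of this commit. A hedged sketch, where `Kernel_t` stands in for any kernel type:

typedef mean::FunctionARD<Params, MeanComplet<Params>> Mean_t;
typedef model::GP<Params, Kernel_t, Mean_t, model::gp::MeanLFOpt<Params>> GP_t;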
#ifndef LIMBO_MODEL_GP_NO_LF_OPT_HPP
#define LIMBO_MODEL_GP_NO_LF_OPT_HPP
namespace limbo {
namespace model {
namespace gp {
template <typename Params>
struct NoLFOpt {
template <typename GP>
void operator()(GP&) const {}
};
}
}
}
#endif
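`NoLFOpt` is the no-op strategy and the default fourth argument of `model::GP` (see the new GP signature earlier in this commit), so the two sketched declarations below are equivalent:

typedef model::GP<Params, Kernel_t, Mean_t> GP1_t;                             // implicit default
typedef model::GP<Params, Kernel_t, Mean_t, model::gp::NoLFOpt<Params>> GP2_t; // explicit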
......@@ -3,9 +3,6 @@
#include <limbo/opt/cmaes.hpp>
#include <limbo/opt/grid_search.hpp>
#include <limbo/opt/impl/gp_kernel_lf_opt.hpp>
#include <limbo/opt/impl/gp_kernel_mean_lf_opt.hpp>
#include <limbo/opt/impl/gp_mean_lf_opt.hpp>
#include <limbo/opt/parallel_repeater.hpp>
#include <limbo/opt/random_point.hpp>
#include <limbo/opt/rprop.hpp>
......
#ifndef LIMBO_OPT_IMPL_GP_KERNEL_LF_OPT_HPP
#define LIMBO_OPT_IMPL_GP_KERNEL_LF_OPT_HPP
#include <iostream>
#include <Eigen/Core>
#include <Eigen/LU>
#include <Eigen/Cholesky>
#include <limbo/opt/rprop.hpp>
#include <limbo/opt/parallel_repeater.hpp>
namespace limbo {
namespace opt {
namespace impl {
template <typename Params, typename Model>
struct GPKernelLFOptStruct {
public:
GPKernelLFOptStruct(const Model& model) : _model(model) {}
double utility(const Eigen::VectorXd& params)
{
this->_model.kernel_function().set_h_params(params);
this->_model.update();
size_t n = this->_model.obs_mean().rows();
// --- cholesky ---
// see:
// http://xcorr.net/2008/06/11/log-determinant-of-positive-definite-matrices-in-matlab/
Eigen::MatrixXd l = this->_model.llt().matrixL();
long double det = 2 * l.diagonal().array().log().sum();
// alpha = K^{-1} * this->_obs_mean;
// double a = this->_obs_mean.col(0).dot(this->_alpha.col(0));
double a = (this->_model.obs_mean().transpose() * this->_model.alpha())
.trace(); // generalization for multi dimensional observation
// std::cout<<" a: "<<a <<" det: "<< det<<std::endl;
double lik = -0.5 * a - 0.5 * det - 0.5 * n * log(2 * M_PI);
return lik;
}
std::pair<double, Eigen::VectorXd> utility_and_grad(const Eigen::VectorXd& params)
{
this->_model.kernel_function().set_h_params(params);
this->_model.update();
size_t n = this->_model.obs_mean().rows();
// --- cholesky ---
// see:
// http://xcorr.net/2008/06/11/log-determinant-of-positive-definite-matrices-in-matlab/
Eigen::MatrixXd l = this->_model.llt().matrixL();
long double det = 2 * l.diagonal().array().log().sum();
double a = (this->_model.obs_mean().transpose() * this->_model.alpha())
.trace(); // generalization for multi dimensional observation
// std::cout<<" a: "<<a <<" det: "<< det<<std::endl;
double lik = -0.5 * a - 0.5 * det - 0.5 * n * log(2 * M_PI);
// K^{-1} using Cholesky decomposition
Eigen::MatrixXd w = Eigen::MatrixXd::Identity(n, n);
this->_model.llt().matrixL().solveInPlace(w);
this->_model.llt().matrixL().transpose().solveInPlace(w);
// alpha * alpha.transpose() - K^{-1}
w = this->_model.alpha() * this->_model.alpha().transpose() - w;
// only compute half of the matrix (symmetrical matrix)
Eigen::VectorXd grad = Eigen::VectorXd::Zero(this->param_size());
for (size_t i = 0; i < n; ++i) {
for (size_t j = 0; j <= i; ++j) {
Eigen::VectorXd g = this->_model.kernel_function().grad(this->_model.samples()[i], this->_model.samples()[j]);
if (i == j)
grad += w(i, j) * g * 0.5;
else
grad += w(i, j) * g;
}
}
return std::make_pair(lik, grad);
}
size_t param_size() const
{
return this->_model.kernel_function().h_params_size();
}
Eigen::VectorXd init() const
{
return (Eigen::VectorXd::Random(param_size()).array() - 1);
}
protected:
Model _model;
Eigen::VectorXd _init;
};
template <typename Params>
struct GPKernelLFOpt {
template <typename Opt>
void operator()(Opt& opt)
{
GPKernelLFOptStruct<Params, Opt> util(opt);
ParallelRepeater<Params, Rprop<Params>> par_rprop;