Commit 094c508e authored by Aneoshun

add auto mean

parent 43c58166
@@ -56,11 +56,11 @@ def qsub(conf_file):
#PBS -N @exp
#PBS -o stdout
#PBS -e stderr
#PBS -M @email
# maximum execution time
#PBS -l walltime=@wall_time
# mail parameters
#PBS -m n
# number of nodes
#PBS -l nodes=@nb_cores:ppn=@ppn
#PBS -l pmem=5200mb -l mem=5200mb
......
@@ -37,7 +37,8 @@ struct Params {
};
struct fit_eval {
- static constexpr size_t dim = 2;
+ static constexpr size_t dim_in = 2;
+ static constexpr size_t dim_out = 1;
Eigen::VectorXd operator()(const Eigen::VectorXd& x) const {
Eigen::VectorXd res(1);
res(0) = 0;
......
@@ -41,8 +41,11 @@ class UCB_multi {
public:
UCB_multi(const Model& model, int iteration = 0) : _model(model) {
}
- size_t dim() const {
- return _model.dim();
+ size_t dim_in() const {
+ return _model.dim_in();
}
+ size_t dim_out() const {
+ return _model.dim_out();
+ }
double operator()(const Eigen::VectorXd& v) const {
//double mu, sigma;
@@ -59,7 +62,8 @@ protected:
struct fit_eval {
- static constexpr size_t dim = 2;
+ static constexpr size_t dim_in = 2;
+ static constexpr size_t dim_out = 2;
Eigen::VectorXd operator()(const Eigen::VectorXd& x) const {
Eigen::VectorXd res(2);
res(0) = 0;
......
//#define SHOW_TIMER
#include <boost/fusion/container/vector.hpp>
#include <boost/fusion/include/vector.hpp>
#include "limbo/limbo.hpp"
#include "limbo/inner_cmaes.hpp"
#include "limbo/gp_auto_mean.hpp"
using namespace limbo;
struct Params {
// struct gp_ucb : public defaults::gp_ucb {};
struct gp_auto_mean {
BO_PARAM(int, n_rprop, 100);
BO_PARAM(int, rprop_restart, 10);
};
struct cmaes : public defaults::cmaes {};
struct meanconstant : public defaults::meanconstant {};
struct ucb {
BO_PARAM(float, alpha, 0.1);
};
struct kf_maternfivehalfs {
BO_PARAM(float, sigma, 1);
BO_PARAM(float, l, 0.2);
};
struct boptimizer {
BO_PARAM(double, noise, 0.001);
BO_PARAM(int, dump_period, 1);
};
struct init {
BO_PARAM(int, nb_samples, 5);
};
struct maxiterations {
BO_PARAM(int, n_iterations, 100);
};
};
template<typename Params, typename Model>
class UCB_multi {
public:
UCB_multi(const Model& model, int iteration = 0) : _model(model) {
}
size_t dim_in() const {
return _model.dim_in();
}
size_t dim_out() const {
return _model.dim_out();
}
double operator()(const Eigen::VectorXd& v) const {
//double mu, sigma;
//std::tie(mu, sigma) = _model.query(v);
//return (mu + Params::ucb::alpha() * sqrt(sigma));
return (sqrt(_model.sigma(v)));
}
protected:
const Model& _model;
};
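// Note: this acquisition drops the UCB rule (commented out above) in favor of
// pure predictive uncertainty, so the optimizer always samples where the model
// is least certain. Written out, the two candidates are:
//   a_ucb(v)    = mu(v) + alpha * sqrt(sigma^2(v))   // alpha = Params::ucb::alpha()
//   a_active(v) = sqrt(sigma^2(v))                   // what operator() returns
// where mu and sigma^2 are the GP posterior mean and variance at v.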
template<typename Params, typename ObsType=Eigen::VectorXd>
struct MeanOffset {
MeanOffset(size_t dim_out = 1) {
}
template<typename GP>
ObsType operator()(const Eigen::VectorXd& x, const GP& gp) const {
Eigen::VectorXd res(2);
res(0) = 2; // constant overestimation
res(1) = 2; // constant overestimation
for (int i = 0; i < x.size(); i++)
{
res(0) += 1 - (x[i] - 0.3) * (x[i] - 0.3) + sin(10 * x[i]) * 0.2;
res(1) += 1 - (x[i] - 0.3) * (x[i] - 0.3) * 0.4;
}
return res;
}
};
template<typename Params, typename ObsType=Eigen::VectorXd>
struct MeanRotation {
MeanRotation(size_t dim_out = 1) {
}
template<typename GP>
ObsType operator()(const Eigen::VectorXd& x, const GP& gp) const {
Eigen::VectorXd res(2);
res(0) = 0; // no constant offset here (pure rotation)
res(1) = 0;
for (int i = 0; i < x.size(); i++)
{
res(0) += 1 - (x[i] - 0.3) * (x[i] - 0.3) + sin(10 * x[i]) * 0.2;
res(1) += 1 - (x[i] - 0.3) * (x[i] - 0.3) * 0.4;
}
double theta = M_PI / 2;
Eigen::Matrix2d rot;
rot(0, 0) = cos(theta);
rot(0, 1) = -sin(theta);
rot(1, 0) = sin(theta);
rot(1, 1) = cos(theta);
return rot * res;
}
};
template<typename Params, typename ObsType=Eigen::VectorXd>
struct MeanComplet {
MeanComplet(size_t dim_out = 1) {
}
template<typename GP>
ObsType operator()(const Eigen::VectorXd& x, const GP& gp) const {
Eigen::VectorXd res(2);
res(0) = 2; // constant overestimation
res(1) = 2; // constant overestimation
for (int i = 0; i < x.size(); i++)
{
res(0) += 1 - (x[i] - 0.3) * (x[i] - 0.3) + sin(10 * x[i]) * 0.2;
res(1) += 1 - (x[i] - 0.3) * (x[i] - 0.3) * 0.4;
}
double theta = M_PI / 2;
Eigen::Matrix2d rot;
rot(0, 0) = cos(theta);
rot(0, 1) = -sin(theta);
rot(1, 0) = sin(theta);
rot(1, 1) = cos(theta);
return rot * res;
}
};
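// Reading the three mean structs against fit_eval below: if f(x) is the true
// bi-objective, with
//   f_1(x) = sum_i (1 - (x_i - 0.3)^2 + 0.2 * sin(10 * x_i))
//   f_2(x) = sum_i (1 - 0.4 * (x_i - 0.3)^2)
// then MeanOffset encodes f(x) + 2, MeanRotation encodes R * f(x), and
// MeanComplet encodes R * (f(x) + 2), with R the rotation by theta = pi/2.
// Presumably these distorted priors exercise the mean-hyperparameter fitting
// that GPAutoMean adds.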
struct fit_eval {
static constexpr size_t dim_in = 2;
static constexpr size_t dim_out = 2;
Eigen::VectorXd operator()(const Eigen::VectorXd& x) const {
Eigen::VectorXd res(dim_out);
res(0) = 0;
res(1) = 0;
for (int i = 0; i < x.size(); i++)
{
res(0) += 1 - (x[i] - 0.3) * (x[i] - 0.3) + sin(10 * x[i]) * 0.2;
res(1) += 1 - (x[i] - 0.3) * (x[i] - 0.3) * 0.4;
}
return res;
}
};
int main() {
typedef kernel_functions::SquaredExpARD<Params> Kernel_t;
typedef mean_functions::MeanFunctionARD<Params, MeanComplet<Params>> Mean_t;
typedef model::GPAutoMean<Params, Kernel_t, Mean_t> GP_t;
typedef UCB_multi<Params, GP_t> Acqui_t;
BOptimizer<Params, model_fun<GP_t>, acq_fun<Acqui_t>> opt;
opt.optimize(fit_eval());
std::cout << opt.best_observation().transpose()
<< " res " << opt.best_sample().transpose()
<< std::endl;
return 0;
}
@@ -25,7 +25,8 @@ struct Params {
struct zdt2 {
- static constexpr size_t dim = 30;
+ static constexpr size_t dim_in = 30;
+ static constexpr size_t dim_out = 2;
Eigen::VectorXd operator()(const Eigen::VectorXd& x) const {
Eigen::VectorXd res(2);
double f1 = x(0);
@@ -41,7 +42,8 @@ struct zdt2 {
};
struct mop2 {
- static constexpr size_t dim = 2;
+ static constexpr size_t dim_in = 2;
+ static constexpr size_t dim_out = 2;
Eigen::VectorXd operator()(const Eigen::VectorXd& x) const {
Eigen::VectorXd res(2);
// scale to [-2, 2]
......
@@ -15,6 +15,12 @@ def build(bld):
target = 'obs_multi',
uselib = 'BOOST EIGEN TBB',
use = 'limbo')
+ obj = bld.program(features = 'cxx',
+ source = 'obs_multi_auto_mean.cpp',
+ includes = '. .. ../../',
+ target = 'obs_multi_auto_mean',
+ uselib = 'BOOST EIGEN TBB',
+ use = 'limbo')
obj = bld.program(features = 'cxx',
source = 'parego.cpp',
includes = '. .. ../../',
......
@@ -27,8 +27,11 @@ namespace limbo {
public:
UCB(const Model& model, int iteration = 0) : _model(model) {
}
- size_t dim() const {
- return _model.dim();
+ size_t dim_in() const {
+ return _model.dim_in();
}
+ size_t dim_out() const {
+ return _model.dim_out();
+ }
double operator()(const Eigen::VectorXd& v) const {
//double mu, sigma;
@@ -53,8 +56,11 @@ namespace limbo {
static constexpr double pi2 = M_PI * M_PI;
_beta = sqrtf(2.0 * log(t3 * pi2 / delta3));
}
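// A hedged reading of the line above: with t3 = t^3 and pi2 = pi^2 (and delta3
// presumably derived from a confidence parameter delta in the elided lines),
// the constructor sets beta_t = sqrt(2 * log(t^3 * pi^2 / delta3)), the
// exploration coefficient of the GP-UCB acquisition mu(v) + beta_t * sigma(v)
// in the spirit of Srinivas et al. (2010).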
- size_t dim() const {
- return _model.dim();
+ size_t dim_in() const {
+ return _model.dim_in();
}
+ size_t dim_out() const {
+ return _model.dim_out();
+ }
double operator()(const Eigen::VectorXd& v) const {
//double mu, sigma;
......
@@ -32,10 +32,9 @@ namespace limbo {
// static_assert(std::is_floating_point<obs_t>::value, "BOptimizer wants double/double for obs");
this->_init(feval, reset);
- model_t model(EvalFunction::dim);
+ model_t model(EvalFunction::dim_in, EvalFunction::dim_out);
if(this->_observations.size())
model.compute(this->_samples, this->_observations, Params::boptimizer::noise());
inner_optimization_t inner_optimization;
while (this->_samples.size() == 0 || this->_pursue(*this)) {
@@ -43,7 +42,7 @@ namespace limbo {
acquisition_function_t acqui(model, this->_iteration);
- Eigen::VectorXd new_sample = inner_optimization(acqui, acqui.dim());
+ Eigen::VectorXd new_sample = inner_optimization(acqui, acqui.dim_in());
this->add_new_sample(new_sample, feval(new_sample));
......
@@ -18,44 +18,39 @@ namespace limbo {
template<typename Params, typename KernelFunction, typename MeanFunction, typename ObsType= Eigen::VectorXd>
class GP {
public:
- GP() : _dim(-1) {}
+ GP() : _dim_in(-1), _dim_out(-1) {}
// useful because the model might be created before having samples
- GP(int d) : _dim(d), _kernel_function(d) {}
+ GP(int dim_in, int dim_out) : _dim_in(dim_in), _dim_out(dim_out), _kernel_function(dim_in), _mean_function(dim_out) {}
void compute(const std::vector<Eigen::VectorXd>& samples,
const std::vector<ObsType>& observations,
double noise) {
- if (_dim == -1) {
+ if (_dim_in == -1) {
assert(samples.size() != 0);
assert(observations.size() != 0);
assert(samples.size() == observations.size());
- _dim = samples[0].size();
+ _dim_in = samples[0].size();
+ _dim_out = observations[0].size();
}
_samples = samples;
_observations.resize(observations.size(), observations[0].size());
_noise = noise;
- int obs_dim = observations[0].size();
for (int i = 0; i < _observations.rows(); ++i)
_observations.row(i) = observations[i];
- _mean_observation.resize(obs_dim);
+ _mean_observation.resize(_dim_out);
for (int i = 0; i < _observations.cols(); i++)
_mean_observation(i) = _observations.col(i).sum() / _observations.rows();
- _mean_vector.resize(_samples.size(), obs_dim);
- for (int i = 0; i < _mean_vector.rows(); i++)
- _mean_vector.row(i) = _mean_function(_samples[i], *this);
- _obs_mean = _observations - _mean_vector;
+ _compute_obs_mean();
_compute_kernel();
}
// return mu, sigma (unnormalized)
std::tuple<ObsType, double> query(const Eigen::VectorXd& v) const {
if (_samples.size() == 0)
@@ -76,9 +71,13 @@
return sqrt(_kernel_function(v, v));
return _sigma(v, _compute_k(v));
}
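// For reference, query()/_sigma() implement the standard GP posterior
// (Rasmussen & Williams, 2006, ch. 2) with a non-zero prior mean m:
//   mu(v)      = m(v) + k(v)^T K^{-1} (y - m(X))   // (y - m(X)) is _obs_mean
//   sigma^2(v) = k(v, v) - k(v)^T K^{-1} k(v)
// where k(v) stacks the kernel values against the samples (_compute_k) and K
// is the kernel matrix of the samples (plus noise).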
- int dim() const {
- assert(_dim != -1); // need to compute first!
- return _dim;
+ int dim_in() const {
+ assert(_dim_in != -1); // need to compute first!
+ return _dim_in;
}
+ int dim_out() const {
+ assert(_dim_out != -1); // need to compute first!
+ return _dim_out;
+ }
const KernelFunction& kernel_function() const {
return _kernel_function;
@@ -88,7 +87,7 @@
}
ObsType max_observation() const {
if(_observations.cols()>1)
std::cout<<"WARNING max_observation with multi dimensional observations doesn't make sense"<<std::endl;
std::cout<<"WARNING max_observation with multi dim_inensional observations doesn't make sense"<<std::endl;
return _observations.maxCoeff();
}
ObsType mean_observation() const {
@@ -97,7 +96,8 @@
const Eigen::MatrixXd& mean_vector() const{return _mean_vector;}
protected:
- int _dim;
+ int _dim_in;
+ int _dim_out;
KernelFunction _kernel_function;
MeanFunction _mean_function;
@@ -111,10 +111,17 @@
ObsType _mean_observation;
Eigen::MatrixXd _kernel;
- Eigen::MatrixXd _inverted_kernel;
+ // Eigen::MatrixXd _inverted_kernel;
Eigen::MatrixXd _l_matrix;
Eigen::LLT<Eigen::MatrixXd> _llt;
+ void _compute_obs_mean() {
+ _mean_vector.resize(_samples.size(), _dim_out);
+ for (int i = 0; i < _mean_vector.rows(); i++)
+ _mean_vector.row(i) = _mean_function(_samples[i], *this);
+ _obs_mean = _observations - _mean_vector;
+ }
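// Sketch of what the helper above produces: row i of _mean_vector is the prior
// mean m(x_i)^T, so _obs_mean = Y - M holds the residuals that alpha, the
// likelihood, and its gradient all operate on.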
void _compute_kernel() {
// O(n^2) [should be negligible]
......
@@ -26,7 +26,7 @@ namespace limbo {
class GPAuto : public GP<Params, KernelFunction, MeanFunction> {
public:
// TODO : init KernelFunction with dim in GP
- GPAuto(int d) : GP<Params, KernelFunction, MeanFunction>(d) {}
+ GPAuto(int dim_in, int dim_out) : GP<Params, KernelFunction, MeanFunction>(dim_in, dim_out) {}
void compute(const std::vector<Eigen::VectorXd>& samples,
const std::vector<ObsType>& observations,
@@ -44,7 +44,7 @@ namespace limbo {
return this->_kernel*this->_alpha.col(0) - this->_obs_mean;
}
// see Rasmussen and Williams, 2006 (p. 113)
- double log_likelihood(const Eigen::VectorXd& h_params,
+ virtual double log_likelihood(const Eigen::VectorXd& h_params,
bool update_kernel = true) {
this->_kernel_function.set_h_params(h_params);
if (update_kernel)
@@ -66,7 +66,7 @@ namespace limbo {
}
// see Rasmussen and Williams, 2006 (p. 114)
- Eigen::VectorXd log_likelihood_grad(const Eigen::VectorXd& h_params,
+ virtual Eigen::VectorXd log_likelihood_grad(const Eigen::VectorXd& h_params,
bool update_kernel = true) {
this->_kernel_function.set_h_params(h_params);
if (update_kernel)
@@ -104,7 +104,7 @@ namespace limbo {
return grad;
}
protected:
- void _optimize_likelihood() {
+ virtual void _optimize_likelihood() {
par::init();
typedef std::pair<Eigen::VectorXd, double> pair_t;
auto body = [ = ](int i) {
......
#ifndef GP_AUTO_MEAN_HPP_
#define GP_AUTO_MEAN_HPP_
#include <limits>
#include <cassert>
#include <tbb/parallel_reduce.h>
#include <Eigen/Core>
#include <Eigen/LU>
#include <Eigen/Dense>
#include <Eigen/Cholesky>
#include "gp.hpp"
#include "rprop.hpp"
#include "parallel.hpp"
namespace limbo {
namespace defaults {
struct gp_auto_mean {
BO_PARAM(int, n_rprop, 100);
BO_PARAM(int, rprop_restart, 100);
};
}
namespace model {
template<typename Params, typename KernelFunction, typename MeanFunction, typename ObsType=Eigen::VectorXd>
class GPAutoMean : public GP<Params, KernelFunction, MeanFunction> {
public:
// TODO : init KernelFunction with dim in GP
GPAutoMean(int dim_in, int dim_out) : GP<Params, KernelFunction, MeanFunction>(dim_in, dim_out) {
std::cout << " H PARAMS SIZE: " << this->_kernel_function.h_params_size() << std::endl; // debug print left in
}
void compute(const std::vector<Eigen::VectorXd>& samples,
const std::vector<ObsType>& observations,
double noise) {
GP<Params, KernelFunction, MeanFunction>::compute(samples, observations, noise);
_optimize_likelihood();
// std::cout<<"kernel params: "<<this->_kernel_function.h_params().transpose()<<std::endl;
// std::cout<<"mean params: "<<this->_mean_function.h_params().transpose()<<std::endl;
this->_compute_kernel();
}
Eigen::VectorXd check_inverse() {
return this->_kernel * this->_alpha.col(0) - this->_obs_mean;
}
// see Rasmussen and Williams, 2006 (p. 113)
virtual double log_likelihood(const Eigen::VectorXd& h_params,
bool update_kernel = true) {
this->_kernel_function.set_h_params(h_params.head(this->_kernel_function.h_params_size()));
this->_mean_function.set_h_params(h_params.tail(this->_mean_function.h_params_size()));
if (update_kernel) {
this->_compute_kernel();
this->_compute_obs_mean();
}
size_t n = this->_obs_mean.rows();
// --- cholesky ---
// see: http://xcorr.net/2008/06/11/log-determinant-of-positive-definite-matrices-in-matlab/
Eigen::MatrixXd l = this->_llt.matrixL();
long double det = 2 * l.diagonal().array().log().sum(); // this is log|K|, not |K| itself
// alpha = K^{-1} * this->_obs_mean;
// double a = this->_obs_mean.col(0).dot(this->_alpha.col(0));
double a = (this->_obs_mean.transpose() * this->_alpha).trace(); // generalization for multi dimensional observation
// std::cout<<" a: "<<a <<" det: "<< det<<std::endl;
double lik = -0.5 * a - 0.5 * det - 0.5 * n * log(2 * M_PI);
return lik;
}
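// The computation above is eq. 5.8 of Rasmussen & Williams applied to the
// residuals and generalized to multi-dimensional observations via the trace:
//   log p(Y | X) = -1/2 tr((Y - M)^T K^{-1} (Y - M)) - 1/2 log|K| - n/2 log(2 pi)
// with K^{-1} (Y - M) available as this->_alpha and log|K| = 2 * sum_i log(L_ii)
// read off the Cholesky factor L (the trick in the link above).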
// see Rasmussen and Williams, 2006 (p. 114)
virtual Eigen::VectorXd log_likelihood_grad(const Eigen::VectorXd& h_params,
bool update_kernel = true) {
this->_kernel_function.set_h_params(h_params.head(this->_kernel_function.h_params_size()));
this->_mean_function.set_h_params(h_params.tail(this->_mean_function.h_params_size()));
if (update_kernel)
this->_compute_kernel();
size_t n = this->_observations.rows();
/// what we should write, but it is less numerically stable than using the Cholesky decomposition
// Eigen::MatrixXd alpha = this->_inverted_kernel * this->_obs_mean;
// Eigen::MatrixXd w = alpha * alpha.transpose() - this->_inverted_kernel;
// alpha = K^{-1} * this->_obs_mean;
Eigen::MatrixXd alpha = this->_llt.matrixL().solve(this->_obs_mean);
this->_llt.matrixL().adjoint().solveInPlace(alpha);
// K^{-1} using Cholesky decomposition
Eigen::MatrixXd K = Eigen::MatrixXd::Identity(n, n);
this->_llt.matrixL().solveInPlace(K);
this->_llt.matrixL().transpose().solveInPlace(K);
// alpha * alpha.transpose() - K^{-1}
Eigen::MatrixXd w = alpha * alpha.transpose() - K;
// only compute half of the matrix (symmetrical matrix)
Eigen::VectorXd grad =
Eigen::VectorXd::Zero(this->_kernel_function.h_params_size()+this->_mean_function.h_params_size());
for (size_t i = 0; i < n; ++i) {
for (size_t j = 0; j <= i; ++j) {
Eigen::VectorXd g = this->_kernel_function.grad(this->_samples[i], this->_samples[j]);
if (i == j)
grad.head(this->_kernel_function.h_params_size()) += w(i, j) * g * 0.5;
else
grad.head(this->_kernel_function.h_params_size()) += w(i, j) * g;
}
}
for (int i_obs = 0; i_obs < this->_dim_out; ++i_obs)
for (size_t n_obs = 0; n_obs < n; n_obs++)
{
// std::cout << "mean grad for " << this->_samples[n_obs].transpose() << std::endl << this->_mean_function.grad(this->_samples[n_obs], *this) << std::endl;
grad.tail(this->_mean_function.h_params_size()) += this->_obs_mean.col(i_obs).transpose() * K.col(n_obs) * this->_mean_function.grad(this->_samples[n_obs], *this).row(i_obs);
}
return grad;
}
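// Sketch of the math above (Rasmussen & Williams, eq. 5.9), writing
// W = alpha * alpha^T - K^{-1}:
//   d(log p)/d(theta_j) = 1/2 tr(W * dK/dtheta_j)            // kernel block
// accumulated over the lower triangle only, since W and dK/dtheta_j are
// symmetric (hence the extra 0.5 on the diagonal), and per output dimension d:
//   d(log p)/d(psi) = (Y - M)_d^T K^{-1} dm_d/dpsi           // mean block
// Note that the local variable K above actually holds K^{-1}.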
protected:
virtual void _optimize_likelihood() {
par::init();
typedef std::pair<Eigen::VectorXd, double> pair_t;
auto body = [ = ](int i) {
// we need a copy because each thread should touch a copy of the GP!
auto gp = *this;
Eigen::VectorXd v = rprop::optimize([&](const Eigen::VectorXd & v) {
return gp.log_likelihood(v);
},
[&](const Eigen::VectorXd & v) {
return gp.log_likelihood_grad(v, false);
},
this->kernel_function().h_params_size() + this->_mean_function.h_params_size(), Params::gp_auto_mean::n_rprop());
double lik = gp.log_likelihood(v); // this->log_likelihood(v);
return std::make_pair(v, lik);
};
auto comp = [](const pair_t& v1, const pair_t& v2) {
return v1.second > v2.second;
};
pair_t init(Eigen::VectorXd::Zero(1), -std::numeric_limits<float>::max());
auto m = par::max(init, Params::gp_auto_mean::rprop_restart(), body, comp);
std::cout << "likelihood:" << m.second << std::endl;
this->_kernel_function.set_h_params(m.first.head(this->_kernel_function.h_params_size()));
this->_mean_function.set_h_params(m.first.tail(this->_mean_function.h_params_size()));
}
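// The vector optimized above concatenates both hyperparameter sets,
//   v = [ kernel h-params | mean h-params ],
// which is why set_h_params splits it with head()/tail(). par::max keeps the
// restart (out of rprop_restart() random restarts, run in parallel) with the
// highest likelihood.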
};
}
}
#endif
@@ -27,8 +27,8 @@
template<typename F, typename Opt>
void operator()(<