Unverified commit 6afca247, authored by Konstantinos Chatzilygeroudis, committed by GitHub

Merge pull request #271 from resibots/optimizers

Adding a few gradient-based optimizers
Parents: f215c7eb, 50637658
@@ -61,6 +61,8 @@
#include <limbo/opt/nlopt_grad.hpp>
#include <limbo/opt/nlopt_no_grad.hpp>
#endif
#include <limbo/opt/adam.hpp>
#include <limbo/opt/gradient_ascent.hpp>
#include <limbo/opt/parallel_repeater.hpp>
#include <limbo/opt/random_point.hpp>
#include <limbo/opt/rprop.hpp>
......
new file: limbo/opt/adam.hpp
//| Copyright Inria May 2015
//| This project has received funding from the European Research Council (ERC) under
//| the European Union's Horizon 2020 research and innovation programme (grant
//| agreement No 637972) - see http://www.resibots.eu
//|
//| Contributor(s):
//| - Jean-Baptiste Mouret (jean-baptiste.mouret@inria.fr)
//| - Antoine Cully (antoinecully@gmail.com)
//| - Konstantinos Chatzilygeroudis (konstantinos.chatzilygeroudis@inria.fr)
//| - Federico Allocati (fede.allocati@gmail.com)
//| - Vaios Papaspyros (b.papaspyros@gmail.com)
//| - Roberto Rama (bertoski@gmail.com)
//|
//| This software is a computer library whose purpose is to optimize continuous,
//| black-box functions. It mainly implements Gaussian processes and Bayesian
//| optimization.
//| Main repository: http://github.com/resibots/limbo
//| Documentation: http://www.resibots.eu/limbo
//|
//| This software is governed by the CeCILL-C license under French law and
//| abiding by the rules of distribution of free software. You can use,
//| modify and/ or redistribute the software under the terms of the CeCILL-C
//| license as circulated by CEA, CNRS and INRIA at the following URL
//| "http://www.cecill.info".
//|
//| As a counterpart to the access to the source code and rights to copy,
//| modify and redistribute granted by the license, users are provided only
//| with a limited warranty and the software's author, the holder of the
//| economic rights, and the successive licensors have only limited
//| liability.
//|
//| In this respect, the user's attention is drawn to the risks associated
//| with loading, using, modifying and/or developing or reproducing the
//| software by the user in light of its specific status of free software,
//| that may mean that it is complicated to manipulate, and that also
//| therefore means that it is reserved for developers and experienced
//| professionals having in-depth computer knowledge. Users are therefore
//| encouraged to load and test the software's suitability as regards their
//| requirements in conditions enabling the security of their systems and/or
//| data to be ensured and, more generally, to use and operate it in the
//| same conditions as regards security.
//|
//| The fact that you are presently reading this means that you have had
//| knowledge of the CeCILL-C license and that you accept its terms.
//|
#ifndef LIMBO_OPT_ADAM_HPP
#define LIMBO_OPT_ADAM_HPP
#include <algorithm>
#include <Eigen/Core>
#include <limbo/opt/optimizer.hpp>
#include <limbo/tools/macros.hpp>
#include <limbo/tools/math.hpp>
namespace limbo {
namespace defaults {
struct opt_adam {
/// @ingroup opt_defaults
/// maximum number of iterations
BO_PARAM(int, iterations, 300);
/// alpha - learning rate
BO_PARAM(double, alpha, 0.001);
/// β1
BO_PARAM(double, b1, 0.9);
/// β2
BO_PARAM(double, b2, 0.999);
/// stopping tolerance: stop when the step norm falls below this epsilon
BO_PARAM(double, eps_stop, 0.0);
};
} // namespace defaults
namespace opt {
/// @ingroup opt
/// Adam optimizer
/// Equations from: http://ruder.io/optimizing-gradient-descent/index.html#gradientdescentoptimizationalgorithms
/// (the notation is slightly changed: η to α)
///
/// Parameters:
/// - int iterations
/// - double alpha
/// - double b1
/// - double b2
/// - double eps_stop
template <typename Params>
struct Adam {
template <typename F>
Eigen::VectorXd operator()(const F& f, const Eigen::VectorXd& init, bool bounded) const
{
assert(Params::opt_adam::b1() >= 0. && Params::opt_adam::b1() < 1.);
assert(Params::opt_adam::b2() >= 0. && Params::opt_adam::b2() < 1.);
assert(Params::opt_adam::alpha() >= 0.);
size_t param_dim = init.size();
double b1 = Params::opt_adam::b1();
double b2 = Params::opt_adam::b2();
double b1_t = b1;
double b2_t = b2;
double alpha = Params::opt_adam::alpha();
double stop = Params::opt_adam::eps_stop();
double epsilon = 1e-8;
Eigen::VectorXd m = Eigen::VectorXd::Zero(param_dim);
Eigen::VectorXd v = Eigen::VectorXd::Zero(param_dim);
Eigen::VectorXd params = init;
if (bounded) {
for (int j = 0; j < params.size(); j++) {
if (params(j) < 0)
params(j) = 0;
if (params(j) > 1)
params(j) = 1;
}
}
for (int i = 0; i < Params::opt_adam::iterations(); ++i) {
Eigen::VectorXd prev_params = params;
auto perf = opt::eval_grad(f, params);
Eigen::VectorXd grad = opt::grad(perf);
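// Update the (biased) first- and second-moment estimates of the
// gradient, then correct their initialization bias: b1_t and b2_t hold
// the running powers b1^t and b2^t, so the correction matters mostly
// in the first iterations, while m and v are still close to zero.
// The step is scaled by the bias-corrected RMS of the gradient; note
// the "+=" below: limbo optimizers maximize, so this is Adam in
// ascent form.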
m = b1 * m.array() + (1. - b1) * grad.array();
v = b2 * v.array() + (1. - b2) * grad.array().square();
Eigen::VectorXd m_hat = m.array() / (1. - b1_t);
Eigen::VectorXd v_hat = v.array() / (1. - b2_t);
params.array() += alpha * m_hat.array() / (v_hat.array().sqrt() + epsilon);
b1_t *= b1;
b2_t *= b2;
if (bounded) {
for (int j = 0; j < params.size(); j++) {
if (params(j) < 0)
params(j) = 0;
if (params(j) > 1)
params(j) = 1;
}
}
if ((prev_params - params).norm() < stop)
break;
}
return params;
}
};
} // namespace opt
} // namespace limbo
#endif
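As a quick orientation for readers new to limbo's optimizer API, here is a minimal, hypothetical usage sketch for the new Adam optimizer. `MyParams` and `my_func` are illustrative names, and the sketch assumes limbo's usual objective convention (`opt::eval_t`, a value plus an optional gradient), which is what `opt::eval_grad` above relies on:

```cpp
#include <Eigen/Core>
#include <limbo/opt/adam.hpp>

// Hypothetical parameter set: inherit the defaults, override the learning rate.
struct MyParams {
    struct opt_adam : public limbo::defaults::opt_adam {
        BO_PARAM(double, alpha, 0.1);
    };
};

// limbo optimizers maximize; the objective returns the value and its
// gradient (returned unconditionally here for simplicity).
limbo::opt::eval_t my_func(const Eigen::VectorXd& x, bool eval_grad)
{
    double value = -(x(0) - 1.) * (x(0) - 1.); // maximum at x = 1
    Eigen::VectorXd grad = Eigen::VectorXd::Constant(1, -2. * (x(0) - 1.));
    return {value, grad};
}

int main()
{
    limbo::opt::Adam<MyParams> adam;
    // Unbounded search from x = 0; the result should approach x = 1.
    Eigen::VectorXd best = adam(my_func, Eigen::VectorXd::Zero(1), false);
    return best.size() == 1 ? 0 : 1;
}
```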
new file: limbo/opt/gradient_ascent.hpp
//| Copyright Inria May 2015
//| This project has received funding from the European Research Council (ERC) under
//| the European Union's Horizon 2020 research and innovation programme (grant
//| agreement No 637972) - see http://www.resibots.eu
//|
//| Contributor(s):
//| - Jean-Baptiste Mouret (jean-baptiste.mouret@inria.fr)
//| - Antoine Cully (antoinecully@gmail.com)
//| - Konstantinos Chatzilygeroudis (konstantinos.chatzilygeroudis@inria.fr)
//| - Federico Allocati (fede.allocati@gmail.com)
//| - Vaios Papaspyros (b.papaspyros@gmail.com)
//| - Roberto Rama (bertoski@gmail.com)
//|
//| This software is a computer library whose purpose is to optimize continuous,
//| black-box functions. It mainly implements Gaussian processes and Bayesian
//| optimization.
//| Main repository: http://github.com/resibots/limbo
//| Documentation: http://www.resibots.eu/limbo
//|
//| This software is governed by the CeCILL-C license under French law and
//| abiding by the rules of distribution of free software. You can use,
//| modify and/ or redistribute the software under the terms of the CeCILL-C
//| license as circulated by CEA, CNRS and INRIA at the following URL
//| "http://www.cecill.info".
//|
//| As a counterpart to the access to the source code and rights to copy,
//| modify and redistribute granted by the license, users are provided only
//| with a limited warranty and the software's author, the holder of the
//| economic rights, and the successive licensors have only limited
//| liability.
//|
//| In this respect, the user's attention is drawn to the risks associated
//| with loading, using, modifying and/or developing or reproducing the
//| software by the user in light of its specific status of free software,
//| that may mean that it is complicated to manipulate, and that also
//| therefore means that it is reserved for developers and experienced
//| professionals having in-depth computer knowledge. Users are therefore
//| encouraged to load and test the software's suitability as regards their
//| requirements in conditions enabling the security of their systems and/or
//| data to be ensured and, more generally, to use and operate it in the
//| same conditions as regards security.
//|
//| The fact that you are presently reading this means that you have had
//| knowledge of the CeCILL-C license and that you accept its terms.
//|
#ifndef LIMBO_OPT_GRADIENT_ASCENT_HPP
#define LIMBO_OPT_GRADIENT_ASCENT_HPP
#include <algorithm>
#include <Eigen/Core>
#include <limbo/opt/optimizer.hpp>
#include <limbo/tools/macros.hpp>
#include <limbo/tools/math.hpp>
namespace limbo {
namespace defaults {
struct opt_gradient_ascent {
/// @ingroup opt_defaults
/// maximum number of iterations
BO_PARAM(int, iterations, 300);
/// alpha - learning rate
BO_PARAM(double, alpha, 0.001);
/// gamma - momentum coefficient
BO_PARAM(double, gamma, 0.0);
/// whether to use Nesterov momentum
BO_PARAM(bool, nesterov, false);
/// stopping tolerance: stop when the step norm falls below this epsilon
BO_PARAM(double, eps_stop, 0.0);
};
} // namespace defaults
namespace opt {
/// @ingroup opt
/// Gradient Ascent with or without momentum (Nesterov or simple)
/// Equations from: http://ruder.io/optimizing-gradient-descent/index.html#gradientdescentoptimizationalgorithms
/// (the notation is slightly changed: η to α)
///
/// Parameters:
/// - int iterations
/// - double alpha
/// - double gamma
/// - bool nesterov
/// - double eps_stop
template <typename Params>
struct GradientAscent {
template <typename F>
Eigen::VectorXd operator()(const F& f, const Eigen::VectorXd& init, bool bounded) const
{
assert(Params::opt_gradient_ascent::gamma() >= 0. && Params::opt_gradient_ascent::gamma() < 1.);
assert(Params::opt_gradient_ascent::alpha() >= 0.);
size_t param_dim = init.size();
double gamma = Params::opt_gradient_ascent::gamma();
double alpha = Params::opt_gradient_ascent::alpha();
double stop = Params::opt_gradient_ascent::eps_stop();
bool is_nesterov = Params::opt_gradient_ascent::nesterov();
Eigen::VectorXd v = Eigen::VectorXd::Zero(param_dim);
Eigen::VectorXd params = init;
if (bounded) {
for (int j = 0; j < params.size(); j++) {
if (params(j) < 0)
params(j) = 0;
if (params(j) > 1)
params(j) = 1;
}
}
for (int i = 0; i < Params::opt_gradient_ascent::iterations(); ++i) {
Eigen::VectorXd prev_params = params;
Eigen::VectorXd query_params = params;
// if Nesterov momentum, change query parameters
if (is_nesterov) {
query_params.array() += gamma * v.array();
// make sure that the parameters are still in bounds, if needed
if (bounded) {
for (int j = 0; j < query_params.size(); j++) {
if (query_params(j) < 0)
query_params(j) = 0;
if (query_params(j) > 1)
query_params(j) = 1;
}
}
}
auto perf = opt::eval_grad(f, query_params);
Eigen::VectorXd grad = opt::grad(perf);
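// Classical momentum in ascent form: the velocity v accumulates a
// gamma-discounted history of gradient steps (gamma = 0 reduces to
// plain gradient ascent); the parameters then move along v.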
v = gamma * v.array() + alpha * grad.array();
params.array() += v.array();
if (bounded) {
for (int j = 0; j < params.size(); j++) {
if (params(j) < 0)
params(j) = 0;
if (params(j) > 1)
params(j) = 1;
}
}
if ((prev_params - params).norm() < stop)
break;
}
return params;
}
};
} // namespace opt
} // namespace limbo
#endif
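In equation form, the updates implemented above are (written for maximization, matching the code's α/γ notation; the Nesterov variant evaluates the gradient at the look-ahead point the code calls `query_params`):

```latex
% Simple momentum (gamma = 0 gives plain gradient ascent):
\begin{aligned}
v_t      &= \gamma\, v_{t-1} + \alpha\, \nabla f(\theta_{t-1}),\\
\theta_t &= \theta_{t-1} + v_t.
\end{aligned}

% Nesterov momentum (gradient at the look-ahead point):
\begin{aligned}
v_t      &= \gamma\, v_{t-1} + \alpha\, \nabla f(\theta_{t-1} + \gamma\, v_{t-1}),\\
\theta_t &= \theta_{t-1} + v_t.
\end{aligned}
```

Usage mirrors the Adam sketch above: instantiate `opt::GradientAscent<Params>` and call it with the objective, the starting point, and the bounded flag.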
@@ -48,8 +48,10 @@
#include <boost/test/unit_test.hpp>
#include <limbo/opt/adam.hpp>
#include <limbo/opt/chained.hpp>
#include <limbo/opt/cmaes.hpp>
#include <limbo/opt/gradient_ascent.hpp>
#include <limbo/opt/grid_search.hpp>
#include <limbo/opt/parallel_repeater.hpp>
#include <limbo/opt/random_point.hpp>
@@ -71,6 +73,16 @@ struct Params {
struct opt_rprop : public defaults::opt_rprop {
BO_PARAM(int, iterations, 150);
};
struct opt_gradient_ascent : public defaults::opt_gradient_ascent {
BO_PARAM(int, iterations, 150);
BO_PARAM(double, alpha, 0.1);
};
struct opt_adam : public defaults::opt_adam {
BO_PARAM(int, iterations, 150);
BO_PARAM(double, alpha, 0.1);
};
};
// test with a standard function
@@ -181,6 +193,84 @@ BOOST_AUTO_TEST_CASE(test_gradient)
BOOST_CHECK_EQUAL(simple_calls, Params::opt_rprop::iterations());
}
BOOST_AUTO_TEST_CASE(test_classic_optimizers)
{
using namespace limbo;
struct MomentumParams {
struct opt_gradient_ascent : public defaults::opt_gradient_ascent {
BO_PARAM(int, iterations, 150);
BO_PARAM(double, alpha, 0.1);
BO_PARAM(double, gamma, 0.8);
};
};
struct NesterovParams {
struct opt_gradient_ascent : public defaults::opt_gradient_ascent {
BO_PARAM(int, iterations, 150);
BO_PARAM(double, alpha, 0.1);
BO_PARAM(double, gamma, 0.8);
BO_PARAM(bool, nesterov, true);
};
};
opt::Rprop<Params> rprop;
opt::Adam<Params> adam;
opt::GradientAscent<Params> gradient_ascent;
opt::GradientAscent<MomentumParams> gradient_ascent_momentum;
opt::GradientAscent<NesterovParams> gradient_ascent_nesterov;
simple_calls = 0;
check_grad = true;
Eigen::VectorXd best_point = rprop(simple_func, Eigen::VectorXd::Constant(1, 2.0), false);
BOOST_CHECK_EQUAL(best_point.size(), 1);
BOOST_CHECK(std::abs(best_point(0) + 1.) < 1e-3);
BOOST_CHECK_EQUAL(simple_calls, Params::opt_rprop::iterations());
double best_rprop = best_point(0);
simple_calls = 0;
check_grad = true;
best_point = gradient_ascent(simple_func, Eigen::VectorXd::Constant(1, 2.0), false);
BOOST_CHECK_EQUAL(best_point.size(), 1);
BOOST_CHECK(std::abs(best_point(0) + 1.) < 1e-3);
BOOST_CHECK_EQUAL(simple_calls, Params::opt_gradient_ascent::iterations());
double best_gradient_ascent = best_point(0);
simple_calls = 0;
check_grad = true;
best_point = gradient_ascent_momentum(simple_func, Eigen::VectorXd::Constant(1, 2.0), false);
BOOST_CHECK_EQUAL(best_point.size(), 1);
BOOST_CHECK(std::abs(best_point(0) + 1.) < 1e-3);
BOOST_CHECK_EQUAL(simple_calls, MomentumParams::opt_gradient_ascent::iterations());
double best_gradient_ascent_momentum = best_point(0);
simple_calls = 0;
check_grad = true;
best_point = gradient_ascent_nesterov(simple_func, Eigen::VectorXd::Constant(1, 2.0), false);
BOOST_CHECK_EQUAL(best_point.size(), 1);
BOOST_CHECK(std::abs(best_point(0) + 1.) < 1e-3);
BOOST_CHECK_EQUAL(simple_calls, NesterovParams::opt_gradient_ascent::iterations());
double best_gradient_ascent_nesterov = best_point(0);
simple_calls = 0;
check_grad = true;
best_point = adam(simple_func, Eigen::VectorXd::Constant(1, 2.0), false);
BOOST_CHECK_EQUAL(best_point.size(), 1);
BOOST_CHECK(std::abs(best_point(0) + 1.) < 1e-3);
BOOST_CHECK_EQUAL(simple_calls, Params::opt_adam::iterations());
double best_adam = best_point(0);
BOOST_CHECK(std::abs(best_rprop - best_gradient_ascent) < 1e-3);
BOOST_CHECK(std::abs(best_rprop - best_gradient_ascent_momentum) < 1e-3);
BOOST_CHECK(std::abs(best_rprop - best_gradient_ascent_nesterov) < 1e-3);
BOOST_CHECK(std::abs(best_rprop - best_adam) < 1e-3);
}
BOOST_AUTO_TEST_CASE(test_parallel_repeater)
{
#ifdef USE_TBB
......