Matthias Mayr / limbo · Commits

Commit 50637658
authored Jun 10, 2018 by Konstantinos Chatzilygeroudis

Adding a few gradient-based optimizers

parent 2bb26473
Changes 4
src/limbo/opt.hpp
...
@@ -61,6 +61,8 @@
#include <limbo/opt/nlopt_grad.hpp>
#include <limbo/opt/nlopt_no_grad.hpp>
#endif
#include <limbo/opt/adam.hpp>
#include <limbo/opt/gradient_ascent.hpp>
#include <limbo/opt/parallel_repeater.hpp>
#include <limbo/opt/random_point.hpp>
#include <limbo/opt/rprop.hpp>
...
src/limbo/opt/adam.hpp
0 → 100644
//| Copyright Inria May 2015
//| This project has received funding from the European Research Council (ERC) under
//| the European Union's Horizon 2020 research and innovation programme (grant
//| agreement No 637972) - see http://www.resibots.eu
//|
//| Contributor(s):
//| - Jean-Baptiste Mouret (jean-baptiste.mouret@inria.fr)
//| - Antoine Cully (antoinecully@gmail.com)
//| - Konstantinos Chatzilygeroudis (konstantinos.chatzilygeroudis@inria.fr)
//| - Federico Allocati (fede.allocati@gmail.com)
//| - Vaios Papaspyros (b.papaspyros@gmail.com)
//| - Roberto Rama (bertoski@gmail.com)
//|
//| This software is a computer library whose purpose is to optimize continuous,
//| black-box functions. It mainly implements Gaussian processes and Bayesian
//| optimization.
//| Main repository: http://github.com/resibots/limbo
//| Documentation: http://www.resibots.eu/limbo
//|
//| This software is governed by the CeCILL-C license under French law and
//| abiding by the rules of distribution of free software. You can use,
//| modify and/ or redistribute the software under the terms of the CeCILL-C
//| license as circulated by CEA, CNRS and INRIA at the following URL
//| "http://www.cecill.info".
//|
//| As a counterpart to the access to the source code and rights to copy,
//| modify and redistribute granted by the license, users are provided only
//| with a limited warranty and the software's author, the holder of the
//| economic rights, and the successive licensors have only limited
//| liability.
//|
//| In this respect, the user's attention is drawn to the risks associated
//| with loading, using, modifying and/or developing or reproducing the
//| software by the user in light of its specific status of free software,
//| that may mean that it is complicated to manipulate, and that also
//| therefore means that it is reserved for developers and experienced
//| professionals having in-depth computer knowledge. Users are therefore
//| encouraged to load and test the software's suitability as regards their
//| requirements in conditions enabling the security of their systems and/or
//| data to be ensured and, more generally, to use and operate it in the
//| same conditions as regards security.
//|
//| The fact that you are presently reading this means that you have had
//| knowledge of the CeCILL-C license and that you accept its terms.
//|
#ifndef LIMBO_OPT_ADAM_HPP
#define LIMBO_OPT_ADAM_HPP
#include <algorithm>

#include <Eigen/Core>

#include <limbo/opt/optimizer.hpp>
#include <limbo/tools/macros.hpp>
#include <limbo/tools/math.hpp>
namespace limbo {
    namespace defaults {
        struct opt_adam {
            /// @ingroup opt_defaults
            /// number of max iterations
            BO_PARAM(int, iterations, 300);

            /// alpha - learning rate
            BO_PARAM(double, alpha, 0.001);

            /// β1
            BO_PARAM(double, b1, 0.9);

            /// β2
            BO_PARAM(double, b2, 0.999);

            /// norm epsilon for stopping
            BO_PARAM(double, eps_stop, 0.0);
        };
    } // namespace defaults

    namespace opt {
        /// @ingroup opt
        /// Adam optimizer
        /// Equations from: http://ruder.io/optimizing-gradient-descent/index.html#gradientdescentoptimizationalgorithms
        /// (the notation is changed slightly: η is renamed to α)
        ///
        /// Parameters:
        /// - int iterations
        /// - double alpha
        /// - double b1
        /// - double b2
        /// - double eps_stop
        template <typename Params>
        struct Adam {
            template <typename F>
            Eigen::VectorXd operator()(const F& f, const Eigen::VectorXd& init, bool bounded) const
            {
                assert(Params::opt_adam::b1() >= 0. && Params::opt_adam::b1() < 1.);
                assert(Params::opt_adam::b2() >= 0. && Params::opt_adam::b2() < 1.);
                assert(Params::opt_adam::alpha() >= 0.);

                size_t param_dim = init.size();
                double b1 = Params::opt_adam::b1();
                double b2 = Params::opt_adam::b2();
                double b1_t = b1;
                double b2_t = b2;
                double alpha = Params::opt_adam::alpha();
                double stop = Params::opt_adam::eps_stop();
                double epsilon = 1e-8;

                Eigen::VectorXd m = Eigen::VectorXd::Zero(param_dim);
                Eigen::VectorXd v = Eigen::VectorXd::Zero(param_dim);

                Eigen::VectorXd params = init;

                if (bounded) {
                    for (int j = 0; j < params.size(); j++) {
                        if (params(j) < 0)
                            params(j) = 0;
                        if (params(j) > 1)
                            params(j) = 1;
                    }
                }

                for (int i = 0; i < Params::opt_adam::iterations(); ++i) {
                    Eigen::VectorXd prev_params = params;
                    auto perf = opt::eval_grad(f, params);

                    Eigen::VectorXd grad = opt::grad(perf);
                    m = b1 * m.array() + (1. - b1) * grad.array();
                    v = b2 * v.array() + (1. - b2) * grad.array().square();

                    Eigen::VectorXd m_hat = m.array() / (1. - b1_t);
                    Eigen::VectorXd v_hat = v.array() / (1. - b2_t);

                    params.array() += alpha * m_hat.array() / (v_hat.array().sqrt() + epsilon);

                    b1_t *= b1;
                    b2_t *= b2;

                    if (bounded) {
                        for (int j = 0; j < params.size(); j++) {
                            if (params(j) < 0)
                                params(j) = 0;
                            if (params(j) > 1)
                                params(j) = 1;
                        }
                    }

                    if ((prev_params - params).norm() < stop)
                        break;
                }

                return params;
            }
        };
    } // namespace opt
} // namespace limbo
#endif
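For reference, the recursions that `Adam::operator()` implements are the standard Adam updates from the post cited in the header, with `b1`, `b2` standing for β1, β2 and the step taken in the ascent direction (limbo's optimizers maximize):

$$
\begin{aligned}
m_t &= \beta_1 m_{t-1} + (1 - \beta_1)\, g_t \\
v_t &= \beta_2 v_{t-1} + (1 - \beta_2)\, g_t^2 \\
\hat{m}_t &= \frac{m_t}{1 - \beta_1^{\,t}}, \qquad \hat{v}_t = \frac{v_t}{1 - \beta_2^{\,t}} \\
\theta_{t+1} &= \theta_t + \alpha \, \frac{\hat{m}_t}{\sqrt{\hat{v}_t} + \epsilon}
\end{aligned}
$$

Here g_t is the gradient returned by `opt::grad(perf)`, and the variables `b1_t`, `b2_t` in the code accumulate the powers β1^t, β2^t used for bias correction.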
src/limbo/opt/gradient_ascent.hpp
0 → 100644
//| Copyright Inria May 2015
//| This project has received funding from the European Research Council (ERC) under
//| the European Union's Horizon 2020 research and innovation programme (grant
//| agreement No 637972) - see http://www.resibots.eu
//|
//| Contributor(s):
//| - Jean-Baptiste Mouret (jean-baptiste.mouret@inria.fr)
//| - Antoine Cully (antoinecully@gmail.com)
//| - Konstantinos Chatzilygeroudis (konstantinos.chatzilygeroudis@inria.fr)
//| - Federico Allocati (fede.allocati@gmail.com)
//| - Vaios Papaspyros (b.papaspyros@gmail.com)
//| - Roberto Rama (bertoski@gmail.com)
//|
//| This software is a computer library whose purpose is to optimize continuous,
//| black-box functions. It mainly implements Gaussian processes and Bayesian
//| optimization.
//| Main repository: http://github.com/resibots/limbo
//| Documentation: http://www.resibots.eu/limbo
//|
//| This software is governed by the CeCILL-C license under French law and
//| abiding by the rules of distribution of free software. You can use,
//| modify and/ or redistribute the software under the terms of the CeCILL-C
//| license as circulated by CEA, CNRS and INRIA at the following URL
//| "http://www.cecill.info".
//|
//| As a counterpart to the access to the source code and rights to copy,
//| modify and redistribute granted by the license, users are provided only
//| with a limited warranty and the software's author, the holder of the
//| economic rights, and the successive licensors have only limited
//| liability.
//|
//| In this respect, the user's attention is drawn to the risks associated
//| with loading, using, modifying and/or developing or reproducing the
//| software by the user in light of its specific status of free software,
//| that may mean that it is complicated to manipulate, and that also
//| therefore means that it is reserved for developers and experienced
//| professionals having in-depth computer knowledge. Users are therefore
//| encouraged to load and test the software's suitability as regards their
//| requirements in conditions enabling the security of their systems and/or
//| data to be ensured and, more generally, to use and operate it in the
//| same conditions as regards security.
//|
//| The fact that you are presently reading this means that you have had
//| knowledge of the CeCILL-C license and that you accept its terms.
//|
#ifndef LIMBO_OPT_GRADIENT_ASCENT_HPP
#define LIMBO_OPT_GRADIENT_ASCENT_HPP
#include <algorithm>

#include <Eigen/Core>

#include <limbo/opt/optimizer.hpp>
#include <limbo/tools/macros.hpp>
#include <limbo/tools/math.hpp>
namespace limbo {
    namespace defaults {
        struct opt_gradient_ascent {
            /// @ingroup opt_defaults
            /// number of max iterations
            BO_PARAM(int, iterations, 300);

            /// alpha - learning rate
            BO_PARAM(double, alpha, 0.001);

            /// gamma - for momentum
            BO_PARAM(double, gamma, 0.0);

            /// nesterov momentum; turn on/off
            BO_PARAM(bool, nesterov, false);

            /// norm epsilon for stopping
            BO_PARAM(double, eps_stop, 0.0);
        };
    } // namespace defaults

    namespace opt {
        /// @ingroup opt
        /// Gradient Ascent with or without momentum (Nesterov or simple)
        /// Equations from: http://ruder.io/optimizing-gradient-descent/index.html#gradientdescentoptimizationalgorithms
        /// (the notation is changed slightly: η is renamed to α)
        ///
        /// Parameters:
        /// - int iterations
        /// - double alpha
        /// - double gamma
        /// - bool nesterov
        /// - double eps_stop
        template <typename Params>
        struct GradientAscent {
            template <typename F>
            Eigen::VectorXd operator()(const F& f, const Eigen::VectorXd& init, bool bounded) const
            {
                assert(Params::opt_gradient_ascent::gamma() >= 0. && Params::opt_gradient_ascent::gamma() < 1.);
                assert(Params::opt_gradient_ascent::alpha() >= 0.);

                size_t param_dim = init.size();
                double gamma = Params::opt_gradient_ascent::gamma();
                double alpha = Params::opt_gradient_ascent::alpha();
                double stop = Params::opt_gradient_ascent::eps_stop();
                bool is_nesterov = Params::opt_gradient_ascent::nesterov();

                Eigen::VectorXd v = Eigen::VectorXd::Zero(param_dim);

                Eigen::VectorXd params = init;

                if (bounded) {
                    for (int j = 0; j < params.size(); j++) {
                        if (params(j) < 0)
                            params(j) = 0;
                        if (params(j) > 1)
                            params(j) = 1;
                    }
                }

                for (int i = 0; i < Params::opt_gradient_ascent::iterations(); ++i) {
                    Eigen::VectorXd prev_params = params;
                    Eigen::VectorXd query_params = params;
                    // if Nesterov momentum, change query parameters
                    if (is_nesterov) {
                        query_params.array() += gamma * v.array();

                        // make sure that the parameters are still in bounds, if needed
                        if (bounded) {
                            for (int j = 0; j < query_params.size(); j++) {
                                if (query_params(j) < 0)
                                    query_params(j) = 0;
                                if (query_params(j) > 1)
                                    query_params(j) = 1;
                            }
                        }
                    }
                    auto perf = opt::eval_grad(f, query_params);

                    Eigen::VectorXd grad = opt::grad(perf);
                    v = gamma * v.array() + alpha * grad.array();

                    params.array() += v.array();

                    if (bounded) {
                        for (int j = 0; j < params.size(); j++) {
                            if (params(j) < 0)
                                params(j) = 0;
                            if (params(j) > 1)
                                params(j) = 1;
                        }
                    }

                    if ((prev_params - params).norm() < stop)
                        break;
                }

                return params;
            }
        };
    } // namespace opt
} // namespace limbo
#endif
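The `GradientAscent` updates likewise follow the cited post, with momentum coefficient γ (the defaults γ = 0 and `nesterov = false` reduce to plain gradient ascent):

$$
v_t = \gamma v_{t-1} + \alpha\, g(\theta_t), \qquad \theta_{t+1} = \theta_t + v_t
$$

and, when `nesterov` is enabled, the gradient is evaluated at the look-ahead point instead:

$$
v_t = \gamma v_{t-1} + \alpha\, g(\theta_t + \gamma v_{t-1}), \qquad \theta_{t+1} = \theta_t + v_t
$$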
src/tests/test_optimizers.cpp
...
@@ -48,8 +48,10 @@
#include <boost/test/unit_test.hpp>

#include <limbo/opt/adam.hpp>
#include <limbo/opt/chained.hpp>
#include <limbo/opt/cmaes.hpp>
#include <limbo/opt/gradient_ascent.hpp>
#include <limbo/opt/grid_search.hpp>
#include <limbo/opt/parallel_repeater.hpp>
#include <limbo/opt/random_point.hpp>
...
@@ -71,6 +73,16 @@ struct Params {
    struct opt_rprop : public defaults::opt_rprop {
        BO_PARAM(int, iterations, 150);
    };

    struct opt_gradient_ascent : public defaults::opt_gradient_ascent {
        BO_PARAM(int, iterations, 150);
        BO_PARAM(double, alpha, 0.1);
    };

    struct opt_adam : public defaults::opt_adam {
        BO_PARAM(int, iterations, 150);
        BO_PARAM(double, alpha, 0.1);
    };
};

// test with a standard function
...
@@ -181,6 +193,84 @@ BOOST_AUTO_TEST_CASE(test_gradient)
    BOOST_CHECK_EQUAL(simple_calls, Params::opt_rprop::iterations());
}

BOOST_AUTO_TEST_CASE(test_classic_optimizers)
{
    using namespace limbo;

    struct MomentumParams {
        struct opt_gradient_ascent : public defaults::opt_gradient_ascent {
            BO_PARAM(int, iterations, 150);
            BO_PARAM(double, alpha, 0.1);
            BO_PARAM(double, gamma, 0.8);
        };
    };

    struct NesterovParams {
        struct opt_gradient_ascent : public defaults::opt_gradient_ascent {
            BO_PARAM(int, iterations, 150);
            BO_PARAM(double, alpha, 0.1);
            BO_PARAM(double, gamma, 0.8);
            BO_PARAM(bool, nesterov, true);
        };
    };

    opt::Rprop<Params> rprop;
    opt::Adam<Params> adam;
    opt::GradientAscent<Params> gradient_ascent;
    opt::GradientAscent<MomentumParams> gradient_ascent_momentum;
    opt::GradientAscent<NesterovParams> gradient_ascent_nesterov;

    simple_calls = 0;
    check_grad = true;
    Eigen::VectorXd best_point = rprop(simple_func, Eigen::VectorXd::Constant(1, 2.0), false);
    BOOST_CHECK_EQUAL(best_point.size(), 1);
    BOOST_CHECK(std::abs(best_point(0) + 1.) < 1e-3);
    BOOST_CHECK_EQUAL(simple_calls, Params::opt_rprop::iterations());
    double best_rprop = best_point(0);

    simple_calls = 0;
    check_grad = true;
    best_point = gradient_ascent(simple_func, Eigen::VectorXd::Constant(1, 2.0), false);
    BOOST_CHECK_EQUAL(best_point.size(), 1);
    BOOST_CHECK(std::abs(best_point(0) + 1.) < 1e-3);
    BOOST_CHECK_EQUAL(simple_calls, Params::opt_gradient_ascent::iterations());
    double best_gradient_ascent = best_point(0);

    simple_calls = 0;
    check_grad = true;
    best_point = gradient_ascent_momentum(simple_func, Eigen::VectorXd::Constant(1, 2.0), false);
    BOOST_CHECK_EQUAL(best_point.size(), 1);
    BOOST_CHECK(std::abs(best_point(0) + 1.) < 1e-3);
    BOOST_CHECK_EQUAL(simple_calls, MomentumParams::opt_gradient_ascent::iterations());
    double best_gradient_ascent_momentum = best_point(0);

    simple_calls = 0;
    check_grad = true;
    best_point = gradient_ascent_nesterov(simple_func, Eigen::VectorXd::Constant(1, 2.0), false);
    BOOST_CHECK_EQUAL(best_point.size(), 1);
    BOOST_CHECK(std::abs(best_point(0) + 1.) < 1e-3);
    BOOST_CHECK_EQUAL(simple_calls, NesterovParams::opt_gradient_ascent::iterations());
    double best_gradient_ascent_nesterov = best_point(0);

    simple_calls = 0;
    check_grad = true;
    best_point = adam(simple_func, Eigen::VectorXd::Constant(1, 2.0), false);
    BOOST_CHECK_EQUAL(best_point.size(), 1);
    BOOST_CHECK(std::abs(best_point(0) + 1.) < 1e-3);
    BOOST_CHECK_EQUAL(simple_calls, Params::opt_adam::iterations());
    double best_adam = best_point(0);

    BOOST_CHECK(std::abs(best_rprop - best_gradient_ascent) < 1e-3);
    BOOST_CHECK(std::abs(best_rprop - best_gradient_ascent_momentum) < 1e-3);
    BOOST_CHECK(std::abs(best_rprop - best_gradient_ascent_nesterov) < 1e-3);
    BOOST_CHECK(std::abs(best_rprop - best_adam) < 1e-3);
}

BOOST_AUTO_TEST_CASE(test_parallel_repeater)
{
#ifdef USE_TBB
...
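To illustrate how the new optimizers are used outside the test harness, here is a minimal, self-contained sketch. It assumes limbo's usual objective convention from limbo/opt/optimizer.hpp (a function returning an `opt::eval_t`, i.e. a value plus an optional gradient, with the `opt::no_grad` helper); the names `SketchParams` and `sketch_func` are hypothetical and not part of this commit.

#include <Eigen/Core>
#include <limbo/opt/adam.hpp>
#include <limbo/opt/gradient_ascent.hpp>

// Hypothetical parameter struct for this sketch (not part of the commit).
struct SketchParams {
    struct opt_adam : public limbo::defaults::opt_adam {
        BO_PARAM(int, iterations, 150);
        BO_PARAM(double, alpha, 0.1);
    };
    struct opt_gradient_ascent : public limbo::defaults::opt_gradient_ascent {
        BO_PARAM(int, iterations, 150);
        BO_PARAM(double, alpha, 0.1);
        BO_PARAM(double, gamma, 0.8);
        BO_PARAM(bool, nesterov, true);
    };
};

// f(x) = -(x + 1)^2, maximized at x = -1; returns the value and, on request, the gradient.
limbo::opt::eval_t sketch_func(const Eigen::VectorXd& params, bool eval_grad = false)
{
    double v = -(params(0) + 1.) * (params(0) + 1.);
    if (!eval_grad)
        return limbo::opt::no_grad(v);
    Eigen::VectorXd grad(1);
    grad(0) = -2. * (params(0) + 1.);
    return {v, grad};
}

int main()
{
    limbo::opt::Adam<SketchParams> adam;
    limbo::opt::GradientAscent<SketchParams> nesterov_ascent;

    // bounded = false: do not clip the parameters to [0, 1]
    Eigen::VectorXd x_adam = adam(sketch_func, Eigen::VectorXd::Constant(1, 2.0), false);
    Eigen::VectorXd x_nag = nesterov_ascent(sketch_func, Eigen::VectorXd::Constant(1, 2.0), false);
    // both should end up close to -1, as the test above checks
    return 0;
}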