Commit 325948f4 authored by Jean-Baptiste Mouret

Multi-objective optimization with Expected Improvement of the HyperVolume (ehvi)

parent 1b2f786e
#include "limbo/limbo.hpp"
#include "limbo/parego.hpp"
#include "limbo/ehvi.hpp"
#include "limbo/nsbo.hpp"
using namespace limbo;
@@ -108,7 +110,7 @@ namespace limbo {
template<typename BO>
void operator()(BO& opt) {
opt.update_pareto_data();
-#ifdef PAREGO // this is already done is NSBO
+#ifndef NSBO // this is already done in NSBO
opt.template update_pareto_model<F::dim>();
#endif
auto dir = opt.res_dir() + "/";
@@ -137,7 +139,7 @@ namespace limbo {
obs << opt.observations()[i].transpose() << " "
<< opt.samples()[i].transpose()
<< std::endl;
/*
std::string m1 = "model_" + it + ".dat";
std::ofstream m1f(m1.c_str());
for (float x = 0; x < 1; x += 0.01)
@@ -151,6 +153,8 @@ namespace limbo {
<< opt.models()[1].sigma(v) << std::endl;
}
*/
std::cout << "stats done" << std::endl;
}
};
}
@@ -174,7 +178,13 @@ int main() {
#endif
typedef stat::ParetoBenchmark<func_t> stat_t;
#ifdef PAREGO
Parego<Params, stat_fun<stat_t> > opt;
#elif defined(NSBO)
Nsbo<Params, stat_fun<stat_t> > opt;
#elif defined(EHVI)
Ehvi<Params, stat_fun<stat_t> > opt;
#endif
opt.optimize(func_t());
return 0;
@@ -6,10 +6,15 @@ def build(bld):
source = 'multi.cpp',
uselib_local = 'limbo',
uselib = 'BOOST EIGEN TBB SFERES',
-variants = ['PAREGO ZDT1 DIM30',
-            'PAREGO ZDT2 DIM30',
-            'PAREGO ZDT3 DIM30',
-            'PAREGO ZDT1 DIM30',
-            ],)
+variants = ['PAREGO ZDT2 DIM2',
+            'PAREGO ZDT2 DIM6',
+            'PAREGO ZDT3 DIM6',
+            'PAREGO MOP2',
+            # NSBO
+            'NSBO ZDT2 DIM2',
+            'NSBO ZDT2 DIM6',
+            'NSBO MOP2',
+            # EHVI
+            'EHVI ZDT2 DIM6',
+            'EHVI ZDT2 DIM2',
+            'EHVI MOP2'
+            ],)
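Each variant string presumably becomes a separate build target whose
whitespace-separated tokens are passed as preprocessor defines (e.g.
-DPAREGO -DZDT2 -DDIM2), which is what selects the optimizer and test
function through the #ifdef blocks in multi.cpp above.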
MANUAL for the EHVI software (c) Iris Hupkens, 2013
===================================================================
Implementation of algorithms first described in:
Iris Hupkens: Complexity Reduction and Validation of Computing the
Expected Hypervolume Improvement, Master's Thesis (with honors)
published as LIACS, Leiden University, The Netherlands, Internal
Report 2013-12, August, 2013
http://www.liacs.nl/assets/Masterscripties/2013-12IHupkens.pdf
Supervisors: Dr. Michael T.M. Emmerich, Dr. André H. Deutz
The code is organized as follows:
- ehvi_sliceupdate.cc contains the implementation of the
slice-update scheme
- ehvi_montecarlo.cc contains the implementation of the Monte Carlo
integration scheme
- ehvi_calculations.cc contains the implementations of the other
schemes (2-term, 5-term and 8-term) as well as the implementation
of the 2-dimensional calculation scheme.
- ehvi_multi.cc contains special implementations of the 5-term and
slice-update schemes which calculate the EHVI for multiple
individuals at the same time.
The files helper.cc and ehvi_hvol.cc contain functions which are
used by multiple update schemes. Hypervolume calculations are
implemented in ehvi_hvol.cc and the rest (such as the psi function)
is in helper.cc. There is also a file ehvi_consts.h which allows
the seed of the random number generator and the number of Monte
Carlo iterations to be changed.
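For reference, here is a minimal sketch of these helpers, assuming the
standard definitions from the thesis (the authoritative implementations
are the ones in helper.cc; exipsi is only ever used in differences, so
any additive constant would cancel):

#include <cmath>
// Standard normal density and cumulative distribution function:
double gausspdf(double x){ return 0.3989422804*exp(-x*x/2.0); } // 1/sqrt(2*pi)
double gausscdf(double x){ return 0.5*(1.0+erf(x/1.41421356237)); } // arg/sqrt(2)
// psi(a,b,mu,s) = s*phi((b-mu)/s) + (a-mu)*Phi((b-mu)/s):
double exipsi(double fmax, double cellcorner, double mu, double s){
  return s*gausspdf((cellcorner-mu)/s)
       + (fmax-mu)*gausscdf((cellcorner-mu)/s);
}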
The functions in these files can be used directly in C++ code, but
main.cc contains facilities to use it as a stand-alone command-line
application. To use it in this way, the software can be compiled
with gcc by unzipping the code into a directory and using the
command:
g++ -O3 -o EHVI -std=c++0x *.cc
If compiling it in a directory which also contains other .cc files
is desired, the following can be used instead:
g++ -O3 -o EHVI -std=c++0x main.cc helper.cc ehvi_sliceupdate.cc \
ehvi_multi.cc ehvi_montecarlo.cc ehvi_hvol.cc ehvi_calculations.cc
Running the software without command line arguments will allow a
test case to be entered into the terminal (with an arbitrary
population size and 1 candidate individual), which is then run
through the available calculation schemes.
Providing the name of a file as an argument will load that file and
perform EHVI calculations on it. The file should consist of the
following:
- A single integer representing n
- n*3 floating point numbers representing the coordinates of P.
- 3 floating point numbers representing r
- An arbitrary number of individuals, each represented by the 3
coordinates of its mean value followed by the 3 values of its
standard deviation in each dimension
No other text should be present in the file. All numbers should be
separated by whitespace, and additional whitespace is ignored.
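As an illustration only (this is a hypothetical sketch, not the actual
parser in main.cc), a reader for this format might look like:

#include <fstream>
#include <vector>
struct candidate { double mu[3], s[3]; };
bool readcase(const char* path, std::vector<double>& P, double r[3],
              std::vector<candidate>& cands){
  std::ifstream in(path);
  int n;
  if (!(in >> n)) return false;             // a single integer n
  P.resize(3*n);
  for (int i = 0; i < 3*n; i++) in >> P[i]; // n*3 coordinates of P
  in >> r[0] >> r[1] >> r[2];               // 3 numbers for r
  candidate c;                              // any number of candidates:
  while (in >> c.mu[0] >> c.mu[1] >> c.mu[2]
            >> c.s[0] >> c.s[1] >> c.s[2])
    cands.push_back(c);
  return true;
}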
The default scheme used will be the slice-update scheme, but other
schemes can be specified as arguments after the filename. The list
of possible schemes to request is:
2term 5term 8term sliceupdate montecarlo
However, only 5term and sliceupdate can be used if
more than one candidate individual is provided.
As an example, suppose the file multitest.txt contains the following
(n = 4, a population of four points, the reference point 0 0 0, and
four candidate individuals):
4 8 8 2 11 6 7 9 5 8 14 3 9
0 0 0 6 6 6 3 3 3 5 2 4 1 3 6 1 7 2 3 5 3 2 3 5 2 8 3
Then the following command will calculate the EHVI for the four
candidate individuals using the 5-term scheme:
./EHVI multitest.txt 5term
And the output of the program will be:
Loading testcase from file... Calculating with 5-term scheme
(multi-version)... 47.24623199 11.21775781 8.935099634 19.88518203
Only the actual answers are output to stdout while the rest of the
output is written to stderr, making it possible to write the
answers to a file using simple shell commands.
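For example, ./EHVI multitest.txt 5term > answers.txt stores just the
four values in answers.txt while the progress messages still appear on
the terminal.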
//The exact EHVI calculation schemes.
#include "helper.h"
#include "ehvi_hvol.h"
#include <deque>
#include <algorithm> //sorting
#include <math.h> //INFINITY macro
using namespace std;
//When doing 2d hypervolume calculations, uncomment the following line
//to NOT use O(1) S-minus updates:
//#define NAIVE_DOMPOINTS
//Returns the expected 2d hypervolume improvement of population P with reference
//point r, mean vector mu and standard deviation vector s.
double ehvi2d(deque<individual*> P, double r[], double mu[], double s[]){
sort(P.begin(), P.end(), xcomparator);
double answer = 0; //The eventual answer
int k = P.size(); //Holds amount of points.
#ifdef NAIVE_DOMPOINTS
deque<individual*> dompoints; //For the old-fashioned way.
#endif
double Sminus; //Correction term for the integral.
int Sstart = k-1, Shorizontal = 0;
//See thesis for explanation of how the O(1) iteration complexity
//is reached. NOTE: i = y = f[1], j = x = f[0]
for (int i=0;i<=k;i++){
Sminus = 0;
Shorizontal = Sstart;
for (int j=k-i;j<=k;j++){
double fmax[2]; //staircase width and height
double cl1, cl2, cu1, cu2; //Boundaries of grid cells
if (j == k){
fmax[1] = r[1];
cu1 = INFINITY;
}
else {
fmax[1] = P[j]->f[1];
cu1 = P[j]->f[0];
}
if (i == k){
fmax[0] = r[0];
cu2 = INFINITY;
}
else {
fmax[0] = P[k-i-1]->f[0];
cu2 = P[k-i-1]->f[1];
}
cl1 = (j == 0 ? r[0] : P[j-1]->f[0]);
cl2 = (i == 0 ? r[1] : P[k-i]->f[1]);
//Cell boundaries have been decided. Determine Sminus.
#ifdef NAIVE_DOMPOINTS
dompoints.clear();
for (int m = 0; m < k; m++){
if (cl1 >= P[m]->f[0] && cl2 >= P[m]->f[1]){
dompoints.push_back(P[m]);
}
}
Sminus = calculateS(dompoints, fmax);
#else
if (Shorizontal > Sstart){
Sminus += (P[Shorizontal]->f[0] - fmax[0]) * (P[Shorizontal]->f[1] - fmax[1]);
}
Shorizontal++;
#endif
//And then we integrate.
double psi1 = exipsi(fmax[0],cl1,mu[0],s[0]) - exipsi(fmax[0],cu1,mu[0],s[0]);
double psi2 = exipsi(fmax[1],cl2,mu[1],s[1]) - exipsi(fmax[1],cu2,mu[1],s[1]);
double gausscdf1 = gausscdf((cu1-mu[0])/s[0]) - gausscdf((cl1-mu[0])/s[0]);
double gausscdf2 = gausscdf((cu2-mu[1])/s[1]) - gausscdf((cl2-mu[1])/s[1]);
double sum = (psi1*psi2) - (Sminus*gausscdf1*gausscdf2);
if (sum > 0)
answer += sum;
}
Sstart--;
}
return answer;
}
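//Example (not part of the original file): a minimal, hypothetical call
//sketch for ehvi2d. It assumes only what the code above already relies
//on, namely that individual (from helper.h) exposes a public double f[]
//array of objective values (maximization, with r below the population).
double ehvi2d_example(){
  static individual a, b;      //two population points (zero-initialized)
  a.f[0] = 8; a.f[1] = 2;
  b.f[0] = 3; b.f[1] = 7;
  deque<individual*> P;
  P.push_back(&a);
  P.push_back(&b);
  double r[]  = {0, 0};        //reference point
  double mu[] = {6, 6};        //predictive mean of the candidate point
  double s[]  = {1, 1};        //predictive standard deviation
  return ehvi2d(P, r, mu, s);  //expected 2d hypervolume improvement
}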
//The 2-term scheme: subtracts the expected dominated hypervolume (the correction term) from the expected rectangular improvement volume.
double ehvi3d_2term(deque<individual*> P, double r[], double mu[], double s[]){
double answer = 0; //The eventual answer.
int n = P.size(); //Holds amount of points.
double Sminus; //Correction term for the integral.
deque<individual*> Py, Pz; //P sorted by y/z coordinate
sort(P.begin(), P.end(), ycomparator);
for (unsigned int i=0;i<P.size();i++){
Py.push_back(P[i]);
}
sort(P.begin(), P.end(), zcomparator);
for (unsigned int i=0;i<P.size();i++){
Pz.push_back(P[i]);
}
sort(P.begin(), P.end(), xcomparator);
for (int z=0;z<=n;z++){
for (int y=0;y<=n;y++){
for (int x=0;x<=n;x++){
double fmax[3]; //upper corner of hypervolume improvement box
double cl[3], cu[3]; //Boundaries of grid cells
cl[0] = (x == 0 ? r[0] : P[x-1]->f[0]);
cl[1] = (y == 0 ? r[1] : Py[y-1]->f[1]);
cl[2] = (z == 0 ? r[2] : Pz[z-1]->f[2]);
cu[0] = (x == n ? INFINITY : P[x]->f[0]);
cu[1] = (y == n ? INFINITY : Py[y]->f[1]);
cu[2] = (z == n ? INFINITY : Pz[z]->f[2]);
//Calculate expected one-dimensional improvements w.r.t. r
double psi1 = exipsi(r[0],cl[0],mu[0],s[0]) - exipsi(r[0],cu[0],mu[0],s[0]);
double psi2 = exipsi(r[1],cl[1],mu[1],s[1]) - exipsi(r[1],cu[1],mu[1],s[1]);
double psi3 = exipsi(r[2],cl[2],mu[2],s[2]) - exipsi(r[2],cu[2],mu[2],s[2]);
//Calculate the probability of being within the cell.
double gausscdf1 = gausscdf((cu[0]-mu[0])/s[0]) - gausscdf((cl[0]-mu[0])/s[0]);
double gausscdf2 = gausscdf((cu[1]-mu[1])/s[1]) - gausscdf((cl[1]-mu[1])/s[1]);
double gausscdf3 = gausscdf((cu[2]-mu[2])/s[2]) - gausscdf((cl[2]-mu[2])/s[2]);
//Calculate the 'expected position of p' and the correction term Sminus
if (gausscdf1 == 0 || gausscdf2 == 0 || gausscdf3 == 0)
continue; //avoid division by 0, cell contribution is 0 in these cases anyway.
fmax[0] = (psi1/gausscdf1) + r[0];
fmax[1] = (psi2/gausscdf2) + r[1];
fmax[2] = (psi3/gausscdf3) + r[2];
Sminus = hvol3d(Pz, r, fmax);
//the expected hypervolume improvement is the expected rectangular volume
//w.r.t. r minus the correction term Sminus
double sum = (psi1*psi2*psi3) - (Sminus*gausscdf1*gausscdf2*gausscdf3);
if (sum > 0) //Safety check; "Not-A-Number > 0" returns false
answer += sum;
}
}
}
return answer;
}
double ehvi3d_5term(deque<individual*> P, double r[], double mu[], double s[]){
//The 5-term scheme: subtracts four correction quantities from the expected rectangular volume.
double answer = 0; //The eventual answer
int n = P.size(); //Holds amount of points.
double Sminus; //Correction term for the integral.
deque<individual*> Py, Pz; //P sorted by y/z coordinate
sort(P.begin(), P.end(), ycomparator);
for (unsigned int i=0;i<P.size();i++){
Py.push_back(P[i]);
}
sort(P.begin(), P.end(), zcomparator);
for (unsigned int i=0;i<P.size();i++){
Pz.push_back(P[i]);
}
sort(P.begin(), P.end(), xcomparator);
for (int z=0;z<=n;z++){
for (int y=0;y<=n;y++){
for (int x=0;x<=n;x++){
double v[3]; //upper corner of hypervolume improvement box
double cl[3], cu[3]; //Boundaries of grid cells
cl[0] = (x == 0 ? r[0] : P[x-1]->f[0]);
cl[1] = (y == 0 ? r[1] : Py[y-1]->f[1]);
cl[2] = (z == 0 ? r[2] : Pz[z-1]->f[2]);
cu[0] = (x == n ? INFINITY : P[x]->f[0]);
cu[1] = (y == n ? INFINITY : Py[y]->f[1]);
cu[2] = (z == n ? INFINITY : Pz[z]->f[2]);
//We have to find v. This naive implementation is O(n) per iteration.
v[0] = r[0];
v[1] = r[1];
v[2] = r[2];
bool dominated = false;
for (unsigned int i=0;i<P.size();i++){
if (P[i]->f[0] >= cu[0] && P[i]->f[1] >= cu[1] && P[i]->f[2] >= cu[2]){
dominated = true;
break;
}
else if (P[i]->f[0] <= cu[0] && P[i]->f[1] >= cu[1] && P[i]->f[2] >= cu[2]){
if (P[i]->f[0] > v[0])
v[0] = P[i]->f[0];
}
else if (P[i]->f[0] >= cu[0] && P[i]->f[1] <= cu[1] && P[i]->f[2] >= cu[2]){
if (P[i]->f[1] > v[1])
v[1] = P[i]->f[1];
}
else if (P[i]->f[0] >= cu[0] && P[i]->f[1] >= cu[1] && P[i]->f[2] <= cu[2]){
if (P[i]->f[2] > v[2])
v[2] = P[i]->f[2];
}
}
if (dominated)
continue; //Cell's contribution is 0.
Sminus = hvol3d(Pz, v, cl);
//And then we integrate.
double psi1 = exipsi(v[0],cl[0],mu[0],s[0]) - exipsi(v[0],cu[0],mu[0],s[0]);
double psi2 = exipsi(v[1],cl[1],mu[1],s[1]) - exipsi(v[1],cu[1],mu[1],s[1]);
double psi3 = exipsi(v[2],cl[2],mu[2],s[2]) - exipsi(v[2],cu[2],mu[2],s[2]);
double gausscdf1 = gausscdf((cu[0]-mu[0])/s[0]) - gausscdf((cl[0]-mu[0])/s[0]);
double gausscdf2 = gausscdf((cu[1]-mu[1])/s[1]) - gausscdf((cl[1]-mu[1])/s[1]);
double gausscdf3 = gausscdf((cu[2]-mu[2])/s[2]) - gausscdf((cl[2]-mu[2])/s[2]);
double sum = (psi1*psi2*psi3) - (Sminus*gausscdf1*gausscdf2*gausscdf3);
//gausscdf represents chance of a point falling within the range [cl,cu)
//psi = partial expected improvement
//so psi - (gausscdf * (cl - v)) = p's expected distance from cl
double xslice = calculateslice(P, v, cl, 0);
double yslice = calculateslice(Py, v, cl, 1);
double zslice = calculateslice(Pz, v, cl, 2);
// cout << "Current partial contribution: " << sum << endl;
sum -= (xslice * gausscdf2 * gausscdf3 * (psi1 - (gausscdf1 * (cl[0]-v[0]))));
sum -= (yslice * gausscdf1 * gausscdf3 * (psi2 - (gausscdf2 * (cl[1]-v[1]))));
sum -= (zslice * gausscdf1 * gausscdf2 * (psi3 - (gausscdf3 * (cl[2]-v[2]))));
// cout << "Calculated partial contribution: " << sum << endl;
if (sum > 0)
answer += sum;
}
}
}
return answer;
}
double ehvi3d_8term(deque<individual*> P, double r[], double mu[], double s[]){
//The 8-term scheme: the fully decomposed 3d EHVI calculation, adding 8 quantities. NOT DONE YET.
double answer = 0; //The eventual answer
double sum; //Partial answer-holding variable.
int n = P.size(); //Holds amount of points.
double tempcorr, temprect, tempimp; //Correction term, rectangular volume, temp. improvement
deque<individual*> Py, Pz; //P sorted by y/z coordinate
sort(P.begin(), P.end(), ycomparator);
for (unsigned int i=0;i<P.size();i++){
Py.push_back(P[i]);
}
sort(P.begin(), P.end(), zcomparator);
for (unsigned int i=0;i<P.size();i++){
Pz.push_back(P[i]);
}
sort(P.begin(), P.end(), xcomparator);
for (int z=0;z<=n;z++){
for (int y=0;y<=n;y++){
for (int x=0;x<=n;x++){
double v[3]; //upper corner of hypervolume improvement box
double cl[3], cu[3]; //Boundaries of grid cell
cl[0] = (x == 0 ? r[0] : P[x-1]->f[0]);
cl[1] = (y == 0 ? r[1] : Py[y-1]->f[1]);
cl[2] = (z == 0 ? r[2] : Pz[z-1]->f[2]);
cu[0] = (x == n ? INFINITY : P[x]->f[0]);
cu[1] = (y == n ? INFINITY : Py[y]->f[1]);
cu[2] = (z == n ? INFINITY : Pz[z]->f[2]);
//We have to find v. This naive implementation is O(n) per iteration.
v[0] = r[0];
v[1] = r[1];
v[2] = r[2];
bool dominated = false;
for (unsigned int i=0;i<P.size();i++){
if (P[i]->f[0] >= cu[0] && P[i]->f[1] >= cu[1] && P[i]->f[2] >= cu[2]){
dominated = true;
break;
}
else if (P[i]->f[0] <= cu[0] && P[i]->f[1] >= cu[1] && P[i]->f[2] >= cu[2]){
if (P[i]->f[0] > v[0])
v[0] = P[i]->f[0];
}
else if (P[i]->f[0] >= cu[0] && P[i]->f[1] <= cu[1] && P[i]->f[2] >= cu[2]){
if (P[i]->f[1] > v[1])
v[1] = P[i]->f[1];
}
else if (P[i]->f[0] >= cu[0] && P[i]->f[1] >= cu[1] && P[i]->f[2] <= cu[2]){
if (P[i]->f[2] > v[2])
v[2] = P[i]->f[2];
}
}
if (dominated)
continue; //Cell's contribution is 0.
//We will first calculate the chance of being in the cell:
double gausscdf1 = gausscdf((cu[0]-mu[0])/s[0]) - gausscdf((cl[0]-mu[0])/s[0]);
double gausscdf2 = gausscdf((cu[1]-mu[1])/s[1]) - gausscdf((cl[1]-mu[1])/s[1]);
double gausscdf3 = gausscdf((cu[2]-mu[2])/s[2]) - gausscdf((cl[2]-mu[2])/s[2]);
//And the expected improvement with regards to cl:
double psi1 = exipsi(cl[0],cl[0],mu[0],s[0]) - exipsi(cl[0],cu[0],mu[0],s[0]);
double psi2 = exipsi(cl[1],cl[1],mu[1],s[1]) - exipsi(cl[1],cu[1],mu[1],s[1]);
double psi3 = exipsi(cl[2],cl[2],mu[2],s[2]) - exipsi(cl[2],cu[2],mu[2],s[2]);
//FIRST QUANTITY: Q_emptyset.
tempcorr = hvol3d(Pz, r, cl);
temprect = (cl[0] - r[0])*(cl[1] - r[1])*(cl[2] - r[2]);
sum = gausscdf1 * gausscdf2 * gausscdf3 * (temprect - tempcorr);
//SECOND QUANTITY: Q_x
tempcorr = calculateslice(P, r, cl, 0);
temprect = (cl[1] - r[1])*(cl[2] - r[2]);
tempimp = (temprect - tempcorr) * psi1;
sum += tempimp * gausscdf2 * gausscdf3;
//THIRD QUANTITY: Q_y
tempcorr = calculateslice(Py, r, cl, 1);
temprect = (cl[0] - r[0])*(cl[2] - r[2]);
tempimp = (temprect - tempcorr) * psi2;
sum += tempimp * gausscdf1 * gausscdf3;
//FOURTH QUANTITY: Q_z
tempcorr = calculateslice(Pz, r, cl, 2);
temprect = (cl[0] - r[0])*(cl[1] - r[1]);
tempimp = (temprect - tempcorr) * psi3;
sum += tempimp * gausscdf1 * gausscdf2;
//FIFTH QUANTITY: Q_xy
tempimp = (cl[2] - v[2]) * gausscdf3;
sum += psi1 * psi2 * tempimp;
//SIXTH QUANTITY: Q_xz
tempimp = (cl[1] - v[1]) * gausscdf2;
sum += psi1 * psi3 * tempimp;
//SEVENTH QUANTITY: Q_yz
tempimp = (cl[0] - v[0]) * gausscdf1;
sum += psi2 * psi3 * tempimp;
//EIGHTH QUANTITY: Q_xyz
sum += psi1 * psi2 * psi3;
if (sum > 0)
answer += sum;
}
}
}
return answer;
}
//Prototypes for the exact EHVI calculation schemes, except slice-update.
#include "helper.h"
#include <deque>
using namespace std;
//When doing 2d hypervolume calculations, uncomment the following line
//to NOT use O(1) S-minus updates:
//#define NAIVE_DOMPOINTS
double ehvi2d(deque<individual*> P, double r[], double mu[], double s[]);
double ehvi3d_2term(deque<individual*> P, double r[], double mu[], double s[]);
double ehvi3d_5term(deque<individual*> P, double r[], double mu[], double s[]);
double ehvi3d_8term(deque<individual*> P, double r[], double mu[], double s[]);
#ifndef EHVI_CONSTS_H
#define EHVI_CONSTS_H
const int DIMENSIONS = 3; //Number of objectives (length of each function value vector).
const long long MONTECARLOS = 100000LL; //Number of Monte Carlo iterations (currently 100,000).
const int SEED = 12345678; //Used to seed the random number engine.
//Some pre-calculated constants to speed up calculations
const double SQRT_TWO = 1.41421356237; // sqrt(2)
const double SQRT_TWOPI_NEG = 0.3989422804; // 1/sqrt(2*PI)
const double HALFPI = 1.57079632679; // pi/2
#endif
//This file implements 3d hypervolume calculations using an altered
//version of the 2d archiver by Iris Hupkens. 2d hypervolume
//calculations are also included.
//Written by Iris Hupkens, 2013.
#include <iostream>
#include <cstdlib>
#include <string>
#include <fstream>
#include <deque>
#include <limits.h>
#include <math.h>
#include <algorithm>
#include "ehvi_hvol.h"
using namespace std;
//Some prototypes:
struct avlnode;
struct point{
//the data for a single point in the set + links to its AVL trees
double x;
double y;
double S; //dominated area maintained for this point (see calculateS)
avlnode * xnode; //this point's node in the (x-sorted) AVL tree
};
struct avlnode{
//an AVL tree node
avlnode * left;
avlnode * right;
avlnode * parent;
int balance; //height of left subtree - height of right subtree
point * data;
};
class bitree{
//Implementation of the AVL tree
public:
//stats treestats;
~bitree();
int nodecount; //amount of nodes in the tree
double xmin, ymin; //coordinates of the reference point
double totalS; //Hypervolume covered by the approximation set
avlnode * root; //root for the AVL tree
bitree(double rx, double ry){
nodecount = 0;
totalS = 0;
xmin = rx;
ymin = ry;
root = NULL;
}
avlnode * attemptcandidate(double x, double y);
avlnode * getprevious(avlnode * start);
avlnode * getnext(avlnode * start);
void calculateS(avlnode * node);
point * removenode(avlnode * node);
void removedominated(avlnode * node);
private:
void insertrebalance(avlnode * node,int change);
void deleterebalance(avlnode * node,int change);
void rotateleft(avlnode * node);
void rotateright(avlnode * node);
void recursivedestroyall(avlnode * node);