Skip to content
Snippets Groups Projects
Commit 98c08dcd authored by Erik Strand's avatar Erik Strand
Browse files

Update Nelder-Mead logging

parent af24d2d9
No related branches found
No related tags found
No related merge requests found
...@@ -3,7 +3,5 @@ add_executable(nelder_mead ...@@ -3,7 +3,5 @@ add_executable(nelder_mead
) )
target_link_libraries(nelder_mead optimization_lib clara) target_link_libraries(nelder_mead optimization_lib clara)
if (VISUALIZE)
make_plot_target(nelder_mead 2d ARGS -d 2) make_plot_target(nelder_mead 2d ARGS -d 2)
make_plot_target(nelder_mead 10d ARGS -d 10 -n 10000) make_plot_target(nelder_mead 10d ARGS -d 10 -n 10000)
endif()
#ifndef OPTIMIZATION_NELDER_MEAD_LOG_H #ifndef OPTIMIZATION_NELDER_MEAD_LOGS_EVERYTHING_H
#define OPTIMIZATION_NELDER_MEAD_LOG_H #define OPTIMIZATION_NELDER_MEAD_LOGS_EVERYTHING_H
#include "utils/vector.h"
#include "utils/matrix.h"
#include "utils/vis_only.h"
#ifdef VISUALIZE
#include <vector>
#include "json.hpp" #include "json.hpp"
#include "../nelder_mead.h"
#include "utils/eigen_json.h" #include "utils/eigen_json.h"
#endif #include <string>
#include <vector>
namespace optimization { namespace optimization {
//-------------------------------------------------------------------------------------------------- //--------------------------------------------------------------------------------------------------
template <int32_t D> template <int32_t D>
struct NelderMeadState { struct NelderMeadState {
// If D != -1, N == D + 1. If D == -1, N == -1. typename NelderMead<D>::MatrixDN simplex;
static constexpr int32_t N = [D_ = D](){ typename NelderMead<D>::VectorN values;
if (D_ < 0) {
return D_;
} else {
return D_ + 1;
}
}();
// vector in the space we're searching
using VectorD = VectorNs<D>;
// vector in one higher dimension (e.g. to store a real value for each simplex vertex)
using VectorN = VectorNs<N>;
// D x N matrix (each column is a VectorD holding a vertex, each row is a VectorN)
// There are two reasons to store vertices as columns. First, it avoids the use of
// transposition. Eigen will happily tranpose for you if you assign a row vector to a column
// vector, but it will complain if you do something like `c_vec = c_vec2 + r_vec`. Second, Eigen
// defaults to column major storage, so this keeps our vertex values contiguous. For logging we
// transpose the matrix, so each row is a vertex, since this is more natural to deal with in
// numpy/matplotlib.
using MatrixDN = MatrixNs<D, N>;
NelderMeadState(MatrixDN const& s, VectorN const& v)
: simplex(s), values(v) {}
MatrixDN simplex;
VectorN values;
}; };
//-------------------------------------------------------------------------------------------------- //--------------------------------------------------------------------------------------------------
// This is used as a base class rather than a member so that the empty base class optimization can
// be applied (the member would take up space even if it is an empty class).
template <int32_t D> template <int32_t D>
struct NelderMeadLog { struct NelderMeadLogEverything {
using VectorD = typename NelderMeadState<D>::VectorD;
using VectorN = typename NelderMeadState<D>::VectorN;
using MatrixDN = typename NelderMeadState<D>::MatrixDN;
void reserve(uint32_t n) VIS_ONLY_METHOD;
template <typename Objective> template <typename Objective>
void initialize(Objective const&) VIS_ONLY_METHOD; void initialize(Objective const&) { objective_name = Objective::name; }
void push_back(MatrixDN const& simplex, VectorN const& values) VIS_ONLY_METHOD;
void clear() VIS_ONLY_METHOD; void push_back(
typename NelderMead<D>::MatrixDN const& simplex,
typename NelderMead<D>::VectorN const& values
) {
states.push_back({simplex, values});
}
void clear();
#ifdef VISUALIZE
std::string objective_name; std::string objective_name;
std::vector<NelderMeadState<D>> states; std::vector<NelderMeadState<D>> states;
#endif
}; };
#ifdef VISUALIZE
//..................................................................................................
template <int32_t D>
void NelderMeadLog<D>::reserve(uint32_t n) {
states.reserve(n);
}
//..................................................................................................
template <int32_t D>
template <typename Objective>
void NelderMeadLog<D>::initialize(Objective const&) {
objective_name = Objective::name;
}
//.................................................................................................. //..................................................................................................
template <int32_t D> template <int32_t D>
void NelderMeadLog<D>::push_back( void NelderMeadLogEverything<D>::clear() {
MatrixDN const& simplex, objective_name.clear();
VectorN const& values states.clear();
) {
states.emplace_back(simplex, values);
} }
//-------------------------------------------------------------------------------------------------- //--------------------------------------------------------------------------------------------------
template <int32_t D> template <int32_t D>
void to_json(nlohmann::json& j, NelderMeadState<D> const& state) { void to_json(nlohmann::json& j, NelderMeadState<D> const& state) {
...@@ -102,16 +53,14 @@ void to_json(nlohmann::json& j, NelderMeadState<D> const& state) { ...@@ -102,16 +53,14 @@ void to_json(nlohmann::json& j, NelderMeadState<D> const& state) {
//-------------------------------------------------------------------------------------------------- //--------------------------------------------------------------------------------------------------
template <int32_t D> template <int32_t D>
void to_json(nlohmann::json& j, NelderMeadLog<D> const& log) { void to_json(nlohmann::json& j, NelderMeadLogEverything<D> const& log) {
j = nlohmann::json{ j = nlohmann::json{
{"algorithm", "nelder-mead"}, {"algorithm", "gradient descent"},
{"objective", log.objective_name}, {"objective", log.objective_name},
{"data", log.states} {"data", log.states}
}; };
} }
#endif
} }
#endif #endif
#ifndef OPTIMIZATION_NELDER_MEAD_VIS_H #ifndef OPTIMIZATION_NELDER_MEAD_VIS_H
#define OPTIMIZATION_NELDER_MEAD_VIS_H #define OPTIMIZATION_NELDER_MEAD_VIS_H
#include <iostream> #include "everything.h"
#include "nelder_mead_log.h"
#include "utils/eigen_json.h"
namespace optimization { namespace optimization {
//-------------------------------------------------------------------------------------------------- //--------------------------------------------------------------------------------------------------
template <int32_t D> template <int32_t D>
struct NelderMeadVis { struct NelderMeadVis {
NelderMeadLog<D> const& log; NelderMeadLogEverything<D> const& log;
}; };
//.................................................................................................. //..................................................................................................
......
#ifndef OPTIMIZATION_NELDER_MEAD_LOGS_NOTHING_H
#define OPTIMIZATION_NELDER_MEAD_LOGS_NOTHING_H
#include "utils/vector.h"
#include "utils/matrix.h"
namespace optimization {
//--------------------------------------------------------------------------------------------------
struct NelderMeadLogNothing {
template <typename Objective>
void initialize(Objective const&) {}
template <int32_t D, int32_t N>
void push_back(MatrixNs<D, N> const&, VectorNs<N> const&) {}
};
}
#endif
#include "clara.hpp" #include "clara.hpp"
#include "logs/everything_vis.h"
#include "nelder_mead.h" #include "nelder_mead.h"
#include "objectives/paraboloid.h" #include "objectives/paraboloid.h"
#include "objectives/rosenbrock.h" #include "objectives/rosenbrock.h"
#include <iostream> #include <iostream>
#ifdef VISUALIZE
#include "nelder_mead_vis.h"
#include "utils/eigen_json.h"
#include <fstream> #include <fstream>
using json = nlohmann::json;
#endif
using namespace optimization; using namespace optimization;
using json = nlohmann::json;
//-------------------------------------------------------------------------------------------------- //--------------------------------------------------------------------------------------------------
int main(int const argc, char const** argv) { int main(int const argc, char const** argv) {
...@@ -49,23 +44,29 @@ int main(int const argc, char const** argv) { ...@@ -49,23 +44,29 @@ int main(int const argc, char const** argv) {
objective.dim() = dim; objective.dim() = dim;
NelderMead<-1> optimizer(max_evaluations, relative_y_tolerance); NelderMead<-1> optimizer(max_evaluations, relative_y_tolerance);
VectorXs minimum = optimizer.optimize(objective, simplex); NelderMeadLogEverything<-1> log;
// Only log stuff if we're going to use it.
if (log_file_path.empty() && vis_file_path.empty()) {
optimizer.optimize(objective, simplex);
} else {
optimizer.optimize(objective, simplex, log);
}
std::cout << "n evaluations: " << optimizer.n_evaluations() << '\n'; std::cout << "n evaluations: " << optimizer.n_evaluations() << '\n';
std::cout << "final point: " << minimum << '\n'; std::cout << "final point: " << optimizer.point() << '\n';
#ifdef VISUALIZE
if (!log_file_path.empty()) { if (!log_file_path.empty()) {
json data = optimizer; json data = log;
std::ofstream log_file(log_file_path); std::ofstream log_file(log_file_path);
log_file << data.dump(4) << '\n'; log_file << data.dump(4) << '\n';
} }
if (!vis_file_path.empty()) { if (!vis_file_path.empty()) {
json data = NelderMeadVis<-1>{optimizer}; json data = NelderMeadVis<-1>{log};
std::ofstream vis_file(vis_file_path); std::ofstream vis_file(vis_file_path);
vis_file << data.dump(4) << '\n'; vis_file << data.dump(4) << '\n';
} }
#endif
return 0; return 0;
} }
#ifndef OPTIMIZATION_NELDER_MEAD_H #ifndef OPTIMIZATION_NELDER_MEAD_H
#define OPTIMIZATION_NELDER_MEAD_H #define OPTIMIZATION_NELDER_MEAD_H
#include "nelder_mead_log.h" #include "logs/nothing.h"
#include <cmath> #include <cmath>
#include <iostream> #include <iostream>
...@@ -10,54 +10,90 @@ namespace optimization { ...@@ -10,54 +10,90 @@ namespace optimization {
//-------------------------------------------------------------------------------------------------- //--------------------------------------------------------------------------------------------------
// TODO record termination condition // TODO record termination condition
template <int32_t D = -1> template <int32_t D = -1>
class NelderMead : public NelderMeadLog<D> { class NelderMead {
public: public:
// If D != -1, N == D + 1. If D == -1, N == -1.
static constexpr int32_t N = [D_ = D](){
if (D_ < 0) {
return D_;
} else {
return D_ + 1;
}
}();
// vector in the space we're searching // vector in the space we're searching
using VectorD = typename NelderMeadLog<D>::VectorD; using VectorD = VectorNs<D>;
// vector in one higher dimension (e.g. to store a real value for each simplex vertex) // vector in one higher dimension (e.g. to store a real value for each simplex vertex)
using VectorN = typename NelderMeadLog<D>::VectorN; using VectorN = VectorNs<N>;
// D x N matrix (each column is a VectorD holding a vertex, each row is a VectorN) // D x N matrix (each column is a VectorD holding a vertex, each row is a VectorN)
using MatrixDN = typename NelderMeadLog<D>::MatrixDN; // There are two reasons to store vertices as columns. First, it avoids the use of
// transposition. Eigen will happily tranpose for you if you assign a row vector to a column
// vector, but it will complain if you do something like `c_vec = c_vec2 + r_vec`. Second, Eigen
// defaults to column major storage, so this keeps our vertex values contiguous. For logging we
// transpose the matrix, so each row is a vertex, since this is more natural to deal with in
// numpy/matplotlib.
using MatrixDN = MatrixNs<D, N>;
// Constructor
NelderMead(uint32_t me = 1000, Scalar ry = 1e-8) NelderMead(uint32_t me = 1000, Scalar ry = 1e-8)
: max_evaluations_(me), relative_y_tolerance_(ry) : max_evaluations_(me), relative_y_tolerance_(ry)
{} {}
uint32_t n_evaluations() const { return n_evaluations_; } uint32_t n_evaluations() const { return n_evaluations_; }
MatrixDN const& simplex_vertices() const { return simplex_vertices_; } MatrixDN const& simplex_vertices() const { return simplex_vertices_; }
VectorN const& simplex_values() const { return simplex_values_; } VectorN const& simplex_values() const { return simplex_values_; }
//VectorD const& min_point() const { return simplex_vertices_[i_lowest_]; }
decltype(auto) point() const {
return simplex_vertices_.col(i_lowest_);
}
Scalar value() const { return simplex_values_[i_lowest_]; }
// Constructs a simplex from an initial point by offset all coordinate values. // Constructs a simplex from an initial point by offsetting all coordinate values.
template <typename Objective> template <typename Objective>
VectorD optimize(Objective& objective, VectorD const& initial_point, Scalar offset = 1); VectorD optimize(
// Each row of simplex is a vertex. Objective& objective,
VectorD const& initial_point,
Scalar offset = 1
);
template <typename Objective, typename Log>
VectorD optimize(
Objective& objective,
VectorD const& initial_point,
Scalar offset,
Log& log
);
// Each column of simplex is a vertex.
template <typename Objective> template <typename Objective>
VectorD optimize(Objective& objective, MatrixDN const& simplex); VectorD optimize(Objective& objective, MatrixDN const& simplex);
template <typename Objective, typename Log>
VectorD const& min_point() const { return simplex_vertices_[i_lowest_]; } VectorD optimize(Objective& objective, MatrixDN const& simplex, Log& log);
Scalar min_value() const { return simplex_values_[i_lowest_]; }
private: private:
template <typename Objective> template <typename Objective>
Scalar try_new_point(Objective& objective, Scalar factor); Scalar try_new_point(Objective& objective, Scalar factor);
// hyperparameters
uint32_t max_evaluations_;
//Scalar relative_x_tolerance_;
Scalar relative_y_tolerance_;
// algorithm state
// For fixed size D (and thus N), dim_ and n_vertices_ are redundant. // For fixed size D (and thus N), dim_ and n_vertices_ are redundant.
uint32_t dim_; uint32_t dim_; // we are searching in R^dim_
uint32_t n_vertices_; // == dim_ + 1 uint32_t n_vertices_; // the simplex has n_vertices_ == dim_ + 1 different vertices
MatrixDN simplex_vertices_; uint32_t n_evaluations_; // number of times we've evaluated the objective
VectorN simplex_values_; MatrixDN simplex_vertices_; // the simplex itself (each column is a vertex)
VectorD vertex_sum_; VectorN simplex_values_; // the function values at the vertices
// Invariant: simplex_values_[i_lowest] <= simplex_values_[i_second_highest] <= simplex_values_[i_highest] VectorD vertex_sum_; // the sum of all the vertices
VectorD x_new_; // used for trial points
// Invariant:
// simplex_values_[i_lowest] <= simplex_values_[i_second_highest] <= simplex_values_[i_highest]
uint32_t i_lowest_; uint32_t i_lowest_;
uint32_t i_second_highest_; uint32_t i_second_highest_;
uint32_t i_highest_; uint32_t i_highest_;
uint32_t n_evaluations_; // algorithm constants
uint32_t max_evaluations_;
//Scalar relative_x_tolerance_;
Scalar relative_y_tolerance_;
static constexpr Scalar reflection_coefficient_ = -1.0; static constexpr Scalar reflection_coefficient_ = -1.0;
static constexpr Scalar expansion_coefficient_ = 2.0; static constexpr Scalar expansion_coefficient_ = 2.0;
static constexpr Scalar contraction_coefficient_ = 0.5; static constexpr Scalar contraction_coefficient_ = 0.5;
...@@ -72,6 +108,19 @@ auto NelderMead<D>::optimize( ...@@ -72,6 +108,19 @@ auto NelderMead<D>::optimize(
Objective& objective, Objective& objective,
VectorD const& initial_point, VectorD const& initial_point,
Scalar offset Scalar offset
) -> VectorD {
NelderMeadLogNothing log;
return optimize(objective, initial_point, offset, log);
}
//..................................................................................................
template <int32_t D>
template <typename Objective, typename Log>
auto NelderMead<D>::optimize(
Objective& objective,
VectorD const& initial_point,
Scalar offset,
Log& log
) -> VectorD { ) -> VectorD {
MatrixDN simplex; MatrixDN simplex;
simplex.resize(initial_point.size(), initial_point.size() + 1); simplex.resize(initial_point.size(), initial_point.size() + 1);
...@@ -80,20 +129,29 @@ auto NelderMead<D>::optimize( ...@@ -80,20 +129,29 @@ auto NelderMead<D>::optimize(
simplex.col(i + 1) = initial_point; simplex.col(i + 1) = initial_point;
simplex(i, i + 1) += offset; simplex(i, i + 1) += offset;
} }
return optimize(objective, simplex); return optimize(objective, simplex, log);
} }
//.................................................................................................. //..................................................................................................
template <int32_t D> template <int32_t D>
template <typename Objective> template <typename Objective>
auto NelderMead<D>::optimize(Objective& objective, MatrixDN const& simplex) -> VectorD { auto NelderMead<D>::optimize(Objective& objective, MatrixDN const& simplex) -> VectorD {
NelderMeadLogNothing log;
return optimize(objective, simplex, log);
}
//..................................................................................................
template <int32_t D>
template <typename Objective, typename Log>
auto NelderMead<D>::optimize(Objective& objective, MatrixDN const& simplex, Log& log) -> VectorD {
// initialize state
simplex_vertices_ = simplex; simplex_vertices_ = simplex;
n_vertices_ = simplex_vertices_.cols(); n_vertices_ = simplex_vertices_.cols();
dim_ = simplex_vertices_.rows(); dim_ = simplex_vertices_.rows();
assert(n_vertices_ == dim_ + 1); assert(n_vertices_ == dim_ + 1);
simplex_values_.resize(n_vertices_); simplex_values_.resize(n_vertices_);
vertex_sum_.resize(dim_);
vertex_sum_ = simplex_vertices_.rowwise().sum(); vertex_sum_ = simplex_vertices_.rowwise().sum();
log.initialize(objective);
// Evaluate the objective at all simplex vertices. // Evaluate the objective at all simplex vertices.
for (uint32_t i = 0u; i < n_vertices_; ++i) { for (uint32_t i = 0u; i < n_vertices_; ++i) {
...@@ -101,8 +159,6 @@ auto NelderMead<D>::optimize(Objective& objective, MatrixDN const& simplex) -> V ...@@ -101,8 +159,6 @@ auto NelderMead<D>::optimize(Objective& objective, MatrixDN const& simplex) -> V
} }
n_evaluations_ = n_vertices_; n_evaluations_ = n_vertices_;
NelderMeadLog<D>::initialize(objective);
while (true) { while (true) {
// Find lowest, highest, and second highest points. // Find lowest, highest, and second highest points.
i_lowest_ = (simplex_values_[0] < simplex_values_[1]) ? 0 : 1; i_lowest_ = (simplex_values_[0] < simplex_values_[1]) ? 0 : 1;
...@@ -123,7 +179,7 @@ auto NelderMead<D>::optimize(Objective& objective, MatrixDN const& simplex) -> V ...@@ -123,7 +179,7 @@ auto NelderMead<D>::optimize(Objective& objective, MatrixDN const& simplex) -> V
} }
// Log the simplex. // Log the simplex.
NelderMeadLog<D>::push_back(simplex_vertices_, simplex_values_); log.push_back(simplex_vertices_, simplex_values_);
// Evaluate termination conditions. // Evaluate termination conditions.
Scalar const difference = std::abs(simplex_values_[i_highest_] - simplex_values_[i_lowest_]); Scalar const difference = std::abs(simplex_values_[i_highest_] - simplex_values_[i_lowest_]);
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment