Skip to content
Snippets Groups Projects
Commit 5378e657 authored by Erik Strand's avatar Erik Strand
Browse files

Add Nelder-Mead

parents d5d74d36 c459c082
No related branches found
No related tags found
No related merge requests found
......@@ -5,6 +5,7 @@ if (NOT CMAKE_BUILD_TYPE)
set(CMAKE_BUILD_TYPE "Release")
endif()
message(STATUS "Build type: ${CMAKE_BUILD_TYPE}")
message(STATUS "C++ compiler: ${CMAKE_CXX_COMPILER}")
option(VISUALIZE "compiles extra code to allow visualization of optimization algorithms" ON)
if (VISUALIZE)
......
......@@ -42,12 +42,29 @@ def rosenbrock_plot(x_min, x_max, y_min, y_max):
fig.colorbar(contour_filled, format=mpl.ticker.LogFormatterSciNotation())
return fig, ax
def add_points(fig, ax, centers):
x = [c[0] for c in centers]
y = [c[1] for c in centers]
def add_points(fig, ax, points):
x = [p[0] for p in points]
y = [p[1] for p in points]
ax.plot(x, y, 'bx')
return fig, ax
# points is a list of numpy arrays of dim n x 2
def add_polygons(fig, ax, polygons):
for polygon in polygons:
p = mpl.patches.Polygon(polygon, True, fill=False, color="black")
ax.add_patch(p)
#p = PatchCollection(patches, alpha=0.4)
#ax.add_collection(p)
return fig, ax
# points is a numpy array of dim n x 2 e.g. np.array([[0, 0], [1, 0], [1, 1]])
def add_lines(fig, ax, points, closed=False):
p = mpl.patches.Polygon(points, closed, fill=False, color="black")
ax.add_patch(p)
#p = PatchCollection(patches, alpha=0.4)
#ax.add_collection(p)
return fig, ax
def add_circles(fig, ax, centers, radii):
for center, r in zip(centers, radii):
ax.add_artist(plt.Circle(center, r, color='k', fill=False))
......@@ -82,9 +99,11 @@ if __name__ == "__main__":
exit(1)
if "points" in data:
#points = np.array([sample["point"] for sample in data["data"]])
fig, ax = add_points(fig, ax, data["points"])
if "polygons" in data:
fig, ax = add_polygons(fig, ax, data["polygons"])
# Haven't tested this yet
#if "cirlces" in data:
# fig, ax = add_circles(fig, ax, data["cirlces"])
......
add_subdirectory(gradient_descent)
add_subdirectory(nelder_mead)
if (VISUALIZE)
add_executable(nelder_mead
main.cpp
)
target_link_libraries(nelder_mead optimization_lib clara)
make_plot_target(nelder_mead)
endif()
#include "clara.hpp"
#include "nelder_mead.h"
#include "nelder_mead_vis.h"
#include "objectives/paraboloid.h"
#include "objectives/rosenbrock.h"
#include "utils/eigen_json.h"
#include <iostream>
#include <fstream>
using namespace optimization;
using json = nlohmann::json;
//--------------------------------------------------------------------------------------------------
int main(int const argc, char const** argv) {
std::string log_file_path;
std::string vis_file_path;
uint32_t max_evaluations = 1000;
Scalar relative_y_tolerance = 1e-8;
auto const cli =
clara::Arg(log_file_path, "log_file_path")("Location of the optimization log") |
clara::Arg(vis_file_path, "vis_file_path")("Location of the visualization file") |
clara::Opt(max_evaluations, "max_evaluations")["-n"]["--max-evals"]("Max number of function evaluations") |
clara::Opt(relative_y_tolerance, "relative_y_tolerance")["-y"]["--rel-y-tol"]("Relative tolerance for function values (termination condition)");
auto const result = cli.parse(clara::Args(argc, argv));
if (!result) {
std::cerr << "Error in command line: " << result.errorMessage() << std::endl;
exit(1);
}
Eigen::Matrix<Scalar, 2, 3> simplex;
simplex.col(0) = Vector2s(-2, -1);
simplex.col(1) = Vector2s(-1, -1);
simplex.col(2) = Vector2s(-2, 0);
//using Objective = Paraboloid<Vector2<Scalar>>;
//Objective objective(dim);
using Objective = Rosenbrock<Vector2s>;
Objective objective;
NelderMead<Objective, 2> optimizer(max_evaluations, relative_y_tolerance);
optimizer.optimize(objective, simplex);
if (!log_file_path.empty()) {
json data = optimizer;
std::ofstream log_file(log_file_path);
log_file << data.dump(4) << '\n';
}
if (!vis_file_path.empty()) {
json data = NelderMeadVis<Objective, 2>{optimizer};
std::ofstream vis_file(vis_file_path);
vis_file << data.dump(4) << '\n';
}
return 0;
}
#ifndef OPTIMIZATION_NELDER_MEAD_H
#define OPTIMIZATION_NELDER_MEAD_H
#include "nelder_mead_log.h"
#include <cmath>
#include <iostream>
namespace optimization {
//--------------------------------------------------------------------------------------------------
// TODO record termination condition
template <typename Objective, int32_t D = -1>
class NelderMead : public NelderMeadLog<Objective, D> {
public:
// vector in the space we're searching
using VectorD = typename NelderMeadLog<Objective, D>::VectorD;
// vector in one higher dimension (e.g. to store a real value for each simplex vertex)
using VectorN = typename NelderMeadLog<Objective, D>::VectorN;
// D x N matrix (each column is a VectorD holding a vertex, each row is a VectorN)
using MatrixDN = typename NelderMeadLog<Objective, D>::MatrixDN;
NelderMead(uint32_t me = 1000, Scalar ry = 1e-8)
: max_evaluations_(me), relative_y_tolerance_(ry)
{}
MatrixDN const& simplex_vertices() const { return simplex_vertices_; }
VectorN const& simplex_values() const { return simplex_values_; }
// Each row of simplex is a vertex.
VectorD optimize(Objective const& objective, MatrixDN const& simplex);
VectorD const& min_point() const { return simplex_vertices_[i_lowest_]; }
Scalar min_value() const { return simplex_values_[i_lowest_]; }
private:
Scalar try_new_point(Objective const& objective, Scalar factor);
// For fixed size D (and thus N), dim_ and n_vertices_ are redundant.
uint32_t dim_;
uint32_t n_vertices_; // == dim_ + 1
MatrixDN simplex_vertices_;
VectorN simplex_values_;
VectorD vertex_sum_;
// Invariant: simplex_values_[i_lowest] <= simplex_values_[i_second_highest] <= simplex_values_[i_highest]
uint32_t i_lowest_;
uint32_t i_second_highest_;
uint32_t i_highest_;
uint32_t n_evaluations_;
uint32_t max_evaluations_;
//Scalar relative_x_tolerance_;
Scalar relative_y_tolerance_;
static constexpr Scalar reflection_coefficient_ = -1.0;
static constexpr Scalar expansion_coefficient_ = 2.0;
static constexpr Scalar contraction_coefficient_ = 0.5;
static constexpr Scalar shrinking_coefficient_ = 0.5;
static constexpr Scalar tiny_ = 1e-10;
};
//..................................................................................................
template <typename Objective, int32_t D>
auto NelderMead<Objective, D>::optimize(Objective const& objective, MatrixDN const& simplex) -> VectorD {
simplex_vertices_ = simplex;
n_vertices_ = simplex_vertices_.cols();
dim_ = simplex_vertices_.rows();
assert(n_vertices_ == dim_ + 1);
simplex_values_.resize(n_vertices_);
vertex_sum_.resize(dim_);
vertex_sum_ = simplex_vertices_.rowwise().sum();
// Evaluate the objective at all simplex vertices.
for (uint32_t i = 0u; i < n_vertices_; ++i) {
objective.eval(simplex_vertices_.col(i), simplex_values_[i]);
}
n_evaluations_ = n_vertices_;
while (true) {
// Find lowest, highest, and second highest points.
i_lowest_ = (simplex_values_[0] < simplex_values_[1]) ? 0 : 1;
i_second_highest_ = i_lowest_;
i_highest_ = 1 - i_lowest_;
for (uint32_t i = 2; i < n_vertices_; ++i) {
Scalar const y = simplex_values_[i];
if (y < simplex_values_[i_lowest_]) {
i_lowest_ = i;
}
else if (y > simplex_values_[i_highest_]) {
i_second_highest_ = i_highest_;
i_highest_ = i;
}
else if (y > simplex_values_[i_second_highest_]) {
i_second_highest_ = i;
}
}
// Log the simplex.
NelderMeadLog<Objective, D>::push_back(simplex_vertices_, simplex_values_);
// Evaluate termination conditions.
Scalar const difference = std::abs(simplex_values_[i_highest_] - simplex_values_[i_lowest_]);
Scalar const sum = std::abs(simplex_values_[i_highest_] + simplex_values_[i_lowest_]);
Scalar const relative_y_difference = 2 * difference / (sum + tiny_);
if (relative_y_difference < relative_y_tolerance_) {
std::cout << "Achieved relative tolerance of y: " << relative_y_difference << '\n';
return simplex_vertices_.col(i_lowest_);
}
if (n_evaluations_ > max_evaluations_) {
std::cout << "Exceeded max function evaluations.\n";
return simplex_vertices_.col(i_lowest_);
}
// Try reflecting the worst point.
Scalar const y_1 = try_new_point(objective, reflection_coefficient_);
// If the new point is the best so far, try going further in the same direction (expansion).
if (y_1 <= simplex_values_[i_lowest_]) {
try_new_point(objective, expansion_coefficient_);
continue;
}
// If it wasn't the best, but it still makes a different point the worst, keep it.
if (y_1 < simplex_values_[i_second_highest_]) {
continue;
}
// If the reflected point is still the worst, try contracting along one dimension.
// Note that we could be contracting from the original point, or the reflected point.
Scalar const y_hi = simplex_values_[i_highest_];
Scalar const y_2 = try_new_point(objective, contraction_coefficient_);
// If the contracted point is better than the worst, keep it.
if (y_2 < y_hi) {
continue;
}
// If the contracted point is even worse, shrink everything.
for (uint32_t i = 0; i < n_vertices_; ++i) {
if (i != i_lowest_) {
simplex_vertices_.col(i) =
shrinking_coefficient_ * (simplex_vertices_.col(i_lowest_) + simplex_vertices_.col(i));
objective.eval(simplex_vertices_.col(i), simplex_values_[i]);
}
}
n_evaluations_ += dim_;
// The vertex sum must be recomputed from scratch in this case.
vertex_sum_ = simplex_vertices_.rowwise().sum();
}
}
//..................................................................................................
template <typename Objective, int32_t D>
Scalar NelderMead<Objective, D>::try_new_point(Objective const& objective, Scalar factor) {
// Generate a new point by reflecting/expanding/contracting the worst point.
Scalar const t1 = (Scalar(1) - factor) / dim_;
Scalar const t2 = factor - t1;
VectorD const x_new = t1 * vertex_sum_ + t2 * simplex_vertices_.col(i_highest_);
// Evaluate the new point.
Scalar y_new;
objective.eval(x_new, y_new);
++n_evaluations_;
// If the new point is an improvement, keep it.
if (y_new < simplex_values_[i_highest_]) {
vertex_sum_ += x_new - simplex_vertices_.col(i_highest_);
simplex_vertices_.col(i_highest_) = x_new;
simplex_values_[i_highest_] = y_new;
}
return y_new;
}
}
#endif
#ifndef OPTIMIZATION_NELDER_MEAD_LOG_H
#define OPTIMIZATION_NELDER_MEAD_LOG_H
#include "utils/vector.h"
#include "utils/matrix.h"
#include "utils/vis_only.h"
#ifdef VISUALIZE
#include <vector>
#include "json.hpp"
#include "utils/eigen_json.h"
#endif
namespace optimization {
//--------------------------------------------------------------------------------------------------
template <typename Objective, int32_t D>
struct NelderMeadState {
// If D != -1, N == D + 1. If D == -1, N == -1.
static constexpr int32_t N = [D_ = D](){
if (D_ < 0) {
return D_;
} else {
return D_ + 1;
}
}();
// vector in the space we're searching
using VectorD = VectorNs<D>;
// vector in one higher dimension (e.g. to store a real value for each simplex vertex)
using VectorN = VectorNs<N>;
// D x N matrix (each column is a VectorD holding a vertex, each row is a VectorN)
// There are two reasons to store vertices as columns. First, it avoids the use of
// transposition. Eigen will happily tranpose for you if you assign a row vector to a column
// vector, but it will complain if you do something like `c_vec = c_vec2 + r_vec`. Second, Eigen
// defaults to column major storage, so this keeps our vertex values contiguous. For logging we
// transpose the matrix, so each row is a vertex, since this is more natural to deal with in
// numpy/matplotlib.
using MatrixDN = MatrixNs<D, N>;
NelderMeadState(MatrixDN const& s, VectorN const& v)
: simplex(s), values(v) {}
MatrixDN simplex;
VectorN values;
};
//--------------------------------------------------------------------------------------------------
// This is used as a base class rather than a member so that the empty base class optimization can
// be applied (the member would take up space even if it is an empty class).
template <typename Objective, int32_t D>
struct NelderMeadLog {
using VectorD = typename NelderMeadState<Objective, D>::VectorD;
using VectorN = typename NelderMeadState<Objective, D>::VectorN;
using MatrixDN = typename NelderMeadState<Objective, D>::MatrixDN;
void reserve(uint32_t n) VIS_ONLY_METHOD;
void push_back(MatrixDN const& simplex, VectorN const& values) VIS_ONLY_METHOD;
void clear() VIS_ONLY_METHOD;
#ifdef VISUALIZE
std::vector<NelderMeadState<Objective, D>> states;
#endif
};
#ifdef VISUALIZE
//..................................................................................................
template <typename Objective, int32_t D>
void NelderMeadLog<Objective, D>::reserve(uint32_t n) {
states.reserve(n);
}
//..................................................................................................
template <typename Objective, int32_t D>
void NelderMeadLog<Objective, D>::push_back(
MatrixDN const& simplex,
VectorN const& values
) {
states.emplace_back(simplex, values);
}
//..................................................................................................
template <typename Objective, int32_t D>
void to_json(nlohmann::json& j, NelderMeadState<Objective, D> const& state) {
j = nlohmann::json{
{"simplex", transpose_for_json(state.simplex)},
{"values", state.values}
};
}
//..................................................................................................
template <typename Objective, int32_t D>
void to_json(nlohmann::json& j, NelderMeadLog<Objective, D> const& log) {
j = nlohmann::json{
{"algorithm", "nelder-mead"},
{"objective", Objective::name},
{"data", log.states}
};
}
#endif
}
#endif
#ifndef OPTIMIZATION_NELDER_MEAD_VIS_H
#define OPTIMIZATION_NELDER_MEAD_VIS_H
#include <iostream>
#include "nelder_mead_log.h"
#include "utils/eigen_json.h"
namespace optimization {
//--------------------------------------------------------------------------------------------------
template <typename Objective, int32_t D>
struct NelderMeadVis {
NelderMeadLog<Objective, D> const& log;
};
//..................................................................................................
template <typename Objective, int32_t D>
void to_json(nlohmann::json& j, NelderMeadVis<Objective, D> const& vis) {
j = nlohmann::json{
{"algorithm", "nelder-mead"},
{"objective", Objective::name},
{"data", {
{"polygons", nlohmann::json::array()}
}}
};
for (auto const& state : vis.log.states) {
j["data"]["polygons"].push_back(transpose_for_json(state.simplex));
}
}
}
#endif
......@@ -4,10 +4,29 @@
#include "json.hpp"
#include <Eigen/Core>
namespace optimization {
//--------------------------------------------------------------------------------------------------
// serialization of matrix transposes
// I can't get this to work with Eigen's crazy templates inside nlohmann so I'm doing it myself.
template <typename T, int32_t N1, int32_t N2>
struct EigenMatrixTranspose {
Eigen::Matrix<T, N1, N2> const& matrix;
};
//--------------------------------------------------------------------------------------------------
template <typename T, int32_t N1, int32_t N2>
EigenMatrixTranspose<T, N1, N2> transpose_for_json(Eigen::Matrix<T, N1, N2> const& matrix) {
return EigenMatrixTranspose<T, N1, N2>{matrix};
}
}
namespace nlohmann {
//--------------------------------------------------------------------------------------------------
// serialization of vectors
// Note: for some reason nlohmann::json can't figure out what's going on if I try to use DenseBase.
template <typename T, int32_t N>
struct adl_serializer<Eigen::Matrix<T, N, 1>> {
static void to_json(json& j, Eigen::Matrix<T, N, 1> const& vector) {
......@@ -48,6 +67,21 @@ struct adl_serializer<Eigen::Matrix<T, N1, N2>> {
*/
};
//--------------------------------------------------------------------------------------------------
// serialization of matrix tranposes
template <typename T, int32_t N1, int32_t N2>
struct adl_serializer<optimization::EigenMatrixTranspose<T, N1, N2>> {
static void to_json(json& j, optimization::EigenMatrixTranspose<T, N1, N2> const& matrix) {
for (uint32_t row = 0; row < matrix.matrix.cols(); ++row) {
json row_json;
for (uint32_t col = 0; col < matrix.matrix.rows(); ++col) {
row_json.push_back(matrix.matrix(col, row));
}
j.push_back(row_json);
}
}
};
}
#endif
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment