Add Nelder-Mead

5378e657 · Erik Strand · d5d74d36 · c459c082 · 5378e657 · 5378e657
Commit 5378e657 authored Apr 12, 2020 by Erik Strand
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -5,6 +5,7 @@ if (NOT CMAKE_BUILD_TYPE)
    set(CMAKE_BUILD_TYPE "Release")
 endif()
 message(STATUS "Build type: ${CMAKE_BUILD_TYPE}")
+message(STATUS "C++ compiler: ${CMAKE_CXX_COMPILER}")

 option(VISUALIZE "compiles extra code to allow visualization of optimization algorithms" ON)
 if (VISUALIZE)

--- a/apps/plots.py
+++ b/apps/plots.py
@@ -42,12 +42,29 @@ def rosenbrock_plot(x_min, x_max, y_min, y_max):
    fig.colorbar(contour_filled, format=mpl.ticker.LogFormatterSciNotation())
    return fig, ax

-def add_points(fig, ax, centers):
-    x = [c[0] for c in centers]
-    y = [c[1] for c in centers]
+def add_points(fig, ax, points):
+    x = [p[0] for p in points]
+    y = [p[1] for p in points]
    ax.plot(x, y, 'bx')
    return fig, ax

+# points is a list of numpy arrays of dim n x 2
+def add_polygons(fig, ax, polygons):
+    for polygon in polygons:
+        p = mpl.patches.Polygon(polygon, True, fill=False, color="black")
+        ax.add_patch(p)
+        #p = PatchCollection(patches, alpha=0.4)
+        #ax.add_collection(p)
+    return fig, ax
+
+# points is a numpy array of dim n x 2 e.g. np.array([[0, 0], [1, 0], [1, 1]])
+def add_lines(fig, ax, points, closed=False):
+    p = mpl.patches.Polygon(points, closed, fill=False, color="black")
+    ax.add_patch(p)
+    #p = PatchCollection(patches, alpha=0.4)
+    #ax.add_collection(p)
+    return fig, ax
+
 def add_circles(fig, ax, centers, radii):
    for center, r in zip(centers, radii):
        ax.add_artist(plt.Circle(center, r, color='k', fill=False))
@@ -82,9 +99,11 @@ if __name__ == "__main__":
        exit(1)

    if "points" in data:
-        #points = np.array([sample["point"] for sample in data["data"]])
        fig, ax = add_points(fig, ax, data["points"])

+    if "polygons" in data:
+        fig, ax = add_polygons(fig, ax, data["polygons"])
+
    # Haven't tested this yet
    #if "cirlces" in data:
    #    fig, ax = add_circles(fig, ax, data["cirlces"])

--- a/optimization/optimizers/CMakeLists.txt
+++ b/optimization/optimizers/CMakeLists.txt
 add_subdirectory(gradient_descent)
+add_subdirectory(nelder_mead)
--- a/optimization/optimizers/nelder_mead/CMakeLists.txt
+++ b/optimization/optimizers/nelder_mead/CMakeLists.txt
+if (VISUALIZE)
+    add_executable(nelder_mead
+        main.cpp
+    )
+    target_link_libraries(nelder_mead optimization_lib clara)
+    make_plot_target(nelder_mead)
+endif()
--- a/optimization/optimizers/nelder_mead/main.cpp
+++ b/optimization/optimizers/nelder_mead/main.cpp
+#include "clara.hpp"
+#include "nelder_mead.h"
+#include "nelder_mead_vis.h"
+#include "objectives/paraboloid.h"
+#include "objectives/rosenbrock.h"
+#include "utils/eigen_json.h"
+#include <iostream>
+#include <fstream>
+
+using namespace optimization;
+using json = nlohmann::json;
+
+//--------------------------------------------------------------------------------------------------
+int main(int const argc, char const** argv) {
+    std::string log_file_path;
+    std::string vis_file_path;
+    uint32_t max_evaluations = 1000;
+    Scalar relative_y_tolerance = 1e-8;
+
+    auto const cli =
+        clara::Arg(log_file_path, "log_file_path")("Location of the optimization log") |
+        clara::Arg(vis_file_path, "vis_file_path")("Location of the visualization file") |
+        clara::Opt(max_evaluations, "max_evaluations")["-n"]["--max-evals"]("Max number of function evaluations") |
+        clara::Opt(relative_y_tolerance, "relative_y_tolerance")["-y"]["--rel-y-tol"]("Relative tolerance for function values (termination condition)");
+    auto const result = cli.parse(clara::Args(argc, argv));
+    if (!result) {
+        std::cerr << "Error in command line: " << result.errorMessage() << std::endl;
+        exit(1);
+    }
+
+    Eigen::Matrix<Scalar, 2, 3> simplex;
+    simplex.col(0) = Vector2s(-2, -1);
+    simplex.col(1) = Vector2s(-1, -1);
+    simplex.col(2) = Vector2s(-2, 0);
+
+    //using Objective = Paraboloid<Vector2<Scalar>>;
+    //Objective objective(dim);
+    using Objective = Rosenbrock<Vector2s>;
+    Objective objective;
+    NelderMead<Objective, 2> optimizer(max_evaluations, relative_y_tolerance);
+    optimizer.optimize(objective, simplex);
+
+    if (!log_file_path.empty()) {
+        json data = optimizer;
+        std::ofstream log_file(log_file_path);
+        log_file << data.dump(4) << '\n';
+    }
+
+    if (!vis_file_path.empty()) {
+        json data = NelderMeadVis<Objective, 2>{optimizer};
+        std::ofstream vis_file(vis_file_path);
+        vis_file << data.dump(4) << '\n';
+    }
+
+    return 0;
+}
--- a/optimization/optimizers/nelder_mead/nelder_mead.h
+++ b/optimization/optimizers/nelder_mead/nelder_mead.h
+#ifndef OPTIMIZATION_NELDER_MEAD_H
+#define OPTIMIZATION_NELDER_MEAD_H
+
+#include "nelder_mead_log.h"
+#include <cmath>
+#include <iostream>
+
+namespace optimization {
+
+//--------------------------------------------------------------------------------------------------
+// TODO record termination condition
+template <typename Objective, int32_t D = -1>
+class NelderMead : public NelderMeadLog<Objective, D> {
+public:
+    // vector in the space we're searching
+    using VectorD = typename NelderMeadLog<Objective, D>::VectorD;
+    // vector in one higher dimension (e.g. to store a real value for each simplex vertex)
+    using VectorN = typename NelderMeadLog<Objective, D>::VectorN;
+    // D x N matrix (each column is a VectorD holding a vertex, each row is a VectorN)
+    using MatrixDN = typename NelderMeadLog<Objective, D>::MatrixDN;
+
+    NelderMead(uint32_t me = 1000, Scalar ry = 1e-8)
+        : max_evaluations_(me), relative_y_tolerance_(ry)
+    {}
+
+    MatrixDN const& simplex_vertices() const { return simplex_vertices_; }
+    VectorN const& simplex_values() const { return simplex_values_; }
+
+    // Each row of simplex is a vertex.
+    VectorD optimize(Objective const& objective, MatrixDN const& simplex);
+
+    VectorD const& min_point() const { return simplex_vertices_[i_lowest_]; }
+    Scalar min_value() const { return simplex_values_[i_lowest_]; }
+
+private:
+    Scalar try_new_point(Objective const& objective, Scalar factor);
+
+    // For fixed size D (and thus N), dim_ and n_vertices_ are redundant.
+    uint32_t dim_;
+    uint32_t n_vertices_; // == dim_ + 1
+    MatrixDN simplex_vertices_;
+    VectorN simplex_values_;
+    VectorD vertex_sum_;
+    // Invariant: simplex_values_[i_lowest] <= simplex_values_[i_second_highest] <= simplex_values_[i_highest]
+    uint32_t i_lowest_;
+    uint32_t i_second_highest_;
+    uint32_t i_highest_;
+
+    uint32_t n_evaluations_;
+    uint32_t max_evaluations_;
+    //Scalar relative_x_tolerance_;
+    Scalar relative_y_tolerance_;
+
+    static constexpr Scalar reflection_coefficient_ = -1.0;
+    static constexpr Scalar expansion_coefficient_ = 2.0;
+    static constexpr Scalar contraction_coefficient_ = 0.5;
+    static constexpr Scalar shrinking_coefficient_ = 0.5;
+    static constexpr Scalar tiny_ = 1e-10;
+};
+
+//..................................................................................................
+template <typename Objective, int32_t D>
+auto NelderMead<Objective, D>::optimize(Objective const& objective, MatrixDN const& simplex) -> VectorD {
+    simplex_vertices_ = simplex;
+    n_vertices_ = simplex_vertices_.cols();
+    dim_ = simplex_vertices_.rows();
+    assert(n_vertices_ == dim_ + 1);
+    simplex_values_.resize(n_vertices_);
+    vertex_sum_.resize(dim_);
+    vertex_sum_ = simplex_vertices_.rowwise().sum();
+
+    // Evaluate the objective at all simplex vertices.
+    for (uint32_t i = 0u; i < n_vertices_; ++i) {
+        objective.eval(simplex_vertices_.col(i), simplex_values_[i]);
+    }
+    n_evaluations_ = n_vertices_;
+
+    while (true) {
+        // Find lowest, highest, and second highest points.
+        i_lowest_ = (simplex_values_[0] < simplex_values_[1]) ? 0 : 1;
+        i_second_highest_ = i_lowest_;
+        i_highest_ = 1 - i_lowest_;
+        for (uint32_t i = 2; i < n_vertices_; ++i) {
+            Scalar const y = simplex_values_[i];
+            if (y < simplex_values_[i_lowest_]) {
+                i_lowest_ = i;
+            }
+            else if (y > simplex_values_[i_highest_]) {
+                i_second_highest_ = i_highest_;
+                i_highest_ = i;
+            }
+            else if (y > simplex_values_[i_second_highest_]) {
+                i_second_highest_ = i;
+            }
+        }
+
+        // Log the simplex.
+        NelderMeadLog<Objective, D>::push_back(simplex_vertices_, simplex_values_);
+
+        // Evaluate termination conditions.
+        Scalar const difference = std::abs(simplex_values_[i_highest_] - simplex_values_[i_lowest_]);
+        Scalar const sum = std::abs(simplex_values_[i_highest_] + simplex_values_[i_lowest_]);
+        Scalar const relative_y_difference = 2 * difference / (sum + tiny_);
+        if (relative_y_difference < relative_y_tolerance_) {
+            std::cout << "Achieved relative tolerance of y: " << relative_y_difference << '\n';
+            return simplex_vertices_.col(i_lowest_);
+        }
+        if (n_evaluations_ > max_evaluations_) {
+            std::cout << "Exceeded max function evaluations.\n";
+            return simplex_vertices_.col(i_lowest_);
+        }
+
+        // Try reflecting the worst point.
+        Scalar const y_1 = try_new_point(objective, reflection_coefficient_);
+
+        // If the new point is the best so far, try going further in the same direction (expansion).
+        if (y_1 <= simplex_values_[i_lowest_]) {
+            try_new_point(objective, expansion_coefficient_);
+            continue;
+        }
+
+        // If it wasn't the best, but it still makes a different point the worst, keep it.
+        if (y_1 < simplex_values_[i_second_highest_]) {
+            continue;
+        }
+
+        // If the reflected point is still the worst, try contracting along one dimension.
+        // Note that we could be contracting from the original point, or the reflected point.
+        Scalar const y_hi = simplex_values_[i_highest_];
+        Scalar const y_2 = try_new_point(objective, contraction_coefficient_);
+
+        // If the contracted point is better than the worst, keep it.
+        if (y_2 < y_hi) {
+            continue;
+        }
+
+        // If the contracted point is even worse, shrink everything.
+        for (uint32_t i = 0; i < n_vertices_; ++i) {
+            if (i != i_lowest_) {
+                simplex_vertices_.col(i) =
+                    shrinking_coefficient_ * (simplex_vertices_.col(i_lowest_) + simplex_vertices_.col(i));
+                objective.eval(simplex_vertices_.col(i), simplex_values_[i]);
+            }
+        }
+        n_evaluations_ += dim_;
+        // The vertex sum must be recomputed from scratch in this case.
+        vertex_sum_ = simplex_vertices_.rowwise().sum();
+    }
+}
+
+//..................................................................................................
+template <typename Objective, int32_t D>
+Scalar NelderMead<Objective, D>::try_new_point(Objective const& objective, Scalar factor) {
+    // Generate a new point by reflecting/expanding/contracting the worst point.
+    Scalar const t1 = (Scalar(1) - factor) / dim_;
+    Scalar const t2 = factor - t1;
+    VectorD const x_new = t1 * vertex_sum_ + t2 * simplex_vertices_.col(i_highest_);
+
+    // Evaluate the new point.
+    Scalar y_new;
+    objective.eval(x_new, y_new);
+    ++n_evaluations_;
+
+    // If the new point is an improvement, keep it.
+    if (y_new < simplex_values_[i_highest_]) {
+        vertex_sum_ += x_new - simplex_vertices_.col(i_highest_);
+        simplex_vertices_.col(i_highest_) = x_new;
+        simplex_values_[i_highest_] = y_new;
+    }
+    return y_new;
+}
+
+}
+
+#endif
--- a/optimization/optimizers/nelder_mead/nelder_mead_log.h
+++ b/optimization/optimizers/nelder_mead/nelder_mead_log.h
+#ifndef OPTIMIZATION_NELDER_MEAD_LOG_H
+#define OPTIMIZATION_NELDER_MEAD_LOG_H
+
+#include "utils/vector.h"
+#include "utils/matrix.h"
+#include "utils/vis_only.h"
+
+#ifdef VISUALIZE
+#include <vector>
+#include "json.hpp"
+#include "utils/eigen_json.h"
+#endif
+
+namespace optimization {
+
+//--------------------------------------------------------------------------------------------------
+template <typename Objective, int32_t D>
+struct NelderMeadState {
+    // If D != -1, N == D + 1. If D == -1, N == -1.
+    static constexpr int32_t N = [D_ = D](){
+        if (D_ < 0) {
+            return D_;
+        } else {
+            return D_ + 1;
+        }
+    }();
+
+    // vector in the space we're searching
+    using VectorD = VectorNs<D>;
+    // vector in one higher dimension (e.g. to store a real value for each simplex vertex)
+    using VectorN = VectorNs<N>;
+    // D x N matrix (each column is a VectorD holding a vertex, each row is a VectorN)
+    // There are two reasons to store vertices as columns. First, it avoids the use of
+    // transposition. Eigen will happily tranpose for you if you assign a row vector to a column
+    // vector, but it will complain if you do something like `c_vec = c_vec2 + r_vec`. Second, Eigen
+    // defaults to column major storage, so this keeps our vertex values contiguous. For logging we
+    // transpose the matrix, so each row is a vertex, since this is more natural to deal with in
+    // numpy/matplotlib.
+    using MatrixDN = MatrixNs<D, N>;
+
+    NelderMeadState(MatrixDN const& s, VectorN const& v)
+        : simplex(s), values(v) {}
+
+    MatrixDN simplex;
+    VectorN values;
+};
+
+//--------------------------------------------------------------------------------------------------
+// This is used as a base class rather than a member so that the empty base class optimization can
+// be applied (the member would take up space even if it is an empty class).
+template <typename Objective, int32_t D>
+struct NelderMeadLog {
+    using VectorD = typename NelderMeadState<Objective, D>::VectorD;
+    using VectorN = typename NelderMeadState<Objective, D>::VectorN;
+    using MatrixDN = typename NelderMeadState<Objective, D>::MatrixDN;
+
+    void reserve(uint32_t n) VIS_ONLY_METHOD;
+    void push_back(MatrixDN const& simplex, VectorN const& values) VIS_ONLY_METHOD;
+    void clear() VIS_ONLY_METHOD;
+
+    #ifdef VISUALIZE
+    std::vector<NelderMeadState<Objective, D>> states;
+    #endif
+};
+
+#ifdef VISUALIZE
+
+//..................................................................................................
+template <typename Objective, int32_t D>
+void NelderMeadLog<Objective, D>::reserve(uint32_t n) {
+    states.reserve(n);
+}
+
+//..................................................................................................
+template <typename Objective, int32_t D>
+void NelderMeadLog<Objective, D>::push_back(
+    MatrixDN const& simplex,
+    VectorN const& values
+) {
+    states.emplace_back(simplex, values);
+}
+
+
+//..................................................................................................
+template <typename Objective, int32_t D>
+void to_json(nlohmann::json& j, NelderMeadState<Objective, D> const& state) {
+    j = nlohmann::json{
+        {"simplex", transpose_for_json(state.simplex)},
+        {"values", state.values}
+    };
+}
+
+//..................................................................................................
+template <typename Objective, int32_t D>
+void to_json(nlohmann::json& j, NelderMeadLog<Objective, D> const& log) {
+    j = nlohmann::json{
+        {"algorithm", "nelder-mead"},
+        {"objective", Objective::name},
+        {"data", log.states}
+    };
+}
+
+#endif
+
+}
+
+#endif
--- a/optimization/optimizers/nelder_mead/nelder_mead_vis.h
+++ b/optimization/optimizers/nelder_mead/nelder_mead_vis.h
+#ifndef OPTIMIZATION_NELDER_MEAD_VIS_H
+#define OPTIMIZATION_NELDER_MEAD_VIS_H
+
+#include <iostream>
+#include "nelder_mead_log.h"
+#include "utils/eigen_json.h"
+
+namespace optimization {
+
+//--------------------------------------------------------------------------------------------------
+template <typename Objective, int32_t D>
+struct NelderMeadVis {
+    NelderMeadLog<Objective, D> const& log;
+};
+
+//..................................................................................................
+template <typename Objective, int32_t D>
+void to_json(nlohmann::json& j, NelderMeadVis<Objective, D> const& vis) {
+    j = nlohmann::json{
+        {"algorithm", "nelder-mead"},
+        {"objective", Objective::name},
+        {"data", {
+            {"polygons", nlohmann::json::array()}
+        }}
+    };
+    for (auto const& state : vis.log.states) {
+        j["data"]["polygons"].push_back(transpose_for_json(state.simplex));
+    }
+}
+
+}
+
+#endif
--- a/optimization/utils/eigen_json.h
+++ b/optimization/utils/eigen_json.h
@@ -4,10 +4,29 @@
 #include "json.hpp"
 #include <Eigen/Core>

+namespace optimization {
+
+//--------------------------------------------------------------------------------------------------
+// serialization of matrix transposes
+// I can't get this to work with Eigen's crazy templates inside nlohmann so I'm doing it myself.
+template <typename T, int32_t N1, int32_t N2>
+struct EigenMatrixTranspose {
+    Eigen::Matrix<T, N1, N2> const& matrix;
+};
+
+//--------------------------------------------------------------------------------------------------
+template <typename T, int32_t N1, int32_t N2>
+EigenMatrixTranspose<T, N1, N2> transpose_for_json(Eigen::Matrix<T, N1, N2> const& matrix) {
+    return EigenMatrixTranspose<T, N1, N2>{matrix};
+}
+
+}
+
 namespace nlohmann {

 //--------------------------------------------------------------------------------------------------
 // serialization of vectors
+// Note: for some reason nlohmann::json can't figure out what's going on if I try to use DenseBase.
 template <typename T, int32_t N>
 struct adl_serializer<Eigen::Matrix<T, N, 1>> {
    static void to_json(json& j, Eigen::Matrix<T, N, 1> const& vector) {
@@ -48,6 +67,21 @@ struct adl_serializer<Eigen::Matrix<T, N1, N2>> {
    */
 };

+//--------------------------------------------------------------------------------------------------
+// serialization of matrix tranposes
+template <typename T, int32_t N1, int32_t N2>
+struct adl_serializer<optimization::EigenMatrixTranspose<T, N1, N2>> {
+    static void to_json(json& j, optimization::EigenMatrixTranspose<T, N1, N2> const& matrix) {
+        for (uint32_t row = 0; row < matrix.matrix.cols(); ++row) {
+            json row_json;
+            for (uint32_t col = 0; col < matrix.matrix.rows(); ++col) {
+                row_json.push_back(matrix.matrix(col, row));
+            }
+            j.push_back(row_json);
+        }
+    }
+};
+
 }

 #endif