Update Nelder-Mead logging

98c08dcd · Erik Strand · af24d2d9 · 98c08dcd · 98c08dcd · 98c08dcd
Commit 98c08dcd authored 5 years ago by Erik Strand
--- a/optimization/optimizers/nelder_mead/CMakeLists.txt
+++ b/optimization/optimizers/nelder_mead/CMakeLists.txt
@@ -3,7 +3,5 @@ add_executable(nelder_mead
 )
 target_link_libraries(nelder_mead optimization_lib clara)
-if (VISUALIZE)
 make_plot_target(nelder_mead 2d ARGS -d 2)
 make_plot_target(nelder_mead 10d ARGS -d 10 -n 10000)
-endif()
--- a/optimization/optimizers/nelder_mead/nelder_mead_log.h
+++ b/optimization/optimizers/nelder_mead/nelder_mead_log.h
-#ifndef OPTIMIZATION_NELDER_MEAD_LOG_H
+#ifndef OPTIMIZATION_NELDER_MEAD_LOGS_EVERYTHING_H
-#define OPTIMIZATION_NELDER_MEAD_LOG_H
+#define OPTIMIZATION_NELDER_MEAD_LOGS_EVERYTHING_H
-#include "utils/vector.h"
-#include "utils/matrix.h"
-#include "utils/vis_only.h"
-#ifdef VISUALIZE
-#include <vector>
 #include "json.hpp"
+#include "../nelder_mead.h"
 #include "utils/eigen_json.h"
-#endif
+#include <string>
+#include <vector>
 namespace optimization {
 //--------------------------------------------------------------------------------------------------
 template <int32_t D>
 struct NelderMeadState {
-    // If D != -1, N == D + 1. If D == -1, N == -1.
+    typename NelderMead<D>::MatrixDN simplex;
-    static constexpr int32_t N = [D_ = D](){
+    typename NelderMead<D>::VectorN values;
-        if (D_ < 0) {
-            return D_;
-        } else {
-            return D_ + 1;
-        }
-    }();
-    // vector in the space we're searching
-    using VectorD = VectorNs<D>;
-    // vector in one higher dimension (e.g. to store a real value for each simplex vertex)
-    using VectorN = VectorNs<N>;
-    // D x N matrix (each column is a VectorD holding a vertex, each row is a VectorN)
-    // There are two reasons to store vertices as columns. First, it avoids the use of
-    // transposition. Eigen will happily tranpose for you if you assign a row vector to a column
-    // vector, but it will complain if you do something like `c_vec = c_vec2 + r_vec`. Second, Eigen
-    // defaults to column major storage, so this keeps our vertex values contiguous. For logging we
-    // transpose the matrix, so each row is a vertex, since this is more natural to deal with in
-    // numpy/matplotlib.
-    using MatrixDN = MatrixNs<D, N>;
-    NelderMeadState(MatrixDN const& s, VectorN const& v)
-        : simplex(s), values(v) {}
-    MatrixDN simplex;
-    VectorN values;
 };
 //--------------------------------------------------------------------------------------------------
-// This is used as a base class rather than a member so that the empty base class optimization can
-// be applied (the member would take up space even if it is an empty class).
 template <int32_t D>
-struct NelderMeadLog {
+struct NelderMeadLogEverything {
-    using VectorD = typename NelderMeadState<D>::VectorD;
-    using VectorN = typename NelderMeadState<D>::VectorN;
-    using MatrixDN = typename NelderMeadState<D>::MatrixDN;
-    void reserve(uint32_t n) VIS_ONLY_METHOD;
    template <typename Objective>
-    void initialize(Objective const&) VIS_ONLY_METHOD;
+    void initialize(Objective const&) { objective_name = Objective::name; }
-    void push_back(MatrixDN const& simplex, VectorN const& values) VIS_ONLY_METHOD;
-    void clear() VIS_ONLY_METHOD;
+    void push_back(
+        typename NelderMead<D>::MatrixDN const& simplex,
+        typename NelderMead<D>::VectorN const& values
+    ) {
+        states.push_back({simplex, values});
+    }
+    void clear();
-    #ifdef VISUALIZE
    std::string objective_name;
    std::vector<NelderMeadState<D>> states;
-    #endif
 };
-#ifdef VISUALIZE
-//..................................................................................................
-template <int32_t D>
-void NelderMeadLog<D>::reserve(uint32_t n) {
-    states.reserve(n);
-}
-//..................................................................................................
-template <int32_t D>
-template <typename Objective>
-void NelderMeadLog<D>::initialize(Objective const&) {
-    objective_name = Objective::name;
-}
 //..................................................................................................
 template <int32_t D>
-void NelderMeadLog<D>::push_back(
+void NelderMeadLogEverything<D>::clear() {
-    MatrixDN const& simplex,
+    objective_name.clear();
-    VectorN const& values
+    states.clear();
-) {
-    states.emplace_back(simplex, values);
 }
 //--------------------------------------------------------------------------------------------------
 template <int32_t D>
 void to_json(nlohmann::json& j, NelderMeadState<D> const& state) {
@@ -102,16 +53,14 @@ void to_json(nlohmann::json& j, NelderMeadState<D> const& state) {
 //--------------------------------------------------------------------------------------------------
 template <int32_t D>
-void to_json(nlohmann::json& j, NelderMeadLog<D> const& log) {
+void to_json(nlohmann::json& j, NelderMeadLogEverything<D> const& log) {
    j = nlohmann::json{
-        {"algorithm", "nelder-mead"},
+        {"algorithm", "gradient descent"},
        {"objective", log.objective_name},
        {"data", log.states}
    };
 }
-#endif
 }
 #endif
--- a/optimization/optimizers/nelder_mead/nelder_mead_vis.h
+++ b/optimization/optimizers/nelder_mead/nelder_mead_vis.h
 #ifndef OPTIMIZATION_NELDER_MEAD_VIS_H
 #define OPTIMIZATION_NELDER_MEAD_VIS_H
-#include <iostream>
+#include "everything.h"
-#include "nelder_mead_log.h"
-#include "utils/eigen_json.h"
 namespace optimization {
 //--------------------------------------------------------------------------------------------------
 template <int32_t D>
 struct NelderMeadVis {
-    NelderMeadLog<D> const& log;
+    NelderMeadLogEverything<D> const& log;
 };
 //..................................................................................................

--- a/optimization/optimizers/nelder_mead/logs/nothing.h
+++ b/optimization/optimizers/nelder_mead/logs/nothing.h
+#ifndef OPTIMIZATION_NELDER_MEAD_LOGS_NOTHING_H
+#define OPTIMIZATION_NELDER_MEAD_LOGS_NOTHING_H
+#include "utils/vector.h"
+#include "utils/matrix.h"
+namespace optimization {
+//--------------------------------------------------------------------------------------------------
+struct NelderMeadLogNothing {
+    template <typename Objective>
+    void initialize(Objective const&) {}
+    template <int32_t D, int32_t N>
+    void push_back(MatrixNs<D, N> const&, VectorNs<N> const&) {}
+};
+}
+#endif
--- a/optimization/optimizers/nelder_mead/main.cpp
+++ b/optimization/optimizers/nelder_mead/main.cpp
 #include "clara.hpp"
+#include "logs/everything_vis.h"
 #include "nelder_mead.h"
 #include "objectives/paraboloid.h"
 #include "objectives/rosenbrock.h"
 #include <iostream>
-#ifdef VISUALIZE
-#include "nelder_mead_vis.h"
-#include "utils/eigen_json.h"
 #include <fstream>
-using json = nlohmann::json;
-#endif
 using namespace optimization;
+using json = nlohmann::json;
 //--------------------------------------------------------------------------------------------------
 int main(int const argc, char const** argv) {
@@ -49,23 +44,29 @@ int main(int const argc, char const** argv) {
    objective.dim() = dim;
    NelderMead<-1> optimizer(max_evaluations, relative_y_tolerance);
-    VectorXs minimum = optimizer.optimize(objective, simplex);
+    NelderMeadLogEverything<-1> log;
+    // Only log stuff if we're going to use it.
+    if (log_file_path.empty() && vis_file_path.empty()) {
+        optimizer.optimize(objective, simplex);
+    } else {
+        optimizer.optimize(objective, simplex, log);
+    }
    std::cout << "n evaluations: " << optimizer.n_evaluations() << '\n';
-    std::cout << "final point: " << minimum << '\n';
+    std::cout << "final point: " << optimizer.point() << '\n';
-    #ifdef VISUALIZE
    if (!log_file_path.empty()) {
-        json data = optimizer;
+        json data = log;
        std::ofstream log_file(log_file_path);
        log_file << data.dump(4) << '\n';
    }
    if (!vis_file_path.empty()) {
-        json data = NelderMeadVis<-1>{optimizer};
+        json data = NelderMeadVis<-1>{log};
        std::ofstream vis_file(vis_file_path);
        vis_file << data.dump(4) << '\n';
    }
-    #endif
    return 0;
 }
--- a/optimization/optimizers/nelder_mead/nelder_mead.h
+++ b/optimization/optimizers/nelder_mead/nelder_mead.h
 #ifndef OPTIMIZATION_NELDER_MEAD_H
 #define OPTIMIZATION_NELDER_MEAD_H
-#include "nelder_mead_log.h"
+#include "logs/nothing.h"
 #include <cmath>
 #include <iostream>
@@ -10,54 +10,90 @@ namespace optimization {
 //--------------------------------------------------------------------------------------------------
 // TODO record termination condition
 template <int32_t D = -1>
-class NelderMead : public NelderMeadLog<D> {
+class NelderMead {
 public:
+    // If D != -1, N == D + 1. If D == -1, N == -1.
+    static constexpr int32_t N = [D_ = D](){
+        if (D_ < 0) {
+            return D_;
+        } else {
+            return D_ + 1;
+        }
+    }();
    // vector in the space we're searching
-    using VectorD = typename NelderMeadLog<D>::VectorD;
+    using VectorD = VectorNs<D>;
    // vector in one higher dimension (e.g. to store a real value for each simplex vertex)
-    using VectorN = typename NelderMeadLog<D>::VectorN;
+    using VectorN = VectorNs<N>;
    // D x N matrix (each column is a VectorD holding a vertex, each row is a VectorN)
-    using MatrixDN = typename NelderMeadLog<D>::MatrixDN;
+    // There are two reasons to store vertices as columns. First, it avoids the use of
+    // transposition. Eigen will happily tranpose for you if you assign a row vector to a column
+    // vector, but it will complain if you do something like `c_vec = c_vec2 + r_vec`. Second, Eigen
+    // defaults to column major storage, so this keeps our vertex values contiguous. For logging we
+    // transpose the matrix, so each row is a vertex, since this is more natural to deal with in
+    // numpy/matplotlib.
+    using MatrixDN = MatrixNs<D, N>;
+    // Constructor
    NelderMead(uint32_t me = 1000, Scalar ry = 1e-8)
        : max_evaluations_(me), relative_y_tolerance_(ry)
    {}
    uint32_t n_evaluations() const { return n_evaluations_; }
    MatrixDN const& simplex_vertices() const { return simplex_vertices_; }
    VectorN const& simplex_values() const { return simplex_values_; }
+    //VectorD const& min_point() const { return simplex_vertices_[i_lowest_]; }
+    decltype(auto) point() const {
+        return simplex_vertices_.col(i_lowest_);
+    }
+    Scalar value() const { return simplex_values_[i_lowest_]; }
-    // Constructs a simplex from an initial point by offset all coordinate values.
+    // Constructs a simplex from an initial point by offsetting all coordinate values.
    template <typename Objective>
-    VectorD optimize(Objective& objective, VectorD const& initial_point, Scalar offset = 1);
+    VectorD optimize(
-    // Each row of simplex is a vertex.
+        Objective& objective,
+        VectorD const& initial_point,
+        Scalar offset = 1
+    );
+    template <typename Objective, typename Log>
+    VectorD optimize(
+        Objective& objective,
+        VectorD const& initial_point,
+        Scalar offset,
+        Log& log
+    );
+    // Each column of simplex is a vertex.
    template <typename Objective>
    VectorD optimize(Objective& objective, MatrixDN const& simplex);
+    template <typename Objective, typename Log>
-    VectorD const& min_point() const { return simplex_vertices_[i_lowest_]; }
+    VectorD optimize(Objective& objective, MatrixDN const& simplex, Log& log);
-    Scalar min_value() const { return simplex_values_[i_lowest_]; }
 private:
    template <typename Objective>
    Scalar try_new_point(Objective& objective, Scalar factor);
+    // hyperparameters
+    uint32_t max_evaluations_;
+    //Scalar relative_x_tolerance_;
+    Scalar relative_y_tolerance_;
+    // algorithm state
    // For fixed size D (and thus N), dim_ and n_vertices_ are redundant.
-    uint32_t dim_;
+    uint32_t dim_;              // we are searching in R^dim_
-    uint32_t n_vertices_; // == dim_ + 1
+    uint32_t n_vertices_;       // the simplex has n_vertices_ == dim_ + 1 different vertices
-    MatrixDN simplex_vertices_;
+    uint32_t n_evaluations_;    // number of times we've evaluated the objective
-    VectorN simplex_values_;
+    MatrixDN simplex_vertices_; // the simplex itself (each column is a vertex)
-    VectorD vertex_sum_;
+    VectorN simplex_values_;    // the function values at the vertices
-    // Invariant: simplex_values_[i_lowest] <= simplex_values_[i_second_highest] <= simplex_values_[i_highest]
+    VectorD vertex_sum_;        // the sum of all the vertices
+    VectorD x_new_;             // used for trial points
+    // Invariant:
+    // simplex_values_[i_lowest] <= simplex_values_[i_second_highest] <= simplex_values_[i_highest]
    uint32_t i_lowest_;
    uint32_t i_second_highest_;
    uint32_t i_highest_;
-    uint32_t n_evaluations_;
+    // algorithm constants
-    uint32_t max_evaluations_;
-    //Scalar relative_x_tolerance_;
-    Scalar relative_y_tolerance_;
    static constexpr Scalar reflection_coefficient_ = -1.0;
    static constexpr Scalar expansion_coefficient_ = 2.0;
    static constexpr Scalar contraction_coefficient_ = 0.5;
@@ -72,6 +108,19 @@ auto NelderMead<D>::optimize(
    Objective& objective,
    VectorD const& initial_point,
    Scalar offset
+) -> VectorD {
+    NelderMeadLogNothing log;
+    return optimize(objective, initial_point, offset, log);
+}
+//..................................................................................................
+template <int32_t D>
+template <typename Objective, typename Log>
+auto NelderMead<D>::optimize(
+    Objective& objective,
+    VectorD const& initial_point,
+    Scalar offset,
+    Log& log
 ) -> VectorD {
    MatrixDN simplex;
    simplex.resize(initial_point.size(), initial_point.size() + 1);
@@ -80,20 +129,29 @@ auto NelderMead<D>::optimize(
        simplex.col(i + 1) = initial_point;
        simplex(i, i + 1) += offset;
    }
-    return optimize(objective, simplex);
+    return optimize(objective, simplex, log);
 }
 //..................................................................................................
 template <int32_t D>
 template <typename Objective>
 auto NelderMead<D>::optimize(Objective& objective, MatrixDN const& simplex) -> VectorD {
+    NelderMeadLogNothing log;
+    return optimize(objective, simplex, log);
+}
+//..................................................................................................
+template <int32_t D>
+template <typename Objective, typename Log>
+auto NelderMead<D>::optimize(Objective& objective, MatrixDN const& simplex, Log& log) -> VectorD {
+    // initialize state
    simplex_vertices_ = simplex;
    n_vertices_ = simplex_vertices_.cols();
    dim_ = simplex_vertices_.rows();
    assert(n_vertices_ == dim_ + 1);
    simplex_values_.resize(n_vertices_);
-    vertex_sum_.resize(dim_);
    vertex_sum_ = simplex_vertices_.rowwise().sum();
+    log.initialize(objective);
    // Evaluate the objective at all simplex vertices.
    for (uint32_t i = 0u; i < n_vertices_; ++i) {
@@ -101,8 +159,6 @@ auto NelderMead<D>::optimize(Objective& objective, MatrixDN const& simplex) -> V
    }
    n_evaluations_ = n_vertices_;
-    NelderMeadLog<D>::initialize(objective);
    while (true) {
        // Find lowest, highest, and second highest points.
        i_lowest_ = (simplex_values_[0] < simplex_values_[1]) ? 0 : 1;
@@ -123,7 +179,7 @@ auto NelderMead<D>::optimize(Objective& objective, MatrixDN const& simplex) -> V
        }
        // Log the simplex.
-        NelderMeadLog<D>::push_back(simplex_vertices_, simplex_values_);
+        log.push_back(simplex_vertices_, simplex_values_);
        // Evaluate termination conditions.
        Scalar const difference = std::abs(simplex_values_[i_highest_] - simplex_values_[i_lowest_]);