diff --git a/CMakeLists.txt b/CMakeLists.txt
index 9db4d2e772ad4088c9c2e6cbc7ccf07fd7890a0a..ec636dfca541fea1fd6906810dc9b7ff6be09399 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -20,3 +20,4 @@ include(cmake/make_plot_target.cmake)
 
 add_subdirectory(external)
 add_subdirectory(optimization)
+add_subdirectory(test)
diff --git a/apps/plots.py b/apps/plots.py
index d1623308353cb31ffd39927e26197fd2b6117b81..777ffd2566f1dd822a89ce90d6d1a22823dd15fd 100644
--- a/apps/plots.py
+++ b/apps/plots.py
@@ -20,7 +20,7 @@ class OffsetLogNorm(mpl.colors.Normalize):
 
 def rosenbrock_plot(x_min, x_max, y_min, y_max):
     fig = plt.figure(figsize=(8,7))
-    left, bottom, width, height = 0.1, 0.1, 0.8, 0.8
+    left, bottom, width, height = 0.1, 0.1, 0.86, 0.8
     ax = fig.add_axes([left, bottom, width, height])
     ax.set_aspect(1)
     #ax.set_xlabel('x (cm)')
@@ -33,7 +33,7 @@ def rosenbrock_plot(x_min, x_max, y_min, y_max):
     Z = rosenbrock(X, Y)
     levels = [1e-2, 1e-1, 1e0, 1e1, 1e2, 1e3, 1e4, 1e5]
-    ax.contour(X, Y, Z, levels, colors='k')
+    ax.contour(X, Y, Z, levels, colors='0.5', linewidths=0.5)
     contour_filled = ax.contourf(X, Y, Z, levels,
         cmap='plasma',
         norm=OffsetLogNorm(vmin=Z.min(), vmax=Z.max(), offset = 1)
         )
@@ -45,13 +45,13 @@ def rosenbrock_plot(x_min, x_max, y_min, y_max):
 def add_points(fig, ax, points):
     x = [p[0] for p in points]
     y = [p[1] for p in points]
-    ax.plot(x, y, 'bx')
+    ax.plot(x, y, 'k.')
     return fig, ax
 
 # points is a list of numpy arrays of dim n x 2
 def add_polygons(fig, ax, polygons):
     for polygon in polygons:
-        p = mpl.patches.Polygon(polygon, True, fill=False, color="black")
+        p = mpl.patches.Polygon(polygon, True, fill=False, color="black", zorder=2)
         ax.add_patch(p)
     #p = PatchCollection(patches, alpha=0.4)
     #ax.add_collection(p)
@@ -90,7 +90,7 @@ if __name__ == "__main__":
     data = vis_json["data"]
 
     if objective == "rosenbrock":
-        fig, ax = rosenbrock_plot(-3, 3, -3, 3)
+        fig, ax = rosenbrock_plot(-2.5, 2.5, -2.5, 2.5)
     # need to implement this
     #elif objective == "paraboloid":
     #    fig, ax = paraboloid_plot(-3, 3, -3, 3)
diff --git a/optimization/objectives/rosenbrock.h b/optimization/objectives/rosenbrock.h
index 7bbdf295fe247322312c947990e5b0ba5d44ae79..318c2bba8aff34473e7113730393c723b1a83a94 100644
--- a/optimization/objectives/rosenbrock.h
+++ b/optimization/objectives/rosenbrock.h
@@ -16,6 +16,8 @@ public:
 
     Rosenbrock() {}
 
+    uint32_t dim() const { return 2; }
+
     void eval(Input const& x, Scalar& value) const {
         Scalar const x_squared = x[0] * x[0];
         Scalar const tmp1 = (1 - x[0]);
diff --git a/optimization/objectives/samples.h b/optimization/objectives/samples.h
index ea249f78b046bf8ab654b0e28720554444ae13d6..f5359a75be910c22d444ee1861cc5c9d33f941f5 100644
--- a/optimization/objectives/samples.h
+++ b/optimization/objectives/samples.h
@@ -1,23 +1,24 @@
 #ifndef OPTIMIZATION_OBJECTIVES_SAMPLES_H
 #define OPTIMIZATION_OBJECTIVES_SAMPLES_H
 
+#include "utils/scalar.h"
+
 namespace optimization {
 
 //--------------------------------------------------------------------------------------------------
-template <typename Objective>
+template <typename Vector>
 struct Sample {
-    using Input = typename Objective::Input;
-
     Sample() {}
-    Sample(Input const& p, Scalar v)
+    Sample(Vector const& p, Scalar v)
         : point(p), value(v)
     {}
 
-    Input point;
+    Vector point;
     Scalar value;
 };
 
 //--------------------------------------------------------------------------------------------------
+// TODO: Templatize on the underlying data types, not the objective function.
 template <typename Objective>
 struct GradientSample {
     using Input = typename Objective::Input;
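
A note on the objective interface: the optimizers and line searches in this branch duck-type against objectives rather than a base class. The sketch below is inferred from the call sites in this diff (`Objective::name` in the logger, `dim()` in `LineObjective`, and the two `eval` overloads); `Sphere` itself is hypothetical and not part of the repo, and it assumes `utils/vector.h` provides `Vector2<Scalar>` as `main.cpp` suggests.

```cpp
// Hypothetical objective illustrating the interface the optimizers expect.
// Inferred from call sites in this diff; not a class in the repo.
#include "utils/scalar.h"
#include "utils/vector.h"
#include <cstdint>

namespace optimization {

struct Sphere {
    using Input = Vector2<Scalar>;
    static constexpr char const* name = "sphere"; // read by the visualization logs

    uint32_t dim() const { return 2; }

    // Value-only evaluation: what BracketFinder/Brent see through LineObjective.
    void eval(Input const& x, Scalar& value) const {
        value = x.squaredNorm();
    }

    // Value-and-gradient evaluation: what ConjugateGradientDescent calls.
    void eval(Input const& x, Scalar& value, Input& gradient) const {
        value = x.squaredNorm();
        gradient = 2 * x;
    }
};

}
```
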
diff --git a/optimization/optimizers/CMakeLists.txt b/optimization/optimizers/CMakeLists.txt
index d308346f865b28ba6b705f272a667ef8f5645278..bd410e1e04c085e98eecb25403a0d81f50545f2f 100644
--- a/optimization/optimizers/CMakeLists.txt
+++ b/optimization/optimizers/CMakeLists.txt
@@ -1,2 +1,3 @@
+add_subdirectory(conjugate_gradient_descent)
 add_subdirectory(gradient_descent)
 add_subdirectory(nelder_mead)
diff --git a/optimization/optimizers/conjugate_gradient_descent/CMakeLists.txt b/optimization/optimizers/conjugate_gradient_descent/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..85359f49630c66a8bfada7eefcf23a3d7bed2196
--- /dev/null
+++ b/optimization/optimizers/conjugate_gradient_descent/CMakeLists.txt
@@ -0,0 +1,7 @@
+if (VISUALIZE)
+    add_executable(conjugate_gradient_descent
+        main.cpp
+    )
+    target_link_libraries(conjugate_gradient_descent optimization_lib clara)
+    make_plot_target(conjugate_gradient_descent)
+endif()
diff --git a/optimization/optimizers/conjugate_gradient_descent/conjugate_gradient_descent.h b/optimization/optimizers/conjugate_gradient_descent/conjugate_gradient_descent.h
new file mode 100644
index 0000000000000000000000000000000000000000..9f390d9e4fb03a6e2f7b26dcbe06b66670fad420
--- /dev/null
+++ b/optimization/optimizers/conjugate_gradient_descent/conjugate_gradient_descent.h
@@ -0,0 +1,125 @@
+#ifndef OPTIMIZATION_CONJUGATE_GRADIENT_DESCENT_H
+#define OPTIMIZATION_CONJUGATE_GRADIENT_DESCENT_H
+
+#include "conjugate_gradient_descent_log.h"
+#include "optimizers/line_search/brent.h"
+#include "optimizers/line_search/line_objective.h"
+
+#include <iostream>
+
+namespace optimization {
+
+//--------------------------------------------------------------------------------------------------
+template <int32_t N>
+class ConjugateGradientDescent : public ConjugateGradientDescentLog<N> {
+public:
+    using Vector = VectorNs<N>;
+
+    ConjugateGradientDescent(Scalar gt = 1e-8, Scalar pt = 1e-8, uint32_t mi = 1000)
+        : gradient_threshold_(gt), progress_threshold_(pt), max_iterations_(mi)
+    {}
+
+    uint32_t n_evaluations() const { return n_evaluations_; }
+    uint32_t n_iterations() const { return n_iterations_; }
+
+    template <typename Objective>
+    Vector optimize(Objective& objective, Vector const& initial_point);
+
+private:
+    uint32_t n_evaluations_;
+    uint32_t n_iterations_;
+
+    // termination parameters
+    Scalar gradient_threshold_;
+    Scalar progress_threshold_;
+    uint32_t max_iterations_;
+
+    static constexpr Scalar tiny_ = std::numeric_limits<Scalar>::epsilon();
+};
+
+//..................................................................................................
+template <int32_t N>
+template <typename Objective>
+VectorNs<N> ConjugateGradientDescent<N>::optimize(
+    Objective& objective,
+    Vector const& initial_point
+) {
+    n_evaluations_ = 0;
+    n_iterations_ = 0;
+
+    BracketFinder bracket;
+    Brent line_minimizer;
+
+    // This object holds the u_i (dir) and the x_i (x0).
+    LineObjective<Objective, N> line_objective(objective);
+    line_objective.x0() = initial_point;
+
+    Scalar alpha = -1;
+    Scalar value, last_value;
+    Vector gradient, last_gradient;
+    gradient.resize(objective.dim());
+    last_gradient.resize(objective.dim());
+
+    objective.eval(line_objective.x0(), value, gradient);
+    ++n_evaluations_;
+    line_objective.dir() = gradient;
+
+    ConjugateGradientDescentLog<N>::initialize(objective, line_objective.x0(), value, gradient);
+
+    // Return if the gradient is basically zero.
+    if (gradient.norm() < gradient_threshold_) {
+        return line_objective.x0();
+    }
+
+    while (true) {
+        // Find the minimum along this direction.
+        bracket.bracket(line_objective, Scalar(0), alpha);
+        alpha = line_minimizer.optimize(line_objective, bracket).point;
+        n_evaluations_ += bracket.n_evaluations();
+        n_evaluations_ += line_minimizer.n_evaluations();
+        // Note: at some point line_objective.x() already had this new value, but right now there's
+        // no guarantee that it's the current value.
+        line_objective.x0() += alpha * line_objective.dir();
+
+        // Can we re-use evaluation here? Would need to rewrite line search to have guarantees on
+        // which point it evaluated last. Probably best to do this after I've written dbrent.
+        last_value = value;
+        gradient.swap(last_gradient);
+        objective.eval(line_objective.x0(), value, gradient);
+        ++n_evaluations_;
+        ++n_iterations_;
+
+        ConjugateGradientDescentLog<N>::push_back(
+            line_objective.dir(),
+            line_objective.x0(),
+            value,
+            gradient
+        );
+
+        // Check termination conditions.
+        if (gradient.norm() < gradient_threshold_) {
+            std::cout << "reached gradient threshold (" << gradient.norm() << ")\n";
+            return line_objective.x0();
+        }
+        if (2 * std::abs(last_value - value) <=
+            progress_threshold_ * (std::abs(last_value) + std::abs(value) + tiny_)
+        ) {
+            std::cout << "reached progress threshold\n";
+            return line_objective.x0();
+        }
+        if (n_iterations_ > max_iterations_) {
+            std::cout << "reached max iterations\n";
+            return line_objective.x0();
+        }
+
+        // Choose the next search direction. It is conjugate to all prior directions.
+        // I view this as the start of the next iteration.
+        Scalar const gamma =
+            gradient.dot(gradient - last_gradient) / last_gradient.dot(last_gradient);
+        line_objective.dir() = gradient + gamma * line_objective.dir();
+    }
+}
+
+}
+
+#endif
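
The `gamma` above is the Polak-Ribière coefficient. In the usual textbook orientation, with g_k the gradient at iterate k, the update is

```latex
\gamma_k = \frac{g_{k+1}^{\top}\,(g_{k+1} - g_k)}{g_k^{\top} g_k},
\qquad
d_{k+1} = -g_{k+1} + \gamma_k\, d_k .
```

Note the sign convention in the code: `dir` stores the negation of the textbook descent direction (it is seeded with `+gradient`, and `alpha` is initialized to -1, so the first bracket is grown from t = 0 toward t = -1). Minimizing over signed `alpha` along `dir` is therefore equivalent to a positive step along d, and `dir = gradient + gamma * dir` reproduces the update above.
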
diff --git a/optimization/optimizers/conjugate_gradient_descent/conjugate_gradient_descent_log.h b/optimization/optimizers/conjugate_gradient_descent/conjugate_gradient_descent_log.h
new file mode 100644
index 0000000000000000000000000000000000000000..4bfcb0bbb84446782b90874f303defa7381c750d
--- /dev/null
+++ b/optimization/optimizers/conjugate_gradient_descent/conjugate_gradient_descent_log.h
@@ -0,0 +1,113 @@
+#ifndef OPTIMIZATION_CONJUGATE_GRADIENT_DESCENT_LOG_H
+#define OPTIMIZATION_CONJUGATE_GRADIENT_DESCENT_LOG_H
+
+#include "utils/vector.h"
+#include "utils/vis_only.h"
+
+#ifdef VISUALIZE
+#include "json.hpp"
+#include <string>
+#include <vector>
+#endif
+
+namespace optimization {
+
+//--------------------------------------------------------------------------------------------------
+template <int32_t N>
+struct ConjugateGradientDescentState {
+    VectorNs<N> direction; // the direction we searched to get here (not where we will search next)
+    VectorNs<N> point; // where we are
+    VectorNs<N> gradient; // the gradient where we are
+    Scalar value; // the function value where we are
+};
+
+//--------------------------------------------------------------------------------------------------
+// This is used as a base class rather than a member so that the empty base class optimization can
+// be applied (the member would take up space even if it is an empty class).
+template <int32_t N>
+struct ConjugateGradientDescentLog {
+    void reserve(uint32_t n) VIS_ONLY_METHOD;
+    template <typename Objective>
+    void initialize(
+        Objective const&,
+        VectorNs<N> const& point,
+        Scalar value,
+        VectorNs<N> const& gradient
+    ) VIS_ONLY_METHOD;
+    void push_back(
+        VectorNs<N> const& direction,
+        VectorNs<N> const& point,
+        Scalar value,
+        VectorNs<N> const& gradient
+    ) VIS_ONLY_METHOD;
+    void clear() VIS_ONLY_METHOD;
+
+    #ifdef VISUALIZE
+    std::string objective_name;
+    std::vector<ConjugateGradientDescentState<N>> states;
+    #endif
+};
+
+#ifdef VISUALIZE
+
+//..................................................................................................
+template <int32_t N>
+void ConjugateGradientDescentLog<N>::reserve(uint32_t n) {
+    states.reserve(n);
+}
+
+//..................................................................................................
+template <int32_t N>
+template <typename Objective>
+void ConjugateGradientDescentLog<N>::initialize(
+    Objective const&,
+    VectorNs<N> const& point,
+    Scalar value,
+    VectorNs<N> const& gradient
+) {
+    objective_name = Objective::name;
+    states.emplace_back();
+    states.back().direction.resize(N);
+    states.back().direction.setZero(N);
+    states.back().point = point;
+    states.back().gradient = gradient;
+    states.back().value = value;
+}
+
+//..................................................................................................
+template <int32_t N>
+void ConjugateGradientDescentLog<N>::push_back(
+    VectorNs<N> const& direction,
+    VectorNs<N> const& point,
+    Scalar value,
+    VectorNs<N> const& gradient
+) {
+    states.push_back({direction, point, gradient, value});
+}
+
+//..................................................................................................
+template <int32_t N>
+void to_json(nlohmann::json& j, ConjugateGradientDescentState<N> const& state) {
+    j = nlohmann::json{
+        {"direction", state.direction},
+        {"point", state.point},
+        {"value", state.value},
+        {"gradient", state.gradient}
+    };
+}
+
+//..................................................................................................
+template <int32_t N>
+void to_json(nlohmann::json& j, ConjugateGradientDescentLog<N> const& log) {
+    j = nlohmann::json{
+        {"algorithm", "conjugate gradient descent"},
+        {"objective", log.objective_name},
+        {"data", log.states}
+    };
+}
+
+#endif
+
+}
+
+#endif
diff --git a/optimization/optimizers/conjugate_gradient_descent/conjugate_gradient_descent_vis.h b/optimization/optimizers/conjugate_gradient_descent/conjugate_gradient_descent_vis.h
new file mode 100644
index 0000000000000000000000000000000000000000..9b7adaeb40d53bbde27112bb448d01a9955c8ad2
--- /dev/null
+++ b/optimization/optimizers/conjugate_gradient_descent/conjugate_gradient_descent_vis.h
@@ -0,0 +1,31 @@
+#ifndef OPTIMIZATION_CONJUGATE_GRADIENT_DESCENT_VIS_H
+#define OPTIMIZATION_CONJUGATE_GRADIENT_DESCENT_VIS_H
+
+#include "conjugate_gradient_descent_log.h"
+
+namespace optimization {
+
+//--------------------------------------------------------------------------------------------------
+template <int32_t N>
+struct ConjugateGradientDescentVis {
+    ConjugateGradientDescentLog<N> const& log;
+};
+
+//..................................................................................................
+template <int32_t N>
+void to_json(nlohmann::json& j, ConjugateGradientDescentVis<N> const& vis) {
+    j = nlohmann::json{
+        {"algorithm", "conjugate gradient descent"},
+        {"objective", vis.log.objective_name},
+        {"data", {
+            {"points", nlohmann::json::array()}
+        }}
+    };
+    for (auto const& state : vis.log.states) {
+        j["data"]["points"].push_back(state.point);
+    }
+}
+
+}
+
+#endif
diff --git a/optimization/optimizers/conjugate_gradient_descent/main.cpp b/optimization/optimizers/conjugate_gradient_descent/main.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..80074694af97e827cd0df6c3f92bbd11ddd48a15
--- /dev/null
+++ b/optimization/optimizers/conjugate_gradient_descent/main.cpp
@@ -0,0 +1,62 @@
+#include "clara.hpp"
+#include "conjugate_gradient_descent.h"
+#include "conjugate_gradient_descent_vis.h"
+#include "objectives/paraboloid.h"
+#include "objectives/rosenbrock.h"
+#include "utils/eigen_json.h"
+#include <iostream>
+#include <fstream>
+
+using namespace optimization;
+using json = nlohmann::json;
+
+//--------------------------------------------------------------------------------------------------
+int main(int const argc, char const** argv) {
+    std::string log_file_path;
+    std::string vis_file_path;
+    Scalar gradient_threshold = 1e-8;
+    Scalar progress_threshold = 1e-8;
+    uint32_t max_iterations = 1000;
+    Scalar x0 = -1;
+    Scalar y0 = 2;
+
+    auto const cli =
+        clara::Arg(log_file_path, "log_file_path")("Location of the optimization log") |
+        clara::Arg(vis_file_path, "vis_file_path")("Location of the visualization file") |
+        clara::Opt(gradient_threshold, "gradient_threshold")["-g"]["--gradient"]("Return if the gradient norm is this small") |
+        clara::Opt(progress_threshold, "progress_threshold")["-p"]["--progress"]("Return if the difference between two consecutive values is this small") |
+        clara::Opt(max_iterations, "max_iterations")["-n"]["--max-iterations"]("Maximum number of line minimization cycles") |
+        clara::Opt(x0, "x0")["-x"]["--x0"]("X coordinate of initial point") |
+        clara::Opt(y0, "y0")["-y"]["--y0"]("Y coordinate of initial point");
+    auto const result = cli.parse(clara::Args(argc, argv));
+    if (!result) {
+        std::cerr << "Error in command line: " << result.errorMessage() << std::endl;
+        exit(1);
+    }
+
+    Vector2<Scalar> initial_point = Vector2<Scalar>(x0, y0);
+
+    //using Objective = Paraboloid<Vector2<Scalar>>;
+    //Objective objective(dim);
+    using Objective = Rosenbrock<Vector2<Scalar>>;
+    Objective objective;
+    ConjugateGradientDescent<2> optimizer(gradient_threshold, progress_threshold, max_iterations);
+    VectorNs<2> minimum = optimizer.optimize(objective, initial_point);
+    std::cout << "n iterations: " << optimizer.n_iterations() << '\n';
+    std::cout << "n evaluations: " << optimizer.n_evaluations() << '\n';
+    std::cout << "final point: " << minimum << '\n';
+
+    if (!log_file_path.empty()) {
+        json data = optimizer;
+        std::ofstream log_file(log_file_path);
+        log_file << data.dump(4) << '\n';
+    }
+
+    if (!vis_file_path.empty()) {
+        json data = ConjugateGradientDescentVis<2>{optimizer};
+        std::ofstream vis_file(vis_file_path);
+        vis_file << data.dump(4) << '\n';
+    }
+
+    return 0;
+}
diff --git a/optimization/optimizers/gradient_descent/main.cpp b/optimization/optimizers/gradient_descent/main.cpp
index 754027a103267882a2671531b4a879a3b27f5be0..a77ae6c9ec666e501399e99cf5b521dbbcdf4e9e 100644
--- a/optimization/optimizers/gradient_descent/main.cpp
+++ b/optimization/optimizers/gradient_descent/main.cpp
@@ -15,10 +15,10 @@ int main(int const argc, char const** argv) {
     std::string log_file_path;
     std::string vis_file_path;
     uint32_t dim = 2;
-    uint32_t n_steps = 100;
-    Scalar learning_rate = 0.001;
+    uint32_t n_steps = 1000;
+    Scalar learning_rate = 0.0015;
     Scalar x0 = -1;
-    Scalar y0 = 0;
+    Scalar y0 = 2;
 
     auto const cli =
         clara::Arg(log_file_path, "log_file_path")("Location of the optimization log") |
@@ -41,7 +41,9 @@
     using Objective = Rosenbrock<Vector2<Scalar>>;
     Objective objective;
     GradientDescent<Objective> optimizer(learning_rate, n_steps);
-    optimizer.optimize(objective, initial_point);
+    VectorNs<2> minimum = optimizer.optimize(objective, initial_point);
+    std::cout << "n evaluations: " << n_steps << '\n';
+    std::cout << "final point: " << minimum << '\n';
 
     if (!log_file_path.empty()) {
         json data = optimizer;
diff --git a/optimization/optimizers/line_search/bracket.h b/optimization/optimizers/line_search/bracket.h
new file mode 100644
index 0000000000000000000000000000000000000000..8d94f05e32c7431e0e03aad28e1bf33ea58d3e90
--- /dev/null
+++ b/optimization/optimizers/line_search/bracket.h
@@ -0,0 +1,161 @@
+#ifndef OPTIMIZATION_OPTIMIZERS_LINE_SEARCH_BRACKET_H
+#define OPTIMIZATION_OPTIMIZERS_LINE_SEARCH_BRACKET_H
+
+#include "utils/scalar.h"
+#include <algorithm> // std::max
+#include <cassert>   // assert
+#include <utility>   // std::swap
+
+namespace optimization {
+
+//--------------------------------------------------------------------------------------------------
+// TODO: Could be nice for this class to guarantee y_1_ > y_2_ < y_3_ and x_1_ < x_2_ < x_3_ or
+// x_1_ > x_2_ > x_3_ (with the "or" evaluated before the "and"). But for now this feels like
+// egregious overengineering.
+class Bracket {
+public:
+    Bracket() {}
+    Bracket(Scalar x_1, Scalar x_2, Scalar x_3, Scalar y_1, Scalar y_2, Scalar y_3)
+        : x_1_(x_1), x_2_(x_2), x_3_(x_3), y_1_(y_1), y_2_(y_2), y_3_(y_3)
+    {}
+    Bracket(Bracket const&) = default;
+    Bracket& operator=(Bracket const&) = default;
+
+    Scalar x_1() const { return x_1_; }
+    Scalar x_2() const { return x_2_; }
+    Scalar x_3() const { return x_3_; }
+    Scalar y_1() const { return y_1_; }
+    Scalar y_2() const { return y_2_; }
+    Scalar y_3() const { return y_3_; }
+
+protected:
+    Scalar x_1_;
+    Scalar x_2_;
+    Scalar x_3_;
+    Scalar y_1_;
+    Scalar y_2_;
+    Scalar y_3_;
+};
+
+//--------------------------------------------------------------------------------------------------
+// TODO: Should this have a failsafe max number of function evals (for monotonic objectives)?
+class BracketFinder : public Bracket {
+public:
+    BracketFinder(): n_evaluations_(0) {}
+
+    uint32_t n_evaluations() const { return n_evaluations_; }
+
+    template <typename Objective>
+    void bracket(Objective& objective, Scalar x_1, Scalar x_2);
+
+private:
+    uint32_t n_evaluations_;
+
+    static constexpr Scalar golden_ratio_ = 1.618034;
+    static constexpr Scalar max_ratio_ = 1e2;
+    static constexpr Scalar tiny_ = 1e-10;
+};
+
+//..................................................................................................
+// TODO: How should this method handle flat functions?
+template <typename Objective>
+void BracketFinder::bracket(Objective& objective, Scalar x_1, Scalar x_2) {
+    // Copy in starting values and perform first evaluations.
+    x_1_ = x_1;
+    x_2_ = x_2;
+    objective.eval(x_1_, y_1_);
+    objective.eval(x_2_, y_2_);
+    n_evaluations_ = 2;
+
+    // Ensure that y_2_ < y_1_.
+    assert(y_1_ != y_2_);
+    if (y_1_ < y_2_) {
+        std::swap(x_1_, x_2_);
+        std::swap(y_1_, y_2_);
+    }
+
+    // First try a golden ratio step.
+    // From here on out, either x_1_ < x_2_ < x_3_ or x_1_ > x_2_ > x_3_.
+    x_3_ = x_2_ + golden_ratio_ * (x_2_ - x_1_);
+    objective.eval(x_3_, y_3_);
+    ++n_evaluations_;
+
+    // Search until we have a bracket.
+    // Inside this loop, y_1_ > y_2_ > y_3_. So we don't know if x_1_ -> x_2_ -> x_3_ is going left
+    // or right, but we do know it's going downhill.
+    while (y_2_ > y_3_) {
+        // Use parabolic interpolation to guess the minimum.
+        // This formula can be derived by constructing the relevant Lagrange polynomials, and
+        // setting the derivative to zero. This is not the most symmetric form of the answer, but it
+        // involves no squaring.
+        Scalar const tmp_1 = (x_2_ - x_3_) * (y_2_ - y_1_);
+        Scalar const tmp_2 = (x_2_ - x_1_) * (y_2_ - y_3_);
+        Scalar const tmp_3 = tmp_1 - tmp_2;
+        Scalar const sign = (tmp_3 >= 0) ? 1 : -1;
+        Scalar const numerator = sign * (tmp_1 * (x_2_ - x_3_) - tmp_2 * (x_2_ - x_1_));
+        Scalar const denominator = 2 * std::max(sign * tmp_3, tiny_);
+        Scalar x_new = x_2_ - numerator / denominator;
+        Scalar y_new;
+
+        // Don't step past this point.
+        Scalar const x_lim = x_2_ + max_ratio_ * (x_3_ - x_2_);
+
+        if ((x_new - x_2_) * (x_3_ - x_new) > Scalar(0)) {
+            // If the new point is between x_2_ and x_3_, try it.
+            objective.eval(x_new, y_new);
+            ++n_evaluations_;
+
+            if (y_new < y_3_) {
+                // x_2_, x_new, x_3_ is a bracket
+                x_1_ = x_2_;
+                x_2_ = x_new;
+                y_1_ = y_2_;
+                y_2_ = y_new;
+                return;
+            } else if (y_2_ < y_new) {
+                // x_1_, x_2_, x_new is a bracket
+                x_3_ = x_new;
+                y_3_ = y_new;
+                return;
+            }
+        } else if ((x_new - x_3_) * (x_lim - x_new) > Scalar(0)) {
+            // If the new point seems to be downhill and is not super far away, try it.
+            objective.eval(x_new, y_new);
+            ++n_evaluations_;
+
+            if (y_new < y_3_) {
+                // We're still going downhill. Get rid of x_2_ and try a golden ratio step.
+                Scalar const step = golden_ratio_ * (x_new - x_3_);
+                x_2_ = x_3_;
+                x_3_ = x_new;
+                x_new += step;
+                y_2_ = y_3_;
+                y_3_ = y_new;
+                objective.eval(x_new, y_new);
+                ++n_evaluations_;
+            }
+        } else if ((x_lim - x_3_) * (x_new - x_lim) >= Scalar(0)) {
+            // If the new point is past x_lim, try x_lim next.
+            x_new = x_lim;
+            objective.eval(x_new, y_new);
+            ++n_evaluations_;
+        } else {
+            // If the new point seems to be too far uphill, just do a golden ratio step.
+            x_new = x_3_ + golden_ratio_ * (x_3_ - x_2_);
+            objective.eval(x_new, y_new);
+            ++n_evaluations_;
+        }
+
+        // Get rid of the highest point.
+        x_1_ = x_2_;
+        x_2_ = x_3_;
+        x_3_ = x_new;
+        y_1_ = y_2_;
+        y_2_ = y_3_;
+        y_3_ = y_new;
+    }
+}
+
+}
+
+#endif
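
For reference, the parabolic step used by `BracketFinder` above is the vertex of the parabola through the three current samples; constructing the Lagrange polynomials and setting the derivative to zero gives

```latex
x_{\mathrm{new}} \;=\; x_2 \;-\; \frac{1}{2}\,
  \frac{(x_2 - x_1)^2\,(y_2 - y_3) \;-\; (x_2 - x_3)^2\,(y_2 - y_1)}
       {(x_2 - x_1)\,(y_2 - y_3) \;-\; (x_2 - x_3)\,(y_2 - y_1)} .
```

With `tmp_1 = (x_2 - x_3)(y_2 - y_1)` and `tmp_2 = (x_2 - x_1)(y_2 - y_3)` this is exactly `x_2 - (tmp_1 * (x_2 - x_3) - tmp_2 * (x_2 - x_1)) / (2 * (tmp_1 - tmp_2))`; the `sign`/`std::max(..., tiny_)` handling just keeps the denominator positive and bounded away from zero.
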
diff --git a/optimization/optimizers/line_search/brent.h b/optimization/optimizers/line_search/brent.h
new file mode 100644
index 0000000000000000000000000000000000000000..26f65d605556789f26064695fee828b6ffbd590c
--- /dev/null
+++ b/optimization/optimizers/line_search/brent.h
@@ -0,0 +1,206 @@
+#ifndef OPTIMIZATION_OPTIMIZERS_LINE_SEARCH_BRENT_H
+#define OPTIMIZATION_OPTIMIZERS_LINE_SEARCH_BRENT_H
+
+#include "bracket.h"
+#include "objectives/samples.h"
+#include <cmath>
+#include <limits>
+
+#include <iostream>
+
+namespace optimization {
+
+//--------------------------------------------------------------------------------------------------
+class Brent {
+public:
+    // Note:
+    // - the default absolute tolerance is equivalent to no absolute termination condition
+    // - the default relative tolerance is suitable for double precision floats
+    Brent(Scalar abs_tol = -1, Scalar rel_tol = 3e-8, uint32_t me = 100)
+        : abs_tol_(abs_tol), rel_tol_(rel_tol), max_evaluations_(me)
+    {}
+
+    // Brent's method evaluates the function exactly once per iteration.
+    uint32_t n_iterations() const { return n_evaluations_; }
+    uint32_t n_evaluations() const { return n_evaluations_; }
+
+    template <typename Objective>
+    Sample<Scalar> optimize(Objective& objective, Bracket const& bracket);
+
+private:
+    uint32_t n_evaluations_;
+
+    // Goal for absolute width of the bracket. Put a negative number if you want no abs tol.
+    Scalar abs_tol_;
+    // Goal for width of bracket relative to central value. Should always have this (and we use its
+    // absolute value, so setting it negative won't help).
+    Scalar rel_tol_;
+    uint32_t max_evaluations_;
+
+    static constexpr Scalar golden_ratio_small_ = 0.3819660;
+    static constexpr Scalar tiny_ = std::numeric_limits<Scalar>::epsilon() * 1e-3;
+};
+
+//..................................................................................................
+template <typename Objective>
+Sample<Scalar> Brent::optimize(Objective& objective, Bracket const& bracket) {
+    n_evaluations_ = 0;
+
+    // These two points define our bracket. Invariant: a < b.
+    Scalar a, b;
+    if (bracket.x_1() < bracket.x_3()) {
+        a = bracket.x_1();
+        b = bracket.x_3();
+    } else {
+        a = bracket.x_3();
+        b = bracket.x_1();
+    }
+
+    // These are the points and values of the three best points found so far. Invariants:
+    // a <= x_1 <= b
+    // a <= x_2 <= b
+    // a <= x_3 <= b
+    // y_1 <= y_2 <= y_3.
+    Scalar x_1, x_2, x_3;
+    Scalar y_1, y_2, y_3;
+    x_1 = bracket.x_2();
+    y_1 = bracket.y_2();
+    if (bracket.y_1() <= bracket.y_3()) {
+        x_2 = bracket.x_1();
+        y_2 = bracket.y_1();
+        x_3 = bracket.x_3();
+        y_3 = bracket.y_3();
+    } else {
+        x_2 = bracket.x_3();
+        y_2 = bracket.y_3();
+        x_3 = bracket.x_1();
+        y_3 = bracket.y_1();
+    }
+
+    // This variable is used to store the next step (as a displacement from x_1).
+    Scalar step = 0;
+
+    // When we take parabolic steps, this variable records the last step size. When we take golden
+    // section steps, this variable records the size of the section of the bracket that we stepped
+    // into (i.e. what was the larger half of the bracket).
+    Scalar prev_step = std::abs(x_3 - x_2);
+
+    while (true) {
+        // Check the absolute termination condition.
+        if (b - a <= abs_tol_) {
+            return Sample<Scalar>(x_1, y_1);
+        }
+
+        // The midpoint of the current bracket.
+        Scalar const midpoint = 0.5 * (a + b);
+
+        // Note that tol_1 and tol_2 are non-negative.
+        Scalar const tol_1 = std::abs(rel_tol_ * x_1) + tiny_;
+        Scalar const tol_2 = 2.0 * tol_1;
+
+        // Check the relative termination condition.
+        if (std::abs(x_1 - midpoint) <= (tol_2 - 0.5 * (b - a))) {
+            return Sample<Scalar>(x_1, y_1);
+        }
+
+        // Try a parabolic fit using x_1, x_2, and x_3.
+        // The minimum of the parabola is at x_1 + numerator / denominator.
+        // Note that the denominator is always positive (we put the sign in the numerator).
+        Scalar const tmp_1 = (x_1 - x_2) * (y_1 - y_3);
+        Scalar const tmp_2 = (x_1 - x_3) * (y_1 - y_2);
+        Scalar const sgn = (tmp_2 >= tmp_1) ? 1 : -1;
+        Scalar const numerator = -sgn * ((x_1 - x_3) * tmp_2 - (x_1 - x_2) * tmp_1);
+        Scalar const denominator = sgn * 2.0 * (tmp_2 - tmp_1);
+
+        // For us to use this point, we require that the step size is sufficiently small, and
+        // that the point is within our bracket. The reason we've kept the numerator and
+        // denominator separate, and made the latter positive, is so we can write these tests in
+        // forms that work even when the denominator is zero. We end up with three conditions:
+        //
+        // 1. The proposed step size is less than half the second to last step size (prev_step).
+        //        | numerator / denominator | < 0.5 * prev_step
+        //    ==> | numerator | < 0.5 * | denominator * prev_step |
+        //
+        // 2. The parabolic minimum must be to the right of a
+        //        x_1 + numerator / denominator > a
+        //    ==> numerator > denominator * (a - x_1)
+        //
+        // 3. The parabolic minimum must be to the left of b
+        //        x_1 + numerator / denominator < b
+        //    ==> numerator < denominator * (b - x_1)
+        if (
+            std::abs(numerator) < 0.5 * std::abs(denominator * prev_step)
+            && numerator > denominator * (a - x_1)
+            && numerator < denominator * (b - x_1)
+        ) {
+            // Take the parabolic step.
+            prev_step = step;
+            step = numerator / denominator;
+            Scalar const x_new = x_1 + step;
+            // Make sure we're not stepping too close to a or b.
+            if (x_new - a < tol_2 || b - x_new < tol_2) {
+                step = (midpoint - x_1 >= 0) ? tol_1 : -tol_1;
+            }
+        } else {
+            // Take a golden section step.
+            // This "resets" prev_step, to be the size of the half of the bracket we step into.
+            prev_step = (x_1 >= midpoint) ? (a - x_1) : (b - x_1);
+            step = golden_ratio_small_ * prev_step;
+        }
+
+        // Ensure the step isn't too small.
+        if (std::abs(step) < tol_1) {
+            step = (step >= 0) ? tol_1 : -tol_1;
+        }
+
+        // Take the step and evaluate the result.
+        Scalar const x_new = x_1 + step;
+        Scalar y_new;
+        objective.eval(x_new, y_new);
+        ++n_evaluations_;
+
+        if (y_new <= y_1) {
+            // y_new is the best point we've seen so far, so x_1 becomes a bracket bound.
+            if (x_new >= x_1) {
+                a = x_1;
+            } else {
+                b = x_1;
+            }
+
+            // Update our three points.
+            x_3 = x_2;
+            x_2 = x_1;
+            x_1 = x_new;
+            y_3 = y_2;
+            y_2 = y_1;
+            y_1 = y_new;
+        } else {
+            // x_1 is still the best point, so x_new becomes a bracket bound.
+            if (x_new < x_1) {
+                a = x_new;
+            } else {
+                b = x_new;
+            }
+
+            // Update our three points.
+            if (y_new <= y_2) {
+                x_3 = x_2;
+                x_2 = x_new;
+                y_3 = y_2;
+                y_2 = y_new;
+            } else if (y_new <= y_3) {
+                x_3 = x_new;
+                y_3 = y_new;
+            }
+        }
+
+        // Check failsafe termination condition.
+        if (n_evaluations_ > max_evaluations_) {
+            return Sample<Scalar>(x_1, y_1);
+        }
+    }
+}
+
+}
+
+#endif
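
The relative termination test in the loop above is terser than it looks. Since the farthest bracket end from x_1 is at distance (b - a)/2 + |x_1 - midpoint|, the condition |x_1 - midpoint| <= tol_2 - (b - a)/2 rearranges to

```latex
\max(x_1 - a,\; b - x_1)
  \;=\; \frac{b - a}{2} + \left| x_1 - \frac{a + b}{2} \right|
  \;\le\; \mathrm{tol}_2 \;=\; 2\,\mathrm{tol}_1 ,
```

i.e. Brent stops once the entire bracket lies within twice the relative tolerance of the current best point x_1.
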
diff --git a/optimization/optimizers/line_search/golden_section.h b/optimization/optimizers/line_search/golden_section.h
new file mode 100644
index 0000000000000000000000000000000000000000..17eb70072b20bfbb3ef17da3e5d0bb623f1937d9
--- /dev/null
+++ b/optimization/optimizers/line_search/golden_section.h
@@ -0,0 +1,105 @@
+#ifndef OPTIMIZATION_OPTIMIZERS_LINE_SEARCH_GOLDEN_SECTION_H
+#define OPTIMIZATION_OPTIMIZERS_LINE_SEARCH_GOLDEN_SECTION_H
+
+#include "bracket.h"
+#include "objectives/samples.h"
+#include <cmath>
+#include <limits>
+
+namespace optimization {
+
+//--------------------------------------------------------------------------------------------------
+class GoldenSection {
+public:
+    // Note:
+    // - the default absolute tolerance is equivalent to no absolute termination condition
+    // - the default relative tolerance is suitable for double precision floats
+    GoldenSection(Scalar abs_tol = -1, Scalar rel_tol = 3e-8)
+        : abs_tol_(abs_tol), rel_tol_(rel_tol), n_evaluations_(0)
+    {}
+
+    uint32_t n_evaluations() const { return n_evaluations_; }
+
+    template <typename Objective>
+    Sample<Scalar> optimize(Objective& objective, Bracket const& bracket);
+
+private:
+    // Goal for absolute width of the bracket. Put a negative number if you want no abs tol.
+    Scalar abs_tol_;
+    // Goal for width of bracket relative to central value. Should always have this.
+    Scalar rel_tol_;
+    uint32_t n_evaluations_;
+
+    static constexpr Scalar golden_ratio_big_ = 0.618034;
+    static constexpr Scalar golden_ratio_small_ = Scalar(1) - golden_ratio_big_;
+    static constexpr Scalar tiny_ = std::numeric_limits<Scalar>::epsilon() * 1e-3;
+};
+
+//..................................................................................................
+template <typename Objective>
+Sample<Scalar> GoldenSection::optimize(Objective& objective, Bracket const& bracket) {
+    // Invariants:
+    // x_0 < x_1 < x_2 < x_3
+    // y_1 < f(x_0) and y_1 < f(x_3)
+    // y_2 < f(x_0) and y_2 < f(x_3)
+    Scalar x_0, x_1, x_2, x_3;
+    Scalar y_1, y_2;
+
+    // Copy in the edges of the bracket, ensuring order is respected.
+    if (bracket.x_1() < bracket.x_3()) {
+        x_0 = bracket.x_1();
+        x_3 = bracket.x_3();
+    } else {
+        x_0 = bracket.x_3();
+        x_3 = bracket.x_1();
+    }
+
+    // Copy in the middle point of the bracket, and perform the first golden section interpolation.
+    if (bracket.x_2() - x_0 <= x_3 - bracket.x_2()) {
+        // If the middle of the bracket is closer to the left edge, interpolate into the right half
+        // of the bracket.
+        x_1 = bracket.x_2();
+        y_1 = bracket.y_2();
+        x_2 = x_1 + golden_ratio_small_ * (x_3 - x_1);
+        objective.eval(x_2, y_2);
+    } else {
+        // If the middle of the bracket is closer to the right edge, interpolate into the left half
+        // of the bracket.
+        x_2 = bracket.x_2();
+        y_2 = bracket.y_2();
+        x_1 = x_2 - golden_ratio_small_ * (x_2 - x_0);
+        objective.eval(x_1, y_1);
+    }
+    n_evaluations_ = 1;
+
+    // Keep interpolating until our bracket is sufficiently tight.
+    while (
+        x_3 - x_0 > abs_tol_
+        && x_3 - x_0 > rel_tol_ * (std::abs(x_1) + std::abs(x_2)) + tiny_
+    ) {
+        if (y_2 < y_1) {
+            // x_1 is our new left edge; interpolate between x_2 and x_3
+            x_0 = x_1;
+            x_1 = x_2;
+            x_2 = golden_ratio_big_ * x_2 + golden_ratio_small_ * x_3;
+            y_1 = y_2;
+            objective.eval(x_2, y_2);
+            ++n_evaluations_;
+        } else {
+            // x_2 is our new right edge; interpolate between x_0 and x_1
+            x_3 = x_2;
+            x_2 = x_1;
+            x_1 = golden_ratio_small_ * x_0 + golden_ratio_big_ * x_1;
+            y_2 = y_1;
+            objective.eval(x_1, y_1);
+            ++n_evaluations_;
+        }
+    }
+
+    // Return the smaller of the two inner points.
+    return (y_1 < y_2) ? Sample<Scalar>(x_1, y_1) : Sample<Scalar>(x_2, y_2);
+}
+
+}
+
+#endif
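
The two constants above are the golden section ratios:

```latex
\varphi_{\mathrm{big}} = \frac{\sqrt{5} - 1}{2} \approx 0.618034,
\qquad
\varphi_{\mathrm{small}} = 1 - \varphi_{\mathrm{big}} = \varphi_{\mathrm{big}}^{2} \approx 0.381966 .
```

The ratio is chosen for self-similarity: whichever inner point wins, the surviving bracket has width 0.618034 times the old one, and its interior points again sit at the golden fractions, so the bracket shrinks by the same factor on every evaluation.
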
diff --git a/optimization/optimizers/line_search/line_objective.h b/optimization/optimizers/line_search/line_objective.h
new file mode 100644
index 0000000000000000000000000000000000000000..02c148a1e699ba84a6c3cb97c8824f5a1ded4ecb
--- /dev/null
+++ b/optimization/optimizers/line_search/line_objective.h
@@ -0,0 +1,44 @@
+#ifndef OPTIMIZATION_LINE_SEARCH_LINE_OBJECTIVE_H
+#define OPTIMIZATION_LINE_SEARCH_LINE_OBJECTIVE_H
+
+#include "utils/vector.h"
+
+namespace optimization {
+
+//--------------------------------------------------------------------------------------------------
+template <typename Objective, int32_t N>
+class LineObjective {
+public:
+    LineObjective(Objective const& o): objective_(o) {
+        x0_.resize(objective_.dim());
+        dir_.resize(objective_.dim());
+        x_.resize(objective_.dim());
+    }
+
+    VectorNs<N>& x0() { return x0_; }
+    VectorNs<N> const& x0() const { return x0_; }
+    VectorNs<N>& dir() { return dir_; }
+    VectorNs<N> const& dir() const { return dir_; }
+    VectorNs<N>& x() { return x_; }
+    VectorNs<N> const& x() const { return x_; }
+
+    void eval(Scalar t, Scalar& value) {
+        x_ = x0_ + t * dir_;
+        objective_.eval(x_, value);
+    }
+
+    void eval(Scalar t, Scalar& value, VectorNs<N>& gradient) {
+        x_ = x0_ + t * dir_;
+        objective_.eval(x_, value, gradient);
+    }
+
+private:
+    Objective const& objective_;
+    VectorNs<N> x0_;
+    VectorNs<N> dir_;
+    VectorNs<N> x_;
+};
+
+}
+
+#endif
diff --git a/optimization/optimizers/nelder_mead/main.cpp b/optimization/optimizers/nelder_mead/main.cpp
index 3324ecab812dd9647f61ccd0335edb9f4ae344ed..fe5fe42122c4c7c4c8406f0b478dc2e0c8edb16b 100644
--- a/optimization/optimizers/nelder_mead/main.cpp
+++ b/optimization/optimizers/nelder_mead/main.cpp
@@ -29,16 +29,18 @@ int main(int const argc, char const** argv) {
     }
 
     Eigen::Matrix<Scalar, 2, 3> simplex;
-    simplex.col(0) = Vector2s(-2, -1);
-    simplex.col(1) = Vector2s(-1, -1);
-    simplex.col(2) = Vector2s(-2, 0);
+    simplex.col(0) = Vector2s(-1, 2);
+    simplex.col(1) = Vector2s(-1, 1);
+    simplex.col(2) = Vector2s(-2, 2);
 
     //using Objective = Paraboloid<Vector2<Scalar>>;
     //Objective objective(dim);
     using Objective = Rosenbrock<Vector2s>;
     Objective objective;
     NelderMead<Objective, 2> optimizer(max_evaluations, relative_y_tolerance);
-    optimizer.optimize(objective, simplex);
+    VectorNs<2> minimum = optimizer.optimize(objective, simplex);
+    std::cout << "n evaluations: " << optimizer.n_evaluations() << '\n';
+    std::cout << "final point: " << minimum << '\n';
 
     if (!log_file_path.empty()) {
         json data = optimizer;
diff --git a/optimization/optimizers/nelder_mead/nelder_mead.h b/optimization/optimizers/nelder_mead/nelder_mead.h
index b071ff7141d5f74bf1a692d9a2e8a231422a02a8..d60f1ed5ad2f96eb1790fef339fc137af2b6b712 100644
--- a/optimization/optimizers/nelder_mead/nelder_mead.h
+++ b/optimization/optimizers/nelder_mead/nelder_mead.h
@@ -23,6 +23,8 @@ public:
         : max_evaluations_(me), relative_y_tolerance_(ry)
     {}
 
+    uint32_t n_evaluations() const { return n_evaluations_; }
+
     MatrixDN const& simplex_vertices() const { return simplex_vertices_; }
     VectorN const& simplex_values() const { return simplex_values_; }
 
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..8515a9c50019fd863675fa8e93209919140c2af8
--- /dev/null
+++ b/test/CMakeLists.txt
@@ -0,0 +1,7 @@
+add_executable(test
+    main.cpp
+    optimizers/line_search/bracket.cpp
+    optimizers/line_search/brent.cpp
+    optimizers/line_search/golden_section.cpp
+)
+target_link_libraries(test optimization_lib catch2)
diff --git a/test/main.cpp b/test/main.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..0c7c351f437f5f43f3bb62beb254a9f1ecbec5a0
--- /dev/null
+++ b/test/main.cpp
@@ -0,0 +1,2 @@
+#define CATCH_CONFIG_MAIN
+#include "catch.hpp"
diff --git a/test/optimizers/line_search/bracket.cpp b/test/optimizers/line_search/bracket.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..b2dd1ae4604a507eef79374dbbc0b7205b1e5776
--- /dev/null
+++ b/test/optimizers/line_search/bracket.cpp
@@ -0,0 +1,42 @@
+#include "catch.hpp"
+#include "optimizers/line_search/bracket.h"
+
+using namespace optimization;
+
+//--------------------------------------------------------------------------------------------------
+TEST_CASE("Bracket", "[Bracket]") {
+    BracketFinder bracket;
+
+    SECTION("parabola") {
+        struct Parabola {
+            void eval(Scalar x, Scalar& y) const { y = x * x; }
+        };
+        Parabola parabola;
+
+        bracket.bracket(parabola, -2, -1);
+        // Necessary invariant: we have a bracket.
+        REQUIRE(bracket.y_2() < bracket.y_1());
+        REQUIRE(bracket.y_2() < bracket.y_3());
+        // Because this is a parabola, it should find the minimum exactly.
+        REQUIRE(bracket.y_2() < 1e-9);
+        // The points should have this ordering because we started searching to the left of the min.
+        REQUIRE(bracket.x_1() < bracket.x_2());
+        REQUIRE(bracket.x_2() < bracket.x_3());
+        // Here it does a golden ratio step then completes the bracket with interpolation.
+        // So two initial evaluations, plus two more, for four total.
+        REQUIRE(bracket.n_evaluations() == 4);
+
+        bracket.bracket(parabola, 20, 22);
+        // Necessary invariant: we have a bracket.
+        REQUIRE(bracket.y_2() < bracket.y_1());
+        REQUIRE(bracket.y_2() < bracket.y_3());
+        // Because this is a parabola, it should find the minimum exactly.
+        REQUIRE(bracket.y_2() < 1e-9);
+        // Here we started searching to the right of the min.
+        REQUIRE(bracket.x_1() > bracket.x_2());
+        REQUIRE(bracket.x_2() > bracket.x_3());
+        // Here it does a golden ratio step, interpolation step, and golden ratio step.
+        // So two initial evaluations, plus three more, for five total.
+        REQUIRE(bracket.n_evaluations() == 5);
+    }
+}
diff --git a/test/optimizers/line_search/brent.cpp b/test/optimizers/line_search/brent.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..2cca032f38a81a4112af16eabd9239bc5fe7782d
--- /dev/null
+++ b/test/optimizers/line_search/brent.cpp
@@ -0,0 +1,36 @@
+#include "catch.hpp"
+#include "optimizers/line_search/brent.h"
+
+using namespace optimization;
+
+//--------------------------------------------------------------------------------------------------
+TEST_CASE("Brent", "[Brent]") {
+    // The 1e-8 sets the absolute tolerance on the width of the bracket.
+    // There's still a default relative tolerance in effect as well, but these tests don't hit it.
+    Brent brent(1e-8);
+    Bracket bracket;
+    Sample<Scalar> result;
+
+    SECTION("parabola") {
+        struct Parabola {
+            void eval(Scalar x, Scalar& y) const { y = x * x; }
+        };
+        Parabola parabola;
+
+        bracket = Bracket(-2, -1, 2, 4, 1, 4);
+        result = brent.optimize(parabola, bracket);
+        // The parabola is flat here so y accuracy had better exceed x accuracy.
+        REQUIRE(std::abs(result.point) < 1e-8);
+        REQUIRE(std::abs(result.value) < 1e-16);
+        // Just a sanity check.
+        REQUIRE(brent.n_evaluations() < 100);
+
+        bracket = Bracket(50, 10, -100, 2500, 100, 10000);
+        result = brent.optimize(parabola, bracket);
+        // The parabola is flat here so y accuracy had better exceed x accuracy.
+        REQUIRE(std::abs(result.point) < 1e-8);
+        REQUIRE(std::abs(result.value) < 1e-16);
+        // Just a sanity check.
+        REQUIRE(brent.n_evaluations() < 100);
+    }
+}
diff --git a/test/optimizers/line_search/golden_section.cpp b/test/optimizers/line_search/golden_section.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..875c65923cf5d32407eda3f61afa5fcaf46ed942
--- /dev/null
+++ b/test/optimizers/line_search/golden_section.cpp
@@ -0,0 +1,36 @@
+#include "catch.hpp"
+#include "optimizers/line_search/golden_section.h"
+
+using namespace optimization;
+
+//--------------------------------------------------------------------------------------------------
+TEST_CASE("GoldenSection", "[GoldenSection]") {
+    // The 1e-8 sets the absolute tolerance on the width of the bracket.
+    // There's still a default relative tolerance in effect as well, but these tests don't hit it.
+    GoldenSection golden_section(1e-8);
+    Bracket bracket;
+    Sample<Scalar> result;
+
+    SECTION("parabola") {
+        struct Parabola {
+            void eval(Scalar x, Scalar& y) const { y = x * x; }
+        };
+        Parabola parabola;
+
+        bracket = Bracket(-2, -1, 2, 4, 1, 4);
+        result = golden_section.optimize(parabola, bracket);
+        // The parabola is flat here so y accuracy had better exceed x accuracy.
+        REQUIRE(std::abs(result.point) < 1e-8);
+        REQUIRE(std::abs(result.value) < 1e-16);
+        // Just a sanity check.
+        REQUIRE(golden_section.n_evaluations() < 100);
+
+        bracket = Bracket(50, 10, -100, 2500, 100, 10000);
+        result = golden_section.optimize(parabola, bracket);
+        // The parabola is flat here so y accuracy had better exceed x accuracy.
+        REQUIRE(std::abs(result.point) < 1e-8);
+        REQUIRE(std::abs(result.value) < 1e-16);
+        // Just a sanity check.
+        REQUIRE(golden_section.n_evaluations() < 100);
+    }
+}