diff --git a/optimization/optimizers/cma_es/cma_es.h b/optimization/optimizers/cma_es/cma_es.h
index 7cdfa004ab4a144415a2b7b1181fc48111f06275..4994c35477c81c4204087e33ac7a995eb3c14b33 100644
--- a/optimization/optimizers/cma_es/cma_es.h
+++ b/optimization/optimizers/cma_es/cma_es.h
@@ -35,6 +35,7 @@ public:
     inline Eigen::Map<const Eigen::MatrixXd> point();
     Scalar value() { return cmaes_Get(&cma_es_, "fbestever"); }
 
+    // TODO: Support passing initial point by rvalue reference.
     template <typename Objective>
     Eigen::Map<const Eigen::MatrixXd> optimize(
         Objective& objective,
diff --git a/optimization/optimizers/conjugate_gradient_descent/conjugate_gradient_descent.h b/optimization/optimizers/conjugate_gradient_descent/conjugate_gradient_descent.h
index 1dcb6b97eaa0811165926a2c1b8c678a113e2e29..85fe5be55a12333c7949a187cf83fc91f9a3973a 100644
--- a/optimization/optimizers/conjugate_gradient_descent/conjugate_gradient_descent.h
+++ b/optimization/optimizers/conjugate_gradient_descent/conjugate_gradient_descent.h
@@ -3,7 +3,6 @@
 
 #include "logs/nothing.h"
 #include "optimizers/line_search/brent.h"
-#include "optimizers/line_search/line_objective.h"
 
 #include <iostream>
 
@@ -33,12 +32,37 @@ public:
     VectorNs<N> const& gradient() const { return gradient_; }
     Scalar value() const { return value_; }
 
+    // There are four overloads of optimize: you can pass a log or not, and can pass the initial
+    // point as an rvalue or not. These all ultimately call optimize_impl to do the real work.
+    // Note: I explicitly write out the Vector const& and Vector&& cases rather than passing by
+    // value and moving, since passing Eigen objects by value is unsafe:
+    // https://eigen.tuxfamily.org/dox/group__TopicPassingByValue.html
     template <typename Objective>
-    Vector const& optimize(Objective& objective, Vector const& initial_point);
+    Vector const& optimize(Objective& objective, Vector const& initial_point) {
+        ConjugateGradientDescentLogNothing log;
+        return optimize(objective, initial_point, log);
+    }
+    template <typename Objective, typename Log>
+    Vector const& optimize(Objective& objective, Vector const& initial_point, Log& log) {
+        point_ = initial_point;
+        return optimize_impl(objective, log);
+    }
+    template <typename Objective>
+    Vector const& optimize(Objective& objective, Vector&& initial_point) {
+        ConjugateGradientDescentLogNothing log;
+        return optimize(objective, std::move(initial_point), log);
+    }
     template <typename Objective, typename Log>
-    Vector const& optimize(Objective& objective, Vector const& initial_point, Log& log);
+    Vector const& optimize(Objective& objective, Vector&& initial_point, Log& log) {
+        point_ = std::move(initial_point);
+        return optimize_impl(objective, log);
+    }
 
 private:
+    // Actually does the optimization, once point_ has been initialized.
+    template <typename Objective, typename Log>
+    Vector const& optimize_impl(Objective& objective, Log& log);
+
     // hyperparameters
     Scalar gradient_threshold_;
     Scalar progress_threshold_;
@@ -48,49 +72,40 @@ private:
     // algorithm state
     uint32_t n_evaluations_;
     uint32_t n_iterations_;
-    // This object holds the current point (x0) and search direction (dir).
-    // TODO allow this to live here, somehow
-    //LineObjective<Objective, N> line_objective_;
-    VectorNs<N> point_;
-    VectorNs<N> direction_;
-    VectorNs<N> new_point_;
+    Vector point_; // where we are currently
+    Vector direction_; // the direction we're searching
+    Vector gradient_; // the gradient at point_
+    // This variable sees double duty. When we're line searching, it holds the candidate points.
+    // Between evaluating the function at the line minimum and computing the next search
+    // direction, it stores the gradient at the previous iteration's point.
+    Vector new_point_or_last_gradient_;
     Scalar alpha_; // stores the most recent jump distance, which we use as a guess for the next one
     Scalar value_;
     Scalar last_value_;
-    Vector gradient_;
-    Vector last_gradient_;
 
     // algorithm constants
     static constexpr Scalar tiny_ = std::numeric_limits<Scalar>::epsilon();
 };
 
-//..................................................................................................
-template <int32_t N>
-template <typename Objective>
-VectorNs<N> const& ConjugateGradientDescent<N>::optimize(
-    Objective& objective,
-    Vector const& initial_point
-) {
-    ConjugateGradientDescentLogNothing log;
-    return optimize(objective, initial_point, log);
-}
-
 //..................................................................................................
 template <int32_t N>
 template <typename Objective, typename Log>
-VectorNs<N> const& ConjugateGradientDescent<N>::optimize(
+VectorNs<N> const& ConjugateGradientDescent<N>::optimize_impl(
     Objective& objective,
-    Vector const& initial_point,
     Log& log
 ) {
     n_evaluations_ = 0;
     n_iterations_ = 0;
-    point_ = initial_point;
-    alpha_ = -1;
+    // point_ has already been initialized
+    direction_.resize(point_.size());
     gradient_.resize(point_.size());
-    last_gradient_.resize(point_.size());
+    new_point_or_last_gradient_.resize(point_.size());
+    alpha_ = -1;
 
-    LineObjective<Objective, N> line_objective{objective, point_, direction_, new_point_};
+    auto const line_objective = [&](Scalar t, Scalar& value) {
+        new_point_or_last_gradient_ = point_ + t * direction_;
+        objective(new_point_or_last_gradient_, value);
+    };
     BracketFinder bracket;
     Brent line_minimizer;
 
@@ -106,19 +121,23 @@ VectorNs<N> const& ConjugateGradientDescent<N>::optimize(
     }
 
     while (true) {
-        // Find the minimum along this direction.
+        // Find the minimum along this direction. The next two lines are the only ones that use
+        // new_point_or_last_gradient_ as new_point_ (happens inside the lambda line_objective).
         bracket.bracket(line_objective, Scalar(0), alpha_);
         alpha_ = line_minimizer.optimize(line_objective, bracket).point;
         n_evaluations_ += bracket.n_evaluations();
         n_evaluations_ += line_minimizer.n_evaluations();
 
-        // Note: at some point new_point_ already had this new value, but right now there's no
-        // guarantee that it's the current value.
-        point_ += alpha_ * direction_;
-        // Can we re-use evaluation here? Would need to rewrite line search to have guarantees on
-        // which point it evaluated last. Probably best to do this after I've written dbrent.
         last_value_ = value_;
-        gradient_.swap(last_gradient_);
+        // Basically this line means new_point_or_last_gradient_ = std::move(gradient_), but I want
+        // to be clear that we're not releasing any memory here. For the rest of the loop,
+        // new_point_or_last_gradient_ stores the last gradient.
+        new_point_or_last_gradient_.swap(gradient_);
+        // Again, we already evaluated the objective here during the line search, but we probably
+        // threw that data out.
+        // Note: at some point new_point_or_last_gradient_ already held this new value, but right
+        // now there's no guarantee that it's the current value.
+        point_ += alpha_ * direction_;
 
         objective(point_, value_, gradient_);
         ++n_iterations_;
@@ -149,10 +168,11 @@ VectorNs<N> const& ConjugateGradientDescent<N>::optimize(
             return point_;
         }
 
-        // Choose the next search direction. It is conjugate to all prior directions.
-        // I view this as the start of the next iteration.
-        Scalar const gamma =
-            gradient_.dot(gradient_ - last_gradient_) / last_gradient_.dot(last_gradient_);
+        // Choose the next search direction. It is conjugate to all prior directions. Recall that
+        // new_point_or_last_gradient_ currently stores the last gradient. I view this as the start
+        // of the next iteration.
+        Scalar const gamma = gradient_.dot(gradient_ - new_point_or_last_gradient_)
+            / new_point_or_last_gradient_.dot(new_point_or_last_gradient_);
         direction_ = gradient_ + gamma * direction_;
     }
 }
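The line_objective lambda above is what replaces the LineObjective helper that is no longer included: it restricts the N-dimensional objective to the line point_ + t * direction_, writing each candidate point into new_point_or_last_gradient_ so no extra buffer is needed. Below is a minimal standalone sketch of the same idea; the toy quadratic objective and the plain Eigen::VectorXd type are stand-ins, not this repository's Scalar/VectorNs aliases.

// Standalone sketch only: a toy quadratic objective and plain Eigen::VectorXd, not the
// Scalar/VectorNs aliases used in this repository.
#include <Eigen/Dense>

#include <iostream>

int main() {
    using Vector = Eigen::VectorXd;

    // Toy objective f(x) = |x|^2, written in the (point, value) calling convention used above.
    auto const objective = [](Vector const& x, double& value) { value = x.squaredNorm(); };

    Vector point(2);
    point << 1.0, 2.0;
    Vector direction(2);
    direction << -1.0, 0.0;
    Vector candidate(2);  // plays the role of new_point_or_last_gradient_ during the line search

    // One-dimensional restriction g(t) = f(point + t * direction).
    auto const line_objective = [&](double t, double& value) {
        candidate = point + t * direction;
        objective(candidate, value);
    };

    double value = 0;
    line_objective(0.5, value);  // evaluates f at (0.5, 2.0)
    std::cout << value << '\n';  // prints 4.25
    return 0;
}

Any one-dimensional minimizer can then be run on such a restriction, which is exactly how optimize_impl hands line_objective to BracketFinder and Brent. The gamma computed in the last hunk above is the Polak-Ribiere coefficient; the only change there is that the previous gradient now lives in new_point_or_last_gradient_.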
diff --git a/optimization/optimizers/conjugate_gradient_descent/main.cpp b/optimization/optimizers/conjugate_gradient_descent/main.cpp
index 73276d594679f83d185a789988f26f8f7121a711..df00eca0c9bf0336f4dc01d2e0628cef453dfaa4 100644
--- a/optimization/optimizers/conjugate_gradient_descent/main.cpp
+++ b/optimization/optimizers/conjugate_gradient_descent/main.cpp
@@ -60,9 +60,9 @@ int main(int const argc, char const** argv) {
 
     // Only log stuff if we're going to use it.
     if (log_file_path.empty() && vis_file_path.empty()) {
-        optimizer.optimize(objective, initial_point);
+        optimizer.optimize(objective, std::move(initial_point));
     } else {
-        optimizer.optimize(objective, initial_point, log);
+        optimizer.optimize(objective, std::move(initial_point), log);
     }
 
     std::cout << "n evaluations: " << optimizer.n_evaluations() << '\n';
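At these call sites std::move(initial_point) makes the argument an rvalue, so overload resolution picks the Vector&& version of optimize and the vector's storage is moved into point_ rather than copied; initial_point should not be read afterwards. A hypothetical illustration of that overload selection follows (the Sink struct is illustrative only, not part of this repository).

// Hypothetical illustration only; Sink stands in for an optimizer with a const&/&& overload pair.
#include <Eigen/Dense>

#include <iostream>
#include <utility>

struct Sink {
    Eigen::VectorXd stored;
    void take(Eigen::VectorXd const& v) { stored = v; }        // lvalue argument: copies
    void take(Eigen::VectorXd&& v) { stored = std::move(v); }  // rvalue argument: steals the buffer
};

int main() {
    Eigen::VectorXd initial_point = Eigen::VectorXd::Ones(3);
    Sink sink;

    sink.take(initial_point);             // binds to the const& overload; initial_point is intact
    sink.take(std::move(initial_point));  // binds to the && overload; no copy of the data is made
    // initial_point is now in a valid but unspecified state and shouldn't be read again, which is
    // why moving only makes sense when the caller is done with the vector.
    std::cout << sink.stored.size() << '\n';  // prints 3
    return 0;
}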
diff --git a/optimization/optimizers/gradient_descent/gradient_descent.h b/optimization/optimizers/gradient_descent/gradient_descent.h
index e8a7bf680c24a7533b6b509a330717b4291f1598..77968c353c04f49fb3766e784ea5dab513ef1ca6 100644
--- a/optimization/optimizers/gradient_descent/gradient_descent.h
+++ b/optimization/optimizers/gradient_descent/gradient_descent.h
@@ -25,6 +25,7 @@ public:
     VectorNs<N> const& gradient() const { return gradient_; }
     Scalar value() const { return value_; }
 
+    // TODO: Support passing initial point by rvalue reference.
     template <typename Objective>
     VectorNs<N> const& optimize(Objective& objective, VectorNs<N> const& initial_point);
 
diff --git a/optimization/optimizers/line_search/line_objective.h b/optimization/optimizers/line_search/line_objective.h
deleted file mode 100644
index 7a7bb55a4054aa2b82f1679797c3bc76f8726c65..0000000000000000000000000000000000000000
--- a/optimization/optimizers/line_search/line_objective.h
+++ /dev/null
@@ -1,35 +0,0 @@
-#ifndef OPTIMIZATION_LINE_SEARCH_LINE_OBJECTIVE_H
-#define OPTIMIZATION_LINE_SEARCH_LINE_OBJECTIVE_H
-
-#include "utils/vector.h"
-
-namespace optimization {
-
-//--------------------------------------------------------------------------------------------------
-template <typename Objective, int32_t N>
-struct LineObjective {
-public:
-    LineObjective(Objective& o, VectorNs<N>& x0, VectorNs<N>& dir, VectorNs<N>& x) :
-        objective_(o), x0_(x0), dir_(dir), x_(x)
-    {}
-
-    void operator()(Scalar t, Scalar& value) {
-        x_ = x0_ + t * dir_;
-        objective_(x_, value);
-    }
-
-    void operator()(Scalar t, Scalar& value, VectorNs<N>& gradient) {
-        x_ = x0_ + t * dir_;
-        objective_(x_, value, gradient);
-    }
-
-private:
-    Objective& objective_;
-    VectorNs<N>& x0_;
-    VectorNs<N>& dir_;
-    VectorNs<N>& x_;
-};
-
-}
-
-#endif
diff --git a/optimization/optimizers/nelder_mead/main.cpp b/optimization/optimizers/nelder_mead/main.cpp
index 95c1c4355bc8b07950a1b0eae41ffab66b414ca9..30a3983871cd79dc0faf7329ddcbc7266d31a0b6 100644
--- a/optimization/optimizers/nelder_mead/main.cpp
+++ b/optimization/optimizers/nelder_mead/main.cpp
@@ -46,9 +46,9 @@ int main(int const argc, char const** argv) {
 
     // Only log stuff if we're going to use it.
     if (log_file_path.empty() && vis_file_path.empty()) {
-        optimizer.optimize(objective, simplex);
+        optimizer.optimize(objective, std::move(simplex));
     } else {
-        optimizer.optimize(objective, simplex, log);
+        optimizer.optimize(objective, std::move(simplex), log);
     }
 
     std::cout << "n evaluations: " << optimizer.n_evaluations() << '\n';
diff --git a/optimization/optimizers/nelder_mead/nelder_mead.h b/optimization/optimizers/nelder_mead/nelder_mead.h
index 2f0fbcb0acb38d0118149aa80d28834902591e6c..b2ca96bb398da2c49394a932de96c2e90d43943d 100644
--- a/optimization/optimizers/nelder_mead/nelder_mead.h
+++ b/optimization/optimizers/nelder_mead/nelder_mead.h
@@ -48,14 +48,17 @@ public:
     }
     Scalar value() const { return simplex_values_[i_lowest_]; }
 
-    // Constructs a simplex from an initial point by offsetting all coordinate values.
-    // These methods return the best point, as a reference to a column (like point()).
+    // All optimization methods return the best point, as a reference to a column (like point()).
+    // These two construct a simplex from an initial point by offsetting all coordinate values.
     template <typename Objective>
    decltype(auto) optimize(
         Objective& objective,
         VectorD const& initial_point,
         Scalar offset = 1
-    );
+    ) {
+        NelderMeadLogNothing log;
+        return optimize(objective, initial_point, offset, log);
+    }
     template <typename Objective, typename Log>
     decltype(auto) optimize(
         Objective& objective,
@@ -63,14 +66,34 @@ public:
         Scalar offset,
         Log& log
     );
-
-    // Each column of simplex is a vertex.
+    // These four take a full simplex. Each column is a vertex.
     template <typename Objective>
-    decltype(auto) optimize(Objective& objective, MatrixDN const& simplex);
+    decltype(auto) optimize(Objective& objective, MatrixDN&& simplex) {
+        NelderMeadLogNothing log;
+        return optimize(objective, std::move(simplex), log);
+    }
     template <typename Objective, typename Log>
-    decltype(auto) optimize(Objective& objective, MatrixDN const& simplex, Log& log);
+    decltype(auto) optimize(Objective& objective, MatrixDN&& simplex, Log& log) {
+        simplex_vertices_ = std::move(simplex);
+        return optimize_impl(objective, log);
+    }
+    template <typename Objective>
+    decltype(auto) optimize(Objective& objective, MatrixDN const& simplex) {
+        NelderMeadLogNothing log;
+        return optimize(objective, simplex, log);
+    }
+    template <typename Objective, typename Log>
+    decltype(auto) optimize(Objective& objective, MatrixDN const& simplex, Log& log) {
+        simplex_vertices_ = simplex;
+        return optimize_impl(objective, log);
+    }
 
 private:
+    // Does the optimization assuming simplex_vertices_ has been initialized.
+    template <typename Objective, typename Log>
+    decltype(auto) optimize_impl(Objective& objective, Log& log);
+
+    // Helper method for optimize_impl.
     template <typename Objective>
     Scalar try_new_point(Objective& objective, Scalar factor);
 
@@ -102,18 +125,6 @@ private:
     static constexpr Scalar tiny_ = 1e-10;
 };
 
-//..................................................................................................
-template <int32_t D>
-template <typename Objective>
-decltype(auto) NelderMead<D>::optimize(
-    Objective& objective,
-    VectorD const& initial_point,
-    Scalar offset
-) {
-    NelderMeadLogNothing log;
-    return optimize(objective, initial_point, offset, log);
-}
-
 //..................................................................................................
 template <int32_t D>
 template <typename Objective, typename Log>
@@ -123,30 +134,20 @@ decltype(auto) NelderMead<D>::optimize(
     Scalar offset,
     Log& log
 ) {
-    MatrixDN simplex;
-    simplex.resize(initial_point.size(), initial_point.size() + 1);
-    simplex.col(0) = initial_point;
+    simplex_vertices_.resize(initial_point.size(), initial_point.size() + 1);
+    simplex_vertices_.col(0) = initial_point;
     for (uint32_t i = 0; i < initial_point.size(); ++i) {
-        simplex.col(i + 1) = initial_point;
-        simplex(i, i + 1) += offset;
+        simplex_vertices_.col(i + 1) = initial_point;
+        simplex_vertices_(i, i + 1) += offset;
     }
-    return optimize(objective, simplex, log);
-}
-
-//..................................................................................................
-template <int32_t D>
-template <typename Objective>
-decltype(auto) NelderMead<D>::optimize(Objective& objective, MatrixDN const& simplex) {
-    NelderMeadLogNothing log;
-    return optimize(objective, simplex, log);
+    return optimize_impl(objective, log);
 }
 
 //..................................................................................................
 template <int32_t D>
 template <typename Objective, typename Log>
-decltype(auto) NelderMead<D>::optimize(Objective& objective, MatrixDN const& simplex, Log& log) {
-    // initialize state
-    simplex_vertices_ = simplex;
+decltype(auto) NelderMead<D>::optimize_impl(Objective& objective, Log& log) {
+    // initialize state, assuming simplex_vertices_ is already set
     n_vertices_ = simplex_vertices_.cols();
     dim_ = simplex_vertices_.rows();
     assert(n_vertices_ == dim_ + 1);
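For reference, the simplex construction performed by the initial-point overload above (now writing directly into simplex_vertices_) places the initial point in column 0 and offsets one coordinate per remaining column, so a nonzero offset yields dim + 1 affinely independent vertices. A standalone sketch of that construction follows, using plain Eigen types as stand-ins for this repository's MatrixDN and VectorD aliases.

// Standalone sketch only: plain Eigen types stand in for this repository's MatrixDN/VectorD.
#include <Eigen/Dense>

#include <iostream>

// Column 0 is the initial point; column i + 1 is the initial point with coordinate i shifted by
// offset. For a nonzero offset this gives dim + 1 affinely independent vertices.
Eigen::MatrixXd make_simplex(Eigen::VectorXd const& initial_point, double offset) {
    Eigen::Index const dim = initial_point.size();
    Eigen::MatrixXd simplex(dim, dim + 1);
    simplex.col(0) = initial_point;
    for (Eigen::Index i = 0; i < dim; ++i) {
        simplex.col(i + 1) = initial_point;
        simplex(i, i + 1) += offset;
    }
    return simplex;
}

int main() {
    Eigen::VectorXd x0(2);
    x0 << 3.0, -1.0;
    std::cout << make_simplex(x0, 1.0) << '\n';
    // Prints the 2x3 vertex matrix:
    //  3  4  3
    // -1 -1  0
    return 0;
}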