Commit c87a5e0e authored by Erik Strand

Improve memory efficiency

This enables some move semantics for potentially big arguments. I also
removed a redundant vector from the internal state of cgd.
parents cf6c5c38 174ba2da
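
The core pattern the commit applies is shown below in miniature: each public optimize() accepts the initial point either by const reference (copied into internal state) or by rvalue reference (moved into internal state), and both delegate to a private optimize_impl(). The overloads are written out explicitly because Eigen forbids passing its types by value. This is a minimal sketch under those assumptions, not the library's actual class:

#include <Eigen/Dense>
#include <utility>

using Vector = Eigen::VectorXd;

class Optimizer {
public:
    // Lvalue overload: the caller keeps its vector; we copy it.
    template <typename Objective>
    Vector const& optimize(Objective& objective, Vector const& initial_point) {
        point_ = initial_point;             // copy
        return optimize_impl(objective);
    }
    // Rvalue overload: steal the caller's buffer, avoiding an allocation.
    template <typename Objective>
    Vector const& optimize(Objective& objective, Vector&& initial_point) {
        point_ = std::move(initial_point);  // move
        return optimize_impl(objective);
    }

private:
    template <typename Objective>
    Vector const& optimize_impl(Objective& objective) {
        // The real work reads and updates point_ here.
        return point_;
    }

    Vector point_;
};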
@@ -35,6 +35,7 @@ public:
inline Eigen::Map<const Eigen::MatrixXd> point();
Scalar value() { return cmaes_Get(&cma_es_, "fbestever"); }
// TODO: Support passing initial point by rvalue reference.
template <typename Objective>
Eigen::Map<const Eigen::MatrixXd> optimize(
Objective& objective,
......
@@ -3,7 +3,6 @@
#include "logs/nothing.h"
#include "optimizers/line_search/brent.h"
#include "optimizers/line_search/line_objective.h"
#include <iostream>
@@ -33,12 +32,37 @@ public:
VectorNs<N> const& gradient() const { return gradient_; }
Scalar value() const { return value_; }
// There are four cases for optimize, since you can use a log or not, and can pass an rvalue or
// not. These methods ultimately all call optimize_impl to do the real work.
// Note: I explicitly write out the Vector const& and Vector&& cases rather than passing by
// value and moving since Eigen forbids passing by value:
// https://eigen.tuxfamily.org/dox/group__TopicPassingByValue.html
template <typename Objective>
Vector const& optimize(Objective& objective, Vector const& initial_point);
Vector const& optimize(Objective& objective, Vector const& initial_point) {
ConjugateGradientDescentLogNothing log;
return optimize(objective, initial_point, log);
}
template <typename Objective, typename Log>
Vector const& optimize(Objective& objective, Vector const& initial_point, Log& log) {
point_ = initial_point;
return optimize_impl(objective, log);
}
template <typename Objective>
Vector const& optimize(Objective& objective, Vector&& initial_point) {
ConjugateGradientDescentLogNothing log;
return optimize(objective, std::move(initial_point), log);
}
template <typename Objective, typename Log>
Vector const& optimize(Objective& objective, Vector const& initial_point, Log& log);
Vector const& optimize(Objective& objective, Vector&& initial_point, Log& log) {
point_ = std::move(initial_point);
return optimize_impl(objective, log);
}
private:
// Actually does the optimization, once point_ has been initialized.
template <typename Objective, typename Log>
Vector const& optimize_impl(Objective& objective, Log& log);
// hyperparameters
Scalar gradient_threshold_;
Scalar progress_threshold_;
@@ -48,49 +72,40 @@ private:
// algorithm state
uint32_t n_evaluations_;
uint32_t n_iterations_;
// This object holds the current point (x0) and search direction (dir).
// TODO allow this to live here, somehow
//LineObjective<Objective, N> line_objective_;
VectorNs<N> point_;
VectorNs<N> direction_;
VectorNs<N> new_point_;
Vector point_; // where we are currently
Vector direction_; // the direction we're searching
Vector gradient_; // the gradient at point_
// This variable sees double duty. When we're line searching, it holds the candidate points.
// Between evaluating the function at the line minimum and computing the next search direction,
// it stores the gradient at the point of the previous iteration.
Vector new_point_or_last_gradient_;
Scalar alpha_; // stores the most recent jump distance, which we use as a guess for the next one
Scalar value_;
Scalar last_value_;
Vector gradient_;
Vector last_gradient_;
// algorithm constants
static constexpr Scalar tiny_ = std::numeric_limits<Scalar>::epsilon();
};
//..................................................................................................
template <int32_t N>
template <typename Objective>
VectorNs<N> const& ConjugateGradientDescent<N>::optimize(
Objective& objective,
Vector const& initial_point
) {
ConjugateGradientDescentLogNothing log;
return optimize(objective, initial_point, log);
}
//..................................................................................................
template <int32_t N>
template <typename Objective, typename Log>
VectorNs<N> const& ConjugateGradientDescent<N>::optimize(
VectorNs<N> const& ConjugateGradientDescent<N>::optimize_impl(
Objective& objective,
Vector const& initial_point,
Log& log
) {
n_evaluations_ = 0;
n_iterations_ = 0;
point_ = initial_point;
alpha_ = -1;
// point_ has already been initialized
direction_.resize(point_.size());
gradient_.resize(point_.size());
last_gradient_.resize(point_.size());
new_point_or_last_gradient_.resize(point_.size());
alpha_ = -1;
LineObjective<Objective, N> line_objective{objective, point_, direction_, new_point_};
auto const line_objective = [&](Scalar t, Scalar& value) {
new_point_or_last_gradient_ = point_ + t * direction_;
objective(new_point_or_last_gradient_, value);
};
BracketFinder bracket;
Brent line_minimizer;
@@ -106,19 +121,23 @@ VectorNs<N> const& ConjugateGradientDescent<N>::optimize(
}
while (true) {
// Find the minimum along this direction.
// Find the minimum along this direction. The next two lines are the only ones that use
// new_point_or_last_gradient_ as new_point_ (happens inside the lambda line_objective).
bracket.bracket(line_objective, Scalar(0), alpha_);
alpha_ = line_minimizer.optimize(line_objective, bracket).point;
n_evaluations_ += bracket.n_evaluations();
n_evaluations_ += line_minimizer.n_evaluations();
// Note: at some point new_point_ already had this new value, but right now there's no
// guarantee that it's the current value.
point_ += alpha_ * direction_;
// Can we re-use evaluation here? Would need to rewrite line search to have guarantees on
// which point it evaluated last. Probably best to do this after I've written dbrent.
last_value_ = value_;
gradient_.swap(last_gradient_);
// Basically this line means new_point_or_last_gradient_ = std::move(gradient_), but I want
// to be clear that we're not releasing any memory here. For the rest of the loop,
// new_point_or_last_gradient_ stores the last gradient.
new_point_or_last_gradient_.swap(gradient_);
// Again, we evaluated the objective here already during the line search, but we probably
// threw that data out.
// Note: at some point new_point_or_last_gradient_ already had this new value, but right now
// there's no guarantee that it's the current value.
point_ += alpha_ * direction_;
objective(point_, value_, gradient_);
++n_iterations_;
@@ -149,10 +168,11 @@ VectorNs<N> const& ConjugateGradientDescent<N>::optimize(
return point_;
}
// Choose the next search direction. It is conjugate to all prior directions.
// I view this as the start of the next iteration.
Scalar const gamma =
gradient_.dot(gradient_ - last_gradient_) / last_gradient_.dot(last_gradient_);
// Choose the next search direction. It is conjugate to all prior directions. Recall that
// new_point_or_last_gradient_ currently stores the last gradient. I view this as the start
// of the next iteration.
Scalar const gamma = gradient_.dot(gradient_ - new_point_or_last_gradient_)
/ new_point_or_last_gradient_.dot(new_point_or_last_gradient_);
direction_ = gradient_ + gamma * direction_;
}
}
......
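
For reference, the direction update at the end of this hunk is the Polak-Ribière conjugate gradient formula (up to the sign convention the objective uses for its gradient). With g_k the gradient at iteration k and d_k the search direction:

\gamma_k = \frac{g_k^\top \left( g_k - g_{k-1} \right)}{g_{k-1}^\top g_{k-1}},
\qquad
d_k = g_k + \gamma_k \, d_{k-1}

At that point in the loop, g_{k-1} lives in new_point_or_last_gradient_ (it was swapped in before the new gradient was computed), which is what the numerator and denominator read.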
@@ -60,9 +60,9 @@ int main(int const argc, char const** argv) {
// Only log stuff if we're going to use it.
if (log_file_path.empty() && vis_file_path.empty()) {
optimizer.optimize(objective, initial_point);
optimizer.optimize(objective, std::move(initial_point));
} else {
optimizer.optimize(objective, initial_point, log);
optimizer.optimize(objective, std::move(initial_point), log);
}
std::cout << "n evaluations: " << optimizer.n_evaluations() << '\n';
......
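
One caveat at the call sites above: after std::move(initial_point), the vector is left in a valid but unspecified state, so moving is only safe if initial_point is not read again later in main. A minimal illustration of the distinction, using a hypothetical optimizer with the two overloads from this commit:

Eigen::VectorXd initial_point = Eigen::VectorXd::Zero(10);
optimizer.optimize(objective, initial_point);             // lvalue: copied, still usable
optimizer.optimize(objective, std::move(initial_point));  // rvalue: moved; treat as empty afterwards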
@@ -25,6 +25,7 @@ public:
VectorNs<N> const& gradient() const { return gradient_; }
Scalar value() const { return value_; }
// TODO: Support passing initial point by rvalue reference.
template <typename Objective>
VectorNs<N> const& optimize(Objective& objective, VectorNs<N> const& initial_point);
......
#ifndef OPTIMIZATION_LINE_SEARCH_LINE_OBJECTIVE_H
#define OPTIMIZATION_LINE_SEARCH_LINE_OBJECTIVE_H
#include "utils/vector.h"
namespace optimization {
//--------------------------------------------------------------------------------------------------
template <typename Objective, int32_t N>
struct LineObjective {
public:
LineObjective(Objective& o, VectorNs<N>& x0, VectorNs<N>& dir, VectorNs<N>& x) :
objective_(o), x0_(x0), dir_(dir), x_(x)
{}
void operator()(Scalar t, Scalar& value) {
x_ = x0_ + t * dir_;
objective_(x_, value);
}
void operator()(Scalar t, Scalar& value, VectorNs<N>& gradient) {
x_ = x0_ + t * dir_;
objective_(x_, value, gradient);
}
private:
Objective& objective_;
VectorNs<N>& x0_;
VectorNs<N>& dir_;
VectorNs<N>& x_;
};
}
#endif
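
This header appears in full here, presumably because the commit removes it now that the conjugate gradient code builds the line objective inline as a lambda capturing point_, direction_, and the scratch vector. Anything with the call shape void(Scalar, Scalar&) can stand in for it. A hedged sketch of how such a callable is consumed, using only the BracketFinder and Brent calls that appear in the diff above:

// Assumes Scalar, BracketFinder, and Brent from the surrounding library.
template <typename LineObj>
Scalar minimize_along_line(LineObj& line_objective, Scalar alpha_guess) {
    BracketFinder bracket;
    Brent line_minimizer;
    bracket.bracket(line_objective, Scalar(0), alpha_guess);        // bracket a minimum starting from t = 0
    return line_minimizer.optimize(line_objective, bracket).point;  // refine the bracket with Brent's method
}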
@@ -46,9 +46,9 @@ int main(int const argc, char const** argv) {
// Only log stuff if we're going to use it.
if (log_file_path.empty() && vis_file_path.empty()) {
optimizer.optimize(objective, simplex);
optimizer.optimize(objective, std::move(simplex));
} else {
optimizer.optimize(objective, simplex, log);
optimizer.optimize(objective, std::move(simplex), log);
}
std::cout << "n evaluations: " << optimizer.n_evaluations() << '\n';
......
@@ -48,14 +48,17 @@ public:
}
Scalar value() const { return simplex_values_[i_lowest_]; }
// Constructs a simplex from an initial point by offsetting all coordinate values.
// These methods return the best point, as a reference to a column (like point()).
// All optimization methods return the best point, as a reference to a column (like point()).
// These two construct a simplex from an initial point by offsetting all coordinate values.
template <typename Objective>
decltype(auto) optimize(
Objective& objective,
VectorD const& initial_point,
Scalar offset = 1
);
) {
NelderMeadLogNothing log;
return optimize(objective, initial_point, offset, log);
}
template <typename Objective, typename Log>
decltype(auto) optimize(
Objective& objective,
@@ -63,14 +66,34 @@ public:
Scalar offset,
Log& log
);
// Each column of simplex is a vertex.
// These four take a full simplex. Each column is a vertex.
template <typename Objective>
decltype(auto) optimize(Objective& objective, MatrixDN const& simplex);
decltype(auto) optimize(Objective& objective, MatrixDN&& simplex) {
NelderMeadLogNothing log;
return optimize(objective, std::move(simplex), log);
}
template <typename Objective, typename Log>
decltype(auto) optimize(Objective& objective, MatrixDN const& simplex, Log& log);
decltype(auto) optimize(Objective& objective, MatrixDN&& simplex, Log& log) {
simplex_vertices_ = std::move(simplex);
return optimize_impl(objective, log);
}
template <typename Objective>
decltype(auto) optimize(Objective& objective, MatrixDN const& simplex) {
NelderMeadLogNothing log;
return optimize(objective, simplex, log);
}
template <typename Objective, typename Log>
decltype(auto) optimize(Objective& objective, MatrixDN const& simplex, Log& log) {
simplex_vertices_ = simplex;
return optimize_impl(objective, log);
}
private:
// Does the optimization assuming simplex_vertices_ has been initialized.
template <typename Objective, typename Log>
decltype(auto) optimize_impl(Objective& objective, Log& log);
// Helper method for optimize_impl.
template <typename Objective>
Scalar try_new_point(Objective& objective, Scalar factor);
@@ -102,18 +125,6 @@ private:
static constexpr Scalar tiny_ = 1e-10;
};
//..................................................................................................
template <int32_t D>
template <typename Objective>
decltype(auto) NelderMead<D>::optimize(
Objective& objective,
VectorD const& initial_point,
Scalar offset
) {
NelderMeadLogNothing log;
return optimize(objective, initial_point, offset, log);
}
//..................................................................................................
template <int32_t D>
template <typename Objective, typename Log>
@@ -123,30 +134,20 @@ decltype(auto) NelderMead<D>::optimize(
Scalar offset,
Log& log
) {
MatrixDN simplex;
simplex.resize(initial_point.size(), initial_point.size() + 1);
simplex.col(0) = initial_point;
simplex_vertices_.resize(initial_point.size(), initial_point.size() + 1);
simplex_vertices_.col(0) = initial_point;
for (uint32_t i = 0; i < initial_point.size(); ++i) {
simplex.col(i + 1) = initial_point;
simplex(i, i + 1) += offset;
simplex_vertices_.col(i + 1) = initial_point;
simplex_vertices_(i, i + 1) += offset;
}
return optimize(objective, simplex, log);
}
//..................................................................................................
template <int32_t D>
template <typename Objective>
decltype(auto) NelderMead<D>::optimize(Objective& objective, MatrixDN const& simplex) {
NelderMeadLogNothing log;
return optimize(objective, simplex, log);
return optimize_impl(objective, log);
}
//..................................................................................................
template <int32_t D>
template <typename Objective, typename Log>
decltype(auto) NelderMead<D>::optimize(Objective& objective, MatrixDN const& simplex, Log& log) {
// initialize state
simplex_vertices_ = simplex;
decltype(auto) NelderMead<D>::optimize_impl(Objective& objective, Log& log) {
// initialize state, assuming simplex_vertices_ is already set
n_vertices_ = simplex_vertices_.cols();
dim_ = simplex_vertices_.rows();
assert(n_vertices_ == dim_ + 1);
......
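
For reference, the simplex construction in the last hunk takes an initial point p in R^D and an offset, and builds the D + 1 vertices

v_0 = p, \qquad v_i = p + \text{offset} \cdot e_i \quad (i = 1, \dots, D)

where e_i is the i-th coordinate direction. For example, with p = (0, 0) and offset = 1 the vertices are (0, 0), (1, 0), and (0, 1), stored as the columns of simplex_vertices_.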