diff --git a/optimization/optimizers/cma_es/cma_es.h b/optimization/optimizers/cma_es/cma_es.h
index 7cdfa004ab4a144415a2b7b1181fc48111f06275..4994c35477c81c4204087e33ac7a995eb3c14b33 100644
--- a/optimization/optimizers/cma_es/cma_es.h
+++ b/optimization/optimizers/cma_es/cma_es.h
@@ -35,6 +35,7 @@ public:
     inline Eigen::Map<const Eigen::MatrixXd> point();
     Scalar value() { return cmaes_Get(&cma_es_, "fbestever"); }
 
+    // TODO: Support passing initial point by rvalue reference.
     template <typename Objective>
     Eigen::Map<const Eigen::MatrixXd> optimize(
         Objective& objective,
diff --git a/optimization/optimizers/conjugate_gradient_descent/conjugate_gradient_descent.h b/optimization/optimizers/conjugate_gradient_descent/conjugate_gradient_descent.h
index 1dcb6b97eaa0811165926a2c1b8c678a113e2e29..85fe5be55a12333c7949a187cf83fc91f9a3973a 100644
--- a/optimization/optimizers/conjugate_gradient_descent/conjugate_gradient_descent.h
+++ b/optimization/optimizers/conjugate_gradient_descent/conjugate_gradient_descent.h
@@ -3,7 +3,6 @@
 
 #include "logs/nothing.h"
 #include "optimizers/line_search/brent.h"
-#include "optimizers/line_search/line_objective.h"
 
 #include <iostream>
 
@@ -33,12 +32,37 @@ public:
     VectorNs<N> const& gradient() const { return gradient_; }
     Scalar value() const { return value_; }
 
+    // There are four overloads of optimize: you can pass a log or not, and can pass the initial
+    // point as an rvalue or not. These all ultimately call optimize_impl to do the real work.
+    // Note: I explicitly write out the Vector const& and Vector&& cases rather than passing by
+    // value and moving, since passing Eigen objects by value is unsafe:
+    // https://eigen.tuxfamily.org/dox/group__TopicPassingByValue.html
     template <typename Objective>
-    Vector const& optimize(Objective& objective, Vector const& initial_point);
+    Vector const& optimize(Objective& objective, Vector const& initial_point) {
+        ConjugateGradientDescentLogNothing log;
+        return optimize(objective, initial_point, log);
+    }
+    template <typename Objective, typename Log>
+    Vector const& optimize(Objective& objective, Vector const& initial_point, Log& log) {
+        point_ = initial_point;
+        return optimize_impl(objective, log);
+    }
+    template <typename Objective>
+    Vector const& optimize(Objective& objective, Vector&& initial_point) {
+        ConjugateGradientDescentLogNothing log;
+        return optimize(objective, std::move(initial_point), log);
+    }
     template <typename Objective, typename Log>
-    Vector const& optimize(Objective& objective, Vector const& initial_point, Log& log);
+    Vector const& optimize(Objective& objective, Vector&& initial_point, Log& log) {
+        point_ = std::move(initial_point);
+        return optimize_impl(objective, log);
+    }
 
 private:
+    // Actually does the optimization, once point_ has been initialized.
+    template <typename Objective, typename Log>
+    Vector const& optimize_impl(Objective& objective, Log& log);
+
     // hyperparameters
     Scalar gradient_threshold_;
     Scalar progress_threshold_;
@@ -48,49 +72,40 @@ private:
     // algorithm state
     uint32_t n_evaluations_;
     uint32_t n_iterations_;
-    // This object holds the current point (x0) and search direction (dir).
-    // TODO allow this to live here, somehow
-    //LineObjective<Objective, N> line_objective_;
-    VectorNs<N> point_;
-    VectorNs<N> direction_;
-    VectorNs<N> new_point_;
+    Vector point_; // where we are currently
+    Vector direction_; // the direction we're searching
+    Vector gradient_; // the gradient at point_
+    // This variable sees double duty. When we're line searching, it holds the candidate points.
+    // Between evaluating the function at the line minimum and computing the next search
+    // direction, it stores the gradient at the previous iteration's point.
+    Vector new_point_or_last_gradient_;
     Scalar alpha_; // stores the most recent jump distance, which we use as a guess for the next one
     Scalar value_;
     Scalar last_value_;
-    Vector gradient_;
-    Vector last_gradient_;
 
     // algorithm constants
     static constexpr Scalar tiny_ = std::numeric_limits<Scalar>::epsilon();
 };
 
-//..................................................................................................
-template <int32_t N>
-template <typename Objective>
-VectorNs<N> const& ConjugateGradientDescent<N>::optimize(
-    Objective& objective,
-    Vector const& initial_point
-) {
-    ConjugateGradientDescentLogNothing log;
-    return optimize(objective, initial_point, log);
-}
-
 //..................................................................................................
 template <int32_t N>
 template <typename Objective, typename Log>
-VectorNs<N> const& ConjugateGradientDescent<N>::optimize(
+VectorNs<N> const& ConjugateGradientDescent<N>::optimize_impl(
     Objective& objective,
-    Vector const& initial_point,
     Log& log
 ) {
     n_evaluations_ = 0;
     n_iterations_ = 0;
-    point_ = initial_point;
-    alpha_ = -1;
+    // point_ has already been initialized
+    direction_.resize(point_.size());
     gradient_.resize(point_.size());
-    last_gradient_.resize(point_.size());
+    new_point_or_last_gradient_.resize(point_.size());
+    alpha_ = -1;
 
-    LineObjective<Objective, N> line_objective{objective, point_, direction_, new_point_};
+    auto const line_objective = [&](Scalar t, Scalar& value) {
+        new_point_or_last_gradient_ = point_ + t * direction_;
+        objective(new_point_or_last_gradient_, value);
+    };
     BracketFinder bracket;
     Brent line_minimizer;
 
@@ -106,19 +121,23 @@ VectorNs<N> const& ConjugateGradientDescent<N>::optimize(
     }
 
     while (true) {
-        // Find the minimum along this direction.
+        // Find the minimum along this direction. The next two lines are the only ones that use
+        // new_point_or_last_gradient_ as new_point_ (happens inside the lambda line_objective).
         bracket.bracket(line_objective, Scalar(0), alpha_);
         alpha_ = line_minimizer.optimize(line_objective, bracket).point;
         n_evaluations_ += bracket.n_evaluations();
         n_evaluations_ += line_minimizer.n_evaluations();
 
-        // Note: at some point new_point_ already had this new value, but right now there's no
-        // guarantee that it's the current value.
-        point_ += alpha_ * direction_;
-        // Can we re-use evaluation here? Would need to rewrite line search to have guarantees on
-        // which point it evaluated last. Probably best to do this after I've written dbrent.
         last_value_ = value_;
-        gradient_.swap(last_gradient_);
+        // Basically this line means new_point_or_last_gradient_ = std::move(gradient_), but I want
+        // to be clear that we're not releasing any memory here. For the rest of the loop,
+        // new_point_or_last_gradient_ stores the last gradient.
+        new_point_or_last_gradient_.swap(gradient_);
+        // Again, we already evaluated the objective here during the line search, but we probably
+        // threw that data out.
+        // Note: at some point new_point_or_last_gradient_ already held this new value, but right
+        // now there's no guarantee that it's the current value.
+        point_ += alpha_ * direction_;
 
         objective(point_, value_, gradient_);
         ++n_iterations_;
@@ -149,10 +168,11 @@ VectorNs<N> const& ConjugateGradientDescent<N>::optimize(
             return point_;
         }
 
-        // Choose the next search direction. It is conjugate to all prior directions.
-        // I view this as the start of the next iteration.
-        Scalar const gamma =
-            gradient_.dot(gradient_ - last_gradient_) / last_gradient_.dot(last_gradient_);
+        // Choose the next search direction. It is conjugate to all prior directions. Recall that
+        // new_point_or_last_gradient_ currently stores the last gradient. I view this as the start
+        // of the next iteration.
+        Scalar const gamma = gradient_.dot(gradient_ - new_point_or_last_gradient_)
+            / new_point_or_last_gradient_.dot(new_point_or_last_gradient_);
         direction_ = gradient_ + gamma * direction_;
     }
 }
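The line_objective lambda above is what replaces the LineObjective helper that is no longer included: it restricts the N-dimensional objective to the line point_ + t * direction_, writing each candidate point into new_point_or_last_gradient_ so no extra buffer is needed. Below is a minimal standalone sketch of the same idea; the toy quadratic objective and the plain Eigen::VectorXd type are stand-ins, not this repository's Scalar/VectorNs aliases.

// Standalone sketch only: a toy quadratic objective and plain Eigen::VectorXd, not the
// Scalar/VectorNs aliases used in this repository.
#include <Eigen/Dense>

#include <iostream>

int main() {
    using Vector = Eigen::VectorXd;

    // Toy objective f(x) = |x|^2, written in the (point, value) calling convention used above.
    auto const objective = [](Vector const& x, double& value) { value = x.squaredNorm(); };

    Vector point(2);
    point << 1.0, 2.0;
    Vector direction(2);
    direction << -1.0, 0.0;
    Vector candidate(2);  // plays the role of new_point_or_last_gradient_ during the line search

    // One-dimensional restriction g(t) = f(point + t * direction).
    auto const line_objective = [&](double t, double& value) {
        candidate = point + t * direction;
        objective(candidate, value);
    };

    double value = 0;
    line_objective(0.5, value);  // evaluates f at (0.5, 2.0)
    std::cout << value << '\n';  // prints 4.25
    return 0;
}

Any one-dimensional minimizer can then be run on such a restriction, which is exactly how optimize_impl hands line_objective to BracketFinder and Brent. The gamma computed in the last hunk above is the Polak-Ribiere coefficient; the only change there is that the previous gradient now lives in new_point_or_last_gradient_.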
diff --git a/optimization/optimizers/conjugate_gradient_descent/main.cpp b/optimization/optimizers/conjugate_gradient_descent/main.cpp
index 73276d594679f83d185a789988f26f8f7121a711..df00eca0c9bf0336f4dc01d2e0628cef453dfaa4 100644
--- a/optimization/optimizers/conjugate_gradient_descent/main.cpp
+++ b/optimization/optimizers/conjugate_gradient_descent/main.cpp
@@ -60,9 +60,9 @@ int main(int const argc, char const** argv) {
 
     // Only log stuff if we're going to use it.
     if (log_file_path.empty() && vis_file_path.empty()) {
-        optimizer.optimize(objective, initial_point);
+        optimizer.optimize(objective, std::move(initial_point));
     } else {
-        optimizer.optimize(objective, initial_point, log);
+        optimizer.optimize(objective, std::move(initial_point), log);
     }
 
     std::cout << "n evaluations: " << optimizer.n_evaluations() << '\n';
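At these call sites std::move(initial_point) makes the argument an rvalue, so overload resolution picks the Vector&& version of optimize and the vector's storage is moved into point_ rather than copied; initial_point should not be read afterwards. A hypothetical illustration of that overload selection follows (the Sink struct is illustrative only, not part of this repository).

// Hypothetical illustration only; Sink stands in for an optimizer with a const&/&& overload pair.
#include <Eigen/Dense>

#include <iostream>
#include <utility>

struct Sink {
    Eigen::VectorXd stored;
    void take(Eigen::VectorXd const& v) { stored = v; }        // lvalue argument: copies
    void take(Eigen::VectorXd&& v) { stored = std::move(v); }  // rvalue argument: steals the buffer
};

int main() {
    Eigen::VectorXd initial_point = Eigen::VectorXd::Ones(3);
    Sink sink;

    sink.take(initial_point);             // binds to the const& overload; initial_point is intact
    sink.take(std::move(initial_point));  // binds to the && overload; no copy of the data is made
    // initial_point is now in a valid but unspecified state and shouldn't be read again, which is
    // why moving only makes sense when the caller is done with the vector.
    std::cout << sink.stored.size() << '\n';  // prints 3
    return 0;
}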
diff --git a/optimization/optimizers/gradient_descent/gradient_descent.h b/optimization/optimizers/gradient_descent/gradient_descent.h
index e8a7bf680c24a7533b6b509a330717b4291f1598..77968c353c04f49fb3766e784ea5dab513ef1ca6 100644
--- a/optimization/optimizers/gradient_descent/gradient_descent.h
+++ b/optimization/optimizers/gradient_descent/gradient_descent.h
@@ -25,6 +25,7 @@ public:
     VectorNs<N> const& gradient() const { return gradient_; }
     Scalar value() const { return value_; }
 
+    // TODO: Support passing initial point by rvalue reference.
     template <typename Objective>
     VectorNs<N> const& optimize(Objective& objective, VectorNs<N> const& initial_point);
 
diff --git a/optimization/optimizers/line_search/line_objective.h b/optimization/optimizers/line_search/line_objective.h
deleted file mode 100644
index 7a7bb55a4054aa2b82f1679797c3bc76f8726c65..0000000000000000000000000000000000000000
--- a/optimization/optimizers/line_search/line_objective.h
+++ /dev/null
@@ -1,35 +0,0 @@
-#ifndef OPTIMIZATION_LINE_SEARCH_LINE_OBJECTIVE_H
-#define OPTIMIZATION_LINE_SEARCH_LINE_OBJECTIVE_H
-
-#include "utils/vector.h"
-
-namespace optimization {
-
-//--------------------------------------------------------------------------------------------------
-template <typename Objective, int32_t N>
-struct LineObjective {
-public:
-    LineObjective(Objective& o, VectorNs<N>& x0, VectorNs<N>& dir, VectorNs<N>& x) :
-        objective_(o), x0_(x0), dir_(dir), x_(x)
-    {}
-
-    void operator()(Scalar t, Scalar& value) {
-        x_ = x0_ + t * dir_;
-        objective_(x_, value);
-    }
-
-    void operator()(Scalar t, Scalar& value, VectorNs<N>& gradient) {
-        x_ = x0_ + t * dir_;
-        objective_(x_, value, gradient);
-    }
-
-private:
-    Objective& objective_;
-    VectorNs<N>& x0_;
-    VectorNs<N>& dir_;
-    VectorNs<N>& x_;
-};
-
-}
-
-#endif
diff --git a/optimization/optimizers/nelder_mead/main.cpp b/optimization/optimizers/nelder_mead/main.cpp
index 95c1c4355bc8b07950a1b0eae41ffab66b414ca9..30a3983871cd79dc0faf7329ddcbc7266d31a0b6 100644
--- a/optimization/optimizers/nelder_mead/main.cpp
+++ b/optimization/optimizers/nelder_mead/main.cpp
@@ -46,9 +46,9 @@ int main(int const argc, char const** argv) {
 
     // Only log stuff if we're going to use it.
     if (log_file_path.empty() && vis_file_path.empty()) {
-        optimizer.optimize(objective, simplex);
+        optimizer.optimize(objective, std::move(simplex));
     } else {
-        optimizer.optimize(objective, simplex, log);
+        optimizer.optimize(objective, std::move(simplex), log);
     }
 
     std::cout << "n evaluations: " << optimizer.n_evaluations() << '\n';
diff --git a/optimization/optimizers/nelder_mead/nelder_mead.h b/optimization/optimizers/nelder_mead/nelder_mead.h
index 2f0fbcb0acb38d0118149aa80d28834902591e6c..b2ca96bb398da2c49394a932de96c2e90d43943d 100644
--- a/optimization/optimizers/nelder_mead/nelder_mead.h
+++ b/optimization/optimizers/nelder_mead/nelder_mead.h
@@ -48,14 +48,17 @@ public:
     }
     Scalar value() const { return simplex_values_[i_lowest_]; }
 
-    // Constructs a simplex from an initial point by offsetting all coordinate values.
-    // These methods return the best point, as a reference to a column (like point()).
+    // All optimization methods return the best point, as a reference to a column (like point()).
+    // These two construct a simplex from an initial point by offsetting all coordinate values.
     template <typename Objective>
    decltype(auto) optimize(
         Objective& objective,
         VectorD const& initial_point,
         Scalar offset = 1
-    );
+    ) {
+        NelderMeadLogNothing log;
+        return optimize(objective, initial_point, offset, log);
+    }
     template <typename Objective, typename Log>
     decltype(auto) optimize(
         Objective& objective,
@@ -63,14 +66,34 @@ public:
         Scalar offset,
         Log& log
     );
-
-    // Each column of simplex is a vertex.
+    // These four take a full simplex. Each column is a vertex.
     template <typename Objective>
-    decltype(auto) optimize(Objective& objective, MatrixDN const& simplex);
+    decltype(auto) optimize(Objective& objective, MatrixDN&& simplex) {
+        NelderMeadLogNothing log;
+        return optimize(objective, std::move(simplex), log);
+    }
     template <typename Objective, typename Log>
-    decltype(auto) optimize(Objective& objective, MatrixDN const& simplex, Log& log);
+    decltype(auto) optimize(Objective& objective, MatrixDN&& simplex, Log& log) {
+        simplex_vertices_ = std::move(simplex);
+        return optimize_impl(objective, log);
+    }
+    template <typename Objective>
+    decltype(auto) optimize(Objective& objective, MatrixDN const& simplex) {
+        NelderMeadLogNothing log;
+        return optimize(objective, simplex, log);
+    }
+    template <typename Objective, typename Log>
+    decltype(auto) optimize(Objective& objective, MatrixDN const& simplex, Log& log) {
+        simplex_vertices_ = simplex;
+        return optimize_impl(objective, log);
+    }
 
 private:
+    // Does the optimization assuming simplex_vertices_ has been initialized.
+    template <typename Objective, typename Log>
+    decltype(auto) optimize_impl(Objective& objective, Log& log);
+
+    // Helper method for optimize_impl.
     template <typename Objective>
     Scalar try_new_point(Objective& objective, Scalar factor);
 
@@ -102,18 +125,6 @@ private:
     static constexpr Scalar tiny_ = 1e-10;
 };
 
-//..................................................................................................
-template <int32_t D>
-template <typename Objective>
-decltype(auto) NelderMead<D>::optimize(
-    Objective& objective,
-    VectorD const& initial_point,
-    Scalar offset
-) {
-    NelderMeadLogNothing log;
-    return optimize(objective, initial_point, offset, log);
-}
-
 //..................................................................................................
 template <int32_t D>
 template <typename Objective, typename Log>
@@ -123,30 +134,20 @@ decltype(auto) NelderMead<D>::optimize(
     Scalar offset,
     Log& log
 ) {
-    MatrixDN simplex;
-    simplex.resize(initial_point.size(), initial_point.size() + 1);
-    simplex.col(0) = initial_point;
+    simplex_vertices_.resize(initial_point.size(), initial_point.size() + 1);
+    simplex_vertices_.col(0) = initial_point;
     for (uint32_t i = 0; i < initial_point.size(); ++i) {
-        simplex.col(i + 1) = initial_point;
-        simplex(i, i + 1) += offset;
+        simplex_vertices_.col(i + 1) = initial_point;
+        simplex_vertices_(i, i + 1) += offset;
     }
-    return optimize(objective, simplex, log);
-}
-
-//..................................................................................................
-template <int32_t D>
-template <typename Objective>
-decltype(auto) NelderMead<D>::optimize(Objective& objective, MatrixDN const& simplex) {
-    NelderMeadLogNothing log;
-    return optimize(objective, simplex, log);
+    return optimize_impl(objective, log);
 }
 
 //..................................................................................................
 template <int32_t D>
 template <typename Objective, typename Log>
-decltype(auto) NelderMead<D>::optimize(Objective& objective, MatrixDN const& simplex, Log& log) {
-    // initialize state
-    simplex_vertices_ = simplex;
+decltype(auto) NelderMead<D>::optimize_impl(Objective& objective, Log& log) {
+    // initialize state, assuming simplex_vertices_ is already set
     n_vertices_ = simplex_vertices_.cols();
     dim_ = simplex_vertices_.rows();
     assert(n_vertices_ == dim_ + 1);
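For reference, the simplex construction performed by the initial-point overload above (now writing directly into simplex_vertices_) places the initial point in column 0 and offsets one coordinate per remaining column, so a nonzero offset yields dim + 1 affinely independent vertices. A standalone sketch of that construction follows, using plain Eigen types as stand-ins for this repository's MatrixDN and VectorD aliases.

// Standalone sketch only: plain Eigen types stand in for this repository's MatrixDN/VectorD.
#include <Eigen/Dense>

#include <iostream>

// Column 0 is the initial point; column i + 1 is the initial point with coordinate i shifted by
// offset. For a nonzero offset this gives dim + 1 affinely independent vertices.
Eigen::MatrixXd make_simplex(Eigen::VectorXd const& initial_point, double offset) {
    Eigen::Index const dim = initial_point.size();
    Eigen::MatrixXd simplex(dim, dim + 1);
    simplex.col(0) = initial_point;
    for (Eigen::Index i = 0; i < dim; ++i) {
        simplex.col(i + 1) = initial_point;
        simplex(i, i + 1) += offset;
    }
    return simplex;
}

int main() {
    Eigen::VectorXd x0(2);
    x0 << 3.0, -1.0;
    std::cout << make_simplex(x0, 1.0) << '\n';
    // Prints the 2x3 vertex matrix:
    //  3  4  3
    // -1 -1  0
    return 0;
}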