diff --git a/optimization/optimizers/gradient_descent/gradient_descent.h b/optimization/optimizers/gradient_descent/gradient_descent.h
index 46dd99ee254c1f36287355935f228bff1262c87a..1a920961b5cec3069c54755972cb2d67b72c4e7c 100644
--- a/optimization/optimizers/gradient_descent/gradient_descent.h
+++ b/optimization/optimizers/gradient_descent/gradient_descent.h
@@ -7,54 +7,89 @@
 namespace optimization {
 
 //--------------------------------------------------------------------------------------------------
-template <int32_t N, typename Log = GradientDescentLogNothing>
-class GradientDescent : public Log {
+template <int32_t N>
+class GradientDescent {
 public:
     GradientDescent() {}
     GradientDescent(Scalar learning_rate, uint32_t me, Scalar gt)
         : learning_rate_(learning_rate), max_evaluations_(me), gradient_threshold_(gt) {}
 
+    Scalar& learning_rate() { return learning_rate_; }
+    Scalar learning_rate() const { return learning_rate_; }
+
     uint32_t n_evaluations() const { return n_evaluations_; }
     // We evaluate the objective function exactly once per iteration.
     uint32_t n_iterations() const { return n_evaluations_; }
-
-    Scalar& learning_rate() { return learning_rate_; }
-    Scalar learning_rate() const { return learning_rate_; }
+    VectorNs<N> const& point() const { return point_; }
+    VectorNs<N> const& gradient() const { return gradient_; }
+    Scalar value() const { return value_; }
 
     template <typename Objective>
-    VectorNs<N> optimize(Objective& objective, VectorNs<N> const& initial_point) {
-        VectorNs<N> point = initial_point;
-        Scalar value;
-        VectorNs<N> gradient;
-        gradient.resize(point.size());
-        objective.eval(point, value, gradient);
-        Log::initialize(objective);
-        Log::push_back(point, value, gradient);
-
-        for (n_evaluations_ = 1; n_evaluations_ < max_evaluations_; ++n_evaluations_) {
-            if (gradient.norm() <= gradient_threshold_) {
-                std::cout << "Gradient norm below threshold: " << gradient.norm() << '\n';
-                break;
-            }
-
-            point -= learning_rate_ * gradient;
-            objective.eval(point, value, gradient);
-
-            Log::push_back(point, value, gradient);
-        }
+    VectorNs<N> const& optimize(Objective& objective, VectorNs<N> const& initial_point);
 
-        return point;
-    }
+    template <typename Objective, typename Log>
+    VectorNs<N> const& optimize(Objective& objective, VectorNs<N> const& initial_point, Log& log);
 
 private:
-    uint32_t n_evaluations_;
-
+    // hyperparameters
     Scalar learning_rate_;
     uint32_t max_evaluations_;
     Scalar gradient_threshold_;
+
+    // algorithm state
+    uint32_t n_evaluations_;
+    VectorNs<N> point_;
+    VectorNs<N> gradient_;
+    Scalar value_;
 };
 
+//..................................................................................................
+template <int32_t N>
+template <typename Objective>
+VectorNs<N> const& GradientDescent<N>::optimize(
+    Objective& objective,
+    VectorNs<N> const& initial_point
+) {
+    GradientDescentLogNothing log;
+    return optimize(objective, initial_point, log);
+}
+
+//..................................................................................................
+template <int32_t N>
+template <typename Objective, typename Log>
+VectorNs<N> const& GradientDescent<N>::optimize(
+    Objective& objective,
+    VectorNs<N> const& initial_point,
+    Log& log
+) {
+    n_evaluations_ = 0;
+    point_ = initial_point;
+    gradient_.resize(point_.size());
+    log.initialize(objective);
+
+    while (true) {
+        objective.eval(point_, value_, gradient_);
+        ++n_evaluations_;
+        log.push_back(point_, value_, gradient_);
+
+        // termination conditions
+        if (n_evaluations_ >= max_evaluations_) {
+            std::cout << "Gradient descent reached max evaluations: " << n_evaluations_ << '\n';
+            break;
+        }
+        if (gradient_.norm() <= gradient_threshold_) {
+            std::cout << "Gradient descent reached gradient threshold: " << gradient_.norm();
+            std::cout << '\n';
+            break;
+        }
+
+        point_ -= learning_rate_ * gradient_;
+    }
+
+    return point_;
+}
+
 }
 
 #endif
diff --git a/optimization/optimizers/gradient_descent/logs/everything.h b/optimization/optimizers/gradient_descent/logs/everything.h
index 8eaf55772a7043ca1f1834162003d8ec28c8c5de..53ced123e29d1a2619805208f411e7b6c028b274 100644
--- a/optimization/optimizers/gradient_descent/logs/everything.h
+++ b/optimization/optimizers/gradient_descent/logs/everything.h
@@ -5,7 +5,7 @@
 #include "objectives/samples_vis.h"
 #include "utils/eigen_json.h"
 #include "utils/vector.h"
-#include "utils/vis_only.h"
+#include <string>
 #include <vector>
 
 namespace optimization {
diff --git a/optimization/optimizers/gradient_descent/main.cpp b/optimization/optimizers/gradient_descent/main.cpp
index b71f58896ae4bdbe003c49c7587eac5cdf1a2059..3ab9d34129d6cd234c22ab19d0e8e1ce09ae6e07 100644
--- a/optimization/optimizers/gradient_descent/main.cpp
+++ b/optimization/optimizers/gradient_descent/main.cpp
@@ -8,9 +8,8 @@
 #include <fstream>
 #include <iostream>
 
-using json = nlohmann::json;
-
 using namespace optimization;
+using json = nlohmann::json;
 
 //--------------------------------------------------------------------------------------------------
 int main(int const argc, char const** argv) {
@@ -52,31 +51,29 @@
     Objective objective;
     objective.dim() = dim;
 
+    GradientDescent<-1> optimizer(learning_rate, max_evaluations, gradient_threshold);
+    GradientDescentLogEverything<-1> log;
+
+    // Only log stuff if we're going to use it.
     if (log_file_path.empty() && vis_file_path.empty()) {
-        // If we're not saving data, use a lean optimizer.
-        // TODO: Find a way to deduplicate code between these branches.
-        GradientDescent<-1> optimizer(learning_rate, max_evaluations, gradient_threshold);
-        VectorXs minimum = optimizer.optimize(objective, initial_point);
-        std::cout << "n evaluations: " << optimizer.n_evaluations() << '\n';
-        std::cout << "final point: " << minimum << '\n';
+        optimizer.optimize(objective, initial_point);
     } else {
-        using Log = GradientDescentLogEverything<-1>;
-        GradientDescent<-1, Log> optimizer(learning_rate, max_evaluations, gradient_threshold);
-        VectorXs minimum = optimizer.optimize(objective, initial_point);
-        std::cout << "n evaluations: " << optimizer.n_evaluations() << '\n';
-        std::cout << "final point: " << minimum << '\n';
+        optimizer.optimize(objective, initial_point, log);
+    }
+
+    std::cout << "n evaluations: " << optimizer.n_evaluations() << '\n';
+    std::cout << "final point: " << optimizer.point() << '\n';
-
-        if (!log_file_path.empty()) {
-            json data = optimizer;
-            std::ofstream log_file(log_file_path);
-            log_file << data.dump(4) << '\n';
-        }
+
+    if (!log_file_path.empty()) {
+        json data = log;
+        std::ofstream log_file(log_file_path);
+        log_file << data.dump(4) << '\n';
+    }
-
-        if (!vis_file_path.empty()) {
-            json data = GradientDescentVis<-1>{optimizer};
-            std::ofstream vis_file(vis_file_path);
-            vis_file << data.dump(4) << '\n';
-        }
+
+    if (!vis_file_path.empty()) {
+        json data = GradientDescentVis<-1>{log};
+        std::ofstream vis_file(vis_file_path);
+        vis_file << data.dump(4) << '\n';
     }
 
     return 0;
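
Note on the new Log contract: the refactor replaces the old design, where GradientDescent inherited from its Log template parameter, with a logger passed by reference. The whole interface a logger must provide is the two calls visible in optimize(): initialize(objective), made once before the loop, and push_back(point, value, gradient), made once per evaluation. A no-op logger satisfying that contract could be as small as the sketch below. This is hypothetical, not the repo's actual logs code, and it assumes Scalar is the alias from utils/vector.h:

    // Minimal sketch of a logger usable with
    // GradientDescent<N>::optimize(objective, initial_point, log).
    struct NoOpLog {
        // Called once, before the first evaluation.
        template <typename Objective>
        void initialize(Objective const& /*objective*/) {}

        // Called after every objective evaluation.
        template <typename Vector>
        void push_back(Vector const& /*point*/, Scalar /*value*/, Vector const& /*gradient*/) {}
    };

Because optimize() takes Log as a template parameter and only makes these two member calls, any type with matching members works; GradientDescentLogNothing and GradientDescentLogEverything<-1> in this diff are two such implementations.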
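The Objective side of the contract is similarly narrow: optimize() only calls objective.eval(point, value, gradient), and main.cpp additionally uses a dim() accessor. For illustration, a toy quadratic objective driven through the new interface might look like the following sketch. All names here are hypothetical, and it assumes the Eigen-backed Scalar and VectorXs aliases from utils/vector.h plus the repo's include layout:

    #include "optimization/optimizers/gradient_descent/gradient_descent.h"  // include path is an assumption
    #include <iostream>

    using namespace optimization;

    // Toy objective: f(x) = 0.5 * ||x||^2, whose gradient is x itself.
    struct QuadraticBowl {
        int32_t dim_ = 2;
        int32_t& dim() { return dim_; }

        void eval(VectorXs const& point, Scalar& value, VectorXs& gradient) const {
            value = Scalar(0.5) * point.squaredNorm();
            gradient = point;  // d/dx [0.5 * x^T x] = x
        }
    };

    int main() {
        QuadraticBowl objective;
        GradientDescent<-1> optimizer(
            /*learning_rate=*/0.1, /*max_evaluations=*/1000, /*gradient_threshold=*/1e-6);

        // The no-logging overload forwards to the Log overload with a
        // GradientDescentLogNothing, so both paths share one implementation.
        VectorXs minimum = optimizer.optimize(objective, VectorXs::Constant(2, 1.0));
        std::cout << "final point: " << minimum << '\n';
        return 0;
    }

With these settings each update scales the iterate by 0.9, so the gradient norm should drop below 1e-6 after roughly 135 evaluations and the run terminates through the gradient-threshold branch rather than the evaluation cap.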