diff --git a/optimization/optimizers/gradient_descent/gradient_descent.h b/optimization/optimizers/gradient_descent/gradient_descent.h
index 46dd99ee254c1f36287355935f228bff1262c87a..1a920961b5cec3069c54755972cb2d67b72c4e7c 100644
--- a/optimization/optimizers/gradient_descent/gradient_descent.h
+++ b/optimization/optimizers/gradient_descent/gradient_descent.h
@@ -7,54 +7,89 @@
 namespace optimization {
 
 //--------------------------------------------------------------------------------------------------
-template <int32_t N, typename Log = GradientDescentLogNothing>
-class GradientDescent : public Log {
+template <int32_t N>
+class GradientDescent {
 public:
     GradientDescent() {}
     GradientDescent(Scalar learning_rate, uint32_t me, Scalar gt)
         : learning_rate_(learning_rate), max_evaluations_(me), gradient_threshold_(gt) {}
 
+    Scalar& learning_rate() { return learning_rate_; }
+    Scalar learning_rate() const { return learning_rate_; }
+
     uint32_t n_evaluations() const { return n_evaluations_; }
     // We evaluate the objective function exactly once per iteration.
     uint32_t n_iterations() const { return n_evaluations_; }
-
-    Scalar& learning_rate() { return learning_rate_; }
-    Scalar learning_rate() const { return learning_rate_; }
+    VectorNs<N> const& point() const { return point_; }
+    VectorNs<N> const& gradient() const { return gradient_; }
+    Scalar value() const { return value_; }
 
     template <typename Objective>
-    VectorNs<N> optimize(Objective& objective, VectorNs<N> const& initial_point) {
-        VectorNs<N> point = initial_point;
-        Scalar value;
-        VectorNs<N> gradient;
-        gradient.resize(point.size());
-        objective.eval(point, value, gradient);
-        Log::initialize(objective);
-        Log::push_back(point, value, gradient);
-
-        for (n_evaluations_ = 1; n_evaluations_ < max_evaluations_; ++n_evaluations_) {
-            if (gradient.norm() <= gradient_threshold_) {
-                std::cout << "Gradient norm below threshold: " << gradient.norm() << '\n';
-                break;
-            }
-
-            point -= learning_rate_ * gradient;
-            objective.eval(point, value, gradient);
-
-            Log::push_back(point, value, gradient);
-        }
+    VectorNs<N> const& optimize(Objective& objective, VectorNs<N> const& initial_point);
 
-        return point;
-    }
+    template <typename Objective, typename Log>
+    VectorNs<N> const& optimize(Objective& objective, VectorNs<N> const& initial_point, Log& log);
 
 private:
-    uint32_t n_evaluations_;
-
+    // hyperparameters
     Scalar learning_rate_;
     uint32_t max_evaluations_;
     Scalar gradient_threshold_;
+
+    // algorithm state
+    uint32_t n_evaluations_;
+    VectorNs<N> point_;
+    VectorNs<N> gradient_;
+    Scalar value_;
 };
 
+//..................................................................................................
+template <int32_t N>
+template <typename Objective>
+VectorNs<N> const& GradientDescent<N>::optimize(
+    Objective& objective,
+    VectorNs<N> const& initial_point
+) {
+    GradientDescentLogNothing log;
+    return optimize(objective, initial_point, log);
+}
+
+//..................................................................................................
+template <int32_t N>
+template <typename Objective, typename Log>
+VectorNs<N> const& GradientDescent<N>::optimize(
+    Objective& objective,
+    VectorNs<N> const& initial_point,
+    Log& log
+) {
+    n_evaluations_ = 0;
+    point_ = initial_point;
+    gradient_.resize(point_.size());
+    log.initialize(objective);
+
+    while (true) {
+        objective.eval(point_, value_, gradient_);
+        ++n_evaluations_;
+        log.push_back(point_, value_, gradient_);
+
+        // termination conditions
+        if (n_evaluations_ >= max_evaluations_) {
+            std::cout << "Gradient descent reached max evaluations: " << n_evaluations_ << '\n';
+            break;
+        }
+        if (gradient_.norm() <= gradient_threshold_) {
+            std::cout << "Gradient descent reached gradient threshold: " << gradient_.norm();
+            std::cout << '\n';
+            break;
+        }
+
+        point_ -= learning_rate_ * gradient_;
+    }
+
+    return point_;
+}
+
 }
 
 #endif
diff --git a/optimization/optimizers/gradient_descent/logs/everything.h b/optimization/optimizers/gradient_descent/logs/everything.h
index 8eaf55772a7043ca1f1834162003d8ec28c8c5de..53ced123e29d1a2619805208f411e7b6c028b274 100644
--- a/optimization/optimizers/gradient_descent/logs/everything.h
+++ b/optimization/optimizers/gradient_descent/logs/everything.h
@@ -5,7 +5,7 @@
 #include "objectives/samples_vis.h"
 #include "utils/eigen_json.h"
 #include "utils/vector.h"
-#include "utils/vis_only.h"
+#include <string>
 #include <vector>
 
 namespace optimization {
diff --git a/optimization/optimizers/gradient_descent/main.cpp b/optimization/optimizers/gradient_descent/main.cpp
index b71f58896ae4bdbe003c49c7587eac5cdf1a2059..3ab9d34129d6cd234c22ab19d0e8e1ce09ae6e07 100644
--- a/optimization/optimizers/gradient_descent/main.cpp
+++ b/optimization/optimizers/gradient_descent/main.cpp
@@ -8,9 +8,8 @@
 #include <fstream>
 #include <iostream>
 
-using json = nlohmann::json;
-
 using namespace optimization;
+using json = nlohmann::json;
 
 //--------------------------------------------------------------------------------------------------
 int main(int const argc, char const** argv) {
@@ -52,31 +51,29 @@
     Objective objective;
     objective.dim() = dim;
 
+    GradientDescent<-1> optimizer(learning_rate, max_evaluations, gradient_threshold);
+    GradientDescentLogEverything<-1> log;
+
+    // Only log stuff if we're going to use it.
     if (log_file_path.empty() && vis_file_path.empty()) {
-        // If we're not saving data, use a lean optimizer.
-        // TODO: Find a way to deduplicate code between these branches.
-        GradientDescent<-1> optimizer(learning_rate, max_evaluations, gradient_threshold);
-        VectorXs minimum = optimizer.optimize(objective, initial_point);
-        std::cout << "n evaluations: " << optimizer.n_evaluations() << '\n';
-        std::cout << "final point: " << minimum << '\n';
+        optimizer.optimize(objective, initial_point);
     } else {
-        using Log = GradientDescentLogEverything<-1>;
-        GradientDescent<-1, Log> optimizer(learning_rate, max_evaluations, gradient_threshold);
-        VectorXs minimum = optimizer.optimize(objective, initial_point);
-        std::cout << "n evaluations: " << optimizer.n_evaluations() << '\n';
-        std::cout << "final point: " << minimum << '\n';
+        optimizer.optimize(objective, initial_point, log);
+    }
+
+    std::cout << "n evaluations: " << optimizer.n_evaluations() << '\n';
+    std::cout << "final point: " << optimizer.point() << '\n';
-
-        if (!log_file_path.empty()) {
-            json data = optimizer;
-            std::ofstream log_file(log_file_path);
-            log_file << data.dump(4) << '\n';
-        }
+
+    if (!log_file_path.empty()) {
+        json data = log;
+        std::ofstream log_file(log_file_path);
+        log_file << data.dump(4) << '\n';
+    }
-
-        if (!vis_file_path.empty()) {
-            json data = GradientDescentVis<-1>{optimizer};
-            std::ofstream vis_file(vis_file_path);
-            vis_file << data.dump(4) << '\n';
-        }
+
+    if (!vis_file_path.empty()) {
+        json data = GradientDescentVis<-1>{log};
+        std::ofstream vis_file(vis_file_path);
+        vis_file << data.dump(4) << '\n';
     }
 
     return 0;
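
Note on the new Log contract: the refactor replaces the old design, where GradientDescent inherited from its Log template parameter, with a logger passed by reference. The whole interface a logger must provide is the two calls visible in optimize(): initialize(objective), made once before the loop, and push_back(point, value, gradient), made once per evaluation. A no-op logger satisfying that contract could be as small as the sketch below. This is hypothetical, not the repo's actual logs code, and it assumes Scalar is the alias from utils/vector.h:

    // Minimal sketch of a logger usable with
    // GradientDescent<N>::optimize(objective, initial_point, log).
    struct NoOpLog {
        // Called once, before the first evaluation.
        template <typename Objective>
        void initialize(Objective const& /*objective*/) {}

        // Called after every objective evaluation.
        template <typename Vector>
        void push_back(Vector const& /*point*/, Scalar /*value*/, Vector const& /*gradient*/) {}
    };

Because optimize() takes Log as a template parameter and only makes these two member calls, any type with matching members works; GradientDescentLogNothing and GradientDescentLogEverything<-1> in this diff are two such implementations.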
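The Objective side of the contract is similarly narrow: optimize() only calls objective.eval(point, value, gradient), and main.cpp additionally uses a dim() accessor. For illustration, a toy quadratic objective driven through the new interface might look like the following sketch. All names here are hypothetical, and it assumes the Eigen-backed Scalar and VectorXs aliases from utils/vector.h plus the repo's include layout:

    #include "optimization/optimizers/gradient_descent/gradient_descent.h"  // include path is an assumption
    #include <iostream>

    using namespace optimization;

    // Toy objective: f(x) = 0.5 * ||x||^2, whose gradient is x itself.
    struct QuadraticBowl {
        int32_t dim_ = 2;
        int32_t& dim() { return dim_; }

        void eval(VectorXs const& point, Scalar& value, VectorXs& gradient) const {
            value = Scalar(0.5) * point.squaredNorm();
            gradient = point;  // d/dx [0.5 * x^T x] = x
        }
    };

    int main() {
        QuadraticBowl objective;
        GradientDescent<-1> optimizer(
            /*learning_rate=*/0.1, /*max_evaluations=*/1000, /*gradient_threshold=*/1e-6);

        // The no-logging overload forwards to the Log overload with a
        // GradientDescentLogNothing, so both paths share one implementation.
        VectorXs minimum = optimizer.optimize(objective, VectorXs::Constant(2, 1.0));
        std::cout << "final point: " << minimum << '\n';
        return 0;
    }

With these settings each update scales the iterate by 0.9, so the gradient norm should drop below 1e-6 after roughly 135 evaluations and the run terminates through the gradient-threshold branch rather than the evaluation cap.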