diff --git a/optimization/optimizers/gradient_descent/CMakeLists.txt b/optimization/optimizers/gradient_descent/CMakeLists.txt
index a1f0d4b4bb6971067e6b1edf22f5a6f28e59a18b..bb3b52e461bcfa71127efe678e0f49bdf1473c63 100644
--- a/optimization/optimizers/gradient_descent/CMakeLists.txt
+++ b/optimization/optimizers/gradient_descent/CMakeLists.txt
@@ -4,6 +4,6 @@ add_executable(gradient_descent
 target_link_libraries(gradient_descent optimization_lib clara)
 
 if (VISUALIZE)
-    make_plot_target(gradient_descent 2d ARGS -d 2 -l 0.0015)
+    make_plot_target(gradient_descent 2d ARGS -d 2 -l 0.0015 -n 10000)
     make_plot_target(gradient_descent 10d ARGS -d 10 -l 0.0005 -n 10000)
 endif()
diff --git a/optimization/optimizers/gradient_descent/gradient_descent.h b/optimization/optimizers/gradient_descent/gradient_descent.h
index bcd5bf20abd88a4b7549b5e71d95a47b5840231c..1061e181f11a3252ee7514a318b82b102409e85b 100644
--- a/optimization/optimizers/gradient_descent/gradient_descent.h
+++ b/optimization/optimizers/gradient_descent/gradient_descent.h
@@ -2,6 +2,7 @@
 #define OPTIMIZATION_GRADIENT_DESCENT_H
 
 #include "gradient_descent_log.h"
+#include <iostream>
 
 namespace optimization {
 
@@ -10,10 +11,14 @@ template <int32_t N>
 class GradientDescent : public GradientDescentLog<N> {
 public:
     GradientDescent() {}
-    GradientDescent(Scalar learning_rate, uint32_t n_steps)
-        : learning_rate_(learning_rate), n_steps_(n_steps)
+    GradientDescent(Scalar learning_rate, uint32_t me, Scalar gt)
+        : learning_rate_(learning_rate), max_evaluations_(me), gradient_threshold_(gt)
     {}
 
+    uint32_t n_evaluations() const { return n_evaluations_; }
+    // We evaluate the objective function exactly once per iteration.
+    uint32_t n_iterations() const { return n_evaluations_; }
+
     Scalar& learning_rate() { return learning_rate_; }
     Scalar learning_rate() const { return learning_rate_; }
 
@@ -27,9 +32,15 @@ public:
         GradientDescentLog<N>::initialize(objective);
         GradientDescentLog<N>::push_back(point, value, gradient);
 
-        for (uint32_t i = 0; i < n_steps_; ++i) {
+        for (n_evaluations_ = 1; n_evaluations_ < max_evaluations_; ++n_evaluations_) {
+            if (gradient.norm() <= gradient_threshold_) {
+                std::cout << "Gradient norm below threshold: " << gradient.norm() << '\n';
+                break;
+            }
+
             point -= learning_rate_ * gradient;
             objective.eval(point, value, gradient);
+
             GradientDescentLog<N>::push_back(point, value, gradient);
         }
 
@@ -37,8 +48,11 @@ public:
     }
 
 private:
+    uint32_t n_evaluations_;
+
     Scalar learning_rate_;
-    uint32_t n_steps_;
+    uint32_t max_evaluations_;
+    Scalar gradient_threshold_;
 };
 
 }
diff --git a/optimization/optimizers/gradient_descent/main.cpp b/optimization/optimizers/gradient_descent/main.cpp
index 47820b6b617bef26700a636b00caf851d83334ee..dceb38c2e631eb0a58d684fb74d2a6b0d293a902 100644
--- a/optimization/optimizers/gradient_descent/main.cpp
+++ b/optimization/optimizers/gradient_descent/main.cpp
@@ -19,8 +19,9 @@ int main(int const argc, char const** argv) {
     std::string log_file_path;
     std::string vis_file_path;
     uint32_t dim = 2;
-    uint32_t n_steps = 1000;
     Scalar learning_rate = 0.0015;
+    uint32_t max_evaluations = 1000;
+    Scalar gradient_threshold = 1e-8;
     Scalar x0 = -1;
     Scalar y0 = 2;
 
@@ -28,8 +29,9 @@ int main(int const argc, char const** argv) {
         clara::Arg(log_file_path, "log_file_path")("Location of the optimization log") |
         clara::Arg(vis_file_path, "vis_file_path")("Location of the visualization file") |
         clara::Opt(dim, "dim")["-d"]["--dimension"]("Dimensionality of the objective") |
-        clara::Opt(n_steps, "n_steps")["-n"]["--n-steps"]("Number of gradient descent steps") |
         clara::Opt(learning_rate, "learning_rate")["-l"]["--learning-rate"]("Gradient descent learning rate") |
+        clara::Opt(max_evaluations, "max_evaluations")["-n"]["--max-evaluations"]("Max number of gradient descent steps") |
+        clara::Opt(gradient_threshold, "gradient_threshold")["-g"]["--gradient-threshold"]("Terminate if the gradient norm is this small") |
         clara::Opt(x0, "x0")["-x"]["--x0"]("X coordinate of initial point") |
         clara::Opt(y0, "y0")["-y"]["--y0"]("Y coordinate of initial point");
     auto const result = cli.parse(clara::Args(argc, argv));
@@ -52,9 +54,9 @@ int main(int const argc, char const** argv) {
 
     Objective objective;
     objective.dim() = dim;
-    GradientDescent<-1> optimizer(learning_rate, n_steps);
+    GradientDescent<-1> optimizer(learning_rate, max_evaluations, gradient_threshold);
     VectorXs minimum = optimizer.optimize(objective, initial_point);
-    std::cout << "n evaluations: " << n_steps << '\n';
+    std::cout << "n evaluations: " << optimizer.n_evaluations() << '\n';
     std::cout << "final point: " << minimum << '\n';
 
 #ifdef VISUALIZE