Commit f5d0f7b0 authored by Erik Strand

Add gradient termination condition to gd

parent 6ce152ca
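In short: where the optimizer previously ran for a fixed number of steps, it now also stops as soon as the gradient norm falls below a configurable threshold, and the old step count becomes a cap on objective evaluations. A self-contained sketch of the same stopping rule follows; the one-dimensional objective f(x) = x^2 and all constants in it are illustrative assumptions, not code from this repository.

    #include <cmath>
    #include <cstdint>
    #include <iostream>

    // Standalone sketch of the termination condition added in this commit: descend
    // until the gradient norm drops below a threshold or the evaluation budget runs out.
    int main() {
        double x = 2.0;                           // illustrative starting point
        double gradient = 2.0 * x;                // f'(x) for the toy objective f(x) = x^2
        double const learning_rate = 0.0015;
        uint32_t const max_evaluations = 10000;
        double const gradient_threshold = 1e-8;

        uint32_t n_evaluations = 1;               // the initial gradient above counts as one evaluation
        for (; n_evaluations < max_evaluations; ++n_evaluations) {
            if (std::abs(gradient) <= gradient_threshold) {
                std::cout << "Gradient norm below threshold: " << std::abs(gradient) << '\n';
                break;
            }
            x -= learning_rate * gradient;        // plain gradient descent step
            gradient = 2.0 * x;                   // re-evaluate the gradient
        }
        std::cout << "n evaluations: " << n_evaluations << '\n';
        std::cout << "final point: " << x << '\n';
    }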
@@ -4,6 +4,6 @@ add_executable(gradient_descent
 target_link_libraries(gradient_descent optimization_lib clara)
 if (VISUALIZE)
-    make_plot_target(gradient_descent 2d ARGS -d 2 -l 0.0015)
+    make_plot_target(gradient_descent 2d ARGS -d 2 -l 0.0015 -n 10000)
     make_plot_target(gradient_descent 10d ARGS -d 10 -l 0.0005 -n 10000)
 endif()
@@ -2,6 +2,7 @@
 #define OPTIMIZATION_GRADIENT_DESCENT_H
 
 #include "gradient_descent_log.h"
+#include <iostream>
 
 namespace optimization {
@@ -10,10 +11,14 @@ template <int32_t N>
 class GradientDescent : public GradientDescentLog<N> {
 public:
     GradientDescent() {}
-    GradientDescent(Scalar learning_rate, uint32_t n_steps)
-        : learning_rate_(learning_rate), n_steps_(n_steps)
+    GradientDescent(Scalar learning_rate, uint32_t me, Scalar gt)
+        : learning_rate_(learning_rate), max_evaluations_(me), gradient_threshold_(gt)
     {}
+
+    uint32_t n_evaluations() const { return n_evaluations_; }
+    // We evaluate the objective function exactly once per iteration.
+    uint32_t n_iterations() const { return n_evaluations_; }
 
     Scalar& learning_rate() { return learning_rate_; }
     Scalar learning_rate() const { return learning_rate_; }
@@ -27,9 +32,15 @@ public:
         GradientDescentLog<N>::initialize(objective);
         GradientDescentLog<N>::push_back(point, value, gradient);
 
-        for (uint32_t i = 0; i < n_steps_; ++i) {
+        for (n_evaluations_ = 1; n_evaluations_ < max_evaluations_; ++n_evaluations_) {
+            if (gradient.norm() <= gradient_threshold_) {
+                std::cout << "Gradient norm below threshold: " << gradient.norm() << '\n';
+                break;
+            }
+
             point -= learning_rate_ * gradient;
             objective.eval(point, value, gradient);
             GradientDescentLog<N>::push_back(point, value, gradient);
         }
@@ -37,8 +48,11 @@ public:
     }
 
 private:
+    uint32_t n_evaluations_;
     Scalar learning_rate_;
-    uint32_t n_steps_;
+    uint32_t max_evaluations_;
+    Scalar gradient_threshold_;
 };
 
 }
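A note on the counting convention in the header above: n_evaluations_ starts at 1 because the initial point's value and gradient are already computed and logged before the loop begins, and each loop pass performs exactly one further evaluation, which is why n_iterations() simply returns the same counter. A hypothetical trace with a budget of three evaluations (the constructor arguments here are made up for illustration):

    // GradientDescent<2> gd(0.0015, /* max evaluations */ 3, /* gradient threshold */ 1e-8);  // hypothetical
    // gd.optimize(objective, x0);
    // evaluation 1: initial point, pushed to the log before the loop
    // evaluation 2: after the first descent step (loop entered with n_evaluations_ == 1)
    // evaluation 3: after the second step; the loop then exits since n_evaluations_ == max_evaluations_
    // result: gd.n_evaluations() == 3 and gd.n_iterations() == 3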
@@ -19,8 +19,9 @@ int main(int const argc, char const** argv) {
     std::string log_file_path;
     std::string vis_file_path;
     uint32_t dim = 2;
-    uint32_t n_steps = 1000;
     Scalar learning_rate = 0.0015;
+    uint32_t max_evaluations = 1000;
+    Scalar gradient_threshold = 1e-8;
     Scalar x0 = -1;
     Scalar y0 = 2;
@@ -28,8 +29,9 @@ int main(int const argc, char const** argv) {
         clara::Arg(log_file_path, "log_file_path")("Location of the optimization log") |
         clara::Arg(vis_file_path, "vis_file_path")("Location of the visualization file") |
         clara::Opt(dim, "dim")["-d"]["--dimension"]("Dimensionality of the objective") |
-        clara::Opt(n_steps, "n_steps")["-n"]["--n-steps"]("Number of gradient descent steps") |
         clara::Opt(learning_rate, "learning_rate")["-l"]["--learning-rate"]("Gradient descent learning rate") |
+        clara::Opt(max_evaluations, "max_evaluations")["-n"]["--max-evaluations"]("Max number of gradient descent steps") |
+        clara::Opt(gradient_threshold, "gradient_threshold")["-g"]["--gradient-threshold"]("Terminate if the gradient norm is this small") |
         clara::Opt(x0, "x0")["-x"]["--x0"]("X coordinate of initial point") |
         clara::Opt(y0, "y0")["-y"]["--y0"]("Y coordinate of initial point");
     auto const result = cli.parse(clara::Args(argc, argv));
@@ -52,9 +54,9 @@ int main(int const argc, char const** argv) {
     Objective objective;
     objective.dim() = dim;
 
-    GradientDescent<-1> optimizer(learning_rate, n_steps);
+    GradientDescent<-1> optimizer(learning_rate, max_evaluations, gradient_threshold);
     VectorXs minimum = optimizer.optimize(objective, initial_point);
-    std::cout << "n evaluations: " << n_steps << '\n';
+    std::cout << "n evaluations: " << optimizer.n_evaluations() << '\n';
     std::cout << "final point: " << minimum << '\n';
 
 #ifdef VISUALIZE
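With these options in place, a run of the driver might look like the following; the executable name and output file paths are placeholders, and only the flags come from the parser above.

    ./gradient_descent gd_log.dat gd_vis.dat -d 10 -l 0.0005 -n 10000 -g 1e-8

Besides any visualization output, this prints "n evaluations: ..." and "final point: ...", and the optimizer's "Gradient norm below threshold: ..." line appears whenever the new early exit fires before the evaluation budget is spent.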