Commit 3739c8ea authored by Erik Strand

Solve nonlinear least squares via gradient descent

parent 0c1635ea
#add_library(sim
#    glut_grapher.cpp
#    glut_grapher.h
#)
#target_link_libraries(sim PUBLIC common ${OPENGL_LIBRARIES} ${GLUT_LIBRARY})
#target_include_directories(sim PUBLIC
#    ${OPENGL_INCLUDE_DIRS}
#    ${GLUT_INCLUDE_DIRS}
#)

add_executable(svd
    svd.cpp
)
target_link_libraries(svd shared_settings shared_code)

#add_executable(diffusion
#    diffusion.cpp
#)
#target_link_libraries(diffusion sim)

add_executable(levmar
    levenberg_marquardt.cpp
)
target_link_libraries(levmar shared_settings shared_code)
#include "xorshift.h"
#include <Eigen/Dense>
#include <iostream>
#include <random>
#include <vector>
using namespace std;
using namespace Eigen;
//--------------------------------------------------------------------------------------------------
XorShift64 rg1;
std::random_device rd{};
std::mt19937 rg2{rd()};
//--------------------------------------------------------------------------------------------------
Eigen::ArrayXd draw_n_uniform(uint32_t n) {
    Eigen::ArrayXd result(n);
    for (uint32_t i = 0; i < n; ++i) {
        result[i] = rg1.draw_double();
    }
    return result;
}
//--------------------------------------------------------------------------------------------------
// TODO Box-Muller transform my uniform samples instead of using STL
Eigen::ArrayXd draw_n_normal(uint32_t n, double std_deviation) {
    std::normal_distribution<> normal{0, std_deviation};
    Eigen::ArrayXd result(n);
    for (uint32_t i = 0; i < n; ++i) {
        result[i] = normal(rg2);
    }
    return result;
}
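//--------------------------------------------------------------------------------------------------
// A minimal sketch of the Box-Muller option mentioned in the TODO above: two independent uniform
// samples on (0, 1) yield one standard normal sample (the companion sample, which uses sin instead
// of cos, is discarded for brevity). Untested illustration, not part of the committed code; it
// assumes rg1.draw_double() returns values in (0, 1) and that <cmath> is available (Eigen/Dense
// pulls it in).
Eigen::ArrayXd draw_n_normal_box_muller(uint32_t n, double std_deviation) {
    constexpr double two_pi = 6.283185307179586;
    Eigen::ArrayXd result(n);
    for (uint32_t i = 0; i < n; ++i) {
        double const u1 = rg1.draw_double();
        double const u2 = rg1.draw_double();
        result[i] = std_deviation * std::sqrt(-2.0 * std::log(u1)) * std::cos(two_pi * u2);
    }
    return result;
}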
//--------------------------------------------------------------------------------------------------
int main() {
    // Warm up the xorshift generator.
    uint32_t constexpr n_warmup = 1000;
    for (uint32_t i = 0; i < n_warmup; ++i) {
        rg1.advance();
    }

    // Generate the data: x uniform on (0, 1), y = sin(2 + 3x) plus (optional) Gaussian noise.
    uint32_t constexpr n_samples = 100;
    double const std_deviation = 0.1;
    Eigen::ArrayXd samples = draw_n_uniform(n_samples);
    Eigen::ArrayXd errors = draw_n_normal(n_samples, std_deviation);
    //Eigen::ArrayXd values = sin(2 + 3 * samples) + errors;
    Eigen::ArrayXd values = sin(2 + 3 * samples); // noise currently disabled

    double a = 1;
    double b = 1;
    double lambda = 1e-3;
    constexpr bool vary_lambda = false;

    double error = (values - sin(a + b * samples)).matrix().squaredNorm();
    double new_error;
    Eigen::Vector2d grad;
    Eigen::Matrix2d mat;
    Eigen::ArrayXd tmp(n_samples);
    Eigen::Vector2d step;
    std::cout << "error: " << error << '\n';

    for (uint32_t i = 0; i < 100; ++i) {
        // Gradient of chi^2 with respect to a and b.
        tmp = (values - sin(a + b * samples)) * cos(a + b * samples);
        grad[0] = -2 * tmp.sum();
        grad[1] = -2 * (samples * tmp).sum();

        // M: half the Hessian of chi^2, with the diagonal scaled by (1 + lambda).
        tmp = values * sin(a + b * samples) + cos(2 * (a + b * samples));
        mat(0, 0) = tmp.sum() * (1 + lambda);
        mat(0, 1) = (samples * tmp).sum();
        mat(1, 0) = mat(0, 1);
        mat(1, 1) = (samples * samples * tmp).sum() * (1 + lambda);

        // Levenberg-Marquardt
        //step = -mat.inverse() * grad;
        // gradient descent
        step = -lambda * grad;
        a += step[0];
        b += step[1];

        new_error = (values - sin(a + b * samples)).matrix().squaredNorm();
        if (vary_lambda) {
            if (new_error < error) {
                lambda *= 0.8;
            } else {
                lambda *= 1.2;
            }
        }
        error = new_error;
        std::cout << "error: " << error << '\n';
    }
    std::cout << "a = " << a << ", b = " << b << '\n';
}
# Use SymPy to derive the gradient and Hessian of the squared error symbolically.
from sympy import *

a = Symbol("a", real=True)
b = Symbol("b", real=True)
x = Symbol("x", real=True)
y = Symbol("y", real=True)

error = (y - sin(a + b * x))**2

# First derivatives.
error_da = simplify(diff(error, a))
error_db = simplify(diff(error, b))
print(latex(error_da))
print(latex(error_db))

# Second derivatives.
error_daa = simplify(diff(error_da, a))
error_dab = simplify(diff(error_da, b))
error_dba = simplify(diff(error_db, a))
error_dbb = simplify(diff(error_db, b))
print(latex(error_daa))
print(latex(error_dab))
print(latex(error_dba))
print(latex(error_dbb))
My code is
[here](https://gitlab.cba.mit.edu/erik/nmm_2020_site/-/tree/master/_code/pset_07/cpp/svd.cpp). I
found $$a = 2.10697$$ and $$b = 2.91616$$.
## 2
Generate 100 points $$x$$ uniformly distributed between 0 and 1, and let $$y = \sin(2 + 3x) +
\zeta$$, where $$\zeta$$ is a Gaussian random variable with a standard deviation of 0.1. Write a
Levenberg-Marquardt routine to fit $$y = \sin(a + bx)$$ to this data set starting from $$a = b = 1$$
(remembering that the second-derivative term can be dropped in the Hessian), and investigate the
convergence for both fixed and adaptively adjusted $$\lambda$$ values.
We want to minimize
$$
\chi^2(a, b) = \sum_{i = 1}^N \left( y_i - \sin{\left(a + b x_i \right)} \right)^2
$$
The first derivatives of this function are
$$
\begin{aligned}
\frac{\partial \chi^2(a, b)}{\partial a} &= -2 \sum_{i = 1}^N \left(y_i - \sin{\left(a + b x_i \right)}\right) \cos{\left(a + b x_i \right)} \\
\frac{\partial \chi^2(a, b)}{\partial b} &= -2 \sum_{i = 1}^N x_i \left(y_i - \sin{\left(a + b x_i \right)}\right) \cos{\left(a + b x_i \right)}
\end{aligned}
$$
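For plain gradient descent, these first derivatives are all we need: each iteration steps along the negative gradient,

$$
\begin{bmatrix} a \\ b \end{bmatrix} \leftarrow
\begin{bmatrix} a \\ b \end{bmatrix} - \lambda
\begin{bmatrix} \frac{\partial \chi^2}{\partial a} \\ \frac{\partial \chi^2}{\partial b} \end{bmatrix}
$$

with $$\lambda$$ doubling as the step size.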
The second derivatives are
$$
\begin{aligned}
\frac{\partial^2 \chi^2(a, b)}{\partial a^2} &= 2 \sum_{i = 1}^N \left( y_i \sin{\left(a + b x_i \right)} + \cos{\left(2 (a + b x_i) \right)} \right) \\
\frac{\partial^2 \chi^2(a, b)}{\partial a \partial b} &= 2 \sum_{i = 1}^N x_i \left(y_i \sin{\left(a + b x_i \right)} + \cos{\left(2 (a + b x_i) \right)}\right) \\
\frac{\partial^2 \chi^2(a, b)}{\partial b^2} &= 2 \sum_{i = 1}^N x_i^2 \left(y_i \sin{\left(a + b x_i \right)} + \cos{\left(2 (a + b x_i) \right)}\right) \\
\end{aligned}
$$
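Since $$y_i \sin{\left(a + b x_i \right)} + \cos{\left(2 (a + b x_i) \right)} = \cos^2{\left(a + b x_i \right)} + \left(y_i - \sin{\left(a + b x_i \right)}\right) \sin{\left(a + b x_i \right)}$$, dropping the residual (second-derivative) term, as the problem statement allows, would leave the Gauss-Newton approximation

$$
\frac{\partial^2 \chi^2(a, b)}{\partial a^2} \approx 2 \sum_{i = 1}^N \cos^2{\left(a + b x_i \right)}
$$

with additional factors of $$x_i$$ and $$x_i^2$$ in the mixed and $$b$$ derivatives. My code keeps the full second derivatives.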
Using these we can construct our matrix $$\bold{M}$$.
$$
\bold{M} =
\begin{bmatrix}
\Large{\frac{1}{2} \frac{\partial^2 \chi^2}{\partial a^2}} \small{(1 + \lambda)} & \Large{\frac{1}{2} \frac{\partial^2 \chi^2}{\partial a \partial b}} \\
\Large{\frac{1}{2} \frac{\partial^2 \chi^2}{\partial b \partial a}} & \Large{\frac{1}{2} \frac{\partial^2 \chi^2}{\partial b^2}} \small{(1 + \lambda)}
\end{bmatrix}
$$
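A Levenberg-Marquardt step then solves this $$2 \times 2$$ system: with the gradient from above,

$$
\begin{bmatrix} \delta a \\ \delta b \end{bmatrix} = -\frac{1}{2} \bold{M}^{-1}
\begin{bmatrix} \frac{\partial \chi^2}{\partial a} \\ \frac{\partial \chi^2}{\partial b} \end{bmatrix}
$$

For small $$\lambda$$ this approaches a Newton step, while for large $$\lambda$$ the inflated diagonal turns it into a short step down the gradient.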
My implementation lives
[here](https://gitlab.cba.mit.edu/erik/nmm_2020_site/-/tree/master/_code/pset_07/cpp/levenberg_marquardt.cpp).
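For the adaptively adjusted $$\lambda$$, my loop uses the standard schedule: shrink $$\lambda$$ when a step reduces the error and grow it when it doesn't. The core of it (same factors as the committed code):

```cpp
// After taking a step, compare the new error against the old one and adjust lambda.
double new_error = (values - sin(a + b * samples)).matrix().squaredNorm();
if (new_error < error) {
    lambda *= 0.8;  // error decreased: reduce damping
} else {
    lambda *= 1.2;  // error increased: increase damping
}
error = new_error;
```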
## 3
{:.question}