Commit 3739c8ea authored by Erik Strand

Solve nonlinear least squares via gradient descent

parent 0c1635ea
#add_library(sim
#    glut_grapher.cpp
#    glut_grapher.h
#)
#target_link_libraries(sim PUBLIC common ${OPENGL_LIBRARIES} ${GLUT_LIBRARY})
#target_include_directories(sim PUBLIC
#    ${OPENGL_INCLUDE_DIRS}
#    ${GLUT_INCLUDE_DIRS}
#)

add_executable(svd
    svd.cpp
)
target_link_libraries(svd shared_settings shared_code)

#add_executable(diffusion
#    diffusion.cpp
#)
#target_link_libraries(diffusion sim)

add_executable(levmar
    levenberg_marquardt.cpp
)
target_link_libraries(levmar shared_settings shared_code)
#include "xorshift.h"
#include <Eigen/Dense>
#include <iostream>
#include <random>
#include <vector>
using namespace std;
using namespace Eigen;
//--------------------------------------------------------------------------------------------------
XorShift64 rg1;
std::random_device rd{};
std::mt19937 rg2{rd()};
//--------------------------------------------------------------------------------------------------
Eigen::ArrayXd draw_n_uniform(uint32_t n) {
    Eigen::ArrayXd result(n);
    for (uint32_t i = 0; i < n; ++i) {
        result[i] = rg1.draw_double();
    }
    return result;
}
//--------------------------------------------------------------------------------------------------
// TODO Box-Muller transform my uniform samples instead of using STL
Eigen::ArrayXd draw_n_normal(uint32_t n, double std_deviation) {
    std::normal_distribution<> normal{0, std_deviation};
    Eigen::ArrayXd result(n);
    for (uint32_t i = 0; i < n; ++i) {
        result[i] = normal(rg2);
    }
    return result;
}
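//--------------------------------------------------------------------------------------------------
// A minimal sketch of the Box-Muller option mentioned in the TODO above: two independent uniform
// samples on (0, 1) yield one standard normal sample (the companion sample, which uses sin instead
// of cos, is discarded for brevity). Untested illustration, not part of the committed code; it
// assumes rg1.draw_double() returns values in (0, 1) and that <cmath> is available (Eigen/Dense
// pulls it in).
Eigen::ArrayXd draw_n_normal_box_muller(uint32_t n, double std_deviation) {
    constexpr double two_pi = 6.283185307179586;
    Eigen::ArrayXd result(n);
    for (uint32_t i = 0; i < n; ++i) {
        double const u1 = rg1.draw_double();
        double const u2 = rg1.draw_double();
        result[i] = std_deviation * std::sqrt(-2.0 * std::log(u1)) * std::cos(two_pi * u2);
    }
    return result;
}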
//--------------------------------------------------------------------------------------------------
int main() {
    // Warm up the xorshift generator.
    uint32_t constexpr n_warmup = 1000;
    for (uint32_t i = 0; i < n_warmup; ++i) {
        rg1.advance();
    }

    // Generate the data: x uniform on (0, 1), y = sin(2 + 3x) plus (optional) Gaussian noise.
    uint32_t constexpr n_samples = 100;
    double const std_deviation = 0.1;
    Eigen::ArrayXd samples = draw_n_uniform(n_samples);
    Eigen::ArrayXd errors = draw_n_normal(n_samples, std_deviation);
    //Eigen::ArrayXd values = sin(2 + 3 * samples) + errors;
    Eigen::ArrayXd values = sin(2 + 3 * samples); // noise currently disabled

    double a = 1;
    double b = 1;
    double lambda = 1e-3;
    constexpr bool vary_lambda = false;

    double error = (values - sin(a + b * samples)).matrix().squaredNorm();
    double new_error;
    Eigen::Vector2d grad;
    Eigen::Matrix2d mat;
    Eigen::ArrayXd tmp(n_samples);
    Eigen::Vector2d step;
    std::cout << "error: " << error << '\n';

    for (uint32_t i = 0; i < 100; ++i) {
        // Gradient of chi^2 with respect to a and b.
        tmp = (values - sin(a + b * samples)) * cos(a + b * samples);
        grad[0] = -2 * tmp.sum();
        grad[1] = -2 * (samples * tmp).sum();

        // M: half the Hessian of chi^2, with the diagonal scaled by (1 + lambda).
        tmp = values * sin(a + b * samples) + cos(2 * (a + b * samples));
        mat(0, 0) = tmp.sum() * (1 + lambda);
        mat(0, 1) = (samples * tmp).sum();
        mat(1, 0) = mat(0, 1);
        mat(1, 1) = (samples * samples * tmp).sum() * (1 + lambda);

        // Levenberg-Marquardt
        //step = -mat.inverse() * grad;
        // gradient descent
        step = -lambda * grad;
        a += step[0];
        b += step[1];

        new_error = (values - sin(a + b * samples)).matrix().squaredNorm();
        if (vary_lambda) {
            if (new_error < error) {
                lambda *= 0.8;
            } else {
                lambda *= 1.2;
            }
        }
        error = new_error;
        std::cout << "error: " << error << '\n';
    }
    std::cout << "a = " << a << ", b = " << b << '\n';
}
# Use SymPy to derive the gradient and Hessian of the squared error symbolically.
from sympy import *

a = Symbol("a", real=True)
b = Symbol("b", real=True)
x = Symbol("x", real=True)
y = Symbol("y", real=True)

error = (y - sin(a + b * x))**2

# First derivatives.
error_da = simplify(diff(error, a))
error_db = simplify(diff(error, b))
print(latex(error_da))
print(latex(error_db))

# Second derivatives.
error_daa = simplify(diff(error_da, a))
error_dab = simplify(diff(error_da, b))
error_dba = simplify(diff(error_db, a))
error_dbb = simplify(diff(error_db, b))
print(latex(error_daa))
print(latex(error_dab))
print(latex(error_dba))
print(latex(error_dbb))
My code is
[here](https://gitlab.cba.mit.edu/erik/nmm_2020_site/-/tree/master/_code/pset_07/cpp/svd.cpp). I
found $$a = 2.10697$$ and $$b = 2.91616$$.
## 2
Generate 100 points $$x$$ uniformly distributed between 0 and 1, and let $$y = \sin(2 + 3x) +
\zeta$$, where $$\zeta$$ is a Gaussian random variable with a standard deviation of 0.1. Write a
Levenberg-Marquardt routine to fit $$y = \sin(a + bx)$$ to this data set starting from $$a = b = 1$$
(remembering that the second-derivative term can be dropped in the Hessian), and investigate the
convergence for both fixed and adaptively adjusted $$\lambda$$ values.
We want to minimize
$$
\chi^2(a, b) = \sum_{i = 1}^N \left( y_i - \sin{\left(a + b x_i \right)} \right)^2
$$
The first derivatives of this function are
$$
\begin{aligned}
\frac{\partial \chi^2(a, b)}{\partial a} &= -2 \sum_{i = 1}^N \left(y_i - \sin{\left(a + b x_i \right)}\right) \cos{\left(a + b x_i \right)} \\
\frac{\partial \chi^2(a, b)}{\partial b} &= -2 \sum_{i = 1}^N x_i \left(y_i - \sin{\left(a + b x_i \right)}\right) \cos{\left(a + b x_i \right)}
\end{aligned}
$$
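For plain gradient descent, these first derivatives are all we need: each iteration steps along the negative gradient,

$$
\begin{bmatrix} a \\ b \end{bmatrix} \leftarrow
\begin{bmatrix} a \\ b \end{bmatrix} - \lambda
\begin{bmatrix} \frac{\partial \chi^2}{\partial a} \\ \frac{\partial \chi^2}{\partial b} \end{bmatrix}
$$

with $$\lambda$$ doubling as the step size.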
The second derivatives are
$$
\begin{aligned}
\frac{\partial^2 \chi^2(a, b)}{\partial a^2} &= 2 \sum_{i = 1}^N \left( y_i \sin{\left(a + b x_i \right)} + \cos{\left(2 (a + b x_i) \right)} \right) \\
\frac{\partial^2 \chi^2(a, b)}{\partial a \partial b} &= 2 \sum_{i = 1}^N x_i \left(y_i \sin{\left(a + b x_i \right)} + \cos{\left(2 (a + b x_i) \right)}\right) \\
\frac{\partial^2 \chi^2(a, b)}{\partial b^2} &= 2 \sum_{i = 1}^N x_i^2 \left(y_i \sin{\left(a + b x_i \right)} + \cos{\left(2 (a + b x_i) \right)}\right) \\
\end{aligned}
$$
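Since $$y_i \sin{\left(a + b x_i \right)} + \cos{\left(2 (a + b x_i) \right)} = \cos^2{\left(a + b x_i \right)} + \left(y_i - \sin{\left(a + b x_i \right)}\right) \sin{\left(a + b x_i \right)}$$, dropping the residual (second-derivative) term, as the problem statement allows, would leave the Gauss-Newton approximation

$$
\frac{\partial^2 \chi^2(a, b)}{\partial a^2} \approx 2 \sum_{i = 1}^N \cos^2{\left(a + b x_i \right)}
$$

with additional factors of $$x_i$$ and $$x_i^2$$ in the mixed and $$b$$ derivatives. My code keeps the full second derivatives.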
Using these we can construct our matrix $$\bold{M}$$.
$$
\bold{M} =
\begin{bmatrix}
\Large{\frac{1}{2} \frac{\partial^2 \chi^2}{\partial a^2}} \small{(1 + \lambda)} & \Large{\frac{1}{2} \frac{\partial^2 \chi^2}{\partial a \partial b}} \\
\Large{\frac{1}{2} \frac{\partial^2 \chi^2}{\partial b \partial a}} & \Large{\frac{1}{2} \frac{\partial^2 \chi^2}{\partial b^2}} \small{(1 + \lambda)}
\end{bmatrix}
$$
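A Levenberg-Marquardt step then solves this $$2 \times 2$$ system: with the gradient from above,

$$
\begin{bmatrix} \delta a \\ \delta b \end{bmatrix} = -\frac{1}{2} \bold{M}^{-1}
\begin{bmatrix} \frac{\partial \chi^2}{\partial a} \\ \frac{\partial \chi^2}{\partial b} \end{bmatrix}
$$

For small $$\lambda$$ this approaches a Newton step, while for large $$\lambda$$ the inflated diagonal turns it into a short step down the gradient.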
My implementation lives
[here](https://gitlab.cba.mit.edu/erik/nmm_2020_site/-/tree/master/_code/pset_07/cpp/levenberg_marquardt.cpp).
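For the adaptively adjusted $$\lambda$$, my loop uses the standard schedule: shrink $$\lambda$$ when a step reduces the error and grow it when it doesn't. The core of it (same factors as the committed code):

```cpp
// After taking a step, compare the new error against the old one and adjust lambda.
double new_error = (values - sin(a + b * samples)).matrix().squaredNorm();
if (new_error < error) {
    lambda *= 0.8;  // error decreased: reduce damping
} else {
    lambda *= 1.2;  // error increased: increase damping
}
error = new_error;
```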
## 3
{:.question}