Commit 9a174bad authored by Erik Strand

Quick writeup

parent 92a660b9
import numpy as np
import math
import matplotlib.pyplot as plt
N = 100
noise_size = 0.1
x = np.random.uniform(-5.0, 5.0, N)
y = np.tanh(x) + np.random.normal(0.0, noise_size, N)
# number of clusters
M = 3
# prevents variances from shrinking to zero
tiny = 0.1
# initialize input distribution means and variances
input_means = np.random.uniform(-5.0, 5.0, M)
print("input_means")
print(input_means)
input_vars = np.array([ 1.0 ] * M)
print("input_vars")
print(input_vars)
# define input distributions p(x | c_m)
def input_dist(x, m):
    mu = input_means[m]
    var = input_vars[m]
    scale = 1 / (np.sqrt(2.0 * math.pi * var))
    return scale * np.exp(-(x - mu)**2.0 / (2 * var))
print("input probs at 0")
print([input_dist(0.0, i) for i in range(M)])
#print("input probs at 0")
#print([input_dist(0.0, i) for i in range(M)])
# initialize function parameters
beta = []
for _ in range(M):
    beta.append(np.random.uniform(-2.0, 2.0, 2))
# initialize functions
def cluster_func(x, m):
    # local linear model y = beta[m][0] + beta[m][1] * x
    return beta[m][0] + beta[m][1] * x
# initialize output variances
out_var = [ 1.0 ] * M
print("output variances")
print(out_var)
# define output distributions p(y | x, c_m)
def output_dist(y, x, m):
    mu = cluster_func(x, m)
    var = out_var[m]
    scale = 1 / (np.sqrt(2.0 * math.pi * var))
    return scale * np.exp(-(y - mu)**2.0 / (2 * var))
print("input probs for 0 -> 0")
print([output_dist(0.0, 0.0, i) for i in range(M)])
#print("input probs for 0 -> 0")
#print([output_dist(0.0, 0.0, i) for i in range(M)])
# initialize cluster probabilities p(c_m)
p_c = [ 1.0 / M ] * M
print("cluster probs p(c_m)")
print(p_c)
# define posterior distributions p(c_m | y, x)
# TODO denominator is shared, can calculate once each cycle
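# p(c_m | y, x) = p(y | x, c_m) p(x | c_m) p(c_m) / sum_i p(y | x, c_i) p(x | c_i) p(c_i)  (Bayes' rule)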
def posterior(m, y, x):
    numerator = output_dist(y, x, m) * input_dist(x, m) * p_c[m]
    denominator = sum(output_dist(y, x, i) * input_dist(x, i) * p_c[i] for i in range(M))
    return numerator / denominator
print("posterior(cluster 0 | y = 0, x = 0)")
print(posterior(0, 0.0, 0.0))
#print("posterior(cluster 0 | y = 0, x = 0)")
#print(posterior(0, 0.0, 0.0))
# define cluster weighted expectation helper
# values should be an N long array (your quantity of interest evaluated for all samples)
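# computes <v>_m = sum_n v_n p(c_m | y_n, x_n) / sum_n p(c_m | y_n, x_n)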
def cw_expectation(values, m):
    numerator = sum(values[n] * posterior(m, y[n], x[n]) for n in range(N))
    #denominator = sum(posterior(m, y[n], x[n]) for n in range(N))
    denominator = N * p_c[m]  # equals the commented sum once p_c has been updated this cycle
    return numerator / denominator
def print_everything():
    print("================================================================================")
    print("cluster probs p(c_m)")
    print(p_c)
    print("input_means")
    print(input_means)
    print("input_variances")
    print(input_vars)
    print("beta (function parameters)")
    print(beta)
    print("output variances")
    print(out_var)

def plot_name(i):
    return f'{i:03}'

def make_plot(name):
    # make plot
    fig1 = plt.figure()
    left, bottom, width, height = 0.1, 0.1, 0.8, 0.8
    ax1 = fig1.add_axes([left, bottom, width, height])
    ax1.set_ylim(-2.0, 2.0)
    # add samples
    ax1.scatter(x, y)
    # add local linear models
    for m in range(M):
        x_min = input_means[m] - 2.0 * np.sqrt(input_vars[m])
        x_max = input_means[m] + 2.0 * np.sqrt(input_vars[m])
        ax1.plot([x_min, x_max], [cluster_func(x_min, m), cluster_func(x_max, m)], "k-")
    #plt.show(fig1)
    fig1.savefig(f"../../assets/img/11_cwm_{name}.png", transparent=True)
    plt.close()
def em_step():
    # update cluster probabilities
    # TODO posterior should operate on vectors directly
    for m in range(M):
        p_c[m] = (1.0 / N) * sum(posterior(m, y[n], x[n]) for n in range(N))
    # update input means
    # TODO do this computation as a vector thing
    for m in range(M):
        input_means[m] = cw_expectation(x, m)
    # update input variances
    for m in range(M):
        input_vars[m] = cw_expectation((x - input_means[m])**2.0, m) + tiny
    # update functions
    for m in range(M):
        a = np.array([cw_expectation(y, m), cw_expectation(x * y, m)])
        #print("a")
        #print(a)
        B = np.array([
            [1.0, input_means[m]],
            [input_means[m], cw_expectation(x**2.0, m)]
        ])
        #print("B")
        #print(B)
        # solve the normal equations B beta = a for the cluster-weighted linear fit
        beta[m] = np.linalg.inv(B).dot(a)
    # update output variances
    for m in range(M):
        predicted = np.array([cluster_func(x[n], m) for n in range(N)])
        out_var[m] = cw_expectation((y - predicted)**2.0, m) + tiny
make_plot(plot_name(0))
print_everything()
for i in range(50):
    em_step()
    make_plot(plot_name(i + 1))
    print_everything()
…linear model for the clusters, $$y = a + bx$$. Plot the data and the resulting forecast $$\langle y | x \rangle$$, uncertainty $$\langle \sigma_y^2 | x \rangle$$, and cluster probabilities $$p(x|c_m) p(c_m)$$ as the number of clusters is increased (starting with one), and animate their evolution during the EM iterations.

My code lives [here](https://gitlab.cba.mit.edu/erik/nmm_2020_site/-/tree/master/_code/pset_11).
Here's a video of the convergence progress with three clusters.
<video width="480" height="320" controls="controls" muted playsinline>
  <source type="video/mp4" src="../assets/mp4/11_cwm.mp4">
</video>
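
The forecast and uncertainty asked for above follow directly from the fitted model. Here's a minimal sketch of computing them at a single point, reusing `input_dist`, `cluster_func`, `p_c`, `out_var`, and `M` from the script; `forecast` is a helper name introduced here, not part of the committed code:

```python
import numpy as np

def forecast(x0):
    # cluster weights p(x | c_m) p(c_m), normalized over clusters
    w = np.array([input_dist(x0, m) * p_c[m] for m in range(M)])
    w /= w.sum()
    # conditional forecast <y | x> = sum_m w_m f_m(x)
    y_hat = sum(w[m] * cluster_func(x0, m) for m in range(M))
    # conditional uncertainty <sigma_y^2 | x> = sum_m w_m (sigma_m^2 + f_m(x)^2) - <y | x>^2
    y_var = sum(w[m] * (out_var[m] + cluster_func(x0, m)**2) for m in range(M)) - y_hat**2
    return y_hat, y_var
```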