Commit 9f17e634 authored by Erik Strand

Add eleventh pset

parents 6a7bb75e 1817db9e
import numpy as np
import math
import matplotlib.pyplot as plt
N = 100
noise_size = 0.1
x = np.random.uniform(-5.0, 5.0, N)
y = np.tanh(x) + np.random.normal(0.0, noise_size, N)
# number of clusters
M = 3
# prevents variances from shrinking to zero
tiny = 0.1
# initialize input distribution means and variances
input_means = np.random.uniform(-5.0, 5.0, M)
input_vars = np.array([ 1.0 ] * M)
# define input distributions p(x | c_m)
def input_dist(x, m):
    mu = input_means[m]
    var = input_vars[m]
    scale = 1 / (np.sqrt(2.0 * math.pi * var))
    return scale * np.exp(-(x - mu)**2.0 / (2 * var))
#print("input probs at 0")
#print([input_dist(0.0, i) for i in range(M)])
# initialize function parameters
beta = []
for _ in range(M):
    beta.append(np.random.uniform(-2.0, 2.0, 2))
# initialize functions
def cluster_func(x, m):
    return beta[m][0] + beta[m][1] * x
# initialize output variances
out_var = [ 1.0 ] * M
# define output distributions p(y | x, c_m)
def output_dist(y, x, m):
    mu = cluster_func(x, m)
    var = out_var[m]
    scale = 1 / (np.sqrt(2.0 * math.pi * var))
    return scale * np.exp(-(y - mu)**2.0 / (2 * var))
#print("input probs for 0 -> 0")
#print([output_dist(0.0, 0.0, i) for i in range(M)])
# initialize cluster probabilities p(c_m)
p_c = [ 1.0 / M ] * M
# define posterior distributions p(c_m | y, x)
# TODO denominator is shared, can calculate once each cycle
def posterior(m, y, x):
    numerator = output_dist(y, x, m) * input_dist(x, m) * p_c[m]
    denominator = sum(output_dist(y, x, i) * input_dist(x, i) * p_c[i] for i in range(M))
    return numerator / denominator
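# Sketch of the TODO above (not used below): compute all posteriors at once so the
# shared denominator is only evaluated once per sample. Reuses the same globals as
# posterior() (x, y, p_c, M, N).
def posterior_table():
    # joint[n, m] = p(y_n | x_n, c_m) p(x_n | c_m) p(c_m)
    joint = np.array([
        [output_dist(y[n], x[n], m) * input_dist(x[n], m) * p_c[m] for m in range(M)]
        for n in range(N)
    ])
    # normalizing each row gives p(c_m | y_n, x_n) with one denominator per point
    return joint / joint.sum(axis=1, keepdims=True)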
#print("posterior(cluster 0 | y = 0, x = 0)")
#print(posterior(0, 0.0, 0.0))
# define cluster weighted expectation helper
# values should be an N long array (your quantity of interest evaluated for all samples)
def cw_expectation(values, m):
    numerator = sum(values[n] * posterior(m, y[n], x[n]) for n in range(N))
    denominator = N * p_c[m]
    # equivalently, could use
    #denominator = sum(posterior(m, y[n], x[n]) for n in range(N))
    return numerator / denominator
def print_everything():
    print("================================================================================")
    print("cluster probs p(c_m)")
    print(p_c)
    print("input_means")
    print(input_means)
    print("input_variances")
    print(input_vars)
    print("beta (function parameters)")
    print(beta)
    print("output variances")
    print(out_var)
def plot_name(i):
    return f'{i:03}'
def make_plot(name):
    # make plot
    fig1 = plt.figure()
    left, bottom, width, height = 0.1, 0.1, 0.8, 0.8
    ax1 = fig1.add_axes([left, bottom, width, height])
    ax1.set_ylim(-2.0, 2.0)
    # add samples
    ax1.scatter(x, y)
    # add local linear models
    for m in range(M):
        x_min = input_means[m] - 2.0 * np.sqrt(input_vars[m])
        x_max = input_means[m] + 2.0 * np.sqrt(input_vars[m])
        ax1.plot([x_min, x_max], [cluster_func(x_min, m), cluster_func(x_max, m)], "k-")
    #plt.show(fig1)
    fig1.savefig(f"../../assets/img/11_cwm_{name}.png", transparent=True)
    plt.close()
def em_step():
    # update cluster probabilities
    # TODO posterior should operate on vectors directly
    for m in range(M):
        p_c[m] = (1.0 / N) * sum(posterior(m, y[n], x[n]) for n in range(N))
    # update input means
    # TODO do this computation as a vector thing
    for m in range(M):
        input_means[m] = cw_expectation(x, m)
    # update input variances
    for m in range(M):
        input_vars[m] = cw_expectation((x - input_means[m])**2.0, m) + tiny
    # update functions
    for m in range(M):
        a = np.array([cw_expectation(y, m), cw_expectation(x * y, m)])
        #print("a")
        #print(a)
        B = np.array([
            [1.0, input_means[m]],
            [input_means[m], cw_expectation(x**2.0, m)]
        ])
        #print("B")
        #print(B)
        beta[m] = np.linalg.inv(B).dot(a)
    # update output variances
    for m in range(M):
        predicted = np.array([cluster_func(x[n], m) for n in range(N)])
        out_var[m] = cw_expectation((y - predicted)**2.0, m) + tiny
print_everything()
make_plot(plot_name(0))
for i in range(50):
    em_step()
    make_plot(plot_name(i + 1))
    print_everything()
---
title: Problem Set 11 (Density Estimation)
---
## 1
{:.question}
Generate a data set by sampling 100 points from $$y = \tanh(x)$$ between $$x = -5$$ and $$5$$, adding
random noise with a magnitude of 0.25 to $$y$$. Fit it with cluster-weighted modeling, using a local
linear model for the clusters, $$y = a + bx$$. Plot the data and the resulting forecast $$\langle y
| x \rangle$$, uncertainty $$\langle \sigma_y^2 | x \rangle$$, and cluster probabilities $$p(x|c_m)
p(c_m)$$ as the number of clusters is increased (starting with one), and animate their evolution
during the EM iterations.
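For reference, with local linear models $$f_m(x) = a_m + b_m x$$, per-cluster output variances
$$\sigma_{y,m}^2$$, and weights $$p(x|c_m) p(c_m)$$, these quantities are (in the usual
cluster-weighted formulation, which is the model my script fits)

$$
\langle y | x \rangle = \frac{\sum_m f_m(x) \, p(x|c_m) \, p(c_m)}{\sum_m p(x|c_m) \, p(c_m)},
\qquad
\langle \sigma_y^2 | x \rangle = \frac{\sum_m \left( \sigma_{y,m}^2 + f_m(x)^2 \right) p(x|c_m) \, p(c_m)}{\sum_m p(x|c_m) \, p(c_m)} - \langle y | x \rangle^2 .
$$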
My code lives [here](https://gitlab.cba.mit.edu/erik/nmm_2020_site/-/tree/master/_code/pset_11).
Here's a video of 50 EM iterations with three clusters. I'm displaying the local linear models over
a $$4 \sigma$$ range. (I don't indicate the output variance in any way; a sketch of how it could be
added is at the end of this post.)
<video width="480" height="320" controls="controls" muted playsinline>
<source type="video/mp4" src="../assets/mp4/11_cwm.mp4">
</video>
Not surprisingly, three clusters seem to work best for tanh. Any fewer and the fit degrades; any
more and some clusters end up with needlessly similar local models.
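Here's a rough sketch of how the forecast and a one-sigma uncertainty band could be overlaid on the
plots. It's meant to run at the end of the script above, since it reuses `input_dist`,
`cluster_func`, `out_var`, `p_c`, `M`, and the samples `x`, `y`; I haven't wired it into the
animation.

```python
# Overlay the forecast <y|x> and a one-sigma band, using the fitted model above.
x_grid = np.linspace(-5.0, 5.0, 200)
y_mean = np.zeros_like(x_grid)
y_var = np.zeros_like(x_grid)
for i, xv in enumerate(x_grid):
    # cluster weights at this x: p(x | c_m) p(c_m), normalized
    w = np.array([input_dist(xv, m) * p_c[m] for m in range(M)])
    w /= w.sum()
    f = np.array([cluster_func(xv, m) for m in range(M)])
    y_mean[i] = np.dot(w, f)
    # conditional variance: sum_m w_m (sigma_m^2 + f_m(x)^2) - <y|x>^2
    y_var[i] = np.dot(w, np.array(out_var) + f**2) - y_mean[i]**2

fig = plt.figure()
ax = fig.add_axes([0.1, 0.1, 0.8, 0.8])
ax.set_ylim(-2.0, 2.0)
ax.scatter(x, y)
ax.plot(x_grid, y_mean, "k-")
ax.fill_between(x_grid, y_mean - np.sqrt(y_var), y_mean + np.sqrt(y_var), alpha=0.3)
plt.show()
```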