In [58]:

import matplotlib.pyplot as plt
import numpy as np
import scipy.interpolate
import scipy.stats

In [59]:

# Collect a ton of samples (takes ~20 minutes to run!).
num_samples=100000000
results = np.zeros(num_samples)
for i in xrange(num_samples):
    results[i] = np.random.random(12).sum() - 6

In [78]:

n, bins, _ = plt.hist(results, normed=True, histtype='step', bins=250)
plt.title("Histogram of sample data")

Out[78]:

<matplotlib.text.Text at 0xe00d3cc>

In [79]:

# Smooth out the histogram data with interpolation.
x_old = bins[:-1] + (bins[1:] - bins[:-1]) / 2.0  # midpoints of the bins
y_old = n  # number of points in the bins
x_new = np.linspace(-8, 8, num=100000)
y_new = scipy.interpolate.spline(x_old, y_old, x_new)
plt.title("Interpolated sample data")
plt.plot(x_new, y_new)

Out[79]:

[<matplotlib.lines.Line2D at 0xe00782c>]

In [80]:

# Get the PDF of the normal distribution with mean 0 and variance 1.
pdf = scipy.stats.norm().pdf
y_norm = pdf(x_new)

In [81]:

# Plot the normal PDF minus the sample PDF. Values of y > 0 show where
# the normal distribution is higher than the sample distribution.
plt.plot(x_new, y_norm - y_new, label='N(0,1) - samples')
plt.legend()

Out[81]:

<matplotlib.legend.Legend at 0xe2532cc>