import matplotlib.pyplot as plt
import numpy as np
import scipy.stats
def get_distributions(n, p):
    """Get a binomial distribution and its approximating normal distribution.

    Args:
        n: Number of trials of the binomial distribution.
        p: Success probability of each trial.

    Returns:
        A ``(binom, normal)`` tuple of frozen ``scipy.stats`` distributions.
        ``normal`` has the same mean (``n * p``) and standard deviation
        (``sqrt(n * p * (1 - p))``) as ``binom``.
    """
    binom = scipy.stats.binom(n, p)
    # Moment matching: the normal takes the binomial's mean and std. dev.
    mu = n * p
    sigma = np.sqrt(n * p * (1.0 - p))
    normal = scipy.stats.norm(loc=mu, scale=sigma)
    return binom, normal
def binom_approx_pdf(n, normal_dist):
    """Approximate a binomial PMF value using the given normal distribution.

    Applies the continuity correction: the probability of the integer
    outcome ``n`` is taken to be the normal probability mass on the interval
    ``[n - 0.5, n + 0.5]``.

    Args:
        n: Outcome (number of successes) at which to evaluate the
            approximation. Despite the name, this is not the trial count.
        normal_dist: Object exposing a ``cdf`` method, e.g. a frozen
            ``scipy.stats.norm`` distribution.

    Returns:
        ``normal_dist.cdf(n + 0.5) - normal_dist.cdf(n - 0.5)``.
    """
    cdf = normal_dist.cdf
    return cdf(n + 0.5) - cdf(n - 0.5)


# Element-wise version of binom_approx_pdf. Declaring ``otypes`` lets the
# wrapper accept empty inputs; without it np.vectorize has to probe the
# function with the first element and raises ValueError on size-0 arrays.
vectorized_binom_approx_pdf = np.vectorize(binom_approx_pdf, otypes=[float])
Visualize the difference between the binomial and its normal approximation for single values of n and p.
# Parameters of the binomial distribution under study.
n = 20
p = 0.3

# Every possible outcome of the binomial: the integers 0..n inclusive.
x_int = np.arange(n + 1)
binom, normal = get_distributions(n, p)
y_binom = binom.pmf(x_int)
y_approx = vectorized_binom_approx_pdf(x_int, normal)

# Plot the normal PDF on a dense grid reaching one unit past each end of
# the binomial's support.
x = np.linspace(-1, n + 1, num=1000)
y_norm = normal.pdf(x)
plt.plot(x, y_norm, label='Normal')

# Plot the binomial PMF as a step curve. The x positions are shifted by
# +0.5 and the y values zero-padded at both ends, so that (with the default
# "pre" step style) each bar spans k - 0.5 .. k + 0.5 around its integer k.
plot_x = np.concatenate(([-1], x_int, [n + 1])) + 0.5
plot_y_binom = np.concatenate(([0], y_binom, [0]))
# The ``hold`` keyword is intentionally not passed: successive pyplot calls
# already draw onto the current axes, and matplotlib >= 3.0 rejects it.
plt.step(plot_x, plot_y_binom, label='Binomial(n={0}, p={1})'.format(n, p))

# Plot the approximate binomial PMF derived from the normal distribution,
# using the same step layout as the exact PMF.
plot_y_approx = np.concatenate(([0], y_approx, [0]))
plt.step(plot_x, plot_y_approx, label='Binomial Approximation')
plt.legend()
<matplotlib.legend.Legend at 0xb61a1ac>
From the same data as plotted above, show the absolute differences between the binomial and its normal approximation at each value.
# Magnitude of the gap between the exact PMF and its normal-based
# approximation at each integer outcome.
abs_diffs = np.abs(y_binom - y_approx)
# Zero-pad both ends so the values line up with the shifted plot_x grid.
plt.step(
    plot_x,
    np.concatenate(([0], abs_diffs, [0])),
    label='Absolute diff. between exact and approx',
)
plt.legend()
<matplotlib.legend.Legend at 0xb70efac>
Print the index at which the exact and approximate PMFs differ the most, together with the size of that difference.
# Find where the approximation is worst. abs_diffs is indexed by the outcomes
# 0..n, so the position returned by argmax is also the outcome value itself.
max_idx = np.argmax(abs_diffs)
max_diff = abs_diffs[max_idx]
# A single parenthesised argument prints the same text under Python 2 and 3;
# the bare print statement is a SyntaxError on Python 3.
print('Max diff {0} at {1}'.format(max_diff, max_idx))
Max diff 0.00956993285131 at 4