In [1]:

import matplotlib.pyplot as plt
import numpy as np
import scipy.stats

In [2]:

def get_distributions(n, p):
    """Build Binomial(n, p) together with the normal distribution approximating it.

    The normal distribution is matched on the first two moments of the
    binomial: its mean is n * p and its standard deviation is
    sqrt(n * p * (1 - p)).

    Returns a (binomial, normal) pair of frozen scipy.stats distributions.
    """
    variance = n * p * (1.0 - p)
    exact = scipy.stats.binom(n, p)
    approx = scipy.stats.norm(loc=n * p, scale=np.sqrt(variance))
    return exact, approx

def binom_approx_pdf(n, normal_dist):
    """Approximate a binomial probability from the given normal distribution.

    Applies a continuity correction: the probability of the integer value
    ``n`` is taken to be the normal probability mass on the interval
    [n - 0.5, n + 0.5].

    Args:
        n: Value at which to evaluate the approximation. Note that this is
            the point of evaluation, not the binomial's number of trials.
        normal_dist: Object exposing a ``cdf`` method, e.g. the normal
            distribution returned by ``get_distributions``.

    Returns:
        ``normal_dist.cdf(n + 0.5) - normal_dist.cdf(n - 0.5)``.
    """
    cdf = normal_dist.cdf
    return cdf(n + 0.5) - cdf(n - 0.5)

# Element-wise version of binom_approx_pdf over its first argument.
# - otypes is given explicitly so that empty inputs work: without it,
#   np.vectorize must call the function once to infer the output type and
#   raises ValueError on size-0 input.
# - normal_dist is excluded (both as positional index 1 and by keyword) so the
#   distribution object is passed through unchanged rather than broadcast.
vectorized_binom_approx_pdf = np.vectorize(
    binom_approx_pdf,
    otypes=[float],
    excluded={1, 'normal_dist'},
)

Visualize the difference between the binomial distribution and its normal approximation for a single choice of n and p.

In [3]:

n = 20
p = 0.3

# Every value the binomial can take: 0, 1, ..., n.
x_int = np.arange(n + 1)

binom, normal = get_distributions(n, p)
y_binom = binom.pmf(x_int)
y_approx = vectorized_binom_approx_pdf(x_int, normal)

# Plot the normal PDF.
x = np.linspace(-1, n + 1, num=1000)
y_norm = normal.pdf(x)
plt.plot(x, y_norm, label='Normal')

# Plot the exact binomial PMF as a step function. The x positions are shifted
# by 0.5 so that, with plt.step's default where='pre', the step for integer k
# spans (k - 0.5, k + 0.5]; a zero is padded on each side to close the outline.
# Successive plot calls draw on the same axes by default, so the `hold`
# keyword (removed in Matplotlib 3.0) is not needed.
plot_x = np.arange(-1, n + 2) + 0.5
plot_y_binom = np.concatenate(([0.0], y_binom, [0.0]))
plt.step(plot_x, plot_y_binom, label='Binomial(n={0}, p={1})'.format(n, p))

# Plot the approximate binomial PMF based on the normal distribution.
plot_y_approx = np.concatenate(([0.0], y_approx, [0.0]))
plt.step(plot_x, plot_y_approx, label='Binomial Approximation')

plt.legend()

Out[3]:

<matplotlib.legend.Legend at 0xb61a1ac>

From the same data as plotted above, show the absolute differences between the binomial and its normal approximation at each value.

In [4]:

# Pointwise gap between the exact binomial values and their approximation.
abs_diffs = np.abs(y_binom - y_approx)
# Zero-pad both ends so the values line up with the step edges in plot_x.
plt.step(
    plot_x,
    np.concatenate(([0], abs_diffs, [0])),
    label='Absolute diff. between exact and approx',
)
plt.legend()

Out[4]:

<matplotlib.legend.Legend at 0xb70efac>

Print the index at which the exact and approximate values differ the most, along with the size of that difference.

In [5]:

# Locate the point where the approximation is worst. Entry i of abs_diffs
# corresponds to the value x_int[i] == i, so the index is also the value.
max_idx = np.argmax(abs_diffs)
max_diff = abs_diffs[max_idx]
# Calling print with a single parenthesised argument works on both Python 2
# and Python 3; the bare print statement is a SyntaxError on Python 3.
print('Max diff {0} at {1}'.format(max_diff, max_idx))

Max diff 0.00956993285131 at 4