import matplotlib.pyplot as plt
import numpy as np
import scipy.stats
n = 100
probs = np.array([0.1, 0.2, 0.3, 0.4])
assert probs.sum() == 1.0
binomials = [scipy.stats.binom(n, p) for p in probs]
mults = np.random.multinomial(n, probs, size=10000)
xs = np.linspace(0, 100, num=1000)
# Plot the exact PMF of each binomial distribution in gray.
for i in xrange(len(probs)):
binom = binomials[i]
ys = [binom.pmf(int(x)) for x in xs]
plt.fill_between(xs, 0, ys, linewidth=0, color='black', alpha=0.5)
# Plot the histogram of each category from the multinomial distribution.
for i, prob in enumerate(probs):
label = 'Category {0}, Probability {1}'.format(i + 1, prob)
plt.hist(mults[:, i], range=(0, n), normed=True, bins=n, alpha=0.5, label=label)
plt.xlim(0, 60)
plt.legend()
plt.title('Histogram of each category based on 10,000 multinomial samples.\n'
'Plotted on top of the exact PDFs of the individual binomial distributions.')
<matplotlib.text.Text at 0xae3482c>