How well does the sum of 12 Uniform[0, 1] variates minus 6 approximate a Normal(0, 1) deviate?
import numpy as np
import matplotlib.pyplot as plt
def uniform_sums(size):
    """Return `size` draws of (sum of 12 independent Uniform[0, 1] variates) - 6.

    Each draw has mean 0 and variance 12 * (1/12) = 1, so by the CLT the
    values are approximately N(0, 1) distributed.

    Parameters
    ----------
    size : int
        Number of draws to return.

    Returns
    -------
    list of float
        `size` approximately-standard-normal deviates.
    """
    # Draw all size*12 uniforms in one vectorized call and reduce along the
    # second axis, instead of `size` Python-level loop iterations.
    return list(np.random.uniform(size=(size, 12)).sum(axis=1) - 6)
num_draws = 1000000
uniforms = uniform_sums(size=num_draws)
normal = np.random.normal(size=num_draws)
# Shared bin edges so the two histograms are directly comparable.
bins = np.linspace(-6, 6, num=100)
for sample, label in ((uniforms, 'sums of 12'), (normal, 'normal')):
    plt.hist(sample, label=label, bins=bins, histtype='step')
plt.legend();
If $X_i \sim U[0, 1]$ and $Y_i = X_i - 1/2$, then $Y_i \sim U[-1/2, 1/2]$ and $\varphi_{Y_i}(t) = \int_{-1/2}^{1/2} e^{i t y} dy = \int_{-1/2}^{1/2} \cos(ty) dy = \frac{2}{t} \sin\frac{t}{2},$
so if $Y = \sum_{i = 1}^{12} Y_i$ for independent $Y_i$, then $\varphi(Y) = \left( \frac{2}{t} \sin\frac{t}{2} \right)^{12}.$
Use the inversion formula to produce a way to numerically evaluate the p.d.f. of Y:
$$p_Y(y) = \frac{1}{2\pi} \int_{-\infty}^\infty e^{-ity} \left( \frac{2}{t} \sin\frac{t}{2} \right)^{12} dt = \frac{1}{2\pi} \int_{-\infty}^\infty \cos(ty) \left( \frac{2}{t} \sin\frac{t}{2} \right)^{12} dt$$
import scipy.integrate
import scipy.stats
def inverse_transform(x):
    """Evaluate the p.d.f. of Y = (sum of 12 Uniform[0, 1] deviates) - 6 at x.

    Numerically inverts the characteristic function
    phi(t) = ((2/t) sin(t/2))**12 via the Fourier inversion formula:
    p(x) = (1/2pi) * integral over R of cos(t*x) * phi(t) dt.

    Parameters
    ----------
    x : float
        Point at which to evaluate the density.

    Returns
    -------
    float
        Estimated density value at x.
    """
    def integrand(t):
        # (2/t) * sin(t/2) == np.sinc(t / (2*pi)) with numpy's normalized
        # sinc; sinc handles the removable singularity at t = 0, avoiding
        # the divide-by-zero the naive 2*sin(t/2)/t form hits.
        return 1 / (2 * np.pi) * np.sinc(t / (2 * np.pi))**12 * np.cos(t * x)
    # np.infty was removed in NumPy 2.0; np.inf is the supported spelling.
    p, _ = scipy.integrate.quad(integrand, -np.inf, np.inf)
    return p
# Vectorized wrapper so the scalar inversion routine accepts arrays.
sum_of_uniform_pdf = np.vectorize(inverse_transform)
# Standard-normal density for comparison.
norm_pdf = scipy.stats.norm().pdf
# Evaluation grid over [-6, 6]; keep the spacing for Riemann-sum checks.
xs, step = np.linspace(-6, 6, num=999, retstep=True)
ps_normal = norm_pdf(xs)
ps_uniform_integration = sum_of_uniform_pdf(xs)
Confirm that the p.d.f. produced is properly normalized:
# Riemann sum of the numerically inverted p.d.f.; should be very close to 1.
ps_uniform_integration.sum() * step
0.99999999921025085
Visualize the absolute difference between this distribution and a real N(0, 1) distribution.
# Overlay the two densities, then plot their pointwise difference.
for ys, label in ((ps_uniform_integration, "sum of uniforms"), (ps_normal, "N(0, 1)")):
    plt.plot(xs, ys, label=label)
plt.legend()
plt.title("Both pdfs");
plt.plot(xs, ps_uniform_integration - ps_normal)
plt.title('Absolute difference');
xs, step = np.linspace(-6, 6, num=999, retstep=True)
# Indicator (density) of Uniform[-1/2, 1/2] sampled on the grid, built as a
# vectorized mask instead of a Python list comprehension.
uniform = ((xs >= -0.5) & (xs <= 0.5)).astype(float)
# Convolving the box with itself 11 times gives (up to normalization) the
# density of a sum of 12 uniforms.  np.convolve replaces the removed
# scipy.convolve top-level alias.
ps_uniform_convolution = uniform
for _ in range(11):
    ps_uniform_convolution = np.convolve(ps_uniform_convolution, uniform, mode='same')
# Normalize so the Riemann sum of the p.d.f. equals 1.
ps_uniform_convolution /= ps_uniform_convolution.sum() * step
# Disagreement between the convolution and Fourier-inversion p.d.f. estimates.
plt.plot(xs, ps_uniform_convolution - ps_uniform_integration);
Uh oh - the two methods don't exactly agree.
We can use the closed form expression of the Irwin-Hall p.d.f. as ground truth.
def irwin_hall(x, n):
    """Exact p.d.f. of a sum of n Uniform[0, 1] deviates, shifted to mean 0.

    Evaluates the closed-form Irwin-Hall density at x + n/2, so the
    distribution is centered at 0.  Works elementwise on numpy arrays.

    Parameters
    ----------
    x : float or numpy array
        Point(s) at which to evaluate the density.
    n : int
        Number of uniforms in the sum.

    Returns
    -------
    float or numpy array
        Density value(s) at x.
    """
    # scipy.misc.factorial and scipy.misc.comb were removed from SciPy;
    # the stdlib math module provides exact integer replacements.
    from math import comb, factorial
    x = x + n / 2  # shift from mean 0 to the standard Irwin-Hall support [0, n]
    return 1. / (2. * factorial(n - 1)) * sum(
        (-1)**k * comb(n, k) * (x - k)**(n - 1) * np.sign(x - k)
        for k in range(n + 1))
# Exact (closed-form) Irwin-Hall p.d.f. on the same grid, as ground truth.
ps_uniform_exact = irwin_hall(xs, 12)
# Error of the convolution estimate against the exact p.d.f.
plt.plot(xs, ps_uniform_exact - ps_uniform_convolution);
# Error of the Fourier-inversion estimate against the exact p.d.f.
plt.plot(xs, ps_uniform_exact - ps_uniform_integration);
Can anyone figure out how to make the convolution more accurate?
What is the expected number of independent Uniform[0, 1] deviates you need to sum to exceed 1?
(I learned about this from http://nbviewer.ipython.org/4731599/.)
def how_many_to_exceed(threshold):
    """Count how many Uniform[0, 1) draws it takes for the running total to
    reach `threshold`.

    Returns 0 immediately when `threshold` <= 0; for threshold = 1 the
    expected count is e.
    """
    running_total = 0.0
    draws = 0
    while True:
        # Stop as soon as the accumulated sum reaches the threshold.
        if running_total >= threshold:
            return draws
        running_total += np.random.uniform()
        draws += 1
# Monte-Carlo estimate of E[number of uniforms needed to exceed 1] = e.
num_draws = 100000
draws = np.array([how_many_to_exceed(1) for _ in range(num_draws)])
# Running mean after each draw; log x-axis shows early convergence detail.
running_average = np.cumsum(draws) / np.arange(1., num_draws + 1)
plt.semilogx(running_average, label='Average draws')
plt.axhline(np.e, color='red', linestyle='--', label='e = {:.8f}'.format(np.e))
plt.ylim(1, 4)
plt.legend();