See http://ivory.idyll.org/blog/2013-pycon-awesome-big-data-algorithms-talk.html

In [7]:
# Flip a lot of coins;
# Use the distribution of longest run of zeros to infer how many
# coinflips were done, ex post facto.
import random
def generate_coinflips(num):
    """Simulate `num` fair coin flips; return them as a list of 0/1 ints."""
    flips = []
    for _ in range(num):
        flips.append(random.choice((0, 1)))
    return flips

def longest_run_zero(x):
    """Return the length of the longest consecutive run of zeros in `x`.

    Bug fix: the original only folded `count` into `max_count` when a run
    was broken by a nonzero element, so a run of zeros reaching the end of
    the list (including an all-zeros list) was never counted -- e.g.
    [0, 0, 0] returned 0.  Updating the maximum as the run grows fixes
    that and also handles the empty list (returns 0).
    """
    count = 0       # length of the current run of zeros
    max_count = 0   # longest run seen so far
    for value in x:
        if value == 0:
            count += 1
            if count > max_count:
                max_count = count
        else:
            count = 0
    return max_count
In [8]:
def longest_run_mc(runsize, num):
    """Monte Carlo experiment: run `num` independent trials of `runsize`
    coin flips each, and return the list of longest-zero-run lengths,
    one per trial."""
    return [longest_run_zero(generate_coinflips(runsize))
            for _ in range(num)]
In [9]:
# do 100 runs of 1000 coinflips, and plot the distribution
d = longest_run_mc(100, 1000)
hist(d, bins=max(d), range=(0, max(d)))

# we expect a peak right around...
print math.log(100, 2)
6.64385618977

In [10]:
# with 500 coinflips per trial, the peak shifts right, to about log2(500)...
d = longest_run_mc(500, 1000)
hist(d, bins=max(d), range=(0, max(d)))
# print() form works in both Python 2 and 3
print(math.log(500, 2))
8.96578428466