aheights = [6*12+1]*5 + [6*12]*30 + [5*12+11]*5
bheights = [5*12]*20 + [7*12]*20

print 'means:',mean(aheights),mean(bheights)
print 'variances:',var(aheights),var(bheights)
print 'std deviations:',std(aheights),std(bheights)

figure(figsize=(7,5))
hist(aheights,bins=arange(59.5,90))
hist(bheights,bins=arange(59.5,90))
xlabel('inches'),ylabel('#people with that height')
legend(['mean {}, std {}'.format(mean(d),std(d)) for d in (aheights,bheights)]);

from scipy.misc import comb
def probs(N,p=.5):
   return array([comb(N,k)*p**k*(1-p)**(N-k) for k in range(N+1)])

randint(1,7,10)

data=randint(1,7,1000000)
mean(data),var(data)

hcount=[0 for i in range(11)]
trials = 1000
for j in xrange(trials):
    flips= randint(0,2,size=10)  #a sequence of 10 coin flips
    heads = sum(flips)   #count the number of heads
    hcount[heads]+=1    #accumulate head counts

bar(range(11),hcount,align='center')
plot(trials*probs(10),'ro--')
xticks(range(11))
xlim(-.5,10.5)
xlabel('# heads in ten flips')
ylabel('#trials with that #heads ('+str(trials)+' total)')
legend(['predicted','counts'])
#;

zip([round(x,3) for x in trials*probs(10)],hcount)

randint(0,2,size=10)

trials,flips=100,10
title('{} trials of {} flips'.format(trials,flips))
xlim(-.5,10.5),xticks(range(11))
#        same thing in one line of code
hist([sum(randint(0,2,size=10)) for j in xrange(trials)], bins=arange(-.5,11));

#now switch to trials of 100 flips
trials = 1000
results=[sum(randint(0,2,size=100)) for j in xrange(trials)]

results[:10]   #first ten results

hist(results,bins=range(101))
xlabel('# heads in 100 flips')
ylabel('#trials with that #heads ('+str(trials)+' total)')
#;

std(results),sqrt(100./4)  #compare Var to N*p*(1-p)

mean(results)

x=randint(1,7,size=10) #now roll a die ten times

print x,', sixes:', sum(x==6)

x==6    # tests whether members of array are equal to 6, True=1

def sixes(rolls=100):
    rolls=randint(1,7,size=rolls) #default to 100 rolls
    return sum(rolls==6)  # return the number of sixes

trials = 1000
results = [sixes(100) for j in xrange(trials)]

hist(results,bins=range(101))
xlabel('# sixes in 100 rolls of die')
ylabel('#trials with that #sixes ('+str(trials)+' total)')
#;

print 'mean',mean(results),100./6
print 'std',std(results),sqrt(100*(1/6.)*(5/6.)) #compare to N*p(1-p)

trials,flips=1000,100
hist(randint(0,2,(trials,flips)).sum(1),arange(flips+1));

from random import random
import re
from collections import Counter

tosses= ''
for i in range(100):
    if random() >= .5:
        tosses += 'H'
    else:
        tosses += 'T'

tosses

re.findall('H+|T+',tosses)[:12]

map(len,re.findall('H+|T+',tosses))[:12]

max(map(len,re.findall('H+|T+',tosses)))

def maxlength():
  tosses= ''
  for i in xrange(100):
    if random() >= .5:
        tosses += 'H'
    else:
        tosses += 'T'
  return max(map(len,re.findall('H+|T+',tosses)))

trials = 10000
results = [maxlength() for t in xrange(trials)]

results[:10]

max(results)

counts=Counter(results)

counts

[(x,counts[x]) for x in sorted(counts)]

figure(figsize=(8,6))
hist(results,bins=range(25))
xticks(arange(.5,25.5),range(25));
xlabel('max length string of heads or tails')
ylabel('occurred # times in '+str(trials)+' trials')
#;

cumul=cumsum([counts[i] for i in range(max(counts),-1,-1)])[::-1]

print trials,'trials of 100 flips'
print 'length k',' '*10,'prob longest:'
print ' '*10,'at least k',' '*10, 'exactly k'
for i in range(min(counts),max(counts)+1):
  if i not in counts: counts[i] = 0
  print '{:>2}            {:<20} {:<6}'.format(i,cumul[i]/float(trials),
                                                       counts[i]/10000.)

hist([ max(diff( where(abs(diff(randint(0,2,100)))==1) )[0]) for j in xrange(10000)],arange(-.5,30));