def gaussian(x,mu=0.,sigma=1.):
    return exp(-(x-mu)**2/(2.*sigma**2))/(sigma*sqrt(2.*pi))

x=arange(-5,5.1,.1)
y=gaussian(x)
plot(x,y)
xticks(range(-4,5))
xlim(-4,4);

def q(xmax): return 2*.0001*sum(gaussian(arange(0,xmax,.0001)))

[(sigma,round(q(sigma),5)) for sigma in range(1,5)]

figure(figsize=(8,4))
x=arange(-5,5.1,.1)
y=gaussian(x)
plot(x,y)
fill_between(x,y,color='blue',where=abs(x)<3.01)
fill_between(x,y,color='#00AAAA',where=abs(x)<2.01)
fill_between(x,y,color='cyan',where=abs(x)<1.01)
for i,iy,pct in (1,.235,' 68'),(2,.05,' 95'),(3,.02,'99.7'):
  annotate(pct+'%',xytext=(i+(.4 if i < 3 else .46),iy),xy=(.4,iy),
      arrowprops={'arrowstyle':"<-"},ha='center',va='center',fontsize=14)
  annotate('$\pm'+str(i)+'\sigma$',xy=(-(i+.05),iy),xytext=(0,iy),
      arrowprops={'arrowstyle':"->"},ha='center',va='center',fontsize=15)
xticks(range(-4,5))
xlim(-4,4);

def p(xmax): return .0001*sum(gaussian(arange(xmax,10,.0001)))

round(p(1.645),4)

x=arange(-5,5.1,.05)
y=gaussian(x)
plot(x,y)
fill_between(x,y,color='r',where=x>1.64)
annotate('$1.645\sigma$',xytext=(.5,.08),xy=(1.645,.08),
      arrowprops=dict(arrowstyle="->"),ha='center',va='center',fontsize=15)
annotate('5%',xytext=(2.5,.08),xy=(2,.02),
      arrowprops=dict(arrowstyle="->"),ha='center',va='center',fontsize=14)
xticks(arange(-4,5));
xlim(-4,4);

N=100
ps=rand(N)
print ps
m=sum(ps)
s=sqrt(sum(ps*(1-ps)))
print '\n sum = ',m,', sqrt of sum of variances=',s

trials = 100000
def rtrial(): return sum(rand(N) < ps)
results = [rtrial() for t in xrange(trials)]
print mean(results),std(results)

figure(figsize=(8,4))
hist(results,arange(-.5,101),label='data');
xg=arange(int(m-4*s)+1,int(m+4*s),.1)
yg = gaussian(xg,m,s)
plot(xg,trials*yg,'r-',label='gaussian',linewidth=1.5)
plot((m,m),(0,trials*gaussian(m,m,s)),'y--')
annotate('',xytext=(m-s,trials*gaussian(m-s,m,s)),xy=(m+s,trials*gaussian(m+s,m,s)),
      arrowprops=dict(arrowstyle="<->"))
title('data fit by gaussian with predicted mean={}, stdev={}'.format(round(m,1),round(s,1)))
xlim(int(m-4*s)+1,int(m+4*s));

def height(n):
    return [6 if tall else 5.5 for tall in random.random(n) > .5]

N=100
trials=100000
results = [mean(height(N)) for t in xrange(trials)]

print mean(results),'expected mean',5.75
print std(results),'expected std',1/40.

step=.005
hist(results,arange(5.5-step/2,6.1,step),label='data');
xg=arange(5.6,6,step/5)
yg=step*trials*gaussian(xg,5.75,1./40)
plot(xg,yg,'r')
xlabel('average height of '+str(N)+' bimodal people')
xlim(5.65,5.85);

def rn(p,n): return sum(random.random(n) <= p)

trials=100000
results = [rn(.08,194) for t in xrange(trials)]

print mean(results),'expected',.08*194
print std(results),'expected',sqrt(.08*.92*194)

hist(results,bins=arange(-.5,36))
xg=arange(0.,35.,.1)
yg=trials*gaussian(xg,.08*194,sqrt(.08*.92*194))
plot(xg,yg,'r-')
for b in 3.5,7.5,23.5,27.5: plot((b,b),(0,trials/20),'k--')

from random import normalvariate
normalvariate(0,1),normalvariate(100,20)

trials=1000
xdata=arange(-4,4,.1)
plot(xdata,trials*.1*gaussian(xdata),'r')
hist([normalvariate(0,1) for t in xrange(trials)],bins=xdata);
#same thing:
#hist(norm.rvs(0,1,trials),bins=xdata)

from scipy.stats import norm
#http://docs.scipy.org/doc/scipy-0.14.0/reference/generated/scipy.stats.norm.html
#has some other useful methods

norm.rvs(0,1,10)

from urllib2 import urlopen
from IPython.display import Image
Image(urlopen('https://courses.cit.cornell.edu/info2950_2014fa/resources/ci90.gif').read())

p=.55
n=1000
j=0  #png counter
sigma=sqrt(p*(1-p)/n)
ci90=1.645*sigma  #90% confidence interval
h=gaussian(0,0,sigma) #value of gaussian at peak
x=arange(p-6.5*sigma,p+6.5*sigma,.001)
for q in arange(p-ci90,p+ci90+.001,2*ci90/23):
  figure(figsize=(6,4))
  errorbar(p, 27.5, xerr=ci90, fmt='go', label='$\pm1.645\sigma$')
  y=gaussian(x,q,sigma) #centered at q, std=sigma
  plot(x,y,'b',label='$\sigma={:.3f}$'.format(sigma))
  fill_between(x,y,color='b',alpha=.2,where=abs(x-q)<1.65*sigma)
  ch=gaussian(p,q,sigma) #value of q-centered gaussian at p=.55
  vlines((p-ci90,p,p+ci90),0,(h,ch,h),color='g',linestyles='--')
  plot(p,ch,'go')
  for pl in (.524, .576): text(pl,.5,pl,ha='center')
  xlim(p-6.5*sigma,p+6.5*sigma),ylim(0,29)
  title('90% confidence interval (n={0:}, k={1:}, $\sigma=\sqrt{{{2:}\cdot{3:}/{0:}}}$)'.format(n,int(p*n),p,1-p))
  # label='' on the fill_between is broken, so a non-visible hack for the legend:
  fill((0,.1,0),(5.1,5.0,4.9), color='b', alpha=.2,label='$\pm1.645\sigma$') 
  legend(numpoints=1)
  savefig('pngs/img{:03d}.png'.format(j))
  j+=1
  close();
## then convert -delay 20 -loop 0 img*.png ci90.gif