def gaussian(x,mu=0.,sigma=1.): return exp(-(x-mu)**2/(2.*sigma**2))/(sigma*sqrt(2.*pi)) x=arange(-5,5.1,.1) y=gaussian(x) plot(x,y) xticks(range(-4,5)) xlim(-4,4); def q(xmax): return 2*.0001*sum(gaussian(arange(0,xmax,.0001))) [(sigma,round(q(sigma),5)) for sigma in range(1,5)] figure(figsize=(8,4)) x=arange(-5,5.1,.1) y=gaussian(x) plot(x,y) fill_between(x,y,color='blue',where=abs(x)<3.01) fill_between(x,y,color='#00AAAA',where=abs(x)<2.01) fill_between(x,y,color='cyan',where=abs(x)<1.01) for i,iy,pct in (1,.235,' 68'),(2,.05,' 95'),(3,.02,'99.7'): annotate(pct+'%',xytext=(i+(.4 if i < 3 else .46),iy),xy=(.4,iy), arrowprops={'arrowstyle':"<-"},ha='center',va='center',fontsize=14) annotate('$\pm'+str(i)+'\sigma$',xy=(-(i+.05),iy),xytext=(0,iy), arrowprops={'arrowstyle':"->"},ha='center',va='center',fontsize=15) xticks(range(-4,5)) xlim(-4,4); def p(xmax): return .0001*sum(gaussian(arange(xmax,10,.0001))) round(p(1.645),4) x=arange(-5,5.1,.05) y=gaussian(x) plot(x,y) fill_between(x,y,color='r',where=x>1.64) annotate('$1.645\sigma$',xytext=(.5,.08),xy=(1.645,.08), arrowprops=dict(arrowstyle="->"),ha='center',va='center',fontsize=15) annotate('5%',xytext=(2.5,.08),xy=(2,.02), arrowprops=dict(arrowstyle="->"),ha='center',va='center',fontsize=14) xticks(arange(-4,5)); xlim(-4,4); N=100 ps=rand(N) print ps m=sum(ps) s=sqrt(sum(ps*(1-ps))) print '\n sum = ',m,', sqrt of sum of variances=',s trials = 100000 def rtrial(): return sum(rand(N) < ps) results = [rtrial() for t in xrange(trials)] print mean(results),std(results) figure(figsize=(8,4)) hist(results,arange(-.5,101),label='data'); xg=arange(int(m-4*s)+1,int(m+4*s),.1) yg = gaussian(xg,m,s) plot(xg,trials*yg,'r-',label='gaussian',linewidth=1.5) plot((m,m),(0,trials*gaussian(m,m,s)),'y--') annotate('',xytext=(m-s,trials*gaussian(m-s,m,s)),xy=(m+s,trials*gaussian(m+s,m,s)), arrowprops=dict(arrowstyle="<->")) title('data fit by gaussian with predicted mean={}, stdev={}'.format(round(m,1),round(s,1))) xlim(int(m-4*s)+1,int(m+4*s)); def height(n): return [6 if tall else 5.5 for tall in random.random(n) > .5] N=100 trials=100000 results = [mean(height(N)) for t in xrange(trials)] print mean(results),'expected mean',5.75 print std(results),'expected std',1/40. step=.005 hist(results,arange(5.5-step/2,6.1,step),label='data'); xg=arange(5.6,6,step/5) yg=step*trials*gaussian(xg,5.75,1./40) plot(xg,yg,'r') xlabel('average height of '+str(N)+' bimodal people') xlim(5.65,5.85); def rn(p,n): return sum(random.random(n) <= p) trials=100000 results = [rn(.08,194) for t in xrange(trials)] print mean(results),'expected',.08*194 print std(results),'expected',sqrt(.08*.92*194) hist(results,bins=arange(-.5,36)) xg=arange(0.,35.,.1) yg=trials*gaussian(xg,.08*194,sqrt(.08*.92*194)) plot(xg,yg,'r-') for b in 3.5,7.5,23.5,27.5: plot((b,b),(0,trials/20),'k--') from random import normalvariate normalvariate(0,1),normalvariate(100,20) trials=1000 xdata=arange(-4,4,.1) plot(xdata,trials*.1*gaussian(xdata),'r') hist([normalvariate(0,1) for t in xrange(trials)],bins=xdata); #same thing: #hist(norm.rvs(0,1,trials),bins=xdata) from scipy.stats import norm #http://docs.scipy.org/doc/scipy-0.14.0/reference/generated/scipy.stats.norm.html #has some other useful methods norm.rvs(0,1,10) from urllib2 import urlopen from IPython.display import Image Image(urlopen('https://courses.cit.cornell.edu/info2950_2014fa/resources/ci90.gif').read()) p=.55 n=1000 j=0 #png counter sigma=sqrt(p*(1-p)/n) ci90=1.645*sigma #90% confidence interval h=gaussian(0,0,sigma) #value of gaussian at peak x=arange(p-6.5*sigma,p+6.5*sigma,.001) for q in arange(p-ci90,p+ci90+.001,2*ci90/23): figure(figsize=(6,4)) errorbar(p, 27.5, xerr=ci90, fmt='go', label='$\pm1.645\sigma$') y=gaussian(x,q,sigma) #centered at q, std=sigma plot(x,y,'b',label='$\sigma={:.3f}$'.format(sigma)) fill_between(x,y,color='b',alpha=.2,where=abs(x-q)<1.65*sigma) ch=gaussian(p,q,sigma) #value of q-centered gaussian at p=.55 vlines((p-ci90,p,p+ci90),0,(h,ch,h),color='g',linestyles='--') plot(p,ch,'go') for pl in (.524, .576): text(pl,.5,pl,ha='center') xlim(p-6.5*sigma,p+6.5*sigma),ylim(0,29) title('90% confidence interval (n={0:}, k={1:}, $\sigma=\sqrt{{{2:}\cdot{3:}/{0:}}}$)'.format(n,int(p*n),p,1-p)) # label='' on the fill_between is broken, so a non-visible hack for the legend: fill((0,.1,0),(5.1,5.0,4.9), color='b', alpha=.2,label='$\pm1.645\sigma$') legend(numpoints=1) savefig('pngs/img{:03d}.png'.format(j)) j+=1 close(); ## then convert -delay 20 -loop 0 img*.png ci90.gif