import pylab as py
import emcee
style_text="""
axes.titlesize : 24
axes.labelsize : 20
lines.linewidth : 3
lines.markersize : 5
xtick.labelsize : 20
ytick.labelsize : 20
figure.figsize : 10,8
axes.grid: True
grid.linestyle: -
grid.color: 0.75
font.size: 20
font.family: sans-serif
legend.fontsize: 20
legend.frameon: False
legend.numpoints: 1
legend.scatterpoints: 1
lines.solid_capstyle: round
text.color: .15
xtick.color: .15
ytick.color: .15
xtick.direction: out
ytick.direction: out
axes.axisbelow: True
# these are tableau20 colors
axes.color_cycle: 1f77b4, aec7e8,ff7f0e,ffbb78,2ca02c,98df8a,d62728,ff9896,9467bd,c5b0d5,8c564b,c49c94,e377c2,f7b6d2,7f7f7f,c7c7c7,bcbd22,dbdb8d,17becf,9edae5
"""
with open('mystyle.mplstyle','w') as fid:
fid.write(style_text)
py.style.use('mystyle.mplstyle')
def histogram(y,bins=50,plot=True):
N,bins=np.histogram(y,bins)
dx=bins[1]-bins[0]
if dx==0.0: # all in 1 bin!
val=bins[0]
bins=np.linspace(val-abs(val),val+abs(val),50)
N,bins=np.histogram(y,bins)
dx=bins[1]-bins[0]
x=bins[0:-1]+(bins[1]-bins[0])/2.0
y=N*1.0/np.sum(N)/dx
if plot:
py.plot(x,y,'o-')
yl=py.gca().get_ylim()
py.gca().set_ylim([0,yl[1]])
xl=py.gca().get_xlim()
if xl[0]<=0 and xl[0]>=0:
py.plot([0,0],[0,yl[1]],'k--')
return x,y
import time
def time2str(tm):
frac=tm-int(tm)
tm=int(tm)
s=''
sc=tm % 60
tm=tm//60
mn=tm % 60
tm=tm//60
hr=tm % 24
tm=tm//24
dy=tm
if (dy>0):
s=s+"%d d, " % dy
if (hr>0):
s=s+"%d h, " % hr
if (mn>0):
s=s+"%d m, " % mn
s=s+"%.2f s" % (sc+frac)
return s
def timeit(reset=False):
global _timeit_data
try:
_timeit_data
except NameError:
_timeit_data=time.time()
if reset:
_timeit_data=time.time()
else:
return time2str(time.time()-_timeit_data)
from scipy.special import gammaln,gamma
def tpdf(x,df,mu,sd):
t=(x-mu)/float(sd)
return gamma((df+1)/2.0)/sqrt(df*pi)/gamma(df/2.0)/sd*(1+t**2/df)**(-(df+1)/2.0)
def logtpdf(x,df,mu,sd):
try:
N=len(x)
except TypeError:
N=1
t=(x-mu)/float(sd)
return N*(gammaln((df+1)/2.0)-0.5*log(df*np.pi)-gammaln(df/2.0)-np.log(sd))+(-(df+1)/2.0)*sum(np.log(1+t**2/df))
def loguniformpdf(x,mn,mx):
if mn < x < mx:
return np.log(1.0/(mx-mn))
return -np.inf
def logjeffreyspdf(x):
if x>0.0:
return -np.log(x)
return -np.inf
def lognormalpdf(x,mn,sig):
# 1/sqrt(2*pi*sigma^2)*exp(-x^2/2/sigma^2)
try:
N=len(x)
except TypeError:
N=1
return -0.5*log(2*np.pi*sig**2)*N - sum((x-mn)**2/sig**2/2.0)
def logexponpdf(x,scale):
if x<=0:
return -np.inf
return -np.log(scale)-x/scale
class BESTModel(object):
def __init__(self,y1,y2):
self.data=array(y1,float),array(y2,float)
pooled=np.concatenate((y1,y2))
self.S=np.std(pooled)
self.M=np.mean(pooled)
self.names=['mu1','mu2','sigma1','sigma2','nu']
self.params=[]
self.params_dict={}
def initial_value(self):
return array([self.M,self.M,self.S,self.S,10])
def prior(self,theta):
mu1,mu2,sd1,sd2,nu=theta
value=0.0
value+=lognormalpdf(mu1,self.M,1000*self.S)
value+=lognormalpdf(mu2,self.M,1000*self.S)
mn=0.001*self.S
mx=1000*self.S
value+=loguniformpdf(sd1,mn,mx-mn)
value+=loguniformpdf(sd2,mn,mx-mn)
value+=logexponpdf(nu-1,scale=29)
return value
def run_mcmc(self,iterations=1000,burn=0.1):
# Set up the sampler.
ndim, nwalkers = len(self), 100
val=self.initial_value()
pos=emcee.utils.sample_ball(val, .05*val+1e-4, size=nwalkers)
self.sampler = emcee.EnsembleSampler(nwalkers, ndim, model)
timeit(reset=True)
print("Running MCMC...")
self.sampler.run_mcmc(pos, iterations)
print("Done.")
print timeit()
burnin = int(self.sampler.chain.shape[1]*burn)
samples = self.sampler.chain[:, burnin:, :]
self.mu1=samples[:,:,0]
self.mu2=samples[:,:,1]
self.sigma1=samples[:,:,2]
self.sigma2=samples[:,:,3]
self.nu=samples[:,:,4]
self.params=[self.mu1,self.mu2,self.sigma1,self.sigma2,self.nu]
self.params_dict['mu1']=self.mu1
self.params_dict['mu2']=self.mu2
self.params_dict['sigma1']=self.sigma1
self.params_dict['sigma2']=self.sigma2
self.params_dict['nu']=self.nu
def __len__(self):
return 5 # mu1,mu2,sd1,sd2,nu
def likelihood(self,theta):
mu1,mu2,sd1,sd2,nu=theta
y1,y2=self.data
value=0.0
value+=logtpdf(y1,nu,mu1,sd1)
value+=logtpdf(y2,nu,mu2,sd2)
return value
def plot_chains(self,S,*args,**kwargs):
mu1,mu2,sigma1,sigma2,nu=self.params
N=float(prod(mu1.shape))
if '=' in S:
name,expr=S.split('=')
value=eval(expr)
else:
name=S
if name=='mu1':
name=r"\mu_1"
elif name=='mu2':
name=r"\mu_2"
elif name=='sigma1':
name=r"\sigma_1"
elif name=='sigma2':
name=r"\sigma_2"
elif name=='nu':
name=r"\nu"
else:
name=r"%s" % name
value=eval(S)
plot(value, color="k",alpha=0.02,**kwargs)
if "\\" in name:
ylabel("$"+name+"$")
else:
ylabel(name)
def plot_distribution(self,S,p=95):
pp=[(100-p)/2.0,50,100-(100-p)/2.0]
mu1,mu2,sigma1,sigma2,nu=self.params
N=float(prod(mu1.shape))
if '=' in S:
name,expr=S.split('=')
value=eval(expr)
else:
name=S
if name=='mu1':
name=r"\hat{\mu}_1"
elif name=='mu2':
name=r"\hat{\mu}_2"
elif name=='sigma1':
name=r"\hat{\sigma}_1"
elif name=='sigma2':
name=r"\hat{\sigma}_2"
elif name=='nu':
name=r"\hat{\nu}"
else:
name=r"\hat{%s}" % name
value=eval(S)
result=histogram(value.ravel(),bins=200)
v=np.percentile(value.ravel(), pp ,axis=0)
if r"\hat" in name:
title(r'$%s=%.3f^{+%.3f}_{-%.3f}$' % (name,v[1],(v[2]-v[1]),(v[1]-v[0])),va='bottom')
else:
title(r'%s$=%.3f^{+%.3f}_{-%.3f}$' % (name,v[1],(v[2]-v[1]),(v[1]-v[0])),va='bottom')
def P(self,S):
mu1,mu2,sigma1,sigma2,nu=self.params
N=float(prod(mu1.shape))
result=eval('sum(%s)/N' % S)
return result
def posterior(self,theta):
prior = self.prior(theta)
if not np.isfinite(prior):
return -np.inf
return prior + self.likelihood(theta)
def __call__(self,theta):
return self.posterior(theta)
drug = (101,100,102,104,102,97,105,105,98,101,100,123,105,103,100,95,102,106,
109,102,82,102,100,102,102,101,102,102,103,103,97,97,103,101,97,104,
96,103,124,101,101,100,101,101,104,100,101)
placebo = (99,101,100,101,102,100,97,101,104,101,102,102,100,105,88,101,100,
104,100,100,100,101,102,103,97,101,101,100,101,99,101,100,100,
101,100,99,101,100,102,99,100,99)
model=BESTModel(drug,placebo)
model.run_mcmc()
Running MCMC... Done. 5.82 s
for name,sample in zip(model.names,model.params):
figure(figsize=(12,5))
model.plot_chains(name)
for name,sample in zip(model.names,model.params):
figure(figsize=(12,5))
model.plot_distribution(name)
figure(figsize=(12,5))
model.plot_distribution('Difference of Means=mu1-mu2')
figure(figsize=(12,5))
model.plot_distribution('Difference of Std Devs=sigma1-sigma2')
figure(figsize=(12,5))
model.plot_distribution('Effect Size=(mu1-mu2)/sqrt((sigma1**2+sigma2**2)/2)')
model.P('(mu1>101) & (mu1<102.1)')
0.85502222222222224
model.P('sigma1<sigma2')
0.0060111111111111112