from __future__ import division from numpy import array, linspace, random from scipy.stats import bernoulli, norm from matplotlib import cm from matplotlib.pylab import figure, subplots #random.seed(101) # set random seed for reproducibility mua_true=4 # we are trying to estimate this from the data mub_true=7 # we are trying to estimate this from the data fa=norm(mua_true,1) # distribution for group A fb=norm(mub_true,1) # distribution for group B fz=bernoulli(0.25) # each group equally likely def sample(n=10): 'simulate picking from each group n times' tmp=fz.rvs(n) # choose n of the coins, A or B return tmp*(fb.rvs(n))+(1-tmp)*fa.rvs(n) # flip it n times xs = sample(1000) # generate some samples f,ax = subplots() x = linspace(mua_true-2,mub_true+2,100) ax.plot(x,fa.pdf(x),label='group A') ax.plot(x,fb.pdf(x),label='group B') ax.hist(xs,bins=50,normed=1,label='Samples'); ax.legend(loc=0); import sympy from sympy.abc import x, z from sympy import stats mu_a,mu_b = sympy.symbols('mu_a,mu_b') na=stats.Normal( 'x', mu_a,1) nb=stats.Normal( 'x', mu_b,1) L=(stats.density(na)(x)+stats.density(nb)(x))/2 # incomplete likelihood function def ez(x,mu_a,mu_b): # expected value of hidden variable return norm(mu_a).pdf(x) / ( norm(mu_a).pdf(x) + norm(mu_b).pdf(x) ) Lf=sympy.lambdify((x,mu_a,mu_b), sympy.log(abs(L)),'numpy') # convert to numpy function from sympy def run(): out, lout = [], [] mu_a_n=random.random() * 10 # initial guess mu_b_n=random.random() * 10 # initial guess for i in range(20): # iterations of expectation and maximization tau=ez(xs,mu_a_n,mu_b_n) # expected value of z-variable lout.append( sum(Lf(xs,mu_a_n,mu_b_n)) ) # save incomplete likelihood value (should be monotone) out.append((mu_a_n, mu_b_n)) # save of (pa,pb) steps mu_a_n=( sum(tau*xs) / sum(tau) ) # new maximum likelihood estimate of pa mu_b_n=( sum((1-tau) * xs) / sum(1-tau) ) return out, lout out, lout = run() fig=figure() fig.set_figwidth(12) ax=fig.add_subplot(121) ax.plot(array(out),'o-') ax.legend(('mu_a','mu_b'),loc=0) ax.hlines([mua_true,mub_true],0,len(out),['r','g']) ax.set_xlabel('iteration',fontsize=18) ax.set_ylabel('$\mu_a,\mu_b$ values',fontsize=24) ax=fig.add_subplot(122) ax.plot(array(lout),'o-') ax.set_xlabel('iteration',fontsize=18) ax.set_title('Incomplete likelihood',fontsize=16) out, lout = run() mua_step=linspace(0,10,30) mub_step=linspace(0,10,20) z=Lf(xs,mua_step[:,None],mub_step[:,None,None]).sum(axis=2) # numpy broadcasting fig=figure(figsize=(8,5)) ax=fig.add_subplot(111) p=ax.contourf(mua_step,mub_step,z,30,cmap=cm.gray) xa,xb=zip(*out) # unpack the container from the previous block ax.plot(xa,xb,'ro') # points per iteration in red ax.plot(mua_true,mub_true,'bs') # true values in blue ax.plot(xa[0],xb[0],'gx',ms=15.,mew=2.) # starting point in green ax.text(xa[0],xb[0],' start',color='g',fontsize=11.) ax.set_xlabel('$\mu_a$',fontsize=24) ax.set_ylabel('$\mu_b$',fontsize=24) ax.set_title('Incomplete Likelihood',fontsize=18) fig.colorbar(p);