from __future__ import division
from numpy import array, linspace, random
from scipy.stats import bernoulli, norm
from matplotlib import cm
from matplotlib.pylab import figure, subplots
#random.seed(101) # set random seed for reproducibility
mua_true=4 # we are trying to estimate this from the data
mub_true=7 # we are trying to estimate this from the data
fa=norm(mua_true,1) # distribution for group A
fb=norm(mub_true,1) # distribution for group B
fz=bernoulli(0.25) # each group equally likely 

def sample(n=10):
    'simulate picking from each group n times'
    tmp=fz.rvs(n) # choose n of the coins, A or B
    return tmp*(fb.rvs(n))+(1-tmp)*fa.rvs(n) # flip it n times

xs = sample(1000) # generate some samples

f,ax = subplots()
x = linspace(mua_true-2,mub_true+2,100)
ax.plot(x,fa.pdf(x),label='group A')
ax.plot(x,fb.pdf(x),label='group B')
ax.hist(xs,bins=50,normed=1,label='Samples');
ax.legend(loc=0);

import sympy
from sympy.abc import x, z
from sympy import stats

mu_a,mu_b = sympy.symbols('mu_a,mu_b')
na=stats.Normal( 'x', mu_a,1)
nb=stats.Normal( 'x', mu_b,1)

L=(stats.density(na)(x)+stats.density(nb)(x))/2 # incomplete likelihood function 

def ez(x,mu_a,mu_b): # expected value of hidden variable
  return norm(mu_a).pdf(x) / ( norm(mu_a).pdf(x) + norm(mu_b).pdf(x) )

Lf=sympy.lambdify((x,mu_a,mu_b), sympy.log(abs(L)),'numpy') # convert to numpy function from sympy

def run():
    out, lout = [], []
    mu_a_n=random.random() * 10 # initial guess
    mu_b_n=random.random() * 10 # initial guess
    for i in range(20): # iterations of expectation and maximization
        tau=ez(xs,mu_a_n,mu_b_n)                 # expected value of z-variable
        lout.append( sum(Lf(xs,mu_a_n,mu_b_n)) ) # save incomplete likelihood value (should be monotone)
        out.append((mu_a_n, mu_b_n))             # save of (pa,pb) steps
        mu_a_n=( sum(tau*xs) / sum(tau) )        # new maximum likelihood estimate of pa
        mu_b_n=( sum((1-tau) * xs) / sum(1-tau) )
    return out, lout

out, lout = run()

fig=figure()
fig.set_figwidth(12)
ax=fig.add_subplot(121)
ax.plot(array(out),'o-')
ax.legend(('mu_a','mu_b'),loc=0)
ax.hlines([mua_true,mub_true],0,len(out),['r','g'])
ax.set_xlabel('iteration',fontsize=18)
ax.set_ylabel('$\mu_a,\mu_b$ values',fontsize=24)
ax=fig.add_subplot(122)
ax.plot(array(lout),'o-')
ax.set_xlabel('iteration',fontsize=18)
ax.set_title('Incomplete likelihood',fontsize=16)

out, lout = run()
mua_step=linspace(0,10,30)
mub_step=linspace(0,10,20)
z=Lf(xs,mua_step[:,None],mub_step[:,None,None]).sum(axis=2) # numpy broadcasting
fig=figure(figsize=(8,5))
ax=fig.add_subplot(111)
p=ax.contourf(mua_step,mub_step,z,30,cmap=cm.gray)
xa,xb=zip(*out) # unpack the container from the previous block
ax.plot(xa,xb,'ro')                                    # points per iteration in red
ax.plot(mua_true,mub_true,'bs')                        # true values in blue
ax.plot(xa[0],xb[0],'gx',ms=15.,mew=2.)                # starting point in green
ax.text(xa[0],xb[0],'   start',color='g',fontsize=11.)
ax.set_xlabel('$\mu_a$',fontsize=24)
ax.set_ylabel('$\mu_b$',fontsize=24)
ax.set_title('Incomplete Likelihood',fontsize=18)
fig.colorbar(p);