import numpy as np import pymc as pm import matplotlib.pyplot as plt %matplotlib inline #We'll use this next function to show the graphical relations from IPython.display import Image ?norm from scipy.stats import bernoulli,norm truemean1=100.0 truemean2=200.0 truestd=25.0 truep=0.25 n=2000 data=np.zeros(n) for i in xrange(len(data)): p = bernoulli.rvs(truep) data[i]=p*norm.rvs(truemean1, truestd)+(1-p)*norm.rvs(truemean2, truestd) plt.hist(data); p = pm.Uniform("p", 0, 1) assignment = pm.Categorical("assignment", [p, 1 - p], size=np.shape(data)[0]) centers = pm.Normal("centers", [110, 210], [10**-2, 10**-2], size=2) taus = 1.0 / pm.Uniform("stds", 0, 100, size=2) ** 2 @pm.deterministic def center_i(assignment=assignment, centers=centers): return centers[assignment] @pm.deterministic def tau_i(assignment=assignment, taus=taus): return taus[assignment] # OK. Here's the final summary line representing our model that relates the centers and taus to the data # We assume that observations, the data, can be explained by the mixture model observations = pm.Normal("obs", center_i, tau_i, value=data, observed=True) ?pm.Normal # create a model class model = pm.Model([p, assignment, observations, taus, centers]) pm.graph.dag(pm.MCMC(model)).write_png('dag.png') Image('dag.png') #map1 = pm.MAP(model) #map1.fit() #stores the fitted variables' values in foo.value mcmc = pm.MCMC(model) #now use MCMC's sample method mcmc.sample(50000) from IPython.core.pylabtools import figsize figsize(12.5, 9) plt.subplot(311) lw = 1 center_trace = mcmc.trace("centers")[:] # for pretty colors. colors = ["#348ABD", "#A60628"] \ if center_trace[-1, 0] > center_trace[-1, 1] \ else ["#A60628", "#348ABD"] plt.plot(center_trace[:, 0], label="trace of center 0", c=colors[0], lw=lw) plt.plot(center_trace[:, 1], label="trace of center 1", c=colors[1], lw=lw) plt.title("Traces of unknown parameters") leg = plt.legend(loc="upper right") leg.get_frame().set_alpha(0.7) plt.subplot(312) std_trace = mcmc.trace("stds")[:] plt.plot(std_trace[:, 0], label="trace of standard deviation of cluster 0", c=colors[0], lw=lw) plt.plot(std_trace[:, 1], label="trace of standard deviation of cluster 1", c=colors[1], lw=lw) plt.legend(loc="upper left") plt.subplot(313) p_trace = mcmc.trace("p")[:] plt.plot(p_trace, label="$p$: frequency of assignment to cluster 0", color="#467821", lw=lw) plt.xlabel("Steps") plt.ylim(0, 1) plt.legend() mcmc.sample(100000) figsize(12.5, 4) center_trace = mcmc.trace("centers", chain=1)[:] prev_center_trace = mcmc.trace("centers", chain=0)[:] x = np.arange(50000) plt.plot(x, prev_center_trace[:, 0], label="previous trace of center 0", lw=lw, alpha=0.4, c=colors[1]) plt.plot(x, prev_center_trace[:, 1], label="previous trace of center 1", lw=lw, alpha=0.4, c=colors[0]) x = np.arange(50000, 150000) plt.plot(x, center_trace[:, 0], label="new trace of center 0", lw=lw, c="#348ABD") plt.plot(x, center_trace[:, 1], label="new trace of center 1", lw=lw, c="#A60628") plt.title("Traces of unknown center parameters") leg = plt.legend(loc="upper right") leg.get_frame().set_alpha(0.8) plt.xlabel("Steps") print mcmc.trace('centers')[:,0].mean(),mcmc.trace('centers')[:,1].mean(),mcmc.trace('p')[:].mean() print truemean1, truemean2, truep ?pm.MCMC.sample from pymc.Matplot import plot as mcplot mcplot(mcmc.trace("centers", 2), common_scale=False) pm.Matplot.plot(p) from IPython.display import YouTubeVideo YouTubeVideo("XbxIo7ScVzc") YouTubeVideo("WESld11iNcQ")