import matplotlib.pyplot as plt, numpy as np, seaborn as sns, \ powerlaw, pymc as pm %matplotlib inline # set random seed for reproducibility np.random.seed(12345) # simulate data from power law distribution test = powerlaw.Power_Law(xmin = 1., parameters = [1.5]) simulated = test.generate_random(1000) # fit with powerlaw package fit = powerlaw.Fit(simulated, xmin=1.) # PyMC model with xmin fixed xmin = 1. alpha = pm.Exponential('alpha', 1.5) @pm.stochastic(observed=True) def power_law(value=simulated, alpha=alpha, xmin=xmin): return np.sum(np.log((alpha-1) * xmin**(alpha-1) * value**-alpha)) model = pm.MCMC([alpha,xmin,power_law]) model.sample(iter=5000, burn=0, thin=1) # 5000 iterations with 0 burn-in is not enough pm.Matplot.plot(model) # PyMC model with xmin fixed xmin = 1. alpha = pm.Exponential('alpha', 1.5) @pm.stochastic(observed=True) def power_law(value=simulated, alpha=alpha, xmin=xmin): return np.sum(np.log((alpha-1) * xmin**(alpha-1) * value**-alpha)) model = pm.MCMC([alpha,xmin,power_law]) model.sample(iter=20000, burn=10000, thin=10) # in this case the burn-in is the important part, # but I'll run it for longer anyway, and thin to keep # just 1000 samples for fast computation pm.Matplot.plot(model) xmin = pm.Uninformative('xmin', value=0.1) alpha = pm.Exponential('alpha', 1.5) @pm.stochastic(observed=True) def power_law(value=simulated, alpha=alpha, xmin=xmin): if value.min() < xmin: return -np.inf return np.sum(np.log((alpha-1) * xmin**(alpha-1) * value**-alpha)) model = pm.MCMC([alpha,xmin,power_law]) model.sample(iter=5000, burn=2500) pm.Matplot.plot(model) # PyMC model with xmin as an additional parameter xmin = pm.Uniform('xmin', 0., 10., value=1.) alpha = pm.Uniform('alpha', 1., 10.) @pm.stochastic(observed=True) def power_law(value=simulated, alpha=alpha, xmin=xmin): # xmin must be less than all data values if xmin > np.min(value): return -np.inf return np.sum(np.log(alpha-1) + (alpha-1)*np.log(xmin) + -alpha*np.log(value)) model = pm.MCMC([alpha,xmin,power_law]) model.sample(iter=20000, burn=10000, thin=10) pm.Matplot.plot(model) print 'alpha:', fit.alpha, 'vs', alpha.trace().mean() print ' xmin:', fit.xmin, 'vs', xmin.trace().mean()