import matplotlib.pyplot as plt, numpy as np, seaborn as sns, \
powerlaw, pymc as pm
%matplotlib inline
# set random seed for reproducibility
np.random.seed(12345)
# simulate data from power law distribution
test = powerlaw.Power_Law(xmin = 1., parameters = [1.5])
simulated = test.generate_random(1000)
# fit with powerlaw package
fit = powerlaw.Fit(simulated, xmin=1.)
# PyMC model with xmin fixed
xmin = 1.
alpha = pm.Exponential('alpha', 1.5)
@pm.stochastic(observed=True)
def power_law(value=simulated, alpha=alpha, xmin=xmin):
return np.sum(np.log((alpha-1) * xmin**(alpha-1) * value**-alpha))
model = pm.MCMC([alpha,xmin,power_law])
model.sample(iter=5000, burn=0, thin=1) # 5000 iterations with 0 burn-in is not enough
[-----------------100%-----------------] 5000 of 5000 complete in 1.1 sec
pm.Matplot.plot(model)
Plotting alpha
# PyMC model with xmin fixed
xmin = 1.
alpha = pm.Exponential('alpha', 1.5)
@pm.stochastic(observed=True)
def power_law(value=simulated, alpha=alpha, xmin=xmin):
return np.sum(np.log((alpha-1) * xmin**(alpha-1) * value**-alpha))
model = pm.MCMC([alpha,xmin,power_law])
model.sample(iter=20000, burn=10000, thin=10) # in this case the burn-in is the important part,
# but I'll run it for longer anyway, and thin to keep
# just 1000 samples for fast computation
[-----------------100%-----------------] 20000 of 20000 complete in 4.2 sec
pm.Matplot.plot(model)
Plotting alpha
xmin = pm.Uninformative('xmin', value=0.1)
alpha = pm.Exponential('alpha', 1.5)
@pm.stochastic(observed=True)
def power_law(value=simulated, alpha=alpha, xmin=xmin):
if value.min() < xmin:
return -np.inf
return np.sum(np.log((alpha-1) * xmin**(alpha-1) * value**-alpha))
model = pm.MCMC([alpha,xmin,power_law])
model.sample(iter=5000, burn=2500)
[-----------------100%-----------------] 5000 of 5000 complete in 1.8 sec
pm.Matplot.plot(model)
Plotting xmin Plotting alpha
# PyMC model with xmin as an additional parameter
xmin = pm.Uniform('xmin', 0., 10., value=1.)
alpha = pm.Uniform('alpha', 1., 10.)
@pm.stochastic(observed=True)
def power_law(value=simulated, alpha=alpha, xmin=xmin):
# xmin must be less than all data values
if xmin > np.min(value):
return -np.inf
return np.sum(np.log(alpha-1) + (alpha-1)*np.log(xmin) + -alpha*np.log(value))
model = pm.MCMC([alpha,xmin,power_law])
model.sample(iter=20000, burn=10000, thin=10)
[-----------------100%-----------------] 20000 of 20000 complete in 4.6 sec
pm.Matplot.plot(model)
Plotting alpha Plotting xmin
print 'alpha:', fit.alpha, 'vs', alpha.trace().mean()
print ' xmin:', fit.xmin, 'vs', xmin.trace().mean()
alpha: 1.49787599133 vs 1.49828919861 xmin: 1.0 vs 0.998160020389