import pymc as mc
import pandas
import numpy as np
# Load the CPU performance data set (from R's MASS package; a copy is
# available in gist c212194ecbd2ee050192).
cpus = pandas.read_csv('cpus.csv')
# Preview the first rows to check the column names used in the model formulas
# (as a bare expression this only displays in an interactive session/notebook).
cpus.head()
| | name | syct | mmin | mmax | cach | chmin | chmax | perf | estperf |
---|---|---|---|---|---|---|---|---|---|
0 | ADVISOR 32/60 | 125 | 256 | 6000 | 256 | 16 | 128 | 198 | 199 |
1 | AMDAHL 470V/7 | 29 | 8000 | 32000 | 32 | 8 | 32 | 269 | 253 |
2 | AMDAHL 470/7A | 29 | 8000 | 32000 | 32 | 8 | 32 | 220 | 253 |
3 | AMDAHL 470V/7B | 29 | 8000 | 32000 | 32 | 8 | 32 | 172 | 253 |
4 | AMDAHL 470V/7C | 29 | 8000 | 16000 | 32 | 8 | 16 | 132 | 132 |
# Baseline fit: regress `perf` on the six hardware columns using the glm
# helper's default family (labelled "Normal" in the comparison printed later).
with mc.Model() as model:
    mc.glm.glm(
        'perf ~ syct + mmin + mmax + cach + chmin + chmax',
        cpus,
    )
    # Sampling stays inside the `with` block: no model is passed explicitly,
    # so both NUTS and sample pick it up from the active context.
    trace = mc.sample(200, mc.NUTS())
[-----------------100%-----------------] 200 of 200 complete in 1.9 sec
class Laplace(mc.glm.families.Family):
    """GLM family pairing an identity link with a Laplace likelihood.

    Intended as a drop-in alternative to the default family so that the same
    regression formula can be fitted with a Laplace error model.
    """

    # TODO: check into the correct sm_family
    # NOTE(review): Gaussian is a placeholder here; confirm which statsmodels
    # family (if any) is appropriate for a Laplace likelihood.
    sm_family = mc.glm.families.Gaussian
    link = mc.glm.families.Identity
    likelihood = mc.Laplace
    # Name of the likelihood argument that presumably receives the linear
    # predictor -- verify against the Family base class.
    parent = 'mu'
    # Presumably maps the likelihood's `b` argument to a model variable named
    # 'sigma' with a Uniform(0, 100) prior; the name matches the default
    # family's so the two traces can be compared variable by variable.
    priors = {'b': ('sigma', mc.Uniform.dist(0, 100))}
# Same regression as the baseline model, but with the Laplace family defined
# above as the likelihood.
with mc.Model() as model_laplace:
    mc.glm.glm(
        'perf ~ syct + mmin + mmax + cach + chmin + chmax',
        cpus,
        family=Laplace(),
    )
    # Sampling stays inside the `with` block so that NUTS and sample resolve
    # the model from the active context.
    # note: quite slow, so only 200 samples for now
    trace_laplace = mc.sample(200, mc.NUTS())
[-----------------100%-----------------] 201 of 200 complete in 9597.9 sec
# Print, for every variable in the baseline trace, the mean of its sampled
# values under the Normal fit next to the mean under the Laplace fit.
for v in trace.varnames:
    normal_mean = np.mean(trace.samples[v].vals)
    laplace_mean = np.mean(trace_laplace.samples[v].vals)
    # Single-argument print(...) behaves like the Python 2 print statement
    # and also runs unchanged on Python 3.
    print('{v}:\n\tNormal:\t{normal}\n\tLaplace:\t{laplace}'.format(
        v=v, normal=normal_mean, laplace=laplace_mean))
Intercept:
	Normal:	-55.8440754817
	Laplace:	-20.2074181573
syct:
	Normal:	0.0486123276765
	Laplace:	0.0153899700386
mmin:
	Normal:	0.0154247020762
	Laplace:	0.0155863267598
mmax:
	Normal:	0.00556234346402
	Laplace:	0.00346221718791
cach:
	Normal:	0.635439823768
	Laplace:	0.567473469829
chmin:
	Normal:	-0.276933454985
	Laplace:	2.08713603599
chmax:
	Normal:	1.49145192411
	Laplace:	0.360122961875
sigma:
	Normal:	60.0888469755
	Laplace:	37.9504574424