import pandas as pd
import numpy as np
import pandas.rpy.common as com
from scipy import stats
from statsmodels.formula.api import ols
# emulates abline function, only possible to basic line styles and width
def abline(a, intercept, gradient, *args, **kwargs):
#a = gca()
xlim = a.get_xlim()
ylim = a.get_ylim()
if args:
sty = args[0]
else:
sty = 'r'
if kwargs:
lw = kwargs['linewidth']
else:
lw = 5
a.plot(xlim, [intercept + gradient * x for x in xlim], sty, linewidth=lw)
a.set_xlim(xlim)
a.set_ylim(ylim)
np.random.seed(333)
x = np.random.normal(size=30)
bootMean = [np.random.choice(x, size=30, replace=True).mean() for i in range(1000)]
sampledMean = [np.random.normal(size=30).mean() for i in range(1000)]
bootDensity = stats.kde.gaussian_kde(bootMean)
sampledDensity = stats.kde.gaussian_kde(sampledMean)
x = np.arange(-0.8, 0.8, .01)
plot(x, bootDensity(x), 'k')
plot(x, sampledDensity(x), 'r');
# this is a bit of a shame
import imp
bootstrap = imp.load_source('scikits.bootstrap', '/Users/erriza/scikits-bootstrap/src/package/scikits/bootstrap/bootstrap.py')
np.random.seed(333)
x = np.random.normal(size=30)
sampledMean = [np.random.normal(size=30).mean() for i in range(1000)]
# this computes the confidence interval
bootstrap.ci(x, statfunction=np.mean, n_samples=100)
/Users/erriza/scikits-bootstrap/src/package/scikits/bootstrap/bootstrap.py:114: InstabilityWarning: Some values used top 10 low/high samples; results may be unstable. warnings.warn("Some values used top 10 low/high samples; results may be unstable.", InstabilityWarning)
array([-0.27856777, 0.29389323])
nuclear = com.load_data('nuclear', package='boot')
nuke_lm = ols('np.log(cost) ~ date', nuclear).fit()
plot(nuclear['date'], np.log(nuclear['cost']), 'ok')
abline(gca(), nuke_lm.params['Intercept'], nuke_lm.params['date'], 'r', linewidth=3)
# bootsrapping explicitly
f, ax = subplots(ncols=3)
for axis in ax:
nuclear0 = nuclear.ix[np.random.choice(range(1, len(nuclear)+1), size=len(nuclear), replace=True)]
nuke_lm0 = ols('np.log(cost) ~ date', nuclear).fit()
axis.plot(nuclear0['date'], np.log(nuclear0['cost']), 'ok')
abline(axis, nuke_lm0.params['Intercept'], nuke_lm0.params['date'], 'r', linewidth=3)
f.set_size_inches(11, 3)
f.tight_layout();