# Bootstrap demonstrations for ordinary least-squares (OLS) regression weights:
#   1. fit OLS on one simulated data set and compare with the true weights,
#   2. bootstrap the weights under three sample-size / noise regimes,
#   3. track how the bootstrap covariance estimate changes with the number
#      of bootstrap resamples.
import random

import numpy as np
import scipy
import scipy.stats
from numpy.matlib import repmat

import moss
import seaborn

# NOTE(review): `plt` is used below but is not imported in this file; the
# script presumably runs in a `%pylab`-style session that provides it --
# confirm, or add `import matplotlib.pyplot as plt` at the top.

seaborn.set()
colors = seaborn.color_palette()


def ols_fit(X, y):
    """Return the OLS weights for design matrix X and targets y.

    Solves the normal equations directly: w = (X^T X)^-1 X^T y.

    Parameters
    ----------
    X : array, shape (n_obs, n_features)
    y : array, shape (n_obs,)

    Returns
    -------
    array, shape (n_features,)
    """
    return np.dot(np.dot(np.linalg.inv(np.dot(X.T, X)), X.T), y)


# ---------------------------------------------------------------------------
# 1. Single OLS fit on simulated data
# ---------------------------------------------------------------------------
n_obs = 100
noise_std = 5

# Four random regressors plus a constant (intercept) column.
X = np.column_stack((np.random.randn(n_obs, 4), np.ones(n_obs)))
w = np.random.rand(5)
noise = np.random.randn(n_obs) * noise_std
y = np.dot(X, w) + noise

w_ols = ols_fit(X, y)

plt.bar(np.arange(5) + .1, w, .4, label="actual weights")
plt.bar(np.arange(5) + .5, w_ols, .4, color=colors[1],
        label="estimated weights")

# ---------------------------------------------------------------------------
# 2. Bootstrapped weights under three regimes
# ---------------------------------------------------------------------------
X = np.column_stack((np.random.randn(1000, 4), np.ones(1000)))
w = np.random.rand(5)

# Large sample high noise; N = 1000, noise std = 5
y_large_n = np.dot(X, w) + np.random.randn(1000) * noise_std

# Small sample low noise; high signal to noise
y_lownoise = np.dot(X[:100], w) + np.random.randn(100)

# Small sample, high noise
y_noisy = np.dot(X[:100], w) + np.random.randn(100) * noise_std

# Bootstrap
n_boot = 1000
w_boot1 = moss.bootstrap(X, y_large_n, n_boot=n_boot, func=ols_fit)
w_boot2 = moss.bootstrap(X[:100], y_lownoise, n_boot=n_boot, func=ols_fit)
w_boot3 = moss.bootstrap(X[:100], y_noisy, n_boot=n_boot, func=ols_fit)

w_model1 = np.mean(w_boot1, axis=0)
w_model2 = np.mean(w_boot2, axis=0)
w_model3 = np.mean(w_boot3, axis=0)

# 95% percentile intervals of each bootstrap distribution; each has shape
# (2, n_weights): row 0 is the lower bound, row 1 the upper bound.
ci_bounds = [2.5, 97.5]
ci1 = np.percentile(w_boot1, ci_bounds, axis=0)
ci2 = np.percentile(w_boot2, ci_bounds, axis=0)
ci3 = np.percentile(w_boot3, ci_bounds, axis=0)

barx = np.linspace(0, 1, 6)[:-1]
models = [w, w_model1, w_model2, w_model3]

# The true weights get no error bars; the bootstrapped models get their
# intervals converted to bar-relative error sizes.
cis = [None] + [
    seaborn.utils.ci_to_errsize(ci, model)
    for ci, model in zip([ci1, ci2, ci3], models[1:])
]

plt.figure()
for i, model in enumerate(models):
    plt.bar(barx + i, model, 0.2, yerr=cis[i], color=colors[i],
            ecolor="gray")
plt.xticks([.5, 1.5, 2.5, 3.5], ["model", "large N", "low noise", "noisy"])

# ---------------------------------------------------------------------------
# 3. Bootstrap covariance estimate versus number of resamples
# ---------------------------------------------------------------------------
# Reference covariance of the OLS weights for the large-N data set, using the
# known noise level: Cov(w_hat) = sigma^2 (X^T X)^-1.
cov = noise_std ** 2 * np.linalg.inv(np.dot(X.T, X))

# Start at 2 resamples: a sample covariance needs at least two draws
# (its normaliser is n_boots - 1).
boot_counts = list(range(2, 100))
difference_bootstrap = []
for n_boots in boot_counts:
    w_boot = np.asarray(
        moss.bootstrap(X, y_large_n, n_boot=n_boots, func=ols_fit))
    # Rows are resamples, columns are weights.
    cov_estimation = np.cov(w_boot, rowvar=False)
    difference_bootstrap.append(np.linalg.norm(cov_estimation - cov))

# Alias retained so the accumulator's earlier name still resolves.
mas = difference_bootstrap

plt.figure()
plt.plot(boot_counts, difference_bootstrap, 'g-', label='Bootstrap')
plt.xlabel('n_boots')
plt.legend()