%matplotlib inline import numpy as np import matplotlib.pyplot as plt from scipy import stats # use seaborn plotting defaults import seaborn as sns; sns.set() def linear_data_sample(N=40, rseed=0, m=3, b=-2): rng = np.random.RandomState(rseed) x = 10 * rng.rand(N) dy = m / 2 * (1 + rng.rand(N)) y = m * x + b + dy * rng.randn(N) return (x, y, dy) x, y, dy = linear_data_sample() plt.errorbar(x, y, dy, fmt='o'); plt.plot(x, y, 'o'); def model(theta, x): b, m = theta return m * x + b def abs_deviation(theta, x, y): return np.sum(abs(model(theta, x) - y)) def square_deviation(theta, x, y): return np.sum((model(theta, x) - y) ** 2) from scipy.optimize import fmin theta_guess = [0, 1] theta_LAD = fmin(abs_deviation, theta_guess, args=(x, y)) theta_LS = fmin(square_deviation, theta_guess, args=(x, y)) xfit = np.linspace(0, 10) plt.plot(x, y, 'o') plt.plot(xfit, model(theta_LAD, xfit), label='Least Abs. Dev.') plt.plot(xfit, model(theta_LS, xfit), label='Least Sq.') plt.legend(loc='best'); ydiff = np.linspace(-4, 4, 1000) plt.plot(ydiff, stats.norm.pdf(ydiff, 0, 1)) plt.xlabel('$y - y_{true}$', size=18) plt.ylabel('$p(y|y_{true})$', size=18); X = np.vstack([np.ones_like(x), x]).T theta_hat = np.linalg.solve(np.dot(X.T, X), np.dot(X.T, y)) print(theta_hat) print(theta_LS) xfit = np.linspace(0, 10) plt.plot(x, y, 'o') plt.plot(xfit, model(theta_hat, xfit), label='Least Sq.') plt.legend(loc='best'); XT_Sigma_X = np.dot(X.T / dy ** 2, X) XT_Sigma_y = np.dot(X.T / dy ** 2, y) theta_LS_err = np.linalg.solve(XT_Sigma_X, XT_Sigma_y) plt.errorbar(x, y, dy, fmt='o') plt.plot(xfit, model(theta_LAD, xfit), label='Least Abs. Dev.') plt.plot(xfit, model(theta_LS, xfit), label='Least Sq.') plt.plot(xfit, model(theta_LS_err, xfit), label='Fit w/ errors') plt.legend(loc='best'); def linear_data_sample_big_errs(N=40, rseed=0, m=3, b=-2): """Generate data with some large errors""" rng = np.random.RandomState(rseed) x = 10 * rng.rand(N) dy = m / 2 * (1 + rng.rand(N)) dy[20:25] *= 10 y = m * x + b + dy * rng.randn(N) return (x, y, dy) x, y, dy = linear_data_sample_big_errs() plt.errorbar(x, y, dy, fmt='o'); theta_LAD = fmin(abs_deviation, theta_guess, args=(x, y)) X = np.vstack([np.ones_like(x), x]).T theta_LS = np.linalg.solve(np.dot(X.T, X), np.dot(X.T, y)) theta_LS_err = np.linalg.solve(np.dot(X.T / dy ** 2, X), np.dot(X.T / dy ** 2, y)) plt.errorbar(x, y, dy, fmt='o') plt.ylim(-20, 40) plt.plot(xfit, model(theta_LAD, xfit), label='Least Abs. Dev.') plt.plot(xfit, model(theta_LS, xfit), label='Least Sq.') plt.plot(xfit, model(theta_LS_err, xfit), label='Least Sq. w/ errors') plt.legend(loc='best');