%matplotlib inline import matplotlib.pyplot as plt # Some nice default configuration for plots plt.rcParams['figure.figsize'] = 10, 7.5 plt.rcParams['axes.grid'] = True plt.gray() import numpy as np import pandas as pd # a simulated sin wave X = np.random.uniform(2, 10, 100) y = np.sin(X) + np.random.normal(0, 0.2, 100) # high bias estimate est_bias = np.zeros(100) est_bias[:50] = np.mean(y[np.argsort(X)[:50]]) est_bias[50:] = np.mean(y[np.argsort(X)[50:]]) # high variance estimate def movingaverage(values, window): '''calculate simple moving average''' weigths = np.repeat(1.0, window)/window #including valid will REQUIRE there to be enough datapoints. #for example, if you take out valid, it will start @ point one, #not having any prior points, so itll be 1+0+0 = 1 /3 = .3333 smas = np.convolve(values, weigths, 'valid') return smas est_var = movingaverage(y[np.argsort(X)], 3) # MA(3) plt.scatter(X, y) # plot high bias estimate plt_bias, = plt.plot(np.insert(X[np.argsort(X)], 50, np.nan), np.insert(est_bias, 50, np.nan), # insert discontinuous point color='g', linewidth=2) # plot high variance estimate plt_var, = plt.plot(X[np.argsort(X)][2:], est_var, color='r') plt.xlabel("Predictor") plt.ylabel("Outcome") plt.legend([plt_bias, plt_var], ['High bias model', 'High variance model'])