%pylab inline
import numpy as np
import pylab as pb
import GPy

# view the documentation for the normal sampler
np.random.normal?

# draw 10 samples from a standard normal and inspect the sample mean and variance
X = np.random.normal(0, 1, size=10)
print(X)
X.mean()
X.var()

# check how the sample mean and variance behave as the number of samples grows
means = []
variances = []
samples = [10, 50, 100, 500, 1000, 5000, 10000, 50000, 100000]
for n in samples:
    x = np.random.randn(n)
    mean = x.mean()
    variance = (x**2).mean() - mean**2
    means.append(mean)
    variances.append(variance)
means = np.asarray(means)
variances = np.asarray(variances)
samples = np.asarray(samples)

plt.semilogx(samples, means)
plt.xlabel(r'$\log_{10}n$')
plt.ylabel('mean')

plt.semilogx(samples, variances)
plt.xlabel(r'$\log_{10}n$')
plt.ylabel('variance')

# load the olympic marathon data and plot it
data = GPy.util.datasets.olympic_marathon_men()
x = data['X']
y = data['Y']
print(x)
print(y)
plt.plot(x, y, 'rx')

# initial guesses for the slope and offset of a linear fit
m = -0.4
c = 80

# coordinate descent: update the offset given the slope, then the slope given the offset
c = (y - m*x).mean()
print(c)
m = ((y - c)*x).sum()/(x**2).sum()
print(m)

# input locations for plotting the fitted function
x_test = np.linspace(1890, 2020, 130)[:, None]
f_test = m*x_test + c
plt.plot(x_test, f_test, 'b-')
plt.plot(x, y, 'rx')

# iterate the coordinate descent updates a few more times
for i in np.arange(10):
    m = ((y - c)*x).sum()/(x*x).sum()
    c = (y - m*x).sum()/y.shape[0]
print(m)
print(c)
f_test = m*x_test + c
plt.plot(x_test, f_test, 'b-')
plt.plot(x, y, 'rx')

# build the design matrix by stacking a column of ones next to the inputs
X = np.hstack((np.ones_like(x), x))
print(X)
np.dot(X.T, X)

# the same product computed as a sum of outer products
store = np.zeros((2, 2))
for i in np.arange(X.shape[0]):
    store += np.outer(X[i, :], X[i, :])
print(store)

# create a larger matrix to compare the speed of the two approaches
A = np.random.normal(loc=0.0, scale=1.0, size=(10000, 4))

import time
start = time.time()
np.dot(A.T, A)
end = time.time()
print("Matrix multiplication: ", end - start)

start = time.time()
store = np.zeros((A.shape[1], A.shape[1]))
for i in np.arange(A.shape[0]):
    store += np.outer(A[i, :], A[i, :])
end = time.time()
print("For loop over outer products: ", end - start)

# view the documentation for the linear solver
np.linalg.solve?

# solve the normal equations for the linear model
w = np.linalg.solve(np.dot(X.T, X), np.dot(X.T, y))
print(w)
m = w[1]
c = w[0]
f_test = m*x_test + c
print(m)
print(c)
plt.plot(x_test, f_test, 'b-')
plt.plot(x, y, 'rx')

# quadratic basis: solve the normal equations and plot the fit
Phi = np.hstack([np.ones(x.shape), x, x**2])
w = np.linalg.solve(np.dot(Phi.T, Phi), np.dot(Phi.T, y))
print(w)
f_test = w[2]*x_test**2 + w[1]*x_test + w[0]
plt.plot(x_test, f_test, 'b-')
plt.plot(x, y, 'rx')

# the same predictions written as a matrix-vector product
Phi_test = np.hstack((np.ones_like(x_test), x_test, x_test**2))
f_test = np.dot(Phi_test, w)
plt.plot(x_test, f_test, 'b-')
plt.plot(x, y, 'rx')

num_data = x.shape[0]
num_pred_data = 100  # how many points to use for plotting predictions
x_pred = np.linspace(1890, 2016, num_pred_data)[:, None]  # input locations for predictions
order = 4  # the polynomial order to use

# build the basis set
Phi = np.zeros((num_data, order+1))
Phi_pred = np.zeros((num_pred_data, order+1))
for i in range(0, order+1):
    Phi[:, i:i+1] = x**i
    Phi_pred[:, i:i+1] = x_pred**i

# solve the linear system
w_star = np.linalg.solve(np.dot(Phi.T, Phi), np.dot(Phi.T, y))

# predict at training and test points
f = np.dot(Phi, w_star)
f_pred = np.dot(Phi_pred, w_star)

# compute the sum of squares term
sum_squares = ((y - f)**2).sum()

# fit the noise variance
sigma2 = sum_squares/num_data
error = 0.5*(num_data*np.log(sigma2) + sum_squares/sigma2)

# print the error and plot the predictions
print("The error is: %2.4f" % error)
plt.plot(x_pred, f_pred)
plt.plot(x, y, 'rx')
ax = plt.gca()
ax.set_title('Predictions for Order ' + str(order))
ax.set_xlabel('year')
ax.set_ylabel('pace (min/km)')
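# Not part of the original lab: a minimal sketch cross-checking the normal-equations
# solution above against np.linalg.lstsq, which solves the same least-squares problem
# via an SVD and is usually more robust when the polynomial basis is badly conditioned.
# It assumes Phi, y and w_star from the order-4 fit above are still in scope; the two
# solutions can differ noticeably when np.dot(Phi.T, Phi) is close to singular.
w_lstsq, residuals, rank, sing_vals = np.linalg.lstsq(Phi, y, rcond=None)
print("Normal equations solution: ", w_star.flatten())
print("lstsq solution:            ", w_lstsq.flatten())
print("Maximum absolute difference:", np.abs(w_star - w_lstsq).max())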
# import the time module to allow python to pause
import time
# import the IPython display helpers to redraw and clear the output
from IPython.display import display, clear_output

num_data = len(x)
error_list = []
max_order = 6
sigma2 = 1
fig, axes = plt.subplots(nrows=1, ncols=2)
for order in range(0, max_order+1):
    # 1. build the basis set
    Phi = np.zeros((num_data, order+1))
    Phi_pred = np.zeros((num_pred_data, order+1))
    for i in range(0, order+1):
        Phi[:, i] = x.flatten()**i
        Phi_pred[:, i] = x_pred.flatten()**i
    # 2. solve the linear system
    w = np.linalg.solve(np.dot(Phi.T, Phi), np.dot(Phi.T, y))
    # 3. make predictions at training and test points
    f_pred = np.dot(Phi_pred, w)
    f = np.dot(Phi, w)
    # 4. compute the error and append it to a list
    error_list.append(((y - f)**2).sum() + num_data/2.*np.log(sigma2))
    # 5. plot the predictions
    axes[0].clear()
    axes[1].clear()
    axes[0].plot(x_pred, f_pred)
    axes[0].plot(x, y, 'rx')
    axes[0].set_ylim((2.5, 5.5))
    axes[0].set_title('Predictions for Order ' + str(order) + ' model.')
    axes[1].plot(np.arange(0, order+1), np.asarray(error_list))
    axes[1].set_xlim((0, max_order))
    axes[1].set_ylim((0, 10))
    axes[1].set_title('Training Error')
    display(fig)
    time.sleep(1)
    clear_output()
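# Not part of the original lab: a sketch of a hold-out validation loop for the same
# polynomial bases, since the loop above only reports training error. It assumes x, y,
# num_data and max_order from above are still in scope; the 20/7 split and the random
# seed are arbitrary choices made here purely for illustration.
np.random.seed(0)  # illustrative only, for a repeatable split
indices = np.random.permutation(num_data)
train_ind, valid_ind = indices[:20], indices[20:]
x_train, y_train = x[train_ind], y[train_ind]
x_valid, y_valid = x[valid_ind], y[valid_ind]
valid_error_list = []
for order in range(0, max_order+1):
    # build the basis on the training and validation inputs
    Phi_train = np.zeros((x_train.shape[0], order+1))
    Phi_valid = np.zeros((x_valid.shape[0], order+1))
    for i in range(0, order+1):
        Phi_train[:, i] = x_train.flatten()**i
        Phi_valid[:, i] = x_valid.flatten()**i
    # fit on the training split only
    w = np.linalg.solve(np.dot(Phi_train.T, Phi_train), np.dot(Phi_train.T, y_train))
    # score on the held-out points
    valid_error_list.append(((y_valid - np.dot(Phi_valid, w))**2).sum())
plt.plot(np.arange(0, max_order+1), np.asarray(valid_error_list), 'go-')
plt.xlabel('polynomial order')
plt.ylabel('hold-out sum of squared errors')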