# download the software import urllib urllib.urlretrieve('https://github.com/sods/ods/archive/master.zip', 'master.zip') # unzip the software import zipfile zip = zipfile.ZipFile('./master.zip', 'r') for name in zip.namelist(): zip.extract(name, '.') # add the module location to the python path. import sys sys.path.append("./ods-master/") import pods pods.notebook.display_google_book(id='ORUOAAAAQAAJ', page='213') pods.notebook.display_google_book(id='ORUOAAAAQAAJ', page='217') pods.notebook.display_google_book(id='ORUOAAAAQAAJ', page='221') from datetime import timedelta start=int(timedelta(hours=0, minutes=20, seconds=15).total_seconds()) YouTubeVideo('AvlnFnvFw_0',start=start) # set prior variance on w alpha = 4. # set the order of the polynomial basis set order = 5 # set the noise variance sigma2 = 0.01 import numpy as np data = pods.datasets.olympic_marathon_men() x = data['X'] y = data['Y'] num_data = x.shape[0] num_pred_data = 100 # how many points to use for plotting predictions x_pred = np.linspace(1890, 2016, num_pred_data)[:, None] # input locations for predictions def polynomial(x, degree, loc, scale): degrees = np.arange(degree+1) return ((x-loc)/scale)**degrees loc = 1950. scale = 1. degree = 5. Phi_pred = polynomial(x_pred, degree=degree, loc=loc, scale=scale) Phi = polynomial(x, degree=degree, loc=loc, scale=scale) w_vec = np.random.normal(size=200) print 'w sample mean is ', w_vec.mean() print 'w sample variance is ', w_vec.var() phi = 7 f_vec = phi*w_vec print 'True mean should be phi*0 = 0.' print 'True variance should be phi*phi*1 = ', phi*phi print 'f sample mean is ', f_vec.mean() print 'f sample variance is ', f_vec.var() mu = 4 # mean of the distribution alpha = 2 # variance of the distribution w_vec = np.random.normal(size=200)*np.sqrt(alpha) + mu print 'w sample mean is ', w_vec.mean() print 'w sample variance is ', w_vec.var() # First the standard normal import matplotlib.pyplot as plt %matplotlib inline z_vec = np.random.normal(size=1000) # by convention, in statistics, z is often used to denote samples from the standard normal w_vec = z_vec*np.sqrt(alpha) + mu # plot normalized histogram of w, and then normalized histogram of z on top plt.hist(w_vec, bins=30, normed=True) plt.hist(z_vec, bins=30, normed=True) plt.legend(('$w$', '$z$')) K = degree + 1 z_vec = np.random.normal(size=K) w_sample = z_vec*np.sqrt(alpha) print w_sample f_sample = np.dot(Phi_pred,w_sample) plt.plot(x_pred.flatten(), f_sample.flatten(), 'r-') scale = 100. Phi_pred = polynomial(x_pred, degree=degree, loc=loc, scale=scale) Phi = polynomial(x, degree=degree, loc=loc, scale=scale) f_sample = np.dot(Phi_pred,w_sample) plt.plot(x_pred.flatten(), f_sample.flatten(), 'r-') num_samples = 10 K = degree+1 for i in xrange(num_samples): z_vec = np.random.normal(size=K) w_sample = z_vec*np.sqrt(alpha) f_sample = np.dot(Phi_pred,w_sample) plt.plot(x_pred.flatten(), f_sample.flatten()) from datetime import timedelta start=int(timedelta(hours=0, minutes=0, seconds=15).total_seconds()) YouTubeVideo('AvlnFnvFw_0',start=start) start=int(timedelta(hours=0, minutes=22, seconds=42).total_seconds()) YouTubeVideo('Os1iqgpelPw', start=start) # Question 1 Answer Code # Write code for you answer to this question in this box # Do not delete these comments, otherwise you will get zero for this answer. # Make sure your code has run and the answer is correct *before* submitting your notebook for marking. sigma2 = w_cov = w_mean = w_sample = np.random.multivariate_normal(w_mean.flatten(), w_cov) f_sample = np.dot(Phi_pred,w_sample) plt.plot(x_pred.flatten(), f_sample.flatten(), 'r-') plt.plot(x, y, 'rx') # plot data to show fit. for i in xrange(num_samples): w_sample = np.random.multivariate_normal(w_mean.flatten(), w_cov) f_sample = np.dot(Phi_pred,w_sample) plt.plot(x_pred.flatten(), f_sample.flatten()) plt.plot(x, y, 'rx') # plot data to show fit. K = 10 # how many Gaussians to add. num_samples = 1000 # how many samples to have in y_vec mus = np.linspace(0, 5, K) # mean values generated linearly spaced between 0 and 5 sigmas = np.linspace(0.5, 2, K) # sigmas generated linearly spaced between 0.5 and 2 y_vec = np.zeros(num_samples) for mu, sigma in zip(mus, sigmas): z_vec = np.random.normal(size=num_samples) # z is from standard normal y_vec += z_vec*sigma + mu # add to y z*sigma + mu # now y_vec is the sum of each scaled and off set z. print 'Sample mean is ', y_vec.mean(), ' and sample variance is ', y_vec.var() print 'True mean should be ', mus.sum() print 'True variance should be ', (sigmas**2).sum(), ' standard deviation ', np.sqrt((sigmas**2).sum()) plt.hist(y_vec, bins=30, normed=True) plt.legend('$y$') # Question 2 Answer Code # Write code for you answer to this question in this box # Do not delete these comments, otherwise you will get zero for this answer. # Make sure your code has run and the answer is correct *before* submitting your notebook for marking. # compute mean under posterior density f_pred_mean = # plot the predictions # compute mean at the training data and sum of squares error f_mean = sum_squares = print 'The error is: ', sum_squares # Question 3 Answer Code # Write code for you answer to this question in this box # Do not delete these comments, otherwise you will get zero for this answer. # Make sure your code has run and the answer is correct *before* submitting your notebook for marking. # Compute variance at function values f_pred_var = f_pred_std = # plot the mean and error bars at 2 standard deviations above and below the mean # Question 4 Answer Code # Write code for you answer to this question in this box # Do not delete these comments, otherwise you will get zero for this answer. # Make sure your code has run and the answer is correct *before* submitting your notebook for marking. # Question 5 Answer Code # Write code for you answer to this question in this box # Do not delete these comments, otherwise you will get zero for this answer. # Make sure your code has run and the answer is correct *before* submitting your notebook for marking.