"""Linear-regression exercise on a car fuel-economy data file (notebook export).

The IPython-only lines of the original notebook (``%load``, ``%matplotlib``
and the ``!`` shell escape) are kept as comments so this file is valid
Python.  Everything that needs the data file or a plotting backend lives
under the ``__main__`` guard, so ``linreg`` and ``rms_error`` can be imported
and tested on their own.
"""
import numpy as np
import numpy.linalg as la
from math import sqrt

# Column indices into the numeric columns read from the data file.
# NOTE(review): assumes the UCI auto-mpg column order
# (mpg, cylinders, displacement, horsepower, weight, ...) -- confirm against
# the actual file before trusting the results.
MPG = 0
CYLINDERS = 1
DISPLACEMENT = 2
WEIGHT = 4


# code to fit and predict
def linreg(X, y):
    """Fit ordinary least squares with an intercept and predict on X.

    Parameters
    ----------
    X : array-like, shape (n,) or (n, p - 1)
        Feature values, one row per example.  A 1-D input is treated as a
        single feature column.
    y : array-like, shape (n,)
        Target values.

    Returns
    -------
    w : ndarray, shape (p,)
        Weight vector; ``w[0]`` is the intercept, the rest follow the
        column order of ``X``.
    y_pred : ndarray, shape (n,)
        Predicted target for every row of ``X``.
    sse : float
        Sum of squared errors between ``y`` and ``y_pred``.

    Raises
    ------
    ValueError
        If ``X`` and ``y`` do not have the same number of rows.
    """
    X = np.asarray(X, dtype=float)
    y = np.asarray(y, dtype=float)
    if X.ndim == 1:
        X = X[:, np.newaxis]
    if X.shape[0] != y.shape[0]:
        raise ValueError(
            "X and y must have the same number of rows (got %d and %d)"
            % (X.shape[0], y.shape[0])
        )

    # 1. Add a column of 1's to X; now it has a total of p columns
    design = np.hstack((np.ones((X.shape[0], 1)), X))
    # 2. Calculate the weight vector w (p entries).  The pseudo-inverse gives
    #    the least-squares solution and stays defined even when the feature
    #    columns are collinear, unlike inverting X^T X directly.
    w = la.pinv(design).dot(y)
    # 3. Calculate the predicted values of the target, y_pred
    y_pred = design.dot(w)
    # 4. Calculate the error, sse
    sse = float(np.sum((y - y_pred) ** 2))
    return w, y_pred, sse


def rms_error(sse, n):
    """Return the root-mean-square error for a sum of squared errors.

    Parameters
    ----------
    sse : float
        Sum of squared errors, as returned by ``linreg``.
    n : int
        Number of examples the errors were summed over.

    Raises
    ------
    ValueError
        If ``n`` is not positive.
    """
    if n <= 0:
        raise ValueError("n must be positive")
    return sqrt(sse / float(n))


if __name__ == "__main__":
    # code to get the data
    # %load solutions/get-data-curl.py
    # %load solutions/get-data-urllib.py

    # code to load the data
    # %load solutions/load-data.py

    # look at the first few examples.
    # !cat data | grep "?"

    # code to load the data without strings
    # %load solutions/load-data2.py
    auto = np.genfromtxt("data", usecols=(0, 1, 2, 3, 4, 5, 6, 7), missing_values='?')

    # missing values are displayed as nan. Let's take a look which columns have them
    print(np.any(np.isnan(auto), axis=0))

    # Now let's take a look which rows have them
    print(np.any(np.isnan(auto), axis=1))

    # code to remove nans
    nan_rows = np.any(np.isnan(auto), axis=1)
    auto = auto[~nan_rows, :]
    # %load solutions/remove-nans.py

    # 1. Split the data into the training examples X (cylinders and displacement)
    #    and target column y (miles per gallon)
    X = auto[:, [CYLINDERS, DISPLACEMENT]]
    y = auto[:, MPG]
    w, y_pred, sse = linreg(X, y)
    print((w, y_pred, sse))
    # %load solutions/linreg.py

    # code to print the RMS error
    print(rms_error(sse, len(y)))
    # %load solutions/rms.py

    # Imported here rather than at the top so the functions above stay
    # importable on machines without a plotting backend.
    import matplotlib.pyplot as plt
    # %matplotlib inline

    # predict and plot using just cylinders as a feature, print the sse
    X = auto[:, [CYLINDERS]]
    y = auto[:, MPG]
    w, y_pred, sse = linreg(X, y)
    print(sse)
    plt.plot(X, y, 'o')
    plt.plot(X, y_pred, 'r')
    plt.show()
    # %load solutions/predict-cylinders.py

    # predict and plot using just weight as a feature, print the sse
    X = auto[:, [WEIGHT]]
    y = auto[:, MPG]
    w, y_pred, sse = linreg(X, y)
    print(sse)
    plt.plot(X, y, 'o')
    plt.plot(X, y_pred, 'r')
    plt.show()
    # %load solutions/predict-and-plot.py