import pandas as pd import scipy as sp import seaborn as sn from mpl_toolkits.mplot3d import Axes3D from __future__ import division %pylab inline cd ex1 #load in the data, set header to None so that first line isn't assigned as header data = pd.read_csv('ex1data1.txt', header = None) data.head() # assign the first and second columns to the variables X and y, respectively X = data.iloc[:,0] y = data.iloc[:,1] #check X X.head() #check y y.head() #check that the length of y matches the MatLab file (97 rows), which it does. Assign this to the variable m m = len(y) m #Plot a scatter of population size by profit plt.scatter(X,y, c = 'r', marker = 'x') plt.xlabel("Population of city in 10,000s") plt.ylabel("Profit in $10,000s") plt.title("Scatter plot of training data") plt.xlim(4,24) plt.ylim(-5,25) # X was automatically made into a series, so make it into a dataframe X = pd.DataFrame(X) #insert a column of ones in the first column of X # This takes into account the intercept term, theta0 X.insert(0, "theta zero",value =pd.Series(np.ones([m]))) #check this worked correctly X.head() #initialize fitting parameters as stated in the homework theta = np.zeros([2, 1]) iterations = 1500 alpha = 0.01 #this is how you calculate the hypothesis parameter hypothesis = X.dot(theta) hypothesis.head() # define a function that computes the cost function J() def costJ(X,y,theta): '''where X is a dataframe of input features, plus a column of ones to accommodate theta 0; y is a vector that we are trying to predict using these features, and theta is an array of the parameters''' m = len(y) hypothesis = X.dot(theta) # if you're using a np array, you need to use flatten to remove nesting a = (hypothesis[0]-y)**2 # otherwise this will give you completely the wrong answerX[:, 0] J = (1/(2*m)*sum(a)) return J costJ(X,y,theta) # define a function that will implement the gradient descent algorithm def gradDesc(X,y,theta,alpha,num_iters): '''Implement the gradient descent algorithm, where alpha is 
the learning rate and num_iters is the number of iterations to run''' Jhistory = [] #initiate an empty list to store values of cost function after each cycle theta_update = theta.copy() for num_iter in range(num_iters): #these update only once for each iteration hypothesis = X.dot(theta_update) loss = hypothesis[0]-y for i in range(len(X.columns)): #these will update once for every parameter theta_update[i] = theta_update[i] - (alpha*(1.0/m))*((loss*(X.iloc[:,i])).sum()) Jhistory.append(costJ(X,y,theta_update)) return Jhistory, theta_update #try running the function Jhistory, theta_update= gradDesc(X,y,theta,alpha,1500) Jhistory[:10] # These are the final values for theta theta_update # Check the output of Jhistory is decreasing towards a minimum plt.plot(Jhistory) plt.xlabel("number of iterations") plt.ylabel("cost function") #Add a line which shows the outcome of the gradient descent after 1000 iterations plt.scatter(X.iloc[:,1],y, c = 'r', marker = 'x') plt.xlabel("Population of city in 10,000s") plt.ylabel("Profit in $10,000s") plt.title("Scatter plot of training data") plt.xlim(4,24) plt.ylim(-5,25) plt.plot(X.iloc[:,1],X.dot(theta_update)) # Next I want to make a contour plot that plots theta0 against theta 1 and the outcome of J # First initiate values for theta0 and theta 1 theta0_vals = np.linspace(-10, 10, 100) theta1_vals = np.linspace(-1, 4, 100) #they both have length 100 len(theta1_vals) # initialize J_vals to a matrix of 0's J_vals = np.zeros([len(theta0_vals), len(theta1_vals)]) J_vals.shape for i in range(len(theta0_vals)): for j in range(len(theta1_vals)): t = np.array([[theta0_vals[i]], [theta1_vals[j]]]) J_vals[i,j] = costJ(X, y, t) plt.contour(theta0_vals, theta1_vals, J_vals, logspace(-2, 3, 20)) plt.contour(theta0mesh,theta1mesh,J_vals, np.logspace(-2, 3, 20)) sn.set_palette("Blues") plt.scatter(theta_update[0], theta_update[1], ) plt.contour(theta0_vals, theta1_vals, J_vals.T, logspace(-2, 3, 20)) sn.set_style("darkgrid") #numpy.meshgrid 
# numpy.meshgrid takes two (or more) 1-D coordinate arrays and returns a
# pair of 2-D grid arrays: one holding the x-coordinate of every grid
# point, one the y-coordinate -- the form plot_surface requires.
theta0mesh, theta1mesh = np.meshgrid(theta0_vals, theta1_vals)

# Surface plot of the cost over the (theta0, theta1) grid.
fig = plt.figure()
# fig.gca(projection='3d') was deprecated in Matplotlib 3.4 and removed
# in 3.6; add_subplot is the supported way to create a 3-D axes.
ax = fig.add_subplot(projection='3d')
# J_vals[i, j] has theta0 along axis 0, but meshgrid puts theta0 along
# axis 1 of the mesh arrays -- hence the transpose.
ax.plot_surface(theta0mesh, theta1mesh, J_vals.T)