from __future__ import division

import pandas as pd
import scipy as sp
import seaborn as sn
from mpl_toolkits.mplot3d import Axes3D
%pylab inline
Populating the interactive namespace from numpy and matplotlib
cd ex1
C:\Users\Admin\Dropbox\Data science\Coursera machine learning\machine-learning-ex1\ex1
#load in the data, set header to None so that first line isn't assigned as header
data = pd.read_csv('ex1data1.txt', header = None)
data.head()
0 | 1 | |
---|---|---|
0 | 6.1101 | 17.5920 |
1 | 5.5277 | 9.1302 |
2 | 8.5186 | 13.6620 |
3 | 7.0032 | 11.8540 |
4 | 5.8598 | 6.8233 |
# assign the first and second columns to the variables X and y, respectively
# (per the plot labels below: column 0 = city population in 10,000s,
#  column 1 = profit in $10,000s)
X = data.iloc[:,0]
y = data.iloc[:,1]
#check X
X.head()
0 6.1101 1 5.5277 2 8.5186 3 7.0032 4 5.8598 Name: 0, dtype: float64
#check y
y.head()
0 17.5920 1 9.1302 2 13.6620 3 11.8540 4 6.8233 Name: 1, dtype: float64
#check that the length of y matches the MatLab file (97 rows), which it does. Assign this to the variable m
# m = number of training examples; used in the 1/m scaling of the cost/gradient
m = len(y)
m
97
#Plot a scatter of population size by profit
plt.scatter(X,y, c = 'r', marker = 'x')
plt.xlabel("Population of city in 10,000s")
plt.ylabel("Profit in $10,000s")
plt.title("Scatter plot of training data")
# fix the axis limits (presumably to match the assignment's reference figure)
plt.xlim(4,24)
plt.ylim(-5,25)
(-5, 25)
# X was automatically made into a series, so make it into a dataframe
X = pd.DataFrame(X)
#insert a column of ones in the first column of X
# This takes into account the intercept term, theta0
# (every prediction then includes theta0 * 1 from this column)
X.insert(0, "theta zero",value =pd.Series(np.ones([m])))
#check this worked correctly
X.head()
theta zero | 0 | |
---|---|---|
0 | 1 | 6.1101 |
1 | 1 | 5.5277 |
2 | 1 | 8.5186 |
3 | 1 | 7.0032 |
4 | 1 | 5.8598 |
#initialize fitting parameters as stated in the homework
theta = np.zeros([2, 1])  # one parameter per column of X: [intercept, slope]
iterations = 1500         # gradient-descent steps (value given by the homework)
alpha = 0.01              # learning rate (value given by the homework)
#this is how you calculate the hypothesis parameter
# X is (m x 2) and theta is (2 x 1), so the dot product is the (m x 1)
# column of predictions h(x) = theta0 + theta1*x; all zero for theta = 0
hypothesis = X.dot(theta)
hypothesis.head()
0 | |
---|---|
0 | 0 |
1 | 0 |
2 | 0 |
3 | 0 |
4 | 0 |
# define a function that computes the cost function J()
def costJ(X, y, theta):
    '''Return the least-squares cost J = (1/(2m)) * sum((X.theta - y)**2).

    X     -- (m x n) design matrix (DataFrame or ndarray); the first column is
             ones so that theta[0] acts as the intercept term
    y     -- length-m target vector (Series or ndarray)
    theta -- (n x 1) parameter array (a flat length-n vector also works)
    '''
    m = len(y)
    # ravel both sides so the same code works whether X/y are pandas objects
    # or plain numpy arrays: X.dot(theta) can come back as a one-column frame
    # or a 2-D array, which is the nesting problem the original noted
    errors = np.asarray(X.dot(theta)).ravel() - np.asarray(y).ravel()
    return (errors ** 2).sum() / (2.0 * m)
# sanity check: cost at theta = [0, 0] -- the value below (~32.07) is what the
# exercise expects for the initial parameters
costJ(X,y,theta)
32.072733877455654
# define a function that will implement the gradient descent algorithm
def gradDesc(X, y, theta, alpha, num_iters):
    '''Run batch gradient descent and return the cost history and fitted theta.

    X         -- (m x n) design matrix (DataFrame or ndarray) with a leading ones column
    y         -- length-m target vector (Series or ndarray)
    theta     -- (n x 1) array of starting parameters (the caller's array is not modified)
    alpha     -- learning rate
    num_iters -- number of iterations to run
    Returns (Jhistory, theta_update): the cost after every step, and the final
    parameter array with the same shape as theta.
    '''
    m = len(y)  # bug fix: the original read the global m instead of deriving it from y
    X_arr = np.asarray(X, dtype=float)
    y_arr = np.asarray(y, dtype=float).ravel()
    theta_update = np.array(theta, dtype=float)  # work on a copy, as before
    Jhistory = []  # cost after each full update, for convergence checking
    for _ in range(num_iters):
        # residuals under the current parameters (fixed for the whole update,
        # so every theta_j is updated simultaneously, as in the original)
        loss = X_arr.dot(theta_update).ravel() - y_arr
        # vectorized form of: theta_j -= alpha * (1/m) * sum(loss * x_j)
        gradient = X_arr.T.dot(loss) / m
        theta_update = theta_update - alpha * gradient.reshape(theta_update.shape)
        Jhistory.append(costJ(X, y, theta_update))
    return Jhistory, theta_update
#try running the function
Jhistory, theta_update= gradDesc(X,y,theta,alpha,1500)
# inspect the first few cost values -- they should drop quickly from the
# initial ~32.07 and keep decreasing
Jhistory[:10]
[6.7371904648700083, 5.9315935686049572, 5.9011547070813872, 5.8952285864442207, 5.8900949431173322, 5.8850041584436461, 5.8799324804914157, 5.8748790947625746, 5.8698439118063845, 5.8648268653129296]
# These are the final values for theta (intercept ~ -3.63, slope ~ 1.17)
theta_update
array([[-3.63029144], [ 1.16636235]])
# Check the output of Jhistory is decreasing towards a minimum
plt.plot(Jhistory)
plt.xlabel("number of iterations")
plt.ylabel("cost function")
<matplotlib.text.Text at 0xf4e8518>
#Add a line which shows the outcome of the gradient descent after 1000 iterations
# (column 1 of X holds the raw population values; column 0 is the ones column)
plt.scatter(X.iloc[:,1],y, c = 'r', marker = 'x')
plt.xlabel("Population of city in 10,000s")
plt.ylabel("Profit in $10,000s")
plt.title("Scatter plot of training data")
plt.xlim(4,24)
plt.ylim(-5,25)
# overlay the fitted line: predicted profit X.theta against population
plt.plot(X.iloc[:,1],X.dot(theta_update))
[<matplotlib.lines.Line2D at 0x10833f28>]
# Next I want to make a contour plot that plots theta0 against theta 1 and the outcome of J
# First initiate values for theta0 and theta 1
theta0_vals = np.linspace(-10, 10, 100)
theta1_vals = np.linspace(-1, 4, 100)
#they both have length 100
len(theta1_vals)
100
# initialize J_vals to a matrix of 0's
# row i will correspond to theta0_vals[i], column j to theta1_vals[j]
J_vals = np.zeros([len(theta0_vals), len(theta1_vals)])
J_vals.shape
(100L, 100L)
# Evaluate the cost function at every (theta0, theta1) grid point so the
# contour/surface plots below can be drawn. J_vals is indexed [i, j] with
# rows following theta0_vals and columns following theta1_vals.
for i, t0 in enumerate(theta0_vals):
    for j, t1 in enumerate(theta1_vals):
        J_vals[i, j] = costJ(X, y, np.array([[t0], [t1]]))
# contour of the cost surface; bare `logspace` is available via %pylab inline
# NOTE(review): J_vals is indexed [theta0, theta1] but plt.contour treats the
# first array axis as y -- the later call uses J_vals.T for this reason
plt.contour(theta0_vals, theta1_vals, J_vals, logspace(-2, 3, 20))
<matplotlib.contour.QuadContourSet instance at 0x000000000CAE3348>
# NOTE(review): theta0mesh/theta1mesh are only defined a few lines below --
# this cell was evidently re-run after the meshgrid cell in the live notebook
plt.contour(theta0mesh,theta1mesh,J_vals, np.logspace(-2, 3, 20))
<matplotlib.contour.QuadContourSet instance at 0x000000000D124948>
sn.set_palette("Blues")
# mark the theta found by gradient descent on top of the contour plot
plt.scatter(theta_update[0], theta_update[1], )
plt.contour(theta0_vals, theta1_vals, J_vals.T, logspace(-2, 3, 20))
sn.set_style("darkgrid")
#numpy.meshgrid takes two (or more) 1d x and y coordinate arrays, and returns a pair of 2d x and y grid arrays.
theta0mesh, theta1mesh = np.meshgrid(theta0_vals,theta1_vals)
# 3-D surface of the cost over the theta grid (transposed to match meshgrid's
# (len(theta1_vals), len(theta0_vals)) output orientation)
fig = plt.figure()
ax = fig.gca(projection='3d')
ax.plot_surface(theta0mesh, theta1mesh, J_vals.T)
<mpl_toolkits.mplot3d.art3d.Poly3DCollection at 0x10e16da0>