from __future__ import division

import pandas as pd
import scipy as sp
import seaborn as sn
from mpl_toolkits.mplot3d import Axes3D
%pylab inline
Populating the interactive namespace from numpy and matplotlib
cd ex1
C:\Users\Admin\Dropbox\Data science\Coursera machine learning\machine-learning-ex1\ex1
#load in the data, set header to None so that first line isn't assigned as header
data = pd.read_csv('ex1data1.txt', header = None)
data.head()
0 | 1 | |
---|---|---|
0 | 6.1101 | 17.5920 |
1 | 5.5277 | 9.1302 |
2 | 8.5186 | 13.6620 |
3 | 7.0032 | 11.8540 |
4 | 5.8598 | 6.8233 |
# assign the first and second columns to the variables X and y, respectively
# (per the plot labels below: column 0 = city population in 10,000s,
#  column 1 = profit in $10,000s)
X = data.iloc[:,0]
y = data.iloc[:,1]
#check X
X.head()
0 6.1101 1 5.5277 2 8.5186 3 7.0032 4 5.8598 Name: 0, dtype: float64
#check y
y.head()
0 17.5920 1 9.1302 2 13.6620 3 11.8540 4 6.8233 Name: 1, dtype: float64
#check that the length of y matches the MatLab file (97 rows), which it does. Assign this to the variable m
# m = number of training examples; used in the 1/m scaling of the cost/gradient
m = len(y)
m
97
#Plot a scatter of population size by profit
plt.scatter(X,y, c = 'r', marker = 'x')
plt.xlabel("Population of city in 10,000s")
plt.ylabel("Profit in $10,000s")
plt.title("Scatter plot of training data")
# fix the axis limits (presumably to match the assignment's reference figure)
plt.xlim(4,24)
plt.ylim(-5,25)
(-5, 25)
# X was automatically made into a series, so make it into a dataframe
X = pd.DataFrame(X)
#insert a column of ones in the first column of X
# This takes into account the intercept term, theta0
# (every prediction then includes theta0 * 1 from this column)
X.insert(0, "theta zero",value =pd.Series(np.ones([m])))
#check this worked correctly
X.head()
theta zero | 0 | |
---|---|---|
0 | 1 | 6.1101 |
1 | 1 | 5.5277 |
2 | 1 | 8.5186 |
3 | 1 | 7.0032 |
4 | 1 | 5.8598 |
#initialize fitting parameters as stated in the homework
theta = np.zeros([2, 1])  # one parameter per column of X: [intercept, slope]
iterations = 1500         # gradient-descent steps (value given by the homework)
alpha = 0.01              # learning rate (value given by the homework)
#this is how you calculate the hypothesis parameter
# X is (m x 2) and theta is (2 x 1), so the dot product is the (m x 1)
# column of predictions h(x) = theta0 + theta1*x; all zero for theta = 0
hypothesis = X.dot(theta)
hypothesis.head()
0 | |
---|---|
0 | 0 |
1 | 0 |
2 | 0 |
3 | 0 |
4 | 0 |
# define a function that computes the cost function J()
def costJ(X, y, theta):
    '''Return the least-squares cost J = (1/(2m)) * sum((X.theta - y)**2).

    X     -- (m x n) design matrix (DataFrame or ndarray); the first column is
             ones so that theta[0] acts as the intercept term
    y     -- length-m target vector (Series or ndarray)
    theta -- (n x 1) parameter array (a flat length-n vector also works)
    '''
    m = len(y)
    # ravel both sides so the same code works whether X/y are pandas objects
    # or plain numpy arrays: X.dot(theta) can come back as a one-column frame
    # or a 2-D array, which is the nesting problem the original noted
    errors = np.asarray(X.dot(theta)).ravel() - np.asarray(y).ravel()
    return (errors ** 2).sum() / (2.0 * m)
# sanity check: cost at theta = [0, 0] -- the value below (~32.07) is what the
# exercise expects for the initial parameters
costJ(X,y,theta)
32.072733877455654
# define a function that will implement the gradient descent algorithm
def gradDesc(X, y, theta, alpha, num_iters):
    '''Run batch gradient descent and return the cost history and fitted theta.

    X         -- (m x n) design matrix (DataFrame or ndarray) with a leading ones column
    y         -- length-m target vector (Series or ndarray)
    theta     -- (n x 1) array of starting parameters (the caller's array is not modified)
    alpha     -- learning rate
    num_iters -- number of iterations to run
    Returns (Jhistory, theta_update): the cost after every step, and the final
    parameter array with the same shape as theta.
    '''
    m = len(y)  # bug fix: the original read the global m instead of deriving it from y
    X_arr = np.asarray(X, dtype=float)
    y_arr = np.asarray(y, dtype=float).ravel()
    theta_update = np.array(theta, dtype=float)  # work on a copy, as before
    Jhistory = []  # cost after each full update, for convergence checking
    for _ in range(num_iters):
        # residuals under the current parameters (fixed for the whole update,
        # so every theta_j is updated simultaneously, as in the original)
        loss = X_arr.dot(theta_update).ravel() - y_arr
        # vectorized form of: theta_j -= alpha * (1/m) * sum(loss * x_j)
        gradient = X_arr.T.dot(loss) / m
        theta_update = theta_update - alpha * gradient.reshape(theta_update.shape)
        Jhistory.append(costJ(X, y, theta_update))
    return Jhistory, theta_update
#try running the function
Jhistory, theta_update= gradDesc(X,y,theta,alpha,1500)
# inspect the first few cost values -- they should drop quickly from the
# initial ~32.07 and keep decreasing
Jhistory[:10]
[6.7371904648700083, 5.9315935686049572, 5.9011547070813872, 5.8952285864442207, 5.8900949431173322, 5.8850041584436461, 5.8799324804914157, 5.8748790947625746, 5.8698439118063845, 5.8648268653129296]
# These are the final values for theta (intercept ~ -3.63, slope ~ 1.17)
theta_update
array([[-3.63029144], [ 1.16636235]])
# Check the output of Jhistory is decreasing towards a minimum
plt.plot(Jhistory)
plt.xlabel("number of iterations")
plt.ylabel("cost function")
<matplotlib.text.Text at 0xf4e8518>
#Add a line which shows the outcome of the gradient descent after 1000 iterations
# (column 1 of X holds the raw population values; column 0 is the ones column)
plt.scatter(X.iloc[:,1],y, c = 'r', marker = 'x')
plt.xlabel("Population of city in 10,000s")
plt.ylabel("Profit in $10,000s")
plt.title("Scatter plot of training data")
plt.xlim(4,24)
plt.ylim(-5,25)
# overlay the fitted line: predicted profit X.theta against population
plt.plot(X.iloc[:,1],X.dot(theta_update))
[<matplotlib.lines.Line2D at 0x10833f28>]
# Next I want to make a contour plot that plots theta0 against theta 1 and the outcome of J
# First initiate values for theta0 and theta 1
theta0_vals = np.linspace(-10, 10, 100)
theta1_vals = np.linspace(-1, 4, 100)
#they both have length 100
len(theta1_vals)
100
# initialize J_vals to a matrix of 0's
# row i will correspond to theta0_vals[i], column j to theta1_vals[j]
J_vals = np.zeros([len(theta0_vals), len(theta1_vals)])
J_vals.shape
(100L, 100L)
# Evaluate the cost function at every (theta0, theta1) grid point so the
# contour/surface plots below can be drawn. J_vals is indexed [i, j] with
# rows following theta0_vals and columns following theta1_vals.
for i, t0 in enumerate(theta0_vals):
    for j, t1 in enumerate(theta1_vals):
        J_vals[i, j] = costJ(X, y, np.array([[t0], [t1]]))
# contour of the cost surface; bare `logspace` is available via %pylab inline
# NOTE(review): J_vals is indexed [theta0, theta1] but plt.contour treats the
# first array axis as y -- the later call uses J_vals.T for this reason
plt.contour(theta0_vals, theta1_vals, J_vals, logspace(-2, 3, 20))
<matplotlib.contour.QuadContourSet instance at 0x000000000CAE3348>
# NOTE(review): theta0mesh/theta1mesh are only defined a few lines below --
# this cell was evidently re-run after the meshgrid cell in the live notebook
plt.contour(theta0mesh,theta1mesh,J_vals, np.logspace(-2, 3, 20))
<matplotlib.contour.QuadContourSet instance at 0x000000000D124948>
sn.set_palette("Blues")
# mark the theta found by gradient descent on top of the contour plot
plt.scatter(theta_update[0], theta_update[1], )
plt.contour(theta0_vals, theta1_vals, J_vals.T, logspace(-2, 3, 20))
sn.set_style("darkgrid")
#numpy.meshgrid takes two (or more) 1d x and y coordinate arrays, and returns a pair of 2d x and y grid arrays.
theta0mesh, theta1mesh = np.meshgrid(theta0_vals,theta1_vals)
# 3-D surface of the cost over the theta grid (transposed to match meshgrid's
# (len(theta1_vals), len(theta0_vals)) output orientation)
fig = plt.figure()
ax = fig.gca(projection='3d')
ax.plot_surface(theta0mesh, theta1mesh, J_vals.T)
<mpl_toolkits.mplot3d.art3d.Poly3DCollection at 0x10e16da0>