This example shows two possible ways to fit a straight line to the data points (2, 4), (3, 6), (2, 6), and (3, 8). Both regression lines yield the same sum of errors (0) and the same sum of absolute errors (4). This means that neither measure is an appropriate criterion: minimizing either one does not give a unique solution.
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
xData = [2, 3, 2, 3]
yData = [4, 6, 6, 8]
plt.scatter(xData,yData)
plt.xlabel('x')
plt.ylabel('y')
x = np.linspace(0, 5, 50)
y = 4*x - 4            # candidate line 1: y = 4x - 4 (red)
plt.plot(x, y, 'r--')
y = 0*x + 6            # candidate line 2: y = 6 (blue)
plt.plot(x, y, 'b--')
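To make the ambiguity concrete, here is a quick check (a minimal sketch; the helper name error_sums is just for illustration) that both candidate lines produce a zero sum of errors and the same sum of absolute errors:
import numpy as np
xData = np.array([2, 3, 2, 3])
yData = np.array([4, 6, 6, 8])
def error_sums(a0, a1):
    # residuals of the line y = a0 + a1*x at the data points
    r = yData - (a0 + a1*xData)
    return r.sum(), np.abs(r).sum()
print(error_sums(-4, 4))   # red line y = 4x - 4: sum of errors 0, sum of absolute errors 4
print(error_sums(6, 0))    # blue line y = 6:     sum of errors 0, sum of absolute errors 4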
The best fit for the data turns out to be the regression line shown below, which is found by minimizing the sum of squared errors (which is exactly what the following code does).
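For reference, the closed-form least-squares solution that the linear_regress function below implements is the standard result (the notation mirrors the code's S1 and S2):
$$a_1 = \frac{\sum_i x_i y_i - n\,\bar{x}\,\bar{y}}{\sum_i x_i^2 - n\,\bar{x}^2}, \qquad a_0 = \bar{y} - a_1\,\bar{x}$$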
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
xData = np.array([2, 3, 2, 3])
yData = np.array([4, 6, 6, 8])
def linear_regress(xData, yData):
    """Closed-form least-squares fit of y = a0 + a1*x."""
    n = xData.size
    x_avg = np.average(xData)
    y_avg = np.average(yData)
    S1 = np.dot(xData, yData) - n*x_avg*y_avg
    sum_x_squared = np.sum(np.square(xData))
    S2 = sum_x_squared - n*x_avg**2
    a1 = S1/S2
    a0 = y_avg - a1*x_avg
    return a0, a1

a0, a1 = linear_regress(xData, yData)
print("a1=", a1)
print("a0=", a0)
plt.scatter(xData, yData)
x = np.linspace(0, 5, 50)
y = a0 + a1*x
plt.plot(x, y, 'r--')
a1= 2.0
a0= 1.0
Let's figure out why minimizing the sum of squared errors (the cost) gives a unique answer. The sum of squared errors is a (cost) function of the two parameters (coefficients) a0 and a1, and our goal is to find the combination of a0 and a1 for which this cost function takes its smallest value (i.e., to minimize it). To simplify the problem, we first set a0 = 0, so the cost becomes a function of a1 only. The following is a plot of that cost function.
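Written out explicitly, the cost being plotted (and minimized) is
$$C(a_0, a_1) = \sum_{i=1}^{n} \bigl(y_i - (a_0 + a_1 x_i)\bigr)^2,$$
with $a_0$ held at 0 in the first plot.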
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
xData = np.array([2, 3, 2, 3])
yData = np.array([4, 6, 6, 8])
def cost(x, y, a0, a1):
    # sum of squared errors for the line y = a0 + a1*x
    fx = a0 + x*a1
    return np.sum(np.square(y - fx))

a1 = np.linspace(0, 4, 50)
y = np.empty(a1.size)   # float array, so the cost values are not truncated to integers
for i, a in enumerate(a1):
    y[i] = cost(xData, yData, 0, a)
plt.plot(a1, y, 'r--')
plt.ylabel('cost')
plt.xlabel('$a_1$')
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import cm
from mpl_toolkits.mplot3d.axes3d import Axes3D
%matplotlib inline

# Cost surface over both parameters, here for the data set x = [1, 2, 3], y = [1, 2, 3]
xData = np.array([1, 2, 3])
yData = np.array([1, 2, 3])

def cost(x, y, a0, a1):
    fx = a0 + x*a1
    return np.sum(np.square(y - fx))

size = 200
x = np.linspace(-100, 100, size)
y = np.linspace(-100, 100, size)
a_0, a_1 = np.meshgrid(x, y)
c = np.empty([size, size])
for i in range(size):
    for j in range(size):
        c[i][j] = cost(xData, yData, a_0[i][j], a_1[i][j])

ax = plt.axes(projection='3d')
ax.plot_surface(a_0, a_1, c, rstride=1, cstride=1, cmap=cm.coolwarm,
                linewidth=0, antialiased=False)
ax.set_xlabel('$a_0$')
ax.set_ylabel('$a_1$')
ax.set_zlabel('cost')
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import cm
from mpl_toolkits.mplot3d.axes3d import Axes3D
%matplotlib inline

# The same cost surface for our original data set x = [2, 3, 2, 3], y = [4, 6, 6, 8]
xData = np.array([2, 3, 2, 3])
yData = np.array([4, 6, 6, 8])

def cost(x, y, a0, a1):
    fx = a0 + x*a1
    return np.sum(np.square(y - fx))

size = 100
x = np.linspace(-10, 10, size)
y = np.linspace(-10, 10, size)
a_0, a_1 = np.meshgrid(x, y)
c = np.empty([size, size])
for i in range(size):
    for j in range(size):
        c[i][j] = cost(xData, yData, a_0[i][j], a_1[i][j])

ax = plt.axes(projection='3d')
ax.plot_surface(a_0, a_1, c, rstride=1, cstride=1, cmap=cm.coolwarm,
                linewidth=0, antialiased=False)
ax.set_xlabel('$a_0$')
ax.set_ylabel('$a_1$')
ax.set_zlabel('cost')
# Sanity check: for perfectly collinear data y = x, the fit should give a1 = 1 and a0 = 0
import numpy as np
xData = np.array([1, 2, 3])
yData = np.array([1, 2, 3])

def linear_regress(xData, yData):
    n = xData.size
    x_avg = np.average(xData)
    y_avg = np.average(yData)
    S1 = np.dot(xData, yData) - n*x_avg*y_avg
    sum_x_squared = np.sum(np.square(xData))
    S2 = sum_x_squared - n*x_avg**2
    a1 = S1/S2
    a0 = y_avg - a1*x_avg
    return a0, a1

a0, a1 = linear_regress(xData, yData)
print("a1=", a1)
print("a0=", a0)
a1= 1.0
a0= 0.0
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
xData = np.array([4, 7, 11, 13, 17])
yData = np.array([2, 0, 2, 6, 7])

def linear_regress(xData, yData):
    n = xData.size
    x_avg = np.average(xData)
    y_avg = np.average(yData)
    S1 = np.dot(xData, yData) - n*x_avg*y_avg
    sum_x_squared = np.sum(np.square(xData))
    S2 = sum_x_squared - n*x_avg**2
    a1 = S1/S2
    a0 = y_avg - a1*x_avg
    return a0, a1

a0, a1 = linear_regress(xData, yData)
print("a1=", a1)
print("a0=", a0)
plt.scatter(xData, yData)
x = np.linspace(0, 20, 50)
y = a0 + a1*x
plt.plot(x, y, 'r--')
a1= 0.486434108527
a0= -1.65891472868
import numpy as np
# m is the number of training examples
def gradientDescent(x, y, theta, alpha, m, iterations):
    x_t = x.transpose()
    for i in range(iterations):
        hypothesis = np.dot(x, theta)
        error = hypothesis - y
        # average cost per example (not used in the update, but useful for monitoring)
        cost = np.sum(error ** 2) / (2 * m)
        # print("Iteration %d | Cost: %f" % (i, cost))
        # average gradient per example
        gradient = np.dot(x_t, error) / m
        # update step
        theta = theta - alpha * gradient
    return theta

# the column of ones corresponds to the intercept term a0
x = np.array([[1, 4],
              [1, 7],
              [1, 11],
              [1, 13],
              [1, 17]])
y = np.array([2, 0, 2, 6, 7])
m = x.shape[0]   # number of training examples (rows)
n = 2            # number of features (including the intercept column)
iterations = 1000
alpha = 0.0005
theta = np.ones(n)
theta = gradientDescent(x, y, theta, alpha, m, iterations)
print(theta)
[ 0.84001345 0.28440757]
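As a quick sanity check (assuming the linear_regress function defined earlier in the notebook is still in scope), we can compare this estimate with the closed-form least-squares fit for the same data; with this step size and only 1000 iterations, gradient descent is still some way from the closed-form answer (a0 ≈ -1.66, a1 ≈ 0.49), so more iterations are needed for full convergence.
a0, a1 = linear_regress(np.array([4, 7, 11, 13, 17]), np.array([2, 0, 2, 6, 7]))
print("closed form:     ", a0, a1)
print("gradient descent:", theta)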
# Aside: a quick numpy indexing example
import numpy as np
x = np.eye(3, 4)       # 3x4 matrix with ones on the main diagonal
print(x)
print(x[2, ...])       # the last row (the ellipsis stands for the remaining axes)
import numpy as np
from scipy.optimize import curve_fit
import matplotlib.pyplot as plt
%matplotlib inline
def f(x, a, b, c):
    """Fit function y = f(x, p) with parameters p = (a, b, c)."""
    return a * np.exp(-b * x) + c
#create fake data
x = np.linspace(0, 4, 50)
y = f(x, a=2.5, b=1.3, c=0.5)
#add noise
yi = y + 0.2 * np.random.normal(size=len(x))
#call curve fit function
popt, pcov = curve_fit(f, x, yi)
a, b, c = popt
print "Optimal parameters are a=%g, b=%g, and c=%g" % (a, b, c)
#plotting
yfitted = f(x, *popt) # equivalent to f(x, popt[0], popt[1], popt[2])
plt.plot(x, yi, 'o', label='data $y_i$')
plt.plot(x, yfitted , '-', label='fit $f(x_i)$')
plt.xlabel('x')
plt.legend()
Optimal parameters are a=2.60556, b=1.51529, and c=0.575288
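As a small addition for reference, the second value returned by curve_fit, pcov, is the estimated covariance matrix of the fitted parameters; the square roots of its diagonal entries give one-standard-deviation uncertainties for a, b, and c:
perr = np.sqrt(np.diag(pcov))
print("Parameter uncertainties:", perr)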
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
x = np.random.rand(5, 2)*10      # random (x, y) points in [0, 10); only the first three are used below
a = np.matrix([[1, x[0][0]], [1, x[1][0]], [1, x[2][0]]])   # design matrix A with an intercept column
b = np.matrix([[x[0][1]], [x[1][1]], [x[2][1]]])            # observations b
yy = (a.T * a).I * a.T * b       # normal-equation solution (A^T A)^{-1} A^T b
xx = np.linspace(1, 10, 50)
y = np.array(yy[0] + yy[1]*xx)
plt.figure(1)
plt.plot(xx, y.T, color='r')
plt.scatter([x[0][0], x[1][0], x[2][0]], [x[0][1], x[1][1], x[2][1]])
plt.show()
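For reference, the matrix expression in the cell above is the normal-equation form of least squares: with design matrix $A$ (a column of ones and a column of x values) and observation vector $b$, the fitted coefficients are
$$\hat{\theta} = (A^{\mathsf{T}} A)^{-1} A^{\mathsf{T}} b,$$
which gives the same intercept and slope as minimizing the sum of squared errors directly.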
We can use the multi-function data flyer to visualize the function $f(x) = x^4 - 3x^3 + 2$: http://shodor.org/interactivate/activities/MultiFunctionDataFly/
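Setting the derivative to zero confirms the value quoted in the comment of the code below:
$$f'(x) = 4x^3 - 9x^2 = x^2(4x - 9) = 0 \;\Rightarrow\; x = 0 \text{ or } x = \tfrac{9}{4},$$
and since $f''(x) = 12x^2 - 18x$ is positive at $x = 9/4$ (but zero at $x = 0$), the local minimum is at $x = 9/4 = 2.25$.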
# From calculation, we expect that the local minimum occurs at x=9/4
x_old = 0
x_new = -1            # the algorithm starts at x = -1
eps = 0.01            # step size (learning rate)
precision = 0.00001

def f(x):
    return x**4 - 3*x**3 + 2

def f_prime(x):
    return 4*x**3 - 9*x**2

# gradient descent on f: repeatedly step in the direction of -f'(x)
while abs(x_new - x_old) > precision:
    x_old = x_new
    x_new = x_old - eps * f_prime(x_old)
print("Local minimum occurs at", x_new)
The plot of $f'(x) = 4x^3 - 9x^2$ (blue line) shows that x = 2.25 is indeed a local minimum: the derivative changes sign from negative to positive there.
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
x = np.linspace(-5,5,200)
y = x**4 - 3*x**3 + 2.0   # f(x), plotted in red
#print y
ax = plt.gca()
plt.plot(x, y, 'r')
ax.set_xlim([-5, 5])
ax.set_ylim([-10, 20])
y = 4*x**3 - 9*x**2       # f'(x), plotted in blue
plt.plot(x, y, 'b')
plt.grid(True)
#plt.axis('equal')