%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import sklearn
import scipy.io
# Load the MATLAB file; the rest of the script reads the 'X', 'y', 'Xval'
# and 'yval' entries from the resulting dict.
data = scipy.io.loadmat('ex5data1.mat')

# Scatter plot of the training points (red crosses).
plt.figure(figsize=(8, 6))
plt.scatter(data['X'], data['y'], c='r', marker='x')
plt.ylabel('Water flowing out of the dam(y)')
plt.xlabel('Change in water level(x)')
plt.show()
from sklearn.learning_curve import learning_curve
def plot_learning_curve(estimator, title, X, y, Xval, yval, xlim=None, ylim=None):
    """Plot training and cross-validation error against training-set size.

    For every size ``i`` from 2 up to and including ``len(X)``, ``estimator``
    is refit on the first ``i`` rows of ``X``/``y``.  The plotted error is
    ``100 * (1 - estimator.score(...))``, evaluated once on that training
    subset and once on the whole validation set ``Xval``/``yval``.

    ``estimator`` is refit in place: when ``len(X) >= 2`` it is left fitted
    on all of ``X`` when this function returns.

    Args:
        estimator: Object exposing ``fit(X, y)`` and ``score(X, y)``.
        title: Title of the figure.
        X: Training inputs; must support ``len`` and slicing.
        y: Training targets, sliced in step with ``X``.
        Xval: Validation inputs.
        yval: Validation targets.
        xlim: Optional x-axis limits, forwarded to ``plt.xlim``.
        ylim: Optional y-axis limits, forwarded to ``plt.ylim``.

    Returns:
        The ``matplotlib.pyplot`` module, so callers can chain ``.show()``.
    """
    # The upper bound is inclusive so the last point of the curve uses the
    # full training set (the previous ``range(2, len(X))`` stopped one short).
    # NOTE(review): the curve starts at two samples, presumably because the
    # score is not meaningful for a single sample -- confirm.
    sizes = list(range(2, len(X) + 1))

    train_error = []
    val_error = []
    for size in sizes:
        estimator.fit(X[:size], y[:size])
        # Training error is measured on the same subset the model was fit on;
        # validation error always uses the complete validation set.
        train_error.append(100 * (1 - estimator.score(X[:size], y[:size])))
        val_error.append(100 * (1 - estimator.score(Xval, yval)))

    plt.figure(figsize=(8, 6))
    plt.title(title)
    plt.plot(sizes, train_error, label="Train")
    plt.plot(sizes, val_error, label="Cross Validation")
    plt.ylabel('Error (%)')
    plt.xlabel('Number of training examples')
    plt.xlim(xlim)
    plt.ylim(ylim)
    plt.legend()
    return plt
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Ridge
# Fit a plain linear regression to the training data.
clf = LinearRegression()
clf.fit(data['X'], data['y'])

# Single-argument print written with parentheses so it produces the same
# output on Python 2 and is also valid on Python 3.
print(clf.coef_)
print(clf.intercept_)

# Training points (red crosses) with the fitted line drawn over them.
plt.figure(figsize=(8, 6))
plt.xlabel('Change in water level(x)')
plt.ylabel('Water flowing out of the dam(y)')
plt.scatter(data['X'], data['y'], c='r', marker='x')
plt.plot(data['X'], clf.predict(data['X']))
plt.show()

# plot_learning_curve refits clf on growing prefixes of the training set and
# returns pyplot, hence the chained .show().
plot_learning_curve(clf, 'Learning curve for linear regression', data['X'], data['y'], data['Xval'], data['yval']).show()
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
# Degree-6 polynomial model with the ridge penalty switched off (alpha=0.0):
# polynomial expansion -> z-score scaling -> ridge regression.
clf2 = Pipeline([
    ('poly', PolynomialFeatures(degree=6)),
    ('zscore', StandardScaler()),
    ('linear', Ridge(alpha=0.0)),
])
clf2.fit(data['X'], data['y'])

# Column vector of 100 evenly spaced x values on which to draw the fitted curve.
xx = np.linspace(-60, 50, 100).reshape(-1, 1)

# Training points (red crosses) with the fitted polynomial drawn over them.
plt.figure(figsize=(8, 6))
plt.scatter(data['X'], data['y'], c='r', marker='x')
plt.plot(xx, clf2.predict(xx))
plt.ylabel('Water flowing out of the dam(y)')
plt.xlabel('Change in water level(x)')
plt.show()

# plot_learning_curve refits clf2 on growing prefixes of the training set and
# returns pyplot, hence the chained .show().
plot_learning_curve(clf2, 'Learning curve for polynomial regression', data['X'], data['y'], data['Xval'], data['yval']).show()
# Degree-6 polynomial model with ridge penalty alpha=1:
# polynomial expansion -> z-score scaling -> ridge regression.
clf3 = Pipeline([('poly', PolynomialFeatures(degree=6)),
                 ('zscore', StandardScaler()),
                 ('linear', Ridge(alpha=1))])
clf3.fit(data['X'], data['y'])

# Coefficients of the final ridge step.  Single-argument print written with
# parentheses so it behaves the same on Python 2 and is valid on Python 3.
print(clf3.named_steps['linear'].coef_)

# Training points (red crosses) with the fitted curve evaluated on a column
# vector of 100 evenly spaced x values.
plt.figure(figsize=(8, 6))
plt.xlabel('Change in water level(x)')
plt.ylabel('Water flowing out of the dam(y)')
plt.scatter(data['X'], data['y'], c='r', marker='x')
xx = np.linspace(-60, 50, 100)[:, np.newaxis]
plt.plot(xx, clf3.predict(xx))
plt.show()

# plot_learning_curve refits clf3 on growing prefixes of the training set and
# returns pyplot, hence the chained .show().
plot_learning_curve(clf3, 'Learning curve for polynomial regression', data['X'], data['y'], data['Xval'], data['yval']).show()
# Degree-6 polynomial model with ridge penalty alpha=100:
# polynomial expansion -> z-score scaling -> ridge regression.
clf4 = Pipeline([('poly', PolynomialFeatures(degree=6)),
                 ('zscore', StandardScaler()),
                 ('linear', Ridge(alpha=100))])
clf4.fit(data['X'], data['y'])

# Coefficients of the final ridge step.  Single-argument print written with
# parentheses so it behaves the same on Python 2 and is valid on Python 3.
print(clf4.named_steps['linear'].coef_)

# Training points (red crosses) with the fitted curve evaluated on a column
# vector of 100 evenly spaced x values.
plt.figure(figsize=(8, 6))
plt.xlabel('Change in water level(x)')
plt.ylabel('Water flowing out of the dam(y)')
plt.scatter(data['X'], data['y'], c='r', marker='x')
xx = np.linspace(-60, 50, 100)[:, np.newaxis]
plt.plot(xx, clf4.predict(xx))
plt.show()

# plot_learning_curve refits clf4 on growing prefixes of the training set and
# returns pyplot, hence the chained .show().
plot_learning_curve(clf4, 'Learning curve for polynomial regression', data['X'], data['y'], data['Xval'], data['yval']).show()