%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import sklearn
import scipy.io
# Read the exercise data from the MATLAB file; `data` is indexed below with
# the keys 'X', 'y', 'Xval' and 'yval'.
data = scipy.io.loadmat('ex5data1.mat')

# First look at the data: scatter y against X using red crosses.
fig, ax = plt.subplots(figsize=(8, 6))
ax.scatter(data['X'], data['y'], c='r', marker='x')
ax.set_xlabel('Change in water level(x)')
ax.set_ylabel('Water flowing out of the dam(y)')
plt.show()
from sklearn.learning_curve import learning_curve
def plot_learning_curve(estimator, title, X, y, Xval, yval, xlim=None, ylim=None):
    """Plot training and validation error against the training-set size.

    For every size ``i`` from 2 up to and including ``len(X)``, the
    estimator is refit on the first ``i`` rows of ``X``/``y`` and then
    scored on that same subset and on the whole validation set. The value
    plotted is ``100 * (1 - estimator.score(...))``.

    Args:
        estimator: Object providing ``fit(X, y)`` and ``score(X, y)``. It is
            refit in place; on return it is fitted on all of ``X``/``y``
            (when ``len(X) >= 2``).
        title: Title for the figure.
        X: Training inputs, sliced as ``X[:i]``.
        y: Training targets, sliced as ``y[:i]``.
        Xval: Validation inputs, always scored in full.
        yval: Validation targets.
        xlim: Forwarded to ``plt.xlim``; ``None`` is passed through as-is.
        ylim: Forwarded to ``plt.ylim``; ``None`` is passed through as-is.

    Returns:
        The ``plt`` module itself, so callers can chain ``.show()``.
    """
    # Sizes start at 2 (presumably because a score on a single sample is not
    # meaningful) and run through len(X) inclusive, so the last point of the
    # curve uses the complete training set.
    iterations = range(2, len(X) + 1)
    train_error = []
    val_error = []
    for i in iterations:
        estimator.fit(X[:i], y[:i])
        train_error.append(100 * (1 - estimator.score(X[:i], y[:i])))
        val_error.append(100 * (1 - estimator.score(Xval, yval)))

    plt.figure(figsize=(8, 6))
    plt.title(title)
    plt.plot(iterations, train_error, label="Train")
    plt.plot(iterations, val_error, label="Cross Validation")
    plt.ylabel('Error (%)')
    plt.xlabel('Number of training examples')
    plt.xlim(xlim)
    plt.ylim(ylim)
    plt.legend()
    return plt
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Ridge
# Fit a plain linear regression on the training data and report the learned
# coefficient and intercept.
clf = LinearRegression()
clf.fit(data['X'], data['y'])
# print(...) with a single argument behaves the same on Python 2 and 3; the
# bare print statement is a SyntaxError on Python 3.
print(clf.coef_)
print(clf.intercept_)
# Output: [[ 0.36777923]] [ 13.08790351]
# Draw the fitted line over the scatter of the data it was trained on.
fig, ax = plt.subplots(figsize=(8, 6))
ax.set_xlabel('Change in water level(x)')
ax.set_ylabel('Water flowing out of the dam(y)')
ax.scatter(data['X'], data['y'], c='r', marker='x')
linear_predictions = clf.predict(data['X'])
ax.plot(data['X'], linear_predictions)
plt.show()

# Learning curve for the same model; note that plot_learning_curve refits
# `clf` on growing prefixes of the training data.
linear_curve = plot_learning_curve(
    clf,
    'Learning curve for linear regression',
    data['X'],
    data['y'],
    data['Xval'],
    data['yval'],
)
linear_curve.show()
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
# Polynomial model: expand x to degree-6 features, standardize them, then
# fit a ridge regression with alpha=0.0.
clf2 = Pipeline(
    steps=[
        ('poly', PolynomialFeatures(degree=6)),
        ('zscore', StandardScaler()),
        ('linear', Ridge(alpha=0.0)),
    ]
)
clf2.fit(data['X'], data['y'])
# Output: Pipeline(steps=[('poly', PolynomialFeatures(degree=6, include_bias=True, interaction_only=False)), ('zscore', StandardScaler(copy=True, with_mean=True, with_std=True)), ('linear', Ridge(alpha=0.0, copy_X=True, fit_intercept=True, max_iter=None, normalize=False, solver='auto', tol=0.001))])
# Evaluate clf2 on 100 evenly spaced points in [-60, 50], shaped as a
# column so the pipeline receives a 2-D input.
xx = np.linspace(-60, 50, 100).reshape(-1, 1)

fig, ax = plt.subplots(figsize=(8, 6))
ax.scatter(data['X'], data['y'], c='r', marker='x')
ax.plot(xx, clf2.predict(xx))
ax.set_xlabel('Change in water level(x)')
ax.set_ylabel('Water flowing out of the dam(y)')
plt.show()

# Learning curve for clf2 (the pipeline is refit inside the helper).
clf2_curve = plot_learning_curve(
    clf2,
    'Learning curve for polynomial regression',
    data['X'],
    data['y'],
    data['Xval'],
    data['yval'],
)
clf2_curve.show()
# Same degree-6 polynomial pipeline as before, now with ridge penalty alpha=1.
clf3 = Pipeline([
    ('poly', PolynomialFeatures(degree=6)),
    ('zscore', StandardScaler()),
    ('linear', Ridge(alpha=1)),
])
clf3.fit(data['X'], data['y'])
# Show the coefficients of the final ridge step. print(...) with a single
# argument works on both Python 2 and 3; the bare print statement is a
# SyntaxError on Python 3.
print(clf3.named_steps['linear'].coef_)
# Output: [[ 0. 8.38063402 5.21896919 3.62617237 2.11039464 1.95472334 0.78519225]]
# Evaluation grid for the alpha=1 model: 100 points spanning [-60, 50],
# as a single-column 2-D array.
xx = np.linspace(-60, 50, 100).reshape(-1, 1)
clf3_predictions = clf3.predict(xx)

fig, ax = plt.subplots(figsize=(8, 6))
ax.set_xlabel('Change in water level(x)')
ax.set_ylabel('Water flowing out of the dam(y)')
ax.scatter(data['X'], data['y'], c='r', marker='x')
ax.plot(xx, clf3_predictions)
plt.show()

# Learning curve for clf3; the helper refits the pipeline as it goes.
clf3_curve = plot_learning_curve(
    clf3,
    'Learning curve for polynomial regression',
    data['X'],
    data['y'],
    data['Xval'],
    data['yval'],
)
clf3_curve.show()
# Same degree-6 polynomial pipeline, now with a much larger ridge penalty
# (alpha=100).
clf4 = Pipeline([
    ('poly', PolynomialFeatures(degree=6)),
    ('zscore', StandardScaler()),
    ('linear', Ridge(alpha=100)),
])
clf4.fit(data['X'], data['y'])
# Show the coefficients of the final ridge step. print(...) with a single
# argument works on both Python 2 and 3; the bare print statement is a
# SyntaxError on Python 3.
print(clf4.named_steps['linear'].coef_)
# Output: [[ 0. 1.01732905 0.31721843 0.79898768 0.12629107 0.60325488 -0.00502064]]
# Evaluation grid for the alpha=100 model: 100 points spanning [-60, 50],
# as a single-column 2-D array.
xx = np.linspace(-60, 50, 100).reshape(-1, 1)

fig, ax = plt.subplots(figsize=(8, 6))
ax.scatter(data['X'], data['y'], c='r', marker='x')
ax.set_xlabel('Change in water level(x)')
ax.set_ylabel('Water flowing out of the dam(y)')
ax.plot(xx, clf4.predict(xx))
plt.show()

# Learning curve for clf4; the helper refits the pipeline as it goes.
clf4_curve = plot_learning_curve(
    clf4,
    'Learning curve for polynomial regression',
    data['X'],
    data['y'],
    data['Xval'],
    data['yval'],
)
clf4_curve.show()