# IPython magic, kept as a comment so the file parses as plain Python
# (same treatment as the other `%` magics in this file). Uncomment it when
# running inside a notebook.
# %matplotlib inline
import matplotlib.pyplot as plt
import numpy as np

# `make_blobs` is imported from the public `sklearn.datasets` package; the
# old private `sklearn.datasets.samples_generator` module no longer exists
# in current scikit-learn releases.
from sklearn.datasets import make_blobs
from sklearn.svm import SVC  # "Support Vector Classifier"
from sklearn.tree import DecisionTreeClassifier


# --- Two well-separated blobs and a linear SVC ---------------------------
X, y = make_blobs(n_samples=50, centers=2, random_state=0, cluster_std=0.60)
plt.scatter(X[:, 0], X[:, 1], c=y, s=50);

clf = SVC(kernel='linear')
clf.fit(X, y)


def plot_svc_decision_function(clf):
    """Plot the decision function for a 2D SVC.

    The decision function is evaluated on a 30 x 30 grid spanning the
    current axes limits (``plt.xlim()`` / ``plt.ylim()``), so the data
    should already be drawn on the current axes before calling this.

    Parameters
    ----------
    clf : fitted classifier
        Must expose ``decision_function`` accepting an
        ``(n_samples, 2)`` array and returning one value per sample
        (i.e. a two-class problem).

    Returns
    -------
    The contour set returned by ``plt.contour``: level 0 is drawn solid,
    levels -1 and +1 are drawn dashed.
    """
    x = np.linspace(plt.xlim()[0], plt.xlim()[1], 30)
    y = np.linspace(plt.ylim()[0], plt.ylim()[1], 30)
    # 'ij' indexing gives X[i, j] == x[i] and Y[i, j] == y[j].
    X, Y = np.meshgrid(x, y, indexing='ij')
    # Evaluate every grid point in a single call. scikit-learn estimators
    # expect a 2D (n_samples, n_features) array, so a bare [xi, yj] pair
    # is rejected by current releases.
    grid_points = np.c_[X.ravel(), Y.ravel()]
    P = clf.decision_function(grid_points).reshape(X.shape)
    return plt.contour(X, Y, P, colors='k',
                       levels=[-1, 0, 1],
                       linestyles=['--', '-', '--'])


plt.scatter(X[:, 0], X[:, 1], c=y, s=50)
plot_svc_decision_function(clf);

# Same plot, with the support vectors circled.
plt.scatter(X[:, 0], X[:, 1], c=y, s=50)
plot_svc_decision_function(clf)
plt.scatter(clf.support_vectors_[:, 0], clf.support_vectors_[:, 1],
            s=200, facecolors='none');

# %run fig_code/svm_gui.py

# --- Same data with an RBF kernel ----------------------------------------
clf = SVC(kernel='rbf')
clf.fit(X, y)

plt.scatter(X[:, 0], X[:, 1], c=y, s=50)
plot_svc_decision_function(clf)
plt.scatter(clf.support_vectors_[:, 0], clf.support_vectors_[:, 1],
            s=200, facecolors='none');

# --- Four blobs for the tree-based classifiers ---------------------------
X, y = make_blobs(n_samples=300, centers=4,
                  random_state=0, cluster_std=0.60)
plt.scatter(X[:, 0], X[:, 1], c=y, s=50)


def plot_estimator(estimator, X, y):
    """Fit ``estimator`` on ``(X, y)`` and plot its predictions over the plane.

    A new figure is created. Predictions on a 50 x 50 grid covering the
    range of the first two columns of ``X`` (padded by 0.1 on each side)
    are drawn as a translucent colour mesh, with the training points
    scattered on top and the axes hidden.

    Parameters
    ----------
    estimator : object with ``fit(X, y)`` and ``predict(X)``
        It is fitted in place by this function.
    X : array indexed as ``X[:, 0]`` and ``X[:, 1]``
        Training features; the estimator is queried with two-column input.
    y : array
        Training labels, also used to colour the scatter points.
    """
    estimator.fit(X, y)
    x_min, x_max = X[:, 0].min() - .1, X[:, 0].max() + .1
    y_min, y_max = X[:, 1].min() - .1, X[:, 1].max() + .1
    xx, yy = np.meshgrid(np.linspace(x_min, x_max, 50),
                         np.linspace(y_min, y_max, 50))
    Z = estimator.predict(np.c_[xx.ravel(), yy.ravel()])

    # Put the result into a color plot
    Z = Z.reshape(xx.shape)
    plt.figure()
    plt.pcolormesh(xx, yy, Z, alpha=0.3)

    # Plot also the training points
    plt.scatter(X[:, 0], X[:, 1], c=y, s=50)
    plt.axis('tight')
    plt.axis('off')
    plt.tight_layout()


clf = DecisionTreeClassifier(max_depth=10)
plot_estimator(clf, X, y)

# --- Random forest on the same blobs -------------------------------------
from sklearn.ensemble import RandomForestClassifier

clf = RandomForestClassifier(n_estimators=10, random_state=0)
plot_estimator(clf, X, y)

# --- Handwritten digits --------------------------------------------------
from sklearn.datasets import load_digits

digits = load_digits()
digits.keys()

X = digits.data
y = digits.target
print(X.shape)
print(y.shape)

# set up the figure
fig = plt.figure(figsize=(6, 6))  # figure size in inches
fig.subplots_adjust(left=0, right=1, bottom=0, top=1,
                    hspace=0.05, wspace=0.05)

# plot the digits: each image is 8x8 pixels
for i in range(64):
    ax = fig.add_subplot(8, 8, i + 1, xticks=[], yticks=[])
    ax.imshow(digits.images[i], cmap=plt.cm.binary, interpolation='nearest')

    # label the image with the target value
    ax.text(0, 7, str(digits.target[i]))

# `train_test_split` lives in `sklearn.model_selection`; the old
# `sklearn.cross_validation` module no longer exists in current
# scikit-learn releases.
from sklearn.model_selection import train_test_split
from sklearn import metrics

Xtrain, Xtest, ytrain, ytest = train_test_split(X, y, random_state=0)
clf = DecisionTreeClassifier(max_depth=5)
clf.fit(Xtrain, ytrain)
ypred = clf.predict(Xtest)

# The predictions are deliberately passed first: that puts predicted labels
# on the rows (image y-axis) and true labels on the columns (image x-axis),
# matching the axis labels set below.
plt.imshow(metrics.confusion_matrix(ypred, ytest),
           interpolation='nearest', cmap=plt.cm.binary)
plt.colorbar()
plt.xlabel("true label")
plt.ylabel("predicted label");

# run this to load the solution
# %load solutions/04_svm_rf.py