# Face classification walkthrough on the scikit-learn "Labeled Faces in the
# Wild" people dataset:
#   1. load the faces and show a sample grid,
#   2. project the pixel data to 2-D with PCA and Isomap,
#   3. train SVC classifiers with several kernels and compare accuracy,
#   4. report per-class metrics and show predictions for test images.
from __future__ import print_function, division

# Originally the notebook magic ``%matplotlib inline``.  The magic is not
# valid Python syntax, so call it through IPython when available and do
# nothing when this file is run as a plain script.
try:
    get_ipython().run_line_magic('matplotlib', 'inline')
except NameError:
    pass

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# use seaborn for better matplotlib styles
import seaborn

from sklearn.datasets import fetch_lfw_people
from sklearn.decomposition import PCA
from sklearn.manifold import Isomap
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.svm import SVC

# ``sklearn.cross_validation`` was removed from scikit-learn; the function
# now lives in ``sklearn.model_selection``.  Fall back to the old location
# so older installations keep working.
try:
    from sklearn.model_selection import train_test_split
except ImportError:
    from sklearn.cross_validation import train_test_split

seaborn.set(style='white')

# ---------------------------------------------------------------------------
# Load the data
# ---------------------------------------------------------------------------
# Keep only people with at least 70 images; images are rescaled by 0.4.
faces = fetch_lfw_people(min_faces_per_person=70, resize=0.4)
print(faces.keys())

n_samples, n_features = faces.data.shape
print(n_samples, n_features)
print(faces.target_names)

# Show the first 32 faces (4 x 8 grid), each titled with the person's name.
fig, axes = plt.subplots(4, 8, figsize=(12, 9))
for i, ax in enumerate(axes.flat):
    ax.imshow(faces.images[i], cmap='binary_r')
    ax.set_title(faces.target_names[faces.target[i]], fontsize=10)
    ax.set_xticks([])
    ax.set_yticks([])

X = faces.data
y = faces.target

# ---------------------------------------------------------------------------
# 2-D projections
# ---------------------------------------------------------------------------
X_pca = PCA(n_components=2).fit_transform(X)
X_iso = Isomap(n_components=2).fit_transform(X)

# Each projection gets its own figure; without an explicit new figure the
# second scatter would be drawn over the first on the same axes.
plt.figure()
plt.scatter(X_pca[:, 0], X_pca[:, 1], c=faces.target, cmap='Blues')
plt.title('PCA projection')

plt.figure()
plt.scatter(X_iso[:, 0], X_iso[:, 1], c=faces.target, cmap='Blues')
plt.title('Isomap projection')

# ---------------------------------------------------------------------------
# Classification
# ---------------------------------------------------------------------------
# split the data
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
print(X_train.shape, X_test.shape)

# instantiate the estimator
clf = SVC()

# Do a fit and check accuracy
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)
print(accuracy_score(y_test, y_pred))

# Note that we can also do this:
print(clf.score(X_test, y_test))

# Try out various hyper parameters
for kernel in ['linear', 'rbf', 'poly']:
    clf = SVC(kernel=kernel).fit(X_train, y_train)
    score = clf.score(X_test, y_test)
    print("{0}: accuracy = {1}".format(kernel, score))

# NOTE(review): the linear kernel is hard-coded as the "best" model here,
# presumably because it scored highest in the loop above -- confirm against
# the printed accuracies.
best_clf = SVC(kernel='linear').fit(X_train, y_train)
y_pred = best_clf.predict(X_test)

print(classification_report(y_test, y_pred, target_names=faces.target_names))

print(confusion_matrix(y_test, y_pred))

# ---------------------------------------------------------------------------
# Visualise predictions on the test set
# ---------------------------------------------------------------------------
# Height/width of a single image, used to un-flatten rows of X_test.
shape = faces.images.shape[-2:]

# Use only the last word of each name to keep the subplot titles short.
last_names = [label.split()[-1] for label in faces.target_names]
titles = ["True: {0}\nPred: {1}".format(last_names[i_test],
                                        last_names[i_pred])
          for (i_test, i_pred) in zip(y_test, y_pred)]

# Show the first 32 test images with their true and predicted labels.
fig, axes = plt.subplots(4, 8, figsize=(12, 9),
                         subplot_kw=dict(xticks=[], yticks=[]))
for i, ax in enumerate(axes.flat):
    ax.imshow(X_test[i].reshape(shape), cmap='binary_r')
    ax.set_title(titles[i], fontsize=10)

# Render the figures when run as a plain script.
plt.show()