%pylab inline # Score function from slides from sklearn.cross_validation import KFold from sklearn.metrics import accuracy_score def score(clf, X, Y, folds=2, verbose=False, metric=accuracy_score): predictions = np.zeros(len(Y)) for i, (train, test) in enumerate(KFold(len(X), n_folds=folds, shuffle=True)): clf.fit(X[train], Y[train]) predictions[test] = clf.predict(X[test]) if verbose: print("Fold {}: {}".format(i + 1, accuracy_score(Y[test], predictions[test]))) if metric: return metric(Y, predictions) return Y, predictions # Display given faces in a grid def show_faces(ims, grid_size=(16, 6)): fig = plt.figure(figsize=grid_size) for i in range(min(len(ims), 30)): ax = fig.add_subplot(3, 10, i + 1, xticks=[], yticks=[]) ax.imshow(ims[i].reshape((50, 37)), cmap=plt.cm.bone) from sklearn import datasets lfw_people = datasets.fetch_lfw_people(min_faces_per_person=70, resize=0.4) lfw_people.keys() PX = lfw_people['data'] PY = lfw_people['target'] PI = lfw_people['images'] show_faces(PI) # There are 7 different people in the dataset np.unique(PY) np.bincount(PY) from sklearn import decomposition pca = decomposition.RandomizedPCA(n_components=150, whiten=True) pca.fit(PX) show_faces(pca.components_) from sklearn import svm from sklearn.pipeline import Pipeline # whitening is very important! pipeline = Pipeline([('pca', decomposition.RandomizedPCA(n_components=150, whiten=True)), ('svm', svm.SVC())]) y, pred = score(pipeline, PX, PY, folds=5, metric=None) from sklearn import metrics metrics.confusion_matrix(y, pred) print metrics.classification_report(y, pred, target_names = lfw_people.target_names)