# Eigenfaces walkthrough: PCA on the Labeled Faces in the Wild images.
# Notebook-style script, meant to be run top to bottom in Jupyter/IPython.

# Equivalent of the `%matplotlib inline` magic, spelled as a plain call so the
# file also parses as ordinary Python.
try:
    get_ipython().run_line_magic('matplotlib', 'inline')
except NameError:
    pass  # not running under IPython; keep matplotlib's default backend

import numpy as np
import matplotlib.pyplot as plt
from scipy import stats

# use seaborn plotting defaults
import seaborn as sns
sns.set()

from sklearn.datasets import fetch_lfw_people
from sklearn.decomposition import PCA

# One flattened image per row of X; y holds the matching person ids.
faces = fetch_lfw_people(min_faces_per_person=70, resize=0.4)
X, y = faces.data, faces.target

# Full decomposition (no component limit). Kept under its own name so it can
# be reused for the reconstructions below instead of being fitted twice.
full_pca = PCA().fit(X)
pca = full_pca
print(pca)
print(pca.n_components_)

# Cumulative explained variance as a function of the number of components.
plt.figure()
plt.axes(xscale='log')
plt.plot(pca.explained_variance_ratio_.cumsum())
plt.xlabel('number of components')
plt.ylabel('cumulative variance ratio')

# A float n_components asks PCA for the smallest number of components whose
# explained variance reaches that fraction (here 90%).
pca = PCA(n_components=0.90)
pca.fit(X)
print(pca.n_components_)

# Height/width of a single image, used to un-flatten rows of X.
imshape = faces.images.shape[-2:]

# The mean face.
plt.figure()
plt.axes(xticks=[], yticks=[])
plt.imshow(pca.mean_.reshape(imshape), cmap='binary_r')

# The first ten principal components, drawn as images.
fig, ax = plt.subplots(2, 5, figsize=(14, 6),
                       subplot_kw=dict(xticks=[], yticks=[]))
for panel, component in zip(ax.flat, pca.components_):
    panel.imshow(component.reshape(imshape), cmap='binary_r')

# plot_face reads the module-level `pca`; point it back at the full
# decomposition so up to 100 components are available.
pca = full_pca


def plot_face(i=279):
    """Show face ``i`` next to its reconstructions from a few components.

    The leftmost panel is the original image ``X[i]``; the remaining five
    panels rebuild it from the first 10, 20, 40, 80 and 100 components of the
    module-level ``pca``.

    Parameters
    ----------
    i : int
        Row index into the module-level ``X``.
    """
    fig, ax = plt.subplots(1, 6, figsize=(14, 3),
                           subplot_kw=dict(xticks=[], yticks=[]))
    ax[0].imshow(X[i].reshape(imshape), cmap='binary_r')

    # Project once: the loop only varies how many coordinates are kept.
    # reshape(1, -1) rather than X[i:i + 1] so negative indices select the
    # same row as X[i] above (X[-1:0] would be empty).
    coords = pca.transform(X[i].reshape(1, -1))

    for j, ncomp in enumerate([10, 20, 40, 80, 100], start=1):
        approx = pca.mean_ + np.dot(coords[:, :ncomp],
                                    pca.components_[:ncomp])
        ax[j].imshow(approx.reshape(imshape), cmap='binary_r')
        ax[j].set_title('{0} components'.format(ncomp))


plot_face(700)

# The widgets moved out of IPython into the separate `ipywidgets` package;
# fall back to the historical location for very old installations.
try:
    from ipywidgets import interact
except ImportError:
    from IPython.html.widgets import interact

# Trailing semicolon keeps a notebook from echoing interact's return value.
interact(plot_face, i=(0, X.shape[0] - 1));