from __future__ import print_function, division

%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# Apply seaborn's 'white' style so the matplotlib figures below pick it up
import seaborn
seaborn.set(style='white')

# Load the LFW people face dataset through scikit-learn's loader.
# min_faces_per_person=70 and resize=0.4 are passed straight to the loader;
# see the scikit-learn documentation for their exact semantics.
# NOTE(review): the fetch_* loaders presumably download and cache the data
# on first use -- confirm before running offline.
from sklearn.datasets import fetch_lfw_people
faces = fetch_lfw_people(min_faces_per_person=70, resize=0.4)

# Bare expression: lists the fields available on the returned object.
# It only produces visible output when evaluated interactively (e.g. as the
# last statement of a notebook cell).
faces.keys()

# faces.data unpacks into exactly two dimensions: one row per sample and one
# column per feature.
n_samples, n_features = faces.data.shape
print(n_samples, n_features)

# Class names; faces.target values are used as indices into this array
# further down (see the image-grid titles).
print(faces.target_names)

# Show the first 32 images in a 4 x 8 grid, each titled with the name that
# its integer target label maps to.
fig, axes = plt.subplots(4, 8, figsize=(12, 9))

for idx, panel in enumerate(axes.flat):
    person = faces.target_names[faces.target[idx]]
    panel.imshow(faces.images[idx], cmap='binary_r')
    panel.set_title(person, fontsize=10)
    # Hide tick marks on both axes; they carry no meaning for an image
    panel.set_xticks([])
    panel.set_yticks([])

# Feature matrix and integer class labels used by the rest of the script
X = faces.data
y = faces.target

from sklearn.decomposition import PCA
from sklearn.manifold import Isomap

# Reduce every face vector to two components, once with PCA and once with
# Isomap, so the samples can be drawn as 2-D scatter plots.
X_pca = PCA(n_components=2).fit_transform(X)
X_iso = Isomap(n_components=2).fit_transform(X)

# Start a fresh figure for each projection. Without it, outside of a
# one-plot-per-cell notebook, both scatters are drawn onto whichever axes is
# current (the last panel of the preceding image grid), on top of each other,
# and the second title replaces the first.
plt.figure()
plt.scatter(X_pca[:, 0], X_pca[:, 1], c=y,  # y is faces.target: colour by class
            cmap='Blues')
plt.title('PCA projection');

plt.figure()
plt.scatter(X_iso[:, 0], X_iso[:, 1], c=y,
            cmap='Blues')
plt.title('Isomap projection');

# Split the data into a training set and a held-out test set.
# `sklearn.cross_validation` was deprecated in scikit-learn 0.18 and removed
# in 0.20; `sklearn.model_selection` is its replacement. The old module is
# only used as a fallback on pre-0.18 installations.
try:
    from sklearn.model_selection import train_test_split
except ImportError:
    from sklearn.cross_validation import train_test_split

# random_state=0 fixes the shuffle so the same split is produced on every run
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
print(X_train.shape, X_test.shape)

# Baseline: a support vector classifier with default settings, trained on the
# training split and evaluated on the held-out split.
from sklearn.metrics import accuracy_score
from sklearn.svm import SVC

# fit() hands back the estimator, so construction and training can be chained
clf = SVC().fit(X_train, y_train)
y_pred = clf.predict(X_test)

# Compare the predictions with the true test labels
accuracy_score(y_test, y_pred)

# Shortcut for the same check: let the estimator predict and score in one call
clf.score(X_test, y_test)

# Compare a few SVC kernels, everything else left at its default.
# NOTE: each kernel is scored on the test split itself, so the winning score
# is not an independent estimate of generalisation performance.
for kernel in ('linear', 'rbf', 'poly'):
    clf = SVC(kernel=kernel)
    clf.fit(X_train, y_train)
    score = clf.score(X_test, y_test)
    print("{0}: accuracy = {1}".format(kernel, score))

from sklearn.metrics import classification_report, confusion_matrix

# Refit with the linear kernel (hard-coded here as the chosen setting) and
# predict the held-out samples again for the detailed reports below.
best_clf = SVC(kernel='linear')
best_clf.fit(X_train, y_train)
y_pred = best_clf.predict(X_test)

# Text report of per-class metrics, with rows labelled by person name
print(classification_report(y_test, y_pred, target_names=faces.target_names))

# Table of true labels against predicted labels
confusion_matrix(y_test, y_pred)

# Size of a single image (the last two dimensions of faces.images); used to
# turn the flat feature rows of X_test back into pictures.
shape = faces.images.shape[-2:]

# Keep only the final word of each name so the titles stay short
last_names = [full_name.split()[-1] for full_name in faces.target_names]

# One two-line caption per test sample: true label above, prediction below
titles = [
    "True: {0}\nPred: {1}".format(last_names[i_test], last_names[i_pred])
    for i_test, i_pred in zip(y_test, y_pred)
]

# 4 x 8 grid of the first 32 test images, created without tick marks
fig, axes = plt.subplots(
    4, 8,
    figsize=(12, 9),
    subplot_kw=dict(xticks=[], yticks=[]),
)

for idx, panel in enumerate(axes.flat):
    picture = X_test[idx].reshape(shape)
    panel.imshow(picture, cmap='binary_r')
    panel.set_title(titles[idx], fontsize=10)