%pylab inline
import pylab as pl
from sklearn.datasets import fetch_olivetti_faces
from sklearn.cross_validation import train_test_split
from sklearn.utils import shuffle
from scipy.stats.mstats import mquantiles
pl.gray()

olivetti = fetch_olivetti_faces()
faces = olivetti.images
faces_train, faces_test, target_train, target_test = train_test_split(
    olivetti.images, olivetti.target)

pl.imshow(faces_train[0])

from sklearn.feature_extraction.image import PatchExtractor

pe = PatchExtractor((8, 8), random_state=42)
patches = pe.transform(faces_train)

patches.shape

patches = patches.reshape(-1, 64)

def plot_patches(patches, n_rows=8, n_cols=12):
    pl.figure(figsize=(1.2 * n_cols, 1.2 * n_rows))
    for i in range(n_rows):
        for j in range(n_cols):
            idx = i * n_cols + j
            if idx < len(patches):
                pl.subplot(n_rows, n_cols, i * n_cols + j)
                pl.imshow(patches[idx].reshape(8, 8), interpolation='nearest')
                pl.axis('off')

plot_patches(patches)

n_patches = 100000

patches = shuffle(patches)[:n_patches]

plot_patches(patches)

patches_centered = patches - patches.mean(axis=1).reshape(n_patches, 1)

from sklearn.preprocessing import normalize

patches_normalized = normalize(patches_centered)

plot_patches(patches_normalized)

from sklearn.cluster import MiniBatchKMeans

km_10 = MiniBatchKMeans(n_clusters=10, batch_size=1000, verbose=0, n_init=3, random_state=42)
%time km_10.fit(patches_normalized)

centroids_10 = normalize(km_10.cluster_centers_)
activations_10 = np.dot(patches_normalized, centroids_10.T)

def plot_activations(activations, n_rows=8, n_cols=12, bins=10):
    pl.figure(figsize=(1.2 * n_cols, 1.2 * n_rows))
    for i in range(n_rows):
        for j in range(n_cols):
            idx = i * n_cols + j
            if idx < len(patches):
                pl.subplot(n_rows, n_cols, i * n_cols + j)
                pl.title("{:0.2f}".format(np.median(activations[:, idx])))
                pl.hist(activations[:, idx], bins=bins)
                pl.axis('off')

plot_patches(centroids_10, n_rows=1, n_cols=10)
plot_activations(activations_10, n_rows=1, n_cols=10)

km_100 = MiniBatchKMeans(n_clusters=100, batch_size=1000, verbose=0, n_init=3, random_state=42)
%time km_100.fit(patches_normalized)

centroids_100 = normalize(km_100.cluster_centers_)
activations_100 = np.dot(patches_normalized, centroids_100.T)

plot_patches(centroids_100)
plot_activations(activations_100)

km_1000 = MiniBatchKMeans(n_clusters=1000, batch_size=1000, verbose=0, n_init=3, random_state=42)
%time km_1000.fit(patches_normalized)

centroids_1000 = normalize(km_1000.cluster_centers_)
activations_1000 = np.dot(patches_normalized, centroids_1000.T)

plot_patches(centroids_1000)
plot_activations(activations_1000)

mquantiles(activations_1000, prob=0.9, axis=0).mean()

from sklearn.feature_extraction.image import extract_patches_2d

%time patches_0 = extract_patches_2d(faces_train[0], (8, 8))

patches_0.shape

plot_patches(patches_0)

from scipy import linalg
from sklearn.utils import array2d, as_float_array
from sklearn.base import TransformerMixin, BaseEstimator
 
 
class ZCA(BaseEstimator, TransformerMixin):
 
    def __init__(self, regularization=10**-5, copy=False):
        self.regularization = regularization
        self.copy = copy
     
    def fit(self, X, y=None):
        X = array2d(X)
        X = as_float_array(X, copy = self.copy)
        self.mean_ = np.mean(X, axis=0)
        X -= self.mean_
        sigma = np.dot(X.T, X) # / X.shape[1]
        U, S, V = linalg.svd(sigma)
        tmp = np.dot(U, np.diag(1 / np.sqrt(S + self.regularization))) * np.sqrt(X.shape[0])
        self.components_ = np.dot(tmp, U.T)
        return self
     
    def transform(self, X):
        X = array2d(X)
        X_transformed = X - self.mean_
        X_transformed = np.dot(X_transformed, self.components_.T)
        return X_transformed

%time patches_zca = ZCA().fit_transform(patches)

patches_zca.std(axis=0)

patches_zca_normalized = normalize(patches_zca)

plot_patches(patches_zca_normalized)

plot_patches(patches)

km_1000_zca = MiniBatchKMeans(n_clusters=1000, batch_size=1000, verbose=0, n_init=3, random_state=42)
%time km_1000_zca.fit(patches_zca_normalized)

centroids_1000_zca = normalize(km_1000_zca.cluster_centers_)
activations_1000_zca = np.dot(patches_zca_normalized, centroids_1000_zca.T)

plot_patches(centroids_1000_zca)
plot_activations(activations_1000_zca)

q90 = mquantiles(activations_1000_zca, prob=0.9, axis=0)

q90.min(), np.median(q90), q90.mean(), q90.max()

pl.plot(sorted(np.sum(activations_1000_zca > q90.mean(), axis=0)))