## Resources¶

TODO: run both scipy's SVD and sklearn's PCA on the same data and check whether they produce the same components.

## DISCUSSION ON PCA on Images and PCA on general data¶

• http://ufldl.stanford.edu/wiki/index.php/PCA
• for natural image / signal / text data — (1) removing the DC component is usually done per example (row-wise) rather than per feature (column-wise). (2) variance normalization is NOT always necessary, because the pixels (features) of stationary natural images share roughly the same statistics
In [162]:
import theano
import theano.tensor as T
from sklearn import decomposition
import numpy as np
from numpy import linalg
import pandas as pd


## PCA & ZCA in SVD way - 2D data¶

In [163]:
## load data
## NOTE(review): `data2d` is not defined in any visible cell -- presumably the
## loading cell was deleted; TODO restore the load (e.g. scipy.io.loadmat) so
## the notebook survives Restart & Run All.
## `plot`/`title` are matplotlib names made global by pylab mode.
print data2d.shape
plot(data2d[:, 0], data2d[:, 1], 'bo')
title('Raw data')
print data2d.mean(axis = 0)   # per-feature (column) means -- small but nonzero
print data2d.var(axis = 0)    # per-feature variances (equal here: 0.088)

(45, 2)
[ 0.01851256  0.03179579]
[ 0.088  0.088]

In [164]:
## Center the data FEATURE-wise (subtract each column's mean), as is standard
## for general (non-image) data; contrast with the per-patch centering used in
## the natural-image section below.
normalized_data = data2d - data2d.mean(axis = 0) # featurewise/columnwise DC removal
print normalized_data.shape
plot(normalized_data[:, 0], normalized_data[:, 1], 'bo')
title('Normalized (feature-wise) data')

(45, 2)

Out[164]:
<matplotlib.text.Text at 0x10dd7c50>
## remove DC - instance-wise NOT NECESSARY IN THIS EXAMPLE data2d = data2d - np.mean(data2d, axis = 1).reshape(-1, 1) plot(data2d[:, 0], data2d[:, 1], 'bo')
In [165]:
## find PCA basis
## SVD == Eigen-decomposition for sysmetric & positive definite matrices (covariance matrix)
## The PC directions will be the columns of U
## The PC vectors will be the PC directions from the CENTER of the normalized data
U, S, V = linalg.svd(np.dot(normalized_data.T, normalized_data) / normalized_data.shape[0])
print U.shape, S.shape, V.shape
center = data2d.mean(axis = 0)
pc1, pc2 = U.T
print pc1
plot(normalized_data[:, 0], normalized_data[:, 1], 'g.')
plot([center[0], pc1[0]], [center[1], pc1[1]], 'b->')
plot([center[0], pc2[0]], [center[1], pc2[1]], 'b->')
title('PC of normalized data')
print np.dot(pc1, pc2), ' dot product == 0 means they are othogonal!!'

(2, 2) (2,) (2, 2)
[-0.70710678 -0.70710678]
0.0  dot product == 0 means they are othogonal!!

In [166]:
## calculate xRot - X projected onto the PC basis
## Rotating the centered data into the PC basis decorrelates the axes;
## column 0 of xRot is the direction of greatest variance.
xRot = np.dot(normalized_data, U)
print xRot.shape
plot(xRot[:, 0], xRot[:, 1], 'bo')
title('xRot')
ylim(-0.3, 0.5)
xlim(-1, 1.1)

(45, 2)

Out[166]:
(-1, 1.1)
In [172]:
## dimension reduction by PCA
## by selecting only the first pc
#xRot_pc1 = xRot[:, 1] = 0 # ignore the second PC ????
xRot_pc1 = xRot.copy() ## deep copy
xRot_pc1[:, 1] = 0
#print xRot
#print xRot_pc1
xHat = np.dot(xRot_pc1, U.T)
print xHat.shape
plot(xHat[:, 0], xHat[:, 1], 'bo')
title('xHat')

(45, 2)

Out[172]:
<matplotlib.text.Text at 0x119b5950>
In [173]:
## PCA WHITENING - NORMALIZE U by S matrix diagonal elements
whitened_pcs = U / np.sqrt(S)
print whitened_pcs
print U
whitened_X = np.dot(normalized_data, whitened_pcs)
#whitened_X = np.dot(np.dot(normalized_data, whitened_pcs), U.T)
print whitened_X.shape
plot(whitened_X[:, 0], whitened_X[:, 1], 'bo')

[[-1.7638156  -5.71992455]
[-1.7638156   5.71992455]]
[[-0.70710678 -0.70710678]
[-0.70710678  0.70710678]]
(45, 2)

Out[173]:
[<matplotlib.lines.Line2D at 0x11bf2b90>]
In [174]:
## ZCA WHITENING - NORMALIZE U by S matrix diagonal elements
## ZCA = PCA whitening followed by rotating back with U^T; the result is still
## white but stays as close as possible to the original data.
whitened_pcs = U / np.sqrt(S)   # broadcasting: column j of U scaled by 1/sqrt(S[j])
print whitened_pcs
print U
whitened_Z = np.dot(np.dot(normalized_data, whitened_pcs), U.T)
print whitened_Z.shape
plot(whitened_Z[:, 0], whitened_Z[:, 1], 'bo')

[[-1.7638156  -5.71992455]
[-1.7638156   5.71992455]]
[[-0.70710678 -0.70710678]
[-0.70710678  0.70710678]]
(45, 2)

Out[174]:
[<matplotlib.lines.Line2D at 0x11c1e8d0>]

## PCA/ZCA Whitening - natural image data¶

In [178]:
## load data
from scipy import io
## NOTE(review): `images` is never assigned in any visible cell -- the actual
## io.loadmat(...) call appears to be missing (which is why scipy.io is
## imported here); TODO restore it so the notebook survives Restart & Run All.
print images.shape
gray()
"""
fig, axes = subplots(nrows = 5, ncols = 2, figsize = (1 *2, 1 * 5))
axes = axes.flatten()
for i in range(images.shape[2]):
axes[i].imshow(images[:,:,i])
"""

(512, 512, 10)

Out[178]:
'\nfig, axes = subplots(nrows = 5, ncols = 2, figsize = (1 *2, 1 * 5))\naxes = axes.flatten()\nfor i in range(images.shape[2]):\n    axes[i].imshow(images[:,:,i])\n'
In [179]:
## get random patches of 10000 x 12 x 12
patches = np.zeros((10000, 12, 12))
im_index = np.random.randint(low = 0, high = 10, size = 10000)
x_index = np.random.randint(low = 0, high = 512 - 12, size = 10000)
y_index = np.random.randint(low = 0, high = 512 - 12, size = 10000)
for i, (im, x, y) in enumerate(zip(im_index, x_index, y_index)):
patches[i] = images[x:x+12, y:y+12, im]
print patches.shape
feats = patches.reshape(10000, -1)
print feats.shape

(10000, 12, 12)
(10000, 144)

In [180]:
## patchwise zero-mean of the data
## For natural images the DC (mean intensity) is removed per PATCH (row-wise),
## not per feature -- see the UFLDL note at the top of this notebook.
print feats.mean(axis = 1)
normalized_feats = feats - feats.mean(axis = 1).reshape(-1, 1)
print normalized_feats.shape
print normalized_feats.mean(axis = 1)   # ~1e-16: per-patch means are now numerically zero

[-0.66657157 -0.43762508 -0.40810873 ..., -0.44826995 -0.06041566
-0.59098646]
(10000, 144)
[ -2.46716228e-17  -1.18732185e-16   2.39006346e-17 ...,  -3.55040071e-16
7.95659834e-16   9.86864911e-17]

In [210]:
## PCA
## Eigen-decompose the 144x144 feature covariance via SVD; rotating the data
## into the PC basis (X U) diagonalizes its covariance, which the two heatmaps
## below visualize (dense before rotation, diagonal after).
cov_mat = np.dot(normalized_feats.T, normalized_feats) / normalized_feats.shape[0]
U, S, V = linalg.svd(cov_mat)
print U.shape, S.shape, V.shape

rot_feats = np.dot(normalized_feats, U)
rot_mat = np.dot(rot_feats.T, rot_feats) / rot_feats.shape[0]
fig, axes = subplots(nrows = 1, ncols = 2, figsize = (3 * 2, 3 * 1))
axes[0].imshow(cov_mat, cmap = cm.jet)
axes[0].set_title('cov before PC rotation')
axes[1].imshow(rot_mat, cmap = cm.jet)
axes[1].set_title('cov after PC rotation (diagonal)')

(144, 144) (144,) (144, 144)

Out[210]:
<matplotlib.text.Text at 0x105826d0>
In [182]:
## find k to retain 90% and 99% variance
explained_variance = S.cumsum() / S.sum()
print explained_variance
k90 = np.argwhere(explained_variance >= 0.9)[0][0] + 1
k99 = np.argwhere(explained_variance >= 0.99)[0][0] + 1
print k90, k99

[ 0.18922609  0.31745002  0.39037621  0.45891456  0.50817309  0.54564181
0.57887362  0.60912939  0.63324278  0.65456544  0.67494109  0.69293414
0.70860297  0.72233561  0.73520842  0.74790019  0.75879333  0.76937143
0.77864098  0.78742064  0.79599283  0.80394317  0.81182299  0.81876492
0.82534819  0.8317731   0.83756571  0.84298248  0.84825989  0.85336287
0.85826534  0.86265742  0.86691114  0.87100746  0.87495375  0.87880224
0.88253274  0.885964    0.88916345  0.89229671  0.89541402  0.89842408
0.90135338  0.90427612  0.90715755  0.90973173  0.91215753  0.91453486
0.91680731  0.91901338  0.92116872  0.92324489  0.92524687  0.92724079
0.92913019  0.93097005  0.93275465  0.93452772  0.93626262  0.93795901
0.93957299  0.94115693  0.94270027  0.94422658  0.94570366  0.94710458
0.94846121  0.94978767  0.95109665  0.95236435  0.95362702  0.95487126
0.95609081  0.95727432  0.95842701  0.95955849  0.96066878  0.96174807
0.96279999  0.96382532  0.96484326  0.9658365   0.96681719  0.96777379
0.96872401  0.96965618  0.97057545  0.97148587  0.97236718  0.97321796
0.97405449  0.97488447  0.9757015   0.97649948  0.97726663  0.97802884
0.97877904  0.97950508  0.98021825  0.98090933  0.98158483  0.98225344
0.98290628  0.9835344   0.98414539  0.98475011  0.98534357  0.9859317
0.98650866  0.98706454  0.98761174  0.9881546   0.98869003  0.98921436
0.9897265   0.99023258  0.99072464  0.9912082   0.99168285  0.9921432
0.99259544  0.99303836  0.99346938  0.99388292  0.99429051  0.99469465
0.99508083  0.99546251  0.9958355   0.99620615  0.99656296  0.99691107
0.99724847  0.99757465  0.99788021  0.99817963  0.99847502  0.99875527
0.99902271  0.99928279  0.99953345  0.99977377  1.          1.        ]
43 116

In [203]:
## get the ZCA
rot_feats90 = rot_feats.copy()
rot_feats90[:, k90:] = 0
rot_feats99 = rot_feats.copy()
rot_feats99[:, k99:] = 0
#feats90 = np.dot(rot_feats90, U.T)
#feats99 = np.dot(rot_feats99, U.T)
print feats90.shape, feats99.shape

## plotting - the first 25 patches
## original
figure()
fig, axes = subplots(nrows = 3, ncols = 3, figsize=(1 * 3, 1 * 3))
axes = axes.flatten()
fig.suptitle('original')
for i in xrange(9):
axes[i].imshow(feats[i].reshape(12, 12), interpolation='none')
axes[i].get_xaxis().set_visible(False)
axes[i].get_yaxis().set_visible(False)
## 99% variation
figure()
fig, axes = subplots(nrows = 3, ncols = 3, figsize=(1 * 3, 1 * 3))
axes = axes.flatten()
fig.suptitle('99%')
for i in xrange(9):
axes[i].imshow(feats99[i].reshape(12, 12), interpolation='none')
axes[i].get_xaxis().set_visible(False)
axes[i].get_yaxis().set_visible(False)
## 90% variation
figure()
fig, axes = subplots(nrows = 3, ncols = 3, figsize=(1 * 3, 1 * 3))
axes = axes.flatten()
fig.suptitle('90%')
for i in xrange(9):
axes[i].imshow(feats90[i].reshape(12, 12), interpolation='none')
axes[i].get_xaxis().set_visible(False)
axes[i].get_yaxis().set_visible(False)

(10000, 144) (10000, 144)

In [222]:
## ZCA whitening
epsilon = 0.1
white_U = U / (np.sqrt(S) + epsilon)
feats_zca = np.dot(np.dot(feats, white_U), U.T)
print feats_zca.shape
## plotting - the first 25 patches
## original
figure()
fig, axes = subplots(nrows = 3, ncols = 3, figsize=(1 * 3, 1 * 3))
axes = axes.flatten()
fig.suptitle('original')
for i in xrange(9):
axes[i].imshow(feats[i].reshape(12, 12), interpolation='none')
axes[i].get_xaxis().set_visible(False)
axes[i].get_yaxis().set_visible(False)
## ZCA
figure()
fig, axes = subplots(nrows = 3, ncols = 3, figsize=(1 * 3, 1 * 3))
axes = axes.flatten()
fig.suptitle('ZCA whitening -- enhanced edges')
for i in xrange(9):
axes[i].imshow(feats_zca[i].reshape(12, 12), interpolation='none')
axes[i].get_xaxis().set_visible(False)
axes[i].get_yaxis().set_visible(False)

(10000, 144)

In [221]:
## covariance of PCA whitening (with/without regularization)
## With epsilon = 0 the whitened covariance should be the identity; with
## epsilon = 1 the small-eigenvalue directions are under-scaled, so the
## diagonal decays toward the later components.
## FIX: apply the whitening to the zero-mean `normalized_feats`, which is the
## data the whitening basis (U, S) was estimated from (the original used raw
## `feats`).
epsilon = 0.
white_U = U / (np.sqrt(S) + epsilon)
feats_pca = np.dot(normalized_feats, white_U)
figure(figsize = (4, 4))
imshow(np.dot(feats_pca.T, feats_pca) / feats_pca.shape[0], cmap = cm.jet)

epsilon = 1.
white_U = U / (np.sqrt(S) + epsilon)
feats_pca = np.dot(normalized_feats, white_U)
figure(figsize = (4, 4))
imshow(np.dot(feats_pca.T, feats_pca) / feats_pca.shape[0], cmap = cm.jet)

Out[221]:
<matplotlib.image.AxesImage at 0x15edeb50>
In [ ]: