In [ ]:
from sklearn import datasets

# NOTE(review): `plt` is not imported in the visible cells; presumably it is
# provided by the notebook's pylab setup -- confirm.
plt.prism()  # switch the colormap to "prism" for the scatter plots below

# Pick the iris arrays by name. Positionally unpacking
# `load_iris().values()` depends on how many keys the returned Bunch has and
# on their iteration order, neither of which is a stable contract.
iris = datasets.load_iris()
data, target = iris.data, iris.target

spfs = (4, 4)  # figure size passed to plt.figure for the 4x4 panel figures
s = 3          # marker size passed to plt.scatter in the panel figures
In [ ]:
# 4x4 grid of scatter plots: panel (row, col) shows column `row` of `data`
# against column `col`, without class colours.
plt.figure(figsize=spfs)
for panel in range(16):
    row, col = divmod(panel, 4)
    plt.subplot(4, 4, panel + 1)
    plt.scatter(data[:, row], data[:, col], alpha=.6, s=s)
    # Hide the tick marks; the panels are too small for readable labels.
    plt.xticks(())
    plt.yticks(())

plt.suptitle("$X$")
plt.savefig("presentation/pca-pics/iris-all-nocolor.pdf")
In [ ]:
# Same 4x4 grid of pairwise scatters over the columns of `data`, this time
# coloured by `target`.
plt.figure(figsize=spfs)
feature_ids = range(4)
panel = 0
for i in feature_ids:
    for j in feature_ids:
        panel += 1  # subplot numbers are 1-based and run row by row
        plt.subplot(4, 4, panel)
        plt.scatter(data[:, i], data[:, j], c=target, alpha=.6, s=s)
        plt.xticks(())
        plt.yticks(())
plt.suptitle("$X$, $Y$")
plt.savefig("presentation/pca-pics/iris-all.pdf")
In [ ]:
from sklearn.decomposition import PCA
# PCA model restricted to two components; it is fitted on `data` in the
# plotting cells that follow.
pca = PCA(n_components=2)
In [ ]:
# Fit the PCA model on `data` and plot the first two columns of the
# transformed data, without class colours.
plt.figure(figsize=(3, 3))
data2 = pca.fit_transform(data)
first_axis = data2[:, 0]
second_axis = data2[:, 1]
plt.scatter(first_axis, second_axis)
plt.title("$X_{PCA}$")
plt.savefig("presentation/pca-pics/iris-2d-nocolor.pdf")
In [ ]:
# Re-fit the PCA model on `data` and plot the first two columns of the
# transformed data, coloured by `target`.
data2 = pca.fit_transform(data)
scatter_kwargs = dict(c=target)
plt.scatter(data2[:, 0], data2[:, 1], **scatter_kwargs)
plt.savefig("presentation/pca-pics/iris-2d.pdf")
In [ ]:
# Show the fitted component matrix. The single-argument call form of print
# behaves the same on Python 2 and Python 3, whereas the bare print statement
# is a SyntaxError on Python 3.
print(pca.components_)
In [ ]:
# Map the PCA-transformed data back through the fitted model and redraw the
# 4x4 grid of pairwise scatters on the result, coloured by `target`.
data3 = pca.inverse_transform(data2)
plt.figure(figsize=spfs)
pairs = [(i, j) for i in range(4) for j in range(4)]
for number, (i, j) in enumerate(pairs, 1):
    plt.subplot(4, 4, number)
    plt.scatter(data3[:, i], data3[:, j], c=target, alpha=.6, s=s)
    plt.xticks(())
    plt.yticks(())
plt.savefig("presentation/pca-pics/iris-bt.pdf")
In [ ]:
# Map the PCA-transformed data back through the fitted model and draw the
# 4x4 grid of pairwise scatters on the result, without class colours.
data3 = pca.inverse_transform(data2)
plt.figure(figsize=spfs)
for i in range(4):
    for j in range(4):
        plt.subplot(4, 4, i * 4 + j + 1)
        plt.scatter(data3[:, i], data3[:, j], alpha=.6, s=s)
        plt.xticks(())
        plt.yticks(())
# Raw string: "\m" is not a valid escape sequence, so a plain literal only
# works by accident and triggers a warning on newer Python versions. The
# text handed to matplotlib is unchanged.
plt.suptitle(r"$X_{\mathrm{clean}}$")
plt.savefig("presentation/pca-pics/iris-bt-nocolor.pdf")
In [ ]:
from sklearn.cluster import KMeans

# Cluster `data` into three groups. A fixed random_state makes the
# initialisation, and therefore the cluster numbering and the colours in the
# saved figures, repeatable from run to run.
kmeans = KMeans(n_clusters=3, random_state=0)
labels = kmeans.fit(data).labels_
In [ ]:
# Columns 0 and 1 of `data`, coloured by cluster label, with the cluster
# centres drawn on top as larger markers coloured 0, 1, 2 in row order.
centers = kmeans.cluster_centers_
plt.scatter(data[:, 0], data[:, 1], c=labels)
plt.scatter(centers[:, 0], centers[:, 1], c=[0, 1, 2], s=100)
plt.savefig("presentation/kmeans-pics/cluster-centers.pdf")
In [ ]:
# Same picture without colours: columns 0 and 1 of `data`, then the cluster
# centres as larger markers.
for points, extra in ((data, {}), (kmeans.cluster_centers_, {"s": 100})):
    plt.scatter(points[:, 0], points[:, 1], **extra)
plt.savefig("presentation/kmeans-pics/cluster-centers-nocolor.pdf")
In [ ]:
from matplotlib.patches import Ellipse
def plotEllipse(pos, P, edge, face, line_width, ax=None):
    """Add an ellipse derived from the 2x2 matrix ``P`` to an axes.

    The singular value decomposition ``P = U * diag(s) * Vh`` determines the
    shape: the ellipse is rotated by the angle of the first column of ``U``
    and has full width ``2*sqrt(s[0])`` and full height ``2*sqrt(s[1])``.

    Parameters
    ----------
    pos : sequence of two floats
        Centre of the ellipse, passed to ``Ellipse`` as ``xy``.
    P : array-like, 2x2
        Matrix whose SVD defines orientation and axis lengths.
    edge, face : matplotlib colour specs
        Edge colour and face colour of the patch.
    line_width : float
        Line width of the ellipse outline.
    ax : matplotlib axes, optional
        Axes to draw on; defaults to the current axes (``plt.gca()``).

    Returns
    -------
    The ``Ellipse`` patch that was added to the axes.
    """
    # Use the explicit np/plt namespaces instead of bare svd/pi/gca/math,
    # which are only defined after a pylab-style star import.
    U, s, _ = np.linalg.svd(P)
    orient = np.degrees(np.arctan2(U[1, 0], U[0, 0]))
    ellipsePlot = Ellipse(xy=pos,
                          width=2.0 * np.sqrt(s[0]),
                          height=2.0 * np.sqrt(s[1]),
                          angle=orient,
                          facecolor=face, edgecolor=edge, lw=line_width)
    if ax is None:
        ax = plt.gca()
    ax.add_patch(ellipsePlot)
    return ellipsePlot
In [ ]:
# Draw a 2-D Gaussian point cloud that the PCA walkthrough below operates on.
plt.figure(figsize=(3, 3))
plt.xlim(-4, 6)
plt.ylim(-4, 6)
# A covariance matrix must be symmetric positive semi-definite. The previous
# value [[2,-4],[-1,4]] was not symmetric, so the sampler could not use it as
# given and the printed determinant did not describe the sampled cloud.
# Mirroring the -1 entry is the only symmetric reading of that matrix that is
# positive definite (mirroring -4 gives a negative determinant).
# NOTE(review): this changes the generated figure -- confirm the intended
# covariance.
Cov = [[2, -1], [-1, 4]]
print(np.linalg.det(Cov))
# Fixed seed so the saved figures are the same on every run.
np.random.seed(0)
X = np.random.multivariate_normal([1, 2], Cov, size=500)
plt.scatter(X[:, 0], X[:, 1], s=2, alpha=0.7)
plt.savefig("presentation/pca-pics/pointcloud-2d.pdf")
In [ ]:
# Build the PCA explanation figure in stages on one axes, saving a PDF after
# each stage.

# pyplot has no `figsize` function; create the figure with the size instead.
plt.figure(figsize=(3, 3))
pca = PCA()
pca.fit(X)  # only the fitted attributes are used; the scores are not needed
plt.xlim(-4, 6)
plt.ylim(-4, 6)
# Row k of C is component k multiplied by its explained variance; plotEllipse
# takes the SVD of C.T to get the ellipse orientation and axis lengths.
C = (pca.explained_variance_ * pca.components_.T).T
mu = pca.mean_
plotEllipse(mu, C.T, 'k', 'none', 2)
plt.scatter(X[:, 0], X[:, 1], s=2, alpha=.2)
plt.scatter([mu[0]], [mu[1]], s=40, c='red')

# the point cloud is modeled as an ellipse
plt.savefig("presentation/pca-pics/pointcloud-2d-model.pdf")

# From here on the rows of C are the components scaled by the square root of
# the explained variance, used as arrow vectors starting at the mean.
C = (np.sqrt(pca.explained_variance_) * pca.components_.T).T
a1 = plt.arrow(mu[0], mu[1], C[0, 0], C[0, 1], fc="b", ec='b',
               head_width=.5, head_length=.5, width=.1,
               length_includes_head=True)

# this is the direction with the maximum variance
plt.savefig("presentation/pca-pics/pointcloud-2d-vecs-1a.pdf")

a1.set_visible(False)

# Line through the mean along the first component.
plt.plot(
    [mu[0] - 10 * C[0, 0], mu[0] + 10 * C[0, 0]],
    [mu[1] - 10 * C[0, 1], mu[1] + 10 * C[0, 1]])

# For the first ten points, draw an arrow from the point along the second
# component with length equal to its offset from the mean in that direction.
L = []
for i in range(10):
    d = np.dot(mu - X[i, :], pca.components_[1, :]) * pca.components_[1, :]
    L.append(plt.arrow(X[i, 0], X[i, 1], d[0], d[1],
                       head_width=.1, head_length=.1))

# we can project all points to this axis, and only use "little" information
plt.savefig("presentation/pca-pics/pointcloud-2d-vecs-proj1.pdf")

# Explicit loop: map() is lazy on Python 3 and would never hide the arrows.
for arrow in L:
    arrow.set_visible(False)
plt.arrow(mu[0], mu[1], C[1, 0], C[1, 1], fc="b", ec='b',
          head_width=.5, head_length=.5, width=.1,
          length_includes_head=True)

# the next axis has less variance, and even less error
plt.savefig("presentation/pca-pics/pointcloud-2d-vecs-2a.pdf")
In [ ]:
# Six-figure walkthrough of PCA on the point cloud X: raw cloud, centred
# cloud, centred cloud with ellipse, transformed cloud, and two figures with
# arrows dropping the first ten points along one coordinate direction.


def _centered_limits():
    """Apply the original [-4, 6] view window shifted by the fitted mean."""
    plt.xlim(-4 - mu[0], 6 - mu[0])
    plt.ylim(-4 - mu[1], 6 - mu[1])


def _mark_origin():
    """Draw the red marker at the origin."""
    plt.scatter([0], [0], s=40, c='red')


def _score_covariance_ellipse():
    """Outline the covariance of the transformed data X3 in black."""
    plotEllipse([0, 0], np.cov(X3, rowvar=False).T, 'k', 'none', 2)


def _arrows_along(direction):
    """For the first ten rows of X3, draw an arrow that removes the
    point's component along `direction`."""
    for row in range(10):
        point = X3[row, :]
        d = np.dot(-point, direction) * np.array(direction)
        plt.arrow(point[0], point[1], d[0], d[1],
                  head_width=.1, head_length=.1)


fs = (3, 3)
s = 2

# Step 1: raw cloud with its mean.
plt.figure(figsize=fs)
pca = PCA()
X3 = pca.fit_transform(X)
plt.xlim(-4, 6)
plt.ylim(-4, 6)
C = (pca.explained_variance_ * pca.components_.T).T
mu = pca.mean_
plt.scatter(X[:, 0], X[:, 1], s=s, alpha=.5)
plt.scatter([mu[0]], [mu[1]], s=40, c='red')
plt.savefig("presentation/pca-pics/pointcloud-2d-step1.pdf")

# Step 2: subtract the mean.
plt.figure(figsize=fs)
X2 = X - mu
plt.scatter(X2[:, 0], X2[:, 1], s=s, alpha=.5)
_centered_limits()
_mark_origin()
plt.savefig("presentation/pca-pics/pointcloud-2d-step2.pdf")

# Step 3: centred cloud with the ellipse built from C.
plt.figure(figsize=fs)
plt.scatter(X2[:, 0], X2[:, 1], s=s, alpha=.5)
_centered_limits()
plotEllipse([0, 0], C.T, 'k', 'none', 2)
_mark_origin()
plt.savefig("presentation/pca-pics/pointcloud-2d-step3.pdf")

# Step 4: transformed data, with the previous ellipse in gray and the
# covariance ellipse of the transformed data in black.
plt.figure(figsize=fs)
_centered_limits()
plt.scatter(X3[:, 0], X3[:, 1], s=4, alpha=.5)
plotEllipse([0, 0], C.T, 'gray', 'none', 2)
_score_covariance_ellipse()
_mark_origin()
plt.savefig("presentation/pca-pics/pointcloud-2d-step4.pdf")

# Steps 5 and 6: same backdrop, arrows along the second, then the first,
# coordinate direction.
for direction, target_pdf in (
        ([0, 1], "presentation/pca-pics/pointcloud-2d-step5.pdf"),
        ([1, 0], "presentation/pca-pics/pointcloud-2d-step6.pdf")):
    plt.figure(figsize=fs)
    _centered_limits()
    plt.scatter(X3[:, 0], X3[:, 1], s=s, alpha=.2)
    _score_covariance_ellipse()
    _mark_origin()
    _arrows_along(direction)
    plt.savefig(target_pdf)
In [ ]: