# Tutorial script: PCA and KMeans with scikit-learn (exported from a notebook).
#
# ``%matplotlib inline`` is an IPython magic, not Python syntax.  Invoke it
# through ``get_ipython`` so the file still enables inline figures under
# IPython/Jupyter, and skip it when that name does not exist (plain Python).
try:
    get_ipython().run_line_magic("matplotlib", "inline")
except NameError:
    pass

import matplotlib.pyplot as plt
import numpy as np
from sklearn.cluster import KMeans
# ``make_blobs`` is imported from the public ``sklearn.datasets`` namespace;
# the old ``sklearn.datasets.samples_generator`` path no longer exists in
# current scikit-learn releases.
from sklearn.datasets import load_digits, load_sample_image, make_blobs
from sklearn.decomposition import PCA

# ---------------------------------------------------------------------------
# PCA on a synthetic 2-D point cloud
# ---------------------------------------------------------------------------
np.random.seed(1)
# 200 two-dimensional points: normal samples mixed by a random 2x2 matrix.
X = np.dot(np.random.random(size=(2, 2)),
           np.random.normal(size=(2, 200))).T
plt.plot(X[:, 0], X[:, 1], 'og')
plt.axis('equal')

pca = PCA(n_components=2)
pca.fit(X)
print(pca.explained_variance_)
print(pca.components_)

# Overlay each principal axis, scaled to three standard deviations.
plt.plot(X[:, 0], X[:, 1], 'og', alpha=0.3)
plt.axis('equal')
for length, vector in zip(pca.explained_variance_, pca.components_):
    v = vector * 3 * np.sqrt(length)
    plt.plot([0, v[0]], [0, v[1]], '-k', lw=3)

# A float ``n_components`` asks PCA to keep as many components as are needed
# to explain that fraction of the variance (see the scikit-learn PCA docs).
clf = PCA(0.95)
X_trans = clf.fit_transform(X)
print(X.shape)
print(X_trans.shape)

# Map the reduced data back to the original space and compare with the input.
X_new = clf.inverse_transform(X_trans)
plt.plot(X[:, 0], X[:, 1], 'og', alpha=0.2)
plt.plot(X_new[:, 0], X_new[:, 1], 'og', alpha=0.8)
plt.axis('equal')

# ---------------------------------------------------------------------------
# PCA on the digits dataset
# ---------------------------------------------------------------------------
digits = load_digits()
X = digits.data
y = digits.target

pca = PCA(2)  # project from 64 to 2 dimensions
Xproj = pca.fit_transform(X)
print(X.shape)
print(Xproj.shape)

plt.scatter(Xproj[:, 0], Xproj[:, 1], c=y)
plt.colorbar()

pca = PCA(64).fit(X)
plt.semilogx(np.cumsum(pca.explained_variance_ratio_))
plt.xlabel('number of components')
plt.ylabel('cumulative explained variance')

# ---------------------------------------------------------------------------
# KMeans on synthetic blobs
# ---------------------------------------------------------------------------
X, y = make_blobs(n_samples=300, centers=4,
                  random_state=0, cluster_std=0.60)
plt.scatter(X[:, 0], X[:, 1], s=50)

est = KMeans(4)  # 4 clusters
est.fit(X)
y_kmeans = est.predict(X)
plt.scatter(X[:, 0], X[:, 1], c=y_kmeans, s=50)

# ---------------------------------------------------------------------------
# KMeans on the digits dataset
# ---------------------------------------------------------------------------
est = KMeans(n_clusters=10)
clusters = est.fit_predict(digits.data)
print(est.cluster_centers_.shape)

# Show every cluster centre as an 8x8 image on a 2x5 grid.
fig = plt.figure(figsize=(8, 3))
for position, center in enumerate(est.cluster_centers_, start=1):
    ax = fig.add_subplot(2, 5, position, xticks=[], yticks=[])
    ax.imshow(center.reshape((8, 8)), cmap=plt.cm.binary)

# Side by side in a 2-D PCA projection: cluster labels vs. true digit labels.
X = PCA(2).fit_transform(digits.data)

fig, ax = plt.subplots(1, 2, figsize=(8, 4))
ax[0].scatter(X[:, 0], X[:, 1], c=clusters)
ax[1].scatter(X[:, 0], X[:, 1], c=digits.target)

# ---------------------------------------------------------------------------
# Exercise: colour compression of an image with KMeans
# ---------------------------------------------------------------------------
china = load_sample_image("china.jpg")
plt.imshow(china)
print(china.shape)

# Flatten the image into one row per pixel, with channels rescaled by 1/255.
X = (china / 255.0).reshape(-1, 3)
print(X.shape)


def compress_image(image, n_colors):
    """Compress an image

    This is an exercise stub: until the KMeans step below is filled in, the
    function returns an unmodified copy of ``image``.

    Parameters
    ==========
    image : numpy array
        array of shape (height, width, 3) with values between 0 and 255
        (the function rescales it by 1/255 internally)
    n_colors : integer
        the number of colors in the final compressed image
        (i.e. the number of KMeans clusters to fit).

    Returns
    =======
    new_image : numpy array
        array representing the new image, compressed via KMeans clustering.
        It has the same shape as the input image, but contains only
        ``n_colors`` distinct colors.
    """
    # One row per pixel, channels scaled by 1/255; input for the KMeans fit
    # that the exercise asks for (unused until that code is written).
    X = (image / 255.0).reshape(-1, 3)
    new_image = image.copy()

    # ------------
    # Your KMeans code goes here!
    # ------------

    # if you convert back to integer, make sure it's the correct type!
    # i.e. new_image = (255 * new_image).astype(np.uint8)
    return new_image


# create and plot the new image
new_image = compress_image(china, 64)
plt.imshow(new_image)

# %load solutions/05_color_compression.py