%matplotlib inline import numpy as np import matplotlib.pyplot as plt from scipy import stats # use seaborn plotting defaults # If this causes an error, you can comment it out. import seaborn as sns sns.set() from sklearn.datasets.samples_generator import make_blobs X, y = make_blobs(n_samples=300, centers=4, random_state=0, cluster_std=0.60) plt.scatter(X[:, 0], X[:, 1], s=50); from sklearn.cluster import KMeans est = KMeans(4) # 4 clusters est.fit(X) y_kmeans = est.predict(X) plt.scatter(X[:, 0], X[:, 1], c=y_kmeans, s=50, cmap='rainbow'); from fig_code import plot_kmeans_interactive plot_kmeans_interactive() from sklearn.datasets import load_digits digits = load_digits() est = KMeans(n_clusters=10) clusters = est.fit_predict(digits.data) est.cluster_centers_.shape fig = plt.figure(figsize=(8, 3)) for i in range(10): ax = fig.add_subplot(2, 5, 1 + i, xticks=[], yticks=[]) ax.imshow(est.cluster_centers_[i].reshape((8, 8)), cmap=plt.cm.binary) from scipy.stats import mode labels = np.zeros_like(clusters) for i in range(10): mask = (clusters == i) labels[mask] = mode(digits.target[mask])[0] from sklearn.decomposition import PCA X = PCA(2).fit_transform(digits.data) kwargs = dict(cmap = plt.cm.get_cmap('rainbow', 10), edgecolor='none', alpha=0.6) fig, ax = plt.subplots(1, 2, figsize=(8, 4)) ax[0].scatter(X[:, 0], X[:, 1], c=labels, **kwargs) ax[0].set_title('learned cluster labels') ax[1].scatter(X[:, 0], X[:, 1], c=digits.target, **kwargs) ax[1].set_title('true labels'); from sklearn.metrics import accuracy_score accuracy_score(digits.target, labels) from sklearn.metrics import confusion_matrix print(confusion_matrix(digits.target, labels)) plt.imshow(confusion_matrix(digits.target, labels), cmap='Blues', interpolation='nearest') plt.colorbar() plt.grid(False) plt.ylabel('true') plt.xlabel('predicted'); from sklearn.datasets import load_sample_image china = load_sample_image("china.jpg") plt.imshow(china) plt.grid(False); china.shape X = (china / 255.0).reshape(-1, 3) print(X.shape) def compress_image(image, n_colors): """Compress an image Parameters ========== image : numpy array array of shape (height, width, 3) with values between 0 and 1 n_colors : integer the number of colors in the final compressed image (i.e. the number of KMeans clusters to fit). Returns ======= new_image : numpy array array representing the new image, compressed via KMeans clustering. It has the same shape as the input image, but contains only ``n_colors`` distinct colors. """ X = (image / 255.0).reshape(-1, 3) new_image = image.copy() #------------ # Your KMeans code goes here! #------------ # if you convert back to integer, make sure it's the correct type! # i.e. new_image = (255 * new_image).astype(np.uint8) return new_image # create and plot the new image new_image = compress_image(china, 64) plt.imshow(new_image) plt.grid(False);