# Cluster the iris data set with K-Means twice -- once with the fixed k=3 and
# once with the k proposed by HyperKEstimator -- and draw each labelling as a
# 3D scatter plot in its own figure.
#
# NOTE: the original text carried this whole section twice in a row.  The
# second copy re-seeded NumPy, retrained the estimator and redrew figures 1
# and 2 with the same inputs, so the section is kept once.
import numpy as np
import matplotlib.pyplot as plt
# Kept for its side effect: importing it registers the '3d' projection on
# older Matplotlib releases.
from mpl_toolkits.mplot3d import Axes3D  # noqa: F401
from sklearn.cluster import KMeans
from sklearn import datasets

from kHLL.kestimate import HyperKEstimator
from kHLL.hash.image import md5_for_vec

np.random.seed(5)

centers = [[1, 1], [-1, -1], [1, -1]]  # not referenced below; kept as-is
iris = datasets.load_iris()
X = iris.data
y = iris.target

# The meaning of HyperKEstimator's positional arguments is defined in the
# kHLL package and is not visible from this file.
kestimator = HyperKEstimator(1, 5, md5_for_vec, 20)
kestimator.train(X)

estimators = {
    'k_means_iris_3': KMeans(n_clusters=3),
    'k_means_iris_hyper': KMeans(n_clusters=kestimator.getK()),
}

fignum = 1
for name, est in estimators.items():
    fig = plt.figure(fignum, figsize=(9, 5))
    plt.clf()
    # Create the axes through the figure so that it is attached to it:
    # a bare Axes3D(fig, ...) is no longer added automatically on current
    # Matplotlib, which would leave the figure empty.
    ax = fig.add_axes([0, 0, .95, 1], projection='3d', elev=48, azim=134)

    est.fit(X)
    labels = est.labels_

    # Feature columns 3, 0 and 2 are plotted; they correspond to the axis
    # labels set below.  `float` replaces the removed `np.float` alias.
    ax.scatter(X[:, 3], X[:, 0], X[:, 2], c=labels.astype(float))

    # `xaxis`/`yaxis`/`zaxis` replace the removed `w_xaxis`-style aliases.
    ax.xaxis.set_ticklabels([])
    ax.yaxis.set_ticklabels([])
    ax.zaxis.set_ticklabels([])
    ax.set_xlabel('Petal width')
    ax.set_ylabel('Sepal length')
    ax.set_zlabel('Petal length')

    # One figure per estimator; later sections reuse `fignum` for the next
    # free figure number.
    fignum = fignum + 1
# Two script sections:
#   1. draw the labelled ground truth of the iris data as a 3D scatter plot;
#   2. time DPGMM fitting against HyperKEstimator training + K-Means fitting
#      for an increasing number of repetitions and plot both timing curves.
import time

import numpy as np
import matplotlib.pyplot as plt
# Kept for its side effect: importing it registers the '3d' projection on
# older Matplotlib releases.
from mpl_toolkits.mplot3d import Axes3D  # noqa: F401
from sklearn.cluster import KMeans
from sklearn import datasets

try:
    from sklearn.mixture import DPGMM
except ImportError:
    # DPGMM was removed in scikit-learn 0.20.  BayesianGaussianMixture, whose
    # default weight prior is a Dirichlet process, is its replacement there.
    from sklearn.mixture import BayesianGaussianMixture as DPGMM

from kHLL.kestimate import HyperKEstimator
from kHLL.hash.image import md5_for_vec

np.random.seed(5)

centers = [[1, 1], [-1, -1], [1, -1]]  # not referenced below; kept as-is
iris = datasets.load_iris()
X = iris.data
y = iris.target

# Plot the ground truth
# `fignum` is not assigned in this section; it is expected to hold the next
# free figure number left behind by the code that precedes it in the file.
fig = plt.figure(fignum, figsize=(9, 5))
plt.clf()
# Create the axes through the figure so that it is attached to it: a bare
# Axes3D(fig, ...) is no longer added automatically on current Matplotlib.
ax = fig.add_axes([0, 0, .95, 1], projection='3d', elev=48, azim=134)

# Put each class name at the mean of that class's plotted coordinates,
# raised by 1.5 along the second plotted coordinate.
for name, label in [('Setosa', 0),
                    ('Versicolour', 1),
                    ('Virginica', 2)]:
    ax.text3D(X[y == label, 3].mean(),
              X[y == label, 0].mean() + 1.5,
              X[y == label, 2].mean(), name,
              horizontalalignment='center',
              bbox=dict(alpha=.5, edgecolor='w', facecolor='w'))

# Reorder the labels to have colors matching the cluster results
# (`float` replaces the removed `np.float` alias).
y = np.choose(y, [1, 2, 0]).astype(float)
ax.scatter(X[:, 3], X[:, 0], X[:, 2], c=y)

# `xaxis`/`yaxis`/`zaxis` replace the removed `w_xaxis`-style aliases.
ax.xaxis.set_ticklabels([])
ax.yaxis.set_ticklabels([])
ax.zaxis.set_ticklabels([])
ax.set_xlabel('Petal width')
ax.set_ylabel('Sepal length')
ax.set_zlabel('Petal length')
plt.show()

# ---------------------------------------------------------------------------
# Timing comparison: DPGMM vs. HyperKEstimator + K-Means
# ---------------------------------------------------------------------------
np.random.seed(5)

# Reload the data so that `y` holds the original targets again (it was
# remapped for colouring above).
iris = datasets.load_iris()
X = iris.data
y = iris.target

# `kestimator` is read here before this section assigns it, so it must
# already exist from earlier in the file.
estimators = {
    'dpgmm': DPGMM(),
    'k_means_iris_hyper': KMeans(n_clusters=kestimator.getK()),
}

## DPGMM
try_count = 5
dpgmm_elapsed_times = []
hyper_kmeans_elapsed_times = []

# Round i performs i back-to-back fits per method and records the total
# wall-clock time.  Rounds run for i = 1 .. try_count - 1, matching `x` below.
# The nesting is reconstructed from the flattened original: each list must end
# up with one entry per round, so the appends belong to the outer loop.
for i in range(1, try_count):
    start = time.time()
    for _ in range(i):
        dpgmm_model = DPGMM()
        dpgmm_model.fit(X)
    dpgmm_elapsed_times.append(time.time() - start)

    start = time.time()
    for _ in range(i):
        kestimator = HyperKEstimator(1, 5, md5_for_vec, 20)
        kestimator.train(X)
        # NOTE(review): k is hard-coded to 3 here although the estimator was
        # just trained; presumably kestimator.getK() was intended -- confirm.
        kmeans_model = KMeans(n_clusters=3)
        kmeans_model.fit(X)
    hyper_kmeans_elapsed_times.append(time.time() - start)

x = list(range(1, try_count))

fig = plt.figure()
axes = fig.add_axes([0.1, 0.1, 0.8, 0.8])

# Red: DPGMM timings; green: HyperKEstimator + K-Means timings.
axes.plot(x, dpgmm_elapsed_times, 'r')
axes.plot(x, hyper_kmeans_elapsed_times, 'g')

axes.set_xlabel('x')
axes.set_ylabel('y')
axes.set_title('Comparison between DPGMM and HyperKMeans')

# The earlier plt.show() runs before this figure exists, so it needs its own
# call to be displayed when the file is run as a script.
plt.show()