"""Compare ``ward_tree`` on a sparse versus a dense k-NN connectivity graph.

Two Gaussian blobs (means at -5 and +5 in every coordinate, identity
covariance) are sampled in ``dim`` dimensions.  A k-nearest-neighbours
connectivity matrix is built twice -- once with 5 neighbours and once with
``npoints`` neighbours -- each one is displayed as an image, and
``ward_tree`` is run with it.

The script originated as IPython notebook cells; it is written here so that
it also runs as a plain Python 2 or Python 3 script.
"""
import traceback

import numpy as np
import pylab as pl
from sklearn.cluster import ward_tree
from sklearn.neighbors import kneighbors_graph

# NOTE: the notebook version executed the IPython magic ``%pylab inline`` at
# this point.  It is not valid Python syntax, and nothing below relies on the
# names it star-imports, so it is kept only as this reminder.  In a notebook,
# run ``%matplotlib inline`` in a separate cell to get inline figures.

dim = 10
npoints = 100

# Very simple simulation: two blobs with the same (identity) covariance.
mean1 = -5 * np.ones(dim)
cov1 = np.diag(np.ones(dim))
mean2 = 5 * np.ones(dim)
cov2 = cov1

X1 = np.random.multivariate_normal(mean1, cov1, npoints)
X2 = np.random.multivariate_normal(mean2, cov2, npoints)
X = np.vstack((X1, X2))  # shape: (2 * npoints, dim)

# --- Case 1: sparse connectivity (5 neighbours per sample) -----------------
connectivity = kneighbors_graph(X, n_neighbors=5)

pl.figure(figsize=(5, 5))
pl.imshow(connectivity.todense(), interpolation='nearest')

# ward_tree was reported to fail when the connectivity matrix is this sparse
# (presumably because the graph is not connected -- confirm against the
# installed scikit-learn version).  The failure is reported and the script
# continues, mirroring how the remaining notebook cells would still run.
try:
    out = ward_tree(X, connectivity=connectivity)
except Exception:  # exception type depends on the scikit-learn version
    traceback.print_exc()

# --- Case 2: dense connectivity (npoints neighbours per sample) ------------
connectivity_1 = kneighbors_graph(X, n_neighbors=npoints)

pl.figure(figsize=(5, 5))
pl.imshow(connectivity_1.todense(), interpolation='nearest')

# ward_tree doesn't fail now.
out = ward_tree(X, connectivity=connectivity_1)

# First element of the returned tuple, truncated to its first 10 entries.
print(out[0][:10])

# Display the two figures when not running under an inline backend.
pl.show()