"""Demo: sklearn's ward_tree on two Gaussian blobs with k-NN connectivity.

Shows that a sparse k-nearest-neighbors connectivity graph (k=5) leaves the
two blobs as 2 disconnected components, making ward_tree warn and complete
the graph internally, while a dense graph (k = npoints) yields a single
connected component and no warning.

NOTE(review): this file was a raw IPython-notebook dump. The %pylab magic
and pasted cell outputs were not valid Python; they are preserved below as
comments. Python 2 print statements were modernized.
"""
import numpy as np
# pylab is deprecated as a top-level import; use matplotlib.pyplot directly.
import matplotlib.pyplot as pl
# ward_tree lived in the private module sklearn.cluster.hierarchical, which
# was removed in modern scikit-learn; the public import path is used instead.
from sklearn.cluster import ward_tree
from sklearn.neighbors import kneighbors_graph

dim = 10        # dimensionality of each point
npoints = 100   # number of points per blob

# Very simple simulation: two well-separated isotropic Gaussian blobs.
mean1 = -5 * np.ones(dim)
cov1 = np.diag(np.ones(dim))
mean2 = 5 * np.ones(dim)
cov2 = cov1
X1 = np.random.multivariate_normal(mean1, cov1, npoints)
X2 = np.random.multivariate_normal(mean2, cov2, npoints)
X = np.vstack((X1, X2))

# Sparse connectivity: with only 5 neighbors, each blob is its own connected
# component, so ward_tree emits a warning and completes the graph, e.g.:
#   UserWarning: the number of connected components of the connectivity
#   matrix is 2 > 1. Completing it to avoid stopping the tree early.
connectivity = kneighbors_graph(X, n_neighbors=5)
pl.figure(figsize=(5, 5))
pl.imshow(connectivity.todense(), interpolation='nearest')
out = ward_tree(X, connectivity=connectivity)

# Dense connectivity (k = npoints): one connected component, no warning.
connectivity_1 = kneighbors_graph(X, n_neighbors=npoints)
pl.figure(figsize=(5, 5))
pl.imshow(connectivity_1.todense(), interpolation='nearest')
out = ward_tree(X, connectivity=connectivity_1)

# First ten merges of the hierarchical tree (children array); the original
# notebook printed this twice — once is enough. Example output:
#   [[195 169] [164 113] [ 91  24] [ 20  11] [158 133]
#    [197 185] [120 116] [162 144] [193 125] [ 99  15]]
print(out[0][:10])