"""Retrieve the members of each coloured cluster from a SciPy dendrogram.

This file started life as an IPython notebook.  The notebook-only pieces
(``%pylab inline``, ``figsize(...)``, the ``_`` last-output variable and bare
cell echoes) have been replaced with plain-Python equivalents so that the
file runs as a script and the helpers can be imported.

Running the script draws two demo dendrograms (``tree1.png``, ``tree2.png``)
from random data and prints the HTML summary of their clusters.
"""
from collections import defaultdict
from html import escape


class Clusters(dict):
    """Mapping of link colour -> list of leaf labels drawn in that colour.

    Keys are the colour strings found in ``den['color_list']``; because the
    key is the colour itself, separate clusters that happen to share a colour
    end up under the same key.
    """

    def _repr_html_(self):
        """Return an HTML table with one colour-swatched row per cluster."""
        # Imported lazily so the class can be used without matplotlib as long
        # as the rich HTML representation is not requested.
        from matplotlib.colors import colorConverter, rgb2hex

        rows = []
        for color, members in self.items():
            hx = rgb2hex(colorConverter.to_rgb(color))
            rows.append(
                '<tr style="border: 0;">'
                '<td style="background-color: {0}; border: 0;">'
                '<code style="background-color: {0};">{1}</code></td>'
                '<td style="border: 0;"><code>{2}</code></td>'
                '</tr>'.format(hx, escape(str(color)), escape(repr(members)))
            )
        return '<table style="border: 0;">' + ''.join(rows) + '</table>'


def _leaf_indices_by_color(den):
    """Group leaf positions (0-based, left to right) by link colour.

    ``den`` is the dict returned by ``scipy.cluster.hierarchy.dendrogram``.
    Every link is drawn as an inverted "U" through the four points
    ``(icoord[k][j], dcoord[k][j])``; the first and last points are its feet.
    """
    cluster_idxs = defaultdict(list)
    for color, xs, ys in zip(den['color_list'], den['icoord'], den['dcoord']):
        for x, foot_height in ((xs[1], ys[0]), (xs[2], ys[3])):
            # A foot above the baseline rests on another link, not on a leaf.
            # Checking x alone is not enough: an internal node can be centred
            # exactly on a leaf slot (e.g. x == 25 for a balanced six-leaf
            # subtree) and would be mistaken for that leaf.
            if foot_height != 0:
                continue
            # Leaves are drawn at x = 5, 15, 25, ...
            position = (x - 5.0) / 10.0
            index = int(round(position))
            if abs(position - index) < 1e-5:
                cluster_idxs[color].append(index)
    return cluster_idxs


def get_cluster_classes(den, label='ivl'):
    """Return a ``Clusters`` mapping of link colour -> leaf labels.

    Parameters
    ----------
    den : dict
        Result of ``scipy.cluster.hierarchy.dendrogram``.
    label : str
        Key of ``den`` holding the per-leaf values to report, indexed by leaf
        position (``'ivl'`` for the tick labels, ``'leaves'`` for the
        observation indices).
    """
    cluster_classes = Clusters()
    for color, idxs in _leaf_indices_by_color(den).items():
        cluster_classes[color] = [den[label][i] for i in idxs]
    return cluster_classes


def main():
    """Draw the two demo dendrograms and print their cluster summaries."""
    # The demo's dependencies are imported here rather than at module level so
    # that importing the helpers above does not require the plotting stack.
    import matplotlib.pyplot as plt
    import numpy as np
    import pandas as pd
    import seaborn as sns
    from fastcluster import linkage
    from matplotlib.colors import rgb2hex
    from scipy.cluster.hierarchy import dendrogram, set_link_color_palette

    sns.set_palette('Set1', n_colors=10, desat=0.65)
    palette = sns.color_palette()
    # SciPy insists on a list/tuple; a bare ``map`` object is rejected on
    # Python 3.
    set_link_color_palette(list(map(rgb2hex, palette)))
    sns.set_style('white')

    # The notebook wrote ``0025``: a Python 2 octal literal (== 21) that is a
    # SyntaxError on Python 3.  ``0o25`` keeps the same seed.
    np.random.seed(0o25)

    no_spine = {'left': True, 'bottom': True, 'right': True, 'top': True}
    demos = (
        ((3, 10), (8, 3), 'tree1.png'),
        ((96, 10), (12, 4), 'tree2.png'),
    )

    for (x, y), size, filename in demos:
        df = pd.DataFrame(
            np.random.randn(x, y),
            index=['sample_{}'.format(i) for i in range(1, x + 1)],
            columns=['gene_{}'.format(i) for i in range(1, y + 1)],
        )
        link = linkage(df, metric='correlation', method='ward')

        # A fresh figure per tree; otherwise the second dendrogram would be
        # drawn on top of the first outside a notebook.
        plt.figure(figsize=size)
        den = dendrogram(link, labels=list(df.index),
                         above_threshold_color='#AAAAAA')
        plt.xticks(rotation=90)
        sns.despine(**no_spine)
        plt.tight_layout()
        plt.savefig(filename)

        cluster_classes = get_cluster_classes(den)
        print(cluster_classes._repr_html_())


if __name__ == '__main__':
    main()