# Load the autoreload extension and reload all modules before executing code,
# so edits to the library under development are picked up without a restart.
%load_ext autoreload
%autoreload 2
# Render matplotlib figures inline in the notebook.
%matplotlib inline
# Toggle automatic pdb calling on uncaught exceptions.
%pdb
The autoreload extension is already loaded. To reload it, use: %reload_ext autoreload Automatic pdb calling has been turned ON
# Build a small random matrix with planted block structure, cluster it, and
# save the resulting clustered heatmap to a PDF.
import string

# plt is needed below for plt.gcf(); "%matplotlib inline" does not import it.
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# import seaborn as sns
from seaborn.clustering import clusteredheatmap

# sns.set_axes_style('nogrid', 'talk')
shape = (10, 20)
np.random.seed(2013)
# ascii_lowercase/ascii_uppercase are locale-independent and exist on both
# Python 2 and Python 3 (string.lowercase/uppercase are Python 2 only).
df = pd.DataFrame(np.random.randn(*shape),
                  index=list(string.ascii_lowercase[:shape[0]]),
                  columns=list(string.ascii_uppercase[:shape[1]]))

# -- Add some structure in the matrix so we can visually check that clustering worked -- #
# .iloc is used because these slices are positional; .ix guessed between
# labels and positions and has been removed from pandas.
# Add 3 to rows a,b,c,d,e and columns K,L,M,N,O,P,Q,R,S,T
df.iloc[0:5, 10:20] += 3
# Subtract 3 from rows f,g,h,i,j and columns A,B,C,D,E
df.iloc[5:10, 0:5] -= 3

row_dendrogram, col_dendrogram = clusteredheatmap(df)

# To grab the figure to save it, use "plt.gcf()" to "get current figure"
fig = plt.gcf()
fig.savefig('clustered_heatmap.pdf')
/usr/local/lib/python2.7/site-packages/matplotlib/figure.py:1595: UserWarning: This figure includes Axes that are not compatible with tight_layout, so its results might be incorrect. warnings.warn("This figure includes Axes that are not "
# Same demonstration as the first heatmap cell, saved under a second file
# name: random normal matrix with two planted blocks, clustered and plotted.
import string

# plt is needed below for plt.gcf(); "%matplotlib inline" does not import it.
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# import seaborn as sns
from seaborn.clustering import clusteredheatmap

# sns.set_axes_style('nogrid', 'talk')
shape = (10, 20)
np.random.seed(2013)
# Locale-independent letter constants that work on Python 2 and Python 3.
row_labels = list(string.ascii_lowercase[:shape[0]])
column_labels = list(string.ascii_uppercase[:shape[1]])
df = pd.DataFrame(np.random.randn(*shape), index=row_labels,
                  columns=column_labels)

# -- Add some structure in the matrix so the clustering has something to find -- #
# Positional slices, so use .iloc rather than the removed, ambiguous .ix.
# Add 3 to rows a,b,c,d,e and columns K,L,M,N,O,P,Q,R,S,T
df.iloc[0:5, 10:20] += 3
# Subtract 3 from rows f,g,h,i,j and columns A,B,C,D,E
df.iloc[5:10, 0:5] -= 3

row_dendrogram, col_dendrogram = clusteredheatmap(df)

# To grab the figure to save it, use "plt.gcf()" to "get current figure"
fig = plt.gcf()
fig.savefig('clustered_heatmap2.pdf')
color_scale='log'
# Log-scaled heatmap: a uniform background with one block of large values and
# one block of small values, so the data spans several orders of magnitude.
# plt is needed below for plt.gcf(); "%matplotlib inline" does not import it.
import matplotlib.pyplot as plt

np.random.seed(2013)
# size=shape draws a full matrix from [0, 1). uniform(*shape) would instead
# expand to uniform(low=10, high=20) and return ONE scalar that pandas
# broadcasts to every cell.
df = pd.DataFrame(np.random.uniform(size=shape),
                  index=list(string.ascii_lowercase[:shape[0]]),
                  columns=list(string.ascii_uppercase[:shape[1]]))

# -- Add some structure in the matrix so the clustering has something to find -- #
# Positional slices, so use .iloc rather than the removed, ambiguous .ix.
# Add large values (uniform in [100, 1000)) to rows a,b,c,d,e and
# columns K,L,M,N,O,P,Q,R,S,T
df.iloc[0:5, 10:20] += np.random.uniform(low=100, high=1000, size=(5, 10))
# Overwrite rows f,g,h,i,j and columns A,B,C,D,E with small values
# (uniform in [0, 0.1))
df.iloc[5:10, 0:5] = np.random.uniform(high=0.1, size=(5, 5))

row_dendrogram, col_dendrogram = clusteredheatmap(df, color_scale='log')

# To grab the figure to save it, use "plt.gcf()" to "get current figure"
fig = plt.gcf()
fig.savefig('clustered_heatmap_log.pdf')
# Log-scaled heatmap with per-column side colors: vowel columns are pink and
# all other columns are grey.
import brewer2mpl

np.random.seed(2013)
# size=shape draws a full matrix from [0, 1). uniform(*shape) would instead
# expand to uniform(low=10, high=20) and return ONE scalar that pandas
# broadcasts to every cell.
df = pd.DataFrame(np.random.uniform(size=shape),
                  index=list(string.ascii_lowercase[:shape[0]]),
                  columns=list(string.ascii_uppercase[:shape[1]]))

# -- Add some structure in the matrix so the clustering has something to find -- #
# Positional slices, so use .iloc rather than the removed, ambiguous .ix.
# Add large values (uniform in [100, 1000)) to rows a,b,c,d,e and
# columns K,L,M,N,O,P,Q,R,S,T
df.iloc[0:5, 10:20] += np.random.uniform(low=100, high=1000, size=(5, 10))
# Overwrite rows f,g,h,i,j and columns A,B,C,D,E with small values
# (uniform in [0, 0.1))
df.iloc[5:10, 0:5] = np.random.uniform(high=0.1, size=(5, 5))

# Lower- and upper-case vowels; str.upper() replaces the Python-2-only
# string.upper function.
vowels = ['a', 'e', 'i', 'o', 'u']
vowels += [vowel.upper() for vowel in vowels]

# Pick two colors out of the 9-color qualitative "Set1" palette.
set1 = brewer2mpl.get_map('Set1', 'qualitative', 9).mpl_colors
grey = set1[8]
pink = set1[7]

col_kws = {}
col_kws['side_colors'] = [pink if letter in vowels else grey for letter in df.columns]
row_dendrogram, col_dendrogram = clusteredheatmap(df, color_scale='log', col_kws=col_kws,
                                                  pcolormesh_kws=dict(linewidth=0.1),
                                                  figsize=(20, 10))
Supplied a precomputed column linkage and removed log-scaling
from scipy.cluster.hierarchy import linkage
from scipy.spatial import distance

# Column options: vowel/non-vowel side colors plus a linkage computed here
# over the columns (rows of df.values.T) with the 'weighted' method and
# cosine metric, passed in via the 'linkage' key.
col_kws = {
    'side_colors': [pink if letter in vowels else grey for letter in df.columns],
    'linkage': linkage(df.values.T, method='weighted', metric='cosine'),
}
# Row options: only a font size is set.
row_kws = {'fontsize': 14}

# No color_scale argument this time, so the call relies on its default.
row_dendrogram, col_dendrogram = clusteredheatmap(
    df,
    col_kws=col_kws,
    row_kws=row_kws,
    pcolormesh_kws={'linewidth': 0.1},
    figsize=(20, 10))
from scipy.cluster.hierarchy import linkage
from scipy.spatial import distance

# Same column options as the previous example: side colors plus a
# weighted/cosine linkage computed over the columns.
col_kws = {
    'side_colors': [pink if letter in vowels else grey for letter in df.columns],
    'linkage': linkage(df.values.T, method='weighted', metric='cosine'),
}
# Row options now also pass label_loc='heatmap'
# (presumably places the row labels next to the heatmap -- TODO confirm).
row_kws = {'fontsize': 14, 'label_loc': 'heatmap'}

row_dendrogram, col_dendrogram = clusteredheatmap(
    df,
    col_kws=col_kws,
    row_kws=row_kws,
    pcolormesh_kws={'linewidth': 0.1},
    figsize=(20, 10))
Setting `vmin=10` in `pcolormesh_kws` will leave a white hole for all values less than 10.
from scipy.cluster.hierarchy import linkage
from scipy.spatial import distance

# Column options: side colors plus a weighted/cosine linkage computed over
# the columns.
col_kws = {
    'side_colors': [pink if letter in vowels else grey for letter in df.columns],
    'linkage': linkage(df.values.T, method='weighted', metric='cosine'),
}
row_kws = {'fontsize': 14, 'label_loc': 'heatmap'}
# vmin=10 sets a lower bound of 10 in the keyword arguments handed over as
# pcolormesh_kws.
pcolormesh_kws = {'linewidth': 0.1, 'vmin': 10}

row_dendrogram, col_dendrogram = clusteredheatmap(
    df,
    col_kws=col_kws,
    row_kws=row_kws,
    pcolormesh_kws=pcolormesh_kws,
    figsize=(20, 10))
If you want to provide a "tidy" data frame, then you need to supply `pivot_kws`
describing how to pivot the data frame into an index × variable orientation.
# Turn the wide matrix into long ("tidy") form: the row labels become an
# 'index' column, and every remaining column is unpivoted into
# 'variable'/'value' pairs.
tidy_df = pd.melt(df.reset_index(), id_vars=['index'])
# Show the first few rows.
tidy_df.head()
index | variable | value | |
---|---|---|---|
0 | a | A | 16.754482 |
1 | b | A | 16.754482 |
2 | c | A | 16.754482 |
3 | d | A | 16.754482 |
4 | e | A | 16.754482 |
# Pivot the tidy frame back to wide form: one row per 'index' value, one
# column per 'variable' value, cells filled from 'value'.
tidy_df.pivot(index='index', columns='variable', values='value')
variable | A | B | C | D | E | F | G | H | I | J | K | L | M | N | O | P | Q | R | S | T |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
index | ||||||||||||||||||||
a | 16.754482 | 16.754482 | 16.754482 | 16.754482 | 16.754482 | 16.754482 | 16.754482 | 16.754482 | 16.754482 | 16.754482 | 977.467822 | 357.573221 | 656.585007 | 685.393104 | 336.236542 | 747.845409 | 873.226435 | 652.618601 | 643.468569 | 901.881878 |
b | 16.754482 | 16.754482 | 16.754482 | 16.754482 | 16.754482 | 16.754482 | 16.754482 | 16.754482 | 16.754482 | 16.754482 | 744.065732 | 193.006425 | 414.248760 | 192.536007 | 265.631509 | 721.871328 | 500.605018 | 581.021070 | 825.276554 | 216.177039 |
c | 16.754482 | 16.754482 | 16.754482 | 16.754482 | 16.754482 | 16.754482 | 16.754482 | 16.754482 | 16.754482 | 16.754482 | 561.580966 | 463.321333 | 485.860848 | 560.689446 | 926.934887 | 914.650689 | 207.084161 | 358.139665 | 978.760896 | 261.890429 |
d | 16.754482 | 16.754482 | 16.754482 | 16.754482 | 16.754482 | 16.754482 | 16.754482 | 16.754482 | 16.754482 | 16.754482 | 272.054448 | 761.410113 | 507.357828 | 689.899092 | 957.919728 | 603.657998 | 1011.067256 | 670.823493 | 685.397084 | 843.935715 |
e | 16.754482 | 16.754482 | 16.754482 | 16.754482 | 16.754482 | 16.754482 | 16.754482 | 16.754482 | 16.754482 | 16.754482 | 438.662956 | 268.311954 | 524.052986 | 871.926162 | 138.924540 | 972.120320 | 573.118039 | 346.511309 | 576.126968 | 832.702172 |
f | 0.019749 | 0.090150 | 0.041076 | 0.034478 | 0.036481 | 16.754482 | 16.754482 | 16.754482 | 16.754482 | 16.754482 | 16.754482 | 16.754482 | 16.754482 | 16.754482 | 16.754482 | 16.754482 | 16.754482 | 16.754482 | 16.754482 | 16.754482 |
g | 0.005595 | 0.003681 | 0.021686 | 0.039696 | 0.088112 | 16.754482 | 16.754482 | 16.754482 | 16.754482 | 16.754482 | 16.754482 | 16.754482 | 16.754482 | 16.754482 | 16.754482 | 16.754482 | 16.754482 | 16.754482 | 16.754482 | 16.754482 |
h | 0.097598 | 0.003760 | 0.062413 | 0.001316 | 0.041316 | 16.754482 | 16.754482 | 16.754482 | 16.754482 | 16.754482 | 16.754482 | 16.754482 | 16.754482 | 16.754482 | 16.754482 | 16.754482 | 16.754482 | 16.754482 | 16.754482 | 16.754482 |
i | 0.020746 | 0.032760 | 0.034827 | 0.080269 | 0.026543 | 16.754482 | 16.754482 | 16.754482 | 16.754482 | 16.754482 | 16.754482 | 16.754482 | 16.754482 | 16.754482 | 16.754482 | 16.754482 | 16.754482 | 16.754482 | 16.754482 | 16.754482 |
j | 0.067976 | 0.084098 | 0.033085 | 0.090540 | 0.005442 | 16.754482 | 16.754482 | 16.754482 | 16.754482 | 16.754482 | 16.754482 | 16.754482 | 16.754482 | 16.754482 | 16.754482 | 16.754482 | 16.754482 | 16.754482 | 16.754482 | 16.754482 |
# Same keyword arguments as the explicit tidy_df.pivot(...) call, handed to
# clusteredheatmap as pivot_kws together with the tidy frame.
pivot_kws = {'values': 'value', 'index': 'index', 'columns': 'variable'}

row_dendrogram, col_dendrogram = clusteredheatmap(
    tidy_df,
    pivot_kws=pivot_kws,
    col_kws=col_kws,
    row_kws=row_kws,
    pcolormesh_kws=pcolormesh_kws,
    figsize=(20, 10))