# Notebook cells exported as a script. Statement order is kept as-is on
# purpose: importing seaborn part-way through is what demonstrates the
# styling difference between the consecutive `data.plot()` calls.
from IPython.display import HTML

HTML('')

from IPython.display import HTML

HTML('')

# `%matplotlib inline` is IPython-only syntax; this is the equivalent call
# that is also valid Python. `get_ipython` only exists inside IPython/Jupyter.
get_ipython().run_line_magic('matplotlib', 'inline')

import numpy as np
import pandas as pd

# Two columns of 100 samples each: normal "apple", negative-binomial "banana".
data = pd.DataFrame(
    np.vstack([
        np.random.normal(loc=1, scale=10, size=100),
        np.random.negative_binomial(20, 0.7, size=100),
    ]).T,
    columns=('apple', 'banana'),
)
data.head()

data.plot()

import seaborn as sns

data.plot()

sns.set(style='white', context='talk')
data.plot()

import matplotlib.pyplot as plt

# the two ways you can access a column in a pandas Dataframe:
# 1. data['columnname']
# 2. data.columnname
plt.scatter(data['apple'], data.banana)

# --- Hand-built joint plot: scatter with marginal histograms -------------
import numpy as np
import matplotlib.pyplot as plt

fig = plt.figure(figsize=(8, 6))
ax1 = plt.subplot2grid((4, 4), (1, 0), colspan=3, rowspan=3)
ax2 = plt.subplot2grid((4, 4), (0, 0), colspan=3)
ax3 = plt.subplot2grid((4, 4), (1, 3), rowspan=3)
plt.tight_layout()

ax1.scatter(data.apple, data.banana)
ax2.hist(data.apple)

# Capture the current axis limits of the top histogram.
# NOTE: the limits are only read here; nothing below re-applies them in
# swapped order, so the histogram is not actually turned upside-down.
ax2_limits = ax2.axis()

ax3.hist(data.banana, orientation='horizontal')

# --- Same figure again, annotated with the correlation coefficient -------
import numpy as np
import matplotlib.pyplot as plt

fig = plt.figure(figsize=(8, 6))
ax1 = plt.subplot2grid((4, 4), (1, 0), colspan=3, rowspan=3)
ax2 = plt.subplot2grid((4, 4), (0, 0), colspan=3)
ax3 = plt.subplot2grid((4, 4), (1, 3), rowspan=3)
plt.tight_layout()

ax1.scatter(data.apple, data.banana)
ax2.hist(data.apple)
ax3.hist(data.banana, orientation='horizontal')

# Pearson's r is the off-diagonal entry of the 2x2 correlation matrix.
# (`np.correlate` returns the un-normalised sum of products, which is not a
# correlation coefficient.)
pearsonr = np.corrcoef(data.apple, data.banana)[0, 1]

# Get axis limits of the scatterplot
xmin, xmax, ymin, ymax = ax1.axis()
dx = xmax - xmin
dy = ymax - ymin

# Place the label at 90% of the way along each axis of the scatterplot.
ax1.text(x=xmin + .9*dx, y=ymin + .9*dy,
         s='pearsonr = {:.3f}'.format(pearsonr))

# --- The same joint plots via seaborn -------------------------------------
# x / y / data are passed by keyword: this works on every seaborn release,
# whereas newer releases reject them as positional arguments.
sns.jointplot(x='apple', y='banana', data=data)

sns.jointplot(x='apple', y='banana', data=data, kind='reg')

sns.jointplot(x='apple', y='banana', data=data, kind='kde')

sns.jointplot(x='apple', y='banana', data=data, kind='hex')

sns.jointplot(x='apple', y='banana', data=data, kind='hex')
# jointplot creates its own figure; grab it to save to disk.
fig = plt.gcf()
fig.savefig('jointplot_hex.pdf')

# --- Distribution plots ----------------------------------------------------
plt.boxplot(data.values)

sns.boxplot(data)

sns.violinplot(data)
# remove the top and right axes
sns.despine()

from scipy.stats import gaussian_kde


def violinplot(ax, x, ys, bp=False, cut=False, bw_method=.5, width=None):
    """Draw one violin per dataset in `ys` on the axes `ax`.

    Parameters
    ----------
    ax : matplotlib axes
        Axes to draw on.
    x : sequence of numbers
        Horizontal position of each violin, paired in order with `ys`.
    ys : sequence of 1-D array-likes
        One dataset per violin. A 2-D array is iterated row by row, so each
        row is one dataset.
    bp : bool
        If true, overlay a notched boxplot of the same datasets at the same
        positions.
    cut : bool
        If false (the default) each violin extends one standard deviation
        beyond the data's min and max; if true it stops at the extremes.
    bw_method : scalar, str or callable
        Forwarded to `scipy.stats.gaussian_kde`.
    width : float, optional
        Half-width of each violin at its widest point. Defaults to
        ``min(0.15 * max(span of x, 1.0), 0.4)``.

    Adapted by: Olga Botvinnik
    Original Author: Teemu Ikonen
    Based on code by Flavio Codeco Coelho,
    http://pyinsci.blogspot.com/2009/09/violin-plot-with-matplotlib.html
    """
    # Materialise both inputs once: they are reused by the optional boxplot,
    # and a list of 1-D arrays makes `boxplot` treat each entry as a dataset
    # (a bare 2-D array would be split by column instead).
    positions = list(x)
    datasets = [np.asarray(d) for d in ys]

    dist = np.max(positions) - np.min(positions)
    if width is None:
        width = min(0.15 * max(dist, 1.0), 0.4)

    for d, p in zip(datasets, positions):
        k = gaussian_kde(d, bw_method=bw_method)  # kernel density estimate
        # Padding beyond the data range; FIXME: one standard deviation is an
        # arbitrary choice.
        pad = 0.0 if cut else np.std(d)
        lower = k.dataset.min() - pad  # lower bound of violin
        upper = k.dataset.max() + pad  # upper bound of violin
        # Use a separate name for the grid: the positions in `x` must stay
        # intact for the boxplot below.
        support = np.linspace(lower, upper, 100)
        v = k.evaluate(support)  # violin profile (density curve)
        v = width * v / v.max()  # scale the violin to the available space
        ax.fill_betweenx(support, -v + p, v + p)

    if bp:
        ax.boxplot(datasets, notch=True, positions=positions, vert=True)


fig, ax = plt.subplots()
# One violin per column: iterating `data.values` directly would yield rows,
# so transpose to hand over column-wise datasets.
violinplot(ax, range(data.shape[1]), data.values.T)

sns.violinplot(data, inner='points')