# Build an HTML display object from an empty string. Presumably this is the
# tail of a notebook cell, where the object would be rendered (with nothing
# visible, since the markup is empty).
from IPython.display import HTML
HTML('')
# The same import and empty HTML object, repeated.
from IPython.display import HTML
HTML('')
%matplotlib inline
import numpy as np
import pandas as pd
# Two random samples of 100 values each, stored side by side as the float
# columns 'apple' (normal, loc=1, scale=10) and 'banana' (negative binomial,
# n=20, p=0.7).
data = pd.DataFrame(
    np.column_stack([
        np.random.normal(loc=1, scale=10, size=100),
        np.random.negative_binomial(20, 0.7, size=100),
    ]),
    columns=['apple', 'banana'],
)
# Peek at the first rows.
data.head()
# Baseline: plot both columns with whatever plotting defaults are in effect.
data.plot()
# Plot again right after importing seaborn, to compare the rendering.
# NOTE(review): whether the import alone changes the look depends on the
# installed seaborn version -- confirm.
import seaborn as sns
data.plot()
# Switch to seaborn's 'white' style with the 'talk' context, then plot a
# third time.
sns.set(style='white', context='talk')
data.plot()
import matplotlib.pyplot as plt
# A column of a pandas DataFrame can be accessed in two ways:
#   1. item access:      data['columnname']
#   2. attribute access: data.columnname
# The scatter plot below uses one of each: 'apple' on x, 'banana' on y.
plt.scatter(data['apple'], data.banana)
import numpy as np
import matplotlib.pyplot as plt
# Lay out three axes on a 4x4 grid inside a figure created with figsize=(8, 6).
fig = plt.figure(figsize=(8,6))
# ax1: 3x3 cells starting at grid row 1, column 0.
ax1 = plt.subplot2grid((4,4), (1,0), colspan=3, rowspan=3)
# ax2: one row, three columns wide, starting at row 0, column 0.
ax2 = plt.subplot2grid((4,4), (0,0), colspan=3)
# ax3: one column, three rows tall, starting at row 1, column 3.
ax3 = plt.subplot2grid((4,4), (1, 3), rowspan=3)
plt.tight_layout()
# Scatter of the two columns in ax1, histogram of 'apple' in ax2.
ax1.scatter(data.apple, data.banana)
ax2.hist(data.apple)
# Read the current axis limits of ax2.
# NOTE(review): the limits are stored but never used. The earlier comment
# here said the histogram would be turned upside-down by switching the axis
# limits, but no such step follows -- confirm whether it is missing.
ax2_limits = ax2.axis()
# Histogram of 'banana' in ax3, drawn with horizontal bars.
ax3.hist(data.banana, orientation='horizontal')
import numpy as np
import matplotlib.pyplot as plt
# Scatter plot with marginal histograms, annotated with the Pearson
# correlation of the two columns.
fig = plt.figure(figsize=(8, 6))
# 4x4 grid: ax1 is the 3x3 scatter panel, ax2 the strip in row 0 above it,
# ax3 the strip in column 3 to its right.
ax1 = plt.subplot2grid((4, 4), (1, 0), colspan=3, rowspan=3)
ax2 = plt.subplot2grid((4, 4), (0, 0), colspan=3)
ax3 = plt.subplot2grid((4, 4), (1, 3), rowspan=3)
plt.tight_layout()
ax1.scatter(data.apple, data.banana)
ax2.hist(data.apple)
ax3.hist(data.banana, orientation='horizontal')
# Pearson correlation coefficient: the off-diagonal entry of the 2x2
# correlation matrix. (np.correlate would return the raw sum of products
# of the two series, which is not a correlation coefficient.)
pearsonr = np.corrcoef(data.apple, data.banana)[0, 1]
# Get axis limits of the scatterplot
xmin, xmax, ymin, ymax = ax1.axis()
dx = xmax - xmin
dy = ymax - ymin
# Anchor the label 90% of the way along each axis of the scatter panel.
ax1.text(x=xmin + .9*dx, y=ymin + .9*dy,
         s='pearsonr = {:.3f}'.format(pearsonr))
# Joint distribution of 'apple' and 'banana', once per plot kind.
# x, y and data are passed by keyword: current seaborn releases no longer
# accept them positionally, while the keyword form also works on older ones.
sns.jointplot(x='apple', y='banana', data=data)
sns.jointplot(x='apple', y='banana', data=data, kind='reg')
sns.jointplot(x='apple', y='banana', data=data, kind='kde')
sns.jointplot(x='apple', y='banana', data=data, kind='hex')
# Draw the hexbin version once more and save that figure to a PDF file.
sns.jointplot(x='apple', y='banana', data=data, kind='hex')
fig = plt.gcf()
fig.savefig('jointplot_hex.pdf')
# Box plot of the raw value array with plain matplotlib. The trailing
# semicolon presumably suppresses the echoed return value in a notebook.
plt.boxplot(data.values);
# The same data as a seaborn box plot, then as a seaborn violin plot.
sns.boxplot(data)
sns.violinplot(data)
# Remove the top and right axes.
sns.despine()
from scipy.stats import gaussian_kde
def violinplot(ax, x, ys, bp=False, cut=False, bw_method=.5, width=None):
    """Draw one violin per dataset in `ys` on the axes `ax`.

    Parameters
    ----------
    ax : axes-like object
        Must provide ``fill_betweenx`` and, when `bp` is true, ``boxplot``.
    x : sequence of numbers
        Horizontal position of each violin, paired with `ys` in order.
    ys : sequence of 1-D arrays
        One dataset per violin.
    bp : bool, optional
        If true, also draw notched box plots of `ys` at the positions `x`.
    cut : bool, optional
        If false (the default), each violin extends one standard deviation
        of its dataset beyond the data minimum and maximum. If true, it
        stops at the data range.
    bw_method : optional
        Forwarded to ``scipy.stats.gaussian_kde``.
    width : float, optional
        Maximum half-width of a violin. When None it is
        ``min(0.15 * max(span, 1.0), 0.4)`` with ``span = max(x) - min(x)``.

    Adapted by: Olga Botvinnik
    Original Author: Teemu Ikonen
    Based on code by Flavio Codeco Coelho,
    http://pyinsci.blogspot.com/2009/09/violin-plot-with-matplotlib.html
    """
    dist = np.max(x) - np.min(x)
    if width is None:
        width = min(0.15 * max(dist, 1.0), 0.4)
    for d, p in zip(ys, x):
        kde = gaussian_kde(d, bw_method=bw_method)  # kernel density estimate
        # Padding beyond the data range: one standard deviation unless cut.
        pad = 0.0 if cut else np.std(d)
        lower = kde.dataset.min() - pad  # lower bound of violin
        upper = kde.dataset.max() + pad  # upper bound of violin
        # Use a separate name for the support grid so that the positions
        # in `x` stay intact for the box plot below.
        support = np.linspace(lower, upper, 100)
        density = kde.evaluate(support)  # violin profile (density curve)
        # Scale the profile so its peak equals `width`.
        half_width = width * density / density.max()
        ax.fill_betweenx(support, -half_width + p, half_width + p)
    if bp:
        ax.boxplot(ys, notch=1, positions=x, vert=1)
fig, ax = plt.subplots()
# violinplot() iterates over its `ys` argument, so each dataset has to be one
# row: transpose the (rows x columns) value array to get one row per column.
violinplot(ax, range(data.shape[1]), data.values.T)
# NOTE(review): newer seaborn releases appear to spell this option 'point'
# rather than 'points' -- confirm against the installed version.
sns.violinplot(data, inner='points')