from __future__ import division, print_function
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
# The "magic" command is so the plots show up in output cells
# pyplot is the plotting interface and is what is typically used.
# Tradition dictates it be called plt (like numpy is np)
# Load the iris measurements from the CSV file (path is relative to the
# notebook's working directory).
iris = pd.read_csv("iris.csv")
# Bare expression: as the last line of a notebook cell it displays the
# column labels of the loaded frame.
iris.columns
Index([u'Unnamed: 0', u'SepalLength', u'SepalWidth', u'PetalLength', u'PetalWidth', u'Species'], dtype='object')
# Three scatter calls issued through the stateful pyplot interface in a
# single cell. The species labels are converted to integer codes once and
# reused as the per-point colour values.
species_codes = iris.Species.factorize()[0]
plt.scatter(iris.SepalLength, iris.SepalWidth)
plt.scatter(iris.PetalLength, iris.PetalWidth, c=species_codes)
plt.scatter(iris.PetalLength, iris.PetalWidth, c=species_codes, cmap="Set1")
<matplotlib.collections.PathCollection at 0x107d4ac50>
# Display the distinct values found in the Species column.
iris.Species.unique()
array(['setosa', 'versicolor', 'virginica'], dtype=object)
matplotlib is a stateful library. By default, state accumulates within a cell, so the prior cell actually drew several plots onto the same axes. That is why there are four colors in the plot above: one of them comes from the earlier scatter call!
# Same cell as before, with the first two scatter calls disabled so that
# only the species-coloured petal plot is drawn.
#plt.scatter(iris.SepalLength, iris.SepalWidth)
#plt.scatter(iris.PetalLength, iris.PetalWidth, c=iris.Species.factorize()[0])
plt.scatter(iris.PetalLength, iris.PetalWidth, c=iris.Species.factorize()[0], cmap="Set1")
<matplotlib.collections.PathCollection at 0x107e93490>
State is used to build up more complex plots, layering and adding legends, grids and other guides.
matplotlib colormaps map from inputs in 0-1 to a set of colors
%pdoc plt.legend
# Draw one scatter series per species so that each species gets its own
# legend entry, then add the legend and a grid to the same axes.
cmap = plt.get_cmap("Set1")
species = iris.Species.unique()
for (i, spec) in enumerate(species):
    subset = iris[iris.Species == spec]
    # i/len(species) is a fraction in [0, 1) (true division comes from the
    # __future__ import), which the colormap turns into one RGBA tuple.
    # Pass it as `color=` rather than `c=`: a lone RGBA tuple given to `c`
    # is ambiguous with a sequence of values to be colormapped.
    plt.scatter(subset.PetalLength, subset.PetalWidth,
                color=cmap(i / len(species)), label=spec)
plt.legend(loc="lower right")
plt.grid()
The stateful interface is common, and commonly used for simple plots. However, building custom plots becomes cumbersome under that interface...too much is hidden. The object-oriented interface is more precise. I personally encourage its use.
# Load the two stock tables from HDF5 files; "__data__" is the key of the
# stored object inside each file.
goog = pd.read_hdf("GOOG.hdf5", "__data__")
# NOTE(review): the variable is spelled `appl` although the file is AAPL.
# A later cell refers to it by this name, so the spelling is kept.
appl = pd.read_hdf("AAPL.hdf5", "__data__")
# Bare expression: displays the column labels of the Google table.
goog.columns
Index([u'adj_close', u'close', u'date', u'high', u'low', u'open', u'volume'], dtype='object')
A few notes on HDF5:
# Object-oriented interface: keep explicit handles to the figure and its
# single axes, then configure the axes through that handle.
fig, axes = plt.subplots()
axes.plot(goog.date, goog.close)
axes.set(xlabel="date", ylabel="price", title="Google Stock Price")
plt.show()
from datetime import datetime

# Wide, short figure; figsize is in inches (dpi can also be set).
fig, axes = plt.subplots(figsize=(12, 3))
# Convert the date column before handing it to matplotlib as the x values.
trading_days = goog.date.astype(datetime)
axes.plot(trading_days, goog.close)
axes.set_title("Google Stock Price")
axes.set_xlabel("date")
axes.set_ylabel("price")
plt.show()
# One row, two columns: `axes` is a sequence of two Axes, paired here with
# the two stock tables so each table is drawn in its own panel.
fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(12, 3))
for (ax, stock) in zip(axes, [goog, appl]):
    ax.plot(stock.date.astype(datetime), stock.close)
    ax.set_xlabel('date')
    ax.set_ylabel('price')
# Applied once to the whole figure after both panels are drawn, so the date
# tick labels are formatted to avoid overlapping.
fig.autofmt_xdate()
# Petal width against petal length using the object-oriented interface.
fig, axes = plt.subplots()
axes.scatter(iris.PetalWidth, iris.PetalLength)
axes.set_xlabel("Petal Width")
# The y data is the PetalLength column, so label it as length (the label
# previously read "Petal Height", which matches no column in the data).
axes.set_ylabel("Petal Length")
<matplotlib.text.Text at 0x1084a9350>
# Histogram of Google closing prices in 50 bins, drawn in translucent green.
fig, axes = plt.subplots()
hist_result = axes.hist(goog.close, bins=50, alpha=0.75, facecolor='g')
# Only the third element of the returned tuple is picked out; the value is
# not used further.
hist_result[2]
axes.set_title('Histogram of closing prices')
<matplotlib.text.Text at 0x109011b10>
Matplotlib understands RGB tuples, but it also has a few color shortcuts:
# Closing-price line with a red-diamond marker on every 20th sample.
fig, axes = plt.subplots(figsize=(12, 3))
# Convert the dates once and use the same converted values for both series,
# so the line and its markers share identical x values (the markers
# previously used the unconverted date column).
dates = goog.date.astype(datetime)
axes.plot(dates, goog.close)
# "rd" is the format shortcut for red ("r") diamond ("d") markers.
axes.plot(dates[::20], goog.close[::20], "rd")
[<matplotlib.lines.Line2D at 0x109529d50>]
Like the color shortcuts, there are shape shortcuts as well. Common ones are:
The "rd" above indicates that red diamonds should be used.
Full list. Note: If there are few enough data points, markers can be added to the line itself with the `marker` keyword argument. However, this plot needed downsampling for clarity, so the markers were drawn as a separate, downsampled series instead.
# Two cosine curves on one axes to demonstrate line-style options: a blue
# dashed line and a thick dash-dot line in the default colour.
fig, axes = plt.subplots(figsize=(12, 3))
t2 = np.arange(0.0, 5.0, 0.02)
axes.plot(t2, np.cos(2*np.pi*t2), linestyle='--', color="b")
axes.plot(t2, np.cos(3*np.pi*t2), linestyle='-.', linewidth=5)
[<matplotlib.lines.Line2D at 0x1095807d0>]
# Bar chart of n squared for n = 0..5: half-width, translucent green bars
# centred on each n.
n = np.arange(6)
fig, axes = plt.subplots()
axes.bar(n, np.square(n), width=.5, align="center", color="g", alpha=.5)
<Container object of 6 artists>
# Plot x^2 and x^3 on [-1, 1] and annotate each curve with a LaTeX label.
fig, ax = plt.subplots()
xx = np.linspace(-1., 1., 100)
for power in (2, 3):
    ax.plot(xx, xx**power)
# Text x/y positions are given in input value (data) space.
ax.text(0.2, 0.25, r"$y=x^2$", color="b", fontsize=22)
# The trailing semicolon suppresses the cell's output.
ax.text(0.55, 0.05, r"$y=x^3$", color="g", fontsize=22);
#adapted from http://nbviewer.ipython.org/github/jrjohansson/scientific-python-lectures/blob/master/Lecture-4-Matplotlib.ipynb
# Module-level parameters read by flux_qubit_potential at call time.
alpha = 0.7
phi_ext = 2 * np.pi * 0.5


def flux_qubit_potential(phi_m, phi_p):
    """Evaluate the example potential at the given phase values.

    Computes ``2 + alpha - 2*cos(phi_p)*cos(phi_m) - alpha*cos(phi_ext - 2*phi_p)``
    using the module-level ``alpha`` and ``phi_ext``. Only numpy ufuncs and
    arithmetic are applied, so scalars and arrays are both accepted and the
    result follows numpy broadcasting.
    """
    return 2 + alpha - 2 * np.cos(phi_p)*np.cos(phi_m) - alpha * np.cos(phi_ext - 2*phi_p)


# Sample both phases on the same 100-point grid over [0, 2*pi].
phi_m = np.linspace(0, 2*np.pi, 100)
phi_p = np.linspace(0, 2*np.pi, 100)
X,Y = np.meshgrid(phi_p, phi_m)
# The grids are passed in (X, Y) order and the result is transposed; because
# phi_m and phi_p are the same grid, Z[i, j] equals the potential at
# phi_m = grid[i], phi_p = grid[j].
Z = flux_qubit_potential(X, Y).T
# Left panel: pseudocolour map of Z with a colourbar. Right panel: labelled
# contour lines of the same Z. Both use the same colormap and colour limits.
fig, ax = plt.subplots(ncols=2, figsize=(10,4))
z_abs = abs(Z)
color_limits = dict(cmap="RdBu", vmin=z_abs.min(), vmax=z_abs.max())
# Axes of the left panel are the grid values divided by 2*pi.
p = ax[0].pcolor(X/(2*np.pi), Y/(2*np.pi), Z, **color_limits)
fig.colorbar(p, ax=ax[0])
p2 = ax[1].contour(Z, extent=[0,1,0,1], **color_limits)
p2.clabel(p2.levels, inline=1, fontsize=10)
<a list of 25 text.Text objects>
//anaconda/lib/python2.7/site-packages/matplotlib/text.py:52: UnicodeWarning: Unicode equal comparison failed to convert both arguments to Unicode - interpreting them as being unequal if rotation in ('horizontal', None): //anaconda/lib/python2.7/site-packages/matplotlib/text.py:54: UnicodeWarning: Unicode equal comparison failed to convert both arguments to Unicode - interpreting them as being unequal elif rotation == 'vertical':
With the Iris data, make a scatterplot with the color tied to species. Include a legend.
<matplotlib.legend.Legend at 0x10b8cf3d0>
Create a plot with both AAPL and GOOG stock tickers on the same panel.
Place a histogram of Google and Apple stock prices side-by-side in different colors.
<matplotlib.text.Text at 0x1082177d0>
Make a 2x2 grid of line plots. At least one should include multiple series, and at least one should use markers.