import pandas as pd
import numpy as np
Expository graphs are used to communicate our analysis results to other people. So it's very important to add labels, legends, plot titles, etc.
First load the data (originally from the course Dropbox URL; here from a local copy):
# Load the survey extract from the local copy of the data file.
# Original course Dropbox location, kept for reference:
#   https://dl.dropbox.com/u/7710864/data/csv_hid/ss06pid.csv
pData = pd.read_csv(filepath_or_buffer='../data/ss06pid.csv')
# Bare expression: in a notebook cell this displays a summary of the frame.
pData
<class 'pandas.core.frame.DataFrame'> Int64Index: 14931 entries, 0 to 14930 Columns: 240 entries, Unnamed: 0 to pwgtp80 dtypes: float64(74), int64(163), object(3)
Scatter plot:
# Scatter plot of last-12-month wages against travel time for every row.
# NOTE(review): `figure` is not imported in this file; presumably it comes
# from a pylab-style namespace (e.g. `%pylab inline`) -- confirm.
f = figure()
ax = f.add_subplot(1, 1, 1)
ax.scatter(pData['JWMNP'], pData['WAGP'], s=15, label='All surveyed')
# adjust axes ranges
ax.set_xlim(0, 200)
ax.set_ylim(0, 250000)
# set y-axis ticks
ax.set_yticks([0, 50000, 150000])
# set axes labels and adjust font size
ax.set_xlabel('Travel time (min)', fontsize=15)
ax.set_ylabel('Last 12 month wages (dollars)', fontsize=15)
# adjust tick font sizes and orientation
# tick_params styles every tick on the axis. The previous zip() over the x and
# y tick lists stopped at the shorter list (the y axis has only three ticks),
# leaving the remaining x tick labels at the default size, and it relied on
# Tick.label, which newer matplotlib releases no longer provide.
ax.tick_params(axis='both', labelsize=15)
ax.tick_params(axis='y', labelrotation=90)  # 90 degrees == 'vertical'
# add legend (trailing semicolon suppresses the notebook's echo of the result)
ax.legend();
Scatter plot with different colours for different sexes in the data. We will see this pattern very often later on: when plotting points with different colours in a matplotlib
scatter plot, we need to repeat the scatter
command and assign a different colour each time.
# Scatter plot of wages against travel time, one scatter call per SEX code so
# that each group gets its own colour and legend entry.
# NOTE(review): `figure` is not imported in this file; presumably it comes
# from a pylab-style namespace (e.g. `%pylab inline`) -- confirm.
f = figure()
ax = f.add_subplot(1, 1, 1)
# boolean row masks for the two SEX codes (labelled 'men' / 'women' below)
ix1 = pData['SEX'] == 1
ix2 = pData['SEX'] == 2
ax.scatter(pData['JWMNP'][ix1], pData['WAGP'][ix1], c='k', s=15, label='men')
# alpha lets the points drawn first remain visible underneath
ax.scatter(pData['JWMNP'][ix2], pData['WAGP'][ix2], c='r', s=15, label='women', alpha=.5)
# adjust axes ranges
ax.set_xlim(0, 200)
ax.set_ylim(0, 250000)
# set y-axis ticks
ax.set_yticks([0, 50000, 150000])
# set axes labels
ax.set_xlabel('TT (min)', fontsize=15)
ax.set_ylabel('Wages (dollars)', fontsize=15)
# adjust tick font sizes and orientation
# tick_params styles every tick on the axis. The previous zip() over the x and
# y tick lists stopped at the shorter list (the y axis has only three ticks),
# leaving the remaining x tick labels at the default size, and it relied on
# Tick.label, which newer matplotlib releases no longer provide.
ax.tick_params(axis='both', labelsize=12)
ax.tick_params(axis='y', labelrotation=90)  # 90 degrees == 'vertical'
# add legend
ax.legend()
# add title (trailing semicolon suppresses the notebook's echo of the result)
f.suptitle('Wages earned versus commute time', fontweight='bold', fontsize=16);
Plotting in multiple 'panels'. This will also become a very common pattern for making subplots.
# Two-panel figure: (a) histogram of travel time, (b) wages against travel
# time coloured by SEX code; the result is saved as PDF and PNG.
# NOTE(review): `subplots` is not imported in this file; presumably it comes
# from a pylab-style namespace (e.g. `%pylab inline`) -- confirm.
f, (ax1, ax2) = subplots(ncols=2)
# first plot
travel_time = pData['JWMNP']
ax1.hist(travel_time, bins=100, range=(travel_time.min(), travel_time.max()))
ax1.set_xlabel('CT (min)')
ax1.set_ylabel('Frequency')
# second plot
ix1 = pData['SEX'] == 1
ix2 = pData['SEX'] == 2
ax2.scatter(pData['JWMNP'][ix1], pData['WAGP'][ix1], c='k', s=15, label='men')
ax2.scatter(pData['JWMNP'][ix2], pData['WAGP'][ix2], c='r', s=15, label='women', alpha=.5)
# adjust axes ranges
ax2.set_xlim(0, 200)
ax2.set_ylim(0, 250000)
# set y-axis ticks
ax2.set_yticks([0, 50000, 150000])
# set axes labels
ax2.set_xlabel('CT (min)')
ax2.set_ylabel('Wages (dollars)')
# adjust tick orientation
# tick_params replaces the per-tick loop over Tick.label, an attribute that
# newer matplotlib releases no longer provide.
ax2.tick_params(axis='y', labelrotation=90)  # 90 degrees == 'vertical'
# add legend
ax2.legend()
# add margin text
# Drawn through the figure object itself so the panel labels are tied to `f`
# rather than to whichever figure happens to be current.
f.text(0.3, 0.95, '(a)')
f.text(0.7, 0.95, '(b)');
# save the figure
# to PDF
f.savefig('twoPanel.pdf')
# to PNG
f.savefig('twoPanel.png', dpi=100)