"""Compare how employee compensation is distributed across cities.

For every city, ``<city>-2012.csv`` is read from the current working
directory, the rows whose ``job_title`` mentions a department are kept, and
the share of those employees falling into each 10,000-wide compensation bin
is computed.  The per-city distributions are then drawn as bar charts, one
subplot per city.

Running this file performs the comparison for police departments three
times: by total pay and benefits, by base pay, and by total benefits.
"""
import pandas as pd
import matplotlib.cm
import matplotlib.pyplot as plt
import numpy as np

try:
    from string import upper
except ImportError:
    # string.upper only exists on Python 2.  The code below no longer needs
    # it, but the name is kept available for anything else that relies on it.
    upper = str.upper

# Width of one compensation bin, in the same unit as the CSV pay columns.
BIN_WIDTH = 10000

# Cities compared by the script-level calls at the bottom of this file.
CITIES = ('Berkeley', 'Albany', 'El Cerrito', 'Richmond', 'Piedmont',
          'Oakland', 'Emeryville')


def bin_pay(city, department, compensation):
    """Return the percentage of a department's employees in each pay bin.

    Args:
        city: File-name stem of the data set; ``'<city>-2012.csv'`` is read
            relative to the current working directory.
        department: Text to look for in the ``job_title`` column.  The match
            is case-insensitive and, as with ``Series.str.contains``, the
            text is interpreted as a regular expression.
        compensation: Name of the numeric column to bin (for example
            ``'base_pay'``).

    Returns:
        A Series indexed by the lower edge of each ``BIN_WIDTH``-wide bin
        whose values are the percentage (0-100) of matching employees in
        that bin.  Employees whose compensation is below ``BIN_WIDTH`` are
        excluded from both the bins and the percentage base.

    Side effects:
        Prints the mean compensation of the employees that were kept.
    """
    # DataFrame.from_csv was removed from pandas; this is its documented
    # equivalent (first column becomes the index, dates in it are parsed).
    df = pd.read_csv('%s-2012.csv' % city, index_col=0, parse_dates=True)

    # The .str accessor tolerates missing titles, which string.upper did not.
    df['job_title'] = df['job_title'].str.upper()
    in_department = df['job_title'].str.contains(department.upper(), na=False)

    # Copy so that adding a column never writes into a view of ``df``.
    dept = df[in_department].copy()

    # ``//`` floors on both Python 2 and 3; plain ``/`` only binned correctly
    # under Python 2's integer division.
    dept['binned_pay'] = dept[compensation].map(
        lambda x: (int(x) // BIN_WIDTH) * BIN_WIDTH)
    dept = dept[dept['binned_pay'] > 0]

    mean_pay = dept[compensation].mean()
    # Single-argument print(...) emits the same text on Python 2 and 3.
    print("Average for %(city)s %(department)s $%(mean_pay)2.02f" % {
        'city': city,
        'department': department,
        'mean_pay': mean_pay,
    })

    counts = dept.groupby(by=['binned_pay'])['binned_pay'].count()
    return 100 * (counts / float(len(dept)))


def plot_compare(cities, department, compensation='total_pay_benefits'):
    """Plot and return the binned pay distribution of several cities.

    Args:
        cities: Iterable of display names.  Each name is lower-cased and its
            spaces are replaced by ``-`` to form the file-name stem handed
            to ``bin_pay``.
        department: Text to look for in each city's ``job_title`` column.
        compensation: Name of the column to bin.

    Returns:
        A DataFrame with one column per city (labelled with the display
        name) and one row per pay bin, holding percentages as computed by
        ``bin_pay``.

    Side effects:
        Draws one bar-chart subplot per city via ``DataFrame.plot``.
    """
    distributions = {}
    for city in cities:
        slug = city.lower().replace(' ', '-')
        distributions[city] = bin_pay(slug, department, compensation)

    df = pd.DataFrame(data=distributions)
    df.plot(kind='bar', colormap='winter', figsize=(18, 12), subplots=True)
    return df


# Left at module level (not behind a __main__ guard) so that executing the
# file still produces the plots and leaves the last result in ``df``.
df = plot_compare(CITIES, 'police')
df = plot_compare(CITIES, 'police', compensation='base_pay')
df = plot_compare(CITIES, 'police', compensation='total_benefits')