# Exploratory look at scheduling lead times (notebook export).
#
# Shows three tool screenshots, loads data/SchedDaysAdv.csv into `sched_df`,
# prints summary statistics for the 'ScheduledDaysInAdvance' column, and draws
# two histograms of it: one through the pyplot state machine and one through
# explicit Figure/Axes objects (`fig1`, `ax1`).
#
# Bare expressions (e.g. `sched_df.head()`) only render output when they are
# the last statement of an interactive cell; in a plain script they are
# evaluated and discarded.

from IPython import get_ipython
from IPython.display import Image

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Screenshots of the IPython Qt console, the IPython notebook and Spyder.
Image(filename='images/ipython-qtconsole.png')
Image(filename='images/ipython-notebook.png')
Image(filename='images/spyder-hacking.png')

# The notebook used the IPython magic `%cd` here, which is not valid Python.
# This is the equivalent call; it is skipped when not running under IPython.
# NOTE(review): a bare `%cd` changes to the home directory -- the intended
# target directory may have been lost in the export; confirm before relying
# on the relative paths below.
_ipython_shell = get_ipython()
if _ipython_shell is not None:
    _ipython_shell.run_line_magic('cd', '')

sched_df = pd.read_csv('data/SchedDaysAdv.csv')
sched_df
sched_df.head()  # Check out the first few rows of sched_df
sched_df.tail()  # Check out the last few rows.

# Summary statistics plus the 5th and 95th percentiles of the lead time.
sched_df['ScheduledDaysInAdvance'].describe()
p05_leadtime = sched_df['ScheduledDaysInAdvance'].quantile(0.05)
p05_leadtime
p95_leadtime = sched_df['ScheduledDaysInAdvance'].quantile(0.95)
p95_leadtime

# --- Histogram 1: pyplot state-machine interface -------------------------
# density=True plots a normalized (probability density) histogram instead of
# raw counts; it replaces the `normed` keyword, which matplotlib removed.
# alpha in [0,1] is the transparency level (RGBA colors).
plt.hist(sched_df['ScheduledDaysInAdvance'], bins=50, density=True,
         facecolor='green', alpha=0.75)
plt.xlabel('Days')
plt.ylabel('Probability')
plt.title(r'Histogram of Schedule Lead Time')
plt.axis([0, 200, 0, 0.06])  # [xmin, xmax, ymin, ymax]
plt.grid(True)
plt.show()

# --- Histogram 2: explicit Figure / Axes objects -------------------------
fig1 = plt.figure()
ax1 = fig1.add_subplot(1, 1, 1)
# Draw on ax1 directly rather than relying on it being the current axes.
n, bins, patches = ax1.hist(sched_df['ScheduledDaysInAdvance'], bins=50,
                            density=True, facecolor='grey', alpha=0.75)
ax1.patch.set_facecolor('#F0F0F0')  # background of the plotting area
ax1.set_title('Histogram of Scheduled Lead Time')
ax1.set_xlabel('Days')
ax1.set_ylabel('Probability')
ax1.grid(True, color='k')

# Recolor every bar after the fact.
# Seems like there should be a simpler way. Of course, it's easy to just
# rerun the hist() call with the desired color. However, this fine level of
# control makes it possible to set individual bar colors based on some
# condition. In fact, when you originally create the histogram and are
# specifying the color property you can actually set color=[] where the list
# contains the colors of each bar. In the process of generating the color
# list you could do all kinds of logical tests to pick the color of each bar.
for axp in ax1.patches:
    axp.set_facecolor('white')
# Second half of the lead-time exploration: pandas-level plotting and
# group-wise summaries. Relies on `sched_df`, `fig1` and `plt`
# (matplotlib.pyplot) created earlier in this script.

# `display` is only injected automatically inside IPython sessions; import it
# explicitly so the name is always defined.
from IPython.display import display

display(fig1)

# Histogram with a kernel density estimate drawn over it, both via pandas.
# density=True replaces the `normed` keyword that matplotlib removed
# (pandas forwards extra keyword arguments to matplotlib's hist).
sched_df['ScheduledDaysInAdvance'].hist(bins=50, color='k', alpha=0.3,
                                        density=True)
sched_df['ScheduledDaysInAdvance'].plot(
    kind='kde', style='k--', xlim=[0, 100],
    title='Histo of Sched Lead Time (using pandas)')

# Box plots of lead time, one box per InsuranceStatus value.
bp = sched_df.boxplot(column='ScheduledDaysInAdvance', by='InsuranceStatus')
# The bare gcf()/gca() names only exist in %pylab-style namespaces; go
# through pyplot instead.
fig2 = plt.gcf()  # 'g'et 'c'urrent 'f'igure so we can use it later
ax2 = plt.gca()   # 'g'et 'c'urrent 'a'xes so we can use it later
labels = ax2.get_xticklabels()
for label in labels:
    label.set_rotation(90)  # turn the x tick labels vertical
display(fig2)

# Same box plot, drawn horizontally.
bp = sched_df.boxplot(column='ScheduledDaysInAdvance', by='InsuranceStatus',
                      vert=False)

# Mean and 95th percentile of lead time by Urgency ...
sched_df_grp1 = sched_df.groupby(['Urgency'])
sched_df_grp1['ScheduledDaysInAdvance'].mean()
sched_df_grp1['ScheduledDaysInAdvance'].quantile(0.95)

# ... and by each (Urgency, InsuranceStatus) combination.
sched_df_grp2 = sched_df.groupby(['Urgency', 'InsuranceStatus'])
sched_df_grp2['ScheduledDaysInAdvance'].mean()
sched_df_grp2['ScheduledDaysInAdvance'].quantile(0.95)

# One normalized histogram per Urgency value.
sched_df['ScheduledDaysInAdvance'].hist(bins=50, color='k', alpha=0.3,
                                        density=True, by=sched_df['Urgency'])