# Some Pandas/Matplotlib initialization which I more or less blindly copied
# from an example ipython notebook.
import matplotlib
import pandas as pd

pd.set_option('display.max_columns', 15)
pd.set_option('display.width', 400)
try:
    pd.set_option('display.mpl_style', 'default')
except KeyError:
    # Newer pandas releases no longer register this option (the lookup
    # error they raise is a KeyError subclass). Every plot below sets its
    # own styling explicitly, so carry on without it.
    pass

# Use the matplotlib module's rcParams rather than a bare `rcParams` name,
# which only exists when the notebook was started in pylab mode.
matplotlib.rcParams['figure.figsize'] = (12, 5)

font = {'family': 'sans-serif',
        'weight': 'normal',
        'size': 14}
matplotlib.rc('font', **font)

# RGB components on a 0-255 scale; converted to 0-1 floats when plotting.
LINE_COLOR = (31, 119, 180)

##################


def line_graph_inbox_count_over_time(
        inbox_count_file,
        title,
        sample_frequency,
        min_date=None,
        max_date=None,
):
    """Plot the inbox message count over time as a line graph.

    Args:
        inbox_count_file: Path to the inbox_count.log file generated by
            gmail-logger. It is read as a headerless, tab-separated file
            with the columns ``utc_date``, ``date`` and ``count``; ``date``
            becomes the (parsed) time index.
        title: Text drawn as the plot title.
        sample_frequency: Pandas resampling rule, e.g. 'D' for daily or
            'H' for hourly. See
            http://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.resample.html
        min_date: Optional inclusive lower bound, formatted YYYY-MM-DD.
        max_date: Optional inclusive upper bound, formatted YYYY-MM-DD.

    Returns:
        The matplotlib axes object the graph was drawn on.
    """
    inbox_count_data = pd.read_csv(
        inbox_count_file,
        sep='\t',
        header=None,
        names=['utc_date', 'date', 'count'],
        index_col='date',
        parse_dates=True,
    )

    # Only 'count' is ever plotted, so resample just that column; this also
    # keeps the non-numeric 'utc_date' column out of the aggregation.
    # Average the readings inside each period, then forward-fill the periods
    # that had no readings at all.
    resampled = (
        inbox_count_data['count'].resample(sample_frequency).mean().ffill()
    )

    # The bounds are YYYY-MM-DD strings, so compare them against the index
    # rendered in the same format (lexicographic order == date order).
    if min_date:
        resampled = resampled[
            resampled.index.strftime('%Y-%m-%d') >= min_date]
    if max_date:
        resampled = resampled[
            resampled.index.strftime('%Y-%m-%d') <= max_date]

    ax = resampled.plot(
        legend=False,
        fontsize=14,
        color=[c / 255. for c in LINE_COLOR],
        linewidth=0.8,
        figsize=(12, 5),
    )

    # x axis
    ax.xaxis.grid(False)
    ax.set_xlabel('')

    # y axis
    ax.yaxis.grid(color='black', linestyle='-', alpha=0.2)
    ax.set_ylabel("Number of messages", size=14, labelpad=10)
    # Ensure it starts at 0
    ax.yaxis.set_view_interval(0, ax.yaxis.get_view_interval()[1])
    # Hide topmost gridline
    ax.get_yticklabels()[-1].set_visible(False)
    ax.yaxis.get_gridlines()[-1].set_visible(False)

    # Clean up the background and borders. The background is set through
    # the axes patch because the axes-level setter has been renamed across
    # matplotlib versions, while the patch method has not.
    ax.patch.set_facecolor('white')
    for spine in ['top', 'right']:
        ax.spines[spine].set_visible(False)
    ax.patch.set_alpha(0)
    ax.figure.patch.set_alpha(0)

    # Title
    ax.set_title(title, fontsize=14, ha='center', va='top',
                 position=(0.5, 0.99), color='#333333')

    return ax


# Figure 1
ax = line_graph_inbox_count_over_time(
    'inbox_count.log',
    'Number of messages in inbox, hourly, March/April 2015',
    'H',
    '2015-03-12',
    '2015-04-11',
)
# NOTE(review): the bare `pass` statements presumably keep a notebook cell
# from echoing the returned axes object -- confirm before removing.
pass

# TODO: for certain time scales (like this one), may want to customize
# the display of the x-axis to replace the Pandas default formatters
# See:
#     http://matplotlib.org/api/axis_api.html
#     http://matplotlib.org/api/dates_api.html

# Figure 2
line_graph_inbox_count_over_time(
    'inbox_count.log',
    'Number of messages at end of day, March 2012 - May 2015',
    'D',
)
pass