This code generated the line graphs in "Figure 1" and "Figure 2" in Three Years of Logging My Inbox Count.
Paste the following into an IPython Notebook. (For inline graphs, start IPython Notebook with `ipython notebook --pylab inline`.)
Call the `line_graph_inbox_count_over_time` function as below. Its arguments are documented inline. `inbox_count_file` should be the location of the `inbox_count.log` file generated by gmail-logger.
# Pandas/Matplotlib display initialization, adapted more or less verbatim
# from an example IPython notebook.
import matplotlib
import pandas as pd

# Let pandas print wider tables before it starts truncating them.
pd.set_option('display.max_columns', 15)
pd.set_option('display.width', 400)
# NOTE(review): the 'display.mpl_style' option may not exist in newer pandas
# releases -- confirm against the installed version.
pd.set_option('display.mpl_style', 'default')

# Reach rcParams through the matplotlib module instead of as a bare name.
# The bare name is only defined when the notebook was started with --pylab,
# so the original line raised NameError in a plain session.  Under --pylab
# both spellings refer to the same settings object.
matplotlib.rcParams['figure.figsize'] = (12, 5)

# Default font for every text element drawn by matplotlib.
font = {'family': 'sans-serif',
        'weight': 'normal',
        'size': 14}
matplotlib.rc('font', **font)

# Line colour as 0-255 RGB channels; the plotting function divides each
# channel by 255 before handing the colour to the plot call.
LINE_COLOR = (31, 119, 180)
##################
def line_graph_inbox_count_over_time(
        inbox_count_file,
        title,
        sample_frequency,
        min_date=None,
        max_date=None,
):
    """Draw the logged inbox count as a line graph and return its axes.

    Arguments:
        inbox_count_file: Path to the inbox_count.log file generated by
            gmail-logger.  It is read as a headerless, tab-separated file
            whose columns are named utc_date, date and count; the date
            column is parsed and becomes the index.
        title: Text placed at the top of the graph.
        sample_frequency: Resampling rule, 'D' for daily or 'H' for hourly.
            See http://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.resample.html
        min_date: Optional 'YYYY-MM-DD' string.  Rows whose formatted date
            sorts before it are dropped.
        max_date: Optional 'YYYY-MM-DD' string.  Rows whose formatted date
            sorts after it are dropped.

    Returns:
        The axes object returned by the pandas plot call, after styling.
    """
    # NOTE(review): `fill_method=` and `set_axis_bgcolor` below are older
    # pandas/matplotlib spellings -- confirm they exist in the library
    # versions this notebook runs against.
    log_frame = pd.read_csv(
        inbox_count_file,
        sep='\t',
        header=None,
        names=['utc_date', 'date', 'count'],
        index_col='date',
        parse_dates=True,
    )
    sampled = log_frame.resample(sample_frequency, fill_method='pad')

    # The date bounds are compared as strings: 'YYYY-MM-DD' text sorts in
    # the same order as the dates it encodes.
    day_format = '%Y-%m-%d'
    if min_date:
        keep_rows = [min_date <= stamp.strftime(day_format)
                     for stamp in sampled.index]
        sampled = sampled[keep_rows]
    if max_date:
        keep_rows = [stamp.strftime(day_format) <= max_date
                     for stamp in sampled.index]
        sampled = sampled[keep_rows]

    # LINE_COLOR holds 0-255 channels; scale each one down to a 0-1 float.
    unit_rgb = [channel / 255. for channel in LINE_COLOR]
    ax = sampled['count'].plot(
        legend=False,
        fontsize=14,
        color=unit_rgb,
        linewidth=0.8,
        figsize=(12, 5),
    )
    x_axis = ax.xaxis
    y_axis = ax.yaxis

    # Horizontal axis: no vertical gridlines and no label.
    x_axis.grid(False)
    ax.set_xlabel('')

    # Vertical axis: faint horizontal gridlines plus a label.
    y_axis.grid(color='black', linestyle='-', alpha=0.2)
    ax.set_ylabel("Number of messages", size=14, labelpad=10)

    # Keep the current upper limit but make the view start at zero.
    upper_limit = y_axis.get_view_interval()[1]
    y_axis.set_view_interval(0, upper_limit)

    # Drop the topmost tick label and the topmost gridline.
    ax.get_yticklabels()[-1].set_visible(False)
    y_axis.get_gridlines()[-1].set_visible(False)

    # Background and borders: white axes, no top/right spines, and fully
    # transparent axes and figure patches.
    ax.set_axis_bgcolor('white')
    for side in ('top', 'right'):
        ax.spines[side].set_visible(False)
    for background in (ax.patch, ax.figure.patch):
        background.set_alpha(0)

    # Title, centred and anchored just below the top edge of the axes.
    ax.set_title(
        title,
        fontsize=14,
        ha='center',
        va='top',
        position=(0.5, 0.99),
        color='#333333',
    )
    return ax
# Figure 1: hourly inbox count between 2015-03-12 and 2015-04-11.
ax = line_graph_inbox_count_over_time(
    inbox_count_file='inbox_count.log',
    title='Number of messages in inbox, hourly, March/April 2015',
    sample_frequency='H',
    min_date='2015-03-12',
    max_date='2015-04-11',
)
# No-op final statement; presumably there so the notebook cell ends without
# echoing a value -- confirm before removing.
pass
# TODO: for certain time scales (like this one), may want to customize
# the display of the x-axis to replace the Pandas default formatters
# See:
# http://matplotlib.org/api/axis_api.html
# http://matplotlib.org/api/dates_api.html
# Figure 2: daily inbox count over the whole log (no date bounds given).
line_graph_inbox_count_over_time(
    inbox_count_file='inbox_count.log',
    title='Number of messages at end of day, March 2012 - May 2015',
    sample_frequency='D',
)
# No-op final statement; presumably keeps the notebook from echoing the
# returned axes object as the cell's output -- confirm before removing.
pass