This code generated "Figure 3" in Three Years of Logging My Inbox Count.
To use it:
1. Run compute_ages_on_each_day.py from gmail-graphs on the directory containing the JSON output files of gmail-logger. This will generate a JSON object which maps keys like "2015-03-01" to arrays like [0, 0, 0, 1, 3, 10, 12, 12]:
   python compute_ages_on_each_day.py /path/to/your/json/log/files/dir/ > ages.json
2. Set AGES_BY_DAY_FILE below to the file produced in step 1. Change other constants as desired.
(For inline graphs, start IPython Notebook with `ipython notebook --pylab inline`.)
# Pandas/Matplotlib initialization: console display limits, default figure
# size and the default font used by every plot below.
import matplotlib
import pandas as pd

pd.set_option('display.max_columns', 15)
pd.set_option('display.width', 400)
try:
    # Purely cosmetic. This option is not registered on every pandas release,
    # and an unknown option raises an error that derives from KeyError, so
    # skip it rather than abort the whole script.
    pd.set_option('display.mpl_style', 'default')
except (KeyError, AttributeError):
    pass

# Go through the matplotlib module instead of a bare `rcParams` name, which
# only exists when the notebook was started with --pylab.
matplotlib.rcParams['figure.figsize'] = (12, 5)
font = {
    'family': 'sans-serif',
    'weight': 'normal',
    'size': 14,
}
matplotlib.rc('font', **font)
# Path to the JSON produced by compute_ages_on_each_day.py.
AGES_BY_DAY_FILE = 'ages.json'

# Inclusive date bounds in 'YYYY-MM-DD' form. Use None to leave a side open.
MIN_DATE = '2015-02-01'
MAX_DATE = '2015-05-02'

TITLE = u'Age of messages in inbox at end of day, 2015'

# Inclusive (low, high) age buckets, in days.
RANGES = [(0, 1), (2, 4), (5, 8), (9, 16), (17, 32), (33, 99999)]

# "RdYlBu" from http://colorbrewer2.org/, listed red-to-blue as 0-255 RGB
# triples, then reversed and scaled to 0-1 floats.
# Keep one color per entry of RANGES.
COLORS = [
    [channel / 255. for channel in rgb]
    for rgb in reversed([
        (215, 48, 39),
        (252, 141, 89),
        (254, 224, 144),
        (224, 243, 248),
        (145, 191, 219),
        (69, 117, 180),
    ])
]

# Legend anchor passed to bbox_to_anchor; you will probably need to tweak this.
LEGEND_POSITION = (0.05, 0.87)
##################
from collections import defaultdict
from datetime import datetime
import simplejson
# Load the date -> list-of-ages mapping. The context manager closes the file
# handle as soon as parsing finishes instead of leaving it to the garbage
# collector.
with open(AGES_BY_DAY_FILE) as ages_file:
    ages_by_day = simplejson.load(ages_file)
# Build one data series per entry of RANGES (0-1 days old, 2-4 days old, ...):
# x is the date, y is the number of messages whose age falls in that range.
# (There is certainly a better way to do this using Pandas...)

# Dates are 'YYYY-MM-DD' strings, so plain string comparison orders them
# chronologically and can be used to apply the optional bounds.
index = sorted(ages_by_day)
if MIN_DATE:
    index = [day for day in index if day >= MIN_DATE]
if MAX_DATE:
    index = [day for day in index if day <= MAX_DATE]
date_index = [datetime.strptime(day, '%Y-%m-%d') for day in index]

series_by_range = defaultdict(list)
for day in index:
    day_ages = ages_by_day[day]
    for bucket in RANGES:
        low, high = bucket[0], bucket[1]
        # Both ends of the bucket are inclusive.
        in_bucket = sum(1 for age in day_ages if low <= age <= high)
        series_by_range[bucket].append(in_bucket)
# Make a Pandas DataFrame out of the series: one row per logged date, one
# column per key of series_by_range.
df = pd.DataFrame(data=series_by_range, index=date_index)

# Re-index to daily frequency so the index includes days without data (their
# rows become NaN). asfreq('D') returns a DataFrame on both old and new pandas;
# a bare resample('D') only did so on old releases and now returns a Resampler
# object that cannot be plotted or indexed like a frame.
df = df.asfreq('D')

# Plot as stacked bars, one color per range, with a thin grey edge on each bar.
ax = df.plot(
    kind='bar',
    stacked=True,
    figsize=(12, 5),
    width=1,
    color=COLORS,
    edgecolor=[(0.6,) * 3],
)
# Make the legend. Reverse its default ordering so that the order matches the graph.
def format_range_name(r):
    """Return the legend label for an age range given as (low, high).

    A range whose upper bound is 99999 is shown open-ended, e.g. "33+";
    every other range is shown as "low-high".
    """
    low, high = r[0], r[1]
    if high == 99999:
        return "%s+" % low
    return "%s-%s" % (low, high)
# Build the legend with human-readable range names, in reverse order so the
# entries read top-to-bottom like the stacked bars.
handles, labels = ax.get_legend_handles_labels()

# Sanity check: the plotted series must be the stringified RANGES, in order,
# otherwise the relabelling below would attach names to the wrong colors.
# Materialize the map: on Python 3 a list never compares equal to a map object.
assert labels == list(map(str, RANGES))
labels = list(map(format_range_name, RANGES))

# Pass real lists; reversed() cannot consume a Python 3 map object.
legend = ax.legend(
    list(reversed(handles)),
    list(reversed(labels)),
    loc='upper left',
    fontsize=12,
    ncol=2,
    columnspacing=1,
    framealpha=0,
    bbox_to_anchor=LEGEND_POSITION,
)
legend.set_title('Days old', prop={'size': 12})
# x axis: no vertical grid lines, and a tick label only on the 1st and 15th of
# each month. Tweak the day tuple to show labels more or less often.
ax.xaxis.grid(False)
ax.xaxis.set_ticklabels(
    [d.strftime('%b %d') if d.day in (1, 15) else "" for d in df.index],
    rotation=90,
    size=12,
)

# y axis: faint horizontal lines at the major ticks. The first argument of
# grid() is the on/off switch; the tick set is selected with `which`.
ax.yaxis.grid(True, which='major', color='black', linestyle='-', alpha=0.2)
ax.set_ylabel('Messages in inbox', size=14, labelpad=10)

# Clean up the background and borders: hide the top/right spines and make the
# axes and figure backgrounds fully transparent.
for spine in ['top', 'right']:
    ax.spines[spine].set_visible(False)
ax.patch.set_alpha(0)
ax.figure.patch.set_alpha(0)

# Title
ax.set_title(TITLE, fontsize=14, ha='center', va='top', position=(0.5, 1.06), color='#333333')
# Add dots for dates with missing data.
# Rows for days without data are NaN, so the first range's column is enough to
# find them. pd.isnull tests for NaN directly instead of comparing the value's
# string form to 'nan'.
missing_days = [
    i for i, is_missing in enumerate(pd.isnull(df[RANGES[0]])) if is_missing
]
if missing_days:
    for m in missing_days:
        ax.text(m - 0.6, 1, ".", fontdict={'size': 14})

    # You'll have to tweak these to make the "no data" text show in the right
    # spot below the legend. This only applies if you have missing data.
    #
    # The correct way to do this would be to add a custom handler to the legend:
    # http://matplotlib.org/users/legend_guide.html#legend-handlers
    BELOW_X, BELOW_Y = (7, 37)
    ax.text(BELOW_X, BELOW_Y, ".", fontdict={'size': 14}, zorder=99)
    ax.text(BELOW_X + 3, BELOW_Y - 0.5, "no data", fontdict={'size': 12}, zorder=99)

# don't show the result of evaluating the last command
pass