# Notebook setup: imports, display/plot defaults, user-tunable settings, and
# loading of the per-day message ages that the rest of the script plots.
from collections import defaultdict
from datetime import datetime

import matplotlib
import pandas as pd
import simplejson

# Some Pandas/Matplotlib initialization which I more or less blindly copied
# from an example ipython notebook.
pd.set_option('display.max_columns', 15)
pd.set_option('display.width', 400)
# NOTE(review): the 'display.mpl_style' option appears to have been dropped
# from later pandas releases -- remove this line if it raises there.
pd.set_option('display.mpl_style', 'default')
# Go through the matplotlib module: a bare `rcParams` is not defined by any
# import visible in this file (presumably it came from a pylab-style namespace).
matplotlib.rcParams['figure.figsize'] = (12, 5)
font = {'family': 'sans-serif', 'weight': 'normal', 'size': 14}
matplotlib.rc('font', **font)

# File containing the output of compute_ages_on_each_day.py
AGES_BY_DAY_FILE = 'ages.json'

# 'YYYY-MM-DD'. If you don't want a min/max date, set the value to None
MIN_DATE = '2015-02-01'
MAX_DATE = '2015-05-02'

TITLE = u'Age of messages in inbox at end of day, 2015'

# Inclusive (low, high) age buckets in days; 99999 marks the open-ended last
# bucket and is rendered as "33+" in the legend.
RANGES = [(0, 1), (2, 4), (5, 8), (9, 16), (17, 32), (33, 99999)]

# "RdYlBu" from http://colorbrewer2.org/
# The length of this list should match that of RANGES.
COLORS = list(reversed([
    [x / 255. for x in c]
    for c in [
        (215, 48, 39),
        (252, 141, 89),
        (254, 224, 144),
        (224, 243, 248),
        (145, 191, 219),
        (69, 117, 180),
    ]
]))

# You will probably need to tweak this
LEGEND_POSITION = (0.05, 0.87)

##################

# Read the whole file inside a `with` block so the handle is closed promptly
# instead of being left for the garbage collector.
with open(AGES_BY_DAY_FILE) as ages_file:
    ages_by_day = simplejson.loads(ages_file.read())

# Create a data series (x is date, y is count) for each of the ranges (0-1 days old, 2-4 days old, etc.)
# (There is certainly a better way to do this using Pandas...)
# Build one count series per age range, keyed by the (low, high) tuple.
series_by_range = defaultdict(list)

# Date keys are 'YYYY-MM-DD' strings, so plain string comparison orders them
# chronologically and can be used for the optional MIN/MAX filtering.
index = sorted(ages_by_day)
if MIN_DATE:
    index = [k for k in index if k >= MIN_DATE]
if MAX_DATE:
    index = [k for k in index if k <= MAX_DATE]
date_index = [datetime.strptime(k, '%Y-%m-%d') for k in index]

for day in index:
    ages = ages_by_day[day]
    for r in RANGES:
        # Number of messages whose age falls inside this inclusive range.
        series_by_range[r].append(sum(1 for age in ages if r[0] <= age <= r[1]))

# Make a Pandas DataFrame out of the series
df = pd.DataFrame(data=series_by_range, index=date_index)

# Conform to a daily index so that days without data show up as NaN rows.
# asfreq() is used instead of a bare resample('D'): without an aggregation,
# resample() returns a Resampler object rather than a DataFrame on newer pandas.
df = df.asfreq('D')

# Plot as stacked bar
ax = df.plot(kind='bar', stacked=True, figsize=(12, 5), width=1,
             color=COLORS, edgecolor=[(0.6,) * 3])


# Make the legend. Reverse its default ordering so that the order matches the graph.
def format_range_name(r):
    """Return the legend label for range *r*: "lo-hi", or "lo+" for the open-ended bucket."""
    if r[1] == 99999:
        return "%s+" % r[0]
    return "%s-%s" % (r[0], r[1])


handles, labels = ax.get_legend_handles_labels()
# Sanity check that the plotted columns come out in RANGES order before the
# labels are replaced. Real lists are compared (not map objects) so the check
# behaves the same on Python 2 and Python 3.
assert labels == [str(r) for r in RANGES]
labels = [format_range_name(r) for r in RANGES]
legend = ax.legend(list(reversed(handles)), list(reversed(labels)),
                   loc='upper left', fontsize=12, ncol=2, columnspacing=1,
                   framealpha=0, bbox_to_anchor=LEGEND_POSITION)
legend.set_title('Days old', prop={'size': 12})

# x axis. Tweak this to show labels more or less often.
# No vertical grid lines; label only the 1st and 15th of each month and leave
# every other bar's tick label blank.
ax.xaxis.grid(False)
ax.xaxis.set_ticklabels(
    [d.strftime('%b %d') if d.day in (1, 15) else "" for d in df.index],
    rotation=90, size=12)

# y axis
# The first positional parameter of grid() is the on/off flag, so the tick set
# has to be selected through the `which` keyword.
ax.yaxis.grid(True, which='major', color='black', linestyle='-', alpha=0.2)
ax.set_ylabel('Messages in inbox', size=14, labelpad=10)

# Clean up the background and borders
for spine in ['top', 'right']:
    ax.spines[spine].set_visible(False)
ax.patch.set_alpha(0)
ax.figure.patch.set_alpha(0)

# Title
ax.set_title(TITLE, fontsize=14, ha='center', va='top', position=(0.5, 1.06),
             color='#333333')

# Add dots for dates with missing data. Days that had no entry in the input
# are NaN in every column, so checking the first range's column is enough.
missing_days = [i for i, x in enumerate(df[RANGES[0]]) if pd.isnull(x)]
if missing_days:
    for m in missing_days:
        ax.text(m - 0.6, 1, ".", fontdict={'size': 14})

    # You'll have to tweak these to make the "no data" text show in the right spot below the legend.
    # This only applies if you have missing data.
    #
    # The correct way to do this would be to add a custom handler to the legend:
    # http://matplotlib.org/users/legend_guide.html#legend-handlers
    BELOW_X, BELOW_Y = (7, 37)
    ax.text(BELOW_X, BELOW_Y, ".", fontdict={'size': 14}, zorder=99)
    ax.text(BELOW_X + 3, BELOW_Y - 0.5, "no data", fontdict={'size': 12}, zorder=99)

# don't show the result of evaluating the last command
pass