#!/usr/bin/env python
# coding: utf-8

# Notebook export: plot, for each sex, the share of visits at Lunch vs. Dinner
# in seaborn's "tips" dataset, and write the raw counts above each bar.

# In[30]:

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# IPython-only magic emitted by nbconvert; `get_ipython` exists only when this
# file is run inside IPython/Jupyter.
get_ipython().run_line_magic('matplotlib', 'inline')


# In[31]:

tips = sns.load_dataset("tips")
tips.head()


# In[42]:

# One row per (sex, time) combination, with the number of records in column `n`.
tips_counts = tips.groupby(['sex', 'time']).size()
tips_counts = tips_counts.reset_index()
tips_counts = tips_counts.rename(columns={0: 'n'})
tips_counts.head()


# In[43]:

# Share of each `time` within its `sex` group, stored as a fraction in [0, 1].
# `transform('sum')` broadcasts the per-sex total back onto every row, so the
# division stays aligned with the frame's index.
tips_counts['percentage'] = (
    tips_counts['n'] / tips_counts.groupby('sex')['n'].transform('sum')
)
tips_counts.head()


# Force categorical type, otherwise the order of the categories seen by the
# seaborn plotting call and by `annotate_bars` can be inconsistent, since
# `seaborn` creates a categorical type under the hood.

# In[59]:

tips_counts.sex = pd.Categorical(
    tips_counts.sex, categories=['Male', "Female"], ordered=True)
tips_counts.time = pd.Categorical(
    tips_counts.time, categories=['Lunch', "Dinner"], ordered=True)


# Make a categorical bar plot and annotate it.

# In[66]:

def annotate_bars(x, y, x_groupby, hue_groupby, height_col, count_col,
                  **kwargs):
    """Write a count label above every bar of a hue-dodged bar plot.

    Intended to be passed to ``FacetGrid.map_dataframe``, which supplies the
    facet's rows through the ``data`` keyword argument.

    Parameters
    ----------
    x, y
        Positional arguments forwarded by ``map_dataframe``; unused here.
    x_groupby
        Column whose groups correspond to the positions on the x axis.
    hue_groupby
        Column whose groups correspond to the dodged bars at each position.
    height_col
        Column holding the bar height (the y coordinate of the label anchor).
    count_col
        Column whose first value in each (x, hue) cell is used as label text.
    **kwargs
        Must contain ``data`` (a DataFrame); other entries are ignored.
    """
    data = kwargs.pop('data')
    ax = plt.gca()

    # Bars at one x position share a total width of 0.8 (this mirrors the
    # original code's `0.8/n_hues`), split evenly between the hue levels.
    group_width = 0.8
    n_hues = len(data.groupby(hue_groupby).size())
    bar_width = group_width / n_hues

    for x_index, (_, x_frame) in enumerate(data.groupby(x_groupby)):
        # Left edge of the cluster of bars centred on integer position x_index.
        left_edge = x_index - group_width / 2
        for hue_index, (_, cell) in enumerate(x_frame.groupby(hue_groupby)):
            if cell.empty:
                # Nothing to label for this combination; its slot is still
                # accounted for by hue_index.
                continue
            # Horizontal centre of the hue_index-th bar within the cluster.
            x_position = left_edge + bar_width * (hue_index + 0.5)
            # Use scalars, not one-element Series, as annotation coordinates.
            y_position = cell[height_col].iloc[0]
            ax.annotate(str(cell[count_col].iloc[0]),
                        (x_position, y_position),
                        textcoords='offset points',
                        xytext=(0, 2),
                        ha='center',
                        va='bottom',
                        fontsize=12)


# `factorplot` was renamed to `catplot`; prefer the new name and fall back to
# the old one only on seaborn versions that predate the rename.
plot_categorical = getattr(sns, 'catplot', None) or sns.factorplot

g = plot_categorical(x='sex', y='percentage', hue='time', kind='bar',
                     data=tips_counts, legend=False, ci=None)
g.map_dataframe(annotate_bars, 'sex', 'percentage',
                x_groupby='sex', hue_groupby='time',
                height_col='percentage', count_col='n')
# g.add_legend();


# In[ ]:


# In[ ]: