#!/usr/bin/env python
# coding: utf-8

# In[30]:


import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
get_ipython().run_line_magic('matplotlib', 'inline')


# In[31]:


# Load the `tips` example dataset through seaborn and peek at the first rows.
tips = sns.load_dataset("tips")
tips.head()


# In[42]:


# Count the rows in every (sex, time) combination and expose the count as a
# regular column called `n` next to the two grouping columns.
tips_counts = (
    tips.groupby(['sex', 'time'])
        .size()
        .reset_index(name='n')
)
tips_counts.head()


# In[43]:


# Share of each sex's rows that falls into each time slot.  Despite the
# column name the value is a fraction in [0, 1], not a number out of 100.
# `transform('sum')` broadcasts every sex's total back onto the original rows,
# so the division is aligned row by row; this avoids `groupby(...).apply`,
# whose output shape depends on how pandas stitches the per-group results.
tips_counts['percentage'] = (
    tips_counts['n'] / tips_counts.groupby('sex')['n'].transform('sum')
)
tips_counts.head()


# Force a categorical type; otherwise the order of categories seen by `factorplot` and `annotate_bars` is inconsistent, since `seaborn` creates a categorical type under the hood.

# In[59]:


# Pin an explicit, ordered category list on both grouping columns so that
# every consumer of `tips_counts` sees the levels in the same order.
for _column, _levels in (('sex', ['Male', "Female"]),
                         ('time', ['Lunch', "Dinner"])):
    tips_counts[_column] = pd.Categorical(
        tips_counts[_column], categories=_levels, ordered=True
    )


# Make a factorplot and annotate it.

# In[66]:


def _plot_levels(values):
    """Return the distinct levels of *values* in a stable plotting order.

    Categorical data keeps its declared category order (including levels
    that never occur in the data); anything else falls back to the sorted
    unique values, which is the order a pandas ``groupby`` iterates in.
    """
    # The ``.cat`` accessor is only available on categorical Series.
    if hasattr(values, 'cat'):
        return list(values.cat.categories)
    return sorted(values.dropna().unique())


def annotate_bars(x, y, x_groupby, hue_groupby, height_col, count_col,
                  ax=None, **kwargs):
    """Write a count above every bar of a hue-dodged categorical bar plot.

    Designed to be passed to ``FacetGrid.map_dataframe``, which supplies the
    facet's rows through the ``data`` keyword.

    Parameters
    ----------
    x, y
        Accepted for call compatibility with ``map_dataframe`` positional
        arguments; not used.
    x_groupby
        Column whose levels define the groups along the x axis.
    hue_groupby
        Column whose levels define the bars inside each x group.
    height_col
        Column holding the bar height; the label is anchored at this value.
    count_col
        Column holding the value that is written as the label.
    ax
        Axes to draw on.  Defaults to the current axes (``plt.gca()``).
    **kwargs
        Must contain ``data`` (a DataFrame); other entries are ignored.

    Raises
    ------
    KeyError
        If ``data`` is not supplied.

    Notes
    -----
    Bar centres are computed for a total group width of 0.8 (seaborn's
    default bar width) split evenly between the hue levels.  Levels are taken
    from the categorical dtype when there is one, so force a categorical type
    on both grouping columns to guarantee the same order as the plot.
    """
    data = kwargs.pop('data')
    if ax is None:
        ax = plt.gca()

    x_levels = _plot_levels(data[x_groupby])
    hue_levels = _plot_levels(data[hue_groupby])
    if not hue_levels:
        return

    width = 0.8 / len(hue_levels)
    # Centre of the first hue's bar relative to the group's tick; the bars of
    # one group are laid out symmetrically around the tick.
    first_offset = -width * (len(hue_levels) - 1) / 2.0

    for x_index, x_level in enumerate(x_levels):
        in_group = data[x_groupby] == x_level
        for hue_index, hue_level in enumerate(hue_levels):
            rows = data[in_group & (data[hue_groupby] == hue_level)]
            if rows.empty:
                # No bar for this combination; keep the slot so the labels of
                # the remaining hue levels stay over their own bars.
                continue
            x_position = x_index + first_offset + width * hue_index
            # Scalars, not one-row Series, so matplotlib gets plain numbers.
            y_position = rows[height_col].values[0]
            ax.annotate(str(rows[count_col].values[0]), (x_position, y_position),
                        textcoords='offset points', xytext=(0, 2),
                        ha='center', va='bottom', fontsize=12)

    
# `factorplot` was renamed to `catplot` in seaborn 0.9 and later removed, so
# prefer `catplot` and only fall back to the old name on older releases.
_categorical_plot = getattr(sns, 'catplot', None) or sns.factorplot
# `ci=None` turns the error bars off (newer seaborn spells this
# `errorbar=None`); the legend is suppressed here as well.
g = _categorical_plot(x='sex', y='percentage', hue='time',
                      kind='bar', data=tips_counts, legend=False, ci=None)
# Write the raw count on top of every bar.  'sex' and 'percentage' only fill
# the positional x/y slots that `annotate_bars` accepts but does not use.
g.map_dataframe(annotate_bars, 'sex', 'percentage', x_groupby='sex',
                hue_groupby='time', height_col='percentage', count_col='n')
# g.add_legend();


# In[ ]:


# In[ ]: