import pandas as pd
import re
# Global plotting defaults for the whole script.
# NOTE(review): `matplotlib` is not imported in this file; presumably it is
# supplied by the notebook / pylab environment -- confirm before running as a
# plain script.
pd.set_option('display.mpl_style', 'default')
font = dict(family='sans', weight='normal', size=11)
matplotlib.rc('font', **font)
# Tick labels on both axes are set slightly larger than the base font.
matplotlib.rc('xtick', labelsize=12)
matplotlib.rc('ytick', labelsize=12)

def get_questions(df):
    """Return the column labels of *df* with the final two columns dropped.

    NOTE(review): presumably the last two columns hold respondent metadata
    rather than survey questions -- confirm against the CSV layout.
    """
    trailing_non_questions = 2
    return df.columns[:-trailing_non_questions]

def get_answer_ratios(df):
    """Return, per question, the fraction of answers equal to 'gender neutral'.

    Parameters
    ----------
    df : pandas.DataFrame
        Survey responses; the question columns are those returned by
        ``get_questions(df)``.

    Returns
    -------
    pandas.Series
        Indexed by question. Each value is the count of 'gender neutral'
        answers divided by the count of all non-missing answers for that
        question. A question with no answers at all yields NaN (0 / 0).

    Raises
    ------
    KeyError
        If no question received a single 'gender neutral' answer, because the
        column is then absent from the counts table.
    """
    questions = get_questions(df)
    # One Series of answer counts per question, joined side by side on the
    # union of all answer labels.
    counts = pd.concat([df[col].value_counts() for col in questions], axis=1)
    counts.columns = questions
    # Rows become questions, columns become answer labels. An answer that
    # nobody chose for a given question comes out of the join as NaN, but it
    # really means a count of zero; without this the ratio would be NaN
    # instead of 0.
    counts = counts.T.fillna(0)
    totals = counts.sum(axis=1).astype(float)
    return counts['gender neutral'] / totals

def create_summary(everyone):
    """Split respondents by the gender question and summarise each group.

    Parameters
    ----------
    everyone : pandas.DataFrame
        All survey responses, including the column
        'Do you identify as a woman?'.

    Returns
    -------
    tuple
        ``(men, women, summary)``. ``women`` holds the rows answering 'Yes'
        and ``men`` the rows answering 'No'. ``summary`` is a DataFrame of
        'gender neutral' ratios per question with the columns 'Everyone',
        'Men + Other' and 'Women'.

    Rows whose answer is neither 'Yes' nor 'No' are counted in 'Everyone'
    only. The group sizes are printed as a side effect.
    """
    identifies_as_woman = everyone['Do you identify as a woman?']
    women = everyone[identifies_as_woman == 'Yes']
    men = everyone[identifies_as_woman == 'No']
    # Single-argument print(...) emits the same text on Python 2 and Python 3,
    # unlike the Python-2-only print statement.
    print("Number of women: %d" % len(women))
    print("Number of men: %d" % len(men))
    # Column order here must match the labels assigned just below.
    summary = pd.concat(
        [get_answer_ratios(everyone), get_answer_ratios(men), get_answer_ratios(women)],
        axis=1)
    summary.columns = ['Everyone', 'Men + Other', 'Women']
    return men, women, summary

# Load the survey responses, indexed by their 'Timestamp' column (with date
# parsing enabled for the index).
everyone = pd.read_csv(
    './guys-guys-guys-no-email.csv',
    index_col='Timestamp',
    parse_dates=True,
)

# Per-group subsets and the table of 'gender neutral' ratios per question.
men, women, summary = create_summary(everyone)

def draw_figure(summary, question):
    """Draw a row of pie charts for one question, one pie per summary column.

    Parameters
    ----------
    summary : pandas.DataFrame
        Ratios in the range 0-1, indexed by question, one column per group.
        The figure has three axes, so more than three columns raises
        IndexError.
    question : str
        Index label of the row of ``summary`` to draw.

    Returns
    -------
    The figure object returned by ``subplots``.

    NOTE(review): ``figsize`` and ``subplots`` are not imported in this file;
    presumably they come from the notebook's pylab namespace -- confirm.
    """
    # No bare figure() call before subplots(): subplots creates its own
    # figure, so the extra call only left an empty, never-closed figure behind
    # on every invocation.
    figsize(9, 3)
    fig, axes = subplots(1, 3)
    for i, name in enumerate(summary.columns):
        ax = axes[i]
        ax.set_title(name)
        # Label-based lookup; .loc replaces the removed .ix indexer.
        percent = summary.loc[question, name]
        percent_label = int(round(percent * 100))
        # The second label is derived from the rounded first one so the two
        # always add up to 100%.
        labels = [str(percent_label) + '%', str(100 - percent_label) + '%']
        ax.pie([percent, 1 - percent], colors=['white', 'lightblue'], labels=labels)
    return fig

def make_filename(question, extension="png"):
    """Build the image path used for a question's figure.

    The question text is lower-cased, every space becomes a hyphen, and the
    characters ``!``, ``.`` and ``'`` are removed. The result is placed under
    ``images/`` with the given extension.
    """
    # Splitting on a literal single space and re-joining with '-' replaces
    # each space one for one.
    slug = '-'.join(question.lower().split(' '))
    slug = re.sub("[!.']", "", slug)
    return "".join(["images/", slug, ".", extension])

# Bar chart of how many respondents gave each answer to the gender question.
everyone['Do you identify as a woman?'].value_counts().plot(
    title="Do you identify as a woman?",
    kind='bar',
    rot=0,
)

# Render one pie-chart figure per question and save it to the path produced
# by make_filename (under "images/").
# NOTE(review): nothing here creates the "images/" directory; presumably it
# must already exist -- confirm.
for question in summary.index:
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(question)
    fig = draw_figure(summary, question)
    fig.savefig(make_filename(question))

# Rebuild the summary so it is a fresh table of 0-1 ratios before it is
# rescaled in place further down.
men, women, summary = create_summary(everyone)

# Standard error of each proportion under a binomial model:
# sqrt(p * (1 - p) / n), with n the size of the group in each column.
# This is one standard error, not a complete confidence interval.
lengths = pd.Series([len(everyone), len(men), len(women)], index=summary.columns)
# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(lengths)
# `lengths` is indexed by summary's column labels, so the division pairs each
# column with its own group size.
error = np.sqrt(summary * (1 - summary) / lengths)

# Convert ratios to percentages for graphing
error = error * 100
summary = summary * 100

# Switch to much larger fonts for the 20x20 bar charts drawn below.
font = dict(family='sans', weight='normal', size=25)
matplotlib.rc('font', **font)
# Only the x tick label size is changed here.
matplotlib.rc('xtick', labelsize=30)
def plot_histogram(summmary):
    """Draw a horizontal bar chart comparing 'Women' and 'Men + Other'.

    Parameters
    ----------
    summmary : pandas.DataFrame
        Table with one row per question and at least the columns 'Women' and
        'Men + Other', in percent (the x axis is fixed to 0-100). It must
        have exactly seven rows, in the same order as the hard-coded labels
        below. The input is copied, not modified. The parameter name's
        spelling is kept so existing callers keep working.

    Returns
    -------
    The object returned by ``DataFrame.plot``.
    """
    summary = summmary.copy()
    # Hand-wrapped versions of the question texts, used as bar labels.
    questions = ["Hey guys! I \njust saw that a \npenguin escaped.",
     "I'm going out with \nthe guys. You can deal \nwith the penguins.",
     "We're going to need to \nhire a Python guy to \ndeal with our \npenguin problems.",
     "This would never have \nhappened if the \nJava guys were here.",
     "That penguin is \nsuch a good guy",
     "Those guys dealt \nwith the penguin \nemergency so \nprofessionally.",
     "I met a great Erlang guy \nthe other day who \nknows how to deal with\nthorny penguin issues"]
    summary.index = questions
    # Reverse the row order (presumably so the first question is drawn at the
    # top of the horizontal chart). Positional slicing gives the same result
    # as the removed .ix lookup, which was also being fed an iterator rather
    # than a list of labels.
    summary = summary.iloc[::-1]
    return summary[['Women', 'Men + Other']].plot(
        kind='barh',
        figsize=(20, 20),
        rot=0,
        title="What percentage of people think this usage is gender-neutral",
        # `color` is the documented keyword; `colors` was a deprecated alias.
        color=['#466fd5', '#ff7400'],
        xlim=(0, 100))
# Bar chart of the percentage summary; `fig` holds whatever plot_histogram
# returns (the result of DataFrame.plot).
fig = plot_histogram(summary)

# Same chart layout for the error values.
# NOTE(review): plot_histogram hard-codes its title and the 0-100 x range, so
# this chart carries the same "percentage of people" title as the one above.
plot_histogram(error)