"""Exploratory look at speaker gender across conferences.

Reads ``./ndata.csv`` (the columns used here are ``conference``, ``year`` and
``gender``), keeps only the rows whose gender is 'male' or 'female', and draws
stacked bar charts overall, per year, and per year for a handful of named
conferences.  Three of the charts are also written to PNG files in the current
working directory.

This file originated as a notebook: several bare expressions below exist only
so that a notebook cell displays their value.  They are kept for that purpose
and have no effect when the file is run as a plain script.
"""
import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns

# Equivalent of the ``%matplotlib inline`` notebook magic.  The magic syntax is
# not valid Python, so call it through IPython when available and silently
# skip it when running outside IPython (where ``get_ipython`` is undefined).
try:
    get_ipython().run_line_magic('matplotlib', 'inline')
except NameError:
    pass

sns.set_palette("deep", desat=.6)
# NOTE(review): recent seaborn releases may ignore non-context keys such as
# "figure.figsize" passed to set_context -- confirm against the installed version.
sns.set_context(rc={"figure.figsize": (11, 6)})

# Prefer Liberation Sans, then Arial, ahead of the existing sans-serif
# fallbacks.  Assigning a new list (instead of mutating the stored one in
# place) routes the change through rcParams' own setter.
mpl.rcParams['font.sans-serif'] = (
    ['Liberation Sans', 'Arial'] + list(mpl.rcParams['font.sans-serif'])
)
mpl.rcParams['font.family'] = 'sans-serif'

# Conferences that get their own per-year chart, in plotting order.
_CONFERENCES_TO_PLOT = (
    'Golden Gate Ruby Conference',
    'strangeloop.com',
    'PyCon US',
    'Cascadia Ruby',
    'Farmhouse Conf',
)

df = pd.read_csv('./ndata.csv')

# --- Notebook inspection of the raw data (no effect in a script) -----------
df['conference'].unique().size
conf_year = df.groupby(['conference', 'year'])
len(conf_year)
df['gender'].describe()
df['gender'].value_counts()

# take only male and female, ignore the rest
df = df[df['gender'].isin(['male', 'female'])]
df['gender'].value_counts()

# --- Overall gender counts -------------------------------------------------
gender_plot = df['gender'].value_counts().plot(
    kind='bar',
    title='Overall gender frequency in the collected data',
)
plt.savefig('gender_plot.png', bbox_inches='tight')

# --- Gender per year: absolute counts --------------------------------------
per_year = pd.crosstab(df['year'], df['gender'])
per_year
# NOTE(review): "per_ear_plot" looks like a typo for "per_year_plot"; the name
# is kept unchanged in case code outside this section refers to it.
per_ear_plot = per_year.plot(kind='bar', stacked=True, title="Gender per year")
plt.savefig(r'gender_per_year.png', bbox_inches='tight')

# --- Gender per year: share of each year's total ---------------------------
# Divide every row by its own row sum so each year's values add up to 1.
per_year_perc = per_year.div(per_year.sum(axis=1), axis=0)
per_ear_plot = per_year_perc.plot(
    kind='bar',
    stacked=True,
    title="Gender frequency per year",
)
plt.savefig(r'gender_freq_per_year.png', bbox_inches='tight')

# Regroup now that the frame has been filtered to male/female only.
conf_year = df.groupby(['conference', 'year'])
conf_year['gender'].value_counts(normalize=True).to_dict()

# --- Per-conference charts -------------------------------------------------
# One stacked bar chart (year x gender counts) per selected conference.  After
# the loop, conf_data / conf_data_year hold the values for the last entry.
for conference_name in _CONFERENCES_TO_PLOT:
    conf_data = df[df.conference == conference_name]
    conf_data_year = pd.crosstab(conf_data['year'], conf_data['gender'])
    conf_data_year.plot(kind='bar', stacked=True, title=conference_name)