# Exploratory analysis of 'karnataka-2013-results.csv'.
#
# The script reads the columns 'District', 'Party' and 'Votes' and derives:
#   * a per-district rank for every row (by votes, descending),
#   * the winner/runner-up margin per district (absolute and as a share),
#   * how often each party's wins were absolute majorities,
#   * each row's share of its district's total votes.
#
# Bare expressions (e.g. ``data.head()``) are kept from the original
# notebook-style flow; they only display something in an interactive session.
import pandas as pd

data = pd.read_csv('karnataka-2013-results.csv')
data.head()

# 'display.line_width' is no longer a valid option key; 'display.width' is
# the current name for the same setting.
pd.set_option('display.width', 200)
pd.set_option('display.max_columns', 20)

# --- Basic totals -----------------------------------------------------------
data['Votes'].sum()
data[data['Votes'] == data['Votes'].max()]
data['Party'].value_counts()

# --- Rank rows within each district ------------------------------------------
# NOTE(review): rank() uses its default tie handling (averaged ranks), so a
# tie for the top spot would leave that district without a Rank == 1 row.
data['Rank'] = data.groupby('District')['Votes'].rank(ascending=False)
data.head()

winners = data[data['Rank'] == 1]
winners['Party'].value_counts()
winners.set_index('District').head()

# --- Winner vs. runner-up margins --------------------------------------------
rank1 = winners.set_index('District')
rank2 = data[data['Rank'] == 2].set_index('District')

# Inner join on the district index; overlapping columns get the suffixes
# '1' (winner) and '2' (runner-up), e.g. 'Votes1' / 'Votes2'.
margins = pd.merge(rank1, rank2, left_index=True, right_index=True,
                   suffixes=('1', '2'))
margins.head()

margins['Margin'] = margins['Votes1'] - margins['Votes2']
margins.sort_values('Margin', ascending=False).head()

# Total votes per district, aligned to margins on the district index.
margins['Votes'] = data.groupby('District')['Votes'].sum()
margins['% Margin'] = margins['Margin'].astype(float) / margins['Votes']
margins.sort_values('% Margin', ascending=False).head()

# --- Absolute majorities -----------------------------------------------------
# True where the winner took more than half of the district's votes.
has_majority = margins['Votes1'].astype(float) / margins['Votes'] > 0.5
margins[has_majority].head()
margins[has_majority]['Party1'].value_counts()

majority = pd.DataFrame({
    'Absolute majority': margins[has_majority]['Party1'].value_counts(),
    'Winning party': winners['Party'].value_counts(),
})
majority['Ratio'] = majority['Absolute majority'] / majority['Winning party']
majority.dropna().sort_values('Ratio', ascending=False)

# --- Vote share per row ------------------------------------------------------
# .ix has been removed from pandas; .loc is the label-based equivalent.
data['District'].apply(lambda district: margins.loc[district]).head()

# transform('sum') broadcasts each district's total back onto its rows. These
# are the same totals stored in margins['Votes'], but computed without a
# per-row Python lookup and without failing for districts absent from margins.
district_totals = data.groupby('District')['Votes'].transform('sum')
data['% votes'] = data['Votes'].astype(float) / district_totals
data.head()

# Rows that received less than one sixth of their district's votes.
below_sixth = data['% votes'] < 1. / 6
below_sixth.sum()
len(data)
data[below_sixth].sort_values('% votes').head(50)