"""Exploratory analysis of the 2014 Indian general election turnout and results.

Reads the turnout CSV and the tab-separated results file from ``data/``,
derives per-constituency, per-state and per-party statistics, prints a few
national totals and draws two pie charts and a log-log scatter plot.

The code was exported from a notebook: bare expressions such as ``csv_data``
or ``results.describe()`` only display something in an interactive session.
"""
# Make float division the default (use // for integer division) and make
# print() a function, so the script behaves the same on Python 2 and Python 3.
from __future__ import division, print_function

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

path = "data/2014_indian_election_turnout.csv"
csv_data = pd.read_csv(path)
csv_data

# Turnout = voters / electors, per row of the turnout file.
csv_data["Male Turnout"] = csv_data["Male Voters"] / csv_data["Male Electors"]
csv_data["Female Turnout"] = csv_data["Female Voters"] / csv_data["Female Electors"]
csv_data["Total Turnout"] = csv_data["Total Voters"] / csv_data["Total Electors"]
csv_data.describe()

# National figures: total electors, female electors as a percentage of male
# electors, total voters, and overall turnout.
print('{0:,}'.format(csv_data["Total Electors"].sum()))
print('%.2f%%' % (csv_data["Female Electors"].sum() / csv_data["Male Electors"].sum() * 100))
print('{0:,}'.format(csv_data["Total Voters"].sum()))
print('%.2f%%' % (csv_data["Total Voters"].sum() / csv_data["Total Electors"].sum() * 100))

states = pd.DataFrame(csv_data["State"].unique(), columns=["State"])
print("The number of states: %s" % len(states))
states

# Calculating constituencies
# Per-state figures are looked up by state name instead of being copied by
# position: unique() keeps the order of first appearance while a sorted
# index is alphabetical, so a positional copy can attach numbers to the
# wrong state.
seats = csv_data["State"].value_counts()
states["Seats"] = states["State"].map(seats)
states.sort_values("Seats", ascending=False)

by_state = csv_data.groupby("State")
total_electors = by_state["Total Electors"].sum()
total_votes = by_state["Total Voters"].sum()
states["Total Electors"] = states["State"].map(total_electors)
states["Total Voters"] = states["State"].map(total_votes)
states.sort_values("Total Electors", ascending=False)

states["Turnout"] = states["Total Voters"] / states["Total Electors"]
states.sort_values("Turnout", ascending=False)

states["Electors Per Seat"] = states["Total Electors"] / states["Seats"]
states.sort_values("Electors Per Seat", ascending=False)

path = "data/2014_indian_elections_results.csv"
results_data = pd.read_csv(path, sep="\t")
results_data

# Analyzing the results
is_winner = results_data["Winner or Not?"] == "yes"
# A stable sort keeps the results-file order within each state, which makes
# the positional copy below deterministic.
winners = results_data[is_winner].sort_values("Name of State/ UT", kind="mergesort")
winners

# NOTE(review): the winner columns are copied by position, which assumes the
# turnout file lists its rows in exactly the same order as `winners`
# (state by state, same order inside each state).  Nothing here verifies
# that; merging on a shared constituency key would be safer -- confirm the
# key columns available in both files.
csv_data["Candidate Name"] = winners["Candidate Name"].values
csv_data["Party Abbreviation"] = winners["Party Abbreviation"].values
csv_data["Party Name"] = winners["Party Name"].values
csv_data["Total Votes Polled"] = winners["Total Votes Polled"].values
csv_data

csv_data["Winner Percentage"] = csv_data["Total Votes Polled"] / csv_data["Total Voters"]
csv_data.sort_values("Winner Percentage", ascending=False)

# One row per party, indexed by party abbreviation; "Seats" counts the rows
# of csv_data attributed to that party.
results = csv_data["Party Abbreviation"].value_counts().to_frame("Seats")
results["Seats Percentage"] = results["Seats"] / len(winners)
results

# Votes over all candidates of a party (winning or not).  Assigning a Series
# to a column aligns it on the party label.
party_votes = results_data.groupby("Party Abbreviation")["Total Votes Polled"]
all_votes = party_votes.sum()
all_votes_average = party_votes.mean()
results["Total Votes"] = all_votes
results["Average Votes"] = all_votes_average
results

# The same kind of aggregates restricted to the winning candidates.
won_by_party = csv_data.groupby("Party Abbreviation")
all_winning_votes = won_by_party["Total Votes Polled"].sum()
all_winning_votes_average = won_by_party["Total Votes Polled"].mean()
all_winning_votes_percentages = won_by_party["Winner Percentage"].mean()
average_winning_elecors = won_by_party["Total Electors"].mean()
results["Total Winning Votes"] = all_winning_votes
results["Average Winning Votes"] = all_winning_votes_average
results["Average Winning Percentage"] = all_winning_votes_percentages
results["Average Winning Electors"] = average_winning_elecors
results["Loosing Votes"] = results["Total Votes"] - results["Total Winning Votes"]
results["Winning Votes Ratio"] = results["Total Winning Votes"] / results["Total Votes"]
results
results.describe()

# Pie chart: seats per party, one colour per row of `results`.
plt.figure(figsize=(18, 8))
cmap = plt.cm.hsv
colors = cmap(np.linspace(0., 1., len(results)))
plt.pie(results["Seats"],
        labels=results.index,
        explode=np.arange(len(results)) / 10,
        autopct='%1.1f%%',
        colors=colors)
plt.axis("equal")
plt.title("Indian general election 2014 results of seats per party")
plt.show()

# Pie chart: total votes per party, largest share first.
by_total_votes = results.sort_values("Total Votes", ascending=False)
plt.figure(figsize=(18, 8))
plt.pie(by_total_votes["Total Votes"],
        labels=by_total_votes.index,
        explode=np.arange(len(results)) ** 1.5 / 40,
        autopct='%1.1f%%',
        colors=colors)
plt.axis("equal")
plt.title("Indian general election 2014 results of votes per party")
plt.show()

# Scatter plot: seats against total votes, one annotated point per party.
# The marker area grows with the seats-per-vote ratio.
plt.figure(figsize=(18, 10))
party_points = zip(results.index, results["Total Votes"], results["Seats"], colors)
for party, x, y, color in party_points:
    label = "%s\n%s" % (party, y)
    plt.scatter(x, y, color=color, s=y / x * 1e9, label=party)
    plt.annotate(label,
                 xy=(x, y),
                 xytext=(-20, 20),
                 textcoords='offset points',
                 ha='right',
                 va='bottom',
                 bbox=dict(boxstyle='round,pad=0.5', fc='yellow', alpha=0.5),
                 arrowprops=dict(arrowstyle='->', connectionstyle='arc3,rad=0'))
plt.xscale("log")
plt.yscale("log")
plt.xlim(results["Total Votes"].min() * 0.95, results["Total Votes"].max() * 1.05)
# NOTE(review): 0 is not a valid limit on a log axis; recent matplotlib
# versions ignore it with a warning.  Pick an explicit positive lower bound
# if a fixed floor is wanted.
plt.ylim(0)
plt.xlabel("Total Votes (log)")
plt.ylabel("Seats (log)")
plt.title("Indian General Elections 2014 Seats vs Total Votes on a log scale")
plt.grid()
plt.show()

print(results)