#
# data source:
# https://raw.githubusercontent.com/datameet/india-election-data/master/affidavits/myneta.2014.csv
# https://github.com/datameet/india-election-data
#
# Summarises the candidate CSV: number of parties, candidates per party,
# education background, criminal cases and total assets per party.
#

import numpy as np
import pandas as pd

# Location of the downloaded CSV; pass another path to main() to override.
DEFAULT_CSV_PATH = r"/Users/Sukhbinder/myneta2014.csv"

# Parties singled out for the per-party breakdowns.
KNOWN_PARTIES = ["AAP", "BJP", "INC", "IND", "BSP", "SP", "JDU", "CPI(M)"]


def _count_unique(values):
    """Return the number of distinct entries in *values*.

    Uses ``Series.unique()``, so a missing value (NaN) is counted as one
    distinct entry.
    """
    return len(values.unique())


def count_parties(data):
    """Return how many distinct values the "Party" column of *data* holds."""
    return int(np.size(data["Party"].unique()))


def known_party_rows(data):
    """Return the rows of *data* whose "Party" is listed in KNOWN_PARTIES."""
    return data[data["Party"].isin(KNOWN_PARTIES)]


def candidates_per_party(data):
    """Count distinct candidates for each known party.

    Returns a DataFrame indexed by "Party" with one "Candidate" column,
    sorted from the largest count to the smallest.
    """
    subset = known_party_rows(data)
    counts = subset.groupby("Party")[["Candidate"]].agg(_count_unique)
    return counts.sort_values("Candidate", ascending=False)


def candidates_by_education(data):
    """Count distinct candidates for each "Education" value.

    Returns a DataFrame indexed by "Education" with one "Candidate" column,
    sorted from the largest count to the smallest.
    """
    table = pd.pivot_table(
        data,
        values=["Candidate"],
        index=["Education"],
        aggfunc=_count_unique,
    )
    return table.sort_values("Candidate", ascending=False)


def education_by_party(data):
    """Cross-tabulate distinct candidates by education and known party.

    Returns a DataFrame indexed by "Education" whose columns are
    ("Candidate", <party>) pairs; combinations with no candidates are 0.
    """
    table = pd.pivot_table(
        known_party_rows(data),
        values=["Candidate"],
        index=["Education"],
        columns=["Party"],
        aggfunc=_count_unique,
    )
    return table.fillna(0)


def top_criminal_cases(data, n=10):
    """Return the *n* rows of *data* with the most "Criminal Cases".

    Only the "Candidate", "Constituency", "Party" and "Criminal Cases"
    columns are kept.
    """
    columns = ["Candidate", "Constituency", "Party", "Criminal Cases"]
    return data[columns].sort_values("Criminal Cases", ascending=False).head(n)


def party_totals(data, column, n=10):
    """Sum *column* per party and return the *n* largest totals.

    Only *column* is aggregated, so text columns such as "Candidate" are
    never summed. Returns a DataFrame indexed by "Party" with the single
    column *column*, sorted from the largest total to the smallest.
    """
    totals = data.groupby("Party")[[column]].sum()
    return totals.sort_values(column, ascending=False).head(n)


def _configure_plotting():
    """Select the inline backend when available and set figure/font defaults."""
    # Imported here so the analysis helpers above stay importable on
    # machines without a plotting backend.
    import matplotlib

    try:
        # get_ipython only exists inside IPython/Jupyter; this call is the
        # plain-Python spelling of the "%matplotlib inline" magic.
        get_ipython().run_line_magic("matplotlib", "inline")
    except NameError:
        pass  # Plain Python session: keep matplotlib's default backend.

    matplotlib.rc('figure', figsize=(10, 5))
    matplotlib.rc('font', weight='bold', size=12)


def _bar_plot(frame):
    """Draw *frame* as a light-green bar chart without grid lines."""
    # grid must be a real boolean; the string "off" is not one.
    return frame.plot(kind="bar", color=["lightgreen"], grid=False)


def main(csv_path=DEFAULT_CSV_PATH):
    """Load the CSV at *csv_path*, print the summary tables and draw the charts."""
    _configure_plotting()
    import matplotlib.pyplot as plt

    data = pd.read_csv(csv_path)
    # Printed explicitly: outside a notebook cell a bare expression shows nothing.
    print(data.describe())

    # No of party contesting elections
    print("Total number of Parties: %s" % count_parties(data))

    # How many candidates per party, only choosing parties that are known
    per_party = candidates_per_party(data)
    print(per_party)
    _bar_plot(per_party)

    # Get education background
    _bar_plot(candidates_by_education(data))

    # Education data by party for known parties
    print(education_by_party(data))

    # Top ten candidates by registered criminal cases
    print(top_criminal_cases(data))

    # Parties with the most criminal cases (top 10)
    _bar_plot(party_totals(data, "Criminal Cases"))

    # Parties with the most total assets (top 10)
    _bar_plot(party_totals(data, "Total Assets"))

    # Needed for the figures to appear when run as a plain script.
    plt.show()


if __name__ == "__main__":
    main()