#
# data source:
# https://raw.githubusercontent.com/datameet/india-election-data/master/affidavits/myneta.2014.csv
# https://github.com/datameet/india-election-data
#
import numpy as np
import pandas as pd
import matplotlib
%matplotlib inline
# Plot defaults shared by every chart below: figsize (10, 5), bold 12pt font.
matplotlib.rcParams['figure.figsize'] = (10, 5)
matplotlib.rcParams['font.weight'] = 'bold'
matplotlib.rcParams['font.size'] = 12

# Load the candidate affidavit CSV and summarise its numeric columns.
fname = r"/Users/Sukhbinder/myneta2014.csv"
data = pd.read_csv(fname)
data.describe()
| | Criminal Cases | ID | Sno | Total Assets | Total Liabilities | Year | PC_CODE |
---|---|---|---|---|---|---|---|
count | 8163.000000 | 8163.000000 | 8163.000000 | 8.163000e+03 | 8.163000e+03 | 8163 | 8163.000000 |
mean | 0.544653 | 4931.800196 | 4082.000000 | 4.931083e+07 | 3.987124e+06 | 2014 | 19.544898 |
std | 6.224428 | 2830.910900 | 2356.599457 | 9.335788e+08 | 4.180161e+07 | 0 | 17.105895 |
min | 0.000000 | 3.000000 | 1.000000 | 0.000000e+00 | 0.000000e+00 | 2014 | 1.000000 |
25% | 0.000000 | 2530.500000 | 2041.500000 | 4.422500e+05 | 0.000000e+00 | 2014 | 7.000000 |
50% | 0.000000 | 4857.000000 | 4082.000000 | 2.425000e+06 | 0.000000e+00 | 2014 | 15.000000 |
75% | 0.000000 | 7410.500000 | 6122.500000 | 1.150914e+07 | 4.000000e+05 | 2014 | 28.000000 |
max | 382.000000 | 9828.000000 | 8163.000000 | 7.710296e+10 | 2.218003e+09 | 2014 | 80.000000 |
8 rows × 7 columns
# Number of distinct parties contesting the election.
# A single pre-formatted argument keeps the printed line identical whether
# `print` is the Python 2 statement or the Python 3 function.
print("Total number of Parties: %d" % len(data["Party"].unique()))
Total number of Parties: 491
# How many distinct candidates each party fielded, restricted to a fixed
# list of well-known parties.
mydata = data[data["Party"].isin(["AAP", "BJP", "INC", "IND", "BSP", "SP", "JDU", "CPI(M)"])]
p = (
    mydata[["Party", "Candidate"]]
    .groupby(["Party"])
    # Count unique candidate names per party.
    .aggregate(lambda x: len(x.unique()))
    # sort_values is the current pandas spelling of the old DataFrame.sort.
    .sort_values(["Candidate"], ascending=False)
)
print(p)
# grid=False rather than the string "off": a non-empty string is truthy,
# so it cannot be relied on to switch the grid off.
ip = p.plot(kind="bar", color=["lightgreen"], grid=False)
        Candidate
Party
IND          3111
BSP           496
INC           461
AAP           425
BJP           423
SP            194
CPI(M)         93

[7 rows x 1 columns]
# Education background: number of distinct candidates per education level,
# largest group first.
pv = pd.pivot_table(
    data,
    values=["Candidate"],
    index=["Education"],  # `index=` replaces the old `rows=` keyword
    aggfunc=lambda x: len(x.unique()),
)
# sort_values is the current pandas spelling of the old DataFrame.sort.
pv = pv.sort_values(["Candidate"], ascending=False)
# grid=False rather than the string "off": a non-empty string is truthy,
# so it cannot be relied on to switch the grid off.
a = pv.plot(kind="bar", color=["lightgreen"], grid=False)
# Education data by party, for the well-known parties only: one row per
# education level, one column per party, each cell the number of distinct
# candidates.
mydata = data[data["Party"].isin(["AAP", "BJP", "INC", "BSP", "SP", "JDU", "CPI(M)", "IND"])]
pv = pd.pivot_table(
    mydata,
    values=["Candidate"],
    index=["Education"],  # `index=` replaces the old `rows=` keyword
    columns=["Party"],    # `columns=` replaces the old `cols=` keyword
    aggfunc=lambda x: len(x.unique()),
)
# Education/party combinations with no candidates come out as NaN; show 0.
print(pv.fillna(0))
                       Candidate
Party                        AAP  BJP  BSP  CPI(M)  INC  IND  SP
Education
10th Pass                     33   43   79       8   23  599  16
12th Pass                     33   43   78      11   45  481  29
5th Pass                       2    6   20       1    3  246  11
8th Pass                       7    8   43       3    7  360  13
Doctorate                     25   30   11       6   33   42   5
Graduate                      81  106   82      23  114  480  48
Graduate Professional         76   82   58      16   90  253  22
Illiterate                     0    0    6       0    0   63   1
Literate                       2    3   17       1    2  150   5
Not Given                      5    0    0       0    1   61   3
Others                        16    7   14       1    5   71   2
Post Graduate                146   97   91      23  139  363  39

[12 rows x 7 columns]
#
# Candidates with the most registered criminal cases (top 10)
#
mydata = data[["Candidate", "Constituency", "Party", "Criminal Cases"]]
# sort_values is the current pandas spelling of the old DataFrame.sort;
# a single-argument print() behaves the same on Python 2 and Python 3.
print(mydata.sort_values("Criminal Cases", ascending=False).head(10))
                          Candidate   Constituency   Party  Criminal Cases
7686                Udayakumar S.P.  KANNIYAKUMARI     AAP             382
3669                  M.Pushparayan   THOOTHUKKUDI     AAP             380
7122            Sridip Bhattacharya         HOWRAH  CPI(M)              57
3136               Kameshwar Baitha        PALAMAU    AITC              48
738                      Atiq Ahmad      SHRAWASTI      SP              42
5041                   Pawan Pandey      SULTANPUR     BSP              32
4646                   Nepal Mahata        PURULIA     INC              25
5591  Rajesh Ranjan Urf Pappu Yadav      MADHEPURA     RJD              24
1508                    Dadan Yadav          BUXAR     BSP              22
360       Amrishbhai Rasiklal Patel          DHULE     INC              21

[10 rows x 4 columns]
#
# Parties with the most criminal cases (top 10)
#
a = (
    # Keep only the grouping key and the numeric column so the sum is not
    # also applied to the text columns (Candidate, Constituency).
    mydata[["Party", "Criminal Cases"]]
    .groupby("Party")
    .sum()
    # sort_values is the current pandas spelling of the old DataFrame.sort.
    .sort_values("Criminal Cases", ascending=False)
    .head(10)
    # grid=False rather than the string "off": a non-empty string is
    # truthy, so it cannot be relied on to switch the grid off.
    .plot(kind="bar", color=["lightgreen"], grid=False)
)
#
# Parties with the most total declared assets (top 10)
#
mydata = data[["Candidate", "Constituency", "Party", "Total Assets"]]
a = (
    # Keep only the grouping key and the numeric column so the sum is not
    # also applied to the text columns (Candidate, Constituency).
    mydata[["Party", "Total Assets"]]
    .groupby("Party")
    .sum()
    # sort_values is the current pandas spelling of the old DataFrame.sort.
    .sort_values("Total Assets", ascending=False)
    .head(10)
    # grid=False rather than the string "off": a non-empty string is
    # truthy, so it cannot be relied on to switch the grid off.
    .plot(kind="bar", color=["lightgreen"], grid=False)
)