#
# data source:
# https://raw.githubusercontent.com/datameet/india-election-data/master/affidavits/myneta.2014.csv
# https://github.com/datameet/india-election-data
#
import numpy as np
import pandas as pd
import matplotlib

# Render figures inline when running under IPython/Jupyter.  The bare
# "%matplotlib inline" magic is not valid Python outside IPython, so call the
# equivalent shell API instead and skip it when no IPython shell is present
# (get_ipython is only defined as a builtin inside an IPython session).
try:
    get_ipython().run_line_magic("matplotlib", "inline")
except NameError:
    pass

# Default figure size and font used by every plot below.
matplotlib.rc('figure', figsize=(10, 5))
matplotlib.rc('font', weight='bold', size=12)


# Local copy of the affidavit CSV; adjust this path for your machine.
fname=r"/Users/Sukhbinder/myneta2014.csv"


data =pd.read_csv(fname)
# Summary statistics of the numeric columns.  The result is only displayed
# when this is the last expression of a notebook cell.
data.describe()

# Number of distinct parties contesting the election.
# A single pre-formatted argument keeps the output text identical on both
# Python 2 and Python 3 (the old `print a, b` statement is a syntax error on 3).
print("Total number of Parties: {}".format(np.size(data["Party"].unique())))


# Number of distinct candidates per party, restricted to a hand-picked list
# of well-known parties, sorted from most to fewest candidates.
mydata = data[data["Party"].isin(["AAP", "BJP", "INC", "IND", "BSP", "SP", "JDU", "CPI(M)"])]
p = (
    mydata[["Party", "Candidate"]]
    .groupby(["Party"])
    .aggregate(lambda x: len(x.unique()))
    # DataFrame.sort was removed from pandas; sort_values is its replacement.
    .sort_values(["Candidate"], ascending=False)
)
print(p)
# grid must be a real boolean: the string "off" is truthy and would switch
# the grid ON in current matplotlib.
ip = p.plot(kind="bar", color=["lightgreen"], grid=False)

# Number of distinct candidates per education level, most common first.
# pivot_table's old `rows=` keyword was removed; `index=` is its replacement.
pv = pd.pivot_table(
    data,
    values=["Candidate"],
    index=["Education"],
    aggfunc=lambda x: len(x.unique()),
)
# DataFrame.sort was removed from pandas; sort_values is its replacement.
pv = pv.sort_values(["Candidate"], ascending=False)

# grid must be a real boolean: the string "off" is truthy and would switch
# the grid ON in current matplotlib.
a = pv.plot(kind="bar", color=["lightgreen"], grid=False)

# Education data by party for known parties: one row per education level,
# one column per party, each cell the number of distinct candidates.
mydata = data[data["Party"].isin(["AAP", "BJP", "INC", "BSP", "SP", "JDU", "CPI(M)", "IND"])]
# pivot_table's old `rows=` / `cols=` keywords were removed; `index=` and
# `columns=` are their replacements.
pv = pd.pivot_table(
    mydata,
    values=["Candidate"],
    index=["Education"],
    columns=["Party"],
    aggfunc=lambda x: len(x.unique()),
)
# Education/party combinations with no candidates come out as NaN; show 0.
print(pv.fillna(0))


#
# Candidates with the most registered criminal cases (Top 10)
#
mydata = data[["Candidate", "Constituency", "Party", "Criminal Cases"]]
# DataFrame.sort was removed from pandas; sort_values is its replacement.
print(mydata.sort_values("Criminal Cases", ascending=False).head(10))


#
# Parties with the most criminal cases (Top 10)
#
# Only the "Criminal Cases" column is summed.  Older pandas silently dropped
# the string columns (Candidate, Constituency) when summing; newer versions
# do not, so select the column explicitly.
# NOTE(review): assumes "Criminal Cases" is numeric -- confirm against the CSV.
# grid must be a real boolean: the string "off" is truthy and would switch
# the grid ON in current matplotlib.
a = (
    mydata.groupby("Party")[["Criminal Cases"]]
    .sum()
    .sort_values("Criminal Cases", ascending=False)
    .head(10)
    .plot(kind='bar', color=["lightgreen"], grid=False)
)

#
# Parties with the largest combined Total Assets (Top 10)
#
mydata = data[["Candidate", "Constituency", "Party", "Total Assets"]]
# Only the "Total Assets" column is summed.  Older pandas silently dropped
# the string columns (Candidate, Constituency) when summing; newer versions
# do not, so select the column explicitly.
# NOTE(review): assumes "Total Assets" is numeric -- confirm against the CSV.
# DataFrame.sort was removed from pandas (sort_values replaces it), and grid
# must be a real boolean because the string "off" is truthy.
a = (
    mydata.groupby("Party")[["Total Assets"]]
    .sum()
    .sort_values("Total Assets", ascending=False)
    .head(10)
    .plot(kind='bar', color=["lightgreen"], grid=False)
)