# Good practice 1:
# - never do `from module import *`  =>  always name things
# requests is a very nice http lib
import io
import json

import requests
from requests import get

import pandas as pd
from bs4 import BeautifulSoup
from fiona import collection
from matplotlib.ticker import Formatter, FixedLocator  # used by later plotting cells

# --- Get subventions data ("reserve parlementaire" 2011) --------------------
r = get('http://www.regardscitoyens.org/wp-content/uploads/reserve-parlementaire-2011-parletdep.csv')
# Write the payload as UTF-8 *text*. The original encoded to bytes and wrote
# them into a text-mode file, which only works on Python 2; io.open with an
# explicit encoding is correct on both Python 2 and 3.
with io.open('../data/reserve-parlementaire-2011-parletdep.csv', 'w',
             encoding='utf-8') as f:
    f.write(r.text)

# --- Scrape deputies' activity from nosdeputes.fr ---------------------------
r = get('http://www.nosdeputes.fr/synthese')
# Name the parser explicitly so bs4 does not pick a platform-dependent one.
soup = BeautifulSoup(r.text, 'html.parser')

# Scrape: select with css attributes
table = soup.find('div', class_='tableau_synthese').table
activity_data = []
for tr in table.find_all('tr'):
    tds = tr.find_all('td')[:2]
    if len(tds) < 2:
        # Header rows use <th>, not <td>: skip them instead of crashing
        # with an IndexError on tds[0].
        continue
    activity_data.append({'name': tds[0].text, 'weeks': int(tds[1].text)})
activity_data[1]  # notebook echo: sanity-check one scraped record

# --- Import GEOFLA data -----------------------------------------------------
geofla_filepath = '../data/COMMUNES/COMMUNE_4326.SHP'


def extract_data(com):
    """Extract only the useful fields from one GEOFLA commune record.

    `com` is a fiona feature: {'id': ..., 'properties': {...}}.
    NOTE(review): the * 1000 suggests GEOFLA stores POPULATION in thousands
    of inhabitants -- confirm against the GEOFLA documentation.
    """
    properties = com['properties']
    return {
        'id': com['id'],
        'insee_code': properties['INSEE_COM'],
        'name': properties['NOM_COMM'],
        'population': properties['POPULATION'] * 1000,
        'POPULATION': properties['POPULATION'],
        'department': properties['NOM_DEPT'],
        'code_dep': properties['CODE_DEPT'],
    }


with collection(geofla_filepath) as communes:
    # DataFrames are like excel tables but far better
    com_data = pd.DataFrame([extract_data(com) for com in communes])
com_data.head()  # notebook echo

# --- Export to geojson ------------------------------------------------------
with collection(geofla_filepath) as communes:
    data = []
    for feat in communes:
        feat['type'] = 'Feature'
        data.append(feat)

my_layer = {
    "type": "FeatureCollection",
    # keep only communes with a recorded population
    "features": [com for com in data if com['properties']['POPULATION'] > 1],
}
with open("my_layer.json", "w") as f:
    f.write(json.dumps(my_layer))

# XXX: Test folium package to make a choropleth map with d3.js
# XXX: Works only with PYTHON 2.7
import folium

map_2 = folium.Map(location=[46.907, 1.662], zoom_start=6)
map_2.geo_json(geo_path="my_layer.json", data_out='data2.json', data=com_data[com_data['POPULATION']>1], columns=['id', 'POPULATION'], key_on='feature.id', fill_color='BuPu', fill_opacity=0.9, line_opacity=0.4, legend_name='POPULATION') map_2.create_map(path='map_population.html') import codecs reserve_file = codecs.open("../data/reserve-parlementaire-2011-parletdep.csv", encoding='utf-8') res_data = pd.io.parsers.read_csv(reserve_file, sep=';') # print columns res_data.keys() res_data[['Département', 'Subvention allouée', 'Parlementaire transmetteur', 'Bénéficiaire']].head() res_data['Groupe politique du parlementaire'].unique() fig, ax = subplots(figsize=(8,6)) sorted_sub = res_data['Subvention allouée'].copy() sorted_sub.sort() id_max = int(sorted_sub.size * 0.9) + 1 ax.plot(sorted_sub[:id_max], pd.np.arange(id_max) * 1. / id_max) ax.grid(True) ax.set_title(u"Répartition des subventions") print """Le nombre de commune ayant reçu des subvention est de %s sur %s communes au total."""%(res_data['Bénéficiaire'].unique().size, com_data.shape[0]) gp = res_data.groupby(['Bénéficiaire', 'Département']) agg = gp.agg({'Subvention allouée' : np.sum}) agg = agg.sort(column='Subvention allouée', ascending=False) agg.head(n=10) fig, ax = subplots(figsize=(8,6)) agg_cumsum = agg.cumsum() total_sub = agg.sum() quantiles = pd.np.arange(0.01, 1, 0.01) ax.plot(quantiles, [agg_cumsum.quantile(q)/total_sub for q in quantiles]) ax.grid(True) ax.set_title(u"Répartition des subventions par bénéficaire") ax.set_xlabel(u'% des bénéficiaires ayant reçu une subvention') ax.set_ylabel(u'% de la réserve parlementaire allouée') second_tour = pd.ExcelFile('../data/municipales_2008_grosses_communes.xls').parse('Tour 2') second_tour.columns[:15] # Get columns of votes pct and parties votes_pct = second_tour.filter(regex='\% Voix\/Ins(|\.\d{1,2})') parties = second_tour.filter(regex='Code Nuance(|\.\d{1,2})') # Get the winner id parties['winner_id'] = votes_pct.apply(lambda row: 
row.dropna().argmax(), axis=1) # And the winner party! winner_parties = parties.apply(lambda row: row.ix[row['winner_id']], axis=1) second_tour['winner_party'] = winner_parties print winner_parties.unique() # join only data of interest data = pd.DataFrame({'code_dep': res_data['Département'], 'name': res_data['Bénéficiaire'], 'subvention': res_data['Subvention allouée'], 'politician': res_data['Parlementaire transmetteur'], 'party': res_data['Groupe politique du parlementaire']}) com_data.head() # Join with municipal data # XXX: this does not work for DOM cities def insee_code(row): try: com_code = "%03d"%int(row[u'Code de la commune']) except: com_code = ("00%s"%row[u'Code de la commune'])[-3:] try: dep_code = "%02d"%int(row[u'Code du département']) except: dep_code = ("0%s"%row[u'Code du département'])[-2:] return "%s%s"%(dep_code, com_code) second_tour['insee_code'] = second_tour.apply(insee_code, axis=1) join_data_2 = join_data.merge(second_tour[['insee_code', 'winner_party']], on=['insee_code']) # Join with geofla data join_data = data.merge(com_data, on=['code_dep', 'name']) second_tour[[u'Code du département', u'Code de la commune', 'insee_code']].head(n=10) join_data_2[['subvention', 'party', 'code_dep', 'population']].head() # Merge policitical party second_tour['winner_party'].unique() print res_data['Groupe politique du parlementaire'].unique() # To simplify, we guess that have 4 parties: EG, G, C, D, ED municipal_mapping = {'LUG': 'G', 'LDVD': 'D', 'LMAJ': 'D', 'LVEC': 'G', 'LMC': 'D', 'LSOC': 'G', 'LGC': '?', 'LCOM': 'EG', 'LAUT': '?', 'LREG': '?'} party_mapping = {'UMP': 'D', 'UDI': 'C', 'NI': '?', 'SRC': '?', 'SOC': 'G', 'SOCV': 'G', 'RDSE': 'G', 'GDR': 'D', 'UC': 'C', 'CRC-SPG': '?', 'NC': '?', 'CRC': '?', 'ECO': '?'}