"""Harvest Milton Keynes Council spending CSVs, detect their encodings,
and aggregate them into one dataset with per-financial-year cumulative totals.

Originally a Jupyter notebook; the IPython shell magics (``!mkdir``,
``!head``) have been replaced with portable Python equivalents so the
script also runs outside IPython.
"""
import os
import re
import urllib.request

import numpy as np          # needed by pd.pivot_table(aggfunc=np.sum) below
import pandas as pd
import requests
from bs4 import BeautifulSoup
from chardet.universaldetector import UniversalDetector
from dateutil.parser import parse
from ggplot import *


def linkgrabber_MK(response):
    """Return [(csv_url, filename)] for 'Council Spend Data' links on the
    MK council spend-archive page.

    Parameters:
        response: a ``requests`` response for the archive page.
    """
    # Explicit parser avoids bs4's "no parser specified" warning and
    # machine-dependent parser selection.
    soup = BeautifulSoup(response.content, 'html.parser')
    results = []
    # The archive page lists its downloads inside this content block.
    resultsblock = soup.find('div', {'class': 'content right'})
    for result in resultsblock.findAll('a', text=re.compile(r'^Council Spend Data.*')):
        csvurl = result.attrs['href']
        fn = csvurl.split('/')[-1]      # last path segment is the filename
        results.append((csvurl, fn))
    return results


def harvester(links, path='.'):
    """Download each ``(url, filename)`` pair in *links* into directory *path*."""
    for csvurl, fn in links:
        print('Grabbing {0} as {1}'.format(csvurl, fn))
        urllib.request.urlretrieve(csvurl.strip('/'), '/'.join([path, fn]))


def linkgrabber_MK2(response):
    """Return [(csv_url, filename)] for ``.csv`` links on the current MK
    council spend page; relative hrefs are made absolute.

    Parameters:
        response: a ``requests`` response for the spend page.
    """
    soup = BeautifulSoup(response.content, 'html.parser')
    results = []
    resultsblock = soup.find('ul', {'class': 'documents'})
    # Raw string so ``\.`` is a literal dot, not an invalid string escape.
    for result in resultsblock.findAll('a', href=re.compile(r'.*\.csv$')):
        csvurl = 'http://www.milton-keynes.gov.uk/' + result.attrs['href']
        fn = result.attrs['href'].split('/')[-1]
        results.append((csvurl, fn))
    return results


# derived from /usr/local/bin/chardetect.py
def sniffer(fname):
    """Guess the character encoding of the file *fname*.

    Returns:
        (encoding, confidence). Falls back to ``('utf-8', 0.0)`` when
        chardet cannot make a guess — the original implicitly returned
        ``None``, which crashed the caller's tuple unpacking.
    """
    u = UniversalDetector()
    with open(fname, 'rb') as f:    # context manager: no leaked handle
        for line in f:
            u.feed(line)
            if u.done:              # stop as soon as chardet is confident
                break
    u.close()
    result = u.result
    if result['encoding']:
        return (result['encoding'], result['confidence'])
    return ('utf-8', 0.0)


# Financial year model — http://stackoverflow.com/a/26342880/454773
def getFiscalYear(dt):
    """Return the UK financial year (April–March) that date *dt* falls in."""
    year = dt.year
    if dt.month < 4:    # Jan–Mar belong to the previous financial year
        year -= 1
    return year


# --- Harvest the archive page and the current spend page -------------------
os.makedirs('mkdata', exist_ok=True)    # was: !mkdir mkdata

url = "http://www.milton-keynes.gov.uk/your-council-and-elections/council-information-and-accounts/data-performance-and-spending/Milton-Keynes-Council-spend-archive"
html2 = requests.get(url)
cc = linkgrabber_MK(html2)
harvester(cc, 'mkdata')

url = 'http://www.milton-keynes.gov.uk/your-council-and-elections/council-information-and-accounts/data-performance-and-spending/milton-keynes-council-spend'
html2 = requests.get(url)
cc = linkgrabber_MK2(html2)
harvester(cc, 'mkdata')

# Preview one downloaded file (was: !head mkdata/04-2014.csv).
# Read as bytes: the encoding is unknown until sniffed below.
with open('mkdata/04-2014.csv', 'rb') as f:
    for _ in range(10):
        line = f.readline()
        if not line:
            break
        print(line)

# Report each CSV's detected encoding.
for fname in os.listdir('mkdata'):
    if fname.endswith('csv'):
        print(fname, sniffer('mkdata/' + fname))

# Load every CSV with its sniffed encoding and concatenate into one frame.
# The .csv filter matches the sniff loop above — the original iterated every
# directory entry and would have choked on any non-CSV file.
dd = pd.DataFrame()
for fname in os.listdir('mkdata/'):
    if fname.endswith('csv'):
        enc, cert = sniffer('mkdata/' + fname)
        dd = pd.concat([dd, pd.read_csv('mkdata/' + fname, encoding=enc)])

dd.to_csv('mkSpendingData.csv', index=False)
dd.dtypes

dd['Date'] = dd['Date'].apply(parse)
dd[:5]

# DataFrame.sort() was removed in pandas 0.20 — sort_values() is the
# modern equivalent.
dd.sort_values('Date', inplace=True)
dd['Cumul Amount'] = dd['Amount'].cumsum()

dd['Financial Year'] = dd['Date'].apply(getFiscalYear)
dd['Cumul Amount Tax Year'] = dd.groupby('Financial Year')['Amount'].cumsum()

# I don't know (yet?!) how to cast an April-March tax year x-axis that would
# allow us to compare accumulated totals using differently coloured lines in
# the same chart. Also note that the incomplete 2014 tax year has an x-axis
# of fewer days than the other years...
ggplot(dd, aes(x='Date', y='Cumul Amount Tax Year')) \
    + geom_line() \
    + facet_wrap('Financial Year', scales="free_x")

pd.options.display.float_format = '{:20,.2f}'.format
# np.sum previously raised NameError: numpy was never imported.
servSpend = pd.pivot_table(dd, values='Amount',
                           index=['Service area categorisation'],
                           columns=['Financial Year'],
                           aggfunc=np.sum)
servSpend
dd.groupby(['Service area categorisation'])['Financial Year'].unique()