import pandas as pd xl=pd.ExcelFile('offending-history-tables.xls') xl.sheet_names def getTable(tname): dfx=xl.parse(tname,skiprows=4,thousands=',',index_col=0,na_values=['*','..']) #Need to identify group by empty line then back fill (fillna, ffill) up from last row in group? areas=['North East','North West','Yorkshire and the Humber','East Midlands','West Midlands', 'East of England','London','South East','South West','England','Wales','England & Wales'] dfx.dropna(how='all',axis=0,inplace=True) dfx['area']=dfx.index.values dfx['area'][~dfx['area'].isin(areas)]=None dfx['area'].fillna(method='backfill',inplace=True) dfx=dfx.rename(columns={'Unnamed: 0':'Local Authority'})[1:]#.set_index('Local Authority') metadata=xl.parse(tname,header=None)[0].iloc[0] return metadata,dfx metadata,dfx=getTable('Table Q6c') metadata dfx[:3] dfx.ix['Isle of Wight'] dfx.ix['South East'] #dfx.index.values metadata2,dfx2=getTable('Table Q6d') metadata2 dfx2.dtypes dfx2[:3] dfx2.ix['Isle of Wight'] dfx2.ix['South East'] dfx2.ix['England & Wales'][2014] import random #Library to support natural language text generation #!pip3 install git+https://github.com/pwdyson/inflect.py import inflect p = inflect.engine() def thous_sep(amount): return '{:,}'.format(amount) def otwRiseFall(now,then,amount=False,intify=False,nformat=None): if intify: now =int(now) then=int(then) delta=now-then if delta>0: txt=p.a(random.choice(['rise','increase'])) elif delta<0: txt=p.a(random.choice(['fall','decrease'])) if amount: if nformat is not None: #"{:,.1f}" val=nformat.format(abs(delta)) else: val=thous_sep(abs(delta)) txt+=' of {0}'.format(val) return txt def reporter(la,area,nation,year): metadata,dfx=getTable('Table Q6c') num="{:,}".format(int(dfx.ix[la][year])) txt="According to figures recently released by the Ministry of Justice" tableyear="for the 12 months ending December {year}".format(year=year) tablestat="the number of juvenile first time entrants to the criminal justice system residing in {LA}".format(LA=la) tablenum="was {num}".format(num=num) metadata2,dfx2=getTable('Table Q6d') txt2="The rate of juveniles receiving their first the youth caution or conviction per 100,000 of the 10-17 year old population by Local Authority of residence" num2="{:,.1f}".format(dfx2.ix[area][year]) num3="{:,.1f}".format(dfx2.ix[nation][year]) num4="{:,.1f}".format(dfx2.ix[la][year]) rate="was {num4}".format(num4=num4) areastat="This compares with a rate of {num2} in {area} and {num3} in {country}".format(num2=num2, num3=num3, area=area, country=nation) numLastyear="{:,}".format(int(dfx.ix[la][year-1])) rateLastyear="{:,.1f}".format(dfx2.ix[la][year-1]) txt3="Last year, (in {lastyear}), the figure was {numLastyear}, a rate of {rateLastyear} per 100,000".format(lastyear=year-1, numLastyear=numLastyear, rateLastyear=rateLastyear) differ=otwRiseFall(dfx.ix[la][year],dfx.ix[la][year-1],True,True) differ2=otwRiseFall(dfx2.ix[la][year],dfx2.ix[la][year-1],True,nformat="{:,.1f}") txt3diff="This year's numbers in {la} thus represent {differ}, and {differ2} per 100,000 on the rate.".format(la=la, differ=differ, differ2=differ2) tableOfficialCaveat="Since 8th April 2013 there have been a number of changes in out of court disposals. The previously known reprimand and warning disposal categories for juveniles have been replaced with a new out of court disposal: The Youth Caution for young offenders." tableOfficialCaveat=tableOfficialCaveat+" The figures are also estimated figures. Juveniles receiving disposals for the first time have been mapped to individual Local Authorities using the home address or postcode recorded by the police on the Police National Computer. For those with no address recorded, a model based on the patterns of offenders dealt with by police stations has been used to allocate offenders to Local Authorities. Therefore caution must be taken when using these figures." rawvals='{txt}, {tableyear} {tablestat} {tablenum}.'.format(txt=txt, tableyear=tableyear, tablestat=tablestat, tablenum=tablenum) ratevals='{txt2} {rate}. {areastat}.'.format(txt2=txt2, rate=rate, areastat=areastat) previously='{txt3}, compared with {lr} in the {r} and {ln} in {n}. {txt3diff}'.format(txt3=txt3, lr="{:,.1f}".format(dfx2.ix[area][year-1]), r=area, ln="{:,.1f}".format(dfx2.ix[nation][year-1]), n=nation, txt3diff=txt3diff) print('{rawvals} {ratevals} {previously}\n\n({caveat})'.format(rawvals=rawvals, ratevals=ratevals, previously=previously, caveat=tableOfficialCaveat)) #THe official reported rates per 100,000 of juveniles in an LA is not the best basis, given that LAs # are unlikely to have more than one or two tens of thousands people in that age range? reporter('Isle of Wight','South East','England',2014) #Might also make sense to contextualise further eg by population estimate of number of 10-17 year olds? #Tabular view def table_percentChanges(area,region,nation): dfxiw=dfx.loc[[area,region,nation]][[2014,2013]].rename(columns={2014:'2014',2013:'2013'}) dfx2iw=dfx2.loc[[area,region,nation]][[2014,2013]].rename(columns={2014:"2014 (rate per 100,000)", 2013:"2013 (rate per 100,000)"}) pdm=pd.merge(dfxiw,dfx2iw,left_index=True,right_index=True) pdm['% change in est. count, 2013-14']=100*(pdm['2014']-pdm['2013'])/pdm['2013'] pdm['% change in rate, 2013-14']=100*(pdm['2014 (rate per 100,000)']-pdm['2013 (rate per 100,000)'])/pdm['2013 (rate per 100,000)'] pdm['Area % vs (est. rate), 2014']=pdm['2014 (rate per 100,000)'].apply(lambda x: 100+100*(pdm.ix[area]['2014 (rate per 100,000)']-x)/x) pdm['Area % vs (est. rate), 2013']=pdm['2013 (rate per 100,000)'].apply(lambda x: 100+100*(pdm.ix[area]['2013 (rate per 100,000)']-x)/x) return pdm table_percentChanges('Isle of Wight','South East','England') #Make comparisons with extremes, such as places with highest and lowest rates? def reporter_extreme(df,col,typ='lowest',inarea=None): if inarea is not None: df=df[df['area']==inarea] if typ=='lowest': return df[df[col]==min(df[col])] return df[df[col]==max(df[col])] reporter_extreme(dfx2,2014,typ='highest') reporter_extreme(dfx2,2014,'lowest') reporter_extreme(dfx2,2014,typ='highest',inarea='South East') reporter_extreme(dfx2,2014,typ='lowest',inarea='South East') #Some example charts? % matplotlib inline import seaborn reporter_extreme(dfx2,2014,'lowest').plot(kind='bar') dfx2.ix['Isle of Wight'].plot(kind='bar') #As well as displaying the image, we can save it as an image file in its own right dfx2.ix['Isle of Wight'].plot(kind='bar').get_figure().savefig('test.png') dfx2.ix[['Isle of Wight','South East','England']].T.plot(kind='bar') dfx2.ix[['Isle of Wight','South East','England']].plot(kind='bar')