import pandas as pd
xl=pd.ExcelFile('offending-history-tables.xls')
xl.sheet_names
[u'Index', u'Table Q6.1', u'Table Q6.2', u'Table Q6.3', u'Table Q6.4', u'Table Q6.5', u'Table Q6.6', u'Table Q6a', u'Table Q6b', u'Table Q6c', u'Table Q6d', u'Table Q6e', u'Table Q6f', u'Table Q6g', u'Table Q6h', u'Table Q6i', u'Table A6.1', u'Table A6.2', u'Table A6.3', u'Table A6.4', u'Table A6.5', u'Table A6.6', u'Table A6.7', u'Table A6.8', u'Table A6.9', u'Table B6.1']
def getTable(tname):
dfx=xl.parse(tname,skiprows=4,thousands=',',index_col=0,na_values=['*','..'])
#Need to identify group by empty line then back fill (fillna, ffill) up from last row in group?
dfx.dropna(how='all',axis=0,inplace=True)
dfx=dfx.rename(columns={'Unnamed: 0':'Local Authority'})[1:]#.set_index('Local Authority')
metadata=xl.parse(tname,header=None)[0].iloc[0]
return metadata,dfx
metadata,dfx=getTable('Table Q6c')
metadata
u'Table Q6c - Number of juvenile first time entrants(1)(2) residing in England and Wales to the criminal justice system by Local Authority of residence, 12 months ending December 2004 to 12 months ending December 2014(3)'
dfx[:3]
2004 | 2005 | 2006 | 2007 | 2008 | 2009 | 2010 | 2011 | 2012 | 2013 | 2014 | |
---|---|---|---|---|---|---|---|---|---|---|---|
Durham | 885.029775 | 1105.851352 | 1246.063017 | 1367.517002 | 789.442116 | 389.411466 | 329.345411 | 299.100553 | 255.440134 | 210.651859 | 211.874064 |
Gateshead | 458.699695 | 484.740686 | 522.878797 | 529.026095 | 350.179998 | 309.260206 | 156.601603 | 118.111820 | 102.448491 | 93.324604 | 81.557813 |
Hartlepool | 208.588413 | 227.585046 | 282.106145 | 280.229630 | 200.226178 | 163.889427 | 95.765080 | 95.638256 | 60.028491 | 60.644453 | 41.081243 |
dfx.ix['Isle of Wight']
2004 288.775068 2005 354.839327 2006 308.111447 2007 295.057875 2008 213.333412 2009 195.211853 2010 191.285960 2011 96.341239 2012 143.427203 2013 99.158076 2014 56.257591 Name: Isle of Wight, dtype: float64
dfx.ix['South East']
2004 14444.076716 2005 16146.059200 2006 15218.571109 2007 15426.404272 2008 12226.012308 2009 10544.910856 2010 7328.750776 2011 5146.742147 2012 4033.554122 2013 3379.292148 2014 2844.758947 Name: South East, dtype: float64
#dfx.index.values
metadata2,dfx2=getTable('Table Q6d')
metadata2
u'Table Q6d - Rates of juveniles receiving their first the youth caution or conviction(1)(2) per 100,000 of the 10-17 year old population(3) by Local Authority of residence, 12 months ending December 2004 to 12 months ending December 2014(4)'
dfx2.dtypes
2004 object 2005 object 2006 object 2007 object 2008 object 2009 object 2010 object 2011 object 2012 object 2013 object 2014 object dtype: object
dfx2[:3]
2004 | 2005 | 2006 | 2007 | 2008 | 2009 | 2010 | 2011 | 2012 | 2013 | 2014 | |
---|---|---|---|---|---|---|---|---|---|---|---|
Durham | 1748.76 | 2205 | 2497.17 | 2776.628 | 1607.465 | 799.0878 | 689.3677 | 641.475 | 560.9384 | 472.5573 | 483.102 |
Gateshead | 2340.305 | 2499.952 | 2733.864 | 2784.201 | 1853.196 | 1637.164 | 843.7132 | 647.5429 | 568.5897 | 523.0907 | 463.7392 |
Hartlepool | 1985.233 | 2163.15 | 2701.907 | 2702.05 | 1940.176 | 1614.992 | 970.3625 | 994.7811 | 636.7719 | 666.0566 | 463.2526 |
dfx2.ix['Isle of Wight']
2004 2119.761 2005 2572.231 2006 2228.977 2007 2149.941 2008 1552.758 2009 1428.136 2010 1427.933 2011 726.8294 2012 1090.204 2013 781.8804 2014 457.3416 Name: Isle of Wight, dtype: object
dfx2.ix['South East']
2004 1738.964 2005 1929.604 2006 1809.12 2007 1831.373 2008 1446.766 2009 1249.483 2010 871.0383 2011 614.1008 2012 483.6396 2013 410.5667 2014 347.9512 Name: South East, dtype: object
dfx2.ix['England & Wales'][2014]
405.4629292012915
import random
#Library to support natural language text generation
#!pip3 install git+https://github.com/pwdyson/inflect.py
import inflect
p = inflect.engine()
def thous_sep(amount):
return '{:,}'.format(amount)
def otwRiseFall(now,then,amount=False,intify=False,nformat=None):
if intify:
now =int(now)
then=int(then)
delta=now-then
if delta>0:
txt=p.a(random.choice(['rise','increase']))
elif delta<0:
txt=p.a(random.choice(['fall','decrease']))
if amount:
if nformat is not None:
#"{:,.1f}"
val=nformat.format(abs(delta))
else: val=thous_sep(abs(delta))
txt+=' of {0}'.format(val)
return txt
def reporter(la,area,nation,year):
metadata,dfx=getTable('Table Q6c')
num="{:,}".format(int(dfx.ix[la][year]))
txt="According to figures recently released by the Ministry of Justice"
tableyear="for the 12 months ending December {year}".format(year=year)
tablestat="the number of juvenile first time entrants to the criminal justice system residing in {LA}".format(LA=la)
tablenum="was {num}".format(num=num)
metadata2,dfx2=getTable('Table Q6d')
txt2="The rate of juveniles receiving their first the youth caution or conviction per 100,000 of the 10-17 year old population by Local Authority of residence"
num2="{:,.1f}".format(dfx2.ix[area][year])
num3="{:,.1f}".format(dfx2.ix[nation][year])
num4="{:,.1f}".format(dfx2.ix[la][year])
rate="was {num4}".format(num4=num4)
areastat="This compares with a rate of {num2} in {area} and {num3} in {country}".format(num2=num2,
num3=num3,
area=area,
country=nation)
numLastyear="{:,}".format(int(dfx.ix[la][year-1]))
rateLastyear="{:,.1f}".format(dfx2.ix[la][year-1])
txt3="Last year, (in {lastyear}), the figure was {numLastyear}, a rate of {rateLastyear} per 100,000".format(lastyear=year-1,
numLastyear=numLastyear,
rateLastyear=rateLastyear)
differ=otwRiseFall(dfx.ix[la][year],dfx.ix[la][year-1],True,True)
differ2=otwRiseFall(dfx2.ix[la][year],dfx2.ix[la][year-1],True,nformat="{:,.1f}")
txt3diff="This year's numbers in {la} thus represent {differ}, and {differ2} per 100,000 on the rate.".format(la=la,
differ=differ,
differ2=differ2)
tableOfficialCaveat="Since 8th April 2013 there have been a number of changes in out of court disposals. The previously known reprimand and warning disposal categories for juveniles have been replaced with a new out of court disposal: The Youth Caution for young offenders."
tableOfficialCaveat=tableOfficialCaveat+" The figures are also estimated figures. Juveniles receiving disposals for the first time have been mapped to individual Local Authorities using the home address or postcode recorded by the police on the Police National Computer. For those with no address recorded, a model based on the patterns of offenders dealt with by police stations has been used to allocate offenders to Local Authorities. Therefore caution must be taken when using these figures."
rawvals='{txt}, {tableyear} {tablestat} {tablenum}.'.format(txt=txt,
tableyear=tableyear,
tablestat=tablestat,
tablenum=tablenum)
ratevals='{txt2} {rate}. {areastat}.'.format(txt2=txt2,
rate=rate,
areastat=areastat)
previously='{txt3}, compared with {lr} in the {r} and {ln} in {n}. {txt3diff}'.format(txt3=txt3,
lr="{:,.1f}".format(dfx2.ix[area][year-1]),
r=area,
ln="{:,.1f}".format(dfx2.ix[nation][year-1]),
n=nation,
txt3diff=txt3diff)
print('{rawvals} {ratevals} {previously}\n\n({caveat})'.format(rawvals=rawvals,
ratevals=ratevals,
previously=previously,
caveat=tableOfficialCaveat))
#THe official reported rates per 100,000 of juveniles in an LA is not the best basis, given that LAs
# are unlikely to have more than one or two tens of thousands people in that age range?
reporter('Isle of Wight','South East','England',2014)
According to figures recently released by the Ministry of Justice, for the 12 months ending December 2014 the number of juvenile first time entrants to the criminal justice system residing in Isle of Wight was 56. The rate of juveniles receiving their first the youth caution or conviction per 100,000 of the 10-17 year old population by Local Authority of residence was 457.3. This compares with a rate of 348.0 in South East and 409.1 in England. Last year, (in 2013), the figure was 99, a rate of 781.9 per 100,000, compared with 410.6 in the South East and 447.8 in England. This year's numbers in Isle of Wight thus represent a fall of 43, and a fall of 324.5 per 100,000 on the rate. (Since 8th April 2013 there have been a number of changes in out of court disposals. The previously known reprimand and warning disposal categories for juveniles have been replaced with a new out of court disposal: The Youth Caution for young offenders. The figures are also estimated figures. Juveniles receiving disposals for the first time have been mapped to individual Local Authorities using the home address or postcode recorded by the police on the Police National Computer. For those with no address recorded, a model based on the patterns of offenders dealt with by police stations has been used to allocate offenders to Local Authorities. Therefore caution must be taken when using these figures.)
#Make comparisons with extremes, such as places with highlest and lowest rates?
def reporter_extreme(df,col,typ='lowest'):
if typ=='lowest':
return df[df[col]==min(df[col])]
return df[df[col]==max(df[col])]
reporter_extreme(dfx2,2014,typ='highest')
2004 | 2005 | 2006 | 2007 | 2008 | 2009 | 2010 | 2011 | 2012 | 2013 | 2014 | |
---|---|---|---|---|---|---|---|---|---|---|---|
Bristol, City of | 1548.147349 | 1960.606588 | 2401.632469 | 2214.697219 | 1726.989042 | 1140.030194 | 1158.641839 | 1135.692269 | 957.162888 | 771.202322 | 808.640292 |
reporter_extreme(dfx2,2014,'lowest')
2004 | 2005 | 2006 | 2007 | 2008 | 2009 | 2010 | 2011 | 2012 | 2013 | 2014 | |
---|---|---|---|---|---|---|---|---|---|---|---|
Surrey | 1627.02416 | 1651.815454 | 1533.517173 | 1599.627488 | 1050.962444 | 855.642886 | 665.084912 | 323.272197 | 162.27229 | 197.566553 | 132.930965 |