%matplotlib inline
import geopandas as gp
from geopandas.tools import sjoin
import pandas as pd
import matplotlib.pyplot as plt
from lxml import etree
import numpy as np
from shapely.geometry import Point, MultiPoint
from fiona.crs import from_epsg
# Greater London ward boundaries.
# Shapefile download: http://sensitivecities.com/extra/london.zip
LONDON_SHAPEFILE = '../pycity/london'
df_london = gp.GeoDataFrame.from_file(LONDON_SHAPEFILE)
# Tag the frame as EPSG:4326 (lon/lat) so it can be spatially joined with
# other layers that carry the same CRS.
df_london.crs = from_epsg(4326)
df_london.head()
AREA | AREA_CODE | CODE | DESCRIPT0 | DESCRIPT1 | DESCRIPTIO | FILE_NAME | HECTARES | NAME | NUMBER | NUMBER0 | POLYGON_ID | TYPE_COD0 | TYPE_CODE | UNIT_ID | geometry | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0 | LBW | E05000405 | CIVIL VOTING AREA | None | London Borough Ward | GREATER_LONDON_AUTHORITY | 755.173 | Chessington South Ward | 52 | 733 | 50840 | None | VA | 10884 | POLYGON ((-0.3306790212463542 51.3290110115417... |
1 | 0 | LBW | E05000414 | CIVIL VOTING AREA | None | London Borough Ward | GREATER_LONDON_AUTHORITY | 259.464 | Tolworth and Hook Rise Ward | 106 | 734 | 117160 | None | VA | 11407 | POLYGON ((-0.3084572192029973 51.3758608060460... |
2 | 0 | LBW | E05000401 | CIVIL VOTING AREA | None | London Borough Ward | GREATER_LONDON_AUTHORITY | 145.390 | Berrylands Ward | 107 | 735 | 50449 | None | VA | 11413 | POLYGON ((-0.3038496397984196 51.3924869913464... |
3 | 0 | LBW | E05000400 | CIVIL VOTING AREA | None | London Borough Ward | GREATER_LONDON_AUTHORITY | 268.506 | Alexandra Ward | 108 | 736 | 50456 | None | VA | 11420 | POLYGON ((-0.269900081781261 51.38845120455578... |
4 | 0 | LBW | E05000402 | CIVIL VOTING AREA | None | London Borough Ward | GREATER_LONDON_AUTHORITY | 187.821 | Beverley Ward | 109 | 737 | 117161 | None | VA | 11417 | POLYGON ((-0.2466219835559891 51.3992118975101... |
# Parse the Open Plaques XML dump into a DataFrame with one row per plaque
# that has a reference system, both coordinates and an inscription.
# available from openplaques: https://dl.dropbox.com/u/21695507/openplaques/plaques_20140619.xml
tree = etree.parse("data/plaques_20140619.xml")
root = tree.getroot()
output = {'raw': [], 'crs': [], 'lon': [], 'lat': []}
for geo in root.xpath('/openplaques/plaque/geo'):
    # Missing attributes come back as None and are removed by dropna() below.
    output['crs'].append(geo.get('reference_system', None))
    output['lon'].append(geo.get('longitude', None))
    output['lat'].append(geo.get('latitude', None))
    # Step back up to the parent <plaque> to fetch its inscription text.
    raw_nodes = geo.getparent().xpath('inscription/raw')
    text = raw_nodes[0].text if raw_nodes else None
    # A plaque without an inscription (element absent or empty) is recorded
    # as None instead of raising, so the four lists stay the same length.
    # Testing against None (rather than isinstance(..., str)) also keeps
    # non-ASCII inscriptions on Python 2, where lxml returns unicode.
    output['raw'].append(text.strip() if text is not None else None)
df = pd.DataFrame(output)
# 'raw' only ever holds stripped strings or None, so dropna() is all that is
# needed to discard incomplete rows.
df = df.dropna()
# Coordinates are read from XML attributes as strings; convert to floats.
df[['lon', 'lat']] = df[['lon', 'lat']].astype(float)
df.head()
crs | lat | lon | raw | |
---|---|---|---|---|
0 | WGS84 | 53.144512 | -1.549882 | Rockside Hydro 1862-1939\nRAF Hospital during ... |
1 | WGS84 | 53.134787 | -1.548879 | Horseshoe Inn\nc. 1860-2010\nMatlock Green was... |
2 | WGS84 | 53.138379 | -1.555831 | Crown Hotel \nc 1895-1990 |
3 | WGS84 | 53.330366 | -1.654271 | George Herbert Lawrence\n1888-1940\nIndustrial... |
4 | WGS84 | 55.861110 | -4.248807 | City Chambers. William Young 1888 |
# Turn every (lon, lat) pair into a shapely Point; x is longitude, y latitude.
plaque_points = [
    Point(lon, lat)
    for lon, lat in zip(df['lon'], df['lat'])
]
df_plaques = gp.GeoDataFrame({'geometry': plaque_points, 'raw': df['raw']})
# The plaque coordinates are declared as EPSG:4326, the same CRS assigned to
# the ward polygons. To work in OSGB36 / British National Grid instead, both
# frames would need reprojecting with to_crs(epsg=27700) before the join.
df_plaques.crs = from_epsg(4326)
# Inner spatial join: keep only plaques matched to a ward, with the ward's
# attribute columns attached to each matched plaque.
join_inner_df = sjoin(df_plaques, df_london, how="inner")
len(join_inner_df)
2251
# Join the plaque counts per ward to the wards.
# value_counts() is indexed by ward CODE; naming the column explicitly here
# avoids depending on whatever default column name the installed pandas
# version gives to value_counts().to_frame().
plaque_counts = join_inner_df['CODE'].value_counts().to_frame('Plaque_Count')
# Left merge keeps every ward; wards without plaques get NaN in Plaque_Count.
df_london = df_london.merge(
    plaque_counts,
    how='left',
    left_on='CODE', right_index=True)
# Top ten wards by number of plaques (DataFrame.sort was removed from pandas;
# sort_values is its replacement).
df_london[['NAME', 'HECTARES', 'Plaque_Count']].dropna().sort_values('Plaque_Count', ascending=False).head(10)
NAME | HECTARES | Plaque_Count | |
---|---|---|---|
484 | West End Ward | 199.973 | 165 |
478 | St. James's Ward | 346.379 | 161 |
489 | Marylebone High Street Ward | 100.107 | 67 |
501 | Bloomsbury Ward | 102.091 | 66 |
502 | Holborn and Covent Garden Ward | 119.493 | 49 |
480 | Knightsbridge and Belgravia Ward | 359.430 | 42 |
510 | Hampstead Town Ward | 244.939 | 40 |
245 | Cathedrals Ward | 176.822 | 38 |
475 | Queen's Gate Ward | 60.645 | 31 |
508 | Frognal and Fitzjohns Ward | 153.023 | 31 |
# Plaque density per square km.
# The frame is in EPSG:4326, so GeoDataFrame.area would be in square degrees
# rather than a metric unit. Use the ward's HECTARES attribute instead:
# 100 hectares == 1 square kilometre.
area_km2 = df_london['HECTARES'] / 100.0
df_london['Plaque_Density'] = df_london['Plaque_Count'] / area_km2
# Treat a zero density as missing so it is excluded by dropna() below.
df_london['Plaque_Density'] = df_london['Plaque_Density'].replace(0, np.nan)
# Top ten wards by density (sort_values replaces the removed DataFrame.sort).
df_london[['NAME', 'HECTARES', 'Plaque_Count', 'Plaque_Density']].dropna().sort_values('Plaque_Density', ascending=False).head(10)
NAME | HECTARES | Plaque_Count | Plaque_Density | |
---|---|---|---|---|
628 | Langbourn Ward | 5.193 | 8 | 11.894328 |
633 | Cordwainer Ward | 5.965 | 9 | 11.650257 |
635 | Cheap Ward | 9.161 | 12 | 10.113464 |
642 | Cornhill Ward | 6.478 | 8 | 9.534512 |
641 | Walbrook Ward | 7.474 | 9 | 9.297560 |
643 | Lime Street Ward | 5.096 | 6 | 9.091814 |
648 | Candlewick Ward | 5.209 | 6 | 8.894579 |
637 | Aldersgate Ward | 9.795 | 10 | 7.881302 |
639 | Bridge Ward | 9.421 | 9 | 7.376950 |
636 | Bassishaw Ward | 10.658 | 10 | 7.244112 |
# Choropleth of plaque density per ward, binned into 7 quantile classes.
plt.clf()
fig = plt.figure(figsize=(20, 15), dpi=100)
# 'axisbg' was removed from matplotlib; 'facecolor' is the replacement.
ax = fig.add_subplot(111, facecolor='w', frame_on=True)
# GeoDataFrame.plot takes 'cmap' and 'ax' (formerly 'colormap' and 'axes').
df_london.plot(column='Plaque_Density', scheme='Quantiles', k=7, cmap='Blues', ax=ax)
plt.tight_layout()
plt.show()
<matplotlib.figure.Figure at 0x11bcc0810>