from IPython.core.display import HTML
# Path of the stylesheet whose contents are wrapped for display.
css_file = './custom.css'
# Read through a context manager so the file handle is closed as soon as the
# contents are in memory, rather than whenever the garbage collector runs.
with open(css_file, "r") as css_handle:
    css_text = css_handle.read()
# Build an IPython HTML display object from the stylesheet text.
# NOTE(review): presumably this is the last expression of a notebook cell so
# that it gets rendered - confirm against the original notebook.
HTML(css_text)
# Import libraries etc
%matplotlib inline
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import sys
import time
# Plot styling: set the pandas matplotlib style option, then override a
# handful of matplotlib defaults.
pd.options.display.mpl_style = 'default'
from matplotlib import rcParams
# (key, value) pairs, applied in the order listed.
for rcKey, rcValue in [
    ('figure.figsize', (12, 6)),
    ('figure.dpi', 150),
    ('font.size', 16),
    ('font.family', 'sans-serif'),
    ('axes.facecolor', '#ffffff'),
    ('lines.linewidth', 2.0),
]:
    rcParams[rcKey] = rcValue
# Load in the land registry data table (a header row was added by hand).
# The third column (index_col=2) becomes the index and is parsed as
# day-first dates.
fullData = pd.read_csv('./res/ppd_data.csv', index_col=2, parse_dates=True, dayfirst=True)
fullData.head()
# Columns that are not used by the analysis below.
columnsToDrop = ['ID', 'PAON', 'NA', 'City', 'City2', 'County', 'URL']
# There are some NaNs in the postcode field, so drop those rows as well.
# The axis is passed by keyword: the positional form drop(labels, 1) is not
# accepted by pandas 2.x, while axis=1 works on old and new versions alike.
cropData = fullData.drop(columnsToDrop, axis=1).dropna(subset=['Postcode'])
# Headline statistics of the 'Price' column.
print('Average price paid in 2014 = £ %d' % np.mean(cropData['Price']))
print('Median price paid in 2014 = £ %d' % np.median(cropData['Price']))
# Histogram of sale prices in 70 bins, with the x axis limited to 0..1e6.
plt.hist(cropData['Price'], bins=70)
plt.xlim(0, 1e6)
plt.xlabel('Purchase price (Pounds)')
plt.ylabel('Frequency')
plt.show()
# Month number of every row's index date; used as the grouping key below.
saleMonth = cropData.index.month

# Mean price per month.
monthMeans = cropData['Price'].groupby(saleMonth).mean()
plt.plot(monthMeans)
plt.xlabel('Month')
plt.ylabel('Mean purchase price (Pounds)')
plt.show()

# Box plot of price by month; boxplot groups on a column, so store the month.
cropData['month'] = saleMonth
cropData.boxplot(column='Price', by='month')
plt.ylim(0, 1000000)
plt.xlabel('Month')
plt.ylabel('Purchase price (Pounds)')
plt.show()

# Number of sales per month (row count of each month group).
saleCount = cropData['Price'].groupby(saleMonth).size()
plt.plot(saleCount)
plt.ylim(0, 500)
plt.xlabel('Month')
plt.ylabel('Total sales')
plt.show()
# Import some more libraries
import requests
import json
## Practice on a couple of postcodes
# Form the request: a JSON body holding a 'postcodes' list, POSTed to the
# postcodes endpoint.
url = 'http://api.postcodes.io/postcodes'
payload = {'postcodes': ['OX49 5NU', 'M32 0JG']}  # Example postcodes
headers = {'content-type': 'application/json'}
# Make the request and decode the JSON reply
r = requests.post(url, data=json.dumps(payload), headers=headers)
returnData = r.json()
# Print out the result, with some nicer formatting.
# Uses the call form of print like every other print in this file; the bare
# statement form is a syntax error on Python 3.
print(json.dumps(returnData, sort_keys=True, indent=4, separators=(',', ': ')))
# Get the postcodes and assemble a list
postcodes = cropData['Postcode']
queryCodes = postcodes.tolist()
# We can only request 100 postcodes at a time
maxPostcodes = 100
nQueries = np.ceil(float(len(queryCodes))/maxPostcodes)
print('%d postcodes, requireing %d requests' % (len(queryCodes), nQueries))
# Assemble the parts of the request which are constant
url = 'http://api.postcodes.io/postcodes'
headers = {'content-type': 'application/json'}
# One latitude / longitude entry is appended per queried postcode, in query
# order, so both lists stay aligned with the rows of cropData.
lats = []
longs = []
for iQuery in range(int(nQueries)):
    # Assemble the postcode query for this batch
    minQuery = iQuery*maxPostcodes
    maxQuery = min((iQuery+1)*maxPostcodes, len(queryCodes))
    batchCodes = queryCodes[minQuery:maxQuery]
    payload = {'postcodes': batchCodes}
    # Make the request
    r = requests.post(url, data=json.dumps(payload), headers=headers)
    # Give the server a rest between requests as it seems to get stressed
    time.sleep(0.1)
    # Compare the status by value; identity comparison on ints only works by
    # accident of interpreter caching.
    if r.status_code != 200:
        print('Query failed, status: %d' % r.status_code)
        # Pad the failed batch with NaN instead of falling through to the
        # previous batch's reply, which would duplicate stale coordinates
        # (or raise NameError if the very first request fails).
        lats += [np.nan] * len(batchCodes)
        longs += [np.nan] * len(batchCodes)
        continue
    returnData = r.json()
    sys.stdout.write('.')
    result = returnData['result']
    # Each entry carries its own 'result', which is None when the postcode
    # could not be found; record NaN for those.
    for entry in result:
        match = entry['result']
        if match is None:
            lats.append(np.nan)
            longs.append(np.nan)
        else:
            lats.append(match['latitude'])
            longs.append(match['longitude'])
print("Finished queries")
# Show the base map image on its own, without axes.
# mapFac scales the figure size used for this and the later map figures.
mapFac = 1.5
im = plt.imread('./res/map.png')
mapFigSize = (mapFac*9.98, mapFac*6.7)
plt.figure(figsize=mapFigSize)
plt.imshow(im)
plt.axis('off')
plt.title('A pretty map of Cambridge:\n')
plt.show()
def getTileNumber(lat, lon, zoom):
    """Convert a latitude/longitude pair to fractional tile coordinates.

    The tile grid has ``2**zoom`` tiles along each axis. The x coordinate
    grows with longitude from -180 degrees, and the y coordinate follows the
    Mercator formula, growing as latitude decreases.

    Args:
        lat: Latitude in degrees.
        lon: Longitude in degrees.
        zoom: Zoom level; the grid is ``2**zoom`` tiles wide and high.

    Returns:
        Tuple ``(xtile, ytile)`` of fractional tile coordinates.
    """
    n_tiles = 2 ** zoom
    # Divide by a float so an integer longitude is not truncated by Python 2
    # integer division.
    xtile = (lon + 180) / 360.0 * n_tiles
    lat_rad = np.radians(lat)
    ytile = (1 - np.log(np.tan(lat_rad) + 1 / np.cos(lat_rad)) / np.pi) / 2 * n_tiles
    return (xtile, ytile)
def getLonLat(xtile, ytile, zoom):
    """Convert (possibly fractional) tile coordinates to longitude/latitude.

    Args:
        xtile: Tile x coordinate in a grid ``2**zoom`` tiles wide.
        ytile: Tile y coordinate in a grid ``2**zoom`` tiles high.
        zoom: Zoom level.

    Returns:
        Tuple ``(lon_deg, lat_deg)`` in degrees. Note the order: longitude
        first, latitude second.
    """
    # Float tile count: with an int here, integer tile indices would be
    # truncated by Python 2 integer division in both formulas below.
    n = 2.0 ** zoom
    lon_deg = xtile / n * 360.0 - 180.0
    lat_deg = np.degrees(np.arctan(np.sinh(np.pi * (1 - 2 * ytile / n))))
    return (lon_deg, lat_deg)
def latLon2Box(lat, lon, zoom, width, height):
    """Return the lon/lat bounds of a pixel box centred on a point.

    The centre is converted to tile coordinates with ``getTileNumber``,
    offset by half the box size (tiles are taken to be 256 pixels square),
    and the two resulting corners are converted back with ``getLonLat``.

    Args:
        lat: Latitude of the box centre, in degrees.
        lon: Longitude of the box centre, in degrees.
        zoom: Zoom level passed to the tile conversions.
        width: Box width in pixels.
        height: Box height in pixels.

    Returns:
        Tuple ``(lon_s, lat_s, lon_e, lat_e)``: the corner at the smaller
        tile coordinates followed by the corner at the larger ones.
    """
    tile_size = 256.0
    # Halve with a float so odd integer sizes are not truncated by Python 2
    # integer division.
    half_width = width / 2.0
    half_height = height / 2.0
    (xtile, ytile) = getTileNumber(lat, lon, zoom)
    xtile_s = (xtile * tile_size - half_width) / tile_size
    ytile_s = (ytile * tile_size - half_height) / tile_size
    xtile_e = (xtile * tile_size + half_width) / tile_size
    ytile_e = (ytile * tile_size + half_height) / tile_size
    (lon_s, lat_s) = getLonLat(xtile_s, ytile_s, zoom)
    (lon_e, lat_e) = getLonLat(xtile_e, ytile_e, zoom)
    return (lon_s, lat_s, lon_e, lat_e)
from mpl_toolkits.basemap import Basemap

# Size of the map image in pixels; also the box size handed to latLon2Box.
width = 998
height = 670
# Lon/lat of the two box corners around the map centre at zoom level 13.
(lon_s, lat_s, lon_e, lat_e) = latLon2Box(52.206344321410604, 0.13166427612304688, 13, width, height)
# Mercator projection spanning those corners.
m = Basemap(
    llcrnrlon=lon_s,
    llcrnrlat=lat_s,
    urcrnrlon=lon_e,
    urcrnrlat=lat_e,
    resolution='h',
    projection='merc',
)
# Project the sale locations and the two corners into map coordinates.
x1, y1 = m(longs, lats)
xs, ys = m(lon_s, lat_s)
xe, ye = m(lon_e, lat_e)
# Normalise to pixel co-ords: 0 at the 's' corner, width/height at the 'e' corner.
xSpan = xe - xs
ySpan = ye - ys
xPix = width*(np.array(x1) - xs)/xSpan
yPix = height*(np.array(y1) - ys)/ySpan
# Draw every sale as a marker on top of the map image.
im = plt.imread('./res/map.png')
overlayFigSize = (mapFac*9.98, mapFac*6.7)
plt.figure(figsize=overlayFigSize)
# The image is flipped vertically and the markers are drawn at height - yPix,
# with the y axis running from 0 up to height.
plt.imshow(np.flipud(im))
plt.plot(xPix, height - yPix, 'o', markersize=5, markeredgecolor='none')
plt.xlim(0, width)
plt.ylim(0, height)
plt.axis('off')
plt.show()
import matplotlib as mpl

# Prices are scaled by this cap; anything above it is clamped to 1.
maxPrice = 600000.0
price = np.minimum(np.divide(cropData['Price'].tolist(), maxPrice), 1)
# Size of the random offset applied to each marker position.
ditherMag = 10
fig = plt.figure(figsize=(mapFac*9.98, mapFac*6.7))
cmap = mpl.cm.cool
plt.imshow(np.flipud(im))
# Uniform offsets in [-ditherMag/2, ditherMag/2), drawn for x first, then y.
xDither = ditherMag*(np.random.rand(xPix.size) - 0.5)
yDither = ditherMag*(np.random.rand(yPix.size) - 0.5)
plt.scatter(xPix + xDither, height - (yPix + yDither), c=price, cmap=cmap, s=60)
plt.xlim(0, width)
plt.ylim(0, height)
plt.axis('off')
plt.title('House sales, Cambridge 2014 (Pink = expensive, Blue = cheap)\n')
plt.savefig('res/price_map.jpg')