# -*- coding: utf-8 -*-
"""Explore Land Registry price-paid data and plot the sales on a map.

The script
  1. loads the price-paid table and prints/plots summary statistics,
  2. looks up latitude/longitude for every postcode through the
     postcodes.io bulk endpoint (100 postcodes per request),
  3. converts the coordinates to pixel positions on a pre-rendered map image
     (``./res/map.png``, 998 x 670 pixels) and draws the sales on top of it,
     coloured by price.
"""
import json
import sys
import time

from IPython.core.display import HTML

# Equivalent of the notebook magic ``%matplotlib inline``.  ``get_ipython`` is
# only defined when running under IPython, so a plain ``python`` run skips it.
try:
    get_ipython().run_line_magic('matplotlib', 'inline')
except NameError:
    pass

import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
from matplotlib import rcParams
from mpl_toolkits.basemap import Basemap


def getTileNumber(lat, lon, zoom):
    """Return the fractional map-tile coordinates ``(xtile, ytile)``.

    Parameters:
        lat:  latitude in degrees.
        lon:  longitude in degrees.
        zoom: zoom level; the tile grid is ``2**zoom`` tiles wide and high.

    Float literals are used throughout so integer arguments are not
    truncated by Python 2 integer division.
    """
    n = 2.0 ** zoom
    latRad = np.radians(lat)
    xtile = (lon + 180.0) / 360.0 * n
    ytile = (1.0 - np.log(np.tan(latRad) + 1.0 / np.cos(latRad)) / np.pi) / 2.0 * n
    return (xtile, ytile)


def getLonLat(xtile, ytile, zoom):
    """Inverse of getTileNumber: return ``(lon_deg, lat_deg)`` for a tile position.

    Note the return order is (longitude, latitude), the opposite of the
    argument order taken by getTileNumber.
    """
    n = 2.0 ** zoom
    lon_deg = xtile / n * 360.0 - 180.0
    lat_deg = np.degrees(np.arctan(np.sinh(np.pi * (1 - 2 * ytile / n))))
    return (lon_deg, lat_deg)


def latLon2Box(lat, lon, zoom, width, height):
    """Return the lon/lat corners of a ``width`` x ``height`` pixel box.

    The box is centred on (``lat``, ``lon``) at zoom level ``zoom`` and assumes
    256-pixel tiles.

    Returns:
        ``(lon_s, lat_s, lon_e, lat_e)`` where the ``_s`` corner is computed
        from the smaller tile x/y and the ``_e`` corner from the larger ones.
        Because getLonLat maps a smaller tile y to a larger latitude,
        ``lat_s`` is the larger of the two latitudes.
    """
    tile_size = 256.0
    (xtile, ytile) = getTileNumber(lat, lon, zoom)

    # Work in pixels around the centre, then convert back to tile units.
    halfWidth = width / 2.0
    halfHeight = height / 2.0
    xtile_s = (xtile * tile_size - halfWidth) / tile_size
    ytile_s = (ytile * tile_size - halfHeight) / tile_size
    xtile_e = (xtile * tile_size + halfWidth) / tile_size
    ytile_e = (ytile * tile_size + halfHeight) / tile_size

    (lon_s, lat_s) = getLonLat(xtile_s, ytile_s, zoom)
    (lon_e, lat_e) = getLonLat(xtile_e, ytile_e, zoom)
    return (lon_s, lat_s, lon_e, lat_e)


def _extractCoords(entries):
    """Return ``(lats, longs)`` lists for one bulk-lookup result list.

    Each entry carries its lookup under the ``'result'`` key, which is None
    when the postcode could not be found.  Missing lookups and null
    coordinates become NaN so the output has one value per entry.
    """
    batchLats = []
    batchLongs = []
    for entry in entries:
        found = entry['result']
        latitude = found['latitude'] if found is not None else None
        longitude = found['longitude'] if found is not None else None
        batchLats.append(np.nan if latitude is None else latitude)
        batchLongs.append(np.nan if longitude is None else longitude)
    return batchLats, batchLongs


# ---------------------------------------------------------------------------
# Notebook styling and plot defaults
# ---------------------------------------------------------------------------
css_file = './custom.css'
with open(css_file, "r") as cssHandle:
    HTML(cssHandle.read())

# NOTE(review): 'display.mpl_style' may not exist in newer pandas releases;
# confirm against the installed version.
pd.options.display.mpl_style = 'default'

rcParams['figure.figsize'] = (12, 6)
rcParams['figure.dpi'] = 150
rcParams['font.size'] = 16
rcParams['font.family'] = 'sans-serif'
rcParams['axes.facecolor'] = '#ffffff'
rcParams['lines.linewidth'] = 2.0

# ---------------------------------------------------------------------------
# Load the land registry data table (a header row was added to the file)
# ---------------------------------------------------------------------------
fullData = pd.read_csv('./res/ppd_data.csv', index_col=2, parse_dates=True,
                       dayfirst=True)
fullData.head()

columnsToDrop = ['ID', 'PAON', 'NA', 'City', 'City2', 'County', 'URL']
# There are some NaNs in the postcode field, so drop those rows too.
cropData = fullData.drop(columnsToDrop, axis=1).dropna(subset=['Postcode'])

print('Average price paid in 2014 = £ %d' % np.mean(cropData['Price']))
print('Median price paid in 2014 = £ %d' % np.median(cropData['Price']))

# Distribution of purchase prices
plt.hist(cropData['Price'], 70)
plt.xlim([0, 1e6])
plt.xlabel('Purchase price (Pounds)')
plt.ylabel('Frequency')
plt.show()

# Mean price per calendar month
monthMeans = cropData['Price'].groupby(cropData.index.month).mean()
plt.plot(monthMeans)
plt.xlabel('Month')
plt.ylabel('Mean purchase price (Pounds)')
plt.show()

# Price spread per calendar month
cropData['month'] = cropData.index.month
cropData.boxplot(column='Price', by='month')
plt.ylim((0, 1000000))
plt.xlabel('Month')
plt.ylabel('Purchase price (Pounds)')
plt.show()

# Number of sales per calendar month
saleCount = cropData['Price'].groupby(cropData.index.month).size()
plt.plot(saleCount)
plt.ylim([0, 500])
plt.xlabel('Month')
plt.ylabel('Total sales')
plt.show()

# ---------------------------------------------------------------------------
# Practice the postcode lookup on a couple of postcodes
# ---------------------------------------------------------------------------
url = 'http://api.postcodes.io/postcodes'
payload = {'postcodes': ['OX49 5NU', 'M32 0JG']}  # Example postcodes
headers = {'content-type': 'application/json'}

r = requests.post(url, data=json.dumps(payload), headers=headers)
returnData = r.json()

# Print out the result, with some nicer formatting
print(json.dumps(returnData, sort_keys=True, indent=4, separators=(',', ': ')))

# ---------------------------------------------------------------------------
# Look up every postcode in the data set
# ---------------------------------------------------------------------------
postcodes = cropData['Postcode']
queryCodes = postcodes.tolist()

# We can only request 100 postcodes at a time
maxPostcodes = 100
nQueries = np.ceil(float(len(queryCodes)) / maxPostcodes)
print('%d postcodes, requireing %d requests' % (len(queryCodes), nQueries))

# Assemble the parts of the request which are constant
url = 'http://api.postcodes.io/postcodes'
headers = {'content-type': 'application/json'}

lats = []
longs = []
for iQuery in range(int(nQueries)):
    # Assemble the postcode query for this batch
    minQuery = iQuery * maxPostcodes
    maxQuery = min((iQuery + 1) * maxPostcodes, len(queryCodes))
    batchCodes = queryCodes[minQuery:maxQuery]
    payload = {'postcodes': batchCodes}

    r = requests.post(url, data=json.dumps(payload), headers=headers)

    # Give the server a rest as it seems to get stressed
    time.sleep(0.1)

    if r.status_code == 200:
        returnData = r.json()
        sys.stdout.write('.')
        result = returnData['result']
        batchLats, batchLongs = _extractCoords(result)
    else:
        print('Query failed, status: %d' % r.status_code)
        # Pad with NaN rather than reusing the previous response, so that
        # lats/longs stay aligned one-to-one with queryCodes (and prices).
        batchLats = [np.nan] * len(batchCodes)
        batchLongs = [np.nan] * len(batchCodes)

    lats += batchLats
    longs += batchLongs

print("Finished queries")

# ---------------------------------------------------------------------------
# Show the bare map
# ---------------------------------------------------------------------------
im = plt.imread('./res/map.png')
mapFac = 1.5
plt.figure(figsize=(mapFac*9.98, mapFac*6.7))
plt.imshow(im)
plt.axis('off')
plt.title('A pretty map of Cambridge:\n')
plt.show()

# ---------------------------------------------------------------------------
# Project the sale locations onto the map image
# ---------------------------------------------------------------------------
# Pixel size of the map image; used both for the bounding box and for the
# normalisation to pixel co-ordinates below.
width = 998
height = 670

(lon_s, lat_s, lon_e, lat_e) = latLon2Box(52.206344321410604,
                                          0.13166427612304688,
                                          13, width, height)

m = Basemap(llcrnrlon=lon_s, llcrnrlat=lat_s, urcrnrlon=lon_e, urcrnrlat=lat_e,
            resolution='h', projection='merc')

x1, y1 = m(longs, lats)
xs, ys = m(lon_s, lat_s)
xe, ye = m(lon_e, lat_e)

# Normalise to pixel co-ords: 0 at the '_s' corner, width/height at '_e'
xPix = width*(np.array(x1) - xs)/(xe - xs)
yPix = height*(np.array(y1) - ys)/(ye - ys)

plt.figure(figsize=(mapFac*9.98, mapFac*6.7))
# The image is flipped and the y-limits run 0..height, so points are drawn
# at height - yPix.
plt.imshow(np.flipud(im))
plt.plot(xPix, height-yPix, 'o', markersize=5, markeredgecolor='none')
plt.xlim([0, width])
plt.ylim([0, height])
plt.axis('off')
plt.show()

# ---------------------------------------------------------------------------
# Colour the sales by price
# ---------------------------------------------------------------------------
maxPrice = 600000.0

# Scale prices to 0..1 for the colour map, saturating at maxPrice
price = np.minimum(np.divide(cropData['Price'].tolist(), maxPrice), 1)

# Random jitter (in pixels) so sales sharing a postcode do not overlap exactly
ditherMag = 10

fig = plt.figure(figsize=(mapFac*9.98, mapFac*6.7))
cmap = mpl.cm.cool
plt.imshow(np.flipud(im))
plt.scatter(xPix + ditherMag*(np.random.rand(xPix.size)-0.5),
            height - (yPix + ditherMag*(np.random.rand(yPix.size)-0.5)),
            c=price, cmap=cmap, s=60)
plt.xlim([0, width])
plt.ylim([0, height])
plt.axis('off')
plt.title('House sales, Cambridge 2014 (Pink = expensive, Blue = cheap)\n')
plt.savefig('res/price_map.jpg')