import pandas as pd from pandas import Series, DataFrame, Panel import numpy as np import datetime import matplotlib.pyplot as plt %matplotlib inline COLUMN_NAMES = ["amount paid", "paid duration mins", "start date", \ "start day", "end date", "end day", "start time", "end time", \ "DesignationType", "Hours of Control", "Tariff", "Max Stay", \ "Spaces", "Street", "x coordinate", "y coordinate", "latitude", \ "longitude"] PARKING_DATA = 'ParkingCashlessDenorm.csv' raw_data = pd.read_csv(PARKING_DATA, names=COLUMN_NAMES, header=None) raw_data.head() # pandas will usually do a good job of interpretting time and date information. # However, rather annoyingly the date and time data has been stored in separate columns # so we need to do a bit of work to get consistent timestamps to work with. # First we convert the date information into timestamp format # Since no time information is given pandas assumes the time as 00:00:00 raw_data['start date'] = pd.to_datetime(raw_data['start date']) raw_data['end date'] = pd.to_datetime(raw_data['end date']) # Next convert the time information into timestamp format # Since the time information has no date associated with it pandas assumes # the date is today, so we'll need to subtract today's date to get the time on its own raw_data['start time'] = pd.to_datetime(raw_data['start time']) raw_data['end time'] = pd.to_datetime(raw_data['end time']) # We can now construct a proper timestamp for each of the records by adding # the date and time and subtracting today's date ts_today = pd.to_datetime('00:00:00') raw_data['start time'] = raw_data['start time'] - ts_today raw_data['end time'] = raw_data['end time'] - ts_today raw_data['start datetime'] = raw_data['start date'] + raw_data['start time'] raw_data['end datetime'] = raw_data['end date'] + raw_data['end time'] # i've subsequently realised that the whole of the above code can be replaced with this rows = pd.read_csv(PARKING_CASHLESS, names=COLUMN_NAMES, header=None, parse_dates={'end datetime': ['end date','end time'] , 'start datetime':['start date','start time']}) # How nice is that! # Now we want to group together all records for each parking location based on # the latitude and longitude, the 'Street' is ambiguous since some streets have # more than one parking location parking_locations = raw_data.groupby(by = ['latitude', 'longitude']) # get a single location df1 = raw_data.ix[parking_locations.indices[parking_locations.indices.keys()[3]]] arrivals = Series(np.ones(df1.shape[0]), index = df1['start datetime']) departures = Series(-1*np.ones(df1.shape[0]), index = df1['end datetime']) arr_dep = arrivals.append(departures) occupancy = arr_dep.resample('10Min', how ='sum').fillna(0).cumsum() occupancy.plot(xlim =(occupancy.index[15000],occupancy.index[15200])) # We now construct a Panel object containing the average occupancy for each day of the week first_time = 1 for j, k in enumerate(parking_locations.indices.keys()): # print j # look at one location at a time df1 = raw_data.ix[parking_locations.indices[k]] # Calculate occupancy by incrementing a count for every arrival and decrementing the # same count for every departure arrivals = Series(np.ones(df1.shape[0]), index = df1['start datetime']) departures = Series(-1*np.ones(df1.shape[0]), index = df1['end datetime']) occupancy = arrivals.append(departures) occupancy = occupancy.resample('10Min', how = 'sum').fillna(0).cumsum() # split the data by day of the week mon = occupancy.ix[occupancy.index.dayoftheweek == 0] tue = occupancy.ix[occupancy.index.dayoftheweek == 1] wed = occupancy.ix[occupancy.index.dayoftheweek == 2] thu = occupancy.ix[occupancy.index.dayoftheweek == 3] fri = occupancy.ix[occupancy.index.dayoftheweek == 4] df2 = DataFrame(mon.groupby(mon.index.time).mean(), columns =['Monday']) df2['Tuesday']=tues.groupby(tues.index.time).mean() df2['Wednesday']=weds.groupby(weds.index.time).mean() df2['Thursday']=thur.groupby(thur.index.time).mean() df2['Friday']=fri.groupby(fri.index.time).mean() # store some other information for each location for ease of access df2['Street'] = df['Street'].values[0] df2['Spaces'] = df['Spaces'].values[0] df2['Tariff'] = df['Tariff'].values[0] # Add this newly created DataFrame containing occupancy information to a Panel object if first_time: first_time = 0 p = Panel(data = [df2.values], items = [k], major_axis = df2.index, minor_axis = df2.columns) else: p[k] = df2 # save/load the panel object # pd.save(p,'parking_data_panel.pkl') p = pd.read_pickle('parking_data_panel.pkl') p # We can now interrogate the panel object to look at different aspects of the data # let's look at the number of free spaces at 12:30 for an average monday h, m = 12, 30 # get the total number of spaces at each location spaces = p.minor_xs( u'Spaces') mon_occupancy = p.minor_xs( u'Monday') mon_free_spaces = spaces - mon_occupancy # Use the Tariff to set an alpha value so we can see which parking locations are more expensive tariff = p.minor_xs( u'Tariff').ix[datetime.time(h,m)] # get the free spaces for 12:30 free_space_0 = mon_free_spaces.ix[datetime.time(h,m)] # plot free space for each location using a the size of a circle to indicate the number of free spaces for tup in free_space_0.keys(): plt.plot(tup[1], tup[0], 'bo', ms = np.max([0,free_space_0[tup]/2]),alpha = (1 + float(tariff[tup]))/5.4) plt.title('Westminster \n Unoccupied Parking Spaces') plt.text(- 0.13, 51.53,'Mon ' + str(h).zfill(2) + ':' + str(m).zfill(2)) plt.xlabel('Longitude') plt.ylabel('Latitude') plt.show() # Finally, we can loop through different times to create a series of images for creating an animation # set the time step for visualising occupancy ts = 5 cnt = 0 for h in np.arange(0,24): for m in np.arange(0,60,ts): # Use the Tariff to set an alpha value so we can see which parking locations are more expensive tariff = p.minor_xs( u'Tariff').ix[datetime.time(h,m)] # get the free spaces for 12:30 free_space_0 = mon_free_spaces.ix[datetime.time(h,m)] # plot free space for each location using a the size of a circle to indicate the number of free spaces for tup in free_space_0.keys(): plt.plot(tup[1], tup[0], 'bo', ms = np.max([0,free_space_0[tup]/2]),alpha = (1 + float(tariff[tup]))/5.4) plt.title('Westminster \n Unoccupied Parking Spaces') plt.text(- 0.13, 51.53,'Mon ' + str(h).zfill(2) + ':' + str(m).zfill(2)) plt.xlabel('Longitude') plt.ylabel('Latitude') cnt = cnt + 1 fname = 'image' + str(cnt).zfill(5) + '.png' #print fname plt.savefig(fname) plt.clf() from IPython.display import YouTubeVideo YouTubeVideo('tW8o14Th8hw') # QGIS (visualisation of shapefiles backed by OpenLayers maps from Google and OSM) from IPython.display import Image Image(url = 'http://ianozsvald.com/wp-content/uploads/2013/10/whitehall_1to36_capacity_1341parkingbays_bytarrif.png')