Of course, Python isn't just for boring data analysis. You can use the same tools in unique ways to make some cool graphics.
I saw this incredible post from Dr. Nathan Yau on Flowing Data that showed where people run in different cities.
from IPython.display import Image
# Reference image hosted on flowingdata.com: the running-routes map that this notebook sets out to recreate.
Image("http://i2.wp.com/flowingdata.com/wp-content/uploads/2014/02/DC-feature.png")
I liked this enough to want to recreate it for my town.
First, let's import everything we need.
import pandas as pd
import matplotlib.pyplot as plt
import glob
import gpxpy
import gpxpy.gpx
import os
%matplotlib inline
Next, let's define a function that clears all of the ticks and lines.
def clear_frame(ax=None):
    """Hide the x axis, the y axis and every spine of an Axes.

    Adapted from a post by Tony S Yu.

    Args:
        ax: The Axes to strip. When omitted, the current pyplot Axes
            (``plt.gca()``) is used.
    """
    target = plt.gca() if ax is None else ax
    # Ticks and tick labels belong to the axis objects...
    for axis in (target.xaxis, target.yaxis):
        axis.set_visible(False)
    # ...while the surrounding box is made of the spines.
    for border in target.spines.values():
        border.set_visible(False)
Remember our `load_run_data` function from the previous notebook? Well, here it is again, just a little more streamlined.
def load_run_data(gpx_path, filter=""):
    """Load every track point from the GPX files in a directory.

    Args:
        gpx_path: Directory searched (non-recursively) for GPX files.
        filter: File-name prefix; only files matching ``<filter>*.gpx`` are
            read. The name shadows the builtin but is kept so that existing
            keyword callers keep working.

    Returns:
        A flat list with one ``[file_idx, gpx_file, latitude, longitude,
        elevation]`` row per track point, where ``file_idx`` is the file's
        position in the glob result.

    Note:
        A file that cannot be opened or parsed is DELETED from disk and
        skipped.
    """
    # List all of the GPX files in the path
    gpx_files = glob.glob(os.path.join(gpx_path, filter + "*.gpx"))
    run_data = []
    # Loop through the files
    for file_idx, gpx_file in enumerate(gpx_files):
        # Parse the GPX file. The context manager guarantees the handle is
        # closed before the file is (possibly) removed below.
        try:
            with open(gpx_file, 'r') as gpx_handle:
                gpx = gpxpy.parse(gpx_handle)
        except Exception:
            # Runkeeper often leaves off some XML tags causing an invalid file.
            # This can be fixed, but for the purpose of this talk, let's just
            # remove the file from the data set.
            # ``Exception`` (not a bare ``except``) so that Ctrl-C or
            # interpreter shutdown never deletes a perfectly good file.
            os.remove(gpx_file)
            continue
        run_data.extend(
            [file_idx, gpx_file, point.latitude, point.longitude, point.elevation]
            for track in gpx.tracks
            for segment in track.segments
            for point in segment.points
        )
    return run_data
# Load the runs for Huntsville
run_data = load_run_data(gpx_path="hsvrundata")
We'll also need a function to plot the data. For this we will just accept a 2-D list of points with a run index. We'll group the points by the index and plot the runs as separate series (so our lines don't connect back).
def plot_run_data(coords, **kwargs):
    """Draw every run as its own thin line on a single, frameless figure.

    Args:
        coords: Rows of ``[index, file_name, latitude, longitude, altitude]``;
            rows sharing an index are drawn as one line so separate runs are
            not joined together.
        **kwargs: Optional styling overrides:
            figsize: Figure size, default ``(13, 8)``.
            bgcolor: Axes background colour, default ``'#001933'``.
            color: Line colour, default ``'#FFFFFF'``.
            linewidth: Line width, default ``.35``.
            alpha: Line opacity, default ``.5``.

    The new figure is left as the current pyplot figure so callers can keep
    adjusting it (limits, annotations, overlays).
    """
    coords_df = pd.DataFrame(coords, columns=['Index','File_Name','Latitude','Longitude','Altitude'])

    fig = plt.figure(figsize=kwargs.get('figsize', (13, 8)))
    # One shared Axes, handed to every plot call explicitly, so the result
    # does not depend on implicit "hold"/current-axes behaviour.
    ax = fig.add_subplot(111)

    # Adjust these values to make the picture look a little different
    bgcolor = kwargs.get('bgcolor', '#001933')
    color = kwargs.get('color', '#FFFFFF')
    linewidth = kwargs.get('linewidth', .35)
    alpha = kwargs.get('alpha', .5)

    for _, run in coords_df.groupby('Index'):
        run.plot('Longitude', 'Latitude', ax=ax, color=color, linewidth=linewidth, alpha=alpha)

    ax.grid(False)
    ax.patch.set_facecolor(bgcolor)
    ax.set_aspect('auto', adjustable='box', anchor='C')
    clear_frame(ax)
    fig.subplots_adjust(left=0, right=1, top=1, bottom=.1)
plot_run_data(run_data)
# Zoom in on downtown Huntsville: x limits first, then y limits
plt.xlim(-86.6, -86.57)
plt.ylim(34.72, 34.74)
(34.72, 34.74)
Of course, the inline matplotlib plotting doesn't allow us to interact very well. Matplotlib has a few modes that you can set to plot using a different "backend" system. This can be a little finicky on different platforms. There is a new project by Jake Vanderplas (@jakevdp) called mpld3, which takes matplotlib plots and renders them in the d3 JavaScript plotting library. Let's check it out!
Note: There is an issue with the current version that doesn't clear the frame, so the bottom axis will show up on the plot below.
import mpld3
# Draw the runs again, then call mpld3.display() with no arguments.
# NOTE(review): presumably display() picks up the figure just created by plot_run_data — confirm against the mpld3 docs.
plot_run_data(run_data)
mpld3.display()
Let's take the run data from before, and then look at some of the popular races that take place in downtown Huntsville and how they overlay.
def _highlight_race(run_df, ax, file_name, label, color, text_offset):
    """Overlay one race's track on ``ax`` and label its first point.

    Args:
        run_df: DataFrame with 'File_Name', 'Latitude' and 'Longitude' columns.
        ax: Axes to draw on.
        file_name: Literal text looked for in 'File_Name' to select the race.
        label: Text of the annotation.
        color: Colour used for the track, the label and the arrow.
        text_offset: ``(dx, dy)`` added to the first point's
            (longitude, latitude) to position the label text.

    Returns:
        The rows of ``run_df`` belonging to the race (empty when no file
        name matched, in which case nothing is drawn and a warning is issued).
    """
    # regex=False: the file name is literal text, so '.' must not act as a wildcard.
    race = run_df[run_df['File_Name'].str.contains(file_name, regex=False)]
    if race.empty:
        import warnings
        warnings.warn("No track points found for %s; skipping highlight" % file_name)
        return race
    # A wide translucent stroke underneath a thinner solid one.
    race.plot('Longitude', 'Latitude', ax=ax, color=color, linewidth=3, linestyle='-', alpha=.3)
    race.plot('Longitude', 'Latitude', ax=ax, color=color, linewidth=2, linestyle='-')
    x, y = race['Longitude'].iloc[0], race['Latitude'].iloc[0]
    dx, dy = text_offset
    ax.annotate(label,
                xy=(x, y),
                xytext=(x + dx, y + dy),
                color=color,
                size=16,
                arrowprops=dict(facecolor=color))
    return race


run_df = pd.DataFrame(run_data, columns=['Index','File_Name','Latitude','Longitude','Altitude'])

# Every run in muted grey as the backdrop
plot_run_data(run_data, bgcolor="#DDDDDD", color="#999999", alpha=.6)
# Draw all overlays on the backdrop's Axes explicitly rather than relying on
# each DataFrame.plot call picking up the current Axes.
ax = plt.gca()

music_moves_me = _highlight_race(run_df, ax, 'Music_Moves_Me_5K_2012.gpx',
                                 'Music Moves Me', '#764B92', (-.003, .002))
rudolph = _highlight_race(run_df, ax, 'Rudolph_Run_5k.gpx',
                          'Rudolph Run', '#BF1E2D', (.003, .002))
spooktacular = _highlight_race(run_df, ax, 'Spooktacular_5k.gpx',
                               'Spooktacular', '#F36C32', (-.003, -.004))
cotton_row = _highlight_race(run_df, ax, 'Cotton_Row_5k.gpx',
                             'Cotton Row', '#127700', (-.002, -.002))
liz_hurley = _highlight_race(run_df, ax, 'HTC_Liz_Hurley_Run_5k.gpx',
                             'Liz Hurley', '#336699', (-.001, -.002))

# A boolean, not the string 'off' (a non-empty string is truthy).
plt.grid(False)
# Set bounds for downtown huntsville
plt.xlim((-86.6,-86.57))
plt.ylim((34.72,34.74))
clear_frame()