import requests
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from IPython.display import IFrame
import matplotlib.font_manager as fm
plt.style.use('ggplot')
%matplotlib inline
pd.options.display.max_columns=25
/usr/local/lib/python2.7/dist-packages/matplotlib/__init__.py:872: UserWarning: axes.color_cycle is deprecated and replaced with axes.prop_cycle; please use the latter. warnings.warn(self.msg_depr % (key, alt_key))
This play is from the Golden State Warriors - Memphis Grizzlies game on November 2, 2015. The Warriors wound up winning by 50 points, and the third quarter was a showcase of Steph Curry's talents. In this play from the 3rd quarter, the Grizzlies' (Red) Mike Conley (11) dribbles baseline and kicks out to Matt Barnes (22) behind the 3-point arc, who chucks up a brick that gets rebounded by Steph Curry (30). Curry begins to lead the offensive attack, and quickly three other Warriors are even with him at half court, leading to a 4-on-3 break. Curry stops at the 3-point arc, drawing two defenders. From his left hand, he takes the ball behind his back, and the two defenders are convinced he's passing to Andre Iguodala (9), but Curry brings the ball to his right hand and threads a pass between the defenders to a cutting James Michael McAdoo (20) for the dunk. Watch the play below, or on nba.com here
# Embed the two NBA.com viewers for this possession: the play viewer
# (cvp.html) and the movement viewer, both keyed by GameID / GameEventID.
IFrame(
    'http://stats.nba.com/cvp.html?GameID=0021500051&GameEventID=383',
    width=900,
    height=400,
)
IFrame(
    'http://stats.nba.com/movement/#!/?GameID=0021500051&GameEventID=383',
    width=700,
    height=400,
)

# Request the raw tracking data ("moments") for event 383 of game 0021500051.
url = 'http://stats.nba.com/stats/locations_getmoments/?eventid=383&gameid=0021500051'
# Without a timeout, requests waits indefinitely if the endpoint stalls.
response = requests.get(url, timeout=30)
# Surface HTTP errors here rather than as a confusing JSON decoding failure
# in the cells that follow.
response.raise_for_status()
response.json().keys()
[u'moments', u'visitor', u'gamedate', u'gameid', u'home']
Our keys returned are: moments, visitor, gamedate, gameid, and home.
Let's move home, visitor, and moments to their own variables, then print out gamedate and gameid to see whether they contain anything else.
# Decode the JSON body once and reuse it: each response.json() call parses
# the whole payload again.
payload = response.json()
home = payload["home"]
visitor = payload["visitor"]
moments = payload["moments"]
# A parenthesised single-argument print behaves the same on Python 2 and 3.
print(payload["gamedate"])
print(payload["gameid"])
2015-11-02 0021500051
Nothing of interest in gamedate and gameid: we already needed both values just to look up the data.
Let's examine home data
# Display the home-team record extracted from the response.
home
{u'abbreviation': u'GSW', u'name': u'Golden State Warriors', u'players': [{u'firstname': u'Leandro', u'jersey': u'19', u'lastname': u'Barbosa', u'playerid': 2571, u'position': u'G'}, {u'firstname': u'Shaun', u'jersey': u'34', u'lastname': u'Livingston', u'playerid': 2733, u'position': u'G'}, {u'firstname': u'Andre', u'jersey': u'9', u'lastname': u'Iguodala', u'playerid': 2738, u'position': u'G-F'}, {u'firstname': u'Jason', u'jersey': u'1', u'lastname': u'Thompson', u'playerid': 201574, u'position': u'F-C'}, {u'firstname': u'Brandon', u'jersey': u'4', u'lastname': u'Rush', u'playerid': 201575, u'position': u'G-F'}, {u'firstname': u'Marreese', u'jersey': u'5', u'lastname': u'Speights', u'playerid': 201578, u'position': u'F-C'}, {u'firstname': u'Stephen', u'jersey': u'30', u'lastname': u'Curry', u'playerid': 201939, u'position': u'G'}, {u'firstname': u'Klay', u'jersey': u'11', u'lastname': u'Thompson', u'playerid': 202691, u'position': u'G'}, {u'firstname': u'Harrison', u'jersey': u'40', u'lastname': u'Barnes', u'playerid': 203084, u'position': u'F'}, {u'firstname': u'Festus', u'jersey': u'31', u'lastname': u'Ezeli', u'playerid': 203105, u'position': u'C'}, {u'firstname': u'Draymond', u'jersey': u'23', u'lastname': u'Green', u'playerid': 203110, u'position': u'F'}, {u'firstname': u'Ian', u'jersey': u'21', u'lastname': u'Clark', u'playerid': 203546, u'position': u'G'}, {u'firstname': u'James Michael', u'jersey': u'20', u'lastname': u'McAdoo', u'playerid': 203949, u'position': u'F'}], u'teamid': 1610612744}
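visitor and home each contain all the information about the visiting and home team, respectively: the team abbreviation, name, teamid, and the list of players (first name, last name, jersey number, playerid, and position).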
# Number of tracking moments returned for this event.
len(moments)
251
# Inspect the first moment to see how a single record is laid out.
moments[0]
[3, 1446527673002, 132.22, 7.09, None, [[-1, -1, 14.64474, 44.48211, 2.25707], [1610612744, 2738, 18.86386, 23.97772, 0.0], [1610612744, 201939, 18.87993, 41.50729, 0.0], [1610612744, 202691, 5.05095, 29.13419, 0.0], [1610612744, 203110, 12.88533, 34.84216, 0.0], [1610612744, 203949, 8.88615, 20.38291, 0.0], [1610612763, 2440, 27.47547, 8.76582, 0.0], [1610612763, 201145, 9.42341, 11.88861, 0.0], [1610612763, 201584, 4.219, 16.40298, 0.0], [1610612763, 201144, 16.38252, 44.27504, 0.0], [1610612763, 201188, 19.68304, 35.52727, 0.0]]]
moments contains the tracking data. len(moments) is the number of moments in the event (here, 251); consecutive moments are usually 0.04 seconds apart, so this data covers about 10.04 seconds (251 x 0.04). Note: this data can and did change from day to day. Previously, there were 275 moments, or 11 seconds.
A single element of moments contains the times and the positions of all players and the ball. Let's examine the output above:
- moments[0][0] is 3, the quarter.
- moments[0][1] is 1446527673002, the Unix timestamp (in milliseconds).
- moments[0][2] is 132.22 and decreases as moments increase; it is the seconds left in the quarter.
- moments[0][3] is 7.09 and decreases as moments increase, then resets to 24; it is the seconds left on the shot clock.
- moments[0][4] is None, and has that value for every element of moments; I'm not sure what this field represents.
- moments[0][5] is a list of lists, each giving teamid, playerid, x, y, z.

To reiterate, the first 5 elements:
# The first five fields of the first moment (everything before the
# position list at index 5).
moments[0][0:5]
[3, 1446527673002, 132.22, 7.09, None]
[quarter, unixtime, timeleftgame, timeleftshot, unknown]
are the labels.
Let's look at the ball (element 0):
# Entry 0 of the first moment's position list.
moments[0][5][0]
[-1, -1, 14.64474, 44.48211, 2.25707]
[teamid, playerid, x, y, z]
The ball has teamid=-1 and playerid=-1, and appears to be the only tracked element with a non-zero z value.
Some have speculated that the z coordinate is the radius of the ball, but let's look at a histogram of values:
# Collect the z coordinate of entry 0 (the ball, per the inspection above)
# from the position list (index 5) of every moment.
ball_zdat = [frame[5][0][4] for frame in moments]

# Histogram of those z values, using unit-wide bins with edges 1 through 19.
fig = plt.figure(figsize=(8, 6))
plt.hist(ball_zdat, bins=range(1, 20))
plt.xlabel('Z values')
plt.ylabel('Frequency')
plt.title('Histogram of Z values of basketball')
plt.show()
Looking at the histogram, it is likely that the z value is the height, in feet, of the basketball from the floor. The most common values are approximately waist height and below. A few values above the height of the rim (10 ft.) would indicate a shot or lob pass. For the ball, let's first plot its height over time, and then plot its x, y position colored by z.
plt.style.use('ggplot')
fig, ax = plt.subplots(figsize=(8, 6))

# Moments are treated as evenly spaced, 0.04 s apart, so the x axis is the
# time elapsed since the first moment of the event.
moment_spacing = .04
elapsed = np.arange(0, len(ball_zdat)) * moment_spacing
duration = len(ball_zdat) * moment_spacing

# Alternative kept from the original: colour the points by height with
#   plt.scatter(y=ball_zdat, x=elapsed, c=ball_zdat, cmap=plt.cm.RdBu, s=100)
# and add fig.colorbar(scat, orientation='vertical', fraction=0.05).
scat = plt.scatter(y=ball_zdat, x=elapsed, s=100)

# Reference lines: the floor at height 0 and the rim at height 10.
court = plt.hlines(0, 0, duration, colors='k')
hoop = plt.hlines(10, 0, duration, colors='orange')
plt.autoscale(tight=True)

plt.ylabel('Ball height (feet)')
plt.xlabel('Time from start of event (seconds)')
plt.legend((hoop, court), ('hoop rim', 'court'), fontsize=12)
plt.title('Ball height over time', fontsize='x-large')

# tight_layout() only accounts for artists that exist when it is called, so
# it has to run after the labels, legend and title have been added;
# otherwise they can be clipped in the saved image.
plt.tight_layout()
plt.savefig('img/NBA_ballheight.png', dpi=300)
plt.show()
We can see some of the action in the plot above.
# x and y coordinates of entry 0 (the ball) of each moment's position list.
ball_xdat = [frame[5][0][2] for frame in moments]
ball_ydat = [frame[5][0][3] for frame in moments]
# Plot the ball's x/y track over a court image, coloured by its z value.
# The style switches below are order-sensitive: each plt.style.use() call
# changes the rcParams seen by the artists created after it.
plt.style.use('seaborn-white')
fig = plt.figure(figsize=(15, 11.5))
# Ball positions, coloured by z with the RdBu colormap; zorder=1 keeps the
# points above the court image, which is drawn with zorder=0 below.
scat = plt.scatter(x=ball_xdat,y=ball_ydat,c=ball_zdat,
cmap=plt.cm.RdBu, s=300, zorder=1)
# NOTE(review): presumably 'grayscale' is enabled here so that the court
# image is rendered in gray - confirm.
plt.style.use('grayscale')
court = plt.imread("img/fullcourt.png")
# extent maps the image onto x in [0, 94] and y in [50, 0] (y axis inverted).
img = plt.imshow(court, zorder=0, extent=[0,94,50,0])
plt.style.use('seaborn-white')
cbar = fig.colorbar(scat,orientation='vertical',fraction=0.025)
# NOTE(review): this inverts the x axis of a vertical colorbar; confirm
# whether invert_yaxis() was intended.
cbar.ax.invert_xaxis()
cbar.ax.set_ylabel('Ball height (ft.) above court')
# xaxis is 0-94 ft. yaxis 50-0 feet (inverted)
import matplotlib.font_manager as fm
# zorder should be less than zorder of scatter, so court is drawn first.
# annotation_font is defined for the annotation sketched in the commented-out
# lines below; it is not used by any live code in this cell.
annotation_font = fm.FontProperties(family='Bitstream Vera Sans',style='normal',size=10,weight='normal',stretch='normal')
#scat.annotate('[START] Connelly drives baseline',
# xy = ball_xdat
#
#plt.imshow(court, zorder=0, extent=[0,94,50,0])
#plt.xlim(0,94)
plt.title('Ball movement',fontsize='x-large')
# The figure is written to the img/ directory, which must already exist.
plt.savefig('img/Ball_movement.png',dpi=300)
plt.show()
# Flatten the tracking data into one row per tracked entry per moment:
#   [teamid, playerid, x, y, z, moment index, game clock, shot clock]
#
# Each row is a fresh list. The previous `p += [...]` extended the lists
# stored inside `moments` in place, so re-running the cell kept growing every
# row. enumerate() also replaces `moments.index(m)`, which rescanned the list
# on every iteration and returned the first moment that compared equal.
moment_data = [
    list(entry) + [moment_idx, frame[2], frame[3]]
    for moment_idx, frame in enumerate(moments)
    for entry in frame[5]
]
moment_data[0]
[-1, -1, 14.64474, 44.48211, 2.25707, 0, 132.22, 7.09]
# Tabulate the flattened rows: one row per tracked entry per moment.
pm_df = pd.DataFrame(
    moment_data,
    columns=[
        'teamID', 'playerID', 'xloc', 'yloc', 'zloc',
        'moment', 'gametime', 'shottime',
    ],
)
pm_df.head(15)
teamID | playerID | xloc | yloc | zloc | moment | gametime | shottime | |
---|---|---|---|---|---|---|---|---|
0 | -1 | -1 | 14.64474 | 44.48211 | 2.25707 | 0 | 132.22 | 7.09 |
1 | 1610612744 | 2738 | 18.86386 | 23.97772 | 0.00000 | 0 | 132.22 | 7.09 |
2 | 1610612744 | 201939 | 18.87993 | 41.50729 | 0.00000 | 0 | 132.22 | 7.09 |
3 | 1610612744 | 202691 | 5.05095 | 29.13419 | 0.00000 | 0 | 132.22 | 7.09 |
4 | 1610612744 | 203110 | 12.88533 | 34.84216 | 0.00000 | 0 | 132.22 | 7.09 |
5 | 1610612744 | 203949 | 8.88615 | 20.38291 | 0.00000 | 0 | 132.22 | 7.09 |
6 | 1610612763 | 2440 | 27.47547 | 8.76582 | 0.00000 | 0 | 132.22 | 7.09 |
7 | 1610612763 | 201145 | 9.42341 | 11.88861 | 0.00000 | 0 | 132.22 | 7.09 |
8 | 1610612763 | 201584 | 4.21900 | 16.40298 | 0.00000 | 0 | 132.22 | 7.09 |
9 | 1610612763 | 201144 | 16.38252 | 44.27504 | 0.00000 | 0 | 132.22 | 7.09 |
10 | 1610612763 | 201188 | 19.68304 | 35.52727 | 0.00000 | 0 | 132.22 | 7.09 |
11 | -1 | -1 | 14.33244 | 44.58121 | 2.58479 | 1 | 132.18 | 7.06 |
12 | 1610612744 | 2738 | 18.88402 | 24.07005 | 0.00000 | 1 | 132.18 | 7.06 |
13 | 1610612744 | 201939 | 18.58140 | 41.39098 | 0.00000 | 1 | 132.18 | 7.06 |
14 | 1610612744 | 202691 | 5.04115 | 28.73857 | 0.00000 | 1 | 132.18 | 7.06 |
def players_to_df(players):
    """Convert one team record into a DataFrame of its players.

    Parameters
    ----------
    players : dict
        A team record with a "players" list of per-player dicts and an
        "abbreviation" string (the shape of the `home` / `visitor` objects).

    Returns
    -------
    pandas.DataFrame
        One row per player. Columns follow the keys of the first player
        record, plus a 'team' column holding the team abbreviation. An empty
        roster yields an empty frame that still has the 'team' column.

    Raises
    ------
    KeyError
        If a later player record lacks a key present in the first record.
    """
    roster = players["players"]
    if not roster:
        # Previously this raised IndexError on roster[0]; returning an empty
        # frame keeps pd.concat over several teams working.
        return pd.DataFrame(columns=["team"])
    # Take the column order from the first record once, instead of calling
    # .keys() again for every row.
    columns = list(roster[0].keys())
    rows = [[rec[col] for col in columns] for rec in roster]
    df = pd.DataFrame(rows, columns=columns)
    df["team"] = players["abbreviation"]
    return df
# Stack the home and visiting rosters into a single player lookup table.
# Each roster keeps its own 0-based index, so index labels repeat.
team_rosters = [players_to_df(home), players_to_df(visitor)]
players_df = pd.concat(team_rosters)
players_df
playerid | lastname | jersey | firstname | position | team | |
---|---|---|---|---|---|---|
0 | 2571 | Barbosa | 19 | Leandro | G | GSW |
1 | 2733 | Livingston | 34 | Shaun | G | GSW |
2 | 2738 | Iguodala | 9 | Andre | G-F | GSW |
3 | 201574 | Thompson | 1 | Jason | F-C | GSW |
4 | 201575 | Rush | 4 | Brandon | G-F | GSW |
5 | 201578 | Speights | 5 | Marreese | F-C | GSW |
6 | 201939 | Curry | 30 | Stephen | G | GSW |
7 | 202691 | Thompson | 11 | Klay | G | GSW |
8 | 203084 | Barnes | 40 | Harrison | F | GSW |
9 | 203105 | Ezeli | 31 | Festus | C | GSW |
10 | 203110 | Green | 23 | Draymond | F | GSW |
11 | 203546 | Clark | 21 | Ian | G | GSW |
12 | 203949 | McAdoo | 20 | James Michael | F | GSW |
0 | 1713 | Carter | 15 | Vince | G-F | MEM |
1 | 2216 | Randolph | 50 | Zach | F | MEM |
2 | 2440 | Barnes | 22 | Matt | F | MEM |
3 | 2754 | Allen | 9 | Tony | F-G | MEM |
4 | 2757 | Udrih | 19 | Beno | G | MEM |
5 | 201144 | Conley | 11 | Mike | G | MEM |
6 | 201145 | Green | 32 | Jeff | F | MEM |
7 | 201148 | Wright | 34 | Brandan | F | MEM |
8 | 201188 | Gasol | 33 | Marc | C | MEM |
9 | 201584 | Lee | 5 | Courtney | G | MEM |
10 | 203210 | Green | 0 | JaMychal | F | MEM |
11 | 203893 | Smith | 2 | Russ | G | MEM |
12 | 203919 | Adams | 3 | Jordan | G | MEM |
# Attach player details to every tracking row. A left join keeps rows that
# have no roster match (the ball, playerID == -1), leaving NaN in the
# player columns.
pm_merge_df = pd.merge(
    pm_df,
    players_df,
    how='left',
    left_on='playerID',
    right_on='playerid',
)
pm_merge_df.head(11)
teamID | playerID | xloc | yloc | zloc | moment | gametime | shottime | playerid | lastname | jersey | firstname | position | team | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | -1 | -1 | 14.64474 | 44.48211 | 2.25707 | 0 | 132.22 | 7.09 | NaN | NaN | NaN | NaN | NaN | NaN |
1 | 1610612744 | 2738 | 18.86386 | 23.97772 | 0.00000 | 0 | 132.22 | 7.09 | 2738 | Iguodala | 9 | Andre | G-F | GSW |
2 | 1610612744 | 201939 | 18.87993 | 41.50729 | 0.00000 | 0 | 132.22 | 7.09 | 201939 | Curry | 30 | Stephen | G | GSW |
3 | 1610612744 | 202691 | 5.05095 | 29.13419 | 0.00000 | 0 | 132.22 | 7.09 | 202691 | Thompson | 11 | Klay | G | GSW |
4 | 1610612744 | 203110 | 12.88533 | 34.84216 | 0.00000 | 0 | 132.22 | 7.09 | 203110 | Green | 23 | Draymond | F | GSW |
5 | 1610612744 | 203949 | 8.88615 | 20.38291 | 0.00000 | 0 | 132.22 | 7.09 | 203949 | McAdoo | 20 | James Michael | F | GSW |
6 | 1610612763 | 2440 | 27.47547 | 8.76582 | 0.00000 | 0 | 132.22 | 7.09 | 2440 | Barnes | 22 | Matt | F | MEM |
7 | 1610612763 | 201145 | 9.42341 | 11.88861 | 0.00000 | 0 | 132.22 | 7.09 | 201145 | Green | 32 | Jeff | F | MEM |
8 | 1610612763 | 201584 | 4.21900 | 16.40298 | 0.00000 | 0 | 132.22 | 7.09 | 201584 | Lee | 5 | Courtney | G | MEM |
9 | 1610612763 | 201144 | 16.38252 | 44.27504 | 0.00000 | 0 | 132.22 | 7.09 | 201144 | Conley | 11 | Mike | G | MEM |
10 | 1610612763 | 201188 | 19.68304 | 35.52727 | 0.00000 | 0 | 132.22 | 7.09 | 201188 | Gasol | 33 | Marc | C | MEM |
from matplotlib.patches import Rectangle, Circle, Arc
from matplotlib.collections import PatchCollection
import matplotlib as mpl
import copy
clr='red'
def left_court_patches(clr='red'):
    """Create the patches for the left-hand side of an NBA court.

    Coordinates put the baseline at x=0 and run y from 0 down to -50, with
    the rim centred at (5.25, -25). Nothing is added to an axes here; the
    caller adds the patches.

    Parameters
    ----------
    clr : color
        Edge colour used for every patch.

    Returns
    -------
    list
        Twelve unfilled matplotlib patches, in the order: rim, backboard,
        arc around the rim and its two short straight segments, the 16-wide
        and 12-wide lane rectangles, the dashed and solid halves of the
        circle at the top of the lane, the two straight three-point
        segments, and the three-point arc.
    """
    # Shared line style. zorder=0 keeps every court marking underneath the
    # players and ball. Three arcs previously used zorder=9 and so were
    # drawn over the player markers.
    line = dict(color=clr, zorder=0, fill=False, lw=1)
    hoop_center = (5.25, -25)

    rim = Circle(hoop_center, radius=.75, color=clr, zorder=0, lw=.5, fill=False)
    # Zero-width / zero-height rectangles are used to draw straight lines.
    bkbrd = Rectangle((4, -28), width=0, height=6, **line)

    # Half circle of diameter 8 around the rim, opening away from the
    # baseline, closed towards the backboard by two short segments.
    ra_arc = Arc(hoop_center, 8, 8, theta1=270, theta2=90, **line)
    ra_t = Rectangle((4, -21), width=1.25, height=0, **line)
    ra_b = Rectangle((4, -29), width=1.25, height=0, **line)

    ft = Rectangle((0, -33), width=19, height=16, **line)    # 16 ft. outside
    lane = Rectangle((0, -31), width=19, height=12, **line)  # 12 ft. outside

    # Circle at the top of the lane: dashed half towards the baseline, solid
    # half towards midcourt.
    ft_ia = Arc((19, -25), 12, 12, theta1=90, theta2=270, linestyle='--', **line)
    ft_oa = Arc((19, -25), 12, 12, theta1=270, theta2=90, **line)

    # Three-point line: two straight segments joined by an arc of radius
    # 23.75 centred on the rim.
    three_t = Rectangle((0, -3), width=14, height=0, **line)
    three_b = Rectangle((0, -47), width=14, height=0, **line)
    three_arc = Arc(hoop_center, 23.75 * 2, 23.75 * 2, theta1=292, theta2=68, **line)

    return [rim, bkbrd, ra_arc, ra_t, ra_b, ft, lane, ft_ia, ft_oa,
            three_t, three_b, three_arc]
def draw_full_court(ax=None,clr='red'):
    """Draw a full court onto an axes and return that axes.

    Adds the 94 x 50 boundary rectangle, the half-court line at x=47, the
    two centre circles (radius 2 and 6), the patches returned by
    left_court_patches(), and a second set of those patches rotated by pi
    around the court centre (47, -25) to form the right-hand side.

    Parameters
    ----------
    ax : matplotlib axes, optional
        Target axes; the current axes (plt.gca()) is used when omitted.
    clr : color
        Line colour for all court markings.

    Returns
    -------
    The axes that was drawn on.
    """
    axes = plt.gca() if ax is None else ax
    line = dict(color=clr, zorder=0, fill=False, lw=1)

    # Whole-court components.
    outline = [
        Rectangle((0, -50), width=94, height=50, **line),  # base and side lines
        Rectangle((47, -50), width=0, height=50, **line),  # half-court line
        Circle((47, -25), radius=2, **line),               # restraining circle
        Circle((47, -25), radius=6, **line),               # center circle
    ]
    for piece in outline + left_court_patches(clr=clr):
        axes.add_patch(piece)

    # Right-hand side: a fresh set of left-side patches rotated half a turn
    # around the court centre. The transform is set before add_patch so the
    # axes does not replace it with its default data transform.
    half_turn = mpl.transforms.Affine2D().rotate_around(47, -25, np.pi) + axes.transData
    for piece in left_court_patches(clr=clr):
        piece.set_transform(half_turn)
        axes.add_patch(piece)
    return axes
# Render an empty court to check the drawing helpers.
plt.style.use('seaborn-white')
fig, ax = plt.subplots(figsize=(9.4, 5.0))
draw_full_court(ax=ax)
# Limits match the court extent: x in [0, 94], y in [-50, 0].
ax.set_xlim(0, 94)
ax.set_ylim(-50, 0)
plt.show()
def plot_moment(df,moment=0,tc=('cyan','yellow'),ax=None):
    """Plot every player and the ball for a single moment.

    Parameters
    ----------
    df : pandas.DataFrame
        Tracking rows with 'moment', 'team', 'jersey', 'playerID', 'xloc'
        and 'yloc' columns (the layout of `pm_merge_df`).
    moment : int
        Value of the 'moment' column to plot.
    tc : sequence of colors
        Marker colour per team, in the order the teams first appear in the
        selected rows.
    ax : matplotlib axes, optional
        Axes to draw on; the current axes (plt.gca()) is used when omitted.

    Returns
    -------
    The axes that was drawn on.

    Raises
    ------
    ValueError
        If `df` is None.
    """
    if df is None:
        raise ValueError("Input 'df' to plot_moment is None")
    if ax is None:
        ax = plt.gca()

    moment_df = df[df['moment'] == moment]
    # Rows without a team (the ball) are dropped from the team list.
    teams = moment_df.team.dropna().unique().tolist()

    # All drawing goes through `ax`. Previously the plt.* functions were used,
    # which silently ignored an explicitly supplied axes.
    for team_idx, t in enumerate(teams):
        team_df = moment_df[moment_df['team'] == t]
        team_color = tc[team_idx]
        # y is negated to match the court drawn with y running from 0 to -50.
        ax.scatter(x=team_df['xloc'], y=-team_df['yloc'],
                   c=team_color, s=200, zorder=1, label=t, edgecolor=team_color)
        # Jersey number centred on each player's marker.
        for j, x, y in zip(team_df['jersey'], team_df['xloc'], team_df['yloc']):
            ax.annotate(j, xy=(x, -y), xytext=(0, 0), textcoords='offset points',
                        zorder=2, ha='center', va='center', snap=False)
    #TODO(maybe): just plot annotations with circular bboxes, no scatter

    # The ball is the row with playerID == -1; it is drawn on top.
    ball_df = moment_df[moment_df['playerID'] == -1]
    ax.scatter(x=ball_df['xloc'], y=-ball_df['yloc'],
               s=100, zorder=3, c='orange', edgecolor='orange', label='ball')
    return ax
# Draw the court in gray and overlay the players and ball for moment 0,
# then save the figure into the img/ directory.
plt.style.use('seaborn-white')
fig, ax = plt.subplots(figsize=(9.4, 5.0))
draw_full_court(ax=ax, clr='gray')
plot_moment(pm_merge_df, ax=ax)
ax.legend(loc='lower right')
ax.set_xlim(0, 94)
ax.set_ylim(-50, 0)
ax.set_title('Player and Ball position at t=0', fontsize='x-large')
fig.savefig('img/Player_ball_positions.png', dpi=300)
plt.show()