# Future statements must precede every other statement in a module; placed
# anywhere else they are a SyntaxError when the file is run as a script.
from __future__ import division

import pandas as pd
from mpltools import style

# Use the ggplot look for every figure drawn below.
style.use('ggplot')
NBA statistics for the 13/14 season (source)
# IPython shell escape: show the first lines of both raw data files.
!head player_stats team_stats
==> player_stats <== Player Team PS GP Min FGM FGA 3M 3A FTM FTA OR TR AS ST TO BK PF DQ PTS TC EJ FF Sta +/- acy,quincy sac SF 63 852 66 141 4 15 35 53 71 215 28 23 30 26 122 1 171 5 0 0 0 -134 adams,steven okl C 81 1200 93 185 0 0 79 136 142 332 44 40 71 57 203 3 265 1 0 0 20 57 adrien,jeff mil SF 53 963 143 275 0 0 76 119 102 306 38 24 39 36 108 0 362 2 0 0 12 -72 afflalo,arron orl SG 73 2550 464 1011 128 300 274 336 32 262 248 35 146 3 136 0 1330 4 0 0 73 -365 ajinca,alexis nor C 56 952 136 250 0 1 56 67 94 277 40 23 63 46 187 3 328 0 0 0 30 -99 aldrich,cole nyk C 46 336 33 61 0 0 26 30 37 129 14 8 18 30 40 0 92 0 0 0 2 -15 aldridge,lamarcu por PF 69 2496 652 1423 3 15 296 360 166 766 178 64 122 69 147 1 1603 2 0 0 69 367 allen,lavoy ind PF 65 1068 134 299 2 13 33 50 119 311 71 24 45 33 126 1 303 0 0 0 2 -209 allen,ray mia SG 73 1937 240 543 116 309 105 116 23 205 143 54 83 8 115 0 701 0 0 0 9 162 ==> team_stats <== team won lost min fgm fga 3m 3a ftm fta or tr as st to bk pf pts tc ej ff AtlantaHawks 38 44 19857 3061 6688 768 2116 1392 1782 713 3278 2041 679 1189 326 1577 8282 10 0 0 BostonCeltics 25 57 19730 2996 6881 575 1730 1325 1706 980 3485 1726 584 1183 344 1743 7892 16 0 0 CharlotteBobcats 43 39 19915 2976 6730 516 1471 1474 2000 776 3500 1778 499 954 421 1493 7942 16 0 0 ChicagoBulls 48 34 19957 2843 6577 508 1459 1486 1908 937 3621 1860 594 1148 423 1565 7680 38 0 0 ClevelandCavaliers 33 49 19947 3036 6954 584 1638 1398 1861 988 3617 1739 579 1105 304 1640 8054 13 0 0 DallasMavericks 49 33 19855 3249 6858 721 1877 1378 1733 840 3354 1935 704 1082 356 1636 8597 22 0 0 DenverNuggets 36 46 19766 3147 7041 702 1959 1563 2154 1008 3725 1839 615 1260 459 1890 8559 29 0 0 DetroitPistons 29 53 19780 3182 7124 507 1580 1415 2111 1196 3721 1714 687 1143 395 1666 8286 48 0 0 GSWarriors 51 31 19837 3236 7002 774 2037 1303 1731 895 3713 1913 642 1226 406 1784 8549 30 0 0
Read statistics into a DataFrame and remove columns we are not interested in
# Columns that play no part in the analysis below.
unused_columns = ('PS', 'PF', 'DQ', 'TC', 'EJ', 'FF')

# The file is space-aligned, so fields are split on runs of spaces.
raw_players = pd.read_table('player_stats', sep=' +')

# Keep only rows whose Team is not the literal 'na'
# (presumably players without a team -- confirm against the data source).
has_team = raw_players.Team != 'na'
stats = raw_players[has_team]

for column in unused_columns:
    del stats[column]

stats.head()
Player | Team | GP | Min | FGM | FGA | 3M | 3A | FTM | FTA | OR | TR | AS | ST | TO | BK | PTS | Sta | +/- | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | acy,quincy | sac | 63 | 852 | 66 | 141 | 4 | 15 | 35 | 53 | 71 | 215 | 28 | 23 | 30 | 26 | 171 | 0 | -134 |
1 | adams,steven | okl | 81 | 1200 | 93 | 185 | 0 | 0 | 79 | 136 | 142 | 332 | 44 | 40 | 71 | 57 | 265 | 20 | 57 |
2 | adrien,jeff | mil | 53 | 963 | 143 | 275 | 0 | 0 | 76 | 119 | 102 | 306 | 38 | 24 | 39 | 36 | 362 | 12 | -72 |
3 | afflalo,arron | orl | 73 | 2550 | 464 | 1011 | 128 | 300 | 274 | 336 | 32 | 262 | 248 | 35 | 146 | 3 | 1330 | 73 | -365 |
4 | ajinca,alexis | nor | 56 | 952 | 136 | 250 | 0 | 1 | 56 | 67 | 94 | 277 | 40 | 23 | 63 | 46 | 328 | 30 | -99 |
5 rows × 19 columns
# Only the team name and its win/loss record are read from the team file.
team_columns = ['team', 'won', 'lost']
team_stats = pd.read_table('team_stats', usecols=team_columns, sep=' +')
team_stats.head()
team | won | lost | |
---|---|---|---|
0 | AtlantaHawks | 38 | 44 |
1 | BostonCeltics | 25 | 57 |
2 | CharlotteBobcats | 43 | 39 |
3 | ChicagoBulls | 48 | 34 |
4 | ClevelandCavaliers | 33 | 49 |
5 rows × 3 columns
Merge Team Win Percentage
# Win percentage is taken over the games each team actually played instead of
# a hard-coded 82, so the figure stays correct for seasons of another length.
games_played = team_stats.won + team_stats.lost
team_stats['Win%'] = team_stats.won / games_played * 100

# The two files share no key: player rows carry a short team code while team
# rows carry the full name. They are paired purely by alphabetical position,
# which only holds while both sort orders line up one-to-one -- re-check this
# if either file changes.
team_stats.sort('team', inplace=True)
team_stats['Team_short'] = sorted(stats.Team.unique())

# Attach each team's Win% to every one of its players.
stats = stats.merge(team_stats[['Team_short', 'Win%']],
                    left_on='Team', right_on='Team_short')
stats.Team = stats.Team.str.upper()

# Snapshot of every team code, taken before any candidate filtering.
teams = stats.Team.unique()
del stats['Team_short']
Calculate Per Game Statistics
def _display_name(raw_name):
    """Reverse the comma-separated parts of a name, title-case each, join with spaces."""
    return ' '.join(part.title() for part in reversed(raw_name.split(',')))


# (new column, season-total column) pairs; each total is divided by games played.
per_game_columns = [
    ('MPG', 'Min'),
    ('PPG', 'PTS'),
    ('RPG', 'TR'),
    ('APG', 'AS'),
    ('SPG', 'ST'),
    ('BPG', 'BK'),
    ('TPG', 'TO'),
    ('FGMPG', 'FGM'),
    ('FTMPG', 'FTM'),
    ('StaPer', 'Sta'),
]
games = stats.GP
for per_game, total in per_game_columns:
    stats[per_game] = stats[total] / games

# (new column, made column, attempted column) triples for shooting percentages.
shooting_columns = [
    ('FGP', 'FGM', 'FGA'),
    ('FTP', 'FTM', 'FTA'),
    ('3PP', '3M', '3A'),
]
for percentage, made, attempted in shooting_columns:
    stats[percentage] = stats[made] / stats[attempted] * 100

# Combined score: per-game production minus turnovers per game.
stats['COMB'] = (stats.PPG + stats.RPG + stats.APG + stats.SPG + stats.BPG
                 - stats.TPG)

stats['Player'] = stats.Player.apply(_display_name)
stats.reset_index(drop=True, inplace=True)
An MVP should be in the top half for games played (GP) and minutes per game (MPG)
# Each median and maximum is needed several times (axis limits, shaded region,
# final filter), so compute them once.
gp_median = stats.GP.median()
mpg_median = stats.MPG.median()
gp_max = stats.GP.max()
mpg_max = stats.MPG.max()

plt.scatter(stats.GP, stats.MPG)
plt.ylim([0, mpg_max + 2])
plt.xlim([0, gp_max + 2])
plt.tight_layout(rect=[0, 0, 2.4, 1.0])

# Shade the region above both medians. gca and Rectangle are reached through
# plt, like every other plotting call here, rather than as bare names that
# only exist when the pylab namespace has been injected.
plt.gca().add_patch(plt.Rectangle((gp_median, mpg_median),
                                  gp_max - gp_median + 1,
                                  mpg_max - mpg_median + 1, alpha=0.3))
plt.xlabel('Games Played')
plt.ylabel('Minutes Per Game')
plt.show()

# Keep only players strictly above the median on both measures.
stats = stats[(stats.GP > gp_median) & (stats.MPG > mpg_median)]
MVPs are starters
games_played = stats.GP
start_share = stats.StaPer  # games started divided by games played

# Horizontal extent shared by the x-axis and the cut-off line.
x_low = games_played.min() - 1
x_high = games_played.max() + 1

plt.scatter(games_played, start_share)
plt.ylim([start_share.min() - 0.05, start_share.max() + 0.05])
plt.xlim([x_low, x_high])
# Dotted guide drawn just below the 0.5 cut-off applied after the plot.
plt.hlines(0.49, x_low, x_high, linestyle=':', lw=2, color='red')
plt.xlabel('Games Played')
plt.ylabel('Percentage Of Games Started')
plt.tight_layout(rect=[0, 0, 2.4, 1.0])
plt.show()

# Keep players who started at least half of the games they played.
stats = stats[start_share >= 0.5]
MVPs have a positive +/-
plus_minus = stats['+/-']

plus_minus.hist(bins=30)
plt.xlim(plus_minus.min(), plus_minus.max())

# Dashed marker at zero, running up to the top of the current y-range.
_, y_top = plt.ylim()
plt.vlines(0, 0, y_top, linestyle='--', lw=3, color='steelblue')

plt.xlabel('Player +/-')
plt.ylabel('Number of Players')
plt.tight_layout(rect=[0, 0, 2.4, 1.0])
plt.show()

# Keep only players with a strictly positive +/-.
stats = stats[plus_minus > 0]
MVPs are winners
win_pct = stats['Win%']

# Derive the bin edges from the data. A fixed range(40, 80, 5) only covers
# 40-75, so any value outside it would be left out of the histogram even
# though the x-limits below span the full data range.
bin_width = 5
first_edge = int(win_pct.min() // bin_width) * bin_width    # round down
last_edge = -int(-win_pct.max() // bin_width) * bin_width   # round up
# Guarantee at least one full bin when every value sits on the same edge.
last_edge = max(last_edge, first_edge + bin_width)
win_pct.hist(bins=list(range(first_edge, last_edge + bin_width, bin_width)))

plt.xlim(win_pct.min(), win_pct.max())
# Dashed marker at the 50% line used as the cut-off below.
plt.vlines(50, 0, plt.ylim()[1], linestyle='--', lw=3, color='steelblue')
plt.xlabel('Win Percentage')
plt.ylabel('Number of Players')
plt.tight_layout(rect=[0, 0, 2.4, 1.0])
plt.show()

# Keep only players whose team won more than half of its games.
stats = stats[win_pct > 50.0]
Number Of Candidates Per Team
# One bar per team, its height the number of players still in contention.
candidates_per_team = stats.groupby('Team').size()
candidates_per_team.plot(kind='bar')
plt.tight_layout(rect=[0, 0, 2.4, 1.0])

# The team codes on the ticks are self-explanatory, so drop the axis title.
plt.xlabel('')
plt.ylabel('Number of Candidates')
plt.tick_params(axis='x', labelsize=14)
Teams that didn't cut it
# Teams with no player left in `stats`. The remaining team codes are collected
# once into a set instead of calling unique() again for every team tested.
# The parenthesised single-argument print behaves the same on Python 2 and 3.
remaining_teams = set(stats.Team.unique())
print(' '.join([team for team in teams if team not in remaining_teams]))
SAC MIL ORL NOR NYK PHI BOS ATL DEN MIN LAL CLE UTA DET
Top MVP Candidate By Team
# For every team keep the row(s) holding that team's highest COMB score;
# players tied for the maximum are all kept.
candidates = stats.groupby('Team', as_index=False).apply(
    lambda players: players[players.COMB == players.COMB.max()])

# iterrows() yields (index, row) pairs; unpack them rather than indexing the
# tuple. A single formatted string prints identically on Python 2 and 3.
for _, row in candidates.iterrows():
    print('%s %s' % (row.Team, row.Player))
BRO Deron Williams CHA Al Jefferson CHI Joakim Noah DAL Dirk Nowitzki GSW Stephen Curry HOU James Harden IND Paul George LAC Blake Griffin MEM Zach Randolph MIA Lebron James OKL Kevin Durant PHO Goran Dragic POR Lamarcu Aldridge SAN Tim Duncan TOR Demar Derozan WAS John Wall
The Top 10
# Rank the per-team candidates by COMB and keep the ten best, renumbered 0-9.
# NOTE: DataFrame.sort is the old pandas spelling this notebook was written
# against; newer pandas releases call it sort_values.
top10 = candidates.sort('COMB', ascending=False).head(10).reset_index(drop=True)

# The fresh 0-based index doubles as the rank. A single formatted string
# prints identically on Python 2 and 3.
for position, row in top10.iterrows():
    print('%s %s' % (position + 1, row.Player))
1 Kevin Durant 2 Lebron James 3 Lamarcu Aldridge 4 Blake Griffin 5 Al Jefferson 6 Stephen Curry 7 James Harden 8 Paul George 9 John Wall 10 Dirk Nowitzki
Stats Profile
# Turnovers are plotted as negative bars so they hang below the axis.
top10['TPG'] = -top10['TPG'].abs()

profile_columns = ['TPG', 'BPG', 'SPG', 'APG', 'RPG', 'PPG']
tick_positions = [slot + 0.65 for slot in range(5)]

# One y-range for both figures, snapped to multiples of five, so the two
# halves of the top ten can be compared directly.
y_range = [int(5 * round(top10.TPG.min()/5)),
           int(5 * round(top10.PPG.max()/5)) + 5]

# Same chart twice: ranks 1-5, then ranks 6-10.
for group in (top10.head(5), top10.tail(5)):
    group[profile_columns].plot(kind='bar')
    plt.xticks(tick_positions, group.Player.reset_index(drop=True), rotation=45)
    plt.ylim(y_range)
    plt.tight_layout(rect=[0, 0, 2.4, 1.0])
Shooting Percentage
# Grouped bars: three-point, field-goal and free-throw percentage per player.
shooting_columns = ['3PP', 'FGP', 'FTP']
label_positions = [slot + 0.5 for slot in range(10)]

shooting = top10[shooting_columns]
shooting.plot(kind='bar')
plt.ylabel('Shooting Percentage')
# Replace the numeric index on the x-axis with the player names.
plt.xticks(label_positions, top10.Player, rotation=45)
plt.tight_layout(rect=[0, 0, 2.4, 1.0])