"""Filter per-player season statistics down to one candidate per team.

Pipeline, in order:

1. Load the whitespace-separated ``player_stats`` and ``team_stats`` files.
2. Attach each team's win percentage to its players and derive per-game
   averages, shooting percentages and a combined ``COMB`` score.
3. Narrow the player pool step by step, plotting each cut before applying
   it: above-median games played and minutes per game, started at least
   half of the games played, positive ``+/-``, team win percentage above 50.
4. Keep the highest-``COMB`` player of every remaining team, rank the ten
   best of those, and plot their stat lines and shooting percentages.

The script was written for an IPython/pylab notebook session; the imports
below make the names it relied on (``plt``, ``Rectangle``) explicit so it
also runs as a plain script.
"""
# Future statements must be the first statements of a module, so they come
# before every other import.
from __future__ import division, print_function

import matplotlib.pyplot as plt
import pandas as pd
from matplotlib.patches import Rectangle
from mpltools import style

style.use('ggplot')

# Every figure uses the same layout rectangle.
LAYOUT_RECT = [0, 0, 2.4, 1.0]
# Stat columns drawn, in this order, on the two top-10 bar charts.
STAT_LINE = ['TPG', 'BPG', 'SPG', 'APG', 'RPG', 'PPG']

# Notebook-only preview of the raw input files. This is an IPython shell
# escape, not Python, so it is preserved as a comment:
#   !head player_stats team_stats

# ---------------------------------------------------------------------------
# Load and clean the player table
# ---------------------------------------------------------------------------
stats = pd.read_table('player_stats', sep=' +')
# Rows whose team is the literal string 'na' are dropped.
stats = stats[stats.Team != 'na']
del stats['PS'], stats['PF'], stats['DQ'], stats['TC'], stats['EJ'], stats['FF']
stats.head()  # displayed by a notebook; no effect when run as a script

# ---------------------------------------------------------------------------
# Load the team table and attach win percentage to every player
# ---------------------------------------------------------------------------
team_stats = pd.read_table('team_stats', sep=' +',
                           usecols=['team', 'won', 'lost'])
team_stats.head()  # displayed by a notebook; no effect when run as a script

# NOTE(review): 82 is hard-coded as the number of games per team; 'lost' is
# loaded but never used. Confirm every team in the file played 82 games.
team_stats['Win%'] = team_stats.won / 82 * 100

# NOTE(review): DataFrame.sort was deprecated in pandas 0.17 and removed in
# 0.20; use sort_values('team', inplace=True) when moving to a newer pandas.
team_stats.sort('team', inplace=True)

# The sorted abbreviations are assigned positionally to the name-sorted
# teams. This only lines up if the abbreviations sort in the same order as
# the team names, and it fails outright if the two tables hold a different
# number of teams. Verify against the data.
team_stats['Team_short'] = sorted(stats.Team.unique())

stats = stats.merge(team_stats[['Team_short', 'Win%']],
                    right_on='Team_short', left_on='Team')
stats.Team = stats.Team.apply(str.upper)
# Remember every team before filtering so eliminated teams can be listed.
teams = stats.Team.unique()
del stats['Team_short']

# ---------------------------------------------------------------------------
# Derived columns
# ---------------------------------------------------------------------------
# Per-game averages: raw totals divided by games played.
stats['MPG'] = stats.Min / stats.GP
stats['PPG'] = stats.PTS / stats.GP
stats['RPG'] = stats.TR / stats.GP
stats['APG'] = stats.AS / stats.GP
stats['SPG'] = stats.ST / stats.GP
stats['BPG'] = stats.BK / stats.GP
stats['TPG'] = stats.TO / stats.GP
stats['FGMPG'] = stats.FGM / stats.GP
stats['FTMPG'] = stats.FTM / stats.GP
# Fraction (0-1) of played games that the player started.
stats['StaPer'] = stats.Sta / stats.GP

# Shooting percentages: made / attempted, scaled to 0-100.
stats['FGP'] = stats.FGM / stats.FGA * 100
stats['FTP'] = stats.FTM / stats.FTA * 100
stats['3PP'] = stats['3M'] / stats['3A'] * 100

# Combined score: the five per-game averages added together, minus TPG.
stats['COMB'] = (stats.PPG + stats.RPG + stats.APG + stats.SPG + stats.BPG
                 - stats.TPG)

# Names are stored as comma-separated parts; reverse the parts, title-case
# each one and join them with a space.
stats['Player'] = stats.Player.apply(
    lambda x: ' '.join(map(str.title, x.split(',')[::-1])))
stats.reset_index(drop=True, inplace=True)

# ---------------------------------------------------------------------------
# Filter 1: above-median games played AND minutes per game
# ---------------------------------------------------------------------------
plt.scatter(stats.GP, stats.MPG)
plt.ylim([0, stats.MPG.max() + 2])
plt.xlim([0, stats.GP.max() + 2])
plt.tight_layout(rect=LAYOUT_RECT)
# Shade the region the filter below keeps: from the two medians up to just
# past the two maxima.
plt.gca().add_patch(Rectangle(
    (stats.GP.median(), stats.MPG.median()),
    stats.GP.max() - stats.GP.median() + 1,
    stats.MPG.max() - stats.MPG.median() + 1,
    alpha=0.3))
plt.xlabel('Games Played')
plt.ylabel('Minutes Per Game')
plt.show()

stats = stats[(stats.GP > stats.GP.median())
              & (stats.MPG > stats.MPG.median())]

# ---------------------------------------------------------------------------
# Filter 2: started at least half of the games played
# ---------------------------------------------------------------------------
plt.scatter(stats.GP, stats.StaPer)
plt.ylim([stats.StaPer.min() - 0.05, stats.StaPer.max() + 0.05])
plt.xlim([stats.GP.min() - 1, stats.GP.max() + 1])
# The guide line sits at 0.49, just below the 0.5 cut-off applied below.
plt.hlines(0.49, stats.GP.min() - 1, stats.GP.max() + 1,
           linestyle=':', lw=2, color='red')
plt.xlabel('Games Played')
plt.ylabel('Percentage Of Games Started')
plt.tight_layout(rect=LAYOUT_RECT)
plt.show()

stats = stats[stats.StaPer >= 0.5]

# ---------------------------------------------------------------------------
# Filter 3: positive +/-
# ---------------------------------------------------------------------------
stats['+/-'].hist(bins=30)
plt.xlim(stats['+/-'].min(), stats['+/-'].max())
plt.vlines(0, 0, plt.ylim()[1], linestyle='--', lw=3, color='steelblue')
plt.xlabel('Player +/-')
plt.ylabel('Number of Players')
plt.tight_layout(rect=LAYOUT_RECT)
plt.show()

stats = stats[stats['+/-'] > 0]

# ---------------------------------------------------------------------------
# Filter 4: team win percentage above 50
# ---------------------------------------------------------------------------
stats['Win%'].hist(bins=range(40, 80, 5))
plt.xlim(stats['Win%'].min(), stats['Win%'].max())
plt.vlines(50, 0, plt.ylim()[1], linestyle='--', lw=3, color='steelblue')
plt.xlabel('Win Percentage')
plt.ylabel('Number of Players')
plt.tight_layout(rect=LAYOUT_RECT)
plt.show()

stats = stats[stats['Win%'] > 50.0]

# ---------------------------------------------------------------------------
# Remaining candidates per team
# ---------------------------------------------------------------------------
stats.groupby('Team').size().plot(kind='bar')
plt.tight_layout(rect=LAYOUT_RECT)
plt.xlabel('')
plt.ylabel('Number of Candidates')
plt.tick_params(axis='x', labelsize=14)

# Teams that no longer have any player left after the four filters.
remaining_teams = set(stats.Team.unique())
print(' '.join([team for team in teams if team not in remaining_teams]))

# Highest-COMB player of each remaining team (ties keep every tied row).
candidates = stats.groupby('Team', as_index=False).apply(
    lambda player: player[player.COMB == player.COMB.max()])
for _, candidate in candidates.iterrows():
    print(candidate.Team, candidate.Player)

# ---------------------------------------------------------------------------
# Top ten candidates by COMB
# ---------------------------------------------------------------------------
# NOTE(review): same DataFrame.sort caveat as above; newer pandas needs
# sort_values('COMB', ascending=False).
top10 = (candidates.sort('COMB', ascending=False)
         .head(10)
         .reset_index(drop=True))
for position, candidate in top10.iterrows():
    print(position + 1, candidate.Player)

# Negate TPG so its bars point downwards on the stat-line charts.
top10['TPG'] = top10['TPG'].apply(lambda x: -abs(x))

# Both stat-line charts share a y-range snapped to multiples of five.
stat_ylim = [int(5 * round(top10.TPG.min() / 5)),
             int(5 * round(top10.PPG.max() / 5)) + 5]

# Ranks 1-5.
top10[STAT_LINE].head(5).plot(kind='bar')
plt.xticks([x + 0.65 for x in range(5)], top10.Player.head(5), rotation=45)
plt.ylim(stat_ylim)
plt.tight_layout(rect=LAYOUT_RECT)

# Ranks 6-10.
top10[STAT_LINE].tail(5).plot(kind='bar')
plt.xticks([x + 0.65 for x in range(5)],
           top10.Player.tail(5).reset_index(drop=True), rotation=45)
plt.ylim(stat_ylim)
plt.tight_layout(rect=LAYOUT_RECT)

# Shooting percentages of all ten.
top10[['3PP', 'FGP', 'FTP']].plot(kind='bar')
plt.ylabel('Shooting Percentage')
plt.xticks([x + 0.5 for x in range(10)], top10.Player, rotation=45)
plt.tight_layout(rect=LAYOUT_RECT)