import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
# Load the player salary table from a local CSV and preview the first rows.
salaries_csv = '/Users/danielforsyth/Desktop/salaries.csv'
df = pd.read_csv(salaries_csv)
df.head()
Rk | Player | Tm | Cap Hit | 2013-14 | 2014-15 | 2015-16 | 2016-17 | 2017-18 | Signed Using | Guaranteed | |
---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | Kobe Bryant | LAL | 30453805 | 30453805 | 23500000 | 25000000 | NaN | NaN | Bird Rights | 78953805 |
1 | 2 | Dirk Nowitzki | DAL | 22721381 | 22721381 | NaN | NaN | NaN | NaN | Bird Rights | 22721381 |
2 | 3 | Amar'e Stoudemire | NYK | 21679893 | 21679893 | 23410988 | NaN | NaN | NaN | Bird Rights | 45090881 |
3 | 4 | Joe Johnson | BRK | 21466718 | 21466718 | 23180790 | 24894863 | NaN | NaN | Bird Rights | 69542371 |
4 | 5 | Carmelo Anthony | NYK | 21388953 | 21388953 | 23333405 | NaN | NaN | NaN | Bird Rights | 44722359 |
5 rows × 11 columns
# Only the player name and the current cap hit are used from here on.
salary_columns = ['Player', 'Cap Hit']
df = df[salary_columns]
df.head()
Player | Cap Hit | |
---|---|---|
0 | Kobe Bryant | 30453805 |
1 | Dirk Nowitzki | 22721381 |
2 | Amar'e Stoudemire | 21679893 |
3 | Joe Johnson | 21466718 |
4 | Carmelo Anthony | 21388953 |
5 rows × 2 columns
# Load the per-player efficiency statistics from a local CSV and preview them.
per_csv = '/Users/danielforsyth/Desktop/per.csv'
per = pd.read_csv(per_csv)
per.head()
Rk | Player | Pos | Age | Tm | G | MP | PER | TS% | eFG% | FTr | 3PAr | ORB% | DRB% | TRB% | AST% | STL% | BLK% | TOV% | USG% | ||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | DeAndre Liggins | SG | 25 | MIA | 1 | 1 | 128.3 | 1.000 | 1.000 | 0.000 | 0.000 | 100.0 | 0.0 | 62.5 | 0.0 | 0.0 | 0.0 | 0.0 | 47.5 | ... |
1 | 2 | Tony Mitchell | SF | 24 | MIL | 3 | 10 | 31.0 | 0.600 | 0.600 | 0.000 | 0.200 | 11.0 | 0.0 | 5.7 | 22.7 | 5.2 | 0.0 | 0.0 | 22.6 | ... |
2 | 3 | Kevin Durant | SF | 25 | OKC | 69 | 2654 | 30.4 | 0.639 | 0.564 | 0.488 | 0.285 | 2.2 | 19.4 | 11.2 | 27.2 | 1.7 | 1.6 | 12.4 | 32.9 | ... |
3 | 4 | LeBron James | PF | 29 | MIA | 66 | 2489 | 29.1 | 0.649 | 0.612 | 0.420 | 0.221 | 3.6 | 18.6 | 11.4 | 31.7 | 2.3 | 0.8 | 14.4 | 30.4 | ... |
4 | 5 | Kevin Love | PF | 25 | MIN | 66 | 2408 | 27.9 | 0.594 | 0.528 | 0.449 | 0.350 | 8.6 | 29.8 | 18.8 | 20.9 | 1.1 | 1.0 | 9.4 | 28.7 | ... |
5 rows × 26 columns
# Derive minutes per game (MP divided by G), then keep just the columns
# needed for the merge and the regression.
per = per.assign(MPG=per['MP'] / per['G'])
per_columns = ['Player', 'PER', 'MPG']
per = per[per_columns]
per.head()
Player | PER | MPG | |
---|---|---|---|
2 | Kevin Durant | 30.4 | 38.463768 |
3 | LeBron James | 29.1 | 37.712121 |
4 | Kevin Love | 27.9 | 36.484848 |
5 | Anthony Davis | 27.2 | 36.245902 |
6 | Chris Paul | 26.1 | 34.826923 |
5 rows × 3 columns
# Outer-join salaries with efficiency stats on the player name, so players
# present in only one table are kept (with NaN in the missing columns).
final = df.merge(per, how='outer', on='Player')
final.head()
Rk | Player | Cap Hit | PER | MPG | |
---|---|---|---|---|---|
0 | 1 | Kobe Bryant | 30453805 | 10.9 | 29.500000 |
1 | 2 | Dirk Nowitzki | 22721381 | 23.2 | 32.318841 |
2 | 3 | Amar'e Stoudemire | 21679893 | 18.6 | 21.296296 |
3 | 4 | Joe Johnson | 21466718 | 15.1 | 32.895522 |
4 | 5 | Carmelo Anthony | 21388953 | 24.9 | 38.820896 |
5 rows × 5 columns
# Keep only players whose minutes per game exceed the 6.09 cut-off. Rows with
# a missing MPG compare as False and are therefore dropped here as well.
final = final[final.MPG > 6.09]
# dropna() returns a new frame rather than modifying in place; the result must
# be assigned back, otherwise rows with a missing Cap Hit or PER remain.
final = final.dropna()
final.head()
Player | Cap Hit | PER | MPG | |
---|---|---|---|---|
0 | Kobe Bryant | 30453805 | 10.9 | 29.500000 |
1 | Dirk Nowitzki | 22721381 | 23.2 | 32.318841 |
2 | Amar'e Stoudemire | 21679893 | 18.6 | 21.296296 |
3 | Joe Johnson | 21466718 | 15.1 | 32.895522 |
4 | Carmelo Anthony | 21388953 | 24.9 | 38.820896 |
5 rows × 4 columns
from matplotlib import rcParams

# The pandas option `display.mpl_style` was deprecated and later removed, so
# setting it fails on current pandas. Matplotlib style sheets are the
# replacement; 'ggplot' is used as the closest built-in look (adjust to taste).
# The style is applied first so the explicit rcParams below take precedence.
plt.style.use('ggplot')
rcParams['figure.figsize'] = (10, 6)
rcParams['figure.dpi'] = 150

# Scatter of efficiency rating (x) against cap hit (y).
plt.scatter(final['PER'], final['Cap Hit'])
<matplotlib.collections.PathCollection at 0x112302450>
# Pairwise scatter plots of the three numeric columns, with a kernel density
# estimate on the diagonal.
# scatter_matrix lives in pandas.plotting; the old pandas.tools.plotting
# module no longer exists.
from pandas.plotting import scatter_matrix

smaller_frame = final[['Cap Hit', 'PER', 'MPG']]
axeslist = scatter_matrix(smaller_frame, alpha=0.8, figsize=(12, 12), diagonal="kde")
# Turn the grid off on every subplot of the matrix.
for ax in axeslist.flatten():
    ax.grid(False)
# Pairwise correlation of the numeric columns. The columns are selected
# explicitly because `final` also holds the string column 'Player', which
# newer pandas versions refuse to correlate instead of silently skipping.
final[['Cap Hit', 'PER', 'MPG']].corr()
Cap Hit | PER | MPG | |
---|---|---|---|
Cap Hit | 1.000000 | 0.502899 | 0.574893 |
PER | 0.502899 | 1.000000 | 0.654308 |
MPG | 0.574893 | 0.654308 | 1.000000 |
3 rows × 3 columns
# Show the dtype of each column in the merged frame.
final.dtypes
Player object Cap Hit float64 PER float64 MPG float64 dtype: object
# train_test_split moved to sklearn.model_selection; the former
# sklearn.cross_validation module has been removed.
from sklearn.model_selection import train_test_split

# Regress cap hit on PER using a random train/test split.
final = final.dropna()

# scikit-learn expects 2-D arrays of shape (n_samples, n_features). Go through
# the underlying NumPy array first: indexing a Series with [:, np.newaxis]
# is not supported by current pandas.
cap = final['Cap Hit'].values[:, np.newaxis]
per = final['PER'].values[:, np.newaxis]

# No random_state is passed, so the split (and the fitted coefficients)
# differ from run to run.
X_train, X_test, y_train, y_test = train_test_split(per, cap)

clf1 = LinearRegression()
clf1.fit(X_train, y_train)
predicted_train = clf1.predict(X_train)
predicted_test = clf1.predict(X_test)

# Flatten the single-feature column vectors to 1-D for plotting.
trains = X_train.reshape(1, -1).flatten()
tests = X_test.reshape(1, -1).flatten()

# Function-call form of print, so the line is also valid on Python 3.
print(clf1.coef_, clf1.intercept_)
[[ 524954.09866003]] [-2557366.02426694]
# All observations in red, with the line fitted on the training split drawn
# over them in semi-transparent blue.
plt.scatter(x=per, y=cap, c='r')
plt.plot(trains, predicted_train, color='b', alpha=0.5)
[<matplotlib.lines.Line2D at 0x111a9cf90>]
# Refit the linear model on every available observation (no hold-out split).
lr = LinearRegression()
lr.fit(per, cap)
b_0 = lr.intercept_
coeff = lr.coef_

# predict() requires a 2-D (n_samples, n_features) input; a bare scalar is
# rejected by current scikit-learn, so the single PER value is wrapped as a
# 1x1 array. The result is a (1, 1) array of the predicted cap hit.
pred = lr.predict([[33.68]])
pred
array([[ 14560859.24396323]])
# Load the NCAA player statistics and apply the same minutes-per-game cut-off
# (6.09) that was used for the merged NBA frame.
ncaa_csv = '/Users/danielforsyth/Desktop/ncaa.csv'
ncaa = pd.read_csv(ncaa_csv)
enough_minutes = ncaa['MPG'] > 6.09
ncaa = ncaa[enough_minutes]
ncaa.head()
RK | PLAYER | GP | MPG | AST | TO | USG | ORR | DRR | REBR | PER | |
---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | Doug McDermott, CREI | 33 | 33.6 | 6.6 | 7.7 | 27.8 | 5.7 | 18.5 | 12.1 | 33.68 |
1 | 2 | Alan Williams, UCSB | 26 | 31.7 | 5.4 | 10.2 | 27.1 | 13.2 | 28.4 | 20.8 | 33.37 |
2 | 3 | De'Mon Brooks, DAV | 26 | 26.8 | 7.8 | 12.9 | 24.2 | 10.4 | 22.6 | 16.5 | 32.13 |
3 | 4 | T.J. Warren, NCST | 33 | 35.3 | 4.5 | 9.6 | 28.0 | 10.9 | 13.1 | 12.0 | 32.06 |
4 | 5 | Javonte Green, RAD | 30 | 26.1 | 6.1 | 10.8 | 23.0 | 9.7 | 27.7 | 18.7 | 30.61 |
5 rows × 11 columns
# Only the player label and PER are needed for the salary prediction.
ncaa_columns = ['PLAYER', 'PER']
ncaa = ncaa[ncaa_columns]
ncaa.head()
PLAYER | PER | |
---|---|---|
0 | Doug McDermott, CREI | 33.68 |
1 | Alan Williams, UCSB | 33.37 |
2 | De'Mon Brooks, DAV | 32.13 |
3 | T.J. Warren, NCST | 32.06 |
4 | Javonte Green, RAD | 30.61 |
5 rows × 2 columns
# Predict a salary for every NCAA player from their PER using the fitted `lr`.
# Convert to NumPy before adding the feature axis: indexing a Series with
# [:, np.newaxis] is not supported by current pandas.
ncaa_per = ncaa['PER'].values[:, np.newaxis]
ncaa_player = ncaa['PLAYER'].values[:, np.newaxis]

# One vectorised predict() call over the (n_samples, 1) matrix replaces the
# per-row loop, which handed predict() 1-D input. Each prediction is kept as a
# string with two decimals, as before.
predictions = [format(salary, '.2f') for salary in lr.predict(ncaa_per).ravel()]

# `ncaa` was derived by filtering and column selection; work on an explicit
# copy so adding a column does not trigger SettingWithCopyWarning.
ncaa = ncaa.copy()
ncaa['Predicted Salary'] = predictions
ncaa.head(15)
PLAYER | PER | Predicted Salary | |
---|---|---|---|
0 | Doug McDermott, CREI | 33.68 | 14560859.24 |
1 | Alan Williams, UCSB | 33.37 | 14406288.32 |
2 | De'Mon Brooks, DAV | 32.13 | 13788004.62 |
3 | T.J. Warren, NCST | 32.06 | 13753101.51 |
4 | Javonte Green, RAD | 30.61 | 13030108.47 |
5 | John Brown, HP | 30.56 | 13005177.68 |
6 | Javon McCrea, BUFF | 30.30 | 12875537.55 |
7 | Frank Kaminsky, WIS | 29.73 | 12591326.49 |
8 | Cameron Bairstow, UNM | 29.68 | 12566395.70 |
9 | Jabari Parker, DUKE | 29.46 | 12456700.20 |
10 | Troy Huff, UND | 29.44 | 12446727.89 |
11 | Billy Baron, CAN | 29.38 | 12416810.93 |
12 | Justin Sears, YALE | 28.98 | 12217364.58 |
13 | Jordan Parks, NCCU | 28.90 | 12177475.31 |
14 | Brad Waldow, SMC | 28.83 | 12142572.19 |
15 rows × 3 columns