import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
%matplotlib inline

# Load the shot data: the first CSV column becomes the index and the literal
# string 'N/A' is parsed as a missing value.
# Forward slashes keep the relative path portable across operating systems and
# avoid the invalid '\d' / '\c' escape sequences of a backslash-separated
# literal.
dfShots = pd.read_csv('../datasets/cl-shots-2012.csv', index_col=0, na_values='N/A')

# Preview the first rows.
dfShots.head()

# Summary statistics of the columns.
dfShots.describe()

# Raw view: every shot plotted as distance against `goal`. The very low alpha
# lets overlapping markers accumulate, so dense regions appear darker.
plt.scatter(dfShots['dist'], dfShots['goal'], alpha=0.01)

# Span the observed distance range with 20 evenly spaced edges and group the
# shots by the index of the interval each distance falls into.
bins = np.linspace(dfShots['dist'].min(), dfShots['dist'].max(), num=20)
groups = dfShots.groupby(np.digitize(dfShots['dist'], bins))

# Per-interval averages of distance and of `goal`.
# NOTE(review): the mean of `goal` is a scoring frequency only if the column
# is coded 0/1 - confirm against the dataset.
chart = groups[['dist', 'goal']].mean()

plt.plot(chart['dist'], chart['goal'], 'bo-')
plt.ylabel('probability')
plt.xlabel('distance (m)')
plt.ylim(0, 1)

# Inspect the shots taken from a distance greater than 40.
dfShots.loc[dfShots['dist'] > 40]

# Embed a YouTube video, identified by its video id, in the notebook output.
# NOTE(review): presumably footage related to the long-distance shots
# inspected just before this point - confirm.
from IPython.display import YouTubeVideo
YouTubeVideo('HhJ84p9KLKY')

import statsmodels.api as sm

# Add a constant column to represent the intercept; the model only estimates
# coefficients for the columns it is given.
dfShots['intercept'] = 1.0

# Identify the independent and dependent variables.
ind_cols = ['dist', 'intercept']
dep_cols = ['goal']

# Train the model: logistic regression of `goal` on distance plus intercept.
logit = sm.Logit(dfShots[dep_cols], dfShots[ind_cols])
result = logit.fit()

# Get the fitted coefficients from the results.
coeff = result.params
# The function-call form of print is valid on both Python 2 and Python 3,
# whereas the bare print statement is a syntax error on Python 3.
print(coeff)

def prob(dist, coeff):
    """Return the logistic-model probability for the given distance(s).

    Args:
        dist: Scalar, NumPy array or pandas Series of distances.
        coeff: Two coefficients ordered ``(slope, intercept)``. May be a
            list, tuple, NumPy array or a pandas Series (whatever its
            index labels are).

    Returns:
        ``1 / (1 + exp(-(slope * dist + intercept)))``, a scalar for scalar
        ``dist`` and element-wise otherwise.
    """
    # Read the coefficients by position from a plain array. Integer indexing
    # on a label-indexed Series relies on pandas' deprecated positional
    # fallback, so it is avoided here.
    values = np.asarray(coeff, dtype=float)
    z = values[0] * dist + values[1]
    return 1.0 / (1.0 + np.exp(-z))

# Evaluate the fitted curve at the whole-number distances 1 through 59.
lf = pd.DataFrame(list(range(1, 60)), columns=["dist"])
lf["prob"] = prob(lf["dist"], coeff)

lf.head()

# Overlay the binned averages (points) with the regression curve (line).
plt.scatter(chart["dist"], chart["goal"], label="frequency")
plt.plot(lf["dist"], lf["prob"], label="regression")

# Axis labels and ranges, then the legend built from the labels above.
plt.xlabel("Distance (m)")
plt.ylabel("Probability")
plt.xlim(0, 60)
plt.ylim(0, 0.8)
plt.legend()

# Field dimensions, used as the axis limits of the plot.
# NOTE(review): presumably metres - confirm the coordinate units of the data.
x_size = 105.0
y_size = 68.0

# Set up the field: green figure and green axes with no ticks or labels.
fig = plt.figure()
fig.patch.set_facecolor('green')

# The `axisbg` keyword has been removed from matplotlib; colouring the axes
# patch directly (as done for the figure above) works on old and new releases.
axes = fig.add_subplot(1, 1, 1)
axes.patch.set_facecolor('green')

axes.xaxis.set_visible(False)
axes.yaxis.set_visible(False)

plt.xlim([0, x_size])
plt.ylim([0, y_size])

# Draw shots: one arrow per row of the data.
# NOTE(review): columns are read by position - row[9], row[10] are passed as
# the arrow origin, row[1], row[2] as its dx/dy, and row[4] selects the
# colour (presumably the goal flag). Confirm against the CSV column order.
size = 1.0  # arrow head width and length; the same for every shot
for row in dfShots.values:
    if row[4]:
        color = 'red'
        alpha = 0.4
    else:
        color = 'white'
        alpha = 0.1

    plt.arrow(row[9], row[10], row[1], row[2], fc=color, ec=color,
              head_width=size, head_length=size, alpha=alpha)

plt.show()