import numpy as np
import matplotlib.pyplot as plt
%pylab inline
from scipy import stats
import matplotlib.mlab as mlab
import bisect

colours=['y','c']

#league order; every per-league tuple and table below follows it
leagues=['NHL','NBA','NFL','MLB']

#actual variances of winning percentage (each literal is squared)
avNHL,avNBA,avNFL,avMLB=(sd**2 for sd in (0.0997,0.1449,0.1899,0.0717))

#random variance (variance due to luck) of winning percentage: .5*.5 divided
#by a per-league count (presumably games per season -- TODO confirm)
rvNHL,rvNBA,rvNFL,rvMLB=(.25/games for games in (82,82,16,162))

#true variance (variance due to skill) of winning percentage = actual - random
tvNHL=avNHL-rvNHL
tvNBA=avNBA-rvNBA
tvNFL=avNFL-rvNFL
tvMLB=avMLB-rvMLB

#the same numbers keyed by league name
avs=dict(zip(leagues,(avNHL,avNBA,avNFL,avMLB)))
rvs=dict(zip(leagues,(rvNHL,rvNBA,rvNFL,rvMLB)))
tvs=dict(zip(leagues,(tvNHL,tvNBA,tvNFL,tvMLB)))

#function that calculates the chance that the team that won the most games is the more skilled team

def calcchance(t1rank,t2rank,percentage,numgames,gp,league,
               skillvariances=None,teamcounts=None):
    """Chance that the team that won the most games is the more skilled team.

    Parameters
    ----------
    t1rank, t2rank : int
        1-based ranks of the two teams. Rank 1 receives the highest skill
        score, the last rank the lowest.
    percentage : float
        Probability threshold to look up (for example .8).
    numgames : int
        1-based position in ``gp`` whose probability is reported. This is
        the number of games played only when ``gp`` starts at 1 and
        increases by 1.
    gp : numpy array (or scalar) of games-played counts
        The probability is evaluated at every entry.
    league : str
        Key into the two lookup tables below.
    skillvariances : dict, optional
        League -> variance of winning percentage due to skill. Defaults to
        the module-level ``tvs``.
    teamcounts : dict, optional
        League -> number of teams. Defaults to the module-level
        ``numteams``.

    Returns
    -------
    list
        ``[probabilities for all entries of gp,
        probability at position numgames,
        first entry of gp whose probability is >= percentage]``.
        The last element is NaN when the threshold is never reached (for
        example when the lower-ranked team is passed as ``t1rank``).
    """
    #fall back to the module-level tables so six-argument calls are unchanged
    if skillvariances is None:
        skillvariances=tvs
    if teamcounts is None:
        teamcounts=numteams

    #share of the variance of winning percentage due to luck and to skill
    #after gp games
    truevar=skillvariances[league]
    luckvar=.5*.5/gp
    luckperc=luckvar/(truevar+luckvar)
    skillperc=truevar/(truevar+luckvar)

    #find skill points for each team from the skill distribution: evenly
    #spaced upper-tail probabilities are mapped to normal z-scores, then
    #rescaled so that the top rank gets exactly 1
    nteams=teamcounts[league]
    pvals=np.arange(1,nteams+1)*(1.0/float(nteams+1))
    zscores=stats.norm.isf(pvals)
    skillscores=.5+zscores/(2*zscores.max())

    #find difference of skill points weighted by skill%
    skilldiff=skillscores[t1rank-1]-skillscores[t2rank-1]
    efskilldiff=skilldiff*skillperc

    #find the required z-score in the difference distribution
    #NOTE(review): .14 looks like an assumed per-team standard deviation
    #(sqrt(2*sd**2) is the sd of a difference of two such values) -- confirm
    z_score2=efskilldiff/(np.sqrt(2*(.14**2))*luckperc)

    #find the probability of no upset corresponding to that z-score
    p_val=stats.norm.cdf(z_score2)

    #x/y pairs for the lookups; atleast_1d keeps a scalar gp indexable
    xvalues=np.atleast_1d(gp)
    yvalues=np.atleast_1d(p_val)

    #find number of games required for given win probability
    reached=np.nonzero(yvalues>=percentage)[0]
    if reached.size:
        gamesneeded=xvalues[reached[0]]
    else:
        #threshold never reached within gp: report NaN instead of failing
        gamesneeded=float('nan')

    #return: [probabilities for all games in gp, probability at given games played, games played at given probability]
    return [p_val,yvalues[numgames-1],gamesneeded]

#call the function defined above and plot the results for every league

#function parameters (these are the first 5; the 6th, league, is varied in the for loop)
t1rank=1
t2rank=15
percentage=.8
numgames=5
gp=np.arange(1,101)

#number of teams per league; calcchance looks the league up in this table
numteams={'MLB':30, 'NHL':30, 'NBA':30, 'NFL':32}

allchances=[] #for the combined plot

for league in leagues:
    
    #call the function
    resultarray=calcchance(t1rank,t2rank,percentage,numgames,gp,league)
    
    #plot results for each league
    plt.title(league)
    plt.xlabel("# of games")
    plt.ylabel('chance seed %d wins more games than seed %d' %(t1rank,t2rank))
    #draw the labelled curve first: legend() only lists artists that already
    #exist and carry a label
    datapoints=plt.plot(gp,resultarray[0],label=league)
    plt.legend()
    plt.show()
    
    allchances.append(resultarray[0].tolist()) #for the combined plot
    
    #single-argument print(...) emits the same text under Python 2 and 3
    print('%s %% chance at %s games' %(percentage*100, resultarray[2]))
    print('%s %% chance at %s games' %(resultarray[1]*100, numgames))

#combined plot: one labelled curve per league on shared axes
for i,chances in enumerate(allchances):
    plt.plot(gp,chances,label=leagues[i])
#axis labels and legend do not depend on the loop variable, so set them once
plt.xlabel("# of games")
plt.ylabel('chance seed %d wins more games than seed %d' %(t1rank,t2rank))
plt.legend()
plt.show()

#playoffs

#playoff parameters: seed 1 against a per-league opponent seed, evaluated at a
#per-league number of games (presumably the lowest playoff seed and the length
#of the first series -- TODO confirm)
t1rank=1
t2rankNHL=16
t2rankNBA=16
t2rankNFL=8
t2rankMLB=8
numgamesNHL=7
numgamesNBA=7
numgamesMLB=5
numgamesNFL=1

percentage=.9
gp=np.arange(1,100)

#one call per league, kept in the same order as `leagues` because `leagues`
#supplies the bar labels below
resultNHL=calcchance(t1rank,t2rankNHL,percentage,numgamesNHL,gp,'NHL')
resultNBA=calcchance(t1rank,t2rankNBA,percentage,numgamesNBA,gp,'NBA')
resultNFL=calcchance(t1rank,t2rankNFL,percentage,numgamesNFL,gp,'NFL')
resultMLB=calcchance(t1rank,t2rankMLB,percentage,numgamesMLB,gp,'MLB')

#element 1 of each result: probability at the league's number of games
rlplayoffchance=[resultNHL[1],resultNBA[1],resultNFL[1],resultMLB[1]]
#element 2 of each result: games played at which `percentage` is reached
rlplayoffgames=[resultNHL[2],resultNBA[2],resultNFL[2],resultMLB[2]]

#single-argument print(...) emits the same text under Python 2 and 3
print("bar graph values from left to right")
print(rlplayoffchance)

#plot chances
N = len( rlplayoffchance )
x = np.arange(1, N+1)
y = rlplayoffchance
labels = leagues
width = .75
#align='center' puts each bar, and therefore its tick, exactly at x whatever
#the default bar alignment of the installed matplotlib is
bar1 = plt.bar( x, y, width, color='CornflowerBlue', align='center')
plt.ylabel( 'chances top seed wins their first playoff series' )
plt.xticks(x, labels )
plt.ylim(0,1)
plt.show()

print("bar graph values from left to right")
print(rlplayoffgames)

#plot games needed
N = len( rlplayoffgames )
x = np.arange(1, N+1)
y = rlplayoffgames
labels = leagues
width = .75
bar1 = plt.bar( x, y, width, color='CornflowerBlue', align='center')
#%.2f prints the threshold itself; '.%d' of percentage*100 truncated the float
#and dropped leading zeros (0.05 would have been shown as ".5")
plt.ylabel( 'games needed for %.2f chance top seed wins first playoff series' %percentage)
plt.xticks(x, labels )
plt.ylim(0,60)
plt.show()