# Luck versus skill in the NHL, NBA, NFL and MLB: split each league's variance
# of winning percentage into a luck part and a skill part, then estimate how
# likely a higher seed is to win more games than a lower seed over a series of
# a given length.

import bisect

import matplotlib.mlab as mlab
import matplotlib.pyplot as plt
import numpy as np
from scipy import stats

# This was the IPython magic "%pylab inline". Spelling it as a call keeps the
# notebook behaviour while leaving the file valid plain Python; outside IPython
# get_ipython is undefined and the default matplotlib backend is used.
try:
    get_ipython().run_line_magic('pylab', 'inline')
except NameError:
    pass

colours = ['y', 'c']

# Actual variances of winning percentage (squared standard deviations).
avNHL = 0.0997**2
avNBA = 0.1449**2
avNFL = 0.1899**2
avMLB = 0.0717**2

# Random variance (variance due to luck) of winning percentage: .5*.5/n.
# NOTE(review): 82, 82, 16 and 162 are presumably the regular-season lengths
# of each league -- confirm.
rvNHL = .5*.5/82
rvNBA = .5*.5/82
rvNFL = .5*.5/16
rvMLB = .5*.5/162

# True variance (variance due to skill) of winning percentage: actual - random.
tvNHL = avNHL - rvNHL
tvNBA = avNBA - rvNBA
tvNFL = avNFL - rvNFL
tvMLB = avMLB - rvMLB

avs = {'NHL': avNHL, 'NBA': avNBA, 'NFL': avNFL, 'MLB': avMLB}
rvs = {'NHL': rvNHL, 'NBA': rvNBA, 'NFL': rvNFL, 'MLB': rvMLB}
tvs = {'NHL': tvNHL, 'NBA': tvNBA, 'NFL': tvNFL, 'MLB': tvMLB}
leagues = ['NHL', 'NBA', 'NFL', 'MLB']


def calcchance(t1rank, t2rank, percentage, numgames, gp, league):
    """Chance that the team ranked t1rank wins more games than t2rank.

    Args:
        t1rank: 1-based skill rank of the first team; it is used as index
            ``t1rank - 1`` into the league's skill scores, where rank 1 has
            the highest score (1.0) and the last rank the lowest (0.0).
        t2rank: 1-based skill rank of the second team, used the same way.
        percentage: probability threshold (0-1) for the "games needed" result.
        numgames: series length whose chance is reported; read as index
            ``numgames - 1`` of the chance curve, so it matches "after
            numgames games" only when gp starts at 1 and increases by 1.
        gp: array of games-played counts at which the chance is evaluated.
        league: key into the module-level ``tvs`` and ``numteams`` dicts.

    Returns:
        A three-element list:
        [chance for every entry of gp,
         chance at index numgames - 1,
         first gp value whose chance is >= percentage, or NaN when the
         threshold is never reached within gp].
    """
    # Share of the variance after gp games that is luck, and that is skill.
    luckvar = .5 * .5 / gp
    luckperc = luckvar / (tvs[league] + luckvar)
    skillperc = tvs[league] / (tvs[league] + luckvar)

    # Skill points per rank: evenly spaced upper-tail probabilities mapped
    # through the normal inverse survival function, then rescaled to [0, 1].
    nteams = numteams[league]
    pvals = np.arange(1, nteams + 1) * (1.0 / float(nteams + 1))
    zscores = stats.norm.isf(pvals)
    skillscores = .5 + zscores / (2 * zscores.max())

    # Difference of skill points, weighted by the skill share.
    skilldiff = skillscores[t1rank - 1] - skillscores[t2rank - 1]
    efskilldiff = skilldiff * skillperc

    # z-score of that difference in the difference distribution.
    # NOTE(review): .14 looks like an assumed per-team standard deviation
    # (sqrt(2) * .14 being the s.d. of a difference of two such values) that
    # is then scaled by the luck share -- confirm where it comes from.
    z_score2 = (efskilldiff - 0) / (np.sqrt(2 * (.14**2)) * luckperc)

    # Probability of no upset corresponding to that z-score.
    p_val = 1 - stats.norm.sf(z_score2)

    # Read the curve directly; the earlier version drew and closed a throwaway
    # plot just to get these same arrays back, disturbing the current figure.
    games = np.atleast_1d(gp)
    chances = np.atleast_1d(p_val)

    # First games-played value at which the requested probability is reached.
    reached = np.flatnonzero(chances >= percentage)
    if reached.size:
        games_needed = games[reached[0]]
    else:
        # Threshold never reached: report NaN instead of raising on an
        # unbound index.
        games_needed = np.nan

    return [p_val, chances[numgames - 1], games_needed]


# Call the function defined above and plot the results for every league.
# Function parameters (these are the first 5; the 6th, league, is varied in
# the for loop).
t1rank = 1
t2rank = 15
percentage = .8
numgames = 5
gp = np.arange(1, 101)
numteams = {'MLB': 30, 'NHL': 30, 'NBA': 30, 'NFL': 32}
allchances = []  # for the combined plot

for league in leagues:
    resultarray = calcchance(t1rank, t2rank, percentage, numgames, gp, league)

    # Plot results for each league. The curve is labelled and drawn before
    # legend() so the legend has an entry to show.
    plt.title(league)
    plt.xlabel("# of games")
    plt.ylabel('chance seed %d wins more games than seed %d' % (t1rank, t2rank))
    datapoints = plt.plot(gp, resultarray[0], label=league)
    plt.legend()
    plt.show()

    allchances.append(resultarray[0].tolist())  # for the combined plot
    # Single-argument print(...) gives the same text on Python 2 and 3.
    print('%s %% chance at %s games' % (percentage * 100, resultarray[2]))
    print('%s %% chance at %s games' % (resultarray[1] * 100, numgames))

# Combined plot: one curve per league on shared axes.
for league_name, league_chances in zip(leagues, allchances):
    plt.plot(gp, league_chances, label=league_name)
plt.xlabel("# of games")
plt.ylabel('chance seed %d wins more games than seed %d' % (t1rank, t2rank))
plt.legend()
plt.show()

# Playoffs
# Playoff parameters: opponent seed and series length per league.
t1rank = 1
t2rankNHL = 16
t2rankNBA = 16
t2rankNFL = 8
t2rankMLB = 8
numgamesNHL = 7
numgamesNBA = 7
numgamesMLB = 5
numgamesNFL = 1
percentage = .9
gp = np.arange(1, 100)

resultNHL = calcchance(t1rank, t2rankNHL, percentage, numgamesNHL, gp, 'NHL')
resultNBA = calcchance(t1rank, t2rankNBA, percentage, numgamesNBA, gp, 'NBA')
resultNFL = calcchance(t1rank, t2rankNFL, percentage, numgamesNFL, gp, 'NFL')
resultMLB = calcchance(t1rank, t2rankMLB, percentage, numgamesMLB, gp, 'MLB')

# Same order as `leagues` (NHL, NBA, NFL, MLB) so the bar charts line up.
playoffresults = [resultNHL, resultNBA, resultNFL, resultMLB]
rlplayoffchance = [result[1] for result in playoffresults]
rlplayoffgames = [result[2] for result in playoffresults]

print("bar graph values from left to right")
print(rlplayoffchance)
#plot chances
def _plot_league_bars(values, labels, ylabel, ymax):
    """Show a bar chart with one bar per entry of values.

    Args:
        values: bar heights, one per label.
        labels: tick labels, in the same order as values.
        ylabel: text for the y axis.
        ymax: upper limit of the y axis (the lower limit is 0).

    Returns:
        The bar container returned by plt.bar.
    """
    positions = np.arange(1, len(values) + 1)
    # Centre the bars explicitly and put the ticks on the same positions.
    # Offsetting the ticks by width/2 is only centred when bars are
    # edge-aligned, which depends on the matplotlib version's default.
    bars = plt.bar(positions, values, .75, color='CornflowerBlue',
                   align='center')
    plt.ylabel(ylabel)
    plt.xticks(positions, labels)
    plt.ylim(0, ymax)
    plt.show()
    return bars


# Chance that the top seed wins its first playoff series, per league.
bar1 = _plot_league_bars(
    rlplayoffchance, leagues,
    'chances top seed wins their first playoff series', 1)

print("bar graph values from left to right")
print(rlplayoffgames)

# Games needed for the top seed to reach the requested win probability.
# Two-decimal formatting with the leading zero stripped still gives ".90" for
# .9, and stays correct for values such as .05 or .29, where truncating
# percentage*100 with %d produced ".5" and ".28".
chance_text = ('%.2f' % percentage).lstrip('0')
bar1 = _plot_league_bars(
    rlplayoffgames, leagues,
    'games needed for %s chance top seed wins first playoff series' % chance_text,
    60)