$\lambda$에 대한 사전 분포를 고르기 위해 이전 경기들에 대한 통계를 확인
각 팀에 대한 $\lambda$ 추정을 위해 처음 네 게임의 점수를 사용
$\lambda$의 사후 분포를 통해 각 팀의 골의 분포, 골 차이의 분포, 각 팀이 다음 경기에서 이길 확률을 구함.
각 팀이 이번 시리즈에서 이길 확률을 구함.
"""This file contains code for use with "Think Bayes",
by Allen B. Downey, available from greenteapress.com
Copyright 2012 Allen B. Downey
License: GNU GPLv3 http://www.gnu.org/licenses/gpl.html
"""
import math
import columns
import thinkbayes
import thinkstats
import thinkplot
class Hockey(thinkbayes.Suite):
    """Suite of hypotheses about lambda, the goal-scoring rate.

    The prior is the Gaussian Pmf returned by thinkbayes.MakeGaussianPmf,
    requested with a range of 4 sigmas on each side of the mean.
    """

    def __init__(self):
        """Builds the Gaussian prior and hands it to thinkbayes.Suite."""
        # NOTE(review): the original note on this class described the prior
        # as mean 2.7 / standard deviation 0.3, but the code passes 0.85.
        # The code's value is kept here; confirm which one is intended.
        mu, sigma, num_sigmas = 2.7, 0.85, 4
        prior = thinkbayes.MakeGaussianPmf(mu, sigma, num_sigmas)
        thinkbayes.Suite.__init__(self, prior)
def MakeGaussianPmf(mu, sigma, num_sigmas, n=101):
    """Makes a discrete approximation of a Gaussian distribution.

    mu: mean of the Gaussian
    sigma: standard deviation of the Gaussian
    num_sigmas: how many sigmas below and above the mean to cover
    n: number of equally spaced values in the resulting Pmf

    returns: normalized Pmf mapping each value to its probability
    """
    # Imported here because this file does not import numpy or scipy at
    # module level.
    import numpy
    import scipy.stats

    pmf = thinkbayes.Pmf()
    low = mu - num_sigmas * sigma
    high = mu + num_sigmas * sigma

    # n equally spaced values covering [low, high], both ends included
    for x in numpy.linspace(low, high, n):
        # norm.pdf takes (x, loc, scale): evaluate the density at x for a
        # Gaussian centred on mu with standard deviation sigma.
        p = scipy.stats.norm.pdf(x, mu, sigma)
        pmf.Set(x, p)

    pmf.Normalize()
    return pmf
def main():
    """Plots the prior over the scoring rate and saves it as hockey_summary."""
    prior = Hockey()

    thinkplot.Clf()
    thinkplot.PrePlot(num=2)
    thinkplot.Pmf(prior)

    # written once per requested format (pdf and eps)
    save_options = dict(
        root='hockey_summary',
        xlabel='Goals per game',
        ylabel='Probability',
        formats=['pdf', 'eps'],
    )
    thinkplot.Save(**save_options)


if __name__ == '__main__':
    main()
Writing hockey_summary.pdf Writing hockey_summary.eps
/Users/moodern/.venv/bayesianpy/lib/python2.7/site-packages/matplotlib/axes/_axes.py:475: UserWarning: No labelled objects found. Use label='...' kwarg on individual plots. warnings.warn("No labelled objects found. "
두가지 결과가 나올 수 있음.
"""This file contains code for use with "Think Bayes",
by Allen B. Downey, available from greenteapress.com
Copyright 2012 Allen B. Downey
License: GNU GPLv3 http://www.gnu.org/licenses/gpl.html
"""
import math
import columns
import thinkbayes
import thinkstats
import thinkplot
USE_SUMMARY_DATA = True
class Hockey(thinkbayes.Suite):
    """Suite of hypotheses about a team's goal-scoring rate."""

    def __init__(self, name=''):
        """Builds the Gaussian prior over the scoring rate.

        name: string, passed through to thinkbayes.Suite
        """
        # See the discussion in section 7.7.
        # Both priors share the same mean; only the spread differs:
        #   0.3  -- prior based on each team's average goals scored
        #   0.85 -- prior based on each pair-wise match-up
        mu = 2.7
        sigma = 0.3 if USE_SUMMARY_DATA else 0.85

        prior = thinkbayes.MakeGaussianPmf(mu, sigma, 4)
        thinkbayes.Suite.__init__(self, prior, name=name)

    def Likelihood(self, data, hypo):
        """Returns the Poisson probability of the data given the hypothesis.

        hypo: hypothetical goal-scoring rate (lam), in goals per game
        data: observed number of goals (k)
        """
        return thinkbayes.EvalPoissonPmf(data, hypo)
def MakeGoalPmf(suite, high=10):
    """Builds the distribution of goals scored from a distribution of lam.

    suite: distribution of goal-scoring rate
    high: upper bound handed to thinkbayes.MakePoissonPmf

    returns: Pmf of goals per game
    """
    # meta-Pmf: each Poisson Pmf is weighted by the probability of its lam
    weighted_pmfs = thinkbayes.Pmf()
    for rate, weight in suite.Items():
        weighted_pmfs.Set(thinkbayes.MakePoissonPmf(rate, high), weight)

    return thinkbayes.MakeMixture(weighted_pmfs, name=suite.name)
def MakeGoalTimePmf(suite):
    """Builds the distribution of time until the first goal.

    suite: distribution of goal-scoring rate

    returns: Pmf of time until the first goal (a mixture of exponential Pmfs)
    """
    # meta-Pmf: each exponential Pmf is weighted by the probability of its lam
    weighted_pmfs = thinkbayes.Pmf()
    for rate, weight in suite.Items():
        exponential = thinkbayes.MakeExponentialPmf(rate, high=2, n=2001)
        weighted_pmfs.Set(exponential, weight)

    return thinkbayes.MakeMixture(weighted_pmfs, name=suite.name)
class Game(object):
    """One game record.

    Attributes are set in columns.read_csv.
    """

    # Empty mapping.
    # NOTE(review): presumably consulted by columns.read_csv when converting
    # column values -- confirm against that module.
    convert = {}

    def clean(self):
        """Stores the sum of the three per-period values in self.goals."""
        first_two = self.pd1 + self.pd2
        self.goals = first_two + self.pd3
def ReadHockeyData(filename='hockey_data.csv'):
    """Reads game scores from the data file and summarizes them.

    Keeps only records whose season is 2011, pairs up the two records that
    share a game ID, then runs the team-wise and pair-wise summaries.

    filename: string
    """
    # map from game ID to the list of records for that game
    games = {}
    for record in columns.read_csv(filename, Game):
        if record.season == 2011:
            games.setdefault(record.game, []).append(record)

    # map from (team1, team2) to list of (score1, score2);
    # unpacking requires exactly two records per game ID
    pairs = {}
    for first, second in games.itervalues():
        matchup = (first.team, second.team)
        scores = (first.total, second.total)
        pairs.setdefault(matchup, []).append(scores)

    ProcessScoresTeamwise(pairs)
    ProcessScoresPairwise(pairs)
def ProcessScoresPairwise(pairs):
"""Average number of goals for each team against each opponent.
pairs: map from (team1, team2) to (score1, score2)
"""
# map from (team1, team2) to list of goals scored
goals_scored = {}
for key, entries in pairs.iteritems():
t1, t2 = key
for entry in entries:
g1, g2 = entry
goals_scored.setdefault((t1, t2), []).append(g1)
goals_scored.setdefault((t2, t1), []).append(g2)
# make a list of average goals scored
lams = []
for key, goals in goals_scored.iteritems():
if len(goals) < 3:
continue
lam = thinkstats.Mean(goals)
lams.append(lam)
# make the distribution of average goals scored
cdf = thinkbayes.MakeCdfFromList(lams)
thinkplot.Cdf(cdf)
thinkplot.Show()
mu, var = thinkstats.MeanVar(lams)
print 'mu, sig', mu, math.sqrt(var)
print 'BOS v VAN', pairs['BOS', 'VAN']
def ProcessScoresTeamwise(pairs):
"""Average number of goals for each team.
pairs: map from (team1, team2) to (score1, score2)
"""
# map from team to list of goals scored
goals_scored = {}
for key, entries in pairs.iteritems():
t1, t2 = key
for entry in entries:
g1, g2 = entry
goals_scored.setdefault(t1, []).append(g1)
goals_scored.setdefault(t2, []).append(g2)
# make a list of average goals scored
lams = []
for key, goals in goals_scored.iteritems():
lam = thinkstats.Mean(goals)
lams.append(lam)
# make the distribution of average goals scored
cdf = thinkbayes.MakeCdfFromList(lams)
thinkplot.Cdf(cdf)
thinkplot.Show()
mu, var = thinkstats.MeanVar(lams)
print 'mu, sig', mu, math.sqrt(var)
############################################################
# high = 10: more than 10 goals in one game is unlikely.
# The goal distribution is built over the posterior of lam: the value of
# lam is not known, so its whole distribution is used.
# MakeMixture() computes the resulting mixture distribution.
############################################################
def MakeGoalPmf(suite, high=10):
    """Makes the distribution of goals scored, given a distribution of lam.

    suite: distribution of goal-scoring rate
    high: upper bound passed to thinkbayes.MakePoissonPmf

    returns: Pmf of goals per game
    """
    meta = thinkbayes.Pmf()
    for rate, weight in suite.Items():
        # one Poisson Pmf per candidate rate, weighted by that rate's
        # probability
        poisson = thinkbayes.MakePoissonPmf(rate, high)
        meta.Set(poisson, weight)

    mixture = thinkbayes.MakeMixture(meta, name=suite.name)
    return mixture
def MakeGoalTimePmf(suite):
    """Makes the distribution of time until the first goal.

    suite: distribution of goal-scoring rate

    returns: Pmf of time until the first goal
    """
    meta = thinkbayes.Pmf()
    for rate, weight in suite.Items():
        # one exponential Pmf per candidate rate, weighted by that rate's
        # probability
        waiting_time = thinkbayes.MakeExponentialPmf(rate, high=2, n=2001)
        meta.Set(waiting_time, weight)

    mixture = thinkbayes.MakeMixture(meta, name=suite.name)
    return mixture
def main():
#ReadHockeyData()
#return
formats = ['pdf', 'eps']
suite1 = Hockey('bruins')
suite2 = Hockey('canucks')
thinkplot.Clf()
thinkplot.PrePlot(num=2)
thinkplot.Pmf(suite1)
thinkplot.Pmf(suite2)
thinkplot.Save(root='hockey0',
xlabel='Goals per game',
ylabel='Probability',
formats=formats)
suite1.UpdateSet([0, 2, 8, 4])
suite2.UpdateSet([1, 3, 1, 0])
thinkplot.Clf()
thinkplot.PrePlot(num=2)
thinkplot.Pmf(suite1)
thinkplot.Pmf(suite2)
thinkplot.Save(root='hockey1',
xlabel='Goals per game',
ylabel='Probability',
formats=formats)
#이길 확률 -> 골 수 차이의 분포
goal_dist1 = MakeGoalPmf(suite1)
goal_dist2 = MakeGoalPmf(suite2)
thinkplot.Clf()
thinkplot.PrePlot(num=2)
thinkplot.Pmf(goal_dist1)
thinkplot.Pmf(goal_dist2)
thinkplot.Save(root='hockey2',
xlabel='Goals',
ylabel='Probability',
formats=formats)
#서든데스 시 브루인스가 먼저 점수를 낼 확률
time_dist1 = MakeGoalTimePmf(suite1)
time_dist2 = MakeGoalTimePmf(suite2)
print 'MLE bruins', suite1.MaximumLikelihood()
print 'MLE canucks', suite2.MaximumLikelihood()
thinkplot.Clf()
thinkplot.PrePlot(num=2)
thinkplot.Pmf(time_dist1)
thinkplot.Pmf(time_dist2)
thinkplot.Save(root='hockey3',
xlabel='Games until goal',
ylabel='Probability',
formats=formats)
#골수 차이가 양수면 브루인스 승, 음수면 패, 0이면 비김
diff = goal_dist1 - goal_dist2
p_win = diff.ProbGreater(0)
p_loss = diff.ProbLess(0)
p_tie = diff.Prob(0)
print '브루인스가 이길 확률: ', p_win, '패할 확률 : ', p_loss, '비길확률 : ', p_tie
p_overtime = thinkbayes.PmfProbLess(time_dist1, time_dist2)
p_adjust = thinkbayes.PmfProbEqual(time_dist1, time_dist2)
p_overtime += p_adjust / 2
print 'p_overtime', p_overtime
print 'p_overtime + p_tie', p_overtime * p_tie
p_win += p_overtime * p_tie
print 'p_win', p_win
# 다음 두 경기에서 이길 확률
p_series = p_win**2
# 다음 두 경기에서 승점이 동일하고 세번째 게임에서 이길 확률
p_series += 2 * p_win * (1-p_win) * p_win
print 'p_series', p_series
if __name__ == '__main__':
main()
Writing hockey0.pdf Writing hockey0.eps Writing hockey1.pdf Writing hockey1.eps Writing hockey2.pdf Writing hockey2.eps MLE bruins 2.796 MLE canucks 2.52 Writing hockey3.pdf Writing hockey3.eps 브루인스가 이길 확률: 0.45844446316 패할 확률 : 0.366927595518 비길확률 : 0.174627941321 p_overtime 0.524770881768 p_overtime + p_tie 0.0916396587484 p_win 0.550084121909 p_series 0.57487491891
버스 정류장에 버스가 20분마다 오고, 당신이 버스 정류장에 도착하는 시간은 임의로 정해진다면 버스가 도착할 때까지 기다리는 시간은 0분에서 20분까지 균일하게 분포한다.
하지만 실제로는 버스 간 시간도 일정하지 않다. 만약 당신이 버스를 기다리고 있고, 기존 버스간의 시간의 분포 이력을 알고 있다고 해보자. 이때 기다리는 시간의 분포를 구하자
힌트 : 버스 간 시간 간격이 5분 또는 10분이고 두 경우의 확률이 같다고 가정하자. 이때 당신이 10분짜리 간격 도중에 버스 정류장에 도착할 확률은 얼마인가?