from thinkbayes import Pmf pmf = Pmf() for x in range(1, 7): pmf.Set(x, 1) pmf.Print() pmf.Normalize() pmf.Print() pmf = Pmf() pmf.Set('Bowl 1', 0.5) pmf.Set('Bowl 2', 0.5) pmf.Mult('Bowl 1', 0.75) pmf.Mult('Bowl 2', 0.5) pmf.Normalize() print pmf.Prob('Bowl 1') from thinkbayes import Suite # Start with equal priors suite = Suite([4, 6, 8, 12, 20]) suite.Print() from thinkbayes import Suite class Dice(Suite): def Likelihood(self, hypo, data): """Computes the likelihood of the data under the hypothesis. hypo: integer number of sides on the die data: integer die roll """ num_sides = hypo outcome = data if outcome > num_sides: return 0 else: return 1.0/num_sides suite = Dice([4, 6, 8, 12, 20]) suite.Update(6) print 'After one 6' suite.Print() for roll in [6, 4, 8, 7, 7, 2]: suite.Update(roll) suite.Print() %pylab inline import thinkbayes import myplot class Train(thinkbayes.Suite): """Suite of hypotheses about the number of trains.""" def Likelihood(self, hypo, data): """Computes the likelihood of the data under the hypothesis. hypo: integer hypothesis about the number of trains data: integer serial number of the observed train """ num_trains = hypo train_seen = data if train_seen > num_trains: return 0 else: return 1.0/num_trains hypos = xrange(100, 1001) suite = Train(hypos) suite.Update(321) print 'Given the data, our mean estimate is:', print suite.Mean() myplot.Pmf(suite) for train in [782, 423]: suite.Update(train) print 'Given the data, our mean estimate is:', print suite.Mean() myplot.Pmf(suite) class Euro(thinkbayes.Suite): def Likelihood(self, hypo, data): """Computes the likelihood of the data under the hypothesis. hypo: integer value of x, the probability of heads (0-100) data: string 'H' or 'T' """ # Note: while we're writing the likelihood function, we # *assume* the hypothesis is true. Don't forget this! # We convert the hypothesis to a probability in the 0-1 range: x = hypo/100.0 # Now, since the hypothesis is about Heads, if the data is H # it means the hypothesis is True, so we can just return the hypothesis if data == 'H': return x else: return 1-x suite = Euro(range(0, 101)) myplot.Pmf(suite) suite.Update('H') myplot.Pmf(suite) suite.Update('H') myplot.Pmf(suite) suite.Update('T') myplot.Pmf(suite) for outocme in 'HHHHHHHHTTT': suite.Update(outocme) myplot.Pmf(suite) suite = Euro(range(0, 101)) for outocme in 'H'*140 + 'T'*110: suite.Update(outocme) myplot.Pmf(suite) suite.Prob(50) %load euro2.py class Euro(thinkbayes.Suite): def Likelihood(self, hypo, data): """Computes the likelihood of the data under the hypothesis. hypo: integer value of x, the probability of heads (0-100) data: string 'H' or 'T' """ x = hypo / 100.0 heads, tails = data like = x**heads * (1-x)**tails return like def AverageLikelihood(suite, data): """Computes the average likelihood over all hypothesis in suite. Args: suite: Suite of hypotheses data: some representation of the observed data Returns: float """ total = 0 for hypo, prob in suite.Items(): like = suite.Likelihood(hypo, data) total += prob * like return total fair = Euro() fair.Set(50, 1) bias = Euro() for x in range(0, 101): if x != 50: bias.Set(x, 1) bias.Normalize() data = 140, 110 like_bias = AverageLikelihood(bias, data) print 'like_bias', like_bias like_fair = AverageLikelihood(fair, data) print 'like_fair', like_fair ratio = like_bias / like_fair print 'Bayes factor', ratio bias = Euro() for x in range(40, 70): if x != 50: bias.Set(x, 1) bias.Normalize() like_bias = AverageLikelihood(bias, data) print 'like_bias', like_bias ratio = like_bias / like_fair print 'Bayes factor', ratio import csv import math import numpy import sys import matplotlib import myplot def ReadScale(filename='sat_scale.csv', col=2): """Reads a CSV file of SAT scales (maps from raw score to standard score). Args: filename: string filename col: which column to start with (0=Reading, 2=Math, 4=Writing) Returns: thinkbayes.Interpolator object """ def ParseRange(s): t = [int(x) for x in s.split('-')] return 1.0 * sum(t) / len(t) fp = open(filename) reader = csv.reader(fp) raws = [] scores = [] for t in reader: try: raw = int(t[col]) raws.append(raw) score = ParseRange(t[col+1]) scores.append(score) except: pass raws.sort() scores.sort() return thinkbayes.Interpolator(raws, scores) def ReadRanks(filename='sat_ranks.csv'): """Reads a CSV file of SAT scores. Args: filename: string filename Returns: list of (score, freq) pairs """ fp = open(filename) reader = csv.reader(fp) res = [] for t in reader: try: score = int(t[0]) freq = int(t[1]) res.append((score, freq)) except ValueError: pass return res def DivideValues(pmf, denom): """Divides the values in a Pmf by denom. Returns a new Pmf. """ new = thinkbayes.Pmf() denom = float(denom) for val, prob in pmf.Items(): x = val / denom new.Set(x, prob) return new class Exam(object): """Encapsulates information about an exam. Contains the distribution of scaled scores and an Interpolator that maps between scaled and raw scores. """ def __init__(self): self.scale = ReadScale() scores = ReadRanks() score_pmf = thinkbayes.MakePmfFromDict(dict(scores)) self.raw = self.ReverseScale(score_pmf) self.max_score = max(self.raw.Values()) self.prior = DivideValues(self.raw, denom=self.max_score) def Lookup(self, raw): """Looks up a raw score and returns a scaled score.""" return self.scale.Lookup(raw) def Reverse(self, score): """Looks up a scaled score and returns a raw score. Since we ignore the penalty, negative scores round up to zero. """ raw = self.scale.Reverse(score) return raw if raw > 0 else 0 def ReverseScale(self, pmf): """Applies the reverse scale to the values of a PMF. Args: pmf: Pmf object scale: Interpolator object Returns: new Pmf """ new = thinkbayes.Pmf() for val, prob in pmf.Items(): raw = self.Reverse(val) new.Incr(raw, prob) return new class Sat(thinkbayes.Suite): """Represents the distribution of efficacy for a test-taker.""" def __init__(self, exam): thinkbayes.Suite.__init__(self) self.exam = exam # start with the prior distribution for p, prob in exam.prior.Items(): self.Set(p, prob) def Likelihood(self, hypo, data): """Computes the likelihood of a test score, given efficacy.""" p = hypo score = data raw = self.exam.Reverse(score) yes, no = raw, self.exam.max_score - raw like = p**yes * (1-p)**no return like def MakeFigures(exam, alice, bob): formats=['png'] myplot.Pmf(exam.prior, label='prior') myplot.Save(root='sat_prior', formats=formats, xlabel='p', ylabel='PMF') myplot.Clf() myplot.Pmfs([alice, bob]) myplot.Save(root='sat_posterior', formats=formats, xlabel='p', ylabel='PMF') def PmfProbGreater(pmf1, pmf2): """Probability that a value from pmf1 is more than a value from pmf2. Args: pmf1: Pmf object pmf2: Pmf object Returns: float probability """ total = 0.0 for v1, p1 in pmf1.Items(): for v2, p2 in pmf2.Items(): if p1 > p2: total += p1*p2 return total exam = Exam() alice = Sat(exam) alice.name = 'alice' alice.Update(780) bob = Sat(exam) bob.name = 'bob' bob.Update(760) print 'Prob Alice is "smarter":', PmfProbGreater(alice, bob)