# Here we proceed with the assumption that the number of good boxes is 3.
# Prior: 3 of the 13 boxes are good.
priorG = 3.0 / 13.0    # P(HG): prior that the box in hand is a good box
priorB = 1.0 - priorG  # P(HB): prior that the box in hand is a bad box
print('P( HG ) = ', priorG, ' ' * 10, end=' ')
print('P( HB ) = ', priorB)

# Posterior that the box in hand is good, given that the item drawn was
# valuable: a good box yields a valuable item with probability 9/10, a bad
# box with probability 2/10 (Bayes' rule with two hypotheses).
piG = 9.0 / 10.0 * priorG  # unnormalized posterior weight for "good box"
piB = 2.0 / 10.0 * priorB  # unnormalized posterior weight for "bad box"
posteriorG = piG / (piG + piB)
posteriorB = piB / (piG + piB)
print('P( HG | data ) = ', posteriorG, ' ' * 3, end=' ')
print('P( HB | data ) = ', posteriorB)
print()

# Probability the next item drawn (without replacement) is valuable:
# 8 of the 9 remaining items in a good box are valuable, 1 of 9 in a bad box.
print('P( V | data ) = ', 8.0 / 9.0 * posteriorG + 1.0 / 9.0 * posteriorB)
# Output:
#   P( HG ) =  0.230769230769            P( HB ) =  0.769230769231
#   P( HG | data ) =  0.574468085106     P( HB | data ) =  0.425531914894
#   P( V | data ) =  0.557919621749
# Here we allow for a distribution of priors for the number of good boxes
def p_Hi_given_data(priors):
    """Posterior P(H_i | data) for each hypothesis H_i = "i of 13 boxes are good".

    The data is one valuable item drawn from the box in hand: a good box
    yields a valuable item with probability 9/10, a bad box with 2/10, so
    P(valuable | H_i) = 9/10 * i/13 + 2/10 * (13-i)/13.

    priors -- list with priors[i] = prior weight of H_i; need not be
              normalized (the result is normalized here).
    Returns a list of the same length that sums to 1.
    """
    unnorm = [(9.0 / 10.0 * i / 13.0 + 2.0 / 10.0 * (13.0 - i) / 13.0) * prior
              for i, prior in enumerate(priors)]
    # Hoist the normalizer: the original recomputed sum(...) once per element,
    # making the normalization pass accidentally O(n^2).
    total = sum(unnorm)
    return [p / total for p in unnorm]
def p_v_given_Hi_data(i):
    """P(next item valuable | H_i, data).

    Probability that the next item drawn from the box in hand is valuable,
    given hypothesis H_i ("i of the 13 boxes are good") and the data that
    the first item drawn was valuable.
    """
    # Unnormalized posterior weights for the box in hand being good or bad,
    # given H_i and one valuable draw (likelihood 9/10 vs 2/10).
    weight_good = 9.0 / 10.0 * float(i) / 13.0
    weight_bad = 2.0 / 10.0 * (13.0 - float(i)) / 13.0
    total = weight_good + weight_bad
    prob_good = weight_good / total
    prob_bad = weight_bad / total
    # After one valuable draw, 8 of the 9 remaining items are valuable in a
    # good box, 1 of 9 in a bad box.
    return 8.0 / 9.0 * prob_good + 1.0 / 9.0 * prob_bad
def p_next_valuable(posteriors):
    """Marginal P(next item valuable | data).

    Averages P(V | H_i, data) over the hypotheses, weighted by the
    posterior probabilities posteriors[i] = P(H_i | data).
    """
    total = 0.0
    for i, weight in enumerate(posteriors):
        total += p_v_given_Hi_data(i) * weight
    return total
# First we consider the same case as above, allowing only exactly 3 good
# boxes: all prior mass on H_3. This should reproduce the direct
# two-hypothesis calculation.
# NOTE(review): this list has 13 entries (H_0..H_12) while the lists below
# have 14 (H_0..H_13); harmless here since the extra entries are zero, but
# the hypothesis spaces are inconsistent — confirm intended length.
priors = [0.0, 0.0, 0.0, 1.0] + [0.0] * 9
posteriors = p_Hi_given_data(priors)
print('Assuming 3 good boxes:')
print('Priors: ', priors)
print('Posteriors:', posteriors)
print('P( V | data ) = ', p_next_valuable(posteriors))
print()

# Next, the prior distribution Bill gave in class (mass on 2-6 good boxes).
print("Assuming Bill's prior distribution:")
priors = [0.0, 0.0, .2, .3, .3, .1, .1] + [0.0] * 7
posteriors = p_Hi_given_data(priors)
print('Priors: ', priors)
print('Posteriors:', posteriors)
print('P( V | data ) = ', p_next_valuable(posteriors))
print()

# Finally, a flat prior on 2-6 good boxes. The weights are unnormalized
# (all 1.0), which is fine because p_Hi_given_data normalizes.
print('Assuming flat prior:')
priors = [0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0] + [0.0] * 7
posteriors = p_Hi_given_data(priors)
print('Priors: ', priors)
print('Posteriors:', posteriors)
print('P( V | data ) = ', p_next_valuable(posteriors))
# Output:
#   Assuming 3 good boxes:
#   Priors:     [0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
#   Posteriors: [0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
#   P( V | data ) =  0.557919621749
#
#   Assuming Bill's prior distribution:
#   Priors:     [0.0, 0.0, 0.2, 0.3, 0.3, 0.1, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
#   Posteriors: [0.0, 0.0, 0.15625, 0.275390625, 0.31640624999999994, 0.119140625, 0.1328125, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
#   P( V | data ) =  0.603298611111
#
#   Assuming flat prior:
#   Priors:     [0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
#   Posteriors: [0.0, 0.0, 0.14814814814814814, 0.17407407407407408, 0.19999999999999998, 0.22592592592592592, 0.2518518518518518, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
#   P( V | data ) =  0.62962962963
# We see, happily, that assuming exactly 3 good boxes gives the same result
# with both methods (P( V | data ) = 0.5579...). Furthermore, spreading the
# prior over several hypotheses gives an even higher probability that the
# next choice will be valuable.