Notebook

In [32]:

# Here we proceed with the assumption that the number of good boxes is 3

# Assume as a prior that there are 3 of 13 good boxes
priorG = 3.0/13.0
priorB = 1.0-priorG
print 'P( HG ) = ',priorG,' '*7,
print 'P( HB ) = ',priorB

# Calculate posteriors that box in hand is good box
piG = 9.0/10.0 * priorG
piB = 2.0/10.0 * priorB
posteriorG = piG/(piG + piB)
posteriorB = piB/(piG + piB)
print 'P( HG | data ) = ',posteriorG,' '*7,
print 'P( HB | data ) = ',posteriorB
print

# Calculate probability next item is valuable
print 'P( V | data ) = ',(8.0/9.0 * posteriorG + 1.0/9.0 * posteriorB) 

P( HG ) =  0.230769230769         P( HB ) =  0.769230769231
P( HG | data ) =  0.574468085106         P( HB | data ) =  0.425531914894

P( V | data ) =  0.557919621749

In [4]:

# Here we allow for a distribution of priors for the number of good boxes

def p_Hi_given_data(priors):
    """Return the posterior over the number of good boxes, given the data.

    Hypothesis Hi is "exactly i of the 13 boxes are good".  Under Hi the box
    in hand is good with probability i/13, so the likelihood of the observed
    data is the mixture 9/10 * i/13 + 2/10 * (13 - i)/13 (9/10 if the box is
    good, 2/10 if it is bad).

    Parameters
    ----------
    priors : sequence of float
        priors[i] is the prior probability of Hi.

    Returns
    -------
    list of float
        P(Hi | data) for each i, in the same order and length as `priors`.
        An empty `priors` yields an empty list.

    Raises
    ------
    ZeroDivisionError
        If `priors` is non-empty but every weighted likelihood is zero
        (for example, all priors are zero), so there is nothing to normalize.
    """
    # Unnormalized posterior weights: likelihood(data | Hi) * prior(Hi).
    pi = [(9.0/10.0*float(i)/13.0 + 2.0/10.0*(13.0-float(i))/13.0)*prior
          for i, prior in enumerate(priors)]
    # Compute the normalizer once rather than once per element.
    total = sum(pi)
    return [p/total for p in pi]

def p_v_given_Hi_data(i):
    """Return P(V | Hi, data): chance the next item is valuable if i boxes are good.

    Given Hi ("i of the 13 boxes are good"), the box in hand is first
    re-weighted by the observed data (likelihood 9/10 if good, 2/10 if bad);
    the resulting good/bad posteriors then mix the per-box chances of a
    valuable next item (8/9 if good, 1/9 if bad).
    """
    n_good = float(i)

    # Unnormalized weights for "box in hand is good" / "is bad" under Hi.
    weight_good = 9.0/10.0*n_good/13.0
    weight_bad = 2.0/10.0*(13.0-n_good)/13.0
    evidence = weight_good + weight_bad

    post_good = weight_good/evidence
    post_bad = weight_bad/evidence

    return 8.0/9.0*post_good + 1.0/9.0*post_bad
    
    
def p_next_valuable(posteriors):
    """Return P(V | data), the probability that the next item is valuable.

    Marginalizes over the number of good boxes: each P(V | Hi, data) from
    p_v_given_Hi_data(i) is weighted by posteriors[i] = P(Hi | data).
    """
    return sum(p_v_given_Hi_data(n_good)*weight
               for n_good, weight in enumerate(posteriors))

# First we consider the same case as above, only allowing for 3 good boxes.
priors = [0.0, 0.0, 0.0, 1.0] + [0.0] * 9
posteriors = p_Hi_given_data(priors)
print 'Assuming 3 good boxes:'
print 'Priors:    ',priors
print 'Posteriors:',posteriors
print 'P( V | data ) = ',p_next_valuable(posteriors)
print 

# Finally, we consider the distribution Bill gave in class
print 'Assuming following prior distribution:'
priors = [0.0, 0.0, .2, .3, .3, .1, .1] + [0.0] * 7
posteriors = p_Hi_given_data(priors)
print 'Priors:    ',priors
print 'Posteriors:',posteriors
print 'P( V | data ) = ',p_next_valuable(posteriors)

Assuming 3 good boxes:
Priors:     [0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
Posteriors: [0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
P( V | data ) =  0.557919621749

Assuming following prior distribution:
Priors:     [0.0, 0.0, 0.2, 0.3, 0.3, 0.1, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
Posteriors: [0.0, 0.0, 0.15625, 0.275390625, 0.31640624999999994, 0.119140625, 0.1328125, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
P( V | data ) =  0.603298611111

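We see, happily, that assuming exactly 3 good boxes gives the same result with both methods (P(V | data) ≈ 0.558). Furthermore, the distributed prior gives a higher probability that the next item will be valuable (≈ 0.603 versus ≈ 0.558), because it puts half of its prior weight on hypotheses with more than 3 good boxes and only 0.2 on fewer.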

In [6]:

# Presumably publishes this notebook under the given file name; the call's
# return value is what the cell displays as its output.
# NOTE(review): publish_snapshot is neither defined nor imported in the visible
# cells -- presumably supplied by the course environment; confirm before
# relying on a fresh-kernel "Run All".
publish_snapshot('John Hawkins 01-22-14 Valuable Boxes.ipynb')

Out[6]:

'http://nbviewer.ipython.org/github/CS395T/2014/blob/master/John%20Hawkins%2001-22-14%20Valuable%20Boxes.ipynb'

In [ ]: