import matplotlib.pyplot as plt
import numpy as np
import sympy
# For each factor, the number of mutations the number of generations from its ancestor factor.
# Used to calculate the probability of the data.
old_mutations_and_generations = [
(0, 3), # Samuel
(0, 3), # Willard
(1, 5), # T8
(0, 5), # T6
(0, 6), # T3
(1, 11), # T4
(3, 10)] # T5
new_mutations_and_generations = [
(0, 1), # Jacob: new factor
(0, 2), # Samuel: 3 -> 2
(0, 3), # Willard
(1, 5), # T8
(0, 5), # T6
(0, 6), # T3
(1, 10), # T4: 11 -> 10
(3, 10)] # T5
def get_prob_r_given_data(mutations_and_generations):
"""
Returns a vectorized function that takes a value for r and returns P(r | data)
computed using Bayes' rule. The 'mutations_and_generations' parameter is used
to calculate the probability of the data.
"""
r = sympy.symbols('r')
p_data_given_r = 1
for mutations, generations in mutations_and_generations:
n = generations * 37
k = mutations
p_data_given_r *= sympy.binomial(n, k) * r**k * (1 - r)**(n - k)
p_r = 1 / r
p_data = sympy.integrate(p_data_given_r * p_r, (r, 0, 1))
expr = (p_data_given_r * p_r) / p_data
return np.vectorize(lambda r_val: expr.subs(r, r_val))
old_prob_r_func = get_prob_r_given_data(old_mutations_and_generations)
new_prob_r_func = get_prob_r_given_data(new_mutations_and_generations)
rs = np.linspace(1e-6, 0.01, num=1000)
plt.plot(rs, old_prob_r_func(rs), label='old')
plt.plot(rs, new_prob_r_func(rs), label='new')
plt.xlim(0, 0.01)
plt.legend()
plt.show()