import numpy as np
import scipy.stats
def p_tail(num_mutations, num_generations, num_loci=37, r=0.003):
"""Tail p-value of seeing `num_mutations` or more mutations in
`num_generations` generations with `num_loci` measured loci."""
k = np.arange(num_mutations, num_loci + 1)
n = num_generations * num_loci
return scipy.stats.binom.pmf(k, n, r).sum()
# Fields:
# 1) Name of person.
# 2) Number of mutations.
# 3) Number of generations from William Towne.
# 4) Number of loci measured.
data = [('T2', 23, 10, 37), ('T11', 5, 9, 37), ('T5', 3, 10, 37), ('T13', 4, 10, 12)]
for name, num_mutations, num_generations, num_loci in data:
pval = p_tail(num_mutations, num_generations, num_loci=num_loci)
print '{0}: {1}'.format(name, pval)
T2: 7.8209671638e-23 T11: 0.00357596170111 T5: 0.101333960602 T13: 0.000504346838529