Notes on "Why Most Published Research Findings Are False" by John P. A. Ioannidis (2005)
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
from sympy import symbols, Eq, solve, simplify, lambdify, init_printing
init_printing(use_latex=True)
$R$ is the ratio of the number of true findings $t$ to false findings $f$:
$R = t / f$
$p$ is the prior probability that a finding is true:
$p = t / (t + f)$
$p$ in terms of $R$:
$$ 1 / p = 1 + f / t = 1 + 1 / R $$

$$ p = 1 / (1 + 1 / R) $$

Multiply top and bottom of the RHS by $R$:

$$ p = R / (R + 1) $$

p, R, t, f = symbols('p, R, t, f')
# Solve the two defining equations jointly to express R in terms of p.
req = Eq(R, t / f)
peq = Eq(p, t / (t + f))
solutions = solve((req, peq), R, t)
R_from_p = solutions[R]
R_from_p
# Invert the relation: p as a function of R (expected: R / (R + 1)).
p_from_R = solve(Eq(R, R_from_p), p)[0]
p_from_R
# Confirm R / (R + 1) == p
simplify(R_from_p / (R_from_p + 1))
The total at the top left of table 1 of Ioannidis
# Error-rate symbols: a is alpha (Type I), b is beta (Type II).
a, b = symbols(r'\alpha, \beta')
# Shorthand for R expressed in terms of p (from the cell above).
Rp = R_from_p
# Total of significant findings (per unit of c) from table 1 of Ioannidis,
# then substituted to be a function of p rather than R.
significant_total = R + a - b * R
expr = significant_total / (R + 1)
ion_end = simplify(expr.subs(R, Rp))
ion_end
Which is the same as the bottom right equivalent in appendix table 1 of Wacholder et al 2004:
# Difference should display as 0, confirming the two forms are equivalent.
simplify(ion_end - ((1 - b) * p + a * (1 - p)))
Values in table 2 of Ioannidis. Table 2 assumes that a proportion $u$ of the previously non-significant findings have become significant findings, due to bias:
# u: proportion of non-significant findings turned significant by bias.
u = symbols('u')
Top left value:
# Probability that a true relationship is reported significant once bias
# is present: detected with power (1 - b), or missed (probability b) but
# flipped to significant by bias with probability u.
sig_given_true = (1 - b) + u * b
expr = sig_given_true * R / (R + 1)
ion2_first = simplify(expr.subs(R, Rp))
ion2_first
Confirm equivalent expression in terms of $p$:
# Bias-adjusted top-left entry equals (1 - b) p + u b p in terms of p.
assert simplify(ion2_first - ((1 - b) * p + u * b * p)) == 0
Investigating the second table with respect to the first:
# Table 1 from Ioannidis 2005
from IPython.core.display import Image
Image('pics/ioannidis_table1.png')
# c: total number of relationships probed.
c = symbols('c')
# Cells of table 1 of Ioannidis 2005: a fraction R / (R + 1) of the c
# relationships are true, 1 / (R + 1) are false; a = alpha, b = beta.
table1_00 = c * (1 - b) * R / (R + 1)  # true positives (power 1 - b)
table1_01 = c * a / (R + 1)  # false positives (rate a)
table1_02 = c * (R + a - b * R) / (R + 1)  # total reported significant
table1_10 = c * b * R / (R + 1)  # false negatives (rate b)
table1_11 = c * (1 - a) / (R + 1)  # true negatives
table1_12 = c * (1 - a + b * R) / (R + 1)  # total reported non-significant
# Check row totals
assert simplify(table1_02 - table1_00 - table1_01) == 0
assert simplify(table1_12 - table1_10 - table1_11) == 0
# Check column totals
assert simplify(c * R / (R + 1) - table1_00 - table1_10) == 0
assert simplify(c / (R + 1) - table1_01 - table1_11) == 0
# Table 2 from Ioannidis 2005
Image('pics/ioannidis_table2.png')
# With bias, a proportion u of each non-significant table-1 cell is
# shifted into the corresponding significant cell.
table2_00 = (c * (1 - b) * R + u * c * b * R) / (R + 1)
assert simplify(table2_00 - table1_00 - u * table1_10) == 0
# table2_01 has missing brackets. Here's the version as written:
table2_01_bad = c * a + u * c * (1 - a) / (R + 1)
# Here's what it should be:
table2_01 = (c * a + u * c * (1 - a)) / (R + 1)
assert simplify(table2_01 - table1_01 - u * table1_11) == 0
table2_02 = c * (R + a - b * R + u - u * a + u * b * R) / (R + 1)
# Non-significant cells shrink by the factor (1 - u).
table2_10 = (1 - u) * c * b * R / (R + 1)
assert simplify(table2_10 - (1 - u) * table1_10) == 0
table2_11 = (1 - u) * c * (1 - a) / (R + 1)
assert simplify(table2_11 - (1 - u) * table1_11) == 0
table2_12 = c * (1 - u) * (1 - a + b * R) / (R + 1)
# Check row totals
assert simplify(table2_02 - table2_00 - table2_01) == 0
assert simplify(table2_12 - table2_10 - table2_11) == 0
# Check column totals
assert simplify(c * R / (R + 1) - table2_00 - table2_10) == 0
assert simplify(c / (R + 1) - table2_01 - table2_11) == 0
Expression for PPV:
# PPV without bias: true positives over all findings reported significant.
ppv_nobias = table1_00 / (table1_00 + table1_01)
Check against text:
Image('pics/nobias_formula.png')
# Matches PPV = (1 - b) R / (R - b R + a) as printed in the text.
assert simplify(ppv_nobias - (1 - b) * R / (R - b * R + a)) == 0
Formula in the case of bias:
# PPV with bias: true positives over all significant findings in table 2.
ppv_bias = simplify(table2_00 / (table2_00 + table2_01))
ppv_bias
Check against formula in text:
Image('pics/bias_formula.png')
# Formula for PPV under bias as printed in Ioannidis 2005.
ppv_bias_text = ((1 - b) * R + u * b * R) / (R + a - b * R + u - u * a + u * b * R)
assert simplify(ppv_bias - ppv_bias_text) == 0
Let's see how that works out numerically:
# Turn the symbolic PPV-with-bias expression into a numeric function.
ppv_bias_func = lambdify((R, a, b, c, u), ppv_bias)
# NOTE(review): R is sampled only on [0, 1]; pre-study odds above 1 are
# not shown — confirm this range is intended.
r_vals = np.linspace(0, 1)
fig, axes = plt.subplots(3, 1, figsize=(8, 16))
alpha = 0.05
# One panel per power level; one curve per bias proportion u.
for i, power in enumerate((0.8, 0.5, 0.2)):
    beta = 1 - power
    for bias in (0.05, 0.2, 0.5, 0.8):
        # c cancels in the PPV ratio, so any positive value (here 1) works.
        ppv = ppv_bias_func(r_vals, alpha, beta, 1, bias)
        axes[i].plot(r_vals, ppv, label='u={0}'.format(bias))
    axes[i].set_title('Power = {0}'.format(power))
axes[-1].legend()
<matplotlib.legend.Legend at 0x1059b00d0>
# Table 3 from Ioannidis 2005.
Image('pics/ioannidis_table3.png')
Multiple studies:
# n: number of independent studies of the same relationship.
n = symbols('n')
# Chance in table (without knowing p / R)
ns_true = b ** n  # all n studies miss a true effect
s_true = 1 - ns_true  # at least one significant study, true effect
ns_false = (1 - a) ** n  # no false positive across n studies
s_false = 1 - ns_false  # at least one false positive
# With knowledge of p / R: weight by the prior probability of truth.
rr1 = R / (R + 1) # also p
s_true_post = s_true * rr1
s_false_post = s_false * (1 - rr1)
ppv_multi = simplify(s_true_post / (s_true_post + s_false_post))
ppv_multi
Image('pics/multi_formula.png')
# Matches the multi-study PPV formula printed in the text.
assert simplify(ppv_multi - (R * (1 - b ** n) / (R + 1 - (1 - a) ** n - R * b ** n))) == 0
# Numeric PPV as a function of R for n repeated studies.
ppv_multi_func = lambdify((R, a, b, n), ppv_multi)
fig, axes = plt.subplots(3, 1, figsize=(8, 16))
alpha = 0.05
# One panel per power level; one curve per number of studies.
for i, power in enumerate((0.8, 0.5, 0.2)):
    beta = 1 - power
    for n_studies in (1, 5, 10, 50):
        # r_vals (R sampled on [0, 1]) is reused from the earlier cell.
        ppv = ppv_multi_func(r_vals, alpha, beta, n_studies)
        axes[i].plot(r_vals, ppv, label='n={0}'.format(n_studies))
    axes[i].set_title('Power = {0}'.format(power))
axes[-1].legend()
<matplotlib.legend.Legend at 0x106c44310>