import numpy as np from scipy import stats n_per_day = 10000 # Number of subjects per day control_rate = 0.05 # Baseline conversion rate rate_diff = 0.03 # Anticipated percent change to baseline rate alpha = 0.05 # False alarm rate beta = 0.95 # Correct detection rate test_rate = control_rate*(1.0 + rate_diff) p1 = max(control_rate, test_rate) p2 = min(control_rate, test_rate) p_bar = (p1 + p2)/2.0 # PPF is the "percent point function", a.k.a. quantile function. # Note that we divide alpha by 2 if we desire a two-sided test. za = stats.norm.ppf(1 - alpha/2) zb = stats.norm.ppf(beta) A = (za*np.sqrt(2*p_bar*(1-p_bar)) + zb*np.sqrt(p1*(1-p1) + p2*(1-p2)))**2 n = A*(((1 + np.sqrt(1 + 4*(p1-p2)/A))) / (2*(p1-p2)))**2 n_act = int(np.ceil(n)) # Print results print('Number of users needed each for control and test groups = {:,d} ({:,d} total)'.format(n_act, 2*n_act)) print('Estimated duration at {:,d} subjects per day: {:,d} days'.format(int(n_per_day), int(np.ceil(2.0*n_act/n_per_day)))) dns = 1000 astr = 'greater' if (rate_diff < 0) else 'less' # The tail probability we use depends on whether the change is positive or negative # Experimental results when null is true control0 = stats.binom.rvs(n, control_rate, size=ns) test0 = stats.binom.rvs(n, control_rate, size=ns) # Test and control are the same tables0 = [[[a, n_act-a], [b, n_act-b]] for a, b in zip(control0, test0)] # Contingency tables results0 = [stats.fisher_exact(T, alternative=astr) for T in tables0] decisions0 = [x[1] <= alpha for x in results0] # Experimental results when alternate is true control1 = stats.binom.rvs(n, control_rate, size=ns) test1 = stats.binom.rvs(n, test_rate, size=ns) # Test and control are different tables1 = [[[a, n_act-a], [b, n_act-b]] for a, b in zip(control1, test1)] # Contingency tables results1 = [stats.fisher_exact(T, alternative=astr) for T in tables1] decisions1 = [x[1] <= alpha for x in results1] # Compute false alarm and correct detection rates alpha_est = sum(decisions0)/float(ns) beta_est = sum(decisions1)/float(ns) print('True false alarm rate = %0.2f, estimated false alarm rate = %0.2f' % (alpha, alpha_est)) print('True correct detection rate = %0.2f, estimated correct detection rate = %0.2f' % (beta, beta_est))