import numpy as np from scipy.stats import beta as beta_dist %matplotlib inline mu = .10 tau = 1500 alpha, beta = mu*tau, (1-mu)*tau pA = beta_dist.rvs(alpha,beta,size=10000) pB = beta_dist.rvs(alpha,beta,size=10000) diff = Series(pA-pB) diff.hist(bins=np.arange(-.05,.06,.01), histtype='stepfilled') print(diff.describe()) successesA = 300 successesB = 350 failuresA = 3000 failuresB = 3000 pA, pB = successesA/(successesA+failuresA), successesB/(successesB+failuresB) pA, pB, (pB-pA)/pA def usual_method(alpha=1, beta=1): pA = beta_dist.rvs(alpha+successesA,beta+failuresA,size=10000) pB = beta_dist.rvs(alpha+successesB,beta+failuresB,size=10000) lift = Series((pB-pA)/pA) return lift def new_method(alpha0=3, beta0=27, tau=1500): mu_samples = beta_dist.rvs(alpha0, beta0, size=10000) def compute_p(mu): alpha, beta = mu*tau, (1-mu)*tau pA = beta_dist.rvs(alpha+successesA,beta+failuresA) pB = beta_dist.rvs(alpha+successesB,beta+failuresB) return pA, pB p = np.array([compute_p(mu) for mu in mu_samples]) pA, pB = p[:, 0], p[:, 1] lift = Series((pB-pA)/pA) return lift lift0 = usual_method() lift1 = usual_method(30, 270) lift2 = new_method() df = DataFrame({'Flat prior': lift0, 'Very informative prior': lift1, 'New method': lift2}) df = pd.melt(df) (lift0<0).mean(), (lift1<0).mean(), (lift2<0).mean() %load_ext rmagic %%R -i df -w 900 library(ggplot2) p <- ggplot(df, aes(x=value, color=variable)) + geom_density() p + xlab('Estimated conversion rate lift') + xlim(-.1, .4)