"""Simulate A/B test paths and compare Bayesian and z-test metrics.

For every non-control variation the script repeatedly simulates a full
experiment (``total`` Bernoulli observations per variation, delivered in
batches of ``samples``), evaluates the ``Trials`` metrics after each batch
and then:

1. plots the dominance probabilities and the lift estimates per path;
2. plots squared errors of the lift estimates and prints the mean sum of
   squared errors (SSE) for each metric.
"""
import sys
sys.path.append('..')  # must run before the project-local import below

from collections import Counter, OrderedDict

import numpy as np
import scipy as sp
import seaborn as sbn
from matplotlib import pyplot as plt
from scipy import stats  # explicit: `import scipy` alone may not load it

from trials import Trials

# True conversion rate of every variation in the simulation.
params = OrderedDict([('Control', 0.70), ('A', 0.72), ('B', 0.75)])
sources = OrderedDict([(label, stats.bernoulli(p))
                       for label, p in params.items()])

samples = 10   # observations drawn per variation in each update
total = 1000   # observations drawn per variation in one experiment

# Figure geometry: 1600x800 pixels at the given DPI.
dpi = 118
figsize = (1600 / dpi, 800 / dpi)


def do_experiments():
    """Simulate one experiment path and record metrics after every batch.

    Draws ``samples`` Bernoulli observations per variation, ``total /
    samples`` times, feeding each batch to a fresh ``Trials`` instance as
    ``(count of ones, count of zeros)`` per label.

    Returns:
        dict mapping each metric name to the list of
        ``trial.evaluate(metric)`` results, one entry per batch.
    """
    trial = Trials(params.keys())
    metrics = {metric: [] for metric in ['expected lift',
                                         'dominance',
                                         'z-test dominance',
                                         'empirical lift',
                                         'lift CI']}

    for _ in range(0, total, samples):
        # Update
        observations = OrderedDict()
        for label, source in sources.items():
            counts = Counter(source.rvs(samples))
            observations[label] = (counts[1], counts[0])
        trial.update(observations)

        # Calculate metrics
        for key in metrics:
            metrics[key].append(trial.evaluate(key))

    return metrics


def _collect_paths(variation, n_paths, metric_by_array):
    """Run ``n_paths`` experiments and gather one variation's metrics.

    Args:
        variation: label whose value is extracted from every metric result.
        n_paths: number of independent experiments to simulate.
        metric_by_array: mapping of output key -> ``do_experiments`` metric
            name.

    Returns:
        dict mapping each output key to an array whose first axis is the
        path and second axis is the batch index.
    """
    collected = {name: [] for name in metric_by_array}
    for _ in range(n_paths):
        metrics = do_experiments()
        for name, metric in metric_by_array.items():
            collected[name].append([value[variation]
                                    for value in metrics[metric]])
    return {name: np.array(rows) for name, rows in collected.items()}


def _true_lift(variation):
    """Return the relative lift of ``variation`` over 'Control'."""
    return (params[variation] - params['Control']) / params['Control']


# --- Part 1: dominance probabilities and lift estimates per path ----------
paths = 15

for v in sources:
    if v == 'Control':
        continue

    # Do experiments and evaluate the metrics
    arrays = _collect_paths(v, paths, OrderedDict([
        ('lifts', 'expected lift'),
        ('elifts', 'empirical lift'),
        ('ps', 'dominance'),
        ('fps', 'z-test dominance'),
        ('ci', 'lift CI'),
    ]))

    # Size and DPI are set at creation; set_size_inches() takes no dpi.
    fig, (p_plot, lift_plot) = plt.subplots(2, sharex=True,
                                            figsize=figsize, dpi=dpi)
    plt.xlim(0, total - samples)

    lift = _true_lift(v)
    # Number of observations seen at each recorded step. Derived from the
    # collected data; the original read `lifts` before it was defined.
    xs = [step * samples for step in range(arrays['lifts'].shape[1])]

    # Plot p-values
    p_plot.hlines(0.95, 0, total, color='red', linestyle='--')
    p_plot.hlines(0.05, 0, total, color='red', linestyle='--')
    for ps, fps in zip(arrays['ps'], arrays['fps']):
        p_plot.plot(xs, ps, color='blue')
        p_plot.plot(xs, fps, color='green')

    # Plot lift range
    lift_plot.hlines(lift, 0, total, color='cyan', linestyle='--')
    for elifts, cis in zip(arrays['elifts'], arrays['ci']):
        lift_plot.plot(xs, elifts, color='green', alpha=0.05)
        # Each CI entry unpacks to three values; the first and last are
        # used as the band's bounds.
        lift_plot.fill_between(xs,
                               [lower for lower, _, _ in cis],
                               [upper for _, _, upper in cis],
                               color='blue', alpha=0.1)

    fig.suptitle('Variation {}. Lift {:.2%}'.format(v, lift))
    fig.show()


# --- Part 2: squared errors of the estimates -------------------------------
paths = 25

ps_sses = []
fps_sses = []
lift_sses = []
elift_sses = []

for v in sources:
    if v == 'Control':
        continue

    # Do experiments and evaluate the metrics
    arrays = _collect_paths(v, paths, OrderedDict([
        ('lifts', 'expected lift'),
        ('elifts', 'empirical lift'),
        ('ps', 'dominance'),
        ('fps', 'z-test dominance'),
    ]))

    # Plot square error
    fig = plt.figure(figsize=figsize, dpi=dpi)
    lift_plot = plt.subplot()
    plt.xlim(0, 50)

    lift = _true_lift(v)

    # The target dominance is 1 when the true lift is positive, else 0.
    for ps, fps in zip(arrays['ps'], arrays['fps']):
        p_error = (ps - 1)**2 if lift > 0 else ps**2
        fp_error = (fps - 1)**2 if lift > 0 else fps**2
        ps_sses.append(np.sum(p_error))
        fps_sses.append(np.sum(fp_error))

    for lifts, elifts in zip(arrays['lifts'], arrays['elifts']):
        lift_error = (lifts - lift)**2
        elift_error = (elifts - lift)**2
        lift_sses.append(np.sum(lift_error))
        elift_sses.append(np.sum(elift_error))
        # Interleave the two bar series: slots 0, 3, 6, ... and 1, 4, 7, ...
        lift_plot.bar(range(0, len(lifts) * 3, 3), lift_error,
                      color='blue', alpha=0.05)
        lift_plot.bar(range(1, len(lifts) * 3 + 1, 3), elift_error,
                      color='green', alpha=0.05)

    fig.suptitle('Variation {}. Lift {:.2%}'.format(v, lift))
    fig.show()

print('Bayesian p-value mean SSE: {:.4}'.format(np.mean(ps_sses)))
print('Frequentist (z-test) p-value mean SSE: {:.4}'.format(np.mean(fps_sses)))
print('Bayesian lift mean SSE: {:.4}'.format(np.mean(lift_sses)))
print('Empirical lift mean SSE: {:.4}'.format(np.mean(elift_sses)))