%matplotlib inline

from __future__ import division

import ujson as json
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import IPython
import scipy.stats
import statsmodels.stats.proportion as proportion
import statsmodels.stats.weightstats as weightstats

from moztelemetry.spark import get_pings

IPython.core.pylabtools.figsize(18, 7)

sc.defaultParallelism

pings = get_pings(sc, "Firefox", "beta", "35.0", "*", ("20141229", "20150104"))

%%capture
pings.count()

experiment_name = "flash-protectedmode-beta35@experiments.mozilla.org"

def fast_filter(raw):
    return "activeExperiment" in raw

def parse(raw):
    return json.loads(raw)

def is_experiment(ping):
    info = ping["info"]
    return info["activeExperiment"] == experiment_name and info["OS"] == "WINNT" \
        and (info["version"] == "6.1" or info["version"] == "6.0")

def extract_count(container, histogram_name, kind):
    histograms = container.get(histogram_name, {})
    histogram = histograms.get(kind, None)
    return histogram["sum"] if histogram else 0

def is_control(ping):
    return ping["info"]["activeExperimentBranch"] == "control"

def extract(ping):
    keyed = ping.get("keyedHistograms", {})
    clientID = ping.get("clientID", None)
    uptime = ping["simpleMeasurements"]["uptime"]
    control = is_control(ping)
    aborts = extract_count(keyed, "SUBPROCESS_ABNORMAL_ABORT", "plugin")
    crashes = extract_count(keyed, "SUBPROCESS_CRASHES_WITH_DUMP", "plugin")
    hangs = extract_count(keyed, "SUBPROCESS_CRASHES_WITH_DUMP", "pluginhang")
    lag = ping["histograms"].get("EVENTLOOP_UI_LAG_EXP_MS", None)
    lag = lag[:-5] if lag else lag  # keep only the bucket counts; the trailing entries are summary fields
    return (clientID, aborts, crashes, hangs, control, lag, uptime)

data = pings.filter(fast_filter).map(parse).filter(is_experiment).map(extract)

%%capture
collected_data = data.collect()

len(collected_data)

frame = pd.DataFrame(collected_data, columns=["clientID", "aborts", "crashes", "hangs", "control", "lag", "uptime"])
frame = frame[frame["uptime"] > 0]  # Ignore submissions with negative or zero uptime (see Bug 1106048)
frame = frame.groupby("clientID").last().reset_index()  # To avoid bias, use only the last submission for each Client ID
frame["lag"] = frame["lag"].map(np.array)
frame["lag"] = frame["lag"] / frame["uptime"]  # Normalize lag histograms by uptime
frame["ping"] = 1  # helper column used to count submissions when pivoting
len(frame)

frame["aborts"] = frame["aborts"] - frame["crashes"] - frame["hangs"]  # abnormal aborts include crashes and hangs; keep only the remaining aborts

binary_frame = frame.copy()
binary_frame["aborts"] = binary_frame["aborts"].map(bool)
binary_frame["crashes"] = binary_frame["crashes"].map(bool)
binary_frame["hangs"] = binary_frame["hangs"].map(bool)

frame.to_json("1111791.json")

def error(row):
    count = row[True]
    nobs = row.sum()
    return map(lambda x: float("{:.3f}".format(x)), proportion.proportion_confint(count, nobs, method="wilson"))

def compare_proportions(metric):
    agg = pd.pivot_table(binary_frame, index=["control", metric], values="ping", aggfunc=np.sum)
    default = pd.DataFrame({False: {False: 0, True: 0}, True: {False: 0, True: 0}})
    contingency_table = agg.unstack().combine_first(default)
    control = contingency_table.ix[True]
    experiment = contingency_table.ix[False]
    p1 = control[True] / control.sum()
    p2 = experiment[True] / experiment.sum()
    pvalue = scipy.stats.fisher_exact(contingency_table)[1]

    print "Contingency table:\n"
    print contingency_table
    print "\nThe estimated proportion of {} in the experiment branch is {:.3f} and its CI is {}".format(metric, p2, error(experiment))
    print "The estimated proportion of {} in the control branch is {:.3f} and its CI is {}".format(metric, p1, error(control))
    print "The probability that the ratios are different purely by chance is {:.3f}".format(pvalue)
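# Illustrative sketch, not part of the original analysis: compare_proportions relies on a
# Wilson score interval for each branch and on Fisher's exact test applied to the 2x2
# contingency table. The counts below are made up purely to show what those two calls return.
toy_table = np.array([[30, 970],   # hypothetical control branch: 30 clients with failures, 970 without
                      [55, 945]])  # hypothetical experiment branch: 55 clients with failures, 945 without
toy_ci = proportion.proportion_confint(toy_table[0, 0], toy_table[0].sum(), method="wilson")
toy_pvalue = scipy.stats.fisher_exact(toy_table)[1]
print "Toy Wilson CI for the control proportion: ({:.3f}, {:.3f})".format(*toy_ci)
print "Toy Fisher exact test p-value: {:.3f}".format(toy_pvalue)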
"The probability that the ratios are different purely by chance is {:.3f}".format(pvalue) compare_proportions("aborts") def compare_means(metric): control_mask = frame["control"] == True experiment = weightstats.DescrStatsW(frame[-control_mask][metric]) control = weightstats.DescrStatsW(frame[control_mask][metric]) mean_diff = experiment.mean - control.mean comparator = weightstats.CompareMeans(experiment, control) ci_diff = comparator.tconfint_diff(usevar="unequal")[1] - mean_diff ci_control = control.tconfint_mean()[1] - control.mean ci_experiment = experiment.tconfint_mean()[1] - experiment.mean print "The mean number of {} of the experiment branch is {:.3f} +- {:.3f}".format(metric, experiment.mean, ci_experiment) print "The mean number of {} of the control branch is {:.3f} +- {:.3f}".format(metric, control.mean, ci_control) print "The difference between the means is {:.3f} +- {:.3f}".format(mean_diff, ci_diff) print "The probability to see this difference purely by chance is {:.3f}".format(comparator.ttest_ind(usevar="unequal")[1]) compare_means("aborts") def hist_failures(metric): aborts = frame.groupby(["control", metric])["clientID"].count().unstack().fillna(0) aborts = (100 * aborts.T / aborts.sum(axis=1)).ix[1:] # remove 0 as its high frequency hides the ones of the other values aborts.plot(kind="bar") plt.title("Histogram of submissions for {}".format(metric)) plt.ylabel("submissions %") hist_failures("aborts") compare_proportions("crashes") compare_means("crashes") hist_failures("crashes") compare_proportions("hangs") compare_means("hangs") hist_failures("hangs") control_mask = frame["control"] control = frame[control_mask]["lag"] experiment = frame[-control_mask]["lag"] # Chi-Squared Histogram distance http://www.cs.huji.ac.il/~ofirpele/publications/ECCV2010.pdf def chi2_distance(histA, histB, eps = 1e-10): d = 0.5 * np.sum([((a - b) ** 2) / (a + b + eps) for (a, b) in zip(histA, histB)]) return d def mc_permutation_test(xs, ys, num): n, k = len(xs), 0 h1 = xs.sum() h2 = ys.sum() diff = chi2_distance(h1, h2) zs = pd.concat([xs, ys]) zs.index = np.arange(0, len(zs)) for j in range(num): zs = zs.reindex(np.random.permutation(zs.index)) h1 = zs[:n].sum() h2 = zs[n:].sum() k += diff < chi2_distance(h1, h2) return k / num print "{:.3f}".format(mc_permutation_test(control, experiment, 1000)) labels = ["0", "50", "74", "110", "163", "242", "359", "532", "789", "1.17k", "1.74k", "2.57k", "3.81k", "5.65k", "8.38k", "12.42k", "18.42k", "27.3k", "40.47k", "60k"] experiment_total = pd.Series(experiment.sum(), index=labels) control_total = pd.Series(control.sum(), index=labels) normalizer = control_total.sum() control_total = 100*control_total/normalizer experiment_total = 100*experiment_total/normalizer # Normalize both histogram relative to control to keep the proportions experiment_total.plot(kind="bar", alpha=0.5, color="green", label="experiment") control_total.plot(kind="bar", alpha=0.5, color="blue", label="control") plt.xlabel("Time it takes for the message before a UI message (ms)") plt.ylabel("Lag frequency %") plt.legend()